Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
Information Retrieval
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Gijs Hendriksen
Information Retrieval
Commits
755e1489
Commit
755e1489
authored
Dec 06, 2019
by
Gijs Hendriksen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix MonetDB bulk index and add installation instructions
parent
1fc7afbc
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
38 additions
and
13 deletions
+38
-13
README.md
README.md
+24
-0
index.py
index.py
+14
-13
No files found.
README.md
0 → 100644
View file @
755e1489
# Installation
*
Install poetry
*
Run
`poetry install`
to create the virtual environment and install the dependencies
*
Run
`poetry shell`
to enter the virtual environment
## MonetDB
For MonetDB, you have to set up the actual database. You can do this as follows:
*
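Run the MonetDB Docker container (replace `/path/to/project` with the absolute path to this repository; its `.monetdb` directory is mounted at `/app` inside the container, which is where the indexer's CSV files are loaded from):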
```
$ docker run -d -p 0.0.0.0:50000:50000 --name monetdb --volume /path/to/project/.monetdb:/app monetdb/monetdb
```
*
Connect to the container using:
```
$ docker exec -it monetdb /bin/bash
```
*
In the docker shell, run the following commands:
```
# monetdb create oldduck
# monetdb release oldduck
# monetdb start oldduck
```
index.py
View file @
755e1489
...
...
@@ -183,7 +183,7 @@ class DuckDBIndex(Index):
docs
.
append
({
'docid'
:
docid
,
'name'
:
document
[
'name'
],
'length'
:
len
(
doc_terms
),
'length'
:
sum
(
doc_terms
.
values
()
),
})
amount_of_digits
=
math
.
floor
(
math
.
log10
(
len
(
data
)))
+
1
...
...
@@ -241,12 +241,15 @@ class MonetDBIndex(Index):
hostname
=
'localhost'
,
database
=
db
).
cursor
()
self
.
cursor
.
execute
(
'CREATE TABLE IF NOT EXISTS dict('
'termid INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT,'
'term VARCHAR(32) NOT NULL UNIQUE,'
# 'termid INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT,'
'termid INTEGER NOT NULL,'
# 'term VARCHAR(32) NOT NULL UNIQUE,'
'term VARCHAR(64) NOT NULL,'
'df INTEGER NOT NULL)'
)
self
.
cursor
.
execute
(
'CREATE TABLE IF NOT EXISTS docs('
'docid INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT,'
'name VARCHAR(32) NOT NULL,'
# 'docid INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT,'
'docid INTEGER NOT NULL,'
'name VARCHAR(64) NOT NULL,'
'length INTEGER NOT NULL)'
)
self
.
cursor
.
execute
(
'CREATE TABLE IF NOT EXISTS terms('
'termid INTEGER NOT NULL,'
...
...
@@ -323,7 +326,7 @@ class MonetDBIndex(Index):
docs
.
append
({
'docid'
:
docid
,
'name'
:
document
[
'name'
],
'length'
:
len
(
doc_terms
),
'length'
:
sum
(
doc_terms
.
values
()
),
})
amount_of_digits
=
math
.
floor
(
math
.
log10
(
len
(
data
)))
+
1
...
...
@@ -334,17 +337,15 @@ class MonetDBIndex(Index):
dict_table
[
'termid'
]
=
dict_table
.
index
dict_table
=
dict_table
[[
'termid'
,
'term'
,
'df'
]]
dict_table
.
to_csv
(
'dict.csv'
,
header
=
False
,
index
=
False
)
doc_table
.
to_csv
(
'docs.csv'
,
header
=
False
,
index
=
False
)
term_table
.
to_csv
(
'terms.csv'
,
header
=
False
,
index
=
False
)
dict_table
.
to_csv
(
'
.monetdb/
dict.csv'
,
header
=
False
,
index
=
False
)
doc_table
.
to_csv
(
'
.monetdb/
docs.csv'
,
header
=
False
,
index
=
False
)
term_table
.
to_csv
(
'
.monetdb/
terms.csv'
,
header
=
False
,
index
=
False
)
for
table
in
(
'dict'
,
'docs'
,
'terms'
):
filename
=
os
.
path
.
abspath
(
f'
{
table
}
.csv'
)
self
.
cursor
.
execute
(
f'DELETE FROM
{
table
}
'
)
self
.
cursor
.
execute
(
f"COPY INTO
{
table
}
FROM '
{
filename
}
'"
)
self
.
cursor
.
execute
(
f"COPY INTO
{
table
}
FROM '
/app/
{
table
}
.csv' USING DELIMITERS ',
'"
)
os
.
remove
(
f'
{
table
}
.csv'
)
os
.
remove
(
f'
.monetdb/
{
table
}
.csv'
)
self
.
cursor
.
execute
(
'COMMIT'
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment