Commit df3bb8ca authored by Gijs Hendriksen

Implement bulk index and index clearing

parent f8f1c0a3
[
{
"name": "doc1",
"body": "I put on my robe and wizard hat"
},
{
"name": "doc2",
"body": "I put my wizard hat in the wizard closet"
}
]
import duckdb
import json
import os
from collections import defaultdict
......@@ -73,8 +74,16 @@ class Index:
self.cursor.execute(f"INSERT INTO terms VALUES ({term_id}, {doc_id}, {frequency})")
def bulk_index(self, filename):
    """Index every document found in a JSON file.

    The parsed JSON value is iterated and each element is passed, unchanged
    and in order, to ``self.index``. Presumably the file holds a JSON array
    of document objects (e.g. ``{"name": ..., "body": ...}``) -- confirm
    against what ``self.index`` expects.

    Args:
        filename: Path of the JSON file to read.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8 by specification, so decode it explicitly instead
    # of relying on the platform's default encoding.
    with open(filename, encoding="utf-8") as json_file:
        documents = json.load(json_file)

    # The file is fully parsed and closed before indexing starts.
    for document in documents:
        self.index(document)
def clear(self):
    """Delete all rows from the ``terms``, ``docs`` and ``dict`` tables."""
    # Tables are emptied in this fixed order: terms, docs, dict.
    for table in ("terms", "docs", "dict"):
        self.cursor.execute(f"DELETE FROM {table}")
def print_index(self):
print('dict')
......
......@@ -6,6 +6,7 @@ from index import Index
def bulk_index(index, args):
    """Handle the ``index`` sub-command.

    Passes ``args.data`` to ``index.bulk_index`` and then calls
    ``index.print_index()``.

    Args:
        index: Object providing ``bulk_index(filename)`` and ``print_index()``.
        args: Parsed command-line arguments; only ``args.data`` is read.
    """
    index.bulk_index(args.data)
    index.print_index()
def query_index(index, args):
......@@ -13,6 +14,10 @@ def query_index(index, args):
# TODO use query terms to query index
def clear_index(index, args):
    """Handle the ``clear`` sub-command by calling ``index.clear()``.

    Args:
        index: Object providing a ``clear()`` method.
        args: Accepted so the handler matches the other sub-command
            handlers; it is not read here.
    """
    index.clear()
def main():
parser = ArgumentParser(prog='old_duck', description='OldDuck - A Python implementation of OldDog, using DuckDB')
......@@ -21,12 +26,16 @@ def main():
parser_index = subparsers.add_parser('index')
parser_index.add_argument('database', help='The database file to index the files to')
parser_index.add_argument('data', help='The file to read and index documents from')
parser.set_defaults(func=bulk_index)
parser_index.set_defaults(func=bulk_index)
parser_query = subparsers.add_parser('query')
parser_query.add_argument('database', help='The database file to index the files to')
parser_query.add_argument('database', help='The database file to query')
parser_query.add_argument('terms', help='The query terms', nargs='*')
parser.set_defaults(func=query_index)
parser_query.set_defaults(func=query_index)
parser_clear = subparsers.add_parser('clear')
parser_clear.add_argument('database', help='The database file to clear')
parser_clear.set_defaults(func=clear_index)
args = parser.parse_args()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment