Commit df3bb8ca authored by Gijs Hendriksen

Implement bulk index and index clearing

parent f8f1c0a3
[
{
"name": "doc1",
"body": "I put on my robe and wizard hat"
},
{
"name": "doc2",
"body": "I put my wizard hat in the wizard closet"
}
]
import duckdb import duckdb
import json
import os import os
from collections import defaultdict from collections import defaultdict
...@@ -73,8 +74,16 @@ class Index: ...@@ -73,8 +74,16 @@ class Index:
self.cursor.execute(f"INSERT INTO terms VALUES ({term_id}, {doc_id}, {frequency})") self.cursor.execute(f"INSERT INTO terms VALUES ({term_id}, {doc_id}, {frequency})")
def bulk_index(self, filename): def bulk_index(self, filename):
# TODO read data from filename and index documents with open(filename) as _file:
pass data = json.load(_file)
for document in data:
self.index(document)
def clear(self):
self.cursor.execute("DELETE FROM terms")
self.cursor.execute("DELETE FROM docs")
self.cursor.execute("DELETE FROM dict")
def print_index(self): def print_index(self):
print('dict') print('dict')
......
...@@ -6,6 +6,7 @@ from index import Index ...@@ -6,6 +6,7 @@ from index import Index
def bulk_index(index, args): def bulk_index(index, args):
filename = args.data filename = args.data
index.bulk_index(filename) index.bulk_index(filename)
index.print_index()
def query_index(index, args): def query_index(index, args):
...@@ -13,6 +14,10 @@ def query_index(index, args): ...@@ -13,6 +14,10 @@ def query_index(index, args):
# TODO use query terms to query index # TODO use query terms to query index
def clear_index(index, args):
index.clear()
def main(): def main():
parser = ArgumentParser(prog='old_duck', description='OldDuck - A Python implementation of OldDog, using DuckDB') parser = ArgumentParser(prog='old_duck', description='OldDuck - A Python implementation of OldDog, using DuckDB')
...@@ -21,12 +26,16 @@ def main(): ...@@ -21,12 +26,16 @@ def main():
parser_index = subparsers.add_parser('index') parser_index = subparsers.add_parser('index')
parser_index.add_argument('database', help='The database file to index the files to') parser_index.add_argument('database', help='The database file to index the files to')
parser_index.add_argument('data', help='The file to read and index documents from') parser_index.add_argument('data', help='The file to read and index documents from')
parser.set_defaults(func=bulk_index) parser_index.set_defaults(func=bulk_index)
parser_query = subparsers.add_parser('query') parser_query = subparsers.add_parser('query')
parser_query.add_argument('database', help='The database file to index the files to') parser_query.add_argument('database', help='The database file to query')
parser_query.add_argument('terms', help='The query terms', nargs='*') parser_query.add_argument('terms', help='The query terms', nargs='*')
parser.set_defaults(func=query_index) parser_query.set_defaults(func=query_index)
parser_clear = subparsers.add_parser('clear')
parser_clear.add_argument('database', help='The database file to clear')
parser_clear.set_defaults(func=clear_index)
args = parser.parse_args() args = parser.parse_args()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment