# Patch summary (git unified diff touching data.json, index.py and main.py).
#
# data.json (new file):
#   A JSON array of two sample documents, "doc1" and "doc2", each an object
#   with a "name" and a "body" string.
#
# index.py:
#   * Adds `import json`.
#   * Index.bulk_index(filename): replaces the TODO/`pass` stub with code that
#     opens `filename`, parses it with json.load, and calls self.index() once
#     per element of the parsed data.
#   * Index.clear(): new method that runs "DELETE FROM terms",
#     "DELETE FROM docs" and "DELETE FROM dict" on self.cursor, in that order.
#
# main.py:
#   * bulk_index(): now calls index.print_index() after index.bulk_index().
#   * clear_index(index, args): new handler that calls index.clear().
#   * main(): the two set_defaults(func=...) calls move from the top-level
#     `parser` to `parser_index` and `parser_query`; a new 'clear' subcommand
#     (one positional argument, 'database') is registered with
#     func=clear_index; the help text of the query 'database' argument becomes
#     'The database file to query'.
#
# NOTE(review): the set_defaults move is presumably there so that each
#   subcommand dispatches to its own handler rather than sharing one `func`
#   default on the top-level parser -- confirm against argparse behaviour.
# NOTE(review): open(filename) passes no explicit encoding; consider
#   encoding="utf-8" for JSON input -- confirm the intended platforms.
# NOTE(review): the unchanged context line in index.py builds its INSERT
#   statement with an f-string; not touched by this patch, but worth checking
#   whether a parameterized query is possible there.
# NOTE(review): this copy of the patch appears to have had its line breaks
#   collapsed (everything sits on two physical lines), so the indentation and
#   blank lines of the original hunks cannot be recovered from it; re-export
#   the diff from the repository before attempting to apply it.
diff --git a/data.json b/data.json new file mode 100644 index 0000000000000000000000000000000000000000..01d2d7a40f228978863d35cf21757efa14fadf1e --- /dev/null +++ b/data.json @@ -0,0 +1,10 @@ +[ + { + "name": "doc1", + "body": "I put on my robe and wizard hat" + }, + { + "name": "doc2", + "body": "I put my wizard hat in the wizard closet" + } +] diff --git a/index.py b/index.py index 04a95696257bfd425ee9e57b5b237c1b21e769a0..696075e790e9f573c37a8468cb92b8fceeb28b7c 100644 --- a/index.py +++ b/index.py @@ -1,4 +1,5 @@ import duckdb +import json import os from collections import defaultdict @@ -73,8 +74,16 @@ class Index: self.cursor.execute(f"INSERT INTO terms VALUES ({term_id}, {doc_id}, {frequency})") def bulk_index(self, filename): - # TODO read data from filename and index documents - pass + with open(filename) as _file: + data = json.load(_file) + + for document in data: + self.index(document) + + def clear(self): + self.cursor.execute("DELETE FROM terms") + self.cursor.execute("DELETE FROM docs") + self.cursor.execute("DELETE FROM dict") def print_index(self): print('dict') diff --git a/main.py b/main.py index 665d6e3b3c80008964b493879c0e2be13499aac7..951cdcc48361c2471031dbbd52f35531f2b34d66 100644 --- a/main.py +++ b/main.py @@ -6,6 +6,7 @@ from index import Index def bulk_index(index, args): filename = args.data index.bulk_index(filename) + index.print_index() def query_index(index, args): @@ -13,6 +14,10 @@ def query_index(index, args): # TODO use query terms to query index +def clear_index(index, args): + index.clear() + + def main(): parser = ArgumentParser(prog='old_duck', description='OldDuck - A Python implementation of OldDog, using DuckDB') @@ -21,12 +26,16 @@ def main(): parser_index = subparsers.add_parser('index') parser_index.add_argument('database', help='The database file to index the files to') parser_index.add_argument('data', help='The file to read and index documents from') - parser.set_defaults(func=bulk_index) + 
parser_index.set_defaults(func=bulk_index) parser_query = subparsers.add_parser('query') - parser_query.add_argument('database', help='The database file to index the files to') + parser_query.add_argument('database', help='The database file to query') parser_query.add_argument('terms', help='The query terms', nargs='*') - parser.set_defaults(func=query_index) + parser_query.set_defaults(func=query_index) + + parser_clear = subparsers.add_parser('clear') + parser_clear.add_argument('database', help='The database file to clear') + parser_clear.set_defaults(func=clear_index) args = parser.parse_args()