Commit 9b5c4bf3 authored by Gijs Hendriksen

Add explicit benchmarking command

parent 755e1489
......@@ -168,7 +168,7 @@ class DuckDBIndex(Index):
'df': 0,
} for term in doc_terms])
dict_table = (pd.concat([dict_table, new_dict], ignore_index=True)
dict_table = (pd.concat([dict_table, new_dict], ignore_index=True, sort=False)
.drop_duplicates('term'))
dict_table.loc[dict_table['term'].isin(doc_terms), 'df'] += 1
......@@ -311,7 +311,7 @@ class MonetDBIndex(Index):
'df': 0,
} for term in doc_terms])
dict_table = (pd.concat([dict_table, new_dict], ignore_index=True)
dict_table = (pd.concat([dict_table, new_dict], ignore_index=True, sort=False)
.drop_duplicates('term'))
dict_table.loc[dict_table['term'].isin(doc_terms), 'df'] += 1
......
This diff is collapsed.
def bm25(terms, disjunctive=False):
def bm25(terms, disjunctive=True):
term_list = ', '.join([f"'{term}'" for term in terms])
constraint = '' if disjunctive else 'HAVING COUNT(distinct termid) = (SELECT COUNT(*) FROM termids)'
......@@ -20,7 +20,3 @@ def bm25(terms, disjunctive=False):
FROM subscores GROUP BY docid) AS scores JOIN docs ON
scores.docid=docs.docid ORDER BY score DESC;
"""
def tfidf(terms, disjunctive=False):
return ''
......@@ -11,8 +11,6 @@ class Search:
def search(self, terms, method='bm25'):
if method == 'bm25':
sql_query = query.bm25(terms)
elif method == 'tfidf':
sql_query = query.tfidf(terms)
else:
raise NotImplementedError(f'Search method "{method}" was not implemented')
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment