Verified commit 7f9aa332, authored by Camil Staps 🚀

Use ngram matching ratios in ranking instead of arbitrary function

parent 48d7b500
......@@ -68,7 +68,8 @@ NGRAMS_CI :== True
* Annotations to store during search.
*/
:: Annotation
= NGramDistance !Int //* For name search, the distance based on the number of matching ngrams
= MatchingNGramsQuery !Real //* The number of matching ngrams in the query
| MatchingNGramsResult !Real //* The number of matching ngrams in the result
| Unifier !Unifier //* For type search, the unifier
| RequiredContext !(Maybe [(String,[LocationResult])]) //* For type search, context after unification
| UsedSynonyms !Int //* The number of synonyms used for unification
......
......@@ -21,7 +21,7 @@ from Data.Func import $, on, `on`, mapSt
import Data.Functor
import Data.GenLexOrd
import Data.Graphviz
from Data.List import concatMap, groupBy, intercalate, intersect,
from Data.List import concatMap, groupBy, intercalate, intersect, partition,
tails, instance Functor [], instance Foldable []
from Data.Map import :: Map(..), elems, filterWithKey, foldrNoKey,
foldrWithKey, fromList, get, mapSize, alter, mapWithKey, newMap, put,
......@@ -391,9 +391,15 @@ where
// Looks up the entry stored at `idx` and pairs the index with n-gram
// annotations derived from `n`; the database returned by the lookup is
// threaded through as the second component of the result.
// NOTE(review): `n` is presumably the number of n-grams shared between the
// query and this entry's name -- confirm against the caller.
getIndexWithDistance idx n db
# (e,db) = 'DB'.getIndex idx db
// fromJust assumes getLocation yields a location for this entry.
# name = getName $ fromJust $ getLocation e.value
// NOTE(review): this listing is a diff rendered without +/- markers. The
// next line looks like the previous result expression (a single
// NGramDistance score computed from n squared over the name size), which the
// lines after it supersede -- confirm against the commit before relying on it.
= ((idx, [NGramDistance (0 - toInt (100.0 * toReal n ^ 2.0 / toReal (size name)))]), db)
// Convert once so both ratios below use real division.
# rn = toReal n
# annots =
// n relative to qsize (qsize: the n-gram count of `s`, defined below).
[ MatchingNGramsQuery (rn / qsize)
// n relative to the number of n-grams generated from the entry's name.
, MatchingNGramsResult (rn / toReal (length $ 'NGrams'.ngrams NGRAMS_CI NGRAMS_N name))
]
= ((idx, annots), db)
// The elements of the array `s` as a list.
// NOTE(review): `s` is bound outside this view; presumably the search string -- confirm.
cs = [c \\ c <-: s]
// Number of n-grams generated from `s` (with the NGRAMS_CI / NGRAMS_N settings), as a Real.
qsize = toReal $ length $ 'NGrams'.ngrams NGRAMS_CI NGRAMS_N s
syntaxSearch :: CloogleEntry -> (Bool, [a])
syntaxSearch (SyntaxEntry se) = (any (not o isEmpty o flip match cs) se.syntax_patterns, [])
......@@ -526,7 +532,12 @@ where
where
updateAnnots :: ![Annotation] ![Annotation] -> [Annotation]
updateAnnots [] m = m
updateAnnots [a=:NGramDistance _:as] m = updateAnnots as [a:[a \\ a <- m | not (a=:NGramDistance _)]]
updateAnnots [MatchingNGramsQuery r:as] m
= updateAnnots as [MatchingNGramsQuery $ maxList [r:[r \\ MatchingNGramsQuery r <- match]]:nomatch]
where (match,nomatch) = partition (\a->a=:MatchingNGramsQuery _) m
updateAnnots [MatchingNGramsResult r:as] m
= updateAnnots as [MatchingNGramsResult $ maxList [r:[r \\ MatchingNGramsResult r <- match]]:nomatch]
where (match,nomatch) = partition (\a->a=:MatchingNGramsResult _) m
updateAnnots [a=:Unifier _:as] m = updateAnnots as [a:[a \\ a <- m | not (a=:Unifier _)]]
updateAnnots [a=:ExactResult:as] m = updateAnnots as [a:[a \\ a <- m | not a=:ExactResult]]
......
......@@ -12,7 +12,8 @@ from Cloogle.DB import :: Annotation, :: CloogleEntry, :: CloogleDB
* are given by this record.
*/
:: RankSettings =
{ rs_ngram_distance :: !Real //* n-gram distance
{ rs_matching_ngrams_q :: !Real //* matching n-grams in the query
, rs_matching_ngrams_r :: !Real //* matching n-grams in the result
, rs_exact_result :: !Real //* results with an exact match
, rs_record_field :: !Real //* record fields
......
......@@ -28,7 +28,8 @@ import Cloogle.Search
distance :: !RankSettings !CloogleEntry ![Annotation] -> Real
distance settings entry annots = let info = symbolicDistance entry annots in
settings.rs_ngram_distance * info.rs_ngram_distance +
settings.rs_matching_ngrams_q * info.rs_matching_ngrams_q +
settings.rs_matching_ngrams_r * info.rs_matching_ngrams_r +
settings.rs_exact_result * info.rs_exact_result +
settings.rs_record_field * info.rs_record_field +
settings.rs_constructor * info.rs_constructor +
......@@ -43,7 +44,8 @@ distance settings entry annots = let info = symbolicDistance entry annots in
symbolicDistance :: !CloogleEntry ![Annotation] -> RankInformation
symbolicDistance entry annots =
{ rs_ngram_distance = case [d \\ NGramDistance d <- annots] of [d:_] -> toReal d; _ -> 0.0
{ rs_matching_ngrams_q = case [r \\ MatchingNGramsQuery r <- annots] of [r:_] -> r; _ -> 0.0
, rs_matching_ngrams_r = case [r \\ MatchingNGramsResult r <- annots] of [r:_] -> r; _ -> 0.0
, rs_exact_result = if (isEmpty [a \\ a=:ExactResult <- annots]) 0.0 1.0
, rs_record_field = if entry=:(FunctionEntry {fe_kind=RecordField}) 1.0 0.0
, rs_constructor = if entry=:(FunctionEntry {fe_kind=Constructor}) 1.0 0.0
......@@ -123,7 +125,8 @@ findRankSettings constraints cdb w
| isError out = (Error "Failed to read z3 output", cdb, w)
# out = split "\n" $ fromOk out
# settings = findSettings out
{ rs_ngram_distance = 0.0
{ rs_matching_ngrams_q = 0.0
, rs_matching_ngrams_r = 0.0
, rs_exact_result = 0.0
, rs_record_field = 0.0
, rs_constructor = 0.0
......@@ -144,7 +147,8 @@ where
# name = s % (14,size s-9) // strip off ' (define-fun ' and ' () Real'
# val = toReal {#c \\ c <-: v | isDigit c || c == '.' || c == '-'}
# rs = case name of
"rs_ngram_distance" -> {rs & rs_ngram_distance =val}
"rs_matching_ngrams_q" -> {rs & rs_matching_ngrams_q =val}
"rs_matching_ngrams_r" -> {rs & rs_matching_ngrams_r =val}
"rs_exact_result" -> {rs & rs_exact_result =val}
"rs_record_field" -> {rs & rs_record_field =val}
"rs_constructor" -> {rs & rs_constructor =val}
......@@ -166,7 +170,8 @@ rankConstraints constraints cdb
= (default ++ constraints,cdb)
where
default =
[ "(declare-const rs_ngram_distance Real)"
[ "(declare-const rs_matching_ngrams_q Real)"
, "(declare-const rs_matching_ngrams_r Real)"
, "(declare-const rs_exact_result Real)"
, "(declare-const rs_record_field Real)"
, "(declare-const rs_constructor Real)"
......@@ -217,7 +222,8 @@ where
formula :: !RankInformation -> String
formula ri = sum
[ "* rs_ngram_distance " <+ ri.rs_ngram_distance
[ "* rs_matching_ngrams_q " <+ ri.rs_matching_ngrams_q
, "* rs_matching_ngrams_r " <+ ri.rs_matching_ngrams_r
, "* rs_exact_result " <+ ri.rs_exact_result
, "* rs_record_field " <+ ri.rs_record_field
, "* rs_constructor " <+ ri.rs_constructor
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment