Verified commit 7f9aa332, authored by Camil Staps 🙂

Use ngram matching ratios in ranking instead of arbitrary function

parent 48d7b500
...@@ -68,7 +68,8 @@ NGRAMS_CI :== True ...@@ -68,7 +68,8 @@ NGRAMS_CI :== True
* Annotations to store during search. * Annotations to store during search.
*/ */
:: Annotation :: Annotation
= NGramDistance !Int //* For name search, the distance based on the number of matching ngrams = MatchingNGramsQuery !Real //* The number of matching ngrams in the query
| MatchingNGramsResult !Real //* The number of matching ngrams in the result
| Unifier !Unifier //* For type search, the unifier | Unifier !Unifier //* For type search, the unifier
| RequiredContext !(Maybe [(String,[LocationResult])]) //* For type search, context after unification | RequiredContext !(Maybe [(String,[LocationResult])]) //* For type search, context after unification
| UsedSynonyms !Int //* The number of synonyms used for unification | UsedSynonyms !Int //* The number of synonyms used for unification
......
...@@ -21,7 +21,7 @@ from Data.Func import $, on, `on`, mapSt ...@@ -21,7 +21,7 @@ from Data.Func import $, on, `on`, mapSt
import Data.Functor import Data.Functor
import Data.GenLexOrd import Data.GenLexOrd
import Data.Graphviz import Data.Graphviz
from Data.List import concatMap, groupBy, intercalate, intersect, from Data.List import concatMap, groupBy, intercalate, intersect, partition,
tails, instance Functor [], instance Foldable [] tails, instance Functor [], instance Foldable []
from Data.Map import :: Map(..), elems, filterWithKey, foldrNoKey, from Data.Map import :: Map(..), elems, filterWithKey, foldrNoKey,
foldrWithKey, fromList, get, mapSize, alter, mapWithKey, newMap, put, foldrWithKey, fromList, get, mapSize, alter, mapWithKey, newMap, put,
...@@ -391,9 +391,15 @@ where ...@@ -391,9 +391,15 @@ where
getIndexWithDistance idx n db getIndexWithDistance idx n db
# (e,db) = 'DB'.getIndex idx db # (e,db) = 'DB'.getIndex idx db
# name = getName $ fromJust $ getLocation e.value # name = getName $ fromJust $ getLocation e.value
= ((idx, [NGramDistance (0 - toInt (100.0 * toReal n ^ 2.0 / toReal (size name)))]), db) # rn = toReal n
# annots =
[ MatchingNGramsQuery (rn / qsize)
, MatchingNGramsResult (rn / toReal (length $ 'NGrams'.ngrams NGRAMS_CI NGRAMS_N name))
]
= ((idx, annots), db)
cs = [c \\ c <-: s] cs = [c \\ c <-: s]
qsize = toReal $ length $ 'NGrams'.ngrams NGRAMS_CI NGRAMS_N s
syntaxSearch :: CloogleEntry -> (Bool, [a]) syntaxSearch :: CloogleEntry -> (Bool, [a])
syntaxSearch (SyntaxEntry se) = (any (not o isEmpty o flip match cs) se.syntax_patterns, []) syntaxSearch (SyntaxEntry se) = (any (not o isEmpty o flip match cs) se.syntax_patterns, [])
...@@ -526,7 +532,12 @@ where ...@@ -526,7 +532,12 @@ where
where where
updateAnnots :: ![Annotation] ![Annotation] -> [Annotation] updateAnnots :: ![Annotation] ![Annotation] -> [Annotation]
updateAnnots [] m = m updateAnnots [] m = m
updateAnnots [a=:NGramDistance _:as] m = updateAnnots as [a:[a \\ a <- m | not (a=:NGramDistance _)]] updateAnnots [MatchingNGramsQuery r:as] m
= updateAnnots as [MatchingNGramsQuery $ maxList [r:[r \\ MatchingNGramsQuery r <- match]]:nomatch]
where (match,nomatch) = partition (\a->a=:MatchingNGramsQuery _) m
updateAnnots [MatchingNGramsResult r:as] m
= updateAnnots as [MatchingNGramsResult $ maxList [r:[r \\ MatchingNGramsResult r <- match]]:nomatch]
where (match,nomatch) = partition (\a->a=:MatchingNGramsResult _) m
updateAnnots [a=:Unifier _:as] m = updateAnnots as [a:[a \\ a <- m | not (a=:Unifier _)]] updateAnnots [a=:Unifier _:as] m = updateAnnots as [a:[a \\ a <- m | not (a=:Unifier _)]]
updateAnnots [a=:ExactResult:as] m = updateAnnots as [a:[a \\ a <- m | not a=:ExactResult]] updateAnnots [a=:ExactResult:as] m = updateAnnots as [a:[a \\ a <- m | not a=:ExactResult]]
......
...@@ -12,7 +12,8 @@ from Cloogle.DB import :: Annotation, :: CloogleEntry, :: CloogleDB ...@@ -12,7 +12,8 @@ from Cloogle.DB import :: Annotation, :: CloogleEntry, :: CloogleDB
* are given by this record. * are given by this record.
*/ */
:: RankSettings = :: RankSettings =
{ rs_ngram_distance :: !Real //* n-gram distance { rs_matching_ngrams_q :: !Real //* matching n-grams in the query
, rs_matching_ngrams_r :: !Real //* matching n-grams in the result
, rs_exact_result :: !Real //* results with an exact match , rs_exact_result :: !Real //* results with an exact match
, rs_record_field :: !Real //* record fields , rs_record_field :: !Real //* record fields
......
...@@ -28,7 +28,8 @@ import Cloogle.Search ...@@ -28,7 +28,8 @@ import Cloogle.Search
distance :: !RankSettings !CloogleEntry ![Annotation] -> Real distance :: !RankSettings !CloogleEntry ![Annotation] -> Real
distance settings entry annots = let info = symbolicDistance entry annots in distance settings entry annots = let info = symbolicDistance entry annots in
settings.rs_ngram_distance * info.rs_ngram_distance + settings.rs_matching_ngrams_q * info.rs_matching_ngrams_q +
settings.rs_matching_ngrams_r * info.rs_matching_ngrams_r +
settings.rs_exact_result * info.rs_exact_result + settings.rs_exact_result * info.rs_exact_result +
settings.rs_record_field * info.rs_record_field + settings.rs_record_field * info.rs_record_field +
settings.rs_constructor * info.rs_constructor + settings.rs_constructor * info.rs_constructor +
...@@ -43,7 +44,8 @@ distance settings entry annots = let info = symbolicDistance entry annots in ...@@ -43,7 +44,8 @@ distance settings entry annots = let info = symbolicDistance entry annots in
symbolicDistance :: !CloogleEntry ![Annotation] -> RankInformation symbolicDistance :: !CloogleEntry ![Annotation] -> RankInformation
symbolicDistance entry annots = symbolicDistance entry annots =
{ rs_ngram_distance = case [d \\ NGramDistance d <- annots] of [d:_] -> toReal d; _ -> 0.0 { rs_matching_ngrams_q = case [r \\ MatchingNGramsQuery r <- annots] of [r:_] -> r; _ -> 0.0
, rs_matching_ngrams_r = case [r \\ MatchingNGramsResult r <- annots] of [r:_] -> r; _ -> 0.0
, rs_exact_result = if (isEmpty [a \\ a=:ExactResult <- annots]) 0.0 1.0 , rs_exact_result = if (isEmpty [a \\ a=:ExactResult <- annots]) 0.0 1.0
, rs_record_field = if entry=:(FunctionEntry {fe_kind=RecordField}) 1.0 0.0 , rs_record_field = if entry=:(FunctionEntry {fe_kind=RecordField}) 1.0 0.0
, rs_constructor = if entry=:(FunctionEntry {fe_kind=Constructor}) 1.0 0.0 , rs_constructor = if entry=:(FunctionEntry {fe_kind=Constructor}) 1.0 0.0
...@@ -123,7 +125,8 @@ findRankSettings constraints cdb w ...@@ -123,7 +125,8 @@ findRankSettings constraints cdb w
| isError out = (Error "Failed to read z3 output", cdb, w) | isError out = (Error "Failed to read z3 output", cdb, w)
# out = split "\n" $ fromOk out # out = split "\n" $ fromOk out
# settings = findSettings out # settings = findSettings out
{ rs_ngram_distance = 0.0 { rs_matching_ngrams_q = 0.0
, rs_matching_ngrams_r = 0.0
, rs_exact_result = 0.0 , rs_exact_result = 0.0
, rs_record_field = 0.0 , rs_record_field = 0.0
, rs_constructor = 0.0 , rs_constructor = 0.0
...@@ -144,7 +147,8 @@ where ...@@ -144,7 +147,8 @@ where
# name = s % (14,size s-9) // strip off ' (define-fun ' and ' () Real' # name = s % (14,size s-9) // strip off ' (define-fun ' and ' () Real'
# val = toReal {#c \\ c <-: v | isDigit c || c == '.' || c == '-'} # val = toReal {#c \\ c <-: v | isDigit c || c == '.' || c == '-'}
# rs = case name of # rs = case name of
"rs_ngram_distance" -> {rs & rs_ngram_distance =val} "rs_matching_ngrams_q" -> {rs & rs_matching_ngrams_q =val}
"rs_matching_ngrams_r" -> {rs & rs_matching_ngrams_r =val}
"rs_exact_result" -> {rs & rs_exact_result =val} "rs_exact_result" -> {rs & rs_exact_result =val}
"rs_record_field" -> {rs & rs_record_field =val} "rs_record_field" -> {rs & rs_record_field =val}
"rs_constructor" -> {rs & rs_constructor =val} "rs_constructor" -> {rs & rs_constructor =val}
...@@ -166,7 +170,8 @@ rankConstraints constraints cdb ...@@ -166,7 +170,8 @@ rankConstraints constraints cdb
= (default ++ constraints,cdb) = (default ++ constraints,cdb)
where where
default = default =
[ "(declare-const rs_ngram_distance Real)" [ "(declare-const rs_matching_ngrams_q Real)"
, "(declare-const rs_matching_ngrams_r Real)"
, "(declare-const rs_exact_result Real)" , "(declare-const rs_exact_result Real)"
, "(declare-const rs_record_field Real)" , "(declare-const rs_record_field Real)"
, "(declare-const rs_constructor Real)" , "(declare-const rs_constructor Real)"
...@@ -217,7 +222,8 @@ where ...@@ -217,7 +222,8 @@ where
formula :: !RankInformation -> String formula :: !RankInformation -> String
formula ri = sum formula ri = sum
[ "* rs_ngram_distance " <+ ri.rs_ngram_distance [ "* rs_matching_ngrams_q " <+ ri.rs_matching_ngrams_q
, "* rs_matching_ngrams_r " <+ ri.rs_matching_ngrams_r
, "* rs_exact_result " <+ ri.rs_exact_result , "* rs_exact_result " <+ ri.rs_exact_result
, "* rs_record_field " <+ ri.rs_record_field , "* rs_record_field " <+ ri.rs_record_field
, "* rs_constructor " <+ ri.rs_constructor , "* rs_constructor " <+ ri.rs_constructor
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment