We are planning to upgrade GitLab to the latest version this Friday morning. Expect some downtime! (oh, and there are still some GitLab stickers available at the C&CZ office ;-) )

Verified Commit 31eced6f authored by Camil Staps's avatar Camil Staps 🐧

Add strictness annotation to Regex constructors

parent 526028f7
......@@ -9,15 +9,15 @@ import Regex.Parse
import Regex.Print
:: Regex
= Literal [Char]
| CharacterClass Bool [(Char,Char)]
| Concat [Regex]
| Any [Regex]
| Repeated Greediness Int (Maybe Int) Regex
| Group GroupId Regex
= Literal ![!Char!]
| CharacterClass !Bool ![!(!Char,!Char)!]
| Concat ![!Regex!]
| Any ![!Regex!]
| Repeated !Greediness !Int !(Maybe Int) !Regex
| Group !GroupId !Regex
| StartOfString
| EndOfString
| WordBreak Bool
| WordBreak !Bool
/**
* @type Regex
......@@ -37,6 +37,6 @@ Optional g :== Repeated g 0 (Just 1)
:: Greediness :== Bool
:: Position :== Int
:: GroupId = Named String | NotNamed Int
:: GroupId = Named !String | NotNamed !Int
instance == GroupId
......@@ -4,4 +4,4 @@ from Regex import :: Regex, :: Position, :: GroupId
:: Match :== (Position, [Char], [(GroupId, [Char])])
match :: !Regex [Char] -> [Match]
match :: !Regex ![Char] -> [Match]
implementation module Regex.Match
import _SystemStrictLists
import StdBool
from StdFunc import flip, id, o
import qualified StdOverloadedList
from StdOverloadedList import class List, ++|, Length
import StdTuple
from Data.Func import $
......@@ -12,37 +15,43 @@ import Data.Maybe
import Regex
:: MatchStatus
= { skipped :: [Char]
, matched :: [Char]
, unseen :: [Char]
, can_skip :: Bool
, groups :: [(GroupId, [Char])]
= { skipped :: ![Char]
, matched :: ![Char]
, unseen :: ![Char]
, can_skip :: !Bool
, groups :: ![(GroupId, [Char])]
}
instance zero MatchStatus
where zero = {skipped=[], matched=[], unseen=[], can_skip=True, groups=[]}
match :: !Regex [Char] -> [Match]
match :: !Regex ![Char] -> [Match]
match r s = [(length st.skipped, st.matched, st.groups) \\ st <- match` r stat]
where stat = {zero & unseen=s}
match` :: Regex MatchStatus -> [MatchStatus]
match` :: !Regex !MatchStatus -> [MatchStatus]
match` r=:(Literal cs) st
= matchOrSkip r st $
if (isPrefixOf cs st.unseen) [eat (length cs) st] []
if (isPrefixOf cs st.unseen) [eat (Length cs) st] []
where
isPrefixOf :: [!Char!] [Char] -> Bool
isPrefixOf [!c:cs!] xs = case xs of
[x:xs] -> c==x && isPrefixOf cs xs
[] -> False
isPrefixOf [!!] _ = True
match` (CharacterClass _ _) {unseen=[]}
= []
match` r=:(CharacterClass n cs) st=:{matched,unseen=[u:us]}
= matchOrSkip r st $
if (if n not id $ any (\(f,t) -> f <= u && u <= t) cs) [eat 1 st] []
match` (Concat []) st
if (if n not id $ 'StdOverloadedList'.Any (\(f,t) -> f <= u && u <= t) cs) [eat 1 st] []
match` (Concat [|]) st
= [st]
match` (Concat [r]) st
match` (Concat [|r]) st
= match` r st
match` (Concat [r:rs]) st
match` (Concat [|r:rs]) st
= [st`` \\ st` <- match` r st, st`` <- match` (Concat rs) st`]
match` (Any rs) st
= flatten $ map (flip match` st) rs
= flatten [match` r st \\ r <|- rs]
match` (Repeated _ 0 _ _) st=:{unseen=[]}
= [st]
match` tr=:(Repeated True 0 Nothing r) st
......@@ -51,9 +60,9 @@ match` (Repeated g f (Just 0) r) st
= [st]
match` (Repeated g f t r) st
| isJust t && fromJust t <= f
= match` (Concat $ repeatn f r) st
= match` (Concat $ repeatn f r ++ [Any $ if g id reverse opts]) st
where opts = [Concat [r, Repeated g 0 (flip (-) (f+1) <$> t) r], Concat []]
= match` (Concat [|r \\ _ <- [1..f]]) st
= match` (Concat $ [|r \\ _ <- [1..f]] ++| [|Any [|r \\ r <- if g id reverse opts]]) st
where opts = [Concat [|r, Repeated g 0 (flip (-) (f+1) <$> t) r], Concat [|]]
match` (Group id r) st
= [{st` & groups=put id (drop (length st.matched) st`.matched) st`.groups}
\\ st` <- match` r st]
......@@ -67,7 +76,7 @@ where
st` = {st & can_skip=False}
atWordBreak
| isEmpty st.skipped && isEmpty st.matched = nextIsWord
| isEmpty st.matched = isWordChar (last st.skipped) <> nextIsWord
| isEmpty st.matched = isWordChar (hd st.skipped) <> nextIsWord
| otherwise = isWordChar (last st.matched) <> nextIsWord
nextIsWord = not (isEmpty st.unseen) && isWordChar (hd st.unseen)
isWordChar c = ('A' <= c && c <= 'Z')
......@@ -75,15 +84,17 @@ where
|| ('0' <= c && c <= '9')
|| c == '_'
skip :: Int MatchStatus -> MatchStatus
skip :: !Int !MatchStatus -> MatchStatus
skip n st
= {st & skipped=st.skipped ++ take n st.unseen, unseen=drop n st.unseen}
= {st & skipped=reverse skip ++ st.skipped, unseen=rest}
where
(skip,rest) = splitAt n st.unseen
eat :: Int MatchStatus -> MatchStatus
eat :: !Int !MatchStatus -> MatchStatus
eat n st
= {st & matched=st.matched ++ take n st.unseen, unseen=drop n st.unseen}
matchOrSkip :: Regex MatchStatus [MatchStatus] -> [MatchStatus]
matchOrSkip :: !Regex !MatchStatus ![MatchStatus] -> [MatchStatus]
matchOrSkip r st sts = sts ++
if can_skip (match` r $ skip 1 st) []
where
......
implementation module Regex.Parse
import _SystemArray
import _SystemStrictLists
import StdBool
from StdFunc import o, flip
import StdList
from StdOverloadedList import class List, ++|, Flatten, Reverse, Span
import StdString
import StdTuple
......@@ -34,61 +36,57 @@ instance toRegexInput String where toRegexInput s = [c \\ c <-: s]
Concat rs = 'Regex'.Concat $ concat rs
where
concat [r=:(Literal _):rs] = [Literal $ flatten [l \\ Literal l <- [r:lits]]:concat rest]
concat [|r=:(Literal _):rs] = [|Literal $ Flatten [l \\ Literal l <- [r:lits]]:concat rest]
where
(lits,rest) = span (\r -> r=:(Literal _)) rs
concat [r:rs] = [r:concat rs]
concat [] = []
(lits,rest) = Span (\r -> r=:(Literal _)) rs
concat [|r:rs] = [|r:concat rs]
concat [|] = [|]
regex :: a -> Regex | toRegexInput a
regex s = fromOk $ compile s
compile :: a -> MaybeErrorString Regex | toRegexInput a
compile cs = compile` 0 [] (toRegexInput cs)
compile cs = compile` 0 [|] (toRegexInput cs)
where
compile` :: Int [Regex] [Char] -> MaybeErrorString Regex
compile` _ [] [] = Ok $ Concat []
compile` _ [r] [] = Ok r
compile` _ rs [] = Ok $ Concat $ reverse rs
where
itlt :: [a] -> ([a], a)
itlt [x] = ([], x)
itlt [x:xs] = let (it,lt) = itlt xs in ([x:it], lt)
compile` :: Int [!Regex!] [Char] -> MaybeErrorString Regex
compile` _ [|] [] = Ok $ Concat [!!]
compile` _ [|r] [] = Ok r
compile` _ rs [] = Ok $ Concat $ Reverse rs
compile` g rs cs = parse g rs cs >>= uncurry3 compile`
parse :: Int [Regex] [Char] -> MaybeErrorString (Int, [Regex], [Char])
parse :: Int [!Regex!] [Char] -> MaybeErrorString (Int, [!Regex!], [Char])
parse g rs [] = Ok (g, rs, [])
parse g [r:rs] ['+':'?':cs] = Ok (g, [Some False r:rs], cs)
parse g [r:rs] ['+':cs] = Ok (g, [Some True r:rs], cs)
parse g [r:rs] ['*':'?':cs] = Ok (g, [Many False r:rs], cs)
parse g [r:rs] ['*':cs] = Ok (g, [Many True r:rs], cs)
parse g [r:rs] ['?':'?':cs] = Ok (g, [Optional False r:rs], cs)
parse g [r:rs] ['?':cs] = Ok (g, [Optional True r:rs], cs)
parse g [|r:rs] ['+':'?':cs] = Ok (g, [|Some False r:rs], cs)
parse g [|r:rs] ['+':cs] = Ok (g, [|Some True r:rs], cs)
parse g [|r:rs] ['*':'?':cs] = Ok (g, [|Many False r:rs], cs)
parse g [|r:rs] ['*':cs] = Ok (g, [|Many True r:rs], cs)
parse g [|r:rs] ['?':'?':cs] = Ok (g, [|Optional False r:rs], cs)
parse g [|r:rs] ['?':cs] = Ok (g, [|Optional True r:rs], cs)
parse g rs ['|':cs]
= (\(g,rs`,cs) -> (g,[Any [revConcat rs:rs`]],cs)) <$> alternatives g [] cs
= (\(g,rs`,cs) -> (g,[|Any [|revConcat rs:rs`]],cs)) <$> alternatives g [|] cs
where
alternatives :: Int [Regex] [Char] -> MaybeErrorString (Int, [Regex], [Char])
alternatives :: Int [!Regex!] [Char] -> MaybeErrorString (Int, [!Regex!], [Char])
alternatives g rs []
= Ok (g, [revConcat rs], [])
= Ok (g, [|revConcat rs], [])
alternatives g rs cs=:[')':_]
= Ok (g, [revConcat rs], cs)
= Ok (g, [|revConcat rs], cs)
alternatives g rs ['|':cs]
= (\(g,rs`,cs) -> (g, [revConcat rs:rs`], cs)) <$> alternatives g [] cs
= (\(g,rs`,cs) -> (g, [|revConcat rs:rs`], cs)) <$> alternatives g [|] cs
alternatives g rs cs
= parse g rs cs >>= uncurry3 alternatives
parse g [r:rs] ['{':cs]
parse g [|r:rs] ['{':cs]
| isNothing fr = Error "couldn't parse from part of quantifier"
| hd cs` == '}'
| length cs` > 1 && cs`!!1 == '?'
= Ok (g, [Repeated False fr` (Just fr`) r:rs], drop 2 cs`)
= Ok (g, [Repeated True fr` (Just fr`) r:rs], tl cs`)
= Ok (g, [|Repeated False fr` (Just fr`) r:rs], drop 2 cs`)
= Ok (g, [|Repeated True fr` (Just fr`) r:rs], tl cs`)
| isNothing to = Error "couldn't parse to part of quantifier"
| hd cs` == ',' && hd cs`` == '}'
| length cs`` > 1 && cs``!!1 == '?'
= Ok (g, [Repeated False fr` to` r:rs], drop 2 cs``)
= Ok (g, [Repeated True fr` to` r:rs], tl cs``)
= Ok (g, [|Repeated False fr` to` r:rs], drop 2 cs``)
= Ok (g, [|Repeated True fr` to` r:rs], tl cs``)
| otherwise = Error "couldn't parse quantifier"
where
(fr`, to`) = (fromJust fr, fromJust to)
......@@ -104,26 +102,26 @@ where
toEnd cs = Just <$> parseInt cs
parse g rs ['[':'^':cs]
= (\(cc,cs) -> (g, [CharacterClass True cc:rs], cs)) <$> charClass [] cs
= (\(cc,cs) -> (g, [|CharacterClass True cc:rs], cs)) <$> charClass [|] cs
parse g rs ['[':cs]
= (\(cc,cs) -> (g, [CharacterClass False cc:rs], cs)) <$> charClass [] cs
= (\(cc,cs) -> (g, [|CharacterClass False cc:rs], cs)) <$> charClass [|] cs
parse g rs ['(':'?':':':cs] = app <$> inGroup g [] cs
/*parse g rs ['(':'?':':':cs] = app <$> inGroup g [|] cs
where
app (g, [], cs) = (g, rs, cs)
app (g, [r], cs) = (g, [r:rs], cs)
app (g, rs`, cs) = (g, [Concat $ reverse rs`:rs], cs)
app (g, [|], cs) = (g, rs, cs)
app (g, [|r], cs) = (g, [|r:rs], cs)
app (g, rs`, cs) = (g, [|Concat $ Reverse rs`:rs], cs)*/
parse g rs ['(':cs] = (\(g`,rs`,cs) -> (g`, [Group (NotNamed g) (cc rs`):rs], cs)) <$> inGroup (g+1) [] cs
where cc [r] = r; cc rs = Concat $ reverse rs
/*parse g rs ['(':cs] = (\(g`,rs`,cs) -> (g`, [|Group (NotNamed g) (cc rs`):rs], cs)) <$> inGroup (g+1) [|] cs
where cc [|r] = r; cc rs = Concat $ Reverse rs*/
parse g rs cs
= (\(c,cs) -> (g, [CharacterClass False c:rs],cs)) <$> shorthandClass cs
<|> (\(a,cs) -> (g, [a:rs], cs)) <$> anchor cs
<|> (\(c,cs) -> (g, [Literal [c]:rs], cs)) <$> singleChar cs
= (\(c,cs) -> (g, [|CharacterClass False c:rs],cs)) <$> shorthandClass cs
<|> (\(a,cs) -> (g, [|a:rs], cs)) <$> anchor cs
<|> (\(c,cs) -> (g, [|Literal [|c]:rs], cs)) <$> singleChar cs
<|> Error ("Cannot parse " +++ toString cs)
inGroup :: Int [Regex] [Char] -> MaybeErrorString (Int, [Regex], [Char])
inGroup :: Int [!Regex!] [Char] -> MaybeErrorString (Int, [!Regex!], [Char])
inGroup g grp [] = Error "unclosed group"
inGroup g grp [')':cs] = Ok (g, grp, cs)
inGroup g grp cs = parse g grp cs >>= uncurry3 inGroup
......@@ -163,27 +161,27 @@ singleChar [c:cs]
singleChar []
= Error "expected character"
charClass :: [(Char,Char)] [Char] -> MaybeErrorString ([(Char,Char)], [Char])
charClass :: [!(!Char,!Char)!] [Char] -> MaybeErrorString ([!(!Char,!Char)!], [Char])
charClass _ [] = Error "unclosed character class"
charClass cls=:[_:_] [']':cs] = Ok (cls,cs)
charClass cls=:[|_:_] [']':cs] = Ok (cls,cs)
charClass cls cs = (try_chars <|> shorthandClass cs) >>=
\(cls`,cs`) -> charClass (cls ++ cls`) cs`
\(cls`,cs`) -> charClass (cls ++| cls`) cs`
where
try_chars = singleChar cs >>= \(c1,cs`) -> case cs` of
['-':']':cs``] = Ok ([(c1,c1),('-','-')], [']':cs``])
['-':cs``] = (\(c2,cs) -> ([(c1,c2)],cs)) <$> singleChar cs``
_ = Ok ([(c1,c1)], cs`)
['-':']':cs``] = Ok ([|(c1,c1),('-','-')], [']':cs``])
['-':cs``] = (\(c2,cs) -> ([|(c1,c2)],cs)) <$> singleChar cs``
_ = Ok ([|(c1,c1)], cs`)
shorthandClass :: [Char] -> MaybeErrorString ([(Char,Char)], [Char])
shorthandClass :: [Char] -> MaybeErrorString ([!(!Char,!Char)!], [Char])
shorthandClass ['\\':c:cs] = case lookup c shorthand_classes of
Nothing = Error $ "unknown shorthand class \\" +++ {c}
(Just cc) = Ok (cc,cs)
shorthandClass ['.':cs] = Ok ([('\x00','\x09'),('\x0b','\xff')], cs)
shorthandClass ['.':cs] = Ok ([|('\x00','\x09'),('\x0b','\xff')], cs)
shorthandClass _ = Error "expected character class"
revConcat :: [Regex] -> Regex
revConcat [r] = r
revConcat rs = Concat $ reverse rs
revConcat :: [!Regex!] -> Regex
revConcat [!r!] = r
revConcat rs = Concat $ Reverse rs
uncurry3 :: (a b c -> d) (a,b,c) -> d
uncurry3 f (x,y,z) = f x y z
implementation module Regex.Print
import _SystemArray
import _SystemStrictLists
import StdBool
from StdFunc import flip
from StdOverloadedList import All, Foldr, IsMember, Length, Map, RemoveMember
import StdString
import StdTuple
......@@ -15,16 +17,16 @@ import Regex
import Regex.Util
print :: Bool Regex -> String
print ps (Literal cs) = parens (ps && length cs > 1) $ concat $ map escape cs
print ps (Literal cs) = parens (ps && Length cs > 1) $ concat $ Map escape cs
print ps (CharacterClass True cc) = "[^" + concat (printCharClasses cc) + "]"
print ps (CharacterClass False cc)
| length ccs == 1 = hd ccs
| otherwise = "[" + concat ccs + "]"
where ccs = printCharClasses cc
print ps (Concat rgxs) = parens ps $ foldl (+) r rs
where [r:rs] = map (print True) rgxs
where [r:rs] = Map (print True) rgxs
print ps (Any rgxs) = parens ps $ foldl (\x s -> x + "|" + s) r rs
where [r:rs] = map (print True) rgxs
where [r:rs] = Map (print True) rgxs
print ps (Repeated g 0 (Just 1) r) = print True r <+ "?" <+ lz g
print ps (Repeated g 1 (Just 1) r) = print True r
print ps (Repeated g 0 Nothing r) = print True r <+ "*" <+ lz g
......@@ -55,17 +57,17 @@ parens False s = toString s
(<+) infixr 5 :: a b -> String | toString a & toString b
(<+) x y = toString x + toString y
printCharClasses :: [(Char,Char)] -> [String]
printCharClasses [] = []
printCharClasses :: [!(!Char,!Char)!] -> [String]
printCharClasses [|] = []
printCharClasses cs = case findShorthand shorthand_classes cs of
(Just (n,cs`)) = [n:printCharClasses cs`]
Nothing = map printRange cs
Nothing = [printRange c \\ c <|- cs]
where
findShorthand :: [(Char, [(Char,Char)])] [(Char,Char)]
-> Maybe (String, [(Char,Char)])
findShorthand [] _ = Nothing
findShorthand [(n,ndl):shts] hay
| all (flip isMember hay) ndl = Just ({'\\',n}, foldr removeMember hay ndl)
findShorthand :: [(Char, [!(Char,Char)!])] [!(Char,Char)!]
-> Maybe (String, [!(Char,Char)!])
findShorthand [|] _ = Nothing
findShorthand [|(n,ndl):shts] hay
| All (flip IsMember hay) ndl = Just ({'\\',n}, Foldr RemoveMember hay ndl)
| otherwise = findShorthand shts hay
printRange :: (Char,Char) -> String
......
......@@ -18,10 +18,10 @@ escape_sequences :==
* @type [(Char, [(Char, Char)])]
*/
shorthand_classes :==
[ ('w', [('A','Z'),('a','z'),('0','9'),('_','_')])
, ('W', [('\0','/'),(':','@'),('[','^'),('`','`'),('{','\xff')])
, ('d', [('0','9')])
, ('D', [('\x00','/'),(':','\xff')])
, ('s', [(' ',' '),('\t','\n'),('\x0c','\x0d')])
, ('S', [('\x00','\x08'),('\x0b','\x0b'),('\x0e','\x1f'),('!','\xff')])
[ ('w', [!('A','Z'),('a','z'),('0','9'),('_','_')!])
, ('W', [!('\0','/'),(':','@'),('[','^'),('`','`'),('{','\xff')!])
, ('d', [!('0','9')!])
, ('D', [!('\x00','/'),(':','\xff')!])
, ('s', [!(' ',' '),('\t','\n'),('\x0c','\x0d')!])
, ('S', [!('\x00','\x08'),('\x0b','\x0b'),('\x0e','\x1f'),('!','\xff')!])
]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment