Commit 89d50359 authored by Camil Staps 🐧

Character ranges

parent b42c904a
......@@ -8,7 +8,7 @@ import Regex.Print
:: Regex
= Literal [Char]
| CharacterClass [Char]
| CharacterClass [(Char,Char)]
| Concat [Regex]
| Any [Regex]
| Repeated Greediness Int (Maybe Int) Regex
......
......@@ -32,7 +32,7 @@ match` (CharacterClass _) {unseen=[]}
= []
match` r=:(CharacterClass cs) st=:{matched,unseen=[u:us]}
= matchAndContinue r st $
if (isMember u cs) [eat 1 st] []
if (any (\(f,t) -> f <= u && u <= t) cs) [eat 1 st] []
match` (Concat []) st
= [st]
match` (Concat [r]) st
......
......@@ -37,6 +37,7 @@ parse [r:rs] ['*':'?':cs] = Just ([Many False r:rs], cs)
parse [r:rs] ['*':cs] = Just ([Many True r:rs], cs)
parse [r:rs] ['?':'?':cs] = Just ([Optional False r:rs], cs)
parse [r:rs] ['?':cs] = Just ([Optional True r:rs], cs)
parse [r:rs] ['{':cs]
| isNothing fr = Nothing
| hd cs` == '}'
......@@ -61,10 +62,16 @@ where
toEnd :: [Char] -> Maybe (Maybe Int)
toEnd [] = Just Nothing
toEnd cs = Just <$> parseInt cs
parse rs ['[':cs] = case rest of
[']':r] = Just ([CharacterClass chars:rs], r)
_ = Nothing
where (chars, rest) = span ((<>) ']') cs
parse rs ['[':cs] = appFst (\cc -> [CharacterClass cc:rs]) <$> charClass [] cs
where
// Parses the body of a character class, i.e. the input following the opening '['.
// First argument: the (from,to) ranges collected so far, in input order.
// Second argument: the input that is still to be read.
// Result: the collected ranges together with the input after the closing ']',
// or Nothing when the input runs out before a ']' is found.
// The order of the alternatives matters: the more specific patterns come first.
charClass :: [(Char,Char)] [Char] -> Maybe ([(Char,Char)], [Char])
charClass _ [] = Nothing
charClass cls [']':cs] = Just (cls,cs)
// A '-' directly before the closing ']' is a literal dash, not the start of a range.
charClass cls [c:'-':']':cs] = Just (cls ++ [(c,c),('-','-')], cs)
// c-c2 denotes the range from c up to c2.
charClass cls [c:'-':c2:cs] = charClass (cls ++ [(c,c2)]) cs
// Any other character is stored as a range that starts and ends at itself.
charClass cls [c:cs] = charClass (cls ++ [(c,c)]) cs
parse rs ['(':'?':':':cs] = case parse` [] cs of
Nothing = Nothing
(Just ([], cs)) = Just (rs, cs)
......
......@@ -12,8 +12,12 @@ import Regex
print :: Bool Regex -> String
print ps (Literal cs) = parens (ps && length cs > 1) cs
print ps (CharacterClass [c]) = {c}
print ps (CharacterClass cs) = "[" <+ cs <+ "]"
print ps (CharacterClass cs) = "[" <+ foldl (<++) "" cs <+ "]"
where
// Extends the string s with the printed form of one character-class range:
// a range whose two ends are equal is written as that single character,
// any other range is written as c1, a '-', and c2.
(<++) infixr 5 :: String (Char,Char) -> String
(<++) s (c1,c2)
| c1 == c2 = s <+ c1
| otherwise = s <+ c1 <+ "-" <+ c2
print ps (Concat rgxs) = parens ps $ foldl (\x s -> x +++ s) r rs
where [r:rs] = map (print True) rgxs
print ps (Any rgxs) = parens ps $ foldl (\x s -> x +++ "|" +++ s) r rs
......
module test
import StdEnv
import Regex
Start = match rgx string
Start = (toString rgx, match rgx string)
where
(Just rgx) = compile ['[hel]{1,2}?l']
(Just rgx) = compile ['[e-hl]{1,2}?l']
string = ['hello world']
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment