Verified Commit 36ebe651 authored by Camil Staps's avatar Camil Staps 🚀

Error reporting for compiling

parent 5f998314
......@@ -9,10 +9,13 @@ the `LICENSE` file for more details.
```clean
:: Regex = ...
:: Match :== (Position, [Char], [(GroupId, [Char])])
// :: Position :== Int; :: GroupId = Named String | NotNamed Int
instance toString Regex
compile :: [Char] -> Maybe Regex
match :: Regex [Char] -> [(Position, [Char])] // :: Position :== Int
compile :: [Char] -> MaybeErrorString Regex
match :: Regex [Char] -> [Match]
```
The following regex features are supported:
......
definition module Regex.Parse
from Data.Maybe import :: Maybe
from Data.Error import :: MaybeErrorString, :: MaybeError
from Regex import :: Regex
compile :: [Char] -> Maybe Regex
compile :: [Char] -> MaybeErrorString Regex
implementation module Regex.Parse
import _SystemArray
import StdBool
from StdFunc import o, flip
import StdList
import StdString
import StdTuple
import Control.Applicative
import Control.Monad
import Data.Error
from Data.Func import $
import Data.Functor
import Data.List
......@@ -16,47 +19,56 @@ import Data.Tuple
import Regex
import Regex.Util
compile :: [Char] -> Maybe Regex
instance zero String where zero = ""
instance Alternative (MaybeError a) | zero a
where
empty = Error zero
(<|>) (Error _) r = r
(<|>) l _ = l
compile :: [Char] -> MaybeErrorString Regex
compile cs = compile` 0 [] cs
where
compile` :: Int [Regex] [Char] -> Maybe Regex
compile` _ [] [] = Just $ Concat []
compile` _ [r] [] = Just r
compile` _ rs [] = Just $ Concat $ reverse rs
compile` :: Int [Regex] [Char] -> MaybeErrorString Regex
compile` _ [] [] = Ok $ Concat []
compile` _ [r] [] = Ok r
compile` _ rs [] = Ok $ Concat $ reverse rs
where
itlt :: [a] -> ([a], a)
itlt [x] = ([], x)
itlt [x:xs] = let (it,lt) = itlt xs in ([x:it], lt)
compile` g rs cs = parse g rs cs >>= uncurry3 compile`
parse :: Int [Regex] [Char] -> Maybe (Int, [Regex], [Char])
parse g rs [] = Just (g, rs, [])
parse g [r:rs] ['+':'?':cs] = Just (g, [Some False r:rs], cs)
parse g [r:rs] ['+':cs] = Just (g, [Some True r:rs], cs)
parse g [r:rs] ['*':'?':cs] = Just (g, [Many False r:rs], cs)
parse g [r:rs] ['*':cs] = Just (g, [Many True r:rs], cs)
parse g [r:rs] ['?':'?':cs] = Just (g, [Optional False r:rs], cs)
parse g [r:rs] ['?':cs] = Just (g, [Optional True r:rs], cs)
parse :: Int [Regex] [Char] -> MaybeErrorString (Int, [Regex], [Char])
parse g rs [] = Ok (g, rs, [])
parse g [r:rs] ['+':'?':cs] = Ok (g, [Some False r:rs], cs)
parse g [r:rs] ['+':cs] = Ok (g, [Some True r:rs], cs)
parse g [r:rs] ['*':'?':cs] = Ok (g, [Many False r:rs], cs)
parse g [r:rs] ['*':cs] = Ok (g, [Many True r:rs], cs)
parse g [r:rs] ['?':'?':cs] = Ok (g, [Optional False r:rs], cs)
parse g [r:rs] ['?':cs] = Ok (g, [Optional True r:rs], cs)
parse g [Any rs:rs`] ['|':cs]
= (\(g,r,cs) -> (g,[Any $ rs++r:rs`],cs)) <$> parse g [] cs
parse g [r:rs] ['|':cs]
= (\(g,r`,cs) -> (g,[Any [r:r`]:rs],cs)) <$> parse g [] cs
parse g _ ['|':_]
= Nothing
parse g [] ['|':_]
= Error "no regex before |"
parse g [r:rs] ['{':cs]
| isNothing fr = Nothing
| isNothing fr = Error "couldn't parse from part of quantifier"
| hd cs` == '}'
| length cs` > 1 && cs`!!1 == '?'
= Just (g, [Repeated False fr` (Just fr`) r:rs], drop 2 cs`)
= Just (g, [Repeated True fr` (Just fr`) r:rs], tl cs`)
| isNothing to = Nothing
= Ok (g, [Repeated False fr` (Just fr`) r:rs], drop 2 cs`)
= Ok (g, [Repeated True fr` (Just fr`) r:rs], tl cs`)
| isNothing to = Error "couldn't parse to part of quantifier"
| hd cs` == ',' && hd cs`` == '}'
| length cs`` > 1 && cs``!!1 == '?'
= Just (g, [Repeated False fr` to` r:rs], drop 2 cs``)
= Just (g, [Repeated True fr` to` r:rs], tl cs``)
| otherwise = Nothing
= Ok (g, [Repeated False fr` to` r:rs], drop 2 cs``)
= Ok (g, [Repeated True fr` to` r:rs], tl cs``)
| otherwise = Error "couldn't parse quantifier"
where
(fr`, to`) = (fromJust fr, fromJust to)
(fr, cs`) = appFst parseInt $ span isDigit cs
......@@ -75,11 +87,11 @@ parse g rs ['[':'^':cs]
parse g rs ['[':cs]
= (\(cc,cs) -> (g, [CharacterClass False cc:rs], cs)) <$> charClass [] cs
parse g rs ['(':'?':':':cs] = case inGroup g [] cs of
Nothing = Nothing
(Just (g, [], cs)) = Just (g, rs, cs)
(Just (g, [r], cs)) = Just (g, [r:rs], cs)
(Just (g, rs`, cs)) = Just (g, [Concat $ reverse rs`:rs], cs)
parse g rs ['(':'?':':':cs] = app <$> inGroup g [] cs
where
app (g, [], cs) = (g, rs, cs)
app (g, [r], cs) = (g, [r:rs], cs)
app (g, rs`, cs) = (g, [Concat $ reverse rs`:rs], cs)
parse g rs ['(':cs] = (\(g`,rs`,cs) -> (g`, [Group (NotNamed g) (cc rs`):rs], cs)) <$> inGroup (g+1) [] cs
where cc [r] = r; cc rs = Concat $ reverse rs
......@@ -88,32 +100,33 @@ parse g rs cs
= (\(c,cs) -> (g, [CharacterClass False c:rs],cs)) <$> shorthandClass cs
<|> (\(a,cs) -> (g, [a:rs], cs)) <$> anchor cs
<|> (\(c,cs) -> (g, [Literal [c]:rs], cs)) <$> singleChar cs
<|> Error ("Cannot parse " +++ toString cs)
inGroup :: Int [Regex] [Char] -> Maybe (Int, [Regex], [Char])
inGroup g grp [] = Nothing
inGroup g grp [')':cs] = Just (g, grp, cs)
inGroup :: Int [Regex] [Char] -> MaybeErrorString (Int, [Regex], [Char])
inGroup g grp [] = Error "unclosed group"
inGroup g grp [')':cs] = Ok (g, grp, cs)
inGroup g grp cs = parse g grp cs >>= uncurry3 inGroup
anchor :: [Char] -> Maybe (Regex, [Char])
anchor ['^':cs] = Just (StartOfString, cs)
anchor ['$':cs] = Just (EndOfString, cs)
anchor ['\\':'b':cs] = Just (WordBreak False, cs)
anchor ['\\':'B':cs] = Just (WordBreak True, cs)
anchor _ = Nothing
anchor :: [Char] -> MaybeErrorString (Regex, [Char])
anchor ['^':cs] = Ok (StartOfString, cs)
anchor ['$':cs] = Ok (EndOfString, cs)
anchor ['\\':'b':cs] = Ok (WordBreak False, cs)
anchor ['\\':'B':cs] = Ok (WordBreak True, cs)
anchor _ = Error "expected anchor"
singleChar :: [Char] -> Maybe (Char, [Char])
singleChar :: [Char] -> MaybeErrorString (Char, [Char])
singleChar ['\\':'x':c1:c2:cs]
= Just (toChar $ 16 * fromHex c1 + fromHex c2, cs)
= Ok (toChar $ 16 * fromHex c1 + fromHex c2, cs)
where fromHex c = length $ takeWhile ((<>) c) ['0123456789abcdef']
singleChar ['\\':c:cs]
| isMember c escapable
= Just (c, cs)
= Ok (c, cs)
| isMember c $ map fst escape_sequences
= Just (snd $ hd $ filter ((==) c o fst) escape_sequences, cs)
= Ok (snd $ hd $ filter ((==) c o fst) escape_sequences, cs)
| isOctDigit c
= Just (toChar $ foldl ((+) o ((*) 8)) 0 $ map digitToInt octs, nonocts)
= Ok (toChar $ foldl ((+) o ((*) 8)) 0 $ map digitToInt octs, nonocts)
| otherwise
= Nothing
= Error "expected character"
where
escapable = ['\\+*?()[]{}^$.|']
......@@ -125,23 +138,25 @@ where
spanMax n f [x:xs] = if (f x) ([x:ys],zs) ([],[x:xs])
where (ys,zs) = spanMax (n-1) f xs
singleChar [c:cs]
= Just (c,cs)
= Ok (c,cs)
charClass :: [(Char,Char)] [Char] -> Maybe ([(Char,Char)], [Char])
charClass _ [] = Nothing
charClass cls=:[_:_] [']':cs] = Just (cls,cs)
charClass :: [(Char,Char)] [Char] -> MaybeErrorString ([(Char,Char)], [Char])
charClass _ [] = Error "unclosed character class"
charClass cls=:[_:_] [']':cs] = Ok (cls,cs)
charClass cls cs = (try_chars <|> shorthandClass cs) >>=
\(cls`,cs`) -> charClass (cls ++ cls`) cs`
where
try_chars = singleChar cs >>= \(c1,cs`) -> case cs` of
['-':']':cs``] = Just ([(c1,c1),('-','-')], [']':cs``])
['-':']':cs``] = Ok ([(c1,c1),('-','-')], [']':cs``])
['-':cs``] = (\(c2,cs) -> ([(c1,c2)],cs)) <$> singleChar cs``
_ = Just ([(c1,c1)], cs`)
shorthandClass :: [Char] -> Maybe ([(Char,Char)], [Char])
shorthandClass ['\\':c:cs] = (\cc -> (cc,cs)) <$> lookup c shorthand_classes
shorthandClass ['.':cs] = Just ([('\x00','\x09'),('\x0b','\xff')], cs)
shorthandClass _ = Nothing
_ = Ok ([(c1,c1)], cs`)
shorthandClass :: [Char] -> MaybeErrorString ([(Char,Char)], [Char])
shorthandClass ['\\':c:cs] = case lookup c shorthand_classes of
Nothing = Error $ "unknown shorthand class \\" +++ {c}
(Just cc) = Ok (cc,cs)
shorthandClass ['.':cs] = Ok ([('\x00','\x09'),('\x0b','\xff')], cs)
shorthandClass _ = Error "expected character class"
uncurry3 :: (a b c -> d) (a,b,c) -> d
uncurry3 f (x,y,z) = f x y z
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment