Verified Commit 1acf139a authored by Camil Staps's avatar Camil Staps 🙂

Rough first version: Literal, CharacterClass, Optional, Concat, Any, Many,...

Rough first version: Literal, CharacterClass, Optional, Concat, Any, Many, Some, StartOfString, EndOfString, Empty
parent 307fc1d8
definition module Regex
from StdOverloaded import class toString
:: Regex
= Literal [Char]
| CharacterClass [Char]
| Optional Regex
| Concat Regex Regex
| Any [Regex]
| Many Greediness Regex
| Some Greediness Regex
| StartOfString
| EndOfString
| Empty
:: Greediness :== Bool
:: Position :== Int
instance toString Regex
match :: Regex [Char] -> [(Position, [Char])]
implementation module Regex
import _SystemArray
import StdBool
import StdString
from StdFunc import flip, id
from Data.Func import $
import Data.List
/*** Printing ***/
print :: Bool Regex -> String
print ps (Literal cs) = parens (ps && length cs > 1) cs
print ps (CharacterClass [c]) = {c}
print ps (CharacterClass cs) = "[" <+ cs <+ "]"
print ps (Optional r) = print True r <+ "?"
print ps (Concat r1 r2) = parens ps $ r1 <+ r2
print ps (Any rgxs) = "(?:" <+ foldl (\x s -> x +++ "|" +++ s) r rs <+ ")"
where [r:rs] = map (print True) rgxs
print ps (Many gr r) = parens ps $ print True r <+ "*" <+ if gr "" "?"
print ps (Some gr r) = parens ps $ print True r <+ "+" <+ if gr "" "?"
print ps StartOfString = "^"
print ps EndOfString = "$"
print ps Empty = ""
instance toString Regex where toString r = print False r
parens :: Bool a -> String | toString a
parens True s = "(?:" <+ s <+ ")"
parens False s = toString s
(<+) infixr 5 :: a b -> String | toString a & toString b
(<+) x y = toString x +++ toString y
/*** Matching ***/
:: MatchStatus
= { skipped :: [Char]
, matched :: [Char]
, unseen :: [Char]
, can_skip :: Bool
}
instance zero MatchStatus
where zero = {skipped=[], matched=[], unseen=[], can_skip=True}
match :: Regex [Char] -> [(Position, [Char])]
match r s = [(length st.skipped, st.matched) \\ st <- match` r status]
where status = {zero & unseen=s}
match` :: Regex MatchStatus -> [MatchStatus]
match` r=:(Literal cs) st
= matchAndContinue r st $
if (isPrefixOf cs st.unseen) [eat (length cs) st] []
match` (CharacterClass _) {unseen=[]}
= []
match` r=:(CharacterClass cs) st=:{matched,unseen=[u:us]}
= matchAndContinue r st $
if (isMember u cs) [eat 1 st] []
match` (Optional r) st
= match` (Any [r, Empty]) st
match` (Concat r1 r2) st
= [st`` \\ st` <- match` r1 st, st`` <- match` r2 st`]
match` (Any rs) st
= flatten $ map (flip match` st) rs
match` m=:(Many greedy r) st
= match` (Any $ if greedy id reverse [Concat r m, Empty]) st
match` (Some greedy r) st
= match` (Concat r $ Many greedy r) st
match` StartOfString st
= if (isEmpty st.matched) [{st & can_skip=False}] []
match` EndOfString st
= if (isEmpty st.unseen) [st] []
match` Empty st
= [st]
skip :: Int MatchStatus -> MatchStatus
skip n st = {st & skipped=st.skipped ++ take n st.unseen, unseen=drop n st.unseen}
eat :: Int MatchStatus -> MatchStatus
eat n st = {st & matched=st.matched ++ take n st.unseen, unseen=drop n st.unseen}
matchAndContinue :: Regex MatchStatus [MatchStatus] -> [MatchStatus]
matchAndContinue r st sts = sts ++
if can_skip (match` r $ skip 1 st) []
where
can_skip = st.can_skip && not (isEmpty st.unseen) && isEmpty st.matched
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment