Verified Commit 3af7dacb authored by Steffen Michels's avatar Steffen Michels Committed by Camil Staps

add support for XML CDATA & named character entity references

parent e0ddb50e
Pipeline #29738 passed with stage
in 2 minutes and 58 seconds
......@@ -14,7 +14,7 @@ test-nightly:
image: "camilstaps/clean:nightly"
script:
- COCLPATH=./compiler make -C tests/linux64 run
- cleantest -r testproperties --options '-IL;Dynamics;-d;src/libraries/OS-Independent;-P;OutputTestEvents;-T;Tests 100000;-T;MaxStringLength 500;-C;-h;-C;100m;-r' --junit junit.xml
- cleantest -r testproperties --options '-IL;Dynamics;-d;src/libraries/OS-Independent;-P;OutputTestEvents;-T;Tests 100000;-T;MaxStringLength 500;-T;Bent;-C;-h;-C;100m;-r' --junit junit.xml
artifacts:
paths:
- junit.xml
......
definition module Data.Error
from StdOverloaded import class ==
from Data.Functor import class Functor
from Control.Monad import class Monad
from Control.Applicative import class pure, class <*>, class Applicative
......@@ -23,7 +24,7 @@ instance Functor (MaybeError a)
instance pure (MaybeError a)
instance <*> (MaybeError a)
instance Monad (MaybeError a)
instance == (MaybeError a b) | == a & == b
derive gEq MaybeError
/**
......
implementation module Data.Error
import StdMisc
import Data.Functor, Data.Maybe, Data.GenEq
import Control.Monad
import Control.Applicative
instance Functor (MaybeError a)
where
fmap f (Ok x) = Ok (f x)
fmap f (Error x) = Error x
instance pure (MaybeError a)
where
pure x = Ok x
instance <*> (MaybeError a)
where
(<*>) (Error e) _ = Error e
(<*>) (Ok f) r = fmap f r
instance Monad (MaybeError e) where
bind (Error l) _ = Error l
bind (Ok r) k = k r
derive gEq MaybeError
isOk :: !(MaybeError a b) -> Bool
isOk (Ok _) = True
isOk (Error _) = False
isError :: !(MaybeError a b) -> Bool
isError (Ok _) = False
isError (Error _) = True
fromOk :: !(MaybeError .a .b) -> .b
fromOk (Ok b) = b
fromOk (Error _) = abort "Data.Error.fromOk: argument is Error"
fromError :: !(MaybeError .a .b) -> .a
fromError (Error a) = a
fromError (Ok _) = abort "Data.Error.fromError: argument is Ok"
liftError :: !(MaybeError .a .b) -> (MaybeError .a .c)
liftError (Error a) = Error a
liftError (Ok _) = abort "Data.Error.liftError: argument is Ok"
mb2error :: !e !(Maybe a) -> MaybeError e a
mb2error error mbV = maybe (Error error) Ok mbV
okSt :: *st (.a *st -> *st) !(MaybeError .e .a) -> *st
okSt st f (Error _) = st
okSt st f (Ok x) = f x st
error2mb :: !(MaybeError e a) -> Maybe a
error2mb (Error _) = Nothing
error2mb (Ok a) = Just a
seqErrors :: !(MaybeError e a) (a -> MaybeError e b) -> MaybeError e b
seqErrors a bfunc = case a of
Ok a = bfunc a
Error e = Error e
combineErrors :: !(MaybeError e a) (MaybeError e b) (a b -> MaybeError e c) -> MaybeError e c
combineErrors a b combf = case a of
Error e = Error e
Ok a = case b of
Error e = Error e
Ok b = combf a b
seqErrorsSt :: !(.st -> (MaybeError e a,.st)) (a .st -> u:(MaybeError e b, .st)) !.st -> v:(MaybeError e b, !.st), [u <= v]
seqErrorsSt aop bop st
# (a,st) = aop st
= case a of
Error e = (Error e,st)
Ok a = bop a st
combineErrorsSt :: !(.st -> (MaybeError e a, .st)) (.st -> (MaybeError e b, .st)) (a b -> MaybeError e c) !.st -> (!MaybeError e c, !.st)
combineErrorsSt aop bop combf st
# (a,st) = aop st
= case a of
Error e = (Error e,st)
Ok a
# (b,st) = bop st
= case b of
Error e = (Error e, st)
Ok b = (combf a b, st)
implementation module Data.Error
import StdMisc
import Data.Functor, Data.Maybe, Data.GenEq
import Control.Monad
import Control.Applicative
instance Functor (MaybeError a)
where
fmap f (Ok x) = Ok (f x)
fmap f (Error x) = Error x
instance pure (MaybeError a)
where
pure x = Ok x
instance <*> (MaybeError a)
where
(<*>) (Error e) _ = Error e
(<*>) (Ok f) r = fmap f r
instance Monad (MaybeError e) where
bind (Error l) _ = Error l
bind (Ok r) k = k r
instance == (MaybeError a b) | == a & == b where
== (Ok x) y = case y of
Ok y = x == y
_ = False
== (Error x) y = case y of
Error y = x == y
_ = False
derive gEq MaybeError
isOk :: !(MaybeError a b) -> Bool
isOk (Ok _) = True
isOk (Error _) = False
isError :: !(MaybeError a b) -> Bool
isError (Ok _) = False
isError (Error _) = True
fromOk :: !(MaybeError .a .b) -> .b
fromOk (Ok b) = b
fromOk (Error _) = abort "Data.Error.fromOk: argument is Error"
fromError :: !(MaybeError .a .b) -> .a
fromError (Error a) = a
fromError (Ok _) = abort "Data.Error.fromError: argument is Ok"
liftError :: !(MaybeError .a .b) -> (MaybeError .a .c)
liftError (Error a) = Error a
liftError (Ok _) = abort "Data.Error.liftError: argument is Ok"
mb2error :: !e !(Maybe a) -> MaybeError e a
mb2error error mbV = maybe (Error error) Ok mbV
okSt :: *st (.a *st -> *st) !(MaybeError .e .a) -> *st
okSt st f (Error _) = st
okSt st f (Ok x) = f x st
error2mb :: !(MaybeError e a) -> Maybe a
error2mb (Error _) = Nothing
error2mb (Ok a) = Just a
seqErrors :: !(MaybeError e a) (a -> MaybeError e b) -> MaybeError e b
seqErrors a bfunc = case a of
Ok a = bfunc a
Error e = Error e
combineErrors :: !(MaybeError e a) (MaybeError e b) (a b -> MaybeError e c) -> MaybeError e c
combineErrors a b combf = case a of
Error e = Error e
Ok a = case b of
Error e = Error e
Ok b = combf a b
seqErrorsSt :: !(.st -> (MaybeError e a,.st)) (a .st -> u:(MaybeError e b, .st)) !.st -> v:(MaybeError e b, !.st), [u <= v]
seqErrorsSt aop bop st
# (a,st) = aop st
= case a of
Error e = (Error e,st)
Ok a = bop a st
combineErrorsSt :: !(.st -> (MaybeError e a, .st)) (.st -> (MaybeError e b, .st)) (a b -> MaybeError e c) !.st -> (!MaybeError e c, !.st)
combineErrorsSt aop bop combf st
# (a,st) = aop st
= case a of
Error e = (Error e,st)
Ok a
# (b,st) = bop st
= case b of
Error e = (Error e, st)
Ok b = (combf a b, st)
definition module Data.Maybe.Gast
from Data.Maybe import :: Maybe
from Gast import generic ggen, generic genShow, :: GenState
derive genShow Maybe
derive ggen Maybe
implementation module Data.Maybe.Gast
import Gast
import Data.Maybe
import Control.GenBimap
derive genShow Maybe
derive ggen Maybe
definition module Data.Maybe.GenPrint
from Data.Maybe import :: Maybe
from Text.GenPrint import generic gPrint, class PrintOutput, :: PrintState
derive gPrint Maybe
implementation module Data.Maybe.GenPrint
import Data.Maybe
import Text.GenPrint
derive gPrint Maybe
definition module Text.GenXML
/**
* This module provides data types for easy construction of XML documents and
* a generic printer/parser.
* This module provides data types for easy construction of XML documents and
* a generic conversion of Clean values to XML.
*
* @property-bootstrap
* import StdEnv, Data.Error, Data.Func, Text.GenXML.Gast, Text.GenXML.GenPrint
*
* derive genShow MaybeError
* derive gPrint MaybeError
* derive class Gast Maybe
*
* withMergedConsecutiveText :: !XMLDoc -> XMLDoc
* withMergedConsecutiveText (XMLDoc defaultNamespace namespaces rootNode) =
* XMLDoc defaultNamespace namespaces $ nodeWithMergedConsecutiveText rootNode
*
* nodeWithMergedConsecutiveText :: !XMLNode -> XMLNode
* nodeWithMergedConsecutiveText (XMLElem name attrs nodes) =
* XMLElem name attrs $ nodesWithMergedConsecutiveText nodes
* nodeWithMergedConsecutiveText node = node
*
* nodesWithMergedConsecutiveText :: ![XMLNode] -> [XMLNode]
* nodesWithMergedConsecutiveText [] = []
* nodesWithMergedConsecutiveText [XMLText text1, XMLText text2: rest] =
* nodesWithMergedConsecutiveText [XMLText $ text1 +++ text2: rest]
* nodesWithMergedConsecutiveText [node: rest] =
* [nodeWithMergedConsecutiveText node: nodesWithMergedConsecutiveText rest]
*/
import StdOverloaded, StdGeneric, Data.Either
from Data.Maybe import :: Maybe
from Data.Error import :: MaybeErrorString, :: MaybeError
from Data.Maybe import :: Maybe
from Data.Error import :: MaybeErrorString, :: MaybeError
from Data.GenEq import generic gEq
:: XMLDoc = XMLDoc !(Maybe XMLURI) ![(XMLNamespacePrefix,XMLURI)] !XMLNode
:: XMLNode = XMLElem !XMLQName ![XMLAttr] ![XMLNode]
| XMLText !String
instance == XMLDoc
derive gEq XMLDoc
/**
* A node in an XML document.
*/
:: XMLNode
= XMLElem !XMLQName ![XMLAttr] ![XMLNode] //* An element
| XMLText !String //* Character data
| XMLCData !String //* A CDATA section
derive gEq XMLNode
:: XMLAttr = XMLAttr !XMLQName !String
derive gEq XMLAttr
:: XMLQName = XMLQName !(Maybe XMLNamespacePrefix) !XMLName
derive gEq XMLQName
:: XMLNamespacePrefix :== String
:: XMLURI :== String
:: XMLName :== String
/**
* Create an XMLQName containing an unqualified name from a String
* Create an XMLQName containing an unqualified name from a String.
*
* @param Unqualified name
* @return XMLQName containing the unqualified name
* @result XMLQName containing the unqualified name
*/
uname :: !String -> XMLQName
uname :: !String -> XMLQName
/**
* Create an XMLQName containing a qualified name from a String
* Create an XMLQName containing a qualified name from a String.
*
* @param Qualified name
* @return XMLQName containing the qualified name
* @result XMLQName containing the qualified name
*/
qname :: !XMLNamespacePrefix !String -> XMLQName
instance toString XMLDoc
/**
* The XML document corresponding to a string.
*
* @property correctness: A.doc :: XMLDoc:
* fromString (toString doc) =.= typedOk (withMergedConsecutiveText doc)
* where
* typedOk :: !XMLDoc -> MaybeErrorString XMLDoc
* typedOk doc = Ok doc
*/
instance fromString (MaybeErrorString XMLDoc)
// generic printer
......@@ -45,6 +95,7 @@ toXML :: !a -> XMLDoc | XMLEncode{|*|} a
toXMLString :: !a -> String | XMLEncode{|*|} a
:: XMLEncodeResult
generic XMLEncode a :: !a -> XMLEncodeResult
// special types for adding attributes to XML data
......
implementation module Text.GenXML
import StdArray, StdBool, StdInt, StdList, StdMisc, StdTuple, StdGeneric, StdFunc, StdString
import Data.Error, Data.Either, Data.Maybe, Text, Data.GenEq
from Text.Parsers.CParsers.ParserCombinators import :: Parser, :: ParsResult, :: CParser, &>, +&+, +&-, -&+, <!>, <&, <&>, <*?>, <@, >?<, @>, begin1, satisfy, symbol, yield, <|>, <+?>
import StdEnv
import Data.Error, Data.Either, Data.Maybe, Text, Data.GenEq, Data.Func, Data._Array
from Text.Parsers.CParsers.ParserCombinators import :: Parser, :: ParsResult, :: CParser, &>, +&+, +&-, -&+, <!>, <&, <&>, <*?>, <@, >?<, @>, begin1, satisfy, symbol, yield, <|>, <+?>, fail
instance == XMLDoc where
== x y = x === y
uname :: !String -> XMLQName
uname name = XMLQName Nothing name
......@@ -10,130 +13,136 @@ uname name = XMLQName Nothing name
qname :: !XMLNamespacePrefix !String -> XMLQName
qname namespace name = XMLQName (Just namespace) name
addNamespaces :: !(Maybe XMLURI) [(XMLNamespacePrefix,String)] !XMLNode -> XMLNode
addNamespaces mbDefaultNamespace namespaces (XMLElem qname attrs children)
# ns = map (\(prefix,uri) -> XMLAttr (XMLQName (Just "xmlns") prefix) uri) namespaces
# ns = case mbDefaultNamespace of
Nothing = ns
Just defaultNamespace = [XMLAttr (XMLQName Nothing "xmlns") defaultNamespace:ns]
= (XMLElem qname (ns ++ attrs) children)
addNamespaces _ _ _ = abort "addNamespaces called on non-XMLElem\n"
docSize :: !XMLDoc -> Int
docSize (XMLDoc defaultNamespace namespaces documentElement)
# documentElement = addNamespaces defaultNamespace namespaces documentElement
= 37 + nodeSize documentElement
nodeSize :: !XMLNode -> Int
nodeSize (XMLText text) = escapedSize text
nodeSize (XMLElem qname attrs children)
# attrsSize = sum (map attrSize attrs) + length attrs
= if (isEmpty children)
(3 + qnameSize qname + attrsSize)
(5 + 2 * qnameSize qname + attrsSize + sum (map nodeSize children))
attrSize :: !XMLAttr -> Int
attrSize (XMLAttr qname value) = 3 + qnameSize qname + escapedSize value
qnameSize :: !XMLQName -> Int
qnameSize (XMLQName Nothing name) = size name
qnameSize (XMLQName (Just ns) name) = 1 + size ns + size name
//Calculates the number of chars in a string when xml special characters are escaped
escapedSize :: !{#Char} -> Int
escapedSize s = escapedSize` s (size s) 0
where
escapedSize` s n i
| i == n = 0
| s.[i] == '<' = 4 + escapedSize` s n (i + 1)
| s.[i] == '>' = 4 + escapedSize` s n (i + 1)
| s.[i] == '&' = 5 + escapedSize` s n (i + 1)
| otherwise = 1 + escapedSize` s n (i + 1)
serializeDoc :: !XMLDoc !*{#Char} !Int -> (!*{#Char}, !Int)
serializeDoc (XMLDoc defaultNamespace namespaces documentElement) dest dest_i
# documentElement = addNamespaces defaultNamespace namespaces documentElement
# (dest,dest_i) = copyChars "<?xml version=\"1.0\" standalone=\"no\"?>" 0 False dest dest_i
= serializeNode documentElement dest dest_i
serializeNode :: !XMLNode !*{#Char} !Int -> (!*{#Char}, !Int)
serializeNode (XMLText text) dest dest_i = copyChars text 0 True dest dest_i
serializeNode (XMLElem qname attrs []) dest dest_i
# dest = {dest & [dest_i] = '<'}
# dest_i = dest_i + 1
# (dest,dest_i) = serializeQName qname dest dest_i
# (dest,dest_i) = serializeMap serializeAttr attrs dest dest_i
# dest = {dest & [dest_i] = '/'}
# dest_i = dest_i + 1
# dest = {dest & [dest_i] = '>'}
= (dest,dest_i + 1)
serializeNode (XMLElem qname attrs children) dest dest_i
# dest = {dest & [dest_i] = '<'}
# dest_i = dest_i + 1
# (dest,dest_i) = serializeQName qname dest dest_i
# (dest,dest_i) = serializeMap serializeAttr attrs dest dest_i
# dest = {dest & [dest_i] = '>'}
# dest_i = dest_i + 1
# (dest,dest_i) = serializeMap serializeNode children dest dest_i
# dest = {dest & [dest_i] = '<'}
# dest_i = dest_i + 1
# dest = {dest & [dest_i] = '/'}
# dest_i = dest_i + 1
# (dest,dest_i) = serializeQName qname dest dest_i
# dest = {dest & [dest_i] = '>'}
= (dest,dest_i + 1)
serializeMap f [] dest dest_i = (dest, dest_i)
serializeMap f [x:xs] dest dest_i
# (dest, dest_i) = f x dest dest_i
= serializeMap f xs dest dest_i
serializeAttr :: !XMLAttr !*{#Char} !Int -> (!*{#Char}, !Int)
serializeAttr (XMLAttr qname value) dest dest_i
# dest = {dest & [dest_i] = ' '}
# dest_i = dest_i + 1
# (dest,dest_i) = serializeQName qname dest dest_i
# dest = {dest & [dest_i] = '='}
# dest_i = dest_i + 1
# dest = {dest & [dest_i] = '"'}
# dest_i = dest_i + 1
# (dest,dest_i) = copyChars value 0 True dest dest_i
# dest = {dest & [dest_i] = '"'}
# dest_i = dest_i + 1
= (dest,dest_i)
serializeQName :: !XMLQName !*{#Char} !Int -> (!*{#Char}, !Int)
serializeQName (XMLQName Nothing name) dest dest_i = copyChars name 0 False dest dest_i
serializeQName (XMLQName (Just ns) name) dest dest_i
# (dest, dest_i) = copyChars ns 0 False dest dest_i
# dest = {dest & [dest_i] = ':'}
# dest_i = dest_i + 1
= copyChars name 0 False dest dest_i
copyChars :: !{#Char} !Int !Bool !*{#Char} !Int -> (!*{#Char},!Int)
copyChars src src_i escape dest dest_i
| src_i == (size src) = (dest, dest_i)
| otherwise
| escape && (src.[src_i] == '<')
# dest = {dest & [dest_i] = '&', [dest_i + 1] = 'l', [dest_i + 2] = 't', [dest_i + 3] = ';'}
= copyChars src (src_i + 1) escape dest (dest_i + 4)
| escape && (src.[src_i] == '>')
# dest = {dest & [dest_i] = '&', [dest_i + 1] = 'g', [dest_i + 2] = 't', [dest_i + 3] = ';'}
= copyChars src (src_i + 1) escape dest (dest_i + 4)
| escape && (src.[src_i] == '&')
# dest = {dest & [dest_i] = '&', [dest_i + 1] = 'a', [dest_i + 2] = 'm', [dest_i + 3] = 'p', [dest_i + 4] = ';'}
= copyChars src (src_i + 1) escape dest (dest_i + 5)
| otherwise
# dest = {dest & [dest_i] = src.[src_i]}
= copyChars src (src_i + 1) escape dest (dest_i + 1)
instance toString XMLDoc
where
toString doc
toString (XMLDoc defaultNamespace namespaces documentElement)
# documentElement = addNamespaces defaultNamespace namespaces documentElement
# doc = XMLDoc defaultNamespace namespaces documentElement
# docsize = docSize doc
# docstring = createArray docsize '\0'
# docstring = unsafeCreateArray docsize
# (docstring,_) = serializeDoc doc docstring 0
= docstring
where
addNamespaces :: !(Maybe XMLURI) [(XMLNamespacePrefix,String)] !XMLNode -> XMLNode
addNamespaces mbDefaultNamespace namespaces (XMLElem qname attrs children)
# ns = map (\(prefix,uri) -> XMLAttr (XMLQName (Just "xmlns") prefix) uri) namespaces
# ns = case mbDefaultNamespace of
Nothing = ns
Just defaultNamespace = [XMLAttr (XMLQName Nothing "xmlns") defaultNamespace:ns]
= (XMLElem qname (ns ++ attrs) children)
addNamespaces _ _ _ = abort "addNamespaces called on non-XMLElem\n"
docSize :: !XMLDoc -> Int
docSize (XMLDoc defaultNamespace namespaces documentElement)
= 37 + nodeSize documentElement
nodeSize :: !XMLNode -> Int
nodeSize (XMLText text) = escapedSize text
nodeSize (XMLElem qname attrs children)
# attrsSize = sum (map attrSize attrs) + length attrs
= if (isEmpty children)
(3 + qnameSize qname + attrsSize)
(5 + 2 * qnameSize qname + attrsSize + sum (map nodeSize children))
nodeSize (XMLCData data) = 12 + size data
attrSize :: !XMLAttr -> Int
attrSize (XMLAttr qname value) = 3 + qnameSize qname + escapedSize value
qnameSize :: !XMLQName -> Int
qnameSize (XMLQName Nothing name) = size name
qnameSize (XMLQName (Just ns) name) = 1 + size ns + size name
//Calculates the number of chars in a string when xml special characters are escaped
escapedSize :: !{#Char} -> Int
escapedSize s = escapedSize` s (size s) 0
where
escapedSize` s n i
| i == n = 0
| s.[i] == '<' = 4 + escapedSize` s n (i + 1)
| s.[i] == '>' = 4 + escapedSize` s n (i + 1)
| s.[i] == '&' = 5 + escapedSize` s n (i + 1)
| otherwise = 1 + escapedSize` s n (i + 1)
serializeDoc :: !XMLDoc !*{#Char} !Int -> (!*{#Char}, !Int)
serializeDoc (XMLDoc defaultNamespace namespaces documentElement) dest dest_i
# (dest,dest_i) = copyChars "<?xml version=\"1.0\" standalone=\"no\"?>" 0 False dest dest_i
= serializeNode documentElement dest dest_i
serializeNode :: !XMLNode !*{#Char} !Int -> (!*{#Char}, !Int)
serializeNode (XMLText text) dest dest_i = copyChars text 0 True dest dest_i
serializeNode (XMLElem qname attrs []) dest dest_i
# dest = {dest & [dest_i] = '<'}
# dest_i = dest_i + 1
# (dest,dest_i) = serializeQName qname dest dest_i
# (dest,dest_i) = serializeMap serializeAttr attrs dest dest_i
# dest = {dest & [dest_i] = '/'}
# dest_i = dest_i + 1
# dest = {dest & [dest_i] = '>'}
= (dest,dest_i + 1)
serializeNode (XMLElem qname attrs children) dest dest_i
# dest = {dest & [dest_i] = '<'}
# dest_i = dest_i + 1
# (dest,dest_i) = serializeQName qname dest dest_i
# (dest,dest_i) = serializeMap serializeAttr attrs dest dest_i
# dest = {dest & [dest_i] = '>'}
# dest_i = dest_i + 1
# (dest,dest_i) = serializeMap serializeNode children dest dest_i