Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
clean-and-itasks
clean-platform
Commits
631806b9
Commit
631806b9
authored
Apr 15, 2020
by
Camil Staps
🚀
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'fixXmlBug' into 'master'
fix bugs in XML decoder See merge request
!322
parents
c63ccb61
3d409129
Pipeline
#41384
passed with stage
in 1 minute and 48 seconds
Changes
3
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
65 additions
and
54 deletions
+65
-54
src/libraries/OS-Independent/Text/GenXML.dcl
src/libraries/OS-Independent/Text/GenXML.dcl
+2
-2
src/libraries/OS-Independent/Text/GenXML.icl
src/libraries/OS-Independent/Text/GenXML.icl
+53
-47
src/libraries/OS-Independent/Text/GenXML/Gast.icl
src/libraries/OS-Independent/Text/GenXML/Gast.icl
+10
-5
No files found.
src/libraries/OS-Independent/Text/GenXML.dcl
View file @
631806b9
...
...
@@ -23,9 +23,9 @@ definition module Text.GenXML
* nodesWithMergedConsecutiveText :: ![XMLNode] -> [XMLNode]
* nodesWithMergedConsecutiveText [] = []
* nodesWithMergedConsecutiveText [XMLText text1, XMLText text2: rest] =
*
nodesWithMergedConsecutiveText [XMLText $ text1 +++ text2: rest]
*
nodesWithMergedConsecutiveText [XMLText $ text1 +++ text2: rest]
* nodesWithMergedConsecutiveText [node: rest] =
*
[nodeWithMergedConsecutiveText node: nodesWithMergedConsecutiveText rest]
*
[nodeWithMergedConsecutiveText node: nodesWithMergedConsecutiveText rest]
*/
import
StdOverloaded
,
StdGeneric
,
Data
.
Either
...
...
src/libraries/OS-Independent/Text/GenXML.icl
View file @
631806b9
...
...
@@ -58,9 +58,11 @@ where
where
escapedSize`
s
n
i
|
i
==
n
=
0
|
s
.[
i
]
==
'<'
=
4
+
escapedSize`
s
n
(
i
+
1
)
|
s
.[
i
]
==
'>'
=
4
+
escapedSize`
s
n
(
i
+
1
)
|
s
.[
i
]
==
'&'
=
5
+
escapedSize`
s
n
(
i
+
1
)
|
s
.[
i
]
==
'<'
=
4
+
escapedSize`
s
n
(
i
+
1
)
|
s
.[
i
]
==
'>'
=
4
+
escapedSize`
s
n
(
i
+
1
)
|
s
.[
i
]
==
'&'
=
5
+
escapedSize`
s
n
(
i
+
1
)
|
s
.[
i
]
==
'\''
=
6
+
escapedSize`
s
n
(
i
+
1
)
|
s
.[
i
]
==
'"'
=
6
+
escapedSize`
s
n
(
i
+
1
)
|
otherwise
=
1
+
escapedSize`
s
n
(
i
+
1
)
serializeDoc
::
!
XMLDoc
!*{#
Char
}
!
Int
->
(!*{#
Char
},
!
Int
)
...
...
@@ -138,8 +140,25 @@ where
#
dest
=
{
dest
&
[
dest_i
]
=
'&'
,
[
dest_i
+
1
]
=
'g'
,
[
dest_i
+
2
]
=
't'
,
[
dest_i
+
3
]
=
';'
}
=
copyChars
src
(
src_i
+
1
)
escape
dest
(
dest_i
+
4
)
|
escape
&&
(
src
.[
src_i
]
==
'&'
)
#
dest
=
{
dest
&
[
dest_i
]
=
'&'
,
[
dest_i
+
1
]
=
'a'
,
[
dest_i
+
2
]
=
'm'
,
[
dest_i
+
3
]
=
'p'
,
[
dest_i
+
4
]
=
';'
}
#
dest
=
{
dest
&
[
dest_i
]
=
'&'
,
[
dest_i
+
1
]
=
'a'
,
[
dest_i
+
2
]
=
'm'
,
[
dest_i
+
3
]
=
'p'
,
[
dest_i
+
4
]
=
';'
}
=
copyChars
src
(
src_i
+
1
)
escape
dest
(
dest_i
+
5
)
|
escape
&&
(
src
.[
src_i
]
==
'"'
)
#
dest
=
{
dest
&
[
dest_i
]
=
'&'
,
[
dest_i
+
1
]
=
'q'
,
[
dest_i
+
2
]
=
'u'
,
[
dest_i
+
3
]
=
'o'
,
[
dest_i
+
4
]
=
't'
,
[
dest_i
+
5
]
=
';'
}
=
copyChars
src
(
src_i
+
1
)
escape
dest
(
dest_i
+
6
)
|
escape
&&
(
src
.[
src_i
]
==
'\''
)
#
dest
=
{
dest
&
[
dest_i
]
=
'&'
,
[
dest_i
+
1
]
=
'a'
,
[
dest_i
+
2
]
=
'p'
,
[
dest_i
+
3
]
=
'o'
,
[
dest_i
+
4
]
=
's'
,
[
dest_i
+
5
]
=
';'
}
=
copyChars
src
(
src_i
+
1
)
escape
dest
(
dest_i
+
6
)
|
otherwise
#
dest
=
{
dest
&
[
dest_i
]
=
src
.[
src_i
]}
=
copyChars
src
(
src_i
+
1
)
escape
dest
(
dest_i
+
1
)
...
...
@@ -154,8 +173,7 @@ where
=
Ok
(
snd
(
hd
xmlDoc
))
//Token type which is the intermediary representation during XML parsing
::
Token
=
TokenAttrValue
!
String
|
TokenCharData
!
String
::
Token
=
TokenCharData
!
String
|
TokenName
!
String
|
TokenStartTagOpen
|
TokenTagClose
...
...
@@ -176,18 +194,15 @@ isName _ = False
isCharData
(
TokenCharData
_)
=
True
isCharData
_
=
False
isAttrValue
(
TokenAttrValue
_)
=
True
isAttrValue
_
=
False
::
LexFunctionResult
=
Token
!
Int
!
Token
|
NoToken
!
Int
|
Fail
!
String
::
LexFunction
:==
String
Int
->
Maybe
LexFunctionResult
lex
::
!
String
!
Int
![
Token
]
->
MaybeErrorString
[
Token
]
lex
input
offset
tokens
|
offset
>=
size
input
=
Ok
(
reverse
tokens
)
//Done
|
dataMode
tokens
&&
isJust
charDataResult
=
processResult
(
fromJust
charDataResult
)
|
otherwise
=
processResult
(
lexAny
input
offset
lexFunctions
)
where
lexFunctions
=
[
lexWhitespace
,
lexDeclarationStart
...
...
@@ -208,11 +223,11 @@ where
dataMode
[
TokenCData
_:
_]
=
True
dataMode
_
=
False
charDataResult
=
lexCharData
input
offset
charDataResult
=
lexCharData
'<'
False
input
offset
processResult
r
=
case
r
of
Token
offset
token
=
lex
input
offset
[
token
:
tokens
]
//Lex another token and do recursive call
NoToken
offset
=
lex
input
offset
tokens
Token
offset
token
=
lex
input
(
inc
offset
)
[
token
:
tokens
]
//Lex another token and do recursive call
NoToken
offset
=
lex
input
(
inc
offset
)
tokens
Fail
err
=
Error
err
//Try any of the lexers in the list until one succeeds
...
...
@@ -229,7 +244,7 @@ where
lexEmptyTagClose
=
lexFixed
"/>"
TokenEmptyTagClose
lexTagClose
=
lexFixed
">"
TokenTagClose
lexDeclarationStart
input
offset
=
case
lexFixed
"<?xml"
TokenDeclarationStart
input
offset
of
lexDeclarationStart
input
offset
=
case
lexFixed
"<?xml"
TokenDeclarationStart
input
offset
of
Nothing
=
Nothing
Just
res
|
offset
==
0
=
Just
res
...
...
@@ -242,7 +257,7 @@ where
Just
$
maybe
(
Fail
"CDATA start without end"
)
(\
endIdx
->
Token
(
endIdx
+
4
)
$
TokenCData
$
input
%
(
offset
+
9
,
endIdx
))
(\
endIdx
->
Token
(
endIdx
+
3
)
$
TokenCData
$
input
%
(
offset
+
9
,
endIdx
))
(
dataEndIndex
$
inc
offset
)
where
dataEndIndex
::
!
Int
->
Maybe
Int
...
...
@@ -252,31 +267,29 @@ where
|
otherwise
=
dataEndIndex
$
inc
curIndex
//Char data
lexCharData
::
!
String
!
Int
->
Maybe
LexFunctionResult
lexCharData
input
offset
=
lexCharData
::
!
Char
!
Bool
!
String
!
Int
->
Maybe
LexFunctionResult
lexCharData
endChar
endCharBelongsToToken
input
offset
=
case
lexCharData`
offset
[]
of
Error
e
=
Just
$
Fail
e
Ok
([],
_)
=
Nothing
Ok
(
dataStrings
,
end
)
=
Just
$
Token
end
(
TokenCharData
$
concat
dataStrings
)
Ok
(
dataStrings
,
end
)
=
Just
$
Token
end
(
TokenCharData
$
trim
$
concat
dataStrings
)
where
lexCharData`
::
!
Int
![
String
]
->
MaybeErrorString
(![
String
],
!
Int
)
lexCharData`
offset
accum
|
input
.[
offset
]
==
'&'
#
end
=
findEnd
(\
c
->
c
>=
'a'
&&
c
<=
'z'
)
input
(
offset
+
1
)
|
input
.[
end
]
<>
';'
=
Error
"Missing ';' at end of character entity"
|
input
.[
end
]
<>
';'
=
Error
$
"Missing ';' at end of character entity"
#
name
=
input
%
(
offset
+
1
,
end
-
1
)
=
maybe
(
Error
$
concat
[
"Unknown named character entity reference '"
,
name
,
"'"
])
(\
charString
->
lexCharData`
(
end
+
1
)
[
charString
:
accum
])
(
entityCharacter
name
)
|
isTextChar
input
.[
offset
]
|
input
.[
offset
]
<>
endChar
#
end
=
findEnd
isTextChar
input
(
offset
+
1
)
#
data
=
trim
(
input
%
(
offset
,
end
-
1
))
|
data
<>
""
=
lexCharData`
end
[
data
:
accum
]
|
otherwise
=
Ok
(
accum
,
offset
)
|
otherwise
=
Ok
(
reverse
accum
,
offset
)
=
lexCharData`
end
[
input
%
(
offset
,
end
-
1
):
accum
]
|
otherwise
=
Ok
(
reverse
accum
,
if
endCharBelongsToToken
offset
(
offset
-
1
))
where
isTextChar
c
=
c
<>
'<'
&&
c
<>
'&'
isTextChar
c
=
c
<>
endChar
&&
c
<>
'&'
entityCharacter
::
!
String
->
Maybe
String
entityCharacter
"quot"
=
Just
"
\"
"
...
...
@@ -288,7 +301,7 @@ where
//Names
lexName
input
offset
|
isNameStartChar
input
.[
offset
]
=
Just
(
Token
end
(
TokenName
(
input
%
(
offset
,
end
-
1
))))
|
isNameStartChar
input
.[
offset
]
=
Just
(
Token
(
dec
end
)
(
TokenName
(
input
%
(
offset
,
end
-
1
))))
|
otherwise
=
Nothing
where
end
=
findEnd
isNameChar
input
(
offset
+
1
)
...
...
@@ -307,21 +320,14 @@ where
//AttrValue
lexAttrValue
input
offset
|
input
.[
offset
]
<>
'"'
=
Nothing
=
Just
(
Token
end
(
TokenAttrValue
(
input
%
(
offset
+
1
,
end
-
2
))))
where
end
=
findAttrValueEnd
input
(
offset
+
1
)
findAttrValueEnd
input
offset
|
offset
>=
size
input
=
offset
|
input
.[
offset
]
==
'"'
=
offset
+
1
|
otherwise
=
findAttrValueEnd
input
(
offset
+
1
)
|
input
.[
offset
]
<>
'"'
=
Nothing
|
otherwise
=
lexCharData
'"'
True
input
(
inc
offset
)
lexWhitespace
input
offset
|
las
t
==
offset
=
Nothing
=
Just
(
NoToken
las
t
)
|
nex
t
==
offset
=
Nothing
|
otherwise
=
Just
(
NoToken
$
dec
nex
t
)
where
las
t
=
findEnd
isWhitespace
input
offset
nex
t
=
findEnd
isWhitespace
input
offset
isWhitespace
'\x20'
=
True
isWhitespace
'\x9'
=
True
...
...
@@ -331,14 +337,14 @@ where
//Lex token of fixed size
lexFixed
chars
token
input
offset
|
input
%
(
offset
,
offset
+
(
size
chars
)
-
1
)
==
chars
=
Just
(
Token
(
offset
+
size
chars
)
token
)
=
Nothing
|
input
%
(
offset
,
offset
+
(
size
chars
)
-
1
)
==
chars
=
Just
(
Token
(
offset
+
size
chars
-
1
)
token
)
|
otherwise
=
Nothing
//Find the first offset where the predicate no longer holds
//Find the first offset where the predicate no longer holds
findEnd
pred
input
offset
|
offset
>=
size
input
=
offset
|
pred
input
.[
offset
]
=
findEnd
pred
input
(
offset
+
1
)
=
offset
|
offset
>=
size
input
=
offset
|
pred
input
.[
offset
]
=
findEnd
pred
input
(
offset
+
1
)
|
otherwise
=
offset
pXMLDoc
::
Parser
Token
XMLDoc
pXMLDoc
=
begin1
pXMLDoc`
...
...
@@ -367,7 +373,7 @@ pElemStart = (\name attributes -> (name,attributes)) @> symbol TokenStartTagOpe
pElemContEnd
=
symbol
TokenEndTagOpen
&>
pName
<&
symbol
TokenTagClose
pAttr
=
(\
name
v
->
XMLAttr
(
toQName
name
)
v
)
@>
pName
+&-
symbol
TokenEqual
+&+
pAttrValue
pName
=
satisfy
isName
<@
\
n
->
case
n
of
TokenName
n
->
n
;
_
->
abort
"error in pName
\n
"
pAttrValue
=
satisfy
is
AttrValue
<@
\
n
->
case
n
of
Token
AttrValue
v
->
v
;
_
->
abort
"error in pAttrValue
\n
"
pAttrValue
=
satisfy
is
CharData
<@
\
n
->
case
n
of
Token
CharData
v
->
v
;
_
->
abort
"error in pAttrValue
\n
"
pCharData
=
satisfy
isCharData
<@
\
n
->
case
n
of
TokenCharData
d
->
d
;
_
->
abort
"error in pCharData
\n
"
pCData
=
satisfy
(\
t
->
t
=:
TokenCData
_)
<@
\
t
->
case
t
of
TokenCData
data
=
XMLCData
data
;
_
=
undef
...
...
src/libraries/OS-Independent/Text/GenXML/Gast.icl
View file @
631806b9
...
...
@@ -11,12 +11,17 @@ derive ggen XMLNode, XMLAttr
// TODO: Generate URIs for namespaces, instead of using names.
ggen
{|
XMLDoc
|}
st
=
[
XMLDoc
(
unNameString
<$>
defaultNamespace
)
(
bifmap
unNameString
unNameString
<$>
namespaces
)
(
XMLElem
rootName
rootAttrs
rootChildren
)
\\
(
defaultNamespace
,
namespaces
,
rootName
,
rootAttrs
,
rootChildren
)
<-
ggen
{|*|}
st
[
docWithNamedCharacterEntityReference
:
[
XMLDoc
(
unNameString
<$>
defaultNamespace
)
(
bifmap
unNameString
unNameString
<$>
namespaces
)
(
XMLElem
rootName
rootAttrs
rootChildren
)
\\
(
defaultNamespace
,
namespaces
,
rootName
,
rootAttrs
,
rootChildren
)
<-
ggen
{|*|}
st
]
]
where
docWithNamedCharacterEntityReference
=
XMLDoc
Nothing
[]
(
XMLElem
(
uname
"someName"
)
[
XMLAttr
(
uname
"attr"
)
"< >
\"
'&"
]
[
XMLText
"
\"
& <>
\"
"
])
ggen
{|
XMLQName
|}
st
=
[
XMLQName
(
unNameString
<$>
namespace
)
(
unNameString
name
)
\\
(
namespace
,
name
)
<-
ggen
{|*|}
st
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment