|
@@ -59,7 +59,7 @@ type
|
|
|
FBuffer: PChar;
|
|
|
FBufPos: PChar;
|
|
|
FCapacity: Integer;
|
|
|
- FLineBreak: string;
|
|
|
+ FLineBreak: WideString;
|
|
|
FNSHelper: TNSSupport;
|
|
|
FAttrFixups: TFPList;
|
|
|
FScratch: TFPList;
|
|
@@ -79,6 +79,7 @@ type
|
|
|
procedure Write(const Buffer; Count: Longint); virtual; abstract;
|
|
|
procedure WriteNode(Node: TDOMNode);
|
|
|
procedure VisitDocument(Node: TDOMNode);
|
|
|
+ procedure VisitDocument_Canonical(Node: TDOMNode);
|
|
|
procedure VisitElement(Node: TDOMNode);
|
|
|
procedure VisitText(Node: TDOMNode);
|
|
|
procedure VisitCDATA(Node: TDOMNode);
|
|
@@ -155,6 +156,16 @@ end;
|
|
|
TXMLWriter
|
|
|
---------------------------------------------------------------------}
|
|
|
|
|
|
+const
|
|
|
+ AttrSpecialChars = ['<', '"', '&', #9, #10, #13];
|
|
|
+ TextSpecialChars = ['<', '>', '&', #10, #13];
|
|
|
+ CDSectSpecialChars = [']'];
|
|
|
+ LineEndingChars = [#13, #10];
|
|
|
+ QuotStr = '"';
|
|
|
+ AmpStr = '&';
|
|
|
+ ltStr = '<';
|
|
|
+ gtStr = '>';
|
|
|
+
|
|
|
constructor TXMLWriter.Create;
|
|
|
var
|
|
|
I: Integer;
|
|
@@ -164,14 +175,22 @@ begin
|
|
|
FBuffer := AllocMem(512+32);
|
|
|
FBufPos := FBuffer;
|
|
|
FCapacity := 512;
|
|
|
+ // Later on, this may be put under user control
|
|
|
+ // for now, take OS setting
|
|
|
+ if FCanonical then
|
|
|
+ FLineBreak := #10
|
|
|
+ else
|
|
|
+ FLineBreak := sLineBreak;
|
|
|
// Initialize Indent string
|
|
|
+ // TODO: this must be done in setter of FLineBreak
|
|
|
SetLength(FIndent, 100);
|
|
|
- FIndent[1] := #10;
|
|
|
- for I := 2 to 100 do FIndent[I] := ' ';
|
|
|
+ FIndent[1] := FLineBreak[1];
|
|
|
+ if Length(FLineBreak) > 1 then
|
|
|
+ FIndent[2] := FLineBreak[2]
|
|
|
+ else
|
|
|
+ FIndent[2] := ' ';
|
|
|
+ for I := 3 to 100 do FIndent[I] := ' ';
|
|
|
FIndentCount := 0;
|
|
|
- // Later on, this may be put under user control
|
|
|
- // for now, take OS setting
|
|
|
- FLineBreak := sLineBreak;
|
|
|
FNSHelper := TNSSupport.Create;
|
|
|
FScratch := TFPList.Create;
|
|
|
FNSDefs := TFPList.Create;
|
|
@@ -215,14 +234,7 @@ begin
|
|
|
|
|
|
wc := Cardinal(Src^); Inc(Src);
|
|
|
case wc of
|
|
|
- $0A: pb := StrECopy(pb, PChar(FLineBreak));
|
|
|
- $0D: begin
|
|
|
- pb := StrECopy(pb, PChar(FLineBreak));
|
|
|
- if (Src < SrcEnd) and (Src^ = #$0A) then
|
|
|
- Inc(Src);
|
|
|
- end;
|
|
|
-
|
|
|
- 0..$09, $0B, $0C, $0E..$7F: begin
|
|
|
+ 0..$7F: begin
|
|
|
pb^ := char(wc); Inc(pb);
|
|
|
end;
|
|
|
|
|
@@ -275,7 +287,7 @@ end;
|
|
|
|
|
|
procedure TXMLWriter.wrtIndent; { inline }
|
|
|
begin
|
|
|
- wrtChars(PWideChar(FIndent), FIndentCount*2+1);
|
|
|
+ wrtChars(PWideChar(FIndent), FIndentCount*2+Length(FLineBreak));
|
|
|
end;
|
|
|
|
|
|
procedure TXMLWriter.IncIndent;
|
|
@@ -298,25 +310,6 @@ begin
|
|
|
if FIndentCount>0 then dec(FIndentCount);
|
|
|
end;
|
|
|
|
|
|
-procedure TXMLWriter.wrtQuotedLiteral(const ws: WideString);
|
|
|
-var
|
|
|
- Quote: WideChar;
|
|
|
-begin
|
|
|
- // TODO: need to check if the string also contains single quote
|
|
|
- // both quotes present is a error
|
|
|
- if Pos('"', ws) > 0 then
|
|
|
- Quote := ''''
|
|
|
- else
|
|
|
- Quote := '"';
|
|
|
- wrtChr(Quote);
|
|
|
- wrtStr(ws);
|
|
|
- wrtChr(Quote);
|
|
|
-end;
|
|
|
-
|
|
|
-const
|
|
|
- AttrSpecialChars = ['<', '"', '&', #9, #10, #13];
|
|
|
- TextSpecialChars = ['<', '>', '&'];
|
|
|
-
|
|
|
procedure TXMLWriter.ConvWrite(const s: WideString; const SpecialChars: TSetOfChar;
|
|
|
const SpecialCharCallback: TSpecialCharCallback);
|
|
|
var
|
|
@@ -326,7 +319,7 @@ begin
|
|
|
EndPos := 1;
|
|
|
while EndPos <= Length(s) do
|
|
|
begin
|
|
|
- if (s[EndPos] < #255) and (Char(ord(s[EndPos])) in SpecialChars) then
|
|
|
+ if (s[EndPos] < 'A') and (Char(ord(s[EndPos])) in SpecialChars) then
|
|
|
begin
|
|
|
wrtChars(@s[StartPos], EndPos - StartPos);
|
|
|
SpecialCharCallback(Self, s, EndPos);
|
|
@@ -338,12 +331,6 @@ begin
|
|
|
wrtChars(@s[StartPos], EndPos - StartPos);
|
|
|
end;
|
|
|
|
|
|
-const
|
|
|
- QuotStr = '"';
|
|
|
- AmpStr = '&';
|
|
|
- ltStr = '<';
|
|
|
- gtStr = '>';
|
|
|
-
|
|
|
procedure AttrSpecialCharCallback(Sender: TXMLWriter; const s: DOMString;
|
|
|
var idx: Integer);
|
|
|
begin
|
|
@@ -360,18 +347,74 @@ begin
|
|
|
end;
|
|
|
end;
|
|
|
|
|
|
-procedure TextnodeSpecialCharCallback(Sender: TXMLWriter; const s: DOMString;
|
|
|
+procedure TextnodeNormalCallback(Sender: TXMLWriter; const s: DOMString;
|
|
|
var idx: Integer);
|
|
|
begin
|
|
|
case s[idx] of
|
|
|
'<': Sender.wrtStr(ltStr);
|
|
|
'>': Sender.wrtStr(gtStr); // Required only in ']]>' literal, otherwise optional
|
|
|
'&': Sender.wrtStr(AmpStr);
|
|
|
+ #13:
|
|
|
+ begin
|
|
|
+ // We normalize #13#10 and #13 to FLineBreak, going somewhat
|
|
|
+ // beyond the specs here, see issue #13879.
|
|
|
+ Sender.wrtStr(Sender.FLineBreak);
|
|
|
+ if (idx < Length(s)) and (s[idx+1] = #10) then
|
|
|
+ Inc(idx);
|
|
|
+ end;
|
|
|
+ #10: Sender.wrtStr(Sender.FLineBreak);
|
|
|
else
|
|
|
Sender.wrtChr(s[idx]);
|
|
|
end;
|
|
|
end;
|
|
|
|
|
|
+procedure TextnodeCanonicalCallback(Sender: TXMLWriter; const s: DOMString;
|
|
|
+ var idx: Integer);
|
|
|
+begin
|
|
|
+ case s[idx] of
|
|
|
+ '<': Sender.wrtStr(ltStr);
|
|
|
+ '>': Sender.wrtStr(gtStr);
|
|
|
+ '&': Sender.wrtStr(AmpStr);
|
|
|
+ #13: Sender.wrtStr('
')
|
|
|
+ else
|
|
|
+ Sender.wrtChr(s[idx]);
|
|
|
+ end;
|
|
|
+end;
|
|
|
+
|
|
|
+procedure CDSectSpecialCharCallback(Sender: TXMLWriter; const s: DOMString;
|
|
|
+ var idx: Integer);
|
|
|
+begin
|
|
|
+ if (idx <= Length(s)-2) and (s[idx+1] = ']') and (s[idx+2] = '>') then
|
|
|
+ begin
|
|
|
+ Sender.wrtStr(']]]]><![CDATA[>');
|
|
|
+ Inc(idx, 2);
|
|
|
+ // TODO: emit warning 'cdata-section-splitted'
|
|
|
+ end
|
|
|
+ else
|
|
|
+ Sender.wrtChr(s[idx]);
|
|
|
+end;
|
|
|
+
|
|
|
+const
|
|
|
+ TextnodeCallbacks: array[boolean] of TSpecialCharCallback = (
|
|
|
+ @TextnodeNormalCallback,
|
|
|
+ @TextnodeCanonicalCallback
|
|
|
+ );
|
|
|
+
|
|
|
+procedure TXMLWriter.wrtQuotedLiteral(const ws: WideString);
|
|
|
+var
|
|
|
+ Quote: WideChar;
|
|
|
+begin
|
|
|
+ // TODO: need to check if the string also contains single quote
|
|
|
+ // both quotes present is a error
|
|
|
+ if Pos('"', ws) > 0 then
|
|
|
+ Quote := ''''
|
|
|
+ else
|
|
|
+ Quote := '"';
|
|
|
+ wrtChr(Quote);
|
|
|
+ ConvWrite(ws, LineEndingChars, @TextnodeNormalCallback);
|
|
|
+ wrtChr(Quote);
|
|
|
+end;
|
|
|
+
|
|
|
procedure TXMLWriter.WriteNode(node: TDOMNode);
|
|
|
begin
|
|
|
case node.NodeType of
|
|
@@ -382,7 +425,11 @@ begin
|
|
|
ENTITY_REFERENCE_NODE: VisitEntityRef(node);
|
|
|
PROCESSING_INSTRUCTION_NODE: VisitPI(node);
|
|
|
COMMENT_NODE: VisitComment(node);
|
|
|
- DOCUMENT_NODE: VisitDocument(node);
|
|
|
+ DOCUMENT_NODE:
|
|
|
+ if FCanonical then
|
|
|
+ VisitDocument_Canonical(node)
|
|
|
+ else
|
|
|
+ VisitDocument(node);
|
|
|
DOCUMENT_TYPE_NODE: VisitDocumentType(node);
|
|
|
ENTITY_NODE,
|
|
|
DOCUMENT_FRAGMENT_NODE: VisitFragment(node);
|
|
@@ -473,7 +520,7 @@ begin
|
|
|
if Assigned(B) then // drop redundant namespace declarations
|
|
|
FNSDefs.Add(B);
|
|
|
end
|
|
|
- else if TDOMAttr(node).Specified then
|
|
|
+ else if FCanonical or TDOMAttr(node).Specified then
|
|
|
begin
|
|
|
// obtain a TAttrFixup record (allocate if needed)
|
|
|
if j >= FAttrFixups.Count then
|
|
@@ -557,7 +604,7 @@ begin
|
|
|
for i := 0 to node.Attributes.Length - 1 do
|
|
|
begin
|
|
|
child := node.Attributes.Item[i];
|
|
|
- if TDOMAttr(child).Specified then
|
|
|
+ if FCanonical or TDOMAttr(child).Specified then
|
|
|
VisitAttribute(child);
|
|
|
end;
|
|
|
Child := node.FirstChild;
|
|
@@ -567,7 +614,7 @@ begin
|
|
|
begin
|
|
|
SavedInsideTextNode := FInsideTextNode;
|
|
|
wrtChr('>');
|
|
|
- FInsideTextNode := Child.NodeType in [TEXT_NODE, CDATA_SECTION_NODE];
|
|
|
+ FInsideTextNode := FCanonical or (Child.NodeType in [TEXT_NODE, CDATA_SECTION_NODE]);
|
|
|
IncIndent;
|
|
|
repeat
|
|
|
WriteNode(Child);
|
|
@@ -586,7 +633,7 @@ end;
|
|
|
|
|
|
procedure TXMLWriter.VisitText(node: TDOMNode);
|
|
|
begin
|
|
|
- ConvWrite(TDOMCharacterData(node).Data, TextSpecialChars, @TextnodeSpecialCharCallback);
|
|
|
+ ConvWrite(TDOMCharacterData(node).Data, TextSpecialChars, TextnodeCallbacks[FCanonical]);
|
|
|
end;
|
|
|
|
|
|
procedure TXMLWriter.VisitCDATA(node: TDOMNode);
|
|
@@ -594,11 +641,11 @@ begin
|
|
|
if not FInsideTextNode then
|
|
|
wrtIndent;
|
|
|
if FCanonical then
|
|
|
- ConvWrite(TDOMCharacterData(node).Data, TextSpecialChars, @TextnodeSpecialCharCallback)
|
|
|
+ ConvWrite(TDOMCharacterData(node).Data, TextSpecialChars, @TextnodeCanonicalCallback)
|
|
|
else
|
|
|
begin
|
|
|
wrtChars('<![CDATA[', 9);
|
|
|
- wrtStr(TDOMCharacterData(node).Data);
|
|
|
+ ConvWrite(TDOMCharacterData(node).Data, CDSectSpecialChars, @CDSectSpecialCharCallback);
|
|
|
wrtChars(']]>', 3);
|
|
|
end;
|
|
|
end;
|
|
@@ -618,7 +665,8 @@ begin
|
|
|
if TDOMProcessingInstruction(node).Data <> '' then
|
|
|
begin
|
|
|
wrtChr(' ');
|
|
|
- wrtStr(TDOMProcessingInstruction(node).Data);
|
|
|
+ // TODO: How does this comply with c14n??
|
|
|
+ ConvWrite(TDOMProcessingInstruction(node).Data, LineEndingChars, @TextnodeNormalCallback);
|
|
|
end;
|
|
|
wrtStr('?>');
|
|
|
end;
|
|
@@ -627,7 +675,8 @@ procedure TXMLWriter.VisitComment(node: TDOMNode);
|
|
|
begin
|
|
|
if not FInsideTextNode then wrtIndent;
|
|
|
wrtChars('<!--', 4);
|
|
|
- wrtStr(TDOMCharacterData(node).Data);
|
|
|
+ // TODO: How does this comply with c14n??
|
|
|
+ ConvWrite(TDOMCharacterData(node).Data, LineEndingChars, @TextnodeNormalCallback);
|
|
|
wrtChars('-->', 3);
|
|
|
end;
|
|
|
|
|
@@ -658,7 +707,8 @@ begin
|
|
|
// TODO: now handled as a regular PI, remove this?
|
|
|
if Length(TXMLDocument(node).StylesheetType) > 0 then
|
|
|
begin
|
|
|
- wrtStr(#10'<?xml-stylesheet type="');
|
|
|
+ wrtStr(FLineBreak);
|
|
|
+ wrtStr('<?xml-stylesheet type="');
|
|
|
wrtStr(TXMLDocument(node).StylesheetType);
|
|
|
wrtStr('" href="');
|
|
|
wrtStr(TXMLDocument(node).StylesheetHRef);
|
|
@@ -671,7 +721,37 @@ begin
|
|
|
WriteNode(Child);
|
|
|
Child := Child.NextSibling;
|
|
|
end;
|
|
|
- wrtChars(#10, 1);
|
|
|
+ wrtStr(FLineBreak);
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TXMLWriter.VisitDocument_Canonical(Node: TDOMNode);
|
|
|
+var
|
|
|
+ child, root: TDOMNode;
|
|
|
+begin
|
|
|
+ root := TDOMDocument(Node).DocumentElement;
|
|
|
+ child := node.FirstChild;
|
|
|
+ while Assigned(child) and (child <> root) do
|
|
|
+ begin
|
|
|
+ if child.nodeType in [COMMENT_NODE, PROCESSING_INSTRUCTION_NODE] then
|
|
|
+ begin
|
|
|
+ WriteNode(child);
|
|
|
+ wrtChr(#10);
|
|
|
+ end;
|
|
|
+ child := child.nextSibling;
|
|
|
+ end;
|
|
|
+ if root = nil then
|
|
|
+ Exit;
|
|
|
+ VisitElement(TDOMElement(root));
|
|
|
+ child := root.nextSibling;
|
|
|
+ while Assigned(child) do
|
|
|
+ begin
|
|
|
+ if child.nodeType in [COMMENT_NODE, PROCESSING_INSTRUCTION_NODE] then
|
|
|
+ begin
|
|
|
+ wrtChr(#10);
|
|
|
+ WriteNode(child);
|
|
|
+ end;
|
|
|
+ child := child.nextSibling;
|
|
|
+ end;
|
|
|
end;
|
|
|
|
|
|
procedure TXMLWriter.VisitAttribute(Node: TDOMNode);
|
|
@@ -697,7 +777,8 @@ end;
|
|
|
|
|
|
procedure TXMLWriter.VisitDocumentType(Node: TDOMNode);
|
|
|
begin
|
|
|
- wrtStr(#10'<!DOCTYPE ');
|
|
|
+ wrtStr(FLineBreak);
|
|
|
+ wrtStr('<!DOCTYPE ');
|
|
|
wrtStr(Node.NodeName);
|
|
|
wrtChr(' ');
|
|
|
with TDOMDocumentType(Node) do
|
|
@@ -717,7 +798,7 @@ begin
|
|
|
if InternalSubset <> '' then
|
|
|
begin
|
|
|
wrtChr('[');
|
|
|
- wrtStr(InternalSubset);
|
|
|
+ ConvWrite(InternalSubset, LineEndingChars, @TextnodeNormalCallback);
|
|
|
wrtChr(']');
|
|
|
end;
|
|
|
end;
|