Browse Source

* Added reading and writing support of character entity references
(i.e. characters given by their ASCII/Unicode values)

sg 25 years ago
parent
commit
29acdad632
2 changed files with 74 additions and 22 deletions
  1. 63 17
      fcl/xml/xmlread.pp
  2. 11 5
      fcl/xml/xmlwrite.pp

+ 63 - 17
fcl/xml/xmlread.pp

@@ -836,7 +836,8 @@ end;
 
 
 function TXMLReader.ParsePEReference: Boolean;    // [69]
 function TXMLReader.ParsePEReference: Boolean;    // [69]
 begin
 begin
-  if CheckFor('%') then begin
+  if CheckFor('%') then
+  begin
     ExpectName;
     ExpectName;
     ExpectString(';');
     ExpectString(';');
     Result := True;
     Result := True;
@@ -845,18 +846,33 @@ begin
 end;
 end;
 
 
 function TXMLReader.ParseReference(AOwner: TDOMNode): Boolean;    // [67] [68]
 function TXMLReader.ParseReference(AOwner: TDOMNode): Boolean;    // [67] [68]
+var
+  s: String;
 begin
 begin
-  if not CheckFor('&') then begin
+  if not CheckFor('&') then
+  begin
     Result := False;
     Result := False;
     exit;
     exit;
   end;
   end;
-  if CheckFor('#') then begin    // Test for CharRef [66]
-    if CheckFor('x') then begin
+  if CheckFor('#') then    // Test for CharRef [66]
+  begin
+    s := '#';
+    if CheckFor('x') then
+    begin
       // !!!: there must be at least one digit
       // !!!: there must be at least one digit
-      while buf[0] in ['0'..'9', 'a'..'f', 'A'..'F'] do Inc(buf);
+      while buf[0] in ['0'..'9', 'a'..'f', 'A'..'F'] do
+      begin
+        s := s + buf[0];
+        Inc(buf);
+      end;
     end else
     end else
       // !!!: there must be at least one digit
       // !!!: there must be at least one digit
-      while buf[0] in ['0'..'9'] do Inc(buf);
+      while buf[0] in ['0'..'9'] do
+      begin
+        s := s + buf[0];
+        Inc(buf);
+      end;
+    AOwner.AppendChild(doc.CreateEntityReference(s));
   end else
   end else
     AOwner.AppendChild(doc.CreateEntityReference(ExpectName));
     AOwner.AppendChild(doc.CreateEntityReference(ExpectName));
   ExpectString(';');
   ExpectString(';');
@@ -972,24 +988,50 @@ procedure TXMLReader.ResolveEntities(RootNode: TDOMNode);
         RootNode.ReplaceChild(Doc.CreateTextNode(Replacement), EntityNode);
         RootNode.ReplaceChild(Doc.CreateTextNode(Replacement), EntityNode);
   end;
   end;
 
 
+  { Converts a string of hexadecimal digits to its integer value.
+    Digits are read left to right, most significant first; both upper- and
+    lower-case letters 'A'..'F' are accepted. An empty string yields 0.
+    Characters that are not hexadecimal digits contribute a value of 0 but
+    still occupy a digit position (the accumulator is shifted for them). }
+  function HexToInt(const s: String): Integer;
+  var
+    i: Integer;
+  begin
+    Result := 0;
+    for i := 1 to Length(s) do
+    begin
+      // Shift the accumulated value by one hex digit before adding the next
+      Result := Result * 16;
+      // Index with the loop variable so each character is examined in turn
+      // (reading s[1] here would only ever decode the first character)
+      if (s[i] >= '0') and (s[i] <= '9') then
+        Inc(Result, Ord(s[i]) - Ord('0'))
+      else if (s[i] >= 'A') and (s[i] <= 'F') then
+        Inc(Result, Ord(s[i]) - Ord('A') + 10)
+      else if (s[i] >= 'a') and (s[i] <= 'f') then
+        Inc(Result, Ord(s[i]) - Ord('a') + 10);
+    end;
+  end;
+
 var
 var
   Node, NextSibling: TDOMNode;
   Node, NextSibling: TDOMNode;
+  Value: Integer;
 begin
 begin
   Node := RootNode.FirstChild;
   Node := RootNode.FirstChild;
   while Assigned(Node) do
   while Assigned(Node) do
   begin
   begin
     NextSibling := Node.NextSibling;
     NextSibling := Node.NextSibling;
     if Node.NodeType = ENTITY_REFERENCE_NODE then
     if Node.NodeType = ENTITY_REFERENCE_NODE then
-      if Node.NodeName = 'amp' then
-	ReplaceEntityRef(Node, '&')
-      else if Node.NodeName = 'apos' then
-	ReplaceEntityRef(Node, '''')
-      else if Node.NodeName = 'gt' then
-	ReplaceEntityRef(Node, '>')
-      else if Node.NodeName = 'lt' then
-        ReplaceEntityRef(Node, '<')
-      else if Node.NodeName = 'quot' then
-	ReplaceEntityRef(Node, '"');
+      if Length(Node.NodeName) > 0 then
+        if Node.NodeName[1] = '#' then
+	begin
+	  if Node.NodeName[2] = 'x' then
+	    Value := HexToInt(Copy(Node.NodeName, 2, 4))
+	  else
+	    Value := StrToInt(Copy(Node.NodeName, 2, 5));
+	  ReplaceEntityRef(Node, Chr(Value));
+	end else if Node.NodeName = 'amp' then
+	  ReplaceEntityRef(Node, '&')
+        else if Node.NodeName = 'apos' then
+	  ReplaceEntityRef(Node, '''')
+        else if Node.NodeName = 'gt' then
+	  ReplaceEntityRef(Node, '>')
+        else if Node.NodeName = 'lt' then
+          ReplaceEntityRef(Node, '<')
+        else if Node.NodeName = 'quot' then
+	  ReplaceEntityRef(Node, '"');
     Node := NextSibling;
     Node := NextSibling;
   end;
   end;
 end;
 end;
@@ -1117,7 +1159,11 @@ end.
 
 
 {
 {
   $Log$
   $Log$
-  Revision 1.19  2000-07-09 14:23:42  sg
+  Revision 1.20  2000-07-09 18:25:23  sg
+  * Added reading and writing support of character entity references
+    (i.e. characters given by their ASCII/Unicode values)
+
+  Revision 1.19  2000/07/09 14:23:42  sg
   * Tabs are now considered as whitespace, too, when the reader determines if
   * Tabs are now considered as whitespace, too, when the reader determines if
     a text node would be empty or not
     a text node would be empty or not
 
 

+ 11 - 5
fcl/xml/xmlwrite.pp

@@ -37,6 +37,8 @@ procedure WriteXML(Element: TDOMElement; var AStream: TStream);
 
 
 implementation
 implementation
 
 
+uses SysUtils;
+
 // -------------------------------------------------------------------
 // -------------------------------------------------------------------
 //   Writers for the different node types
 //   Writers for the different node types
 // -------------------------------------------------------------------
 // -------------------------------------------------------------------
@@ -137,8 +139,8 @@ type
   TSpecialCharCallback = procedure(c: Char);
   TSpecialCharCallback = procedure(c: Char);
 
 
 const
 const
-  AttrSpecialChars = ['"', '&'];
-  TextSpecialChars = ['<', '>', '&'];
+  AttrSpecialChars = [#1..#31, '"', '&'];
+  TextSpecialChars = [#1..#9, #11, #12, #14..#31, '<', '>', '&'];
 
 
 
 
 procedure ConvWrite(const s: String; const SpecialChars: TCharacters;
 procedure ConvWrite(const s: String; const SpecialChars: TCharacters;
@@ -169,7 +171,7 @@ begin
   else if c = '&' then
   else if c = '&' then
     wrt('&amp;')
     wrt('&amp;')
   else
   else
-    wrt(c);
+    wrt('&#' + IntToStr(Ord(c)) + ';');
 end;
 end;
 
 
 procedure TextnodeSpecialCharCallback(c: Char);
 procedure TextnodeSpecialCharCallback(c: Char);
@@ -181,7 +183,7 @@ begin
   else if c = '&' then
   else if c = '&' then
     wrt('&amp;')
     wrt('&amp;')
   else
   else
-    wrt(c);
+    wrt('&#' + IntToStr(Ord(c)) + ';');
 end;
 end;
 
 
 
 
@@ -398,7 +400,11 @@ end.
 
 
 {
 {
   $Log$
   $Log$
-  Revision 1.9  2000-07-09 11:40:09  sg
+  Revision 1.10  2000-07-09 18:25:24  sg
+  * Added reading and writing support of character entity references
+    (i.e. characters given by their ASCII/Unicode values)
+
+  Revision 1.9  2000/07/09 11:40:09  sg
   * ">" and "&" in text nodes are now replaced by "&gt;" and "&amp;"
   * ">" and "&" in text nodes are now replaced by "&gt;" and "&amp;"
 
 
   Revision 1.8  2000/06/29 08:45:32  sg
   Revision 1.8  2000/06/29 08:45:32  sg