浏览代码

* Added support for predefined entities
* Improved whitespace handling

sg 25 年之前
父节点
当前提交
8e3c9135d1
共有 1 个文件被更改，包括 82 次插入和 16 次删除
  1. 82 16
      fcl/xml/xmlread.pp

+ 82 - 16
fcl/xml/xmlread.pp

@@ -712,15 +712,19 @@ var
     i: Integer;
     i: Integer;
   begin
   begin
     SetLength(s, 0);
     SetLength(s, 0);
-    while not (buf[0] in [#0, '<', '&']) do begin
+    while not (buf[0] in [#0, '<', '&']) do
+    begin
       s := s + buf[0];
       s := s + buf[0];
       Inc(buf);
       Inc(buf);
     end;
     end;
-    if Length(s) > 0 then begin
-      // Strip whitespace from end of s
+    if Length(s) > 0 then
+    begin
+      // Check if s has non-whitespace content
       i := Length(s);
       i := Length(s);
-      while (i > 0) and (s[i] in [#10, #13, ' ']) do Dec(i);
-      NewElem.AppendChild(doc.CreateTextNode(Copy(s, 1, i)));
+      while (i > 0) and (s[i] in [#10, #13, ' ']) do
+        Dec(i);
+      if i > 0 then
+	NewElem.AppendChild(doc.CreateTextNode(s));
       Result := True;
       Result := True;
     end else
     end else
       Result := False;
       Result := False;
@@ -730,9 +734,11 @@ var
   var
   var
     cdata: String;
     cdata: String;
   begin
   begin
-    if CheckFor('<![CDATA[') then begin
+    if CheckFor('<![CDATA[') then
+    begin
       SetLength(cdata, 0);
       SetLength(cdata, 0);
-      while not CheckFor(']]>') do begin
+      while not CheckFor(']]>') do
+      begin
         cdata := cdata + buf[0];
         cdata := cdata + buf[0];
         Inc(buf);
         Inc(buf);
       end;
       end;
@@ -742,16 +748,47 @@ var
       Result := False;
       Result := False;
   end;
   end;
 
 
+
+  procedure ReplaceEntityRef(EntityNode: TDOMNode; const Replacement: String);
+  var
+    PrevSibling, NextSibling: TDOMNode;
+  begin
+    // Swap EntityNode (assumed to be a child of NewElem) for the text Replacement, folding it into adjacent text nodes where they exist
+    PrevSibling := EntityNode.PreviousSibling;
+    NextSibling := EntityNode.NextSibling;
+    if Assigned(PrevSibling) and (PrevSibling.NodeType = TEXT_NODE) then
+    begin
+      TDOMCharacterData(PrevSibling).AppendData(Replacement);  // text precedes the reference: extend that node
+      NewElem.RemoveChild(EntityNode);  // NOTE(review): the detached node is not freed here - confirm who owns it
+      if Assigned(NextSibling) and (NextSibling.NodeType = TEXT_NODE) then
+      begin  // text on both sides: collapse everything into PrevSibling
+        TDOMCharacterData(PrevSibling).AppendData(
+	  TDOMCharacterData(NextSibling).Data);
+	NewElem.RemoveChild(NextSibling);  // NOTE(review): the calling loop saved this node as its next step before the call - confirm iteration still reaches later entity references
+      end
+    end else
+      if Assigned(NextSibling) and (NextSibling.NodeType = TEXT_NODE) then
+      begin
+        TDOMCharacterData(NextSibling).InsertData(0, Replacement);  // only following text: prepend at offset 0
+	NewElem.RemoveChild(EntityNode);
+      end else
+        NewElem.ReplaceChild(Doc.CreateTextNode(Replacement), EntityNode);  // no text neighbour: put a new text node in its place
+  end;
+
+
 var
 var
   IsEmpty: Boolean;
   IsEmpty: Boolean;
   name: String;
   name: String;
   oldpos: PChar;
   oldpos: PChar;
 
 
   attr: TDOMAttr;
   attr: TDOMAttr;
+  Node, NextSibling: TDOMNode;
 begin
 begin
   oldpos := buf;
   oldpos := buf;
-  if CheckFor('<') then begin
-    if not GetName(name) then begin
+  if CheckFor('<') then
+  begin
+    if not GetName(name) then
+    begin
       buf := oldpos;
       buf := oldpos;
       Result := False;
       Result := False;
       exit;
       exit;
@@ -762,12 +799,15 @@ begin
 
 
     SkipWhitespace;
     SkipWhitespace;
     IsEmpty := False;
     IsEmpty := False;
-    while True do begin
-      if CheckFor('/>') then begin
+    while True do
+    begin
+      if CheckFor('/>') then
+      begin
         IsEmpty := True;
         IsEmpty := True;
         break;
         break;
       end;
       end;
-      if CheckFor('>') then break;
+      if CheckFor('>') then
+        break;
 
 
       // Get Attribute [41]
       // Get Attribute [41]
       attr := doc.CreateAttribute(ExpectName);
       attr := doc.CreateAttribute(ExpectName);
@@ -778,19 +818,41 @@ begin
       SkipWhitespace;
       SkipWhitespace;
     end;
     end;
 
 
-    if not IsEmpty then begin
+    if not IsEmpty then
+    begin
       // Get content
       // Get content
-      while SkipWhitespace or ParseCharData or ParseCDSect or ParsePI or
+      SkipWhitespace;
+      while ParseCharData or ParseCDSect or ParsePI or
         ParseComment(NewElem) or ParseElement(NewElem) or
         ParseComment(NewElem) or ParseElement(NewElem) or
         ParseReference(NewElem) do;
         ParseReference(NewElem) do;
 
 
       // Get ETag [42]
       // Get ETag [42]
       ExpectString('</');
       ExpectString('</');
-      ExpectName;
+      if ExpectName <> name then
+        RaiseExc('Unmatching element end tag');
       SkipWhitespace;
       SkipWhitespace;
       ExpectString('>');
       ExpectString('>');
     end;
     end;
 
 
+    // Resolve predefined entities
+    Node := NewElem.FirstChild;
+    while Assigned(Node) do
+    begin
+      NextSibling := Node.NextSibling;
+      if Node.NodeType = ENTITY_REFERENCE_NODE then
+        if Node.NodeName = 'amp' then
+	  ReplaceEntityRef(Node, '&')
+        else if Node.NodeName = 'apos' then
+	  ReplaceEntityRef(Node, '''')
+        else if Node.NodeName = 'gt' then
+	  ReplaceEntityRef(Node, '>')
+        else if Node.NodeName = 'lt' then
+	  ReplaceEntityRef(Node, '<')
+        else if Node.NodeName = 'quot' then
+	  ReplaceEntityRef(Node, '"');
+      Node := NextSibling;
+    end;
+
     Result := True;
     Result := True;
   end else
   end else
     Result := False;
     Result := False;
@@ -1031,7 +1093,11 @@ end.
 
 
 {
 {
   $Log$
   $Log$
-  Revision 1.16  2000-04-20 14:15:45  sg
+  Revision 1.17  2000-07-09 11:39:15  sg
+  * Added support for predefined entities
+  * Improved whitespace handling
+
+  Revision 1.16  2000/04/20 14:15:45  sg
   * Minor bugfixes
   * Minor bugfixes
   * Started support for DOM level 2
   * Started support for DOM level 2