Pārlūkot izejas kodu

* Patch from Sergei Gorelkin
xmlutils.pp:
+ Added THashTable - a simple hashed container with WideString keys.

dom.pp:
* Use the hash table instead of a sorted list for storing document IDs.
* Replaced all TLists by TFPList (which is smaller and faster).
* Fixed TDOMElement.RemoveAttributeNode to throw NOT_FOUND_ERR when
the requested node is not one of the element's attributes.
+ Added node read-only checks where required by the specs, this fixes
about 50 DOM tests.

xmlread.pp:

* Got rid of TXMLCharSource.FReloadHook, the corresponding procedure may
be called directly.
* Used a separate buffer to store the entity value literals, this
enables correct including of external PEs that have a text declaration
at the beginning.
* Some refactoring: ParseAttribute has been split into a separate
procedure, ProcessTextAndRefs was merged into ParseContent.

git-svn-id: trunk@11942 -

michael 17 gadi atpakaļ
vecāks
revīzija
e632e754cf
3 mainītis faili ar 424 papildinājumiem un 178 dzēšanām
  1. 99 81
      packages/fcl-xml/src/dom.pp
  2. 104 97
      packages/fcl-xml/src/xmlread.pp
  3. 221 0
      packages/fcl-xml/src/xmlutils.pp

+ 99 - 81
packages/fcl-xml/src/dom.pp

@@ -38,7 +38,7 @@ unit DOM;
 interface
 
 uses
-  SysUtils, Classes, AVL_Tree;
+  SysUtils, Classes, AVL_Tree, xmlutils;
 
 // -------------------------------------------------------
 //   DOMException
@@ -221,6 +221,8 @@ type
     function GetPrefix: DOMString; virtual;
     procedure SetPrefix(const Value: DOMString); virtual;
     function GetOwnerDocument: TDOMDocument; virtual;
+    procedure SetReadOnly(Value: Boolean);
+    procedure Changing;
   public
     constructor Create(AOwner: TDOMDocument);
     destructor Destroy; override;
@@ -299,7 +301,7 @@ type
   protected
     FNode: TDOMNode;
     FRevision: Integer;
-    FList: TList;
+    FList: TFPList;
     function GetCount: LongWord;
     function GetItem(index: LongWord): TDOMNode;
     procedure BuildList; virtual;
@@ -333,7 +335,7 @@ type
   protected
     FOwner: TDOMNode;
     FNodeType: Integer;
-    FList: TList;
+    FList: TFPList;
     function GetItem(index: LongWord): TDOMNode;
     function GetLength: LongWord;
     function Find(const name: DOMString; out Index: LongWord): Boolean;
@@ -415,7 +417,7 @@ type
 
   TDOMDocument = class(TDOMNode_WithChildren)
   protected
-    FIDList: TList;
+    FIDList: THashTable;
     FRevision: Integer;
     FXML11: Boolean;
     FImplementation: TDOMImplementation;
@@ -427,8 +429,6 @@ type
     function GetOwnerDocument: TDOMDocument; override;
     procedure SetTextContent(const value: DOMString); override;
     function IndexOfNS(const nsURI: DOMString): Integer;
-    function FindID(const aID: DOMString; out Index: LongWord): Boolean;
-    procedure ClearIDList;
     procedure RemoveID(Elem: TDOMElement);
   public
     property DocType: TDOMDocumentType read GetDocType;
@@ -713,16 +713,6 @@ type
 
 implementation
 
-uses
-  xmlutils;
-
-type
-  PIDItem = ^TIDItem;
-  TIDItem = record
-    ID: WideString;
-    Element: TDOMElement;
-  end;
-
 constructor TRefClass.Create;
 begin
   inherited Create;
@@ -858,12 +848,14 @@ end;
 
 function TDOMNode.InsertBefore(NewChild, RefChild: TDOMNode): TDOMNode;
 begin
+  Changing;  // merely to comply with core3/nodeinsertbefore14
   raise EDOMHierarchyRequest.Create('Node.InsertBefore');
   Result:=nil;
 end;
 
 function TDOMNode.ReplaceChild(NewChild, OldChild: TDOMNode): TDOMNode;
 begin
+  Changing;  // merely to comply with core3/nodereplacechild21
   raise EDOMHierarchyRequest.Create('Node.ReplaceChild');
   Result:=nil;
 end;
@@ -1000,6 +992,36 @@ begin
   Result := FOwnerDocument;
 end;
 
+procedure TDOMNode.SetReadOnly(Value: Boolean);
+var
+  child: TDOMNode;
+  attrs: TDOMNamedNodeMap;
+  I: Integer;
+begin
+  if Value then
+    Include(FFlags, nfReadOnly)
+  else
+    Exclude(FFlags, nfReadOnly);
+  child := FirstChild;
+  while Assigned(child) do
+  begin
+    child.SetReadOnly(Value);
+    child := child.NextSibling;
+  end;
+  attrs := Attributes;
+  if Assigned(attrs) then
+  begin
+    for I := 0 to attrs.Length-1 do
+      attrs[I].SetReadOnly(Value);
+  end;
+end;
+
+procedure TDOMNode.Changing;
+begin
+  if nfReadOnly in FFlags then
+    raise EDOMError.Create(NO_MODIFICATION_ALLOWED_ERR, 'Node.CheckReadOnly');
+end;
+
 function CompareDOMStrings(const s1, s2: DOMPChar; l1, l2: integer): integer;
 var i: integer;
 begin
@@ -1082,6 +1104,7 @@ begin
   Result := NewChild;
   NewChildType := NewChild.NodeType;
 
+  Changing;
   if NewChild.FOwnerDocument <> FOwnerDocument then
   begin
     if (NewChildType <> DOCUMENT_TYPE_NODE) or
@@ -1171,6 +1194,8 @@ end;
 
 function TDOMNode_WithChildren.DetachChild(OldChild: TDOMNode): TDOMNode;
 begin
+  Changing;
+
   if OldChild.ParentNode <> Self then
     raise EDOMNotFound.Create('NodeWC.RemoveChild');
 
@@ -1266,6 +1291,7 @@ end;
 
 procedure TDOMNode_WithChildren.SetTextContent(const AValue: DOMString);
 begin
+  Changing;
   FreeChildren;
   if AValue <> '' then
     AppendChild(FOwnerDocument.CreateTextNode(AValue));
@@ -1295,7 +1321,7 @@ begin
   inherited Create;
   FNode := ANode;
   FRevision := ANode.GetRevision-1;   // force BuildList at first access
-  FList := TList.Create;
+  FList := TFPList.Create;
 end;
 
 destructor TDOMNodeList.Destroy;
@@ -1395,7 +1421,7 @@ begin
   inherited Create;
   FOwner := AOwner;
   FNodeType := ANodeType;
-  FList := TList.Create;
+  FList := TFPList.Create;
 end;
 
 destructor TDOMNamedNodeMap.Destroy;
@@ -1467,7 +1493,9 @@ var
   AttrOwner: TDOMNode;
 begin
   Result := 0;
-  if arg.FOwnerDocument <> FOwner.FOwnerDocument then
+  if nfReadOnly in FOwner.FFlags then
+    Result := NO_MODIFICATION_ALLOWED_ERR
+  else if arg.FOwnerDocument <> FOwner.FOwnerDocument then
     Result := WRONG_DOCUMENT_ERR
   else if arg.NodeType <> FNodeType then
     Result := HIERARCHY_REQUEST_ERR
@@ -1537,6 +1565,8 @@ end;
 
 function TDOMNamedNodeMap.RemoveNamedItem(const name: DOMString): TDOMNode;
 begin
+  if nfReadOnly in FOwner.FFlags then
+    raise EDOMError.Create(NO_MODIFICATION_ALLOWED_ERR, 'NamedNodeMap.RemoveNamedItem');
   Result := InternalRemove(name);
   if Result = nil then
     raise EDOMNotFound.Create('NamedNodeMap.RemoveNamedItem');
@@ -1544,6 +1574,8 @@ end;
 
 function TDOMNamedNodeMap.RemoveNamedItemNS(const namespaceURI, localName: DOMString): TDOMNode;
 begin
+  if nfReadOnly in FOwner.FFlags then
+    raise EDOMError.Create(NO_MODIFICATION_ALLOWED_ERR, 'NamedNodeMap.RemoveNamedItemNS');
   // TODO: Implement TDOMNamedNodeMap.RemoveNamedItemNS
   Result := nil;
 end;
@@ -1565,6 +1597,7 @@ end;
 
 procedure TDOMCharacterData.SetNodeValue(const AValue: DOMString);
 begin
+  Changing;
   FNodeValue := AValue;
 end;
 
@@ -1577,11 +1610,13 @@ end;
 
 procedure TDOMCharacterData.AppendData(const arg: DOMString);
 begin
+  Changing;
   FNodeValue := FNodeValue + arg;
 end;
 
 procedure TDOMCharacterData.InsertData(offset: LongWord; const arg: DOMString);
 begin
+  Changing;
   if offset > Length then
     raise EDOMIndexSize.Create('CharacterData.InsertData');
   Insert(arg, FNodeValue, offset+1);
@@ -1589,6 +1624,7 @@ end;
 
 procedure TDOMCharacterData.DeleteData(offset, count: LongWord);
 begin
+  Changing;
   if offset > Length then
     raise EDOMIndexSize.Create('CharacterData.DeleteData');
   Delete(FNodeValue, offset+1, count);
@@ -1685,86 +1721,61 @@ end;
 
 destructor TDOMDocument.Destroy;
 begin
-  ClearIDList;
   FreeAndNil(FIDList);   // set to nil before starting destroying chidlren
   inherited Destroy;
 end;
 
 function TDOMDocument.AddID(Attr: TDOMAttr): Boolean;
 var
-  I: Cardinal;
-  Item: PIDItem;
+  ID: DOMString;
+  Exists: Boolean;
+  p: PHashItem;
 begin
   if FIDList = nil then
-    FIDList := TList.Create;
-  New(Item);
-  Item^.ID := Attr.Value;
-  Item^.Element := Attr.OwnerElement;
-  if not FindID(Item^.ID, I) then
+    FIDList := THashTable.Create(256, False);
+
+  ID := Attr.Value;
+  p := FIDList.FindOrAdd(DOMPChar(ID), Length(ID), Exists);
+  if not Exists then
   begin
-    FIDList.Insert(I, Item);
+    p^.Data := Attr.OwnerElement;
     Result := True;
   end
   else
-  begin
-    Dispose(Item);
     Result := False;
-  end;
 end;
 
 // This shouldn't be called if document has no IDs,
 // or when it is being destroyed
-procedure TDOMDocument.RemoveID(Elem: TDOMElement);
-var
-  I: Integer;
-begin
-  for I := 0 to FIDList.Count-1 do
-  begin
-    if PIDItem(FIDList.List^[I])^.Element = Elem then
-    begin
-      Dispose(PIDItem(FIDList.List^[I]));
-      FIDList.Delete(I);
-      Exit;
-    end;
+// TODO: This could be much faster if removing ID happens
+// upon modification of corresponding attribute value.
+
+type
+  TempRec = record
+    Element: TDOMElement;
+    Entry: PHashItem;
   end;
-end;
 
-function TDOMDocument.FindID(const aID: DOMString; out Index: LongWord): Boolean;
-var
-  L, H, I, C: Integer;
-  P: PIDItem;
+function CheckID(Entry: PHashItem; arg: Pointer): Boolean;
 begin
-  Result := False;
-  L := 0;
-  H := FIDList.Count - 1;
-  while L <= H do
+  if Entry^.Data = TempRec(arg^).Element then
   begin
-    I := (L + H) shr 1;
-    P := PIDItem(FIDList.List^[I]);
-    C := CompareDOMStrings(PWideChar(aID), PWideChar(P^.ID), Length(aID), Length(P^.ID));
-    if C > 0 then L := I + 1 else
-    begin
-      H := I - 1;
-      if C = 0 then
-      begin
-        Result := True;
-        L := I;
-      end;
-    end;
-  end;
-  Index := L;
+    TempRec(arg^).Entry := Entry;
+    Result := False;
+  end
+  else
+    Result := True;
 end;
 
-procedure TDOMDocument.ClearIDList;
+procedure TDOMDocument.RemoveID(Elem: TDOMElement);
 var
-  I: Integer;
+  hr: TempRec;
 begin
-  if Assigned(FIDList) then
-  begin
-    for I := 0 to FIDList.Count-1 do
-      Dispose(PIDItem(FIDList.List^[I]));
-    FIDList.Clear;
-  end;    
+  hr.Element := Elem;
+  hr.Entry := nil;
+  FIDList.ForEach(@CheckID, @hr);
+  if Assigned(hr.Entry) then
+    FIDList.Remove(hr.Entry);
 end;
 
 function TDOMDocument.GetNodeType: Integer;
@@ -1924,13 +1935,10 @@ begin
 end;
 
 function TDOMDocument.GetElementById(const ElementID: DOMString): TDOMElement;
-var
-  I: Cardinal;
 begin
-  if Assigned(FIDList) and FindID(ElementID, I) then
-    Result := PIDItem(FIDList.List^[I])^.Element
-  else
   Result := nil;
+  if Assigned(FIDList) then
+    Result := TDOMElement(FIDList.Get(DOMPChar(ElementID), Length(ElementID)));
 end;
 
 function TDOMDocument.ImportNode(ImportedNode: TDOMNode;
@@ -1980,6 +1988,7 @@ begin
     if Assigned(ent) then
       ent.CloneChildren(Result, Self);
   end;
+  Result.SetReadOnly(True);
 end;
 
 procedure TXMLDocument.SetXMLVersion(const aValue: DOMString);
@@ -2119,6 +2128,7 @@ var
   I: Cardinal;
   attr: TDOMAttr;
 begin
+  Changing;
   if Attributes.Find(name, I) then
     Attr := FAttributes[I] as TDOMAttr
   else
@@ -2132,6 +2142,7 @@ end;
 
 procedure TDOMElement.RemoveAttribute(const name: DOMString);
 begin
+  Changing;
 // (note) NamedNodeMap.RemoveNamedItem can raise NOT_FOUND_ERR and we should not.
   if Assigned(FAttributes) then
     FAttributes.InternalRemove(name).Free;
@@ -2140,6 +2151,7 @@ end;
 procedure TDOMElement.RemoveAttributeNS(const nsURI,
   aLocalName: DOMString);
 begin
+  Changing;
   // TODO: Implement TDOMElement.RemoveAttributeNS
   raise EDOMNotSupported.Create('TDOMElement.RemoveAttributeNS');
 end;
@@ -2202,14 +2214,18 @@ end;
 
 function TDOMElement.RemoveAttributeNode(OldAttr: TDOMAttr): TDOMAttr;
 begin
+  Changing;
   Result:=nil;
-  if FAttributes=nil then exit;
   // TODO: DOM 2: must raise NOT_FOUND_ERR if OldAttr is not ours.
   //       -- but what is the purpose of return value then?
   // TODO: delegate to TNamedNodeMap?  Nope, it does not have such method
   // (note) one way around is to remove by name
-  if FAttributes.FList.Remove(OldAttr) > -1 then
+  if Assigned(FAttributes) and (FAttributes.FList.Remove(OldAttr) > -1) then
+  begin
     Result := OldAttr;
+  end
+  else
+    raise EDOMNotFound.Create('Element.RemoveAttributeNode');
 end;
 
 function TDOMElement.GetElementsByTagName(const name: DOMString): TDOMNodeList;
@@ -2418,6 +2434,7 @@ begin
   TDOMEntity(Result).FNotationName := FNotationName;
   if deep then
     CloneChildren(Result, aCloneOwner);
+  Result.SetReadOnly(True);
 end;
 
 // -------------------------------------------------------
@@ -2466,6 +2483,7 @@ end;
 
 procedure TDOMProcessingInstruction.SetNodeValue(const AValue: DOMString);
 begin
+  Changing;
   FNodeValue := AValue;
 end;
 

+ 104 - 97
packages/fcl-xml/src/xmlread.pp

@@ -171,7 +171,6 @@ type
     FXML11Rules: Boolean;
     FSystemID: WideString;
     FPublicID: WideString;
-    FReloadHook: procedure of object;
     function GetSystemID: WideString;
     function GetPublicID: WideString;
   protected
@@ -306,6 +305,7 @@ type
     FInsideDecl: Boolean;
     FDocNotValid: Boolean;
     FValue: TWideCharBuf;
+    FEntityValue: TWideCharBuf;
     FName: TWideCharBuf;
     FTokenStart: TLocation;
     FStandalone: Boolean;          // property of Doc ?
@@ -379,14 +379,14 @@ type
     procedure ParseDoctypeDecl;                                         // [28]
     procedure ParseMarkupDecl;                                          // [29]
     procedure ParseElement;                                             // [39]
+    procedure ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef);
     procedure ParseContent;                                             // [43]
     function  ResolvePredefined: Boolean;
     procedure IncludeEntity(InAttr: Boolean);
     procedure StartPE;
-    function  ParseCharRef: Boolean;                                    // [66]
+    function  ParseCharRef(var ToFill: TWideCharBuf): Boolean;        // [66]
     function  ParseExternalID(out SysID, PubID: WideString;             // [75]
       SysIdOptional: Boolean): Boolean;
-    procedure ProcessTextAndRefs;
 
     procedure BadPENesting(S: TErrorSeverity = esError);
     procedure ParseEntityDecl;
@@ -806,8 +806,8 @@ var
   c: WideChar;
   r: Integer;
 begin
-  if Assigned(FReloadHook) then
-    FReloadHook;
+  if DTDSubsetType = dsInternal then
+    FReader.DTDReloadHook;
   r := FBufEnd - FBuf;
   if r > 0 then
     Move(FBuf^, FBufStart^, r * sizeof(WideChar));
@@ -1260,6 +1260,8 @@ end;
 
 destructor TXMLReader.Destroy;
 begin
+  if Assigned(FEntityValue.Buffer) then
+    FreeMem(FEntityValue.Buffer);
   FreeMem(FName.Buffer);
   FreeMem(FValue.Buffer);
   if Assigned(FSource) then
@@ -1425,7 +1427,7 @@ begin
   Result := True;
 end;
 
-function TXMLReader.ParseCharRef: Boolean;           // [66]
+function TXMLReader.ParseCharRef(var ToFill: TWideCharBuf): Boolean;           // [66]
 var
   Value: Integer;
 begin
@@ -1460,15 +1462,15 @@ begin
     case Value of
       $01..$08, $0B..$0C, $0E..$1F:
         if FXML11 then
-          BufAppend(FValue, WideChar(Value))
+          BufAppend(ToFill, WideChar(Value))
         else
           FatalError('Invalid character reference');
       $09, $0A, $0D, $20..$D7FF, $E000..$FFFD:
-        BufAppend(FValue, WideChar(Value));
+        BufAppend(ToFill, WideChar(Value));
       $10000..$10FFFF:
         begin
-          BufAppend(FValue, WideChar($D7C0 + (Value shr 10)));
-          BufAppend(FValue, WideChar($DC00 xor (Value and $3FF)));
+          BufAppend(ToFill, WideChar($D7C0 + (Value shr 10)));
+          BufAppend(ToFill, WideChar($DC00 xor (Value and $3FF)));
         end;
     else
       FatalError('Invalid character reference');
@@ -1495,7 +1497,7 @@ begin
     end
     else
     begin
-      if ParseCharRef or ResolvePredefined then
+      if ParseCharRef(FValue) or ResolvePredefined then
         Continue;
       // have to insert entity or reference
       if FValue.Length > 0 then
@@ -1622,12 +1624,14 @@ begin
       SaveCursor := FCursor;
       FCursor := AEntity;         // build child node tree for the entity
       try
+        AEntity.SetReadOnly(False);
         if InAttr then
           DoParseAttValue(#0)
         else
           DoParseFragment;
         AEntity.FResolved := True;
       finally
+        AEntity.SetReadOnly(True);
         ContextPop;
         FCursor := SaveCursor;
         FValue.Length := 0;
@@ -1672,60 +1676,6 @@ begin
   FHavePERefs := True;
 end;
 
-procedure TXMLReader.ProcessTextAndRefs;
-var
-  nonWs: Boolean;
-begin
-  FValue.Length := 0;
-  nonWs := False;
-  StoreLocation(FTokenStart);
-  while (FCurChar <> '<') and (FCurChar <> #0) do
-  begin
-    if FCurChar <> '&' then
-    begin
-      if (FCurChar <> #32) and (FCurChar <> #10) and (FCurChar <> #9) and (FCurChar <> #13) then
-        nonWs := True;
-      BufAppend(FValue, FCurChar);
-      if FCurChar = '>' then
-        with FValue do
-          if (Length >= 3) and (Buffer[Length-2] = ']') and (Buffer[Length-3] = ']') then
-            FatalError('Literal '']]>'' is not allowed in text', 2);
-      GetChar;
-    end
-    else
-    begin
-      if FState <> rsRoot then
-        FatalError('Illegal at document level');
-
-      if FCurrContentType = ctEmpty then
-          ValidationError('References are illegal in EMPTY elements', []);
-
-      if ParseCharRef or ResolvePredefined then
-        nonWs := True // CharRef to whitespace is not considered whitespace
-      else
-      begin
-        if (nonWs or FPreserveWhitespace) and (FValue.Length > 0)  then
-        begin
-          // 'Reference illegal at root' is checked above, no need to check here
-          DoText(FValue.Buffer, FValue.Length, not nonWs);
-          FValue.Length := 0;
-        end;
-        IncludeEntity(False);
-      end;
-    end;
-  end; // while
-  if FState = rsRoot then
-  begin
-    if (nonWs or FPreserveWhitespace) and (FValue.Length > 0)  then
-    begin
-      DoText(FValue.Buffer, FValue.Length, not nonWs);
-      FValue.Length := 0;
-    end;
-  end
-  else if nonWs then
-    FatalError('Illegal at document level', -1);
-end;
-
 procedure TXMLReader.ExpectAttValue;    // [10]
 var
   Delim: WideChar;
@@ -1955,14 +1905,12 @@ begin
   begin
     BufAllocate(FIntSubset, 256);
     FSource.DTDSubsetType := dsInternal;
-    FSource.FReloadHook := {$IFDEF FPC}@{$ENDIF}DTDReloadHook;
     try
       FDTDStartPos := FSource.FBuf;
       ParseMarkupDecl;
       DTDReloadHook;     // fetch last chunk
       SetString(FDocType.FInternalSubset, FIntSubset.Buffer, FIntSubset.Length);
     finally
-      FSource.FReloadHook := nil;
       FreeMem(FIntSubset.Buffer);
       FSource.DTDSubsetType := dsNone;
     end;
@@ -1989,6 +1937,7 @@ begin
   end;
   FCursor := Doc;
   ValidateDTD;
+  FDocType.SetReadOnly(True);
 end;
 
 procedure TXMLReader.ExpectEq;   // [25]
@@ -2324,7 +2273,9 @@ var
   CurrentEntity: TObject;
 begin
   CurrentEntity := FSource.FEntity;
-  FValue.Length := 0;
+  if FEntityValue.Buffer = nil then
+    BufAllocate(FEntityValue, 256);
+  FEntityValue.Length := 0;
   // "Included in literal": process until delimiter hit IN SAME context
   while not ((FSource.FEntity = CurrentEntity) and CheckForChar(Delim)) do
   if CheckForChar('%') then
@@ -2337,16 +2288,16 @@ begin
   end
   else if FCurChar = '&' then  // CharRefs: include, EntityRefs: bypass
   begin
-    if not ParseCharRef then
+    if not ParseCharRef(FEntityValue) then
     begin
-      BufAppend(FValue, '&');
-      BufAppendChunk(FValue, FName.Buffer, FName.Length);
-      BufAppend(FValue, ';');
+      BufAppend(FEntityValue, '&');
+      BufAppendChunk(FEntityValue, FName.Buffer, FName.Length);
+      BufAppend(FEntityValue, ';');
     end;
   end
   else if FCurChar <> #0 then         // Regular character
   begin
-    BufAppend(FValue, FCurChar);
+    BufAppend(FEntityValue, FCurChar);
     GetChar;
   end
   else if (FSource.FEntity = CurrentEntity) or not ContextPop then         // #0
@@ -2378,6 +2329,7 @@ begin
   end;
 
   Entity := TDOMEntityEx.Create(Doc);
+  Entity.SetReadOnly(True);
   try
     Entity.FExternallyDeclared := FSource.DTDSubsetType <> dsInternal;
     Entity.FName := ExpectName;
@@ -2392,7 +2344,7 @@ begin
       StoreLocation(Entity.FStartLocation);
       if not ParseEntityDeclValue(Delim) then
         DoErrorPos(esFatal, 'Literal has no closing quote', Entity.FStartLocation);
-      SetString(Entity.FReplacementText, FValue.Buffer, FValue.Length);
+      SetString(Entity.FReplacementText, FEntityValue.Buffer, FEntityValue.Length);
     end
     else
       if not ParseExternalID(Entity.FSystemID, Entity.FPublicID, False) then
@@ -2575,6 +2527,8 @@ begin
 end;
 
 procedure TXMLReader.ParseContent;
+var
+  nonWs: Boolean;
 begin
   repeat
     if FCurChar = '<' then
@@ -2600,7 +2554,56 @@ begin
         RaiseNameNotFound;
     end
     else
-      ProcessTextAndRefs;
+    begin
+      FValue.Length := 0;
+      nonWs := False;
+      StoreLocation(FTokenStart);
+      while (FCurChar <> '<') and (FCurChar <> #0) do
+      begin
+        if FCurChar <> '&' then
+        begin
+          if (FCurChar <> #32) and (FCurChar <> #10) and (FCurChar <> #9) and (FCurChar <> #13) then
+            nonWs := True;
+          BufAppend(FValue, FCurChar);
+          if FCurChar = '>' then
+          with FValue do
+            if (Length >= 3) and (Buffer[Length-2] = ']') and (Buffer[Length-3] = ']') then
+              FatalError('Literal '']]>'' is not allowed in text', 2);
+          GetChar;
+        end
+        else
+        begin
+          if FState <> rsRoot then
+            FatalError('Illegal at document level');
+
+          if FCurrContentType = ctEmpty then
+            ValidationError('References are illegal in EMPTY elements', []);
+
+          if ParseCharRef(FValue) or ResolvePredefined then
+            nonWs := True // CharRef to whitespace is not considered whitespace
+          else
+          begin
+            if (nonWs or FPreserveWhitespace) and (FValue.Length > 0)  then
+            begin
+              // 'Reference illegal at root' is checked above, no need to check here
+              DoText(FValue.Buffer, FValue.Length, not nonWs);
+              FValue.Length := 0;
+            end;
+            IncludeEntity(False);
+          end;
+        end;
+      end; // while
+      if FState = rsRoot then
+      begin
+        if (nonWs or FPreserveWhitespace) and (FValue.Length > 0)  then
+        begin
+          DoText(FValue.Buffer, FValue.Length, not nonWs);
+          FValue.Length := 0;
+        end;
+      end
+      else if nonWs then
+        FatalError('Illegal at document level', -1);
+    end;
   until FCurChar = #0;
 end;
 
@@ -2610,8 +2613,6 @@ var
   NewElem: TDOMElement;
   ElDef: TDOMElementDef;
   IsEmpty: Boolean;
-  attr: TDOMAttr;
-  OldAttr: TDOMNode;
 begin
   if FState > rsRoot then
     FatalError('Only one top-level element allowed', FName.Length)
@@ -2639,28 +2640,15 @@ begin
     ValidationError('Element ''%s'' is not allowed in this context',[NewElem.TagName], FName.Length);
 
   IsEmpty := False;
-  if SkipS then
+  while (FSource.FBuf^ <> '>') and (FSource.FBuf^ <> '/') do
   begin
-    while (FCurChar <> '>') and (FCurChar <> '/') do
-    begin
-      CheckName;
-      attr := doc.CreateAttributeBuf(FName.Buffer, FName.Length);
-
-      // !!cannot use TDOMElement.SetAttributeNode because it will free old attribute
-      OldAttr := NewElem.Attributes.SetNamedItem(Attr);
-      if Assigned(OldAttr) then
-      begin
-        OldAttr.Free;
-        FatalError('Duplicate attribute', FName.Length);
-      end;
-      ExpectEq;
-      FCursor := attr;
-      ExpectAttValue;
-      if (FCurChar <> '>') and (FCurChar <> '/') then
-        SkipS(True);
-    end;   // while
+    SkipS(True);
+    if (FSource.FBuf^ = '>') or (FSource.FBuf^ = '/') then
+      Break;
+    ParseAttribute(NewElem, ElDef);
   end;
-  if FCurChar = '/' then
+
+  if FSource.FBuf^ = '/' then
   begin
     IsEmpty := True;
     GetChar;
@@ -2706,6 +2694,25 @@ begin
   PopVC;
 end;
 
+procedure TXMLReader.ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef);
+var
+  attr: TDOMAttr;
+  OldAttr: TDOMNode;
+begin
+  CheckName;
+  attr := doc.CreateAttributeBuf(FName.Buffer, FName.Length);
+  // !!cannot use TDOMElement.SetAttributeNode because it will free old attribute
+  OldAttr := Elem.Attributes.SetNamedItem(Attr);
+  if Assigned(OldAttr) then
+  begin
+    OldAttr.Free;
+    FatalError('Duplicate attribute', FName.Length);
+  end;
+  ExpectEq;
+  FCursor := attr;
+  ExpectAttValue;
+end;
+
 procedure TXMLReader.AddForwardRef(aList: TFPList; Buf: PWideChar; Length: Integer);
 var
   w: PForwardRef;

+ 221 - 0
packages/fcl-xml/src/xmlutils.pp

@@ -30,6 +30,42 @@ function IsXmlNmTokens(const Value: WideString; Xml11: Boolean = False): Boolean
 function IsValidXmlEncoding(const Value: WideString): Boolean;
 function Xml11NamePages: PByteArray;
 procedure NormalizeSpaces(var Value: WideString);
+function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
+
+{ a simple hash table with WideString keys }
+
+type
+  PPHashItem = ^PHashItem;
+  PHashItem = ^THashItem;
+  THashItem = record
+    Key: WideString;
+    HashValue: LongWord;
+    Next: PHashItem;
+    Data: TObject;
+  end;
+
+  THashForEach = function(Entry: PHashItem; arg: Pointer): Boolean;
+
+  THashTable = class(TObject)
+  private
+    FCount: LongWord;
+    FBucketCount: LongWord;
+    FBucket: PPHashItem;
+    FOwnsObjects: Boolean;
+    function Lookup(Key: PWideChar; KeyLength: Integer; var Found: Boolean; CanCreate: Boolean): PHashItem;
+    procedure Resize(NewCapacity: LongWord);
+  public
+    constructor Create(InitSize: Integer; OwnObjects: Boolean);
+    destructor Destroy; override;
+    procedure Clear;
+    function Find(Key: PWideChar; KeyLen: Integer): PHashItem;
+    function FindOrAdd(Key: PWideChar; KeyLen: Integer; var Found: Boolean): PHashItem; overload;
+    function FindOrAdd(Key: PWideChar; KeyLen: Integer): PHashItem; overload;
+    function Get(Key: PWideChar; KeyLen: Integer): TObject;
+    function Remove(Entry: PHashItem): Boolean;
+    procedure ForEach(proc: THashForEach; arg: Pointer);
+    property Count: LongWord read FCount;
+  end;
 
 {$i names.inc}
 
@@ -239,6 +275,191 @@ begin
   end;
 end;
 
+function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
+begin
+  Result := InitValue;
+  while KeyLen <> 0 do
+  begin
+    Result := Result * $F4243 xor ord(Key^);
+    Inc(Key);
+    Dec(KeyLen);
+  end;
+end;
+
+function KeyCompare(const Key1: WideString; Key2: Pointer; Key2Len: Integer): Boolean;
+begin
+  Result := (Length(Key1)=Key2Len) and (CompareWord(Pointer(Key1)^, Key2^, Key2Len) = 0);
+end;
+
+{ THashTable }
+
+constructor THashTable.Create(InitSize: Integer; OwnObjects: Boolean);
+var
+  I: Integer;
+begin
+  inherited Create;
+  FOwnsObjects := OwnObjects;
+  I := 256;
+  while I < InitSize do I := I shl 1;
+  FBucketCount := I;
+  FBucket := AllocMem(I * sizeof(PHashItem));
+end;
+
+destructor THashTable.Destroy;
+begin
+  Clear;
+  FreeMem(FBucket);
+  inherited Destroy;
+end;
+
+procedure THashTable.Clear;
+var
+  I: Integer;
+  item, next: PHashItem;
+begin
+  for I := 0 to FBucketCount-1 do
+  begin
+    item := FBucket[I];
+    while Assigned(item) do
+    begin
+      next := item^.Next;
+      if FOwnsObjects then
+        item^.Data.Free;
+      Dispose(item);
+      item := next;
+    end;
+  end;
+  FillChar(FBucket^, FBucketCount * sizeof(PHashItem), 0);
+end;
+
+function THashTable.Find(Key: PWideChar; KeyLen: Integer): PHashItem;
+var
+  Dummy: Boolean;
+begin
+  Result := Lookup(Key, KeyLen, Dummy, False);
+end;
+
+function THashTable.FindOrAdd(Key: PWideChar; KeyLen: Integer;
+  var Found: Boolean): PHashItem;
+begin
+  Result := Lookup(Key, KeyLen, Found, True);
+end;
+
+function THashTable.FindOrAdd(Key: PWideChar; KeyLen: Integer): PHashItem;
+var
+  Dummy: Boolean;
+begin
+  Result := Lookup(Key, KeyLen, Dummy, True);
+end;
+
+function THashTable.Get(Key: PWideChar; KeyLen: Integer): TObject;
+var
+  e: PHashItem;
+  Dummy: Boolean;
+begin
+  e := Lookup(Key, KeyLen, Dummy, False);
+  if Assigned(e) then
+    Result := e^.Data
+  else
+    Result := nil;  
+end;
+
+function THashTable.Lookup(Key: PWideChar; KeyLength: Integer;
+  var Found: Boolean; CanCreate: Boolean): PHashItem;
+var
+  Entry: PPHashItem;
+  h: LongWord;
+begin
+  h := Hash(0, Key, KeyLength);
+  Entry := @FBucket[h mod FBucketCount];
+  while Assigned(Entry^) and not ((Entry^^.HashValue = h) and KeyCompare(Entry^^.Key, Key, KeyLength) ) do
+    Entry := @Entry^^.Next;
+  Found := Assigned(Entry^);
+  if Found or (not CanCreate) then
+  begin
+    Result := Entry^;
+    Exit;
+  end;
+  if FCount > FBucketCount then  { arbitrary limit, probably too high }
+  begin
+    Resize(FBucketCount * 2);
+    Result := Lookup(Key, KeyLength, Found, CanCreate);
+  end
+  else
+  begin
+    New(Result);
+    SetString(Result^.Key, Key, KeyLength);
+    Result^.HashValue := h;
+    Result^.Data := nil;
+    Result^.Next := nil;
+    Inc(FCount);
+    Entry^ := Result;
+  end;
+end;
+
+procedure THashTable.Resize(NewCapacity: LongWord);
+var
+  p, chain: PPHashItem;
+  i: Integer;
+  e, n: PHashItem;
+begin
+  p := AllocMem(NewCapacity * sizeof(PHashItem));
+  for i := 0 to FBucketCount-1 do
+  begin
+    e := FBucket[i];
+    while Assigned(e) do
+    begin
+      chain := @p[e^.HashValue mod NewCapacity];
+      n := e^.Next;
+      e^.Next := chain^;
+      chain^ := e;
+      e := n;
+    end;
+  end;
+  FBucketCount := NewCapacity;
+  FreeMem(FBucket);
+  FBucket := p;
+end;
+
+function THashTable.Remove(Entry: PHashItem): Boolean;
+var
+  chain: PPHashItem;
+begin
+  chain := @FBucket[Entry^.HashValue mod FBucketCount];
+  while Assigned(chain^) do
+  begin
+    if chain^ = Entry then
+    begin
+      chain^ := Entry^.Next;
+      if FOwnsObjects then
+        Entry^.Data.Free;
+      Dispose(Entry);
+      Dec(FCount);
+      Result := True;
+      Exit;
+    end;
+    chain := @chain^^.Next;
+  end;
+  Result := False;
+end;
+
+procedure THashTable.ForEach(proc: THashForEach; arg: Pointer);
+var
+  i: Integer;
+  e: PHashItem;
+begin
+  for i := 0 to FBucketCount-1 do
+  begin
+    e := FBucket[i];
+    while Assigned(e) do
+    begin
+      if not proc(e, arg) then
+        Exit;
+      e := e^.Next;
+    end;
+  end;
+end;
+
 initialization
 
 finalization