Browse Source

--- Merging r15414 into '.':
U packages/fcl-xml/src/sax_xml.pp
U packages/fcl-xml/src/sax_html.pp
U packages/fcl-xml/src/sax.pp
--- Merging r15438 into '.':
U packages/fcl-xml/tests/xpathts.pp
U packages/fcl-xml/src/xpath.pp
--- Merging r15439 into '.':
G packages/fcl-xml/tests/xpathts.pp
G packages/fcl-xml/src/xpath.pp
--- Merging r15442 into '.':
U packages/fcl-xml/tests/xmlts.pp
U packages/fcl-xml/src/xmlread.pp
--- Merging r15443 into '.':
U packages/fcl-xml/tests/api.xml
U packages/fcl-xml/src/dom_html.pp
U packages/fcl-xml/src/dom.pp
U packages/fcl-xml/src/xmlutils.pp
G packages/fcl-xml/src/xmlread.pp
--- Merging r15551 into '.':
G packages/fcl-xml/src/sax_xml.pp
G packages/fcl-xml/src/sax_html.pp
--- Merging r15564 into '.':
G packages/fcl-xml/src/sax_html.pp
G packages/fcl-xml/src/xmlutils.pp
--- Merging r15570 into '.':
G packages/fcl-xml/src/sax_xml.pp
G packages/fcl-xml/src/sax_html.pp
--- Merging r15574 into '.':
G packages/fcl-xml/src/sax_html.pp
--- Merging r15628 into '.':
G packages/fcl-xml/src/xpath.pp
--- Merging r15632 into '.':
G packages/fcl-xml/src/xpath.pp
--- Merging r15638 into '.':
A packages/fcl-xml/src/xpathkw.inc
G packages/fcl-xml/src/xpath.pp
--- Merging r15639 into '.':
G packages/fcl-xml/tests/xpathts.pp
G packages/fcl-xml/src/xpath.pp
--- Merging r15641 into '.':
G packages/fcl-xml/src/xpath.pp
--- Merging r15650 into '.':
G packages/fcl-xml/tests/xpathts.pp
--- Merging r15652 into '.':
G packages/fcl-xml/src/xpath.pp
--- Merging r15654 into '.':
G packages/fcl-xml/src/xpath.pp
--- Merging r15717 into '.':
G packages/fcl-xml/src/xmlread.pp
--- Merging r15718 into '.':
G packages/fcl-xml/src/xmlutils.pp
G packages/fcl-xml/src/xmlread.pp
--- Merging r15736 into '.':
G packages/fcl-xml/src/xmlread.pp
--- Merging r15737 into '.':
G packages/fcl-xml/src/xmlutils.pp
G packages/fcl-xml/src/xmlread.pp
--- Merging r15738 into '.':
G packages/fcl-xml/src/sax_xml.pp
--- Merging r15755 into '.':
U packages/fcl-xml/src/htmwrite.pp
U packages/fcl-xml/src/xmlwrite.pp
--- Merging r15974 into '.':
G packages/fcl-xml/src/xmlread.pp
--- Merging r15975 into '.':
G packages/fcl-xml/src/xmlread.pp
--- Merging r15984 into '.':
G packages/fcl-xml/src/xmlread.pp
--- Merging r16010 into '.':
U packages/fcl-xml/tests/extras.pp
G packages/fcl-xml/src/dom.pp
--- Merging r16014 into '.':
G packages/fcl-xml/src/dom.pp
--- Merging r16046 into '.':
G packages/fcl-xml/src/xmlread.pp
--- Merging r16048 into '.':
G packages/fcl-xml/src/xmlread.pp
--- Merging r16049 into '.':
G packages/fcl-xml/src/sax_xml.pp
--- Merging r16060 into '.':
G packages/fcl-xml/src/dom.pp
--- Merging r16066 into '.':
G packages/fcl-xml/src/dom.pp
--- Merging r16070 into '.':
G packages/fcl-xml/src/xmlread.pp
--- Merging r16072 into '.':
G packages/fcl-xml/src/dom.pp

# revisions: 15414,15438,15439,15442,15443,15551,15564,15570,15574,15628,15632,15638,15639,15641,15650,15652,15654,15717,15718,15736,15737,15738,15755,15974,15975,15984,16010,16014,16046,16048,16049,16060,16066,16070,16072
------------------------------------------------------------------------
r15414 | sergei | 2010-06-12 18:51:10 +0200 (Sat, 12 Jun 2010) | 1 line
Changed paths:
M /trunk/packages/fcl-xml/src/sax.pp
M /trunk/packages/fcl-xml/src/sax_html.pp
M /trunk/packages/fcl-xml/src/sax_xml.pp

+ Implement TSAXReader.Abort method, which can be used to end the parsing prematurely (Mantis #16703)
------------------------------------------------------------------------
------------------------------------------------------------------------
r15438 | sergei | 2010-06-14 19:21:35 +0200 (Mon, 14 Jun 2010) | 3 lines
Changed paths:
M /trunk/packages/fcl-xml/src/xpath.pp
M /trunk/packages/fcl-xml/tests/xpathts.pp

* Fixed XPath functions name() and local-name(), which should behave different from DOM properties of the same names. name() is empty for text, comment and document nodes. local-name() is the same as name() for non-prefixed attributes and processing instructions.
+ Another dozen of tests.

------------------------------------------------------------------------
------------------------------------------------------------------------
r15439 | sergei | 2010-06-14 23:15:53 +0200 (Mon, 14 Jun 2010) | 3 lines
Changed paths:
M /trunk/packages/fcl-xml/src/xpath.pp
M /trunk/packages/fcl-xml/tests/xpathts.pp

* XPath, fixed parent axis of attribute nodes: it must consist of the owner element of an attribute.
+ Tests

------------------------------------------------------------------------
------------------------------------------------------------------------
r15442 | sergei | 2010-06-15 18:13:42 +0200 (Tue, 15 Jun 2010) | 2 lines
Changed paths:
M /trunk/packages/fcl-xml/src/xmlread.pp
M /trunk/packages/fcl-xml/tests/xmlts.pp

* xmlread.pp: when IgnoreComments=True, merge together text nodes that precede and follow the skipped comment. With this fix, the reader finally produces normalized documents in all modes, so remove the corresponding cheat from testing program (xmlts.pp).

------------------------------------------------------------------------
------------------------------------------------------------------------
r15443 | sergei | 2010-06-15 21:36:22 +0200 (Tue, 15 Jun 2010) | 6 lines
Changed paths:
M /trunk/packages/fcl-xml/src/dom.pp
M /trunk/packages/fcl-xml/src/dom_html.pp
M /trunk/packages/fcl-xml/src/xmlread.pp
M /trunk/packages/fcl-xml/src/xmlutils.pp
M /trunk/packages/fcl-xml/tests/api.xml

+ Enumeration type for XML version
+ Implemented DOM level 3 properties xmlVersion and xmlEncoding for both TDOMDocument and TDOMEntity classes. Also declared property inputEncoding for these classes.
* Non-conformant TXMLDocument.Encoding has been deprecated; it is now an alias for xmlEncoding property.
* TDOMDocument and TDOMEntity now share a common ancestor, TDOMNode_TopLevel.
* api.xml: enabled testing for the new properties

------------------------------------------------------------------------
------------------------------------------------------------------------
r15551 | sergei | 2010-07-11 15:21:08 +0200 (Sun, 11 Jul 2010) | 1 line
Changed paths:
M /trunk/packages/fcl-xml/src/sax_html.pp
M /trunk/packages/fcl-xml/src/sax_xml.pp

* The tag name may be followed by any whitespace char, not only #32 (Mantis #16906)
------------------------------------------------------------------------
------------------------------------------------------------------------
r15564 | sergei | 2010-07-14 15:54:09 +0200 (Wed, 14 Jul 2010) | 2 lines
Changed paths:
M /trunk/packages/fcl-xml/src/sax_html.pp
M /trunk/packages/fcl-xml/src/xmlutils.pp

* HTML parser: in case of malformed input, do not create attributes with invalid names (Mantis #16916).
* Along the way, eliminated one layer of useless converting strings from wide to ansi and back.
------------------------------------------------------------------------
------------------------------------------------------------------------
r15570 | sergei | 2010-07-14 16:35:13 +0200 (Wed, 14 Jul 2010) | 1 line
Changed paths:
M /trunk/packages/fcl-xml/src/sax_html.pp
M /trunk/packages/fcl-xml/src/sax_xml.pp

* The fix in r15551 was not entirely correct. Must handle the case when string ends with a single whitespace.
------------------------------------------------------------------------
------------------------------------------------------------------------
r15574 | sergei | 2010-07-15 06:54:14 +0200 (Thu, 15 Jul 2010) | 1 line
Changed paths:
M /trunk/packages/fcl-xml/src/sax_html.pp

* HTML reader, attempt recovery from misplaced '<' characters (another part of Mantis #16916).
------------------------------------------------------------------------
------------------------------------------------------------------------
r15628 | sergei | 2010-07-24 05:21:40 +0200 (Sat, 24 Jul 2010) | 4 lines
Changed paths:
M /trunk/packages/fcl-xml/src/xpath.pp

* TXPathScanner.ParseStep split into two functions in order to reduce complexity and improve readability.
* Also modified it so the data is accumulated in local vars, and resulting TStep objects are created only after the parsing is successfully complete.
* TXPathScanner.ParsePrimaryExpr: eliminated variable.

------------------------------------------------------------------------
------------------------------------------------------------------------
r15632 | sergei | 2010-07-25 01:15:35 +0200 (Sun, 25 Jul 2010) | 4 lines
Changed paths:
M /trunk/packages/fcl-xml/src/xpath.pp

XPath improvements:
- Deleted TXPathLocationPathNode, it was too much overhead to store a single bit of information. The path root (if any) is now represented by TStep node with Axis=axisRoot.
* Changed TStep linkage from 'right' to 'left', this is consistent with the way of parsing expressions and considerably simplifies evaluation.
* Fixed ParsePathExpr procedure so it no longer accepts empty/truncated expressions.
------------------------------------------------------------------------
------------------------------------------------------------------------
r15638 | sergei | 2010-07-26 15:49:46 +0200 (Mon, 26 Jul 2010) | 1 line
Changed paths:
M /trunk/packages/fcl-xml/src/xpath.pp
A /trunk/packages/fcl-xml/src/xpathkw.inc

XPath, use a perfect hash to recognize all possible keywords.
------------------------------------------------------------------------
------------------------------------------------------------------------
r15639 | sergei | 2010-07-26 18:35:35 +0200 (Mon, 26 Jul 2010) | 6 lines
Changed paths:
M /trunk/packages/fcl-xml/src/xpath.pp
M /trunk/packages/fcl-xml/tests/xpathts.pp

* XPath, fixed parsing of 'prefix:*' node tests:
o The prefix to resolve should not include following ':*' characters
o NextToken changes CurTokenString, so NextToken must be after reading CurTokenString.
o Added a test for that
* XPath test suite, fixed comparison of numeric result (it is quite tricky in presence of NaNs).

------------------------------------------------------------------------
------------------------------------------------------------------------
r15641 | sergei | 2010-07-26 21:30:26 +0200 (Mon, 26 Jul 2010) | 2 lines
Changed paths:
M /trunk/packages/fcl-xml/src/xpath.pp

* XPath, change parsing of function call so that function arguments are parsed before creating the function node. This way function nodes can validate their arguments at creation time.
* also changed function argument container type from TFPList to dynamic array.
------------------------------------------------------------------------
------------------------------------------------------------------------
r15650 | sergei | 2010-07-28 14:24:27 +0200 (Wed, 28 Jul 2010) | 3 lines
Changed paths:
M /trunk/packages/fcl-xml/tests/xpathts.pp

+ XPath test suite, implemented possibility to use a specified context node instead of fixed root element.
* enabled expressions which start with a FilterNode.
+ added a test for ancestor:: axis of attribute.
------------------------------------------------------------------------
------------------------------------------------------------------------
r15652 | sergei | 2010-07-28 16:25:08 +0200 (Wed, 28 Jul 2010) | 5 lines
Changed paths:
M /trunk/packages/fcl-xml/src/xpath.pp

XPath: reworked step processing:
* For ancestor and ancestor-or-self axes, added checks for attribute nodes similar to parent axis.
* For reverse axes, collect and filter nodes in 'natural' (i.e. reversed) order, and only then reverse order while adding to result node set. This is much simpler to implement.
* Fixed memory leak (not destroying TXPathFilterNode.FExpr)

------------------------------------------------------------------------
------------------------------------------------------------------------
r15654 | sergei | 2010-07-28 17:25:19 +0200 (Wed, 28 Jul 2010) | 2 lines
Changed paths:
M /trunk/packages/fcl-xml/src/xpath.pp

XPath: now when predicate filtering is no longer dependent on axis, it becomes possible to use the same code for TStep and TXPathFilterNode. Inherited TStep from TXPathFilterNode, removed duplicating code.

------------------------------------------------------------------------
------------------------------------------------------------------------
r15717 | sergei | 2010-08-06 09:44:51 +0200 (Fri, 06 Aug 2010) | 1 line
Changed paths:
M /trunk/packages/fcl-xml/src/xmlread.pp

- xmlread.pp, removed redundant 'var' modifiers from TStream arguments of less common used functions. This should have been done a long ago, as part of r11788.
------------------------------------------------------------------------
------------------------------------------------------------------------
r15718 | sergei | 2010-08-06 10:00:45 +0200 (Fri, 06 Aug 2010) | 1 line
Changed paths:
M /trunk/packages/fcl-xml/src/xmlread.pp
M /trunk/packages/fcl-xml/src/xmlutils.pp

* Moved TWideCharBuf and associated functions from xmlread.pp to xmlutils.pp, so this stuff can be reused by other code.
------------------------------------------------------------------------
------------------------------------------------------------------------
r15736 | sergei | 2010-08-08 04:47:04 +0200 (Sun, 08 Aug 2010) | 4 lines
Changed paths:
M /trunk/packages/fcl-xml/src/xmlread.pp

xmlread.pp:
- removed an unused field FDocNotValid
- optimized away a variable in ParseEndTag().

------------------------------------------------------------------------
------------------------------------------------------------------------
r15737 | sergei | 2010-08-08 05:27:31 +0200 (Sun, 08 Aug 2010) | 1 line
Changed paths:
M /trunk/packages/fcl-xml/src/xmlread.pp
M /trunk/packages/fcl-xml/src/xmlutils.pp

xmlread.pp, moved decoder procedures to xmlutils.pp, so they can be reused by other code.
------------------------------------------------------------------------
------------------------------------------------------------------------
r15738 | sergei | 2010-08-08 07:13:45 +0200 (Sun, 08 Aug 2010) | 6 lines
Changed paths:
M /trunk/packages/fcl-xml/src/sax_xml.pp

sax_xml.pp:
* Applied counterpart of sax_html.pp r15564, eliminating redundant wide-to-ansi conversions;
* AStart parameter of IgnorableWhitespace event should be zero, not 1;
* XML is case-sensitive, removed calls to lowercase();
* Accumulate token characters in FRawTokenText, then convert it all at once to SAXString. Without it, handling multi-byte encodings like UTF-8 was impossible, because it was converting by individual bytes which always resulted in errors. Provides a partial fix for Mantis #16732. Also provides a single location to insert a proper decoding procedure.

------------------------------------------------------------------------
------------------------------------------------------------------------
r15755 | sergei | 2010-08-09 00:25:37 +0200 (Mon, 09 Aug 2010) | 1 line
Changed paths:
M /trunk/packages/fcl-xml/src/htmwrite.pp
M /trunk/packages/fcl-xml/src/xmlwrite.pp

xmlwrite.pp, htmwrite.pp: replaced inheritance by composition. TxxxWriter always writes to a TStream (or its descendant), and for text files we use a simple TStream-compatible wrapper.
------------------------------------------------------------------------
------------------------------------------------------------------------
r15974 | sergei | 2010-09-13 09:07:53 +0200 (Mon, 13 Sep 2010) | 1 line
Changed paths:
M /trunk/packages/fcl-xml/src/xmlread.pp

* xmlread.pp, a misplaced assignment could cause a #13 character to pass non-normalized to #10 if it was the last character in input stream.
------------------------------------------------------------------------
------------------------------------------------------------------------
r15975 | sergei | 2010-09-13 18:07:50 +0200 (Mon, 13 Sep 2010) | 6 lines
Changed paths:
M /trunk/packages/fcl-xml/src/xmlread.pp

* xmlread.pp: refactoring, no function chages:
* Renamed ParseElement to ParseStartTag to reflect its actual functionality
* Changed ParseQuantity into function returning a enumeration type
* Simplified TXMLDecodingSource.NewLine
* Changed the main loop (ParseContent) so that multiple calls to DoText() are replaced by a single call.
- Removed "if FCDSectinsAsText" branch in DoCDSect. It is obsolete since this case is handled in ParseContent.
------------------------------------------------------------------------
------------------------------------------------------------------------
r15984 | sergei | 2010-09-14 14:53:55 +0200 (Tue, 14 Sep 2010) | 1 line
Changed paths:
M /trunk/packages/fcl-xml/src/xmlread.pp

* xmlread.pp, removed FRecognizePE field. Its functionality is replaced by checking other fields.
------------------------------------------------------------------------
------------------------------------------------------------------------
r16010 | sergei | 2010-09-19 16:11:20 +0200 (Sun, 19 Sep 2010) | 6 lines
Changed paths:
M /trunk/packages/fcl-xml/src/dom.pp
M /trunk/packages/fcl-xml/tests/extras.pp

dom.pp:
* r15443 changed the node class with biggest instance size from TDOMAttr to TDOMEntity. Changed that in TDOMDocument constructor, too. Otherwise nodes created with TDOMEntity.CloneNode will leak (they cannot be inserted into tree).
* Do not restore default attributes during document destruction.
* Also added a general check that raises exception if someone tries to allocate from node pool during destruction.
* Fixed replaceChild() method: it was deleting node if that node was replaced by itself.
+ Test for replaceChild.
------------------------------------------------------------------------
------------------------------------------------------------------------
r16014 | sergei | 2010-09-19 18:46:35 +0200 (Sun, 19 Sep 2010) | 1 line
Changed paths:
M /trunk/packages/fcl-xml/src/dom.pp

* dom.pp, removed TDOMAttr.FOwnerElement field, storing owner element in FParentNode. This reduces memory requirements.
------------------------------------------------------------------------
------------------------------------------------------------------------
r16046 | sergei | 2010-09-26 06:50:55 +0200 (Sun, 26 Sep 2010) | 4 lines
Changed paths:
M /trunk/packages/fcl-xml/src/xmlread.pp

XML reader:
* Parse entities by creating another instance of TXMLReader. This is much more straightforward than saving/restoring context of the existing reader.
* Fixed version setting logic so that ReadXMLFragment procedures are now suitable to read entities:
accept streams conforming to extParsedEnt [78], correctly read fragments into documents having version=1.1.
------------------------------------------------------------------------
------------------------------------------------------------------------
r16048 | sergei | 2010-09-26 21:50:37 +0200 (Sun, 26 Sep 2010) | 1 line
Changed paths:
M /trunk/packages/fcl-xml/src/xmlread.pp

XML reader, forgot to pass current settings to the nested reader (although it doesn't seem to change test suite behavior).
------------------------------------------------------------------------
------------------------------------------------------------------------
r16049 | sergei | 2010-09-26 21:54:20 +0200 (Sun, 26 Sep 2010) | 4 lines
Changed paths:
M /trunk/packages/fcl-xml/src/sax_xml.pp

sax_xml.pp: cleanup:
- remove duplicated code from constructor
- use CreateXXXBuf to create DOM nodes, eliminates temporary string vars.

------------------------------------------------------------------------
------------------------------------------------------------------------
r16060 | sergei | 2010-09-29 00:46:05 +0200 (Wed, 29 Sep 2010) | 1 line
Changed paths:
M /trunk/packages/fcl-xml/src/dom.pp

* dom.pp, moved default attribute handling code from TDOMNamedNodeMap to TAttributeMap.
------------------------------------------------------------------------
------------------------------------------------------------------------
r16066 | sergei | 2010-09-30 01:10:58 +0200 (Thu, 30 Sep 2010) | 1 line
Changed paths:
M /trunk/packages/fcl-xml/src/dom.pp

* dom.pp, moved remaining TDOMAttr-specific code from TDOMNamedNodeMap to TAttributeMap.
------------------------------------------------------------------------
------------------------------------------------------------------------
r16070 | sergei | 2010-09-30 17:30:49 +0200 (Thu, 30 Sep 2010) | 1 line
Changed paths:
M /trunk/packages/fcl-xml/src/xmlread.pp

* xmlread.pp, a bit better separation of DOM-specific and DOM-independent code in ParseComment() and ParsePI().
------------------------------------------------------------------------
------------------------------------------------------------------------
r16072 | sergei | 2010-10-01 13:05:30 +0200 (Fri, 01 Oct 2010) | 1 line
Changed paths:
M /trunk/packages/fcl-xml/src/dom.pp

* dom.pp, fixed TDOMText.SplitText() to use custom allocation, so newly created node won't leak if initial node has no parent .
------------------------------------------------------------------------

git-svn-id: branches/fixes_2_4@16396 -

marco 14 years ago
parent
commit
4761ab0052

+ 1 - 0
.gitattributes

@@ -1804,6 +1804,7 @@ packages/fcl-xml/src/xmlstreaming.pp svneol=native#text/plain
 packages/fcl-xml/src/xmlutils.pp svneol=native#text/plain
 packages/fcl-xml/src/xmlwrite.pp svneol=native#text/plain
 packages/fcl-xml/src/xpath.pp svneol=native#text/plain
+packages/fcl-xml/src/xpathkw.inc svneol=native#text/plain
 packages/fcl-xml/tests/README.txt svneol=native#text/plain
 packages/fcl-xml/tests/README_DOM.txt svneol=native#text/plain
 packages/fcl-xml/tests/api.xml svneol=native#text/plain

+ 178 - 101
packages/fcl-xml/src/dom.pp

@@ -203,6 +203,7 @@ type
 
     function  GetNodeName: DOMString; virtual; abstract;
     function  GetNodeValue: DOMString; virtual;
+    function  GetParentNode: TDOMNode; virtual;
     procedure SetNodeValue(const AValue: DOMString); virtual;
     function  GetFirstChild: TDOMNode; virtual;
     function  GetLastChild: TDOMNode; virtual;
@@ -229,7 +230,7 @@ type
     property NodeName: DOMString read GetNodeName;
     property NodeValue: DOMString read GetNodeValue write SetNodeValue;
     property NodeType: Integer read GetNodeType;
-    property ParentNode: TDOMNode read FParentNode;
+    property ParentNode: TDOMNode read GetParentNode;
     property FirstChild: TDOMNode read GetFirstChild;
     property LastChild: TDOMNode read GetLastChild;
     property ChildNodes: TDOMNodeList read GetChildNodes;
@@ -293,6 +294,19 @@ type
     procedure InternalAppend(NewChild: TDOMNode);
   end;
 
+  { A common ancestor for Document and Entity nodes. }
+
+  TDOMNode_TopLevel = class(TDOMNode_WithChildren)
+  protected
+    FInputEncoding: DOMString;
+    FXMLEncoding: DOMString;
+    FURI: DOMString;
+    FXMLVersion: TXMLVersion;
+    function GetXMLVersion: DOMString;
+  public
+    property InputEncoding: DOMString read FInputEncoding;
+    property XMLEncoding: DOMString read FXMLEncoding;
+  end;
 
 // -------------------------------------------------------
 //   NodeList
@@ -347,16 +361,15 @@ type
     function GetItem(index: LongWord): TDOMNode;
     function GetLength: LongWord;
     function Find(const name: DOMString; out Index: LongWord): Boolean;
-    function Delete(index: LongWord): TDOMNode;
-    procedure RestoreDefault(const name: DOMString);
+    function Delete(index: LongWord): TDOMNode; virtual;
     function InternalRemove(const name: DOMString): TDOMNode;
-    function ValidateInsert(arg: TDOMNode): Integer;
+    function ValidateInsert(arg: TDOMNode): Integer; virtual;
   public
     constructor Create(AOwner: TDOMNode; ANodeType: Integer);
     destructor Destroy; override;
 
     function GetNamedItem(const name: DOMString): TDOMNode;
-    function SetNamedItem(arg: TDOMNode): TDOMNode;
+    function SetNamedItem(arg: TDOMNode): TDOMNode; virtual;
     function RemoveNamedItem(const name: DOMString): TDOMNode;
     // Introduced in DOM Level 2:
     function getNamedItemNS(const namespaceURI, localName: DOMString): TDOMNode; virtual;
@@ -426,11 +439,10 @@ type
   // TODO: to be replaced by more suitable container
   TNamespaces = array of DOMString;
 
-  TDOMDocument = class(TDOMNode_WithChildren)
+  TDOMDocument = class(TDOMNode_TopLevel)
   protected
     FIDList: THashTable;
     FRevision: Integer;
-    FXML11: Boolean;
     FImplementation: TDOMImplementation;
     FNamespaces: TNamespaces;
     FNames: THashTable;
@@ -438,7 +450,6 @@ type
     FNodeLists: THashTable;
     FMaxPoolSize: Integer;
     FPools: PNodePoolArray;
-    FDocumentURI: DOMString;
     function GetDocumentElement: TDOMElement;
     function GetDocType: TDOMDocumentType;
     function GetNodeType: Integer; override;
@@ -451,6 +462,7 @@ type
     function GetElementList(aNode: TDOMNode; const nsURI, aLocalName: DOMString; UseNS: Boolean): TDOMNodeList;
     procedure NodeListDestroyed(aList: TDOMNodeList);
     function Alloc(AClass: TDOMNodeClass): TDOMNode;
+    procedure SetXMLVersion(const aValue: DOMString); virtual;
   public
     function IndexOfNS(const nsURI: DOMString; AddIfAbsent: Boolean = False): Integer;
     function InsertBefore(NewChild, RefChild: TDOMNode): TDOMNode; override;
@@ -484,7 +496,8 @@ type
     function GetElementsByTagNameNS(const nsURI, alocalName: DOMString): TDOMNodeList;
     function GetElementById(const ElementID: DOMString): TDOMElement;
     // DOM level 3:
-    property documentURI: DOMString read FDocumentURI write FDocumentURI;
+    property documentURI: DOMString read FURI write FURI;
+    property XMLVersion: DOMString read GetXMLVersion write SetXMLVersion;
     // Extensions to DOM interface:
     constructor Create;
     destructor Destroy; override;
@@ -493,18 +506,19 @@ type
   end;
 
   TXMLDocument = class(TDOMDocument)
-  private
-    FXMLVersion: DOMString;
-    procedure SetXMLVersion(const aValue: DOMString);
+  protected
+    procedure SetXMLVersion(const aValue: DOMString); override;
   public
     // These fields are extensions to the DOM interface:
-    Encoding, StylesheetType, StylesheetHRef: DOMString;
+    StylesheetType, StylesheetHRef: DOMString;
 
+    constructor Create;
     function CreateCDATASection(const data: DOMString): TDOMCDATASection; override;
     function CreateProcessingInstruction(const target, data: DOMString):
       TDOMProcessingInstruction; override;
     function CreateEntityReference(const name: DOMString): TDOMEntityReference; override;
-    property XMLVersion: DOMString read FXMLVersion write SetXMLVersion;
+    // non-compliant symbol, superseded by XMLEncoding, to be phased out
+    property Encoding: DOMString read FXMLEncoding write FXMLEncoding; deprecated;
   end;
 
   // This limits number of namespaces per document to 65535,
@@ -549,12 +563,13 @@ type
 
   TDOMAttr = class(TDOMNode_NS)
   protected
-    FOwnerElement: TDOMElement;
     FDataType: TAttrDataType;
     function  GetNodeValue: DOMString; override;
     function GetNodeType: Integer; override;
+    function GetParentNode: TDOMNode; override;
     function GetSpecified: Boolean;
     function GetIsID: Boolean;
+    function GetOwnerElement: TDOMElement;
     procedure SetNodeValue(const AValue: DOMString); override;
   public
     destructor Destroy; override;
@@ -562,7 +577,7 @@ type
     property Name: DOMString read GetNodeName;
     property Specified: Boolean read GetSpecified;
     property Value: DOMString read GetNodeValue write SetNodeValue;
-    property OwnerElement: TDOMElement read FOwnerElement;
+    property OwnerElement: TDOMElement read GetOwnerElement;
     property IsID: Boolean read GetIsID;
     // extensions
     // TODO: this is to be replaced with DOM 3 TypeInfo
@@ -701,7 +716,7 @@ type
 //   Entity
 // -------------------------------------------------------
 
-  TDOMEntity = class(TDOMNode_WithChildren)
+  TDOMEntity = class(TDOMNode_TopLevel)
   protected
     FName: DOMString;
     FPublicID, FSystemID, FNotationName: DOMString;
@@ -712,6 +727,7 @@ type
     property PublicID: DOMString read FPublicID;
     property SystemID: DOMString read FSystemID;
     property NotationName: DOMString read FNotationName;
+    property XMLVersion: DOMString read GetXMLVersion;
   end;
 
 
@@ -817,7 +833,12 @@ type
     function FindNS(nsIndex: Integer; const aLocalName: DOMString;
       out Index: LongWord): Boolean;
     function InternalRemoveNS(const nsURI, aLocalName: DOMString): TDOMNode;
+    procedure RestoreDefault(const name: DOMString);
+  protected
+    function Delete(index: LongWord): TDOMNode; override;
+    function ValidateInsert(arg: TDOMNode): Integer; override;
   public
+    function setNamedItem(arg: TDOMNode): TDOMNode; override;
     function getNamedItemNS(const namespaceURI, localName: DOMString): TDOMNode; override;
     function setNamedItemNS(arg: TDOMNode): TDOMNode; override;
     function removeNamedItemNS(const namespaceURI,localName: DOMString): TDOMNode; override;
@@ -922,6 +943,11 @@ begin
   Result := '';
 end;
 
+function TDOMNode.GetParentNode: TDOMNode;
+begin
+  Result := FParentNode;
+end;
+
 procedure TDOMNode.SetNodeValue(const AValue: DOMString);
 begin
   // do nothing
@@ -1246,7 +1272,7 @@ begin
   case NodeType of
   // !! Incomplete !!
     DOCUMENT_NODE:
-      result := TDOMDocument(Self).FDocumentURI;
+      result := TDOMDocument(Self).FURI;
     PROCESSING_INSTRUCTION_NODE:
       if Assigned(ParentNode) then
         result := ParentNode.GetBaseURI
@@ -1388,7 +1414,7 @@ function TDOMNode_WithChildren.ReplaceChild(NewChild, OldChild: TDOMNode):
   TDOMNode;
 begin
   InsertBefore(NewChild, OldChild);
-  if Assigned(OldChild) then
+  if Assigned(OldChild) and (OldChild <> NewChild) then
     RemoveChild(OldChild);
   Result := OldChild;
 end;
@@ -1650,7 +1676,7 @@ var
   I: Integer;
 begin
   for I := FList.Count-1 downto 0 do
-    TDOMNode(FList[I]).Free;
+    TDOMNode(FList.List^[I]).Free;
   FList.Free;
   inherited Destroy;
 end;
@@ -1710,8 +1736,6 @@ begin
 end;
 
 function TDOMNamedNodeMap.ValidateInsert(arg: TDOMNode): Integer;
-var
-  AttrOwner: TDOMNode;
 begin
   Result := 0;
   if nfReadOnly in FOwner.FFlags then
@@ -1719,13 +1743,7 @@ begin
   else if arg.FOwnerDocument <> FOwner.FOwnerDocument then
     Result := WRONG_DOCUMENT_ERR
   else if arg.NodeType <> FNodeType then
-    Result := HIERARCHY_REQUEST_ERR
-  else if (FNodeType = ATTRIBUTE_NODE) then
-  begin
-    AttrOwner := TDOMAttr(arg).ownerElement;
-    if Assigned(AttrOwner) and (AttrOwner <> FOwner) then
-      Result := INUSE_ATTRIBUTE_ERR;
-  end;
+    Result := HIERARCHY_REQUEST_ERR;
 end;
 
 function TDOMNamedNodeMap.SetNamedItem(arg: TDOMNode): TDOMNode;
@@ -1738,19 +1756,10 @@ begin
   if res <> 0 then
     raise EDOMError.Create(res, 'NamedNodeMap.SetNamedItem');
 
-  if FNodeType = ATTRIBUTE_NODE then
-  begin
-    TDOMAttr(arg).FOwnerElement := TDOMElement(FOwner);
-    Exists := Find(TDOMAttr(arg).Name, i); // optimization
-  end
-  else
-    Exists := Find(arg.NodeName, i);
-
+  Exists := Find(arg.NodeName, i);
   if Exists then
   begin
     Result := TDOMNode(FList.List^[i]);
-    if (Result <> arg) and (FNodeType = ATTRIBUTE_NODE) then
-      TDOMAttr(Result).FOwnerElement := nil;
     FList.List^[i] := arg;
     exit;
   end;
@@ -1772,40 +1781,16 @@ function TDOMNamedNodeMap.Delete(index: LongWord): TDOMNode;
 begin
   Result := TDOMNode(FList.List^[index]);
   FList.Delete(index);
-  if FNodeType = ATTRIBUTE_NODE then
-    TDOMAttr(Result).FOwnerElement := nil;
-end;
-
-procedure TDOMNamedNodeMap.RestoreDefault(const name: DOMString);
-var
-  eldef: TDOMElement;
-  attrdef: TDOMAttr;
-begin
-  if FNodeType = ATTRIBUTE_NODE then
-  begin
-    if not Assigned(TDOMElement(FOwner).FNSI.QName) then  // safeguard
-      Exit;
-    eldef := TDOMElement(TDOMElement(FOwner).FNSI.QName^.Data);
-    if Assigned(eldef) then
-    begin
-      // TODO: can be avoided by linking attributes directly to their defs
-      attrdef := eldef.GetAttributeNode(name);
-      if Assigned(attrdef) and (TDOMAttrDef(attrdef).FDefault in [adDefault, adFixed]) then
-        TDOMElement(FOwner).RestoreDefaultAttr(attrdef);
-    end;
-  end;
 end;
 
 function TDOMNamedNodeMap.InternalRemove(const name: DOMString): TDOMNode;
 var
   i: Cardinal;
 begin
-  Result := nil;
   if Find(name, i) then
-  begin
-    Result := Delete(I);
-    RestoreDefault(name);
-  end;
+    Result := Delete(I)
+  else
+    Result := nil;
 end;
 
 function TDOMNamedNodeMap.RemoveNamedItem(const name: DOMString): TDOMNode;
@@ -1827,6 +1812,46 @@ end;
 
 { TAttributeMap }
 
+function TAttributeMap.Delete(index: LongWord): TDOMNode;
+begin
+  Result := inherited Delete(index);
+  if Assigned(Result) then
+  begin
+    Result.FParentNode := nil;
+    if Assigned(TDOMAttr(Result).FNSI.QName) then
+      RestoreDefault(TDOMAttr(Result).FNSI.QName^.Key);
+  end;
+end;
+
+function TAttributeMap.ValidateInsert(arg: TDOMNode): Integer;
+begin
+  Result := inherited ValidateInsert(arg);
+  if Result = 0 then
+  begin
+    if arg.NodeType <> ATTRIBUTE_NODE then
+      Result := HIERARCHY_REQUEST_ERR
+    else if Assigned(arg.FParentNode) and (arg.FParentNode <> FOwner) then
+      Result := INUSE_ATTRIBUTE_ERR;
+  end;
+end;
+
+procedure TAttributeMap.RestoreDefault(const name: DOMString);
+var
+  eldef: TDOMElement;
+  attrdef: TDOMAttr;
+begin
+  if not Assigned(TDOMElement(FOwner).FNSI.QName) then  // safeguard
+    Exit;
+  eldef := TDOMElement(TDOMElement(FOwner).FNSI.QName^.Data);
+  if Assigned(eldef) then
+  begin
+    // TODO: can be avoided by linking attributes directly to their defs
+    attrdef := eldef.GetAttributeNode(name);
+    if Assigned(attrdef) and (TDOMAttrDef(attrdef).FDefault in [adDefault, adFixed]) then
+      TDOMElement(FOwner).RestoreDefaultAttr(attrdef);
+  end;
+end;
+
 // Since list is kept sorted by nodeName, we must use linear search here.
 // This routine is not called while parsing, so parsing speed is not lowered.
 function TAttributeMap.FindNS(nsIndex: Integer; const aLocalName: DOMString;
@@ -1864,10 +1889,7 @@ begin
   Result := nil;
   nsIndex := FOwner.FOwnerDocument.IndexOfNS(nsURI);
   if (nsIndex >= 0) and FindNS(nsIndex, aLocalName, i) then
-  begin
     Result := Delete(I);
-    RestoreDefault(TDOMAttr(Result).FNSI.QName^.Key);
-  end;
 end;
 
 function TAttributeMap.getNamedItemNS(const namespaceURI, localName: DOMString): TDOMNode;
@@ -1882,6 +1904,14 @@ begin
     Result := nil;
 end;
 
+function TAttributeMap.setNamedItem(arg: TDOMNode): TDOMNode;
+begin
+  Result := inherited setNamedItem(arg);
+  if Assigned(Result) then
+    Result.FParentNode := nil;
+  arg.FParentNode := FOwner;
+end;
+
 function TAttributeMap.setNamedItemNS(arg: TDOMNode): TDOMNode;
 var
   i: LongWord;
@@ -1912,8 +1942,8 @@ begin
       FList.Insert(i, arg);
   end;
   if Assigned(Result) then
-    TDOMAttr(Result).FOwnerElement := nil;
-  TDOMAttr(arg).FOwnerElement := TDOMElement(FOwner);
+    Result.FParentNode := nil;
+  arg.FParentNode := FOwner;
 end;
 
 function TAttributeMap.removeNamedItemNS(const namespaceURI,
@@ -2003,6 +2033,15 @@ begin
     CloneChildren(Result, aCloneOwner);
 end;
 
+// -------------------------------------------------------
+//   Top-level node
+// -------------------------------------------------------
+
+function TDOMNode_TopLevel.GetXMLVersion: DOMString;
+begin
+  Result := xmlVersionStr[FXMLVersion];
+end;
+
 // -------------------------------------------------------
 //   DOMImplementation
 // -------------------------------------------------------
@@ -2107,7 +2146,7 @@ constructor TDOMDocument.Create;
 begin
   inherited Create(nil);
   FOwnerDocument := Self;
-  FMaxPoolSize := (TDOMAttr.InstanceSize + sizeof(Pointer)-1) and not (sizeof(Pointer)-1) + sizeof(Pointer);
+  FMaxPoolSize := (TDOMEntity.InstanceSize + sizeof(Pointer)-1) and not (sizeof(Pointer)-1) + sizeof(Pointer);
   FPools := AllocMem(FMaxPoolSize);
   FNames := THashTable.Create(256, True);
   SetLength(FNamespaces, 3);
@@ -2139,6 +2178,8 @@ var
   pp: TNodePool;
   size: Integer;
 begin
+  if nfDestroying in FFlags then
+    raise EDOMError.Create(INVALID_ACCESS_ERR, 'Attempt to allocate node memory while destroying');
   size := (AClass.InstanceSize + sizeof(Pointer)-1) and not (sizeof(Pointer)-1);
   if size > FMaxPoolSize then
   begin
@@ -2166,9 +2207,9 @@ begin
 
   ID := Attr.Value;
   p := FIDList.FindOrAdd(DOMPChar(ID), Length(ID), Exists);
+  if not Exists then
+    p^.Data := Attr.FParentNode;
   Result := not Exists;
-  if Result then
-    p^.Data := Attr.OwnerElement;
 end;
 
 // This shouldn't be called if document has no IDs,
@@ -2226,7 +2267,9 @@ begin
      ((nType = DOCUMENT_TYPE_NODE) and (OldChild = DocType)) then   // and so can be DTD
   begin
     inherited InsertBefore(NewChild, OldChild);
-    Result := RemoveChild(OldChild);
+    Result := OldChild;
+    if OldChild <> NewChild then
+      RemoveChild(OldChild);
   end
   else
     Result := inherited ReplaceChild(NewChild, OldChild);
@@ -2254,7 +2297,7 @@ end;
 
 function TDOMDocument.CreateElement(const tagName: DOMString): TDOMElement;
 begin
-  if not IsXmlName(tagName, FXML11) then
+  if not IsXmlName(tagName, FXMLVersion = xmlVersion11) then
     raise EDOMError.Create(INVALID_CHARACTER_ERR, 'DOMDocument.CreateElement');
   TDOMNode(Result) := Alloc(TDOMElement);
   Result.Create(Self);
@@ -2322,7 +2365,7 @@ end;
 
 function TDOMDocument.CreateAttribute(const name: DOMString): TDOMAttr;
 begin
-  if not IsXmlName(name, FXML11) then
+  if not IsXmlName(name, FXMLVersion = xmlVersion11) then
     raise EDOMError.Create(INVALID_CHARACTER_ERR, 'DOMDocument.CreateAttribute');
   TDOMNode(Result) := Alloc(TDOMAttr);
   Result.Create(Self);
@@ -2429,7 +2472,7 @@ var
   idx, PrefIdx: Integer;
 begin
   idx := IndexOfNS(nsURI, True);
-  PrefIdx := CheckQName(QualifiedName, idx, FXml11);
+  PrefIdx := CheckQName(QualifiedName, idx, FXMLVersion = xmlVersion11);
   if PrefIdx < 0 then
     raise EDOMError.Create(-PrefIdx, 'Document.CreateAttributeNS');
   TDOMNode(Result) := Alloc(TDOMAttr);
@@ -2447,7 +2490,7 @@ var
   idx, PrefIdx: Integer;
 begin
   idx := IndexOfNS(nsURI, True);
-  PrefIdx := CheckQName(QualifiedName, idx, FXml11);
+  PrefIdx := CheckQName(QualifiedName, idx, FXMLVersion = xmlVersion11);
   if PrefIdx < 0 then
     raise EDOMError.Create(-PrefIdx, 'Document.CreateElementNS');
   TDOMNode(Result) := Alloc(TDOMElement);
@@ -2493,6 +2536,16 @@ begin
     Result := -1;
 end;
 
+procedure TDOMDocument.SetXMLVersion(const aValue: DOMString);
+begin
+  raise EDOMNotSupported.Create('DOMDocument.SetXMLVersion');
+end;
+
+constructor TXMLDocument.Create;
+begin
+  inherited Create;
+  FXMLVersion := xmlVersion10;
+end;
 
 function TXMLDocument.CreateCDATASection(const data: DOMString):
   TDOMCDATASection;
@@ -2505,7 +2558,7 @@ end;
 function TXMLDocument.CreateProcessingInstruction(const target,
   data: DOMString): TDOMProcessingInstruction;
 begin
-  if not IsXmlName(target, FXML11) then
+  if not IsXmlName(target, FXMLVersion = xmlVersion11) then
     raise EDOMError.Create(INVALID_CHARACTER_ERR, 'XMLDocument.CreateProcessingInstruction');
   TDOMNode(Result) := Alloc(TDOMProcessingInstruction);
   Result.Create(Self);
@@ -2519,7 +2572,7 @@ var
   dType: TDOMDocumentType;
   ent: TDOMEntity;
 begin
-  if not IsXmlName(name, FXML11) then
+  if not IsXmlName(name, FXMLVersion = xmlVersion11) then
     raise EDOMError.Create(INVALID_CHARACTER_ERR, 'XMLDocument.CreateEntityReference');
   TDOMNode(Result) := Alloc(TDOMEntityReference);
   Result.Create(Self);
@@ -2536,8 +2589,12 @@ end;
 
 procedure TXMLDocument.SetXMLVersion(const aValue: DOMString);
 begin
-  FXMLVersion := aValue;
-  FXML11 := (aValue = '1.1');
+  if aValue = '1.0' then
+    FXMLVersion := xmlVersion10
+  else if aValue = '1.1' then
+    FXMLVersion := xmlVersion11
+  else
+    raise EDOMNotSupported.Create('XMLDocument.SetXMLVersion');
 end;
 
 { TDOMNode_NS }
@@ -2579,7 +2636,7 @@ var
   NewName: DOMString;
 begin
   Changing;
-  if not IsXmlName(Value, FOwnerDocument.FXml11) then
+  if not IsXmlName(Value, FOwnerDocument.FXMLVersion = xmlVersion11) then
     raise EDOMError.Create(INVALID_CHARACTER_ERR, 'Node.SetPrefix');
 
   if (Pos(WideChar(':'), Value) > 0) or not (nfLevel2 in FFlags) or
@@ -2615,11 +2672,17 @@ begin
   Result := ATTRIBUTE_NODE;
 end;
 
+function TDOMAttr.GetParentNode: TDOMNode;
+begin
+  Result := nil;
+end;
+
 destructor TDOMAttr.Destroy;
 begin
-  if Assigned(FOwnerElement) and not (nfDestroying in FOwnerElement.FFlags) then
+  if Assigned(FParentNode) and not (nfDestroying in FParentNode.FFlags) then
   // TODO: This may raise NOT_FOUND_ERR in case something's really wrong
-    FOwnerElement.RemoveAttributeNode(Self);
+    TDOMElement(FParentNode).RemoveAttributeNode(Self);
+  FParentNode := nil;
   inherited Destroy;
 end;
 
@@ -2657,6 +2720,11 @@ begin
   Result := FDataType = dtID;
 end;
 
+function TDOMAttr.GetOwnerElement: TDOMElement;
+begin
+  Result := TDOMElement(FParentNode);
+end;
+
 // -------------------------------------------------------
 //   Element
 // -------------------------------------------------------
@@ -2671,7 +2739,8 @@ begin
   Include(FFlags, nfDestroying);
   if Assigned(FOwnerDocument.FIDList) then
     FOwnerDocument.RemoveID(Self);
-  FreeAndNil(FAttributes);
+  FAttributes.Free;
+  FAttributes := nil;
   inherited Destroy;
 end;
 
@@ -2778,6 +2847,8 @@ var
   ColonPos: Integer;
   AttrName, nsuri: DOMString;
 begin
+  if nfDestroying in FOwnerDocument.FFlags then
+    Exit;
   Attr := TDOMAttr(AttrDef.CloneNode(True));
   AttrName := Attr.Name;
   ColonPos := Pos(WideChar(':'), AttrName);
@@ -2859,7 +2930,7 @@ begin
   else
   begin
     Attr := FOwnerDocument.CreateAttribute(name);
-    Attr.FOwnerElement := Self;
+    Attr.FParentNode := Self;
     FAttributes.FList.Insert(I, Attr);
   end;
   attr.NodeValue := value;
@@ -2890,7 +2961,7 @@ var
 begin
   Changing;
   idx := FOwnerDocument.IndexOfNS(nsURI, True);
-  prefIdx := CheckQName(qualifiedName, idx, FOwnerDocument.FXml11);
+  prefIdx := CheckQName(qualifiedName, idx, FOwnerDocument.FXMLVersion = xmlVersion11);
   if prefIdx < 0 then
     raise EDOMError.Create(-prefIdx, 'Element.SetAttributeNS');
 
@@ -2904,7 +2975,7 @@ begin
   begin
     TDOMNode(Attr) := FOwnerDocument.Alloc(TDOMAttr);
     Attr.Create(FOwnerDocument);
-    Attr.FOwnerElement := Self;
+    Attr.FParentNode := Self;
     Attr.FNSI.NSIndex := Word(idx);
     Include(Attr.FFlags, nfLevel2);
   end;
@@ -2945,17 +3016,21 @@ end;
 
 
 function TDOMElement.RemoveAttributeNode(OldAttr: TDOMAttr): TDOMAttr;
+var
+  Index: Integer;
 begin
   Changing;
-  Result:=OldAttr;
-  if Assigned(FAttributes) and (FAttributes.FList.Remove(OldAttr) > -1) then
+  Result := OldAttr;
+  if Assigned(FAttributes) then
   begin
-    if Assigned(OldAttr.FNSI.QName) then  // safeguard
-      FAttributes.RestoreDefault(OldAttr.FNSI.QName^.Key);
-    Result.FOwnerElement := nil;
-  end
-  else
-    raise EDOMNotFound.Create('Element.RemoveAttributeNode');
+    Index := FAttributes.FList.IndexOf(OldAttr);
+    if Index > -1 then
+    begin
+      FAttributes.Delete(Index);
+      Exit;
+    end;
+  end;
+  raise EDOMNotFound.Create('Element.RemoveAttributeNode');
 end;
 
 function TDOMElement.GetElementsByTagName(const name: DOMString): TDOMNodeList;
@@ -3012,13 +3087,15 @@ begin
 end;
 
 function TDOMText.SplitText(offset: LongWord): TDOMText;
+var
+  L: LongWord;
 begin
   Changing;
-  if offset > Length then
+  L := Length;
+  if offset > L then
     raise EDOMIndexSize.Create('Text.SplitText');
 
-  Result := TDOMText.Create(FOwnerDocument);
-  Result.FNodeValue := Copy(FNodeValue, offset + 1, Length);
+  Result := FOwnerDocument.CreateTextNodeBuf(@FNodeValue[offset+1], L-offset, False);
   Result.FFlags := FFlags * [nfIgnorableWS];
   FNodeValue := Copy(FNodeValue, 1, offset);
   if Assigned(FParentNode) then

+ 1 - 1
packages/fcl-xml/src/dom_html.pp

@@ -648,7 +648,7 @@ type
     property Title: DOMString read GetTitle write SetTitle;
     property Referrer: DOMString read GetReferrer;
     property Domain: DOMString read GetDomain;
-    property URL: DOMString read FDocumentURI;
+    property URL: DOMString read FURI;
     property Body: THTMLElement read GetBody write SetBody;
     property Images: THTMLCollection read GetImages;
     property Applets: THTMLCollection read GetApplets;

+ 23 - 42
packages/fcl-xml/src/htmwrite.pp

@@ -44,6 +44,7 @@ type
 
   THTMLWriter = class(TObject)
   private
+    FStream: TStream;
     FInsideTextNode: Boolean;
     FBuffer: PChar;
     FBufPos: PChar;
@@ -59,7 +60,6 @@ type
     procedure AttrSpecialCharCallback(c: WideChar);
     procedure TextNodeSpecialCharCallback(c: WideChar);
   protected
-    procedure Write(const Buffer; Count: Longint); virtual; abstract;
     procedure WriteNode(Node: TDOMNode);
     procedure VisitDocument(Node: TDOMNode);
     procedure VisitElement(Node: TDOMNode);
@@ -72,40 +72,30 @@ type
     procedure VisitDocumentType(Node: TDOMNode);
     procedure VisitPI(Node: TDOMNode);
   public
-    constructor Create;
+    constructor Create(AStream: TStream);
     destructor Destroy; override;
   end;
 
-  TTextHTMLWriter = Class(THTMLWriter)
+  TTextStream = class(TStream)
   Private
     F : ^Text;
-  Protected
-    Procedure Write(Const Buffer; Count : Longint);override;
   Public
     constructor Create(var AFile: Text);
-  end;
-
-  TStreamHTMLWriter = Class(THTMLWriter)
-  Private
-    F : TStream;
-  Protected
-    Procedure Write(Const Buffer; Count : Longint);override;
-  Public
-    constructor Create(AStream: TStream);
+    function Write(Const Buffer; Count: Longint): Longint; override;
   end;
 
 { ---------------------------------------------------------------------
-    TTextHTMLWriter
+    TTextStream
   ---------------------------------------------------------------------}
 
 
-constructor TTextHTMLWriter.Create(var AFile: Text);
+constructor TTextStream.Create(var AFile: Text);
 begin
   inherited Create;
   f := @AFile;
 end;
 
-procedure TTextHTMLWriter.Write(const Buffer; Count: Longint);
+function TTextStream.Write(const Buffer; Count: Longint): Longint;
 var
   s: string;
 begin
@@ -114,33 +104,17 @@ begin
     SetString(s, PChar(@Buffer), Count);
     system.Write(f^, s);
   end;
+  Result := Count;
 end;
 
-{ ---------------------------------------------------------------------
-    TStreamHTMLWriter
-  ---------------------------------------------------------------------}
-
-constructor TStreamHTMLWriter.Create(AStream: TStream);
-begin
-  inherited Create;
-  F := AStream;
-end;
-
-
-procedure TStreamHTMLWriter.Write(const Buffer; Count: Longint);
-begin
-  if Count > 0 then
-    F.Write(Buffer, Count);
-end;
-
-
 { ---------------------------------------------------------------------
     THTMLWriter
   ---------------------------------------------------------------------}
 
-constructor THTMLWriter.Create;
+constructor THTMLWriter.Create(AStream: TStream);
 begin
   inherited Create;
+  FStream := AStream;
   // some overhead - always be able to write at least one extra UCS4
   FBuffer := AllocMem(512+32);
   FBufPos := FBuffer;
@@ -153,7 +127,7 @@ end;
 destructor THTMLWriter.Destroy;
 begin
   if FBufPos > FBuffer then
-    write(FBuffer^, FBufPos-FBuffer);
+    FStream.write(FBuffer^, FBufPos-FBuffer);
 
   FreeMem(FBuffer);
   inherited Destroy;
@@ -171,7 +145,7 @@ begin
   begin
     if pb >= @FBuffer[FCapacity] then
     begin
-      write(FBuffer^, FCapacity);
+      FStream.write(FBuffer^, FCapacity);
       Dec(pb, FCapacity);
       if pb > FBuffer then
         Move(FBuffer[FCapacity], FBuffer^, pb - FBuffer);
@@ -525,18 +499,25 @@ begin
 end;
 
 procedure WriteHTMLFile(doc: TXMLDocument; var AFile: Text);
+var
+  s: TStream;
 begin
-  with TTextHTMLWriter.Create(AFile) do
+  s := TTextStream.Create(AFile);
   try
-    WriteNode(doc);
+    with THTMLWriter.Create(s) do
+    try
+      WriteNode(doc);
+    finally
+      Free;
+    end;
   finally
-    Free;
+    s.Free;
   end;
 end;
 
 procedure WriteHTMLFile(doc: TXMLDocument; AStream: TStream);
 begin
-  with TStreamHTMLWriter.Create(AStream) do
+  with THTMLWriter.Create(AStream) do
   try
     WriteNode(doc);
   finally

+ 7 - 0
packages/fcl-xml/src/sax.pp

@@ -177,6 +177,7 @@ type
   protected
     FCurColumnNumber, FCurLineNumber: Integer;
     FCurPublicID, FCurSystemID: SAXString;
+    FStopFlag: Boolean;
 
     function GetFeature(const Name: String): Boolean; virtual;
     function GetProperty(const Name: String): TObject; virtual;
@@ -213,6 +214,7 @@ type
     procedure Parse(AInput: TSAXInputSource); virtual; abstract; overload;
     procedure Parse(const SystemID: SAXString); virtual; overload;
     procedure ParseStream(AStream: TStream);
+    procedure Abort;
 
     // Current location
     property CurColumnNumber: Integer read FCurColumnNumber;
@@ -653,6 +655,11 @@ begin
   end;
 end;
 
+procedure TSAXReader.Abort;
+begin
+  FStopFlag := True;
+end;
+
 function TSAXReader.DoResolveEntity(const PublicID,
   SystemID: SAXString): TSAXInputSource;
 begin

+ 78 - 41
packages/fcl-xml/src/sax_html.pp

@@ -31,7 +31,7 @@ unit SAX_HTML;
 
 interface
 
-uses SysUtils, Classes, SAX, DOM, DOM_HTML,htmldefs;
+uses SysUtils, Classes, SAX, DOM, DOM_HTML,htmldefs,xmlutils;
 
 type
 
@@ -54,8 +54,8 @@ type
     FAttrNameRead: Boolean;
     FStack: array of THTMLElementTag;
     FNesting: Integer;
-    procedure AutoClose(const aName: string);
-    procedure NamePush(const aName: string);
+    procedure AutoClose(const aName: SAXString);
+    procedure NamePush(const aName: SAXString);
     procedure NamePop;
   protected
     procedure EnterNewScannerContext(NewContext: THTMLScannerContext);
@@ -136,6 +136,22 @@ begin
   inherited Destroy;
 end;
 
+function CheckForName(const Tag: SAXString): Boolean;
+var
+  p, p1: PSAXChar;
+begin
+  p := PSAXChar(Tag);
+  result := False;
+  if p^ <> '!' then
+  begin
+    if p^ = '/' then Inc(p);
+    p1 := p;
+    while (p1^ <> #0) and (p1^ <> '/') and not IsXMLWhitespace(p1^) do
+      Inc(p1);
+    result := IsXMLName(p, p1-p);
+  end;
+end;
+
 procedure THTMLReader.Parse(AInput: TSAXInputSource);
 const
   MaxBufferSize = 1024;
@@ -150,7 +166,8 @@ begin
   end;
 
   FEndOfStream := False;
-  while True do
+  FStopFlag := False;
+  while not FStopFlag do
   begin
     // Read data into the input buffer
     BufferSize := AInput.Stream.Read(Buffer, MaxBufferSize);
@@ -161,7 +178,8 @@ begin
     end;
 
     BufferPos := 0;
-    while BufferPos < BufferSize do
+    while (BufferPos < BufferSize) and not FStopFlag do
+    begin
       case ScannerContext of
         scUnknown:
           case Buffer[BufferPos] of
@@ -260,22 +278,41 @@ begin
                 if FCurStringValueDelimiter = #0 then
                   EnterNewScannerContext(scUnknown);
               end;
-            else
-            begin
-              FTokenText := FTokenText + Buffer[BufferPos];
-              Inc(BufferPos);
-            end;
+            '<':    // either an unclosed tag or unescaped '<' in text; attempt recovery
+              begin
+                // TODO: this check is hardly complete, probably must also check if
+                // tag name is followed by legal attributes.
+                if CheckForName(FTokenText) then
+                  EnterNewScannerContext(scUnknown)   // assume unclosed tag
+                else if (FTokenText <> '') and (FTokenText[1] <> '!') then
+                begin
+                  Insert('<', FTokenText, 1);         // assume plaintext
+                  FScannerContext := scText;
+                  EnterNewScannerContext(scUnknown);
+                end
+                else
+                begin  // in comment, ignore
+                  FTokenText := FTokenText + Buffer[BufferPos];
+                  Inc(BufferPos);
+                end;
+              end;
+          else
+            FTokenText := FTokenText + Buffer[BufferPos];
+            Inc(BufferPos);
           end;
-      end;
+        end;    // case ScannerContext of
+    end;        // while not endOfBuffer
   end;
 end;
 
-function LookupTag(const aName: string): THTMLElementTag;
+function LookupTag(const aName: SAXString): THTMLElementTag;
 var
   j: THTMLElementTag;
+  ansiName: string;
 begin
+  ansiName := aName;
   for j := Low(THTMLElementTag) to High(THTMLElementTag) do
-    if SameText(HTMLElementProps[j].Name, aName) then
+    if SameText(HTMLElementProps[j].Name, ansiName) then
     begin
       Result := j;
       Exit;
@@ -283,7 +320,7 @@ begin
   Result := etUnknown;
 end;
 
-procedure THTMLReader.AutoClose(const aName: string);
+procedure THTMLReader.AutoClose(const aName: SAXString);
 var
   newTag: THTMLElementTag;
 begin
@@ -295,7 +332,7 @@ begin
   end;
 end;
 
-procedure THTMLReader.NamePush(const aName: string);
+procedure THTMLReader.NamePush(const aName: SAXString);
 var
   tag: THTMLElementTag;
 begin
@@ -314,24 +351,28 @@ begin
   FStack[FNesting] := etUnknown;
 end;
 
-function SplitTagString(const s: String; var Attr: TSAXAttributes): String;
+function SplitTagString(const s: SAXString; var Attr: TSAXAttributes): SAXString;
 var
   i, j: Integer;
-  AttrName: String;
-  ValueDelimiter: Char;
+  AttrName: SAXString;
+  ValueDelimiter: WideChar;
   DoIncJ: Boolean;
 begin
   Attr := nil;
-  i := Pos(' ', s);
-  if i <= 0 then
-    Result := LowerCase(s)
+  i := 0;
+  repeat
+    Inc(i)
+  until (i > Length(s)) or IsXMLWhitespace(s[i]);
+
+  if i > Length(s) then
+    Result := s
   else
   begin
-    Result := LowerCase(Copy(s, 1, i - 1));
+    Result := Copy(s, 1, i - 1);
     Attr := TSAXAttributes.Create;
     Inc(i);
 
-    while (i <= Length(s)) and (s[i] in WhitespaceChars) do
+    while (i <= Length(s)) and IsXMLWhitespace(s[i]) do
       Inc(i);
 
     SetLength(AttrName, 0);
@@ -340,7 +381,8 @@ begin
     while j <= Length(s) do
       if s[j] = '=' then
       begin
-        AttrName := LowerCase(Copy(s, i, j - i));
+        AttrName := Copy(s, i, j - i);
+        WStrLower(AttrName);
         Inc(j);
         if (j < Length(s)) and ((s[j] = '''') or (s[j] = '"')) then
         begin
@@ -352,7 +394,7 @@ begin
         DoIncJ := False;
         while j <= Length(s) do
           if ValueDelimiter = #0 then
-            if s[j] in WhitespaceChars then
+            if IsXMLWhitespace(s[j]) then
               break
             else
               Inc(j)
@@ -363,31 +405,34 @@ begin
           end else
             Inc(j);
 
-        Attr.AddAttribute('', AttrName, '', '', Copy(s, i, j - i));
+        if IsXMLName(AttrName) then
+          Attr.AddAttribute('', AttrName, '', '', Copy(s, i, j - i));
 
         if DoIncJ then
           Inc(j);
 
-        while (j <= Length(s)) and (s[j] in WhitespaceChars) do
+        while (j <= Length(s)) and IsXMLWhitespace(s[j]) do
           Inc(j);
         i := j;
       end
-      else if s[j] in WhitespaceChars then
+      else if IsXMLWhitespace(s[j]) then
       begin
-        Attr.AddAttribute('', Copy(s, i, j - i), '', '', '');
+        if IsXMLName(@s[i], j-i) then
+          Attr.AddAttribute('', Copy(s, i, j - i), '', '', '');
         Inc(j);
-        while (j <= Length(s)) and (s[j] in WhitespaceChars) do
+        while (j <= Length(s)) and IsXMLWhitespace(s[j]) do
           Inc(j);
         i := j;
       end else
         Inc(j);
   end;
+  WStrLower(result);
 end;
 
 procedure THTMLReader.EnterNewScannerContext(NewContext: THTMLScannerContext);
 var
   Attr: TSAXAttributes;
-  TagName: String;
+  TagName: SAXString;
   Ent: SAXChar;
   i: Integer;
   elTag: THTMLElementTag;
@@ -498,30 +543,22 @@ end;
 procedure THTMLToDOMConverter.ReaderCharacters(Sender: TObject;
   const ch: PSAXChar; Start, Count: Integer);
 var
-  s: SAXString;
   NodeInfo: THTMLNodeInfo;
 begin
-  SetLength(s, Count);
-  Move(ch^, s[1], Count * SizeOf(SAXChar));
-
   NodeInfo := THTMLNodeInfo.Create;
   NodeInfo.NodeType := ntText;
-  NodeInfo.DOMNode := FDocument.CreateTextNode(s);
+  NodeInfo.DOMNode := FDocument.CreateTextNodeBuf(ch, Count, False);
   FNodeBuffer.Add(NodeInfo);
 end;
 
 procedure THTMLToDOMConverter.ReaderIgnorableWhitespace(Sender: TObject;
   const ch: PSAXChar; Start, Count: Integer);
 var
-  s: SAXString;
   NodeInfo: THTMLNodeInfo;
 begin
-  SetLength(s, Count);
-  Move(ch^, s[1], Count * SizeOf(SAXChar));
-
   NodeInfo := THTMLNodeInfo.Create;
   NodeInfo.NodeType := ntWhitespace;
-  NodeInfo.DOMNode := FDocument.CreateTextNode(s);
+  NodeInfo.DOMNode := FDocument.CreateTextNodeBuf(ch, Count, False);
   FNodeBuffer.Add(NodeInfo);
 end;
 

+ 85 - 92
packages/fcl-xml/src/sax_xml.pp

@@ -40,6 +40,7 @@ type
     FEndOfStream: Boolean;
     FScannerContext: TXMLScannerContext;
     FTokenText: SAXString;
+    FRawTokenText: string;
     FCurStringValueDelimiter: Char;
     FAttrNameRead: Boolean;
   protected
@@ -103,7 +104,9 @@ procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream);
 
 implementation
 
-uses htmldefs; // for entities...
+uses
+  xmlutils,
+  htmldefs; // for entities...
 
 const
   WhitespaceChars = [#9, #10, #13, ' '];
@@ -141,7 +144,8 @@ begin
   end;
 
   FEndOfStream := False;
-  while True do
+  FStopFlag := False;
+  while not FStopFlag do
   begin
     // Read data into the input buffer
     BufferSize := AInput.Stream.Read(Buffer, MaxBufferSize);
@@ -152,7 +156,8 @@ begin
     end;
 
     BufferPos := 0;
-    while BufferPos < BufferSize do
+    while (BufferPos < BufferSize) and not FStopFlag do
+    begin
       case ScannerContext of
         scUnknown:
           case Buffer[BufferPos] of
@@ -175,7 +180,7 @@ begin
           case Buffer[BufferPos] of
             #9, #10, #13, ' ':
               begin
-                FTokenText := FTokenText + Buffer[BufferPos];
+                FRawTokenText := FRawTokenText + Buffer[BufferPos];
                 Inc(BufferPos);
               end;
             '&':
@@ -189,7 +194,7 @@ begin
                 EnterNewScannerContext(scTag);
               end;
             else
-              FScannerContext := scText
+              FScannerContext := scText;
           end;
         scText:
           case Buffer[BufferPos] of
@@ -205,7 +210,7 @@ begin
               end;
             else
             begin
-              FTokenText := FTokenText + Buffer[BufferPos];
+              FRawTokenText := FRawTokenText + Buffer[BufferPos];
               Inc(BufferPos);
             end;
           end;
@@ -219,7 +224,7 @@ begin
             EnterNewScannerContext(scUnknown)
           else
           begin
-            FTokenText := FTokenText + Buffer[BufferPos];
+            FRawTokenText := FRawTokenText + Buffer[BufferPos];
             Inc(BufferPos);
           end;
         scTag:
@@ -236,13 +241,13 @@ begin
                     FAttrNameRead := False;
                   end;
                 end;
-                FTokenText := FTokenText + Buffer[BufferPos];
+                FRawTokenText := FRawTokenText + Buffer[BufferPos];
                 Inc(BufferPos);
               end;
             '=':
               begin
                 FAttrNameRead := True;
-                FTokenText := FTokenText + Buffer[BufferPos];
+                FRawTokenText := FRawTokenText + Buffer[BufferPos];
                 Inc(BufferPos);
               end;
             '>':
@@ -253,97 +258,101 @@ begin
               end;
             else
             begin
-              FTokenText := FTokenText + Buffer[BufferPos];
+              FRawTokenText := FRawTokenText + Buffer[BufferPos];
               Inc(BufferPos);
             end;
           end;
-      end;
+        end;    // case ScannerContext of
+    end;        // while not endOfBuffer
   end;
 end;
 
-procedure TSAXXMLReader.EnterNewScannerContext(NewContext: TXMLScannerContext);
-
-  function SplitTagString(const s: String; var Attr: TSAXAttributes): String;
-  var
-    i, j: Integer;
-    AttrName: String;
-    ValueDelimiter: Char;
-    DoIncJ: Boolean;
+function SplitTagString(const s: SAXString; var Attr: TSAXAttributes): SAXString;
+var
+  i, j: Integer;
+  AttrName: SAXString;
+  ValueDelimiter: WideChar;
+  DoIncJ: Boolean;
+begin
+  Attr := nil;
+  i := 0;
+  repeat
+    Inc(i)
+  until (i > Length(s)) or IsXMLWhitespace(s[i]);
+
+  if i > Length(s) then
+    Result := s
+  else
   begin
-    Attr := nil;
-    i := Pos(' ', s);
-    if i <= 0 then
-      Result := LowerCase(s)
-    else
-    begin
-      Result := LowerCase(Copy(s, 1, i - 1));
-      Attr := TSAXAttributes.Create;
+    Result := Copy(s, 1, i - 1);
+    Attr := TSAXAttributes.Create;
+    Inc(i);
 
+    while (i <= Length(s)) and IsXMLWhitespace(s[i]) do
       Inc(i);
 
-      while (i <= Length(s)) and (s[i] in WhitespaceChars) do
-        Inc(i);
-
-      SetLength(AttrName, 0);
-      j := i;
+    SetLength(AttrName, 0);
+    j := i;
 
-      while j <= Length(s) do
-        if s[j] = '=' then
+    while j <= Length(s) do
+      if s[j] = '=' then
+      begin
+        AttrName := Copy(s, i, j - i);
+        Inc(j);
+        if (j < Length(s)) and ((s[j] = '''') or (s[j] = '"')) then
         begin
-          AttrName := LowerCase(Copy(s, i, j - i));
+          ValueDelimiter := s[j];
           Inc(j);
-          if (j < Length(s)) and ((s[j] = '''') or (s[j] = '"')) then
+        end else
+          ValueDelimiter := #0;
+        i := j;
+        DoIncJ := False;
+        while j <= Length(s) do
+          if ValueDelimiter = #0 then
+            if IsXMLWhitespace(s[j]) then
+              break
+            else
+              Inc(j)
+          else if s[j] = ValueDelimiter then
           begin
-            ValueDelimiter := s[j];
-            Inc(j);
+            DoIncJ := True;
+            break
           end else
-            ValueDelimiter := #0;
-          i := j;
-          DoIncJ := False;
-          while j <= Length(s) do
-            if ValueDelimiter = #0 then
-              if s[j] in WhitespaceChars then
-                break
-              else
-                Inc(j)
-            else if s[j] = ValueDelimiter then
-            begin
-              DoIncJ := True;
-              break
-            end else
-              Inc(j);
+            Inc(j);
 
+        if IsXMLName(AttrName) then
           Attr.AddAttribute('', AttrName, '', '', Copy(s, i, j - i));
 
-          if DoIncJ then
-            Inc(j);
+        if DoIncJ then
+          Inc(j);
 
-          while (j <= Length(s)) and (s[j] in WhitespaceChars) do
-            Inc(j);
-          i := j;
-        end
-        else if s[j] in WhitespaceChars then
-        begin
-          Attr.AddAttribute('', Copy(s, i, j - i), '', '', '');
+        while (j <= Length(s)) and IsXMLWhitespace(s[j]) do
           Inc(j);
-          while (j <= Length(s)) and (s[j] in WhitespaceChars) do
-            Inc(j);
-          i := j;
-        end else
+        i := j;
+      end
+      else if IsXMLWhitespace(s[j]) then
+      begin
+        if IsXMLName(@s[i], j-i) then
+          Attr.AddAttribute('', Copy(s, i, j - i), '', '', '');
+        Inc(j);
+        while (j <= Length(s)) and IsXMLWhitespace(s[j]) do
           Inc(j);
-    end;
+        i := j;
+      end else
+        Inc(j);
   end;
+end;
 
+procedure TSAXXMLReader.EnterNewScannerContext(NewContext: TXMLScannerContext);
 var
   Attr: TSAXAttributes;
-  TagName: String;
-  Found: Boolean;
+  TagName: SAXString;
   Ent: SAXChar;
-  i: Integer;
 begin
+  FTokenText := FRawTokenText;  // this is where conversion takes place
   case ScannerContext of
     scWhitespace:
-      DoIgnorableWhitespace(PSAXChar(TokenText), 1, Length(TokenText));
+      DoIgnorableWhitespace(PSAXChar(TokenText), 0, Length(TokenText));
     scText:
       DoCharacters(PSAXChar(TokenText), 0, Length(TokenText));
     scEntityReference:
@@ -394,7 +403,8 @@ begin
       end;
   end;
   FScannerContext := NewContext;
-  SetLength(FTokenText, 0);
+  FTokenText := '';
+  FRawTokenText := '';
   FCurStringValueDelimiter := #0;
   FAttrNameRead := False;
 end;
@@ -420,16 +430,7 @@ end;
 constructor TXMLToDOMConverter.CreateFragment(AReader: TSAXXMLReader;
   AFragmentRoot: TDOMNode);
 begin
-  inherited Create;
-  FReader := AReader;
-  FReader.OnCharacters := @ReaderCharacters;
-  FReader.OnIgnorableWhitespace := @ReaderIgnorableWhitespace;
-  FReader.OnSkippedEntity := @ReaderSkippedEntity;
-  FReader.OnStartElement := @ReaderStartElement;
-  FReader.OnEndElement := @ReaderEndElement;
-  FDocument := AFragmentRoot.OwnerDocument;
-  FElementStack := TList.Create;
-  FNodeBuffer := TList.Create;
+  Create(AReader, AFragmentRoot.OwnerDocument);
   FragmentRoot := AFragmentRoot;
   IsFragmentMode := True;
 end;
@@ -450,30 +451,22 @@ end;
 procedure TXMLToDOMConverter.ReaderCharacters(Sender: TObject;
   const ch: PSAXChar; Start, Count: Integer);
 var
-  s: SAXString;
   NodeInfo: TXMLNodeInfo;
 begin
-  SetLength(s, Count);
-  Move(ch^, s[1], Count * SizeOf(SAXChar));
-
   NodeInfo := TXMLNodeInfo.Create;
   NodeInfo.NodeType := ntText;
-  NodeInfo.DOMNode := FDocument.CreateTextNode(s);
+  NodeInfo.DOMNode := FDocument.CreateTextNodeBuf(ch, Count, False);
   FNodeBuffer.Add(NodeInfo);
 end;
 
 procedure TXMLToDOMConverter.ReaderIgnorableWhitespace(Sender: TObject;
   const ch: PSAXChar; Start, Count: Integer);
 var
-  s: SAXString;
   NodeInfo: TXMLNodeInfo;
 begin
-  SetLength(s, Count);
-  Move(ch^, s[1], Count * SizeOf(SAXChar));
-
   NodeInfo := TXMLNodeInfo.Create;
   NodeInfo.NodeType := ntWhitespace;
-  NodeInfo.DOMNode := FDocument.CreateTextNode(s);
+  NodeInfo.DOMNode := FDocument.CreateTextNodeBuf(ch, Count, False);
   FNodeBuffer.Add(NodeInfo);
 end;
 

+ 140 - 341
packages/fcl-xml/src/xmlread.pp

@@ -48,13 +48,13 @@ procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String
 
 procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String); overload;
 procedure ReadXMLFragment(AParentNode: TDOMNode; var f: Text); overload;
-procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream); overload;
-procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream; const ABaseURI: String); overload;
+procedure ReadXMLFragment(AParentNode: TDOMNode; f: TStream); overload;
+procedure ReadXMLFragment(AParentNode: TDOMNode; f: TStream; const ABaseURI: String); overload;
 
 procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String);  overload;
 procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text); overload;
-procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream); overload;
-procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String); overload;
+procedure ReadDTDFile(out ADoc: TXMLDocument; f: TStream); overload;
+procedure ReadDTDFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String); overload;
 
 type
   TDOMParseOptions = class(TObject)
@@ -153,6 +153,7 @@ const
 type
   TDOMNotationEx = class(TDOMNotation);
   TDOMDocumentTypeEx = class(TDOMDocumentType);
+  TDOMTopNodeEx = class(TDOMNode_TopLevel);
   TDOMElementDef = class;
 
   TDTDSubsetType = (dsNone, dsInternal, dsExternal);
@@ -172,18 +173,10 @@ type
     FBetweenDecls: Boolean;
     FIsPE: Boolean;
     FReplacementText: DOMString;
-    FURI: DOMString;
     FStartLocation: TLocation;
     FCharCount: Cardinal;
   end;
 
-  PWideCharBuf = ^TWideCharBuf;
-  TWideCharBuf = record
-    Buffer: PWideChar;
-    Length: Integer;
-    MaxLength: Integer;
-  end;
-
   TXMLReader = class;
 
   TXMLCharSource = class(TObject)
@@ -325,10 +318,8 @@ type
     FCtrl: TDOMParser;
     FXML11: Boolean;
     FState: TXMLReadState;
-    FRecognizePE: Boolean;
     FHavePERefs: Boolean;
     FInsideDecl: Boolean;
-    FDocNotValid: Boolean;
     FValue: TWideCharBuf;
     FEntityValue: TWideCharBuf;
     FName: TWideCharBuf;
@@ -367,10 +358,11 @@ type
 
     procedure SkipQuote(out Delim: WideChar; required: Boolean = True);
     procedure Initialize(ASource: TXMLCharSource);
+    procedure EntityToSource(AEntity: TDOMEntityEx; out Src: TXMLCharSource);
     function ContextPush(AEntity: TDOMEntityEx): Boolean;
     function ContextPop(Forced: Boolean = False): Boolean;
     procedure XML11_BuildTables;
-    procedure ParseQuantity(CP: TContentParticle);
+    function ParseQuantity: TCPQuant;
     procedure StoreLocation(out Loc: TLocation);
     function ValidateAttrSyntax(AttrDef: TDOMAttrDef; const aValue: WideString): Boolean;
     procedure ValidateAttrValue(Attr: TDOMAttr; const aValue: WideString);
@@ -413,7 +405,7 @@ type
     procedure ExpectEq;
     procedure ParseDoctypeDecl;                                         // [28]
     procedure ParseMarkupDecl;                                          // [29]
-    procedure ParseElement;                                             // [39]
+    procedure ParseStartTag;                                            // [39]
     procedure ParseEndTag;                                              // [42]
     procedure DoEndElement(ErrOffset: Integer);
     procedure ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef);
@@ -504,137 +496,6 @@ begin
     end;
 end;
 
-function Decode_UCS2(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
-var
-  cnt: Cardinal;
-begin
-  cnt := OutCnt;         // num of widechars
-  if cnt > InCnt div sizeof(WideChar) then
-    cnt := InCnt div sizeof(WideChar);
-  Move(InBuf^, OutBuf^, cnt * sizeof(WideChar));
-  Dec(InCnt, cnt*sizeof(WideChar));
-  Dec(OutCnt, cnt);
-  Result := cnt;
-end;
-
-function Decode_UCS2_Swapped(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
-var
-  I: Integer;
-  cnt: Cardinal;
-  InPtr: PChar;
-begin
-  cnt := OutCnt;         // num of widechars
-  if cnt > InCnt div sizeof(WideChar) then
-    cnt := InCnt div sizeof(WideChar);
-  InPtr := InBuf;
-  for I := 0 to cnt-1 do
-  begin
-    OutBuf[I] := WideChar((ord(InPtr^) shl 8) or ord(InPtr[1]));
-    Inc(InPtr, 2);
-  end;
-  Dec(InCnt, cnt*sizeof(WideChar));
-  Dec(OutCnt, cnt);
-  Result := cnt;
-end;
-
-function Decode_88591(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
-var
-  I: Integer;
-  cnt: Cardinal;
-begin
-  cnt := OutCnt;         // num of widechars
-  if cnt > InCnt then
-    cnt := InCnt;
-  for I := 0 to cnt-1 do
-    OutBuf[I] := WideChar(ord(InBuf[I]));
-  Dec(InCnt, cnt);
-  Dec(OutCnt, cnt);
-  Result := cnt;
-end;
-
-function Decode_UTF8(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
-const
-  MaxCode: array[1..4] of Cardinal = ($7F, $7FF, $FFFF, $1FFFFF);
-var
-  i, j, bc: Cardinal;
-  Value: Cardinal;
-begin
-  result := 0;
-  i := OutCnt;
-  while (i > 0) and (InCnt > 0) do
-  begin
-    bc := 1;
-    Value := ord(InBuf^);
-    if Value < $80 then
-      OutBuf^ := WideChar(Value)
-    else
-    begin
-      if Value < $C2 then
-      begin
-        Result := -1;
-        Break;
-      end;
-      Inc(bc);
-      if Value > $DF then
-      begin
-        Inc(bc);
-        if Value > $EF then
-        begin
-          Inc(bc);
-          if Value > $F7 then  // never encountered in the tests.
-          begin
-            Result := -1;
-            Break;
-          end;
-        end;
-      end;
-      if InCnt < bc then
-        Break;
-      j := 1;
-      while j < bc do
-      begin
-        if InBuf[j] in [#$80..#$BF] then
-          Value := (Value shl 6) or (Cardinal(InBuf[j]) and $3F)
-        else
-        begin
-          Result := -1;
-          Break;
-        end;
-        Inc(j);
-      end;
-      Value := Value and MaxCode[bc];
-      // RFC2279 check
-      if Value <= MaxCode[bc-1] then
-      begin
-        Result := -1;
-        Break;
-      end;
-      case Value of
-        0..$D7FF, $E000..$FFFF: OutBuf^ := WideChar(Value);
-        $10000..$10FFFF:
-        begin
-          if i < 2 then Break;
-          OutBuf^ := WideChar($D7C0 + (Value shr 10));
-          OutBuf[1] := WideChar($DC00 xor (Value and $3FF));
-          Inc(OutBuf); // once here
-          Dec(i);
-        end
-        else
-        begin
-          Result := -1;
-          Break;
-        end;
-      end;
-    end;
-    Inc(OutBuf);
-    Inc(InBuf, bc);
-    Dec(InCnt, bc);
-    Dec(i);
-  end;
-  if Result >= 0 then
-    Result := OutCnt-i;
-  OutCnt := i;
-end;
 
 function Is_8859_1(const AEncoding: string): Boolean;
 begin
@@ -651,48 +512,6 @@ begin
             SameText(AEncoding, 'ISO8859-1');
 end;
 
-procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer);
-begin
-  ABuffer.MaxLength := ALength;
-  ABuffer.Length := 0;
-  ABuffer.Buffer := AllocMem(ABuffer.MaxLength*SizeOf(WideChar));
-end;
-
-procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar);
-begin
-  if ABuffer.Length >= ABuffer.MaxLength then
-  begin
-    ReallocMem(ABuffer.Buffer, ABuffer.MaxLength * 2 * SizeOf(WideChar));
-    FillChar(ABuffer.Buffer[ABuffer.MaxLength], ABuffer.MaxLength * SizeOf(WideChar),0);
-    ABuffer.MaxLength := ABuffer.MaxLength * 2;
-  end;
-  ABuffer.Buffer[ABuffer.Length] := wc;
-  Inc(ABuffer.Length);
-end;
-
-procedure BufAppendChunk(var ABuf: TWideCharBuf; pstart, pend: PWideChar);
-var
-  Len: Integer;
-begin
-  Len := PEnd - PStart;
-  if Len <= 0 then
-    Exit;
-  if Len >= ABuf.MaxLength - ABuf.Length then
-  begin
-    ABuf.MaxLength := (Len + ABuf.Length)*2;
-    // note: memory clean isn't necessary here.
-    // To avoid garbage, control Length field.
-    ReallocMem(ABuf.Buffer, ABuf.MaxLength * sizeof(WideChar));
-  end;
-  Move(pstart^, ABuf.Buffer[ABuf.Length], Len * sizeof(WideChar));
-  Inc(ABuf.Length, Len);
-end;
-
-function BufEquals(const ABuf: TWideCharBuf; const Arg: WideString): Boolean;
-begin
-  Result := (ABuf.Length = Length(Arg)) and
-    CompareMem(ABuf.Buffer, Pointer(Arg), ABuf.Length*sizeof(WideChar));
-end;
 
 { TDOMParseOptions }
 
@@ -999,8 +818,6 @@ procedure TXMLDecodingSource.Initialize;
 begin
   inherited;
   FLineNo := 1;
-  FXml11Rules := FReader.FXML11;
-
   FDecoder.Decode := @Decode_UTF8;
 
   FFixedUCS2 := '';
@@ -1029,9 +846,11 @@ begin
   begin
     FBufSize := 3;           // don't decode past XML declaration
     Inc(FBuf, Length(XmlSign));
-    FReader.ParseXmlOrTextDecl(FParent <> nil);
+    FReader.ParseXmlOrTextDecl((FParent <> nil) or (FReader.FState <> rsProlog));
   end;
   FBufSize := 2047;
+  if FReader.FXML11 then
+    FReader.XML11_BuildTables;
 end;
 
 function TXMLDecodingSource.SetEncoding(const AEncoding: string): Boolean;
@@ -1052,7 +871,7 @@ begin
 // see rmt-e2e-61, it now fails but for a completely different reason.
   FillChar(NewDecoder, sizeof(TDecoder), 0);
   if Is_8859_1(AEncoding) then
-    FDecoder.Decode := @Decode_88591
+    FDecoder.Decode := @Decode_8859_1
   else if FindDecoder(AEncoding, NewDecoder) then
     FDecoder := NewDecoder
   else
@@ -1062,31 +881,25 @@ end;
 procedure TXMLDecodingSource.NewLine;
 begin
   case FBuf^ of
-    #10: begin
-      Inc(FLineNo);
-      LFPos := FBuf;
-    end;
+    #10: ;
     #13: begin
-      Inc(FLineNo);
-      LFPos := FBuf;
       // Reload trashes the buffer, it should be consumed beforehand
       if (FBufEnd >= FBuf+2) or Reload then
       begin
         if (FBuf[1] = #10) or (FXML11Rules and (FBuf[1] = #$85)) then
-        begin
           Inc(FBuf);
-          Inc(LFPos);
-        end;
-        FBuf^ := #10;
       end;
-    end;
-    #$85, #$2028: if FXML11Rules then
-    begin
       FBuf^ := #10;
-      Inc(FLineNo);
-      LFPos := FBuf;
     end;
+    #$85, #$2028: if FXML11Rules then
+      FBuf^ := #10
+    else
+      Exit;
+  else
+    Exit;
   end;
+  Inc(FLineNo);
+  LFPos := FBuf;
 end;
 
 { TXMLStreamInputSource }
@@ -1261,7 +1074,6 @@ end;
 
 procedure TXMLReader.ValidationError(const Msg: string; const Args: array of const; LineOffs: Integer);
 begin
-  FDocNotValid := True;
   if FValidate then
     DoError(esError, Format(Msg, Args), LineOffs);
 end;
@@ -1348,7 +1160,7 @@ begin
     end
     else if FSource.FBuf^ = '%' then
     begin
-      if not FRecognizePE then
+      if (FState <> rsDTD) or ((FSource.DTDSubsetType = dsInternal) and FInsideDecl) then
         Break;
 // This is the only case where look-ahead is needed
       if FSource.FBuf > FSource.FBufEnd-2 then
@@ -1463,6 +1275,8 @@ constructor TXMLReader.Create(AParser: TDOMParser);
 begin
   Create;
   FCtrl := AParser;
+  if FCtrl = nil then
+    Exit;
   FValidate := FCtrl.Options.Validate;
   FPreserveWhitespace := FCtrl.Options.PreserveWhitespace;
   FExpandEntities := FCtrl.Options.ExpandEntities;
@@ -1526,8 +1340,9 @@ begin
   doc := AOwner.OwnerDocument;
   FCursor := AOwner as TDOMNode_WithChildren;
   FState := rsRoot;
-  Initialize(ASource);
   FXML11 := doc.InheritsFrom(TXMLDocument) and (TXMLDocument(doc).XMLVersion = '1.1');
+  Initialize(ASource);
+  FDocType := TDOMDocumentTypeEx(doc.DocType);
   ParseContent;
 end;
 
@@ -1770,20 +1585,18 @@ end;
 const
   PrefixChar: array[Boolean] of string = ('', '%');
 
-function TXMLReader.ContextPush(AEntity: TDOMEntityEx): Boolean;
-var
-  Src: TXMLCharSource;
+procedure TXMLReader.EntityToSource(AEntity: TDOMEntityEx; out Src: TXMLCharSource);
 begin
   if AEntity.FOnStack then
     FatalError('Entity ''%s%s'' recursively references itself', [PrefixChar[AEntity.FIsPE], AEntity.FName]);
 
   if (AEntity.SystemID <> '') and not AEntity.FPrefetched then
   begin
-    Result := ResolveEntity(AEntity.SystemID, AEntity.PublicID, AEntity.FURI, Src);
-    if not Result then
+    if not ResolveEntity(AEntity.SystemID, AEntity.PublicID, AEntity.FURI, Src) then
     begin
       // TODO: a detailed message like SysErrorMessage(GetLastError) would be great here
       ValidationError('Unable to resolve external entity ''%s''', [AEntity.FName]);
+      Src := nil;
       Exit;
     end;
   end
@@ -1799,9 +1612,16 @@ begin
 
   AEntity.FOnStack := True;
   Src.FEntity := AEntity;
+end;
 
-  Initialize(Src);
-  Result := True;
+function TXMLReader.ContextPush(AEntity: TDOMEntityEx): Boolean;
+var
+  Src: TXMLCharSource;
+begin
+  EntityToSource(AEntity, Src);
+  Result := Assigned(Src);
+  if Result then
+    Initialize(Src);
 end;
 
 function TXMLReader.ContextPop(Forced: Boolean): Boolean;
@@ -1833,10 +1653,8 @@ function TXMLReader.EntityCheck(NoExternals: Boolean): TDOMEntityEx;
 var
   RefName: WideString;
   cnt: Integer;
-  SaveCursor: TDOMNode_WithChildren;
-  SaveState: TXMLReadState;
-  SaveElDef: TDOMElementDef;
-  SaveValue: TWideCharBuf;
+  InnerReader: TXMLReader;
+  Src: TXMLCharSource;
 begin
   Result := nil;
   SetString(RefName, FName.Buffer, FName.Length);
@@ -1865,30 +1683,17 @@ begin
   if not Result.FResolved then
   begin
     // To build children of the entity itself, we must parse it "out of context"
-    SaveCursor := FCursor;
-    SaveElDef := FValidator[FNesting].FElementDef;
-    SaveState := FState;
-    SaveValue := FValue;
-    if ContextPush(Result) then
+    InnerReader := TXMLReader.Create(FCtrl);
     try
-      FCursor := Result;         // build child node tree for the entity
+      EntityToSource(Result, Src);
       Result.SetReadOnly(False);
-      FState := rsRoot;
-      FValidator[FNesting].FElementDef := nil;
-      UpdateConstraints;
-      FSource.DTDSubsetType := dsExternal;  // avoids ContextPop at the end
-      BufAllocate(FValue, 256);
-      ParseContent;
+      if Assigned(Src) then
+        InnerReader.ProcessFragment(Src, Result);
       Result.FResolved := True;
     finally
-      FreeMem(FValue.Buffer);
-      FValue := SaveValue;
+      InnerReader.Free;
+      Result.FOnStack := False;
       Result.SetReadOnly(True);
-      ContextPop(True);
-      FCursor := SaveCursor;
-      FState := SaveState;
-      FValidator[FNesting].FElementDef := SaveElDef;
-      UpdateConstraints;
     end;
   end;
   // at this point we know the charcount of the entity being included
@@ -2060,7 +1865,6 @@ var
   wc: WideChar;
 begin
   Result := False;
-  FValue.Length := 0;
   StoreLocation(FTokenStart);
   repeat
     wc := FSource.SkipUntil(FValue, Delim);
@@ -2083,15 +1887,17 @@ begin
 end;
 
 procedure TXMLReader.ParseComment;    // [15]
+var
+  SaveLength: Integer;
 begin
   ExpectString('--');
-  if SkipUntilSeq([#0, '-'], '-') then
-  begin
-    ExpectChar('>');
-    DoComment(FValue.Buffer, FValue.Length);
-  end
-  else
+  SaveLength := FValue.Length;
+  if not SkipUntilSeq([#0, '-'], '-') then
     FatalError('Unterminated comment', -1);
+  ExpectChar('>');
+
+  DoComment(@FValue.Buffer[SaveLength], FValue.Length-SaveLength);
+  FValue.Length := SaveLength;
 end;
 
 procedure TXMLReader.ParsePI;                    // [16]
@@ -2100,7 +1906,7 @@ var
   PINode: TDOMProcessingInstruction;
 begin
   FSource.NextChar;      // skip '?'
-  Name := ExpectName;
+  CheckName;
   CheckNCName;
   with FName do
     if (Length = 3) and
@@ -2108,7 +1914,7 @@ begin
      ((Buffer[1] = 'M') or (Buffer[1] = 'm')) and
      ((Buffer[2] = 'L') or (Buffer[2] = 'l')) then
   begin
-    if Name <> 'xml' then
+    if not BufEquals(FName, 'xml') then
       FatalError('''xml'' is a reserved word; it must be lowercase', FName.Length)
     else
       FatalError('XML declaration is not allowed here', FName.Length);
@@ -2117,35 +1923,40 @@ begin
   if FSource.FBuf^ <> '?' then
     SkipS(True);
 
-  if SkipUntilSeq(GT_Delim, '?') then
-  begin
-    SetString(Value, FValue.Buffer, FValue.Length);
-    // SAX: ContentHandler.ProcessingInstruction(Name, Value);
-    if FCurrContentType = ctEmpty then
-      ValidationError('Processing instructions are not allowed within EMPTY elements', []);
-
-    PINode := Doc.CreateProcessingInstruction(Name, Value);
-    if Assigned(FCursor) then
-      FCursor.AppendChild(PINode)
-    else  // to comply with certain tests, insert PI from DTD before DTD
-      Doc.InsertBefore(PINode, FDocType);
-  end
-  else
+  FValue.Length := 0;
+  if not SkipUntilSeq(GT_Delim, '?') then
     FatalError('Unterminated processing instruction', -1);
+
+  SetString(Name, FName.Buffer, FName.Length);
+  SetString(Value, FValue.Buffer, FValue.Length);
+  // SAX: ContentHandler.ProcessingInstruction(Name, Value);
+  if FCurrContentType = ctEmpty then
+    ValidationError('Processing instructions are not allowed within EMPTY elements', []);
+
+  PINode := Doc.CreateProcessingInstruction(Name, Value);
+  if Assigned(FCursor) then
+    FCursor.AppendChild(PINode)
+  else  // to comply with certain tests, insert PI from DTD before DTD
+    Doc.InsertBefore(PINode, FDocType);
 end;
 
 const
-  verStr: array[Boolean] of WideString = ('1.0', '1.1');
+  vers: array[Boolean] of TXMLVersion = (xmlVersion10, xmlVersion11);
 
 procedure TXMLReader.ParseXmlOrTextDecl(TextDecl: Boolean);
 var
   TmpStr: WideString;
-  IsXML11: Boolean;
+  Ver: TXMLVersion;
   Delim: WideChar;
   buf: array[0..31] of WideChar;
   I: Integer;
+  node: TDOMNode;
 begin
   SkipS(True);
+  if TextDecl then
+    node := TDOMNode(FSource.FEntity)
+  else
+    node := doc;
   // [24] VersionInfo: optional in TextDecl, required in XmlDecl
   if (not TextDecl) or (FSource.FBuf^ = 'v') then
   begin
@@ -2164,16 +1975,12 @@ begin
       FatalError('Illegal version number', -1);
 
     ExpectChar(Delim);
-    IsXML11 := buf[2] = '1';
+    Ver := vers[buf[2] = '1'];
 
-    if not TextDecl then
-    begin
-      if doc.InheritsFrom(TXMLDocument) then
-        TXMLDocument(doc).XMLVersion := verStr[IsXML11];  // buf[0..2] works with FPC only
-    end
-    else   // parsing external entity
-      if IsXML11 and not FXML11 then
-        FatalError('XML 1.0 document cannot invoke XML 1.1 entities', -1);
+    if TextDecl and (Ver = xmlVersion11) and not FXML11 then
+      FatalError('XML 1.0 document cannot invoke XML 1.1 entities', -1);
+    if Assigned(node) then  { it is nil for external DTD subset }
+      TDOMTopNodeEx(node).FXMLVersion := Ver;
 
     if TextDecl or (FSource.FBuf^ <> '?') then
       SkipS(True);
@@ -2202,8 +2009,8 @@ begin
       FatalError('Encoding ''%s'' is not supported', [TmpStr], i+1);
     // getting here means that specified encoding is supported
     // TODO: maybe assign the 'preferred' encoding name?
-    if not TextDecl and doc.InheritsFrom(TXMLDocument) then
-      TXMLDocument(doc).Encoding := TmpStr;
+    if Assigned(node) then
+      TDOMTopNodeEx(node).FXMLEncoding := TmpStr;
 
     if FSource.FBuf^ <> '?' then
       SkipS(not TextDecl);
@@ -2226,8 +2033,8 @@ begin
   ExpectString('?>');
   { Switch to 1.1 rules only after declaration is parsed completely. This is to
     ensure that NEL and LSEP within declaration are rejected (rmt-056, rmt-057) }
-  if (not TextDecl) and IsXML11 then
-    XML11_BuildTables;
+  if Ver = xmlVersion11 then
+    FXML11 := True;
 end;
 
 procedure TXMLReader.DTDReloadHook;
@@ -2343,13 +2150,14 @@ begin
   ValidationError('Standalone constriant violation', [], LineOffs);
 end;
 
-procedure TXMLReader.ParseQuantity(CP: TContentParticle);
+function TXMLReader.ParseQuantity: TCPQuant;
 begin
   case FSource.FBuf^ of
-    '?': CP.CPQuant := cqZeroOrOnce;
-    '*': CP.CPQuant := cqZeroOrMore;
-    '+': CP.CPQuant := cqOnceOrMore;
+    '?': Result := cqZeroOrOnce;
+    '*': Result := cqZeroOrMore;
+    '+': Result := cqOnceOrMore;
   else
+    Result := cqOnce;
     Exit;
   end;
   FSource.NextChar;
@@ -2391,7 +2199,7 @@ begin
     else
       CurrentCP.Def := FindOrCreateElDef;
 
-    ParseQuantity(CurrentCP);
+    CurrentCP.CPQuant := ParseQuantity;
     SkipWhitespace;
     if FSource.FBuf^ = ')' then
       Break;
@@ -2473,7 +2281,7 @@ begin
         if CurrentEntity <> FSource.FEntity then
           BadPENesting;
         FSource.NextChar;
-        ParseQuantity(CP);
+        CP.CPQuant := ParseQuantity;
       end;
     except
       CP.Free;
@@ -2740,9 +2548,7 @@ begin
   IncludeLevel := 0;
   IgnoreLevel := 0;
   repeat
-    FRecognizePE := True;      // PERef between declarations should always be recognized
     SkipWhitespace;
-    FRecognizePE := False;
 
     if (FSource.FBuf^ = ']') and (IncludeLevel > 0) then
     begin
@@ -2768,7 +2574,6 @@ begin
         if FSource.DTDSubsetType = dsInternal then
           FatalError('Conditional sections are not allowed in internal subset', 1);
 
-        FRecognizePE := True;
         SkipWhitespace;
 
         CondType := ctUnknown;  // satisfy compiler
@@ -2809,7 +2614,6 @@ begin
       end
       else
       begin
-        FRecognizePE := FSource.DTDSubsetType <> dsInternal;
         FInsideDecl := True;
         if FSource.Matches('ELEMENT') then
           ParseElementDecl
@@ -2823,7 +2627,6 @@ begin
           FatalError('Illegal markup declaration');
 
         SkipWhitespace;
-        FRecognizePE := False;
 
         if CurrentEntity <> FSource.FEntity then
           BadPENesting;
@@ -2832,7 +2635,6 @@ begin
       end;
     end;
   until False;
-  FRecognizePE := False;
   if IncludeLevel > 0 then
     DoErrorPos(esFatal, 'INCLUDE section is not closed', IncludeLoc);
   if (FSource.DTDSubsetType = dsInternal) and (FSource.FBuf^ = ']') then
@@ -2912,12 +2714,16 @@ const
     [#0, '>']
   );
 
+type
+  TXMLToken = (xtNone, xtText, xtElement, xtEndElement, xtCDSect, xtComment, xtPI, xtDoctype, xtEntity, xtEntityEnd);
+
 procedure TXMLReader.ParseContent;
 var
   nonWs: Boolean;
   wc: WideChar;
   ent: TDOMEntityEx;
   InCDATA: Boolean;
+  tok: TXMLToken;
 begin
   InCDATA := False;
   StoreLocation(FTokenStart);
@@ -2931,18 +2737,9 @@ begin
       if FSource.FBufEnd < FSource.FBuf + 2 then
         FSource.Reload;
       if FSource.FBuf^ = '/' then
-      begin
-        DoText(FValue.Buffer, FValue.Length, not nonWs);
-        if FNesting <= FSource.FStartNesting then
-          FatalError('End-tag is not allowed here');
-        Inc(FSource.FBuf);
-        ParseEndTag;
-      end
+        tok := xtEndElement
       else if CheckName([cnOptional]) then
-      begin
-        DoText(FValue.Buffer, FValue.Length, not nonWs);
-        ParseElement;
-      end
+        tok := xtElement
       else if FSource.FBuf^ = '!' then
       begin
         Inc(FSource.FBuf);
@@ -2953,27 +2750,24 @@ begin
             FatalError('Illegal at document level');
           StoreLocation(FTokenStart);
           InCDATA := True;
-          if not FCDSectionsAsText then
-            DoText(FValue.Buffer, FValue.Length, not nonWs)
-          else
+          if FCDSectionsAsText or (FValue.Length = 0) then
             Continue;
+          tok := xtCDSect;
         end
         else if FSource.FBuf^ = '-' then
         begin
-          DoText(FValue.Buffer, FValue.Length, not nonWs);
-          ParseComment;
+          if FIgnoreComments then
+          begin
+            ParseComment;
+            Continue;
+          end;
+          tok := xtComment;
         end
         else
-        begin
-          DoText(FValue.Buffer, FValue.Length, not nonWs);
-          ParseDoctypeDecl;
-        end;
+          tok := xtDoctype;
       end
       else if FSource.FBuf^ = '?' then
-      begin
-        DoText(FValue.Buffer, FValue.Length, not nonWs);
-        ParsePI;
-      end
+        tok := xtPI
       else
         RaiseNameNotFound;
     end
@@ -3000,7 +2794,7 @@ begin
         InCDATA := False;
         if FCDSectionsAsText then
           Continue;
-        DoCDSect(FValue.Buffer, FValue.Length);
+        tok := xtText;
       end
       else
         FatalError('Literal '']]>'' is not allowed in text', 3);
@@ -3021,18 +2815,27 @@ begin
       else
       begin
         ent := EntityCheck;
-        if (ent = nil) or (not FExpandEntities) then
-        begin
-          DoText(FValue.Buffer, FValue.Length, not nonWs);
-          AppendReference(ent);
-        end
-        else
+        if Assigned(ent) and FExpandEntities then
         begin
           ContextPush(ent);
           Continue;
         end;
+        tok := xtEntity;
       end;
     end;
+    // flush text accumulated this far
+    if tok = xtText then
+      DoCDSect(FValue.Buffer, FValue.Length)
+    else
+      DoText(FValue.Buffer, FValue.Length, not nonWs);
+    case tok of
+      xtEntity:     AppendReference(ent);
+      xtElement:    ParseStartTag;
+      xtEndElement: ParseEndTag;
+      xtPI:         ParsePI;
+      xtDoctype:    ParseDoctypeDecl;
+      xtComment:    ParseComment;
+    end;
     StoreLocation(FTokenStart);
     FValue.Length := 0;
     nonWs := False;
@@ -3056,7 +2859,7 @@ begin
 end;
 
 // Element name already in FNameBuffer
-procedure TXMLReader.ParseElement;    // [39] [40] [44]
+procedure TXMLReader.ParseStartTag;    // [39] [40] [44]
 var
   NewElem: TDOMElement;
   ElDef: TDOMElementDef;
@@ -3140,9 +2943,12 @@ end;
 
 procedure TXMLReader.ParseEndTag;     // [42]
 var
-  ErrOffset: Integer;
   ElName: PHashItem;
 begin
+  if FNesting <= FSource.FStartNesting then
+    FatalError('End-tag is not allowed here');
+  Inc(FSource.FBuf);
+
   ElName := FValidator[FNesting].FElement.NSI.QName;
 
   CheckName;
@@ -3150,18 +2956,17 @@ begin
     FatalError('Unmatching element end tag (expected "</%s>")', [ElName^.Key], FName.Length);
   if FSource.FBuf^ = '>' then    // this handles majority of cases
   begin
-    ErrOffset := FName.Length+1;
     FSource.NextChar;
+    DoEndElement(FName.Length+1);
   end
   else    // but if closing '>' is preceded by whitespace,
   begin   // skipping it is likely to lose position info.
     StoreLocation(FTokenStart);
     Dec(FTokenStart.LinePos, FName.Length);
-    ErrOffset := -1;
     SkipS;
     ExpectChar('>');
+    DoEndElement(-1);
   end;
-  DoEndElement(ErrOffset);
 end;
 
 procedure TXMLReader.ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef);
@@ -3558,19 +3363,13 @@ procedure TXMLReader.DoCDSect(ch: PWideChar; Count: Integer);
 var
   s: WideString;
 begin
+  Assert(not FCDSectionsAsText, 'Should not be called when CDSectionsAsText=True');
+
   if FCurrContentType = ctChildren then
     ValidationError('CDATA sections are not allowed in element-only content',[]);
 
-  if not FCDSectionsAsText then
-  begin
-    SetString(s, ch, Count);
-    // SAX: LexicalHandler.StartCDATA;
-    // SAX: ContentHandler.Characters(...);
-    FCursor.AppendChild(doc.CreateCDATASection(s));
-    // SAX: LexicalHandler.EndCDATA;
-  end
-  else
-    FCursor.AppendChild(doc.CreateTextNodeBuf(ch, Count, False));
+  SetString(s, ch, Count);
+  FCursor.AppendChild(doc.CreateCDATASection(s));
 end;
 
 procedure TXMLReader.DoNotationDecl(const aName, aPubID, aSysID: WideString);
@@ -3863,7 +3662,7 @@ begin
   end;
 end;
 
-procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream; const ABaseURI: String);
+procedure ReadXMLFragment(AParentNode: TDOMNode; f: TStream; const ABaseURI: String);
 var
   Reader: TXMLReader;
   Src: TXMLCharSource;
@@ -3878,7 +3677,7 @@ begin
   end;
 end;
 
-procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream);
+procedure ReadXMLFragment(AParentNode: TDOMNode; f: TStream);
 begin
   ReadXMLFragment(AParentNode, f, 'stream:');
 end;
@@ -3912,7 +3711,7 @@ begin
   end;
 end;
 
-procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String);
+procedure ReadDTDFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String);
 var
   Reader: TXMLReader;
   Src: TXMLCharSource;
@@ -3929,7 +3728,7 @@ begin
   end;
 end;
 
-procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream);
+procedure ReadDTDFile(out ADoc: TXMLDocument; f: TStream);
 begin
   ReadDTDFile(ADoc, f, 'stream:');
 end;

+ 216 - 0
packages/fcl-xml/src/xmlutils.pp

@@ -35,7 +35,14 @@ function IsXmlWhiteSpace(c: WideChar): Boolean;
 function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
 { beware, works in ASCII range only }
 function WStrLIComp(S1, S2: PWideChar; Len: Integer): Integer;
+procedure WStrLower(var S: WideString);
 
+type
+  TXMLVersion = (xmlVersionUnknown, xmlVersion10, xmlVersion11);
+
+const
+  xmlVersionStr: array[TXMLVersion] of WideString = ('', '1.0', '1.1');
+  
 { a simple hash table with WideString keys }
 
 type
@@ -139,6 +146,27 @@ type
     procedure EndElement;
   end;
 
+{ Buffer builder, used to compose long strings without too much memory allocations }
+
+  PWideCharBuf = ^TWideCharBuf;
+  TWideCharBuf = record
+    Buffer: PWideChar;
+    Length: Integer;
+    MaxLength: Integer;
+  end;
+
+procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer);
+procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar);
+procedure BufAppendChunk(var ABuf: TWideCharBuf; pstart, pend: PWideChar);
+function BufEquals(const ABuf: TWideCharBuf; const Arg: WideString): Boolean;
+
+{ Built-in decoder functions for UTF-8, UTF-16 and ISO-8859-1 }
+
+function Decode_UCS2(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
+function Decode_UCS2_Swapped(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
+function Decode_UTF8(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
+function Decode_8859_1(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
+
 {$i names.inc}
 
 implementation
@@ -379,6 +407,15 @@ begin
   result := c1 - c2;
 end;
 
+procedure WStrLower(var S: WideString);
+var
+  i: Integer;
+begin
+  for i := 1 to Length(S) do
+    if (S[i] >= 'A') and (S[i] <= 'Z') then
+      Inc(word(S[i]), 32);
+end;
+
 function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
 begin
   Result := InitValue;
@@ -830,6 +867,185 @@ begin
     Dec(FNesting);
 end;
 
+{ Buffer builder utils }
+
+procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer);
+begin
+  ABuffer.MaxLength := ALength;
+  ABuffer.Length := 0;
+  ABuffer.Buffer := AllocMem(ABuffer.MaxLength*SizeOf(WideChar));
+end;
+
+procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar);
+begin
+  if ABuffer.Length >= ABuffer.MaxLength then
+  begin
+    ReallocMem(ABuffer.Buffer, ABuffer.MaxLength * 2 * SizeOf(WideChar));
+    FillChar(ABuffer.Buffer[ABuffer.MaxLength], ABuffer.MaxLength * SizeOf(WideChar),0);
+    ABuffer.MaxLength := ABuffer.MaxLength * 2;
+  end;
+  ABuffer.Buffer[ABuffer.Length] := wc;
+  Inc(ABuffer.Length);
+end;
+
+procedure BufAppendChunk(var ABuf: TWideCharBuf; pstart, pend: PWideChar);
+var
+  Len: Integer;
+begin
+  Len := PEnd - PStart;
+  if Len <= 0 then
+    Exit;
+  if Len >= ABuf.MaxLength - ABuf.Length then
+  begin
+    ABuf.MaxLength := (Len + ABuf.Length)*2;
+    // note: memory clean isn't necessary here.
+    // To avoid garbage, control Length field.
+    ReallocMem(ABuf.Buffer, ABuf.MaxLength * sizeof(WideChar));
+  end;
+  Move(pstart^, ABuf.Buffer[ABuf.Length], Len * sizeof(WideChar));
+  Inc(ABuf.Length, Len);
+end;
+
+function BufEquals(const ABuf: TWideCharBuf; const Arg: WideString): Boolean;
+begin
+  Result := (ABuf.Length = Length(Arg)) and
+    CompareMem(ABuf.Buffer, Pointer(Arg), ABuf.Length*sizeof(WideChar));
+end;
+
+{ standard decoders }
+
+function Decode_UCS2(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
+var
+  cnt: Cardinal;
+begin
+  cnt := OutCnt;         // num of widechars
+  if cnt > InCnt div sizeof(WideChar) then
+    cnt := InCnt div sizeof(WideChar);
+  Move(InBuf^, OutBuf^, cnt * sizeof(WideChar));
+  Dec(InCnt, cnt*sizeof(WideChar));
+  Dec(OutCnt, cnt);
+  Result := cnt;
+end;
+
+function Decode_UCS2_Swapped(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
+var
+  I: Integer;
+  cnt: Cardinal;
+  InPtr: PChar;
+begin
+  cnt := OutCnt;         // num of widechars
+  if cnt > InCnt div sizeof(WideChar) then
+    cnt := InCnt div sizeof(WideChar);
+  InPtr := InBuf;
+  for I := 0 to cnt-1 do
+  begin
+    OutBuf[I] := WideChar((ord(InPtr^) shl 8) or ord(InPtr[1]));
+    Inc(InPtr, 2);
+  end;
+  Dec(InCnt, cnt*sizeof(WideChar));
+  Dec(OutCnt, cnt);
+  Result := cnt;
+end;
+
+function Decode_8859_1(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
+var
+  I: Integer;
+  cnt: Cardinal;
+begin
+  cnt := OutCnt;         // num of widechars
+  if cnt > InCnt then
+    cnt := InCnt;
+  for I := 0 to cnt-1 do
+    OutBuf[I] := WideChar(ord(InBuf[I]));
+  Dec(InCnt, cnt);
+  Dec(OutCnt, cnt);
+  Result := cnt;
+end;
+
+function Decode_UTF8(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
+const
+  MaxCode: array[1..4] of Cardinal = ($7F, $7FF, $FFFF, $1FFFFF);
+var
+  i, j, bc: Cardinal;
+  Value: Cardinal;
+begin
+  result := 0;
+  i := OutCnt;
+  while (i > 0) and (InCnt > 0) do
+  begin
+    bc := 1;
+    Value := ord(InBuf^);
+    if Value < $80 then
+      OutBuf^ := WideChar(Value)
+    else
+    begin
+      if Value < $C2 then
+      begin
+        Result := -1;
+        Break;
+      end;
+      Inc(bc);
+      if Value > $DF then
+      begin
+        Inc(bc);
+        if Value > $EF then
+        begin
+          Inc(bc);
+          if Value > $F7 then  // never encountered in the tests.
+          begin
+            Result := -1;
+            Break;
+          end;
+        end;
+      end;
+      if InCnt < bc then
+        Break;
+      j := 1;
+      while j < bc do
+      begin
+        if InBuf[j] in [#$80..#$BF] then
+          Value := (Value shl 6) or (Cardinal(InBuf[j]) and $3F)
+        else
+        begin
+          Result := -1;
+          Break;
+        end;
+        Inc(j);
+      end;
+      Value := Value and MaxCode[bc];
+      // RFC2279 check
+      if Value <= MaxCode[bc-1] then
+      begin
+        Result := -1;
+        Break;
+      end;
+      case Value of
+        0..$D7FF, $E000..$FFFF: OutBuf^ := WideChar(Value);
+        $10000..$10FFFF:
+        begin
+          if i < 2 then Break;
+          OutBuf^ := WideChar($D7C0 + (Value shr 10));
+          OutBuf[1] := WideChar($DC00 xor (Value and $3FF));
+          Inc(OutBuf); // once here
+          Dec(i);
+        end
+        else
+        begin
+          Result := -1;
+          Break;
+        end;
+      end;
+    end;
+    Inc(OutBuf);
+    Inc(InBuf, bc);
+    Dec(InCnt, bc);
+    Dec(i);
+  end;
+  if Result >= 0 then
+    Result := OutCnt-i;
+  OutCnt := i;
+end;
+
 
 initialization
 

+ 25 - 42
packages/fcl-xml/src/xmlwrite.pp

@@ -52,6 +52,7 @@ type
 
   TXMLWriter = class(TObject)
   private
+    FStream: TStream;
     FInsideTextNode: Boolean;
     FCanonical: Boolean;
     FIndent: WideString;
@@ -76,7 +77,6 @@ type
     procedure WriteNSDef(B: TBinding);
     procedure NamespaceFixup(Element: TDOMElement);
   protected
-    procedure Write(const Buffer; Count: Longint); virtual; abstract;
     procedure WriteNode(Node: TDOMNode);
     procedure VisitDocument(Node: TDOMNode);
     procedure VisitDocument_Canonical(Node: TDOMNode);
@@ -90,40 +90,30 @@ type
     procedure VisitDocumentType(Node: TDOMNode);
     procedure VisitPI(Node: TDOMNode);
   public
-    constructor Create;
+    constructor Create(AStream: TStream);
     destructor Destroy; override;
   end;
 
-  TTextXMLWriter = Class(TXMLWriter)
+  TTextStream = class(TStream)
   Private
     F : ^Text;
-  Protected
-    Procedure Write(Const Buffer; Count : Longint);override;
   Public
     constructor Create(var AFile: Text);
-  end;
-
-  TStreamXMLWriter = Class(TXMLWriter)
-  Private
-    F : TStream;
-  Protected
-    Procedure Write(Const Buffer; Count : Longint);override;
-  Public
-    constructor Create(AStream: TStream);
+    function Write(Const Buffer; Count: Longint): Longint; override;
   end;
 
 { ---------------------------------------------------------------------
-    TTextXMLWriter
+    TTextStream
   ---------------------------------------------------------------------}
 
 
-constructor TTextXMLWriter.Create(var AFile: Text);
+constructor TTextStream.Create(var AFile: Text);
 begin
   inherited Create;
   f := @AFile;
 end;
 
-procedure TTextXMLWriter.Write(const Buffer; Count: Longint);
+function TTextStream.Write(const Buffer; Count: Longint): Longint;
 var
   s: string;
 begin
@@ -132,26 +122,9 @@ begin
     SetString(s, PChar(@Buffer), Count);
     system.Write(f^, s);
   end;
+  Result := Count;
 end;
 
-{ ---------------------------------------------------------------------
-    TStreamXMLWriter
-  ---------------------------------------------------------------------}
-
-constructor TStreamXMLWriter.Create(AStream: TStream);
-begin
-  inherited Create;
-  F := AStream;
-end;
-
-
-procedure TStreamXMLWriter.Write(const Buffer; Count: Longint);
-begin
-  if Count > 0 then
-    F.Write(Buffer, Count);
-end;
-
-
 { ---------------------------------------------------------------------
     TXMLWriter
   ---------------------------------------------------------------------}
@@ -166,11 +139,12 @@ const
   ltStr = '&lt;';
   gtStr = '&gt;';
 
-constructor TXMLWriter.Create;
+constructor TXMLWriter.Create(AStream: TStream);
 var
   I: Integer;
 begin
   inherited Create;
+  FStream := AStream;
   // some overhead - always be able to write at least one extra UCS4
   FBuffer := AllocMem(512+32);
   FBufPos := FBuffer;
@@ -208,7 +182,7 @@ begin
   FScratch.Free;
   FNSHelper.Free;
   if FBufPos > FBuffer then
-    write(FBuffer^, FBufPos-FBuffer);
+    FStream.write(FBuffer^, FBufPos-FBuffer);
 
   FreeMem(FBuffer);
   inherited Destroy;
@@ -226,7 +200,7 @@ begin
   begin
     if pb >= @FBuffer[FCapacity] then
     begin
-      write(FBuffer^, FCapacity);
+      FStream.write(FBuffer^, FCapacity);
       Dec(pb, FCapacity);
       if pb > FBuffer then
         Move(FBuffer[FCapacity], FBuffer^, pb - FBuffer);
@@ -612,6 +586,8 @@ begin
     wrtChars('/>', 2)
   else
   begin
+    // TODO: presence of zero-length textnodes triggers the indenting logic,
+    // while they should be ignored altogeter.
     SavedInsideTextNode := FInsideTextNode;
     wrtChr('>');
     FInsideTextNode := FCanonical or (Child.NodeType in [TEXT_NODE, CDATA_SECTION_NODE]);
@@ -837,18 +813,25 @@ begin
 end;
 
 procedure WriteXMLFile(doc: TXMLDocument; var AFile: Text);
+var
+  s: TStream;
 begin
-  with TTextXMLWriter.Create(AFile) do
+  s := TTextStream.Create(AFile);
   try
-    WriteNode(doc);
+    with TXMLWriter.Create(s) do
+    try
+      WriteNode(doc);
+    finally
+      Free;
+    end;
   finally
-    Free;
+    s.Free;
   end;
 end;
 
 procedure WriteXMLFile(doc: TXMLDocument; AStream: TStream);
 begin
-  with TStreamXMLWriter.Create(AStream) do
+  with TXMLWriter.Create(AStream) do
   try
     WriteNode(doc);
   finally

File diff suppressed because it is too large
+ 295 - 349
packages/fcl-xml/src/xpath.pp


+ 162 - 0
packages/fcl-xml/src/xpathkw.inc

@@ -0,0 +1,162 @@
+{
+    This file is part of the Free Component Library
+
+    A perfect hash for XPath keywords
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+
+const
+  XPathKeywords: array [TXPathKeyword] of PWideChar = (
+    '',
+    #08'ancestor',
+    #16'ancestor-or-self',
+    #09'attribute',
+    #05'child',
+    #10'descendant',
+    #18'descendant-or-self',
+    #09'following',
+    #17'following-sibling',
+    #09'namespace',
+    #06'parent',
+    #09'preceding',
+    #17'preceding-sibling',
+    #04'self',
+    #07'comment',
+    #04'text',
+    #22'processing-instruction',
+    #04'node',
+    #03'and',
+    #02'or',
+    #03'div',
+    #03'mod',
+    #04'last',
+    #08'position',
+    #05'count',
+    #02'id',
+    #10'local-name',
+    #13'namespace-uri',
+    #04'name',
+    #06'string',
+    #06'concat',
+    #11'starts-with',
+    #08'contains',
+    #16'substring-before',
+    #15'substring-after',
+    #09'substring',
+    #13'string-length',
+    #15'normalize-space',
+    #09'translate',
+    #07'boolean',
+    #03'not',
+    #04'true',
+    #05'false',
+    #04'lang',
+    #06'number',
+    #03'sum',
+    #05'floor',
+    #07'ceiling',
+    #05'round'
+  );
+
+{ The following code is not very maintainable because it was hand-ported from 
+  C code generated by gperf. Unless a tool like gperf is ported or modified to
+  generate Pascal, modifying it will be painful.
+  The good side is that one shouldn't ever need to modify it. }
+
+  MaxHash = 55;
+
+  KeywordIndex: array[0..MaxHash-1] of TXPathKeyword = (
+    xkNone, xkNone,
+    xkId,
+    xkNone, xkNone, xkNone,
+    xkString,
+    xkSum,
+    xkParent,
+    xkSubstring,
+    xkNone,
+    xkComment,
+    xkName,
+    xkStringLength,
+    xkNumber,
+    xkSubstringAfter,
+    xkSubstringBefore,
+    xkNamespace,
+    xkFloor,
+    xkNormalizeSpace,
+    xkSelf,
+    xkNamespaceUri,
+    xkPreceding,
+    xkOr,
+    xkPosition,
+    xkText,
+    xkProcessingInstruction,
+    xkConcat,
+    xkLast,
+    xkContains,
+    xkPrecedingSibling,
+    xkAncestor,
+    xkFalse,
+    xkLocalName,
+    xkCount,
+    xkLang,
+    xkFollowing,
+    xkDescendant,
+    xkNode,
+    xkAncestorOrSelf,
+    xkBoolean,
+    xkNot,
+    xkStartsWith,
+    xkAnd,
+    xkFollowingSibling,
+    xkDescendantOrSelf,
+    xkChild,
+    xkTrue,
+    xkCeiling,
+    xkMod,
+    xkDiv,
+    xkRound,
+    xkNone,
+    xkAttribute,
+    xkTranslate
+  );
+
+  AssoValues: array[97..122] of Byte = (
+    10, 31,  0, 13, 30, 11, 55, 55, 0, 41,
+    55, 10, 16,  4, 21,  2, 55, 17, 0, 14,
+    34, 29, 34, 55,  7, 55
+  );
+
+function LookupXPathKeyword(p: PWideChar; Len: Integer): TXPathKeyword;
+var
+  hash: Integer;
+  p1: PWideChar;
+begin
+  result := xkNone;
+  hash := Len;
+  if Len >= 1 then
+  begin
+    if (p^ >= 'a') and (p^ <= 'y') then
+      Inc(hash, AssoValues[ord(p^)])
+    else
+      Exit;
+    if Len > 2 then
+      if (p[2] >= 'a') and (p[2] <= 'y') then
+        Inc(hash, AssoValues[ord(p[2])+1])
+      else
+        Exit;
+  end;
+  if (hash >= 0) and (hash <= MaxHash) then
+  begin
+    p1 := XPathKeywords[KeywordIndex[hash]];
+    if (ord(p1^) = Len) and
+      CompareMem(p, p1+1, Len*sizeof(WideChar)) then
+        Result := KeywordIndex[hash];
+  end;
+end;

+ 4 - 0
packages/fcl-xml/tests/api.xml

@@ -286,6 +286,10 @@
 <item id="replaceWholeText"/>
 <item id="wholeText"/>
 -->
+<item id="inputEncoding"/>
+<item id="xmlEncoding"/>
+<item id="xmlVersion" type="prop"/>
+
 <!-- XPath -->
 
 <item id="createNSResolver">

+ 25 - 0
packages/fcl-xml/tests/extras.pp

@@ -30,6 +30,7 @@ type
     procedure attr_ownership03;
     procedure attr_ownership04;
     procedure attr_ownership05;
+    procedure replacesamechild;
     procedure nsFixup1;
     procedure nsFixup2;
     procedure nsFixup3;
@@ -135,6 +136,30 @@ begin
   AssertNull('ownerElement_after', attr.ownerElement);
 end;
 
+// verify that replacing a node by itself does not remove it from the tree
+// (specs say this is implementation-dependent, but guess that means either
+//  no-op or raising an exception, not removal).
+procedure TDOMTestExtra.replacesamechild;
+var
+  doc: TDOMDocument;
+  root, el, prev, next: TDOMNode;
+begin
+  LoadStringData(doc, '<root><child1/><child2/><child3/></root>');
+  root := doc.DocumentElement;
+  el := root.ChildNodes[1];
+  prev := el.PreviousSibling;
+  next := el.NextSibling;
+  AssertEquals('prev_name_before', 'child1', prev.NodeName);
+  AssertEquals('next_name_before', 'child3', next.NodeName);
+  root.replaceChild(el, el);
+  prev := el.PreviousSibling;
+  next := el.NextSibling;
+  AssertNotNull('prev_after', prev);
+  AssertNotNull('prev_after', next);  
+  AssertEquals('prev_name_after', 'child1', prev.NodeName);
+  AssertEquals('next_name_after', 'child3', next.NodeName);
+end;
+
 const
   nsURI1 = 'http://www.example.com/ns1';
   nsURI2 = 'http://www.example.com/ns2';

+ 0 - 1
packages/fcl-xml/tests/xmlts.pp

@@ -482,7 +482,6 @@ begin
       end;
 
     if outURI = '' then Exit;
-    TempDoc.DocumentElement.Normalize;
     try
       // reference data must be parsed in non-validating mode because it contains DTDs
       // only when Notations need to be reported

+ 156 - 24
packages/fcl-xml/tests/xpathts.pp

@@ -552,7 +552,16 @@ const
   '<b ns1:attrib2="test"/>'#10+
   '</doc>';
 
-  StringTests: array[0..74] of TTestRec = (             // numbers refer to xalan/string/stringXX
+  ns11='<doc-one xmlns="http://xsl.lotus.com/ns2" xmlns:ns1="http://xsl.lotus.com/ns1">'+
+  '  <ns1:a-two attrib1="Goodbye" xmlns="http://xsl.lotus.com/ns2" xmlns:ns1="http://xsl.lotus.com/ns1">Hello</ns1:a-two>'+
+  '  <b-three ns1:attrib2="Ciao">'+
+  '    <c-four/>'+
+  '  </b-three>'+
+  '</doc-one>';
+
+  pidata='<?a-pi data="foo"?><?b-pi data="bar"?><doc/>';
+
+  StringTests: array[0..87] of TTestRec = (             // numbers refer to xalan/string/stringXX
     (expr: 'string(0)';       rt: rtString; s: '0'),
     (expr: 'string(5)';       rt: rtString; s: '5'),    // #38/39
     (expr: 'string(0.5)';     rt: rtString; s: '0.5'),
@@ -633,6 +642,23 @@ const
     (expr: 'translate("--aaa--","abc-","ABC")'; rt: rtString; s: 'AAA'),
     (expr: 'translate("ddaaadddd","abcd","ABCxy")'; rt: rtString; s: 'xxAAAxxxx'),   // #96
 
+    (data: node08; expr: 'name(a/@attr1)';          rt: rtString; s: 'attr1'),  // namespace08 modified
+    (data: node08; expr: 'namespace-uri(a/@attr1)'; rt: rtString; s: ''),
+    (data: node08; expr: 'local-name(a/@attr1)';    rt: rtString; s: 'attr1'),
+
+    (data: pidata; expr: 'name(/processing-instruction())';          rt: rtString; s: 'a-pi'),       // namespace29 modified
+    (data: pidata; expr: 'name(/processing-instruction("b-pi"))';    rt: rtString; s: 'b-pi'),
+    (data: pidata; expr: 'local-name(/processing-instruction())';    rt: rtString; s: 'a-pi'),
+    (data: pidata; expr: 'namespace-uri(/processing-instruction())'; rt: rtString; s: ''),
+
+    (data: node08; expr: 'name(//comment())';          rt: rtString; s: ''),  // namespace30 modified
+    (data: node08; expr: 'local-name(//comment())';    rt: rtString; s: ''),
+    (data: node08; expr: 'namespace-uri(//comment())'; rt: rtString; s: ''),
+
+    (data: node08; expr: 'name(//text())';          rt: rtString; s: ''),  // namespace31 modified
+    (data: node08; expr: 'local-name(//text())';    rt: rtString; s: ''),
+    (data: node08; expr: 'namespace-uri(//text())'; rt: rtString; s: ''),
+
     // tests for number->string conversions at boundary conditions
     (expr: 'string(123456789012345678)';     rt: rtString; s: '123456789012345680'),    // #132.1
     (expr: 'string(-123456789012345678)';    rt: rtString; s: '-123456789012345680'),   // #132.2
@@ -649,7 +675,7 @@ const
 
   res1 = '<foo xmlns:baz1="http://xsl.lotus.com/ns1" xmlns:baz2="http://xsl.lotus.com/ns2"/>';
 
-  nameTests: array[0..9] of TTestRec3 = (
+  nameTests: array[0..17] of TTestRec3 = (
     (data: str30; re: res1; expr: 'namespace-uri(baz1:a/@baz2:attrib1)'; rt: rtString; s: ''), // #30
     (data: str30; re: res1; expr: 'namespace-uri(baz2:b/@baz1:attrib2)'; rt: rtString; s: 'http://xsl.lotus.com/ns1'), // #31
     (data: str30; re: res1; expr: 'name(*)'; rt: rtString; s: 'ns1:a'),       // #32
@@ -660,7 +686,17 @@ const
 
     (data: str30; re: res1; expr: 'local-name(baz2:b)'; rt: rtString; s: 'b'), // namespace07
     (data: str30; re: res1; expr: 'local-name(baz2:b/@baz1:attrib2)'; rt: rtString; s: 'attrib2'), // namespace09
-    (data: str30; re: res1; expr: 'local-name()'; rt: rtString; s: 'doc')      // namespace26
+    (data: str30; re: res1; expr: 'local-name()'; rt: rtString; s: 'doc'),      // namespace26
+    (data: str30; re: res1; expr: 'namespace-uri()'; rt: rtString; s: 'http://xsl.lotus.com/ns2'), // namespace27
+
+    (data: ns11; re: res1; expr: 'namespace-uri(baz1:a-two)'; rt: rtString; s: 'http://xsl.lotus.com/ns1'), // namespace11
+    (data: ns11; re: res1; expr: 'namespace-uri(baz1:a-two/@attrib1)'; rt: rtString; s: ''),
+    (data: ns11; re: res1; expr: 'namespace-uri(baz2:b-three)'; rt: rtString; s: 'http://xsl.lotus.com/ns2'),
+    (data: ns11; re: res1; expr: 'namespace-uri(baz2:b-three/@baz1:attrib2)'; rt: rtString; s: 'http://xsl.lotus.com/ns1'),
+{*} (data: ns11; re: res1; expr: 'namespace-uri(baz2:b-three/c-four)'; rt: rtString; s: ''),
+    (data: ns11; re: res1; expr: 'namespace-uri(bogus)'; rt: rtString; s: ''),
+
+    (data: str30; re: res1; expr: 'name(baz1:*)'; rt: rtString; s: 'ns1:a')
   );
 
   ax114='<doc>'+
@@ -700,7 +736,7 @@ const
   '</section>'+
   '</chapter>';
 
-  AxesTests: array[0..13] of TTestRec = (
+  AxesTests: array[0..15] of TTestRec = (
     (data: ax117; expr: 'count(//@*)';                        rt: rtNumber; n: 16),
     (data: ax117; expr: 'count(//@title)';                    rt: rtNumber; n: 12),
     (data: ax117; expr: 'count(//section//@*)';               rt: rtNumber; n: 14),
@@ -714,17 +750,60 @@ const
     (data: ax117; expr: 'count(/chapter/section[3]//@*)';     rt: rtNumber; n: 5),
     (data: ax117; expr: 'count(/chapter/section[3]//@title)'; rt: rtNumber; n: 4),
 
-    (data: ax114; expr: '//baz/preceding::foo[1]/@att1';    rt: rtNodeStr; s: 'a'),
-//  (data: ax114; expr: '//baz/(preceding::foo)[1]/@att1';  rt: rtNodeStr; s: 'c'),         // won't parse
-    (data: ax115; expr: '//baz/preceding-sibling::foo[1]/@att1';    rt: rtNodeStr; s: 'a')
-//  (data: ax115; expr: '//baz/(preceding-sibling::foo)[1]/@att1';  rt: rtNodeStr; s: 'c')  // won't parse
+    (data: simple; expr: 'local-name(namespace::*[1])';     rt: rtString; s: 'xml'), // namespace28a
+    (data: simple; expr: 'name(namespace::*[1])';           rt: rtString; s: 'xml'), // namespace28b
+    (data: ax117; expr: 'name(//subsection[@title="A3b"]/@title/parent::*)'; rt: rtString; s: 'subsection'),   // axes96 modified
+    (data: ax117; expr: 'name(//subsection[@title="A3b"]/@title/ancestor::*[1])'; rt: rtString; s: 'subsection')  // axes97 modified
+  );
+
+  AxesTests2: array[0..3] of TTestRec3 = (
+    (data: ax114; re: '//baz'; expr: 'preceding::foo[1]/@att1';    rt: rtNodeStr; s: 'a'),
+    (data: ax114; re: '//baz'; expr: '(preceding::foo)[1]/@att1';  rt: rtNodeStr; s: 'c'),         // won't parse
+    (data: ax115; re: '//baz'; expr: 'preceding-sibling::foo[1]/@att1';    rt: rtNodeStr; s: 'a'),
+    (data: ax115; re: '//baz'; expr: '(preceding-sibling::foo)[1]/@att1';  rt: rtNodeStr; s: 'c')  // won't parse
+  );
+
+  pred44 = '<doc>'+
+  '<element1>'+
+    '<child1>Success</child1>'+
+    '<child2>child2</child2>'+
+  '</element1>'+
+  '<element2>'+
+    '<child1>Wrong node selected!!</child1>'+
+  '</element2>'+
+  '<element3>'+
+    '<child1>Wrong node selected!!</child1>'+
+  '</element3>'+
+  '</doc>';
+
+  pred11 = '<doc>'+
+  '<a>1</a>'+
+  '<a>2'+
+  '<achild>target</achild>'+
+  '</a>'+
+  '<a>3</a>'+
+  '<a>target</a>'+
+  '</doc>';
+
+  PredicateTests: array [0..4] of TTestRec = (
+    (data: pred44; expr: '//child1[parent::element1]'; rt: rtNodeStr; s: 'Success'),  // predicate44
+    {should select all but last elements named 'e' }
+    (data: math96; expr: 'sum(e[true()=following-sibling::*])'; rt: rtNumber; n: 20), // predicate03
+    {should select two first elements}
+    (data: math96; expr: 'sum(e[8=following-sibling::*])'; rt: rtNumber; n: 12),      // predicate05
+    (data: pred11; expr: 'a["target"=descendant::*]'; rt: rtNodeStr; s: '2target'),    // predicate06
+    (data: pred11; expr: 'a[following-sibling::*=descendant::*]'; rt: rtNodeStr; s: '2target')  // predicate11
+
+
+
+
   );
 {$warnings on}
 
 var
   FailCount: Integer = 0;  
 
-procedure CheckResult(const t: TTestRec; r: TXPathVariable);
+procedure CheckResult(const t: TTestRec; r: TXPathVariable); overload;
 begin
   case t.rt of
     rtBool:
@@ -740,12 +819,18 @@ begin
     begin
       if (r is TXPathNumberVariable) then
       begin
-        if IsNan(t.n) and IsNan(r.AsNumber) then
-          Exit;
-        if IsInfinite(t.n) and (t.n = r.AsNumber) then
-          Exit;
-        if SameValue(r.AsNumber, t.n) then
-          Exit;
+        if IsNan(t.n) then
+        begin
+          if IsNan(r.AsNumber) then
+            Exit;
+        end
+        else
+        begin
+          if IsInfinite(t.n) and (t.n = r.AsNumber) then
+            Exit;
+          if not IsNan(TXPathNumberVariable(r).Value) and SameValue(TXPathNumberVariable(r).Value, t.n) then
+            Exit;
+        end;  
       end;
       writeln;
       writeln('Failed: ', t.expr);
@@ -774,6 +859,18 @@ begin
   Inc(FailCount);
 end;
 
+procedure CheckResult(const t: TTestRec3; r: TXPathVariable); overload;
+var
+  temp: TTestRec;
+begin
+  temp.data := t.data;
+  temp.expr := t.expr;
+  temp.rt := t.rt;
+  temp.n := t.n;
+  CheckResult(temp, r);
+end;
+
+
 function ParseString(const data: string): TXMLDocument;
 var
   parser: TDOMParser;
@@ -826,13 +923,12 @@ begin
   end;
 end;
 
-procedure DoSuite3(const tests: array of TTestRec3);
+procedure DoSuite_WithResolver(const tests: array of TTestRec3);
 var
   i: Integer;
   doc: TXMLDocument;
   rslt: TXPathVariable;
   nsdoc: TXMLDocument;
-  temp: TTestRec;
 begin
   for i := 0 to High(tests) do
   begin
@@ -843,11 +939,7 @@ begin
         try
           rslt := EvaluateXPathExpression(tests[i].expr, doc.DocumentElement, nsdoc.DocumentElement);
           try
-            temp.data := tests[i].data;
-            temp.expr := tests[i].expr;
-            temp.rt := tests[i].rt;
-            temp.n := tests[i].n;
-            CheckResult(temp, rslt);
+            CheckResult(tests[i], rslt);
           finally
             rslt.Free;
           end;
@@ -866,6 +958,45 @@ begin
   end;
 end;
 
+procedure DoSuite_WithContext(const tests: array of TTestRec3);
+var
+  i: Integer;
+  doc: TXMLDocument;
+  rslt: TXPathVariable;
+  context: TXPathVariable;
+  ctxNs: TNodeSet;
+begin
+  for i := 0 to High(tests) do
+  begin
+    doc := ParseString(tests[i].data);
+    try
+      context := EvaluateXPathExpression(tests[i].re, doc.DocumentElement);
+      try
+        try
+          ctxNs := context.AsNodeSet;
+          if ctxNs.Count <> 1 then
+            raise Exception.CreateFmt('Context expression "%s" does not evaluate to a single node', [tests[i].re]);
+          rslt := EvaluateXPathExpression(tests[i].expr, TDOMNode(ctxNs[0]));
+          try
+            CheckResult(tests[i], rslt);
+          finally
+            rslt.Free;
+          end;
+        except
+          writeln;
+          writeln('Failed: ', tests[i].expr);
+          SysUtils.ShowException(ExceptObject, ExceptAddr);
+          Inc(FailCount);
+        end;
+      finally
+        context.Free;
+      end;
+    finally
+      doc.Free;
+    end;
+  end;
+end;
+
 begin
   DoSuite(BaseTests);
   DoSuite(CompareTests);
@@ -875,8 +1006,9 @@ begin
   DoSuite(FunctionTests);
   DoSuite(StringTests);
   DoSuite(AxesTests);
-
-  DoSuite3(nameTests);
+  DoSuite_WithContext(AxesTests2);
+  DoSuite_WithResolver(nameTests);
+  DoSuite(PredicateTests);
 
   writeln;
   writeln('Total failed tests: ', FailCount);

Some files were not shown because too many files changed in this diff