Browse Source

* Large patch from Sergei Gorelkin:
xmlutils.pp, names.inc:
* exclude colon from the NameChar bitmap and handle it in code.

dom.pp:
+ TDOMText.IsElementContentWhitespace now implemented completely.
* Attributes created by TDOMElement.SetAttribute get their
OwnerElement property assigned properly
* Attribute replaced by TDOMNamedNodeMap.SetNamedItem get their
OwnerElement reset to nil
* TDOMElement.SetAttributeNode does not destroy the attribute when it
is being replaced by itself
* Most node boolean properties collected into a single FFlags field
to reduce memory requirements.

xmlread.pp:
+ Syntax-level support of namespaces: handle colons in names, check
correct qualified name syntax, prohibit colons in entity/notation/PI
names and ID/IDREF attribute values (all this only happens when
Options.Namespaces is set to True - not by default).
* Reaching end of input while parsing the Ignore Section is a fatal
error because parameter entities are not recognized there.
* Reaching end of input while parsing entity value literal that was
started in a parameter entity aborts immediately instead of hopelessly
scanning the whole document up to its end.
* Fixed parsing duplicate Element declarations. The content models of
subsequent declarations are now discarded as they should - not
appended to the existing model.
* Fixed parsing duplicate Attlist declarations. In addition to dropping
the attribute declaration itself, do not modify the corresponding
element declaration and suppress 'Duplicate ID attribute' and
'Duplicate NOTATION attribute' validation errors.
* Fixed error position in cases when attribute value lacks the closing
quote.
* Some refactoring in order to reduce number of WideString vars and code
size (some SkipX and ExpectX merged into SkipX(required: Boolean)).
* TXMLCharSource.FLocation record replaced by single integer FLineNo
because LinePosition is always calculated.
* TXMLCharSource.FCursor replaced by local var.
* TXMLReader.NameIs changed to a more general BufEquals(), it eliminates
TXMLReader.GetString and some WideString variables.

tests/xmlts.pp:
* Ignored tests do not change suite conformance state.

tests/testgen.pp
* Added a forgotten semicolon.

git-svn-id: trunk@11869 -

michael 17 years ago
parent
commit
213f8a41c7

+ 55 - 26
packages/fcl-xml/src/dom.pp

@@ -191,8 +191,17 @@ type
   accessible via fields using specialized properties of descendant classes,
   accessible via fields using specialized properties of descendant classes,
   e.g. TDOMElement.TagName, TDOMCharacterData.Data etc.}
   e.g. TDOMElement.TagName, TDOMCharacterData.Data etc.}
 
 
+  TNodeFlagEnum = (
+    nfReadonly,
+    nfRecycled,
+    nfIgnorableWS,
+    nfSpecified
+  );
+  TNodeFlags = set of TNodeFlagEnum;
+
   TDOMNode = class
   TDOMNode = class
   protected
   protected
+    FFlags: TNodeFlags;
     FParentNode: TDOMNode;
     FParentNode: TDOMNode;
     FPreviousSibling, FNextSibling: TDOMNode;
     FPreviousSibling, FNextSibling: TDOMNode;
     FOwnerDocument: TDOMDocument;
     FOwnerDocument: TDOMDocument;
@@ -360,7 +369,7 @@ type
     function GetNodeValue: DOMString; override;
     function GetNodeValue: DOMString; override;
     procedure SetNodeValue(const AValue: DOMString); override;
     procedure SetNodeValue(const AValue: DOMString); override;
   public
   public
-    property Data: DOMString read FNodeValue write FNodeValue;
+    property Data: DOMString read FNodeValue write SetNodeValue;
     property Length: LongWord read GetLength;
     property Length: LongWord read GetLength;
     function SubstringData(offset, count: LongWord): DOMString;
     function SubstringData(offset, count: LongWord): DOMString;
     procedure AppendData(const arg: DOMString);
     procedure AppendData(const arg: DOMString);
@@ -430,7 +439,7 @@ type
     function CreateElementBuf(Buf: DOMPChar; Length: Integer): TDOMElement;
     function CreateElementBuf(Buf: DOMPChar; Length: Integer): TDOMElement;
     function CreateDocumentFragment: TDOMDocumentFragment;
     function CreateDocumentFragment: TDOMDocumentFragment;
     function CreateTextNode(const data: DOMString): TDOMText;
     function CreateTextNode(const data: DOMString): TDOMText;
-    function CreateTextNodeBuf(Buf: DOMPChar; Length: Integer): TDOMText;
+    function CreateTextNodeBuf(Buf: DOMPChar; Length: Integer; IgnWS: Boolean): TDOMText;
     function CreateComment(const data: DOMString): TDOMComment;
     function CreateComment(const data: DOMString): TDOMComment;
     function CreateCommentBuf(Buf: DOMPChar; Length: Integer): TDOMComment;
     function CreateCommentBuf(Buf: DOMPChar; Length: Integer): TDOMComment;
     function CreateCDATASection(const data: DOMString): TDOMCDATASection;
     function CreateCDATASection(const data: DOMString): TDOMCDATASection;
@@ -492,18 +501,18 @@ type
   protected
   protected
     FName: DOMString;
     FName: DOMString;
     FOwnerElement: TDOMElement;
     FOwnerElement: TDOMElement;
-    FSpecified: Boolean;
     // TODO: following 2 - replace with a link to AttDecl ??    
     // TODO: following 2 - replace with a link to AttDecl ??    
     FDeclared: Boolean;
     FDeclared: Boolean;
     FDataType: TAttrDataType;
     FDataType: TAttrDataType;
     function  GetNodeValue: DOMString; override;
     function  GetNodeValue: DOMString; override;
     function GetNodeType: Integer; override;
     function GetNodeType: Integer; override;
     function GetNodeName: DOMString; override;
     function GetNodeName: DOMString; override;
+    function GetSpecified: Boolean;
     procedure SetNodeValue(const AValue: DOMString); override;
     procedure SetNodeValue(const AValue: DOMString); override;
   public
   public
     function CloneNode(deep: Boolean; ACloneOwner: TDOMDocument): TDOMNode; overload; override;
     function CloneNode(deep: Boolean; ACloneOwner: TDOMDocument): TDOMNode; overload; override;
     property Name: DOMString read FName;
     property Name: DOMString read FName;
-    property Specified: Boolean read FSpecified;
+    property Specified: Boolean read GetSpecified;
     property Value: DOMString read GetNodeValue write SetNodeValue;
     property Value: DOMString read GetNodeValue write SetNodeValue;
     property OwnerElement: TDOMElement read FOwnerElement;
     property OwnerElement: TDOMElement read FOwnerElement;
     // extensions
     // extensions
@@ -561,16 +570,13 @@ type
 
 
   TDOMText = class(TDOMCharacterData)
   TDOMText = class(TDOMCharacterData)
   protected
   protected
-    // set by parser if text contains only literal whitespace (i.e. not coming from CharRefs) 
-    FMayBeIgnorable: Boolean;
     function GetNodeType: Integer; override;
     function GetNodeType: Integer; override;
     function GetNodeName: DOMString; override;
     function GetNodeName: DOMString; override;
     procedure SetNodeValue(const aValue: DOMString); override;
     procedure SetNodeValue(const aValue: DOMString); override;
   public
   public
     function  CloneNode(deep: Boolean; ACloneOwner: TDOMDocument): TDOMNode; overload; override;
     function  CloneNode(deep: Boolean; ACloneOwner: TDOMDocument): TDOMNode; overload; override;
     function SplitText(offset: LongWord): TDOMText;
     function SplitText(offset: LongWord): TDOMText;
-    // Extension
-    property MayBeIgnorable: Boolean read FMayBeIgnorable write FMayBeIgnorable;
+    function IsElementContentWhitespace: Boolean;
   end;
   end;
 
 
 
 
@@ -696,7 +702,7 @@ type
   public
   public
     function CloneNode(deep: Boolean; ACloneOwner: TDOMDocument): TDOMNode; overload; override;
     function CloneNode(deep: Boolean; ACloneOwner: TDOMDocument): TDOMNode; overload; override;
     property Target: DOMString read FTarget;
     property Target: DOMString read FTarget;
-    property Data: DOMString read FNodeValue write FNodeValue;
+    property Data: DOMString read FNodeValue write SetNodeValue;
   end;
   end;
 
 
 
 
@@ -937,7 +943,8 @@ begin
         if Assigned(Txt) then
         if Assigned(Txt) then
         begin
         begin
           Txt.AppendData(TDOMText(Child).Data);
           Txt.AppendData(TDOMText(Child).Data);
-          Txt.FMayBeIgnorable := Txt.FMayBeIgnorable and TDOMText(Child).FMayBeIgnorable;
+          // TODO: maybe should be smarter
+          Exclude(Txt.FFlags, nfIgnorableWS);
         end
         end
         else
         else
         begin
         begin
@@ -965,7 +972,7 @@ end;
 
 
 procedure TDOMNode.SetTextContent(const AValue: DOMString);
 procedure TDOMNode.SetTextContent(const AValue: DOMString);
 begin
 begin
-  NodeValue := AValue;
+  SetNodeValue(AValue);
 end;
 end;
 
 
 function TDOMNode.GetNamespaceURI: DOMString;
 function TDOMNode.GetNamespaceURI: DOMString;
@@ -1244,11 +1251,15 @@ begin
   Result := '';
   Result := '';
   child := FFirstChild;
   child := FFirstChild;
   // TODO: probably very slow, optimization needed
   // TODO: probably very slow, optimization needed
-  // TODO: must ignore whitespace nodes
   while Assigned(child) do
   while Assigned(child) do
   begin
   begin
-    if not (child.NodeType in [COMMENT_NODE, PROCESSING_INSTRUCTION_NODE]) then
+    case child.NodeType of
+      TEXT_NODE: if not (nfIgnorableWS in child.FFlags) then
+        Result := Result + TDOMText(child).Data;
+      COMMENT_NODE, PROCESSING_INSTRUCTION_NODE: ; // ignored
+    else
       Result := Result + child.TextContent;
       Result := Result + child.TextContent;
+    end;
     child := child.NextSibling;
     child := child.NextSibling;
   end;
   end;
 end;
 end;
@@ -1489,6 +1500,8 @@ begin
   if Exists then
   if Exists then
   begin
   begin
     Result := TDOMNode(FList.List^[i]);
     Result := TDOMNode(FList.List^[i]);
+    if FNodeType = ATTRIBUTE_NODE then
+      TDOMAttr(Result).FOwnerElement := nil;
     FList.List^[i] := arg;
     FList.List^[i] := arg;
     exit;
     exit;
   end;
   end;
@@ -1826,10 +1839,12 @@ begin
   Result.FNodeValue := data;
   Result.FNodeValue := data;
 end;
 end;
 
 
-function TDOMDocument.CreateTextNodeBuf(Buf: DOMPChar; Length: Integer): TDOMText;
+function TDOMDocument.CreateTextNodeBuf(Buf: DOMPChar; Length: Integer; IgnWS: Boolean): TDOMText;
 begin
 begin
   Result := TDOMText.Create(Self);
   Result := TDOMText.Create(Self);
   SetString(Result.FNodeValue, Buf, Length);
   SetString(Result.FNodeValue, Buf, Length);
+  if IgnWS then
+    Include(Result.FFlags, nfIgnorableWS);
 end;
 end;
 
 
 
 
@@ -1865,13 +1880,14 @@ begin
     raise EDOMError.Create(INVALID_CHARACTER_ERR, 'DOMDocument.CreateAttribute');
     raise EDOMError.Create(INVALID_CHARACTER_ERR, 'DOMDocument.CreateAttribute');
   Result := TDOMAttr.Create(Self);
   Result := TDOMAttr.Create(Self);
   Result.FName := name;
   Result.FName := name;
+  Include(Result.FFlags, nfSpecified);
 end;
 end;
 
 
 function TDOMDocument.CreateAttributeBuf(Buf: DOMPChar; Length: Integer): TDOMAttr;
 function TDOMDocument.CreateAttributeBuf(Buf: DOMPChar; Length: Integer): TDOMAttr;
 begin
 begin
   Result := TDOMAttr.Create(Self);
   Result := TDOMAttr.Create(Self);
   SetString(Result.FName, Buf, Length);
   SetString(Result.FName, Buf, Length);
-  Result.FSpecified := True;
+  Include(Result.FFlags, nfSpecified);
 end;
 end;
 
 
 function TDOMDocument.CreateEntityReference(const name: DOMString):
 function TDOMDocument.CreateEntityReference(const name: DOMString):
@@ -1990,7 +2006,6 @@ function TDOMAttr.CloneNode(deep: Boolean; ACloneOwner: TDOMDocument): TDOMNode;
 begin
 begin
   // Cloned attribute is always specified and carries its children
   // Cloned attribute is always specified and carries its children
   Result := ACloneOwner.CreateAttribute(FName);
   Result := ACloneOwner.CreateAttribute(FName);
-  TDOMAttr(Result).FSpecified := True;
   TDOMAttr(Result).FDataType := FDataType;
   TDOMAttr(Result).FDataType := FDataType;
   // Declared = ?
   // Declared = ?
   CloneChildren(Result, ACloneOwner);
   CloneChildren(Result, ACloneOwner);
@@ -2005,8 +2020,8 @@ end;
 
 
 procedure TDOMAttr.SetNodeValue(const AValue: DOMString);
 procedure TDOMAttr.SetNodeValue(const AValue: DOMString);
 begin
 begin
-  FSpecified := True;
   SetTextContent(AValue);
   SetTextContent(AValue);
+  Include(FFlags, nfSpecified);
 end;
 end;
 
 
 function TDOMAttr.CompareName(const AName: DOMString): Integer;
 function TDOMAttr.CompareName(const AName: DOMString): Integer;
@@ -2014,6 +2029,11 @@ begin
   Result := CompareDOMStrings(DOMPChar(AName), DOMPChar(FName), Length(AName), Length(FName));
   Result := CompareDOMStrings(DOMPChar(AName), DOMPChar(FName), Length(AName), Length(FName));
 end;
 end;
 
 
+function TDOMAttr.GetSpecified: Boolean;
+begin
+  Result := nfSpecified in FFlags;
+end;
+
 // -------------------------------------------------------
 // -------------------------------------------------------
 //   Element
 //   Element
 // -------------------------------------------------------
 // -------------------------------------------------------
@@ -2104,6 +2124,7 @@ begin
   else
   else
   begin
   begin
     Attr := FOwnerDocument.CreateAttribute(name);
     Attr := FOwnerDocument.CreateAttribute(name);
+    Attr.FOwnerElement := Self;
     FAttributes.FList.Insert(I, Attr);
     FAttributes.FList.Insert(I, Attr);
   end;
   end;
   attr.NodeValue := value;
   attr.NodeValue := value;
@@ -2140,7 +2161,6 @@ end;
 
 
 function TDOMElement.GetAttributeNode(const name: DOMString): TDOMAttr;
 function TDOMElement.GetAttributeNode(const name: DOMString): TDOMAttr;
 begin
 begin
-  // DONE: delegated to TNamedNodeMap.GetNamedItem
   if Assigned(FAttributes) then
   if Assigned(FAttributes) then
     Result := FAttributes.GetNamedItem(name) as TDOMAttr
     Result := FAttributes.GetNamedItem(name) as TDOMAttr
   else
   else
@@ -2160,8 +2180,11 @@ begin
   Result := Attributes.SetNamedItem(NewAttr) as TDOMAttr;
   Result := Attributes.SetNamedItem(NewAttr) as TDOMAttr;
 
 
   // TODO -cConformance: here goes inconsistency with DOM 2 - same as in TDOMNode.RemoveChild
   // TODO -cConformance: here goes inconsistency with DOM 2 - same as in TDOMNode.RemoveChild
-  Result.Free;
-  Result := nil;
+  if Assigned(Result) and (Result <> NewAttr) then
+  begin
+    Result.Free;
+    Result := nil;
+  end;  
 end;
 end;
 
 
 function TDOMElement.SetAttributeNodeNS(NewAttr: TDOMAttr): TDOMAttr;
 function TDOMElement.SetAttributeNodeNS(NewAttr: TDOMAttr): TDOMAttr;
@@ -2169,8 +2192,11 @@ begin
   Result := Attributes.SetNamedItemNS(NewAttr) as TDOMAttr;
   Result := Attributes.SetNamedItemNS(NewAttr) as TDOMAttr;
 
 
   // TODO -cConformance: here goes inconsistency with DOM 2 - same as in TDOMNode.RemoveChild
   // TODO -cConformance: here goes inconsistency with DOM 2 - same as in TDOMNode.RemoveChild
-  Result.Free;
-  Result := nil;
+  if Assigned(Result) and (Result <> NewAttr) then
+  begin
+    Result.Free;
+    Result := nil;
+  end;  
 end;
 end;
 
 
 
 
@@ -2234,15 +2260,14 @@ end;
 
 
 procedure TDOMText.SetNodeValue(const aValue: DOMString);
 procedure TDOMText.SetNodeValue(const aValue: DOMString);
 begin
 begin
+  inherited SetNodeValue(aValue);
   // TODO: may analyze aValue, but this will slow things down...
   // TODO: may analyze aValue, but this will slow things down...
-  FMayBeIgnorable := False;
-  FNodeValue := aValue;
+  Exclude(FFlags, nfIgnorableWS);
 end;
 end;
 
 
 function TDOMText.CloneNode(deep: Boolean; ACloneOwner: TDOMDocument): TDOMNode;
 function TDOMText.CloneNode(deep: Boolean; ACloneOwner: TDOMDocument): TDOMNode;
 begin
 begin
   Result := ACloneOwner.CreateTextNode(FNodeValue);
   Result := ACloneOwner.CreateTextNode(FNodeValue);
-  TDOMText(Result).FMayBeIgnorable := FMayBeIgnorable;
 end;
 end;
 
 
 function TDOMText.SplitText(offset: LongWord): TDOMText;
 function TDOMText.SplitText(offset: LongWord): TDOMText;
@@ -2252,12 +2277,16 @@ begin
 
 
   Result := TDOMText.Create(FOwnerDocument);
   Result := TDOMText.Create(FOwnerDocument);
   Result.FNodeValue := Copy(FNodeValue, offset + 1, Length);
   Result.FNodeValue := Copy(FNodeValue, offset + 1, Length);
-  Result.FMayBeIgnorable := FMayBeIgnorable;
+  Result.FFlags := FFlags * [nfIgnorableWS];
   FNodeValue := Copy(FNodeValue, 1, offset);
   FNodeValue := Copy(FNodeValue, 1, offset);
   if Assigned(FParentNode) then
   if Assigned(FParentNode) then
     FParentNode.InsertBefore(Result, FNextSibling);
     FParentNode.InsertBefore(Result, FNextSibling);
 end;
 end;
 
 
+function TDOMText.IsElementContentWhitespace: Boolean;
+begin
+  Result := nfIgnorableWS in FFlags;
+end;
 
 
 // -------------------------------------------------------
 // -------------------------------------------------------
 //   Comment
 //   Comment

+ 2 - 1
packages/fcl-xml/src/names.inc

@@ -16,7 +16,8 @@ type
   TSetOfByte = set of Byte;
   TSetOfByte = set of Byte;
 
 
 const
 const
-  ns_ASCII = [$3A, $41..$5A, $5F, $61..$7A, $C0..$D6, $D8..$F6, $F8..$FF];
+// colon ($3a) is excluded, it is handled in the code
+  ns_ASCII = [{ $3A,} $41..$5A, $5F, $61..$7A, $C0..$D6, $D8..$F6, $F8..$FF];
   ns_0200  = [0..$17, $50..$A8, $BB..$C1];
   ns_0200  = [0..$17, $50..$A8, $BB..$C1];
   ns_0300  = [$86, $88..$8A, $8C, $8E..$A1,
   ns_0300  = [$86, $88..$8A, $8C, $8E..$A1,
               $A3..$CE, $D0..$D6, $DA, $DC,
               $A3..$CE, $D0..$D6, $DA, $DC,

File diff suppressed because it is too large
+ 348 - 311
packages/fcl-xml/src/xmlread.pp


+ 5 - 0
packages/fcl-xml/src/xmlutils.pp

@@ -106,12 +106,14 @@ begin
 
 
   I := 0;
   I := 0;
   if (Len = 0) or not ((Byte(Value[I]) in NamingBitmap[Pages^[hi(Word(Value[I]))]]) or
   if (Len = 0) or not ((Byte(Value[I]) in NamingBitmap[Pages^[hi(Word(Value[I]))]]) or
+    (Value[I] = ':') or
     (Xml11 and IsXml11Char(Value, I))) then
     (Xml11 and IsXml11Char(Value, I))) then
       Exit;
       Exit;
   Inc(I);
   Inc(I);
   while I < Len do
   while I < Len do
   begin
   begin
     if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
     if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
+      (Value[I] = ':') or
       (Xml11 and IsXml11Char(Value, I))) then
       (Xml11 and IsXml11Char(Value, I))) then
         Exit;
         Exit;
     Inc(I);
     Inc(I);
@@ -137,6 +139,7 @@ begin
   while I <= Length(Value) do
   while I <= Length(Value) do
   begin
   begin
     if not ((Byte(Value[I]) in NamingBitmap[Pages^[Offset+hi(Word(Value[I]))]]) or
     if not ((Byte(Value[I]) in NamingBitmap[Pages^[Offset+hi(Word(Value[I]))]]) or
+      (Value[I] = ':') or
       (Xml11 and IsXml11Char(Value, I))) then
       (Xml11 and IsXml11Char(Value, I))) then
     begin
     begin
       if (I = Length(Value)) or (Value[I] <> #32) then
       if (I = Length(Value)) or (Value[I] <> #32) then
@@ -167,6 +170,7 @@ begin
   while I <= Length(Value) do
   while I <= Length(Value) do
   begin
   begin
     if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
     if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
+      (Value[I] = ':') or
       (Xml11 and IsXml11Char(Value, I))) then
       (Xml11 and IsXml11Char(Value, I))) then
         Exit;
         Exit;
     Inc(I);
     Inc(I);
@@ -190,6 +194,7 @@ begin
   while I <= Length(Value) do
   while I <= Length(Value) do
   begin
   begin
     if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
     if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
+      (Value[I] = ':') or
       (Xml11 and IsXml11Char(Value, I))) then
       (Xml11 and IsXml11Char(Value, I))) then
     begin
     begin
       if (I = Length(Value)) or (Value[I] <> #32) then
       if (I = Length(Value)) or (Value[I] <> #32) then

+ 1 - 1
packages/fcl-xml/tests/testgen.pp

@@ -377,7 +377,7 @@ begin
   else if s = 'decrement' then
   else if s = 'decrement' then
     rslt.Add(indent + 'Dec(' + node['var'] + ', ' + node['value'] + ');')
     rslt.Add(indent + 'Dec(' + node['var'] + ', ' + node['value'] + ');')
   else if s = 'plus' then
   else if s = 'plus' then
-    rslt.Add(indent + node['var'] + ' := ' + ReplaceQuotes(node['op1']) + ' + ' + ReplaceQuotes(node['op2']))
+    rslt.Add(indent + node['var'] + ' := ' + ReplaceQuotes(node['op1']) + ' + ' + ReplaceQuotes(node['op2']) + ';')
 
 
   else if s = 'fail' then
   else if s = 'fail' then
     rslt.Add(indent + s + '(''' + node['id'] + ''');')
     rslt.Add(indent + s + '(''' + node['id'] + ''');')

+ 1 - 1
packages/fcl-xml/tests/xmlts.pp

@@ -333,7 +333,7 @@ begin
 
 
   if FPassed = 0 then
   if FPassed = 0 then
     FState := 'N/A'
     FState := 'N/A'
-  else if FPassed = FTotal then
+  else if FPassed = FTotal - FSkipped then
     FState := 'CONFORMS (provisionally)'
     FState := 'CONFORMS (provisionally)'
   else
   else
     FState := 'DOES NOT CONFORM';
     FState := 'DOES NOT CONFORM';

Some files were not shown because too many files changed in this diff