瀏覽代碼

* Large patch from Sergei Gorelkin:
xmlutils.pp, names.inc:
* exclude colon from the NameChar bitmap and handle it in code.

dom.pp:
+ TDOMText.IsElementContentWhitespace now implemented completely.
* Attributes created by TDOMElement.SetAttribute get their
OwnerElement property assigned properly
* Attribute replaced by TDOMNamedNodeMap.SetNamedItem get their
OwnerElement reset to nil
* TDOMElement.SetAttributeNode does not destroy the attribute when it
is being replaced by itself
* Most node boolean properties collected into a single FFlags field
to reduce memory requirements.

xmlread.pp:
+ Syntax-level support of namespaces: handle colons in names, check
correct qualified name syntax, prohibit colons in entity/notation/PI
names and ID/IDREF attribute values (all this only happens when
Options.Namespaces is set to True - not by default).
* Reaching end of input while parsing the Ignore Section is a fatal
error because parameter entities are not recognized there.
* Reaching end of input while parsing entity value literal that was
started in a parameter entity aborts immediately instead of hopelessly
scanning the whole document up to its end.
* Fixed parsing duplicate Element declarations. The content models of
subsequent declarations are now discarded as they should - not
appended to the existing model.
* Fixed parsing duplicate Attlist declarations. In addition to dropping
the attribute declaration itself, do not modify the corresponding
element declaration and suppress 'Duplicate ID attribute' and
'Duplicate NOTATION attribute' validation errors.
* Fixed error position in cases when attribute value lacks the closing
quote.
* Some refactoring in order to reduce number of WideString vars and code
size (some SkipX and ExpectX merged into SkipX(required: Boolean)).
* TXMLCharSource.FLocation record replaced by single integer FLineNo
because LinePosition is always calculated.
* TXMLCharSource.FCursor replaced by local var.
* TXMLReader.NameIs changed to a more general BufEquals(), it eliminates
TXMLReader.GetString and some WideString variables.

tests/xmlts.pp:
* Ignored tests do not change suite conformance state.

tests/testgen.pp
* Added a forgotten semicolon.

git-svn-id: trunk@11869 -

michael 17 年之前
父節點
當前提交
213f8a41c7

+ 55 - 26
packages/fcl-xml/src/dom.pp

@@ -191,8 +191,17 @@ type
   accessible via fields using specialized properties of descendant classes,
   e.g. TDOMElement.TagName, TDOMCharacterData.Data etc.}
 
+  TNodeFlagEnum = (
+    nfReadonly,
+    nfRecycled,
+    nfIgnorableWS,
+    nfSpecified
+  );
+  TNodeFlags = set of TNodeFlagEnum;
+
   TDOMNode = class
   protected
+    FFlags: TNodeFlags;
     FParentNode: TDOMNode;
     FPreviousSibling, FNextSibling: TDOMNode;
     FOwnerDocument: TDOMDocument;
@@ -360,7 +369,7 @@ type
     function GetNodeValue: DOMString; override;
     procedure SetNodeValue(const AValue: DOMString); override;
   public
-    property Data: DOMString read FNodeValue write FNodeValue;
+    property Data: DOMString read FNodeValue write SetNodeValue;
     property Length: LongWord read GetLength;
     function SubstringData(offset, count: LongWord): DOMString;
     procedure AppendData(const arg: DOMString);
@@ -430,7 +439,7 @@ type
     function CreateElementBuf(Buf: DOMPChar; Length: Integer): TDOMElement;
     function CreateDocumentFragment: TDOMDocumentFragment;
     function CreateTextNode(const data: DOMString): TDOMText;
-    function CreateTextNodeBuf(Buf: DOMPChar; Length: Integer): TDOMText;
+    function CreateTextNodeBuf(Buf: DOMPChar; Length: Integer; IgnWS: Boolean): TDOMText;
     function CreateComment(const data: DOMString): TDOMComment;
     function CreateCommentBuf(Buf: DOMPChar; Length: Integer): TDOMComment;
     function CreateCDATASection(const data: DOMString): TDOMCDATASection;
@@ -492,18 +501,18 @@ type
   protected
     FName: DOMString;
     FOwnerElement: TDOMElement;
-    FSpecified: Boolean;
     // TODO: following 2 - replace with a link to AttDecl ??    
     FDeclared: Boolean;
     FDataType: TAttrDataType;
     function  GetNodeValue: DOMString; override;
     function GetNodeType: Integer; override;
     function GetNodeName: DOMString; override;
+    function GetSpecified: Boolean;
     procedure SetNodeValue(const AValue: DOMString); override;
   public
     function CloneNode(deep: Boolean; ACloneOwner: TDOMDocument): TDOMNode; overload; override;
     property Name: DOMString read FName;
-    property Specified: Boolean read FSpecified;
+    property Specified: Boolean read GetSpecified;
     property Value: DOMString read GetNodeValue write SetNodeValue;
     property OwnerElement: TDOMElement read FOwnerElement;
     // extensions
@@ -561,16 +570,13 @@ type
 
   TDOMText = class(TDOMCharacterData)
   protected
-    // set by parser if text contains only literal whitespace (i.e. not coming from CharRefs) 
-    FMayBeIgnorable: Boolean;
     function GetNodeType: Integer; override;
     function GetNodeName: DOMString; override;
     procedure SetNodeValue(const aValue: DOMString); override;
   public
     function  CloneNode(deep: Boolean; ACloneOwner: TDOMDocument): TDOMNode; overload; override;
     function SplitText(offset: LongWord): TDOMText;
-    // Extension
-    property MayBeIgnorable: Boolean read FMayBeIgnorable write FMayBeIgnorable;
+    function IsElementContentWhitespace: Boolean;
   end;
 
 
@@ -696,7 +702,7 @@ type
   public
     function CloneNode(deep: Boolean; ACloneOwner: TDOMDocument): TDOMNode; overload; override;
     property Target: DOMString read FTarget;
-    property Data: DOMString read FNodeValue write FNodeValue;
+    property Data: DOMString read FNodeValue write SetNodeValue;
   end;
 
 
@@ -937,7 +943,8 @@ begin
         if Assigned(Txt) then
         begin
           Txt.AppendData(TDOMText(Child).Data);
-          Txt.FMayBeIgnorable := Txt.FMayBeIgnorable and TDOMText(Child).FMayBeIgnorable;
+          // TODO: maybe should be smarter
+          Exclude(Txt.FFlags, nfIgnorableWS);
         end
         else
         begin
@@ -965,7 +972,7 @@ end;
 
 procedure TDOMNode.SetTextContent(const AValue: DOMString);
 begin
-  NodeValue := AValue;
+  SetNodeValue(AValue);
 end;
 
 function TDOMNode.GetNamespaceURI: DOMString;
@@ -1244,11 +1251,15 @@ begin
   Result := '';
   child := FFirstChild;
   // TODO: probably very slow, optimization needed
-  // TODO: must ignore whitespace nodes
   while Assigned(child) do
   begin
-    if not (child.NodeType in [COMMENT_NODE, PROCESSING_INSTRUCTION_NODE]) then
+    case child.NodeType of
+      TEXT_NODE: if not (nfIgnorableWS in child.FFlags) then
+        Result := Result + TDOMText(child).Data;
+      COMMENT_NODE, PROCESSING_INSTRUCTION_NODE: ; // ignored
+    else
       Result := Result + child.TextContent;
+    end;
     child := child.NextSibling;
   end;
 end;
@@ -1489,6 +1500,8 @@ begin
   if Exists then
   begin
     Result := TDOMNode(FList.List^[i]);
+    if FNodeType = ATTRIBUTE_NODE then
+      TDOMAttr(Result).FOwnerElement := nil;
     FList.List^[i] := arg;
     exit;
   end;
@@ -1826,10 +1839,12 @@ begin
   Result.FNodeValue := data;
 end;
 
-function TDOMDocument.CreateTextNodeBuf(Buf: DOMPChar; Length: Integer): TDOMText;
+function TDOMDocument.CreateTextNodeBuf(Buf: DOMPChar; Length: Integer; IgnWS: Boolean): TDOMText;
 begin
   Result := TDOMText.Create(Self);
   SetString(Result.FNodeValue, Buf, Length);
+  if IgnWS then
+    Include(Result.FFlags, nfIgnorableWS);
 end;
 
 
@@ -1865,13 +1880,14 @@ begin
     raise EDOMError.Create(INVALID_CHARACTER_ERR, 'DOMDocument.CreateAttribute');
   Result := TDOMAttr.Create(Self);
   Result.FName := name;
+  Include(Result.FFlags, nfSpecified);
 end;
 
 function TDOMDocument.CreateAttributeBuf(Buf: DOMPChar; Length: Integer): TDOMAttr;
 begin
   Result := TDOMAttr.Create(Self);
   SetString(Result.FName, Buf, Length);
-  Result.FSpecified := True;
+  Include(Result.FFlags, nfSpecified);
 end;
 
 function TDOMDocument.CreateEntityReference(const name: DOMString):
@@ -1990,7 +2006,6 @@ function TDOMAttr.CloneNode(deep: Boolean; ACloneOwner: TDOMDocument): TDOMNode;
 begin
   // Cloned attribute is always specified and carries its children
   Result := ACloneOwner.CreateAttribute(FName);
-  TDOMAttr(Result).FSpecified := True;
   TDOMAttr(Result).FDataType := FDataType;
   // Declared = ?
   CloneChildren(Result, ACloneOwner);
@@ -2005,8 +2020,8 @@ end;
 
 procedure TDOMAttr.SetNodeValue(const AValue: DOMString);
 begin
-  FSpecified := True;
   SetTextContent(AValue);
+  Include(FFlags, nfSpecified);
 end;
 
 function TDOMAttr.CompareName(const AName: DOMString): Integer;
@@ -2014,6 +2029,11 @@ begin
   Result := CompareDOMStrings(DOMPChar(AName), DOMPChar(FName), Length(AName), Length(FName));
 end;
 
+function TDOMAttr.GetSpecified: Boolean;
+begin
+  Result := nfSpecified in FFlags;
+end;
+
 // -------------------------------------------------------
 //   Element
 // -------------------------------------------------------
@@ -2104,6 +2124,7 @@ begin
   else
   begin
     Attr := FOwnerDocument.CreateAttribute(name);
+    Attr.FOwnerElement := Self;
     FAttributes.FList.Insert(I, Attr);
   end;
   attr.NodeValue := value;
@@ -2140,7 +2161,6 @@ end;
 
 function TDOMElement.GetAttributeNode(const name: DOMString): TDOMAttr;
 begin
-  // DONE: delegated to TNamedNodeMap.GetNamedItem
   if Assigned(FAttributes) then
     Result := FAttributes.GetNamedItem(name) as TDOMAttr
   else
@@ -2160,8 +2180,11 @@ begin
   Result := Attributes.SetNamedItem(NewAttr) as TDOMAttr;
 
   // TODO -cConformance: here goes inconsistency with DOM 2 - same as in TDOMNode.RemoveChild
-  Result.Free;
-  Result := nil;
+  if Assigned(Result) and (Result <> NewAttr) then
+  begin
+    Result.Free;
+    Result := nil;
+  end;  
 end;
 
 function TDOMElement.SetAttributeNodeNS(NewAttr: TDOMAttr): TDOMAttr;
@@ -2169,8 +2192,11 @@ begin
   Result := Attributes.SetNamedItemNS(NewAttr) as TDOMAttr;
 
   // TODO -cConformance: here goes inconsistency with DOM 2 - same as in TDOMNode.RemoveChild
-  Result.Free;
-  Result := nil;
+  if Assigned(Result) and (Result <> NewAttr) then
+  begin
+    Result.Free;
+    Result := nil;
+  end;  
 end;
 
 
@@ -2234,15 +2260,14 @@ end;
 
 procedure TDOMText.SetNodeValue(const aValue: DOMString);
 begin
+  inherited SetNodeValue(aValue);
   // TODO: may analyze aValue, but this will slow things down...
-  FMayBeIgnorable := False;
-  FNodeValue := aValue;
+  Exclude(FFlags, nfIgnorableWS);
 end;
 
 function TDOMText.CloneNode(deep: Boolean; ACloneOwner: TDOMDocument): TDOMNode;
 begin
   Result := ACloneOwner.CreateTextNode(FNodeValue);
-  TDOMText(Result).FMayBeIgnorable := FMayBeIgnorable;
 end;
 
 function TDOMText.SplitText(offset: LongWord): TDOMText;
@@ -2252,12 +2277,16 @@ begin
 
   Result := TDOMText.Create(FOwnerDocument);
   Result.FNodeValue := Copy(FNodeValue, offset + 1, Length);
-  Result.FMayBeIgnorable := FMayBeIgnorable;
+  Result.FFlags := FFlags * [nfIgnorableWS];
   FNodeValue := Copy(FNodeValue, 1, offset);
   if Assigned(FParentNode) then
     FParentNode.InsertBefore(Result, FNextSibling);
 end;
 
+function TDOMText.IsElementContentWhitespace: Boolean;
+begin
+  Result := nfIgnorableWS in FFlags;
+end;
 
 // -------------------------------------------------------
 //   Comment

+ 2 - 1
packages/fcl-xml/src/names.inc

@@ -16,7 +16,8 @@ type
   TSetOfByte = set of Byte;
 
 const
-  ns_ASCII = [$3A, $41..$5A, $5F, $61..$7A, $C0..$D6, $D8..$F6, $F8..$FF];
+// colon ($3a) is excluded, it is handled in the code
+  ns_ASCII = [{ $3A,} $41..$5A, $5F, $61..$7A, $C0..$D6, $D8..$F6, $F8..$FF];
   ns_0200  = [0..$17, $50..$A8, $BB..$C1];
   ns_0300  = [$86, $88..$8A, $8C, $8E..$A1,
               $A3..$CE, $D0..$D6, $DA, $DC,

File diff suppressed because it is too large
+ 348 - 311
packages/fcl-xml/src/xmlread.pp


+ 5 - 0
packages/fcl-xml/src/xmlutils.pp

@@ -106,12 +106,14 @@ begin
 
   I := 0;
   if (Len = 0) or not ((Byte(Value[I]) in NamingBitmap[Pages^[hi(Word(Value[I]))]]) or
+    (Value[I] = ':') or
     (Xml11 and IsXml11Char(Value, I))) then
       Exit;
   Inc(I);
   while I < Len do
   begin
     if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
+      (Value[I] = ':') or
       (Xml11 and IsXml11Char(Value, I))) then
         Exit;
     Inc(I);
@@ -137,6 +139,7 @@ begin
   while I <= Length(Value) do
   begin
     if not ((Byte(Value[I]) in NamingBitmap[Pages^[Offset+hi(Word(Value[I]))]]) or
+      (Value[I] = ':') or
       (Xml11 and IsXml11Char(Value, I))) then
     begin
       if (I = Length(Value)) or (Value[I] <> #32) then
@@ -167,6 +170,7 @@ begin
   while I <= Length(Value) do
   begin
     if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
+      (Value[I] = ':') or
       (Xml11 and IsXml11Char(Value, I))) then
         Exit;
     Inc(I);
@@ -190,6 +194,7 @@ begin
   while I <= Length(Value) do
   begin
     if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
+      (Value[I] = ':') or
       (Xml11 and IsXml11Char(Value, I))) then
     begin
       if (I = Length(Value)) or (Value[I] <> #32) then

+ 1 - 1
packages/fcl-xml/tests/testgen.pp

@@ -377,7 +377,7 @@ begin
   else if s = 'decrement' then
     rslt.Add(indent + 'Dec(' + node['var'] + ', ' + node['value'] + ');')
   else if s = 'plus' then
-    rslt.Add(indent + node['var'] + ' := ' + ReplaceQuotes(node['op1']) + ' + ' + ReplaceQuotes(node['op2']))
+    rslt.Add(indent + node['var'] + ' := ' + ReplaceQuotes(node['op1']) + ' + ' + ReplaceQuotes(node['op2']) + ';')
 
   else if s = 'fail' then
     rslt.Add(indent + s + '(''' + node['id'] + ''');')

+ 1 - 1
packages/fcl-xml/tests/xmlts.pp

@@ -333,7 +333,7 @@ begin
 
   if FPassed = 0 then
     FState := 'N/A'
-  else if FPassed = FTotal then
+  else if FPassed = FTotal - FSkipped then
     FState := 'CONFORMS (provisionally)'
   else
     FState := 'DOES NOT CONFORM';

Some files were not shown because too many files changed in this diff