Browse Source

* Refactored the main parsing loop, it is no longer recursive with respect to elements (still
recursive with respect to entities, however). This enables more useful backtrace in case of
parsing errors, and makes more fun at profiling.

git-svn-id: trunk@13275 -

sergei 16 years ago
parent
commit
64966ccae1
1 changed files with 61 additions and 54 deletions
  1. 61 54
      packages/fcl-xml/src/xmlread.pp

+ 61 - 54
packages/fcl-xml/src/xmlread.pp

@@ -279,6 +279,7 @@ type
   end;
   end;
 
 
   TElementValidator = object
   TElementValidator = object
+    FElement: TDOMElement;
     FElementDef: TDOMElementDef;
     FElementDef: TDOMElementDef;
     FCurCP: TContentParticle;
     FCurCP: TContentParticle;
     FFailed: Boolean;
     FFailed: Boolean;
@@ -359,7 +360,6 @@ type
     procedure GetChar;
     procedure GetChar;
     procedure Initialize(ASource: TXMLCharSource);
     procedure Initialize(ASource: TXMLCharSource);
     function DoParseAttValue(Delim: WideChar): Boolean;
     function DoParseAttValue(Delim: WideChar): Boolean;
-    procedure DoParseFragment;
     function ContextPush(AEntity: TDOMEntityEx): Boolean;
     function ContextPush(AEntity: TDOMEntityEx): Boolean;
     function ContextPop: Boolean;
     function ContextPop: Boolean;
     procedure XML11_BuildTables;
     procedure XML11_BuildTables;
@@ -407,6 +407,8 @@ type
     procedure ParseDoctypeDecl;                                         // [28]
     procedure ParseDoctypeDecl;                                         // [28]
     procedure ParseMarkupDecl;                                          // [29]
     procedure ParseMarkupDecl;                                          // [29]
     procedure ParseElement;                                             // [39]
     procedure ParseElement;                                             // [39]
+    procedure ParseEndTag;                                              // [42]
+    procedure DoEndElement(ErrOffset: Integer);
     procedure ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef);
     procedure ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef);
     procedure ParseContent;                                             // [43]
     procedure ParseContent;                                             // [43]
     function  ResolvePredefined: Boolean;
     function  ResolvePredefined: Boolean;
@@ -429,7 +431,7 @@ type
     procedure AddBinding(Attr: TDOMAttr; Prefix: PHashItem; var Chain: TBinding);
     procedure AddBinding(Attr: TDOMAttr; Prefix: PHashItem; var Chain: TBinding);
     procedure EndNamespaceScope(var Chain: TBinding);
     procedure EndNamespaceScope(var Chain: TBinding);
 
 
-    procedure PushVC(aElDef: TDOMElementDef);
+    procedure PushVC(aElement: TDOMElement; aElDef: TDOMElementDef);
     procedure PopVC;
     procedure PopVC;
     procedure UpdateConstraints;
     procedure UpdateConstraints;
     procedure ValidateDTD;
     procedure ValidateDTD;
@@ -1446,7 +1448,7 @@ begin
   FState := rsProlog;
   FState := rsProlog;
   FNesting := 0;
   FNesting := 0;
   Initialize(ASource);
   Initialize(ASource);
-  DoParseFragment;
+  ParseContent;
 
 
   if FState < rsRoot then
   if FState < rsRoot then
     FatalError('Root element is missing');
     FatalError('Root element is missing');
@@ -1462,7 +1464,7 @@ begin
   FState := rsRoot;
   FState := rsRoot;
   Initialize(ASource);
   Initialize(ASource);
   FXML11 := doc.InheritsFrom(TXMLDocument) and (TXMLDocument(doc).XMLVersion = '1.1');
   FXML11 := doc.InheritsFrom(TXMLDocument) and (TXMLDocument(doc).XMLVersion = '1.1');
-  DoParseFragment;
+  ParseContent;
 end;
 end;
 
 
 function TXMLReader.CheckName(aFlags: TCheckNameFlags): Boolean;
 function TXMLReader.CheckName(aFlags: TCheckNameFlags): Boolean;
@@ -1693,15 +1695,6 @@ begin
   Result := wc <> #0;
   Result := wc <> #0;
 end;
 end;
 
 
-procedure TXMLReader.DoParseFragment;
-begin
-  // SAX: ContentHandler.StartDocument() - here?
-  ParseContent;
-  if FSource.FBuf^ <> #0 then
-    FatalError('End-tag is not allowed here');
-  // SAX: ContentHandler.EndDocument() - here? or somewhere in destructor?  
-end;
-
 function TXMLReader.ContextPush(AEntity: TDOMEntityEx): Boolean;
 function TXMLReader.ContextPush(AEntity: TDOMEntityEx): Boolean;
 var
 var
   Src: TXMLCharSource;
   Src: TXMLCharSource;
@@ -1798,7 +1791,7 @@ begin
         if InAttr then
         if InAttr then
           DoParseAttValue(#0)
           DoParseAttValue(#0)
         else
         else
-          DoParseFragment;
+          ParseContent;
         AEntity.FResolved := True;
         AEntity.FResolved := True;
       finally
       finally
         AEntity.SetReadOnly(True);
         AEntity.SetReadOnly(True);
@@ -2769,16 +2762,24 @@ procedure TXMLReader.ParseContent;
 var
 var
   nonWs: Boolean;
   nonWs: Boolean;
   wc: WideChar;
   wc: WideChar;
+  StartNesting: Integer;
 begin
 begin
+  StartNesting := FNesting;
   with FSource do
   with FSource do
   repeat
   repeat
     if FBuf^ = '<' then
     if FBuf^ = '<' then
     begin
     begin
-      if FBufEnd < FBuf + 3 then
-        Reload;
       Inc(FBuf);
       Inc(FBuf);
-      if FBuf^ = '/' then Break;     // end tag case is as frequent as start tag
-      if CheckName([cnOptional]) then
+      if FBufEnd < FBuf + 2 then
+        Reload;
+      if FBuf^ = '/' then
+      begin
+        if FNesting <= StartNesting then
+          FatalError('End-tag is not allowed here');
+        Inc(FBuf);
+        ParseEndTag;
+      end
+      else if CheckName([cnOptional]) then
         ParseElement
         ParseElement
       else if FBuf^ = '!' then
       else if FBuf^ = '!' then
       begin
       begin
@@ -2847,6 +2848,8 @@ begin
         FatalError('Illegal at document level', -1);
         FatalError('Illegal at document level', -1);
     end;
     end;
   until FBuf^ = #0;
   until FBuf^ = #0;
+  if FNesting > StartNesting then
+    FatalError('End-tag is missing for ''%s''', [FValidator[FNesting].FElement.NSI.QName^.Key]);
 end;
 end;
 
 
 procedure TXMLCharSource.NextChar;
 procedure TXMLCharSource.NextChar;
@@ -2870,7 +2873,6 @@ var
   NewElem: TDOMElement;
   NewElem: TDOMElement;
   ElDef: TDOMElementDef;
   ElDef: TDOMElementDef;
   IsEmpty: Boolean;
   IsEmpty: Boolean;
-  ErrOffset: Integer;
   ElName: PHashItem;
   ElName: PHashItem;
 begin
 begin
   if FState > rsRoot then
   if FState > rsRoot then
@@ -2917,59 +2919,63 @@ begin
 
 
   if Assigned(ElDef) and Assigned(ElDef.FAttributes) then
   if Assigned(ElDef) and Assigned(ElDef.FAttributes) then
     ProcessDefaultAttributes(NewElem, ElDef.FAttributes);
     ProcessDefaultAttributes(NewElem, ElDef.FAttributes);
-  PushVC(ElDef);  // this increases FNesting
+  PushVC(NewElem, ElDef);  // this increases FNesting
   if FNamespaces then
   if FNamespaces then
     ProcessNamespaceAtts(NewElem);
     ProcessNamespaceAtts(NewElem);
 
 
-  // SAX: ContentHandler.StartElement(...)
-  // SAX: ContentHandler.StartPrefixMapping(...)
-
-  ErrOffset := 0;
   if not IsEmpty then
   if not IsEmpty then
   begin
   begin
     FCursor := NewElem;
     FCursor := NewElem;
     if not FPreserveWhitespace then   // critical for testsuite compliance
     if not FPreserveWhitespace then   // critical for testsuite compliance
       SkipS;
       SkipS;
-    ParseContent;
-    if FSource.FBuf^ = '/' then         // Get ETag [42]
-    begin
-      FSource.NextChar;
-      CheckName;
-      if not BufEquals(FName, ElName^.Key) then
-        FatalError('Unmatching element end tag (expected "</%s>")', [ElName^.Key], FName.Length);
-      if FSource.FBuf^ = '>' then    // this handles majority of cases
-      begin
-        ErrOffset := FName.Length+1;
-        FSource.NextChar;
-      end
-      else    // but if closing '>' is preceded by whitespace,
-      begin   // skipping it is likely to lose position info.
-        StoreLocation(FTokenStart);
-        Dec(FTokenStart.LinePos, FName.Length);
-        ErrOffset := -1;
-        SkipS;
-        ExpectChar('>');
-      end;
-    end
-    else if FSource.FBuf^ <> #0 then
-      RaiseNameNotFound
-    else // End of stream in content
-      FatalError('End-tag is missing for ''%s''', [ElName^.Key]);
-  end;
-  // SAX: ContentHandler.EndElement(...)
-  // SAX: ContentHandler.EndPrefixMapping(...)
+  end
+  else
+    DoEndElement(0);
+end;
+
+procedure TXMLReader.DoEndElement(ErrOffset: Integer);
+var
+  NewElem: TDOMElement;
+begin
+  NewElem := FValidator[FNesting].FElement;
   TDOMNode(FCursor) := NewElem.ParentNode;
   TDOMNode(FCursor) := NewElem.ParentNode;
   if FCursor = doc then
   if FCursor = doc then
     FState := rsEpilog;
     FState := rsEpilog;
 
 
   if FValidate and FValidator[FNesting].Incomplete then
   if FValidate and FValidator[FNesting].Incomplete then
-    ValidationError('Element ''%s'' is missing required sub-elements', [ElName^.Key], ErrOffset);
+    ValidationError('Element ''%s'' is missing required sub-elements', [NewElem.NSI.QName^.Key], ErrOffset);
 
 
   if FNamespaces then
   if FNamespaces then
     EndNamespaceScope(FBindingStack[FNesting]);
     EndNamespaceScope(FBindingStack[FNesting]);
   PopVC;
   PopVC;
 end;
 end;
 
 
+procedure TXMLReader.ParseEndTag;     // [42]
+var
+  ErrOffset: Integer;
+  ElName: PHashItem;
+begin
+  ElName := FValidator[FNesting].FElement.NSI.QName;
+
+  CheckName;
+  if not BufEquals(FName, ElName^.Key) then
+    FatalError('Unmatching element end tag (expected "</%s>")', [ElName^.Key], FName.Length);
+  if FSource.FBuf^ = '>' then    // this handles majority of cases
+  begin
+    ErrOffset := FName.Length+1;
+    FSource.NextChar;
+  end
+  else    // but if closing '>' is preceded by whitespace,
+  begin   // skipping it is likely to lose position info.
+    StoreLocation(FTokenStart);
+    Dec(FTokenStart.LinePos, FName.Length);
+    ErrOffset := -1;
+    SkipS;
+    ExpectChar('>');
+  end;
+  DoEndElement(ErrOffset);
+end;
+
 procedure TXMLReader.ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef);
 procedure TXMLReader.ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef);
 var
 var
   attr: TDOMAttr;
   attr: TDOMAttr;
@@ -3419,11 +3425,12 @@ begin
     ValidationError('Duplicate notation declaration: ''%s''', [aName]);
     ValidationError('Duplicate notation declaration: ''%s''', [aName]);
 end;
 end;
 
 
-procedure TXMLReader.PushVC(aElDef: TDOMElementDef);
+procedure TXMLReader.PushVC(aElement: TDOMElement; aElDef: TDOMElementDef);
 begin
 begin
   Inc(FNesting);
   Inc(FNesting);
   if FNesting >= Length(FValidator) then
   if FNesting >= Length(FValidator) then
     SetLength(FValidator, FNesting * 2);
     SetLength(FValidator, FNesting * 2);
+  FValidator[FNesting].FElement := aElement;
   FValidator[FNesting].FElementDef := aElDef;
   FValidator[FNesting].FElementDef := aElDef;
   FValidator[FNesting].FCurCP := nil;
   FValidator[FNesting].FCurCP := nil;
   FValidator[FNesting].FFailed := False;
   FValidator[FNesting].FFailed := False;