ソースを参照

* Patch from Sergei Gorelkin:
* excludes #$FFFE and #$FFFF from allowed XML 1.1 name chars, so
IsXmlName result is correct when its argument comes not from the
parser.
xmlread.pp:
+ Two new parsing options, Namespaces and ResolveExternals (not
functional yet but needed to proceed).
* Fixed checking of WFC [28a], forces fatal error as soon as possible
and prevents parsing of further (potentially malicious) data.
Hopefully now it is truly compliant to the specs and not just
satisfies the tests.
* In entity value literals, nesting is checked by entity, not by the
input source (consistent to other places).
- Saving FCursor around attribute default value isn't necessary because
FCursor is always nil while parsing the DTD.
* TList's changed to more lightweight TFPList's.
* Changed once more (probably the last time) recognizing the standalone
percent sign in parameter entity declarations. Rationale is that
FCurChar is no more out of sync with FSource.FBuf^, and therefore may
be removed.

tests/xmlts.pp and tests/README:
+ Added support for the latest XML test suite (by skipping tests
targeted for the upcoming fifth edition of XML specs).
+ 'Namespaces' option is passed to the parser.
* README updated with the latest testsuite URL.

git-svn-id: trunk@11303 -

michael 17 年 前
コミット
d812fa0c92

+ 4 - 3
packages/fcl-xml/src/names.inc

@@ -58,7 +58,7 @@ const
 
   ns_3000  = [$41..$94, $A1..$FA] + [$07, $21..$29];
 
-  namingBitmap: array[0..$2F] of TSetOfByte = (
+  namingBitmap: array[0..$30] of TSetOfByte = (
 
   [],                              // 00 - nothing allowed
   [0..255],                        // 01 - all allowed
@@ -165,10 +165,11 @@ const
   [$70..$7D, $7F..$FF],            // 2C $0300 - NameStart
   [1..$FF],                        // 2D $3000 - NameStart
   [0..$7D, $7F..$FF],              // 2E $0300 - Names
-  [$0C..$0D, $3F..$40, $70..$FF]   // 2F $2000 - Names
+  [$0C..$0D, $3F..$40, $70..$FF],  // 2F $2000 - Names
+  [$00..$FD]                       // 30 $FF00 - both Name and NameStart
 );
 
-  Xml11HighPages: TSetOfByte = [0..$21, $2C..$D7, $F9..$FF];
+  Xml11HighPages: TSetOfByte = [0..$21, $2C..$D7, $F9..$FE];
 
   NamePages: array[0..511] of Byte = (
 $02, $03, $04, $05, $06, $07, $08, $00,

+ 66 - 57
packages/fcl-xml/src/xmlread.pp

@@ -64,12 +64,16 @@ type
     FExpandEntities: Boolean;
     FIgnoreComments: Boolean;
     FCDSectionsAsText: Boolean;
+    FResolveExternals: Boolean;
+    FNamespaces: Boolean;
   public
     property Validate: Boolean read FValidate write FValidate;
     property PreserveWhitespace: Boolean read FPreserveWhitespace write FPreserveWhitespace;
     property ExpandEntities: Boolean read FExpandEntities write FExpandEntities;
     property IgnoreComments: Boolean read FIgnoreComments write FIgnoreComments;
     property CDSectionsAsText: Boolean read FCDSectionsAsText write FCDSectionsAsText;
+    property ResolveExternals: Boolean read FResolveExternals write FResolveExternals;
+    property Namespaces: Boolean read FNamespaces write FNamespaces;
   end;
 
   // NOTE: DOM 3 LS ACTION_TYPE enumeration starts at 1
@@ -148,6 +152,7 @@ type
     FExternallyDeclared: Boolean;
     FResolved: Boolean;
     FOnStack: Boolean;
+    FBetweenDecls: Boolean;
     FReplacementText: DOMString;
     FStartLocation: TLocation;
   end;
@@ -162,6 +167,7 @@ type
     FCursor: TObject;   // weak reference
     FLocation: TLocation;
     LFPos: PWideChar;
+    FXML11Rules: Boolean;
     FSystemID: WideString;
     FPublicID: WideString;
     FReloadHook: procedure of object;
@@ -188,7 +194,6 @@ type
     FBufStart: PWideChar;
     FDecoder: TDecoder;
     FSeenCR: Boolean;
-    FXML11Rules: Boolean;
     FFixedUCS2: string;
     FBufSize: Integer;
     FSurrogate: WideChar;
@@ -243,7 +248,7 @@ type
   TContentParticle = class(TObject)
   private
     FParent: TContentParticle;
-    FChildren: TList;
+    FChildren: TFPList;
     FIndex: Integer;
     function GetChildCount: Integer;
     function GetChild(Index: Integer): TContentParticle;
@@ -294,6 +299,7 @@ type
     FState: TXMLReadState;
     FRecognizePE: Boolean;
     FHavePERefs: Boolean;
+    FInsideDecl: Boolean;
     FDocNotValid: Boolean;
     FValue: TWideCharBuf;
     FName: TWideCharBuf;
@@ -302,8 +308,8 @@ type
     FNamePages: PByteArray;
     FDocType: TDOMDocumentTypeEx;  // a shortcut
     FPEMap: TDOMNamedNodeMap;
-    FIDRefs: TList;
-    FNotationRefs: TList;
+    FIDRefs: TFPList;
+    FNotationRefs: TFPList;
     FCurrContentType: TElementContentType;
     FSaViolation: Boolean;
     FDTDStartPos: PWideChar;
@@ -314,6 +320,8 @@ type
     FExpandEntities: Boolean;
     FIgnoreComments: Boolean;
     FCDSectionsAsText: Boolean;
+    FResolveExternals: Boolean;
+    FNamespaces: Boolean;
 
     procedure RaiseExpectedQmark;
     procedure GetChar;
@@ -328,8 +336,8 @@ type
     procedure ParseQuantity(CP: TContentParticle);
     procedure StoreLocation(out Loc: TLocation);
     function ValidateAttrSyntax(AttrDef: TDOMAttrDef; const aValue: WideString): Boolean;
-    procedure AddForwardRef(aList: TList; Buf: PWideChar; Length: Integer);
-    procedure ClearRefs(aList: TList);
+    procedure AddForwardRef(aList: TFPList; Buf: PWideChar; Length: Integer);
+    procedure ClearRefs(aList: TFPList);
     procedure ValidateIdRefs;
     procedure StandaloneError(LineOffs: Integer = 0);
     procedure CallErrorHandler(E: EXMLReadError);
@@ -345,7 +353,7 @@ type
     procedure FatalError(const descr: String; LineOffs: Integer=0); overload;
     procedure FatalError(const descr: string; const args: array of const; LineOffs: Integer=0); overload;
     procedure FatalError(Expected: WideChar); overload;
-    function  SkipWhitespace: Boolean;
+    function  SkipWhitespace(PercentAloneIsOk: Boolean = False): Boolean;
     function  SkipWhitespaceRaw: Boolean;
     procedure ExpectWhitespace;
     procedure ExpectString(const s: String);
@@ -1111,7 +1119,7 @@ begin
   E.Free;
 end;
 
-function TXMLReader.SkipWhitespace: Boolean;
+function TXMLReader.SkipWhitespace(PercentAloneIsOk: Boolean): Boolean;
 begin
   Result := False;
   repeat
@@ -1129,19 +1137,24 @@ begin
       '%': begin
         if not FRecognizePE then
           Exit;
-        GetChar;
-        if not CheckName then
+// This is the only case where look-ahead is needed
+        if FSource.FBuf > FSource.FBufEnd-2 then
+          FSource.Reload;
+        if (not PercentAloneIsOk) or
+          (Byte(FSource.FBuf[1]) in NamingBitmap[FNamePages^[hi(Word(FSource.FBuf[1]))]]) or
+          (FXML11 and (FSource.FBuf[1] >= #$D800) and (FSource.FBuf[1] <= #$DB7F)) then
         begin
-          if (FCurChar <> #32) and (FCurChar <> #10) and (FCurChar <> #9) and (FCurChar <> #13) then
-            FatalError('Expected whitespace');
-          FCurChar := '%';
-          Exit;
-        end;
-        ExpectChar(';');
-        StartPE;
-        Result := True;        // report whitespace on both ends of PE
-        Continue;
-      end;
+          Inc(FSource.FBuf);    // skip '%'
+          FCurChar := FSource.FBuf^;
+          if not CheckName then
+            RaiseNameNotFound;
+          ExpectChar(';');
+          StartPE;
+          Result := True;        // report whitespace upon entering the PE
+          Continue;
+        end
+        else Break;
+      end
     else
       Exit;
     end;  
@@ -1219,8 +1232,8 @@ begin
   inherited Create;
   BufAllocate(FName, 128);
   BufAllocate(FValue, 512);
-  FIDRefs := TList.Create;
-  FNotationRefs := TList.Create;
+  FIDRefs := TFPList.Create;
+  FNotationRefs := TFPList.Create;
 
   // Set char rules to XML 1.0
   FNamePages := @NamePages;
@@ -1236,6 +1249,8 @@ begin
   FExpandEntities := FCtrl.Options.ExpandEntities;
   FCDSectionsAsText := FCtrl.Options.CDSectionsAsText;
   FIgnoreComments := FCtrl.Options.IgnoreComments;
+  FResolveExternals := FCtrl.Options.ResolveExternals;
+  FNamespaces := FCtrl.Options.Namespaces;
 end;
 
 destructor TXMLReader.Destroy;
@@ -1257,8 +1272,7 @@ procedure TXMLReader.XML11_BuildTables;
 begin
   FNamePages := Xml11NamePages;
   FXML11 := True;
-  { switching to xml11 may occur only with DecodingSource }
-  TXMLDecodingSource(FSource).FXml11Rules := True;
+  FSource.FXml11Rules := True;
 end;
 
 procedure TXMLReader.ProcessXML(ASource: TXMLCharSource);
@@ -1504,17 +1518,26 @@ end;
 function TXMLReader.ContextPop: Boolean;
 var
   Src: TXMLCharSource;
+  Error: Boolean;
 begin
   Result := Assigned(FSource.FParent) and (FSource.DTDSubsetType = dsNone);
   if Result then
   begin
     Src := FSource.FParent;
+    Error := False;
     if Assigned(FSource.FEntity) then
+    begin
       TDOMEntityEx(FSource.FEntity).FOnStack := False;
+// [28a] PE that was started between MarkupDecls may not end inside MarkupDecl
+      Error := TDOMEntityEx(FSource.FEntity).FBetweenDecls and FInsideDecl;
+    end;
     FCursor := TDOMNode(FSource.FCursor);
     FSource.Free;
     FSource := Src;
     FCurChar := FSource.FBuf^;
+// correct position of this error is after PE reference      
+    if Error then
+      BadPENesting(esFatal);
   end;
 end;
 
@@ -1600,6 +1623,7 @@ begin
   if PEnt.FOnStack then
     FatalError('Entity ''%%%s'' recursively references itself', [PEnt.NodeName]);
 
+  PEnt.FBetweenDecls := not FInsideDecl;
   ContextPush(PEnt);
   FHavePERefs := True;
 end;
@@ -2124,7 +2148,6 @@ end;
 
 procedure TXMLReader.ParseAttlistDecl;         // [52]
 var
-  SaveCurNode: TDOMNode;
   ValueRequired: Boolean;
   Token: WideString;
   ElDef: TDOMElementDef;
@@ -2233,18 +2256,15 @@ begin
         if AttDef.FDataType = dtId then
           ValidationError('An attribute of type ID cannot have a default value',[]);
 
-        SaveCurNode := FCursor;
         FCursor := AttDef;
         // TODO: move this to ExpectAttValue?
         StoreLocation(FTokenStart);
         Inc(FTokenStart.LinePos);
 // See comments to valid-sa-094: PE expansion should be disabled in AttDef.
 // ExpectAttValue() does not recognize PEs anyway, so setting FRecognizePEs isn't needed
-        try
-          ExpectAttValue;
-        finally
-          FCursor := SaveCurNode;
-        end;
+// Saving/restoring FCursor is also redundant because it is always nil here.
+        ExpectAttValue;
+        FCursor := nil;
         if not ValidateAttrSyntax(AttDef, AttDef.NodeValue) then
           ValidationError('Default value for attribute ''%s'' has wrong syntax', [AttDef.Name]);
       end;
@@ -2265,11 +2285,11 @@ end;
 
 function TXMLReader.ParseEntityDeclValue(Delim: WideChar): Boolean;   // [9]
 var
-  Src: TXMLCharSource;
+  CurrentEntity: TObject;
 begin
-  Src := FSource;
+  CurrentEntity := FSource.FEntity;
   // "Included in literal": process until delimiter hit IN SAME context
-  while not ((FSource = Src) and CheckForChar(Delim)) do
+  while not ((FSource.FEntity = CurrentEntity) and CheckForChar(Delim)) do
   if CheckForChar('%') then
   begin
     if not CheckName then
@@ -2308,16 +2328,13 @@ var
   Entity: TDOMEntityEx;
   Map: TDOMNamedNodeMap;
 begin
-  ExpectWhitespace;
+  if not SkipWhitespace(True) then
+    FatalError('Expected whitespace');
   NDataAllowed := True;
   Map := FDocType.Entities;
   if CheckForChar('%') then                  // [72]
   begin
-    if FRecognizePE then
-      SkipWhitespace   // we know that there IS whitespace due to the check in
-                       // previous call to SkipWhitespace
-    else
-      ExpectWhitespace;
+    ExpectWhitespace;
     NDataAllowed := False;
     if FPEMap = nil then
       FPEMap := TDOMNamedNodeMap.Create(FDocType, ENTITY_NODE);
@@ -2338,10 +2355,7 @@ begin
       StoreLocation(Entity.FStartLocation);
       FValue.Length := 0;
       if not ParseEntityDeclValue(Delim) then
-      begin
-        FTokenStart := Entity.FStartLocation;
-        FatalError('Literal has no closing quote', -1);
-      end;
+        DoErrorPos(esFatal, 'Literal has no closing quote', Entity.FStartLocation);
       SetString(Entity.FReplacementText, FValue.Buffer, FValue.Length);
     end
     else
@@ -2453,6 +2467,7 @@ begin
       else
       begin
         FRecognizePE := FSource.DTDSubsetType <> dsInternal;
+        FInsideDecl := True;
         Token := GetString(['A'..'Z']);
         if Token = 'ELEMENT' then
           ParseElementDecl
@@ -2467,17 +2482,11 @@ begin
 
         SkipWhitespace;
         FRecognizePE := False;
-{
-  MarkupDecl starting in PE and ending in root is a WFC [28a]
-  MarkupDecl starting in root but ending in PE is a VC (erratum 2e-14)
-}
-      // TODO: what if statrs in PE1 and ends in PE2, and other cases? 
-      if CurrentEntity <> FSource.FEntity then
-        if Assigned(FSource.FEntity) then { ends in PE }
-          BadPENesting(esError)
-        else
-          BadPENesting(esFatal);
+
+        if CurrentEntity <> FSource.FEntity then
+          BadPENesting;
         ExpectChar('>');
+        FInsideDecl := False;
       end;
     end;
   until False;
@@ -2659,7 +2668,7 @@ begin
   PopVC;
 end;
 
-procedure TXMLReader.AddForwardRef(aList: TList; Buf: PWideChar; Length: Integer);
+procedure TXMLReader.AddForwardRef(aList: TFPList; Buf: PWideChar; Length: Integer);
 var
   w: PForwardRef;
 begin
@@ -2671,7 +2680,7 @@ begin
   aList.Add(w);
 end;
 
-procedure TXMLReader.ClearRefs(aList: TList);
+procedure TXMLReader.ClearRefs(aList: TFPList);
 var
   I: Integer;
 begin
@@ -3088,7 +3097,7 @@ end;
 function TContentParticle.Add: TContentParticle;
 begin
   if FChildren = nil then
-    FChildren := TList.Create;
+    FChildren := TFPList.Create;
   Result := TContentParticle.Create;
   Result.FParent := Self;
   Result.FIndex := FChildren.Add(Result);

+ 1 - 0
packages/fcl-xml/src/xmlutils.pp

@@ -55,6 +55,7 @@ begin
     p^[$2f] := $29;
     p^[$30] := $2d;
     p^[$fd] := $28;
+    p^[$ff] := $30;
 
     Move(p^, p^[256], 256);
     p^[$100] := $19;

+ 2 - 11
packages/fcl-xml/tests/README

@@ -3,7 +3,7 @@ Test runner for w3.org XML compliance suite
 
 The xmlts is intended to run the XML compliance suite from W3.org.
 The suite includes 2500+ tests. It may be downloaded from
-http://www.w3.org/XML/Test/xmlts20031210.zip  (approx. 1.7 mBytes)
+http://www.w3.org/XML/Test/xmlts20080205.zip  (approx. 1.7 mBytes)
 After compiling xmlts.pp, run it with the following command line:
 
 xmlts <path-to-xmlconf.xml> <report-filename> [-t template.xml] [-v]
@@ -23,16 +23,7 @@ Report is produced in xhtml format, use your favourite browser to view it.
 
 As of 10.03.2007, the xml package does not support namespaces yet, so you might wish
 to exclude namespace tests. To do this, edit xmlconf/xmlconf.xml file and comment out
-two lines at the bottom which reference 'eduni-ns10' and 'eduni-ns11' testsuites.
-
-(The last lines should look like:
-
-    &eduni-xml11;
-<!--    &eduni-ns10; -->
-<!--    &eduni-ns11; -->
-
-</TESTSUITE>
-)
+the lines that contain references &eduni-ns10; &eduni-ns11; and &eduni-nse;
 
 
 Testsuite errata

+ 8 - 1
packages/fcl-xml/tests/xmlts.pp

@@ -232,6 +232,7 @@ begin
 
     if Child.NodeName = 'run-id' then
     begin
+      newChild := nil;
       if Data = 'name' then
         newChild := FTemplate.createTextNode(parser)
       else if Data = 'description' then
@@ -358,6 +359,12 @@ begin
   FErrCol := -1;
   FTestID := Element['ID'];
   TestType := Element['TYPE'];
+  if Pos(WideChar('5'), Element['EDITION']) > 0 then
+  begin
+    Inc(FSkipped);
+    Exit;
+  end;
+
   root := GetBaseURI(Element, FRootUri);
   ResolveRelativeURI(root, UTF8Encode(Element['URI']), s);
 
@@ -393,7 +400,7 @@ begin
   try
     try
       FParser.Options.Validate := FValidating;
-//      FParser.Options.Namespaces := (Element['NAMESPACE'] <> 'no');
+      FParser.Options.Namespaces := (Element['NAMESPACE'] <> 'no');
       FParser.OnError := {$IFDEF FPC}@{$ENDIF}ErrorHandler;
       FParser.ParseUri(s, TempDoc);
     except