소스 검색

* Replaced all literal-parsing routines with a single ParseLiteral(). Because of entity-handling issues, it is not yet enabled for attribute values, so the current code still contains some redundancy.
* Started refactoring the entity processing.

git-svn-id: trunk@14202 -

sergei 15 년 전
부모
커밋
73b7248597
1개의 변경된 파일, 168개의 추가 및 99개의 삭제
  1. 168 99
      packages/fcl-xml/src/xmlread.pp

+ 168 - 99
packages/fcl-xml/src/xmlread.pp

@@ -313,6 +313,8 @@ type
     PrefixLen: Integer;  // to avoid recalculation
   end;
 
+  TLiteralType = (ltPlain, ltAttr, ltTokAttr, ltPubid, ltEntity);
+
   TXMLReader = class
   private
     FSource: TXMLCharSource;
@@ -398,7 +400,8 @@ type
     function  CheckName(aFlags: TCheckNameFlags = []): Boolean;
     procedure CheckNCName;
     function  ExpectName: WideString;                                   // [5]
-    procedure SkipQuotedLiteral(out Literal: WideString; required: Boolean = True);
+    function ParseLiteral(var ToFill: TWideCharBuf; aType: TLiteralType;
+      Required: Boolean; Normalized: PBoolean = nil): Boolean;
     procedure ExpectAttValue;                                           // [10]
     procedure ParseComment;                                             // [15]
     procedure ParsePI;                                                  // [16]
@@ -413,6 +416,8 @@ type
     procedure ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef);
     procedure ParseContent;                                             // [43]
     function  ResolvePredefined: Boolean;
+    function  EntityCheck: TDOMEntityEx;
+    procedure StartGE(AEntity: TDOMEntityEx);
     procedure IncludeEntity(InAttr: Boolean);
     procedure StartPE;
     function  ParseRef(var ToFill: TWideCharBuf): Boolean;              // [67]
@@ -421,7 +426,6 @@ type
 
     procedure BadPENesting(S: TErrorSeverity = esError);
     procedure ParseEntityDecl;
-    function  ParseEntityDeclValue(Delim: WideChar): Boolean;
     procedure ParseAttlistDecl;
     procedure ExpectChoiceOrSeq(CP: TContentParticle);
     procedure ParseElementDecl;
@@ -1695,9 +1699,6 @@ end;
 
 const
   AttrDelims: TSetOfChar = [#0, '<', '&', '''', '"', #9, #10, #13];
-  EntityValueDelims: TSetOfChar = [#0, '%', '&', '''', '"'];
-  SQ_Delim: TSetOfChar = [#0, ''''];
-  DQ_Delim: TSetOfChar = [#0, '"'];
   GT_Delim: TSetOfChar = [#0, '>'];
 
 function TXMLReader.DoParseAttValue(Delim: WideChar): Boolean;
@@ -1794,37 +1795,60 @@ begin
   end;
 end;
 
-procedure TXMLReader.IncludeEntity(InAttr: Boolean);
+function TXMLReader.EntityCheck: TDOMEntityEx;
 var
-  AEntity: TDOMEntityEx;
   RefName: WideString;
-  Child: TDOMNode;
-  SaveCursor: TDOMNode_WithChildren;
-  cnt: Cardinal;
+  cnt: Integer;
 begin
-  AEntity := nil;
+  Result := nil;
   SetString(RefName, FName.Buffer, FName.Length);
   cnt := FName.Length+2;
 
   if Assigned(FDocType) then
-    AEntity := FDocType.Entities.GetNamedItem(RefName) as TDOMEntityEx;
+    Result := FDocType.Entities.GetNamedItem(RefName) as TDOMEntityEx;
 
-  if AEntity = nil then
+  if Result = nil then
   begin
     if FStandalone or (FDocType = nil) or not (FHavePERefs or (FDocType.SystemID <> '')) then
       FatalError('Reference to undefined entity ''%s''', [RefName], cnt)
     else
       ValidationError('Undefined entity ''%s'' referenced', [RefName], cnt);
+    Exit;
+  end;
+
+  if FStandalone and Result.FExternallyDeclared then
+    FatalError('Standalone constraint violation', cnt);
+  if Result.NotationName <> '' then
+    FatalError('Reference to unparsed entity ''%s''', [RefName], cnt);
+end;
+
+procedure TXMLReader.StartGE(AEntity: TDOMEntityEx);
+begin
+  if AEntity.FOnStack then
+    FatalError('Entity ''%s'' recursively references itself', [AEntity.FName]);
+  ContextPush(AEntity);
+end;
+
+procedure TXMLReader.IncludeEntity(InAttr: Boolean);
+var
+  AEntity: TDOMEntityEx;
+  RefName: WideString;
+  Child: TDOMNode;
+  SaveCursor: TDOMNode_WithChildren;
+  cnt: Cardinal;
+begin
+  SetString(RefName, FName.Buffer, FName.Length);
+  cnt := FName.Length+2;
+  AEntity := EntityCheck;
+
+  if AEntity = nil then
+  begin
     FCursor.AppendChild(doc.CreateEntityReference(RefName));
     Exit;
   end;
 
   if InAttr and (AEntity.SystemID <> '') then
     FatalError('External entity reference is not allowed in attribute value', cnt);
-  if FStandalone and AEntity.FExternallyDeclared then
-    FatalError('Standalone constraint violation', cnt);
-  if AEntity.NotationName <> '' then
-    FatalError('Reference to unparsed entity ''%s''', [RefName], cnt);
 
   if not AEntity.FResolved then
   begin
@@ -1923,24 +1947,116 @@ begin
     FatalError('Literal has no closing quote',-1);
 end;
 
-procedure TXMLReader.SkipQuotedLiteral(out Literal: WideString; required: Boolean);
+procedure Normalize(var Buf: TWideCharBuf; Modified: PBoolean);
 var
-  Delim: WideChar;
+  Dst, Src: Integer;
 begin
-  SkipQuote(Delim, required);
-  if Delim <> #0 then
+  Dst := 0;
+  Src := 0;
+  // skip leading space if any
+  while (Src < Buf.Length) and (Buf.Buffer[Src] = ' ') do
+    Inc(Src);
+
+  while Src < Buf.Length do
   begin
-    StoreLocation(FTokenStart);
-    FValue.Length := 0;
-    if Delim = '''' then
-      Delim := FSource.SkipUntil(FValue, SQ_Delim)
+    if Buf.Buffer[Src] = ' ' then
+    begin
+      // Dst cannot be 0 here, because leading space is already skipped
+      if Buf.Buffer[Dst-1] <> ' ' then
+      begin
+        Buf.Buffer[Dst] := ' ';
+        Inc(Dst);
+      end;
+    end
     else
-      Delim := FSource.SkipUntil(FValue, DQ_Delim);
-    if Delim = #0 then
-      FatalError('Literal has no closing quote', -1);
-    FSource.NextChar;
-    SetString(Literal, FValue.Buffer, FValue.Length);
+    begin
+      Buf.Buffer[Dst] := Buf.Buffer[Src];
+      Inc(Dst);
+    end;
+    Inc(Src);
   end;
+  // trailing space (only one possible due to compression)
+  if (Dst > 0) and (Buf.Buffer[Dst-1] = ' ') then
+    Dec(Dst);
+
+  if Assigned(Modified) then
+    Modified^ := Dst <> Buf.Length;
+  Buf.Length := Dst;
+end;
+
+const
+  LiteralDelims: array[TLiteralType] of TSetOfChar = (
+    [#0, '''', '"'],                          // ltPlain
+    [#0, '<', '&', '''', '"', #9, #10, #13],  // ltAttr
+    [#0, '<', '&', '''', '"', #9, #10, #13],  // ltTokAttr
+    [#0, '''', '"', #13, #10],                // ltPubid
+    [#0, '%', '&', '''', '"']                 // ltEntity
+  );
+
+function TXMLReader.ParseLiteral(var ToFill: TWideCharBuf; aType: TLiteralType;
+  Required: Boolean; Normalized: PBoolean): Boolean;
+var
+  start: TObject;
+  wc, Delim: WideChar;
+  ent: TDOMEntityEx;
+begin
+  SkipQuote(Delim, Required);
+  Result := (Delim <> #0);
+  if not Result then
+    Exit;
+  StoreLocation(FTokenStart);
+  ToFill.Length := 0;
+  start := FSource.FEntity;
+  repeat
+    wc := FSource.SkipUntil(ToFill, LiteralDelims[aType]);
+    if wc = '%' then       { ltEntity only }
+    begin
+      FSource.NextChar;
+      CheckName;
+      ExpectChar(';');
+      if FSource.DTDSubsetType = dsInternal then
+        FatalError('PE reference not allowed here in internal subset', FName.Length+2);
+      StartPE;
+    end
+    else if wc = '&' then  { ltAttr, ltTokAttr, ltEntity }
+    begin
+      if ParseRef(ToFill) then   // charRefs always expanded
+        Continue;
+      if aType = ltEntity then   // bypass
+      begin
+        BufAppend(ToFill, '&');
+        BufAppendChunk(ToFill, FName.Buffer, FName.Buffer + FName.Length);
+        BufAppend(ToFill, ';');
+      end
+      else                       // include
+      begin
+        if ResolvePredefined then
+          Continue;
+        ent := EntityCheck;
+        if ent = nil then
+          Continue;
+        if ent.SystemID <> '' then
+          FatalError('External entity reference is not allowed in attribute value', FName.Length+2);
+        StartGE(ent);
+      end;
+    end
+    else if wc = '<' then
+      FatalError('Character ''<'' is not allowed in attribute value')
+    else if wc <> #0 then
+    begin
+      FSource.NextChar;
+      if (wc = #10) or (wc = #13) or (wc = #9) then
+        wc := #32
+      // terminating delimiter must be in the same context as the starting one
+      else if (wc = Delim) and (start = FSource.FEntity) then
+        Break;
+      BufAppend(ToFill, wc);
+    end
+    else if (FSource.FEntity = start) or not ContextPop then    // #0
+      FatalError('Literal has no closing quote', -1);
+  until False;
+  if aType in [ltTokAttr, ltPubid] then
+    Normalize(ToFill, Normalized);
 end;
 
 function TXMLReader.SkipUntilSeq(const Delim: TSetOfChar; c1: WideChar; c2: WideChar = #0): Boolean;
@@ -2541,54 +2657,9 @@ begin
   end;
 end;
 
-function TXMLReader.ParseEntityDeclValue(Delim: WideChar): Boolean;   // [9]
-var
-  CurrentEntity: TObject;
-  wc: WideChar;
-begin
-  CurrentEntity := FSource.FEntity;
-  if FEntityValue.Buffer = nil then
-    BufAllocate(FEntityValue, 256);
-  FEntityValue.Length := 0;
-  repeat
-    wc := FSource.SkipUntil(FEntityValue, EntityValueDelims);
-    if wc = '%' then
-    begin
-      FSource.NextChar;
-      CheckName;
-      ExpectChar(';');
-      if FSource.DTDSubsetType = dsInternal then
-        FatalError('PE reference not allowed here in internal subset', FName.Length+2);
-      StartPE;
-    end
-    else if wc = '&' then
-    begin
-// expand CharRefs, bypass (but check for well-formedness) EntityRefs
-      if not ParseRef(FEntityValue) then
-      begin
-        BufAppend(FEntityValue, '&');
-        BufAppendChunk(FEntityValue, FName.Buffer, FName.Buffer + FName.Length);
-        BufAppend(FEntityValue, ';');
-      end;
-    end
-    else if wc <> #0 then
-    begin
-      FSource.NextChar;
-      // terminating delimiter must be in the same context as the starting one
-      if (wc = Delim) and (CurrentEntity = FSource.FEntity) then
-        Break;
-      BufAppend(FEntityValue, wc);
-    end
-    else if (FSource.FEntity = CurrentEntity) or not ContextPop then    // #0
-      Break;
-  until False;
-  Result := (wc <> #0);
-end;
-
 procedure TXMLReader.ParseEntityDecl;        // [70]
 var
   NDataAllowed: Boolean;
-  Delim: WideChar;
   Entity: TDOMEntityEx;
   Map: TDOMNamedNodeMap;
 begin
@@ -2615,16 +2686,16 @@ begin
 
     // remember where the entity is declared
     Entity.FURI := FSource.SystemID;
-    if (FSource.FBuf^ = '"') or (FSource.FBuf^ = '''') then
+
+    if FEntityValue.Buffer = nil then
+      BufAllocate(FEntityValue, 256);
+
+    if ParseLiteral(FEntityValue, ltEntity, False) then
     begin
-      NDataAllowed := False;
-      Delim := FSource.FBuf^;
-      FSource.NextChar;
-      StoreLocation(Entity.FStartLocation);
-      if not ParseEntityDeclValue(Delim) then
-        DoErrorPos(esFatal, 'Literal has no closing quote', Entity.FStartLocation);
       SetString(Entity.FReplacementText, FEntityValue.Buffer, FEntityValue.Length);
       Entity.FCharCount := FEntityValue.Length;
+      Entity.FStartLocation := FTokenStart;
+      NDataAllowed := False;
     end
     else if not ParseExternalID(Entity.FSystemID, Entity.FPublicID, False) then
       FatalError('Expected entity value or external ID');
@@ -3286,33 +3357,31 @@ var
   I: Integer;
   wc: WideChar;
 begin
+  Result := False;
   if FSource.Matches('SYSTEM') then
-  begin
-    ExpectWhitespace;
-    SkipQuotedLiteral(SysID);
-    Result := True;
-  end
+    SysIdOptional := False
   else if FSource.Matches('PUBLIC') then
   begin
     ExpectWhitespace;
-    SkipQuotedLiteral(PubID);
+    ParseLiteral(FValue, ltPubid, True);
+    SetString(PubID, FValue.Buffer, FValue.Length);
     for I := 1 to Length(PubID) do
     begin
       wc := PubID[I];
       if (wc > #255) or not (Char(ord(wc)) in PubidChars) then
         FatalError('Illegal Public ID literal', -1);
-      if (wc = #10) or (wc = #13) then
-        PubID[I] := #32;
     end;
-    NormalizeSpaces(PubID);
-    if SysIdOptional then
-      SkipWhitespace
-    else
-      ExpectWhitespace;
-    SkipQuotedLiteral(SysID, not SysIdOptional);
-    Result := True;
-  end else
-    Result := False;
+  end
+  else
+    Exit;
+
+  if SysIdOptional then
+    SkipWhitespace
+  else
+    ExpectWhitespace;
+  if ParseLiteral(FValue, ltPlain, not SysIdOptional) then
+    SetString(SysID, FValue.Buffer, FValue.Length);
+  Result := True;
 end;
 
 function TXMLReader.ValidateAttrSyntax(AttrDef: TDOMAttrDef; const aValue: WideString): Boolean;