Преглед изворни кода

+ Patch from Sergey Gorelkin to implement CR/LF folding and fix memory leaks

git-svn-id: trunk@3811 -
michael пре 19 година
родитељ
комит
cf54b7f748
1 измењених фајлова са 55 додато и 46 уклоњено
  1. 55 46
      fcl/xml/xmlread.pp

+ 55 - 46
fcl/xml/xmlread.pp

@@ -86,11 +86,13 @@ type
     FCurChar: WideChar;
     FLine: Integer;
     FColumn: Integer;
+    FSeenCR: Boolean;
     FEncoding: TEncoding;
     FValue: array of WideChar;
     FValueLength: Integer;
     FPrologParsed: Boolean;
     procedure RaiseExpectedQmark;
+    procedure InternalGetChar;
     function GetChar: WideChar;
     procedure AppendValue(wc: WideChar);
     procedure DetectEncoding;
@@ -223,6 +225,7 @@ begin
   begin
     FEncoding := Enc;
     FCurChar := '?';
+    Inc(FColumn);
     Exit;
   end;
 
@@ -236,7 +239,8 @@ begin
   GetChar;
 end;
 
-function TXMLReader.GetChar: WideChar;
+
+procedure TXMLReader.InternalGetChar;
 var
   ch, ch2, ch3: Byte;
 
@@ -291,8 +295,28 @@ begin
       FCurChar :=
         WideChar((Ord(FCurChar) and $FF) shl 8 + (Ord(FCurChar) shr 8));
   end;
+end;
+
+function TXMLReader.GetChar: WideChar;
+begin
+  InternalGetChar;
+  if FSeenCR then
+  begin
+    case FCurChar of
+      #10, #$85: InternalGetChar; // #$85 is xml 1.1 specific
+    end;
+    FSeenCR := False;
+  end;
+
+  if FCurChar = #13 then
+  begin
+    FSeenCR := True;
+    FCurChar := #10;
+  end
+  else // xml 1.1 specific check
+    if (FCurChar = #$85) or (FCurChar = #$2028) then
+      FCurChar := #10;
 
-  // TODO: Linefeed handling according to W3C
   if FCurChar = #10 then
   begin
     Inc(FLine);
@@ -554,15 +578,14 @@ begin
   ExpectString('--');
   FValueLength := 0;
   repeat
-    FValue[FValueLength] := FCurChar;
+    AppendValue(FCurChar);
     GetChar;
-    if (FValueLength >= 2) and (FValue[FValueLength] = '>') and
-      (FValue[FValueLength-1] = '-') and (FValue[FValueLength-2] = '-') then
+    if (FValueLength >= 3) and (FValue[FValueLength-1] = '>') and
+      (FValue[FValueLength-2] = '-') and (FValue[FValueLength-3] = '-') then
       begin
-        Dec(FValueLength, 2);
+        Dec(FValueLength, 3);
         Break;
       end;
-    Inc(FValueLength);
   until FCurChar = #0;  // should not happen
 
   SetString(comment, PWideChar(@FValue[0]), FValueLength);
@@ -595,15 +618,14 @@ begin
   begin
     FValueLength := 0;
     repeat
-      FValue[FValueLength] := FCurChar;
+      AppendValue(FCurChar);
       GetChar;
-      if (FValueLength >= 1) and (FValue[FValueLength] = '>') and
-        (FValue[FValueLength-1] = '?') then
+      if (FValueLength >= 2) and (FValue[FValueLength-1] = '>') and
+        (FValue[FValueLength-2] = '?') then
         begin
-          Dec(FValueLength);
+          Dec(FValueLength, 2);
           Break;
         end;
-      Inc(FValueLength);
     until FCurChar = #0;  // should not happen
   end;
   
@@ -716,7 +738,7 @@ procedure TXMLReader.ParseMisc(AOwner: TDOMNode);    // [27]
 begin
   repeat
     SkipWhitespace;
-    if (FCurChar = #0) or (FCurChar <> '<') then
+    if FCurChar <> '<' then
       Break;
     GetChar;
     if FCurChar = '!' then
@@ -1017,9 +1039,8 @@ begin
   Result := False;
   repeat
     SkipWhitespace;
-    if (FCurChar = #0) or (FCurChar <> '<') then
+    if FCurChar <> '<' then  // condition is true for #0
       Exit;
-    // '<'
     GetChar;
     if FCurChar = '!' then
     begin
@@ -1094,15 +1115,14 @@ begin
   ExpectString('[CDATA[');
   FValueLength := 0;
   repeat
-    FValue[FValueLength] := FCurChar;
+    AppendValue(FCurChar);
     GetChar;
-    if (FValueLength >= 2) and (FValue[FValueLength] = '>') and
-      (FValue[FValueLength-1] = ']') and (FValue[FValueLength-2] = ']') then
+    if (FValueLength >= 3) and (FValue[FValueLength-1] = '>') and
+      (FValue[FValueLength-2] = ']') and (FValue[FValueLength-3] = ']') then
     begin
-      Dec(FValueLength, 2);
+      Dec(FValueLength, 3);
       Break;
     end;
-    Inc(FValueLength);
   until FCurChar = #0;
 
   SetString(name, PWideChar(@FValue[0]), FValueLength);
@@ -1112,15 +1132,14 @@ end;
 function TXMLReader.ParseElement(AOwner: TDOMNode): Boolean;    // [39] [40] [44]
 var
   NewElem: TDOMElement;
-
-  procedure CreateNameElement;
-  var
-    IsEmpty: Boolean;
-    attr: TDOMAttr;
-    name: WideString;
-    t: WideChar;
+  IsEmpty: Boolean;
+  attr: TDOMAttr;
+  name: WideString;
+begin                 // starting '<' is already consumed at this point
+  Result := CheckName;
+  if Result then
   begin
-    {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt('  CreateNameElement A');{$ENDIF}
+    {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt('  ParseElement A');{$ENDIF}
     SetString(name, PWideChar(@FValue[0]), FValueLength);
 
     NewElem := doc.CreateElement(name);
@@ -1130,7 +1149,7 @@ var
     IsEmpty := False;
     while True do
     begin
-      {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt('  CreateNameElement E');{$ENDIF}
+      {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt('  ParseElement E');{$ENDIF}
       if CheckForChar('>') then
         Break
       else if FCurChar = '/' then
@@ -1143,7 +1162,7 @@ var
         end
            // <-- error: '>' required
       end;
-      
+
       // Get Attribute [41]
       attr := doc.CreateAttribute(ExpectName);
       NewElem.Attributes.SetNamedItem(attr);
@@ -1158,8 +1177,8 @@ var
       SkipWhitespace;
       if FCurChar = '<' then
       begin
-        t := GetChar;
-        if t = '!' then
+        GetChar;
+        if FCurChar = '!' then
         begin
           GetChar;
           if FCurChar = '[' then
@@ -1167,9 +1186,9 @@ var
           else if FCurChar = '-' then
             ParseComment(NewElem);
         end
-        else if t = '?' then
+        else if FCurChar = '?' then
           ParsePI
-        else if t = '/' then       // Get ETag [42]
+        else if FCurChar = '/' then       // Get ETag [42]
         begin
           GetChar; // skip '/'
           if ExpectName <> NewElem.NodeName then
@@ -1186,22 +1205,12 @@ var
       else
         ParseCharData(NewElem);
     until False;
-    {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt('  CreateNameElement END');{$ENDIF}
+    {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt('  ParseElement END');{$ENDIF}
     ResolveEntities(NewElem);
   end;
-
-begin                // starting '<' is already consumed at this point
-  {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt('TXMLReader.ParseElement A');{$ENDIF}
-  if CheckName then
-  begin
-    CreateNameElement;
-    Result := True;
-  end
-  else
-    Result := False;
-  {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt('TXMLReader.ParseElement END');{$ENDIF}
 end;
 
+
 procedure TXMLReader.ExpectElement(AOwner: TDOMNode);
 begin
   if not ParseElement(AOwner) then