Ver Fonte

* Reworked SplitTagString as suggested by Mantis #22434: it now accepts html-style unquoted attribute values and is not sensitive to whitespace around '=' name-value separator.

git-svn-id: trunk@33242 -
sergei há 9 anos atrás
pai
commit
e50deef149
1 ficheiros alterados com 59 adições e 60 exclusões
  1. 59 60
      packages/fcl-xml/src/sax_html.pp

+ 59 - 60
packages/fcl-xml/src/sax_html.pp

@@ -119,10 +119,6 @@ procedure ReadHTMLFragment(AParentNode: TDOMNode; f: TStream);
 implementation
 
 
-const
-  WhitespaceChars = [#9, #10, #13, ' '];
-
-
 constructor THTMLReader.Create;
 begin
   inherited Create;
@@ -354,80 +350,83 @@ end;
 
 function SplitTagString(const s: SAXString; var Attr: TSAXAttributes): SAXString;
 var
-  i, j: Integer;
+  i, j, len: Integer;
   AttrName: SAXString;
   ValueDelimiter: WideChar;
-  DoIncJ: Boolean;
+  haseq, hasname: Boolean;
 begin
   Attr := nil;
   i := 0;
+  len := Length(s);
   repeat
     Inc(i)
-  until (i > Length(s)) or IsXMLWhitespace(s[i]);
+  until (i > len) or IsXMLWhitespace(s[i]);
 
-  if i > Length(s) then
-    Result := s
-  else
-  begin
-    Result := Copy(s, 1, i - 1);
-    Attr := TSAXAttributes.Create;
-    Inc(i);
+  Result := Copy(s, 1, i - 1);
+  WStrLower(Result);
+  if i > len then
+    exit;
+  Attr := TSAXAttributes.Create;
+  Inc(i);
 
-    while (i <= Length(s)) and IsXMLWhitespace(s[i]) do
+  repeat
+    while (i <= len) and IsXMLWhitespace(s[i]) do
       Inc(i);
-
-    SetLength(AttrName, 0);
+    if (i > len) then
+      break;
     j := i;
-
-    while j <= Length(s) do
-      if s[j] = '=' then
+    haseq := false;
+    hasname := false;
+    ValueDelimiter := #0;
+    // Attr Name
+    while (j <= len) and not (IsXMLWhitespace(s[j]) or (s[j] = '='))  do
+      Inc(j);
+    // j points on =, whitespace or after s
+    AttrName := Copy(s, i, j - i);
+    WStrLower(AttrName);
+    hasname := IsXMLName(AttrName);
+    // Look for = and following whitespaces (maybe j already points on =)
+    while (j <= len) and (IsXMLWhitespace(s[j]) or (s[j] = '=')) do
+    begin
+      if (s[j] = '=') then
+        haseq := true;
+      Inc(j);
+    end;
+    // Value (j points on first nonblank or after end)
+    if haseq then
+    begin
+      if (j > len) then  { terminal case <tag attr=> }
       begin
-        AttrName := Copy(s, i, j - i);
-        WStrLower(AttrName);
-        Inc(j);
-        if (j < Length(s)) and ((s[j] = '''') or (s[j] = '"')) then
+        if hasname then
+          Attr.AddAttribute('', AttrName, '', '', '');
+        break;
+      end
+      else
+      begin
+        if (s[j]='''') or (s[j]='"') then
         begin
           ValueDelimiter := s[j];
           Inc(j);
-        end else
-          ValueDelimiter := #0;
+        end;
         i := j;
-        DoIncJ := False;
-        while j <= Length(s) do
-          if ValueDelimiter = #0 then
-            if IsXMLWhitespace(s[j]) then
-              break
-            else
-              Inc(j)
-          else if s[j] = ValueDelimiter then
-          begin
-            DoIncJ := True;
-            break
-          end else
-            Inc(j);
-
-        if IsXMLName(AttrName) then
-          Attr.AddAttribute('', AttrName, '', '', Copy(s, i, j - i));
-
-        if DoIncJ then
+        while (j <= len) do
+        begin
+          if (s[j]=ValueDelimiter) then
+            break;
+          if (ValueDelimiter=#0) and IsXMLWhitespace(s[j]) then
+            break;
           Inc(j);
+        end;
+        if hasname then
+          Attr.AddAttribute('', AttrName, '', '', Copy(s, i, j - i))
+      end;
+    end
+    else if hasname then   { html boolean-style attribute }
+      Attr.AddAttribute('', AttrName, '', '', AttrName);
 
-        while (j <= Length(s)) and IsXMLWhitespace(s[j]) do
-          Inc(j);
-        i := j;
-      end
-      else if IsXMLWhitespace(s[j]) then
-      begin
-        if IsXMLName(@s[i], j-i) then
-          Attr.AddAttribute('', Copy(s, i, j - i), '', '', '');
-        Inc(j);
-        while (j <= Length(s)) and IsXMLWhitespace(s[j]) do
-          Inc(j);
-        i := j;
-      end else
-        Inc(j);
-  end;
-  WStrLower(result);
+    { skip closing quote if one is present }
+    i := j + ord(ValueDelimiter<>#0);
+  until false;
 end;
 
 function RightTrimmedLength(const s: SAXString): Integer;