Просмотр исходного кода

* The second part of changes suggested by Maris Janis Vasilevskis (Mantis #22434):
* Parse comments and script/style elements using dedicated scanner states
* Ignore markup in comments
* Ignore unescaped '&' in script/style elements
* Additionally, report comments via OnComment event
* Reverted r17003, since it is entirely superseded by the above changes.

git-svn-id: trunk@33250 -

sergei 9 лет назад
Родитель
Коммит
5cc8ddfbf1
Изменён 1 файл: 111 строк добавлено и 60 удалено
  1. 111 60
      packages/fcl-xml/src/sax_html.pp

+ 111 - 60
packages/fcl-xml/src/sax_html.pp

@@ -42,7 +42,10 @@ type
     scWhitespace,       // within whitespace
     scText,             // within text
     scEntityReference,  // within entity reference ("&...;")
-    scTag);             // within a start tag or end tag
+    scTag,              // within a start tag or end tag
+    scComment,
+    scScript
+  );
 
   THTMLReader = class(TSAXReader)
   private
@@ -51,6 +54,8 @@ type
     FScannerContext: THTMLScannerContext;
     FTokenText: SAXString;
     FRawTokenText: string;
+    FScriptEndTag: string;
+    FScriptEndMatchPos: Integer;
     FCurStringValueDelimiter: Char;
     FAttrNameRead: Boolean;
     FStack: array of THTMLElementTag;
@@ -155,6 +160,8 @@ const
 var
   Buffer: array[0..MaxBufferSize - 1] of Char;
   BufferSize, BufferPos: Integer;
+  len: Integer;
+  ch: Char;
 begin
   if not FStarted then
   begin
@@ -294,8 +301,62 @@ begin
                 end;
               end;
           else
+            FRawTokenText := FRawTokenText + Buffer[BufferPos];
+            if FRawTokenText='!--' then
+            begin
+              FScannerContext := scComment;
+              FRawTokenText := '';
+            end;
+            Inc(BufferPos);
+          end;
+        scComment:
+          begin
             FRawTokenText := FRawTokenText + Buffer[BufferPos];
             Inc(BufferPos);
+
+            if (Buffer[BufferPos-1]='>') then
+            begin
+              len:=length(FRawTokenText);
+              if (len>2) and (FRawTokenText[len-1]='-') and (FRawTokenText[len-2]='-') then
+              begin
+                Delete(FRawTokenText, Length(FRawTokenText)-2, MaxInt);
+                EnterNewScannerContext(scUnknown);
+              end;
+            end;
+          end;
+        scScript:
+          begin
+            ch := Buffer[BufferPos];
+            if FScriptEndMatchPos <= Length(FScriptEndTag) then
+            begin
+              if lowercase(ch) = FScriptEndTag[FScriptEndMatchPos] then
+                Inc(FScriptEndMatchPos)
+              else
+                FScriptEndMatchPos := 1;
+              FRawTokenText := FRawTokenText + ch;
+              Inc(BufferPos);
+            end
+            else
+            begin
+              case ch of
+                #9,#10,#13,' ':
+                  begin
+                    FRawTokenText := FRawTokenText + ch;
+                    Inc(BufferPos);
+                    Inc(FScriptEndMatchPos);
+                  end;
+                '>':
+                  begin
+                    Inc(BufferPos);
+                    Delete(FRawTokenText, Length(FRawTokenText)-FScriptEndMatchPos+2, MaxInt);
+                    EnterNewScannerContext(scUnknown);
+                  end;
+              else
+                FRawTokenText := FRawTokenText + ch;
+                Inc(BufferPos);
+                FScriptEndMatchPos := 1;
+              end;
+            end;
           end;
         end;    // case ScannerContext of
     end;        // while not endOfBuffer
@@ -429,18 +490,6 @@ begin
   until false;
 end;
 
-function RightTrimmedLength(const s: SAXString): Integer;
-begin
-  result := Length(s);
-  while IsXmlWhitespace(s[result]) do Dec(result);
-end;
-
-function TagPos(elTag: THTMLElementTag; s: SAXString): Integer;
-begin
-  WStrLower(s);
-  Result := Pos(HTMLElementProps[elTag].Name, s);
-end;
-
 procedure THTMLReader.EnterNewScannerContext(NewContext: THTMLScannerContext);
 var
   Attr: TSAXAttributes;
@@ -468,60 +517,62 @@ begin
     scTag:
       if Length(TokenText) > 0 then
       begin
-        { ignore possibly unescaped markup in SCRIPT and STYLE }
-        if (FNesting > 0) and (FStack[FNesting-1] in [etScript,etStyle]) and
-          not (
-           (TokenText[1] = '/') and
-           (RightTrimmedLength(TokenText)=Length(HTMLElementProps[FStack[FNesting-1]].Name)+1) and
-           (TagPos(FStack[FNesting-1], TokenText) = 2)
-          )
-          and (TokenText[1] <> '!') then
+        Attr := nil;
+        if TokenText[Length(fTokenText)]='/' then  // handle xml/xhtml style empty tag
         begin
-          FTokenText := '<'+FTokenText+'>';
-          DoCharacters(PSAXChar(TokenText), 0, Length(TokenText));
+          setlength(fTokenText,length(fTokenText)-1);
+          // Do NOT combine to a single line, as Attr is an output value!
+          TagName := SplitTagString(TokenText, Attr);
+          AutoClose(TagName);
+          DoStartElement('', TagName, '', Attr);
+          DoEndElement('', TagName, '');
         end
-        else
+        else if TokenText[1] = '/' then
         begin
-          Attr := nil;
-          if TokenText[Length(fTokenText)]='/' then  // handle xml/xhtml style empty tag
-          begin
-            setlength(fTokenText,length(fTokenText)-1);
-            // Do NOT combine to a single line, as Attr is an output value!
-            TagName := SplitTagString(TokenText, Attr);
-            AutoClose(TagName);
-            DoStartElement('', TagName, '', Attr);
-            DoEndElement('', TagName, '');
-          end
-          else if TokenText[1] = '/' then
-          begin
-            Delete(FTokenText, 1, 1);
-            TagName := SplitTagString(TokenText, Attr);
-            elTag := LookupTag(TagName);
-            i := FNesting-1;
-            while (i >= 0) and (FStack[i] <> elTag) and
-              (efEndTagOptional in HTMLElementProps[FStack[i]].Flags) do
-              Dec(i);
-            if (i>=0) and (FStack[i] = elTag) then
-              while FStack[FNesting-1] <> elTag do
-              begin
-                DoEndElement('', HTMLElementProps[FStack[FNesting-1]].Name, '');
-                namePop;
-              end;
+          Delete(FTokenText, 1, 1);
+          TagName := SplitTagString(TokenText, Attr);
+          elTag := LookupTag(TagName);
+          i := FNesting-1;
+          while (i >= 0) and (FStack[i] <> elTag) and
+            (efEndTagOptional in HTMLElementProps[FStack[i]].Flags) do
+            Dec(i);
+          if (i>=0) and (FStack[i] = elTag) then
+            while FStack[FNesting-1] <> elTag do
+            begin
+              DoEndElement('', HTMLElementProps[FStack[FNesting-1]].Name, '');
+              namePop;
+            end;
 
-            DoEndElement('', TagName, '');
-            namePop;
-          end
-          else if TokenText[1] <> '!' then
+          DoEndElement('', TagName, '');
+          namePop;
+        end
+        else if TokenText[1] <> '!' then
+        begin
+          // Do NOT combine to a single line, as Attr is an output value!
+          TagName := SplitTagString(TokenText, Attr);
+          AutoClose(TagName);
+          namePush(TagName);
+          DoStartElement('', TagName, '', Attr);
+          if FStack[FNesting-1] in [etScript,etStyle] then
           begin
-            // Do NOT combine to a single line, as Attr is an output value!
-            TagName := SplitTagString(TokenText, Attr);
-            AutoClose(TagName);
-            namePush(TagName);
-            DoStartElement('', TagName, '', Attr);
+            NewContext := scScript;
+            FScriptEndTag := '</' + HTMLElementProps[FStack[FNesting-1]].Name;
+            FScriptEndMatchPos := 1;
           end;
-          if Assigned(Attr) then
-            Attr.Free;
         end;
+        if Assigned(Attr) then
+          Attr.Free;
+      end;
+    scComment:
+      begin
+        DoComment(PSAXChar(TokenText), 0, Length(TokenText));
+      end;
+    scScript:
+      begin
+        DoCharacters(PSAXChar(TokenText), 0, Length(TokenText));
+        DoEndElement('', HTMLElementProps[FStack[FNesting-1]].Name, '');
+        namePop;
+        FScriptEndTag := '';
       end;
   end;
   FScannerContext := NewContext;