Browse Source

* SAX parsers: support hexadecimal notation in character references and return the result as a SAXChar
instead of a Char; this provides a partial fix for #14074

git-svn-id: trunk@13351 -

sergei 16 years ago
parent
commit
a8e6f79ef3

+ 36 - 12
packages/fcl-xml/src/htmldefs.pp

@@ -557,7 +557,7 @@ const
 
 
 
 
 function ResolveHTMLEntityReference(const Name: String;
 function ResolveHTMLEntityReference(const Name: String;
-  var Entity: Char): Boolean;
+  var Entity: WideChar): Boolean;
 
 
 
 
 
 
@@ -566,10 +566,11 @@ implementation
 uses SysUtils;
 uses SysUtils;
 
 
 function ResolveHTMLEntityReference(const Name: String;
 function ResolveHTMLEntityReference(const Name: String;
-  var Entity: Char): Boolean;
+  var Entity: WideChar): Boolean;
 var
 var
-  Ent: Char;
+  Ent: WideChar;
   i: Integer;
   i: Integer;
+  value: Integer;
 begin
 begin
   if Name = 'quot' then
   if Name = 'quot' then
   begin
   begin
@@ -591,17 +592,40 @@ begin
   begin
   begin
     Entity := '>';
     Entity := '>';
     Result := True;
     Result := True;
-  end else if (Length(Name) > 0) and (Name[1] = '#') then
+  end else if (Length(Name) > 1) and (Name[1] = '#') then
   begin
   begin
-    for i := 2 to Length(Name) do
-      if (Name[i] < '0') or (Name[i] > '9') then
-        break;
-    if i > 2 then
+    value := 0;
+    if Name[2] in ['x', 'X'] then
     begin
     begin
-      Entity := Chr(StrToInt(Copy(Name, 2, i - 1)));
-      Result := True;
-    end else
-      Result := False;
+      i := 3;
+      while i <= Length(Name) do
+      begin
+        case Name[i] of
+          '0'..'9': Value := Value * 16 + Ord(Name[i]) - Ord('0');
+          'a'..'f': Value := Value * 16 + Ord(Name[i]) - (Ord('a') - 10);
+          'A'..'F': Value := Value * 16 + Ord(Name[i]) - (Ord('A') - 10);
+        else
+          Break;
+        end;
+        Inc(i);
+      end;
+    end
+    else
+    begin
+      i := 2;
+      while i <= Length(Name) do
+      begin
+        case Name[i] of
+          '0'..'9': Value := Value * 10 + Ord(Name[i]) - Ord('0');
+        else
+          Break;
+        end;
+        Inc(i);
+      end;
+    end;
+    Result := (i = Length(Name)+1);
+    if Result then
+      Entity := WideChar(Value);
   end else
   end else
   begin
   begin
     for Ent := Low(HTMLEntities) to High(HTMLEntities) do
     for Ent := Low(HTMLEntities) to High(HTMLEntities) do

+ 4 - 5
packages/fcl-xml/src/sax_html.pp

@@ -342,9 +342,9 @@ procedure THTMLReader.EnterNewScannerContext(NewContext: THTMLScannerContext);
 
 
 var
 var
   Attr: TSAXAttributes;
   Attr: TSAXAttributes;
-  EntString, TagName: String;
+  TagName: String;
   Found: Boolean;
   Found: Boolean;
-  Ent: Char;
+  Ent: SAXChar;
   i: Integer;
   i: Integer;
 begin
 begin
   case ScannerContext of
   case ScannerContext of
@@ -356,8 +356,7 @@ begin
       begin
       begin
         if ResolveHTMLEntityReference(TokenText, Ent) then
         if ResolveHTMLEntityReference(TokenText, Ent) then
         begin
         begin
-          EntString := Ent;
-          DoCharacters(PSAXChar(EntString), 0, 1);
+          DoCharacters(@Ent, 0, 1);
         end else
         end else
         begin
         begin
           { Is this a predefined Unicode character entity? We must check this,
           { Is this a predefined Unicode character entity? We must check this,
@@ -373,7 +372,7 @@ begin
           if Found then
           if Found then
             DoSkippedEntity(TokenText)
             DoSkippedEntity(TokenText)
           else
           else
-            DoCharacters(PSAXChar('&' + TokenText), 0, Length(TokenText) + 1);
+            DoCharacters(PSAXChar('&' + TokenText), 0, Length(TokenText) + 2);
         end;
         end;
       end;
       end;
     scTag:
     scTag:

+ 4 - 5
packages/fcl-xml/src/sax_xml.pp

@@ -333,9 +333,9 @@ procedure TSAXXMLReader.EnterNewScannerContext(NewContext: TXMLScannerContext);
 
 
 var
 var
   Attr: TSAXAttributes;
   Attr: TSAXAttributes;
-  EntString, TagName: String;
+  TagName: String;
   Found: Boolean;
   Found: Boolean;
-  Ent: Char;
+  Ent: SAXChar;
   i: Integer;
   i: Integer;
 begin
 begin
   case ScannerContext of
   case ScannerContext of
@@ -347,8 +347,7 @@ begin
       begin
       begin
         if ResolveHTMLEntityReference(TokenText, Ent) then
         if ResolveHTMLEntityReference(TokenText, Ent) then
         begin
         begin
-          EntString := Ent;
-          DoCharacters(PSAXChar(EntString), 0, 1);
+          DoCharacters(@Ent, 0, 1);
         end else
         end else
         begin
         begin
           { Is this a predefined Unicode character entity? We must check this,
           { Is this a predefined Unicode character entity? We must check this,
@@ -364,7 +363,7 @@ begin
           if Found then
           if Found then
             DoSkippedEntity(TokenText)
             DoSkippedEntity(TokenText)
           else
           else
-            DoCharacters(PSAXChar('&' + TokenText), 0, Length(TokenText) + 1);
+            DoCharacters(PSAXChar('&' + TokenText), 0, Length(TokenText) + 2);
         end;
         end;
       end;
       end;
     scTag:
     scTag: