Kaynağa Gözat

* Fix bug ID #38624

git-svn-id: trunk@48980 -
michael 4 yıl önce
ebeveyn
işleme
5e913147ab

+ 15 - 5
packages/fcl-json/src/fpjson.pp

@@ -1013,12 +1013,22 @@ begin
                 Inc(I,4);
                 if (U1<>0) then
                   begin
-                  App:={$IFDEF FPC_HAS_CPSTRING}UTF8Encode({$ENDIF}WideChar(U1)+WideChar(U2){$IFDEF FPC_HAS_CPSTRING}){$ENDIF};
-                  U1:=0;
-                  U2:=0;
-                  end
+                  if ((U1>=$D800) and (U1<=$DBFF)) and
+                     ((U2>=$DC00) and (U2<=$DFFF)) then                  
+                    begin
+                     App:={$IFDEF FPC_HAS_CPSTRING}UTF8Encode({$ENDIF}WideChar(U1)+WideChar(U2){$IFDEF FPC_HAS_CPSTRING}){$ENDIF};
+                     U2:=0;
+                    end 
+                  else
+                    begin
+                    App:={$IFDEF FPC_HAS_CPSTRING}UTF8Encode({$ENDIF}WideChar(U1){$IFDEF FPC_HAS_CPSTRING}){$ENDIF};
+                    Result:=Result+App;
+                    App:='';
+                   end;
+                  end 
                 else
-                  U1:=U2;
+                   App:='';
+                U1:=U2;
                 end;
         end;
         if App<>'' then

+ 20 - 4
packages/fcl-json/src/jsonscanner.pp

@@ -354,9 +354,11 @@ begin
                         Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
                       end;
                       end;
-                    // ToDo: 4-bytes UTF16
                     if u1<>0 then
                       begin
+                      // 4bytes, compose.
+                      if not ((u2>=$DC00) and (u2<=$DFFF)) then
+                        Error(SErrInvalidCharacter, [CurRow,CurColumn,IntToStr(u2)]);
                       if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then
                         S:=Utf8Encode(WideString(WideChar(u1)+WideChar(u2))) // ToDo: use faster function
                       else
@@ -365,9 +367,23 @@ begin
                       end
                     else
                       begin
-                      S:='';
-                      u1:=u2;
-                      end
+                      // Surrogate start
+                      if (u2>=$D800) and (U2<=$DBFF) then
+                        begin
+                        u1:=u2;
+                        S:='';
+                        end
+                      else
+                        begin
+                        if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then
+                          S:=Utf8Encode(WideString(WideChar(u2))) // ToDo: use faster function
+                        else
+                          S:=String(WideChar(u1))+String(WideChar(u2)); // WideChar converts the encoding. Should it warn on loss?
+                        U1:=0;  
+                        U2:=0;
+                        end;
+                      end;
+                    Writeln(' U2 : ',U2,' : >',S,'<');
                     end;
               #0  : Error(SErrOpenString,[FCurRow]);
             else

+ 2 - 0
packages/fcl-json/tests/testjsondata.pp

@@ -4038,6 +4038,7 @@ Const
   // Glowing star in UTF8
   GlowingStar = #$F0#$9F#$8C#$9F;
   Chinese = #$95e8#$88ab#$8111#$5b50#$6324#$574f#$4e86;
+  Chinese4b = #$95e8#$d867#$de3d#$88ab#$8111#$5b50#$6324#$574f#$4e86;
 
 begin
   TestFrom('','');
@@ -4082,6 +4083,7 @@ begin
   TestFrom('\u0041\u0042\u0043','ABC');
   TestFrom('\u0041\u0042\u0043\u0044','ABCD');
   TestFrom('\u95e8\u88ab\u8111\u5b50\u6324\u574f\u4e86',Utf8Encode(Chinese));
+  TestFrom('\u95e8\ud867\ude3d\u88ab\u8111\u5b50\u6324\u574f\u4e86',Utf8Encode(Chinese4b));
 end;
 
 procedure TTestJSONString.TestStringToJSONString;

+ 2 - 1
packages/fcl-json/tests/testjsonreader.pp

@@ -317,6 +317,7 @@ procedure TBaseTestReader.TestString;
 const
   GlowingStar = #$F0#$9F#$8C#$9F;
   Chinese = #$95e8#$88ab#$8111#$5b50#$6324#$574f#$4e86;
+  Chinese4b = #$95e8#$d867#$de3d#$88ab#$8111#$5b50#$6324#$574f#$4e86;
 
 begin
   DoTestString('A string');
@@ -329,7 +330,7 @@ begin
   DoTestString('\u0041\u0042\u0043','ABC');
   DoTestString('\u0041\u0042\u0043\u0044','ABCD');
   DoTestString('\u95e8\u88ab\u8111\u5b50\u6324\u574f\u4e86',Utf8Encode(Chinese));
-  
+  DoTestString('\u95e8\ud867\ude3d\u88ab\u8111\u5b50\u6324\u574f\u4e86',Utf8Encode(Chinese4b));   
 end;