Selaa lähdekoodia

* Fix 4-byte unicode characters

git-svn-id: trunk@40058 -
michael 6 vuotta sitten
vanhempi
commit
8e0442fb1f

+ 41 - 10
packages/fcl-json/src/fpjson.pp

@@ -875,14 +875,29 @@ end;
 function JSONStringToString(const S: TJSONStringType): TJSONStringType;
 
 Var
-  I,J,L : Integer;
-  w : String;
+  I,J,L,U1,U2 : Integer;
+  App,W : String;
+
+  Procedure MaybeAppendUnicode;
+
+  Var
+    U : String;
+
+  begin
+    if (U1<>0) then
+      begin
+      U:=UTF8Encode(WideChar(U1));
+      Result:=Result+U;
+      U1:=0;
+      end;
+  end;
 
 begin
   I:=1;
   J:=1;
   L:=Length(S);
   Result:='';
+  U1:=0;
   While (I<=L) do
     begin
     if (S[I]='\') then
@@ -891,25 +906,41 @@ begin
       If I<L then
         begin
         Inc(I);
+        App:='';
         Case S[I] of
           '\','"','/'
-              : Result:=Result+S[I];
-          'b' : Result:=Result+#8;
-          't' : Result:=Result+#9;
-          'n' : Result:=Result+#10;
-          'f' : Result:=Result+#12;
-          'r' : Result:=Result+#13;
+              : App:=S[I];
+          'b' : App:=#8;
+          't' : App:=#9;
+          'n' : App:=#10;
+          'f' : App:=#12;
+          'r' : App:=#13;
           'u' : begin
                 W:=Copy(S,I+1,4);
                 Inc(I,4);
-                Result:=Result+TJSONStringType(WideChar(StrToInt('$'+W)));
+                u2:=StrToInt('$'+W);
+                if (U1<>0) then
+                  begin
+                  App:=UTF8Encode(WideChar(U1)+WideChar(U2));
+                  U2:=0;
+                  end
+                else
+                  U1:=U2;
                 end;
         end;
+        if App<>'' then
+          begin
+          MaybeAppendUnicode;
+          Result:=Result+App;
+          end;
         end;
       J:=I+1;
-      end;
+      end
+    else
+      MaybeAppendUnicode;
     Inc(I);
     end;
+  MaybeAppendUnicode;
   Result:=Result+Copy(S,J,I-J+1);
 end;
 

+ 56 - 14
packages/fcl-json/src/jsonscanner.pp

@@ -218,11 +218,31 @@ var
   TokenStart: PChar;
   it : TJSONToken;
   I : Integer;
-  OldLength, SectionLength,  tstart,tcol, u: Integer;
+  OldLength, SectionLength,  tstart,tcol, u1,u2: Integer;
   C , c2: char;
   S : String;
   IsStar,EOC: Boolean;
 
+  Procedure MaybeAppendUnicode;
+
+  Var
+    u : String;
+
+  begin
+  // if there is a leftover \u, append
+  if (u1<>0) then
+    begin
+    if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then
+      U:=Utf8Encode(WideString(WideChar(u1))) // ToDo: use faster function
+    else
+      U:=String(WideChar(u1)); // WideChar converts the encoding. Should it warn on loss?
+    FCurTokenString:=FCurTokenString+U;
+    OldLength:=Length(FCurTokenString);
+    u1:=0;
+    end;
+  end;
+
+
 begin
   if FTokenStr = nil then
     if not FetchLine then
@@ -262,6 +282,7 @@ begin
         TokenStart := FTokenStr;
         OldLength := 0;
         FCurTokenString := '';
+        u1:=0;
         while not (FTokenStr[0] in [#0,C]) do
           begin
           if (FTokenStr[0]='\') then
@@ -282,43 +303,64 @@ begin
               '/' : S:='/';
               'u' : begin
                     S:='0000';
-                    u:=0;
+                    u2:=0;
                     For I:=1 to 4 do
                       begin
                       Inc(FTokenStr);
                       c2:=FTokenStr^;
                       Case c2 of
-                        '0'..'9': u:=u*16+ord(c2)-ord('0');
-                        'A'..'F': u:=u*16+ord(c2)-ord('A')+10;
-                        'a'..'f': u:=u*16+ord(c2)-ord('a')+10;
+                        '0'..'9': u2:=u2*16+ord(c2)-ord('0');
+                        'A'..'F': u2:=u2*16+ord(c2)-ord('A')+10;
+                        'a'..'f': u2:=u2*16+ord(c2)-ord('a')+10;
                       else
                         Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
                       end;
                       end;
                     // ToDo: 4-bytes UTF16
-                    if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then
-                      S:=Utf8Encode(WideString(WideChar(u))) // ToDo: use faster function
+                    if u1<>0 then
+                      begin
+                      if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then
+                        S:=Utf8Encode(WideString(WideChar(u1)+WideChar(u2))) // ToDo: use faster function
+                      else
+                        S:=String(WideChar(u1)+WideChar(u2)); // WideChar converts the encoding. Should it warn on loss?
+                      u1:=0;
+                      end
                     else
-                      S:=String(WideChar(u)); // WideChar converts the encoding. Should it warn on loss?
+                      begin
+                      S:='';
+                      u1:=u2;
+                      end
                     end;
               #0  : Error(SErrOpenString);
             else
               Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
             end;
-            SetLength(FCurTokenString, OldLength + SectionLength+1+Length(S));
-            if SectionLength > 0 then
-              Move(TokenStart^, FCurTokenString[OldLength + 1], SectionLength);
-            Move(S[1],FCurTokenString[OldLength + SectionLength+1],Length(S));
-            Inc(OldLength, SectionLength+Length(S));
+            I:=Length(S);
+            if (SectionLength+I>0) then
+              begin
+              // If length=1, we know it was not \uXX, but u1 can be nonzero, and we must first append it.
+              // example: \u00f8\"
+              if I=1 then
+                MaybeAppendUnicode;
+              SetLength(FCurTokenString, OldLength + SectionLength+Length(S));
+              if SectionLength > 0 then
+                Move(TokenStart^, FCurTokenString[OldLength + 1], SectionLength);
+              if I>0 then
+                Move(S[1],FCurTokenString[OldLength + SectionLength+1],i);
+              Inc(OldLength, SectionLength+Length(S));
+              end;
             // Next char
             TokenStart := FTokenStr+1;
-            end;
+            end
+          else
+            MaybeAppendUnicode;
           if FTokenStr[0] = #0 then
             Error(SErrOpenString);
           Inc(FTokenStr);
           end;
         if FTokenStr[0] = #0 then
           Error(SErrOpenString);
+        MaybeAppendUnicode;
         SectionLength := FTokenStr - TokenStart;
         SetLength(FCurTokenString, OldLength + SectionLength);
         if SectionLength > 0 then

+ 2 - 5
packages/fcl-json/tests/testjson.lpi

@@ -15,20 +15,17 @@
     </BuildModes>
     <PublishOptions>
       <Version Value="2"/>
-      <IgnoreBinaries Value="False"/>
-      <IncludeFileFilter Value="*.(pas|pp|inc|lfm|lpr|lrs|lpi|lpk|sh|xml)"/>
-      <ExcludeFileFilter Value="*.(bak|ppu|ppw|o|so);*~;backup"/>
     </PublishOptions>
     <RunParams>
       <local>
-        <CommandLineParams Value="--suite=TTestParser.TestArray"/>
+        <CommandLineParams Value="--suite=TTestJSONString.TestJSONStringToString"/>
         <LaunchingApplication PathPlusParams="/usr/X11R6/bin/xterm -T 'Lazarus Run Output' -e $(LazarusDir)/tools/runwait.sh $(TargetCmdLine)"/>
       </local>
       <FormatVersion Value="2"/>
       <Modes Count="1">
         <Mode0 Name="default">
           <local>
-            <CommandLineParams Value="--suite=TTestParser.TestArray"/>
+            <CommandLineParams Value="--suite=TTestJSONString.TestJSONStringToString"/>
             <LaunchingApplication PathPlusParams="/usr/X11R6/bin/xterm -T 'Lazarus Run Output' -e $(LazarusDir)/tools/runwait.sh $(TargetCmdLine)"/>
           </local>
         </Mode0>

+ 3 - 0
packages/fcl-json/tests/testjson.pp

@@ -17,6 +17,9 @@
 program testjson;
 
 uses
+  {$ifdef unix}
+  cwstring,
+  {$endif}
   Classes, testjsondata, testjsonparser, testjsonrtti, consoletestrunner, testjsonreader;
 
 type

+ 8 - 0
packages/fcl-json/tests/testjsondata.pp

@@ -3993,6 +3993,11 @@ begin
 end;
 
 procedure TTestJSONString.TestJSONStringToString;
+
+Const
+  // Glowing star in UTF8
+  GlowingStar = #$F0#$9F#$8C#$9F;
+
 begin
   TestFrom('','');
   TestFrom('A','A');
@@ -4029,6 +4034,9 @@ begin
   TestFrom('\n\n',#10#10);
   TestFrom('\f\f',#12#12);
   TestFrom('\r\r',#13#13);
+  TestFrom('\u00f8','ø'); // this is ø
+  TestFrom('\u00f8\"','ø"'); // this is ø"
+  TestFrom('\ud83c\udf1f',GlowingStar);
 end;
 
 procedure TTestJSONString.TestStringToJSONString;

+ 29 - 14
packages/fcl-json/tests/testjsonparser.pp

@@ -37,6 +37,7 @@ type
     procedure DoTestFloat(F: TJSONFloat); overload;
     procedure DoTestFloat(F: TJSONFloat; S: String); overload;
     procedure DoTestObject(S: String; const ElNames: array of String; DoJSONTest : Boolean = True);
+    procedure DoTestString(S : String; AResult : String);
     procedure DoTestString(S : String);
     procedure DoTestArray(S: String; ACount: Integer; IgnoreJSON: Boolean=False);
     Procedure DoTestClass(S : String; AClass : TJSONDataClass);
@@ -79,7 +80,7 @@ Var
   J : TJSONData;
   
 begin
-  P:=TJSONParser.Create('');
+  P:=TJSONParser.Create('',[joUTF8]);
   Try
     J:=P.Parse;
     If (J<>Nil) then
@@ -97,7 +98,7 @@ Var
   J : TJSONData;
 
 begin
-  P:=TJSONParser.Create('1');
+  P:=TJSONParser.Create('1',[joUTF8]);
   Try
     J:=P.Parse;
     If (J=Nil) then
@@ -117,7 +118,7 @@ Var
   J : TJSONData;
 
 begin
-  P:=TJSONParser.Create('123456789012345');
+  P:=TJSONParser.Create('123456789012345',[joUTF8]);
   Try
     J:=P.Parse;
     If (J=Nil) then
@@ -137,7 +138,7 @@ Var
   J : TJSONData;
 
 begin
-  P:=TJSONParser.Create('null');
+  P:=TJSONParser.Create('null',[joUTF8]);
   Try
     J:=P.Parse;
     If (J=Nil) then
@@ -156,7 +157,7 @@ Var
   J : TJSONData;
 
 begin
-  P:=TJSONParser.Create('true');
+  P:=TJSONParser.Create('true',[joUTF8]);
   Try
     J:=P.Parse;
     If (J=Nil) then
@@ -176,7 +177,7 @@ Var
   J : TJSONData;
 
 begin
-  P:=TJSONParser.Create('false');
+  P:=TJSONParser.Create('false',[joUTF8]);
   Try
     J:=P.Parse;
     If (J=Nil) then
@@ -206,10 +207,18 @@ end;
 
 procedure TTestParser.TestString;
 
+Const
+  // Glowing star in UTF8
+  GlowingStar = #$F0#$9F#$8C#$9F;
+
 begin
   DoTestString('A string');
   DoTestString('');
   DoTestString('\"');
+  DoTestString('\u00f8','ø'); // this is ø
+  DoTestString('\u00f8\"','ø"'); // this is ø"
+//  Writeln(GlowingStar);
+  DoTestString('\ud83c\udf1f',GlowingStar);
 end;
 
 
@@ -348,7 +357,7 @@ Var
 
 begin
   J:=Nil;
-  P:=TJSONParser.Create(S);
+  P:=TJSONParser.Create(S,[joUTF8]);
   Try
     P.Options:=FOptions;
     J:=P.Parse;
@@ -400,7 +409,7 @@ Var
   D : TJSONData;
 
 begin
-  P:=TJSONParser.Create(S);
+  P:=TJSONParser.Create(S,[joUTF8]);
   try
     D:=P.Parse;
     try
@@ -536,7 +545,7 @@ Var
 
 begin
   ParseOK:=False;
-  P:=TJSONParser.Create(S);
+  P:=TJSONParser.Create(S,[joUTF8]);
   P.OPtions:=Options;
   J:=Nil;
   Try
@@ -561,24 +570,30 @@ end;
 
 procedure TTestParser.DoTestString(S: String);
 
+begin
+  DoTestString(S,JSONStringToString(S));
+end;
+
+procedure TTestParser.DoTestString(S: String; AResult : String);
+
 Var
   P : TJSONParser;
   J : TJSONData;
 
 begin
-  P:=TJSONParser.Create('"'+S+'"');
+  P:=TJSONParser.Create('"'+S+'"',[joUTF8]);
   Try
     J:=P.Parse;
     If (J=Nil) then
       Fail('Parse of string "'+S+'" fails');
     TestJSONType(J,jtString);
-    TestAsString(J,JSONStringToString(S));
-    TestJSON(J,'"'+S+'"');
+    TestAsString(J,aResult);
+    if Pos('\u',S)=0 then
+      TestJSON(J,'"'+S+'"');
   Finally
     FreeAndNil(J);
     FreeAndNil(P);
   end;
-
 end;
 
 procedure TTestParser.DoTestFloat(F : TJSONFloat);
@@ -598,7 +613,7 @@ Var
   J : TJSONData;
 
 begin
-  P:=TJSONParser.Create(S);
+  P:=TJSONParser.Create(S,[joUTF8]);
   Try
     J:=P.Parse;
     If (J=Nil) then