Browse Source

+ add support for Unicode code point constants > $FFFF; they are converted to a surrogate pair so they are in
fact a UnicodeString constant
+ added tests

git-svn-id: trunk@39123 -

svenbarth 7 years ago
parent
commit
f077c7d950
8 changed files with 98 additions and 6 deletions
  1. 6 0
      .gitattributes
  2. 14 6
      compiler/scanner.pas
  3. 10 0
      tests/test/twide10.pp
  4. 10 0
      tests/test/twide11.pp
  5. 10 0
      tests/test/twide12.pp
  6. 10 0
      tests/test/twide13.pp
  7. 28 0
      tests/test/twide8.pp
  8. 10 0
      tests/test/twide9.pp

+ 6 - 0
.gitattributes

@@ -13770,12 +13770,18 @@ tests/test/tweaklib2.pp svneol=native#text/plain
 tests/test/tweaklib3.pp svneol=native#text/plain
 tests/test/tweaklib3.pp svneol=native#text/plain
 tests/test/tweaklib4.pp svneol=native#text/plain
 tests/test/tweaklib4.pp svneol=native#text/plain
 tests/test/twide1.pp svneol=native#text/plain
 tests/test/twide1.pp svneol=native#text/plain
+tests/test/twide10.pp svneol=native#text/pascal
+tests/test/twide11.pp svneol=native#text/pascal
+tests/test/twide12.pp svneol=native#text/pascal
+tests/test/twide13.pp svneol=native#text/pascal
 tests/test/twide2.pp svneol=native#text/plain
 tests/test/twide2.pp svneol=native#text/plain
 tests/test/twide3.pp svneol=native#text/plain
 tests/test/twide3.pp svneol=native#text/plain
 tests/test/twide4.pp svneol=native#text/plain
 tests/test/twide4.pp svneol=native#text/plain
 tests/test/twide5.pp svneol=native#text/plain
 tests/test/twide5.pp svneol=native#text/plain
 tests/test/twide6.pp svneol=native#text/plain
 tests/test/twide6.pp svneol=native#text/plain
 tests/test/twide7.pp svneol=native#text/plain
 tests/test/twide7.pp svneol=native#text/plain
+tests/test/twide8.pp svneol=native#text/pascal
+tests/test/twide9.pp svneol=native#text/pascal
 tests/test/twrstr1.pp svneol=native#text/plain
 tests/test/twrstr1.pp svneol=native#text/plain
 tests/test/twrstr2.pp svneol=native#text/plain
 tests/test/twrstr2.pp svneol=native#text/plain
 tests/test/twrstr3.pp svneol=native#text/plain
 tests/test/twrstr3.pp svneol=native#text/plain

+ 14 - 6
compiler/scanner.pas

@@ -5104,7 +5104,7 @@ type
                              begin
                              begin
                                readchar; { read leading $ }
                                readchar; { read leading $ }
                                asciinr:='$';
                                asciinr:='$';
-                               while (upcase(c) in ['A'..'F','0'..'9']) and (length(asciinr)<=5) do
+                               while (upcase(c) in ['A'..'F','0'..'9']) and (length(asciinr)<=7) do
                                  begin
                                  begin
                                    asciinr:=asciinr+c;
                                    asciinr:=asciinr+c;
                                    readchar;
                                    readchar;
@@ -5114,7 +5114,7 @@ type
                              begin
                              begin
                                readchar; { read leading $ }
                                readchar; { read leading $ }
                                asciinr:='&';
                                asciinr:='&';
-                               while (upcase(c) in ['0'..'7']) and (length(asciinr)<=7) do
+                               while (upcase(c) in ['0'..'7']) and (length(asciinr)<=8) do
                                  begin
                                  begin
                                    asciinr:=asciinr+c;
                                    asciinr:=asciinr+c;
                                    readchar;
                                    readchar;
@@ -5124,7 +5124,7 @@ type
                              begin
                              begin
                                readchar; { read leading $ }
                                readchar; { read leading $ }
                                asciinr:='%';
                                asciinr:='%';
-                               while (upcase(c) in ['0','1']) and (length(asciinr)<=17) do
+                               while (upcase(c) in ['0','1']) and (length(asciinr)<=22) do
                                  begin
                                  begin
                                    asciinr:=asciinr+c;
                                    asciinr:=asciinr+c;
                                    readchar;
                                    readchar;
@@ -5133,7 +5133,7 @@ type
                            else
                            else
                              begin
                              begin
                                asciinr:='';
                                asciinr:='';
-                               while (c in ['0'..'9']) and (length(asciinr)<=5) do
+                               while (c in ['0'..'9']) and (length(asciinr)<=8) do
                                  begin
                                  begin
                                    asciinr:=asciinr+c;
                                    asciinr:=asciinr+c;
                                    readchar;
                                    readchar;
@@ -5145,7 +5145,7 @@ type
                            Message(scan_e_illegal_char_const)
                            Message(scan_e_illegal_char_const)
                          else if (m<0) or (m>255) or (length(asciinr)>3) then
                          else if (m<0) or (m>255) or (length(asciinr)>3) then
                            begin
                            begin
-                              if (m>=0) and (m<=65535) then
+                              if (m>=0) and (m<=$10FFFF) then
                                 begin
                                 begin
                                   if not iswidestring then
                                   if not iswidestring then
                                    begin
                                    begin
@@ -5156,7 +5156,15 @@ type
                                      iswidestring:=true;
                                      iswidestring:=true;
                                      len:=0;
                                      len:=0;
                                    end;
                                    end;
-                                  concatwidestringchar(patternw,tcompilerwidechar(m));
+                                  if m<=$FFFF then
+                                    concatwidestringchar(patternw,tcompilerwidechar(m))
+                                  else
+                                    begin
+                                      { split into surrogate pair }
+                                      dec(m,$10000);
+                                      concatwidestringchar(patternw,tcompilerwidechar((m shr 10) + $D800));
+                                      concatwidestringchar(patternw,tcompilerwidechar((m and $3FF) + $DC00));
+                                    end;
                                 end
                                 end
                               else
                               else
                                 Message(scan_e_illegal_char_const)
                                 Message(scan_e_illegal_char_const)

+ 10 - 0
tests/test/twide10.pp

@@ -0,0 +1,10 @@
+{ %FAIL }
+
+program twide10;
+
+var
+  s: UnicodeString;
+begin
+  { this is greater than the highest defined Unicode code point }
+  s := #$110000;
+end.

+ 10 - 0
tests/test/twide11.pp

@@ -0,0 +1,10 @@
+{ %FAIL }
+
+program twide11;
+
+var
+  s: UnicodeString;
+begin
+  { this is greater than the highest defined Unicode code point }
+  s := #1114112;
+end.

+ 10 - 0
tests/test/twide12.pp

@@ -0,0 +1,10 @@
+{ %FAIL }
+
+program twide12;
+
+var
+  s: UnicodeString;
+begin
+  { this is greater than the highest defined Unicode code point }
+  s := #&4200000;
+end.

+ 10 - 0
tests/test/twide13.pp

@@ -0,0 +1,10 @@
+{ %FAIL }
+
+program twide13;
+
+var
+  s: UnicodeString;
+begin
+  { this is greater than the highest defined Unicode code point }
+  s := #%100010000000000000000;
+end.

+ 28 - 0
tests/test/twide8.pp

@@ -0,0 +1,28 @@
+program twide8;
+
+procedure Check(const aStr: UnicodeString; aIndex: LongInt);
+const
+  Char1 = #$DBFF;
+  Char2 = #$DFFF;
+begin
+  if Length(aStr) <> 2 then
+    Halt(aIndex * 3);
+  if aStr[1] <> Char1 then
+    Halt(aIndex * 3 + 1);
+  if aStr[2] <> Char2 then
+    Halt(aIndex * 3 + 2);
+end;
+
+var
+  s: UnicodeString;
+begin
+  s := #$10FFFF;
+  Check(s, 1);
+  s := #1114111;
+  Check(s, 2);
+  s := #&4177777;
+  Check(s, 3);
+  s := #%100001111111111111111;
+  Check(s, 4);
+  Writeln('ok');
+end.

+ 10 - 0
tests/test/twide9.pp

@@ -0,0 +1,10 @@
+{ %FAIL }
+
+program twide9;
+
+var
+  u: UnicodeChar;
+begin
+  { fails, because a code point > $FFFF decodes to a surrogate pair, thus a string constant }
+  u := #$10FFFF;
+end.