{ tw29585.pp }
  { Test program: feeds the same byte sequences to UTF8ToString through
    several kinds of source containers and checks the UTF-16 result. }
  program tw29585;

  {$IFDEF FPC}
    {$MODE OBJFPC}{$H+}
  {$ELSE}
    {$APPTYPE Console}
  {$ENDIF}

  uses
    {$ifndef FPC}Windows,{$endif}Sysutils;

  {$IFNDEF FPC}
  type
    { Local declaration used by the initstr signature when not compiling
      with FPC. }
    tsystemcodepage = word;
  {$ENDIF}

  type
    { Distinct ansistring type declared with code page 1251. }
    tstr1251 = type ansistring(1251);

  const
    { 11 bytes forming the UTF-8 encoding of U+00E9, U+00BA, U+00EE,
      U+0153 and U+2202. }
    utf8data: array[0..10] of ansichar = #$C3#$A9#$C2#$BA#$C3#$AE#$C5#$93#$E2#$88#$82;

    { UTF-16 string the tests expect UTF8ToString to produce for utf8data. }
    utf8data_in_utf16: unicodestring = #$00E9#$00BA#$00EE#$0153#$2202;

    { Four bytes in the range $80..$83 with no preceding UTF-8 lead byte. }
    invalidutf8data: array[0..3] of ansichar = #$80#$81#$82#$83;

    { The two results the tests accept for invalidutf8data: four question
      marks, or four U+FFFD code units. }
    invalidutf8data_utf_16a: unicodestring = '????';
    invalidutf8data_utf_16b: unicodestring = #$fffd#$fffd#$fffd#$fffd;
  { Reports a failed check and terminates the program.

    l: number of the failing test; it is printed and also passed to halt,
       so it becomes the process exit code.
    u: the string the test actually obtained; every element is written as
       '#$' followed by its ordinal value in hexadecimal (at least two
       digits). }
  procedure error(l: longint; const u: unicodestring);
    var
      idx: longint;
    begin
      writeln('error for test ',l);
      write('result: ');
      idx:=low(u);
      while idx<=high(u) do
        begin
          write('#$',inttohex(ord(u[idx]),2));
          inc(idx);
        end;
      writeln;
      { Does not return: the test number is the exit status. }
      halt(l);
    end;
  { Copies the ordinal value of every element of data into the bytes
    starting at p, using the same index for source and destination.

    p:    destination; the caller must provide at least length(data)
          writable bytes, as nothing is checked here.
    data: characters to copy. }
  procedure initarray(p: pbyte; const data: array of ansichar);
    var
      k: longint;
    begin
      { The stores are independent of each other, so the direction of the
        walk does not matter. }
      for k:=high(data) downto low(data) do
        p[k]:=byte(data[k]);
    end;
  { Fills s with the characters of data and sets its code page to cp.

    s:    receives the result; resized to length(data).
    cp:   code page to set on s. SetCodePage is called with Convert=false,
          so only the code page is set and the data is not converted.
    data: raw characters stored into s one by one. }
  procedure initstr(var s: rawbytestring; cp: tsystemcodepage; const data: array of ansichar); overload;
    var
      count, k: longint;
    begin
      count:=length(data);
      setlength(s,count);
      setcodepage(s,cp,false);
      { String positions are 1-based, the open array is 0-based. }
      for k:=1 to count do
        s[k]:=data[k-1];
    end;
  { Shortstring overload: fills s with the characters of data.

    s:    receives the result; its length is set to length(data).
    data: raw characters stored into s one by one. }
  procedure initstr(var s: shortstring; const data: array of ansichar); overload;
    var
      count, k: longint;
    begin
      count:=length(data);
      setlength(s,count);
      { String positions are 1-based, the open array is 0-based. }
      for k:=1 to count do
        s[k]:=data[k-1];
    end;
  { Passes utf8data to UTF8ToString through eight kinds of source and
    requires the result to equal utf8data_in_utf16 every time. A mismatch
    is reported through error with the test number (1..8), which ends the
    program. }
  procedure testvalidutf8;
    var
      s1251: tstr1251;
      rs: rawbytestring;
      utf8: utf8string;
      s: ansistring;
      ss: shortstring;
      ba: array[low(utf8data)..high(utf8data)] of byte;
      bc: array[low(utf8data)..high(utf8data)] of ansichar;
      { One element longer than the data, for the terminating #0. }
      bcc: array[low(utf8data)..high(utf8data)+1] of ansichar;

    { Reports test number testnr as failed unless actual is the expected
      UTF-16 string. }
    procedure expect(testnr: longint; const actual: unicodestring);
      begin
        if actual<>utf8data_in_utf16 then
          error(testnr,actual);
      end;

    begin
      { 1: string type declared with code page 1251 }
      initstr(rawbytestring(s1251),1251,utf8data);
      expect(1,UTF8ToString(s1251));

      { 2: rawbytestring whose code page is set to 0 }
      initstr(rs,0,utf8data);
      expect(2,UTF8ToString(rs));

      { 3: utf8string whose code page is set to CP_UTF8 }
      initstr(rawbytestring(utf8),CP_UTF8,utf8data);
      expect(3,UTF8ToString(utf8));

      { 4: ansistring whose code page is set to defaultsystemcodepage }
      initstr(rawbytestring(s),defaultsystemcodepage,utf8data);
      expect(4,UTF8ToString(s));

      { 5: shortstring }
      initstr(ss,utf8data);
      expect(5,UTF8ToString(ss));

      { 6: pointer to a #0-terminated character buffer }
      initarray(@bcc[0],utf8data);
      bcc[high(bcc)]:=#0;
      expect(6,UTF8ToString(@bcc[0]));

  {$ifndef cpujvm}
      { 7: static array of byte }
      initarray(@ba[0],utf8data);
      expect(7,UTF8ToString(ba));

      { 8: static array of ansichar }
      initarray(@bc[0],utf8data);
      expect(8,UTF8ToString(bc));
  {$endif not cpujvm}
    end;
  { Passes invalidutf8data to UTF8ToString through eight kinds of source.
    Each result must equal either invalidutf8data_utf_16a or
    invalidutf8data_utf_16b. Anything else is reported through error with
    the test number (11..18), which ends the program. }
  procedure testinvalidutf8;
    var
      s1251: tstr1251;
      rs: rawbytestring;
      utf8: utf8string;
      s: ansistring;
      ss: shortstring;
      ba: array[low(invalidutf8data)..high(invalidutf8data)] of byte;
      bc: array[low(invalidutf8data)..high(invalidutf8data)] of ansichar;
      { One element longer than the data, for the terminating #0. }
      bcc: array[low(invalidutf8data)..high(invalidutf8data)+1] of ansichar;

    { Reports test number testnr as failed unless actual matches one of
      the two accepted results. }
    procedure expect(testnr: longint; const actual: unicodestring);
      begin
        if (actual=invalidutf8data_utf_16a) or
           (actual=invalidutf8data_utf_16b) then
          exit;
        error(testnr,actual);
      end;

    begin
      { 11: string type declared with code page 1251 }
      initstr(rawbytestring(s1251),1251,invalidutf8data);
      expect(11,UTF8ToString(s1251));

      { 12: rawbytestring whose code page is set to 0 }
      initstr(rs,0,invalidutf8data);
      expect(12,UTF8ToString(rs));

      { 13: utf8string whose code page is set to CP_UTF8 }
      initstr(rawbytestring(utf8),CP_UTF8,invalidutf8data);
      expect(13,UTF8ToString(utf8));

      { 14: ansistring whose code page is set to defaultsystemcodepage }
      initstr(rawbytestring(s),defaultsystemcodepage,invalidutf8data);
      expect(14,UTF8ToString(s));

      { 15: shortstring }
      initstr(ss,invalidutf8data);
      expect(15,UTF8ToString(ss));

      { 16: pointer to a #0-terminated character buffer }
      initarray(@bcc[0],invalidutf8data);
      bcc[high(bcc)]:=#0;
      expect(16,UTF8ToString(@bcc[0]));

  {$ifndef cpujvm}
      { 17: static array of byte }
      initarray(@ba[0],invalidutf8data);
      expect(17,UTF8ToString(ba));

      { 18: static array of ansichar }
      initarray(@bc[0],invalidutf8data);
      expect(18,UTF8ToString(bc));
  {$endif not cpujvm}
    end;
  { Program entry point: runs the valid-input checks first, then the
    invalid-input checks. Either procedure ends the program through error
    on its first failing check. }
  begin
    testvalidutf8;
    testinvalidutf8;
  end.