tunistr6.pp 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397
  1. {%skiptarget=wince}
  2. {$codepage utf-8}
  3. uses
  4. {$ifdef unix}
  5. cwstring,
  6. {$endif}
  7. sysutils;
  8. procedure doerror(i : integer);
  9. begin
  10. writeln('Error: ',i);
  11. halt(i);
  12. end;
  13. { normal upper case testing }
  14. procedure testupper;
  15. var
  16. s: ansistring;
  17. w1,w2,w3,w4: unicodestring;
  18. i: longint;
  19. begin
  20. w1:='aé'#0'èàł'#$d87e#$dc04;
  21. w2:='AÉ'#0'ÈÀŁ'#$d87e#$dc04;
  22. {$ifdef print}
  23. // the utf-8 output can confuse the testsuite parser
  24. writeln('original: ',w1);
  25. writeln('original upper: ',w2);
  26. {$endif print}
  27. s:=w1;
  28. {$ifdef print}
  29. writeln('ansi: ',s);
  30. {$endif print}
  31. w3:=s;
  32. w4:=AnsiUpperCase(s);
  33. { filter out unsupported characters }
  34. for i:=1 to length(w3) do
  35. if w3[i]='?' then
  36. begin
  37. w2[i]:='?';
  38. w1[i]:='?';
  39. end;
  40. w1:=UnicodeUpperCase(w1);
  41. {$ifdef print}
  42. writeln('unicodeupper: ',w1);
  43. writeln('original upper: ',w2);
  44. writeln('ansiupper: ',w4);
  45. {$endif print}
  46. if (w1 <> w2) then
  47. doerror(1);
  48. if (w4 <> w2) then
  49. doerror(2);
  50. w1:='aéèàł'#$d87e#$dc04;
  51. w2:='AÉÈÀŁ'#$d87e#$dc04;
  52. s:=w1;
  53. w3:=s;
  54. w4:=AnsiStrUpper(pchar(s));
  55. { filter out unsupported characters }
  56. for i:=1 to length(w3) do
  57. if w3[i]='?' then
  58. begin
  59. w2[i]:='?';
  60. w1[i]:='?';
  61. end;
  62. w1:=UnicodeUpperCase(w1);
  63. {$ifdef print}
  64. writeln('unicodeupper: ',w1);
  65. writeln('ansistrupper: ',w4);
  66. {$endif print}
  67. if (w1 <> w2) then
  68. doerror(21);
  69. if (w4 <> w2) then
  70. doerror(22);
  71. end;
  72. { normal lower case testing }
  73. procedure testlower;
  74. var
  75. s: ansistring;
  76. w1,w2,w3,w4: unicodestring;
  77. i: longint;
  78. begin
  79. w1:='AÉ'#0'ÈÀŁ'#$d87e#$dc04;
  80. w2:='aé'#0'èàł'#$d87e#$dc04;
  81. {$ifdef print}
  82. // the utf-8 output can confuse the testsuite parser
  83. writeln('original: ',w1);
  84. writeln('original lower: ',w2);
  85. {$endif print}
  86. s:=w1;
  87. w3:=s;
  88. w4:=AnsiLowerCase(s);
  89. { filter out unsupported characters }
  90. for i:=1 to length(w3) do
  91. if w3[i]='?' then
  92. begin
  93. w2[i]:='?';
  94. w1[i]:='?';
  95. end;
  96. w1:=UnicodeLowerCase(w1);
  97. {$ifdef print}
  98. writeln('unicodelower: ',w1);
  99. writeln('ansilower: ',w4);
  100. {$endif print}
  101. if (w1 <> w2) then
  102. doerror(3);
  103. if (w4 <> w2) then
  104. doerror(4);
  105. w1:='AÉÈÀŁ'#$d87e#$dc04;
  106. w2:='aéèàł'#$d87e#$dc04;
  107. s:=w1;
  108. w3:=s;
  109. w4:=AnsiStrLower(pchar(s));
  110. { filter out unsupported characters }
  111. for i:=1 to length(w3) do
  112. if w3[i]='?' then
  113. begin
  114. w2[i]:='?';
  115. w1[i]:='?';
  116. end;
  117. w1:=UnicodeLowerCase(w1);
  118. {$ifdef print}
  119. writeln('unicodelower: ',w1);
  120. writeln('ansistrlower: ',w4);
  121. {$endif print}
  122. if (w1 <> w2) then
  123. doerror(3);
  124. if (w4 <> w2) then
  125. doerror(4);
  126. end;
  127. { upper case testing with a missing utf-16 pair at the end }
  128. procedure testupperinvalid;
  129. var
  130. s: ansistring;
  131. w1,w2,w3,w4: unicodestring;
  132. i: longint;
  133. begin
  134. { missing utf-16 pair at end }
  135. w1:='aé'#0'èàł'#$d87e;
  136. w2:='AÉ'#0'ÈÀŁ'#$d87e;
  137. {$ifdef print}
  138. // the utf-8 output can confuse the testsuite parser
  139. writeln('original: ',w1);
  140. writeln('original upper: ',w2);
  141. {$endif print}
  142. s:=w1;
  143. w3:=s;
  144. w4:=AnsiUpperCase(s);
  145. { filter out unsupported characters }
  146. for i:=1 to length(w3) do
  147. if w3[i]='?' then
  148. begin
  149. w2[i]:='?';
  150. w1[i]:='?';
  151. end;
  152. w1:=UnicodeUpperCase(w1);
  153. {$ifdef print}
  154. writeln('unicodeupper: ',w1);
  155. writeln('ansiupper: ',w4);
  156. {$endif print}
  157. if (w1 <> w2) then
  158. doerror(5);
  159. if (w4 <> w2) then
  160. doerror(6);
  161. end;
  162. { lower case testing with a missing utf-16 pair at the end }
  163. procedure testlowerinvalid;
  164. var
  165. s: ansistring;
  166. w1,w2,w3,w4: unicodestring;
  167. i: longint;
  168. begin
  169. { missing utf-16 pair at end}
  170. w1:='AÉ'#0'ÈÀŁ'#$d87e;
  171. w2:='aé'#0'èàł'#$d87e;
  172. {$ifdef print}
  173. // the utf-8 output can confuse the testsuite parser
  174. writeln('original: ',w1);
  175. writeln('original lower: ',w2);
  176. {$endif print}
  177. s:=w1;
  178. w3:=s;
  179. w4:=AnsiLowerCase(s);
  180. { filter out unsupported characters }
  181. for i:=1 to length(w3) do
  182. if w3[i]='?' then
  183. begin
  184. w2[i]:='?';
  185. w1[i]:='?';
  186. end;
  187. w1:=UnicodeLowerCase(w1);
  188. {$ifdef print}
  189. writeln('unicodelower: ',w1);
  190. writeln('ansilower: ',w4);
  191. {$endif print}
  192. if (w1 <> w2) then
  193. doerror(7);
  194. if (w4 <> w2) then
  195. doerror(8);
  196. end;
  197. { upper case testing with a missing utf-16 pair at the end, followed by a normal char }
  198. procedure testupperinvalid1;
  199. var
  200. s: ansistring;
  201. w1,w2,w3,w4: unicodestring;
  202. i: longint;
  203. begin
  204. { missing utf-16 pair at end with char after it}
  205. w1:='aé'#0'èàł'#$d87e'j';
  206. w2:='AÉ'#0'ÈÀŁ'#$d87e'J';
  207. {$ifdef print}
  208. // the utf-8 output can confuse the testsuite parser
  209. writeln('original: ',w1);
  210. writeln('original upper: ',w2);
  211. {$endif print}
  212. s:=w1;
  213. w3:=s;
  214. w4:=AnsiUpperCase(s);
  215. { filter out unsupported characters }
  216. for i:=1 to length(w3) do
  217. if w3[i]='?' then
  218. begin
  219. w2[i]:='?';
  220. w1[i]:='?';
  221. end;
  222. w1:=UnicodeUpperCase(w1);
  223. {$ifdef print}
  224. writeln('unicodeupper: ',w1);
  225. writeln('ansiupper: ',w4);
  226. {$endif print}
  227. if (w1 <> w2) then
  228. doerror(9);
  229. if (w4 <> w2) then
  230. doerror(10);
  231. end;
  232. { lower case testing with a missing utf-16 pair at the end, followed by a normal char }
  233. procedure testlowerinvalid1;
  234. var
  235. s: ansistring;
  236. w1,w2,w3,w4: unicodestring;
  237. i: longint;
  238. begin
  239. { missing utf-16 pair at end with char after it}
  240. w1:='AÉ'#0'ÈÀŁ'#$d87e'J';
  241. w2:='aé'#0'èàł'#$d87e'j';
  242. {$ifdef print}
  243. // the utf-8 output can confuse the testsuite parser
  244. writeln('original: ',w1);
  245. writeln('original lower: ',w2);
  246. {$endif print}
  247. s:=w1;
  248. w3:=s;
  249. w4:=AnsiLowerCase(s);
  250. { filter out unsupported characters }
  251. for i:=1 to length(w3) do
  252. if w3[i]='?' then
  253. begin
  254. w2[i]:='?';
  255. w1[i]:='?';
  256. end;
  257. w1:=UnicodeLowerCase(w1);
  258. {$ifdef print}
  259. writeln('unicodelower: ',w1);
  260. writeln('ansilower: ',w4);
  261. {$endif print}
  262. if (w1 <> w2) then
  263. doerror(11);
  264. if (w4 <> w2) then
  265. doerror(12);
  266. end;
  267. { upper case testing with corrupting the utf-8 string after conversion }
  268. procedure testupperinvalid2;
  269. var
  270. s: ansistring;
  271. w1,w2,w3,w4: unicodestring;
  272. i: longint;
  273. begin
  274. w1:='aé'#0'èàł'#$d87e#$dc04'ö';
  275. w2:='AÉ'#0'ÈÀŁ'#$d87e#$dc04'Ö';
  276. {$ifdef print}
  277. // the utf-8 output can confuse the testsuite parser
  278. writeln('original: ',w1);
  279. writeln('original upper: ',w2);
  280. {$endif print}
  281. s:=w1;
  282. { truncate the last utf-8 character }
  283. setlength(s,length(s)-1);
  284. w3:=s;
  285. { adjust checking values for new length due to corruption }
  286. if length(w3)<>length(w2) then
  287. begin
  288. setlength(w2,length(w3));
  289. setlength(w1,length(w3));
  290. end;
  291. w4:=AnsiUpperCase(s);
  292. { filter out unsupported characters }
  293. for i:=1 to length(w3) do
  294. if w3[i]='?' then
  295. begin
  296. w2[i]:='?';
  297. w1[i]:='?';
  298. end;
  299. w1:=UnicodeUpperCase(w1);
  300. {$ifdef print}
  301. writeln('unicodeupper: ',w1);
  302. writeln('ansiupper: ',w4);
  303. {$endif print}
  304. if (w1 <> w2) then
  305. doerror(13);
  306. if (w4 <> w2) then
  307. doerror(14);
  308. end;
  309. { lower case testing with corrupting the utf-8 string after conversion }
  310. procedure testlowerinvalid2;
  311. var
  312. s: ansistring;
  313. w1,w2,w3,w4: unicodestring;
  314. i: longint;
  315. begin
  316. w1:='AÉ'#0'ÈÀŁ'#$d87e#$dc04'Ö';
  317. w2:='aé'#0'èàł'#$d87e#$dc04'ö';
  318. {$ifdef print}
  319. // the utf-8 output can confuse the testsuite parser
  320. writeln('original: ',w1);
  321. writeln('original lower: ',w2);
  322. {$endif print}
  323. s:=w1;
  324. { truncate the last utf-8 character }
  325. setlength(s,length(s)-1);
  326. w3:=s;
  327. { adjust checking values for new length due to corruption }
  328. if length(w3)<>length(w2) then
  329. begin
  330. setlength(w2,length(w3));
  331. setlength(w1,length(w3));
  332. end;
  333. w4:=AnsiLowerCase(s);
  334. { filter out unsupported characters }
  335. for i:=1 to length(w3) do
  336. if w3[i]='?' then
  337. begin
  338. w2[i]:='?';
  339. w1[i]:='?';
  340. end;
  341. w1:=UnicodeLowerCase(w1);
  342. {$ifdef print}
  343. writeln('unicodelower: ',w1);
  344. writeln('ansilower: ',w4);
  345. {$endif print}
  346. if (w1 <> w2) then
  347. doerror(15);
  348. if (w4 <> w2) then
  349. doerror(16);
  350. end;
  351. begin
  352. testupper;
  353. writeln;
  354. testlower;
  355. writeln;
  356. writeln;
  357. testupperinvalid;
  358. writeln;
  359. testlowerinvalid;
  360. writeln;
  361. writeln;
  362. testupperinvalid1;
  363. writeln;
  364. testlowerinvalid1;
  365. writeln;
  366. writeln;
  367. testupperinvalid2;
  368. writeln;
  369. testlowerinvalid2;
  370. writeln('ok');
  371. end.