utf8.c 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791
  1. #include "test.h"
  2. /*
  3. * g_utf16_to_utf8
  4. */
  5. glong
  6. compare_strings_utf8_pos (const gchar *expected, const gchar *actual, glong size)
  7. {
  8. int i;
  9. for (i = 0; i < size; i++)
  10. if (expected [i] != actual [i])
  11. return i;
  12. return -1;
  13. }
  14. RESULT
  15. compare_strings_utf8_RESULT (const gchar *expected, const gchar *actual, glong size)
  16. {
  17. glong ret;
  18. ret = compare_strings_utf8_pos (expected, actual, size);
  19. if (ret < 0)
  20. return OK;
  21. return FAILED ("Incorrect output: expected '%s' but was '%s', differ at %d\n", expected, actual, ret);
  22. }
  23. void
  24. gchar_to_gunichar2 (gunichar2 ret[], const gchar *src)
  25. {
  26. int i;
  27. for (i = 0; src [i]; i++)
  28. ret [i] = src [i];
  29. ret [i] = 0;
  30. }
  31. RESULT
  32. compare_utf16_to_utf8_explicit (const gchar *expected, const gunichar2 *utf16, glong len_in, glong len_out, glong size_spec)
  33. {
  34. GError *error;
  35. gchar* ret;
  36. RESULT result;
  37. glong in_read, out_read;
  38. result = NULL;
  39. error = NULL;
  40. ret = g_utf16_to_utf8 (utf16, size_spec, &in_read, &out_read, &error);
  41. if (error) {
  42. result = FAILED ("The error is %d %s\n", (error)->code, (error)->message);
  43. g_error_free (error);
  44. if (ret)
  45. g_free (ret);
  46. return result;
  47. }
  48. if (in_read != len_in)
  49. result = FAILED ("Read size is incorrect: expected %d but was %d\n", len_in, in_read);
  50. else if (out_read != len_out)
  51. result = FAILED ("Converted size is incorrect: expected %d but was %d\n", len_out, out_read);
  52. else
  53. result = compare_strings_utf8_RESULT (expected, ret, len_out);
  54. g_free (ret);
  55. if (result)
  56. return result;
  57. return OK;
  58. }
  59. RESULT
  60. compare_utf16_to_utf8 (const gchar *expected, const gunichar2 *utf16, glong len_in, glong len_out)
  61. {
  62. RESULT result;
  63. result = compare_utf16_to_utf8_explicit (expected, utf16, len_in, len_out, -1);
  64. if (result != OK)
  65. return result;
  66. return compare_utf16_to_utf8_explicit (expected, utf16, len_in, len_out, len_in);
  67. }
  68. RESULT
  69. test_utf16_to_utf8 ()
  70. {
  71. const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27", *src3 = "\xEF\xBC\xA1", *src4 = "\xEF\xBD\x81", *src5 = "\xF0\x90\x90\x80";
  72. gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0}, str3 [] = {0xFF21, 0}, str4 [] = {0xFF41, 0}, str5 [] = {0xD801, 0xDC00, 0};
  73. RESULT result;
  74. gchar_to_gunichar2 (str1, src1);
  75. /* empty string */
  76. result = compare_utf16_to_utf8 (src0, str0, 0, 0);
  77. if (result != OK)
  78. return result;
  79. result = compare_utf16_to_utf8 (src1, str1, 5, 5);
  80. if (result != OK)
  81. return result;
  82. result = compare_utf16_to_utf8 (src2, str2, 2, 4);
  83. if (result != OK)
  84. return result;
  85. result = compare_utf16_to_utf8 (src3, str3, 1, 3);
  86. if (result != OK)
  87. return result;
  88. result = compare_utf16_to_utf8 (src4, str4, 1, 3);
  89. if (result != OK)
  90. return result;
  91. result = compare_utf16_to_utf8 (src5, str5, 2, 4);
  92. if (result != OK)
  93. return result;
  94. return OK;
  95. }
  96. /*
  97. * g_utf8_to_utf16
  98. */
  99. glong
  100. compare_strings_utf16_pos (const gunichar2 *expected, const gunichar2 *actual, glong size)
  101. {
  102. int i;
  103. for (i = 0; i < size; i++)
  104. if (expected [i] != actual [i])
  105. return i;
  106. return -1;
  107. }
  108. RESULT
  109. compare_strings_utf16_RESULT (const gunichar2 *expected, const gunichar2 *actual, glong size)
  110. {
  111. glong ret;
  112. ret = compare_strings_utf16_pos (expected, actual, size);
  113. if (ret < 0)
  114. return OK;
  115. return FAILED ("Incorrect output: expected '%s' but was '%s'\n", expected, actual);
  116. }
  117. RESULT
  118. compare_utf8_to_utf16_explicit (const gunichar2 *expected, const gchar *utf8, glong len_in, glong len_out, glong size_spec)
  119. {
  120. GError *error;
  121. gunichar2* ret;
  122. RESULT result;
  123. glong in_read, out_read;
  124. result = NULL;
  125. error = NULL;
  126. ret = g_utf8_to_utf16 (utf8, size_spec, &in_read, &out_read, &error);
  127. if (error) {
  128. result = FAILED ("The error is %d %s\n", (error)->code, (error)->message);
  129. g_error_free (error);
  130. if (ret)
  131. g_free (ret);
  132. return result;
  133. }
  134. if (in_read != len_in)
  135. result = FAILED ("Read size is incorrect: expected %d but was %d\n", len_in, in_read);
  136. else if (out_read != len_out)
  137. result = FAILED ("Converted size is incorrect: expected %d but was %d\n", len_out, out_read);
  138. else
  139. result = compare_strings_utf16_RESULT (expected, ret, len_out);
  140. g_free (ret);
  141. if (result)
  142. return result;
  143. return OK;
  144. }
  145. RESULT
  146. compare_utf8_to_utf16 (const gunichar2 *expected, const gchar *utf8, glong len_in, glong len_out)
  147. {
  148. RESULT result;
  149. result = compare_utf8_to_utf16_explicit (expected, utf8, len_in, len_out, -1);
  150. if (result != OK)
  151. return result;
  152. return compare_utf8_to_utf16_explicit (expected, utf8, len_in, len_out, len_in);
  153. }
  154. RESULT
  155. test_utf8_seq ()
  156. {
  157. const gchar *src = "\xE5\xB9\xB4\x27";
  158. glong in_read, out_read;
  159. //gunichar2 expected [6];
  160. GError *error = NULL;
  161. gunichar2 *dst;
  162. printf ("got: %s\n", src);
  163. dst = g_utf8_to_utf16 (src, (glong)strlen (src), &in_read, &out_read, &error);
  164. if (error != NULL){
  165. return error->message;
  166. }
  167. if (in_read != 4) {
  168. return FAILED ("in_read is expected to be 4 but was %d\n", in_read);
  169. }
  170. if (out_read != 2) {
  171. return FAILED ("out_read is expected to be 2 but was %d\n", out_read);
  172. }
  173. g_free (dst);
  174. return OK;
  175. }
  176. RESULT
  177. test_utf8_to_utf16 ()
  178. {
  179. const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27", *src3 = "\xEF\xBC\xA1", *src4 = "\xEF\xBD\x81";
  180. gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0}, str3 [] = {0xFF21, 0}, str4 [] = {0xFF41, 0};
  181. RESULT result;
  182. gchar_to_gunichar2 (str1, src1);
  183. /* empty string */
  184. result = compare_utf8_to_utf16 (str0, src0, 0, 0);
  185. if (result != OK)
  186. return result;
  187. result = compare_utf8_to_utf16 (str1, src1, 5, 5);
  188. if (result != OK)
  189. return result;
  190. result = compare_utf8_to_utf16 (str2, src2, 4, 2);
  191. if (result != OK)
  192. return result;
  193. result = compare_utf8_to_utf16 (str3, src3, 3, 1);
  194. if (result != OK)
  195. return result;
  196. result = compare_utf8_to_utf16 (str4, src4, 3, 1);
  197. if (result != OK)
  198. return result;
  199. return OK;
  200. }
  201. RESULT
  202. test_convert ()
  203. {
  204. gsize n;
  205. char *s = g_convert ("\242\241\243\242\241\243\242\241\243\242\241\243", -1, "UTF-8", "ISO-8859-1", NULL, &n, NULL);
  206. guchar *u = (guchar *) s;
  207. if (!s)
  208. return FAILED ("Expected 24 bytes, got: NULL");
  209. if (strlen (s) != 24)
  210. return FAILED ("Expected 24 bytes, got: %d", strlen (s));
  211. if (u [1] != 162 || u [2] != 194 ||
  212. u [3] != 161 || u [4] != 194 ||
  213. u [5] != 163 || u [6] != 194)
  214. return FAILED ("Incorrect conversion");
  215. g_free (s);
  216. return OK;
  217. }
  218. RESULT
  219. test_xdigit ()
  220. {
  221. static char test_chars[] = {
  222. '0', '1', '2', '3', '4',
  223. '5', '6', '7', '8', '9',
  224. 'a', 'b', 'c', 'd', 'e', 'f', 'g',
  225. 'A', 'B', 'C', 'D', 'E', 'F', 'G'};
  226. static gint32 test_values[] = {
  227. 0, 1, 2, 3, 4,
  228. 5, 6, 7, 8, 9,
  229. 10, 11, 12, 13, 14, 15, -1,
  230. 10, 11, 12, 13, 14, 15, -1};
  231. int i =0;
  232. for (i = 0; i < sizeof(test_chars); i++)
  233. if (g_unichar_xdigit_value ((gunichar)test_chars[i]) != test_values[i])
  234. return FAILED("Incorrect value %d at index %d", test_values[i], i);
  235. return OK;
  236. }
  237. static RESULT
  238. ucs4_to_utf16_check_result (const gunichar2 *result_str, const gunichar2 *expected_str,
  239. glong result_items_read, glong expected_items_read,
  240. glong result_items_written, glong expected_items_written,
  241. GError* result_error, gboolean expect_error)
  242. {
  243. glong i;
  244. if (result_items_read != expected_items_read)
  245. return FAILED("Incorrect number of items read %d", result_items_read);
  246. if (result_items_written != expected_items_written)
  247. return FAILED("Incorrect number of items written %d", result_items_written);
  248. if (result_error && !expect_error)
  249. return FAILED("There should not be an error code.");
  250. if (!result_error && expect_error)
  251. return FAILED("Unexpected error object.");
  252. if (expect_error && result_str)
  253. return FAILED("NULL should be returned when an error occurs.");
  254. if (!expect_error && !result_str)
  255. return FAILED("When no error occurs NULL should not be returned.");
  256. for (i=0; i<expected_items_written;i++) {
  257. if (result_str [i] != expected_str [i])
  258. return FAILED("Incorrect value %d at index %d", result_str [i], i);
  259. }
  260. if (result_str && result_str[expected_items_written] != '\0')
  261. return FAILED("Null termination not found at the end of the string.");
  262. return OK;
  263. }
  264. RESULT
  265. test_ucs4_to_utf16 ()
  266. {
  267. static gunichar str1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
  268. static gunichar2 exp1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
  269. static gunichar str2[3] = {'h',0x80000000,'\0'};
  270. static gunichar2 exp2[2] = {'h','\0'};
  271. static gunichar str3[3] = {'h',0xDA00,'\0'};
  272. static gunichar str4[3] = {'h',0x10FFFF,'\0'};
  273. static gunichar2 exp4[4] = {'h',0xdbff,0xdfff,'\0'};
  274. static gunichar str5[7] = {0xD7FF,0xD800,0xDFFF,0xE000,0x110000,0x10FFFF,'\0'};
  275. static gunichar2 exp5[5] = {0xD7FF,0xE000,0xdbff,0xdfff,'\0'};
  276. static gunichar str6[2] = {0x10400, '\0'};
  277. static gunichar2 exp6[3] = {0xD801, 0xDC00, '\0'};
  278. static glong read_write[12] = {1,1,0,0,0,0,1,1,0,0,1,2};
  279. gunichar2* res;
  280. glong items_read, items_written, current_write_index;
  281. GError* err=0;
  282. RESULT check_result;
  283. glong i;
  284. res = g_ucs4_to_utf16 (str1, 12, &items_read, &items_written, &err);
  285. check_result = ucs4_to_utf16_check_result (res, exp1, items_read, 11, items_written, 11, err, FALSE);
  286. if (check_result) return check_result;
  287. g_free (res);
  288. items_read = items_written = 0;
  289. res = g_ucs4_to_utf16 (str2, 0, &items_read, &items_written, &err);
  290. check_result = ucs4_to_utf16_check_result (res, exp2, items_read, 0, items_written, 0, err, FALSE);
  291. if (check_result) return check_result;
  292. g_free (res);
  293. items_read = items_written = 0;
  294. res = g_ucs4_to_utf16 (str2, 1, &items_read, &items_written, &err);
  295. check_result = ucs4_to_utf16_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE);
  296. if (check_result) return check_result;
  297. g_free (res);
  298. items_read = items_written = 0;
  299. res = g_ucs4_to_utf16 (str2, 2, &items_read, &items_written, &err);
  300. check_result = ucs4_to_utf16_check_result (res, 0, items_read, 1, items_written, 0, err, TRUE);
  301. g_free (res);
  302. if (check_result) return check_result;
  303. items_read = items_written = 0;
  304. err = 0;
  305. res = g_ucs4_to_utf16 (str3, 2, &items_read, &items_written, &err);
  306. check_result = ucs4_to_utf16_check_result (res, 0, items_read, 1, items_written, 0, err, TRUE);
  307. if (check_result) return check_result;
  308. g_free (res);
  309. items_read = items_written = 0;
  310. err = 0;
  311. res = g_ucs4_to_utf16 (str4, 5, &items_read, &items_written, &err);
  312. check_result = ucs4_to_utf16_check_result (res, exp4, items_read, 2, items_written, 3, err, FALSE);
  313. if (check_result) return check_result;
  314. g_free (res);
  315. // This loop tests the bounds of the conversion algorithm
  316. current_write_index = 0;
  317. for (i=0;i<6;i++) {
  318. items_read = items_written = 0;
  319. err = 0;
  320. res = g_ucs4_to_utf16 (&str5[i], 1, &items_read, &items_written, &err);
  321. check_result = ucs4_to_utf16_check_result (res, &exp5[current_write_index],
  322. items_read, read_write[i*2], items_written, read_write[(i*2)+1], err, !read_write[(i*2)+1]);
  323. if (check_result) return check_result;
  324. g_free (res);
  325. current_write_index += items_written;
  326. }
  327. items_read = items_written = 0;
  328. err = 0;
  329. res = g_ucs4_to_utf16 (str6, 1, &items_read, &items_written, &err);
  330. check_result = ucs4_to_utf16_check_result (res, exp6, items_read, 1, items_written, 2, err, FALSE);
  331. if (check_result) return check_result;
  332. g_free (res);
  333. return OK;
  334. }
  335. static RESULT
  336. utf16_to_ucs4_check_result (const gunichar *result_str, const gunichar *expected_str,
  337. glong result_items_read, glong expected_items_read,
  338. glong result_items_written, glong expected_items_written,
  339. GError* result_error, gboolean expect_error)
  340. {
  341. glong i;
  342. if (result_items_read != expected_items_read)
  343. return FAILED("Incorrect number of items read %d", result_items_read);
  344. if (result_items_written != expected_items_written)
  345. return FAILED("Incorrect number of items written %d", result_items_written);
  346. if (result_error && !expect_error)
  347. return FAILED("There should not be an error code.");
  348. if (!result_error && expect_error)
  349. return FAILED("Unexpected error object.");
  350. if (expect_error && result_str)
  351. return FAILED("NULL should be returned when an error occurs.");
  352. if (!expect_error && !result_str)
  353. return FAILED("When no error occurs NULL should not be returned.");
  354. for (i=0; i<expected_items_written;i++) {
  355. if (result_str [i] != expected_str [i])
  356. return FAILED("Incorrect value %d at index %d", result_str [i], i);
  357. }
  358. if (result_str && result_str[expected_items_written] != '\0')
  359. return FAILED("Null termination not found at the end of the string.");
  360. return OK;
  361. }
  362. RESULT
  363. test_utf16_to_ucs4 ()
  364. {
  365. static gunichar2 str1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
  366. static gunichar exp1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
  367. static gunichar2 str2[7] = {'H', 0xD800, 0xDC01,0xD800,0xDBFF,'l','\0'};
  368. static gunichar exp2[3] = {'H',0x00010001,'\0'};
  369. static gunichar2 str3[4] = {'H', 0xDC00 ,'l','\0'};
  370. static gunichar exp3[2] = {'H','\0'};
  371. static gunichar2 str4[20] = {0xDC00,0xDFFF,0xDFF,0xD800,0xDBFF,0xD800,0xDC00,0xD800,0xDFFF,
  372. 0xD800,0xE000,0xDBFF,0xDBFF,0xDBFF,0xDC00,0xDBFF,0xDFFF,0xDBFF,0xE000,'\0'};
  373. static gunichar exp4[6] = {0xDFF,0x10000,0x103ff,0x10fc00,0x10FFFF,'\0'};
  374. static gunichar2 str5[3] = {0xD801, 0xDC00, 0};
  375. static gunichar exp5[2] = {0x10400, 0};
  376. static glong read_write[33] = {1,0,0,1,0,0,1,1,1,2,1,0,2,2,1,2,2,1,2,1,0,2,1,0,2,2,1,2,2,1,2,1,0};
  377. gunichar* res;
  378. glong items_read, items_written, current_read_index,current_write_index;
  379. GError* err=0;
  380. RESULT check_result;
  381. glong i;
  382. res = g_utf16_to_ucs4 (str1, 12, &items_read, &items_written, &err);
  383. check_result = utf16_to_ucs4_check_result (res, exp1, items_read, 11, items_written, 11, err, FALSE);
  384. if (check_result) return check_result;
  385. g_free (res);
  386. items_read = items_written = 0;
  387. res = g_utf16_to_ucs4 (str2, 0, &items_read, &items_written, &err);
  388. check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 0, items_written, 0, err, FALSE);
  389. if (check_result) return check_result;
  390. g_free (res);
  391. items_read = items_written = 0;
  392. res = g_utf16_to_ucs4 (str2, 1, &items_read, &items_written, &err);
  393. check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE);
  394. if (check_result) return check_result;
  395. g_free (res);
  396. items_read = items_written = 0;
  397. res = g_utf16_to_ucs4 (str2, 2, &items_read, &items_written, &err);
  398. check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE);
  399. if (check_result) return check_result;
  400. g_free (res);
  401. items_read = items_written = 0;
  402. res = g_utf16_to_ucs4 (str2, 3, &items_read, &items_written, &err);
  403. check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 3, items_written, 2, err, FALSE);
  404. if (check_result) return check_result;
  405. g_free (res);
  406. items_read = items_written = 0;
  407. res = g_utf16_to_ucs4 (str2, 4, &items_read, &items_written, &err);
  408. check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 3, items_written, 2, err, FALSE);
  409. if (check_result) return check_result;
  410. g_free (res);
  411. items_read = items_written = 0;
  412. res = g_utf16_to_ucs4 (str2, 5, &items_read, &items_written, &err);
  413. check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 4, items_written, 0, err, TRUE);
  414. if (check_result) return check_result;
  415. g_free (res);
  416. items_read = items_written = 0;
  417. err = 0;
  418. res = g_utf16_to_ucs4 (str3, 5, &items_read, &items_written, &err);
  419. check_result = utf16_to_ucs4_check_result (res, exp3, items_read, 1, items_written, 0, err, TRUE);
  420. if (check_result) return check_result;
  421. g_free (res);
  422. // This loop tests the bounds of the conversion algorithm
  423. current_read_index = current_write_index = 0;
  424. for (i=0;i<11;i++) {
  425. items_read = items_written = 0;
  426. err = 0;
  427. res = g_utf16_to_ucs4 (&str4[current_read_index], read_write[i*3], &items_read, &items_written, &err);
  428. check_result = utf16_to_ucs4_check_result (res, &exp4[current_write_index], items_read,
  429. read_write[(i*3)+1], items_written, read_write[(i*3)+2], err,
  430. !read_write[(i*3)+2]);
  431. if (check_result) return check_result;
  432. g_free (res);
  433. current_read_index += read_write[i*3];
  434. current_write_index += items_written;
  435. }
  436. items_read = items_written = 0;
  437. err = 0;
  438. res = g_utf16_to_ucs4 (str5, 2, &items_read, &items_written, &err);
  439. check_result = utf16_to_ucs4_check_result (res, exp5, items_read, 2, items_written, 1, err, FALSE);
  440. if (check_result) return check_result;
  441. g_free (res);
  442. return OK;
  443. }
  444. RESULT
  445. test_utf8_strlen ()
  446. {
  447. gchar word1 [] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'};//Valid, len = 5
  448. gchar word2 [] = {0xF1, 0x82, 0x82, 0x82,0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,'\0'};//Valid, len = 5
  449. gchar word3 [] = {'h','e',0xC2, 0x82,0x45,'\0'}; //Valid, len = 4
  450. gchar word4 [] = {0x62,0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,'\0'}; //Valid, len = 5
  451. glong len = 0;
  452. //Test word1
  453. len = g_utf8_strlen (word1,-1);
  454. if (len != 5)
  455. return FAILED ("Word1 expected length of 5, but was %i", len);
  456. //Do tests with different values for max parameter.
  457. len = g_utf8_strlen (word1,1);
  458. if (len != 0)
  459. return FAILED ("Word1, max = 1, expected length of 0, but was %i", len);
  460. len = g_utf8_strlen (word1,2);
  461. if (len != 1)
  462. return FAILED ("Word1, max = 1, expected length of 1, but was %i", len);
  463. len = g_utf8_strlen (word1,3);
  464. if (len != 2)
  465. return FAILED ("Word1, max = 2, expected length of 2, but was %i", len);
  466. //Test word2
  467. len = g_utf8_strlen (word2,-1);
  468. if (len != 5)
  469. return FAILED ("Word2 expected length of 5, but was %i", len);
  470. //Test word3
  471. len = g_utf8_strlen (word3,-1);
  472. if (len != 4)
  473. return FAILED ("Word3 expected length of 4, but was %i", len);
  474. //Test word4
  475. len = g_utf8_strlen (word4,-1);
  476. if (len != 5)
  477. return FAILED ("Word4 expected length of 5, but was %i", len);
  478. //Test null case
  479. len = g_utf8_strlen(NULL,0);
  480. if (len != 0)
  481. return FAILED ("Expected passing null to result in a length of 0");
  482. return OK;
  483. }
  484. RESULT
  485. test_utf8_get_char()
  486. {
  487. gchar word1 [] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'}; //Valid, len = 5
  488. gunichar value = g_utf8_get_char (&word1 [0]);
  489. if (value != 0x82UL)
  490. return FAILED ("Expected value of 0x82, but was %x", value);
  491. value = g_utf8_get_char (&word1 [2]);
  492. if (value != 0x45UL)
  493. return FAILED ("Expected value of 0x45, but was %x", value);
  494. value = g_utf8_get_char (&word1 [3]);
  495. if (value != 0x1043UL)
  496. return FAILED ("Expected value of 0x1043, but was %x", value);
  497. value = g_utf8_get_char (&word1 [6]);
  498. if (value != 0x58UL)
  499. return FAILED ("Expected value of 0x58, but was %x", value);
  500. value = g_utf8_get_char (&word1 [7]);
  501. if (value != 0x42082UL)
  502. return FAILED ("Expected value of 0x42082, but was %x", value);
  503. return OK;
  504. }
  505. RESULT
  506. test_utf8_next_char()
  507. {
  508. gchar word1 [] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'}; //Valid, len = 5
  509. gchar word2 [] = {0xF1, 0x82, 0x82, 0x82,0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,'\0'}; //Valid, len = 5
  510. gchar word1ExpectedValues [] = {0xC2, 0x45,0xE1, 0x58, 0xF1};
  511. gchar word2ExpectedValues [] = {0xF1, 0xC2, 0x45, 0xE1, 0x58};
  512. gchar* ptr = word1;
  513. gint count = 0;
  514. //Test word1
  515. while (*ptr != 0) {
  516. if (count > 4)
  517. return FAILED ("Word1 has gone past its expected length");
  518. if (*ptr != word1ExpectedValues[count])
  519. return FAILED ("Word1 has an incorrect next_char at index %i", count);
  520. ptr = g_utf8_next_char (ptr);
  521. count++;
  522. }
  523. //Test word2
  524. count = 0;
  525. ptr = word2;
  526. while (*ptr != 0) {
  527. if (count > 4)
  528. return FAILED ("Word2 has gone past its expected length");
  529. if (*ptr != word2ExpectedValues[count])
  530. return FAILED ("Word2 has an incorrect next_char at index %i", count);
  531. ptr = g_utf8_next_char (ptr);
  532. count++;
  533. }
  534. return OK;
  535. }
  536. RESULT
  537. test_utf8_validate()
  538. {
  539. gchar invalidWord1 [] = {0xC3, 0x82, 0xC1,0x90,'\0'}; //Invalid, 1nd oct Can't be 0xC0 or 0xC1
  540. gchar invalidWord2 [] = {0xC1, 0x89, 0x60, '\0'}; //Invalid, 1st oct can not be 0xC1
  541. gchar invalidWord3 [] = {0xC2, 0x45,0xE1, 0x81, 0x83,0x58,'\0'}; //Invalid, oct after 0xC2 must be > 0x80
  542. gchar validWord1 [] = {0xC2, 0x82, 0xC3,0xA0,'\0'}; //Valid
  543. gchar validWord2 [] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'}; //Valid
  544. const gchar* end;
  545. gboolean retVal = g_utf8_validate (invalidWord1, -1, &end);
  546. if (retVal != FALSE)
  547. return FAILED ("Expected invalidWord1 to be invalid");
  548. if (end != &invalidWord1 [2])
  549. return FAILED ("Expected end parameter to be pointing to invalidWord1[2]");
  550. end = NULL;
  551. retVal = g_utf8_validate (invalidWord2, -1, &end);
  552. if (retVal != FALSE)
  553. return FAILED ("Expected invalidWord2 to be invalid");
  554. if (end != &invalidWord2 [0])
  555. return FAILED ("Expected end parameter to be pointing to invalidWord2[0]");
  556. end = NULL;
  557. retVal = g_utf8_validate (invalidWord3, -1, &end);
  558. if (retVal != FALSE)
  559. return FAILED ("Expected invalidWord3 to be invalid");
  560. if (end != &invalidWord3 [0])
  561. return FAILED ("Expected end parameter to be pointing to invalidWord3[1]");
  562. end = NULL;
  563. retVal = g_utf8_validate (validWord1, -1, &end);
  564. if (retVal != TRUE)
  565. return FAILED ("Expected validWord1 to be valid");
  566. if (end != &validWord1 [4])
  567. return FAILED ("Expected end parameter to be pointing to validWord1[4]");
  568. end = NULL;
  569. retVal = g_utf8_validate (validWord2, -1, &end);
  570. if (retVal != TRUE)
  571. return FAILED ("Expected validWord2 to be valid");
  572. if (end != &validWord2 [11])
  573. return FAILED ("Expected end parameter to be pointing to validWord2[11]");
  574. return OK;
  575. }
  576. glong
  577. utf8_byteslen (const gchar *src)
  578. {
  579. int i = 0;
  580. do {
  581. if (src [i] == '\0')
  582. return i;
  583. i++;
  584. } while (TRUE);
  585. }
  586. RESULT
  587. test_utf8_strcase_each (const gchar *src, const gchar *expected, gboolean strup)
  588. {
  589. gchar *tmp;
  590. glong len, len2;
  591. RESULT r;
  592. len = utf8_byteslen (src);
  593. tmp = strup ? g_utf8_strup (src, len) : g_utf8_strdown (src, len);
  594. len2 = utf8_byteslen (tmp);
  595. r = compare_strings_utf8_RESULT (expected, tmp, len < len2 ? len2 : len);
  596. g_free (tmp);
  597. return r;
  598. }
  599. RESULT
  600. test_utf8_strup_each (const gchar *src, const gchar *expected)
  601. {
  602. return test_utf8_strcase_each (src, expected, TRUE);
  603. }
  604. RESULT
  605. test_utf8_strdown_each (const gchar *src, const gchar *expected)
  606. {
  607. return test_utf8_strcase_each (src, expected, FALSE);
  608. }
  609. /*
  610. * g_utf8_strup
  611. */
  612. RESULT
  613. test_utf8_strup ()
  614. {
  615. RESULT r;
  616. if ((r = test_utf8_strup_each ("aBc", "ABC")) != OK)
  617. return r;
  618. if ((r = test_utf8_strup_each ("x86-64", "X86-64")) != OK)
  619. return r;
  620. // U+3B1 U+392 -> U+391 U+392
  621. if ((r = test_utf8_strup_each ("\xCE\xB1\xCE\x92", "\xCE\x91\xCE\x92")) != OK)
  622. return r;
  623. // U+FF21 -> U+FF21
  624. if ((r = test_utf8_strup_each ("\xEF\xBC\xA1", "\xEF\xBC\xA1")) != OK)
  625. return r;
  626. // U+FF41 -> U+FF21
  627. if ((r = test_utf8_strup_each ("\xEF\xBD\x81", "\xEF\xBC\xA1")) != OK)
  628. return r;
  629. // U+10428 -> U+10400
  630. if ((r = test_utf8_strup_each ("\xF0\x90\x90\xA8", "\xF0\x90\x90\x80")) != OK)
  631. return r;
  632. return OK;
  633. }
  634. /*
  635. * g_utf8_strdown
  636. */
  637. RESULT
  638. test_utf8_strdown ()
  639. {
  640. RESULT r;
  641. if ((r = test_utf8_strdown_each ("aBc", "abc")) != OK)
  642. return r;
  643. if ((r = test_utf8_strdown_each ("X86-64", "x86-64")) != OK)
  644. return r;
  645. // U+391 U+3B2 -> U+3B1 U+3B2
  646. if ((r = test_utf8_strdown_each ("\xCE\x91\xCE\xB2", "\xCE\xB1\xCE\xB2")) != OK)
  647. return r;
  648. /*
  649. // U+FF41 -> U+FF41
  650. if ((r = test_utf8_strdown_each ("\xEF\xBC\x81", "\xEF\xBC\x81")) != OK)
  651. return r;
  652. // U+FF21 -> U+FF41
  653. if ((r = test_utf8_strdown_each ("\xEF\xBC\xA1", "\xEF\xBD\x81")) != OK)
  654. return r;
  655. // U+10400 -> U+10428
  656. if ((r = test_utf8_strdown_each ("\xF0\x90\x90\x80", "\xF0\x90\x90\xA8")) != OK)
  657. return r;
  658. */
  659. return OK;
  660. }
  661. /*
  662. * test initialization
  663. */
  664. static Test utf8_tests [] = {
  665. {"g_utf16_to_utf8", test_utf16_to_utf8},
  666. {"g_utf8_to_utf16", test_utf8_to_utf16},
  667. {"g_utf8_seq", test_utf8_seq},
  668. {"g_convert", test_convert },
  669. {"g_unichar_xdigit_value", test_xdigit },
  670. {"g_ucs4_to_utf16", test_ucs4_to_utf16 },
  671. {"g_utf16_to_ucs4", test_utf16_to_ucs4 },
  672. {"g_utf8_strlen", test_utf8_strlen },
  673. {"g_utf8_get_char", test_utf8_get_char },
  674. {"g_utf8_next_char", test_utf8_next_char },
  675. {"g_utf8_validate", test_utf8_validate },
  676. {"g_utf8_strup", test_utf8_strup},
  677. {"g_utf8_strdown", test_utf8_strdown},
  678. {NULL, NULL}
  679. };
  680. DEFINE_TEST_GROUP_INIT(utf8_tests_init, utf8_tests)