utf8.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504
  1. #include "test.h"
  2. /*
  3. * g_utf16_to_utf8
  4. */
  5. glong
  6. compare_strings_utf8_pos (const gchar *expected, const gchar *actual, glong size)
  7. {
  8. int i;
  9. for (i = 0; i < size; i++)
  10. if (expected [i] != actual [i])
  11. return i;
  12. return -1;
  13. }
  14. RESULT
  15. compare_strings_utf8_RESULT (const gchar *expected, const gchar *actual, glong size)
  16. {
  17. glong ret;
  18. ret = compare_strings_utf8_pos (expected, actual, size);
  19. if (ret < 0)
  20. return OK;
  21. return FAILED ("Incorrect output: expected '%s' but was '%s', differ at %d\n", expected, actual, ret);
  22. }
  23. void
  24. gchar_to_gunichar2 (gunichar2 ret[], const gchar *src)
  25. {
  26. int i;
  27. for (i = 0; src [i]; i++)
  28. ret [i] = src [i];
  29. ret [i] = 0;
  30. }
  31. RESULT
  32. compare_utf16_to_utf8_explicit (const gchar *expected, const gunichar2 *utf16, glong len_in, glong len_out, glong size_spec)
  33. {
  34. GError *error;
  35. gchar* ret;
  36. RESULT result;
  37. glong in_read, out_read;
  38. result = NULL;
  39. error = NULL;
  40. ret = g_utf16_to_utf8 (utf16, size_spec, &in_read, &out_read, &error);
  41. if (error) {
  42. result = FAILED ("The error is %d %s\n", (error)->code, (error)->message);
  43. g_error_free (error);
  44. if (ret)
  45. g_free (ret);
  46. return result;
  47. }
  48. if (in_read != len_in)
  49. result = FAILED ("Read size is incorrect: expected %d but was %d\n", len_in, in_read);
  50. else if (out_read != len_out)
  51. result = FAILED ("Converted size is incorrect: expected %d but was %d\n", len_out, out_read);
  52. else
  53. result = compare_strings_utf8_RESULT (expected, ret, len_out);
  54. g_free (ret);
  55. if (result)
  56. return result;
  57. return OK;
  58. }
  59. RESULT
  60. compare_utf16_to_utf8 (const gchar *expected, const gunichar2 *utf16, glong len_in, glong len_out)
  61. {
  62. RESULT result;
  63. result = compare_utf16_to_utf8_explicit (expected, utf16, len_in, len_out, -1);
  64. if (result != OK)
  65. return result;
  66. return compare_utf16_to_utf8_explicit (expected, utf16, len_in, len_out, len_in);
  67. }
  68. RESULT
  69. test_utf16_to_utf8 ()
  70. {
  71. const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27";
  72. gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0};
  73. RESULT result;
  74. gchar_to_gunichar2 (str1, src1);
  75. /* empty string */
  76. result = compare_utf16_to_utf8 (src0, str0, 0, 0);
  77. if (result != OK)
  78. return result;
  79. result = compare_utf16_to_utf8 (src1, str1, 5, 5);
  80. if (result != OK)
  81. return result;
  82. result = compare_utf16_to_utf8 (src2, str2, 2, 4);
  83. if (result != OK)
  84. return result;
  85. return OK;
  86. }
  87. /*
  88. * g_utf8_to_utf16
  89. */
  90. glong
  91. compare_strings_utf16_pos (const gunichar2 *expected, const gunichar2 *actual, glong size)
  92. {
  93. int i;
  94. for (i = 0; i < size; i++)
  95. if (expected [i] != actual [i])
  96. return i;
  97. return -1;
  98. }
  99. RESULT
  100. compare_strings_utf16_RESULT (const gunichar2 *expected, const gunichar2 *actual, glong size)
  101. {
  102. glong ret;
  103. ret = compare_strings_utf16_pos (expected, actual, size);
  104. if (ret < 0)
  105. return OK;
  106. return FAILED ("Incorrect output: expected '%s' but was '%s'\n", expected, actual);
  107. }
  108. RESULT
  109. compare_utf8_to_utf16_explicit (const gunichar2 *expected, const gchar *utf8, glong len_in, glong len_out, glong size_spec)
  110. {
  111. GError *error;
  112. gunichar2* ret;
  113. RESULT result;
  114. glong in_read, out_read;
  115. result = NULL;
  116. error = NULL;
  117. ret = g_utf8_to_utf16 (utf8, size_spec, &in_read, &out_read, &error);
  118. if (error) {
  119. result = FAILED ("The error is %d %s\n", (error)->code, (error)->message);
  120. g_error_free (error);
  121. if (ret)
  122. g_free (ret);
  123. return result;
  124. }
  125. if (in_read != len_in)
  126. result = FAILED ("Read size is incorrect: expected %d but was %d\n", len_in, in_read);
  127. else if (out_read != len_out)
  128. result = FAILED ("Converted size is incorrect: expected %d but was %d\n", len_out, out_read);
  129. else
  130. result = compare_strings_utf16_RESULT (expected, ret, len_out);
  131. g_free (ret);
  132. if (result)
  133. return result;
  134. return OK;
  135. }
  136. RESULT
  137. compare_utf8_to_utf16 (const gunichar2 *expected, const gchar *utf8, glong len_in, glong len_out)
  138. {
  139. RESULT result;
  140. result = compare_utf8_to_utf16_explicit (expected, utf8, len_in, len_out, -1);
  141. if (result != OK)
  142. return result;
  143. return compare_utf8_to_utf16_explicit (expected, utf8, len_in, len_out, len_in);
  144. }
  145. RESULT
  146. test_utf8_seq ()
  147. {
  148. const gchar *src = "\xE5\xB9\xB4\x27";
  149. glong in_read, out_read;
  150. //gunichar2 expected [6];
  151. GError *error = NULL;
  152. gunichar2 *dst;
  153. printf ("got: %s\n", src);
  154. dst = g_utf8_to_utf16 (src, (glong)strlen (src), &in_read, &out_read, &error);
  155. if (error != NULL){
  156. return error->message;
  157. }
  158. if (in_read != 4) {
  159. return FAILED ("in_read is expected to be 4 but was %d\n", in_read);
  160. }
  161. if (out_read != 2) {
  162. return FAILED ("out_read is expected to be 2 but was %d\n", out_read);
  163. }
  164. return OK;
  165. }
  166. RESULT
  167. test_utf8_to_utf16 ()
  168. {
  169. const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27";
  170. gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0};
  171. RESULT result;
  172. gchar_to_gunichar2 (str1, src1);
  173. /* empty string */
  174. result = compare_utf8_to_utf16 (str0, src0, 0, 0);
  175. if (result != OK)
  176. return result;
  177. result = compare_utf8_to_utf16 (str1, src1, 5, 5);
  178. if (result != OK)
  179. return result;
  180. result = compare_utf8_to_utf16 (str2, src2, 4, 2);
  181. if (result != OK)
  182. return result;
  183. return OK;
  184. }
  185. RESULT
  186. test_convert ()
  187. {
  188. gsize n;
  189. char *s = g_convert ("\242\241\243\242\241\243\242\241\243\242\241\243", -1, "UTF-8", "ISO-8859-1", NULL, &n, NULL);
  190. guchar *u = (guchar *) s;
  191. if (!s)
  192. return FAILED ("Expected 24 bytes, got: NULL");
  193. if (strlen (s) != 24)
  194. return FAILED ("Expected 24 bytes, got: %d", strlen (s));
  195. if (u [1] != 162 || u [2] != 194 ||
  196. u [3] != 161 || u [4] != 194 ||
  197. u [5] != 163 || u [6] != 194)
  198. return FAILED ("Incorrect conversion");
  199. g_free (s);
  200. return OK;
  201. }
  202. RESULT
  203. test_xdigit ()
  204. {
  205. static char test_chars[] = {
  206. '0', '1', '2', '3', '4',
  207. '5', '6', '7', '8', '9',
  208. 'a', 'b', 'c', 'd', 'e', 'f', 'g',
  209. 'A', 'B', 'C', 'D', 'E', 'F', 'G'};
  210. static gint32 test_values[] = {
  211. 0, 1, 2, 3, 4,
  212. 5, 6, 7, 8, 9,
  213. 10, 11, 12, 13, 14, 15, -1,
  214. 10, 11, 12, 13, 14, 15, -1};
  215. int i =0;
  216. for (i = 0; i < sizeof(test_chars); i++)
  217. if (g_unichar_xdigit_value ((gunichar)test_chars[i]) != test_values[i])
  218. return FAILED("Incorrect value %d at index %d", test_values[i], i);
  219. return OK;
  220. }
  221. static RESULT
  222. ucs4_to_utf16_check_result (const gunichar2 *result_str, const gunichar2 *expected_str,
  223. glong result_items_read, glong expected_items_read,
  224. glong result_items_written, glong expected_items_written,
  225. GError* result_error, gboolean expect_error)
  226. {
  227. glong i;
  228. if (result_items_read != expected_items_read)
  229. return FAILED("Incorrect number of items read %d", result_items_read);
  230. if (result_items_written != expected_items_written)
  231. return FAILED("Incorrect number of items written %d", result_items_written);
  232. if (result_error && !expect_error)
  233. return FAILED("There should not be an error code.");
  234. if (!result_error && expect_error)
  235. return FAILED("Unexpected error object.");
  236. if (expect_error && result_str)
  237. return FAILED("NULL should be returned when an error occurs.");
  238. if (!expect_error && !result_str)
  239. return FAILED("When no error occurs NULL should not be returned.");
  240. for (i=0; i<expected_items_written;i++) {
  241. if (result_str [i] != expected_str [i])
  242. return FAILED("Incorrect value %d at index %d", result_str [i], i);
  243. }
  244. if (result_str && result_str[expected_items_written] != '\0')
  245. return FAILED("Null termination not found at the end of the string.");
  246. return OK;
  247. }
  248. RESULT
  249. test_ucs4_to_utf16 ()
  250. {
  251. static gunichar str1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
  252. static gunichar2 exp1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
  253. static gunichar str2[3] = {'h',0x80000000,'\0'};
  254. static gunichar2 exp2[2] = {'h','\0'};
  255. static gunichar str3[3] = {'h',0xDA00,'\0'};
  256. static gunichar str4[3] = {'h',0x10FFFF,'\0'};
  257. static gunichar2 exp4[4] = {'h',0xdbff,0xdfff,'\0'};
  258. static gunichar str5[7] = {0xD7FF,0xD800,0xDFFF,0xE000,0x110000,0x10FFFF,'\0'};
  259. static gunichar2 exp5[5] = {0xD7FF,0xE000,0xdbff,0xdfff,'\0'};
  260. static glong read_write[12] = {1,1,0,0,0,0,1,1,0,0,1,2};
  261. gunichar2* res;
  262. glong items_read, items_written, current_write_index;
  263. GError* err=0;
  264. RESULT check_result;
  265. glong i;
  266. res = g_ucs4_to_utf16 (str1, 12, &items_read, &items_written, &err);
  267. check_result = ucs4_to_utf16_check_result (res, exp1, items_read, 11, items_written, 11, err, FALSE);
  268. if (check_result) return check_result;
  269. g_free (res);
  270. items_read = items_written = 0;
  271. res = g_ucs4_to_utf16 (str2, 0, &items_read, &items_written, &err);
  272. check_result = ucs4_to_utf16_check_result (res, exp2, items_read, 0, items_written, 0, err, FALSE);
  273. if (check_result) return check_result;
  274. g_free (res);
  275. items_read = items_written = 0;
  276. res = g_ucs4_to_utf16 (str2, 1, &items_read, &items_written, &err);
  277. check_result = ucs4_to_utf16_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE);
  278. if (check_result) return check_result;
  279. g_free (res);
  280. items_read = items_written = 0;
  281. res = g_ucs4_to_utf16 (str2, 2, &items_read, &items_written, &err);
  282. check_result = ucs4_to_utf16_check_result (res, 0, items_read, 1, items_written, 0, err, TRUE);
  283. if (check_result) return check_result;
  284. g_free (res);
  285. items_read = items_written = 0;
  286. err = 0;
  287. res = g_ucs4_to_utf16 (str3, 2, &items_read, &items_written, &err);
  288. check_result = ucs4_to_utf16_check_result (res, 0, items_read, 1, items_written, 0, err, TRUE);
  289. if (check_result) return check_result;
  290. g_free (res);
  291. items_read = items_written = 0;
  292. err = 0;
  293. res = g_ucs4_to_utf16 (str4, 5, &items_read, &items_written, &err);
  294. check_result = ucs4_to_utf16_check_result (res, exp4, items_read, 2, items_written, 3, err, FALSE);
  295. if (check_result) return check_result;
  296. g_free (res);
  297. // This loop tests the bounds of the conversion algorithm
  298. current_write_index = 0;
  299. for (i=0;i<6;i++) {
  300. items_read = items_written = 0;
  301. err = 0;
  302. res = g_ucs4_to_utf16 (&str5[i], 1, &items_read, &items_written, &err);
  303. check_result = ucs4_to_utf16_check_result (res, &exp5[current_write_index],
  304. items_read, read_write[i*2], items_written, read_write[(i*2)+1], err, !read_write[(i*2)+1]);
  305. if (check_result) return check_result;
  306. g_free (res);
  307. current_write_index += items_written;
  308. }
  309. return OK;
  310. }
  311. static RESULT
  312. utf16_to_ucs4_check_result (const gunichar *result_str, const gunichar *expected_str,
  313. glong result_items_read, glong expected_items_read,
  314. glong result_items_written, glong expected_items_written,
  315. GError* result_error, gboolean expect_error)
  316. {
  317. glong i;
  318. if (result_items_read != expected_items_read)
  319. return FAILED("Incorrect number of items read %d", result_items_read);
  320. if (result_items_written != expected_items_written)
  321. return FAILED("Incorrect number of items written %d", result_items_written);
  322. if (result_error && !expect_error)
  323. return FAILED("There should not be an error code.");
  324. if (!result_error && expect_error)
  325. return FAILED("Unexpected error object.");
  326. if (expect_error && result_str)
  327. return FAILED("NULL should be returned when an error occurs.");
  328. if (!expect_error && !result_str)
  329. return FAILED("When no error occurs NULL should not be returned.");
  330. for (i=0; i<expected_items_written;i++) {
  331. if (result_str [i] != expected_str [i])
  332. return FAILED("Incorrect value %d at index %d", result_str [i], i);
  333. }
  334. if (result_str && result_str[expected_items_written] != '\0')
  335. return FAILED("Null termination not found at the end of the string.");
  336. return OK;
  337. }
  338. RESULT
  339. test_utf16_to_ucs4 ()
  340. {
  341. static gunichar2 str1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
  342. static gunichar exp1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
  343. static gunichar2 str2[7] = {'H', 0xD800, 0xDC01,0xD800,0xDBFF,'l','\0'};
  344. static gunichar exp2[3] = {'H',0x00010001,'\0'};
  345. static gunichar2 str3[4] = {'H', 0xDC00 ,'l','\0'};
  346. static gunichar exp3[2] = {'H','\0'};
  347. static gunichar2 str4[20] = {0xDC00,0xDFFF,0xDFF,0xD800,0xDBFF,0xD800,0xDC00,0xD800,0xDFFF,
  348. 0xD800,0xE000,0xDBFF,0xDBFF,0xDBFF,0xDC00,0xDBFF,0xDFFF,0xDBFF,0xE000,'\0'};
  349. static gunichar exp4[6] = {0xDFF,0x10000,0x103ff,0x10fc00,0x10FFFF,'\0'};
  350. static glong read_write[33] = {1,0,0,1,0,0,1,1,1,2,1,0,2,2,1,2,2,1,2,1,0,2,1,0,2,2,1,2,2,1,2,1,0};
  351. gunichar* res;
  352. glong items_read, items_written, current_read_index,current_write_index;
  353. GError* err=0;
  354. RESULT check_result;
  355. glong i;
  356. res = g_utf16_to_ucs4 (str1, 12, &items_read, &items_written, &err);
  357. check_result = utf16_to_ucs4_check_result (res, exp1, items_read, 11, items_written, 11, err, FALSE);
  358. if (check_result) return check_result;
  359. g_free (res);
  360. items_read = items_written = 0;
  361. res = g_utf16_to_ucs4 (str2, 0, &items_read, &items_written, &err);
  362. check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 0, items_written, 0, err, FALSE);
  363. if (check_result) return check_result;
  364. g_free (res);
  365. items_read = items_written = 0;
  366. res = g_utf16_to_ucs4 (str2, 1, &items_read, &items_written, &err);
  367. check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE);
  368. if (check_result) return check_result;
  369. g_free (res);
  370. items_read = items_written = 0;
  371. res = g_utf16_to_ucs4 (str2, 2, &items_read, &items_written, &err);
  372. check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE);
  373. if (check_result) return check_result;
  374. g_free (res);
  375. items_read = items_written = 0;
  376. res = g_utf16_to_ucs4 (str2, 3, &items_read, &items_written, &err);
  377. check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 3, items_written, 2, err, FALSE);
  378. if (check_result) return check_result;
  379. g_free (res);
  380. items_read = items_written = 0;
  381. res = g_utf16_to_ucs4 (str2, 4, &items_read, &items_written, &err);
  382. check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 3, items_written, 2, err, FALSE);
  383. if (check_result) return check_result;
  384. g_free (res);
  385. items_read = items_written = 0;
  386. res = g_utf16_to_ucs4 (str2, 5, &items_read, &items_written, &err);
  387. check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 4, items_written, 0, err, TRUE);
  388. if (check_result) return check_result;
  389. g_free (res);
  390. items_read = items_written = 0;
  391. err = 0;
  392. res = g_utf16_to_ucs4 (str3, 5, &items_read, &items_written, &err);
  393. check_result = utf16_to_ucs4_check_result (res, exp3, items_read, 1, items_written, 0, err, TRUE);
  394. if (check_result) return check_result;
  395. g_free (res);
  396. // This loop tests the bounds of the conversion algorithm
  397. current_read_index = current_write_index = 0;
  398. for (i=0;i<11;i++) {
  399. items_read = items_written = 0;
  400. err = 0;
  401. res = g_utf16_to_ucs4 (&str4[current_read_index], read_write[i*3], &items_read, &items_written, &err);
  402. check_result = utf16_to_ucs4_check_result (res, &exp4[current_write_index], items_read,
  403. read_write[(i*3)+1], items_written, read_write[(i*3)+2], err,
  404. !read_write[(i*3)+2]);
  405. if (check_result) return check_result;
  406. g_free (res);
  407. current_read_index += read_write[i*3];
  408. current_write_index += items_written;
  409. }
  410. return OK;
  411. }
  412. /*
  413. * test initialization
  414. */
/*
 * Table of the tests in this file: each entry pairs a display name with the
 * function that implements the test.
 */
static Test utf8_tests [] = {
{"g_utf16_to_utf8", test_utf16_to_utf8},
{"g_utf8_to_utf16", test_utf8_to_utf16},
{"g_utf8_seq", test_utf8_seq},
{"g_convert", test_convert },
{"g_unichar_xdigit_value", test_xdigit },
{"g_ucs4_to_utf16", test_ucs4_to_utf16 },
{"g_utf16_to_ucs4", test_utf16_to_ucs4 },
/* NOTE(review): presumably the end-of-table sentinel - confirm against test.h */
{NULL, NULL}
};
/* NOTE(review): macro from test.h; appears to define utf8_tests_init as the accessor for the table above - confirm */
DEFINE_TEST_GROUP_INIT(utf8_tests_init, utf8_tests)