test-buffer.c 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954
  1. /*
  2. * Copyright © 2011 Google, Inc.
  3. *
  4. * This is part of HarfBuzz, a text shaping library.
  5. *
  6. * Permission is hereby granted, without written agreement and without
  7. * license or royalty fees, to use, copy, modify, and distribute this
  8. * software and its documentation for any purpose, provided that the
  9. * above copyright notice and the following two paragraphs appear in
  10. * all copies of this software.
  11. *
  12. * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  13. * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  14. * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  15. * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  16. * DAMAGE.
  17. *
  18. * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  19. * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  20. * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
  21. * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  22. * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  23. *
  24. * Google Author(s): Behdad Esfahbod
  25. */
  26. #include "hb-test.h"
  27. /* Unit tests for hb-buffer.h */
  /* The sample text "ab<U+20000>defg" in UTF-8, UTF-16 and UTF-32.
   * The fixtures below add it with the first and last item skipped. */

  /* Exactly 10 bytes: the array has no room for a terminating NUL. */
  static const char utf8[10] = "ab" "\360\240\200\200" "defg";

  /* U+20000 is encoded as the surrogate pair D840 DC00. */
  static const uint16_t utf16[8] = {
    'a', 'b',
    0xD840, 0xDC00,
    'd', 'e', 'f', 'g'
  };

  static const uint32_t utf32[7] = {
    'a', 'b',
    0x20000,
    'd', 'e', 'f', 'g'
  };
  /* Flavors of pre-filled buffer handed to the fixture-based tests.
   * The values double as indices into buffer_names. */
  typedef enum {
    BUFFER_EMPTY      = 0, /* nothing added */
    BUFFER_ONE_BY_ONE = 1, /* codepoints added individually */
    BUFFER_UTF32      = 2, /* filled from the utf32 sample */
    BUFFER_UTF16      = 3, /* filled from the utf16 sample */
    BUFFER_UTF8       = 4, /* filled from the utf8 sample */
    BUFFER_NUM_TYPES  = 5  /* number of flavors, not a flavor itself */
  } buffer_type_t;
  /* Human-readable flavor names, in the same order as buffer_type_t;
   * main() uses them to label each fixture flavor. */
  static const char *buffer_names[] = {
    "empty",      /* BUFFER_EMPTY */
    "one-by-one", /* BUFFER_ONE_BY_ONE */
    "utf32",      /* BUFFER_UTF32 */
    "utf16",      /* BUFFER_UTF16 */
    "utf8"        /* BUFFER_UTF8 */
  };
  46. typedef struct
  47. {
  48. hb_buffer_t *buffer;
  49. } fixture_t;
  50. static void
  51. fixture_init (fixture_t *fixture, gconstpointer user_data)
  52. {
  53. hb_buffer_t *b;
  54. unsigned int i;
  55. b = fixture->buffer = hb_buffer_create ();
  56. switch (GPOINTER_TO_INT (user_data))
  57. {
  58. case BUFFER_EMPTY:
  59. break;
  60. case BUFFER_ONE_BY_ONE:
  61. for (i = 1; i < G_N_ELEMENTS (utf32) - 1; i++)
  62. hb_buffer_add (b, utf32[i], i);
  63. break;
  64. case BUFFER_UTF32:
  65. hb_buffer_add_utf32 (b, utf32, G_N_ELEMENTS (utf32), 1, G_N_ELEMENTS (utf32) - 2);
  66. break;
  67. case BUFFER_UTF16:
  68. hb_buffer_add_utf16 (b, utf16, G_N_ELEMENTS (utf16), 1, G_N_ELEMENTS (utf16) - 2);
  69. break;
  70. case BUFFER_UTF8:
  71. hb_buffer_add_utf8 (b, utf8, G_N_ELEMENTS (utf8), 1, G_N_ELEMENTS (utf8) - 2);
  72. break;
  73. default:
  74. g_assert_not_reached ();
  75. }
  76. }
  77. static void
  78. fixture_finish (fixture_t *fixture, gconstpointer user_data HB_UNUSED)
  79. {
  80. hb_buffer_destroy (fixture->buffer);
  81. }
  82. static void
  83. test_buffer_properties (fixture_t *fixture, gconstpointer user_data HB_UNUSED)
  84. {
  85. hb_buffer_t *b = fixture->buffer;
  86. hb_unicode_funcs_t *ufuncs;
  87. /* test default properties */
  88. g_assert (hb_buffer_get_unicode_funcs (b) == hb_unicode_funcs_get_default ());
  89. g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
  90. g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
  91. g_assert (hb_buffer_get_language (b) == NULL);
  92. /* test property changes are retained */
  93. ufuncs = hb_unicode_funcs_create (NULL);
  94. hb_buffer_set_unicode_funcs (b, ufuncs);
  95. hb_unicode_funcs_destroy (ufuncs);
  96. g_assert (hb_buffer_get_unicode_funcs (b) == ufuncs);
  97. hb_buffer_set_direction (b, HB_DIRECTION_RTL);
  98. g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_RTL);
  99. hb_buffer_set_script (b, HB_SCRIPT_ARABIC);
  100. g_assert (hb_buffer_get_script (b) == HB_SCRIPT_ARABIC);
  101. hb_buffer_set_language (b, hb_language_from_string ("fa", -1));
  102. g_assert (hb_buffer_get_language (b) == hb_language_from_string ("Fa", -1));
  103. hb_buffer_set_flags (b, HB_BUFFER_FLAG_BOT);
  104. g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_BOT);
  105. hb_buffer_set_replacement_codepoint (b, (unsigned int) -1);
  106. g_assert (hb_buffer_get_replacement_codepoint (b) == (unsigned int) -1);
  107. /* test clear_contents clears all these properties: */
  108. hb_buffer_clear_contents (b);
  109. g_assert (hb_buffer_get_unicode_funcs (b) == ufuncs);
  110. g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
  111. g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
  112. g_assert (hb_buffer_get_language (b) == NULL);
  113. /* but not these: */
  114. g_assert (hb_buffer_get_flags (b) != HB_BUFFER_FLAG_DEFAULT);
  115. g_assert (hb_buffer_get_replacement_codepoint (b) != HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT);
  116. /* test reset clears all properties */
  117. hb_buffer_set_direction (b, HB_DIRECTION_RTL);
  118. g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_RTL);
  119. hb_buffer_set_script (b, HB_SCRIPT_ARABIC);
  120. g_assert (hb_buffer_get_script (b) == HB_SCRIPT_ARABIC);
  121. hb_buffer_set_language (b, hb_language_from_string ("fa", -1));
  122. g_assert (hb_buffer_get_language (b) == hb_language_from_string ("Fa", -1));
  123. hb_buffer_set_flags (b, HB_BUFFER_FLAG_BOT);
  124. g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_BOT);
  125. hb_buffer_set_replacement_codepoint (b, (unsigned int) -1);
  126. g_assert (hb_buffer_get_replacement_codepoint (b) == (unsigned int) -1);
  127. hb_buffer_reset (b);
  128. g_assert (hb_buffer_get_unicode_funcs (b) == hb_unicode_funcs_get_default ());
  129. g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
  130. g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
  131. g_assert (hb_buffer_get_language (b) == NULL);
  132. g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_DEFAULT);
  133. g_assert (hb_buffer_get_replacement_codepoint (b) == HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT);
  134. }
  135. static void
  136. test_buffer_contents (fixture_t *fixture, gconstpointer user_data)
  137. {
  138. hb_buffer_t *b = fixture->buffer;
  139. unsigned int i, len, len2;
  140. buffer_type_t buffer_type = GPOINTER_TO_INT (user_data);
  141. hb_glyph_info_t *glyphs;
  142. if (buffer_type == BUFFER_EMPTY) {
  143. g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
  144. return;
  145. }
  146. len = hb_buffer_get_length (b);
  147. hb_buffer_get_glyph_infos (b, NULL); /* test NULL */
  148. glyphs = hb_buffer_get_glyph_infos (b, &len2);
  149. g_assert_cmpint (len, ==, len2);
  150. g_assert_cmpint (len, ==, 5);
  151. for (i = 0; i < len; i++) {
  152. g_assert_cmphex (glyphs[i].mask, ==, 0);
  153. g_assert_cmphex (glyphs[i].var1.u32, ==, 0);
  154. g_assert_cmphex (glyphs[i].var2.u32, ==, 0);
  155. }
  156. for (i = 0; i < len; i++) {
  157. unsigned int cluster;
  158. cluster = 1+i;
  159. if (i >= 2) {
  160. if (buffer_type == BUFFER_UTF16)
  161. cluster++;
  162. else if (buffer_type == BUFFER_UTF8)
  163. cluster += 3;
  164. }
  165. g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
  166. g_assert_cmphex (glyphs[i].cluster, ==, cluster);
  167. }
  168. /* reverse, test, and reverse back */
  169. hb_buffer_reverse (b);
  170. for (i = 0; i < len; i++)
  171. g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
  172. hb_buffer_reverse (b);
  173. for (i = 0; i < len; i++)
  174. g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
  175. /* reverse_clusters works same as reverse for now since each codepoint is
  176. * in its own cluster */
  177. hb_buffer_reverse_clusters (b);
  178. for (i = 0; i < len; i++)
  179. g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
  180. hb_buffer_reverse_clusters (b);
  181. for (i = 0; i < len; i++)
  182. g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
  183. /* now form a cluster and test again */
  184. glyphs[2].cluster = glyphs[1].cluster;
  185. /* reverse, test, and reverse back */
  186. hb_buffer_reverse (b);
  187. for (i = 0; i < len; i++)
  188. g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
  189. hb_buffer_reverse (b);
  190. for (i = 0; i < len; i++)
  191. g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
  192. /* reverse_clusters twice still should return the original string,
  193. * but when applied once, the 1-2 cluster should be retained. */
  194. hb_buffer_reverse_clusters (b);
  195. for (i = 0; i < len; i++) {
  196. unsigned int j = len-1-i;
  197. if (j == 1)
  198. j = 2;
  199. else if (j == 2)
  200. j = 1;
  201. g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+j]);
  202. }
  203. hb_buffer_reverse_clusters (b);
  204. for (i = 0; i < len; i++)
  205. g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
  206. /* test setting length */
  207. /* enlarge */
  208. g_assert (hb_buffer_set_length (b, 10));
  209. glyphs = hb_buffer_get_glyph_infos (b, NULL);
  210. g_assert_cmpint (hb_buffer_get_length (b), ==, 10);
  211. for (i = 0; i < 5; i++)
  212. g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
  213. for (i = 5; i < 10; i++)
  214. g_assert_cmphex (glyphs[i].codepoint, ==, 0);
  215. /* shrink */
  216. g_assert (hb_buffer_set_length (b, 3));
  217. glyphs = hb_buffer_get_glyph_infos (b, NULL);
  218. g_assert_cmpint (hb_buffer_get_length (b), ==, 3);
  219. for (i = 0; i < 3; i++)
  220. g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
  221. g_assert (hb_buffer_allocation_successful (b));
  222. /* test reset clears content */
  223. hb_buffer_reset (b);
  224. g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
  225. }
  226. static void
  227. test_buffer_positions (fixture_t *fixture, gconstpointer user_data HB_UNUSED)
  228. {
  229. hb_buffer_t *b = fixture->buffer;
  230. unsigned int i, len, len2;
  231. hb_glyph_position_t *positions;
  232. /* Without shaping, positions should all be zero */
  233. len = hb_buffer_get_length (b);
  234. hb_buffer_get_glyph_positions (b, NULL); /* test NULL */
  235. positions = hb_buffer_get_glyph_positions (b, &len2);
  236. g_assert_cmpint (len, ==, len2);
  237. for (i = 0; i < len; i++) {
  238. g_assert_cmpint (0, ==, positions[i].x_advance);
  239. g_assert_cmpint (0, ==, positions[i].y_advance);
  240. g_assert_cmpint (0, ==, positions[i].x_offset);
  241. g_assert_cmpint (0, ==, positions[i].y_offset);
  242. g_assert_cmpint (0, ==, positions[i].var.i32);
  243. }
  244. /* test reset clears content */
  245. hb_buffer_reset (b);
  246. g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
  247. }
  248. static void
  249. test_buffer_allocation (fixture_t *fixture, gconstpointer user_data HB_UNUSED)
  250. {
  251. hb_buffer_t *b = fixture->buffer;
  252. g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
  253. g_assert (hb_buffer_pre_allocate (b, 100));
  254. g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
  255. g_assert (hb_buffer_allocation_successful (b));
  256. /* lets try a huge allocation, make sure it fails */
  257. g_assert (!hb_buffer_pre_allocate (b, (unsigned int) -1));
  258. g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
  259. g_assert (!hb_buffer_allocation_successful (b));
  260. /* small one again */
  261. g_assert (hb_buffer_pre_allocate (b, 50));
  262. g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
  263. g_assert (!hb_buffer_allocation_successful (b));
  264. hb_buffer_reset (b);
  265. g_assert (hb_buffer_allocation_successful (b));
  266. /* all allocation and size */
  267. g_assert (!hb_buffer_pre_allocate (b, ((unsigned int) -1) / 20 + 1));
  268. g_assert (!hb_buffer_allocation_successful (b));
  269. hb_buffer_reset (b);
  270. g_assert (hb_buffer_allocation_successful (b));
  271. /* technically, this one can actually pass on 64bit machines, but
  272. * I'm doubtful that any malloc allows 4GB allocations at a time.
  273. * But let's only enable it on a 32-bit machine. */
  274. if (sizeof (long) == 4) {
  275. g_assert (!hb_buffer_pre_allocate (b, ((unsigned int) -1) / 20 - 1));
  276. g_assert (!hb_buffer_allocation_successful (b));
  277. }
  278. hb_buffer_reset (b);
  279. g_assert (hb_buffer_allocation_successful (b));
  280. }
  281. typedef struct {
  282. const char utf8[8];
  283. const uint32_t codepoints[8];
  284. } utf8_conversion_test_t;
  285. /* note: we skip the first and last byte when adding to buffer */
  286. static const utf8_conversion_test_t utf8_conversion_tests[] = {
  287. {"a\303\207", {(hb_codepoint_t) -1}},
  288. {"a\303\207b", {0xC7}},
  289. {"ab\303cd", {'b', (hb_codepoint_t) -1, 'c'}},
  290. {"ab\303\302\301cd", {'b', (hb_codepoint_t) -1, (hb_codepoint_t) -1, (hb_codepoint_t) -1, 'c'}}
  291. };
  292. static void
  293. test_buffer_utf8_conversion (void)
  294. {
  295. hb_buffer_t *b;
  296. unsigned int chars, i, j, len;
  297. b = hb_buffer_create ();
  298. hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
  299. for (i = 0; i < G_N_ELEMENTS (utf8_conversion_tests); i++)
  300. {
  301. unsigned int bytes;
  302. hb_glyph_info_t *glyphs;
  303. const utf8_conversion_test_t *test = &utf8_conversion_tests[i];
  304. char *escaped;
  305. escaped = g_strescape (test->utf8, NULL);
  306. g_test_message ("UTF-8 test #%d: %s", i, escaped);
  307. g_free (escaped);
  308. bytes = strlen (test->utf8);
  309. for (chars = 0; test->codepoints[chars]; chars++)
  310. ;
  311. hb_buffer_clear_contents (b);
  312. hb_buffer_add_utf8 (b, test->utf8, bytes, 1, bytes - 2);
  313. glyphs = hb_buffer_get_glyph_infos (b, &len);
  314. g_assert_cmpint (len, ==, chars);
  315. for (j = 0; j < chars; j++)
  316. g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
  317. }
  318. hb_buffer_destroy (b);
  319. }
  320. /* Following test table is adapted from glib/glib/tests/utf8-validate.c
  321. * with relicensing permission from Matthias Clasen. */
  322. typedef struct {
  323. const char *utf8;
  324. int max_len;
  325. unsigned int offset;
  326. gboolean valid;
  327. } utf8_validity_test_t;
  328. static const utf8_validity_test_t utf8_validity_tests[] = {
  329. /* some tests to check max_len handling */
  330. /* length 1 */
  331. { "abcde", -1, 5, TRUE },
  332. { "abcde", 3, 3, TRUE },
  333. { "abcde", 5, 5, TRUE },
  334. /* length 2 */
  335. { "\xc2\xa9\xc2\xa9\xc2\xa9", -1, 6, TRUE },
  336. { "\xc2\xa9\xc2\xa9\xc2\xa9", 1, 0, FALSE },
  337. { "\xc2\xa9\xc2\xa9\xc2\xa9", 2, 2, TRUE },
  338. { "\xc2\xa9\xc2\xa9\xc2\xa9", 3, 2, FALSE },
  339. { "\xc2\xa9\xc2\xa9\xc2\xa9", 4, 4, TRUE },
  340. { "\xc2\xa9\xc2\xa9\xc2\xa9", 5, 4, FALSE },
  341. { "\xc2\xa9\xc2\xa9\xc2\xa9", 6, 6, TRUE },
  342. /* length 3 */
  343. { "\xe2\x89\xa0\xe2\x89\xa0", -1, 6, TRUE },
  344. { "\xe2\x89\xa0\xe2\x89\xa0", 1, 0, FALSE },
  345. { "\xe2\x89\xa0\xe2\x89\xa0", 2, 0, FALSE },
  346. { "\xe2\x89\xa0\xe2\x89\xa0", 3, 3, TRUE },
  347. { "\xe2\x89\xa0\xe2\x89\xa0", 4, 3, FALSE },
  348. { "\xe2\x89\xa0\xe2\x89\xa0", 5, 3, FALSE },
  349. { "\xe2\x89\xa0\xe2\x89\xa0", 6, 6, TRUE },
  350. /* examples from https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt */
  351. /* greek 'kosme' */
  352. { "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5", -1, 11, TRUE },
  353. /* first sequence of each length */
  354. { "\x00", -1, 0, TRUE },
  355. { "\xc2\x80", -1, 2, TRUE },
  356. { "\xe0\xa0\x80", -1, 3, TRUE },
  357. { "\xf0\x90\x80\x80", -1, 4, TRUE },
  358. { "\xf8\x88\x80\x80\x80", -1, 0, FALSE },
  359. { "\xfc\x84\x80\x80\x80\x80", -1, 0, FALSE },
  360. /* last sequence of each length */
  361. { "\x7f", -1, 1, TRUE },
  362. { "\xdf\xbf", -1, 2, TRUE },
  363. { "\xef\xbf\xbf", -1, 0, TRUE },
  364. { "\xf4\x8f\xbf\xbf", -1, 0, TRUE },
  365. { "\xf4\x90\xbf\xbf", -1, 0, FALSE },
  366. { "\xf7\xbf\xbf\xbf", -1, 0, FALSE },
  367. { "\xfb\xbf\xbf\xbf\xbf", -1, 0, FALSE },
  368. { "\xfd\xbf\xbf\xbf\xbf\xbf", -1, 0, FALSE },
  369. /* other boundary conditions */
  370. { "\xed\x9f\xbf", -1, 3, TRUE },
  371. { "\xed\xa0\x80", -1, 0, FALSE },
  372. { "\xed\xbf\xbf", -1, 0, FALSE },
  373. { "\xee\x80\x80", -1, 3, TRUE },
  374. { "\xef\xbf\xbd", -1, 3, TRUE },
  375. { "\xf4\x8f\xbf\xbf", -1, 0, TRUE },
  376. /* malformed sequences */
  377. /* continuation bytes */
  378. { "\x80", -1, 0, FALSE },
  379. { "\xbf", -1, 0, FALSE },
  380. { "\x80\xbf", -1, 0, FALSE },
  381. { "\x80\xbf\x80", -1, 0, FALSE },
  382. { "\x80\xbf\x80\xbf", -1, 0, FALSE },
  383. { "\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
  384. { "\x80\xbf\x80\xbf\x80\xbf", -1, 0, FALSE },
  385. { "\x80\xbf\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
  386. /* all possible continuation byte */
  387. { "\x80", -1, 0, FALSE },
  388. { "\x81", -1, 0, FALSE },
  389. { "\x82", -1, 0, FALSE },
  390. { "\x83", -1, 0, FALSE },
  391. { "\x84", -1, 0, FALSE },
  392. { "\x85", -1, 0, FALSE },
  393. { "\x86", -1, 0, FALSE },
  394. { "\x87", -1, 0, FALSE },
  395. { "\x88", -1, 0, FALSE },
  396. { "\x89", -1, 0, FALSE },
  397. { "\x8a", -1, 0, FALSE },
  398. { "\x8b", -1, 0, FALSE },
  399. { "\x8c", -1, 0, FALSE },
  400. { "\x8d", -1, 0, FALSE },
  401. { "\x8e", -1, 0, FALSE },
  402. { "\x8f", -1, 0, FALSE },
  403. { "\x90", -1, 0, FALSE },
  404. { "\x91", -1, 0, FALSE },
  405. { "\x92", -1, 0, FALSE },
  406. { "\x93", -1, 0, FALSE },
  407. { "\x94", -1, 0, FALSE },
  408. { "\x95", -1, 0, FALSE },
  409. { "\x96", -1, 0, FALSE },
  410. { "\x97", -1, 0, FALSE },
  411. { "\x98", -1, 0, FALSE },
  412. { "\x99", -1, 0, FALSE },
  413. { "\x9a", -1, 0, FALSE },
  414. { "\x9b", -1, 0, FALSE },
  415. { "\x9c", -1, 0, FALSE },
  416. { "\x9d", -1, 0, FALSE },
  417. { "\x9e", -1, 0, FALSE },
  418. { "\x9f", -1, 0, FALSE },
  419. { "\xa0", -1, 0, FALSE },
  420. { "\xa1", -1, 0, FALSE },
  421. { "\xa2", -1, 0, FALSE },
  422. { "\xa3", -1, 0, FALSE },
  423. { "\xa4", -1, 0, FALSE },
  424. { "\xa5", -1, 0, FALSE },
  425. { "\xa6", -1, 0, FALSE },
  426. { "\xa7", -1, 0, FALSE },
  427. { "\xa8", -1, 0, FALSE },
  428. { "\xa9", -1, 0, FALSE },
  429. { "\xaa", -1, 0, FALSE },
  430. { "\xab", -1, 0, FALSE },
  431. { "\xac", -1, 0, FALSE },
  432. { "\xad", -1, 0, FALSE },
  433. { "\xae", -1, 0, FALSE },
  434. { "\xaf", -1, 0, FALSE },
  435. { "\xb0", -1, 0, FALSE },
  436. { "\xb1", -1, 0, FALSE },
  437. { "\xb2", -1, 0, FALSE },
  438. { "\xb3", -1, 0, FALSE },
  439. { "\xb4", -1, 0, FALSE },
  440. { "\xb5", -1, 0, FALSE },
  441. { "\xb6", -1, 0, FALSE },
  442. { "\xb7", -1, 0, FALSE },
  443. { "\xb8", -1, 0, FALSE },
  444. { "\xb9", -1, 0, FALSE },
  445. { "\xba", -1, 0, FALSE },
  446. { "\xbb", -1, 0, FALSE },
  447. { "\xbc", -1, 0, FALSE },
  448. { "\xbd", -1, 0, FALSE },
  449. { "\xbe", -1, 0, FALSE },
  450. { "\xbf", -1, 0, FALSE },
  451. /* lone start characters */
  452. { "\xc0\x20", -1, 0, FALSE },
  453. { "\xc1\x20", -1, 0, FALSE },
  454. { "\xc2\x20", -1, 0, FALSE },
  455. { "\xc3\x20", -1, 0, FALSE },
  456. { "\xc4\x20", -1, 0, FALSE },
  457. { "\xc5\x20", -1, 0, FALSE },
  458. { "\xc6\x20", -1, 0, FALSE },
  459. { "\xc7\x20", -1, 0, FALSE },
  460. { "\xc8\x20", -1, 0, FALSE },
  461. { "\xc9\x20", -1, 0, FALSE },
  462. { "\xca\x20", -1, 0, FALSE },
  463. { "\xcb\x20", -1, 0, FALSE },
  464. { "\xcc\x20", -1, 0, FALSE },
  465. { "\xcd\x20", -1, 0, FALSE },
  466. { "\xce\x20", -1, 0, FALSE },
  467. { "\xcf\x20", -1, 0, FALSE },
  468. { "\xd0\x20", -1, 0, FALSE },
  469. { "\xd1\x20", -1, 0, FALSE },
  470. { "\xd2\x20", -1, 0, FALSE },
  471. { "\xd3\x20", -1, 0, FALSE },
  472. { "\xd4\x20", -1, 0, FALSE },
  473. { "\xd5\x20", -1, 0, FALSE },
  474. { "\xd6\x20", -1, 0, FALSE },
  475. { "\xd7\x20", -1, 0, FALSE },
  476. { "\xd8\x20", -1, 0, FALSE },
  477. { "\xd9\x20", -1, 0, FALSE },
  478. { "\xda\x20", -1, 0, FALSE },
  479. { "\xdb\x20", -1, 0, FALSE },
  480. { "\xdc\x20", -1, 0, FALSE },
  481. { "\xdd\x20", -1, 0, FALSE },
  482. { "\xde\x20", -1, 0, FALSE },
  483. { "\xdf\x20", -1, 0, FALSE },
  484. { "\xe0\x20", -1, 0, FALSE },
  485. { "\xe1\x20", -1, 0, FALSE },
  486. { "\xe2\x20", -1, 0, FALSE },
  487. { "\xe3\x20", -1, 0, FALSE },
  488. { "\xe4\x20", -1, 0, FALSE },
  489. { "\xe5\x20", -1, 0, FALSE },
  490. { "\xe6\x20", -1, 0, FALSE },
  491. { "\xe7\x20", -1, 0, FALSE },
  492. { "\xe8\x20", -1, 0, FALSE },
  493. { "\xe9\x20", -1, 0, FALSE },
  494. { "\xea\x20", -1, 0, FALSE },
  495. { "\xeb\x20", -1, 0, FALSE },
  496. { "\xec\x20", -1, 0, FALSE },
  497. { "\xed\x20", -1, 0, FALSE },
  498. { "\xee\x20", -1, 0, FALSE },
  499. { "\xef\x20", -1, 0, FALSE },
  500. { "\xf0\x20", -1, 0, FALSE },
  501. { "\xf1\x20", -1, 0, FALSE },
  502. { "\xf2\x20", -1, 0, FALSE },
  503. { "\xf3\x20", -1, 0, FALSE },
  504. { "\xf4\x20", -1, 0, FALSE },
  505. { "\xf5\x20", -1, 0, FALSE },
  506. { "\xf6\x20", -1, 0, FALSE },
  507. { "\xf7\x20", -1, 0, FALSE },
  508. { "\xf8\x20", -1, 0, FALSE },
  509. { "\xf9\x20", -1, 0, FALSE },
  510. { "\xfa\x20", -1, 0, FALSE },
  511. { "\xfb\x20", -1, 0, FALSE },
  512. { "\xfc\x20", -1, 0, FALSE },
  513. { "\xfd\x20", -1, 0, FALSE },
  514. /* missing continuation bytes */
  515. { "\x20\xc0", -1, 1, FALSE },
  516. { "\x20\xe0\x80", -1, 1, FALSE },
  517. { "\x20\xf0\x80\x80", -1, 1, FALSE },
  518. { "\x20\xf8\x80\x80\x80", -1, 1, FALSE },
  519. { "\x20\xfc\x80\x80\x80\x80", -1, 1, FALSE },
  520. { "\x20\xdf", -1, 1, FALSE },
  521. { "\x20\xef\xbf", -1, 1, FALSE },
  522. { "\x20\xf7\xbf\xbf", -1, 1, FALSE },
  523. { "\x20\xfb\xbf\xbf\xbf", -1, 1, FALSE },
  524. { "\x20\xfd\xbf\xbf\xbf\xbf", -1, 1, FALSE },
  525. /* impossible bytes */
  526. { "\x20\xfe\x20", -1, 1, FALSE },
  527. { "\x20\xff\x20", -1, 1, FALSE },
  528. /* overlong sequences */
  529. { "\x20\xc0\xaf\x20", -1, 1, FALSE },
  530. { "\x20\xe0\x80\xaf\x20", -1, 1, FALSE },
  531. { "\x20\xf0\x80\x80\xaf\x20", -1, 1, FALSE },
  532. { "\x20\xf8\x80\x80\x80\xaf\x20", -1, 1, FALSE },
  533. { "\x20\xfc\x80\x80\x80\x80\xaf\x20", -1, 1, FALSE },
  534. { "\x20\xc1\xbf\x20", -1, 1, FALSE },
  535. { "\x20\xe0\x9f\xbf\x20", -1, 1, FALSE },
  536. { "\x20\xf0\x8f\xbf\xbf\x20", -1, 1, FALSE },
  537. { "\x20\xf8\x87\xbf\xbf\xbf\x20", -1, 1, FALSE },
  538. { "\x20\xfc\x83\xbf\xbf\xbf\xbf\x20", -1, 1, FALSE },
  539. { "\x20\xc0\x80\x20", -1, 1, FALSE },
  540. { "\x20\xe0\x80\x80\x20", -1, 1, FALSE },
  541. { "\x20\xf0\x80\x80\x80\x20", -1, 1, FALSE },
  542. { "\x20\xf8\x80\x80\x80\x80\x20", -1, 1, FALSE },
  543. { "\x20\xfc\x80\x80\x80\x80\x80\x20", -1, 1, FALSE },
  544. /* illegal code positions */
  545. { "\x20\xed\xa0\x80\x20", -1, 1, FALSE },
  546. { "\x20\xed\xad\xbf\x20", -1, 1, FALSE },
  547. { "\x20\xed\xae\x80\x20", -1, 1, FALSE },
  548. { "\x20\xed\xaf\xbf\x20", -1, 1, FALSE },
  549. { "\x20\xed\xb0\x80\x20", -1, 1, FALSE },
  550. { "\x20\xed\xbe\x80\x20", -1, 1, FALSE },
  551. { "\x20\xed\xbf\xbf\x20", -1, 1, FALSE },
  552. { "\x20\xed\xa0\x80\xed\xb0\x80\x20", -1, 1, FALSE },
  553. { "\x20\xed\xa0\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
  554. { "\x20\xed\xad\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
  555. { "\x20\xed\xad\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
  556. { "\x20\xed\xae\x80\xed\xb0\x80\x20", -1, 1, FALSE },
  557. { "\x20\xed\xae\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
  558. { "\x20\xed\xaf\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
  559. { "\x20\xed\xaf\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
  560. #if 0 /* We don't consider U+FFFE / U+FFFF and similar invalid. */
  561. { "\x20\xef\xbf\xbe\x20", -1, 1, FALSE },
  562. { "\x20\xef\xbf\xbf\x20", -1, 1, FALSE },
  563. #endif
  564. { "", -1, 0, TRUE }
  565. };
  566. static void
  567. test_buffer_utf8_validity (void)
  568. {
  569. hb_buffer_t *b;
  570. unsigned int i;
  571. b = hb_buffer_create ();
  572. hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
  573. for (i = 0; i < G_N_ELEMENTS (utf8_validity_tests); i++)
  574. {
  575. const utf8_validity_test_t *test = &utf8_validity_tests[i];
  576. unsigned int text_bytes, segment_bytes, j, len;
  577. hb_glyph_info_t *glyphs;
  578. char *escaped;
  579. escaped = g_strescape (test->utf8, NULL);
  580. g_test_message ("UTF-8 test #%d: %s", i, escaped);
  581. g_free (escaped);
  582. text_bytes = strlen (test->utf8);
  583. if (test->max_len == -1)
  584. segment_bytes = text_bytes;
  585. else
  586. segment_bytes = test->max_len;
  587. hb_buffer_clear_contents (b);
  588. hb_buffer_add_utf8 (b, test->utf8, text_bytes, 0, segment_bytes);
  589. glyphs = hb_buffer_get_glyph_infos (b, &len);
  590. for (j = 0; j < len; j++)
  591. if (glyphs[j].codepoint == (hb_codepoint_t) -1)
  592. break;
  593. g_assert (test->valid ? j == len : j < len);
  594. if (!test->valid)
  595. g_assert (glyphs[j].cluster == test->offset);
  596. }
  597. hb_buffer_destroy (b);
  598. }
  599. typedef struct {
  600. const uint16_t utf16[8];
  601. const uint32_t codepoints[8];
  602. } utf16_conversion_test_t;
  603. /* note: we skip the first and last item from utf16 when adding to buffer */
  604. static const utf16_conversion_test_t utf16_conversion_tests[] = {
  605. {{0x41, 0x004D, 0x0430, 0x4E8C, 0xD800, 0xDF02, 0x61} , {0x004D, 0x0430, 0x4E8C, 0x10302}},
  606. {{0x41, 0xD800, 0xDF02, 0x61}, {0x10302}},
  607. {{0x41, 0xD800, 0xDF02}, {(hb_codepoint_t) -1}},
  608. {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, (hb_codepoint_t) -1}},
  609. {{0x41, 0xD800, 0x61, 0xDF02}, {(hb_codepoint_t) -1, 0x61}},
  610. {{0x41, 0xDF00, 0x61}, {(hb_codepoint_t) -1}},
  611. {{0x41, 0x61}, {0}}
  612. };
  613. static void
  614. test_buffer_utf16_conversion (void)
  615. {
  616. hb_buffer_t *b;
  617. unsigned int i;
  618. b = hb_buffer_create ();
  619. hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
  620. for (i = 0; i < G_N_ELEMENTS (utf16_conversion_tests); i++)
  621. {
  622. const utf16_conversion_test_t *test = &utf16_conversion_tests[i];
  623. unsigned int u_len, chars, j, len;
  624. hb_glyph_info_t *glyphs;
  625. g_test_message ("UTF-16 test #%d", i);
  626. for (u_len = 0; test->utf16[u_len]; u_len++)
  627. ;
  628. for (chars = 0; test->codepoints[chars]; chars++)
  629. ;
  630. hb_buffer_clear_contents (b);
  631. hb_buffer_add_utf16 (b, test->utf16, u_len, 1, u_len - 2);
  632. glyphs = hb_buffer_get_glyph_infos (b, &len);
  633. g_assert_cmpint (len, ==, chars);
  634. for (j = 0; j < chars; j++)
  635. g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
  636. }
  637. hb_buffer_destroy (b);
  638. }
  639. typedef struct {
  640. const uint32_t utf32[8];
  641. const uint32_t codepoints[8];
  642. } utf32_conversion_test_t;
  643. /* note: we skip the first and last item from utf32 when adding to buffer */
  644. static const utf32_conversion_test_t utf32_conversion_tests[] = {
  645. {{0x41, 0x004D, 0x0430, 0x4E8C, 0xD800, 0xDF02, 0x61} , {0x004D, 0x0430, 0x4E8C, (hb_codepoint_t) -3, (hb_codepoint_t) -3}},
  646. {{0x41, 0x004D, 0x0430, 0x4E8C, 0x10302, 0x61} , {0x004D, 0x0430, 0x4E8C, 0x10302}},
  647. {{0x41, 0xD800, 0xDF02, 0x61}, {(hb_codepoint_t) -3, (hb_codepoint_t) -3}},
  648. {{0x41, 0xD800, 0xDF02}, {(hb_codepoint_t) -3}},
  649. {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, (hb_codepoint_t) -3}},
  650. {{0x41, 0xD800, 0x61, 0xDF02}, {(hb_codepoint_t) -3, 0x61}},
  651. {{0x41, 0xDF00, 0x61}, {(hb_codepoint_t) -3}},
  652. {{0x41, 0x10FFFF, 0x61}, {0x10FFFF}},
  653. {{0x41, 0x110000, 0x61}, {(hb_codepoint_t) -3}},
  654. {{0x41, 0x61}, {0}}
  655. };
  656. static void
  657. test_buffer_utf32_conversion (void)
  658. {
  659. hb_buffer_t *b;
  660. unsigned int i;
  661. b = hb_buffer_create ();
  662. hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -3);
  663. for (i = 0; i < G_N_ELEMENTS (utf32_conversion_tests); i++)
  664. {
  665. const utf32_conversion_test_t *test = &utf32_conversion_tests[i];
  666. unsigned int u_len, chars, j, len;
  667. hb_glyph_info_t *glyphs;
  668. g_test_message ("UTF-32 test #%d", i);
  669. for (u_len = 0; test->utf32[u_len]; u_len++)
  670. ;
  671. for (chars = 0; test->codepoints[chars]; chars++)
  672. ;
  673. hb_buffer_clear_contents (b);
  674. hb_buffer_add_utf32 (b, test->utf32, u_len, 1, u_len - 2);
  675. glyphs = hb_buffer_get_glyph_infos (b, &len);
  676. g_assert_cmpint (len, ==, chars);
  677. for (j = 0; j < chars; j++)
  678. g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
  679. }
  680. hb_buffer_destroy (b);
  681. }
  682. static void
  683. test_empty (hb_buffer_t *b)
  684. {
  685. g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
  686. g_assert (!hb_buffer_get_glyph_infos (b, NULL));
  687. g_assert (!hb_buffer_get_glyph_positions (b, NULL));
  688. }
  689. static void
  690. test_buffer_empty (void)
  691. {
  692. hb_buffer_t *b = hb_buffer_get_empty ();
  693. g_assert (hb_buffer_get_empty ());
  694. g_assert (hb_buffer_get_empty () == b);
  695. g_assert (!hb_buffer_allocation_successful (b));
  696. test_empty (b);
  697. hb_buffer_add_utf32 (b, utf32, G_N_ELEMENTS (utf32), 1, G_N_ELEMENTS (utf32) - 2);
  698. test_empty (b);
  699. hb_buffer_reverse (b);
  700. hb_buffer_reverse_clusters (b);
  701. g_assert (!hb_buffer_set_length (b, 10));
  702. test_empty (b);
  703. g_assert (hb_buffer_set_length (b, 0));
  704. test_empty (b);
  705. g_assert (!hb_buffer_allocation_successful (b));
  706. hb_buffer_reset (b);
  707. test_empty (b);
  708. g_assert (!hb_buffer_allocation_successful (b));
  709. }
  710. typedef struct {
  711. const char *contents;
  712. hb_buffer_serialize_format_t format;
  713. unsigned int num_items;
  714. hb_bool_t success;
  715. } serialization_test_t;
  716. static const serialization_test_t serialization_tests[] = {
  717. { "<U+0640=0|U+0635=1>", HB_BUFFER_SERIALIZE_FORMAT_TEXT, 2, 1 },
  718. { "[{\"u\":1600,\"cl\":0},{\"u\":1589,\"cl\":1}]", HB_BUFFER_SERIALIZE_FORMAT_JSON, 2, 1 },
  719. /* Mixed glyphs/Unicodes -> parse fail */
  720. { "[{\"u\":1600,\"cl\":0},{\"g\":1589,\"cl\":1}]", HB_BUFFER_SERIALIZE_FORMAT_JSON, 0, 0 },
  721. { "<U+0640=0|uni0635=1>", HB_BUFFER_SERIALIZE_FORMAT_TEXT, 0, 0 },
  722. };
  723. static void
  724. test_buffer_serialize_deserialize (void)
  725. {
  726. hb_buffer_t *b;
  727. unsigned int i;
  728. for (i = 0; i < G_N_ELEMENTS (serialization_tests); i++)
  729. {
  730. unsigned int consumed;
  731. char round_trip[1024];
  732. b = hb_buffer_create ();
  733. hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
  734. const serialization_test_t *test = &serialization_tests[i];
  735. g_test_message ("serialize test #%d", i);
  736. (void) hb_buffer_deserialize_unicode (b, test->contents, -1, NULL, test->format);
  737. // Expected parse failure, got one, don't round-trip
  738. if (test->success != 0)
  739. {
  740. unsigned int num_glyphs = hb_buffer_get_length (b);
  741. g_assert_cmpint (num_glyphs, ==, test->num_items);
  742. hb_buffer_serialize_unicode (b, 0, num_glyphs, round_trip,
  743. sizeof(round_trip), &consumed, test->format,
  744. HB_BUFFER_SERIALIZE_FLAG_DEFAULT);
  745. g_assert_cmpstr (round_trip, ==, test->contents);
  746. }
  747. hb_buffer_destroy (b);
  748. }
  749. char test[1024];
  750. unsigned int consumed;
  751. hb_buffer_t *indeterminate = hb_buffer_get_empty ();
  752. hb_buffer_serialize (indeterminate, 0, (unsigned) -1,
  753. test, sizeof(test), &consumed, NULL,
  754. HB_BUFFER_SERIALIZE_FORMAT_JSON,
  755. HB_BUFFER_SERIALIZE_FLAG_DEFAULT);
  756. g_assert_cmpstr ( test, ==, "[]");
  757. hb_buffer_serialize (indeterminate, 0, (unsigned) - 1,
  758. test, sizeof(test), &consumed, NULL,
  759. HB_BUFFER_SERIALIZE_FORMAT_TEXT,
  760. HB_BUFFER_SERIALIZE_FLAG_DEFAULT);
  761. g_assert_cmpstr ( test, ==, "!!");
  762. }
  763. int
  764. main (int argc, char **argv)
  765. {
  766. unsigned int i;
  767. hb_test_init (&argc, &argv);
  768. for (i = 0; i < BUFFER_NUM_TYPES; i++)
  769. {
  770. const void *buffer_type = GINT_TO_POINTER (i);
  771. const char *buffer_name = buffer_names[i];
  772. hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_properties);
  773. hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_contents);
  774. hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_positions);
  775. }
  776. hb_test_add_fixture (fixture, GINT_TO_POINTER (BUFFER_EMPTY), test_buffer_allocation);
  777. hb_test_add (test_buffer_utf8_conversion);
  778. hb_test_add (test_buffer_utf8_validity);
  779. hb_test_add (test_buffer_utf16_conversion);
  780. hb_test_add (test_buffer_utf32_conversion);
  781. hb_test_add (test_buffer_empty);
  782. hb_test_add (test_buffer_serialize_deserialize);
  783. return hb_test_run();
  784. }