2
0

test_core_unicode.odin 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. package test_core_unicode
  2. import "core:log"
  3. import "core:testing"
  4. import "core:unicode/utf8"
  // Test_Case pairs an input string with the number of grapheme clusters
  // that `utf8.grapheme_count` is expected to report for it.
  Test_Case :: struct {
      // UTF-8 text handed to the grapheme counter.
      str:               string,
      // Grapheme cluster count the test expects for `str`.
      expected_clusters: int,
  }
  // Runs every entry of `test_cases` through `utf8.grapheme_count` and
  // registers an expectation on `t` that the reported grapheme count equals
  // the entry's `expected_clusters`.
  //
  // Inputs:
  // - t:          test context the expectations are recorded against
  // - test_cases: table of inputs and their expected grapheme counts
  // - loc:        source location attached to expectations and the summary
  //               log line; defaults to the caller's location
  //
  // After the loop a summary line is logged: at `.Error` level when at least
  // one case failed, at `.Info` level otherwise.
  run_test_cases :: proc(t: ^testing.T, test_cases: []Test_Case, loc := #caller_location) {
      failure_count := 0

      for test_case, index in test_cases {
          log.debugf("(#% 4i) %q ...", index, test_case.str)

          // Only the grapheme count matters here; the other two results are ignored.
          cluster_count, _, _ := utf8.grapheme_count(test_case.str)
          matches := cluster_count == test_case.expected_clusters

          // The input is printed twice on purpose: once quoted (%q), once raw (%s).
          ok := testing.expectf(
              t, matches,
              "(#% 4i) graphemes: %i != %i, %q %s",
              index, cluster_count, test_case.expected_clusters, test_case.str, test_case.str,
              loc = loc,
          )
          if !ok {
              failure_count += 1
          }
      }

      summary_level := log.Level.Info
      if failure_count > 0 {
          summary_level = .Error
      }
      log.logf(summary_level, "% 4i/% 4i test cases failed.", failure_count, len(test_cases), location = loc)
  }
  // Feeds the `official_grapheme_break_test_cases` table (declared elsewhere
  // in this package) through `run_test_cases`.
  @test
  test_official_gcb_cases :: proc(t: ^testing.T) {
      run_test_cases(t, test_cases = official_grapheme_break_test_cases)
  }
  // Feeds the `official_emoji_test_cases` table (declared elsewhere in this
  // package) through `run_test_cases`.
  @test
  test_official_emoji_cases :: proc(t: ^testing.T) {
      run_test_cases(t, test_cases = official_emoji_test_cases)
  }
  // Verifies that the `rune_index` and `byte_index` values returned by
  // `utf8.decode_grapheme_clusters` can be used to slice a string back into
  // its individual grapheme clusters.
  //
  // The input is five multi-byte clusters concatenated back to back. The test
  // expects exactly five graphemes, checks the rune offset of each one, and
  // then slices the string at the reported byte offsets and compares every
  // slice with the sample it was built from.
  @test
  test_grapheme_byte_index_segmentation :: proc(t: ^testing.T) {
      // 1 rune.
      SAMPLE_1 :: "\U0001F600"
      // 7 runes.
      SAMPLE_2 :: "\U0001F3F4\U000E0067\U000E0062\U000E0065\U000E006E\U000E0067\U000E007F"
      // 4 runes.
      SAMPLE_3 :: "\U0001F468\U0001F3FB\u200D\U0001F9B0"

      str := SAMPLE_1 + SAMPLE_2 + SAMPLE_3 + SAMPLE_2 + SAMPLE_1

      graphemes, _, _, _ := utf8.decode_grapheme_clusters(str)
      defer delete(graphemes)

      // On failure, dump the decoded clusters and the raw bytes to help debugging.
      defer if testing.failed(t) {
          log.infof("%#v\n%q\n%v", graphemes, str, transmute([]u8)str)
      }

      // The indexing below is only valid when all five clusters were found.
      if !testing.expect_value(t, len(graphemes), 5) {
          return
      }

      // Rune offsets follow from the sample lengths: 1, 7, 4, 7 runes.
      testing.expect_value(t, graphemes[0].rune_index, 0)
      testing.expect_value(t, graphemes[1].rune_index, 1)
      testing.expect_value(t, graphemes[2].rune_index, 8)
      testing.expect_value(t, graphemes[3].rune_index, 12)
      testing.expect_value(t, graphemes[4].rune_index, 19)

      // Each cluster spans from its own byte offset to the next cluster's.
      grapheme_1 := str[graphemes[0].byte_index:graphemes[1].byte_index]
      grapheme_2 := str[graphemes[1].byte_index:graphemes[2].byte_index]
      grapheme_3 := str[graphemes[2].byte_index:graphemes[3].byte_index]
      grapheme_4 := str[graphemes[3].byte_index:graphemes[4].byte_index]
      grapheme_5 := str[graphemes[4].byte_index:]

      // Every message reports the slice that was actually compared.
      testing.expectf(t, grapheme_1 == SAMPLE_1, "expected %q, got %q", SAMPLE_1, grapheme_1)
      testing.expectf(t, grapheme_2 == SAMPLE_2, "expected %q, got %q", SAMPLE_2, grapheme_2)
      testing.expectf(t, grapheme_3 == SAMPLE_3, "expected %q, got %q", SAMPLE_3, grapheme_3)
      testing.expectf(t, grapheme_4 == SAMPLE_2, "expected %q, got %q", SAMPLE_2, grapheme_4)
      testing.expectf(t, grapheme_5 == SAMPLE_1, "expected %q, got %q", SAMPLE_1, grapheme_5)
  }
  // Checks the grapheme count and display width reported by
  // `utf8.grapheme_count` and `utf8.decode_grapheme_clusters` for a set of
  // sample strings. Each braced scope is an independent case.
  @test
  test_width :: proc(t: ^testing.T) {
      {
          // U+200D (zero width joiner) inside ASCII text: the expectation is
          // that it neither adds a grapheme nor adds width.
          str := "He\u200dllo"
          graphemes, _, width := utf8.grapheme_count(str)
          testing.expect_value(t, graphemes, 5)
          testing.expect_value(t, width, 5)
      }
      {
          // Note that a zero-width space is still considered a grapheme as far
          // as the specification is concerned.
          str := "He\u200bllo"
          graphemes, _, width := utf8.grapheme_count(str)
          testing.expect_value(t, graphemes, 6)
          testing.expect_value(t, width, 5)
      }
      {
          // Four code points joined with U+200D; expected to form a single
          // grapheme that is two columns wide.
          str := "\U0001F926\U0001F3FC\u200D\u2642"
          graphemes, _, width := utf8.grapheme_count(str)
          testing.expect_value(t, graphemes, 1)
          testing.expect_value(t, width, 2)
      }
      {
          // 14 base characters, each followed by a combining mark; the marks
          // are expected to add neither graphemes nor width.
          str := "H̷e̶l̵l̸o̴p̵e̷ ̸w̶o̸r̵l̶d̵!̴"
          graphemes, _, width := utf8.grapheme_count(str)
          testing.expect_value(t, graphemes, 14)
          testing.expect_value(t, width, 14)
      }
      {
          // Mixed narrow (ASCII) and wide (katakana) characters, with the
          // per-grapheme widths checked individually.
          str := "aカ.ヒフ"
          graphemes, grapheme_count, _, width := utf8.decode_grapheme_clusters(str)
          defer delete(graphemes)
          testing.expect_value(t, grapheme_count, 5)
          testing.expect_value(t, width, 8)
          // Only index into the result when the count is as expected.
          if grapheme_count == 5 {
              testing.expect_value(t, graphemes[0].width, 1)
              testing.expect_value(t, graphemes[1].width, 2)
              testing.expect_value(t, graphemes[2].width, 1)
              testing.expect_value(t, graphemes[3].width, 2)
              testing.expect_value(t, graphemes[4].width, 2)
          }
      }
      {
          // Six hiragana, each expected to be two columns wide.
          str := "いろはにほへ"
          graphemes, _, width := utf8.grapheme_count(str)
          testing.expect_value(t, graphemes, 6)
          testing.expect_value(t, width, 12)
      }
      {
          // 25 characters, all expected to be two columns wide. The commas are
          // FULLWIDTH COMMA (U+FF0C): an ASCII comma is one column wide and
          // would make the expected width of 50 unreachable.
          str := "舍利弗，是諸法空相，不生不滅，不垢不淨，不增不減。"
          graphemes, _, width := utf8.grapheme_count(str)
          testing.expect_value(t, graphemes, 25)
          testing.expect_value(t, width, 50)
      }
  }