c
/
odin-lang.Odin
-ын хуулбар https://github.com/odin-lang/Odin


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
							package test_core_unicode

import "core:log"
import "core:testing"
import "core:unicode/utf8"

// One grapheme-counting test vector, consumed by `run_test_cases`.
Test_Case :: struct {
	// Input text passed to `utf8.grapheme_count`.
	str: string,
	// Value the first result of `utf8.grapheme_count(str)` is compared against.
	expected_clusters: int,
}

// Runs every entry of `test_cases` through `utf8.grapheme_count` and checks
// that the first returned value equals the entry's `expected_clusters`.
//
// Each mismatch is reported through `testing.expectf` at `loc`. Once all
// entries have been processed, a summary line with the number of failures is
// logged at `.Error` level if anything failed and at `.Info` level otherwise.
run_test_cases :: proc(t: ^testing.T, test_cases: []Test_Case, loc := #caller_location) {
	failure_count := 0
	for test_case, index in test_cases {
		log.debugf("(#% 4i) %q ...", index, test_case.str)
		cluster_count, _, _ := utf8.grapheme_count(test_case.str)
		matched := testing.expectf(
			t, cluster_count == test_case.expected_clusters,
			"(#% 4i) graphemes: %i != %i, %q %s",
			index, cluster_count, test_case.expected_clusters, test_case.str, test_case.str,
			loc = loc,
		)
		if matched {
			continue
		}
		failure_count += 1
	}

	// Only escalate the summary to an error when at least one case failed.
	summary_level := log.Level.Error if failure_count > 0 else log.Level.Info
	log.logf(summary_level, "% 4i/% 4i test cases failed.", failure_count, len(test_cases), location = loc)
}

// Checks the grapheme counts of every entry in
// `official_grapheme_break_test_cases` by delegating to `run_test_cases`.
@(test)
test_official_gcb_cases :: proc(t: ^testing.T) {
	run_test_cases(t = t, test_cases = official_grapheme_break_test_cases)
}

// Checks the grapheme counts of every entry in `official_emoji_test_cases`
// by delegating to `run_test_cases`.
@(test)
test_official_emoji_cases :: proc(t: ^testing.T) {
	run_test_cases(t = t, test_cases = official_emoji_test_cases)
}

// Verifies that the grapheme boundaries reported by
// `utf8.decode_grapheme_clusters` can be used to slice the original string
// back into its individual clusters.
//
// The input is five samples concatenated, consisting of 1, 7, 4, 7 and 1
// runes respectively. Each sample is expected to come back as exactly one
// grapheme, which is what the expected `rune_index` values (0, 1, 8, 12, 19)
// are derived from.
@test
test_grapheme_byte_index_segmentation :: proc(t: ^testing.T) {
	SAMPLE_1 :: "\U0001F600"                                                               // 1 rune
	SAMPLE_2 :: "\U0001F3F4\U000E0067\U000E0062\U000E0065\U000E006E\U000E0067\U000E007F"   // 7 runes
	SAMPLE_3 :: "\U0001F468\U0001F3FB\u200D\U0001F9B0"                                     // 4 runes

	str := SAMPLE_1 + SAMPLE_2 + SAMPLE_3 + SAMPLE_2 + SAMPLE_1

	graphemes, _, _, _ := utf8.decode_grapheme_clusters(str)
	defer delete(graphemes)

	// On any failure, dump the decoded clusters together with the input (both
	// quoted and as raw bytes) to make the mismatch easier to diagnose.
	defer if testing.failed(t) {
		log.infof("%#v\n%q\n%v", graphemes, str, transmute([]u8)str)
	}
	// Bail out early: the indexing below requires exactly five entries.
	if !testing.expect_value(t, len(graphemes), 5) {
		return
	}

	testing.expect_value(t, graphemes[0].rune_index, 0)
	testing.expect_value(t, graphemes[1].rune_index, 1)
	testing.expect_value(t, graphemes[2].rune_index, 8)
	testing.expect_value(t, graphemes[3].rune_index, 12)
	testing.expect_value(t, graphemes[4].rune_index, 19)

	// Each cluster spans from its own byte index up to the next cluster's byte
	// index; the last one runs to the end of the string.
	grapheme_1 := str[graphemes[0].byte_index:graphemes[1].byte_index]
	grapheme_2 := str[graphemes[1].byte_index:graphemes[2].byte_index]
	grapheme_3 := str[graphemes[2].byte_index:graphemes[3].byte_index]
	grapheme_4 := str[graphemes[3].byte_index:graphemes[4].byte_index]
	grapheme_5 := str[graphemes[4].byte_index:]

	// The "got" argument must be the slice actually being compared, otherwise a
	// failure would print an unrelated grapheme.
	testing.expectf(t, grapheme_1 == SAMPLE_1, "expected %q, got %q", SAMPLE_1, grapheme_1)
	testing.expectf(t, grapheme_2 == SAMPLE_2, "expected %q, got %q", SAMPLE_2, grapheme_2)
	testing.expectf(t, grapheme_3 == SAMPLE_3, "expected %q, got %q", SAMPLE_3, grapheme_3)
	testing.expectf(t, grapheme_4 == SAMPLE_2, "expected %q, got %q", SAMPLE_2, grapheme_4)
	testing.expectf(t, grapheme_5 == SAMPLE_1, "expected %q, got %q", SAMPLE_1, grapheme_5)
}

// Checks the grapheme count and the display width reported by
// `utf8.grapheme_count` / `utf8.decode_grapheme_clusters` for a series of
// inputs. Every case lives in its own scope so the result names can be reused.
@(test)
test_width :: proc(t: ^testing.T) {
	// U+200D in the middle of ASCII text.
	{
		graphemes, _, width := utf8.grapheme_count("He\u200dllo")
		testing.expect_value(t, graphemes, 5)
		testing.expect_value(t, width, 5)
	}

	// U+200B (zero-width space): the specification still treats it as a
	// grapheme of its own, so it adds to the count but not to the width.
	{
		graphemes, _, width := utf8.grapheme_count("He\u200bllo")
		testing.expect_value(t, graphemes, 6)
		testing.expect_value(t, width, 5)
	}

	// A four-rune sequence joined by U+200D.
	{
		graphemes, _, width := utf8.grapheme_count("\U0001F926\U0001F3FC\u200D\u2642")
		testing.expect_value(t, graphemes, 1)
		testing.expect_value(t, width, 2)
	}

	// ASCII characters, each followed by a combining mark.
	{
		graphemes, _, width := utf8.grapheme_count("H̷e̶l̵l̸o̴p̵e̷ ̸w̶o̸r̵l̶d̵!̴")
		testing.expect_value(t, graphemes, 14)
		testing.expect_value(t, width, 14)
	}

	// Mixed input; the per-grapheme widths are checked as well as the total.
	{
		graphemes, grapheme_count, _, width := utf8.decode_grapheme_clusters("aカ.ヒフ")
		defer delete(graphemes)
		testing.expect_value(t, grapheme_count, 5)
		testing.expect_value(t, width, 8)
		// Only index into the result when it has the expected number of entries.
		if grapheme_count == 5 {
			testing.expect_value(t, graphemes[0].width, 1)
			testing.expect_value(t, graphemes[1].width, 2)
			testing.expect_value(t, graphemes[2].width, 1)
			testing.expect_value(t, graphemes[3].width, 2)
			testing.expect_value(t, graphemes[4].width, 2)
		}
	}

	// Six characters, twelve columns expected.
	{
		graphemes, _, width := utf8.grapheme_count("いろはにほへ")
		testing.expect_value(t, graphemes, 6)
		testing.expect_value(t, width, 12)
	}

	// Twenty-five characters including punctuation, fifty columns expected.
	{
		graphemes, _, width := utf8.grapheme_count("舍利弗，是諸法空相，不生不滅，不垢不淨，不增不減。")
		testing.expect_value(t, graphemes, 25)
		testing.expect_value(t, width, 50)
	}
}