|
@@ -1,5 +1,7 @@
|
|
|
package unicode
|
|
|
|
|
|
+import "base:runtime"
|
|
|
+
|
|
|
MAX_RUNE :: '\U0010ffff' // Maximum valid Unicode code point (U+10FFFF)
|
|
|
REPLACEMENT_CHAR :: '\ufffd' // Represents an invalid code point
|
|
|
MAX_ASCII :: '\u007f' // Maximum ASCII value
|
|
@@ -11,7 +13,8 @@ ZERO_WIDTH_JOINER :: '\u200D'
|
|
|
WORD_JOINER :: '\u2060'
|
|
|
|
|
|
@(require_results)
|
|
|
-binary_search :: proc(c: i32, table: []i32, length, stride: int) -> int {
|
|
|
+binary_search :: proc(c: i32, table: []i32, length, stride: int, loc := #caller_location) -> int #no_bounds_check {
|
|
|
+ runtime.bounds_check_error_loc(loc, length*stride-1, len(table))
|
|
|
n := length
|
|
|
t := 0
|
|
|
for n > 1 {
|
|
@@ -31,7 +34,7 @@ binary_search :: proc(c: i32, table: []i32, length, stride: int) -> int {
|
|
|
}
|
|
|
|
|
|
@(require_results)
|
|
|
-to_lower :: proc(r: rune) -> rune {
|
|
|
+to_lower :: proc(r: rune) -> rune #no_bounds_check {
|
|
|
c := i32(r)
|
|
|
p := binary_search(c, to_lower_ranges[:], len(to_lower_ranges)/3, 3)
|
|
|
if p >= 0 && to_lower_ranges[p] <= c && c <= to_lower_ranges[p+1] {
|
|
@@ -44,7 +47,7 @@ to_lower :: proc(r: rune) -> rune {
|
|
|
return rune(c)
|
|
|
}
|
|
|
@(require_results)
|
|
|
-to_upper :: proc(r: rune) -> rune {
|
|
|
+to_upper :: proc(r: rune) -> rune #no_bounds_check {
|
|
|
c := i32(r)
|
|
|
p := binary_search(c, to_upper_ranges[:], len(to_upper_ranges)/3, 3)
|
|
|
if p >= 0 && to_upper_ranges[p] <= c && c <= to_upper_ranges[p+1] {
|
|
@@ -57,7 +60,7 @@ to_upper :: proc(r: rune) -> rune {
|
|
|
return rune(c)
|
|
|
}
|
|
|
@(require_results)
|
|
|
-to_title :: proc(r: rune) -> rune {
|
|
|
+to_title :: proc(r: rune) -> rune #no_bounds_check {
|
|
|
c := i32(r)
|
|
|
p := binary_search(c, to_upper_singlets[:], len(to_title_singlets)/2, 2)
|
|
|
if p >= 0 && c == to_upper_singlets[p] {
|
|
@@ -68,7 +71,7 @@ to_title :: proc(r: rune) -> rune {
|
|
|
|
|
|
|
|
|
@(require_results)
|
|
|
-is_lower :: proc(r: rune) -> bool {
|
|
|
+is_lower :: proc(r: rune) -> bool #no_bounds_check {
|
|
|
if r <= MAX_ASCII {
|
|
|
return u32(r)-'a' < 26
|
|
|
}
|
|
@@ -85,7 +88,7 @@ is_lower :: proc(r: rune) -> bool {
|
|
|
}
|
|
|
|
|
|
@(require_results)
|
|
|
-is_upper :: proc(r: rune) -> bool {
|
|
|
+is_upper :: proc(r: rune) -> bool #no_bounds_check {
|
|
|
if r <= MAX_ASCII {
|
|
|
return u32(r)-'A' < 26
|
|
|
}
|
|
@@ -103,7 +106,7 @@ is_upper :: proc(r: rune) -> bool {
|
|
|
|
|
|
is_alpha :: is_letter
|
|
|
@(require_results)
|
|
|
-is_letter :: proc(r: rune) -> bool {
|
|
|
+is_letter :: proc(r: rune) -> bool #no_bounds_check {
|
|
|
if u32(r) <= MAX_LATIN1 {
|
|
|
return char_properties[u8(r)]&pLmask != 0
|
|
|
}
|
|
@@ -139,7 +142,7 @@ is_digit :: proc(r: rune) -> bool {
|
|
|
|
|
|
is_white_space :: is_space
|
|
|
@(require_results)
|
|
|
-is_space :: proc(r: rune) -> bool {
|
|
|
+is_space :: proc(r: rune) -> bool #no_bounds_check {
|
|
|
if u32(r) <= MAX_LATIN1 {
|
|
|
switch r {
|
|
|
case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xa0:
|
|
@@ -177,7 +180,7 @@ is_graphic :: proc(r: rune) -> bool {
|
|
|
}
|
|
|
|
|
|
@(require_results)
|
|
|
-is_print :: proc(r: rune) -> bool {
|
|
|
+is_print :: proc(r: rune) -> bool #no_bounds_check {
|
|
|
if u32(r) <= MAX_LATIN1 {
|
|
|
return char_properties[u8(r)]&pp != 0
|
|
|
}
|
|
@@ -185,7 +188,7 @@ is_print :: proc(r: rune) -> bool {
|
|
|
}
|
|
|
|
|
|
@(require_results)
|
|
|
-is_control :: proc(r: rune) -> bool {
|
|
|
+is_control :: proc(r: rune) -> bool #no_bounds_check {
|
|
|
if u32(r) <= MAX_LATIN1 {
|
|
|
return char_properties[u8(r)]&pC != 0
|
|
|
}
|
|
@@ -193,7 +196,7 @@ is_control :: proc(r: rune) -> bool {
|
|
|
}
|
|
|
|
|
|
@(require_results)
|
|
|
-is_number :: proc(r: rune) -> bool {
|
|
|
+is_number :: proc(r: rune) -> bool #no_bounds_check {
|
|
|
if u32(r) <= MAX_LATIN1 {
|
|
|
return char_properties[u8(r)]&pN != 0
|
|
|
}
|
|
@@ -201,7 +204,7 @@ is_number :: proc(r: rune) -> bool {
|
|
|
}
|
|
|
|
|
|
@(require_results)
|
|
|
-is_punct :: proc(r: rune) -> bool {
|
|
|
+is_punct :: proc(r: rune) -> bool #no_bounds_check {
|
|
|
if u32(r) <= MAX_LATIN1 {
|
|
|
return char_properties[u8(r)]&pP != 0
|
|
|
}
|
|
@@ -209,7 +212,7 @@ is_punct :: proc(r: rune) -> bool {
|
|
|
}
|
|
|
|
|
|
@(require_results)
|
|
|
-is_symbol :: proc(r: rune) -> bool {
|
|
|
+is_symbol :: proc(r: rune) -> bool #no_bounds_check {
|
|
|
if u32(r) <= MAX_LATIN1 {
|
|
|
return char_properties[u8(r)]&pS != 0
|
|
|
}
|
|
@@ -267,7 +270,7 @@ is_prepended_concatenation_mark :: proc(r: rune) -> bool {
|
|
|
|
|
|
// General_Category=Spacing_Mark
|
|
|
@(require_results)
|
|
|
-is_spacing_mark :: proc(r: rune) -> bool {
|
|
|
+is_spacing_mark :: proc(r: rune) -> bool #no_bounds_check {
|
|
|
c := i32(r)
|
|
|
p := binary_search(c, spacing_mark_ranges[:], len(spacing_mark_ranges)/2, 2)
|
|
|
if p >= 0 && spacing_mark_ranges[p] <= c && c <= spacing_mark_ranges[p+1] {
|
|
@@ -278,7 +281,7 @@ is_spacing_mark :: proc(r: rune) -> bool {
|
|
|
|
|
|
// General_Category=Nonspacing_Mark
|
|
|
@(require_results)
|
|
|
-is_nonspacing_mark :: proc(r: rune) -> bool {
|
|
|
+is_nonspacing_mark :: proc(r: rune) -> bool #no_bounds_check {
|
|
|
c := i32(r)
|
|
|
p := binary_search(c, nonspacing_mark_ranges[:], len(nonspacing_mark_ranges)/2, 2)
|
|
|
if p >= 0 && nonspacing_mark_ranges[p] <= c && c <= nonspacing_mark_ranges[p+1] {
|
|
@@ -289,7 +292,7 @@ is_nonspacing_mark :: proc(r: rune) -> bool {
|
|
|
|
|
|
// Extended_Pictographic
|
|
|
@(require_results)
|
|
|
-is_emoji_extended_pictographic :: proc(r: rune) -> bool {
|
|
|
+is_emoji_extended_pictographic :: proc(r: rune) -> bool #no_bounds_check {
|
|
|
c := i32(r)
|
|
|
p := binary_search(c, emoji_extended_pictographic_ranges[:], len(emoji_extended_pictographic_ranges)/2, 2)
|
|
|
if p >= 0 && emoji_extended_pictographic_ranges[p] <= c && c <= emoji_extended_pictographic_ranges[p+1] {
|
|
@@ -300,7 +303,7 @@ is_emoji_extended_pictographic :: proc(r: rune) -> bool {
|
|
|
|
|
|
// Grapheme_Extend
|
|
|
@(require_results)
|
|
|
-is_grapheme_extend :: proc(r: rune) -> bool {
|
|
|
+is_grapheme_extend :: proc(r: rune) -> bool #no_bounds_check {
|
|
|
c := i32(r)
|
|
|
p := binary_search(c, grapheme_extend_ranges[:], len(grapheme_extend_ranges)/2, 2)
|
|
|
if p >= 0 && grapheme_extend_ranges[p] <= c && c <= grapheme_extend_ranges[p+1] {
|
|
@@ -330,7 +333,7 @@ is_hangul_syllable_trailing :: proc(r: rune) -> bool {
|
|
|
|
|
|
// Hangul_Syllable_Type=LV_Syllable
|
|
|
@(require_results)
|
|
|
-is_hangul_syllable_lv :: proc(r: rune) -> bool {
|
|
|
+is_hangul_syllable_lv :: proc(r: rune) -> bool #no_bounds_check {
|
|
|
c := i32(r)
|
|
|
p := binary_search(c, hangul_syllable_lv_singlets[:], len(hangul_syllable_lv_singlets), 1)
|
|
|
if p >= 0 && c == hangul_syllable_lv_singlets[p] {
|
|
@@ -341,7 +344,7 @@ is_hangul_syllable_lv :: proc(r: rune) -> bool {
|
|
|
|
|
|
// Hangul_Syllable_Type=LVT_Syllable
|
|
|
@(require_results)
|
|
|
-is_hangul_syllable_lvt :: proc(r: rune) -> bool {
|
|
|
+is_hangul_syllable_lvt :: proc(r: rune) -> bool #no_bounds_check {
|
|
|
c := i32(r)
|
|
|
p := binary_search(c, hangul_syllable_lvt_ranges[:], len(hangul_syllable_lvt_ranges)/2, 2)
|
|
|
if p >= 0 && hangul_syllable_lvt_ranges[p] <= c && c <= hangul_syllable_lvt_ranges[p+1] {
|
|
@@ -397,7 +400,7 @@ is_indic_conjunct_break_linker :: proc(r: rune) -> bool {
|
|
|
|
|
|
// Indic_Conjunct_Break=Consonant
|
|
|
@(require_results)
|
|
|
-is_indic_conjunct_break_consonant :: proc(r: rune) -> bool {
|
|
|
+is_indic_conjunct_break_consonant :: proc(r: rune) -> bool #no_bounds_check {
|
|
|
c := i32(r)
|
|
|
p := binary_search(c, indic_conjunct_break_consonant_ranges[:], len(indic_conjunct_break_consonant_ranges)/2, 2)
|
|
|
if p >= 0 && indic_conjunct_break_consonant_ranges[p] <= c && c <= indic_conjunct_break_consonant_ranges[p+1] {
|
|
@@ -408,7 +411,7 @@ is_indic_conjunct_break_consonant :: proc(r: rune) -> bool {
|
|
|
|
|
|
// Indic_Conjunct_Break=Extend
|
|
|
@(require_results)
|
|
|
-is_indic_conjunct_break_extend :: proc(r: rune) -> bool {
|
|
|
+is_indic_conjunct_break_extend :: proc(r: rune) -> bool #no_bounds_check {
|
|
|
c := i32(r)
|
|
|
p := binary_search(c, indic_conjunct_break_extend_ranges[:], len(indic_conjunct_break_extend_ranges)/2, 2)
|
|
|
if p >= 0 && indic_conjunct_break_extend_ranges[p] <= c && c <= indic_conjunct_break_extend_ranges[p+1] {
|
|
@@ -459,7 +462,7 @@ is_gcb_extend_class :: proc(r: rune) -> bool {
|
|
|
// - 1 in all other cases.
|
|
|
//
|
|
|
@(require_results)
|
|
|
-normalized_east_asian_width :: proc(r: rune) -> int {
|
|
|
+normalized_east_asian_width :: proc(r: rune) -> int #no_bounds_check {
|
|
|
// This is a different interpretation of the BOM which occurs in the middle of text.
|
|
|
ZERO_WIDTH_NO_BREAK_SPACE :: '\uFEFF'
|
|
|
|