| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195 |
- package unicode
// Fundamental code point limits used by the classification procedures in this package.
MAX_RUNE         :: '\U0010ffff' // Maximum valid Unicode code point (U+10FFFF)
REPLACEMENT_CHAR :: '\ufffd'     // Represents an invalid code point
MAX_ASCII        :: '\u007f'     // Maximum ASCII value
MAX_LATIN1       :: '\u00ff'     // Maximum Latin-1 value
// binary_search looks up `c` in a sorted table laid out as fixed-size records.
//
// `table` holds `length` records of `stride` consecutive i32 values each; only the
// first value of every record is compared against `c`.
//
// Returns the index (into `table`, not the record number) of the last record whose
// first value is <= `c`, or -1 when `length` is 0 or `c` is smaller than the first
// record's key.
binary_search :: proc(c: i32, table: []i32, length, stride: int) -> int {
	remaining := length // number of records still under consideration
	base := 0           // table index of the first record still under consideration

	for remaining > 1 {
		half := remaining / 2
		probe := base + half*stride
		if c < table[probe] {
			// Key is below the probe: keep only the lower part.
			remaining = half
			continue
		}
		// Key is at or above the probe: the probe becomes the new lower bound.
		base = probe
		remaining -= half
	}

	if remaining == 0 || c < table[base] {
		return -1
	}
	return base
}
// to_lower maps `r` through the lower-case tables.
//
// `to_lower_ranges` is read as (low, high, delta) triples and `to_lower_singlets`
// as (code point, delta) pairs. Stored deltas are offset by 500, which is removed
// when the mapping is applied. Runes found in neither table are returned unchanged.
to_lower :: proc(r: rune) -> rune {
	code := i32(r)

	// First try the range table.
	idx := binary_search(code, to_lower_ranges[:], len(to_lower_ranges)/3, 3)
	if idx >= 0 {
		if to_lower_ranges[idx] <= code && code <= to_lower_ranges[idx+1] {
			return rune(code + to_lower_ranges[idx+2] - 500)
		}
	}

	// Then the table of individually mapped code points.
	idx = binary_search(code, to_lower_singlets[:], len(to_lower_singlets)/2, 2)
	if idx >= 0 {
		if code == to_lower_singlets[idx] {
			return rune(code + to_lower_singlets[idx+1] - 500)
		}
	}

	return rune(code)
}
// to_upper maps `r` through the upper-case tables.
//
// `to_upper_ranges` is read as (low, high, delta) triples and `to_upper_singlets`
// as (code point, delta) pairs. Stored deltas are offset by 500, which is removed
// when the mapping is applied. Runes found in neither table are returned unchanged.
to_upper :: proc(r: rune) -> rune {
	code := i32(r)

	// First try the range table.
	idx := binary_search(code, to_upper_ranges[:], len(to_upper_ranges)/3, 3)
	if idx >= 0 {
		if to_upper_ranges[idx] <= code && code <= to_upper_ranges[idx+1] {
			return rune(code + to_upper_ranges[idx+2] - 500)
		}
	}

	// Then the table of individually mapped code points.
	idx = binary_search(code, to_upper_singlets[:], len(to_upper_singlets)/2, 2)
	if idx >= 0 {
		if code == to_upper_singlets[idx] {
			return rune(code + to_upper_singlets[idx+1] - 500)
		}
	}

	return rune(code)
}
// to_title maps `r` through the title-case table.
//
// `to_title_singlets` is read as (code point, delta) pairs; stored deltas are offset
// by 500, which is removed when the mapping is applied. Runes without an entry in
// the table are returned unchanged.
to_title :: proc(r: rune) -> rune {
	c := i32(r)
	// The searched table, its length, the key comparison and the delta must all come
	// from `to_title_singlets`: an index found in a different table does not identify
	// the matching record here and may lie outside this table's bounds.
	p := binary_search(c, to_title_singlets[:], len(to_title_singlets)/2, 2)
	if p >= 0 && c == to_title_singlets[p] {
		return rune(c + to_title_singlets[p+1] - 500)
	}
	return rune(c)
}
// is_lower reports whether `r` is treated as a lower-case letter.
//
// ASCII runes are checked directly against 'a'..'z'. Any other rune counts as
// lower case when the upper-case mapping tables contain an entry for it.
is_lower :: proc(r: rune) -> bool {
	if r <= MAX_ASCII {
		return 'a' <= r && r <= 'z'
	}

	code := i32(r)

	idx := binary_search(code, to_upper_ranges[:], len(to_upper_ranges)/3, 3)
	in_range := idx >= 0 && to_upper_ranges[idx] <= code && code <= to_upper_ranges[idx+1]
	if in_range {
		return true
	}

	idx = binary_search(code, to_upper_singlets[:], len(to_upper_singlets)/2, 2)
	return idx >= 0 && code == to_upper_singlets[idx]
}
// is_upper reports whether `r` is treated as an upper-case letter.
//
// ASCII runes are checked directly against 'A'..'Z'. Any other rune counts as
// upper case when the lower-case mapping tables contain an entry for it.
is_upper :: proc(r: rune) -> bool {
	if r <= MAX_ASCII {
		return 'A' <= r && r <= 'Z'
	}

	code := i32(r)

	idx := binary_search(code, to_lower_ranges[:], len(to_lower_ranges)/3, 3)
	in_range := idx >= 0 && to_lower_ranges[idx] <= code && code <= to_lower_ranges[idx+1]
	if in_range {
		return true
	}

	idx = binary_search(code, to_lower_singlets[:], len(to_lower_singlets)/2, 2)
	return idx >= 0 && code == to_lower_singlets[idx]
}
// is_alpha is an alias of is_letter.
is_alpha :: is_letter

// is_letter reports whether `r` is treated as a letter.
//
// Runes up to MAX_LATIN1 are answered from `char_properties` using `pLmask`.
// Above that, a rune is a letter when is_upper or is_lower accepts it, or when it
// is covered by `alpha_ranges` ((low, high) pairs) or listed in `alpha_singlets`.
is_letter :: proc(r: rune) -> bool {
	if u32(r) <= MAX_LATIN1 {
		props := char_properties[u8(r)]
		return props&pLmask != 0
	}

	// Cased letters are recognised through the case-mapping tables.
	if is_upper(r) {
		return true
	}
	if is_lower(r) {
		return true
	}

	code := i32(r)

	idx := binary_search(code, alpha_ranges[:], len(alpha_ranges)/2, 2)
	in_range := idx >= 0 && alpha_ranges[idx] <= code && code <= alpha_ranges[idx+1]
	if in_range {
		return true
	}

	idx = binary_search(code, alpha_singlets[:], len(alpha_singlets), 1)
	return idx >= 0 && code == alpha_singlets[idx]
}
// is_title reports whether `r` satisfies both is_upper and is_lower.
// is_lower is only consulted when is_upper has already accepted the rune.
is_title :: proc(r: rune) -> bool {
	if !is_upper(r) {
		return false
	}
	return is_lower(r)
}
// is_digit reports whether `r` is one of the ASCII digits '0'..'9'.
// Runes above MAX_LATIN1 are always rejected.
is_digit :: proc(r: rune) -> bool {
	if r > MAX_LATIN1 {
		return false
	}
	at_least_zero := '0' <= r
	return at_least_zero && r <= '9'
}
// is_white_space is an alias of is_space.
is_white_space :: is_space

// is_space reports whether `r` is treated as white space.
//
// Up to MAX_LATIN1 the accepted runes are listed explicitly: tab, line feed,
// vertical tab, form feed, carriage return, space, 0x85 and 0xa0. Any other rune
// is white space when it falls inside one of the (low, high) pairs of `space_ranges`.
is_space :: proc(r: rune) -> bool {
	if u32(r) > MAX_LATIN1 {
		code := i32(r)
		idx := binary_search(code, space_ranges[:], len(space_ranges)/2, 2)
		return idx >= 0 && space_ranges[idx] <= code && code <= space_ranges[idx+1]
	}

	switch r {
	case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xa0:
		return true
	}
	return false
}
// is_combining reports whether `r` lies in one of these inclusive code point ranges:
// 0x0300-0x036f, 0x1ab0-0x1aff, 0x1dc0-0x1dff, 0x20d0-0x20ff, 0xfe20-0xfe2f.
is_combining :: proc(r: rune) -> bool {
	code := i32(r)

	// Everything below the first range can be rejected immediately.
	if code < 0x0300 {
		return false
	}
	if code <= 0x036f {
		return true
	}
	if 0x1ab0 <= code && code <= 0x1aff {
		return true
	}
	if 0x1dc0 <= code && code <= 0x1dff {
		return true
	}
	if 0x20d0 <= code && code <= 0x20ff {
		return true
	}
	return 0xfe20 <= code && code <= 0xfe2f
}
// is_graphic reports whether the `pg` flag is set for `r` in `char_properties`.
// Runes above MAX_LATIN1 are not covered by that table and always yield false.
is_graphic :: proc(r: rune) -> bool {
	if u32(r) > MAX_LATIN1 {
		return false
	}
	props := char_properties[u8(r)]
	return props&pg != 0
}
// is_print reports whether the `pp` flag is set for `r` in `char_properties`.
// Runes above MAX_LATIN1 are not covered by that table and always yield false.
is_print :: proc(r: rune) -> bool {
	if u32(r) > MAX_LATIN1 {
		return false
	}
	props := char_properties[u8(r)]
	return props&pp != 0
}
// is_control reports whether the `pC` flag is set for `r` in `char_properties`.
// Runes above MAX_LATIN1 are not covered by that table and always yield false.
is_control :: proc(r: rune) -> bool {
	if u32(r) > MAX_LATIN1 {
		return false
	}
	props := char_properties[u8(r)]
	return props&pC != 0
}
// is_number reports whether the `pN` flag is set for `r` in `char_properties`.
// Runes above MAX_LATIN1 are not covered by that table and always yield false.
is_number :: proc(r: rune) -> bool {
	if u32(r) > MAX_LATIN1 {
		return false
	}
	props := char_properties[u8(r)]
	return props&pN != 0
}
// is_punct reports whether the `pP` flag is set for `r` in `char_properties`.
// Runes above MAX_LATIN1 are not covered by that table and always yield false.
is_punct :: proc(r: rune) -> bool {
	if u32(r) > MAX_LATIN1 {
		return false
	}
	props := char_properties[u8(r)]
	return props&pP != 0
}
// is_symbol reports whether the `pS` flag is set for `r` in `char_properties`.
// Runes above MAX_LATIN1 are not covered by that table and always yield false.
is_symbol :: proc(r: rune) -> bool {
	if u32(r) > MAX_LATIN1 {
		return false
	}
	props := char_properties[u8(r)]
	return props&pS != 0
}
|