letter.odin 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. package unicode
  // Code point limits and sentinels used by the classification procedures.
  // MAX_RUNE is U+10FFFF, the last code point of the Unicode code space
  // (the previous literal '\U00010fff' encoded U+10FFF, which is far too small).
  MAX_RUNE         :: '\U0010ffff' // Maximum valid Unicode code point
  REPLACEMENT_CHAR :: '\ufffd'     // Represents an invalid code point
  MAX_ASCII        :: '\u007f'     // Maximum ASCII value
  MAX_LATIN1       :: '\u00ff'     // Maximum Latin-1 value
  // Bisects `table`, treated as `length` records of `stride` consecutive i32
  // values, comparing `c` against the first value of each record.
  //
  // Returns the index into `table` (not the record number) of the last record
  // whose first value is <= c, or -1 when there is no such record. The result
  // is only meaningful when the records are ordered ascending by first value.
  binary_search :: proc(c: i32, table: []i32, length, stride: int) -> int {
      base := 0
      remaining := length

      for remaining > 1 {
          half := remaining / 2
          probe := base + half*stride
          if c < table[probe] {
              // Target lies strictly before the probed record: keep the lower half.
              remaining = half
              continue
          }
          // Probed record is still <= c: continue from it with the upper half.
          base = probe
          remaining -= half
      }

      // `remaining == 0` only happens for an empty table; never index in that case.
      if remaining == 0 || c < table[base] {
          return -1
      }
      return base
  }
  // Maps `r` to its lower-case counterpart using the package lookup tables.
  //
  // `to_lower_ranges` is read as records of three values (first, last, offset)
  // and `to_lower_singlets` as records of two values (code point, offset).
  // Offsets are stored with a bias of 500, which is subtracted when applied.
  // Code points found in neither table are returned unchanged.
  to_lower :: proc(r: rune) -> rune {
      code := i32(r)

      // Range records first: the hit must actually contain `code`.
      if idx := binary_search(code, to_lower_ranges[:], len(to_lower_ranges)/3, 3); idx >= 0 {
          if to_lower_ranges[idx] <= code && code <= to_lower_ranges[idx+1] {
              return rune(code + to_lower_ranges[idx+2] - 500)
          }
      }

      // Then individually mapped code points: the hit must match exactly.
      if idx := binary_search(code, to_lower_singlets[:], len(to_lower_singlets)/2, 2); idx >= 0 {
          if code == to_lower_singlets[idx] {
              return rune(code + to_lower_singlets[idx+1] - 500)
          }
      }

      return rune(code)
  }
  // Maps `r` to its upper-case counterpart using the package lookup tables.
  //
  // `to_upper_ranges` is read as records of three values (first, last, offset)
  // and `to_upper_singlets` as records of two values (code point, offset).
  // Offsets are stored with a bias of 500, which is subtracted when applied.
  // Code points found in neither table are returned unchanged.
  to_upper :: proc(r: rune) -> rune {
      code := i32(r)

      range_at := binary_search(code, to_upper_ranges[:], len(to_upper_ranges)/3, 3)
      in_range := range_at >= 0 &&
                  to_upper_ranges[range_at] <= code &&
                  code <= to_upper_ranges[range_at+1]
      if in_range {
          return rune(code + to_upper_ranges[range_at+2] - 500)
      }

      single_at := binary_search(code, to_upper_singlets[:], len(to_upper_singlets)/2, 2)
      is_single := single_at >= 0 && code == to_upper_singlets[single_at]
      if is_single {
          return rune(code + to_upper_singlets[single_at+1] - 500)
      }

      return rune(code)
  }
  // Maps `r` to its title-case counterpart.
  //
  // `to_title_singlets` is read as records of two values (code point, offset),
  // with the offset stored biased by 500. Code points without an entry are
  // returned unchanged.
  //
  // The search, the equality check and the offset lookup all use
  // `to_title_singlets`. Previously the search and comparison were done on
  // `to_upper_singlets` (with the title table's length), so the index found in
  // one table was used to read an unrelated entry of the other.
  to_title :: proc(r: rune) -> rune {
      c := i32(r)
      p := binary_search(c, to_title_singlets[:], len(to_title_singlets)/2, 2)
      if p >= 0 && c == to_title_singlets[p] {
          return rune(c + to_title_singlets[p+1] - 500)
      }
      return rune(c)
  }
  // Reports whether `r` is a lower-case letter.
  //
  // Values up to MAX_ASCII are decided arithmetically ('a'..'z'). Anything
  // larger counts as lower case when it has an entry in the upper-case mapping
  // tables (`to_upper_ranges` / `to_upper_singlets`).
  is_lower :: proc(r: rune) -> bool {
      if r <= MAX_ASCII {
          // Unsigned subtraction wraps for values below 'a', so one compare suffices.
          distance := u32(r) - 'a'
          return distance < 26
      }

      code := i32(r)

      if idx := binary_search(code, to_upper_ranges[:], len(to_upper_ranges)/3, 3); idx >= 0 {
          if to_upper_ranges[idx] <= code && code <= to_upper_ranges[idx+1] {
              return true
          }
      }

      idx := binary_search(code, to_upper_singlets[:], len(to_upper_singlets)/2, 2)
      return idx >= 0 && code == to_upper_singlets[idx]
  }
  // Reports whether `r` is an upper-case letter.
  //
  // Values up to MAX_ASCII are decided arithmetically ('A'..'Z'). Anything
  // larger counts as upper case when it has an entry in the lower-case mapping
  // tables (`to_lower_ranges` / `to_lower_singlets`).
  is_upper :: proc(r: rune) -> bool {
      if r <= MAX_ASCII {
          // Unsigned subtraction wraps for values below 'A', so one compare suffices.
          distance := u32(r) - 'A'
          return distance < 26
      }

      code := i32(r)

      if idx := binary_search(code, to_lower_ranges[:], len(to_lower_ranges)/3, 3); idx >= 0 {
          if to_lower_ranges[idx] <= code && code <= to_lower_ranges[idx+1] {
              return true
          }
      }

      idx := binary_search(code, to_lower_singlets[:], len(to_lower_singlets)/2, 2)
      return idx >= 0 && code == to_lower_singlets[idx]
  }
  // `is_alpha` is an alias of `is_letter`.
  is_alpha :: is_letter

  // Reports whether `r` is a letter.
  //
  // Latin-1 values are answered from `char_properties` using `pLmask`.
  // Beyond Latin-1, a code point is a letter when it is upper or lower case,
  // falls inside a (first, last) pair of `alpha_ranges`, or appears in
  // `alpha_singlets`.
  is_letter :: proc(r: rune) -> bool {
      switch {
      case u32(r) <= MAX_LATIN1:
          return char_properties[u8(r)]&pLmask != 0
      case is_upper(r) || is_lower(r):
          return true
      }

      code := i32(r)

      if idx := binary_search(code, alpha_ranges[:], len(alpha_ranges)/2, 2); idx >= 0 {
          if alpha_ranges[idx] <= code && code <= alpha_ranges[idx+1] {
              return true
          }
      }

      idx := binary_search(code, alpha_singlets[:], len(alpha_singlets), 1)
      return idx >= 0 && code == alpha_singlets[idx]
  }
  // Reports whether `r` is a title-case letter, defined here as a code point
  // for which both `is_upper` and `is_lower` report true.
  is_title :: proc(r: rune) -> bool {
      if !is_upper(r) {
          return false
      }
      return is_lower(r)
  }
  // Reports whether `r` is one of the decimal digits '0' through '9'.
  // Every code point above MAX_LATIN1 is reported as not a digit.
  is_digit :: proc(r: rune) -> bool {
      if r > MAX_LATIN1 {
          return false
      }
      return r >= '0' && r <= '9'
  }
  // `is_white_space` is an alias of `is_space`.
  is_white_space :: is_space

  // Reports whether `r` is white space.
  //
  // Within Latin-1 the accepted characters are tab, newline, vertical tab,
  // form feed, carriage return, space, 0x85 and 0xa0. Larger code points are
  // white space when they fall inside a (first, last) pair of `space_ranges`.
  is_space :: proc(r: rune) -> bool {
      if u32(r) > MAX_LATIN1 {
          code := i32(r)
          idx := binary_search(code, space_ranges[:], len(space_ranges)/2, 2)
          return idx >= 0 && space_ranges[idx] <= code && code <= space_ranges[idx+1]
      }

      switch r {
      case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xa0:
          return true
      }
      return false
  }
  // Reports whether `r` lies in one of the hard-coded code point ranges
  // 0x0300-0x036f, 0x1ab0-0x1aff, 0x1dc0-0x1dff, 0x20d0-0x20ff or
  // 0xfe20-0xfe2f.
  is_combining :: proc(r: rune) -> bool {
      code := i32(r)

      switch {
      case code < 0x0300:
          // Below the first range: nothing can match.
          return false
      case code <= 0x036f:
          return true
      case 0x1ab0 <= code && code <= 0x1aff:
          return true
      case 0x1dc0 <= code && code <= 0x1dff:
          return true
      case 0x20d0 <= code && code <= 0x20ff:
          return true
      case 0xfe20 <= code && code <= 0xfe2f:
          return true
      }
      return false
  }
  // Reports whether the Latin-1 code point `r` has the `pg` bit(s) set in
  // `char_properties`. Code points above MAX_LATIN1 always yield false.
  is_graphic :: proc(r: rune) -> bool {
      if u32(r) > MAX_LATIN1 {
          return false
      }
      flags := char_properties[u8(r)]
      return flags&pg != 0
  }
  // Reports whether the Latin-1 code point `r` has the `pp` bit(s) set in
  // `char_properties`. Code points above MAX_LATIN1 always yield false.
  is_print :: proc(r: rune) -> bool {
      if u32(r) > MAX_LATIN1 {
          return false
      }
      flags := char_properties[u8(r)]
      return flags&pp != 0
  }
  // Reports whether the Latin-1 code point `r` has the `pC` bit(s) set in
  // `char_properties`. Code points above MAX_LATIN1 always yield false.
  is_control :: proc(r: rune) -> bool {
      if u32(r) > MAX_LATIN1 {
          return false
      }
      flags := char_properties[u8(r)]
      return flags&pC != 0
  }
  // Reports whether the Latin-1 code point `r` has the `pN` bit(s) set in
  // `char_properties`. Code points above MAX_LATIN1 always yield false.
  is_number :: proc(r: rune) -> bool {
      if u32(r) > MAX_LATIN1 {
          return false
      }
      flags := char_properties[u8(r)]
      return flags&pN != 0
  }
  // Reports whether the Latin-1 code point `r` has the `pP` bit(s) set in
  // `char_properties`. Code points above MAX_LATIN1 always yield false.
  is_punct :: proc(r: rune) -> bool {
      if u32(r) > MAX_LATIN1 {
          return false
      }
      flags := char_properties[u8(r)]
      return flags&pP != 0
  }
  // Reports whether the Latin-1 code point `r` has the `pS` bit(s) set in
  // `char_properties`. Code points above MAX_LATIN1 always yield false.
  is_symbol :: proc(r: rune) -> bool {
      if u32(r) > MAX_LATIN1 {
          return false
      }
      flags := char_properties[u8(r)]
      return flags&pS != 0
  }