conversion.odin 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. package strings
  2. import "core:io"
  3. import "core:unicode"
  4. import "core:unicode/utf8"
  /*
  returns `s` with every run of invalid UTF-8 bytes replaced by a single copy of `replacement`

  an empty `s` yields "" directly
  when `s` contains no invalid byte the result is `clone(s, allocator)`
  otherwise the result is assembled in a Builder backed by `allocator`
  */
  to_valid_utf8 :: proc(s, replacement: string, allocator := context.allocator) -> string {
      if len(s) == 0 {
          return ""
      }

      out: Builder
      init_builder(&out, 0, 0, allocator)

      // Look for the first byte that fails to decode. A RUNE_ERROR that decodes
      // with width 1 is a genuinely bad byte; a wider one is a literal U+FFFD.
      rest := s
      for r, idx in s {
          if r == utf8.RUNE_ERROR {
              _, width := utf8.decode_rune_in_string(s[idx:])
              if width == 1 {
                  // Reserve room, keep the valid prefix and continue from the bad byte.
                  grow_builder(&out, len(s) + len(replacement))
                  write_string(&out, s[:idx])
                  rest = s[idx:]
                  break
              }
          }
      }

      // The builder was never grown, so nothing invalid was found.
      if builder_cap(out) == 0 {
          return clone(rest, allocator)
      }

      // `in_bad_run` makes consecutive invalid bytes collapse into one replacement.
      in_bad_run := false
      pos := 0
      for pos < len(rest) {
          lead := rest[pos]

          // Single-byte (ASCII) fast path.
          if lead < utf8.RUNE_SELF {
              pos += 1
              in_bad_run = false
              write_byte(&out, lead)
              continue
          }

          _, width := utf8.decode_rune_in_string(rest[pos:])
          if width != 1 {
              // Well-formed multi-byte sequence: copy it through unchanged.
              in_bad_run = false
              write_string(&out, rest[pos:][:width])
              pos += width
              continue
          }

          // Invalid byte: emit the replacement only at the start of a run.
          pos += 1
          if in_bad_run {
              continue
          }
          in_bad_run = true
          write_string(&out, replacement)
      }
      return to_string(out)
  }
  /*
  returns a new string in which every rune of `s` has been passed through `unicode.to_lower`
  the result is built in a Builder backed by `allocator`

  strings.to_lower("test") -> test
  strings.to_lower("Test") -> test
  */
  to_lower :: proc(s: string, allocator := context.allocator) -> string {
      out: Builder
      init_builder(&out, 0, len(s), allocator)
      for ch in s {
          lowered := unicode.to_lower(ch)
          write_rune_builder(&out, lowered)
      }
      return to_string(out)
  }
  /*
  returns a new string in which every rune of `s` has been passed through `unicode.to_upper`
  the result is built in a Builder backed by `allocator`

  strings.to_upper("test") -> TEST
  strings.to_upper("Test") -> TEST
  */
  to_upper :: proc(s: string, allocator := context.allocator) -> string {
      out: Builder
      init_builder(&out, 0, len(s), allocator)
      for ch in s {
          raised := unicode.to_upper(ch)
          write_rune_builder(&out, raised)
      }
      return to_string(out)
  }
  // reports whether the rune `c` is '-', '_' or a rune accepted by `is_space`
  // handy for treating strings as words, e.g. in a text editor or in html paths
  is_delimiter :: proc(c: rune) -> bool {
      if c == '-' || c == '_' {
          return true
      }
      return is_space(c)
  }
  // reports whether the rune `r` separates words:
  // - in the ASCII range (<= 0x7f) everything except digits, letters and '_' is a separator
  // - above the ASCII range only runes accepted by `unicode.is_space` are separators
  is_separator :: proc(r: rune) -> bool {
      if r > 0x7f {
          // TODO(bill): unicode categories
          // if unicode.is_letter(r) || unicode.is_digit(r) {
          // 	return false;
          // }
          return unicode.is_space(r)
      }

      switch r {
      case '0'..='9', 'a'..='z', 'A'..='Z', '_':
          return false
      }
      return true
  }
  /*
  walks the string `s` and hands `callback` a sliding window of three runes: `prev`, `curr` and `next`

  - `prev` is 0 for the first rune, `next` is 0 for the last rune
  - a window whose `curr` rune is 0 is skipped while iterating
  - on an empty string `s` the callback is never called
  - `w` is passed through to the callback untouched
  */
  string_case_iterator :: proc(w: io.Writer, s: string, callback: proc(w: io.Writer, prev, curr, next: rune)) {
      before, current: rune
      for upcoming in s {
          // `current` is still 0 until the first rune has been shifted in.
          if current != 0 {
              callback(w, before, current, upcoming)
          }
          before  = current
          current = upcoming
      }

      if len(s) == 0 {
          return
      }
      // Flush the last rune, which has no successor.
      callback(w, before, current, 0)
  }
  to_lower_camel_case :: to_camel_case
  // converts the `s` string to "lowerCamelCase"
  // `s` is passed through `trim_space` first; delimiter runes (see `is_delimiter`) are dropped
  // and the rune following a delimiter is uppercased
  to_camel_case :: proc(s: string, allocator := context.allocator) -> string {
      // Decides how a single rune is written, given its neighbours.
      emit_rune :: proc(w: io.Writer, prev, curr, next: rune) {
          if is_delimiter(curr) {
              return
          }
          switch {
          case is_delimiter(prev):
              // first rune of a new word
              io.write_rune(w, unicode.to_upper(curr))
          case unicode.is_lower(prev):
              // keep the rune as-is after a lowercase rune
              io.write_rune(w, curr)
          case:
              io.write_rune(w, unicode.to_lower(curr))
          }
      }

      trimmed := trim_space(s)

      out: Builder
      init_builder(&out, 0, len(trimmed), allocator)
      sink := to_writer(&out)

      string_case_iterator(sink, trimmed, emit_rune)
      return to_string(out)
  }
  to_upper_camel_case :: to_pascal_case
  // converts the `s` string to "PascalCase"
  // `s` is passed through `trim_space` first; delimiter runes (see `is_delimiter`) are dropped
  // and both the very first rune and the rune following a delimiter are uppercased
  to_pascal_case :: proc(s: string, allocator := context.allocator) -> string {
      // Decides how a single rune is written, given its neighbours.
      emit_rune :: proc(w: io.Writer, prev, curr, next: rune) {
          if is_delimiter(curr) {
              return
          }
          switch {
          case is_delimiter(prev) || prev == 0:
              // first rune of the string or of a new word
              io.write_rune(w, unicode.to_upper(curr))
          case unicode.is_lower(prev):
              // keep the rune as-is after a lowercase rune
              io.write_rune(w, curr)
          case:
              io.write_rune(w, unicode.to_lower(curr))
          }
      }

      trimmed := trim_space(s)

      out: Builder
      init_builder(&out, 0, len(trimmed), allocator)
      sink := to_writer(&out)

      string_case_iterator(sink, trimmed, emit_rune)
      return to_string(out)
  }
  /*
  returns the `s` string as words separated by the given `delimiter` rune
  all runes will be upper or lowercased based on the `all_upper_case` bool

  - `s` is passed through `trim_space` first
  - every run of delimiter runes (see `is_delimiter`) becomes a single `delimiter`,
    including a run at the very end of the string
  - a `delimiter` is inserted before an uppercase rune that follows a lowercase rune,
    and before the last uppercase rune of an uppercase run that is followed by a lowercase rune

  strings.to_delimiter_case("Hello World", '_', false) -> hello_world
  strings.to_delimiter_case("Hello World", ' ', true) -> HELLO WORLD
  strings.to_delimiter_case("aBC", '_', false) -> a_bc
  */
  to_delimiter_case :: proc(s: string, delimiter: rune, all_upper_case: bool, allocator := context.allocator) -> string {
      s := s
      s = trim_space(s)

      b: Builder
      init_builder(&b, 0, len(s), allocator)
      w := to_writer(&b)

      adjust_case := unicode.to_upper if all_upper_case else unicode.to_lower

      // The loop runs one rune behind: `curr` is written once `next` is known.
      prev, curr: rune
      for next in s {
          if is_delimiter(curr) {
              // collapse a run of delimiters into one
              if !is_delimiter(prev) {
                  io.write_rune(w, delimiter)
              }
          } else if unicode.is_upper(curr) {
              if unicode.is_lower(prev) || (unicode.is_upper(prev) && unicode.is_lower(next)) {
                  io.write_rune(w, delimiter)
              }
              io.write_rune(w, adjust_case(curr))
          } else if curr != 0 {
              io.write_rune(w, adjust_case(curr))
          }

          prev = curr
          curr = next
      }

      // Flush the final rune with the same rules as above (there is no `next`).
      // A trailing '-' or '_' must be normalised like any other delimiter run
      // instead of being copied through verbatim.
      if len(s) > 0 {
          if is_delimiter(curr) {
              if !is_delimiter(prev) {
                  io.write_rune(w, delimiter)
              }
          } else {
              if unicode.is_upper(curr) && unicode.is_lower(prev) {
                  io.write_rune(w, delimiter)
              }
              io.write_rune(w, adjust_case(curr))
          }
      }

      return to_string(b)
  }
  /*
  converts the `s` string to "snake_case" with all runes lowercased
  thin wrapper around `to_delimiter_case` using '_' as the delimiter

  strings.to_snake_case("HelloWorld") -> hello_world
  strings.to_snake_case("Hello World") -> hello_world
  */
  to_snake_case :: proc(s: string, allocator := context.allocator) -> string {
      SEPARATOR :: '_'
      UPPERCASE :: false
      return to_delimiter_case(s, SEPARATOR, UPPERCASE, allocator)
  }
  to_screaming_snake_case :: to_upper_snake_case
  // converts the `s` string to "SNAKE_CASE" with all runes uppercased
  // thin wrapper around `to_delimiter_case` using '_' as the delimiter
  to_upper_snake_case :: proc(s: string, allocator := context.allocator) -> string {
      SEPARATOR :: '_'
      UPPERCASE :: true
      return to_delimiter_case(s, SEPARATOR, UPPERCASE, allocator)
  }
  // converts the `s` string to "kebab-case" with all runes lowercased
  // thin wrapper around `to_delimiter_case` using '-' as the delimiter
  to_kebab_case :: proc(s: string, allocator := context.allocator) -> string {
      SEPARATOR :: '-'
      UPPERCASE :: false
      return to_delimiter_case(s, SEPARATOR, UPPERCASE, allocator)
  }
  // converts the `s` string to "KEBAB-CASE" with all runes uppercased
  // thin wrapper around `to_delimiter_case` using '-' as the delimiter
  to_upper_kebab_case :: proc(s: string, allocator := context.allocator) -> string {
      SEPARATOR :: '-'
      UPPERCASE :: true
      return to_delimiter_case(s, SEPARATOR, UPPERCASE, allocator)
  }
  /*
  converts the `s` string to "Ada_case": words joined by '_' with uppercase runes kept
  uppercase and every other rune lowercased

  - `s` is passed through `trim_space` first
  - every run of delimiter runes (see `is_delimiter`) becomes a single '_',
    including a run at the very end of the string
  - a '_' is inserted before an uppercase rune that follows a lowercase rune,
    and before the last uppercase rune of an uppercase run that is followed by a lowercase rune
  - NOTE: a lowercase rune at the start of a word is not capitalised

  strings.to_ada_case("HelloWorld") -> Hello_World
  strings.to_ada_case("hello world") -> hello_world
  */
  to_ada_case :: proc(s: string, allocator := context.allocator) -> string {
      delimiter :: '_'

      s := s
      s = trim_space(s)

      b: Builder
      init_builder(&b, 0, len(s), allocator)
      w := to_writer(&b)

      // The loop runs one rune behind: `curr` is written once `next` is known.
      prev, curr: rune
      for next in s {
          if is_delimiter(curr) {
              // collapse a run of delimiters into one
              if !is_delimiter(prev) {
                  io.write_rune(w, delimiter)
              }
          } else if unicode.is_upper(curr) {
              if unicode.is_lower(prev) || (unicode.is_upper(prev) && unicode.is_lower(next)) {
                  io.write_rune(w, delimiter)
              }
              io.write_rune(w, unicode.to_upper(curr))
          } else if curr != 0 {
              io.write_rune(w, unicode.to_lower(curr))
          }

          prev = curr
          curr = next
      }

      // Flush the final rune with the same rules as above (there is no `next`).
      // Previously a final uppercase rune was lowercased unless it followed a
      // lowercase rune ("ABC" -> "ABc") and a trailing delimiter was copied verbatim.
      if len(s) > 0 {
          if is_delimiter(curr) {
              if !is_delimiter(prev) {
                  io.write_rune(w, delimiter)
              }
          } else if unicode.is_upper(curr) {
              if unicode.is_lower(prev) {
                  io.write_rune(w, delimiter)
              }
              io.write_rune(w, unicode.to_upper(curr))
          } else {
              io.write_rune(w, unicode.to_lower(curr))
          }
      }

      return to_string(b)
  }
  258. }