conversion.odin 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556
  1. package strings
  2. import "core:io"
  3. import "core:unicode"
  4. import "core:unicode/utf8"
  5. /*
  6. Converts invalid UTF-8 sequences in the input string `s` to the `replacement` string.
  7. *Allocates Using Provided Allocator*
  8. Inputs:
  9. - s: Input string that may contain invalid UTF-8 sequences.
  10. - replacement: String to replace invalid UTF-8 sequences with.
  11. - allocator: (default: context.allocator).
  12. WARNING: Allocation does not occur when len(s) == 0
  13. Returns:
  14. A valid UTF-8 string with invalid sequences replaced by `replacement`.
  15. */
  16. to_valid_utf8 :: proc(s, replacement: string, allocator := context.allocator) -> string {
  17. if len(s) == 0 {
  18. return ""
  19. }
  20. b: Builder
  21. builder_init(&b, 0, 0, allocator)
  22. s := s
  23. for c, i in s {
  24. if c != utf8.RUNE_ERROR {
  25. continue
  26. }
  27. _, w := utf8.decode_rune_in_string(s[i:])
  28. if w == 1 {
  29. builder_grow(&b, len(s) + len(replacement))
  30. write_string(&b, s[:i])
  31. s = s[i:]
  32. break
  33. }
  34. }
  35. if builder_cap(b) == 0 {
  36. return clone(s, allocator)
  37. }
  38. invalid := false
  39. for i := 0; i < len(s); /**/{
  40. c := s[i]
  41. if c < utf8.RUNE_SELF {
  42. i += 1
  43. invalid = false
  44. write_byte(&b, c)
  45. continue
  46. }
  47. _, w := utf8.decode_rune_in_string(s[i:])
  48. if w == 1 {
  49. i += 1
  50. if !invalid {
  51. invalid = true
  52. write_string(&b, replacement)
  53. }
  54. continue
  55. }
  56. invalid = false
  57. write_string(&b, s[i:][:w])
  58. i += w
  59. }
  60. return to_string(b)
  61. }
  62. /*
  63. Converts the input string `s` to all lowercase characters.
  64. *Allocates Using Provided Allocator*
  65. Inputs:
  66. - s: Input string to be converted.
  67. - allocator: (default: context.allocator).
  68. Returns:
  69. A new string with all characters converted to lowercase.
  70. Example:
  71. import "core:fmt"
  72. import "core:strings"
  73. to_lower_example :: proc() {
  74. fmt.println(strings.to_lower("TeST"))
  75. }
  76. Output:
  77. test
  78. */
  79. to_lower :: proc(s: string, allocator := context.allocator) -> string {
  80. b: Builder
  81. builder_init(&b, 0, len(s), allocator)
  82. for r in s {
  83. write_rune(&b, unicode.to_lower(r))
  84. }
  85. return to_string(b)
  86. }
  87. /*
  88. Converts the input string `s` to all uppercase characters.
  89. *Allocates Using Provided Allocator*
  90. Inputs:
  91. - s: Input string to be converted.
  92. - allocator: (default: context.allocator).
  93. Returns:
  94. A new string with all characters converted to uppercase.
  95. Example:
  96. import "core:fmt"
  97. import "core:strings"
  98. to_upper_example :: proc() {
  99. fmt.println(strings.to_upper("Test"))
  100. }
  101. Output:
  102. TEST
  103. */
  104. to_upper :: proc(s: string, allocator := context.allocator) -> string {
  105. b: Builder
  106. builder_init(&b, 0, len(s), allocator)
  107. for r in s {
  108. write_rune(&b, unicode.to_upper(r))
  109. }
  110. return to_string(b)
  111. }
  112. /*
  113. Checks if the rune `r` is a delimiter (' ', '-', or '_').
  114. Inputs:
  115. - r: Rune to check for delimiter status.
  116. Returns:
  117. True if `r` is a delimiter, false otherwise.
  118. */
  119. is_delimiter :: proc(r: rune) -> bool {
  120. return r == '-' || r == '_' || is_space(r)
  121. }
  122. /*
  123. Checks if the rune `r` is a non-alphanumeric or space character.
  124. Inputs:
  125. - r: Rune to check for separator status.
  126. Returns:
  127. True if `r` is a non-alpha or `unicode.is_space` rune.
  128. */
  129. is_separator :: proc(r: rune) -> bool {
  130. if r <= 0x7f {
  131. switch r {
  132. case '0' ..= '9':
  133. return false
  134. case 'a' ..= 'z':
  135. return false
  136. case 'A' ..= 'Z':
  137. return false
  138. case '_':
  139. return false
  140. }
  141. return true
  142. }
  143. // TODO(bill): unicode categories
  144. // if unicode.is_letter(r) || unicode.is_digit(r) {
  145. // return false;
  146. // }
  147. return unicode.is_space(r)
  148. }
  149. /*
  150. Iterates over a string, calling a callback for each rune with the previous, current, and next runes as arguments.
  151. Inputs:
  152. - w: An io.Writer to be used by the callback for writing output.
  153. - s: The input string to be iterated over.
  154. - callback: A procedure to be called for each rune in the string, with arguments (w: io.Writer, prev, curr, next: rune).
  155. The callback can utilize the provided io.Writer to write output during the iteration.
  156. Example:
  157. import "core:fmt"
  158. import "core:strings"
  159. import "core:io"
  160. string_case_iterator_example :: proc() {
  161. my_callback :: proc(w: io.Writer, prev, curr, next: rune) {
  162. fmt.println("my_callback", curr) // <-- Custom logic here
  163. }
  164. s := "hello"
  165. b: strings.Builder
  166. strings.builder_init_len(&b, len(s))
  167. w := strings.to_writer(&b)
  168. strings.string_case_iterator(w, s, my_callback)
  169. }
  170. Output:
  171. my_callback h
  172. my_callback e
  173. my_callback l
  174. my_callback l
  175. my_callback o
  176. */
  177. string_case_iterator :: proc(
  178. w: io.Writer,
  179. s: string,
  180. callback: proc(w: io.Writer, prev, curr, next: rune),
  181. ) {
  182. prev, curr: rune
  183. for next in s {
  184. if curr == 0 {
  185. prev = curr
  186. curr = next
  187. continue
  188. }
  189. callback(w, prev, curr, next)
  190. prev = curr
  191. curr = next
  192. }
  193. if len(s) > 0 {
  194. callback(w, prev, curr, 0)
  195. }
  196. }
  197. // Alias to `to_camel_case`
  198. to_lower_camel_case :: to_camel_case
  199. /*
  200. Converts the input string `s` to "lowerCamelCase".
  201. *Allocates Using Provided Allocator*
  202. Inputs:
  203. - s: Input string to be converted.
  204. - allocator: (default: context.allocator).
  205. Returns:
  206. A "lowerCamelCase" formatted string.
  207. */
  208. to_camel_case :: proc(s: string, allocator := context.allocator) -> string {
  209. s := s
  210. s = trim_space(s)
  211. b: Builder
  212. builder_init(&b, 0, len(s), allocator)
  213. w := to_writer(&b)
  214. string_case_iterator(w, s, proc(w: io.Writer, prev, curr, next: rune) {
  215. if !is_delimiter(curr) {
  216. if is_delimiter(prev) {
  217. io.write_rune(w, unicode.to_upper(curr))
  218. } else if unicode.is_lower(prev) {
  219. io.write_rune(w, curr)
  220. } else {
  221. io.write_rune(w, unicode.to_lower(curr))
  222. }
  223. }
  224. })
  225. return to_string(b)
  226. }
  227. // Alias to `to_pascal_case`
  228. to_upper_camel_case :: to_pascal_case
  229. /*
  230. Converts the input string `s` to "UpperCamelCase" (PascalCase).
  231. *Allocates Using Provided Allocator*
  232. Inputs:
  233. - s: Input string to be converted.
  234. - allocator: (default: context.allocator).
  235. Returns:
  236. A "PascalCase" formatted string.
  237. */
  238. to_pascal_case :: proc(s: string, allocator := context.allocator) -> string {
  239. s := s
  240. s = trim_space(s)
  241. b: Builder
  242. builder_init(&b, 0, len(s), allocator)
  243. w := to_writer(&b)
  244. string_case_iterator(w, s, proc(w: io.Writer, prev, curr, next: rune) {
  245. if !is_delimiter(curr) {
  246. if is_delimiter(prev) || prev == 0 {
  247. io.write_rune(w, unicode.to_upper(curr))
  248. } else if unicode.is_lower(prev) {
  249. io.write_rune(w, curr)
  250. } else {
  251. io.write_rune(w, unicode.to_lower(curr))
  252. }
  253. }
  254. })
  255. return to_string(b)
  256. }
  257. /*
  258. Returns a string converted to a delimiter-separated case with configurable casing
  259. *Allocates Using Provided Allocator*
  260. Inputs:
  261. - s: The input string to be converted
  262. - delimiter: The rune to be used as the delimiter between words
  263. - all_upper_case: A boolean indicating if the output should be all uppercased (true) or lowercased (false)
  264. - allocator: (default: context.allocator).
  265. Returns:
  266. The converted string
  267. Example:
  268. import "core:fmt"
  269. import "core:strings"
  270. to_delimiter_case_example :: proc() {
  271. fmt.println(strings.to_delimiter_case("Hello World", '_', false))
  272. fmt.println(strings.to_delimiter_case("Hello World", ' ', true))
  273. fmt.println(strings.to_delimiter_case("aBC", '_', false))
  274. }
  275. Output:
  276. hello_world
  277. HELLO WORLD
  278. a_bc
  279. */
  280. to_delimiter_case :: proc(
  281. s: string,
  282. delimiter: rune,
  283. all_upper_case: bool,
  284. allocator := context.allocator,
  285. ) -> string {
  286. s := s
  287. s = trim_space(s)
  288. b: Builder
  289. builder_init(&b, 0, len(s), allocator)
  290. w := to_writer(&b)
  291. adjust_case := unicode.to_upper if all_upper_case else unicode.to_lower
  292. prev, curr: rune
  293. for next in s {
  294. if is_delimiter(curr) {
  295. if !is_delimiter(prev) {
  296. io.write_rune(w, delimiter)
  297. }
  298. } else if unicode.is_upper(curr) {
  299. if unicode.is_lower(prev) || (unicode.is_upper(prev) && unicode.is_lower(next)) {
  300. io.write_rune(w, delimiter)
  301. }
  302. io.write_rune(w, adjust_case(curr))
  303. } else if curr != 0 {
  304. io.write_rune(w, adjust_case(curr))
  305. }
  306. prev = curr
  307. curr = next
  308. }
  309. if len(s) > 0 {
  310. if unicode.is_upper(curr) && unicode.is_lower(prev) && prev != 0 {
  311. io.write_rune(w, delimiter)
  312. }
  313. io.write_rune(w, adjust_case(curr))
  314. }
  315. return to_string(b)
  316. }
  317. /*
  318. Converts a string to "snake_case" with all runes lowercased
  319. *Allocates Using Provided Allocator*
  320. Inputs:
  321. - s: The input string to be converted
  322. - allocator: (default: context.allocator).
  323. Returns:
  324. The converted string
  325. Example:
  326. import "core:fmt"
  327. import "core:strings"
  328. to_snake_case_example :: proc() {
  329. fmt.println(strings.to_snake_case("HelloWorld"))
  330. fmt.println(strings.to_snake_case("Hello World"))
  331. }
  332. Output:
  333. hello_world
  334. hello_world
  335. */
  336. to_snake_case :: proc(s: string, allocator := context.allocator) -> string {
  337. return to_delimiter_case(s, '_', false, allocator)
  338. }
  339. // Alias for `to_upper_snake_case`
  340. to_screaming_snake_case :: to_upper_snake_case
  341. /*
  342. Converts a string to "SNAKE_CASE" with all runes uppercased
  343. *Allocates Using Provided Allocator*
  344. Inputs:
  345. - s: The input string to be converted
  346. - allocator: (default: context.allocator).
  347. Returns:
  348. The converted string
  349. Example:
  350. import "core:fmt"
  351. import "core:strings"
  352. to_upper_snake_case_example :: proc() {
  353. fmt.println(strings.to_upper_snake_case("HelloWorld"))
  354. }
  355. Output:
  356. HELLO_WORLD
  357. */
  358. to_upper_snake_case :: proc(s: string, allocator := context.allocator) -> string {
  359. return to_delimiter_case(s, '_', true, allocator)
  360. }
  361. /*
  362. Converts a string to "kebab-case" with all runes lowercased
  363. *Allocates Using Provided Allocator*
  364. Inputs:
  365. - s: The input string to be converted
  366. - allocator: (default: context.allocator).
  367. Returns:
  368. The converted string
  369. Example:
  370. import "core:fmt"
  371. import "core:strings"
  372. to_kebab_case_example :: proc() {
  373. fmt.println(strings.to_kebab_case("HelloWorld"))
  374. }
  375. Output:
  376. hello-world
  377. */
  378. to_kebab_case :: proc(s: string, allocator := context.allocator) -> string {
  379. return to_delimiter_case(s, '-', false, allocator)
  380. }
  381. /*
  382. Converts a string to "KEBAB-CASE" with all runes uppercased
  383. *Allocates Using Provided Allocator*
  384. Inputs:
  385. - s: The input string to be converted
  386. - allocator: (default: context.allocator).
  387. Returns:
  388. The converted string
  389. Example:
  390. import "core:fmt"
  391. import "core:strings"
  392. to_upper_kebab_case_example :: proc() {
  393. fmt.println(strings.to_upper_kebab_case("HelloWorld"))
  394. }
  395. Output:
  396. HELLO-WORLD
  397. */
  398. to_upper_kebab_case :: proc(s: string, allocator := context.allocator) -> string {
  399. return to_delimiter_case(s, '-', true, allocator)
  400. }
  401. /*
  402. Converts a string to "Ada_Case"
  403. *Allocates Using Provided Allocator*
  404. Inputs:
  405. - s: The input string to be converted
  406. - allocator: (default: context.allocator).
  407. Returns:
  408. The converted string
  409. Example:
  410. import "core:fmt"
  411. import "core:strings"
  412. to_ada_case_example :: proc() {
  413. fmt.println(strings.to_ada_case("HelloWorld"))
  414. }
  415. Output:
  416. Hello_World
  417. */
  418. to_ada_case :: proc(s: string, allocator := context.allocator) -> string {
  419. s := s
  420. s = trim_space(s)
  421. b: Builder
  422. builder_init(&b, 0, len(s), allocator)
  423. w := to_writer(&b)
  424. string_case_iterator(w, s, proc(w: io.Writer, prev, curr, next: rune) {
  425. if !is_delimiter(curr) {
  426. if is_delimiter(prev) || prev == 0 || (unicode.is_lower(prev) && unicode.is_upper(curr)) {
  427. if prev != 0 {
  428. io.write_rune(w, '_')
  429. }
  430. io.write_rune(w, unicode.to_upper(curr))
  431. } else {
  432. io.write_rune(w, unicode.to_lower(curr))
  433. }
  434. }
  435. })
  436. return to_string(b)
  437. }