conversion.odin 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568
  1. package strings
  2. import "core:io"
  3. import "core:mem"
  4. import "core:unicode"
  5. import "core:unicode/utf8"
  /*
  Replaces each run of invalid UTF-8 bytes in the input string `s` with a single copy of `replacement`.

  *Allocates Using Provided Allocator*

  Inputs:
  - s: Input string that may contain invalid UTF-8 sequences.
  - replacement: String written once for every run of consecutive invalid bytes.
  - allocator: (default: context.allocator).

  WARNING: Allocation does not occur when len(s) == 0

  Returns:
  - res: A copy of `s` in which invalid sequences are replaced by `replacement`.
  - err: An optional allocator error if one occurred, `nil` otherwise
  */
  to_valid_utf8 :: proc(s, replacement: string, allocator := context.allocator) -> (res: string, err: mem.Allocator_Error) #optional_allocator_error {
      if len(s) == 0 {
          return "", nil
      }

      // Find the byte offset of the first invalid byte, if any.
      // A RUNE_ERROR that decodes with a width other than 1 is not treated as
      // invalid input, so the width is checked before deciding.
      first_invalid := -1
      for c, i in s {
          if c != utf8.RUNE_ERROR {
              continue
          }
          _, w := utf8.decode_rune_in_string(s[i:])
          if w == 1 {
              first_invalid = i
              break
          }
      }

      // Nothing to repair: hand back a plain copy.
      if first_invalid < 0 {
          return clone(s, allocator)
      }

      // Allocate the output up front and propagate any allocation failure,
      // instead of inferring "nothing was invalid" from a zero capacity.
      b: Builder
      builder_init(&b, 0, len(s) + len(replacement), allocator) or_return

      // Everything before the first invalid byte is copied verbatim.
      write_string(&b, s[:first_invalid])
      rest := s[first_invalid:]

      // `invalid` is true while inside a run of invalid bytes, so that the
      // replacement is emitted only once per run.
      invalid := false
      for i := 0; i < len(rest); /**/ {
          c := rest[i]
          if c < utf8.RUNE_SELF {
              // Single-byte rune: copy as-is.
              i += 1
              invalid = false
              write_byte(&b, c)
              continue
          }

          _, w := utf8.decode_rune_in_string(rest[i:])
          if w == 1 {
              // Invalid byte: skip it, emitting the replacement at the start of the run.
              i += 1
              if !invalid {
                  invalid = true
                  write_string(&b, replacement)
              }
              continue
          }

          // Multi-byte sequence of width `w`: copy all of its bytes.
          invalid = false
          write_string(&b, rest[i:][:w])
          i += w
      }
      return to_string(b), nil
  }
  /*
  Builds a new string in which every rune of `s` has been passed through `unicode.to_lower`.

  *Allocates Using Provided Allocator*

  Inputs:
  - s: Input string to be converted.
  - allocator: (default: context.allocator).

  Returns:
  - res: The new string with all characters converted to lowercase
  - err: An optional allocator error if one occurred, `nil` otherwise

  Example:

      import "core:fmt"
      import "core:strings"

      to_lower_example :: proc() {
          fmt.println(strings.to_lower("TeST"))
      }

  Output:

      test

  */
  to_lower :: proc(s: string, allocator := context.allocator) -> (res: string, err: mem.Allocator_Error) #optional_allocator_error {
      out: Builder
      // Reserve len(s) bytes up front; the builder is only initialised, not yet written to.
      builder_init(&out, 0, len(s), allocator) or_return

      for ch in s {
          lowered := unicode.to_lower(ch)
          write_rune(&out, lowered)
      }

      res = to_string(out)
      return
  }
  /*
  Builds a new string in which every rune of `s` has been passed through `unicode.to_upper`.

  *Allocates Using Provided Allocator*

  Inputs:
  - s: Input string to be converted.
  - allocator: (default: context.allocator).

  Returns:
  - res: The new string with all characters converted to uppercase
  - err: An optional allocator error if one occurred, `nil` otherwise

  Example:

      import "core:fmt"
      import "core:strings"

      to_upper_example :: proc() {
          fmt.println(strings.to_upper("Test"))
      }

  Output:

      TEST

  */
  to_upper :: proc(s: string, allocator := context.allocator) -> (res: string, err: mem.Allocator_Error) #optional_allocator_error {
      out: Builder
      // Reserve len(s) bytes up front; the builder is only initialised, not yet written to.
      builder_init(&out, 0, len(s), allocator) or_return

      for ch in s {
          raised := unicode.to_upper(ch)
          write_rune(&out, raised)
      }

      res = to_string(out)
      return
  }
  /*
  Checks if the rune `r` is a delimiter: '-', '_', or any rune accepted by `is_space`.

  Inputs:
  - r: Rune to check for delimiter status.

  Returns:
  - res: True if `r` is a delimiter, false otherwise.
  */
  is_delimiter :: proc(r: rune) -> (res: bool) {
      switch r {
      case '-', '_':
          return true
      }
      // Anything else is a delimiter only if it counts as whitespace.
      return is_space(r)
  }
  /*
  Checks if the rune `r` separates words.

  For runes up to 0x7f, everything except the digits '0'..'9', the letters
  'a'..'z' / 'A'..'Z' and the underscore '_' is a separator.
  For all other runes the result is `unicode.is_space(r)`.

  Inputs:
  - r: Rune to check for separator status.

  Returns:
  - res: True if `r` is a separator according to the rules above, false otherwise.
  */
  is_separator :: proc(r: rune) -> (res: bool) {
      if r > 0x7f {
          // TODO(bill): unicode categories
          // if unicode.is_letter(r) || unicode.is_digit(r) {
          //     return false;
          // }
          return unicode.is_space(r)
      }

      switch r {
      case '0' ..= '9', 'a' ..= 'z', 'A' ..= 'Z', '_':
          // Identifier-like characters never separate words.
          return false
      }
      return true
  }
  /*
  Iterates over a string, calling a callback for each rune with the previous, current, and next runes as arguments.

  The callback is invoked exactly once per rune of `s`, in order. `prev` is 0 for
  the first rune and `next` is 0 for the last rune. The callback is never invoked
  for an empty string.

  Inputs:
  - w: An io.Writer that is passed through unchanged to the callback for writing output.
  - s: The input string to be iterated over.
  - callback: A procedure to be called for each rune in the string, with arguments (w: io.Writer, prev, curr, next: rune).
  The callback can utilize the provided io.Writer to write output during the iteration.

  Example:

      import "core:fmt"
      import "core:strings"
      import "core:io"

      string_case_iterator_example :: proc() {
          my_callback :: proc(w: io.Writer, prev, curr, next: rune) {
              fmt.println("my_callback", curr) // <-- Custom logic here
          }
          s := "hello"
          b: strings.Builder
          strings.builder_init_len(&b, len(s))
          w := strings.to_writer(&b)
          strings.string_case_iterator(w, s, my_callback)
      }

  Output:

      my_callback h
      my_callback e
      my_callback l
      my_callback l
      my_callback o

  */
  string_case_iterator :: proc(
      w: io.Writer,
      s: string,
      callback: proc(w: io.Writer, prev, curr, next: rune),
  ) {
      prev, curr: rune

      // Tracks whether `curr` holds a rune that still has to be reported.
      // A dedicated flag is used rather than testing `curr == 0`, so that a
      // NUL rune inside `s` is reported like any other rune instead of being dropped.
      have_curr := false

      for next in s {
          if have_curr {
              callback(w, prev, curr, next)
          }
          prev = curr
          curr = next
          have_curr = true
      }

      // The last rune has no successor; report it with next == 0.
      if have_curr {
          callback(w, prev, curr, 0)
      }
  }
  // Alias to `to_camel_case`
  to_lower_camel_case :: to_camel_case

  /*
  Converts the input string `s` to "lowerCamelCase".

  Surrounding whitespace is trimmed first. Delimiter runes (see `is_delimiter`)
  are dropped, and the rune following a delimiter is uppercased.

  *Allocates Using Provided Allocator*

  Inputs:
  - s: Input string to be converted.
  - allocator: (default: context.allocator).

  Returns:
  - res: The converted string
  - err: An optional allocator error if one occurred, `nil` otherwise
  */
  to_camel_case :: proc(s: string, allocator := context.allocator) -> (res: string, err: mem.Allocator_Error) #optional_allocator_error {
      trimmed := trim_space(s)

      b: Builder
      builder_init(&b, 0, len(trimmed), allocator) or_return
      out := to_writer(&b)

      string_case_iterator(out, trimmed, proc(w: io.Writer, prev, curr, next: rune) {
          // Delimiters are never copied to the output.
          if is_delimiter(curr) {
              return
          }

          cased := curr
          if is_delimiter(prev) {
              // First rune of a new word.
              cased = unicode.to_upper(curr)
          } else if !unicode.is_lower(prev) {
              // Start of the string, or following a non-lowercase rune.
              cased = unicode.to_lower(curr)
          }
          io.write_rune(w, cased)
      })

      res = to_string(b)
      return
  }
  // Alias to `to_pascal_case`
  to_upper_camel_case :: to_pascal_case

  /*
  Converts the input string `s` to "UpperCamelCase" (PascalCase).

  Surrounding whitespace is trimmed first. Delimiter runes (see `is_delimiter`)
  are dropped; the first rune and every rune following a delimiter are uppercased.

  *Allocates Using Provided Allocator*

  Inputs:
  - s: Input string to be converted.
  - allocator: (default: context.allocator).

  Returns:
  - res: The converted string
  - err: An optional allocator error if one occurred, `nil` otherwise
  */
  to_pascal_case :: proc(s: string, allocator := context.allocator) -> (res: string, err: mem.Allocator_Error) #optional_allocator_error {
      trimmed := trim_space(s)

      b: Builder
      builder_init(&b, 0, len(trimmed), allocator) or_return
      out := to_writer(&b)

      string_case_iterator(out, trimmed, proc(w: io.Writer, prev, curr, next: rune) {
          // Delimiters are never copied to the output.
          if is_delimiter(curr) {
              return
          }

          cased := curr
          if is_delimiter(prev) || prev == 0 {
              // First rune of the string or of a new word.
              cased = unicode.to_upper(curr)
          } else if !unicode.is_lower(prev) {
              // Following a non-lowercase rune inside a word.
              cased = unicode.to_lower(curr)
          }
          io.write_rune(w, cased)
      })

      res = to_string(b)
      return
  }
  /*
  Returns a string converted to a delimiter-separated case with configurable casing

  Surrounding whitespace is trimmed first. Each run of delimiter runes in the
  input (see `is_delimiter`) is replaced by a single `delimiter`, including a run
  at the very end of the string. A `delimiter` is also inserted before an
  uppercase rune that follows a lowercase rune, and before an uppercase rune that
  follows an uppercase rune and precedes a lowercase one.

  *Allocates Using Provided Allocator*

  Inputs:
  - s: The input string to be converted
  - delimiter: The rune to be used as the delimiter between words
  - all_upper_case: A boolean indicating if the output should be all uppercased (true) or lowercased (false)
  - allocator: (default: context.allocator).

  Returns:
  - res: The converted string
  - err: An optional allocator error if one occurred, `nil` otherwise

  Example:

      import "core:fmt"
      import "core:strings"

      to_delimiter_case_example :: proc() {
          fmt.println(strings.to_delimiter_case("Hello World", '_', false))
          fmt.println(strings.to_delimiter_case("Hello World", ' ', true))
          fmt.println(strings.to_delimiter_case("aBC", '_', false))
      }

  Output:

      hello_world
      HELLO WORLD
      a_bc

  */
  to_delimiter_case :: proc(
      s: string,
      delimiter: rune,
      all_upper_case: bool,
      allocator := context.allocator,
  ) -> (res: string, err: mem.Allocator_Error) #optional_allocator_error {
      s := s
      s = trim_space(s)

      b: Builder
      builder_init(&b, 0, len(s), allocator) or_return
      w := to_writer(&b)

      adjust_case := unicode.to_upper if all_upper_case else unicode.to_lower

      // The loop runs one rune behind the iteration: `curr` is the rune being
      // emitted, `next` is the lookahead. On the first pass `curr` is still 0
      // and nothing is written.
      prev, curr: rune
      for next in s {
          if is_delimiter(curr) {
              // Collapse a run of input delimiters into one output delimiter.
              if !is_delimiter(prev) {
                  io.write_rune(w, delimiter)
              }
          } else if unicode.is_upper(curr) {
              // Word boundary: "aB" -> "a_b", and "ABc" -> "a_bc".
              if unicode.is_lower(prev) || (unicode.is_upper(prev) && unicode.is_lower(next)) {
                  io.write_rune(w, delimiter)
              }
              io.write_rune(w, adjust_case(curr))
          } else if curr != 0 {
              io.write_rune(w, adjust_case(curr))
          }

          prev = curr
          curr = next
      }

      // Flush the final rune, which the loop has only seen as a lookahead.
      if len(s) > 0 {
          if is_delimiter(curr) {
              // Treat a trailing delimiter like any other one, rather than
              // copying the input rune through unchanged.
              if !is_delimiter(prev) {
                  io.write_rune(w, delimiter)
              }
          } else {
              if unicode.is_upper(curr) && unicode.is_lower(prev) {
                  io.write_rune(w, delimiter)
              }
              io.write_rune(w, adjust_case(curr))
          }
      }

      return to_string(b), nil
  }
  /*
  Converts a string to "snake_case" with all runes lowercased

  Forwards to `to_delimiter_case` with '_' as the delimiter.

  *Allocates Using Provided Allocator*

  Inputs:
  - s: The input string to be converted
  - allocator: (default: context.allocator).

  Returns:
  - res: The converted string
  - err: An optional allocator error if one occurred, `nil` otherwise

  Example:

      import "core:fmt"
      import "core:strings"

      to_snake_case_example :: proc() {
          fmt.println(strings.to_snake_case("HelloWorld"))
          fmt.println(strings.to_snake_case("Hello World"))
      }

  Output:

      hello_world
      hello_world

  */
  to_snake_case :: proc(s: string, allocator := context.allocator) -> (res: string, err: mem.Allocator_Error) #optional_allocator_error {
      res, err = to_delimiter_case(s, '_', false, allocator)
      return
  }
  // Alias for `to_upper_snake_case`
  to_screaming_snake_case :: to_upper_snake_case

  /*
  Converts a string to "SNAKE_CASE" with all runes uppercased

  Forwards to `to_delimiter_case` with '_' as the delimiter.

  *Allocates Using Provided Allocator*

  Inputs:
  - s: The input string to be converted
  - allocator: (default: context.allocator).

  Returns:
  - res: The converted string
  - err: An optional allocator error if one occurred, `nil` otherwise

  Example:

      import "core:fmt"
      import "core:strings"

      to_upper_snake_case_example :: proc() {
          fmt.println(strings.to_upper_snake_case("HelloWorld"))
      }

  Output:

      HELLO_WORLD

  */
  to_upper_snake_case :: proc(s: string, allocator := context.allocator) -> (res: string, err: mem.Allocator_Error) #optional_allocator_error {
      res, err = to_delimiter_case(s, '_', true, allocator)
      return
  }
  /*
  Converts a string to "kebab-case" with all runes lowercased

  Forwards to `to_delimiter_case` with '-' as the delimiter.

  *Allocates Using Provided Allocator*

  Inputs:
  - s: The input string to be converted
  - allocator: (default: context.allocator).

  Returns:
  - res: The converted string
  - err: An optional allocator error if one occurred, `nil` otherwise

  Example:

      import "core:fmt"
      import "core:strings"

      to_kebab_case_example :: proc() {
          fmt.println(strings.to_kebab_case("HelloWorld"))
      }

  Output:

      hello-world

  */
  to_kebab_case :: proc(s: string, allocator := context.allocator) -> (res: string, err: mem.Allocator_Error) #optional_allocator_error {
      res, err = to_delimiter_case(s, '-', false, allocator)
      return
  }
  /*
  Converts a string to "KEBAB-CASE" with all runes uppercased

  Forwards to `to_delimiter_case` with '-' as the delimiter.

  *Allocates Using Provided Allocator*

  Inputs:
  - s: The input string to be converted
  - allocator: (default: context.allocator).

  Returns:
  - res: The converted string
  - err: An optional allocator error if one occurred, `nil` otherwise

  Example:

      import "core:fmt"
      import "core:strings"

      to_upper_kebab_case_example :: proc() {
          fmt.println(strings.to_upper_kebab_case("HelloWorld"))
      }

  Output:

      HELLO-WORLD

  */
  to_upper_kebab_case :: proc(s: string, allocator := context.allocator) -> (res: string, err: mem.Allocator_Error) #optional_allocator_error {
      res, err = to_delimiter_case(s, '-', true, allocator)
      return
  }
  /*
  Converts a string to "Ada_Case"

  Surrounding whitespace is trimmed first. Delimiter runes (see `is_delimiter`)
  are dropped. A word starts at the beginning of the string, after a delimiter,
  or where an uppercase rune follows a lowercase one; every word start is
  uppercased and, except at the beginning of the string, preceded by '_'.
  All other runes are lowercased.

  *Allocates Using Provided Allocator*

  Inputs:
  - s: The input string to be converted
  - allocator: (default: context.allocator).

  Returns:
  - res: The converted string
  - err: An optional allocator error if one occurred, `nil` otherwise

  Example:

      import "core:fmt"
      import "core:strings"

      to_ada_case_example :: proc() {
          fmt.println(strings.to_ada_case("HelloWorld"))
      }

  Output:

      Hello_World

  */
  to_ada_case :: proc(s: string, allocator := context.allocator) -> (res: string, err: mem.Allocator_Error) #optional_allocator_error {
      trimmed := trim_space(s)

      b: Builder
      builder_init(&b, 0, len(trimmed), allocator) or_return
      out := to_writer(&b)

      string_case_iterator(out, trimmed, proc(w: io.Writer, prev, curr, next: rune) {
          // Delimiters are never copied to the output.
          if is_delimiter(curr) {
              return
          }

          at_start := prev == 0
          word_start := at_start || is_delimiter(prev) || (unicode.is_lower(prev) && unicode.is_upper(curr))

          if !word_start {
              io.write_rune(w, unicode.to_lower(curr))
              return
          }

          // Words after the first one are joined with an underscore.
          if !at_start {
              io.write_rune(w, '_')
          }
          io.write_rune(w, unicode.to_upper(curr))
      })

      res = to_string(b)
      return
  }