strings.odin 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693
  1. package strings
  2. import "core:mem"
  3. import "core:unicode/utf8"
  // clone returns a copy of `s` whose bytes live in memory obtained from `allocator`.
  // One extra zero byte is allocated directly after the copied data; it is not
  // part of the returned string.
  clone :: proc(s: string, allocator := context.allocator) -> string {
      n := len(s);
      buf := make([]byte, n+1, allocator);
      copy(buf, cast([]byte)s);
      buf[n] = 0; // trailing NUL sits just past the returned slice
      return string(buf[:n]);
  }
  // clone_to_cstring copies `s` into a freshly allocated, NUL-terminated buffer
  // (obtained from `allocator`) and returns it as a cstring.
  clone_to_cstring :: proc(s: string, allocator := context.allocator) -> cstring {
      n := len(s);
      buf := make([]byte, n+1, allocator);
      copy(buf, cast([]byte)s);
      buf[n] = 0; // terminator required by cstring
      return cstring(&buf[0]);
  }
  // new_string is the deprecated spelling of `clone`; it forwards to it unchanged.
  @(deprecated="Please use 'strings.clone'")
  new_string :: proc(s: string, allocator := context.allocator) -> string {
      return clone(s, allocator);
  }
  // new_cstring is the deprecated spelling of `clone_to_cstring`; it forwards to it unchanged.
  @(deprecated="Please use 'strings.clone_to_cstring'")
  new_cstring :: proc(s: string, allocator := context.allocator) -> cstring {
      return clone_to_cstring(s, allocator);
  }
  // to_odin_string converts a cstring to a string with a plain cast.
  // Deprecated: callers can write `string(str)` directly.
  @(deprecated="Please use a standard cast for cstring to string")
  to_odin_string :: proc(str: cstring) -> string {
      converted := string(str);
      return converted;
  }
  // string_from_ptr builds a string header from a raw byte pointer and a byte
  // length. No data is copied; the result refers to the memory at `ptr`.
  string_from_ptr :: proc(ptr: ^byte, len: int) -> string {
      raw := mem.Raw_String{ptr, len};
      return transmute(string)raw;
  }
  // compare orders two strings by their bytes, returning whatever
  // `mem.compare` yields for the two byte slices.
  compare :: proc(lhs, rhs: string) -> int {
      left  := cast([]byte)lhs;
      right := cast([]byte)rhs;
      return mem.compare(left, right);
  }
  // contains_rune returns the byte offset of the first occurrence of rune `r`
  // in `s`, or -1 when `r` does not occur. (Despite the name, the result is an
  // offset, not a bool.)
  contains_rune :: proc(s: string, r: rune) -> int {
      for ch, byte_offset in s {
          if ch == r {
              return byte_offset;
          }
      }
      return -1;
  }
  // contains reports whether `substr` occurs anywhere within `s`.
  contains :: proc(s, substr: string) -> bool {
      found_at := index(s, substr);
      return found_at >= 0;
  }
  // contains_any reports whether any rune of `chars` occurs within `s`.
  contains_any :: proc(s, chars: string) -> bool {
      found_at := index_any(s, chars);
      return found_at >= 0;
  }
  // rune_count returns the number of runes in `s`, as counted by
  // `utf8.rune_count_in_string`.
  rune_count :: proc(s: string) -> int {
      n := utf8.rune_count_in_string(s);
      return n;
  }
  // equal_fold reports whether `s` and `t` are equal when ASCII letter case is
  // ignored. Only 'A'..'Z' / 'a'..'z' are folded; any other pair of differing
  // runes makes the strings unequal.
  equal_fold :: proc(s, t: string) -> bool {
      a, b := s, t;

      for len(a) > 0 && len(b) > 0 {
          // Pull one rune off the front of each string, with a fast path for ASCII.
          ra, rb: rune;
          wa, wb := 1, 1;

          if a[0] < utf8.RUNE_SELF {
              ra = rune(a[0]);
          } else {
              ra, wa = utf8.decode_rune_in_string(a);
          }
          a = a[wa:];

          if b[0] < utf8.RUNE_SELF {
              rb = rune(b[0]);
          } else {
              rb, wb = utf8.decode_rune_in_string(b);
          }
          b = b[wb:];

          if ra == rb do continue; // identical runes

          // Order the pair so that `lo` would be the upper-case candidate.
          lo, hi := ra, rb;
          if hi < lo {
              lo, hi = hi, lo;
          }

          // TODO(bill): Unicode folding
          if hi >= utf8.RUNE_SELF do return false;

          is_upper := 'A' <= lo && lo <= 'Z';
          if is_upper && hi == lo + ('a' - 'A') do continue;

          return false;
      }

      // One side ran out: equal only if both did.
      return a == b;
  }
  // has_prefix reports whether `s` begins with `prefix`.
  has_prefix :: proc(s, prefix: string) -> bool {
      n := len(prefix);
      if len(s) < n do return false;
      return s[:n] == prefix;
  }
  // has_suffix reports whether `s` ends with `suffix`.
  has_suffix :: proc(s, suffix: string) -> bool {
      n := len(suffix);
      if len(s) < n do return false;
      return s[len(s)-n:] == suffix;
  }
  // join concatenates the elements of `a`, placing `sep` between consecutive
  // elements. The result is allocated with `allocator`; an empty `a` yields ""
  // without allocating.
  join :: proc(a: []string, sep: string, allocator := context.allocator) -> string {
      if len(a) == 0 do return "";

      // Exact output size: all parts plus one separator between each pair.
      total := len(sep) * (len(a) - 1);
      for part in a {
          total += len(part);
      }

      out := make([]byte, total, allocator);
      pos := 0;
      for part, idx in a {
          if idx > 0 {
              pos += copy(out[pos:], cast([]byte)sep);
          }
          pos += copy(out[pos:], cast([]byte)part);
      }
      return string(out);
  }
  // concatenate returns all elements of `a` appended back to back in a single
  // string allocated with `allocator`; an empty `a` yields "" without allocating.
  concatenate :: proc(a: []string, allocator := context.allocator) -> string {
      if len(a) == 0 do return "";

      total := 0;
      for piece in a {
          total += len(piece);
      }

      out := make([]byte, total, allocator);
      pos := 0;
      for piece in a {
          copied := copy(out[pos:], cast([]byte)piece);
          pos += copied;
      }
      return string(out);
  }
  // index_byte returns the offset of the first byte in `s` equal to `c`,
  // or -1 if there is none.
  index_byte :: proc(s: string, c: byte) -> int {
      for b, offset in cast([]byte)s {
          if b == c {
              return offset;
          }
      }
      return -1;
  }
  // last_index_byte returns the offset of the last byte in `s` equal to `c`,
  // or -1 if `c` is not present.
  last_index_byte :: proc(s: string, c: byte) -> int {
      i := len(s);
      for i > 0 {
          i -= 1;
          if s[i] == c {
              return i;
          }
      }
      return -1;
  }
  // index returns the byte offset of the first occurrence of `substr` in `s`,
  // or -1 if `substr` does not occur. An empty `substr` matches at offset 0.
  index :: proc(s, substr: string) -> int {
      n := len(substr);

      // Cheap special cases first.
      if n == 0 do return 0;
      if n == 1 do return index_byte(s, substr[0]);
      if n > len(s) do return -1;
      if n == len(s) {
          if s == substr do return 0;
          return -1;
      }

      // Brute-force scan over every window of length n.
      last_start := len(s) - n;
      for start := 0; start <= last_start; start += 1 {
          if s[start:start+n] == substr {
              return start;
          }
      }
      return -1;
  }
  // index_any returns the byte offset of the first rune in `s` that also
  // appears in `chars`, or -1 if there is no such rune (or `chars` is empty).
  index_any :: proc(s, chars: string) -> int {
      if len(chars) == 0 do return -1;

      // TODO(bill): Optimize
      for r, offset in s {
          for candidate in chars {
              if candidate == r do return offset;
          }
      }
      return -1;
  }
  // last_index_any returns the byte offset of the last rune in `s` that also
  // appears in `chars`, or -1 if there is no such rune (or `chars` is empty).
  last_index_any :: proc(s, chars: string) -> int {
      if len(chars) == 0 do return -1;

      // Walk backwards one rune at a time.
      end := len(s);
      for end > 0 {
          r, width := utf8.decode_last_rune_in_string(s[:end]);
          end -= width;
          for candidate in chars {
              if candidate == r do return end;
          }
      }
      return -1;
  }
  // count returns the number of non-overlapping occurrences of `substr` in `s`.
  // For an empty `substr` the result is the rune count of `s` plus one.
  count :: proc(s, substr: string) -> int {
      if len(substr) == 0 {
          return rune_count(s) + 1; // special case
      }

      if len(substr) == 1 {
          // Single byte: a plain scan is enough.
          target := substr[0];
          hits := 0;
          for b in cast([]byte)s {
              if b == target {
                  hits += 1;
              }
          }
          return hits;
      }

      // TODO(bill): Use a non-brute for approach
      rest := s;
      hits := 0;
      for {
          at := index(rest, substr);
          if at == -1 do break;
          hits += 1;
          rest = rest[at+len(substr):]; // resume after the match: no overlaps
      }
      return hits;
  }
  // repeat returns `s` repeated `count` times, allocated with `allocator`.
  // Panics when `count` is negative or when len(s)*count overflows.
  repeat :: proc(s: string, count: int, allocator := context.allocator) -> string {
      if count < 0 {
          panic("strings: negative repeat count");
      }
      if count > 0 && (len(s)*count)/count != len(s) {
          panic("strings: repeat count will cause an overflow");
      }

      total := len(s) * count;
      out := make([]byte, total, allocator);

      // Seed with one copy, then double the filled prefix until the buffer is
      // full; `copy` clamps the final, partial doubling.
      filled := copy(out, cast([]byte)s);
      for filled < total {
          copy(out[filled:], out[:filled]);
          filled *= 2;
      }
      return string(out);
  }
  // replace_all replaces every occurrence of `old` in `s` with `new`.
  // It is `replace` with no limit (n = -1); see `replace` for the meaning of
  // the two results.
  replace_all :: proc(s, old, new: string, allocator := context.allocator) -> (output: string, was_allocation: bool) {
      output, was_allocation = replace(s, old, new, -1, allocator);
      return;
  }
  // replace returns `s` with the first `n` non-overlapping occurrences of `old`
  // replaced by `new`. If n < 0 there is no limit on the number of replacements.
  // When nothing needs replacing, `s` itself is returned and `was_allocation`
  // is false; otherwise the result is allocated with `allocator` and
  // `was_allocation` is true.
  // An empty `old` matches before the first rune and after every rune.
  replace :: proc(s, old, new: string, n: int, allocator := context.allocator) -> (output: string, was_allocation: bool) {
      if old == new || n == 0 {
          return s, false;
      }

      occurrences := count(s, old);
      if occurrences == 0 {
          return s, false;
      }

      // Clamp the number of replacements to what is actually available.
      limit := n;
      if limit < 0 || occurrences < limit {
          limit = occurrences;
      }

      buf := make([]byte, len(s) + limit*(len(new) - len(old)), allocator);
      written := 0;
      cursor  := 0;

      for done := 0; done < limit; done += 1 {
          match := cursor;
          if len(old) > 0 {
              match += index(s[cursor:], old);
          } else if done > 0 {
              // Empty pattern: step over one rune between insertions.
              _, width := utf8.decode_rune_in_string(s[cursor:]);
              match += width;
          }

          written += copy(buf[written:], cast([]byte)s[cursor:match]);
          written += copy(buf[written:], cast([]byte)new);
          cursor = match + len(old);
      }

      // Whatever follows the last replacement is copied verbatim.
      written += copy(buf[written:], cast([]byte)s[cursor:]);
      return string(buf[:written]), true;
  }
  // is_ascii_space reports whether `r` is one of the ASCII whitespace
  // characters: '\t', '\n', '\v', '\f', '\r' or ' '.
  is_ascii_space :: proc(r: rune) -> bool {
      // '\t' (0x09) through '\r' (0x0d) are contiguous code points.
      return r == ' ' || ('\t' <= r && r <= '\r');
  }
  // is_space reports whether `r` is a white space character: the ASCII
  // whitespace set, U+0085, U+00A0, U+1680, U+2000..U+200A, U+2028, U+2029,
  // U+202F, U+205F or U+3000.
  is_space :: proc(r: rune) -> bool {
      switch r {
      case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xa0, 0x1680:
          return true;
      case 0x2028, 0x2029, 0x202f, 0x205f, 0x3000:
          return true;
      }
      // Remaining candidates form one contiguous range.
      return 0x2000 <= r && r <= 0x200a;
  }
  // is_null reports whether `r` is the NUL rune (U+0000).
  is_null :: proc(r: rune) -> bool {
      return r == 0;
  }
  // index_proc returns the byte offset of the first rune in `s` for which
  // `p(r) == truth`, or -1 if no rune qualifies.
  index_proc :: proc(s: string, p: proc(rune) -> bool, truth := true) -> int {
      for r, offset in s {
          if p(r) != truth do continue;
          return offset;
      }
      return -1;
  }
  // index_proc_with_state returns the byte offset of the first rune in `s` for
  // which `p(state, r) == truth`, or -1 if no rune qualifies. `state` is passed
  // through to `p` untouched.
  index_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, state: rawptr, truth := true) -> int {
      for r, offset in s {
          if p(state, r) != truth do continue;
          return offset;
      }
      return -1;
  }
  // last_index_proc returns the byte offset of the last rune in `s` for which
  // `p(r) == truth`, or -1 if no rune qualifies.
  last_index_proc :: proc(s: string, p: proc(rune) -> bool, truth := true) -> int {
      // Decode runes from the back so the offset always lands on a rune start.
      end := len(s);
      for end > 0 {
          r, width := utf8.decode_last_rune_in_string(s[:end]);
          end -= width;
          if p(r) == truth {
              return end;
          }
      }
      return -1;
  }
  // last_index_proc_with_state returns the byte offset of the last rune in `s`
  // for which `p(state, r) == truth`, or -1 if no rune qualifies. `state` is
  // passed through to `p` untouched.
  last_index_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, state: rawptr, truth := true) -> int {
      // Decode runes from the back so the offset always lands on a rune start.
      end := len(s);
      for end > 0 {
          r, width := utf8.decode_last_rune_in_string(s[:end]);
          end -= width;
          if p(state, r) == truth {
              return end;
          }
      }
      return -1;
  }
  // trim_left_proc returns `s` without its leading runes that satisfy `p`.
  // If every rune satisfies `p`, the result is "".
  trim_left_proc :: proc(s: string, p: proc(rune) -> bool) -> string {
      first_kept := index_proc(s, p, false);
      if first_kept != -1 {
          return s[first_kept:];
      }
      return "";
  }
  // index_rune returns the byte offset of the first occurrence of rune `r` in
  // `s`, or -1 if it does not occur. Searching for utf8.RUNE_ERROR finds the
  // first position that decodes to RUNE_ERROR; any other invalid rune is never
  // found.
  index_rune :: proc(s: string, r: rune) -> int {
      // ASCII: a single-byte search suffices.
      if 0 <= r && r < utf8.RUNE_SELF {
          return index_byte(s, byte(r));
      }

      if r == utf8.RUNE_ERROR {
          for decoded, offset in s {
              if decoded == utf8.RUNE_ERROR {
                  return offset;
              }
          }
          return -1;
      }

      if !utf8.valid_rune(r) {
          return -1;
      }

      // Multi-byte rune: search for its encoded form.
      encoded, width := utf8.encode_rune(r);
      return index(s, string(encoded[:width]));
  }
  // trim_left_proc_with_state returns `s` without its leading runes for which
  // `p(state, r)` is true. If every rune qualifies, the result is "".
  trim_left_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, state: rawptr) -> string {
      first_kept := index_proc_with_state(s, p, state, false);
      if first_kept != -1 {
          return s[first_kept:];
      }
      return "";
  }
  // trim_right_proc returns `s` without its trailing runes that satisfy `p`.
  trim_right_proc :: proc(s: string, p: proc(rune) -> bool) -> string {
      last_kept := last_index_proc(s, p, false);
      if last_kept < 0 {
          return s[0:0]; // every rune was trimmed
      }

      // The cut point is just past the last kept rune, which may be multi-byte.
      width := 1;
      if s[last_kept] >= utf8.RUNE_SELF {
          _, width = utf8.decode_rune_in_string(s[last_kept:]);
      }
      return s[0:last_kept+width];
  }
  // trim_right_proc_with_state returns `s` without its trailing runes for which
  // `p(state, r)` is true.
  trim_right_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, state: rawptr) -> string {
      last_kept := last_index_proc_with_state(s, p, state, false);
      if last_kept < 0 {
          return s[0:0]; // every rune was trimmed
      }

      // The cut point is just past the last kept rune, which may be multi-byte.
      width := 1;
      if s[last_kept] >= utf8.RUNE_SELF {
          _, width = utf8.decode_rune_in_string(s[last_kept:]);
      }
      return s[0:last_kept+width];
  }
  // is_in_cutset is a stateful rune predicate: `state` must be nil or point to
  // a string, and the result is whether `r` is one of that string's runes.
  // A nil `state` matches nothing.
  is_in_cutset :: proc(state: rawptr, r: rune) -> bool {
      if state == nil do return false;

      set_ptr := (^string)(state);
      for member in set_ptr^ {
          if member == r do return true;
      }
      return false;
  }
  // trim_left returns `s` without its leading runes that appear in `cutset`.
  // An empty `s` or `cutset` returns `s` unchanged.
  trim_left :: proc(s: string, cutset: string) -> string {
      if len(s) == 0 || len(cutset) == 0 {
          return s;
      }
      set := cutset; // addressable copy handed to the predicate as its state
      return trim_left_proc_with_state(s, is_in_cutset, &set);
  }
  // trim_right returns `s` without its trailing runes that appear in `cutset`.
  // An empty `s` or `cutset` returns `s` unchanged.
  trim_right :: proc(s: string, cutset: string) -> string {
      if len(s) == 0 || len(cutset) == 0 {
          return s;
      }
      set := cutset; // addressable copy handed to the predicate as its state
      return trim_right_proc_with_state(s, is_in_cutset, &set);
  }
  // trim returns `s` with the runes in `cutset` removed from both ends.
  trim :: proc(s: string, cutset: string) -> string {
      without_left := trim_left(s, cutset);
      return trim_right(without_left, cutset);
  }
  // trim_left_space returns `s` without leading white space (as defined by `is_space`).
  trim_left_space :: proc(s: string) -> string {
      trimmed := trim_left_proc(s, is_space);
      return trimmed;
  }
  // trim_right_space returns `s` without trailing white space (as defined by `is_space`).
  trim_right_space :: proc(s: string) -> string {
      trimmed := trim_right_proc(s, is_space);
      return trimmed;
  }
  // trim_space returns `s` with white space (as defined by `is_space`) removed
  // from both ends.
  trim_space :: proc(s: string) -> string {
      without_left := trim_left_space(s);
      return trim_right_space(without_left);
  }
  // trim_left_null returns `s` without leading NUL (U+0000) runes.
  trim_left_null :: proc(s: string) -> string {
      trimmed := trim_left_proc(s, is_null);
      return trimmed;
  }
  // trim_right_null returns `s` without trailing NUL (U+0000) runes.
  trim_right_null :: proc(s: string) -> string {
      trimmed := trim_right_proc(s, is_null);
      return trimmed;
  }
  // trim_null returns `s` with NUL (U+0000) runes removed from both ends.
  trim_null :: proc(s: string) -> string {
      without_left := trim_left_null(s);
      return trim_right_null(without_left);
  }
  // scrub returns a copy of `str` in which every run of invalid UTF-8 bytes is
  // replaced by `replacement`. Adjacent invalid bytes are only replaced once.
  // Valid text, including a correctly encoded U+FFFD, is copied through
  // unchanged. The result is built with a Builder that uses `allocator`.
  scrub :: proc(str: string, replacement: string, allocator := context.allocator) -> string {
      b := make_builder(allocator);
      grow_builder(&b, len(str));

      in_invalid_run := false;
      valid_start := 0; // start of the valid bytes not yet written to `b`
      i := 0;

      for i < len(str) {
          r, w := utf8.decode_rune_in_string(str[i:]);

          // An invalid byte decodes as RUNE_ERROR with width 1; a genuine
          // U+FFFD in the input has a larger width and must be kept.
          if r == utf8.RUNE_ERROR && w == 1 {
              if !in_invalid_run {
                  in_invalid_run = true;
                  // Flush the valid text before this run, then emit a single
                  // replacement for the whole run.
                  write_string(&b, str[valid_start:i]);
                  write_string(&b, replacement);
              }
              i += w;
              valid_start = i;
          } else {
              in_invalid_run = false;
              i += w;
          }
      }

      // Flush the valid tail (the whole string when nothing was invalid).
      write_string(&b, str[valid_start:]);
      return to_string(b);
  }
  // reverse returns a new string holding the runes of `str` in reverse order.
  // Each rune's bytes are kept in their original order, so valid UTF-8 stays
  // valid; an invalid byte is treated as a one-byte unit. The result is
  // allocated with `allocator`.
  reverse :: proc(str: string, allocator := context.allocator) -> string {
      rest := str;
      buf := make([]byte, len(rest), allocator);

      // Fill `buf` from the back: the first rune of the input lands at the end.
      write_at := len(buf);
      for len(rest) > 0 {
          _, w := utf8.decode_rune_in_string(rest);
          write_at -= w;
          copy(buf[write_at:], cast([]byte)rest[:w]);
          rest = rest[w:];
      }
      return string(buf);
  }
  // expand_tabs returns a copy of `str` in which each '\t' is replaced by the
  // number of spaces needed to reach the next multiple of `tab_size` columns.
  // The column counter advances by one per rune and resets to zero after '\n'.
  // Panics if `tab_size` is not positive. The result is built with a Builder
  // that uses `allocator`; an empty `str` yields "" directly.
  expand_tabs :: proc(str: string, tab_size: int, allocator := context.allocator) -> string {
      if tab_size <= 0 {
          panic("tab size must be positive");
      }
      if str == "" {
          return "";
      }

      b := make_builder(allocator);
      rest := str;
      column := 0;

      for len(rest) > 0 {
          r, w := utf8.decode_rune_in_string(rest);

          if r == '\t' {
              spaces := tab_size - column%tab_size;
              for i := 0; i < spaces; i += 1 {
                  write_byte(&b, ' ');
              }
              column += spaces;
          } else {
              if r == '\n' {
                  column = 0;
              } else {
                  // One column per rune, not per byte, so multi-byte runes
                  // do not shift later tab stops.
                  column += 1;
              }
              // Copy the original bytes so invalid input passes through as-is.
              write_string(&b, rest[:w]);
          }

          rest = rest[w:];
      }
      return to_string(b);
  }
  // partition splits `str` around the first occurrence of `sep`, returning the
  // text before it, the matched separator, and the text after it. When `sep`
  // is not found, `head` is all of `str` and `match` and `tail` are empty.
  partition :: proc(str, sep: string) -> (head, match, tail: string) {
      at := index(str, sep);
      if at == -1 {
          return str, "", "";
      }
      after := at + len(sep);
      return str[:at], str[at:after], str[after:];
  }
  center_justify :: centre_justify; // NOTE(bill): Because Americans exist

  // centre_justify returns `str` padded with `pad` on both sides so that the
  // result is `length` runes long. When the padding cannot be split evenly,
  // the extra rune goes on the right. If `str` already has at least `length`
  // runes, or `pad` is empty, a clone of `str` is returned.
  centre_justify :: proc(str: string, length: int, pad: string, allocator := context.allocator) -> string {
      n := rune_count(str);
      if n >= length || pad == "" {
          return clone(str, allocator);
      }

      // Number of pad runes needed to reach `length`.
      remains := length - n;
      pad_len := rune_count(pad);

      b := make_builder(allocator);
      grow_builder(&b, len(str) + (remains/pad_len + 1)*len(pad));

      write_pad_string(&b, pad, pad_len, remains/2);
      write_string(&b, str);
      write_pad_string(&b, pad, pad_len, (remains+1)/2);

      return to_string(b);
  }
  // left_justify returns `str` followed by enough `pad` on its right side to
  // make the result `length` runes long. If `str` already has at least
  // `length` runes, or `pad` is empty, a clone of `str` is returned.
  left_justify :: proc(str: string, length: int, pad: string, allocator := context.allocator) -> string {
      n := rune_count(str);
      if n >= length || pad == "" {
          return clone(str, allocator);
      }

      // Number of pad runes needed to reach `length`.
      remains := length - n;
      pad_len := rune_count(pad);

      b := make_builder(allocator);
      grow_builder(&b, len(str) + (remains/pad_len + 1)*len(pad));

      write_string(&b, str);
      write_pad_string(&b, pad, pad_len, remains);

      return to_string(b);
  }
  // right_justify returns `str` preceded by enough `pad` on its left side to
  // make the result `length` runes long. If `str` already has at least
  // `length` runes, or `pad` is empty, a clone of `str` is returned.
  right_justify :: proc(str: string, length: int, pad: string, allocator := context.allocator) -> string {
      n := rune_count(str);
      if n >= length || pad == "" {
          return clone(str, allocator);
      }

      // Number of pad runes needed to reach `length`.
      remains := length - n;
      pad_len := rune_count(pad);

      b := make_builder(allocator);
      grow_builder(&b, len(str) + (remains/pad_len + 1)*len(pad));

      write_pad_string(&b, pad, pad_len, remains);
      write_string(&b, str);

      return to_string(b);
  }
  // write_pad_string writes `remains` runes of padding to `b`: as many whole
  // copies of `pad` as fit, then the leading runes of `pad` for the remainder.
  // `pad_len` is the rune count of `pad`.
  @private
  write_pad_string :: proc(b: ^Builder, pad: string, pad_len, remains: int) {
      whole_copies := remains / pad_len;
      for whole_copies > 0 {
          write_string(b, pad);
          whole_copies -= 1;
      }

      // Partial copy: emit the first `leftover` runes of the pad string.
      leftover := remains % pad_len;
      rest := pad;
      for leftover > 0 {
          r, w := utf8.decode_rune_in_string(rest);
          write_rune(b, r);
          rest = rest[w:];
          leftover -= 1;
      }
  }