bytes.odin 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149
  1. package bytes
  2. import "core:mem"
  3. import "core:unicode"
  4. import "core:unicode/utf8"
  5. clone :: proc(s: []byte, allocator := context.allocator, loc := #caller_location) -> []byte {
  6. c := make([]byte, len(s), allocator, loc)
  7. copy(c, s)
  8. return c[:len(s)]
  9. }
  10. ptr_from_slice :: proc(str: []byte) -> ^byte {
  11. d := transmute(mem.Raw_String)str
  12. return d.data
  13. }
  14. truncate_to_byte :: proc(str: []byte, b: byte) -> []byte {
  15. n := index_byte(str, b)
  16. if n < 0 {
  17. n = len(str)
  18. }
  19. return str[:n]
  20. }
  21. truncate_to_rune :: proc(str: []byte, r: rune) -> []byte {
  22. n := index_rune(str, r)
  23. if n < 0 {
  24. n = len(str)
  25. }
  26. return str[:n]
  27. }
  28. // Compares two strings, returning a value representing which one comes first lexiographically.
  29. // -1 for `a`; 1 for `b`, or 0 if they are equal.
  30. compare :: proc(lhs, rhs: []byte) -> int {
  31. return mem.compare(lhs, rhs)
  32. }
  33. contains_rune :: proc(s: []byte, r: rune) -> int {
  34. for c, offset in string(s) {
  35. if c == r {
  36. return offset
  37. }
  38. }
  39. return -1
  40. }
  41. contains :: proc(s, substr: []byte) -> bool {
  42. return index(s, substr) >= 0
  43. }
  44. contains_any :: proc(s, chars: []byte) -> bool {
  45. return index_any(s, chars) >= 0
  46. }
  47. rune_count :: proc(s: []byte) -> int {
  48. return utf8.rune_count(s)
  49. }
  50. equal :: proc(a, b: []byte) -> bool {
  51. return string(a) == string(b)
  52. }
  53. equal_fold :: proc(u, v: []byte) -> bool {
  54. s, t := string(u), string(v)
  55. loop: for s != "" && t != "" {
  56. sr, tr: rune
  57. if s[0] < utf8.RUNE_SELF {
  58. sr, s = rune(s[0]), s[1:]
  59. } else {
  60. r, size := utf8.decode_rune_in_string(s)
  61. sr, s = r, s[size:]
  62. }
  63. if t[0] < utf8.RUNE_SELF {
  64. tr, t = rune(t[0]), t[1:]
  65. } else {
  66. r, size := utf8.decode_rune_in_string(t)
  67. tr, t = r, t[size:]
  68. }
  69. if tr == sr { // easy case
  70. continue loop
  71. }
  72. if tr < sr {
  73. tr, sr = sr, tr
  74. }
  75. if tr < utf8.RUNE_SELF {
  76. switch sr {
  77. case 'A'..='Z':
  78. if tr == (sr+'a')-'A' {
  79. continue loop
  80. }
  81. }
  82. return false
  83. }
  84. // TODO(bill): Unicode folding
  85. return false
  86. }
  87. return s == t
  88. }
  89. has_prefix :: proc(s, prefix: []byte) -> bool {
  90. return len(s) >= len(prefix) && string(s[0:len(prefix)]) == string(prefix)
  91. }
  92. has_suffix :: proc(s, suffix: []byte) -> bool {
  93. return len(s) >= len(suffix) && string(s[len(s)-len(suffix):]) == string(suffix)
  94. }
  95. join :: proc(a: [][]byte, sep: []byte, allocator := context.allocator) -> []byte {
  96. if len(a) == 0 {
  97. return nil
  98. }
  99. n := len(sep) * (len(a) - 1)
  100. for s in a {
  101. n += len(s)
  102. }
  103. b := make([]byte, n, allocator)
  104. i := copy(b, a[0])
  105. for s in a[1:] {
  106. i += copy(b[i:], sep)
  107. i += copy(b[i:], s)
  108. }
  109. return b
  110. }
  111. concatenate :: proc(a: [][]byte, allocator := context.allocator) -> []byte {
  112. if len(a) == 0 {
  113. return nil
  114. }
  115. n := 0
  116. for s in a {
  117. n += len(s)
  118. }
  119. b := make([]byte, n, allocator)
  120. i := 0
  121. for s in a {
  122. i += copy(b[i:], s)
  123. }
  124. return b
  125. }
  126. @private
  127. _split :: proc(s, sep: []byte, sep_save, n: int, allocator := context.allocator) -> [][]byte {
  128. s, n := s, n
  129. if n == 0 {
  130. return nil
  131. }
  132. if sep == nil {
  133. l := utf8.rune_count(s)
  134. if n < 0 || n > l {
  135. n = l
  136. }
  137. res := make([dynamic][]byte, n, allocator)
  138. for i := 0; i < n-1; i += 1 {
  139. _, w := utf8.decode_rune(s)
  140. res[i] = s[:w]
  141. s = s[w:]
  142. }
  143. if n > 0 {
  144. res[n-1] = s
  145. }
  146. return res[:]
  147. }
  148. if n < 0 {
  149. n = count(s, sep) + 1
  150. }
  151. res := make([dynamic][]byte, n, allocator)
  152. n -= 1
  153. i := 0
  154. for ; i < n; i += 1 {
  155. m := index(s, sep)
  156. if m < 0 {
  157. break
  158. }
  159. res[i] = s[:m+sep_save]
  160. s = s[m+len(sep):]
  161. }
  162. res[i] = s
  163. return res[:i+1]
  164. }
  165. split :: proc(s, sep: []byte, allocator := context.allocator) -> [][]byte {
  166. return _split(s, sep, 0, -1, allocator)
  167. }
  168. split_n :: proc(s, sep: []byte, n: int, allocator := context.allocator) -> [][]byte {
  169. return _split(s, sep, 0, n, allocator)
  170. }
  171. split_after :: proc(s, sep: []byte, allocator := context.allocator) -> [][]byte {
  172. return _split(s, sep, len(sep), -1, allocator)
  173. }
  174. split_after_n :: proc(s, sep: []byte, n: int, allocator := context.allocator) -> [][]byte {
  175. return _split(s, sep, len(sep), n, allocator)
  176. }
  177. @private
  178. _split_iterator :: proc(s: ^[]byte, sep: []byte, sep_save, n: int) -> (res: []byte, ok: bool) {
  179. s, n := s, n
  180. if n == 0 {
  181. return
  182. }
  183. if sep == nil {
  184. res = s[:]
  185. ok = true
  186. s^ = s[len(s):]
  187. return
  188. }
  189. if n < 0 {
  190. n = count(s^, sep) + 1
  191. }
  192. n -= 1
  193. i := 0
  194. for ; i < n; i += 1 {
  195. m := index(s^, sep)
  196. if m < 0 {
  197. break
  198. }
  199. res = s[:m+sep_save]
  200. ok = true
  201. s^ = s[m+len(sep):]
  202. return
  203. }
  204. res = s[:]
  205. ok = res != nil
  206. s^ = s[len(s):]
  207. return
  208. }
  209. split_iterator :: proc(s: ^[]byte, sep: []byte) -> ([]byte, bool) {
  210. return _split_iterator(s, sep, 0, -1)
  211. }
  212. split_n_iterator :: proc(s: ^[]byte, sep: []byte, n: int) -> ([]byte, bool) {
  213. return _split_iterator(s, sep, 0, n)
  214. }
  215. split_after_iterator :: proc(s: ^[]byte, sep: []byte) -> ([]byte, bool) {
  216. return _split_iterator(s, sep, len(sep), -1)
  217. }
  218. split_after_n_iterator :: proc(s: ^[]byte, sep: []byte, n: int) -> ([]byte, bool) {
  219. return _split_iterator(s, sep, len(sep), n)
  220. }
  221. index_byte :: proc(s: []byte, c: byte) -> int {
  222. for i := 0; i < len(s); i += 1 {
  223. if s[i] == c {
  224. return i
  225. }
  226. }
  227. return -1
  228. }
  229. // Returns -1 if c is not present
  230. last_index_byte :: proc(s: []byte, c: byte) -> int {
  231. for i := len(s)-1; i >= 0; i -= 1 {
  232. if s[i] == c {
  233. return i
  234. }
  235. }
  236. return -1
  237. }
  238. @private PRIME_RABIN_KARP :: 16777619
  239. index :: proc(s, substr: []byte) -> int {
  240. hash_str_rabin_karp :: proc(s: []byte) -> (hash: u32 = 0, pow: u32 = 1) {
  241. for i := 0; i < len(s); i += 1 {
  242. hash = hash*PRIME_RABIN_KARP + u32(s[i])
  243. }
  244. sq := u32(PRIME_RABIN_KARP)
  245. for i := len(s); i > 0; i >>= 1 {
  246. if (i & 1) != 0 {
  247. pow *= sq
  248. }
  249. sq *= sq
  250. }
  251. return
  252. }
  253. n := len(substr)
  254. switch {
  255. case n == 0:
  256. return 0
  257. case n == 1:
  258. return index_byte(s, substr[0])
  259. case n == len(s):
  260. if string(s) == string(substr) {
  261. return 0
  262. }
  263. return -1
  264. case n > len(s):
  265. return -1
  266. }
  267. hash, pow := hash_str_rabin_karp(substr)
  268. h: u32
  269. for i := 0; i < n; i += 1 {
  270. h = h*PRIME_RABIN_KARP + u32(s[i])
  271. }
  272. if h == hash && string(s[:n]) == string(substr) {
  273. return 0
  274. }
  275. for i := n; i < len(s); /**/ {
  276. h *= PRIME_RABIN_KARP
  277. h += u32(s[i])
  278. h -= pow * u32(s[i-n])
  279. i += 1
  280. if h == hash && string(s[i-n:i]) == string(substr) {
  281. return i - n
  282. }
  283. }
  284. return -1
  285. }
  286. last_index :: proc(s, substr: []byte) -> int {
  287. hash_str_rabin_karp_reverse :: proc(s: []byte) -> (hash: u32 = 0, pow: u32 = 1) {
  288. for i := len(s) - 1; i >= 0; i -= 1 {
  289. hash = hash*PRIME_RABIN_KARP + u32(s[i])
  290. }
  291. sq := u32(PRIME_RABIN_KARP)
  292. for i := len(s); i > 0; i >>= 1 {
  293. if (i & 1) != 0 {
  294. pow *= sq
  295. }
  296. sq *= sq
  297. }
  298. return
  299. }
  300. n := len(substr)
  301. switch {
  302. case n == 0:
  303. return len(s)
  304. case n == 1:
  305. return last_index_byte(s, substr[0])
  306. case n == len(s):
  307. return 0 if string(substr) == string(s) else -1
  308. case n > len(s):
  309. return -1
  310. }
  311. hash, pow := hash_str_rabin_karp_reverse(substr)
  312. last := len(s) - n
  313. h: u32
  314. for i := len(s)-1; i >= last; i -= 1 {
  315. h = h*PRIME_RABIN_KARP + u32(s[i])
  316. }
  317. if h == hash && string(s[last:]) == string(substr) {
  318. return last
  319. }
  320. for i := last-1; i >= 0; i -= 1 {
  321. h *= PRIME_RABIN_KARP
  322. h += u32(s[i])
  323. h -= pow * u32(s[i+n])
  324. if h == hash && string(s[i:i+n]) == string(substr) {
  325. return i
  326. }
  327. }
  328. return -1
  329. }
  330. index_any :: proc(s, chars: []byte) -> int {
  331. if chars == nil {
  332. return -1
  333. }
  334. // TODO(bill): Optimize
  335. for r, i in s {
  336. for c in chars {
  337. if r == c {
  338. return i
  339. }
  340. }
  341. }
  342. return -1
  343. }
  344. last_index_any :: proc(s, chars: []byte) -> int {
  345. if chars == nil {
  346. return -1
  347. }
  348. for i := len(s); i > 0; {
  349. r, w := utf8.decode_last_rune(s[:i])
  350. i -= w
  351. for c in string(chars) {
  352. if r == c {
  353. return i
  354. }
  355. }
  356. }
  357. return -1
  358. }
  359. count :: proc(s, substr: []byte) -> int {
  360. if len(substr) == 0 { // special case
  361. return rune_count(s) + 1
  362. }
  363. if len(substr) == 1 {
  364. c := substr[0]
  365. switch len(s) {
  366. case 0:
  367. return 0
  368. case 1:
  369. return int(s[0] == c)
  370. }
  371. n := 0
  372. for i := 0; i < len(s); i += 1 {
  373. if s[i] == c {
  374. n += 1
  375. }
  376. }
  377. return n
  378. }
  379. // TODO(bill): Use a non-brute for approach
  380. n := 0
  381. str := s
  382. for {
  383. i := index(str, substr)
  384. if i == -1 {
  385. return n
  386. }
  387. n += 1
  388. str = str[i+len(substr):]
  389. }
  390. return n
  391. }
  392. repeat :: proc(s: []byte, count: int, allocator := context.allocator) -> []byte {
  393. if count < 0 {
  394. panic("bytes: negative repeat count")
  395. } else if count > 0 && (len(s)*count)/count != len(s) {
  396. panic("bytes: repeat count will cause an overflow")
  397. }
  398. b := make([]byte, len(s)*count, allocator)
  399. i := copy(b, s)
  400. for i < len(b) { // 2^N trick to reduce the need to copy
  401. copy(b[i:], b[:i])
  402. i *= 2
  403. }
  404. return b
  405. }
  406. replace_all :: proc(s, old, new: []byte, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
  407. return replace(s, old, new, -1, allocator)
  408. }
  409. // if n < 0, no limit on the number of replacements
  410. replace :: proc(s, old, new: []byte, n: int, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
  411. if string(old) == string(new) || n == 0 {
  412. was_allocation = false
  413. output = s
  414. return
  415. }
  416. byte_count := n
  417. if m := count(s, old); m == 0 {
  418. was_allocation = false
  419. output = s
  420. return
  421. } else if n < 0 || m < n {
  422. byte_count = m
  423. }
  424. t := make([]byte, len(s) + byte_count*(len(new) - len(old)), allocator)
  425. was_allocation = true
  426. w := 0
  427. start := 0
  428. for i := 0; i < byte_count; i += 1 {
  429. j := start
  430. if len(old) == 0 {
  431. if i > 0 {
  432. _, width := utf8.decode_rune(s[start:])
  433. j += width
  434. }
  435. } else {
  436. j += index(s[start:], old)
  437. }
  438. w += copy(t[w:], s[start:j])
  439. w += copy(t[w:], new)
  440. start = j + len(old)
  441. }
  442. w += copy(t[w:], s[start:])
  443. output = t[0:w]
  444. return
  445. }
  446. remove :: proc(s, key: []byte, n: int, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
  447. return replace(s, key, {}, n, allocator)
  448. }
  449. remove_all :: proc(s, key: []byte, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
  450. return remove(s, key, -1, allocator)
  451. }
  452. @(private) _ascii_space := [256]u8{'\t' = 1, '\n' = 1, '\v' = 1, '\f' = 1, '\r' = 1, ' ' = 1}
  453. is_ascii_space :: proc(r: rune) -> bool {
  454. if r < utf8.RUNE_SELF {
  455. return _ascii_space[u8(r)] != 0
  456. }
  457. return false
  458. }
  459. is_space :: proc(r: rune) -> bool {
  460. if r < 0x2000 {
  461. switch r {
  462. case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xa0, 0x1680:
  463. return true
  464. }
  465. } else {
  466. if r <= 0x200a {
  467. return true
  468. }
  469. switch r {
  470. case 0x2028, 0x2029, 0x202f, 0x205f, 0x3000:
  471. return true
  472. }
  473. }
  474. return false
  475. }
  476. is_null :: proc(r: rune) -> bool {
  477. return r == 0x0000
  478. }
  479. index_proc :: proc(s: []byte, p: proc(rune) -> bool, truth := true) -> int {
  480. for r, i in string(s) {
  481. if p(r) == truth {
  482. return i
  483. }
  484. }
  485. return -1
  486. }
  487. index_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr, truth := true) -> int {
  488. for r, i in string(s) {
  489. if p(state, r) == truth {
  490. return i
  491. }
  492. }
  493. return -1
  494. }
  495. last_index_proc :: proc(s: []byte, p: proc(rune) -> bool, truth := true) -> int {
  496. // TODO(bill): Probably use Rabin-Karp Search
  497. for i := len(s); i > 0; {
  498. r, size := utf8.decode_last_rune(s[:i])
  499. i -= size
  500. if p(r) == truth {
  501. return i
  502. }
  503. }
  504. return -1
  505. }
  506. last_index_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr, truth := true) -> int {
  507. // TODO(bill): Probably use Rabin-Karp Search
  508. for i := len(s); i > 0; {
  509. r, size := utf8.decode_last_rune(s[:i])
  510. i -= size
  511. if p(state, r) == truth {
  512. return i
  513. }
  514. }
  515. return -1
  516. }
  517. trim_left_proc :: proc(s: []byte, p: proc(rune) -> bool) -> []byte {
  518. i := index_proc(s, p, false)
  519. if i == -1 {
  520. return nil
  521. }
  522. return s[i:]
  523. }
  524. index_rune :: proc(s: []byte, r: rune) -> int {
  525. switch {
  526. case 0 <= r && r < utf8.RUNE_SELF:
  527. return index_byte(s, byte(r))
  528. case r == utf8.RUNE_ERROR:
  529. for c, i in string(s) {
  530. if c == utf8.RUNE_ERROR {
  531. return i
  532. }
  533. }
  534. return -1
  535. case !utf8.valid_rune(r):
  536. return -1
  537. }
  538. b, w := utf8.encode_rune(r)
  539. return index(s, b[:w])
  540. }
  541. trim_left_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr) -> []byte {
  542. i := index_proc_with_state(s, p, state, false)
  543. if i == -1 {
  544. return nil
  545. }
  546. return s[i:]
  547. }
  548. trim_right_proc :: proc(s: []byte, p: proc(rune) -> bool) -> []byte {
  549. i := last_index_proc(s, p, false)
  550. if i >= 0 && s[i] >= utf8.RUNE_SELF {
  551. _, w := utf8.decode_rune(s[i:])
  552. i += w
  553. } else {
  554. i += 1
  555. }
  556. return s[0:i]
  557. }
  558. trim_right_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr) -> []byte {
  559. i := last_index_proc_with_state(s, p, state, false)
  560. if i >= 0 && s[i] >= utf8.RUNE_SELF {
  561. _, w := utf8.decode_rune(s[i:])
  562. i += w
  563. } else {
  564. i += 1
  565. }
  566. return s[0:i]
  567. }
  568. is_in_cutset :: proc(state: rawptr, r: rune) -> bool {
  569. if state == nil {
  570. return false
  571. }
  572. cutset := (^string)(state)^
  573. for c in cutset {
  574. if r == c {
  575. return true
  576. }
  577. }
  578. return false
  579. }
  580. trim_left :: proc(s: []byte, cutset: []byte) -> []byte {
  581. if s == nil || cutset == nil {
  582. return s
  583. }
  584. state := cutset
  585. return trim_left_proc_with_state(s, is_in_cutset, &state)
  586. }
  587. trim_right :: proc(s: []byte, cutset: []byte) -> []byte {
  588. if s == nil || cutset == nil {
  589. return s
  590. }
  591. state := cutset
  592. return trim_right_proc_with_state(s, is_in_cutset, &state)
  593. }
  594. trim :: proc(s: []byte, cutset: []byte) -> []byte {
  595. return trim_right(trim_left(s, cutset), cutset)
  596. }
  597. trim_left_space :: proc(s: []byte) -> []byte {
  598. return trim_left_proc(s, is_space)
  599. }
  600. trim_right_space :: proc(s: []byte) -> []byte {
  601. return trim_right_proc(s, is_space)
  602. }
  603. trim_space :: proc(s: []byte) -> []byte {
  604. return trim_right_space(trim_left_space(s))
  605. }
  606. trim_left_null :: proc(s: []byte) -> []byte {
  607. return trim_left_proc(s, is_null)
  608. }
  609. trim_right_null :: proc(s: []byte) -> []byte {
  610. return trim_right_proc(s, is_null)
  611. }
  612. trim_null :: proc(s: []byte) -> []byte {
  613. return trim_right_null(trim_left_null(s))
  614. }
  615. trim_prefix :: proc(s, prefix: []byte) -> []byte {
  616. if has_prefix(s, prefix) {
  617. return s[len(prefix):]
  618. }
  619. return s
  620. }
  621. trim_suffix :: proc(s, suffix: []byte) -> []byte {
  622. if has_suffix(s, suffix) {
  623. return s[:len(s)-len(suffix)]
  624. }
  625. return s
  626. }
  627. split_multi :: proc(s: []byte, substrs: [][]byte, skip_empty := false, allocator := context.allocator) -> [][]byte #no_bounds_check {
  628. if s == nil || len(substrs) <= 0 {
  629. return nil
  630. }
  631. sublen := len(substrs[0])
  632. for substr in substrs[1:] {
  633. sublen = min(sublen, len(substr))
  634. }
  635. shared := len(s) - sublen
  636. if shared <= 0 {
  637. return nil
  638. }
  639. // number, index, last
  640. n, i, l := 0, 0, 0
  641. // count results
  642. first_pass: for i <= shared {
  643. for substr in substrs {
  644. if string(s[i:i+sublen]) == string(substr) {
  645. if !skip_empty || i - l > 0 {
  646. n += 1
  647. }
  648. i += sublen
  649. l = i
  650. continue first_pass
  651. }
  652. }
  653. _, skip := utf8.decode_rune(s[i:])
  654. i += skip
  655. }
  656. if !skip_empty || len(s) - l > 0 {
  657. n += 1
  658. }
  659. if n < 1 {
  660. // no results
  661. return nil
  662. }
  663. buf := make([][]byte, n, allocator)
  664. n, i, l = 0, 0, 0
  665. // slice results
  666. second_pass: for i <= shared {
  667. for substr in substrs {
  668. if string(s[i:i+sublen]) == string(substr) {
  669. if !skip_empty || i - l > 0 {
  670. buf[n] = s[l:i]
  671. n += 1
  672. }
  673. i += sublen
  674. l = i
  675. continue second_pass
  676. }
  677. }
  678. _, skip := utf8.decode_rune(s[i:])
  679. i += skip
  680. }
  681. if !skip_empty || len(s) - l > 0 {
  682. buf[n] = s[l:]
  683. }
  684. return buf
  685. }
  686. split_multi_iterator :: proc(s: ^[]byte, substrs: [][]byte, skip_empty := false) -> ([]byte, bool) #no_bounds_check {
  687. if s == nil || s^ == nil || len(substrs) <= 0 {
  688. return nil, false
  689. }
  690. sublen := len(substrs[0])
  691. for substr in substrs[1:] {
  692. sublen = min(sublen, len(substr))
  693. }
  694. shared := len(s) - sublen
  695. if shared <= 0 {
  696. return nil, false
  697. }
  698. // index, last
  699. i, l := 0, 0
  700. loop: for i <= shared {
  701. for substr in substrs {
  702. if string(s[i:i+sublen]) == string(substr) {
  703. if !skip_empty || i - l > 0 {
  704. res := s[l:i]
  705. s^ = s[i:]
  706. return res, true
  707. }
  708. i += sublen
  709. l = i
  710. continue loop
  711. }
  712. }
  713. _, skip := utf8.decode_rune(s[i:])
  714. i += skip
  715. }
  716. if !skip_empty || len(s) - l > 0 {
  717. res := s[l:]
  718. s^ = s[len(s):]
  719. return res, true
  720. }
  721. return nil, false
  722. }
  723. // scrub scruvs invalid utf-8 characters and replaces them with the replacement string
  724. // Adjacent invalid bytes are only replaced once
  725. scrub :: proc(s: []byte, replacement: []byte, allocator := context.allocator) -> []byte {
  726. str := s
  727. b: Buffer
  728. buffer_init_allocator(&b, 0, len(s), allocator)
  729. has_error := false
  730. cursor := 0
  731. origin := str
  732. for len(str) > 0 {
  733. r, w := utf8.decode_rune(str)
  734. if r == utf8.RUNE_ERROR {
  735. if !has_error {
  736. has_error = true
  737. buffer_write(&b, origin[:cursor])
  738. }
  739. } else if has_error {
  740. has_error = false
  741. buffer_write(&b, replacement)
  742. origin = origin[cursor:]
  743. cursor = 0
  744. }
  745. cursor += w
  746. str = str[w:]
  747. }
  748. return buffer_to_bytes(&b)
  749. }
  750. reverse :: proc(s: []byte, allocator := context.allocator) -> []byte {
  751. str := s
  752. n := len(str)
  753. buf := make([]byte, n)
  754. i := n
  755. for len(str) > 0 {
  756. _, w := utf8.decode_rune(str)
  757. i -= w
  758. copy(buf[i:], str[:w])
  759. str = str[w:]
  760. }
  761. return buf
  762. }
  763. expand_tabs :: proc(s: []byte, tab_size: int, allocator := context.allocator) -> []byte {
  764. if tab_size <= 0 {
  765. panic("tab size must be positive")
  766. }
  767. if s == nil {
  768. return nil
  769. }
  770. b: Buffer
  771. buffer_init_allocator(&b, 0, len(s), allocator)
  772. str := s
  773. column: int
  774. for len(str) > 0 {
  775. r, w := utf8.decode_rune(str)
  776. if r == '\t' {
  777. expand := tab_size - column%tab_size
  778. for i := 0; i < expand; i += 1 {
  779. buffer_write_byte(&b, ' ')
  780. }
  781. column += expand
  782. } else {
  783. if r == '\n' {
  784. column = 0
  785. } else {
  786. column += w
  787. }
  788. buffer_write_rune(&b, r)
  789. }
  790. str = str[w:]
  791. }
  792. return buffer_to_bytes(&b)
  793. }
  794. partition :: proc(str, sep: []byte) -> (head, match, tail: []byte) {
  795. i := index(str, sep)
  796. if i == -1 {
  797. head = str
  798. return
  799. }
  800. head = str[:i]
  801. match = str[i:i+len(sep)]
  802. tail = str[i+len(sep):]
  803. return
  804. }
  805. center_justify :: centre_justify // NOTE(bill): Because Americans exist
  806. // centre_justify returns a byte slice with a pad byte slice at boths sides if the str's rune length is smaller than length
  807. centre_justify :: proc(str: []byte, length: int, pad: []byte, allocator := context.allocator) -> []byte {
  808. n := rune_count(str)
  809. if n >= length || pad == nil {
  810. return clone(str, allocator)
  811. }
  812. remains := length-1
  813. pad_len := rune_count(pad)
  814. b: Buffer
  815. buffer_init_allocator(&b, 0, len(str) + (remains/pad_len + 1)*len(pad), allocator)
  816. write_pad_string(&b, pad, pad_len, remains/2)
  817. buffer_write(&b, str)
  818. write_pad_string(&b, pad, pad_len, (remains+1)/2)
  819. return buffer_to_bytes(&b)
  820. }
  821. // left_justify returns a byte slice with a pad byte slice at left side if the str's rune length is smaller than length
  822. left_justify :: proc(str: []byte, length: int, pad: []byte, allocator := context.allocator) -> []byte {
  823. n := rune_count(str)
  824. if n >= length || pad == nil {
  825. return clone(str, allocator)
  826. }
  827. remains := length-1
  828. pad_len := rune_count(pad)
  829. b: Buffer
  830. buffer_init_allocator(&b, 0, len(str) + (remains/pad_len + 1)*len(pad), allocator)
  831. buffer_write(&b, str)
  832. write_pad_string(&b, pad, pad_len, remains)
  833. return buffer_to_bytes(&b)
  834. }
  835. // right_justify returns a byte slice with a pad byte slice at right side if the str's rune length is smaller than length
  836. right_justify :: proc(str: []byte, length: int, pad: []byte, allocator := context.allocator) -> []byte {
  837. n := rune_count(str)
  838. if n >= length || pad == nil {
  839. return clone(str, allocator)
  840. }
  841. remains := length-1
  842. pad_len := rune_count(pad)
  843. b: Buffer
  844. buffer_init_allocator(&b, 0, len(str) + (remains/pad_len + 1)*len(pad), allocator)
  845. write_pad_string(&b, pad, pad_len, remains)
  846. buffer_write(&b, str)
  847. return buffer_to_bytes(&b)
  848. }
  849. @private
  850. write_pad_string :: proc(b: ^Buffer, pad: []byte, pad_len, remains: int) {
  851. repeats := remains / pad_len
  852. for i := 0; i < repeats; i += 1 {
  853. buffer_write(b, pad)
  854. }
  855. n := remains % pad_len
  856. p := pad
  857. for i := 0; i < n; i += 1 {
  858. r, width := utf8.decode_rune(p)
  859. buffer_write_rune(b, r)
  860. p = p[width:]
  861. }
  862. }
  863. // fields splits the byte slice s around each instance of one or more consecutive white space character, defined by unicode.is_space
  864. // returning a slice of subslices of s or an empty slice if s only contains white space
  865. fields :: proc(s: []byte, allocator := context.allocator) -> [][]byte #no_bounds_check {
  866. n := 0
  867. was_space := 1
  868. set_bits := u8(0)
  869. // check to see
  870. for i in 0..<len(s) {
  871. r := s[i]
  872. set_bits |= r
  873. is_space := int(_ascii_space[r])
  874. n += was_space & ~is_space
  875. was_space = is_space
  876. }
  877. if set_bits >= utf8.RUNE_SELF {
  878. return fields_proc(s, unicode.is_space, allocator)
  879. }
  880. if n == 0 {
  881. return nil
  882. }
  883. a := make([][]byte, n, allocator)
  884. na := 0
  885. field_start := 0
  886. i := 0
  887. for i < len(s) && _ascii_space[s[i]] != 0 {
  888. i += 1
  889. }
  890. field_start = i
  891. for i < len(s) {
  892. if _ascii_space[s[i]] == 0 {
  893. i += 1
  894. continue
  895. }
  896. a[na] = s[field_start : i]
  897. na += 1
  898. i += 1
  899. for i < len(s) && _ascii_space[s[i]] != 0 {
  900. i += 1
  901. }
  902. field_start = i
  903. }
  904. if field_start < len(s) {
  905. a[na] = s[field_start:]
  906. }
  907. return a
  908. }
  909. // fields_proc splits the byte slice s at each run of unicode code points `ch` satisfying f(ch)
  910. // returns a slice of subslices of s
  911. // If all code points in s satisfy f(ch) or string is empty, an empty slice is returned
  912. //
  913. // fields_proc makes no guarantee about the order in which it calls f(ch)
  914. // it assumes that `f` always returns the same value for a given ch
  915. fields_proc :: proc(s: []byte, f: proc(rune) -> bool, allocator := context.allocator) -> [][]byte #no_bounds_check {
  916. subslices := make([dynamic][]byte, 0, 32, allocator)
  917. start, end := -1, -1
  918. for r, offset in string(s) {
  919. end = offset
  920. if f(r) {
  921. if start >= 0 {
  922. append(&subslices, s[start : end])
  923. // -1 could be used, but just speed it up through bitwise not
  924. // gotta love 2's complement
  925. start = ~start
  926. }
  927. } else {
  928. if start < 0 {
  929. start = end
  930. }
  931. }
  932. }
  933. if start >= 0 {
  934. append(&subslices, s[start : len(s)])
  935. }
  936. return subslices[:]
  937. }