bytes.odin 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125
  1. package bytes
  2. import "core:mem"
  3. import "core:unicode"
  4. import "core:unicode/utf8"
  // clone returns a newly allocated copy of `s`.
  // The copy is created with `allocator`; `loc` is forwarded to `make`.
  clone :: proc(s: []byte, allocator := context.allocator, loc := #caller_location) -> []byte {
      size := len(s)
      dup := make([]byte, size, allocator, loc)
      copy(dup, s)
      return dup[:size]
  }
  // ptr_from_slice returns the data pointer of `str`.
  // The slice header is reinterpreted as a mem.Raw_String to reach its `data` field.
  ptr_from_slice :: proc(str: []byte) -> ^byte {
      header := transmute(mem.Raw_String)str
      first := header.data
      return first
  }
  // truncate_to_byte returns the part of `str` before the first occurrence of byte `b`,
  // or all of `str` when `b` does not occur.
  truncate_to_byte :: proc(str: []byte, b: byte) -> []byte {
      if cut := index_byte(str, b); cut >= 0 {
          return str[:cut]
      }
      return str[:len(str)]
  }
  // truncate_to_rune returns the part of `str` before the first occurrence of rune `r`,
  // or all of `str` when `r` does not occur.
  truncate_to_rune :: proc(str: []byte, r: rune) -> []byte {
      if cut := index_rune(str, r); cut >= 0 {
          return str[:cut]
      }
      return str[:len(str)]
  }
  // compare orders two byte slices lexicographically by delegating to mem.compare.
  // Returns -1 if `lhs` comes first, 1 if `rhs` comes first, or 0 if they are equal.
  compare :: proc(lhs, rhs: []byte) -> int {
      ordering := mem.compare(lhs, rhs)
      return ordering
  }
  // contains_rune scans `s` as UTF-8 and returns the byte offset of the first rune
  // equal to `r`, or -1 when there is none.
  // NOTE: despite the name, the result is an offset and not a boolean.
  contains_rune :: proc(s: []byte, r: rune) -> int {
      found := -1
      for ch, at in string(s) {
          if ch == r {
              found = at
              break
          }
      }
      return found
  }
  // contains reports whether `substr` occurs anywhere inside `s`.
  contains :: proc(s, substr: []byte) -> bool {
      position := index(s, substr)
      return position >= 0
  }
  // contains_any reports whether index_any finds any element of `chars` inside `s`.
  contains_any :: proc(s, chars: []byte) -> bool {
      position := index_any(s, chars)
      return position >= 0
  }
  // rune_count returns the number of runes in `s` as counted by utf8.rune_count.
  rune_count :: proc(s: []byte) -> int {
      total := utf8.rune_count(s)
      return total
  }
  // equal reports whether `a` and `b` have the same length and the same bytes.
  equal :: proc(a, b: []byte) -> bool {
      if len(a) != len(b) {
          return false
      }
      return string(a) == string(b)
  }
  // equal_fold reports whether `u` and `v`, read as UTF-8, are equal when ASCII letter
  // case is ignored. Non-ASCII runes must match exactly (no Unicode folding yet).
  equal_fold :: proc(u, v: []byte) -> bool {
      // next_rune pops the leading rune off `str`, with a fast path for single-byte runes.
      next_rune :: proc(str: string) -> (r: rune, rest: string) {
          if str[0] < utf8.RUNE_SELF {
              return rune(str[0]), str[1:]
          }
          decoded, size := utf8.decode_rune_in_string(str)
          return decoded, str[size:]
      }

      lhs, rhs := string(u), string(v)
      for lhs != "" && rhs != "" {
          a, b: rune
          a, lhs = next_rune(lhs)
          b, rhs = next_rune(rhs)
          if a == b { // easy case
              continue
          }

          // Order the pair so that only "upper-case `lo` vs lower-case `hi`" needs testing.
          lo, hi := min(a, b), max(a, b)
          if hi < utf8.RUNE_SELF && 'A' <= lo && lo <= 'Z' && hi == (lo+'a')-'A' {
              continue
          }

          // TODO(bill): Unicode folding
          return false
      }
      // Equal only if both inputs were consumed completely.
      return lhs == rhs
  }
  // has_prefix reports whether `s` begins with `prefix`.
  has_prefix :: proc(s, prefix: []byte) -> bool {
      if len(s) < len(prefix) {
          return false
      }
      head := s[0:len(prefix)]
      return string(head) == string(prefix)
  }
  // has_suffix reports whether `s` ends with `suffix`.
  has_suffix :: proc(s, suffix: []byte) -> bool {
      if len(s) < len(suffix) {
          return false
      }
      tail := s[len(s)-len(suffix):]
      return string(tail) == string(suffix)
  }
  // join concatenates the slices in `a`, placing `sep` between consecutive elements.
  // The result is allocated with `allocator`; nil is returned when `a` is empty.
  join :: proc(a: [][]byte, sep: []byte, allocator := context.allocator) -> []byte {
      if len(a) == 0 {
          return nil
      }

      // Exact output size: all parts plus one separator between each pair.
      total := len(sep) * (len(a) - 1)
      for part in a {
          total += len(part)
      }

      out := make([]byte, total, allocator)
      pos := 0
      for part, idx in a {
          if idx > 0 {
              pos += copy(out[pos:], sep)
          }
          pos += copy(out[pos:], part)
      }
      return out
  }
  // concatenate returns a single slice holding every element of `a` back to back.
  // The result is allocated with `allocator`; nil is returned when `a` is empty.
  concatenate :: proc(a: [][]byte, allocator := context.allocator) -> []byte {
      if len(a) == 0 {
          return nil
      }

      total := 0
      for part in a {
          total += len(part)
      }

      out := make([]byte, total, allocator)
      pos := 0
      for part in a {
          pos += copy(out[pos:], part)
      }
      return out
  }
  // _split is the shared implementation behind split, split_n, split_after and split_after_n.
  //
  // s        - input to split; the returned pieces are subslices of it
  // sep      - separator; when it is empty (nil or zero length) `s` is split into individual runes
  // sep_save - number of separator bytes to keep at the end of each piece (0 or len(sep))
  // n        - maximum number of pieces; n < 0 means no limit, n == 0 returns nil
  //
  // The backing array of the result is allocated with `allocator`.
  @private
  _split :: proc(s, sep: []byte, sep_save, n: int, allocator := context.allocator) -> [][]byte {
      s, n := s, n

      if n == 0 {
          return nil
      }

      // Test the length rather than `sep == nil`: an empty but non-nil separator must also
      // take the rune-splitting path, otherwise `index` keeps matching at offset 0 and the
      // loop below emits a run of empty pieces.
      if len(sep) == 0 {
          l := utf8.rune_count(s)
          if n < 0 || n > l {
              n = l
          }

          res := make([dynamic][]byte, n, allocator)
          for i := 0; i < n-1; i += 1 {
              _, w := utf8.decode_rune(s)
              res[i] = s[:w]
              s = s[w:]
          }
          // The last piece receives whatever is left.
          if n > 0 {
              res[n-1] = s
          }
          return res[:]
      }

      if n < 0 {
          n = count(s, sep) + 1
      }

      res := make([dynamic][]byte, n, allocator)

      n -= 1

      i := 0
      for ; i < n; i += 1 {
          m := index(s, sep)
          if m < 0 {
              break
          }
          res[i] = s[:m+sep_save]
          s = s[m+len(sep):]
      }
      res[i] = s

      return res[:i+1]
  }
  // split cuts `s` at every occurrence of `sep` and returns the pieces without the separators.
  split :: proc(s, sep: []byte, allocator := context.allocator) -> [][]byte {
      NO_LIMIT :: -1
      return _split(s, sep, 0, NO_LIMIT, allocator)
  }
  // split_n is like split but `n` is passed to _split as the maximum number of pieces
  // (n < 0 means no limit, n == 0 returns nil).
  split_n :: proc(s, sep: []byte, n: int, allocator := context.allocator) -> [][]byte {
      KEEP_NOTHING :: 0
      return _split(s, sep, KEEP_NOTHING, n, allocator)
  }
  // split_after cuts `s` after every occurrence of `sep`; each piece keeps its trailing separator.
  split_after :: proc(s, sep: []byte, allocator := context.allocator) -> [][]byte {
      NO_LIMIT :: -1
      keep := len(sep)
      return _split(s, sep, keep, NO_LIMIT, allocator)
  }
  // split_after_n is like split_after but `n` is passed to _split as the maximum number of pieces.
  split_after_n :: proc(s, sep: []byte, n: int, allocator := context.allocator) -> [][]byte {
      keep := len(sep)
      return _split(s, sep, keep, n, allocator)
  }
  // _split_iterator yields the next piece of `s^` and advances `s^` past it.
  //
  // s        - remaining input; updated in place on every call
  // sep      - separator; when empty, the whole remainder is yielded as one piece
  // sep_save - number of separator bytes to keep at the end of the piece (0 or len(sep))
  //
  // Returns ok = false once the input is exhausted, so it can drive a `for ... in` loop.
  @private
  _split_iterator :: proc(s: ^[]byte, sep: []byte, sep_save: int) -> (res: []byte, ok: bool) {
      // Stop once nothing is left. Without this guard an empty separator reported
      // ok = true forever and a caller's loop never terminated.
      if s == nil || len(s^) == 0 {
          return
      }

      rest := s^

      m := -1
      if len(sep) != 0 {
          m = index(rest, sep)
      }

      if m < 0 {
          // Empty separator or separator not found: the remainder is the final piece.
          res = rest
          ok = true
          s^ = rest[len(rest):]
      } else {
          res = rest[:m+sep_save]
          ok = true
          s^ = rest[m+len(sep):]
      }
      return
  }
  // split_iterator yields the next piece of `s^` up to (and excluding) `sep`, advancing `s^`.
  split_iterator :: proc(s: ^[]byte, sep: []byte) -> ([]byte, bool) {
      KEEP_NOTHING :: 0
      return _split_iterator(s, sep, KEEP_NOTHING)
  }
  // split_after_iterator yields the next piece of `s^` including its trailing `sep`, advancing `s^`.
  split_after_iterator :: proc(s: ^[]byte, sep: []byte) -> ([]byte, bool) {
      keep := len(sep)
      return _split_iterator(s, sep, keep)
  }
  // index_byte returns the index of the first byte in `s` equal to `c`, or -1 if there is none.
  index_byte :: proc(s: []byte, c: byte) -> int {
      for b, i in s {
          if b == c {
              return i
          }
      }
      return -1
  }
  // last_index_byte returns the index of the last byte in `s` equal to `c`.
  // Returns -1 if c is not present
  last_index_byte :: proc(s: []byte, c: byte) -> int {
      pos := len(s)
      for pos > 0 {
          pos -= 1
          if s[pos] == c {
              return pos
          }
      }
      return -1
  }
  // Multiplier of the rolling hash used by index and last_index.
  @private PRIME_RABIN_KARP :: 16777619

  // index returns the offset of the first occurrence of `substr` in `s`, or -1 if absent.
  // An empty `substr` matches at offset 0. Longer needles use a rolling-hash search;
  // every hash hit is confirmed with a byte comparison.
  index :: proc(s, substr: []byte) -> int {
      // Returns the hash of `s` and PRIME_RABIN_KARP raised to len(s) (both wrapping in u32).
      hash_str_rabin_karp :: proc(s: []byte) -> (hash: u32 = 0, pow: u32 = 1) {
          for b in s {
              hash = hash*PRIME_RABIN_KARP + u32(b)
          }
          // Exponentiation by squaring over the bits of len(s).
          sq := u32(PRIME_RABIN_KARP)
          for bits := len(s); bits > 0; bits >>= 1 {
              if (bits & 1) != 0 {
                  pow *= sq
              }
              sq *= sq
          }
          return
      }

      n := len(substr)
      if n == 0 {
          return 0
      }
      if n == 1 {
          return index_byte(s, substr[0])
      }
      if n == len(s) {
          return 0 if string(s) == string(substr) else -1
      }
      if n > len(s) {
          return -1
      }

      needle := string(substr)
      hash, pow := hash_str_rabin_karp(substr)

      // Hash of the first window.
      h: u32
      for b in s[:n] {
          h = h*PRIME_RABIN_KARP + u32(b)
      }
      if h == hash && string(s[:n]) == needle {
          return 0
      }

      // Slide the window: bring in s[end], drop s[end-n].
      for end := n; end < len(s); end += 1 {
          h *= PRIME_RABIN_KARP
          h += u32(s[end])
          h -= pow * u32(s[end-n])
          start := end + 1 - n
          if h == hash && string(s[start:end+1]) == needle {
              return start
          }
      }
      return -1
  }
  // last_index returns the offset of the last occurrence of `substr` in `s`, or -1 if absent.
  // An empty `substr` matches at len(s). Longer needles use a rolling-hash search that runs
  // from the end of `s`; every hash hit is confirmed with a byte comparison.
  last_index :: proc(s, substr: []byte) -> int {
      // Returns the hash of `s` read back to front and PRIME_RABIN_KARP raised to len(s).
      hash_str_rabin_karp_reverse :: proc(s: []byte) -> (hash: u32 = 0, pow: u32 = 1) {
          for pos := len(s); pos > 0; pos -= 1 {
              hash = hash*PRIME_RABIN_KARP + u32(s[pos-1])
          }
          // Exponentiation by squaring over the bits of len(s).
          sq := u32(PRIME_RABIN_KARP)
          for bits := len(s); bits > 0; bits >>= 1 {
              if (bits & 1) != 0 {
                  pow *= sq
              }
              sq *= sq
          }
          return
      }

      n := len(substr)
      if n == 0 {
          return len(s)
      }
      if n == 1 {
          return last_index_byte(s, substr[0])
      }
      if n == len(s) {
          return 0 if string(substr) == string(s) else -1
      }
      if n > len(s) {
          return -1
      }

      needle := string(substr)
      hash, pow := hash_str_rabin_karp_reverse(substr)
      last := len(s) - n

      // Hash of the final window, read back to front.
      h: u32
      for pos := len(s); pos > last; pos -= 1 {
          h = h*PRIME_RABIN_KARP + u32(s[pos-1])
      }
      if h == hash && string(s[last:]) == needle {
          return last
      }

      // Slide the window towards the front: bring in s[start], drop s[start+n].
      for start := last-1; start >= 0; start -= 1 {
          h *= PRIME_RABIN_KARP
          h += u32(s[start])
          h -= pow * u32(s[start+n])
          if h == hash && string(s[start:start+n]) == needle {
              return start
          }
      }
      return -1
  }
  // index_any returns the byte offset of the first rune in `s` that also appears in `chars`,
  // or -1 if there is none (or if `chars` is nil).
  //
  // Both arguments are read as UTF-8, matching last_index_any. Comparing raw bytes instead
  // would report a false match for two different multi-byte runes that merely share a
  // lead byte (e.g. "é" and "ü").
  index_any :: proc(s, chars: []byte) -> int {
      if chars == nil {
          return -1
      }

      // TODO(bill): Optimize
      set := string(chars)
      for r, i in string(s) {
          for c in set {
              if r == c {
                  return i
              }
          }
      }
      return -1
  }
  // last_index_any returns the byte offset of the last rune in `s` that also appears in
  // `chars`, or -1 if there is none (or if `chars` is nil). Both are read as UTF-8.
  last_index_any :: proc(s, chars: []byte) -> int {
      if chars == nil {
          return -1
      }

      set := string(chars)
      end := len(s)
      for end > 0 {
          // Step backwards one rune at a time.
          r, width := utf8.decode_last_rune(s[:end])
          end -= width
          for c in set {
              if r == c {
                  return end
              }
          }
      }
      return -1
  }
  // count returns the number of non-overlapping occurrences of `substr` in `s`.
  // For an empty `substr` the result is rune_count(s) + 1.
  count :: proc(s, substr: []byte) -> int {
      switch len(substr) {
      case 0: // special case
          return rune_count(s) + 1
      case 1:
          // Single byte: a plain scan is enough.
          target := substr[0]
          hits := 0
          for b in s {
              if b == target {
                  hits += 1
              }
          }
          return hits
      }

      // TODO(bill): Use a non-brute for approach
      hits := 0
      rest := s
      for {
          at := index(rest, substr)
          if at == -1 {
              break
          }
          hits += 1
          rest = rest[at+len(substr):]
      }
      return hits
  }
  // repeat returns a new slice, allocated with `allocator`, holding `count` copies of `s`.
  // Panics when `count` is negative or when len(s)*count overflows.
  repeat :: proc(s: []byte, count: int, allocator := context.allocator) -> []byte {
      if count < 0 {
          panic("bytes: negative repeat count")
      }
      if count > 0 && (len(s)*count)/count != len(s) {
          panic("bytes: repeat count will cause an overflow")
      }

      total := len(s) * count
      out := make([]byte, total, allocator)
      // 2^N trick to reduce the need to copy: each pass doubles the filled prefix.
      for filled := copy(out, s); filled < total; filled *= 2 {
          copy(out[filled:], out[:filled])
      }
      return out
  }
  // replace_all replaces every occurrence of `old` in `s` with `new`.
  // `was_allocation` reports whether `output` is a new allocation rather than `s` itself.
  replace_all :: proc(s, old, new: []byte, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
      NO_LIMIT :: -1
      return replace(s, old, new, NO_LIMIT, allocator)
  }
  // replace returns `s` with the first `n` occurrences of `old` replaced by `new`.
  // if n < 0, no limit on the number of replacements
  //
  // When nothing needs replacing (`old` equals `new`, n == 0, or `old` does not occur),
  // `s` itself is returned and `was_allocation` is false. Otherwise the result is allocated
  // with `allocator`. An empty `old` matches at the start and after each rune.
  replace :: proc(s, old, new: []byte, n: int, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
      if n == 0 || string(old) == string(new) {
          return s, false
      }

      occurrences := count(s, old)
      if occurrences == 0 {
          return s, false
      }

      // Number of replacements that will actually be performed.
      replacements := n
      if n < 0 || occurrences < n {
          replacements = occurrences
      }

      buf := make([]byte, len(s) + replacements*(len(new) - len(old)), allocator)

      written := 0
      cursor := 0
      for k in 0..<replacements {
          hit := cursor
          if len(old) != 0 {
              hit += index(s[cursor:], old)
          } else if k > 0 {
              // Empty `old`: step over one rune between insertions.
              _, width := utf8.decode_rune(s[cursor:])
              hit += width
          }
          written += copy(buf[written:], s[cursor:hit])
          written += copy(buf[written:], new)
          cursor = hit + len(old)
      }
      written += copy(buf[written:], s[cursor:])

      return buf[0:written], true
  }
  // remove deletes the first `n` occurrences of `key` from `s` by replacing them with an
  // empty slice (n < 0 removes all of them).
  remove :: proc(s, key: []byte, n: int, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
      output, was_allocation = replace(s, key, {}, n, allocator)
      return
  }
  // remove_all deletes every occurrence of `key` from `s`.
  remove_all :: proc(s, key: []byte, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
      NO_LIMIT :: -1
      return remove(s, key, NO_LIMIT, allocator)
  }
  // Lookup table: 1 for the six ASCII white space bytes, 0 for everything else.
  @(private) _ascii_space := [256]u8{'\t' = 1, '\n' = 1, '\v' = 1, '\f' = 1, '\r' = 1, ' ' = 1}

  // is_ascii_space reports whether `r` is one of '\t', '\n', '\v', '\f', '\r' or ' '.
  is_ascii_space :: proc(r: rune) -> bool {
      // The lower bound matters: a negative rune is also < RUNE_SELF, and u8(r) would
      // truncate it to an arbitrary table index (rune(-247) would read as '\t').
      if 0 <= r && r < utf8.RUNE_SELF {
          return _ascii_space[u8(r)] != 0
      }
      return false
  }
  // is_space reports whether `r` is one of the white space code points listed below
  // (the ASCII ones, U+0085, U+00A0, U+1680, U+2000..U+200A, U+2028, U+2029, U+202F,
  // U+205F and U+3000).
  is_space :: proc(r: rune) -> bool {
      switch r {
      case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xa0, 0x1680:
          return true
      case 0x2000..=0x200a:
          return true
      case 0x2028, 0x2029, 0x202f, 0x205f, 0x3000:
          return true
      }
      return false
  }
  // is_null reports whether `r` is the NUL code point (U+0000).
  is_null :: proc(r: rune) -> bool {
      NUL :: rune(0x0000)
      return r == NUL
  }
  // index_proc returns the byte offset of the first rune in `s` for which p(rune) == truth,
  // or -1 if no rune qualifies.
  index_proc :: proc(s: []byte, p: proc(rune) -> bool, truth := true) -> int {
      for r, offset in string(s) {
          if p(r) != truth {
              continue
          }
          return offset
      }
      return -1
  }
  // index_proc_with_state is index_proc for predicates that take an extra `state` pointer:
  // it returns the byte offset of the first rune where p(state, rune) == truth, or -1.
  index_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr, truth := true) -> int {
      for r, offset in string(s) {
          if p(state, r) != truth {
              continue
          }
          return offset
      }
      return -1
  }
  // last_index_proc returns the byte offset of the last rune in `s` for which
  // p(rune) == truth, or -1 if no rune qualifies. `s` is decoded back to front.
  last_index_proc :: proc(s: []byte, p: proc(rune) -> bool, truth := true) -> int {
      // TODO(bill): Probably use Rabin-Karp Search
      end := len(s)
      for end > 0 {
          r, width := utf8.decode_last_rune(s[:end])
          end -= width
          if p(r) == truth {
              return end
          }
      }
      return -1
  }
  // last_index_proc_with_state is last_index_proc for predicates that take an extra `state`
  // pointer: it returns the byte offset of the last rune where p(state, rune) == truth, or -1.
  last_index_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr, truth := true) -> int {
      // TODO(bill): Probably use Rabin-Karp Search
      end := len(s)
      for end > 0 {
          r, width := utf8.decode_last_rune(s[:end])
          end -= width
          if p(state, r) == truth {
              return end
          }
      }
      return -1
  }
  // trim_left_proc drops the leading runes of `s` for which `p` returns true.
  // Returns nil when every rune satisfies `p` (including when `s` is empty).
  trim_left_proc :: proc(s: []byte, p: proc(rune) -> bool) -> []byte {
      // First rune that must be kept, i.e. where p(rune) is false.
      if keep_from := index_proc(s, p, false); keep_from != -1 {
          return s[keep_from:]
      }
      return nil
  }
  // index_rune returns the byte offset of the first occurrence of rune `r` in `s`, or -1.
  // Searching for utf8.RUNE_ERROR finds the first rune that decodes to RUNE_ERROR;
  // any other rune rejected by utf8.valid_rune is never found.
  index_rune :: proc(s: []byte, r: rune) -> int {
      // Single-byte runes: a byte search is sufficient.
      if 0 <= r && r < utf8.RUNE_SELF {
          return index_byte(s, byte(r))
      }

      if r == utf8.RUNE_ERROR {
          for c, offset in string(s) {
              if c == utf8.RUNE_ERROR {
                  return offset
              }
          }
          return -1
      }

      if !utf8.valid_rune(r) {
          return -1
      }

      // Multi-byte rune: search for its UTF-8 encoding.
      encoded, width := utf8.encode_rune(r)
      return index(s, encoded[:width])
  }
  // trim_left_proc_with_state drops the leading runes of `s` for which p(state, rune) is true.
  // Returns nil when every rune satisfies `p` (including when `s` is empty).
  trim_left_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr) -> []byte {
      // First rune that must be kept, i.e. where p(state, rune) is false.
      if keep_from := index_proc_with_state(s, p, state, false); keep_from != -1 {
          return s[keep_from:]
      }
      return nil
  }
  // trim_right_proc drops the trailing runes of `s` for which `p` returns true.
  trim_right_proc :: proc(s: []byte, p: proc(rune) -> bool) -> []byte {
      // Offset of the last rune to keep, or -1 when nothing is kept.
      last := last_index_proc(s, p, false)

      // The cut goes just past that rune: one byte for a single-byte rune (this also turns
      // -1 into 0), or its decoded width when it is multi-byte.
      end := last + 1
      if last >= 0 && s[last] >= utf8.RUNE_SELF {
          _, width := utf8.decode_rune(s[last:])
          end = last + width
      }
      return s[0:end]
  }
  // trim_right_proc_with_state drops the trailing runes of `s` for which p(state, rune) is true.
  trim_right_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr) -> []byte {
      // Offset of the last rune to keep, or -1 when nothing is kept.
      last := last_index_proc_with_state(s, p, state, false)

      // The cut goes just past that rune: one byte for a single-byte rune (this also turns
      // -1 into 0), or its decoded width when it is multi-byte.
      end := last + 1
      if last >= 0 && s[last] >= utf8.RUNE_SELF {
          _, width := utf8.decode_rune(s[last:])
          end = last + width
      }
      return s[0:end]
  }
  // is_in_cutset reports whether rune `r` occurs in the cutset that `state` points to.
  // `state` is read through a ^string; a nil `state` yields false.
  is_in_cutset :: proc(state: rawptr, r: rune) -> bool {
      if state == nil {
          return false
      }
      set := (^string)(state)^
      found := false
      for member in set {
          if member == r {
              found = true
              break
          }
      }
      return found
  }
  // trim_left drops the leading runes of `s` that occur in `cutset`.
  // `s` is returned unchanged when either `s` or `cutset` is nil.
  trim_left :: proc(s: []byte, cutset: []byte) -> []byte {
      if s == nil || cutset == nil {
          return s
      }
      // Local copy so that its address can be handed to is_in_cutset as the state pointer.
      cut := cutset
      return trim_left_proc_with_state(s, is_in_cutset, &cut)
  }
  // trim_right drops the trailing runes of `s` that occur in `cutset`.
  // `s` is returned unchanged when either `s` or `cutset` is nil.
  trim_right :: proc(s: []byte, cutset: []byte) -> []byte {
      if s == nil || cutset == nil {
          return s
      }
      // Local copy so that its address can be handed to is_in_cutset as the state pointer.
      cut := cutset
      return trim_right_proc_with_state(s, is_in_cutset, &cut)
  }
  // trim drops the runes that occur in `cutset` from both ends of `s`.
  trim :: proc(s: []byte, cutset: []byte) -> []byte {
      left_trimmed := trim_left(s, cutset)
      return trim_right(left_trimmed, cutset)
  }
  // trim_left_space drops the leading runes of `s` for which is_space is true.
  trim_left_space :: proc(s: []byte) -> []byte {
      trimmed := trim_left_proc(s, is_space)
      return trimmed
  }
  // trim_right_space drops the trailing runes of `s` for which is_space is true.
  trim_right_space :: proc(s: []byte) -> []byte {
      trimmed := trim_right_proc(s, is_space)
      return trimmed
  }
  // trim_space drops the runes for which is_space is true from both ends of `s`.
  trim_space :: proc(s: []byte) -> []byte {
      left_trimmed := trim_left_space(s)
      return trim_right_space(left_trimmed)
  }
  // trim_left_null drops the leading NUL runes of `s`.
  trim_left_null :: proc(s: []byte) -> []byte {
      trimmed := trim_left_proc(s, is_null)
      return trimmed
  }
  // trim_right_null drops the trailing NUL runes of `s`.
  trim_right_null :: proc(s: []byte) -> []byte {
      trimmed := trim_right_proc(s, is_null)
      return trimmed
  }
  // trim_null drops NUL runes from both ends of `s`.
  trim_null :: proc(s: []byte) -> []byte {
      left_trimmed := trim_left_null(s)
      return trim_right_null(left_trimmed)
  }
  // trim_prefix returns `s` without the leading `prefix`, or `s` unchanged when it does
  // not start with `prefix`.
  trim_prefix :: proc(s, prefix: []byte) -> []byte {
      if !has_prefix(s, prefix) {
          return s
      }
      return s[len(prefix):]
  }
  // trim_suffix returns `s` without the trailing `suffix`, or `s` unchanged when it does
  // not end with `suffix`.
  trim_suffix :: proc(s, suffix: []byte) -> []byte {
      if !has_suffix(s, suffix) {
          return s
      }
      keep := len(s) - len(suffix)
      return s[:keep]
  }
  // split_multi splits `s` at every occurrence of any separator in `substrs`.
  //
  // s          - input; the returned pieces are subslices of it
  // substrs    - separators; at each position they are tried in order and the first match wins
  // skip_empty - when true, empty pieces (adjacent, leading or trailing separators) are dropped
  // allocator  - used for the returned slice of pieces
  //
  // Returns nil when `s` is empty, when `substrs` is empty or contains an empty separator,
  // or when nothing remains after skipping empty pieces.
  //
  // Each separator is matched with its own length. Matching a window of the shortest
  // separator's length against every separator meant longer separators never matched, an
  // empty separator looped forever, and inputs no longer than the shortest separator were
  // returned as nil.
  split_multi :: proc(s: []byte, substrs: [][]byte, skip_empty := false, allocator := context.allocator) -> [][]byte {
      // match_at returns the length of the first separator found at s[i:], or 0 for no match.
      match_at :: proc(s: []byte, i: int, substrs: [][]byte) -> int {
          for substr in substrs {
              end := i + len(substr)
              if end <= len(s) && string(s[i:end]) == string(substr) {
                  return len(substr)
              }
          }
          return 0
      }

      if s == nil || len(s) == 0 || len(substrs) <= 0 {
          return nil
      }
      // An empty separator would match everywhere without consuming input.
      for substr in substrs {
          if len(substr) == 0 {
              return nil
          }
      }

      // number, index, last
      n, i, l := 0, 0, 0

      // count results
      for i < len(s) {
          if w := match_at(s, i, substrs); w > 0 {
              if !skip_empty || i - l > 0 {
                  n += 1
              }
              i += w
              l = i
              continue
          }
          _, skip := utf8.decode_rune(s[i:])
          i += skip
      }
      if !skip_empty || len(s) - l > 0 {
          n += 1
      }

      if n < 1 {
          // no results
          return nil
      }

      buf := make([][]byte, n, allocator)

      n, i, l = 0, 0, 0

      // slice results
      for i < len(s) {
          if w := match_at(s, i, substrs); w > 0 {
              if !skip_empty || i - l > 0 {
                  buf[n] = s[l:i]
                  n += 1
              }
              i += w
              l = i
              continue
          }
          _, skip := utf8.decode_rune(s[i:])
          i += skip
      }
      if !skip_empty || len(s) - l > 0 {
          buf[n] = s[l:]
      }

      return buf
  }
  // split_multi_iterator yields the next piece of `s^`, split at any separator in `substrs`,
  // and advances `s^` past that piece and its separator.
  //
  // s          - remaining input; updated in place on every call
  // substrs    - separators; at each position they are tried in order and the first match wins
  // skip_empty - when true, empty pieces are skipped instead of being yielded
  //
  // Returns (nil, false) once the input is exhausted, when `substrs` is empty, or when it
  // contains an empty separator. A trailing separator does not produce a final empty piece.
  //
  // The remainder now starts after the matched separator (leaving it in place made
  // skip_empty = false yield empty pieces forever), separators are matched with their own
  // length, and a final field no longer than the shortest separator is still yielded.
  split_multi_iterator :: proc(s: ^[]byte, substrs: [][]byte, skip_empty := false) -> ([]byte, bool) {
      // match_at returns the length of the first separator found at s[i:], or 0 for no match.
      match_at :: proc(s: []byte, i: int, substrs: [][]byte) -> int {
          for substr in substrs {
              end := i + len(substr)
              if end <= len(s) && string(s[i:end]) == string(substr) {
                  return len(substr)
              }
          }
          return 0
      }

      if s == nil || len(s^) == 0 || len(substrs) <= 0 {
          return nil, false
      }
      // An empty separator would match everywhere without consuming input.
      for substr in substrs {
          if len(substr) == 0 {
              return nil, false
          }
      }

      src := s^

      // index, last
      i, l := 0, 0

      for i < len(src) {
          if w := match_at(src, i, substrs); w > 0 {
              if !skip_empty || i - l > 0 {
                  res := src[l:i]
                  s^ = src[i+w:]
                  return res, true
              }
              // Empty piece while skipping: step over the separator and keep looking.
              i += w
              l = i
              continue
          }
          _, skip := utf8.decode_rune(src[i:])
          i += skip
      }

      // No further separator: everything after the last one is the final piece.
      s^ = src[len(src):]
      if !skip_empty || len(src) - l > 0 {
          return src[l:], true
      }
      return nil, false
  }
  // scrub replaces invalid UTF-8 byte sequences in `s` with `replacement`.
  // Adjacent invalid bytes are only replaced once
  //
  // The result is built in a Buffer initialised with `allocator`. Valid input is copied
  // through unchanged; a correctly encoded U+FFFD counts as valid.
  //
  // Two cases are handled that previously lost data: a trailing valid run was never written
  // (so fully valid input produced an empty result), and a trailing invalid run produced no
  // replacement.
  scrub :: proc(s: []byte, replacement: []byte, allocator := context.allocator) -> []byte {
      b: Buffer
      buffer_init_allocator(&b, 0, len(s), allocator)

      in_error := false
      run_start := 0 // start of the valid run that has not been written yet

      for i := 0; i < len(s); {
          r, w := utf8.decode_rune(s[i:])

          // A decoding failure is reported as RUNE_ERROR with width 1; a real U+FFFD
          // in the input is three bytes wide.
          invalid := r == utf8.RUNE_ERROR && w == 1

          if invalid {
              if !in_error {
                  // Entering an invalid run: flush the valid bytes before it and emit
                  // the replacement once for the whole run.
                  in_error = true
                  buffer_write(&b, s[run_start:i])
                  buffer_write(&b, replacement)
              }
          } else if in_error {
              in_error = false
              run_start = i
          }

          i += w
      }

      // Flush the trailing valid run, if any.
      if !in_error {
          buffer_write(&b, s[run_start:])
      }

      return buffer_to_bytes(&b)
  }
  // reverse returns a new slice holding the runes of `s` in reverse order.
  // Each rune's bytes stay in their original order, so valid UTF-8 remains valid.
  // The result is allocated with `allocator` (it was ignored before, and the result always
  // came from the default allocator of `make`).
  reverse :: proc(s: []byte, allocator := context.allocator) -> []byte {
      str := s
      n := len(str)
      buf := make([]byte, n, allocator)
      i := n

      // Walk forwards through the input while filling the output from the back.
      for len(str) > 0 {
          _, w := utf8.decode_rune(str)
          i -= w
          copy(buf[i:], str[:w])
          str = str[w:]
      }
      return buf
  }
  // expand_tabs returns a copy of `s` in which every '\t' is replaced by enough spaces to
  // reach the next multiple of `tab_size` columns. '\n' resets the column to 0.
  //
  // The column advances by one per rune. Advancing by the rune's byte width (as before)
  // misplaced the tab stops after any multi-byte rune.
  //
  // Panics when `tab_size` is not positive; returns nil when `s` is nil. The result is built
  // in a Buffer initialised with `allocator`.
  expand_tabs :: proc(s: []byte, tab_size: int, allocator := context.allocator) -> []byte {
      if tab_size <= 0 {
          panic("tab size must be positive")
      }

      if s == nil {
          return nil
      }

      b: Buffer
      buffer_init_allocator(&b, 0, len(s), allocator)

      str := s
      column: int

      for len(str) > 0 {
          r, w := utf8.decode_rune(str)

          if r == '\t' {
              // Distance to the next tab stop; a tab sitting on a stop expands fully.
              expand := tab_size - column%tab_size

              for i := 0; i < expand; i += 1 {
                  buffer_write_byte(&b, ' ')
              }

              column += expand
          } else {
              if r == '\n' {
                  column = 0
              } else {
                  column += 1
              }

              buffer_write_rune(&b, r)
          }

          str = str[w:]
      }

      return buffer_to_bytes(&b)
  }
  // partition splits `str` around the first occurrence of `sep`.
  // Returns the part before it (`head`), the matched separator (`match`) and the part after
  // it (`tail`). When `sep` does not occur, `head` is all of `str` and the others are nil.
  partition :: proc(str, sep: []byte) -> (head, match, tail: []byte) {
      at := index(str, sep)
      if at == -1 {
          return str, nil, nil
      }

      after := at + len(sep)
      return str[:at], str[at:after], str[after:]
  }
  center_justify :: centre_justify // NOTE(bill): Because Americans exist

  // centre_justify returns a copy of `str` padded on both sides with `pad` so that the
  // result is `length` runes long. When the padding is odd, the extra rune goes on the right.
  // If `str` already has `length` runes or more, or `pad` is empty, a plain clone is returned.
  //
  // The amount of padding is `length` minus the rune count of `str`; the former `length-1`
  // was only right for one-rune inputs. `pad` is tested by length so that an empty non-nil
  // pad cannot cause a division by zero.
  centre_justify :: proc(str: []byte, length: int, pad: []byte, allocator := context.allocator) -> []byte {
      n := rune_count(str)
      if n >= length || len(pad) == 0 {
          return clone(str, allocator)
      }

      remains := length - n
      pad_len := rune_count(pad)

      b: Buffer
      // Capacity: the input plus enough whole copies of `pad` to cover `remains` runes.
      buffer_init_allocator(&b, 0, len(str) + (remains/pad_len + 1)*len(pad), allocator)

      write_pad_string(&b, pad, pad_len, remains/2)
      buffer_write(&b, str)
      write_pad_string(&b, pad, pad_len, (remains+1)/2)

      return buffer_to_bytes(&b)
  }
  // left_justify returns a copy of `str` followed by `pad` repeated on its right side, so
  // that the result is `length` runes long. If `str` already has `length` runes or more, or
  // `pad` is empty, a plain clone is returned.
  //
  // The amount of padding is `length` minus the rune count of `str`; the former `length-1`
  // was only right for one-rune inputs. `pad` is tested by length so that an empty non-nil
  // pad cannot cause a division by zero.
  left_justify :: proc(str: []byte, length: int, pad: []byte, allocator := context.allocator) -> []byte {
      n := rune_count(str)
      if n >= length || len(pad) == 0 {
          return clone(str, allocator)
      }

      remains := length - n
      pad_len := rune_count(pad)

      b: Buffer
      // Capacity: the input plus enough whole copies of `pad` to cover `remains` runes.
      buffer_init_allocator(&b, 0, len(str) + (remains/pad_len + 1)*len(pad), allocator)

      buffer_write(&b, str)
      write_pad_string(&b, pad, pad_len, remains)

      return buffer_to_bytes(&b)
  }
  // right_justify returns a copy of `str` preceded by `pad` repeated on its left side, so
  // that the result is `length` runes long. If `str` already has `length` runes or more, or
  // `pad` is empty, a plain clone is returned.
  //
  // The amount of padding is `length` minus the rune count of `str`; the former `length-1`
  // was only right for one-rune inputs. `pad` is tested by length so that an empty non-nil
  // pad cannot cause a division by zero.
  right_justify :: proc(str: []byte, length: int, pad: []byte, allocator := context.allocator) -> []byte {
      n := rune_count(str)
      if n >= length || len(pad) == 0 {
          return clone(str, allocator)
      }

      remains := length - n
      pad_len := rune_count(pad)

      b: Buffer
      // Capacity: the input plus enough whole copies of `pad` to cover `remains` runes.
      buffer_init_allocator(&b, 0, len(str) + (remains/pad_len + 1)*len(pad), allocator)

      write_pad_string(&b, pad, pad_len, remains)
      buffer_write(&b, str)

      return buffer_to_bytes(&b)
  }
  // write_pad_string appends `remains` runes of padding to `b`: as many whole copies of
  // `pad` as fit, followed by the leading runes of `pad` for the rest.
  // `pad_len` is passed in by the caller as the rune count of `pad`.
  @private
  write_pad_string :: proc(b: ^Buffer, pad: []byte, pad_len, remains: int) {
      // Whole copies of the pad.
      whole := remains / pad_len
      for _ in 0..<whole {
          buffer_write(b, pad)
      }

      // Partial copy: one rune at a time from the front of the pad.
      leftover := remains % pad_len
      tail := pad
      for _ in 0..<leftover {
          r, width := utf8.decode_rune(tail)
          buffer_write_rune(b, r)
          tail = tail[width:]
      }
  }
  // fields splits the byte slice s around each instance of one or more consecutive white space
  // characters, as defined by unicode.is_space, returning a slice of subslices of s.
  // nil is returned when an ASCII-only s contains no fields.
  //
  // ASCII-only input is handled here with the _ascii_space table; if any byte is
  // >= utf8.RUNE_SELF the work is handed to fields_proc with unicode.is_space.
  fields :: proc(s: []byte, allocator := context.allocator) -> [][]byte #no_bounds_check {
      field_count := 0
      prev_space := 1
      seen_bits := u8(0)

      // First pass: count the fields (each space -> non-space transition starts one)
      // and OR all bytes together to detect non-ASCII input.
      for b in s {
          seen_bits |= b
          cur_space := int(_ascii_space[b])
          field_count += prev_space & ~cur_space
          prev_space = cur_space
      }

      if seen_bits >= utf8.RUNE_SELF {
          return fields_proc(s, unicode.is_space, allocator)
      }

      if field_count == 0 {
          return nil
      }

      out := make([][]byte, field_count, allocator)
      filled := 0
      begin := -1 // start of the field currently being scanned, or -1 between fields

      // Second pass: slice out each field.
      for b, i in s {
          if _ascii_space[b] != 0 {
              if begin >= 0 {
                  out[filled] = s[begin:i]
                  filled += 1
                  begin = -1
              }
          } else if begin < 0 {
              begin = i
          }
      }
      // A field that runs to the end of the input.
      if begin >= 0 {
          out[filled] = s[begin:]
      }
      return out
  }
  // fields_proc splits the byte slice s at each run of unicode code points `ch` satisfying f(ch)
  // and returns a slice of subslices of s.
  // If all code points in s satisfy f(ch) or s is empty, an empty slice is returned.
  //
  // fields_proc makes no guarantee about the order in which it calls f(ch);
  // it assumes that `f` always returns the same value for a given ch.
  fields_proc :: proc(s: []byte, f: proc(rune) -> bool, allocator := context.allocator) -> [][]byte #no_bounds_check {
      subslices := make([dynamic][]byte, 0, 32, allocator)

      field_start := -1 // byte offset where the current field began, negative between fields

      for r, offset in string(s) {
          in_field := field_start >= 0
          if f(r) {
              // A separator closes the field that was open, if any.
              if in_field {
                  append(&subslices, s[field_start : offset])
                  field_start = -1
              }
          } else if !in_field {
              field_start = offset
          }
      }

      // A field that runs to the end of the input.
      if field_start >= 0 {
          append(&subslices, s[field_start : len(s)])
      }

      return subslices[:]
  }