bytes.odin 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150
  1. package bytes
  2. import "core:mem"
  3. import "core:unicode"
  4. import "core:unicode/utf8"
  // clone returns a newly allocated copy of `s`.
  // The backing allocation is one byte longer than `s` and that extra byte is set to 0;
  // the returned slice has length len(s) and therefore does not include it.
  clone :: proc(s: []byte, allocator := context.allocator, loc := #caller_location) -> []byte {
      n := len(s)
      buf := make([]byte, n+1, allocator, loc)
      copy(buf, s)
      buf[n] = 0 // trailing zero byte, outside the returned length
      return buf[:n]
  }
  // ptr_from_slice returns the data pointer of `str`, obtained by reinterpreting
  // the slice header as a mem.Raw_String.
  ptr_from_slice :: proc(str: []byte) -> ^byte {
      return (transmute(mem.Raw_String)str).data
  }
  // truncate_to_byte returns the part of `str` before the first occurrence of `b`.
  // When `b` does not occur, all of `str` is returned.
  truncate_to_byte :: proc(str: []byte, b: byte) -> []byte {
      if cut := index_byte(str, b); cut >= 0 {
          return str[:cut]
      }
      return str[:len(str)]
  }
  // truncate_to_rune returns the part of `str` before the first occurrence of the rune `r`.
  // When `r` does not occur, all of `str` is returned.
  truncate_to_rune :: proc(str: []byte, r: rune) -> []byte {
      if cut := index_rune(str, r); cut >= 0 {
          return str[:cut]
      }
      return str[:len(str)]
  }
  // compare orders two byte slices lexicographically by delegating to mem.compare.
  // The result is -1 when `lhs` sorts first, 1 when `rhs` sorts first, and 0 when they are equal.
  compare :: proc(lhs, rhs: []byte) -> int {
      ordering := mem.compare(lhs, rhs)
      return ordering
  }
  // contains_rune iterates `s` as a string and returns the byte offset of the first
  // rune equal to `r`, or -1 when there is none.
  contains_rune :: proc(s: []byte, r: rune) -> int {
      for candidate, byte_offset in string(s) {
          if candidate != r {
              continue
          }
          return byte_offset
      }
      return -1
  }
  // contains reports whether `index` finds `substr` inside `s`.
  contains :: proc(s, substr: []byte) -> bool {
      found_at := index(s, substr)
      return found_at >= 0
  }
  // contains_any reports whether `index_any` finds any element of `chars` inside `s`.
  contains_any :: proc(s, chars: []byte) -> bool {
      found_at := index_any(s, chars)
      return found_at >= 0
  }
  // rune_count returns the number of runes in `s` as counted by utf8.rune_count.
  rune_count :: proc(s: []byte) -> int {
      runes := utf8.rune_count(s)
      return runes
  }
  // equal reports whether `a` and `b` hold the same bytes (compared as strings).
  equal :: proc(a, b: []byte) -> bool {
      lhs := string(a)
      rhs := string(b)
      return lhs == rhs
  }
  // equal_fold reports whether `u` and `v` are equal when ASCII letters are compared
  // without regard to case. Runes are decoded one at a time from both inputs.
  // Two different runes only match when both are ASCII and one is the lower-case
  // form of the other; any mismatch involving a non-ASCII rune returns false.
  equal_fold :: proc(u, v: []byte) -> bool {
      lhs, rhs := string(u), string(v)
      for lhs != "" && rhs != "" {
          a, b: rune

          // Decode the next rune of each side; single bytes below RUNE_SELF are taken directly.
          if lhs[0] < utf8.RUNE_SELF {
              a = rune(lhs[0])
              lhs = lhs[1:]
          } else {
              r, size := utf8.decode_rune_in_string(lhs)
              a = r
              lhs = lhs[size:]
          }
          if rhs[0] < utf8.RUNE_SELF {
              b = rune(rhs[0])
              rhs = rhs[1:]
          } else {
              r, size := utf8.decode_rune_in_string(rhs)
              b = r
              rhs = rhs[size:]
          }

          if a == b {
              continue
          }

          lo, hi := min(a, b), max(a, b)
          if hi >= utf8.RUNE_SELF {
              // TODO(bill): Unicode folding
              return false
          }
          // Both ASCII: they fold together only if `lo` is upper case and `hi` is its lower-case twin.
          if 'A' <= lo && lo <= 'Z' && hi == lo + ('a' - 'A') {
              continue
          }
          return false
      }
      // One side ran out; equal only if both did.
      return lhs == rhs
  }
  // has_prefix reports whether `s` begins with `prefix`.
  has_prefix :: proc(s, prefix: []byte) -> bool {
      n := len(prefix)
      if len(s) < n {
          return false
      }
      return string(s[:n]) == string(prefix)
  }
  // has_suffix reports whether `s` ends with `suffix`.
  has_suffix :: proc(s, suffix: []byte) -> bool {
      n := len(suffix)
      if len(s) < n {
          return false
      }
      return string(s[len(s)-n:]) == string(suffix)
  }
  // join concatenates the elements of `a` into one newly allocated slice, placing
  // `sep` between consecutive elements. Returns nil when `a` is empty.
  join :: proc(a: [][]byte, sep: []byte, allocator := context.allocator) -> []byte {
      if len(a) == 0 {
          return nil
      }

      // Exact output size: every element plus one separator between each pair.
      total := len(sep) * (len(a) - 1)
      for part in a {
          total += len(part)
      }

      out := make([]byte, total, allocator)
      pos := 0
      for part, idx in a {
          if idx > 0 {
              pos += copy(out[pos:], sep)
          }
          pos += copy(out[pos:], part)
      }
      return out
  }
  // concatenate returns a newly allocated slice holding the elements of `a` back to back.
  // Returns nil when `a` is empty.
  concatenate :: proc(a: [][]byte, allocator := context.allocator) -> []byte {
      if len(a) == 0 {
          return nil
      }

      total := 0
      for part in a {
          total += len(part)
      }

      out := make([]byte, total, allocator)
      tail := out // unwritten remainder of `out`
      for part in a {
          written := copy(tail, part)
          tail = tail[written:]
      }
      return out
  }
  // _split is the shared implementation behind split, split_n, split_after and split_after_n.
  //
  // Inputs:
  // - s:        the bytes to split
  // - sep:      the separator; when it is empty (nil or zero length) `s` is split into
  //             its individual UTF-8 sequences instead
  // - sep_save: how many bytes of the separator are kept at the end of each piece
  //             (0 for split, len(sep) for split_after)
  // - n:        maximum number of pieces; 0 returns nil, a negative value means no limit
  //
  // The returned pieces are sub-slices of `s`; only the outer array is allocated.
  @private
  _split :: proc(s, sep: []byte, sep_save, n: int, allocator := context.allocator) -> [][]byte {
      s, n := s, n

      if n == 0 {
          return nil
      }

      // An empty separator can never be "found" in a meaningful way. Testing the
      // length (rather than `sep == nil`) also covers empty non-nil slices, which
      // would otherwise produce a run of empty pieces.
      if len(sep) == 0 {
          l := utf8.rune_count(s)
          if n < 0 || n > l {
              n = l
          }

          res := make([dynamic][]byte, n, allocator)
          for i := 0; i < n-1; i += 1 {
              _, w := utf8.decode_rune(s)
              res[i] = s[:w]
              s = s[w:]
          }
          if n > 0 {
              // The last piece takes whatever is left.
              res[n-1] = s
          }
          return res[:]
      }

      if n < 0 {
          n = count(s, sep) + 1
      }

      res := make([dynamic][]byte, n, allocator)

      // At most n-1 cuts; the final slot receives the remainder.
      n -= 1

      i := 0
      for ; i < n; i += 1 {
          m := index(s, sep)
          if m < 0 {
              break
          }
          res[i] = s[:m+sep_save]
          s = s[m+len(sep):]
      }
      res[i] = s

      return res[:i+1]
  }
  // split cuts `s` at every occurrence of `sep` and returns the pieces without the separator.
  // The work is done by _split with no limit on the number of pieces.
  split :: proc(s, sep: []byte, allocator := context.allocator) -> [][]byte {
      NO_LIMIT :: -1
      return _split(s, sep, 0, NO_LIMIT, allocator)
  }
  // split_n cuts `s` at occurrences of `sep`, returning at most `n` pieces without the separator.
  // `n == 0` yields nil and a negative `n` means no limit (see _split).
  split_n :: proc(s, sep: []byte, n: int, allocator := context.allocator) -> [][]byte {
      KEEP_NONE :: 0 // no separator bytes are kept on the pieces
      return _split(s, sep, KEEP_NONE, n, allocator)
  }
  // split_after cuts `s` after every occurrence of `sep`; each piece keeps its trailing separator.
  // The work is done by _split with no limit on the number of pieces.
  split_after :: proc(s, sep: []byte, allocator := context.allocator) -> [][]byte {
      keep := len(sep)
      return _split(s, sep, keep, -1, allocator)
  }
  // split_after_n cuts `s` after occurrences of `sep`, returning at most `n` pieces that
  // each keep their trailing separator. `n == 0` yields nil; a negative `n` means no limit (see _split).
  split_after_n :: proc(s, sep: []byte, n: int, allocator := context.allocator) -> [][]byte {
      keep := len(sep)
      return _split(s, sep, keep, n, allocator)
  }
  // _split_iterator is the shared implementation behind the split_*_iterator procedures.
  // Each call removes one piece from the front of `s^` and returns it.
  //
  // Inputs:
  // - s:        pointer to the remaining input; advanced past the returned piece
  // - sep:      the separator; when it is empty the whole remainder is returned as one piece
  // - sep_save: how many bytes of the separator are kept at the end of the piece
  // - n:        0 returns (nil, false) immediately; 1 returns the whole remainder as a
  //             single piece; any other value searches for the next separator.
  //             No state is kept between calls, so `n` is not a running limit.
  //
  // Returns:
  // - res: the next piece (a sub-slice of the original input)
  // - ok:  false once the input is exhausted, so `for piece in split_iterator(&s, sep)` terminates
  @private
  _split_iterator :: proc(s: ^[]byte, sep: []byte, sep_save, n: int) -> (res: []byte, ok: bool) {
      if n == 0 {
          return
      }

      rest := s^

      if n != 1 && len(sep) != 0 {
          if m := index(rest, sep); m >= 0 {
              res = rest[:m+sep_save]
              ok = true
              s^ = rest[m+len(sep):]
              return
          }
      }

      // No (further) separator: the remainder is the last piece. Testing the length
      // rather than comparing with nil is what lets the iteration stop, because an
      // emptied slice still carries a non-nil data pointer.
      res = rest
      ok = len(res) != 0
      s^ = rest[len(rest):]
      return
  }
  // split_iterator returns the next piece of `s^` delimited by `sep` (separator not included)
  // and advances `s^` past it; see _split_iterator.
  split_iterator :: proc(s: ^[]byte, sep: []byte) -> ([]byte, bool) {
      piece, ok := _split_iterator(s, sep, 0, -1)
      return piece, ok
  }
  // split_n_iterator returns the next piece of `s^` delimited by `sep` (separator not included)
  // and advances `s^` past it. `n` is forwarded unchanged to _split_iterator.
  split_n_iterator :: proc(s: ^[]byte, sep: []byte, n: int) -> ([]byte, bool) {
      piece, ok := _split_iterator(s, sep, 0, n)
      return piece, ok
  }
  // split_after_iterator returns the next piece of `s^` including its trailing `sep`
  // and advances `s^` past it; see _split_iterator.
  split_after_iterator :: proc(s: ^[]byte, sep: []byte) -> ([]byte, bool) {
      keep := len(sep)
      return _split_iterator(s, sep, keep, -1)
  }
  // split_after_n_iterator returns the next piece of `s^` including its trailing `sep`
  // and advances `s^` past it. `n` is forwarded unchanged to _split_iterator.
  split_after_n_iterator :: proc(s: ^[]byte, sep: []byte, n: int) -> ([]byte, bool) {
      keep := len(sep)
      return _split_iterator(s, sep, keep, n)
  }
  // index_byte returns the index of the first byte in `s` equal to `c`, or -1 if there is none.
  index_byte :: proc(s: []byte, c: byte) -> int {
      for b, i in s {
          if b == c {
              return i
          }
      }
      return -1
  }
  // last_index_byte returns the index of the last byte in `s` equal to `c`,
  // or -1 if `c` is not present.
  last_index_byte :: proc(s: []byte, c: byte) -> int {
      i := len(s)
      for i > 0 {
          i -= 1
          if s[i] == c {
              return i
          }
      }
      return -1
  }
  // Multiplier of the rolling hash used by index and last_index.
  @private PRIME_RABIN_KARP :: 16777619

  // index returns the byte offset of the first occurrence of `substr` in `s`, or -1.
  // An empty `substr` is found at offset 0. Single-byte needles are delegated to
  // index_byte; longer ones are located with a Rabin-Karp rolling hash and every
  // hash hit is confirmed by a real comparison.
  index :: proc(s, substr: []byte) -> int {
      // Hashes `s` front to back and also returns PRIME^len(s), the factor needed
      // to remove the oldest byte from a window of that length.
      hash_str_rabin_karp :: proc(s: []byte) -> (hash: u32 = 0, pow: u32 = 1) {
          for b in s {
              hash = hash*PRIME_RABIN_KARP + u32(b)
          }
          // Exponentiation by squaring over the bits of len(s).
          sq := u32(PRIME_RABIN_KARP)
          for bits := len(s); bits > 0; bits >>= 1 {
              if (bits & 1) != 0 {
                  pow *= sq
              }
              sq *= sq
          }
          return
      }

      n := len(substr)
      if n == 0 {
          return 0
      }
      if n == 1 {
          return index_byte(s, substr[0])
      }
      if n == len(s) {
          return 0 if string(s) == string(substr) else -1
      }
      if n > len(s) {
          return -1
      }

      needle := string(substr)
      target, pow := hash_str_rabin_karp(substr)

      // Hash of the first window s[:n].
      h: u32
      for b in s[:n] {
          h = h*PRIME_RABIN_KARP + u32(b)
      }
      if h == target && string(s[:n]) == needle {
          return 0
      }

      // Slide the window one byte at a time; `end` is the exclusive end of the window.
      for end := n + 1; end <= len(s); end += 1 {
          h = h*PRIME_RABIN_KARP + u32(s[end-1]) - pow*u32(s[end-1-n])
          if h == target && string(s[end-n:end]) == needle {
              return end - n
          }
      }
      return -1
  }
  // last_index returns the byte offset of the last occurrence of `substr` in `s`, or -1.
  // An empty `substr` is found at len(s). Single-byte needles are delegated to
  // last_index_byte; longer ones are located with a Rabin-Karp rolling hash that
  // runs from the end of `s` towards the start, confirming every hash hit.
  last_index :: proc(s, substr: []byte) -> int {
      // Hashes `s` back to front and also returns PRIME^len(s).
      hash_str_rabin_karp_reverse :: proc(s: []byte) -> (hash: u32 = 0, pow: u32 = 1) {
          k := len(s)
          for k > 0 {
              k -= 1
              hash = hash*PRIME_RABIN_KARP + u32(s[k])
          }
          // Exponentiation by squaring over the bits of len(s).
          sq := u32(PRIME_RABIN_KARP)
          for bits := len(s); bits > 0; bits >>= 1 {
              if (bits & 1) != 0 {
                  pow *= sq
              }
              sq *= sq
          }
          return
      }

      n := len(substr)
      if n == 0 {
          return len(s)
      }
      if n == 1 {
          return last_index_byte(s, substr[0])
      }
      if n == len(s) {
          return 0 if string(substr) == string(s) else -1
      }
      if n > len(s) {
          return -1
      }

      needle := string(substr)
      target, pow := hash_str_rabin_karp_reverse(substr)

      // Hash of the final window s[last:], consumed back to front.
      last := len(s) - n
      h: u32
      for k := len(s); k > last; k -= 1 {
          h = h*PRIME_RABIN_KARP + u32(s[k-1])
      }
      if h == target && string(s[last:]) == needle {
          return last
      }

      // Slide the window towards the front; `start` is the window's first index.
      start := last
      for start > 0 {
          start -= 1
          h = h*PRIME_RABIN_KARP + u32(s[start]) - pow*u32(s[start+n])
          if h == target && string(s[start:start+n]) == needle {
              return start
          }
      }
      return -1
  }
  // index_any returns the byte offset of the first rune in `s` that also appears in
  // `chars`, or -1 when there is none (or when `chars` is nil).
  //
  // Both inputs are iterated as UTF-8 text, the same way last_index_any treats them.
  // Comparing raw bytes instead would report a match whenever two different
  // multi-byte runes merely share a byte (for example the common lead byte of 'é' and 'è').
  index_any :: proc(s, chars: []byte) -> int {
      if chars == nil {
          return -1
      }

      // TODO(bill): Optimize
      for r, i in string(s) {
          for c in string(chars) {
              if r == c {
                  return i
              }
          }
      }
      return -1
  }
  // last_index_any returns the byte offset of the last rune in `s` that also appears
  // in `chars`, or -1 when there is none (or when `chars` is nil).
  // `s` is decoded backwards one rune at a time with utf8.decode_last_rune.
  last_index_any :: proc(s, chars: []byte) -> int {
      if chars == nil {
          return -1
      }

      end := len(s) // exclusive end of the not-yet-examined prefix
      for end > 0 {
          r, w := utf8.decode_last_rune(s[:end])
          end -= w
          for c in string(chars) {
              if c == r {
                  return end
              }
          }
      }
      return -1
  }
  // count returns the number of non-overlapping occurrences of `substr` in `s`.
  // For an empty `substr` the result is rune_count(s) + 1.
  count :: proc(s, substr: []byte) -> int {
      switch len(substr) {
      case 0: // special case
          return rune_count(s) + 1
      case 1:
          // Single byte: a plain scan is enough.
          needle := substr[0]
          hits := 0
          for b in s {
              if b == needle {
                  hits += 1
              }
          }
          return hits
      }

      // TODO(bill): Use a non-brute for approach
      hits := 0
      rest := s
      for {
          at := index(rest, substr)
          if at < 0 {
              break
          }
          hits += 1
          // Continue after this occurrence so matches never overlap.
          rest = rest[at+len(substr):]
      }
      return hits
  }
  // repeat returns a newly allocated slice consisting of `count` copies of `s`.
  // Panics when `count` is negative or when len(s)*count overflows.
  repeat :: proc(s: []byte, count: int, allocator := context.allocator) -> []byte {
      if count < 0 {
          panic("bytes: negative repeat count")
      }
      // Multiply-then-divide round trip detects overflow of len(s)*count.
      if count > 0 && (len(s)*count)/count != len(s) {
          panic("bytes: repeat count will cause an overflow")
      }

      total := len(s) * count
      out := make([]byte, total, allocator)

      // Seed with one copy, then keep doubling the filled prefix.
      filled := copy(out, s)
      for filled < total {
          copy(out[filled:], out[:filled])
          filled += filled
      }
      return out
  }
  // replace_all replaces every occurrence of `old` in `s` with `new`.
  // It forwards to `replace` with no limit; `was_allocation` tells whether `output` was newly allocated.
  replace_all :: proc(s, old, new: []byte, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
      NO_LIMIT :: -1
      return replace(s, old, new, NO_LIMIT, allocator)
  }
  // replace returns `s` with the first `n` occurrences of `old` replaced by `new`.
  // If n < 0 there is no limit on the number of replacements.
  //
  // When nothing needs to change (`old` equals `new`, n == 0, or `old` does not occur)
  // the input slice itself is returned and `was_allocation` is false; otherwise the
  // result is newly allocated and `was_allocation` is true.
  // With an empty `old`, `new` is inserted before the first rune and after each
  // following rune, up to the replacement limit.
  replace :: proc(s, old, new: []byte, n: int, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
      if n == 0 || string(old) == string(new) {
          return s, false
      }

      occurrences := count(s, old)
      if occurrences == 0 {
          return s, false
      }

      replacements := n
      if n < 0 || occurrences < n {
          replacements = occurrences
      }

      buf := make([]byte, len(s) + replacements*(len(new) - len(old)), allocator)

      written := 0 // bytes written to buf
      cursor := 0  // position in s after the previous replacement
      for k in 0..<replacements {
          hit := cursor
          if len(old) != 0 {
              hit += index(s[cursor:], old)
          } else if k > 0 {
              // Empty `old`: step over one rune between insertions.
              _, width := utf8.decode_rune(s[cursor:])
              hit += width
          }
          written += copy(buf[written:], s[cursor:hit])
          written += copy(buf[written:], new)
          cursor = hit + len(old)
      }
      written += copy(buf[written:], s[cursor:])
      return buf[:written], true
  }
  // remove deletes the first `n` occurrences of `key` from `s` by replacing them with nothing.
  // A negative `n` removes every occurrence (see `replace`).
  remove :: proc(s, key: []byte, n: int, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
      nothing: []byte
      return replace(s, key, nothing, n, allocator)
  }
  // remove_all deletes every occurrence of `key` from `s`; it forwards to `remove` with no limit.
  remove_all :: proc(s, key: []byte, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
      NO_LIMIT :: -1
      return remove(s, key, NO_LIMIT, allocator)
  }
  // Lookup table: entry is 1 for the six ASCII white space bytes and 0 for everything else.
  @(private) _ascii_space := [256]u8{'\t' = 1, '\n' = 1, '\v' = 1, '\f' = 1, '\r' = 1, ' ' = 1}

  // is_ascii_space reports whether `r` is one of '\t', '\n', '\v', '\f', '\r' or ' '.
  // Runes at or above utf8.RUNE_SELF are never considered ASCII space.
  is_ascii_space :: proc(r: rune) -> bool {
      if r >= utf8.RUNE_SELF {
          return false
      }
      return _ascii_space[u8(r)] != 0
  }
  // is_space reports whether `r` is one of the white space code points listed below:
  // the ASCII white space characters, U+0085, U+00A0, U+1680, U+2000..U+200A,
  // U+2028, U+2029, U+202F, U+205F and U+3000.
  is_space :: proc(r: rune) -> bool {
      switch r {
      case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xa0, 0x1680:
          return true
      case 0x2000..=0x200a:
          return true
      case 0x2028, 0x2029, 0x202f, 0x205f, 0x3000:
          return true
      }
      return false
  }
  // is_null reports whether `r` is the NUL code point (U+0000).
  is_null :: proc(r: rune) -> bool {
      NUL :: rune(0x0000)
      return r == NUL
  }
  // index_proc returns the byte offset of the first rune in `s` for which `p(rune)`
  // equals `truth`, or -1 when no rune qualifies.
  index_proc :: proc(s: []byte, p: proc(rune) -> bool, truth := true) -> int {
      for r, offset in string(s) {
          if p(r) != truth {
              continue
          }
          return offset
      }
      return -1
  }
  // index_proc_with_state returns the byte offset of the first rune in `s` for which
  // `p(state, rune)` equals `truth`, or -1 when no rune qualifies.
  // `state` is passed through to `p` untouched.
  index_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr, truth := true) -> int {
      for r, offset in string(s) {
          if p(state, r) != truth {
              continue
          }
          return offset
      }
      return -1
  }
  // last_index_proc returns the byte offset of the last rune in `s` for which
  // `p(rune)` equals `truth`, or -1 when no rune qualifies.
  // `s` is decoded backwards one rune at a time.
  last_index_proc :: proc(s: []byte, p: proc(rune) -> bool, truth := true) -> int {
      // TODO(bill): Probably use Rabin-Karp Search
      end := len(s)
      for end > 0 {
          r, size := utf8.decode_last_rune(s[:end])
          end -= size
          if p(r) != truth {
              continue
          }
          return end
      }
      return -1
  }
  // last_index_proc_with_state returns the byte offset of the last rune in `s` for
  // which `p(state, rune)` equals `truth`, or -1 when no rune qualifies.
  // `s` is decoded backwards one rune at a time; `state` is passed through to `p` untouched.
  last_index_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr, truth := true) -> int {
      // TODO(bill): Probably use Rabin-Karp Search
      end := len(s)
      for end > 0 {
          r, size := utf8.decode_last_rune(s[:end])
          end -= size
          if p(state, r) != truth {
              continue
          }
          return end
      }
      return -1
  }
  // trim_left_proc drops the leading runes of `s` for which `p` returns true.
  // Returns nil when every rune satisfies `p` (including when `s` is empty).
  trim_left_proc :: proc(s: []byte, p: proc(rune) -> bool) -> []byte {
      // Offset of the first rune that must be kept.
      first_kept := index_proc(s, p, false)
      if first_kept != -1 {
          return s[first_kept:]
      }
      return nil
  }
  // index_rune returns the byte offset of the first occurrence of the rune `r` in `s`, or -1.
  // - ASCII runes are looked up as a single byte with index_byte.
  // - utf8.RUNE_ERROR matches the first position at which iterating `s` as a string yields RUNE_ERROR.
  // - Runes rejected by utf8.valid_rune are never found.
  // - Anything else is encoded to UTF-8 and searched for with index.
  index_rune :: proc(s: []byte, r: rune) -> int {
      if 0 <= r && r < utf8.RUNE_SELF {
          return index_byte(s, byte(r))
      }
      if r == utf8.RUNE_ERROR {
          for c, offset in string(s) {
              if c == utf8.RUNE_ERROR {
                  return offset
              }
          }
          return -1
      }
      if !utf8.valid_rune(r) {
          return -1
      }

      encoded, width := utf8.encode_rune(r)
      return index(s, encoded[:width])
  }
  // trim_left_proc_with_state drops the leading runes of `s` for which `p(state, rune)` returns true.
  // Returns nil when every rune satisfies `p` (including when `s` is empty).
  trim_left_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr) -> []byte {
      // Offset of the first rune that must be kept.
      first_kept := index_proc_with_state(s, p, state, false)
      if first_kept != -1 {
          return s[first_kept:]
      }
      return nil
  }
  // trim_right_proc drops the trailing runes of `s` for which `p` returns true.
  // When every rune satisfies `p` the result is the empty prefix s[0:0].
  trim_right_proc :: proc(s: []byte, p: proc(rune) -> bool) -> []byte {
      // Start offset of the last rune to keep, or -1 if there is none.
      end := last_index_proc(s, p, false)
      if end < 0 || s[end] < utf8.RUNE_SELF {
          // Nothing kept (-1 becomes 0) or a single-byte rune: step one byte.
          end += 1
      } else {
          // Multi-byte rune: step over its full encoding.
          _, w := utf8.decode_rune(s[end:])
          end += w
      }
      return s[0:end]
  }
  // trim_right_proc_with_state drops the trailing runes of `s` for which `p(state, rune)` returns true.
  // When every rune satisfies `p` the result is the empty prefix s[0:0].
  trim_right_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr) -> []byte {
      // Start offset of the last rune to keep, or -1 if there is none.
      end := last_index_proc_with_state(s, p, state, false)
      if end < 0 || s[end] < utf8.RUNE_SELF {
          // Nothing kept (-1 becomes 0) or a single-byte rune: step one byte.
          end += 1
      } else {
          // Multi-byte rune: step over its full encoding.
          _, w := utf8.decode_rune(s[end:])
          end += w
      }
      return s[0:end]
  }
  // is_in_cutset reports whether `r` occurs in the cutset that `state` points to.
  // `state` is read through a ^string; a nil `state` never matches.
  is_in_cutset :: proc(state: rawptr, r: rune) -> bool {
      if state != nil {
          cutset := (^string)(state)^
          for member in cutset {
              if member == r {
                  return true
              }
          }
      }
      return false
  }
  // trim_left removes from the front of `s` every leading rune that occurs in `cutset`.
  // `s` is returned unchanged when either argument is nil.
  trim_left :: proc(s: []byte, cutset: []byte) -> []byte {
      if s != nil && cutset != nil {
          // Local copy so its address can be handed to is_in_cutset as opaque state.
          state := cutset
          return trim_left_proc_with_state(s, is_in_cutset, &state)
      }
      return s
  }
  // trim_right removes from the end of `s` every trailing rune that occurs in `cutset`.
  // `s` is returned unchanged when either argument is nil.
  trim_right :: proc(s: []byte, cutset: []byte) -> []byte {
      if s != nil && cutset != nil {
          // Local copy so its address can be handed to is_in_cutset as opaque state.
          state := cutset
          return trim_right_proc_with_state(s, is_in_cutset, &state)
      }
      return s
  }
  // trim removes runes contained in `cutset` from both ends of `s`
  // (left side first, then right side).
  trim :: proc(s: []byte, cutset: []byte) -> []byte {
      left_trimmed := trim_left(s, cutset)
      return trim_right(left_trimmed, cutset)
  }
  // trim_left_space removes leading white space (as defined by is_space) from `s`.
  trim_left_space :: proc(s: []byte) -> []byte {
      trimmed := trim_left_proc(s, is_space)
      return trimmed
  }
  // trim_right_space removes trailing white space (as defined by is_space) from `s`.
  trim_right_space :: proc(s: []byte) -> []byte {
      trimmed := trim_right_proc(s, is_space)
      return trimmed
  }
  // trim_space removes white space (as defined by is_space) from both ends of `s`
  // (left side first, then right side).
  trim_space :: proc(s: []byte) -> []byte {
      left_trimmed := trim_left_space(s)
      return trim_right_space(left_trimmed)
  }
  // trim_left_null removes leading NUL runes (see is_null) from `s`.
  trim_left_null :: proc(s: []byte) -> []byte {
      trimmed := trim_left_proc(s, is_null)
      return trimmed
  }
  // trim_right_null removes trailing NUL runes (see is_null) from `s`.
  trim_right_null :: proc(s: []byte) -> []byte {
      trimmed := trim_right_proc(s, is_null)
      return trimmed
  }
  // trim_null removes NUL runes (see is_null) from both ends of `s`
  // (left side first, then right side).
  trim_null :: proc(s: []byte) -> []byte {
      left_trimmed := trim_left_null(s)
      return trim_right_null(left_trimmed)
  }
  // trim_prefix returns `s` without the leading `prefix`.
  // `s` is returned unchanged when it does not start with `prefix`.
  trim_prefix :: proc(s, prefix: []byte) -> []byte {
      if !has_prefix(s, prefix) {
          return s
      }
      return s[len(prefix):]
  }
  // trim_suffix returns `s` without the trailing `suffix`.
  // `s` is returned unchanged when it does not end with `suffix`.
  trim_suffix :: proc(s, suffix: []byte) -> []byte {
      if !has_suffix(s, suffix) {
          return s
      }
      kept := len(s) - len(suffix)
      return s[:kept]
  }
  // split_multi splits `s` at every occurrence of any of the separators in `substrs`.
  //
  // Inputs:
  // - s:          the bytes to split
  // - substrs:    the separators; at each position they are tried in list order and the
  //               first one that matches wins. Empty separators are ignored.
  // - skip_empty: when true, empty pieces (between adjacent separators, or at either end) are dropped
  //
  // Returns a newly allocated array of sub-slices of `s`, or nil when `s` is nil, when
  // there is no usable separator, when `s` is not longer than the shortest separator,
  // or when no piece remains.
  //
  // Each separator is compared at its own full length, so separators of different
  // lengths can be mixed, and an empty separator can no longer stall the scan.
  split_multi :: proc(s: []byte, substrs: [][]byte, skip_empty := false, allocator := context.allocator) -> [][]byte #no_bounds_check {
      // Length of the first separator (in list order) that matches at s[i:], or 0 if none does.
      separator_at :: proc(s: []byte, i: int, substrs: [][]byte) -> int {
          for substr in substrs {
              w := len(substr)
              if w > 0 && i+w <= len(s) && string(s[i:i+w]) == string(substr) {
                  return w
              }
          }
          return 0
      }

      if s == nil || len(substrs) <= 0 {
          return nil
      }

      // Shortest non-empty separator.
      sublen := 0
      for substr in substrs {
          if len(substr) > 0 && (sublen == 0 || len(substr) < sublen) {
              sublen = len(substr)
          }
      }
      if sublen == 0 || len(s) - sublen <= 0 {
          return nil
      }

      // number, index, last
      n, i, l := 0, 0, 0

      // First pass: count the pieces so the result can be allocated exactly.
      for i < len(s) {
          if w := separator_at(s, i, substrs); w > 0 {
              if !skip_empty || i - l > 0 {
                  n += 1
              }
              i += w
              l = i
              continue
          }
          _, skip := utf8.decode_rune(s[i:])
          i += skip
      }
      if !skip_empty || len(s) - l > 0 {
          n += 1
      }

      if n < 1 {
          // no results
          return nil
      }

      buf := make([][]byte, n, allocator)

      // Second pass: same walk, this time recording the pieces.
      n, i, l = 0, 0, 0
      for i < len(s) {
          if w := separator_at(s, i, substrs); w > 0 {
              if !skip_empty || i - l > 0 {
                  buf[n] = s[l:i]
                  n += 1
              }
              i += w
              l = i
              continue
          }
          _, skip := utf8.decode_rune(s[i:])
          i += skip
      }
      if !skip_empty || len(s) - l > 0 {
          buf[n] = s[l:]
      }

      return buf
  }
  // split_multi_iterator removes and returns the next piece of `s^`, where pieces are
  // delimited by any of the separators in `substrs`.
  //
  // Inputs:
  // - s:          pointer to the remaining input; advanced past the returned piece AND
  //               the separator that ended it
  // - substrs:    the separators; at each position they are tried in list order and the
  //               first one that matches wins. Empty separators are ignored.
  // - skip_empty: when true, empty pieces are skipped instead of returned
  //
  // Returns the piece and true, or (nil, false) once the input is exhausted.
  // No state is kept between calls, so an empty piece after a trailing separator
  // cannot be told apart from exhaustion and is not reported.
  split_multi_iterator :: proc(s: ^[]byte, substrs: [][]byte, skip_empty := false) -> ([]byte, bool) #no_bounds_check {
      // Length of the first separator (in list order) that matches at s[i:], or 0 if none does.
      separator_at :: proc(s: []byte, i: int, substrs: [][]byte) -> int {
          for substr in substrs {
              w := len(substr)
              if w > 0 && i+w <= len(s) && string(s[i:i+w]) == string(substr) {
                  return w
              }
          }
          return 0
      }

      if s == nil || s^ == nil || len(substrs) <= 0 {
          return nil, false
      }

      str := s^
      if len(str) == 0 {
          // Exhausted: an emptied slice is not nil, so test the length.
          return nil, false
      }

      // index, last
      i, l := 0, 0

      for i < len(str) {
          if w := separator_at(str, i, substrs); w > 0 {
              if !skip_empty || i - l > 0 {
                  // Consume the separator too, otherwise the next call would find it
                  // again at offset 0 and never make progress.
                  s^ = str[i+w:]
                  return str[l:i], true
              }
              i += w
              l = i
              continue
          }
          _, skip := utf8.decode_rune(str[i:])
          i += skip
      }

      // No further separator: whatever follows the last skipped separator is the final piece.
      s^ = str[len(str):]
      if len(str) - l > 0 {
          return str[l:], true
      }
      return nil, false
  }
  // scrub returns a copy of `s` in which invalid UTF-8 is replaced by `replacement`.
  // Adjacent invalid bytes are only replaced once.
  //
  // Valid text is copied through unchanged, including text after the last invalid
  // byte and input that contains no invalid bytes at all. A correctly encoded
  // U+FFFD in the input is valid text and is kept as is.
  scrub :: proc(s: []byte, replacement: []byte, allocator := context.allocator) -> []byte {
      b: Buffer
      buffer_init_allocator(&b, 0, len(s), allocator)

      in_error := false // currently inside a run of invalid bytes
      run_start := 0    // start of the valid run that has not been written yet
      i := 0

      for i < len(s) {
          r, w := utf8.decode_rune(s[i:])

          // An undecodable byte is reported as RUNE_ERROR with a width of 1; a genuine
          // U+FFFD in the input decodes with its full encoded width instead.
          if r == utf8.RUNE_ERROR && w == 1 {
              if !in_error {
                  in_error = true
                  buffer_write(&b, s[run_start:i])
                  buffer_write(&b, replacement)
              }
              i += w
              run_start = i
          } else {
              in_error = false
              i += w
          }
      }

      // Flush the valid text that follows the last invalid run (or the whole input).
      buffer_write(&b, s[run_start:])

      return buffer_to_bytes(&b)
  }
  // reverse returns a newly allocated copy of `s` with its runes in reverse order.
  // The bytes of each multi-byte rune keep their order, so valid UTF-8 stays valid.
  // The result is allocated with `allocator`.
  reverse :: proc(s: []byte, allocator := context.allocator) -> []byte {
      str := s
      n := len(str)
      // Allocate from the caller-supplied allocator rather than the implicit context one.
      buf := make([]byte, n, allocator)
      i := n

      // Walk the input forwards and fill the output from the back.
      for len(str) > 0 {
          _, w := utf8.decode_rune(str)
          i -= w
          copy(buf[i:], str[:w])
          str = str[w:]
      }
      return buf
  }
  // expand_tabs returns a copy of `s` in which every tab is replaced by enough spaces
  // to reach the next multiple of `tab_size` columns.
  // The column counter is reset by '\n' and advanced by the encoded width of every other rune.
  // Panics when `tab_size` is not positive; returns nil when `s` is nil.
  expand_tabs :: proc(s: []byte, tab_size: int, allocator := context.allocator) -> []byte {
      if tab_size <= 0 {
          panic("tab size must be positive")
      }
      if s == nil {
          return nil
      }

      out: Buffer
      buffer_init_allocator(&out, 0, len(s), allocator)

      column := 0
      rest := s
      for len(rest) > 0 {
          r, w := utf8.decode_rune(rest)
          rest = rest[w:]

          switch r {
          case '\t':
              // Pad up to the next tab stop.
              pad := tab_size - column%tab_size
              for _ in 0..<pad {
                  buffer_write_byte(&out, ' ')
              }
              column += pad
          case '\n':
              column = 0
              buffer_write_rune(&out, r)
          case:
              column += w
              buffer_write_rune(&out, r)
          }
      }
      return buffer_to_bytes(&out)
  }
  // partition splits `str` around the first occurrence of `sep`.
  // Returns the part before it (`head`), the matched separator (`match`) and the part
  // after it (`tail`), all as sub-slices of `str`.
  // When `sep` is not found, `head` is all of `str` and the other two results are nil.
  partition :: proc(str, sep: []byte) -> (head, match, tail: []byte) {
      at := index(str, sep)
      if at == -1 {
          return str, nil, nil
      }
      after := at + len(sep)
      return str[:at], str[at:after], str[after:]
  }
  center_justify :: centre_justify // NOTE(bill): Because Americans exist

  // centre_justify returns a newly allocated byte slice in which `str` is centred by
  // adding `pad` on both sides until the result is `length` runes long.
  // When the padding cannot be split evenly the extra rune goes on the right.
  // If `str` already has at least `length` runes, or `pad` is empty, a clone of `str` is returned.
  centre_justify :: proc(str: []byte, length: int, pad: []byte, allocator := context.allocator) -> []byte {
      n := rune_count(str)
      // An empty pad is checked by length so a zero-length non-nil slice cannot lead to a division by zero below.
      if n >= length || len(pad) == 0 {
          return clone(str, allocator)
      }

      // Number of pad runes still needed to reach `length`.
      remains := length-n
      pad_len := rune_count(pad)

      b: Buffer
      buffer_init_allocator(&b, 0, len(str) + (remains/pad_len + 1)*len(pad), allocator)

      write_pad_string(&b, pad, pad_len, remains/2)
      buffer_write(&b, str)
      write_pad_string(&b, pad, pad_len, (remains+1)/2)

      return buffer_to_bytes(&b)
  }
  // left_justify returns a newly allocated byte slice in which `str` comes first and
  // `pad` is appended on the right until the result is `length` runes long.
  // If `str` already has at least `length` runes, or `pad` is empty, a clone of `str` is returned.
  left_justify :: proc(str: []byte, length: int, pad: []byte, allocator := context.allocator) -> []byte {
      n := rune_count(str)
      // An empty pad is checked by length so a zero-length non-nil slice cannot lead to a division by zero below.
      if n >= length || len(pad) == 0 {
          return clone(str, allocator)
      }

      // Number of pad runes still needed to reach `length`.
      remains := length-n
      pad_len := rune_count(pad)

      b: Buffer
      buffer_init_allocator(&b, 0, len(str) + (remains/pad_len + 1)*len(pad), allocator)

      buffer_write(&b, str)
      write_pad_string(&b, pad, pad_len, remains)

      return buffer_to_bytes(&b)
  }
  // right_justify returns a newly allocated byte slice in which `pad` is written first
  // (on the left) and `str` comes last, so that the result is `length` runes long.
  // If `str` already has at least `length` runes, or `pad` is empty, a clone of `str` is returned.
  right_justify :: proc(str: []byte, length: int, pad: []byte, allocator := context.allocator) -> []byte {
      n := rune_count(str)
      // An empty pad is checked by length so a zero-length non-nil slice cannot lead to a division by zero below.
      if n >= length || len(pad) == 0 {
          return clone(str, allocator)
      }

      // Number of pad runes still needed to reach `length`.
      remains := length-n
      pad_len := rune_count(pad)

      b: Buffer
      buffer_init_allocator(&b, 0, len(str) + (remains/pad_len + 1)*len(pad), allocator)

      write_pad_string(&b, pad, pad_len, remains)
      buffer_write(&b, str)

      return buffer_to_bytes(&b)
  }
  // write_pad_string appends `remains` runes of padding to `b`: as many whole copies
  // of `pad` as fit, followed by the leading runes of `pad` for the rest.
  // `pad_len` is the length of `pad` in runes.
  @private
  write_pad_string :: proc(b: ^Buffer, pad: []byte, pad_len, remains: int) {
      whole_copies := remains / pad_len
      for _ in 0..<whole_copies {
          buffer_write(b, pad)
      }

      // Partial copy: emit the first `leftover` runes of the pad.
      leftover := remains % pad_len
      rest := pad
      for _ in 0..<leftover {
          r, width := utf8.decode_rune(rest)
          buffer_write_rune(b, r)
          rest = rest[width:]
      }
  }
  // fields splits the byte slice `s` around each run of one or more consecutive white space characters.
  // Pure ASCII input is split on the ASCII white space table; as soon as any byte is
  // at or above utf8.RUNE_SELF the work is handed to fields_proc with unicode.is_space.
  // Returns sub-slices of `s`, or nil when ASCII input contains no field at all.
  fields :: proc(s: []byte, allocator := context.allocator) -> [][]byte #no_bounds_check {
      // First pass: count the fields (every non-space byte that follows a space or
      // the start) and OR all bytes together to detect non-ASCII input.
      field_count := 0
      prev_was_space := true
      seen_bits := u8(0)
      for b in s {
          seen_bits |= b
          cur_is_space := _ascii_space[b] != 0
          if prev_was_space && !cur_is_space {
              field_count += 1
          }
          prev_was_space = cur_is_space
      }

      if seen_bits >= utf8.RUNE_SELF {
          return fields_proc(s, unicode.is_space, allocator)
      }

      if field_count == 0 {
          return nil
      }

      // Second pass: alternate between skipping a run of spaces and recording a run of non-spaces.
      a := make([][]byte, field_count, allocator)
      na := 0
      i := 0
      for {
          for i < len(s) && _ascii_space[s[i]] != 0 {
              i += 1
          }
          if i >= len(s) {
              break
          }
          field_start := i
          for i < len(s) && _ascii_space[s[i]] == 0 {
              i += 1
          }
          a[na] = s[field_start : i]
          na += 1
      }
      return a
  }
  // fields_proc splits the byte slice s at each run of unicode code points `ch` satisfying f(ch)
  // returns a slice of subslices of s
  // If all code points in s satisfy f(ch) or string is empty, an empty slice is returned
  //
  // fields_proc makes no guarantee about the order in which it calls f(ch)
  // it assumes that `f` always returns the same value for a given ch
  fields_proc :: proc(s: []byte, f: proc(rune) -> bool, allocator := context.allocator) -> [][]byte #no_bounds_check {
      subslices := make([dynamic][]byte, 0, 32, allocator)

      // Start offset of the field currently being read, or -1 while between fields.
      start := -1

      for r, offset in string(s) {
          if f(r) {
              if start >= 0 {
                  // A separator ends the open field just before this rune.
                  append(&subslices, s[start : offset])
                  start = -1
              }
          } else if start < 0 {
              start = offset
          }
      }

      if start >= 0 {
          // The last field runs to the end of the input; slicing to the offset of the
          // final rune would cut that rune off.
          append(&subslices, s[start : len(s)])
      }

      return subslices[:]
  }