bytes.odin 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169
  1. package bytes
  2. import "core:mem"
  3. import "core:unicode"
  4. import "core:unicode/utf8"
  // clone returns a newly allocated copy of `s` obtained from `allocator`.
  // `loc` is forwarded to `make` so the allocation is attributed to the caller.
  clone :: proc(s: []byte, allocator := context.allocator, loc := #caller_location) -> []byte {
      size := len(s)
      dup := make([]byte, size, allocator, loc)
      copy(dup, s)
      return dup[:size]
  }
  // clone_safe copies `s` into memory obtained from `allocator`.
  // If the allocation fails, the allocator error is returned and `data` is left nil.
  clone_safe :: proc(s: []byte, allocator := context.allocator, loc := #caller_location) -> (data: []byte, err: mem.Allocator_Error) {
      data = make([]byte, len(s), allocator, loc) or_return
      copy(data, s)
      return data, nil
  }
  // ptr_from_slice is an alias of ptr_from_bytes.
  ptr_from_slice :: ptr_from_bytes

  // ptr_from_bytes returns the data pointer backing `str`.
  ptr_from_bytes :: proc(str: []byte) -> ^byte {
      return raw_data(str)
  }
  // truncate_to_byte returns the part of `str` before the first occurrence of `b`,
  // or all of `str` when `b` does not occur.
  truncate_to_byte :: proc(str: []byte, b: byte) -> []byte {
      if cut := index_byte(str, b); cut >= 0 {
          return str[:cut]
      }
      return str[:len(str)]
  }
  // truncate_to_rune returns the part of `str` before the first occurrence of `r`,
  // or all of `str` when `r` does not occur.
  truncate_to_rune :: proc(str: []byte, r: rune) -> []byte {
      if cut := index_rune(str, r); cut >= 0 {
          return str[:cut]
      }
      return str[:len(str)]
  }
  // compare orders two byte slices lexicographically by delegating to mem.compare.
  // The result is -1 when `lhs` comes first, 1 when `rhs` comes first, and 0 when they are equal.
  compare :: proc(lhs, rhs: []byte) -> int {
      ordering := mem.compare(lhs, rhs)
      return ordering
  }
  // contains_rune scans `s` as UTF-8 and returns the byte offset of the first rune equal to `r`,
  // or -1 when no rune matches.
  contains_rune :: proc(s: []byte, r: rune) -> int {
      text := string(s)
      for ch, pos in text {
          if ch != r {
              continue
          }
          return pos
      }
      return -1
  }
  // contains reports whether `substr` occurs within `s`, as determined by `index`.
  contains :: proc(s, substr: []byte) -> bool {
      position := index(s, substr)
      return position >= 0
  }
  // contains_any reports whether `index_any` finds any element of `chars` in `s`.
  contains_any :: proc(s, chars: []byte) -> bool {
      position := index_any(s, chars)
      return position >= 0
  }
  // rune_count returns the number of runes in `s` as counted by utf8.rune_count.
  rune_count :: proc(s: []byte) -> int {
      total := utf8.rune_count(s)
      return total
  }
  // equal reports whether `a` and `b` have the same length and contents.
  equal :: proc(a, b: []byte) -> bool {
      lhs, rhs := string(a), string(b)
      return lhs == rhs
  }
  // equal_fold reports whether `u` and `v`, read as UTF-8, are equal when ASCII letter
  // case is ignored. Non-ASCII runes must match exactly; Unicode folding is not implemented.
  equal_fold :: proc(u, v: []byte) -> bool {
      // Pops the first rune off `str`, using the single-byte fast path for ASCII.
      pop_rune :: proc(str: string) -> (r: rune, rest: string) {
          if str[0] < utf8.RUNE_SELF {
              return rune(str[0]), str[1:]
          }
          decoded, size := utf8.decode_rune_in_string(str)
          return decoded, str[size:]
      }

      lhs, rhs := string(u), string(v)
      for lhs != "" && rhs != "" {
          lo, hi: rune
          lo, lhs = pop_rune(lhs)
          hi, rhs = pop_rune(rhs)

          if lo == hi {
              continue
          }
          // Order the pair so that `lo` is the smaller rune.
          if hi < lo {
              lo, hi = hi, lo
          }
          // Only an upper-case ASCII letter paired with its lower-case form folds.
          if hi < utf8.RUNE_SELF && 'A' <= lo && lo <= 'Z' && hi == (lo+'a')-'A' {
              continue
          }
          // TODO(bill): Unicode folding
          return false
      }
      // Both inputs must be exhausted together.
      return lhs == rhs
  }
  // has_prefix reports whether `s` begins with `prefix`.
  has_prefix :: proc(s, prefix: []byte) -> bool {
      n := len(prefix)
      if len(s) < n {
          return false
      }
      return string(s[:n]) == string(prefix)
  }
  // has_suffix reports whether `s` ends with `suffix`.
  has_suffix :: proc(s, suffix: []byte) -> bool {
      n := len(suffix)
      if len(s) < n {
          return false
      }
      return string(s[len(s)-n:]) == string(suffix)
  }
  // join concatenates the elements of `a`, placing `sep` between consecutive elements.
  // The result is allocated with `allocator`; an empty `a` yields nil.
  join :: proc(a: [][]byte, sep: []byte, allocator := context.allocator) -> []byte {
      if len(a) == 0 {
          return nil
      }

      total := len(sep) * (len(a) - 1)
      for part in a {
          total += len(part)
      }

      out := make([]byte, total, allocator)
      pos := 0
      for part, idx in a {
          if idx > 0 {
              pos += copy(out[pos:], sep)
          }
          pos += copy(out[pos:], part)
      }
      return out
  }
  // join_safe is `join` with allocation failure reported through `err`.
  // An empty `a` yields (nil, nil).
  join_safe :: proc(a: [][]byte, sep: []byte, allocator := context.allocator) -> (data: []byte, err: mem.Allocator_Error) {
      if len(a) == 0 {
          return nil, nil
      }

      total := len(sep) * (len(a) - 1)
      for part in a {
          total += len(part)
      }

      out := make([]byte, total, allocator) or_return
      pos := 0
      for part, idx in a {
          if idx > 0 {
              pos += copy(out[pos:], sep)
          }
          pos += copy(out[pos:], part)
      }
      return out, nil
  }
  // concatenate returns the elements of `a` joined back to back in a buffer
  // allocated with `allocator`. An empty `a` yields nil.
  concatenate :: proc(a: [][]byte, allocator := context.allocator) -> []byte {
      if len(a) == 0 {
          return nil
      }

      total := 0
      for part in a {
          total += len(part)
      }

      out := make([]byte, total, allocator)
      pos := 0
      for part in a {
          pos += copy(out[pos:], part)
      }
      return out
  }
  // concatenate_safe is `concatenate` with allocation failure reported through `err`.
  // An empty `a` yields (nil, nil).
  concatenate_safe :: proc(a: [][]byte, allocator := context.allocator) -> (data: []byte, err: mem.Allocator_Error) {
      if len(a) == 0 {
          return nil, nil
      }

      total := 0
      for part in a {
          total += len(part)
      }

      out := make([]byte, total, allocator) or_return
      pos := 0
      for part in a {
          pos += copy(out[pos:], part)
      }
      return out, nil
  }
  // _split is the shared implementation behind split, split_n, split_after and split_after_n.
  //
  // Inputs:
  // - s:        the bytes to split
  // - sep:      the separator; when empty (nil or zero length) `s` is split into UTF-8 runes
  // - sep_save: how many bytes of the separator are kept at the end of each piece
  // - n:        maximum number of pieces; 0 yields nil, a negative value means "no limit"
  //
  // The returned pieces are sub-slices of `s`; only the outer slice is allocated.
  @private
  _split :: proc(s, sep: []byte, sep_save, n: int, allocator := context.allocator) -> [][]byte {
      s, n := s, n

      if n == 0 {
          return nil
      }

      // Test the length rather than `sep == nil`, so that an empty but non-nil separator
      // also takes the rune-splitting path, matching `_split_iterator`. Otherwise
      // `index` would match the empty separator at offset 0 on every iteration.
      if len(sep) == 0 {
          l := utf8.rune_count(s)
          if n < 0 || n > l {
              n = l
          }

          res := make([dynamic][]byte, n, allocator)
          for i := 0; i < n-1; i += 1 {
              _, w := utf8.decode_rune(s)
              res[i] = s[:w]
              s = s[w:]
          }
          // The final piece receives whatever remains.
          if n > 0 {
              res[n-1] = s
          }
          return res[:]
      }

      if n < 0 {
          n = count(s, sep) + 1
      }

      res := make([dynamic][]byte, n, allocator)

      n -= 1

      i := 0
      for ; i < n; i += 1 {
          m := index(s, sep)
          if m < 0 {
              break
          }
          res[i] = s[:m+sep_save]
          s = s[m+len(sep):]
      }
      // The remainder, including any further separators once the limit is hit, is the last piece.
      res[i] = s

      return res[:i+1]
  }
  // split cuts `s` at every occurrence of `sep`, dropping the separators.
  // It calls `_split` with no piece limit.
  split :: proc(s, sep: []byte, allocator := context.allocator) -> [][]byte {
      NO_LIMIT :: -1
      return _split(s, sep, 0, NO_LIMIT, allocator)
  }
  // split_n cuts `s` at occurrences of `sep`, dropping the separators and
  // producing at most `n` pieces (see `_split` for the meaning of `n`).
  split_n :: proc(s, sep: []byte, n: int, allocator := context.allocator) -> [][]byte {
      pieces := _split(s, sep, 0, n, allocator)
      return pieces
  }
  // split_after cuts `s` after every occurrence of `sep`; each piece keeps its trailing separator.
  split_after :: proc(s, sep: []byte, allocator := context.allocator) -> [][]byte {
      NO_LIMIT :: -1
      keep := len(sep)
      return _split(s, sep, keep, NO_LIMIT, allocator)
  }
  // split_after_n cuts `s` after occurrences of `sep`, keeping the separator on each piece
  // and producing at most `n` pieces (see `_split` for the meaning of `n`).
  split_after_n :: proc(s, sep: []byte, n: int, allocator := context.allocator) -> [][]byte {
      keep := len(sep)
      return _split(s, sep, keep, n, allocator)
  }
  // _split_iterator yields the next piece of `s^` delimited by `sep` and advances `s^` past it.
  //
  // Inputs:
  // - s:        pointer to the remaining input; it is shortened on every successful call
  // - sep:      the separator; when empty, the whole remaining input is returned as one piece
  // - sep_save: how many bytes of the separator are kept at the end of the piece
  //
  // Returns `ok = false` once the input is exhausted (or `s` is nil).
  @private
  _split_iterator :: proc(s: ^[]byte, sep: []byte, sep_save: int) -> (res: []byte, ok: bool) {
      // Stop once there is nothing left. Without this guard an empty separator would
      // report `ok = true` forever, so a `for ... in split_iterator(...)` loop never ended.
      if s == nil || len(s^) == 0 {
          return
      }

      rest := s^

      if len(sep) == 0 {
          res, ok = rest, true
          s^ = rest[len(rest):]
          return
      }

      m := index(rest, sep)
      if m < 0 {
          // No further separator: the non-empty remainder is the final piece.
          res, ok = rest, true
          s^ = rest[len(rest):]
      } else {
          res, ok = rest[:m+sep_save], true
          s^ = rest[m+len(sep):]
      }
      return
  }
  // split_iterator returns the next piece of `s^` up to `sep` (separator dropped) and
  // advances `s^`. The boolean is the `ok` value reported by `_split_iterator`.
  split_iterator :: proc(s: ^[]byte, sep: []byte) -> ([]byte, bool) {
      piece, ok := _split_iterator(s, sep, 0)
      return piece, ok
  }
  // split_after_iterator returns the next piece of `s^` including its trailing `sep`
  // and advances `s^`. The boolean is the `ok` value reported by `_split_iterator`.
  split_after_iterator :: proc(s: ^[]byte, sep: []byte) -> ([]byte, bool) {
      piece, ok := _split_iterator(s, sep, len(sep))
      return piece, ok
  }
  // index_byte returns the offset of the first byte in `s` equal to `c`, or -1 if absent.
  index_byte :: proc(s: []byte, c: byte) -> int {
      for b, pos in s {
          if b == c {
              return pos
          }
      }
      return -1
  }
  // last_index_byte returns the offset of the last byte in `s` equal to `c`, or -1 if absent.
  last_index_byte :: proc(s: []byte, c: byte) -> int {
      pos := len(s)
      for pos > 0 {
          pos -= 1
          if s[pos] == c {
              return pos
          }
      }
      return -1
  }
  // Multiplier used by the rolling hashes in `index` and `last_index`.
  @private PRIME_RABIN_KARP :: 16777619

  // index returns the offset of the first occurrence of `substr` in `s`, or -1 if there is none.
  // An empty `substr` is found at offset 0. Inputs longer than one byte are searched with a
  // rolling hash, and every hash hit is confirmed with a direct comparison.
  index :: proc(s, substr: []byte) -> int {
      // Returns the hash of `s` and PRIME_RABIN_KARP raised to len(s), both modulo 2^32.
      hash_str_rabin_karp :: proc(s: []byte) -> (hash: u32 = 0, pow: u32 = 1) {
          for b in s {
              hash = hash*PRIME_RABIN_KARP + u32(b)
          }
          // Exponentiation by squaring.
          sq := u32(PRIME_RABIN_KARP)
          for i := len(s); i > 0; i >>= 1 {
              if (i & 1) != 0 {
                  pow *= sq
              }
              sq *= sq
          }
          return
      }

      n := len(substr)
      if n == 0 {
          return 0
      }
      if n == 1 {
          return index_byte(s, substr[0])
      }
      if n == len(s) {
          return 0 if string(s) == string(substr) else -1
      }
      if n > len(s) {
          return -1
      }

      needle := string(substr)
      target, pow := hash_str_rabin_karp(substr)

      // Hash of the first window s[:n].
      h: u32
      for b in s[:n] {
          h = h*PRIME_RABIN_KARP + u32(b)
      }
      if h == target && string(s[:n]) == needle {
          return 0
      }

      // Slide the window one byte at a time; `end` is the exclusive end of the window.
      for end := n + 1; end <= len(s); end += 1 {
          h = h*PRIME_RABIN_KARP + u32(s[end-1]) - pow*u32(s[end-1-n])
          if h == target && string(s[end-n:end]) == needle {
              return end - n
          }
      }
      return -1
  }
  // last_index returns the offset of the last occurrence of `substr` in `s`, or -1 if there is none.
  // An empty `substr` is found at offset len(s). Inputs longer than one byte are searched from the
  // end with a rolling hash, and every hash hit is confirmed with a direct comparison.
  last_index :: proc(s, substr: []byte) -> int {
      // Returns the hash of `s` read back to front and PRIME_RABIN_KARP raised to len(s), modulo 2^32.
      hash_str_rabin_karp_reverse :: proc(s: []byte) -> (hash: u32 = 0, pow: u32 = 1) {
          for i := len(s); i > 0; i -= 1 {
              hash = hash*PRIME_RABIN_KARP + u32(s[i-1])
          }
          // Exponentiation by squaring.
          sq := u32(PRIME_RABIN_KARP)
          for i := len(s); i > 0; i >>= 1 {
              if (i & 1) != 0 {
                  pow *= sq
              }
              sq *= sq
          }
          return
      }

      n := len(substr)
      if n == 0 {
          return len(s)
      }
      if n == 1 {
          return last_index_byte(s, substr[0])
      }
      if n == len(s) {
          return 0 if string(substr) == string(s) else -1
      }
      if n > len(s) {
          return -1
      }

      needle := string(substr)
      target, pow := hash_str_rabin_karp_reverse(substr)
      tail_start := len(s) - n

      // Hash of the last window s[tail_start:], read back to front.
      h: u32
      for i := len(s); i > tail_start; i -= 1 {
          h = h*PRIME_RABIN_KARP + u32(s[i-1])
      }
      if h == target && string(s[tail_start:]) == needle {
          return tail_start
      }

      // Slide the window towards the front; `start` is the window's first byte.
      for start := tail_start - 1; start >= 0; start -= 1 {
          h = h*PRIME_RABIN_KARP + u32(s[start]) - pow*u32(s[start+n])
          if h == target && string(s[start:start+n]) == needle {
              return start
          }
      }
      return -1
  }
  // index_any returns the offset of the first byte of `s` that equals any byte of `chars`,
  // or -1 when there is none or `chars` is nil.
  // NOTE: the comparison is byte-wise, whereas `last_index_any` compares decoded runes.
  index_any :: proc(s, chars: []byte) -> int {
      if chars == nil {
          return -1
      }

      // TODO(bill): Optimize
      for i := 0; i < len(s); i += 1 {
          for j := 0; j < len(chars); j += 1 {
              if s[i] == chars[j] {
                  return i
              }
          }
      }
      return -1
  }
  // last_index_any decodes `s` backwards rune by rune and returns the byte offset of the last
  // rune that also appears in `chars` (read as UTF-8). Returns -1 when there is no such rune
  // or when `chars` is nil.
  last_index_any :: proc(s, chars: []byte) -> int {
      if chars == nil {
          return -1
      }

      cutset := string(chars)
      end := len(s)
      for end > 0 {
          r, width := utf8.decode_last_rune(s[:end])
          end -= width
          for c in cutset {
              if c == r {
                  return end
              }
          }
      }
      return -1
  }
  // count returns the number of non-overlapping occurrences of `substr` in `s`.
  // An empty `substr` yields rune_count(s) + 1.
  count :: proc(s, substr: []byte) -> int {
      switch len(substr) {
      case 0: // special case
          return rune_count(s) + 1
      case 1:
          // A single byte is counted with a plain scan.
          c := substr[0]
          hits := 0
          for b in s {
              if b == c {
                  hits += 1
              }
          }
          return hits
      }

      // TODO(bill): Use a non-brute-force approach
      hits := 0
      rest := s
      for {
          at := index(rest, substr)
          if at == -1 {
              break
          }
          hits += 1
          // Resume after this match so that matches never overlap.
          rest = rest[at+len(substr):]
      }
      return hits
  }
  // repeat returns `count` copies of `s` laid end to end, allocated with `allocator`.
  // Panics when `count` is negative or when len(s)*count overflows.
  repeat :: proc(s: []byte, count: int, allocator := context.allocator) -> []byte {
      if count < 0 {
          panic("bytes: negative repeat count")
      }
      if count > 0 && (len(s)*count)/count != len(s) {
          panic("bytes: repeat count will cause an overflow")
      }

      total := len(s)*count
      out := make([]byte, total, allocator)

      // Seed with one copy, then keep doubling the filled prefix (2^N trick).
      for filled := copy(out, s); filled < total; filled *= 2 {
          copy(out[filled:], out[:filled])
      }
      return out
  }
  // replace_all replaces every occurrence of `old` in `s` with `new`.
  // See `replace` for the meaning of the two results.
  replace_all :: proc(s, old, new: []byte, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
      output, was_allocation = replace(s, old, new, -1, allocator)
      return
  }
  // replace returns `s` with the first `n` occurrences of `old` replaced by `new`.
  // If n < 0 there is no limit on the number of replacements.
  //
  // When nothing needs replacing (n == 0, `old` equals `new`, or `old` does not occur) the
  // input `s` itself is returned and `was_allocation` is false. Otherwise a new buffer is
  // allocated with `allocator` and `was_allocation` is true.
  replace :: proc(s, old, new: []byte, n: int, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
      if n == 0 || string(old) == string(new) {
          return s, false
      }

      occurrences := count(s, old)
      if occurrences == 0 {
          return s, false
      }

      // Number of replacements actually performed.
      limit := n
      if n < 0 || occurrences < n {
          limit = occurrences
      }

      buf := make([]byte, len(s) + limit*(len(new) - len(old)), allocator)

      written := 0 // bytes emitted into buf
      cursor  := 0 // next unread offset in s
      for k := 0; k < limit; k += 1 {
          hit := cursor
          if len(old) == 0 {
              // An empty `old` matches before every rune: step one rune after the first match.
              if k > 0 {
                  _, width := utf8.decode_rune(s[cursor:])
                  hit += width
              }
          } else {
              hit += index(s[cursor:], old)
          }
          written += copy(buf[written:], s[cursor:hit])
          written += copy(buf[written:], new)
          cursor = hit + len(old)
      }
      written += copy(buf[written:], s[cursor:])

      return buf[:written], true
  }
  // remove deletes the first `n` occurrences of `key` from `s` by replacing them with nothing.
  // See `replace` for the meaning of `n` and of the two results.
  remove :: proc(s, key: []byte, n: int, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
      output, was_allocation = replace(s, key, {}, n, allocator)
      return
  }
  // remove_all deletes every occurrence of `key` from `s`.
  // See `replace` for the meaning of the two results.
  remove_all :: proc(s, key: []byte, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
      output, was_allocation = remove(s, key, -1, allocator)
      return
  }
  // Lookup table: non-zero for the six ASCII white space bytes.
  @(private) _ascii_space := [256]u8{'\t' = 1, '\n' = 1, '\v' = 1, '\f' = 1, '\r' = 1, ' ' = 1}

  // is_ascii_space reports whether `r` is one of '\t', '\n', '\v', '\f', '\r' or ' '.
  // Runes at or above utf8.RUNE_SELF are never considered ASCII space.
  is_ascii_space :: proc(r: rune) -> bool {
      return r < utf8.RUNE_SELF && _ascii_space[u8(r)] != 0
  }
  // is_space reports whether `r` is one of the white space code points listed below:
  // the ASCII white space characters, U+0085, U+00A0, U+1680, U+2000..U+200A,
  // U+2028, U+2029, U+202F, U+205F and U+3000.
  is_space :: proc(r: rune) -> bool {
      switch r {
      case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xa0, 0x1680:
          return true
      case 0x2000..=0x200a:
          return true
      case 0x2028, 0x2029, 0x202f, 0x205f, 0x3000:
          return true
      }
      return false
  }
  // is_null reports whether `r` is the NUL code point (U+0000).
  is_null :: proc(r: rune) -> bool {
      NUL :: rune(0x0000)
      return r == NUL
  }
  // index_proc scans `s` as UTF-8 and returns the byte offset of the first rune for which
  // p(rune) == truth, or -1 when no rune qualifies.
  index_proc :: proc(s: []byte, p: proc(rune) -> bool, truth := true) -> int {
      text := string(s)
      for r, pos in text {
          if p(r) != truth {
              continue
          }
          return pos
      }
      return -1
  }
  // index_proc_with_state scans `s` as UTF-8 and returns the byte offset of the first rune
  // for which p(state, rune) == truth, or -1 when no rune qualifies.
  // `state` is passed through to `p` untouched.
  index_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr, truth := true) -> int {
      text := string(s)
      for r, pos in text {
          if p(state, r) != truth {
              continue
          }
          return pos
      }
      return -1
  }
  // last_index_proc decodes `s` backwards rune by rune and returns the byte offset of the
  // last rune for which p(rune) == truth, or -1 when no rune qualifies.
  last_index_proc :: proc(s: []byte, p: proc(rune) -> bool, truth := true) -> int {
      // TODO(bill): Probably use Rabin-Karp Search
      end := len(s)
      for end > 0 {
          r, width := utf8.decode_last_rune(s[:end])
          end -= width
          if p(r) == truth {
              return end
          }
      }
      return -1
  }
  // last_index_proc_with_state decodes `s` backwards rune by rune and returns the byte offset
  // of the last rune for which p(state, rune) == truth, or -1 when no rune qualifies.
  // `state` is passed through to `p` untouched.
  last_index_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr, truth := true) -> int {
      // TODO(bill): Probably use Rabin-Karp Search
      end := len(s)
      for end > 0 {
          r, width := utf8.decode_last_rune(s[:end])
          end -= width
          if p(state, r) == truth {
              return end
          }
      }
      return -1
  }
  // trim_left_proc drops the leading runes of `s` for which `p` returns true.
  // Returns nil when every rune satisfies `p` (or `s` is empty).
  trim_left_proc :: proc(s: []byte, p: proc(rune) -> bool) -> []byte {
      if start := index_proc(s, p, false); start != -1 {
          return s[start:]
      }
      return nil
  }
  // index_rune returns the byte offset of the first occurrence of rune `r` in `s`, or -1.
  // - ASCII runes are located with a byte search.
  // - utf8.RUNE_ERROR matches the first position that decodes to RUNE_ERROR.
  // - Runes rejected by utf8.valid_rune are never found.
  index_rune :: proc(s: []byte, r: rune) -> int {
      if u32(r) < utf8.RUNE_SELF {
          return index_byte(s, byte(r))
      }

      if r == utf8.RUNE_ERROR {
          for c, pos in string(s) {
              if c == utf8.RUNE_ERROR {
                  return pos
              }
          }
          return -1
      }

      if !utf8.valid_rune(r) {
          return -1
      }

      // Search for the rune's UTF-8 encoding.
      encoded, width := utf8.encode_rune(r)
      return index(s, encoded[:width])
  }
  // trim_left_proc_with_state drops the leading runes of `s` for which p(state, rune) is true.
  // Returns nil when every rune satisfies `p` (or `s` is empty).
  trim_left_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr) -> []byte {
      if start := index_proc_with_state(s, p, state, false); start != -1 {
          return s[start:]
      }
      return nil
  }
  // trim_right_proc drops the trailing runes of `s` for which `p` returns true.
  trim_right_proc :: proc(s: []byte, p: proc(rune) -> bool) -> []byte {
      // Offset of the last rune to keep, or -1 when everything is trimmed.
      end := last_index_proc(s, p, false)
      if end < 0 || s[end] < utf8.RUNE_SELF {
          // Nothing kept (-1 becomes 0) or a one-byte rune: step one byte.
          end += 1
      } else {
          // Multi-byte rune: step over its full encoding.
          _, width := utf8.decode_rune(s[end:])
          end += width
      }
      return s[:end]
  }
  // trim_right_proc_with_state drops the trailing runes of `s` for which p(state, rune) is true.
  trim_right_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr) -> []byte {
      // Offset of the last rune to keep, or -1 when everything is trimmed.
      end := last_index_proc_with_state(s, p, state, false)
      if end < 0 || s[end] < utf8.RUNE_SELF {
          // Nothing kept (-1 becomes 0) or a one-byte rune: step one byte.
          end += 1
      } else {
          // Multi-byte rune: step over its full encoding.
          _, width := utf8.decode_rune(s[end:])
          end += width
      }
      return s[:end]
  }
  // is_in_cutset reports whether `r` occurs in the cutset that `state` points to.
  // `state` is read through a `^string`; a nil `state` never matches.
  is_in_cutset :: proc(state: rawptr, r: rune) -> bool {
      if state == nil {
          return false
      }
      chars := (^string)(state)
      for c in chars^ {
          if c == r {
              return true
          }
      }
      return false
  }
  // trim_left drops the leading runes of `s` that appear in `cutset`.
  // `s` is returned unchanged when either argument is nil.
  trim_left :: proc(s: []byte, cutset: []byte) -> []byte {
      if cutset == nil || s == nil {
          return s
      }
      // Local copy whose address is handed to the predicate as its state.
      chars := cutset
      return trim_left_proc_with_state(s, is_in_cutset, &chars)
  }
  // trim_right drops the trailing runes of `s` that appear in `cutset`.
  // `s` is returned unchanged when either argument is nil.
  trim_right :: proc(s: []byte, cutset: []byte) -> []byte {
      if cutset == nil || s == nil {
          return s
      }
      // Local copy whose address is handed to the predicate as its state.
      chars := cutset
      return trim_right_proc_with_state(s, is_in_cutset, &chars)
  }
  // trim drops runes found in `cutset` from both ends of `s` (left side first).
  trim :: proc(s: []byte, cutset: []byte) -> []byte {
      left_trimmed := trim_left(s, cutset)
      return trim_right(left_trimmed, cutset)
  }
  // trim_left_space drops leading runes for which `is_space` is true.
  trim_left_space :: proc(s: []byte) -> []byte {
      trimmed := trim_left_proc(s, is_space)
      return trimmed
  }
  // trim_right_space drops trailing runes for which `is_space` is true.
  trim_right_space :: proc(s: []byte) -> []byte {
      trimmed := trim_right_proc(s, is_space)
      return trimmed
  }
  // trim_space drops runes for which `is_space` is true from both ends of `s` (left side first).
  trim_space :: proc(s: []byte) -> []byte {
      left_trimmed := trim_left_space(s)
      return trim_right_space(left_trimmed)
  }
  // trim_left_null drops leading NUL (U+0000) runes from `s`.
  trim_left_null :: proc(s: []byte) -> []byte {
      trimmed := trim_left_proc(s, is_null)
      return trimmed
  }
  // trim_right_null drops trailing NUL (U+0000) runes from `s`.
  trim_right_null :: proc(s: []byte) -> []byte {
      trimmed := trim_right_proc(s, is_null)
      return trimmed
  }
  // trim_null drops NUL (U+0000) runes from both ends of `s` (left side first).
  trim_null :: proc(s: []byte) -> []byte {
      left_trimmed := trim_left_null(s)
      return trim_right_null(left_trimmed)
  }
  // trim_prefix returns `s` without the leading `prefix`; `s` is returned unchanged
  // when it does not start with `prefix`.
  trim_prefix :: proc(s, prefix: []byte) -> []byte {
      if !has_prefix(s, prefix) {
          return s
      }
      return s[len(prefix):]
  }
  // trim_suffix returns `s` without the trailing `suffix`; `s` is returned unchanged
  // when it does not end with `suffix`.
  trim_suffix :: proc(s, suffix: []byte) -> []byte {
      if !has_suffix(s, suffix) {
          return s
      }
      keep := len(s) - len(suffix)
      return s[:keep]
  }
  // split_multi splits `s` at every occurrence of any separator in `substrs`.
  //
  // Inputs:
  // - s:          the bytes to split
  // - substrs:    the separators; at each position they are tried in order and the first match wins
  // - skip_empty: when true, empty pieces (adjacent separators, leading/trailing separators) are dropped
  // - allocator:  used for the returned outer slice; the pieces are sub-slices of `s`
  //
  // Returns nil when `s` is nil, `substrs` is empty, any separator is empty, `s` is not
  // longer than the shortest separator, or no pieces are produced.
  //
  // The scan advances rune by rune, so a separator is only recognised at a rune boundary.
  split_multi :: proc(s: []byte, substrs: [][]byte, skip_empty := false, allocator := context.allocator) -> [][]byte #no_bounds_check {
      // Length of the first separator that matches at s[i:], or 0 when none matches.
      // Bounds are checked by hand because bounds checking is disabled here.
      separator_at :: proc(s: []byte, i: int, substrs: [][]byte) -> int #no_bounds_check {
          for substr in substrs {
              m := len(substr)
              if i+m <= len(s) && string(s[i:i+m]) == string(substr) {
                  return m
              }
          }
          return 0
      }

      if s == nil || len(substrs) <= 0 {
          return nil
      }

      // Length of the shortest separator.
      sublen := len(substrs[0])
      for substr in substrs[1:] {
          sublen = min(sublen, len(substr))
      }

      // An empty separator matches everywhere without consuming input; reject it
      // rather than looping forever.
      if sublen == 0 {
          return nil
      }

      // Last offset at which the shortest separator could still start.
      shared := len(s) - sublen
      if shared <= 0 {
          return nil
      }

      // number, index, last
      n, i, l := 0, 0, 0

      // First pass: count the pieces.
      for i <= shared {
          if m := separator_at(s, i, substrs); m > 0 {
              if !skip_empty || i - l > 0 {
                  n += 1
              }
              i += m
              l = i
              continue
          }
          _, skip := utf8.decode_rune(s[i:])
          i += skip
      }

      if !skip_empty || len(s) - l > 0 {
          n += 1
      }

      if n < 1 {
          // no results
          return nil
      }

      buf := make([][]byte, n, allocator)

      n, i, l = 0, 0, 0

      // Second pass: slice the pieces, mirroring the first pass exactly.
      for i <= shared {
          if m := separator_at(s, i, substrs); m > 0 {
              if !skip_empty || i - l > 0 {
                  buf[n] = s[l:i]
                  n += 1
              }
              i += m
              l = i
              continue
          }
          _, skip := utf8.decode_rune(s[i:])
          i += skip
      }

      if !skip_empty || len(s) - l > 0 {
          buf[n] = s[l:]
      }

      return buf
  }
  // split_multi_iterator yields the next piece of `s^` delimited by any separator in `substrs`
  // and advances `s^` past the piece and its separator.
  //
  // Inputs:
  // - s:          pointer to the remaining input; set to nil once the final piece has been returned
  // - substrs:    the separators; at each position they are tried in order and the first match wins
  // - skip_empty: when true, empty pieces are skipped instead of being returned
  //
  // Returns (nil, false) when iteration is finished, `s` or `s^` is nil, `substrs` is empty,
  // or any separator is empty.
  //
  // The scan advances rune by rune, so a separator is only recognised at a rune boundary.
  split_multi_iterator :: proc(s: ^[]byte, substrs: [][]byte, skip_empty := false) -> ([]byte, bool) #no_bounds_check {
      // Length of the first separator that matches at s[i:], or 0 when none matches.
      // Bounds are checked by hand because bounds checking is disabled here.
      separator_at :: proc(s: []byte, i: int, substrs: [][]byte) -> int #no_bounds_check {
          for substr in substrs {
              m := len(substr)
              if i+m <= len(s) && string(s[i:i+m]) == string(substr) {
                  return m
              }
          }
          return 0
      }

      if s == nil || s^ == nil || len(substrs) <= 0 {
          return nil, false
      }

      // An empty separator matches everywhere without consuming input; reject it.
      for substr in substrs {
          if len(substr) == 0 {
              return nil, false
          }
      }

      rest := s^

      // index, last
      i, l := 0, 0

      for i < len(rest) {
          if m := separator_at(rest, i, substrs); m > 0 {
              if !skip_empty || i - l > 0 {
                  res := rest[l:i]
                  // Resume AFTER the separator. Leaving it at the front made every
                  // later call return an empty piece forever when skip_empty was false.
                  s^ = rest[i+m:]
                  return res, true
              }
              i += m
              l = i
              continue
          }
          _, skip := utf8.decode_rune(rest[i:])
          i += skip
      }

      // No separator left: hand out the tail once, then mark the iterator finished
      // with nil so that the next call stops at the guard above.
      s^ = nil
      if !skip_empty || len(rest) - l > 0 {
          return rest[l:], true
      }
      return nil, false
  }
  // scrub replaces invalid UTF-8 in `s` with `replacement` and returns the result in a new
  // buffer allocated with `allocator`.
  // A run of adjacent invalid bytes is replaced only once; valid runes are copied unchanged,
  // including a correctly encoded U+FFFD.
  scrub :: proc(s: []byte, replacement: []byte, allocator := context.allocator) -> []byte {
      b: Buffer
      buffer_init_allocator(&b, 0, len(s), allocator)

      in_error  := false // currently inside a run of invalid bytes
      run_start := 0     // start of the valid run not yet written to the buffer
      i := 0

      for i < len(s) {
          r, w := utf8.decode_rune(s[i:])

          // A decoding failure is RUNE_ERROR with width 1; a genuine U+FFFD in the
          // input is wider and must be kept.
          if r == utf8.RUNE_ERROR && w == 1 {
              if !in_error {
                  in_error = true
                  // Flush the valid bytes before the bad run, then one replacement for the run.
                  buffer_write(&b, s[run_start:i])
                  buffer_write(&b, replacement)
              }
              i += w
              run_start = i
          } else {
              in_error = false
              i += w
          }
      }

      // Flush the trailing valid run; previously it was dropped, so fully valid input
      // produced an empty result.
      buffer_write(&b, s[run_start:])

      return buffer_to_bytes(&b)
  }
  // reverse returns a copy of `s` with its runes in reverse order.
  // The bytes within each rune keep their order; undecodable bytes are moved one at a time.
  // The result is allocated with `allocator`.
  reverse :: proc(s: []byte, allocator := context.allocator) -> []byte {
      str := s
      n := len(str)
      // Honour the caller's allocator; it was previously ignored in favour of the context allocator.
      buf := make([]byte, n, allocator)
      i := n

      // Take runes from the front of the input and place them from the back of the output.
      for len(str) > 0 {
          _, w := utf8.decode_rune(str)
          i -= w
          copy(buf[i:], str[:w])
          str = str[w:]
      }
      return buf
  }
  // expand_tabs returns a copy of `s` in which every tab is replaced by the number of spaces
  // needed to reach the next multiple of `tab_size` columns.
  // Each rune counts as one column; '\n' resets the column to 0.
  // Panics when `tab_size` is not positive. A nil `s` yields nil.
  expand_tabs :: proc(s: []byte, tab_size: int, allocator := context.allocator) -> []byte {
      if tab_size <= 0 {
          panic("tab size must be positive")
      }

      if s == nil {
          return nil
      }

      b: Buffer
      buffer_init_allocator(&b, 0, len(s), allocator)

      str := s
      column: int

      for len(str) > 0 {
          r, w := utf8.decode_rune(str)

          if r == '\t' {
              expand := tab_size - column%tab_size

              for i := 0; i < expand; i += 1 {
                  buffer_write_byte(&b, ' ')
              }

              column += expand
          } else {
              if r == '\n' {
                  column = 0
              } else {
                  // One column per rune; counting encoded bytes would misalign
                  // tab stops after non-ASCII text.
                  column += 1
              }

              // Copy the source bytes as they are, so undecodable bytes are not
              // rewritten as U+FFFD.
              buffer_write(&b, str[:w])
          }

          str = str[w:]
      }

      return buffer_to_bytes(&b)
  }
  // partition splits `str` around the first occurrence of `sep`.
  // Returns the part before the match, the match itself, and the part after it.
  // When `sep` does not occur, `head` is all of `str` and the other two results are nil.
  partition :: proc(str, sep: []byte) -> (head, match, tail: []byte) {
      at := index(str, sep)
      if at == -1 {
          return str, nil, nil
      }
      end := at + len(sep)
      return str[:at], str[at:end], str[end:]
  }
  center_justify :: centre_justify // NOTE(bill): Because Americans exist

  // centre_justify returns a copy of `str` padded on both sides with `pad` so that the result
  // is `length` runes long. When the padding cannot be split evenly, the extra rune goes on
  // the right. If `str` already has at least `length` runes, or `pad` is empty, a plain clone
  // of `str` is returned.
  centre_justify :: proc(str: []byte, length: int, pad: []byte, allocator := context.allocator) -> []byte {
      n := rune_count(str)
      // Checking len(pad) also covers an empty non-nil pad, which would divide by zero below.
      if n >= length || len(pad) == 0 {
          return clone(str, allocator)
      }

      // Runes still missing; this was `length-1`, which overshot `length`.
      remains := length-n
      pad_len := rune_count(pad)

      b: Buffer
      buffer_init_allocator(&b, 0, len(str) + (remains/pad_len + 1)*len(pad), allocator)

      write_pad_string(&b, pad, pad_len, remains/2)
      buffer_write(&b, str)
      write_pad_string(&b, pad, pad_len, (remains+1)/2)

      return buffer_to_bytes(&b)
  }
  // left_justify returns a copy of `str` followed by as much of `pad` (repeated) as is needed
  // to make the result `length` runes long, i.e. the text stays on the left.
  // If `str` already has at least `length` runes, or `pad` is empty, a plain clone of `str`
  // is returned.
  left_justify :: proc(str: []byte, length: int, pad: []byte, allocator := context.allocator) -> []byte {
      n := rune_count(str)
      // Checking len(pad) also covers an empty non-nil pad, which would divide by zero below.
      if n >= length || len(pad) == 0 {
          return clone(str, allocator)
      }

      // Runes still missing; this was `length-1`, which overshot `length`.
      remains := length-n
      pad_len := rune_count(pad)

      b: Buffer
      buffer_init_allocator(&b, 0, len(str) + (remains/pad_len + 1)*len(pad), allocator)

      buffer_write(&b, str)
      write_pad_string(&b, pad, pad_len, remains)

      return buffer_to_bytes(&b)
  }
  // right_justify returns a copy of `str` preceded by as much of `pad` (repeated) as is needed
  // to make the result `length` runes long, i.e. the text ends up on the right.
  // If `str` already has at least `length` runes, or `pad` is empty, a plain clone of `str`
  // is returned.
  right_justify :: proc(str: []byte, length: int, pad: []byte, allocator := context.allocator) -> []byte {
      n := rune_count(str)
      // Checking len(pad) also covers an empty non-nil pad, which would divide by zero below.
      if n >= length || len(pad) == 0 {
          return clone(str, allocator)
      }

      // Runes still missing; this was `length-1`, which overshot `length`.
      remains := length-n
      pad_len := rune_count(pad)

      b: Buffer
      buffer_init_allocator(&b, 0, len(str) + (remains/pad_len + 1)*len(pad), allocator)

      write_pad_string(&b, pad, pad_len, remains)
      buffer_write(&b, str)

      return buffer_to_bytes(&b)
  }
  // write_pad_string appends `remains` runes of padding to `b`: as many whole copies of `pad`
  // as fit, followed by the leading runes of `pad` needed to make up the rest.
  // `pad_len` is the rune length of `pad` as supplied by the caller.
  @private
  write_pad_string :: proc(b: ^Buffer, pad: []byte, pad_len, remains: int) {
      whole, partial := remains / pad_len, remains % pad_len

      for _ in 0..<whole {
          buffer_write(b, pad)
      }

      // Emit the leading `partial` runes of the pad.
      rest := pad
      for _ in 0..<partial {
          r, width := utf8.decode_rune(rest)
          buffer_write_rune(b, r)
          rest = rest[width:]
      }
  }
  // fields splits `s` around each run of one or more white space characters and returns the
  // non-empty pieces as sub-slices of `s`.
  // Pure ASCII input is handled here with the `_ascii_space` table; if any byte is at or above
  // utf8.RUNE_SELF the work is handed to `fields_proc` with unicode.is_space.
  // Returns nil when the ASCII path finds no fields.
  fields :: proc(s: []byte, allocator := context.allocator) -> [][]byte #no_bounds_check {
      // One pass to count the fields and to detect non-ASCII bytes.
      field_count := 0
      prev_space := 1
      seen_bits := u8(0)
      for b in s {
          seen_bits |= b
          cur_space := int(_ascii_space[b])
          // Adds 1 exactly when a space (or the start) is followed by a non-space.
          field_count += prev_space & ~cur_space
          prev_space = cur_space
      }

      if seen_bits >= utf8.RUNE_SELF {
          return fields_proc(s, unicode.is_space, allocator)
      }

      if field_count == 0 {
          return nil
      }

      out := make([][]byte, field_count, allocator)
      filled := 0
      pos := 0
      for pos < len(s) {
          // Skip the white space in front of the next field.
          for pos < len(s) && _ascii_space[s[pos]] != 0 {
              pos += 1
          }
          begin := pos
          // Consume the field itself.
          for pos < len(s) && _ascii_space[s[pos]] == 0 {
              pos += 1
          }
          if begin < pos {
              out[filled] = s[begin:pos]
              filled += 1
          }
      }
      return out
  }
  // fields_proc splits `s` at each run of code points `ch` satisfying f(ch) and returns the
  // pieces in between as sub-slices of `s`.
  // If every code point in `s` satisfies f(ch), or `s` is empty, an empty slice is returned.
  //
  // fields_proc makes no guarantee about the order in which it calls f(ch);
  // it assumes that `f` always returns the same value for a given ch.
  fields_proc :: proc(s: []byte, f: proc(rune) -> bool, allocator := context.allocator) -> [][]byte #no_bounds_check {
      out := make([dynamic][]byte, 0, 32, allocator)

      // Offset where the current field began, or a negative value when outside a field.
      field_start := -1

      for r, offset in string(s) {
          if f(r) {
              if field_start >= 0 {
                  append(&out, s[field_start:offset])
                  field_start = -1
              }
          } else if field_start < 0 {
              field_start = offset
          }
      }

      // Close a field that runs to the end of the input.
      if field_start >= 0 {
          append(&out, s[field_start:])
      }

      return out[:]
  }