package bytes

import "base:intrinsics"
import "core:mem"
import "core:simd"
import "core:unicode"
import "core:unicode/utf8"

when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
	@(private)
	SCANNER_INDICES_256 : simd.u8x32 : {
		0, 1, 2, 3, 4, 5, 6, 7,
		8, 9, 10, 11, 12, 13, 14, 15,
		16, 17, 18, 19, 20, 21, 22, 23,
		24, 25, 26, 27, 28, 29, 30, 31,
	}
	@(private)
	SCANNER_SENTINEL_MAX_256: simd.u8x32 : u8(0x00)
	@(private)
	SCANNER_SENTINEL_MIN_256: simd.u8x32 : u8(0xff)
	@(private)
	SIMD_REG_SIZE_256 :: 32
}

@(private)
SCANNER_INDICES_128 : simd.u8x16 : {
	0, 1, 2, 3, 4, 5, 6, 7,
	8, 9, 10, 11, 12, 13, 14, 15,
}
@(private)
SCANNER_SENTINEL_MAX_128: simd.u8x16 : u8(0x00)
@(private)
SCANNER_SENTINEL_MIN_128: simd.u8x16 : u8(0xff)
@(private)
SIMD_REG_SIZE_128 :: 16

clone :: proc(s: []byte, allocator := context.allocator, loc := #caller_location) -> []byte {
	c := make([]byte, len(s), allocator, loc)
	copy(c, s)
	return c[:len(s)]
}

clone_safe :: proc(s: []byte, allocator := context.allocator, loc := #caller_location) -> (data: []byte, err: mem.Allocator_Error) {
	c := make([]byte, len(s), allocator, loc) or_return
	copy(c, s)
	return c[:len(s)], nil
}

ptr_from_slice :: ptr_from_bytes
ptr_from_bytes :: proc(str: []byte) -> ^byte {
	d := transmute(mem.Raw_String)str
	return d.data
}

truncate_to_byte :: proc(str: []byte, b: byte) -> []byte {
	n := index_byte(str, b)
	if n < 0 {
		n = len(str)
	}
	return str[:n]
}

truncate_to_rune :: proc(str: []byte, r: rune) -> []byte {
	n := index_rune(str, r)
	if n < 0 {
		n = len(str)
	}
	return str[:n]
}

// Compares two byte slices, returning a value representing which one comes first lexicographically.
// -1 if `lhs` comes first, 1 if `rhs` comes first, or 0 if they are equal.
compare :: proc(lhs, rhs: []byte) -> int {
	return mem.compare(lhs, rhs)
}
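
// Usage sketch for `compare` (illustrative only, not part of the original source;
// the proc name and the literal inputs below are hypothetical):
@(private)
_example_compare :: proc() -> (before, after, same: int) {
	a_str, b_str := "apple", "banana"
	a, b := transmute([]byte)a_str, transmute([]byte)b_str
	before = compare(a, b) // -1: `lhs` sorts before `rhs`
	after  = compare(b, a) //  1: `lhs` sorts after `rhs`
	same   = compare(a, a) //  0: both slices are equal
	return
}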

contains_rune :: proc(s: []byte, r: rune) -> int {
	for c, offset in string(s) {
		if c == r {
			return offset
		}
	}
	return -1
}

contains :: proc(s, substr: []byte) -> bool {
	return index(s, substr) >= 0
}

contains_any :: proc(s, chars: []byte) -> bool {
	return index_any(s, chars) >= 0
}

rune_count :: proc(s: []byte) -> int {
	return utf8.rune_count(s)
}

equal :: proc(a, b: []byte) -> bool {
	return string(a) == string(b)
}

equal_fold :: proc(u, v: []byte) -> bool {
	s, t := string(u), string(v)
	loop: for s != "" && t != "" {
		sr, tr: rune
		if s[0] < utf8.RUNE_SELF {
			sr, s = rune(s[0]), s[1:]
		} else {
			r, size := utf8.decode_rune_in_string(s)
			sr, s = r, s[size:]
		}
		if t[0] < utf8.RUNE_SELF {
			tr, t = rune(t[0]), t[1:]
		} else {
			r, size := utf8.decode_rune_in_string(t)
			tr, t = r, t[size:]
		}

		if tr == sr { // easy case
			continue loop
		}

		if tr < sr {
			tr, sr = sr, tr
		}

		if tr < utf8.RUNE_SELF {
			switch sr {
			case 'A'..='Z':
				if tr == (sr+'a')-'A' {
					continue loop
				}
			}
			return false
		}

		// TODO(bill): Unicode folding
		return false
	}
	return s == t
}
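
// Usage sketch for `equal_fold` (illustrative only, not part of the original source;
// the proc name and inputs are hypothetical). Note that only ASCII letters are
// folded for now, per the TODO above.
@(private)
_example_equal_fold :: proc() -> (folded, exact: bool) {
	u_str, v_str := "Odin", "ODIN"
	u, v := transmute([]byte)u_str, transmute([]byte)v_str
	folded = equal_fold(u, v) // true:  ASCII case differences are ignored
	exact  = equal(u, v)      // false: byte-wise equality is case-sensitive
	return
}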

has_prefix :: proc(s, prefix: []byte) -> bool {
	return len(s) >= len(prefix) && string(s[0:len(prefix)]) == string(prefix)
}

has_suffix :: proc(s, suffix: []byte) -> bool {
	return len(s) >= len(suffix) && string(s[len(s)-len(suffix):]) == string(suffix)
}

join :: proc(a: [][]byte, sep: []byte, allocator := context.allocator) -> []byte {
	if len(a) == 0 {
		return nil
	}

	n := len(sep) * (len(a) - 1)
	for s in a {
		n += len(s)
	}

	b := make([]byte, n, allocator)
	i := copy(b, a[0])
	for s in a[1:] {
		i += copy(b[i:], sep)
		i += copy(b[i:], s)
	}
	return b
}

join_safe :: proc(a: [][]byte, sep: []byte, allocator := context.allocator) -> (data: []byte, err: mem.Allocator_Error) {
	if len(a) == 0 {
		return nil, nil
	}

	n := len(sep) * (len(a) - 1)
	for s in a {
		n += len(s)
	}

	b := make([]byte, n, allocator) or_return
	i := copy(b, a[0])
	for s in a[1:] {
		i += copy(b[i:], sep)
		i += copy(b[i:], s)
	}
	return b, nil
}

concatenate :: proc(a: [][]byte, allocator := context.allocator) -> []byte {
	if len(a) == 0 {
		return nil
	}

	n := 0
	for s in a {
		n += len(s)
	}
	b := make([]byte, n, allocator)
	i := 0
	for s in a {
		i += copy(b[i:], s)
	}
	return b
}

concatenate_safe :: proc(a: [][]byte, allocator := context.allocator) -> (data: []byte, err: mem.Allocator_Error) {
	if len(a) == 0 {
		return nil, nil
	}

	n := 0
	for s in a {
		n += len(s)
	}
	b := make([]byte, n, allocator) or_return
	i := 0
	for s in a {
		i += copy(b[i:], s)
	}
	return b, nil
}

@private
_split :: proc(s, sep: []byte, sep_save, n: int, allocator := context.allocator) -> [][]byte {
	s, n := s, n

	if n == 0 {
		return nil
	}

	if sep == nil {
		l := utf8.rune_count(s)
		if n < 0 || n > l {
			n = l
		}

		res := make([dynamic][]byte, n, allocator)
		for i := 0; i < n-1; i += 1 {
			_, w := utf8.decode_rune(s)
			res[i] = s[:w]
			s = s[w:]
		}
		if n > 0 {
			res[n-1] = s
		}
		return res[:]
	}

	if n < 0 {
		n = count(s, sep) + 1
	}

	res := make([dynamic][]byte, n, allocator)

	n -= 1

	i := 0
	for ; i < n; i += 1 {
		m := index(s, sep)
		if m < 0 {
			break
		}
		res[i] = s[:m+sep_save]
		s = s[m+len(sep):]
	}
	res[i] = s

	return res[:i+1]
}

split :: proc(s, sep: []byte, allocator := context.allocator) -> [][]byte {
	return _split(s, sep, 0, -1, allocator)
}

split_n :: proc(s, sep: []byte, n: int, allocator := context.allocator) -> [][]byte {
	return _split(s, sep, 0, n, allocator)
}

split_after :: proc(s, sep: []byte, allocator := context.allocator) -> [][]byte {
	return _split(s, sep, len(sep), -1, allocator)
}

split_after_n :: proc(s, sep: []byte, n: int, allocator := context.allocator) -> [][]byte {
	return _split(s, sep, len(sep), n, allocator)
}

@private
_split_iterator :: proc(s: ^[]byte, sep: []byte, sep_save: int) -> (res: []byte, ok: bool) {
	if len(sep) == 0 {
		res = s[:]
		ok = true
		s^ = s[len(s):]
		return
	}

	m := index(s^, sep)
	if m < 0 {
		// not found
		res = s[:]
		ok = len(res) != 0
		s^ = s[len(s):]
	} else {
		res = s[:m+sep_save]
		ok = true
		s^ = s[m+len(sep):]
	}
	return
}

split_iterator :: proc(s: ^[]byte, sep: []byte) -> ([]byte, bool) {
	return _split_iterator(s, sep, 0)
}

split_after_iterator :: proc(s: ^[]byte, sep: []byte) -> ([]byte, bool) {
	return _split_iterator(s, sep, len(sep))
}
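
// Usage sketch for the iterator variants (illustrative only, not part of the
// original source; the proc name and inputs are hypothetical). The iterators
// advance the slice they are given and never allocate.
@(private)
_example_split_iterator :: proc() -> (n_fields, total_len: int) {
	csv_str := "a,b,c"
	sep_str := ","
	s := transmute([]byte)csv_str
	sep := transmute([]byte)sep_str
	for field in split_iterator(&s, sep) { // yields "a", "b", "c"
		n_fields += 1
		total_len += len(field)
	}
	return // 3, 3
}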

/*
Scan a slice of bytes for a specific byte.

This procedure safely handles slices of any length, including empty slices.

Inputs:
- s: A slice of bytes.
- c: The byte to search for.

Returns:
- index: The index of the byte `c`, or -1 if it was not found.
*/
index_byte :: proc "contextless" (s: []byte, c: byte) -> (index: int) #no_bounds_check {
	i, l := 0, len(s)

	// Guard against small strings. On modern systems, it is ALWAYS
	// worth vectorizing assuming there is a hardware vector unit, and
	// the data size is large enough.
	if l < SIMD_REG_SIZE_128 {
		for /**/; i < l; i += 1 {
			if s[i] == c {
				return i
			}
		}
		return -1
	}

	c_vec: simd.u8x16 = c
	when !simd.IS_EMULATED {
		// Note: While this is something that could also logically take
		// advantage of AVX512, the various downclocking and power
		// consumption related woes make it premature to have a dedicated
		// code path.
		when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
			c_vec_256: simd.u8x32 = c
			s_vecs: [4]simd.u8x32 = ---
			c_vecs: [4]simd.u8x32 = ---
			m_vec: [4]u8 = ---

			// Scan 128-byte chunks, using 256-bit SIMD.
			for nr_blocks := l / (4 * SIMD_REG_SIZE_256); nr_blocks > 0; nr_blocks -= 1 {
				#unroll for j in 0..<4 {
					s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x32)raw_data(s[i+j*SIMD_REG_SIZE_256:]))
					c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec_256)
					m_vec[j] = simd.reduce_or(c_vecs[j])
				}
				if m_vec[0] | m_vec[1] | m_vec[2] | m_vec[3] > 0 {
					#unroll for j in 0..<4 {
						if m_vec[j] > 0 {
							sel := simd.select(c_vecs[j], SCANNER_INDICES_256, SCANNER_SENTINEL_MIN_256)
							off := simd.reduce_min(sel)
							return i + j * SIMD_REG_SIZE_256 + int(off)
						}
					}
				}
				i += 4 * SIMD_REG_SIZE_256
			}

			// Scan 64-byte chunks, using 256-bit SIMD.
			for nr_blocks := (l - i) / (2 * SIMD_REG_SIZE_256); nr_blocks > 0; nr_blocks -= 1 {
				#unroll for j in 0..<2 {
					s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x32)raw_data(s[i+j*SIMD_REG_SIZE_256:]))
					c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec_256)
					m_vec[j] = simd.reduce_or(c_vecs[j])
				}
				if m_vec[0] | m_vec[1] > 0 {
					#unroll for j in 0..<2 {
						if m_vec[j] > 0 {
							sel := simd.select(c_vecs[j], SCANNER_INDICES_256, SCANNER_SENTINEL_MIN_256)
							off := simd.reduce_min(sel)
							return i + j * SIMD_REG_SIZE_256 + int(off)
						}
					}
				}
				i += 2 * SIMD_REG_SIZE_256
			}
		} else {
			s_vecs: [4]simd.u8x16 = ---
			c_vecs: [4]simd.u8x16 = ---
			m_vecs: [4]u8 = ---

			// Scan 64-byte chunks, using 128-bit SIMD.
			for nr_blocks := l / (4 * SIMD_REG_SIZE_128); nr_blocks > 0; nr_blocks -= 1 {
				#unroll for j in 0..<4 {
					s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x16)raw_data(s[i+j*SIMD_REG_SIZE_128:]))
					c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec)
					m_vecs[j] = simd.reduce_or(c_vecs[j])
				}
				if m_vecs[0] | m_vecs[1] | m_vecs[2] | m_vecs[3] > 0 {
					#unroll for j in 0..<4 {
						if m_vecs[j] > 0 {
							sel := simd.select(c_vecs[j], SCANNER_INDICES_128, SCANNER_SENTINEL_MIN_128)
							off := simd.reduce_min(sel)
							return i + j * SIMD_REG_SIZE_128 + int(off)
						}
					}
				}
				i += 4 * SIMD_REG_SIZE_128
			}
		}
	}

	// Scan the remaining SIMD register sized chunks.
	//
	// Apparently LLVM does ok with 128-bit SWAR, so this path is also taken
	// on potato targets. Scanning more at a time when LLVM is emulating SIMD
	// likely does not buy much, as all that does is increase GP register
	// pressure.
	for nr_blocks := (l - i) / SIMD_REG_SIZE_128; nr_blocks > 0; nr_blocks -= 1 {
		s0 := intrinsics.unaligned_load(cast(^simd.u8x16)raw_data(s[i:]))
		c0 := simd.lanes_eq(s0, c_vec)
		if simd.reduce_or(c0) > 0 {
			sel := simd.select(c0, SCANNER_INDICES_128, SCANNER_SENTINEL_MIN_128)
			off := simd.reduce_min(sel)
			return i + int(off)
		}
		i += SIMD_REG_SIZE_128
	}

	// Scan serially for the remainder.
	for /**/; i < l; i += 1 {
		if s[i] == c {
			return i
		}
	}

	return -1
}

/*
Scan a slice of bytes for a specific byte, starting from the end and working
backwards to the start.

This procedure safely handles slices of any length, including empty slices.

Inputs:
- s: A slice of bytes.
- c: The byte to search for.

Returns:
- index: The index of the byte `c`, or -1 if it was not found.
*/
last_index_byte :: proc "contextless" (s: []byte, c: byte) -> int #no_bounds_check {
	i := len(s)

	// Guard against small strings. On modern systems, it is ALWAYS
	// worth vectorizing assuming there is a hardware vector unit, and
	// the data size is large enough.
	if i < SIMD_REG_SIZE_128 {
		#reverse for ch, j in s {
			if ch == c {
				return j
			}
		}
		return -1
	}

	c_vec: simd.u8x16 = c
	when !simd.IS_EMULATED {
		// Note: While this is something that could also logically take
		// advantage of AVX512, the various downclocking and power
		// consumption related woes make it premature to have a dedicated
		// code path.
		when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
			c_vec_256: simd.u8x32 = c
			s_vecs: [4]simd.u8x32 = ---
			c_vecs: [4]simd.u8x32 = ---
			m_vec: [4]u8 = ---

			// Scan 128-byte chunks, using 256-bit SIMD.
			for i >= 4 * SIMD_REG_SIZE_256 {
				i -= 4 * SIMD_REG_SIZE_256
				#unroll for j in 0..<4 {
					s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x32)raw_data(s[i+j*SIMD_REG_SIZE_256:]))
					c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec_256)
					m_vec[j] = simd.reduce_or(c_vecs[j])
				}
				if m_vec[0] | m_vec[1] | m_vec[2] | m_vec[3] > 0 {
					#unroll for j in 0..<4 {
						if m_vec[3-j] > 0 {
							sel := simd.select(c_vecs[3-j], SCANNER_INDICES_256, SCANNER_SENTINEL_MAX_256)
							off := simd.reduce_max(sel)
							return i + (3-j) * SIMD_REG_SIZE_256 + int(off)
						}
					}
				}
			}

			// Scan 64-byte chunks, using 256-bit SIMD.
			for i >= 2 * SIMD_REG_SIZE_256 {
				i -= 2 * SIMD_REG_SIZE_256
				#unroll for j in 0..<2 {
					s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x32)raw_data(s[i+j*SIMD_REG_SIZE_256:]))
					c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec_256)
					m_vec[j] = simd.reduce_or(c_vecs[j])
				}
				if m_vec[0] | m_vec[1] > 0 {
					#unroll for j in 0..<2 {
						if m_vec[1-j] > 0 {
							sel := simd.select(c_vecs[1-j], SCANNER_INDICES_256, SCANNER_SENTINEL_MAX_256)
							off := simd.reduce_max(sel)
							return i + (1-j) * SIMD_REG_SIZE_256 + int(off)
						}
					}
				}
			}
		} else {
			s_vecs: [4]simd.u8x16 = ---
			c_vecs: [4]simd.u8x16 = ---
			m_vecs: [4]u8 = ---

			// Scan 64-byte chunks, using 128-bit SIMD.
			for i >= 4 * SIMD_REG_SIZE_128 {
				i -= 4 * SIMD_REG_SIZE_128
				#unroll for j in 0..<4 {
					s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x16)raw_data(s[i+j*SIMD_REG_SIZE_128:]))
					c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec)
					m_vecs[j] = simd.reduce_or(c_vecs[j])
				}
				if m_vecs[0] | m_vecs[1] | m_vecs[2] | m_vecs[3] > 0 {
					#unroll for j in 0..<4 {
						if m_vecs[3-j] > 0 {
							sel := simd.select(c_vecs[3-j], SCANNER_INDICES_128, SCANNER_SENTINEL_MAX_128)
							off := simd.reduce_max(sel)
							return i + (3-j) * SIMD_REG_SIZE_128 + int(off)
						}
					}
				}
			}
		}
	}

	// Scan the remaining SIMD register sized chunks.
	//
	// Apparently LLVM does ok with 128-bit SWAR, so this path is also taken
	// on potato targets. Scanning more at a time when LLVM is emulating SIMD
	// likely does not buy much, as all that does is increase GP register
	// pressure.
	for i >= SIMD_REG_SIZE_128 {
		i -= SIMD_REG_SIZE_128
		s0 := intrinsics.unaligned_load(cast(^simd.u8x16)raw_data(s[i:]))
		c0 := simd.lanes_eq(s0, c_vec)
		if simd.reduce_or(c0) > 0 {
			sel := simd.select(c0, SCANNER_INDICES_128, SCANNER_SENTINEL_MAX_128)
			off := simd.reduce_max(sel)
			return i + int(off)
		}
	}

	// Scan serially for the remainder.
	for i > 0 {
		i -= 1
		if s[i] == c {
			return i
		}
	}

	return -1
}
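
// Usage sketch for the byte scanners (illustrative only, not part of the original
// source; the proc name and inputs are hypothetical):
@(private)
_example_index_byte :: proc() -> (first, last, missing: int) {
	text := "hello, world"
	data := transmute([]byte)text
	first   = index_byte(data, 'o')      //  4: first 'o'
	last    = last_index_byte(data, 'o') //  8: last 'o'
	missing = index_byte(data, 'z')      // -1: not present
	return
}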

@private PRIME_RABIN_KARP :: 16777619

index :: proc(s, substr: []byte) -> int {
	hash_str_rabin_karp :: proc(s: []byte) -> (hash: u32 = 0, pow: u32 = 1) {
		for i := 0; i < len(s); i += 1 {
			hash = hash*PRIME_RABIN_KARP + u32(s[i])
		}
		sq := u32(PRIME_RABIN_KARP)
		for i := len(s); i > 0; i >>= 1 {
			if (i & 1) != 0 {
				pow *= sq
			}
			sq *= sq
		}
		return
	}

	n := len(substr)
	switch {
	case n == 0:
		return 0
	case n == 1:
		return index_byte(s, substr[0])
	case n == len(s):
		if string(s) == string(substr) {
			return 0
		}
		return -1
	case n > len(s):
		return -1
	}

	hash, pow := hash_str_rabin_karp(substr)
	h: u32
	for i := 0; i < n; i += 1 {
		h = h*PRIME_RABIN_KARP + u32(s[i])
	}
	if h == hash && string(s[:n]) == string(substr) {
		return 0
	}
	for i := n; i < len(s); /**/ {
		h *= PRIME_RABIN_KARP
		h += u32(s[i])
		h -= pow * u32(s[i-n])
		i += 1
		if h == hash && string(s[i-n:i]) == string(substr) {
			return i - n
		}
	}
	return -1
}

last_index :: proc(s, substr: []byte) -> int {
	hash_str_rabin_karp_reverse :: proc(s: []byte) -> (hash: u32 = 0, pow: u32 = 1) {
		for i := len(s) - 1; i >= 0; i -= 1 {
			hash = hash*PRIME_RABIN_KARP + u32(s[i])
		}
		sq := u32(PRIME_RABIN_KARP)
		for i := len(s); i > 0; i >>= 1 {
			if (i & 1) != 0 {
				pow *= sq
			}
			sq *= sq
		}
		return
	}

	n := len(substr)
	switch {
	case n == 0:
		return len(s)
	case n == 1:
		return last_index_byte(s, substr[0])
	case n == len(s):
		return 0 if string(substr) == string(s) else -1
	case n > len(s):
		return -1
	}

	hash, pow := hash_str_rabin_karp_reverse(substr)
	last := len(s) - n
	h: u32
	for i := len(s)-1; i >= last; i -= 1 {
		h = h*PRIME_RABIN_KARP + u32(s[i])
	}
	if h == hash && string(s[last:]) == string(substr) {
		return last
	}
	for i := last-1; i >= 0; i -= 1 {
		h *= PRIME_RABIN_KARP
		h += u32(s[i])
		h -= pow * u32(s[i+n])
		if h == hash && string(s[i:i+n]) == string(substr) {
			return i
		}
	}
	return -1
}

index_any :: proc(s, chars: []byte) -> int {
	if chars == nil {
		return -1
	}

	// TODO(bill): Optimize
	for r, i in s {
		for c in chars {
			if r == c {
				return i
			}
		}
	}
	return -1
}

last_index_any :: proc(s, chars: []byte) -> int {
	if chars == nil {
		return -1
	}

	for i := len(s); i > 0; {
		r, w := utf8.decode_last_rune(s[:i])
		i -= w
		for c in string(chars) {
			if r == c {
				return i
			}
		}
	}
	return -1
}

count :: proc(s, substr: []byte) -> int {
	if len(substr) == 0 { // special case
		return rune_count(s) + 1
	}
	if len(substr) == 1 {
		c := substr[0]
		switch len(s) {
		case 0:
			return 0
		case 1:
			return int(s[0] == c)
		}
		n := 0
		for i := 0; i < len(s); i += 1 {
			if s[i] == c {
				n += 1
			}
		}
		return n
	}

	// TODO(bill): Use a non-brute-force approach
	n := 0
	str := s
	for {
		i := index(str, substr)
		if i == -1 {
			return n
		}
		n += 1
		str = str[i+len(substr):]
	}
	return n
}

repeat :: proc(s: []byte, count: int, allocator := context.allocator) -> []byte {
	if count < 0 {
		panic("bytes: negative repeat count")
	} else if count > 0 && (len(s)*count)/count != len(s) {
		panic("bytes: repeat count will cause an overflow")
	}

	b := make([]byte, len(s)*count, allocator)
	i := copy(b, s)
	for i < len(b) { // 2^N trick to reduce the need to copy
		copy(b[i:], b[:i])
		i *= 2
	}
	return b
}

replace_all :: proc(s, old, new: []byte, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
	return replace(s, old, new, -1, allocator)
}

// if n < 0, no limit on the number of replacements
replace :: proc(s, old, new: []byte, n: int, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
	if string(old) == string(new) || n == 0 {
		was_allocation = false
		output = s
		return
	}
	byte_count := n
	if m := count(s, old); m == 0 {
		was_allocation = false
		output = s
		return
	} else if n < 0 || m < n {
		byte_count = m
	}

	t := make([]byte, len(s) + byte_count*(len(new) - len(old)), allocator)
	was_allocation = true

	w := 0
	start := 0
	for i := 0; i < byte_count; i += 1 {
		j := start
		if len(old) == 0 {
			if i > 0 {
				_, width := utf8.decode_rune(s[start:])
				j += width
			}
		} else {
			j += index(s[start:], old)
		}
		w += copy(t[w:], s[start:j])
		w += copy(t[w:], new)
		start = j + len(old)
	}
	w += copy(t[w:], s[start:])
	output = t[0:w]
	return
}
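
// Usage sketch for `replace` (illustrative only, not part of the original source;
// the proc name and inputs are hypothetical). `replace` only allocates when at
// least one replacement is performed; `was_allocation` reports this.
@(private)
_example_replace :: proc(allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
	text := "foo bar foo"
	old_str, new_str := "foo", "baz"
	s := transmute([]byte)text
	old_b := transmute([]byte)old_str
	new_b := transmute([]byte)new_str
	// Replace only the first occurrence; pass n < 0 (or use replace_all) for no limit.
	output, was_allocation = replace(s, old_b, new_b, 1, allocator) // "baz bar foo", true
	return
}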

remove :: proc(s, key: []byte, n: int, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
	return replace(s, key, {}, n, allocator)
}

remove_all :: proc(s, key: []byte, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
	return remove(s, key, -1, allocator)
}

@(private) _ascii_space := [256]u8{'\t' = 1, '\n' = 1, '\v' = 1, '\f' = 1, '\r' = 1, ' ' = 1}

is_ascii_space :: proc(r: rune) -> bool {
	if r < utf8.RUNE_SELF {
		return _ascii_space[u8(r)] != 0
	}
	return false
}

is_space :: proc(r: rune) -> bool {
	if r < 0x2000 {
		switch r {
		case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xa0, 0x1680:
			return true
		}
	} else {
		if r <= 0x200a {
			return true
		}
		switch r {
		case 0x2028, 0x2029, 0x202f, 0x205f, 0x3000:
			return true
		}
	}
	return false
}

is_null :: proc(r: rune) -> bool {
	return r == 0x0000
}

index_proc :: proc(s: []byte, p: proc(rune) -> bool, truth := true) -> int {
	for r, i in string(s) {
		if p(r) == truth {
			return i
		}
	}
	return -1
}

index_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr, truth := true) -> int {
	for r, i in string(s) {
		if p(state, r) == truth {
			return i
		}
	}
	return -1
}

last_index_proc :: proc(s: []byte, p: proc(rune) -> bool, truth := true) -> int {
	// TODO(bill): Probably use Rabin-Karp Search
	for i := len(s); i > 0; {
		r, size := utf8.decode_last_rune(s[:i])
		i -= size
		if p(r) == truth {
			return i
		}
	}
	return -1
}

last_index_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr, truth := true) -> int {
	// TODO(bill): Probably use Rabin-Karp Search
	for i := len(s); i > 0; {
		r, size := utf8.decode_last_rune(s[:i])
		i -= size
		if p(state, r) == truth {
			return i
		}
	}
	return -1
}

trim_left_proc :: proc(s: []byte, p: proc(rune) -> bool) -> []byte {
	i := index_proc(s, p, false)
	if i == -1 {
		return nil
	}
	return s[i:]
}

index_rune :: proc(s: []byte, r: rune) -> int {
	switch {
	case u32(r) < utf8.RUNE_SELF:
		return index_byte(s, byte(r))

	case r == utf8.RUNE_ERROR:
		for c, i in string(s) {
			if c == utf8.RUNE_ERROR {
				return i
			}
		}
		return -1

	case !utf8.valid_rune(r):
		return -1
	}

	b, w := utf8.encode_rune(r)
	return index(s, b[:w])
}

trim_left_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr) -> []byte {
	i := index_proc_with_state(s, p, state, false)
	if i == -1 {
		return nil
	}
	return s[i:]
}

trim_right_proc :: proc(s: []byte, p: proc(rune) -> bool) -> []byte {
	i := last_index_proc(s, p, false)
	if i >= 0 && s[i] >= utf8.RUNE_SELF {
		_, w := utf8.decode_rune(s[i:])
		i += w
	} else {
		i += 1
	}
	return s[0:i]
}

trim_right_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr) -> []byte {
	i := last_index_proc_with_state(s, p, state, false)
	if i >= 0 && s[i] >= utf8.RUNE_SELF {
		_, w := utf8.decode_rune(s[i:])
		i += w
	} else {
		i += 1
	}
	return s[0:i]
}

is_in_cutset :: proc(state: rawptr, r: rune) -> bool {
	if state == nil {
		return false
	}
	cutset := (^string)(state)^
	for c in cutset {
		if r == c {
			return true
		}
	}
	return false
}

trim_left :: proc(s: []byte, cutset: []byte) -> []byte {
	if s == nil || cutset == nil {
		return s
	}
	state := cutset
	return trim_left_proc_with_state(s, is_in_cutset, &state)
}

trim_right :: proc(s: []byte, cutset: []byte) -> []byte {
	if s == nil || cutset == nil {
		return s
	}
	state := cutset
	return trim_right_proc_with_state(s, is_in_cutset, &state)
}

trim :: proc(s: []byte, cutset: []byte) -> []byte {
	return trim_right(trim_left(s, cutset), cutset)
}

trim_left_space :: proc(s: []byte) -> []byte {
	return trim_left_proc(s, is_space)
}

trim_right_space :: proc(s: []byte) -> []byte {
	return trim_right_proc(s, is_space)
}

trim_space :: proc(s: []byte) -> []byte {
	return trim_right_space(trim_left_space(s))
}

trim_left_null :: proc(s: []byte) -> []byte {
	return trim_left_proc(s, is_null)
}

trim_right_null :: proc(s: []byte) -> []byte {
	return trim_right_proc(s, is_null)
}

trim_null :: proc(s: []byte) -> []byte {
	return trim_right_null(trim_left_null(s))
}

trim_prefix :: proc(s, prefix: []byte) -> []byte {
	if has_prefix(s, prefix) {
		return s[len(prefix):]
	}
	return s
}

trim_suffix :: proc(s, suffix: []byte) -> []byte {
	if has_suffix(s, suffix) {
		return s[:len(s)-len(suffix)]
	}
	return s
}
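
// Usage sketch for the trimming procedures (illustrative only, not part of the
// original source; the proc name and inputs are hypothetical). Trimming returns
// subslices of the input and never allocates.
@(private)
_example_trim :: proc() -> (trimmed, unprefixed: []byte) {
	text := "--hello--"
	cutset_str := "-"
	prefix_str := "--"
	s := transmute([]byte)text
	cutset := transmute([]byte)cutset_str
	prefix := transmute([]byte)prefix_str
	trimmed    = trim(s, cutset)        // "hello"
	unprefixed = trim_prefix(s, prefix) // "hello--"
	return
}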

split_multi :: proc(s: []byte, substrs: [][]byte, skip_empty := false, allocator := context.allocator) -> [][]byte #no_bounds_check {
	if s == nil || len(substrs) <= 0 {
		return nil
	}

	sublen := len(substrs[0])
	for substr in substrs[1:] {
		sublen = min(sublen, len(substr))
	}

	shared := len(s) - sublen
	if shared <= 0 {
		return nil
	}

	// number, index, last
	n, i, l := 0, 0, 0

	// count results
	first_pass: for i <= shared {
		for substr in substrs {
			if string(s[i:i+sublen]) == string(substr) {
				if !skip_empty || i - l > 0 {
					n += 1
				}
				i += sublen
				l = i
				continue first_pass
			}
		}
		_, skip := utf8.decode_rune(s[i:])
		i += skip
	}

	if !skip_empty || len(s) - l > 0 {
		n += 1
	}

	if n < 1 {
		// no results
		return nil
	}

	buf := make([][]byte, n, allocator)

	n, i, l = 0, 0, 0

	// slice results
	second_pass: for i <= shared {
		for substr in substrs {
			if string(s[i:i+sublen]) == string(substr) {
				if !skip_empty || i - l > 0 {
					buf[n] = s[l:i]
					n += 1
				}
				i += sublen
				l = i
				continue second_pass
			}
		}
		_, skip := utf8.decode_rune(s[i:])
		i += skip
	}

	if !skip_empty || len(s) - l > 0 {
		buf[n] = s[l:]
	}

	return buf
}

split_multi_iterator :: proc(s: ^[]byte, substrs: [][]byte, skip_empty := false) -> ([]byte, bool) #no_bounds_check {
	if s == nil || s^ == nil || len(substrs) <= 0 {
		return nil, false
	}

	sublen := len(substrs[0])
	for substr in substrs[1:] {
		sublen = min(sublen, len(substr))
	}

	shared := len(s) - sublen
	if shared <= 0 {
		return nil, false
	}

	// index, last
	i, l := 0, 0

	loop: for i <= shared {
		for substr in substrs {
			if string(s[i:i+sublen]) == string(substr) {
				if !skip_empty || i - l > 0 {
					res := s[l:i]
					s^ = s[i:]
					return res, true
				}
				i += sublen
				l = i
				continue loop
			}
		}
		_, skip := utf8.decode_rune(s[i:])
		i += skip
	}

	if !skip_empty || len(s) - l > 0 {
		res := s[l:]
		s^ = s[len(s):]
		return res, true
	}

	return nil, false
}

// Scrubs invalid utf-8 characters and replaces them with the replacement string
// Adjacent invalid bytes are only replaced once
scrub :: proc(s: []byte, replacement: []byte, allocator := context.allocator) -> []byte {
	str := s
	b: Buffer
	buffer_init_allocator(&b, 0, len(s), allocator)

	has_error := false
	cursor := 0
	origin := str

	for len(str) > 0 {
		r, w := utf8.decode_rune(str)

		if r == utf8.RUNE_ERROR {
			if !has_error {
				has_error = true
				buffer_write(&b, origin[:cursor])
			}
		} else if has_error {
			has_error = false
			buffer_write(&b, replacement)

			origin = origin[cursor:]
			cursor = 0
		}

		cursor += w
		str = str[w:]
	}

	return buffer_to_bytes(&b)
}

reverse :: proc(s: []byte, allocator := context.allocator) -> []byte {
	str := s
	n := len(str)
	buf := make([]byte, n, allocator)
	i := n

	for len(str) > 0 {
		_, w := utf8.decode_rune(str)
		i -= w
		copy(buf[i:], str[:w])
		str = str[w:]
	}
	return buf
}

expand_tabs :: proc(s: []byte, tab_size: int, allocator := context.allocator) -> []byte {
	if tab_size <= 0 {
		panic("tab size must be positive")
	}

	if s == nil {
		return nil
	}

	b: Buffer
	buffer_init_allocator(&b, 0, len(s), allocator)

	str := s
	column: int

	for len(str) > 0 {
		r, w := utf8.decode_rune(str)

		if r == '\t' {
			expand := tab_size - column%tab_size

			for i := 0; i < expand; i += 1 {
				buffer_write_byte(&b, ' ')
			}

			column += expand
		} else {
			if r == '\n' {
				column = 0
			} else {
				column += w
			}

			buffer_write_rune(&b, r)
		}

		str = str[w:]
	}

	return buffer_to_bytes(&b)
}

partition :: proc(str, sep: []byte) -> (head, match, tail: []byte) {
	i := index(str, sep)
	if i == -1 {
		head = str
		return
	}

	head = str[:i]
	match = str[i:i+len(sep)]
	tail = str[i+len(sep):]
	return
}
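
// Usage sketch for `partition` (illustrative only, not part of the original source;
// the proc name and inputs are hypothetical). All three results are subslices of
// the input; nothing is allocated.
@(private)
_example_partition :: proc() -> (head, match, tail: []byte) {
	text := "key=value"
	sep_str := "="
	s := transmute([]byte)text
	sep := transmute([]byte)sep_str
	head, match, tail = partition(s, sep) // "key", "=", "value"
	return
}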

center_justify :: centre_justify // NOTE(bill): Because Americans exist

// centre_justify returns a byte slice centred in a field of `length` runes, padded on both sides with `pad`, if the str's rune length is smaller than length
centre_justify :: proc(str: []byte, length: int, pad: []byte, allocator := context.allocator) -> []byte {
	n := rune_count(str)
	if n >= length || pad == nil {
		return clone(str, allocator)
	}

	remains := length-n
	pad_len := rune_count(pad)

	b: Buffer
	buffer_init_allocator(&b, 0, len(str) + (remains/pad_len + 1)*len(pad), allocator)

	write_pad_string(&b, pad, pad_len, remains/2)
	buffer_write(&b, str)
	write_pad_string(&b, pad, pad_len, (remains+1)/2)

	return buffer_to_bytes(&b)
}

// left_justify returns a left-justified byte slice, padded on the right side with `pad`, if the str's rune length is smaller than length
left_justify :: proc(str: []byte, length: int, pad: []byte, allocator := context.allocator) -> []byte {
	n := rune_count(str)
	if n >= length || pad == nil {
		return clone(str, allocator)
	}

	remains := length-n
	pad_len := rune_count(pad)

	b: Buffer
	buffer_init_allocator(&b, 0, len(str) + (remains/pad_len + 1)*len(pad), allocator)

	buffer_write(&b, str)
	write_pad_string(&b, pad, pad_len, remains)

	return buffer_to_bytes(&b)
}

// right_justify returns a right-justified byte slice, padded on the left side with `pad`, if the str's rune length is smaller than length
right_justify :: proc(str: []byte, length: int, pad: []byte, allocator := context.allocator) -> []byte {
	n := rune_count(str)
	if n >= length || pad == nil {
		return clone(str, allocator)
	}

	remains := length-n
	pad_len := rune_count(pad)

	b: Buffer
	buffer_init_allocator(&b, 0, len(str) + (remains/pad_len + 1)*len(pad), allocator)

	write_pad_string(&b, pad, pad_len, remains)
	buffer_write(&b, str)

	return buffer_to_bytes(&b)
}

@private
write_pad_string :: proc(b: ^Buffer, pad: []byte, pad_len, remains: int) {
	repeats := remains / pad_len

	for i := 0; i < repeats; i += 1 {
		buffer_write(b, pad)
	}

	n := remains % pad_len
	p := pad

	for i := 0; i < n; i += 1 {
		r, width := utf8.decode_rune(p)
		buffer_write_rune(b, r)
		p = p[width:]
	}
}

// fields splits the byte slice s around each instance of one or more consecutive white space characters, as defined by unicode.is_space,
// returning a slice of subslices of s, or an empty slice if s contains only white space
fields :: proc(s: []byte, allocator := context.allocator) -> [][]byte #no_bounds_check {
	n := 0
	was_space := 1
	set_bits := u8(0)

	// Count the fields; this count is exact if s is ASCII-only, and `set_bits`
	// records whether any non-ASCII bytes were seen along the way.
	for i in 0..<len(s) {
		r := s[i]
		set_bits |= r
		is_space := int(_ascii_space[r])
		n += was_space & ~is_space
		was_space = is_space
	}

	if set_bits >= utf8.RUNE_SELF {
		return fields_proc(s, unicode.is_space, allocator)
	}

	if n == 0 {
		return nil
	}

	a := make([][]byte, n, allocator)
	na := 0
	field_start := 0
	i := 0
	for i < len(s) && _ascii_space[s[i]] != 0 {
		i += 1
	}
	field_start = i
	for i < len(s) {
		if _ascii_space[s[i]] == 0 {
			i += 1
			continue
		}
		a[na] = s[field_start : i]
		na += 1
		i += 1
		for i < len(s) && _ascii_space[s[i]] != 0 {
			i += 1
		}
		field_start = i
	}
	if field_start < len(s) {
		a[na] = s[field_start:]
	}
	return a
}
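
// Usage sketch for `fields` (illustrative only, not part of the original source;
// the proc name and inputs are hypothetical). The returned slice is allocated,
// while the individual fields are subslices of the input.
@(private)
_example_fields :: proc(allocator := context.allocator) -> (n_fields: int) {
	text := "  lorem ipsum\tdolor  "
	s := transmute([]byte)text
	parts := fields(s, allocator) // {"lorem", "ipsum", "dolor"}
	defer delete(parts, allocator)
	n_fields = len(parts)
	return // 3
}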

// fields_proc splits the byte slice s at each run of unicode code points `ch` satisfying f(ch),
// returning a slice of subslices of s.
// If all code points in s satisfy f(ch), or s is empty, an empty slice is returned.
//
// fields_proc makes no guarantee about the order in which it calls f(ch);
// it assumes that `f` always returns the same value for a given ch
fields_proc :: proc(s: []byte, f: proc(rune) -> bool, allocator := context.allocator) -> [][]byte #no_bounds_check {
	subslices := make([dynamic][]byte, 0, 32, allocator)

	start, end := -1, -1
	for r, offset in string(s) {
		end = offset
		if f(r) {
			if start >= 0 {
				append(&subslices, s[start : end])
				// -1 could be used, but just speed it up through bitwise not
				// gotta love 2's complement
				start = ~start
			}
		} else {
			if start < 0 {
				start = end
			}
		}
	}

	if start >= 0 {
		append(&subslices, s[start : len(s)])
	}

	return subslices[:]
}

// alias returns true iff a and b have a non-zero length, and any part of
// a overlaps with b.
alias :: proc "contextless" (a, b: []byte) -> bool {
	a_len, b_len := len(a), len(b)
	if a_len == 0 || b_len == 0 {
		return false
	}

	a_start, b_start := uintptr(raw_data(a)), uintptr(raw_data(b))
	a_end, b_end := a_start + uintptr(a_len-1), b_start + uintptr(b_len-1)

	return a_start <= b_end && b_start <= a_end
}

// alias_inexactly returns true iff a and b have a non-zero length,
// the base pointer of a and b are NOT equal, and any part of a overlaps
// with b (ie: `alias(a, b)` with an exception that returns false for
// `a == b`, `b = a[:len(a)-69]` and similar conditions).
alias_inexactly :: proc "contextless" (a, b: []byte) -> bool {
	if raw_data(a) == raw_data(b) {
		return false
	}
	return alias(a, b)
}
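
// Usage sketch for the aliasing checks (illustrative only, not part of the original
// source; the proc name and the buffer below are hypothetical):
@(private)
_example_alias :: proc() -> (overlaps, overlaps_inexactly: bool) {
	buf: [8]byte
	a := buf[:]
	b := buf[2:6]
	overlaps           = alias(a, b)           // true: b lies within a
	overlaps_inexactly = alias_inexactly(a, b) // true: and their base pointers differ
	return
}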