bytes.odin 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478
  1. // Procedures for manipulation of `[]byte` slices.
  2. package bytes
  3. import "base:intrinsics"
  4. import "core:mem"
  5. import "core:simd"
  6. import "core:unicode"
  7. import "core:unicode/utf8"
  8. when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
  9. @(private)
  10. SCANNER_INDICES_256 : simd.u8x32 : {
  11. 0, 1, 2, 3, 4, 5, 6, 7,
  12. 8, 9, 10, 11, 12, 13, 14, 15,
  13. 16, 17, 18, 19, 20, 21, 22, 23,
  14. 24, 25, 26, 27, 28, 29, 30, 31,
  15. }
  16. @(private)
  17. SCANNER_SENTINEL_MAX_256: simd.u8x32 : u8(0x00)
  18. @(private)
  19. SCANNER_SENTINEL_MIN_256: simd.u8x32 : u8(0xff)
  20. @(private)
  21. SIMD_REG_SIZE_256 :: 32
  22. }
  23. @(private)
  24. SCANNER_INDICES_128 : simd.u8x16 : {
  25. 0, 1, 2, 3, 4, 5, 6, 7,
  26. 8, 9, 10, 11, 12, 13, 14, 15,
  27. }
  28. @(private)
  29. SCANNER_SENTINEL_MAX_128: simd.u8x16 : u8(0x00)
  30. @(private)
  31. SCANNER_SENTINEL_MIN_128: simd.u8x16 : u8(0xff)
  32. @(private)
  33. SIMD_REG_SIZE_128 :: 16
  34. clone :: proc(s: []byte, allocator := context.allocator, loc := #caller_location) -> []byte {
  35. c := make([]byte, len(s), allocator, loc)
  36. copy(c, s)
  37. return c[:len(s)]
  38. }
  39. clone_safe :: proc(s: []byte, allocator := context.allocator, loc := #caller_location) -> (data: []byte, err: mem.Allocator_Error) {
  40. c := make([]byte, len(s), allocator, loc) or_return
  41. copy(c, s)
  42. return c[:len(s)], nil
  43. }
  44. ptr_from_slice :: ptr_from_bytes
  45. ptr_from_bytes :: proc(str: []byte) -> ^byte {
  46. d := transmute(mem.Raw_String)str
  47. return d.data
  48. }
  49. truncate_to_byte :: proc(str: []byte, b: byte) -> []byte {
  50. n := index_byte(str, b)
  51. if n < 0 {
  52. n = len(str)
  53. }
  54. return str[:n]
  55. }
  56. truncate_to_rune :: proc(str: []byte, r: rune) -> []byte {
  57. n := index_rune(str, r)
  58. if n < 0 {
  59. n = len(str)
  60. }
  61. return str[:n]
  62. }
  63. // Compares two strings, returning a value representing which one comes first lexiographically.
  64. // -1 for `a`; 1 for `b`, or 0 if they are equal.
  65. compare :: proc(lhs, rhs: []byte) -> int {
  66. return mem.compare(lhs, rhs)
  67. }
  68. contains_rune :: proc(s: []byte, r: rune) -> int {
  69. for c, offset in string(s) {
  70. if c == r {
  71. return offset
  72. }
  73. }
  74. return -1
  75. }
  76. contains :: proc(s, substr: []byte) -> bool {
  77. return index(s, substr) >= 0
  78. }
  79. contains_any :: proc(s, chars: []byte) -> bool {
  80. return index_any(s, chars) >= 0
  81. }
  82. rune_count :: proc(s: []byte) -> int {
  83. return utf8.rune_count(s)
  84. }
  85. equal :: proc(a, b: []byte) -> bool {
  86. return string(a) == string(b)
  87. }
  88. equal_fold :: proc(u, v: []byte) -> bool {
  89. s, t := string(u), string(v)
  90. loop: for s != "" && t != "" {
  91. sr, tr: rune
  92. if s[0] < utf8.RUNE_SELF {
  93. sr, s = rune(s[0]), s[1:]
  94. } else {
  95. r, size := utf8.decode_rune_in_string(s)
  96. sr, s = r, s[size:]
  97. }
  98. if t[0] < utf8.RUNE_SELF {
  99. tr, t = rune(t[0]), t[1:]
  100. } else {
  101. r, size := utf8.decode_rune_in_string(t)
  102. tr, t = r, t[size:]
  103. }
  104. if tr == sr { // easy case
  105. continue loop
  106. }
  107. if tr < sr {
  108. tr, sr = sr, tr
  109. }
  110. if tr < utf8.RUNE_SELF {
  111. switch sr {
  112. case 'A'..='Z':
  113. if tr == (sr+'a')-'A' {
  114. continue loop
  115. }
  116. }
  117. return false
  118. }
  119. r := unicode.simple_fold(sr)
  120. for r != sr && r < tr {
  121. r = unicode.simple_fold(sr)
  122. }
  123. if r == tr {
  124. continue loop
  125. }
  126. return false
  127. }
  128. return s == t
  129. }
  130. has_prefix :: proc(s, prefix: []byte) -> bool {
  131. return len(s) >= len(prefix) && string(s[0:len(prefix)]) == string(prefix)
  132. }
  133. has_suffix :: proc(s, suffix: []byte) -> bool {
  134. return len(s) >= len(suffix) && string(s[len(s)-len(suffix):]) == string(suffix)
  135. }
  136. join :: proc(a: [][]byte, sep: []byte, allocator := context.allocator) -> []byte {
  137. if len(a) == 0 {
  138. return nil
  139. }
  140. n := len(sep) * (len(a) - 1)
  141. for s in a {
  142. n += len(s)
  143. }
  144. b := make([]byte, n, allocator)
  145. i := copy(b, a[0])
  146. for s in a[1:] {
  147. i += copy(b[i:], sep)
  148. i += copy(b[i:], s)
  149. }
  150. return b
  151. }
  152. join_safe :: proc(a: [][]byte, sep: []byte, allocator := context.allocator) -> (data: []byte, err: mem.Allocator_Error) {
  153. if len(a) == 0 {
  154. return nil, nil
  155. }
  156. n := len(sep) * (len(a) - 1)
  157. for s in a {
  158. n += len(s)
  159. }
  160. b := make([]byte, n, allocator) or_return
  161. i := copy(b, a[0])
  162. for s in a[1:] {
  163. i += copy(b[i:], sep)
  164. i += copy(b[i:], s)
  165. }
  166. return b, nil
  167. }
  168. concatenate :: proc(a: [][]byte, allocator := context.allocator) -> []byte {
  169. if len(a) == 0 {
  170. return nil
  171. }
  172. n := 0
  173. for s in a {
  174. n += len(s)
  175. }
  176. b := make([]byte, n, allocator)
  177. i := 0
  178. for s in a {
  179. i += copy(b[i:], s)
  180. }
  181. return b
  182. }
  183. concatenate_safe :: proc(a: [][]byte, allocator := context.allocator) -> (data: []byte, err: mem.Allocator_Error) {
  184. if len(a) == 0 {
  185. return nil, nil
  186. }
  187. n := 0
  188. for s in a {
  189. n += len(s)
  190. }
  191. b := make([]byte, n, allocator) or_return
  192. i := 0
  193. for s in a {
  194. i += copy(b[i:], s)
  195. }
  196. return b, nil
  197. }
  198. @private
  199. _split :: proc(s, sep: []byte, sep_save, n: int, allocator := context.allocator) -> [][]byte {
  200. s, n := s, n
  201. if n == 0 {
  202. return nil
  203. }
  204. if sep == nil {
  205. l := utf8.rune_count(s)
  206. if n < 0 || n > l {
  207. n = l
  208. }
  209. res := make([dynamic][]byte, n, allocator)
  210. for i := 0; i < n-1; i += 1 {
  211. _, w := utf8.decode_rune(s)
  212. res[i] = s[:w]
  213. s = s[w:]
  214. }
  215. if n > 0 {
  216. res[n-1] = s
  217. }
  218. return res[:]
  219. }
  220. if n < 0 {
  221. n = count(s, sep) + 1
  222. }
  223. res := make([dynamic][]byte, n, allocator)
  224. n -= 1
  225. i := 0
  226. for ; i < n; i += 1 {
  227. m := index(s, sep)
  228. if m < 0 {
  229. break
  230. }
  231. res[i] = s[:m+sep_save]
  232. s = s[m+len(sep):]
  233. }
  234. res[i] = s
  235. return res[:i+1]
  236. }
  237. split :: proc(s, sep: []byte, allocator := context.allocator) -> [][]byte {
  238. return _split(s, sep, 0, -1, allocator)
  239. }
  240. split_n :: proc(s, sep: []byte, n: int, allocator := context.allocator) -> [][]byte {
  241. return _split(s, sep, 0, n, allocator)
  242. }
  243. split_after :: proc(s, sep: []byte, allocator := context.allocator) -> [][]byte {
  244. return _split(s, sep, len(sep), -1, allocator)
  245. }
  246. split_after_n :: proc(s, sep: []byte, n: int, allocator := context.allocator) -> [][]byte {
  247. return _split(s, sep, len(sep), n, allocator)
  248. }
  249. @private
  250. _split_iterator :: proc(s: ^[]byte, sep: []byte, sep_save: int) -> (res: []byte, ok: bool) {
  251. if len(sep) == 0 {
  252. res = s[:]
  253. ok = true
  254. s^ = s[len(s):]
  255. return
  256. }
  257. m := index(s^, sep)
  258. if m < 0 {
  259. // not found
  260. res = s[:]
  261. ok = len(res) != 0
  262. s^ = s[len(s):]
  263. } else {
  264. res = s[:m+sep_save]
  265. ok = true
  266. s^ = s[m+len(sep):]
  267. }
  268. return
  269. }
  270. split_iterator :: proc(s: ^[]byte, sep: []byte) -> ([]byte, bool) {
  271. return _split_iterator(s, sep, 0)
  272. }
  273. split_after_iterator :: proc(s: ^[]byte, sep: []byte) -> ([]byte, bool) {
  274. return _split_iterator(s, sep, len(sep))
  275. }
  276. /*
  277. Scan a slice of bytes for a specific byte.
  278. This procedure safely handles slices of any length, including empty slices.
  279. Inputs:
  280. - data: A slice of bytes.
  281. - c: The byte to search for.
  282. Returns:
  283. - index: The index of the byte `c`, or -1 if it was not found.
  284. */
  285. index_byte :: proc "contextless" (s: []byte, c: byte) -> (index: int) #no_bounds_check {
  286. i, l := 0, len(s)
  287. // Guard against small strings. On modern systems, it is ALWAYS
  288. // worth vectorizing assuming there is a hardware vector unit, and
  289. // the data size is large enough.
  290. if l < SIMD_REG_SIZE_128 {
  291. for /**/; i < l; i += 1 {
  292. if s[i] == c {
  293. return i
  294. }
  295. }
  296. return -1
  297. }
  298. c_vec: simd.u8x16 = c
  299. when simd.HAS_HARDWARE_SIMD {
  300. // Note: While this is something that could also logically take
  301. // advantage of AVX512, the various downclocking and power
  302. // consumption related woes make premature to have a dedicated
  303. // code path.
  304. when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
  305. c_vec_256: simd.u8x32 = c
  306. s_vecs: [4]simd.u8x32 = ---
  307. c_vecs: [4]simd.u8x32 = ---
  308. m_vec: [4]u8 = ---
  309. // Scan 128-byte chunks, using 256-bit SIMD.
  310. for nr_blocks := l / (4 * SIMD_REG_SIZE_256); nr_blocks > 0; nr_blocks -= 1 {
  311. #unroll for j in 0..<4 {
  312. s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x32)raw_data(s[i+j*SIMD_REG_SIZE_256:]))
  313. c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec_256)
  314. m_vec[j] = simd.reduce_or(c_vecs[j])
  315. }
  316. if m_vec[0] | m_vec[1] | m_vec[2] | m_vec[3] > 0 {
  317. #unroll for j in 0..<4 {
  318. if m_vec[j] > 0 {
  319. sel := simd.select(c_vecs[j], SCANNER_INDICES_256, SCANNER_SENTINEL_MIN_256)
  320. off := simd.reduce_min(sel)
  321. return i + j * SIMD_REG_SIZE_256 + int(off)
  322. }
  323. }
  324. }
  325. i += 4 * SIMD_REG_SIZE_256
  326. }
  327. // Scan 64-byte chunks, using 256-bit SIMD.
  328. for nr_blocks := (l - i) / (2 * SIMD_REG_SIZE_256); nr_blocks > 0; nr_blocks -= 1 {
  329. #unroll for j in 0..<2 {
  330. s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x32)raw_data(s[i+j*SIMD_REG_SIZE_256:]))
  331. c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec_256)
  332. m_vec[j] = simd.reduce_or(c_vecs[j])
  333. }
  334. if m_vec[0] | m_vec[1] > 0 {
  335. #unroll for j in 0..<2 {
  336. if m_vec[j] > 0 {
  337. sel := simd.select(c_vecs[j], SCANNER_INDICES_256, SCANNER_SENTINEL_MIN_256)
  338. off := simd.reduce_min(sel)
  339. return i + j * SIMD_REG_SIZE_256 + int(off)
  340. }
  341. }
  342. }
  343. i += 2 * SIMD_REG_SIZE_256
  344. }
  345. } else {
  346. s_vecs: [4]simd.u8x16 = ---
  347. c_vecs: [4]simd.u8x16 = ---
  348. m_vecs: [4]u8 = ---
  349. // Scan 64-byte chunks, using 128-bit SIMD.
  350. for nr_blocks := l / (4 * SIMD_REG_SIZE_128); nr_blocks > 0; nr_blocks -= 1 {
  351. #unroll for j in 0..<4 {
  352. s_vecs[j]= intrinsics.unaligned_load(cast(^simd.u8x16)raw_data(s[i+j*SIMD_REG_SIZE_128:]))
  353. c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec)
  354. m_vecs[j] = simd.reduce_or(c_vecs[j])
  355. }
  356. if m_vecs[0] | m_vecs[1] | m_vecs[2] | m_vecs[3] > 0 {
  357. #unroll for j in 0..<4 {
  358. if m_vecs[j] > 0 {
  359. sel := simd.select(c_vecs[j], SCANNER_INDICES_128, SCANNER_SENTINEL_MIN_128)
  360. off := simd.reduce_min(sel)
  361. return i + j * SIMD_REG_SIZE_128 + int(off)
  362. }
  363. }
  364. }
  365. i += 4 * SIMD_REG_SIZE_128
  366. }
  367. }
  368. }
  369. // Scan the remaining SIMD register sized chunks.
  370. //
  371. // Apparently LLVM does ok with 128-bit SWAR, so this path is also taken
  372. // on potato targets. Scanning more at a time when LLVM is emulating SIMD
  373. // likely does not buy much, as all that does is increase GP register
  374. // pressure.
  375. for nr_blocks := (l - i) / SIMD_REG_SIZE_128; nr_blocks > 0; nr_blocks -= 1 {
  376. s0 := intrinsics.unaligned_load(cast(^simd.u8x16)raw_data(s[i:]))
  377. c0 := simd.lanes_eq(s0, c_vec)
  378. if simd.reduce_or(c0) > 0 {
  379. sel := simd.select(c0, SCANNER_INDICES_128, SCANNER_SENTINEL_MIN_128)
  380. off := simd.reduce_min(sel)
  381. return i + int(off)
  382. }
  383. i += SIMD_REG_SIZE_128
  384. }
  385. // Scan serially for the remainder.
  386. for /**/; i < l; i += 1 {
  387. if s[i] == c {
  388. return i
  389. }
  390. }
  391. return -1
  392. }
  393. /*
  394. Scan a slice of bytes for a specific byte, starting from the end and working
  395. backwards to the start.
  396. This procedure safely handles slices of any length, including empty slices.
  397. Inputs:
  398. - data: A slice of bytes.
  399. - c: The byte to search for.
  400. Returns:
  401. - index: The index of the byte `c`, or -1 if it was not found.
  402. */
  403. last_index_byte :: proc "contextless" (s: []byte, c: byte) -> int #no_bounds_check {
  404. i := len(s)
  405. // Guard against small strings. On modern systems, it is ALWAYS
  406. // worth vectorizing assuming there is a hardware vector unit, and
  407. // the data size is large enough.
  408. if i < SIMD_REG_SIZE_128 {
  409. #reverse for ch, j in s {
  410. if ch == c {
  411. return j
  412. }
  413. }
  414. return -1
  415. }
  416. c_vec: simd.u8x16 = c
  417. when simd.HAS_HARDWARE_SIMD {
  418. // Note: While this is something that could also logically take
  419. // advantage of AVX512, the various downclocking and power
  420. // consumption related woes make premature to have a dedicated
  421. // code path.
  422. when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
  423. c_vec_256: simd.u8x32 = c
  424. s_vecs: [4]simd.u8x32 = ---
  425. c_vecs: [4]simd.u8x32 = ---
  426. m_vec: [4]u8 = ---
  427. // Scan 128-byte chunks, using 256-bit SIMD.
  428. for i >= 4 * SIMD_REG_SIZE_256 {
  429. i -= 4 * SIMD_REG_SIZE_256
  430. #unroll for j in 0..<4 {
  431. s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x32)raw_data(s[i+j*SIMD_REG_SIZE_256:]))
  432. c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec_256)
  433. m_vec[j] = simd.reduce_or(c_vecs[j])
  434. }
  435. if m_vec[0] | m_vec[1] | m_vec[2] | m_vec[3] > 0 {
  436. #unroll for j in 0..<4 {
  437. if m_vec[3-j] > 0 {
  438. sel := simd.select(c_vecs[3-j], SCANNER_INDICES_256, SCANNER_SENTINEL_MAX_256)
  439. off := simd.reduce_max(sel)
  440. return i + (3-j) * SIMD_REG_SIZE_256 + int(off)
  441. }
  442. }
  443. }
  444. }
  445. // Scan 64-byte chunks, using 256-bit SIMD.
  446. for i >= 2 * SIMD_REG_SIZE_256 {
  447. i -= 2 * SIMD_REG_SIZE_256
  448. #unroll for j in 0..<2 {
  449. s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x32)raw_data(s[i+j*SIMD_REG_SIZE_256:]))
  450. c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec_256)
  451. m_vec[j] = simd.reduce_or(c_vecs[j])
  452. }
  453. if m_vec[0] | m_vec[1] > 0 {
  454. #unroll for j in 0..<2 {
  455. if m_vec[1-j] > 0 {
  456. sel := simd.select(c_vecs[1-j], SCANNER_INDICES_256, SCANNER_SENTINEL_MAX_256)
  457. off := simd.reduce_max(sel)
  458. return i + (1-j) * SIMD_REG_SIZE_256 + int(off)
  459. }
  460. }
  461. }
  462. }
  463. } else {
  464. s_vecs: [4]simd.u8x16 = ---
  465. c_vecs: [4]simd.u8x16 = ---
  466. m_vecs: [4]u8 = ---
  467. // Scan 64-byte chunks, using 128-bit SIMD.
  468. for i >= 4 * SIMD_REG_SIZE_128 {
  469. i -= 4 * SIMD_REG_SIZE_128
  470. #unroll for j in 0..<4 {
  471. s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x16)raw_data(s[i+j*SIMD_REG_SIZE_128:]))
  472. c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec)
  473. m_vecs[j] = simd.reduce_or(c_vecs[j])
  474. }
  475. if m_vecs[0] | m_vecs[1] | m_vecs[2] | m_vecs[3] > 0 {
  476. #unroll for j in 0..<4 {
  477. if m_vecs[3-j] > 0 {
  478. sel := simd.select(c_vecs[3-j], SCANNER_INDICES_128, SCANNER_SENTINEL_MAX_128)
  479. off := simd.reduce_max(sel)
  480. return i + (3-j) * SIMD_REG_SIZE_128 + int(off)
  481. }
  482. }
  483. }
  484. }
  485. }
  486. }
  487. // Scan the remaining SIMD register sized chunks.
  488. //
  489. // Apparently LLVM does ok with 128-bit SWAR, so this path is also taken
  490. // on potato targets. Scanning more at a time when LLVM is emulating SIMD
  491. // likely does not buy much, as all that does is increase GP register
  492. // pressure.
  493. for i >= SIMD_REG_SIZE_128 {
  494. i -= SIMD_REG_SIZE_128
  495. s0 := intrinsics.unaligned_load(cast(^simd.u8x16)raw_data(s[i:]))
  496. c0 := simd.lanes_eq(s0, c_vec)
  497. if simd.reduce_or(c0) > 0 {
  498. sel := simd.select(c0, SCANNER_INDICES_128, SCANNER_SENTINEL_MAX_128)
  499. off := simd.reduce_max(sel)
  500. return i + int(off)
  501. }
  502. }
  503. // Scan serially for the remainder.
  504. for i > 0 {
  505. i -= 1
  506. if s[i] == c {
  507. return i
  508. }
  509. }
  510. return -1
  511. }
  512. @private PRIME_RABIN_KARP :: 16777619
  513. index :: proc(s, substr: []byte) -> int {
  514. hash_str_rabin_karp :: proc(s: []byte) -> (hash: u32 = 0, pow: u32 = 1) {
  515. for i := 0; i < len(s); i += 1 {
  516. hash = hash*PRIME_RABIN_KARP + u32(s[i])
  517. }
  518. sq := u32(PRIME_RABIN_KARP)
  519. for i := len(s); i > 0; i >>= 1 {
  520. if (i & 1) != 0 {
  521. pow *= sq
  522. }
  523. sq *= sq
  524. }
  525. return
  526. }
  527. n := len(substr)
  528. switch {
  529. case n == 0:
  530. return 0
  531. case n == 1:
  532. return index_byte(s, substr[0])
  533. case n == len(s):
  534. if string(s) == string(substr) {
  535. return 0
  536. }
  537. return -1
  538. case n > len(s):
  539. return -1
  540. }
  541. hash, pow := hash_str_rabin_karp(substr)
  542. h: u32
  543. for i := 0; i < n; i += 1 {
  544. h = h*PRIME_RABIN_KARP + u32(s[i])
  545. }
  546. if h == hash && string(s[:n]) == string(substr) {
  547. return 0
  548. }
  549. for i := n; i < len(s); /**/ {
  550. h *= PRIME_RABIN_KARP
  551. h += u32(s[i])
  552. h -= pow * u32(s[i-n])
  553. i += 1
  554. if h == hash && string(s[i-n:i]) == string(substr) {
  555. return i - n
  556. }
  557. }
  558. return -1
  559. }
  560. last_index :: proc(s, substr: []byte) -> int {
  561. hash_str_rabin_karp_reverse :: proc(s: []byte) -> (hash: u32 = 0, pow: u32 = 1) {
  562. for i := len(s) - 1; i >= 0; i -= 1 {
  563. hash = hash*PRIME_RABIN_KARP + u32(s[i])
  564. }
  565. sq := u32(PRIME_RABIN_KARP)
  566. for i := len(s); i > 0; i >>= 1 {
  567. if (i & 1) != 0 {
  568. pow *= sq
  569. }
  570. sq *= sq
  571. }
  572. return
  573. }
  574. n := len(substr)
  575. switch {
  576. case n == 0:
  577. return len(s)
  578. case n == 1:
  579. return last_index_byte(s, substr[0])
  580. case n == len(s):
  581. return 0 if string(substr) == string(s) else -1
  582. case n > len(s):
  583. return -1
  584. }
  585. hash, pow := hash_str_rabin_karp_reverse(substr)
  586. last := len(s) - n
  587. h: u32
  588. for i := len(s)-1; i >= last; i -= 1 {
  589. h = h*PRIME_RABIN_KARP + u32(s[i])
  590. }
  591. if h == hash && string(s[last:]) == string(substr) {
  592. return last
  593. }
  594. for i := last-1; i >= 0; i -= 1 {
  595. h *= PRIME_RABIN_KARP
  596. h += u32(s[i])
  597. h -= pow * u32(s[i+n])
  598. if h == hash && string(s[i:i+n]) == string(substr) {
  599. return i
  600. }
  601. }
  602. return -1
  603. }
  604. index_any :: proc(s, chars: []byte) -> int {
  605. if chars == nil {
  606. return -1
  607. }
  608. // TODO(bill): Optimize
  609. for r, i in s {
  610. for c in chars {
  611. if r == c {
  612. return i
  613. }
  614. }
  615. }
  616. return -1
  617. }
  618. last_index_any :: proc(s, chars: []byte) -> int {
  619. if chars == nil {
  620. return -1
  621. }
  622. for i := len(s); i > 0; {
  623. r, w := utf8.decode_last_rune(s[:i])
  624. i -= w
  625. for c in string(chars) {
  626. if r == c {
  627. return i
  628. }
  629. }
  630. }
  631. return -1
  632. }
  633. count :: proc(s, substr: []byte) -> int {
  634. if len(substr) == 0 { // special case
  635. return rune_count(s) + 1
  636. }
  637. if len(substr) == 1 {
  638. c := substr[0]
  639. switch len(s) {
  640. case 0:
  641. return 0
  642. case 1:
  643. return int(s[0] == c)
  644. }
  645. n := 0
  646. for i := 0; i < len(s); i += 1 {
  647. if s[i] == c {
  648. n += 1
  649. }
  650. }
  651. return n
  652. }
  653. // TODO(bill): Use a non-brute for approach
  654. n := 0
  655. str := s
  656. for {
  657. i := index(str, substr)
  658. if i == -1 {
  659. return n
  660. }
  661. n += 1
  662. str = str[i+len(substr):]
  663. }
  664. return n
  665. }
  666. repeat :: proc(s: []byte, count: int, allocator := context.allocator) -> []byte {
  667. if count < 0 {
  668. panic("bytes: negative repeat count")
  669. } else if count > 0 && (len(s)*count)/count != len(s) {
  670. panic("bytes: repeat count will cause an overflow")
  671. }
  672. b := make([]byte, len(s)*count, allocator)
  673. i := copy(b, s)
  674. for i < len(b) { // 2^N trick to reduce the need to copy
  675. copy(b[i:], b[:i])
  676. i *= 2
  677. }
  678. return b
  679. }
  680. replace_all :: proc(s, old, new: []byte, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
  681. return replace(s, old, new, -1, allocator)
  682. }
  683. // if n < 0, no limit on the number of replacements
  684. replace :: proc(s, old, new: []byte, n: int, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
  685. if string(old) == string(new) || n == 0 {
  686. was_allocation = false
  687. output = s
  688. return
  689. }
  690. byte_count := n
  691. if m := count(s, old); m == 0 {
  692. was_allocation = false
  693. output = s
  694. return
  695. } else if n < 0 || m < n {
  696. byte_count = m
  697. }
  698. t := make([]byte, len(s) + byte_count*(len(new) - len(old)), allocator)
  699. was_allocation = true
  700. w := 0
  701. start := 0
  702. for i := 0; i < byte_count; i += 1 {
  703. j := start
  704. if len(old) == 0 {
  705. if i > 0 {
  706. _, width := utf8.decode_rune(s[start:])
  707. j += width
  708. }
  709. } else {
  710. j += index(s[start:], old)
  711. }
  712. w += copy(t[w:], s[start:j])
  713. w += copy(t[w:], new)
  714. start = j + len(old)
  715. }
  716. w += copy(t[w:], s[start:])
  717. output = t[0:w]
  718. return
  719. }
  720. remove :: proc(s, key: []byte, n: int, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
  721. return replace(s, key, {}, n, allocator)
  722. }
  723. remove_all :: proc(s, key: []byte, allocator := context.allocator) -> (output: []byte, was_allocation: bool) {
  724. return remove(s, key, -1, allocator)
  725. }
  726. @(private) _ascii_space := [256]u8{'\t' = 1, '\n' = 1, '\v' = 1, '\f' = 1, '\r' = 1, ' ' = 1}
  727. is_ascii_space :: proc(r: rune) -> bool {
  728. if r < utf8.RUNE_SELF {
  729. return _ascii_space[u8(r)] != 0
  730. }
  731. return false
  732. }
  733. is_space :: proc(r: rune) -> bool {
  734. if r < 0x2000 {
  735. switch r {
  736. case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xa0, 0x1680:
  737. return true
  738. }
  739. } else {
  740. if r <= 0x200a {
  741. return true
  742. }
  743. switch r {
  744. case 0x2028, 0x2029, 0x202f, 0x205f, 0x3000:
  745. return true
  746. }
  747. }
  748. return false
  749. }
  750. is_null :: proc(r: rune) -> bool {
  751. return r == 0x0000
  752. }
  753. index_proc :: proc(s: []byte, p: proc(rune) -> bool, truth := true) -> int {
  754. for r, i in string(s) {
  755. if p(r) == truth {
  756. return i
  757. }
  758. }
  759. return -1
  760. }
  761. index_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr, truth := true) -> int {
  762. for r, i in string(s) {
  763. if p(state, r) == truth {
  764. return i
  765. }
  766. }
  767. return -1
  768. }
  769. last_index_proc :: proc(s: []byte, p: proc(rune) -> bool, truth := true) -> int {
  770. // TODO(bill): Probably use Rabin-Karp Search
  771. for i := len(s); i > 0; {
  772. r, size := utf8.decode_last_rune(s[:i])
  773. i -= size
  774. if p(r) == truth {
  775. return i
  776. }
  777. }
  778. return -1
  779. }
  780. last_index_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr, truth := true) -> int {
  781. // TODO(bill): Probably use Rabin-Karp Search
  782. for i := len(s); i > 0; {
  783. r, size := utf8.decode_last_rune(s[:i])
  784. i -= size
  785. if p(state, r) == truth {
  786. return i
  787. }
  788. }
  789. return -1
  790. }
  791. trim_left_proc :: proc(s: []byte, p: proc(rune) -> bool) -> []byte {
  792. i := index_proc(s, p, false)
  793. if i == -1 {
  794. return nil
  795. }
  796. return s[i:]
  797. }
  798. index_rune :: proc(s: []byte, r: rune) -> int {
  799. switch {
  800. case u32(r) < utf8.RUNE_SELF:
  801. return index_byte(s, byte(r))
  802. case r == utf8.RUNE_ERROR:
  803. for c, i in string(s) {
  804. if c == utf8.RUNE_ERROR {
  805. return i
  806. }
  807. }
  808. return -1
  809. case !utf8.valid_rune(r):
  810. return -1
  811. }
  812. b, w := utf8.encode_rune(r)
  813. return index(s, b[:w])
  814. }
  815. trim_left_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr) -> []byte {
  816. i := index_proc_with_state(s, p, state, false)
  817. if i == -1 {
  818. return nil
  819. }
  820. return s[i:]
  821. }
  822. trim_right_proc :: proc(s: []byte, p: proc(rune) -> bool) -> []byte {
  823. i := last_index_proc(s, p, false)
  824. if i >= 0 && s[i] >= utf8.RUNE_SELF {
  825. _, w := utf8.decode_rune(s[i:])
  826. i += w
  827. } else {
  828. i += 1
  829. }
  830. return s[0:i]
  831. }
  832. trim_right_proc_with_state :: proc(s: []byte, p: proc(rawptr, rune) -> bool, state: rawptr) -> []byte {
  833. i := last_index_proc_with_state(s, p, state, false)
  834. if i >= 0 && s[i] >= utf8.RUNE_SELF {
  835. _, w := utf8.decode_rune(s[i:])
  836. i += w
  837. } else {
  838. i += 1
  839. }
  840. return s[0:i]
  841. }
  842. is_in_cutset :: proc(state: rawptr, r: rune) -> bool {
  843. if state == nil {
  844. return false
  845. }
  846. cutset := (^string)(state)^
  847. for c in cutset {
  848. if r == c {
  849. return true
  850. }
  851. }
  852. return false
  853. }
  854. trim_left :: proc(s: []byte, cutset: []byte) -> []byte {
  855. if s == nil || cutset == nil {
  856. return s
  857. }
  858. state := cutset
  859. return trim_left_proc_with_state(s, is_in_cutset, &state)
  860. }
  861. trim_right :: proc(s: []byte, cutset: []byte) -> []byte {
  862. if s == nil || cutset == nil {
  863. return s
  864. }
  865. state := cutset
  866. return trim_right_proc_with_state(s, is_in_cutset, &state)
  867. }
  868. trim :: proc(s: []byte, cutset: []byte) -> []byte {
  869. return trim_right(trim_left(s, cutset), cutset)
  870. }
  871. trim_left_space :: proc(s: []byte) -> []byte {
  872. return trim_left_proc(s, is_space)
  873. }
  874. trim_right_space :: proc(s: []byte) -> []byte {
  875. return trim_right_proc(s, is_space)
  876. }
  877. trim_space :: proc(s: []byte) -> []byte {
  878. return trim_right_space(trim_left_space(s))
  879. }
  880. trim_left_null :: proc(s: []byte) -> []byte {
  881. return trim_left_proc(s, is_null)
  882. }
  883. trim_right_null :: proc(s: []byte) -> []byte {
  884. return trim_right_proc(s, is_null)
  885. }
  886. trim_null :: proc(s: []byte) -> []byte {
  887. return trim_right_null(trim_left_null(s))
  888. }
  889. trim_prefix :: proc(s, prefix: []byte) -> []byte {
  890. if has_prefix(s, prefix) {
  891. return s[len(prefix):]
  892. }
  893. return s
  894. }
  895. trim_suffix :: proc(s, suffix: []byte) -> []byte {
  896. if has_suffix(s, suffix) {
  897. return s[:len(s)-len(suffix)]
  898. }
  899. return s
  900. }
  901. split_multi :: proc(s: []byte, substrs: [][]byte, skip_empty := false, allocator := context.allocator) -> [][]byte #no_bounds_check {
  902. if s == nil || len(substrs) <= 0 {
  903. return nil
  904. }
  905. sublen := len(substrs[0])
  906. for substr in substrs[1:] {
  907. sublen = min(sublen, len(substr))
  908. }
  909. shared := len(s) - sublen
  910. if shared <= 0 {
  911. return nil
  912. }
  913. // number, index, last
  914. n, i, l := 0, 0, 0
  915. // count results
  916. first_pass: for i <= shared {
  917. for substr in substrs {
  918. if string(s[i:i+sublen]) == string(substr) {
  919. if !skip_empty || i - l > 0 {
  920. n += 1
  921. }
  922. i += sublen
  923. l = i
  924. continue first_pass
  925. }
  926. }
  927. _, skip := utf8.decode_rune(s[i:])
  928. i += skip
  929. }
  930. if !skip_empty || len(s) - l > 0 {
  931. n += 1
  932. }
  933. if n < 1 {
  934. // no results
  935. return nil
  936. }
  937. buf := make([][]byte, n, allocator)
  938. n, i, l = 0, 0, 0
  939. // slice results
  940. second_pass: for i <= shared {
  941. for substr in substrs {
  942. if string(s[i:i+sublen]) == string(substr) {
  943. if !skip_empty || i - l > 0 {
  944. buf[n] = s[l:i]
  945. n += 1
  946. }
  947. i += sublen
  948. l = i
  949. continue second_pass
  950. }
  951. }
  952. _, skip := utf8.decode_rune(s[i:])
  953. i += skip
  954. }
  955. if !skip_empty || len(s) - l > 0 {
  956. buf[n] = s[l:]
  957. }
  958. return buf
  959. }
  960. split_multi_iterator :: proc(s: ^[]byte, substrs: [][]byte, skip_empty := false) -> ([]byte, bool) #no_bounds_check {
  961. if s == nil || s^ == nil || len(substrs) <= 0 {
  962. return nil, false
  963. }
  964. sublen := len(substrs[0])
  965. for substr in substrs[1:] {
  966. sublen = min(sublen, len(substr))
  967. }
  968. shared := len(s) - sublen
  969. if shared <= 0 {
  970. return nil, false
  971. }
  972. // index, last
  973. i, l := 0, 0
  974. loop: for i <= shared {
  975. for substr in substrs {
  976. if string(s[i:i+sublen]) == string(substr) {
  977. if !skip_empty || i - l > 0 {
  978. res := s[l:i]
  979. s^ = s[i:]
  980. return res, true
  981. }
  982. i += sublen
  983. l = i
  984. continue loop
  985. }
  986. }
  987. _, skip := utf8.decode_rune(s[i:])
  988. i += skip
  989. }
  990. if !skip_empty || len(s) - l > 0 {
  991. res := s[l:]
  992. s^ = s[len(s):]
  993. return res, true
  994. }
  995. return nil, false
  996. }
  997. // Scrubs invalid utf-8 characters and replaces them with the replacement string
  998. // Adjacent invalid bytes are only replaced once
  999. scrub :: proc(s: []byte, replacement: []byte, allocator := context.allocator) -> []byte {
  1000. str := s
  1001. b: Buffer
  1002. buffer_init_allocator(&b, 0, len(s), allocator)
  1003. has_error := false
  1004. cursor := 0
  1005. origin := str
  1006. for len(str) > 0 {
  1007. r, w := utf8.decode_rune(str)
  1008. if r == utf8.RUNE_ERROR {
  1009. if !has_error {
  1010. has_error = true
  1011. buffer_write(&b, origin[:cursor])
  1012. }
  1013. } else if has_error {
  1014. has_error = false
  1015. buffer_write(&b, replacement)
  1016. origin = origin[cursor:]
  1017. cursor = 0
  1018. }
  1019. cursor += w
  1020. str = str[w:]
  1021. }
  1022. return buffer_to_bytes(&b)
  1023. }
  1024. reverse :: proc(s: []byte, allocator := context.allocator) -> []byte {
  1025. str := s
  1026. n := len(str)
  1027. buf := make([]byte, n)
  1028. i := n
  1029. for len(str) > 0 {
  1030. _, w := utf8.decode_rune(str)
  1031. i -= w
  1032. copy(buf[i:], str[:w])
  1033. str = str[w:]
  1034. }
  1035. return buf
  1036. }
  1037. expand_tabs :: proc(s: []byte, tab_size: int, allocator := context.allocator) -> []byte {
  1038. if tab_size <= 0 {
  1039. panic("tab size must be positive")
  1040. }
  1041. if s == nil {
  1042. return nil
  1043. }
  1044. b: Buffer
  1045. buffer_init_allocator(&b, 0, len(s), allocator)
  1046. str := s
  1047. column: int
  1048. for len(str) > 0 {
  1049. r, w := utf8.decode_rune(str)
  1050. if r == '\t' {
  1051. expand := tab_size - column%tab_size
  1052. for i := 0; i < expand; i += 1 {
  1053. buffer_write_byte(&b, ' ')
  1054. }
  1055. column += expand
  1056. } else {
  1057. if r == '\n' {
  1058. column = 0
  1059. } else {
  1060. column += w
  1061. }
  1062. buffer_write_rune(&b, r)
  1063. }
  1064. str = str[w:]
  1065. }
  1066. return buffer_to_bytes(&b)
  1067. }
  1068. partition :: proc(str, sep: []byte) -> (head, match, tail: []byte) {
  1069. i := index(str, sep)
  1070. if i == -1 {
  1071. head = str
  1072. return
  1073. }
  1074. head = str[:i]
  1075. match = str[i:i+len(sep)]
  1076. tail = str[i+len(sep):]
  1077. return
  1078. }
  1079. center_justify :: centre_justify // NOTE(bill): Because Americans exist
  1080. // centre_justify returns a byte slice with a pad byte slice at boths sides if the str's rune length is smaller than length
  1081. centre_justify :: proc(str: []byte, length: int, pad: []byte, allocator := context.allocator) -> []byte {
  1082. n := rune_count(str)
  1083. if n >= length || pad == nil {
  1084. return clone(str, allocator)
  1085. }
  1086. remains := length-1
  1087. pad_len := rune_count(pad)
  1088. b: Buffer
  1089. buffer_init_allocator(&b, 0, len(str) + (remains/pad_len + 1)*len(pad), allocator)
  1090. write_pad_string(&b, pad, pad_len, remains/2)
  1091. buffer_write(&b, str)
  1092. write_pad_string(&b, pad, pad_len, (remains+1)/2)
  1093. return buffer_to_bytes(&b)
  1094. }
  1095. // left_justify returns a byte slice with a pad byte slice at left side if the str's rune length is smaller than length
  1096. left_justify :: proc(str: []byte, length: int, pad: []byte, allocator := context.allocator) -> []byte {
  1097. n := rune_count(str)
  1098. if n >= length || pad == nil {
  1099. return clone(str, allocator)
  1100. }
  1101. remains := length-1
  1102. pad_len := rune_count(pad)
  1103. b: Buffer
  1104. buffer_init_allocator(&b, 0, len(str) + (remains/pad_len + 1)*len(pad), allocator)
  1105. buffer_write(&b, str)
  1106. write_pad_string(&b, pad, pad_len, remains)
  1107. return buffer_to_bytes(&b)
  1108. }
  1109. // right_justify returns a byte slice with a pad byte slice at right side if the str's rune length is smaller than length
  1110. right_justify :: proc(str: []byte, length: int, pad: []byte, allocator := context.allocator) -> []byte {
  1111. n := rune_count(str)
  1112. if n >= length || pad == nil {
  1113. return clone(str, allocator)
  1114. }
  1115. remains := length-1
  1116. pad_len := rune_count(pad)
  1117. b: Buffer
  1118. buffer_init_allocator(&b, 0, len(str) + (remains/pad_len + 1)*len(pad), allocator)
  1119. write_pad_string(&b, pad, pad_len, remains)
  1120. buffer_write(&b, str)
  1121. return buffer_to_bytes(&b)
  1122. }
  1123. @private
  1124. write_pad_string :: proc(b: ^Buffer, pad: []byte, pad_len, remains: int) {
  1125. repeats := remains / pad_len
  1126. for i := 0; i < repeats; i += 1 {
  1127. buffer_write(b, pad)
  1128. }
  1129. n := remains % pad_len
  1130. p := pad
  1131. for i := 0; i < n; i += 1 {
  1132. r, width := utf8.decode_rune(p)
  1133. buffer_write_rune(b, r)
  1134. p = p[width:]
  1135. }
  1136. }
  1137. // fields splits the byte slice s around each instance of one or more consecutive white space character, defined by unicode.is_space
  1138. // returning a slice of subslices of s or an empty slice if s only contains white space
  1139. fields :: proc(s: []byte, allocator := context.allocator) -> [][]byte #no_bounds_check {
  1140. n := 0
  1141. was_space := 1
  1142. set_bits := u8(0)
  1143. // check to see
  1144. for i in 0..<len(s) {
  1145. r := s[i]
  1146. set_bits |= r
  1147. is_space := int(_ascii_space[r])
  1148. n += was_space & ~is_space
  1149. was_space = is_space
  1150. }
  1151. if set_bits >= utf8.RUNE_SELF {
  1152. return fields_proc(s, unicode.is_space, allocator)
  1153. }
  1154. if n == 0 {
  1155. return nil
  1156. }
  1157. a := make([][]byte, n, allocator)
  1158. na := 0
  1159. field_start := 0
  1160. i := 0
  1161. for i < len(s) && _ascii_space[s[i]] != 0 {
  1162. i += 1
  1163. }
  1164. field_start = i
  1165. for i < len(s) {
  1166. if _ascii_space[s[i]] == 0 {
  1167. i += 1
  1168. continue
  1169. }
  1170. a[na] = s[field_start : i]
  1171. na += 1
  1172. i += 1
  1173. for i < len(s) && _ascii_space[s[i]] != 0 {
  1174. i += 1
  1175. }
  1176. field_start = i
  1177. }
  1178. if field_start < len(s) {
  1179. a[na] = s[field_start:]
  1180. }
  1181. return a
  1182. }
  1183. // fields_proc splits the byte slice s at each run of unicode code points `ch` satisfying f(ch)
  1184. // returns a slice of subslices of s
  1185. // If all code points in s satisfy f(ch) or string is empty, an empty slice is returned
  1186. //
  1187. // fields_proc makes no guarantee about the order in which it calls f(ch)
  1188. // it assumes that `f` always returns the same value for a given ch
  1189. fields_proc :: proc(s: []byte, f: proc(rune) -> bool, allocator := context.allocator) -> [][]byte #no_bounds_check {
  1190. subslices := make([dynamic][]byte, 0, 32, allocator)
  1191. start, end := -1, -1
  1192. for r, offset in string(s) {
  1193. end = offset
  1194. if f(r) {
  1195. if start >= 0 {
  1196. append(&subslices, s[start : end])
  1197. // -1 could be used, but just speed it up through bitwise not
  1198. // gotta love 2's complement
  1199. start = ~start
  1200. }
  1201. } else {
  1202. if start < 0 {
  1203. start = end
  1204. }
  1205. }
  1206. }
  1207. if start >= 0 {
  1208. append(&subslices, s[start : len(s)])
  1209. }
  1210. return subslices[:]
  1211. }
  1212. // alias returns true iff a and b have a non-zero length, and any part of
  1213. // a overlaps with b.
  1214. alias :: proc "contextless" (a, b: []byte) -> bool {
  1215. a_len, b_len := len(a), len(b)
  1216. if a_len == 0 || b_len == 0 {
  1217. return false
  1218. }
  1219. a_start, b_start := uintptr(raw_data(a)), uintptr(raw_data(b))
  1220. a_end, b_end := a_start + uintptr(a_len-1), b_start + uintptr(b_len-1)
  1221. return a_start <= b_end && b_start <= a_end
  1222. }
  1223. // alias_inexactly returns true iff a and b have a non-zero length,
  1224. // the base pointer of a and b are NOT equal, and any part of a overlaps
  1225. // with b (ie: `alias(a, b)` with an exception that returns false for
  1226. // `a == b`, `b = a[:len(a)-69]` and similar conditions).
  1227. alias_inexactly :: proc "contextless" (a, b: []byte) -> bool {
  1228. if raw_data(a) == raw_data(b) {
  1229. return false
  1230. }
  1231. return alias(a, b)
  1232. }