internal.odin
  1. #+vet !cast
  2. package runtime
  3. import "base:intrinsics"
  4. @(private="file")
  5. IS_WASM :: ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
  6. @(private)
  7. RUNTIME_LINKAGE :: "strong" when (
  8. ODIN_USE_SEPARATE_MODULES ||
  9. ODIN_BUILD_MODE == .Dynamic ||
  10. !ODIN_NO_CRT) else "internal"
  11. RUNTIME_REQUIRE :: false // !ODIN_TILDE
  12. @(private)
  13. __float16 :: f16 when __ODIN_LLVM_F16_SUPPORTED else u16
  14. HAS_HARDWARE_SIMD :: false when (ODIN_ARCH == .amd64 || ODIN_ARCH == .i386) && !intrinsics.has_target_feature("sse2") else
  15. false when (ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32) && !intrinsics.has_target_feature("neon") else
  16. false when (ODIN_ARCH == .wasm64p32 || ODIN_ARCH == .wasm32) && !intrinsics.has_target_feature("simd128") else
  17. false when (ODIN_ARCH == .riscv64) && !intrinsics.has_target_feature("v") else
  18. true
  19. @(private)
  20. byte_slice :: #force_inline proc "contextless" (data: rawptr, len: int) -> []byte #no_bounds_check {
  21. return ([^]byte)(data)[:max(len, 0)]
  22. }
  23. is_power_of_two_int :: #force_inline proc "contextless" (x: int) -> bool {
  24. if x <= 0 {
  25. return false
  26. }
  27. return (x & (x-1)) == 0
  28. }
  29. align_forward_int :: #force_inline proc "odin" (ptr, align: int) -> int {
  30. assert(is_power_of_two_int(align))
  31. p := ptr
  32. modulo := p & (align-1)
  33. if modulo != 0 {
  34. p += align - modulo
  35. }
  36. return p
  37. }
  38. is_power_of_two_uint :: #force_inline proc "contextless" (x: uint) -> bool {
  39. if x <= 0 {
  40. return false
  41. }
  42. return (x & (x-1)) == 0
  43. }
  44. align_forward_uint :: #force_inline proc "odin" (ptr, align: uint) -> uint {
  45. assert(is_power_of_two_uint(align))
  46. p := ptr
  47. modulo := p & (align-1)
  48. if modulo != 0 {
  49. p += align - modulo
  50. }
  51. return p
  52. }
  53. is_power_of_two_uintptr :: #force_inline proc "contextless" (x: uintptr) -> bool {
  54. if x <= 0 {
  55. return false
  56. }
  57. return (x & (x-1)) == 0
  58. }
  59. align_forward_uintptr :: #force_inline proc "odin" (ptr, align: uintptr) -> uintptr {
  60. assert(is_power_of_two_uintptr(align))
  61. p := ptr
  62. modulo := p & (align-1)
  63. if modulo != 0 {
  64. p += align - modulo
  65. }
  66. return p
  67. }
  68. is_power_of_two :: proc {
  69. is_power_of_two_int,
  70. is_power_of_two_uint,
  71. is_power_of_two_uintptr,
  72. }
  73. align_forward :: proc {
  74. align_forward_int,
  75. align_forward_uint,
  76. align_forward_uintptr,
  77. }
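// The bit trick above works because, for a power-of-two `align`, `p & (align-1)`
// is `p mod align`. A couple of illustrative values (not exhaustive):
//
//	align_forward_int(13, 8) == 16  // remainder 5, so 13 + (8 - 5)
//	align_forward_int(16, 8) == 16  // already aligned, remainder 0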
  78. mem_zero :: proc "contextless" (data: rawptr, len: int) -> rawptr {
  79. if data == nil {
  80. return nil
  81. }
  82. if len <= 0 {
  83. return data
  84. }
  85. intrinsics.mem_zero(data, len)
  86. return data
  87. }
  88. mem_copy :: proc "contextless" (dst, src: rawptr, len: int) -> rawptr {
  89. if src != nil && dst != src && len > 0 {
  90. // NOTE(bill): This _must_ be implemented like C's memmove
  91. intrinsics.mem_copy(dst, src, len)
  92. }
  93. return dst
  94. }
  95. mem_copy_non_overlapping :: proc "contextless" (dst, src: rawptr, len: int) -> rawptr {
  96. if src != nil && dst != src && len > 0 {
  97. // NOTE(bill): This _must_ be implemented like C's memcpy
  98. intrinsics.mem_copy_non_overlapping(dst, src, len)
  99. }
  100. return dst
  101. }
  102. DEFAULT_ALIGNMENT :: 2*align_of(rawptr)
  103. mem_alloc_bytes :: #force_no_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
  104. assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
105. if size == 0 || allocator.procedure == nil {
  106. return nil, nil
  107. }
  108. return allocator.procedure(allocator.data, .Alloc, size, alignment, nil, 0, loc)
  109. }
  110. mem_alloc :: #force_no_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
  111. assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
  112. if size == 0 || allocator.procedure == nil {
  113. return nil, nil
  114. }
  115. return allocator.procedure(allocator.data, .Alloc, size, alignment, nil, 0, loc)
  116. }
  117. mem_alloc_non_zeroed :: #force_no_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
  118. assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
  119. if size == 0 || allocator.procedure == nil {
  120. return nil, nil
  121. }
  122. return allocator.procedure(allocator.data, .Alloc_Non_Zeroed, size, alignment, nil, 0, loc)
  123. }
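// Illustrative usage sketch (hypothetical local names, assuming the default
// context allocator is installed):
//
//	buf, err := mem_alloc(64)             // zeroed, DEFAULT_ALIGNMENT-aligned
//	raw, _   := mem_alloc_non_zeroed(64)  // contents left unspecified
//	_ = mem_free_bytes(buf)
//	_ = mem_free_bytes(raw)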
  124. mem_free :: #force_no_inline proc(ptr: rawptr, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
  125. if ptr == nil || allocator.procedure == nil {
  126. return nil
  127. }
  128. _, err := allocator.procedure(allocator.data, .Free, 0, 0, ptr, 0, loc)
  129. return err
  130. }
  131. mem_free_with_size :: #force_no_inline proc(ptr: rawptr, byte_count: int, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
  132. if ptr == nil || allocator.procedure == nil {
  133. return nil
  134. }
  135. _, err := allocator.procedure(allocator.data, .Free, 0, 0, ptr, byte_count, loc)
  136. return err
  137. }
  138. mem_free_bytes :: #force_no_inline proc(bytes: []byte, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
  139. if bytes == nil || allocator.procedure == nil {
  140. return nil
  141. }
  142. _, err := allocator.procedure(allocator.data, .Free, 0, 0, raw_data(bytes), len(bytes), loc)
  143. return err
  144. }
  145. mem_free_all :: #force_no_inline proc(allocator := context.allocator, loc := #caller_location) -> (err: Allocator_Error) {
  146. if allocator.procedure != nil {
  147. _, err = allocator.procedure(allocator.data, .Free_All, 0, 0, nil, 0, loc)
  148. }
  149. return
  150. }
  151. _mem_resize :: #force_no_inline proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, should_zero: bool, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
  152. assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
  153. if allocator.procedure == nil {
  154. return nil, nil
  155. }
  156. if new_size == 0 {
  157. if ptr != nil {
  158. _, err = allocator.procedure(allocator.data, .Free, 0, 0, ptr, old_size, loc)
  159. return
  160. }
  161. return
  162. } else if ptr == nil {
  163. if should_zero {
  164. return allocator.procedure(allocator.data, .Alloc, new_size, alignment, nil, 0, loc)
  165. } else {
  166. return allocator.procedure(allocator.data, .Alloc_Non_Zeroed, new_size, alignment, nil, 0, loc)
  167. }
  168. } else if old_size == new_size && uintptr(ptr) % uintptr(alignment) == 0 {
  169. data = ([^]byte)(ptr)[:old_size]
  170. return
  171. }
  172. if should_zero {
  173. data, err = allocator.procedure(allocator.data, .Resize, new_size, alignment, ptr, old_size, loc)
  174. } else {
  175. data, err = allocator.procedure(allocator.data, .Resize_Non_Zeroed, new_size, alignment, ptr, old_size, loc)
  176. }
  177. if err == .Mode_Not_Implemented {
  178. if should_zero {
  179. data, err = allocator.procedure(allocator.data, .Alloc, new_size, alignment, nil, 0, loc)
  180. } else {
  181. data, err = allocator.procedure(allocator.data, .Alloc_Non_Zeroed, new_size, alignment, nil, 0, loc)
  182. }
  183. if err != nil {
  184. return
  185. }
  186. copy(data, ([^]byte)(ptr)[:old_size])
  187. _, err = allocator.procedure(allocator.data, .Free, 0, 0, ptr, old_size, loc)
  188. }
  189. return
  190. }
  191. mem_resize :: proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
  192. assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
  193. return _mem_resize(ptr, old_size, new_size, alignment, allocator, true, loc)
  194. }
  195. non_zero_mem_resize :: proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
  196. assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
  197. return _mem_resize(ptr, old_size, new_size, alignment, allocator, false, loc)
  198. }
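// Illustrative usage sketch (hypothetical names): `_mem_resize` first asks the
// allocator for .Resize/.Resize_Non_Zeroed and only falls back to
// Alloc + copy + Free when the allocator reports .Mode_Not_Implemented.
//
//	grown, err := mem_resize(raw_data(buf), len(buf), 2*len(buf))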
  199. conditional_mem_zero :: proc "contextless" (data: rawptr, n_: int) #no_bounds_check {
200. // When acquiring memory from the OS for the first time, it's likely that the
201. // OS satisfies the request by mapping the shared zero page multiple times. The
202. // actual allocation does not have physical pages backing it until those
203. // pages are written to, which causes a page fault. This is often called COW
204. // (Copy on Write).
  205. //
  206. // You do not want to actually zero out memory in this case because it would
207. // cause a bunch of page faults, decreasing the speed of allocations and
208. // increasing the amount of actual resident physical memory used.
  209. //
210. // Instead, a better technique is to check if memory is zeroed before zeroing
  211. // it. This turns out to be an important optimization in practice, saving
  212. // nearly half (or more) the amount of physical memory used by an application.
  213. // This is why every implementation of calloc in libc does this optimization.
  214. //
215. // It may seem counter-intuitive, but most allocations in an application are
216. // wasted and never used. Consider something like a [dynamic]T, which
217. // always doubles in capacity on resize even though you rarely ever use its
218. // full capacity; zeroing the unused remainder of that memory would leave
219. // a lot of resident waste.
  220. //
  221. // Keep in mind the OS is already guaranteed to give you zeroed memory by
222. // mapping in this zero page multiple times, so in the best case there is no
223. // need to actually zero anything. As for testing all this memory for a zero
224. // value, it costs nothing because the same zero page is used for the
  225. // whole allocation and will exist in L1 cache for the entire zero checking
  226. // process.
  227. if n_ <= 0 {
  228. return
  229. }
  230. n := uint(n_)
  231. n_words := n / size_of(uintptr)
  232. p_words := ([^]uintptr)(data)[:n_words]
  233. p_bytes := ([^]byte)(data)[size_of(uintptr) * n_words:n]
  234. for &p_word in p_words {
  235. if p_word != 0 {
  236. p_word = 0
  237. }
  238. }
  239. for &p_byte in p_bytes {
  240. if p_byte != 0 {
  241. p_byte = 0
  242. }
  243. }
  244. }
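// Hypothetical usage sketch of the optimization described above: zeroing a
// freshly mapped buffer writes only to words that are not already zero, so
// untouched copy-on-write pages stay clean.
//
//	conditional_mem_zero(raw_data(pages), len(pages))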
  245. memory_equal :: proc "contextless" (x, y: rawptr, n: int) -> bool {
  246. switch {
  247. case n == 0: return true
  248. case x == y: return true
  249. }
  250. a, b := cast([^]byte)x, cast([^]byte)y
  251. n := uint(n)
  252. i := uint(0)
  253. m := uint(0)
  254. if n >= 8 {
  255. when HAS_HARDWARE_SIMD {
  256. // Avoid using 256-bit SIMD on platforms where its emulation is
  257. // likely to be less than ideal.
  258. when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
  259. m = n / 32 * 32
  260. for /**/; i < m; i += 32 {
  261. load_a := intrinsics.unaligned_load(cast(^#simd[32]u8)&a[i])
  262. load_b := intrinsics.unaligned_load(cast(^#simd[32]u8)&b[i])
  263. ne := intrinsics.simd_lanes_ne(load_a, load_b)
  264. if intrinsics.simd_reduce_or(ne) != 0 {
  265. return false
  266. }
  267. }
  268. }
  269. }
  270. m = (n-i) / 16 * 16
  271. for /**/; i < m; i += 16 {
  272. load_a := intrinsics.unaligned_load(cast(^#simd[16]u8)&a[i])
  273. load_b := intrinsics.unaligned_load(cast(^#simd[16]u8)&b[i])
  274. ne := intrinsics.simd_lanes_ne(load_a, load_b)
  275. if intrinsics.simd_reduce_or(ne) != 0 {
  276. return false
  277. }
  278. }
  279. m = (n-i) / size_of(uintptr) * size_of(uintptr)
  280. for /**/; i < m; i += size_of(uintptr) {
  281. if intrinsics.unaligned_load(cast(^uintptr)&a[i]) != intrinsics.unaligned_load(cast(^uintptr)&b[i]) {
  282. return false
  283. }
  284. }
  285. }
  286. for /**/; i < n; i += 1 {
  287. if a[i] != b[i] {
  288. return false
  289. }
  290. }
  291. return true
  292. }
  293. memory_compare :: proc "contextless" (x, y: rawptr, n: int) -> int #no_bounds_check {
  294. switch {
  295. case x == y: return 0
  296. case x == nil: return -1
  297. case y == nil: return +1
  298. }
  299. a, b := cast([^]byte)x, cast([^]byte)y
  300. n := uint(n)
  301. i := uint(0)
  302. m := uint(0)
  303. when HAS_HARDWARE_SIMD {
  304. when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
  305. m = n / 32 * 32
  306. for /**/; i < m; i += 32 {
  307. load_a := intrinsics.unaligned_load(cast(^#simd[32]u8)&a[i])
  308. load_b := intrinsics.unaligned_load(cast(^#simd[32]u8)&b[i])
  309. comparison := intrinsics.simd_lanes_ne(load_a, load_b)
  310. if intrinsics.simd_reduce_or(comparison) != 0 {
  311. sentinel: #simd[32]u8 = u8(0xFF)
  312. indices := intrinsics.simd_indices(#simd[32]u8)
  313. index_select := intrinsics.simd_select(comparison, indices, sentinel)
  314. index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
  315. return -1 if a[i+index_reduce] < b[i+index_reduce] else +1
  316. }
  317. }
  318. }
  319. }
  320. m = (n-i) / 16 * 16
  321. for /**/; i < m; i += 16 {
  322. load_a := intrinsics.unaligned_load(cast(^#simd[16]u8)&a[i])
  323. load_b := intrinsics.unaligned_load(cast(^#simd[16]u8)&b[i])
  324. comparison := intrinsics.simd_lanes_ne(load_a, load_b)
  325. if intrinsics.simd_reduce_or(comparison) != 0 {
  326. sentinel: #simd[16]u8 = u8(0xFF)
  327. indices := intrinsics.simd_indices(#simd[16]u8)
  328. index_select := intrinsics.simd_select(comparison, indices, sentinel)
  329. index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
  330. return -1 if a[i+index_reduce] < b[i+index_reduce] else +1
  331. }
  332. }
333. // 64-bit SIMD is faster than using a `uintptr` to detect a difference and then
334. // re-iterating with the byte-by-byte loop, at least on AMD64.
  335. m = (n-i) / 8 * 8
  336. for /**/; i < m; i += 8 {
  337. load_a := intrinsics.unaligned_load(cast(^#simd[8]u8)&a[i])
  338. load_b := intrinsics.unaligned_load(cast(^#simd[8]u8)&b[i])
  339. comparison := intrinsics.simd_lanes_ne(load_a, load_b)
  340. if intrinsics.simd_reduce_or(comparison) != 0 {
  341. sentinel: #simd[8]u8 = u8(0xFF)
  342. indices := intrinsics.simd_indices(#simd[8]u8)
  343. index_select := intrinsics.simd_select(comparison, indices, sentinel)
  344. index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
  345. return -1 if a[i+index_reduce] < b[i+index_reduce] else +1
  346. }
  347. }
  348. for /**/; i < n; i += 1 {
  349. if a[i] ~ b[i] != 0 {
  350. return -1 if int(a[i]) - int(b[i]) < 0 else +1
  351. }
  352. }
  353. return 0
  354. }
  355. memory_compare_zero :: proc "contextless" (a: rawptr, n: int) -> int #no_bounds_check {
  356. n := uint(n)
  357. i := uint(0)
  358. m := uint(0)
  359. // Because we're comparing against zero, we never return -1, as that would
  360. // indicate the compared value is less than zero.
  361. //
  362. // Note that a zero return value here means equality.
  363. bytes := ([^]u8)(a)
  364. if n >= 8 {
  365. when HAS_HARDWARE_SIMD {
  366. when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
  367. scanner32: #simd[32]u8
  368. m = n / 32 * 32
  369. for /**/; i < m; i += 32 {
  370. load := intrinsics.unaligned_load(cast(^#simd[32]u8)&bytes[i])
  371. ne := intrinsics.simd_lanes_ne(scanner32, load)
  372. if intrinsics.simd_reduce_or(ne) > 0 {
  373. return 1
  374. }
  375. }
  376. }
  377. }
  378. scanner16: #simd[16]u8
  379. m = (n-i) / 16 * 16
  380. for /**/; i < m; i += 16 {
  381. load := intrinsics.unaligned_load(cast(^#simd[16]u8)&bytes[i])
  382. ne := intrinsics.simd_lanes_ne(scanner16, load)
  383. if intrinsics.simd_reduce_or(ne) != 0 {
  384. return 1
  385. }
  386. }
  387. m = (n-i) / size_of(uintptr) * size_of(uintptr)
  388. for /**/; i < m; i += size_of(uintptr) {
  389. if intrinsics.unaligned_load(cast(^uintptr)&bytes[i]) != 0 {
  390. return 1
  391. }
  392. }
  393. }
  394. for /**/; i < n; i += 1 {
  395. if bytes[i] != 0 {
  396. return 1
  397. }
  398. }
  399. return 0
  400. }
  401. memory_prefix_length :: proc "contextless" (x, y: rawptr, n: int) -> (idx: int) #no_bounds_check {
  402. switch {
  403. case x == y: return n
  404. case x == nil: return 0
  405. case y == nil: return 0
  406. }
  407. a, b := cast([^]byte)x, cast([^]byte)y
  408. n := uint(n)
  409. i := uint(0)
  410. m := uint(0)
  411. when HAS_HARDWARE_SIMD {
  412. when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
  413. m = n / 32 * 32
  414. for /**/; i < m; i += 32 {
  415. load_a := intrinsics.unaligned_load(cast(^#simd[32]u8)&a[i])
  416. load_b := intrinsics.unaligned_load(cast(^#simd[32]u8)&b[i])
  417. comparison := intrinsics.simd_lanes_ne(load_a, load_b)
  418. if intrinsics.simd_reduce_or(comparison) != 0 {
  419. sentinel: #simd[32]u8 = u8(0xFF)
  420. indices := intrinsics.simd_indices(#simd[32]u8)
  421. index_select := intrinsics.simd_select(comparison, indices, sentinel)
  422. index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
  423. return int(i + index_reduce)
  424. }
  425. }
  426. }
  427. }
  428. m = (n-i) / 16 * 16
  429. for /**/; i < m; i += 16 {
  430. load_a := intrinsics.unaligned_load(cast(^#simd[16]u8)&a[i])
  431. load_b := intrinsics.unaligned_load(cast(^#simd[16]u8)&b[i])
  432. comparison := intrinsics.simd_lanes_ne(load_a, load_b)
  433. if intrinsics.simd_reduce_or(comparison) != 0 {
  434. sentinel: #simd[16]u8 = u8(0xFF)
  435. indices := intrinsics.simd_indices(#simd[16]u8)
  436. index_select := intrinsics.simd_select(comparison, indices, sentinel)
  437. index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
  438. return int(i + index_reduce)
  439. }
  440. }
441. // 64-bit SIMD is faster than using a `uintptr` to detect a difference and then
442. // re-iterating with the byte-by-byte loop, at least on AMD64.
  443. m = (n-i) / 8 * 8
  444. for /**/; i < m; i += 8 {
  445. load_a := intrinsics.unaligned_load(cast(^#simd[8]u8)&a[i])
  446. load_b := intrinsics.unaligned_load(cast(^#simd[8]u8)&b[i])
  447. comparison := intrinsics.simd_lanes_ne(load_a, load_b)
  448. if intrinsics.simd_reduce_or(comparison) != 0 {
  449. sentinel: #simd[8]u8 = u8(0xFF)
  450. indices := intrinsics.simd_indices(#simd[8]u8)
  451. index_select := intrinsics.simd_select(comparison, indices, sentinel)
  452. index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
  453. return int(i + index_reduce)
  454. }
  455. }
  456. for /**/; i < n; i += 1 {
  457. if a[i] ~ b[i] != 0 {
  458. return int(i)
  459. }
  460. }
  461. return int(n)
  462. }
  463. string_eq :: proc "contextless" (lhs, rhs: string) -> bool {
  464. x := transmute(Raw_String)lhs
  465. y := transmute(Raw_String)rhs
  466. if x.len != y.len {
  467. return false
  468. }
  469. return #force_inline memory_equal(x.data, y.data, x.len)
  470. }
  471. string_cmp :: proc "contextless" (a, b: string) -> int {
  472. x := transmute(Raw_String)a
  473. y := transmute(Raw_String)b
  474. ret := memory_compare(x.data, y.data, min(x.len, y.len))
  475. if ret == 0 && x.len != y.len {
  476. return -1 if x.len < y.len else +1
  477. }
  478. return ret
  479. }
  480. string16_eq :: proc "contextless" (lhs, rhs: string16) -> bool {
  481. x := transmute(Raw_String16)lhs
  482. y := transmute(Raw_String16)rhs
  483. if x.len != y.len {
  484. return false
  485. }
  486. return #force_inline memory_equal(x.data, y.data, x.len*size_of(u16))
  487. }
  488. string16_cmp :: proc "contextless" (a, b: string16) -> int {
  489. x := transmute(Raw_String16)a
  490. y := transmute(Raw_String16)b
  491. ret := memory_compare(x.data, y.data, min(x.len, y.len)*size_of(u16))
  492. if ret == 0 && x.len != y.len {
  493. return -1 if x.len < y.len else +1
  494. }
  495. return ret
  496. }
  497. string_ne :: #force_inline proc "contextless" (a, b: string) -> bool { return !string_eq(a, b) }
  498. string_lt :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) < 0 }
  499. string_gt :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) > 0 }
  500. string_le :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) <= 0 }
  501. string_ge :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) >= 0 }
  502. string16_ne :: #force_inline proc "contextless" (a, b: string16) -> bool { return !string16_eq(a, b) }
  503. string16_lt :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) < 0 }
  504. string16_gt :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) > 0 }
  505. string16_le :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) <= 0 }
  506. string16_ge :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) >= 0 }
  507. cstring_len :: proc "contextless" (s: cstring) -> int {
  508. p0 := uintptr((^byte)(s))
  509. p := p0
  510. for p != 0 && (^byte)(p)^ != 0 {
  511. p += 1
  512. }
  513. return int(p - p0)
  514. }
  515. cstring16_len :: proc "contextless" (s: cstring16) -> int {
  516. p := ([^]u16)(s)
  517. n := 0
  518. for p != nil && p[0] != 0 {
  519. p = p[1:]
  520. n += 1
  521. }
  522. return n
  523. }
  524. cstring_to_string :: proc "contextless" (s: cstring) -> string {
  525. if s == nil {
  526. return ""
  527. }
  528. ptr := (^byte)(s)
  529. n := cstring_len(s)
  530. return transmute(string)Raw_String{ptr, n}
  531. }
  532. cstring16_to_string16 :: proc "contextless" (s: cstring16) -> string16 {
  533. if s == nil {
  534. return ""
  535. }
  536. ptr := (^u16)(s)
  537. n := cstring16_len(s)
  538. return transmute(string16)Raw_String16{ptr, n}
  539. }
  540. cstring_eq :: proc "contextless" (lhs, rhs: cstring) -> bool {
  541. x := ([^]byte)(lhs)
  542. y := ([^]byte)(rhs)
  543. if x == y {
  544. return true
  545. }
  546. if (x == nil) ~ (y == nil) {
  547. return false
  548. }
  549. xn := cstring_len(lhs)
  550. yn := cstring_len(rhs)
  551. if xn != yn {
  552. return false
  553. }
  554. return #force_inline memory_equal(x, y, xn)
  555. }
  556. cstring_cmp :: proc "contextless" (lhs, rhs: cstring) -> int {
  557. x := ([^]byte)(lhs)
  558. y := ([^]byte)(rhs)
  559. if x == y {
  560. return 0
  561. }
  562. if (x == nil) ~ (y == nil) {
  563. return -1 if x == nil else +1
  564. }
  565. xn := cstring_len(lhs)
  566. yn := cstring_len(rhs)
  567. ret := memory_compare(x, y, min(xn, yn))
  568. if ret == 0 && xn != yn {
  569. return -1 if xn < yn else +1
  570. }
  571. return ret
  572. }
  573. cstring_ne :: #force_inline proc "contextless" (a, b: cstring) -> bool { return !cstring_eq(a, b) }
  574. cstring_lt :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) < 0 }
  575. cstring_gt :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) > 0 }
  576. cstring_le :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) <= 0 }
  577. cstring_ge :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) >= 0 }
  578. cstring16_eq :: proc "contextless" (lhs, rhs: cstring16) -> bool {
  579. x := ([^]u16)(lhs)
  580. y := ([^]u16)(rhs)
  581. if x == y {
  582. return true
  583. }
  584. if (x == nil) ~ (y == nil) {
  585. return false
  586. }
  587. xn := cstring16_len(lhs)
  588. yn := cstring16_len(rhs)
  589. if xn != yn {
  590. return false
  591. }
  592. return #force_inline memory_equal(x, y, xn*size_of(u16))
  593. }
  594. cstring16_cmp :: proc "contextless" (lhs, rhs: cstring16) -> int {
  595. x := ([^]u16)(lhs)
  596. y := ([^]u16)(rhs)
  597. if x == y {
  598. return 0
  599. }
  600. if (x == nil) ~ (y == nil) {
  601. return -1 if x == nil else +1
  602. }
  603. xn := cstring16_len(lhs)
  604. yn := cstring16_len(rhs)
  605. ret := memory_compare(x, y, min(xn, yn)*size_of(u16))
  606. if ret == 0 && xn != yn {
  607. return -1 if xn < yn else +1
  608. }
  609. return ret
  610. }
  611. cstring16_ne :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return !cstring16_eq(a, b) }
  612. cstring16_lt :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) < 0 }
  613. cstring16_gt :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) > 0 }
  614. cstring16_le :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) <= 0 }
  615. cstring16_ge :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) >= 0 }
  616. complex32_eq :: #force_inline proc "contextless" (a, b: complex32) -> bool { return real(a) == real(b) && imag(a) == imag(b) }
  617. complex32_ne :: #force_inline proc "contextless" (a, b: complex32) -> bool { return real(a) != real(b) || imag(a) != imag(b) }
  618. complex64_eq :: #force_inline proc "contextless" (a, b: complex64) -> bool { return real(a) == real(b) && imag(a) == imag(b) }
  619. complex64_ne :: #force_inline proc "contextless" (a, b: complex64) -> bool { return real(a) != real(b) || imag(a) != imag(b) }
  620. complex128_eq :: #force_inline proc "contextless" (a, b: complex128) -> bool { return real(a) == real(b) && imag(a) == imag(b) }
  621. complex128_ne :: #force_inline proc "contextless" (a, b: complex128) -> bool { return real(a) != real(b) || imag(a) != imag(b) }
  622. quaternion64_eq :: #force_inline proc "contextless" (a, b: quaternion64) -> bool { return real(a) == real(b) && imag(a) == imag(b) && jmag(a) == jmag(b) && kmag(a) == kmag(b) }
  623. quaternion64_ne :: #force_inline proc "contextless" (a, b: quaternion64) -> bool { return real(a) != real(b) || imag(a) != imag(b) || jmag(a) != jmag(b) || kmag(a) != kmag(b) }
  624. quaternion128_eq :: #force_inline proc "contextless" (a, b: quaternion128) -> bool { return real(a) == real(b) && imag(a) == imag(b) && jmag(a) == jmag(b) && kmag(a) == kmag(b) }
  625. quaternion128_ne :: #force_inline proc "contextless" (a, b: quaternion128) -> bool { return real(a) != real(b) || imag(a) != imag(b) || jmag(a) != jmag(b) || kmag(a) != kmag(b) }
  626. quaternion256_eq :: #force_inline proc "contextless" (a, b: quaternion256) -> bool { return real(a) == real(b) && imag(a) == imag(b) && jmag(a) == jmag(b) && kmag(a) == kmag(b) }
  627. quaternion256_ne :: #force_inline proc "contextless" (a, b: quaternion256) -> bool { return real(a) != real(b) || imag(a) != imag(b) || jmag(a) != jmag(b) || kmag(a) != kmag(b) }
  628. string_decode_rune :: proc "contextless" (s: string) -> (rune, int) {
  629. // NOTE(bill): Duplicated here to remove dependency on package unicode/utf8
  630. @(static, rodata) accept_sizes := [256]u8{
  631. 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x00-0x0f
  632. 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x10-0x1f
  633. 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x20-0x2f
  634. 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x30-0x3f
  635. 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x40-0x4f
  636. 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x50-0x5f
  637. 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x60-0x6f
  638. 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x70-0x7f
  639. 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, // 0x80-0x8f
  640. 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, // 0x90-0x9f
  641. 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, // 0xa0-0xaf
  642. 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, // 0xb0-0xbf
  643. 0xf1, 0xf1, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0xc0-0xcf
  644. 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0xd0-0xdf
  645. 0x13, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x23, 0x03, 0x03, // 0xe0-0xef
  646. 0x34, 0x04, 0x04, 0x04, 0x44, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, // 0xf0-0xff
  647. }
  648. Accept_Range :: struct {lo, hi: u8}
  649. @(static, rodata) accept_ranges := [5]Accept_Range{
  650. {0x80, 0xbf},
  651. {0xa0, 0xbf},
  652. {0x80, 0x9f},
  653. {0x90, 0xbf},
  654. {0x80, 0x8f},
  655. }
  656. MASKX :: 0b0011_1111
  657. MASK2 :: 0b0001_1111
  658. MASK3 :: 0b0000_1111
  659. MASK4 :: 0b0000_0111
  660. LOCB :: 0b1000_0000
  661. HICB :: 0b1011_1111
  662. RUNE_ERROR :: '\ufffd'
  663. n := len(s)
  664. if n < 1 {
  665. return RUNE_ERROR, 0
  666. }
  667. s0 := s[0]
  668. x := accept_sizes[s0]
  669. if x >= 0xF0 {
  670. mask := rune(x) << 31 >> 31 // NOTE(bill): Create 0x0000 or 0xffff.
  671. return rune(s[0])&~mask | RUNE_ERROR&mask, 1
  672. }
  673. sz := x & 7
  674. accept := accept_ranges[x>>4]
  675. if n < int(sz) {
  676. return RUNE_ERROR, 1
  677. }
  678. b1 := s[1]
  679. if b1 < accept.lo || accept.hi < b1 {
  680. return RUNE_ERROR, 1
  681. }
  682. if sz == 2 {
  683. return rune(s0&MASK2)<<6 | rune(b1&MASKX), 2
  684. }
  685. b2 := s[2]
  686. if b2 < LOCB || HICB < b2 {
  687. return RUNE_ERROR, 1
  688. }
  689. if sz == 3 {
  690. return rune(s0&MASK3)<<12 | rune(b1&MASKX)<<6 | rune(b2&MASKX), 3
  691. }
  692. b3 := s[3]
  693. if b3 < LOCB || HICB < b3 {
  694. return RUNE_ERROR, 1
  695. }
  696. return rune(s0&MASK4)<<18 | rune(b1&MASKX)<<12 | rune(b2&MASKX)<<6 | rune(b3&MASKX), 4
  697. }
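// Illustrative examples of the decoder above (values follow from the tables):
//
//	string_decode_rune("odin") -> ('o', 1)          // ASCII fast path via the mask
//	string_decode_rune("é")    -> ('é', 2)          // two-byte sequence 0xC3 0xA9
//	string_decode_rune("\xff") -> (RUNE_ERROR, 1)   // invalid leading byte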
  698. string_decode_last_rune :: proc "contextless" (s: string) -> (rune, int) {
  699. RUNE_ERROR :: '\ufffd'
  700. RUNE_SELF :: 0x80
  701. UTF_MAX :: 4
  702. r: rune
  703. size: int
  704. start, end, limit: int
  705. end = len(s)
  706. if end == 0 {
  707. return RUNE_ERROR, 0
  708. }
  709. start = end-1
  710. r = rune(s[start])
  711. if r < RUNE_SELF {
  712. return r, 1
  713. }
  714. limit = max(end - UTF_MAX, 0)
  715. for start-=1; start >= limit; start-=1 {
  716. if (s[start] & 0xc0) != RUNE_SELF {
  717. break
  718. }
  719. }
  720. start = max(start, 0)
  721. r, size = string_decode_rune(s[start:end])
  722. if start+size != end {
  723. return RUNE_ERROR, 1
  724. }
  725. return r, size
  726. }
  727. string16_decode_rune :: proc "contextless" (s: string16) -> (rune, int) {
  728. REPLACEMENT_CHAR :: '\ufffd'
  729. _surr1 :: 0xd800
  730. _surr2 :: 0xdc00
  731. _surr3 :: 0xe000
  732. _surr_self :: 0x10000
  733. r := rune(REPLACEMENT_CHAR)
  734. if len(s) < 1 {
  735. return r, 0
  736. }
  737. w := 1
  738. switch c := s[0]; {
  739. case c < _surr1, _surr3 <= c:
  740. r = rune(c)
  741. case _surr1 <= c && c < _surr2 && 1 < len(s) &&
  742. _surr2 <= s[1] && s[1] < _surr3:
  743. r1, r2 := rune(c), rune(s[1])
  744. if _surr1 <= r1 && r1 < _surr2 && _surr2 <= r2 && r2 < _surr3 {
  745. r = (r1-_surr1)<<10 | (r2 - _surr2) + _surr_self
  746. }
  747. w += 1
  748. }
  749. return r, w
  750. }
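// Illustrative examples of the UTF-16 decoder above:
//
//	{0x0041}         -> ('A', 1)      // BMP code unit
//	{0xD83D, 0xDE00} -> (U+1F600, 2)  // surrogate pair:
//	                                  // (0xD83D-0xD800)<<10 | (0xDE00-0xDC00) + 0x10000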
  751. string16_decode_last_rune :: proc "contextless" (s: string16) -> (rune, int) {
  752. REPLACEMENT_CHAR :: '\ufffd'
  753. _surr1 :: 0xd800
  754. _surr2 :: 0xdc00
  755. _surr3 :: 0xe000
  756. _surr_self :: 0x10000
  757. r := rune(REPLACEMENT_CHAR)
  758. if len(s) < 1 {
  759. return r, 0
  760. }
  761. n := len(s)-1
  762. c := s[n]
  763. w := 1
  764. if _surr2 <= c && c < _surr3 {
  765. if n >= 1 {
  766. r1 := rune(s[n-1])
  767. r2 := rune(c)
  768. if _surr1 <= r1 && r1 < _surr2 {
  769. r = (r1-_surr1)<<10 | (r2 - _surr2) + _surr_self
  770. }
  771. w = 2
  772. }
  773. } else if c < _surr1 || _surr3 <= c {
  774. r = rune(c)
  775. }
  776. return r, w
  777. }
  778. abs_complex32 :: #force_inline proc "contextless" (x: complex32) -> f16 {
  779. p, q := abs(real(x)), abs(imag(x))
  780. if p < q {
  781. p, q = q, p
  782. }
  783. if p == 0 {
  784. return 0
  785. }
  786. q = q / p
  787. return p * f16(intrinsics.sqrt(f32(1 + q*q)))
  788. }
  789. abs_complex64 :: #force_inline proc "contextless" (x: complex64) -> f32 {
  790. p, q := abs(real(x)), abs(imag(x))
  791. if p < q {
  792. p, q = q, p
  793. }
  794. if p == 0 {
  795. return 0
  796. }
  797. q = q / p
  798. return p * intrinsics.sqrt(1 + q*q)
  799. }
  800. abs_complex128 :: #force_inline proc "contextless" (x: complex128) -> f64 {
  801. p, q := abs(real(x)), abs(imag(x))
  802. if p < q {
  803. p, q = q, p
  804. }
  805. if p == 0 {
  806. return 0
  807. }
  808. q = q / p
  809. return p * intrinsics.sqrt(1 + q*q)
  810. }
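// Note: the scaling in the three procedures above computes |x| = hypot(re, im)
// as p * sqrt(1 + (q/p)^2) with p = max(|re|, |im|) and q = min(|re|, |im|),
// which avoids overflow from squaring a large component directly.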
  811. abs_quaternion64 :: #force_inline proc "contextless" (x: quaternion64) -> f16 {
  812. r, i, j, k := real(x), imag(x), jmag(x), kmag(x)
  813. return f16(intrinsics.sqrt(f32(r*r + i*i + j*j + k*k)))
  814. }
  815. abs_quaternion128 :: #force_inline proc "contextless" (x: quaternion128) -> f32 {
  816. r, i, j, k := real(x), imag(x), jmag(x), kmag(x)
  817. return intrinsics.sqrt(r*r + i*i + j*j + k*k)
  818. }
  819. abs_quaternion256 :: #force_inline proc "contextless" (x: quaternion256) -> f64 {
  820. r, i, j, k := real(x), imag(x), jmag(x), kmag(x)
  821. return intrinsics.sqrt(r*r + i*i + j*j + k*k)
  822. }
  823. quo_complex32 :: proc "contextless" (n, m: complex32) -> complex32 {
  824. nr, ni := f32(real(n)), f32(imag(n))
  825. mr, mi := f32(real(m)), f32(imag(m))
  826. e, f: f32
  827. if abs(mr) >= abs(mi) {
  828. ratio := mi / mr
  829. denom := mr + ratio*mi
  830. e = (nr + ni*ratio) / denom
  831. f = (ni - nr*ratio) / denom
  832. } else {
  833. ratio := mr / mi
  834. denom := mi + ratio*mr
  835. e = (nr*ratio + ni) / denom
  836. f = (ni*ratio - nr) / denom
  837. }
  838. return complex(f16(e), f16(f))
  839. }
  840. quo_complex64 :: proc "contextless" (n, m: complex64) -> complex64 {
  841. e, f: f32
  842. if abs(real(m)) >= abs(imag(m)) {
  843. ratio := imag(m) / real(m)
  844. denom := real(m) + ratio*imag(m)
  845. e = (real(n) + imag(n)*ratio) / denom
  846. f = (imag(n) - real(n)*ratio) / denom
  847. } else {
  848. ratio := real(m) / imag(m)
  849. denom := imag(m) + ratio*real(m)
  850. e = (real(n)*ratio + imag(n)) / denom
  851. f = (imag(n)*ratio - real(n)) / denom
  852. }
  853. return complex(e, f)
  854. }
  855. quo_complex128 :: proc "contextless" (n, m: complex128) -> complex128 {
  856. e, f: f64
  857. if abs(real(m)) >= abs(imag(m)) {
  858. ratio := imag(m) / real(m)
  859. denom := real(m) + ratio*imag(m)
  860. e = (real(n) + imag(n)*ratio) / denom
  861. f = (imag(n) - real(n)*ratio) / denom
  862. } else {
  863. ratio := real(m) / imag(m)
  864. denom := imag(m) + ratio*real(m)
  865. e = (real(n)*ratio + imag(n)) / denom
  866. f = (imag(n)*ratio - real(n)) / denom
  867. }
  868. return complex(e, f)
  869. }
  870. mul_quaternion64 :: proc "contextless" (q, r: quaternion64) -> quaternion64 {
  871. q0, q1, q2, q3 := f32(real(q)), f32(imag(q)), f32(jmag(q)), f32(kmag(q))
  872. r0, r1, r2, r3 := f32(real(r)), f32(imag(r)), f32(jmag(r)), f32(kmag(r))
  873. t0 := r0*q0 - r1*q1 - r2*q2 - r3*q3
  874. t1 := r0*q1 + r1*q0 - r2*q3 + r3*q2
  875. t2 := r0*q2 + r1*q3 + r2*q0 - r3*q1
  876. t3 := r0*q3 - r1*q2 + r2*q1 + r3*q0
  877. return quaternion(w=f16(t0), x=f16(t1), y=f16(t2), z=f16(t3))
  878. }
  879. mul_quaternion128 :: proc "contextless" (q, r: quaternion128) -> quaternion128 {
  880. q0, q1, q2, q3 := real(q), imag(q), jmag(q), kmag(q)
  881. r0, r1, r2, r3 := real(r), imag(r), jmag(r), kmag(r)
  882. t0 := r0*q0 - r1*q1 - r2*q2 - r3*q3
  883. t1 := r0*q1 + r1*q0 - r2*q3 + r3*q2
  884. t2 := r0*q2 + r1*q3 + r2*q0 - r3*q1
  885. t3 := r0*q3 - r1*q2 + r2*q1 + r3*q0
  886. return quaternion(w=t0, x=t1, y=t2, z=t3)
  887. }
  888. mul_quaternion256 :: proc "contextless" (q, r: quaternion256) -> quaternion256 {
  889. q0, q1, q2, q3 := real(q), imag(q), jmag(q), kmag(q)
  890. r0, r1, r2, r3 := real(r), imag(r), jmag(r), kmag(r)
  891. t0 := r0*q0 - r1*q1 - r2*q2 - r3*q3
  892. t1 := r0*q1 + r1*q0 - r2*q3 + r3*q2
  893. t2 := r0*q2 + r1*q3 + r2*q0 - r3*q1
  894. t3 := r0*q3 - r1*q2 + r2*q1 + r3*q0
  895. return quaternion(w=t0, x=t1, y=t2, z=t3)
  896. }
  897. quo_quaternion64 :: proc "contextless" (q, r: quaternion64) -> quaternion64 {
  898. q0, q1, q2, q3 := f32(real(q)), f32(imag(q)), f32(jmag(q)), f32(kmag(q))
  899. r0, r1, r2, r3 := f32(real(r)), f32(imag(r)), f32(jmag(r)), f32(kmag(r))
  900. invmag2 := 1.0 / (r0*r0 + r1*r1 + r2*r2 + r3*r3)
  901. t0 := (r0*q0 + r1*q1 + r2*q2 + r3*q3) * invmag2
  902. t1 := (r0*q1 - r1*q0 - r2*q3 - r3*q2) * invmag2
  903. t2 := (r0*q2 - r1*q3 - r2*q0 + r3*q1) * invmag2
  904. t3 := (r0*q3 + r1*q2 + r2*q1 - r3*q0) * invmag2
  905. return quaternion(w=f16(t0), x=f16(t1), y=f16(t2), z=f16(t3))
  906. }
  907. quo_quaternion128 :: proc "contextless" (q, r: quaternion128) -> quaternion128 {
  908. q0, q1, q2, q3 := real(q), imag(q), jmag(q), kmag(q)
  909. r0, r1, r2, r3 := real(r), imag(r), jmag(r), kmag(r)
  910. invmag2 := 1.0 / (r0*r0 + r1*r1 + r2*r2 + r3*r3)
  911. t0 := (r0*q0 + r1*q1 + r2*q2 + r3*q3) * invmag2
  912. t1 := (r0*q1 - r1*q0 - r2*q3 - r3*q2) * invmag2
  913. t2 := (r0*q2 - r1*q3 - r2*q0 + r3*q1) * invmag2
  914. t3 := (r0*q3 + r1*q2 + r2*q1 - r3*q0) * invmag2
  915. return quaternion(w=t0, x=t1, y=t2, z=t3)
  916. }
  917. quo_quaternion256 :: proc "contextless" (q, r: quaternion256) -> quaternion256 {
  918. q0, q1, q2, q3 := real(q), imag(q), jmag(q), kmag(q)
  919. r0, r1, r2, r3 := real(r), imag(r), jmag(r), kmag(r)
  920. invmag2 := 1.0 / (r0*r0 + r1*r1 + r2*r2 + r3*r3)
  921. t0 := (r0*q0 + r1*q1 + r2*q2 + r3*q3) * invmag2
  922. t1 := (r0*q1 - r1*q0 - r2*q3 - r3*q2) * invmag2
  923. t2 := (r0*q2 - r1*q3 - r2*q0 + r3*q1) * invmag2
  924. t3 := (r0*q3 + r1*q2 + r2*q1 - r3*q0) * invmag2
  925. return quaternion(w=t0, x=t1, y=t2, z=t3)
  926. }
  927. @(link_name="__truncsfhf2", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
  928. truncsfhf2 :: proc "c" (value: f32) -> __float16 {
  929. v: struct #raw_union { i: u32, f: f32 }
  930. i, s, e, m: i32
  931. v.f = value
  932. i = i32(v.i)
  933. s = (i >> 16) & 0x00008000
  934. e = ((i >> 23) & 0x000000ff) - (127 - 15)
  935. m = i & 0x007fffff
  936. if e <= 0 {
  937. if e < -10 {
  938. return transmute(__float16)u16(s)
  939. }
  940. m = (m | 0x00800000) >> u32(1 - e)
  941. if m & 0x00001000 != 0 {
  942. m += 0x00002000
  943. }
  944. return transmute(__float16)u16(s | (m >> 13))
  945. } else if e == 0xff - (127 - 15) {
  946. if m == 0 {
  947. return transmute(__float16)u16(s | 0x7c00) /* NOTE(bill): infinity */
  948. } else {
  949. /* NOTE(bill): NAN */
  950. m >>= 13
  951. return transmute(__float16)u16(s | 0x7c00 | m | i32(m == 0))
  952. }
  953. } else {
  954. if m & 0x00001000 != 0 {
  955. m += 0x00002000
  956. if (m & 0x00800000) != 0 {
  957. m = 0
  958. e += 1
  959. }
  960. }
  961. if e > 30 {
  962. f := i64(1e12)
  963. for j := 0; j < 10; j += 1 {
  964. /* NOTE(bill): Cause overflow */
  965. g := intrinsics.volatile_load(&f)
  966. g *= g
  967. intrinsics.volatile_store(&f, g)
  968. }
  969. return transmute(__float16)u16(s | 0x7c00)
  970. }
  971. return transmute(__float16)u16(s | (e << 10) | (m >> 13))
  972. }
  973. }
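// Illustrative values for the f32 -> f16 truncation above (bit patterns of the
// returned __float16):
//
//	truncsfhf2(1.0)     -> 0x3C00  // sign 0, rebased exponent 15, mantissa 0
//	truncsfhf2(65536.0) -> 0x7C00  // e > 30, saturates to infinity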
  974. @(link_name="__aeabi_d2h", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
  975. aeabi_d2h :: proc "c" (value: f64) -> __float16 {
  976. return truncsfhf2(f32(value))
  977. }
  978. @(link_name="__truncdfhf2", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
  979. truncdfhf2 :: proc "c" (value: f64) -> __float16 {
  980. return truncsfhf2(f32(value))
  981. }
  982. @(link_name="__gnu_h2f_ieee", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
  983. gnu_h2f_ieee :: proc "c" (value_: __float16) -> f32 {
  984. fp32 :: struct #raw_union { u: u32, f: f32 }
  985. value := transmute(u16)value_
  986. v: fp32
  987. magic, inf_or_nan: fp32
  988. magic.u = u32((254 - 15) << 23)
  989. inf_or_nan.u = u32((127 + 16) << 23)
  990. v.u = u32(value & 0x7fff) << 13
  991. v.f *= magic.f
  992. if v.f >= inf_or_nan.f {
  993. v.u |= 255 << 23
  994. }
  995. v.u |= u32(value & 0x8000) << 16
  996. return v.f
  997. }
  998. @(link_name="__gnu_f2h_ieee", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
  999. gnu_f2h_ieee :: proc "c" (value: f32) -> __float16 {
  1000. return truncsfhf2(value)
  1001. }
  1002. @(link_name="__extendhfsf2", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
  1003. extendhfsf2 :: proc "c" (value: __float16) -> f32 {
  1004. return gnu_h2f_ieee(value)
  1005. }
  1006. @(link_name="__floattidf", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
  1007. floattidf :: proc "c" (a: i128) -> f64 {
  1008. DBL_MANT_DIG :: 53
  1009. if a == 0 {
  1010. return 0.0
  1011. }
  1012. a := a
  1013. N :: size_of(i128) * 8
  1014. s := a >> (N-1)
  1015. a = (a ~ s) - s
1016. sd := N - intrinsics.count_leading_zeros(a) // number of significant digits
  1017. e := i32(sd - 1) // exponent
  1018. if sd > DBL_MANT_DIG {
  1019. switch sd {
  1020. case DBL_MANT_DIG + 1:
  1021. a <<= 1
  1022. case DBL_MANT_DIG + 2:
  1023. // okay
  1024. case:
  1025. a = i128(u128(a) >> u128(sd - (DBL_MANT_DIG+2))) |
  1026. i128(u128(a) & (~u128(0) >> u128(N + DBL_MANT_DIG+2 - sd)) != 0)
  1027. }
  1028. a |= i128((a & 4) != 0)
  1029. a += 1
  1030. a >>= 2
  1031. if a & (i128(1) << DBL_MANT_DIG) != 0 {
  1032. a >>= 1
  1033. e += 1
  1034. }
  1035. } else {
  1036. a <<= u128(DBL_MANT_DIG - sd) & 127
  1037. }
  1038. fb: [2]u32
  1039. fb[1] = (u32(s) & 0x80000000) | // sign
  1040. (u32(e + 1023) << 20) | // exponent
  1041. u32((u64(a) >> 32) & 0x000FFFFF) // mantissa-high
  1042. fb[0] = u32(a) // mantissa-low
  1043. return transmute(f64)fb
  1044. }
  1045. @(link_name="__floattidf_unsigned", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
  1046. floattidf_unsigned :: proc "c" (a: u128) -> f64 {
  1047. DBL_MANT_DIG :: 53
  1048. if a == 0 {
  1049. return 0.0
  1050. }
  1051. a := a
  1052. N :: size_of(u128) * 8
1053. sd := N - intrinsics.count_leading_zeros(a) // number of significant digits
  1054. e := i32(sd - 1) // exponent
  1055. if sd > DBL_MANT_DIG {
  1056. switch sd {
  1057. case DBL_MANT_DIG + 1:
  1058. a <<= 1
  1059. case DBL_MANT_DIG + 2:
  1060. // okay
  1061. case:
  1062. a = u128(u128(a) >> u128(sd - (DBL_MANT_DIG+2))) |
  1063. u128(u128(a) & (~u128(0) >> u128(N + DBL_MANT_DIG+2 - sd)) != 0)
  1064. }
  1065. a |= u128((a & 4) != 0)
  1066. a += 1
  1067. a >>= 2
  1068. if a & (1 << DBL_MANT_DIG) != 0 {
  1069. a >>= 1
  1070. e += 1
  1071. }
  1072. } else {
  1073. a <<= u128(DBL_MANT_DIG - sd)
  1074. }
  1075. fb: [2]u32
  1076. fb[1] = (0) | // sign
  1077. u32((e + 1023) << 20) | // exponent
  1078. u32((u64(a) >> 32) & 0x000FFFFF) // mantissa-high
  1079. fb[0] = u32(a) // mantissa-low
  1080. return transmute(f64)fb
  1081. }
  1082. @(link_name="__fixunsdfti", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
  1083. fixunsdfti :: #force_no_inline proc "c" (a: f64) -> u128 {
  1084. // TODO(bill): implement `fixunsdfti` correctly
  1085. x := u64(a)
  1086. return u128(x)
  1087. }
  1088. @(link_name="__fixunsdfdi", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
  1089. fixunsdfdi :: #force_no_inline proc "c" (a: f64) -> i128 {
  1090. // TODO(bill): implement `fixunsdfdi` correctly
  1091. x := i64(a)
  1092. return i128(x)
  1093. }
  1094. @(link_name="__umodti3", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
  1095. umodti3 :: proc "c" (a, b: u128) -> u128 {
  1096. r: u128 = ---
  1097. _ = udivmod128(a, b, &r)
  1098. return r
  1099. }
  1100. @(link_name="__udivmodti4", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
  1101. udivmodti4 :: proc "c" (a, b: u128, rem: ^u128) -> u128 {
  1102. return udivmod128(a, b, rem)
  1103. }
  1104. when !IS_WASM {
  1105. @(link_name="__udivti3", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
  1106. udivti3 :: proc "c" (a, b: u128) -> u128 {
  1107. return udivmodti4(a, b, nil)
  1108. }
  1109. }
  1110. @(link_name="__modti3", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
  1111. modti3 :: proc "c" (a, b: i128) -> i128 {
  1112. s_a := a >> (128 - 1)
  1113. s_b := b >> (128 - 1)
  1114. an := (a ~ s_a) - s_a
  1115. bn := (b ~ s_b) - s_b
  1116. r: u128 = ---
  1117. _ = udivmod128(u128(an), u128(bn), &r)
  1118. return (i128(r) ~ s_a) - s_a
  1119. }
  1120. @(link_name="__divmodti4", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
  1121. divmodti4 :: proc "c" (a, b: i128, rem: ^i128) -> i128 {
  1122. s_a := a >> (128 - 1) // -1 if negative or 0
  1123. s_b := b >> (128 - 1)
  1124. an := (a ~ s_a) - s_a // absolute
  1125. bn := (b ~ s_b) - s_b
  1126. s_b ~= s_a // quotient sign
  1127. u_s_b := u128(s_b)
  1128. u_s_a := u128(s_a)
  1129. r: u128 = ---
  1130. u := i128((udivmodti4(u128(an), u128(bn), &r) ~ u_s_b) - u_s_b) // negate if negative
  1131. rem^ = i128((r ~ u_s_a) - u_s_a)
  1132. return u
  1133. }
  1134. @(link_name="__divti3", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
  1135. divti3 :: proc "c" (a, b: i128) -> i128 {
  1136. s_a := a >> (128 - 1) // -1 if negative or 0
  1137. s_b := b >> (128 - 1)
  1138. an := (a ~ s_a) - s_a // absolute
  1139. bn := (b ~ s_b) - s_b
  1140. s_a ~= s_b // quotient sign
  1141. u_s_a := u128(s_a)
  1142. return i128((udivmodti4(u128(an), u128(bn), nil) ~ u_s_a) - u_s_a) // negate if negative
  1143. }
  1144. @(link_name="__fixdfti", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
  1145. fixdfti :: proc "c" (a: u64) -> i128 {
  1146. significandBits :: 52
  1147. typeWidth :: (size_of(u64)*8)
  1148. exponentBits :: (typeWidth - significandBits - 1)
  1149. maxExponent :: ((1 << exponentBits) - 1)
  1150. exponentBias :: (maxExponent >> 1)
  1151. implicitBit :: (u64(1) << significandBits)
  1152. significandMask :: (implicitBit - 1)
  1153. signBit :: (u64(1) << (significandBits + exponentBits))
  1154. absMask :: (signBit - 1)
  1155. exponentMask :: (absMask ~ significandMask)
  1156. // Break a into sign, exponent, significand
  1157. aRep := a
  1158. aAbs := aRep & absMask
  1159. sign := i128(-1 if aRep & signBit != 0 else 1)
  1160. exponent := u64((aAbs >> significandBits) - exponentBias)
  1161. significand := u64((aAbs & significandMask) | implicitBit)
  1162. // If exponent is negative, the result is zero.
  1163. if exponent < 0 {
  1164. return 0
  1165. }
  1166. // If the value is too large for the integer type, saturate.
  1167. if exponent >= size_of(i128) * 8 {
  1168. return max(i128) if sign == 1 else min(i128)
  1169. }
  1170. // If 0 <= exponent < significandBits, right shift to get the result.
  1171. // Otherwise, shift left.
  1172. if exponent < significandBits {
  1173. return sign * i128(significand >> (significandBits - exponent))
  1174. } else {
  1175. return sign * (i128(significand) << (exponent - significandBits))
  1176. }
  1177. }
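// Illustrative values for the truncating f64 -> i128 conversion above (the
// argument is the raw bit pattern of the double):
//
//	fixdfti(transmute(u64)f64(2.5))  ==  2
//	fixdfti(transmute(u64)f64(-2.5)) == -2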
  1178. __write_bits :: proc "contextless" (dst, src: [^]byte, offset: uintptr, size: uintptr) {
  1179. for i in 0..<size {
  1180. j := offset+i
  1181. the_bit := byte((src[i>>3]) & (1<<(i&7)) != 0)
  1182. dst[j>>3] &~= 1<<(j&7)
  1183. dst[j>>3] |= the_bit<<(j&7)
  1184. }
  1185. }
  1186. __read_bits :: proc "contextless" (dst, src: [^]byte, offset: uintptr, size: uintptr) {
  1187. for j in 0..<size {
  1188. i := offset+j
  1189. the_bit := byte((src[i>>3]) & (1<<(i&7)) != 0)
  1190. dst[j>>3] &~= 1<<(j&7)
  1191. dst[j>>3] |= the_bit<<(j&7)
  1192. }
  1193. }
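// Descriptive note: __write_bits copies `size` bits from the start of `src`
// into `dst` beginning at bit `offset`; __read_bits is the inverse, copying
// `size` bits starting at bit `offset` in `src` into the start of `dst`. Both
// work bit-by-bit, so the copied fields need not be byte-aligned.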
  1194. when .Address in ODIN_SANITIZER_FLAGS {
  1195. foreign {
  1196. @(require)
  1197. __asan_unpoison_memory_region :: proc "system" (address: rawptr, size: uint) ---
  1198. }
  1199. }