#+vet !cast
package runtime

import "base:intrinsics"

// True on the WebAssembly targets; used to special-case wasm-only code paths.
@(private="file")
IS_WASM :: ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32

// Linkage applied to runtime symbols; depends on how the program is being
// built (separate modules, no entry point, build mode, CRT usage).
@(private)
RUNTIME_LINKAGE :: "strong" when ODIN_USE_SEPARATE_MODULES else
	"internal" when ODIN_NO_ENTRY_POINT && (ODIN_BUILD_MODE == .Static || ODIN_BUILD_MODE == .Dynamic || ODIN_BUILD_MODE == .Object) else
	"strong" when ODIN_BUILD_MODE == .Dynamic else
	"strong" when !ODIN_NO_CRT else
	"internal"
RUNTIME_REQUIRE :: false // !ODIN_TILDE

// 16-bit float storage type: falls back to a raw u16 bit pattern when the
// LLVM target does not support f16 natively.
@(private)
__float16 :: f16 when __ODIN_LLVM_F16_SUPPORTED else u16

// True when the target is known to have usable SIMD hardware; each listed
// architecture is excluded unless its baseline vector feature is enabled.
HAS_HARDWARE_SIMD :: false when (ODIN_ARCH == .amd64 || ODIN_ARCH == .i386) && !intrinsics.has_target_feature("sse2") else
	false when (ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32) && !intrinsics.has_target_feature("neon") else
	false when (ODIN_ARCH == .wasm64p32 || ODIN_ARCH == .wasm32) && !intrinsics.has_target_feature("simd128") else
	false when (ODIN_ARCH == .riscv64) && !intrinsics.has_target_feature("v") else
	true
  20. @(private)
  21. byte_slice :: #force_inline proc "contextless" (data: rawptr, len: int) -> []byte #no_bounds_check {
  22. return ([^]byte)(data)[:max(len, 0)]
  23. }
  24. is_power_of_two_int :: #force_inline proc "contextless" (x: int) -> bool {
  25. if x <= 0 {
  26. return false
  27. }
  28. return (x & (x-1)) == 0
  29. }
  30. align_forward_int :: #force_inline proc "odin" (ptr, align: int) -> int {
  31. assert(is_power_of_two_int(align))
  32. p := ptr
  33. modulo := p & (align-1)
  34. if modulo != 0 {
  35. p += align - modulo
  36. }
  37. return p
  38. }
  39. is_power_of_two_uint :: #force_inline proc "contextless" (x: uint) -> bool {
  40. if x <= 0 {
  41. return false
  42. }
  43. return (x & (x-1)) == 0
  44. }
  45. align_forward_uint :: #force_inline proc "odin" (ptr, align: uint) -> uint {
  46. assert(is_power_of_two_uint(align))
  47. p := ptr
  48. modulo := p & (align-1)
  49. if modulo != 0 {
  50. p += align - modulo
  51. }
  52. return p
  53. }
  54. is_power_of_two_uintptr :: #force_inline proc "contextless" (x: uintptr) -> bool {
  55. if x <= 0 {
  56. return false
  57. }
  58. return (x & (x-1)) == 0
  59. }
  60. align_forward_uintptr :: #force_inline proc "odin" (ptr, align: uintptr) -> uintptr {
  61. assert(is_power_of_two_uintptr(align))
  62. p := ptr
  63. modulo := p & (align-1)
  64. if modulo != 0 {
  65. p += align - modulo
  66. }
  67. return p
  68. }
// Overload set: power-of-two check for int/uint/uintptr.
is_power_of_two :: proc {
	is_power_of_two_int,
	is_power_of_two_uint,
	is_power_of_two_uintptr,
}
// Overload set: align-forward rounding for int/uint/uintptr.
align_forward :: proc {
	align_forward_int,
	align_forward_uint,
	align_forward_uintptr,
}
  79. mem_zero :: proc "contextless" (data: rawptr, len: int) -> rawptr {
  80. if data == nil {
  81. return nil
  82. }
  83. if len <= 0 {
  84. return data
  85. }
  86. intrinsics.mem_zero(data, len)
  87. return data
  88. }
  89. mem_copy :: proc "contextless" (dst, src: rawptr, len: int) -> rawptr {
  90. if src != nil && dst != src && len > 0 {
  91. // NOTE(bill): This _must_ be implemented like C's memmove
  92. intrinsics.mem_copy(dst, src, len)
  93. }
  94. return dst
  95. }
  96. mem_copy_non_overlapping :: proc "contextless" (dst, src: rawptr, len: int) -> rawptr {
  97. if src != nil && dst != src && len > 0 {
  98. // NOTE(bill): This _must_ be implemented like C's memcpy
  99. intrinsics.mem_copy_non_overlapping(dst, src, len)
  100. }
  101. return dst
  102. }
  103. DEFAULT_ALIGNMENT :: 2*align_of(rawptr)
  104. mem_alloc_bytes :: #force_no_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
  105. assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
  106. if size == 0 || allocator.procedure == nil{
  107. return nil, nil
  108. }
  109. return allocator.procedure(allocator.data, .Alloc, size, alignment, nil, 0, loc)
  110. }
  111. mem_alloc :: #force_no_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
  112. assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
  113. if size == 0 || allocator.procedure == nil {
  114. return nil, nil
  115. }
  116. return allocator.procedure(allocator.data, .Alloc, size, alignment, nil, 0, loc)
  117. }
  118. mem_alloc_non_zeroed :: #force_no_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
  119. assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
  120. if size == 0 || allocator.procedure == nil {
  121. return nil, nil
  122. }
  123. return allocator.procedure(allocator.data, .Alloc_Non_Zeroed, size, alignment, nil, 0, loc)
  124. }
  125. @builtin
  126. mem_free :: #force_no_inline proc(ptr: rawptr, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
  127. if ptr == nil || allocator.procedure == nil {
  128. return nil
  129. }
  130. _, err := allocator.procedure(allocator.data, .Free, 0, 0, ptr, 0, loc)
  131. return err
  132. }
  133. mem_free_with_size :: #force_no_inline proc(ptr: rawptr, byte_count: int, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
  134. if ptr == nil || allocator.procedure == nil {
  135. return nil
  136. }
  137. _, err := allocator.procedure(allocator.data, .Free, 0, 0, ptr, byte_count, loc)
  138. return err
  139. }
  140. mem_free_bytes :: #force_no_inline proc(bytes: []byte, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
  141. if bytes == nil || allocator.procedure == nil {
  142. return nil
  143. }
  144. _, err := allocator.procedure(allocator.data, .Free, 0, 0, raw_data(bytes), len(bytes), loc)
  145. return err
  146. }
  147. @builtin
  148. mem_free_all :: #force_no_inline proc(allocator := context.allocator, loc := #caller_location) -> (err: Allocator_Error) {
  149. if allocator.procedure != nil {
  150. _, err = allocator.procedure(allocator.data, .Free_All, 0, 0, nil, 0, loc)
  151. }
  152. return
  153. }
// Shared implementation behind mem_resize/non_zero_mem_resize.
// Dispatch: new_size == 0 frees; ptr == nil allocates; unchanged size with a
// suitably aligned pointer returns the block as-is; otherwise the allocator
// is asked to resize. If the allocator reports .Mode_Not_Implemented, the
// resize is emulated with alloc + copy + free.
_mem_resize :: #force_no_inline proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, should_zero: bool, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
	assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
	if allocator.procedure == nil {
		return nil, nil
	}
	if new_size == 0 {
		// Shrinking to zero is a free (only if there is something to free).
		if ptr != nil {
			_, err = allocator.procedure(allocator.data, .Free, 0, 0, ptr, old_size, loc)
			return
		}
		return
	} else if ptr == nil {
		// Growing from nothing is a plain allocation.
		if should_zero {
			return allocator.procedure(allocator.data, .Alloc, new_size, alignment, nil, 0, loc)
		} else {
			return allocator.procedure(allocator.data, .Alloc_Non_Zeroed, new_size, alignment, nil, 0, loc)
		}
	} else if old_size == new_size && uintptr(ptr) % uintptr(alignment) == 0 {
		// Same size and already aligned as requested: return the block as-is.
		data = ([^]byte)(ptr)[:old_size]
		return
	}

	if should_zero {
		data, err = allocator.procedure(allocator.data, .Resize, new_size, alignment, ptr, old_size, loc)
	} else {
		data, err = allocator.procedure(allocator.data, .Resize_Non_Zeroed, new_size, alignment, ptr, old_size, loc)
	}
	if err == .Mode_Not_Implemented {
		// Allocator cannot resize in place: allocate a new block, copy the
		// old contents over, then free the original.
		if should_zero {
			data, err = allocator.procedure(allocator.data, .Alloc, new_size, alignment, nil, 0, loc)
		} else {
			data, err = allocator.procedure(allocator.data, .Alloc_Non_Zeroed, new_size, alignment, nil, 0, loc)
		}
		if err != nil {
			return
		}
		copy(data, ([^]byte)(ptr)[:old_size])
		_, err = allocator.procedure(allocator.data, .Free, 0, 0, ptr, old_size, loc)
	}
	return
}
// Resizes the allocation at ptr from old_size to new_size bytes, zeroing any
// newly added bytes. See _mem_resize for the full dispatch logic.
mem_resize :: proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
	assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
	return _mem_resize(ptr, old_size, new_size, alignment, allocator, true, loc)
}
// Like mem_resize, but newly added bytes are left uninitialized.
non_zero_mem_resize :: proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
	assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
	return _mem_resize(ptr, old_size, new_size, alignment, allocator, false, loc)
}
// Zeroes n_ bytes at data, but only writes to words/bytes that are not
// already zero (see the rationale below). Non-positive lengths are no-ops.
conditional_mem_zero :: proc "contextless" (data: rawptr, n_: int) #no_bounds_check {
	// When acquiring memory from the OS for the first time it's likely that the
	// OS already gives the zero page mapped multiple times for the request. The
	// actual allocation does not have physical pages allocated to it until those
	// pages are written to which causes a page-fault. This is often called COW
	// (Copy on Write)
	//
	// You do not want to actually zero out memory in this case because it would
	// cause a bunch of page faults decreasing the speed of allocations and
	// increase the amount of actual resident physical memory used.
	//
	// Instead a better technique is to check if memory is zeroed before zeroing
	// it. This turns out to be an important optimization in practice, saving
	// nearly half (or more) the amount of physical memory used by an application.
	// This is why every implementation of calloc in libc does this optimization.
	//
	// It may seem counter-intuitive but most allocations in an application are
	// wasted and never used. When you consider something like a [dynamic]T which
	// always doubles in capacity on resize but you rarely ever actually use the
	// full capacity of a dynamic array it means you have a lot of resident waste
	// if you actually zeroed the remainder of the memory.
	//
	// Keep in mind the OS is already guaranteed to give you zeroed memory by
	// mapping in this zero page multiple times so in the best case there is no
	// need to actually zero anything. As for testing all this memory for a zero
	// value, it costs nothing because the same zero page is used for the
	// whole allocation and will exist in L1 cache for the entire zero checking
	// process.
	if n_ <= 0 {
		return
	}
	n := uint(n_)

	// Process word-sized chunks first, then the remaining tail bytes;
	// in both loops a write happens only when a non-zero value is found.
	n_words := n / size_of(uintptr)
	p_words := ([^]uintptr)(data)[:n_words]
	p_bytes := ([^]byte)(data)[size_of(uintptr) * n_words:n]
	for &p_word in p_words {
		if p_word != 0 {
			p_word = 0
		}
	}
	for &p_byte in p_bytes {
		if p_byte != 0 {
			p_byte = 0
		}
	}
}
  248. memory_equal :: proc "contextless" (x, y: rawptr, n: int) -> bool {
  249. switch {
  250. case n == 0: return true
  251. case x == y: return true
  252. }
  253. a, b := cast([^]byte)x, cast([^]byte)y
  254. n := uint(n)
  255. i := uint(0)
  256. m := uint(0)
  257. if n >= 8 {
  258. when HAS_HARDWARE_SIMD {
  259. // Avoid using 256-bit SIMD on platforms where its emulation is
  260. // likely to be less than ideal.
  261. when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
  262. m = n / 32 * 32
  263. for /**/; i < m; i += 32 {
  264. load_a := intrinsics.unaligned_load(cast(^#simd[32]u8)&a[i])
  265. load_b := intrinsics.unaligned_load(cast(^#simd[32]u8)&b[i])
  266. ne := intrinsics.simd_lanes_ne(load_a, load_b)
  267. if intrinsics.simd_reduce_or(ne) != 0 {
  268. return false
  269. }
  270. }
  271. }
  272. }
  273. m = (n-i) / 16 * 16
  274. for /**/; i < m; i += 16 {
  275. load_a := intrinsics.unaligned_load(cast(^#simd[16]u8)&a[i])
  276. load_b := intrinsics.unaligned_load(cast(^#simd[16]u8)&b[i])
  277. ne := intrinsics.simd_lanes_ne(load_a, load_b)
  278. if intrinsics.simd_reduce_or(ne) != 0 {
  279. return false
  280. }
  281. }
  282. m = (n-i) / size_of(uintptr) * size_of(uintptr)
  283. for /**/; i < m; i += size_of(uintptr) {
  284. if intrinsics.unaligned_load(cast(^uintptr)&a[i]) != intrinsics.unaligned_load(cast(^uintptr)&b[i]) {
  285. return false
  286. }
  287. }
  288. }
  289. for /**/; i < n; i += 1 {
  290. if a[i] != b[i] {
  291. return false
  292. }
  293. }
  294. return true
  295. }
  296. memory_compare :: proc "contextless" (x, y: rawptr, n: int) -> int #no_bounds_check {
  297. switch {
  298. case x == y: return 0
  299. case x == nil: return -1
  300. case y == nil: return +1
  301. }
  302. a, b := cast([^]byte)x, cast([^]byte)y
  303. n := uint(n)
  304. i := uint(0)
  305. m := uint(0)
  306. when HAS_HARDWARE_SIMD {
  307. when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
  308. m = n / 32 * 32
  309. for /**/; i < m; i += 32 {
  310. load_a := intrinsics.unaligned_load(cast(^#simd[32]u8)&a[i])
  311. load_b := intrinsics.unaligned_load(cast(^#simd[32]u8)&b[i])
  312. comparison := intrinsics.simd_lanes_ne(load_a, load_b)
  313. if intrinsics.simd_reduce_or(comparison) != 0 {
  314. sentinel: #simd[32]u8 = u8(0xFF)
  315. indices := intrinsics.simd_indices(#simd[32]u8)
  316. index_select := intrinsics.simd_select(comparison, indices, sentinel)
  317. index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
  318. return -1 if a[i+index_reduce] < b[i+index_reduce] else +1
  319. }
  320. }
  321. }
  322. }
  323. m = (n-i) / 16 * 16
  324. for /**/; i < m; i += 16 {
  325. load_a := intrinsics.unaligned_load(cast(^#simd[16]u8)&a[i])
  326. load_b := intrinsics.unaligned_load(cast(^#simd[16]u8)&b[i])
  327. comparison := intrinsics.simd_lanes_ne(load_a, load_b)
  328. if intrinsics.simd_reduce_or(comparison) != 0 {
  329. sentinel: #simd[16]u8 = u8(0xFF)
  330. indices := intrinsics.simd_indices(#simd[16]u8)
  331. index_select := intrinsics.simd_select(comparison, indices, sentinel)
  332. index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
  333. return -1 if a[i+index_reduce] < b[i+index_reduce] else +1
  334. }
  335. }
  336. // 64-bit SIMD is faster than using a `uintptr` to detect a difference then
  337. // re-iterating with the byte-by-byte loop, at least on AMD64.
  338. m = (n-i) / 8 * 8
  339. for /**/; i < m; i += 8 {
  340. load_a := intrinsics.unaligned_load(cast(^#simd[8]u8)&a[i])
  341. load_b := intrinsics.unaligned_load(cast(^#simd[8]u8)&b[i])
  342. comparison := intrinsics.simd_lanes_ne(load_a, load_b)
  343. if intrinsics.simd_reduce_or(comparison) != 0 {
  344. sentinel: #simd[8]u8 = u8(0xFF)
  345. indices := intrinsics.simd_indices(#simd[8]u8)
  346. index_select := intrinsics.simd_select(comparison, indices, sentinel)
  347. index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
  348. return -1 if a[i+index_reduce] < b[i+index_reduce] else +1
  349. }
  350. }
  351. for /**/; i < n; i += 1 {
  352. if a[i] ~ b[i] != 0 {
  353. return -1 if int(a[i]) - int(b[i]) < 0 else +1
  354. }
  355. }
  356. return 0
  357. }
  358. memory_compare_zero :: proc "contextless" (a: rawptr, n: int) -> int #no_bounds_check {
  359. n := uint(n)
  360. i := uint(0)
  361. m := uint(0)
  362. // Because we're comparing against zero, we never return -1, as that would
  363. // indicate the compared value is less than zero.
  364. //
  365. // Note that a zero return value here means equality.
  366. bytes := ([^]u8)(a)
  367. if n >= 8 {
  368. when HAS_HARDWARE_SIMD {
  369. when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
  370. scanner32: #simd[32]u8
  371. m = n / 32 * 32
  372. for /**/; i < m; i += 32 {
  373. load := intrinsics.unaligned_load(cast(^#simd[32]u8)&bytes[i])
  374. ne := intrinsics.simd_lanes_ne(scanner32, load)
  375. if intrinsics.simd_reduce_or(ne) > 0 {
  376. return 1
  377. }
  378. }
  379. }
  380. }
  381. scanner16: #simd[16]u8
  382. m = (n-i) / 16 * 16
  383. for /**/; i < m; i += 16 {
  384. load := intrinsics.unaligned_load(cast(^#simd[16]u8)&bytes[i])
  385. ne := intrinsics.simd_lanes_ne(scanner16, load)
  386. if intrinsics.simd_reduce_or(ne) != 0 {
  387. return 1
  388. }
  389. }
  390. m = (n-i) / size_of(uintptr) * size_of(uintptr)
  391. for /**/; i < m; i += size_of(uintptr) {
  392. if intrinsics.unaligned_load(cast(^uintptr)&bytes[i]) != 0 {
  393. return 1
  394. }
  395. }
  396. }
  397. for /**/; i < n; i += 1 {
  398. if bytes[i] != 0 {
  399. return 1
  400. }
  401. }
  402. return 0
  403. }
  404. memory_prefix_length :: proc "contextless" (x, y: rawptr, n: int) -> (idx: int) #no_bounds_check {
  405. switch {
  406. case x == y: return n
  407. case x == nil: return 0
  408. case y == nil: return 0
  409. }
  410. a, b := cast([^]byte)x, cast([^]byte)y
  411. n := uint(n)
  412. i := uint(0)
  413. m := uint(0)
  414. when HAS_HARDWARE_SIMD {
  415. when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
  416. m = n / 32 * 32
  417. for /**/; i < m; i += 32 {
  418. load_a := intrinsics.unaligned_load(cast(^#simd[32]u8)&a[i])
  419. load_b := intrinsics.unaligned_load(cast(^#simd[32]u8)&b[i])
  420. comparison := intrinsics.simd_lanes_ne(load_a, load_b)
  421. if intrinsics.simd_reduce_or(comparison) != 0 {
  422. sentinel: #simd[32]u8 = u8(0xFF)
  423. indices := intrinsics.simd_indices(#simd[32]u8)
  424. index_select := intrinsics.simd_select(comparison, indices, sentinel)
  425. index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
  426. return int(i + index_reduce)
  427. }
  428. }
  429. }
  430. }
  431. m = (n-i) / 16 * 16
  432. for /**/; i < m; i += 16 {
  433. load_a := intrinsics.unaligned_load(cast(^#simd[16]u8)&a[i])
  434. load_b := intrinsics.unaligned_load(cast(^#simd[16]u8)&b[i])
  435. comparison := intrinsics.simd_lanes_ne(load_a, load_b)
  436. if intrinsics.simd_reduce_or(comparison) != 0 {
  437. sentinel: #simd[16]u8 = u8(0xFF)
  438. indices := intrinsics.simd_indices(#simd[16]u8)
  439. index_select := intrinsics.simd_select(comparison, indices, sentinel)
  440. index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
  441. return int(i + index_reduce)
  442. }
  443. }
  444. // 64-bit SIMD is faster than using a `uintptr` to detect a difference then
  445. // re-iterating with the byte-by-byte loop, at least on AMD64.
  446. m = (n-i) / 8 * 8
  447. for /**/; i < m; i += 8 {
  448. load_a := intrinsics.unaligned_load(cast(^#simd[8]u8)&a[i])
  449. load_b := intrinsics.unaligned_load(cast(^#simd[8]u8)&b[i])
  450. comparison := intrinsics.simd_lanes_ne(load_a, load_b)
  451. if intrinsics.simd_reduce_or(comparison) != 0 {
  452. sentinel: #simd[8]u8 = u8(0xFF)
  453. indices := intrinsics.simd_indices(#simd[8]u8)
  454. index_select := intrinsics.simd_select(comparison, indices, sentinel)
  455. index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
  456. return int(i + index_reduce)
  457. }
  458. }
  459. for /**/; i < n; i += 1 {
  460. if a[i] ~ b[i] != 0 {
  461. return int(i)
  462. }
  463. }
  464. return int(n)
  465. }
  466. string_eq :: proc "contextless" (lhs, rhs: string) -> bool {
  467. x := transmute(Raw_String)lhs
  468. y := transmute(Raw_String)rhs
  469. if x.len != y.len {
  470. return false
  471. }
  472. return #force_inline memory_equal(x.data, y.data, x.len)
  473. }
  474. string_cmp :: proc "contextless" (a, b: string) -> int {
  475. x := transmute(Raw_String)a
  476. y := transmute(Raw_String)b
  477. ret := memory_compare(x.data, y.data, min(x.len, y.len))
  478. if ret == 0 && x.len != y.len {
  479. return -1 if x.len < y.len else +1
  480. }
  481. return ret
  482. }
  483. string16_eq :: proc "contextless" (lhs, rhs: string16) -> bool {
  484. x := transmute(Raw_String16)lhs
  485. y := transmute(Raw_String16)rhs
  486. if x.len != y.len {
  487. return false
  488. }
  489. return #force_inline memory_equal(x.data, y.data, x.len*size_of(u16))
  490. }
  491. string16_cmp :: proc "contextless" (a, b: string16) -> int {
  492. x := transmute(Raw_String16)a
  493. y := transmute(Raw_String16)b
  494. ret := memory_compare(x.data, y.data, min(x.len, y.len)*size_of(u16))
  495. if ret == 0 && x.len != y.len {
  496. return -1 if x.len < y.len else +1
  497. }
  498. return ret
  499. }
// Derived string comparison operators, defined via string_eq/string_cmp.
string_ne :: #force_inline proc "contextless" (a, b: string) -> bool { return !string_eq(a, b) }
string_lt :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) < 0 }
string_gt :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) > 0 }
string_le :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) <= 0 }
string_ge :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) >= 0 }

// Derived string16 comparison operators, defined via string16_eq/string16_cmp.
string16_ne :: #force_inline proc "contextless" (a, b: string16) -> bool { return !string16_eq(a, b) }
string16_lt :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) < 0 }
string16_gt :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) > 0 }
string16_le :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) <= 0 }
string16_ge :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) >= 0 }
  510. cstring_len :: proc "contextless" (s: cstring) -> int {
  511. p0 := uintptr((^byte)(s))
  512. p := p0
  513. for p != 0 && (^byte)(p)^ != 0 {
  514. p += 1
  515. }
  516. return int(p - p0)
  517. }
  518. cstring16_len :: proc "contextless" (s: cstring16) -> int {
  519. p := ([^]u16)(s)
  520. n := 0
  521. for p != nil && p[0] != 0 {
  522. p = p[1:]
  523. n += 1
  524. }
  525. return n
  526. }
  527. cstring_to_string :: proc "contextless" (s: cstring) -> string {
  528. if s == nil {
  529. return ""
  530. }
  531. ptr := (^byte)(s)
  532. n := cstring_len(s)
  533. return transmute(string)Raw_String{ptr, n}
  534. }
  535. cstring16_to_string16 :: proc "contextless" (s: cstring16) -> string16 {
  536. if s == nil {
  537. return ""
  538. }
  539. ptr := (^u16)(s)
  540. n := cstring16_len(s)
  541. return transmute(string16)Raw_String16{ptr, n}
  542. }
  543. cstring_eq :: proc "contextless" (lhs, rhs: cstring) -> bool {
  544. x := ([^]byte)(lhs)
  545. y := ([^]byte)(rhs)
  546. if x == y {
  547. return true
  548. }
  549. if (x == nil) ~ (y == nil) {
  550. return false
  551. }
  552. xn := cstring_len(lhs)
  553. yn := cstring_len(rhs)
  554. if xn != yn {
  555. return false
  556. }
  557. return #force_inline memory_equal(x, y, xn)
  558. }
  559. cstring_cmp :: proc "contextless" (lhs, rhs: cstring) -> int {
  560. x := ([^]byte)(lhs)
  561. y := ([^]byte)(rhs)
  562. if x == y {
  563. return 0
  564. }
  565. if (x == nil) ~ (y == nil) {
  566. return -1 if x == nil else +1
  567. }
  568. xn := cstring_len(lhs)
  569. yn := cstring_len(rhs)
  570. ret := memory_compare(x, y, min(xn, yn))
  571. if ret == 0 && xn != yn {
  572. return -1 if xn < yn else +1
  573. }
  574. return ret
  575. }
// Derived cstring comparison operators, defined via cstring_eq/cstring_cmp.
cstring_ne :: #force_inline proc "contextless" (a, b: cstring) -> bool { return !cstring_eq(a, b) }
cstring_lt :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) < 0 }
cstring_gt :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) > 0 }
cstring_le :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) <= 0 }
cstring_ge :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) >= 0 }
  581. cstring16_eq :: proc "contextless" (lhs, rhs: cstring16) -> bool {
  582. x := ([^]u16)(lhs)
  583. y := ([^]u16)(rhs)
  584. if x == y {
  585. return true
  586. }
  587. if (x == nil) ~ (y == nil) {
  588. return false
  589. }
  590. xn := cstring16_len(lhs)
  591. yn := cstring16_len(rhs)
  592. if xn != yn {
  593. return false
  594. }
  595. return #force_inline memory_equal(x, y, xn*size_of(u16))
  596. }
  597. cstring16_cmp :: proc "contextless" (lhs, rhs: cstring16) -> int {
  598. x := ([^]u16)(lhs)
  599. y := ([^]u16)(rhs)
  600. if x == y {
  601. return 0
  602. }
  603. if (x == nil) ~ (y == nil) {
  604. return -1 if x == nil else +1
  605. }
  606. xn := cstring16_len(lhs)
  607. yn := cstring16_len(rhs)
  608. ret := memory_compare(x, y, min(xn, yn)*size_of(u16))
  609. if ret == 0 && xn != yn {
  610. return -1 if xn < yn else +1
  611. }
  612. return ret
  613. }
// Derived cstring16 comparison operators, defined via cstring16_eq/cstring16_cmp.
cstring16_ne :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return !cstring16_eq(a, b) }
cstring16_lt :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) < 0 }
cstring16_gt :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) > 0 }
cstring16_le :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) <= 0 }
cstring16_ge :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) >= 0 }
// Component-wise equality/inequality for complex types: equal iff the real
// and imaginary parts both compare equal (IEEE semantics, so NaN != NaN).
complex32_eq :: #force_inline proc "contextless" (a, b: complex32) -> bool { return real(a) == real(b) && imag(a) == imag(b) }
complex32_ne :: #force_inline proc "contextless" (a, b: complex32) -> bool { return real(a) != real(b) || imag(a) != imag(b) }
complex64_eq :: #force_inline proc "contextless" (a, b: complex64) -> bool { return real(a) == real(b) && imag(a) == imag(b) }
complex64_ne :: #force_inline proc "contextless" (a, b: complex64) -> bool { return real(a) != real(b) || imag(a) != imag(b) }
complex128_eq :: #force_inline proc "contextless" (a, b: complex128) -> bool { return real(a) == real(b) && imag(a) == imag(b) }
complex128_ne :: #force_inline proc "contextless" (a, b: complex128) -> bool { return real(a) != real(b) || imag(a) != imag(b) }

// Component-wise equality/inequality for quaternion types: all four
// components (real, i, j, k) must compare equal.
quaternion64_eq :: #force_inline proc "contextless" (a, b: quaternion64) -> bool { return real(a) == real(b) && imag(a) == imag(b) && jmag(a) == jmag(b) && kmag(a) == kmag(b) }
quaternion64_ne :: #force_inline proc "contextless" (a, b: quaternion64) -> bool { return real(a) != real(b) || imag(a) != imag(b) || jmag(a) != jmag(b) || kmag(a) != kmag(b) }
quaternion128_eq :: #force_inline proc "contextless" (a, b: quaternion128) -> bool { return real(a) == real(b) && imag(a) == imag(b) && jmag(a) == jmag(b) && kmag(a) == kmag(b) }
quaternion128_ne :: #force_inline proc "contextless" (a, b: quaternion128) -> bool { return real(a) != real(b) || imag(a) != imag(b) || jmag(a) != jmag(b) || kmag(a) != kmag(b) }
quaternion256_eq :: #force_inline proc "contextless" (a, b: quaternion256) -> bool { return real(a) == real(b) && imag(a) == imag(b) && jmag(a) == jmag(b) && kmag(a) == kmag(b) }
quaternion256_ne :: #force_inline proc "contextless" (a, b: quaternion256) -> bool { return real(a) != real(b) || imag(a) != imag(b) || jmag(a) != jmag(b) || kmag(a) != kmag(b) }
// Decodes the first UTF-8 encoded rune in `s`.
// Returns the rune and its encoded size in bytes (1..4).
// Empty input yields (RUNE_ERROR, 0); an invalid or truncated sequence
// yields (RUNE_ERROR, 1) so a caller can advance one byte and resync.
string_decode_rune :: proc "contextless" (s: string) -> (rune, int) {
	// NOTE(bill): Duplicated here to remove dependency on package unicode/utf8
	// Per-lead-byte table: the low 3 bits hold the total sequence length and
	// the high nibble indexes accept_ranges for the first continuation byte.
	// 0xf0 marks ASCII (decode the byte as-is) and 0xf1 marks an invalid
	// lead byte; both are >= 0xF0 and handled by the early-out below.
	@(static, rodata) accept_sizes := [256]u8{
		0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x00-0x0f
		0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x10-0x1f
		0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x20-0x2f
		0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x30-0x3f
		0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x40-0x4f
		0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x50-0x5f
		0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x60-0x6f
		0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x70-0x7f
		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, // 0x80-0x8f
		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, // 0x90-0x9f
		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, // 0xa0-0xaf
		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, // 0xb0-0xbf
		0xf1, 0xf1, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0xc0-0xcf
		0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0xd0-0xdf
		0x13, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x23, 0x03, 0x03, // 0xe0-0xef
		0x34, 0x04, 0x04, 0x04, 0x44, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, // 0xf0-0xff
	}
	Accept_Range :: struct {lo, hi: u8}
	// Valid ranges for the first continuation byte per lead-byte class,
	// e.g. lead 0xe0 requires 0xa0..0xbf (rejects overlong encodings) and
	// lead 0xed requires 0x80..0x9f (rejects UTF-16 surrogates).
	@(static, rodata) accept_ranges := [5]Accept_Range{
		{0x80, 0xbf},
		{0xa0, 0xbf},
		{0x80, 0x9f},
		{0x90, 0xbf},
		{0x80, 0x8f},
	}
	MASKX :: 0b0011_1111 // payload mask for continuation bytes
	MASK2 :: 0b0001_1111 // payload mask for a 2-byte lead
	MASK3 :: 0b0000_1111 // payload mask for a 3-byte lead
	MASK4 :: 0b0000_0111 // payload mask for a 4-byte lead
	LOCB :: 0b1000_0000 // lowest valid continuation byte
	HICB :: 0b1011_1111 // highest valid continuation byte
	RUNE_ERROR :: '\ufffd'
	n := len(s)
	if n < 1 {
		return RUNE_ERROR, 0
	}
	s0 := s[0]
	x := accept_sizes[s0]
	if x >= 0xF0 {
		// ASCII (0xf0) or invalid lead byte (0xf1). The low bit selects:
		// rune(0xf0)<<31>>31 == 0, rune(0xf1)<<31>>31 == all ones.
		mask := rune(x) << 31 >> 31 // NOTE(bill): Create 0x0000 or 0xffff.
		return rune(s[0])&~mask | RUNE_ERROR&mask, 1
	}
	sz := x & 7
	accept := accept_ranges[x>>4]
	if n < int(sz) {
		// Truncated multi-byte sequence.
		return RUNE_ERROR, 1
	}
	b1 := s[1]
	if b1 < accept.lo || accept.hi < b1 {
		return RUNE_ERROR, 1
	}
	if sz == 2 {
		return rune(s0&MASK2)<<6 | rune(b1&MASKX), 2
	}
	b2 := s[2]
	if b2 < LOCB || HICB < b2 {
		return RUNE_ERROR, 1
	}
	if sz == 3 {
		return rune(s0&MASK3)<<12 | rune(b1&MASKX)<<6 | rune(b2&MASKX), 3
	}
	b3 := s[3]
	if b3 < LOCB || HICB < b3 {
		return RUNE_ERROR, 1
	}
	return rune(s0&MASK4)<<18 | rune(b1&MASKX)<<12 | rune(b2&MASKX)<<6 | rune(b3&MASKX), 4
}
// Decodes the last UTF-8 encoded rune in `s`.
// Scans backwards (at most UTF_MAX bytes) for the start of the final
// sequence, then re-decodes that tail with string_decode_rune.
// Returns (RUNE_ERROR, 0) for an empty string and (RUNE_ERROR, 1) when
// the trailing bytes do not form exactly one valid rune.
string_decode_last_rune :: proc "contextless" (s: string) -> (rune, int) {
	RUNE_ERROR :: '\ufffd'
	RUNE_SELF :: 0x80
	UTF_MAX :: 4
	r: rune
	size: int
	start, end, limit: int
	end = len(s)
	if end == 0 {
		return RUNE_ERROR, 0
	}
	start = end-1
	r = rune(s[start])
	if r < RUNE_SELF {
		// Fast path: last byte is ASCII.
		return r, 1
	}
	// Walk back past continuation bytes (form 0b10xx_xxxx, i.e. b & 0xc0 == 0x80)
	// until a lead byte is found, but never more than UTF_MAX bytes.
	limit = max(end - UTF_MAX, 0)
	for start-=1; start >= limit; start-=1 {
		if (s[start] & 0xc0) != RUNE_SELF {
			break
		}
	}
	start = max(start, 0)
	r, size = string_decode_rune(s[start:end])
	// The decoded rune must span exactly [start, end); otherwise the tail
	// was malformed.
	if start+size != end {
		return RUNE_ERROR, 1
	}
	return r, size
}
  730. string16_decode_rune :: proc "contextless" (s: string16) -> (rune, int) {
  731. REPLACEMENT_CHAR :: '\ufffd'
  732. _surr1 :: 0xd800
  733. _surr2 :: 0xdc00
  734. _surr3 :: 0xe000
  735. _surr_self :: 0x10000
  736. r := rune(REPLACEMENT_CHAR)
  737. if len(s) < 1 {
  738. return r, 0
  739. }
  740. w := 1
  741. switch c := s[0]; {
  742. case c < _surr1, _surr3 <= c:
  743. r = rune(c)
  744. case _surr1 <= c && c < _surr2 && 1 < len(s) &&
  745. _surr2 <= s[1] && s[1] < _surr3:
  746. r1, r2 := rune(c), rune(s[1])
  747. if _surr1 <= r1 && r1 < _surr2 && _surr2 <= r2 && r2 < _surr3 {
  748. r = (r1-_surr1)<<10 | (r2 - _surr2) + _surr_self
  749. }
  750. w += 1
  751. }
  752. return r, w
  753. }
  754. string16_decode_last_rune :: proc "contextless" (s: string16) -> (rune, int) {
  755. REPLACEMENT_CHAR :: '\ufffd'
  756. _surr1 :: 0xd800
  757. _surr2 :: 0xdc00
  758. _surr3 :: 0xe000
  759. _surr_self :: 0x10000
  760. r := rune(REPLACEMENT_CHAR)
  761. if len(s) < 1 {
  762. return r, 0
  763. }
  764. n := len(s)-1
  765. c := s[n]
  766. w := 1
  767. if _surr2 <= c && c < _surr3 {
  768. if n >= 1 {
  769. r1 := rune(s[n-1])
  770. r2 := rune(c)
  771. if _surr1 <= r1 && r1 < _surr2 {
  772. r = (r1-_surr1)<<10 | (r2 - _surr2) + _surr_self
  773. }
  774. w = 2
  775. }
  776. } else if c < _surr1 || _surr3 <= c {
  777. r = rune(c)
  778. }
  779. return r, w
  780. }
  781. abs_complex32 :: #force_inline proc "contextless" (x: complex32) -> f16 {
  782. p, q := abs(real(x)), abs(imag(x))
  783. if p < q {
  784. p, q = q, p
  785. }
  786. if p == 0 {
  787. return 0
  788. }
  789. q = q / p
  790. return p * f16(intrinsics.sqrt(f32(1 + q*q)))
  791. }
  792. abs_complex64 :: #force_inline proc "contextless" (x: complex64) -> f32 {
  793. p, q := abs(real(x)), abs(imag(x))
  794. if p < q {
  795. p, q = q, p
  796. }
  797. if p == 0 {
  798. return 0
  799. }
  800. q = q / p
  801. return p * intrinsics.sqrt(1 + q*q)
  802. }
  803. abs_complex128 :: #force_inline proc "contextless" (x: complex128) -> f64 {
  804. p, q := abs(real(x)), abs(imag(x))
  805. if p < q {
  806. p, q = q, p
  807. }
  808. if p == 0 {
  809. return 0
  810. }
  811. q = q / p
  812. return p * intrinsics.sqrt(1 + q*q)
  813. }
  814. abs_quaternion64 :: #force_inline proc "contextless" (x: quaternion64) -> f16 {
  815. r, i, j, k := real(x), imag(x), jmag(x), kmag(x)
  816. return f16(intrinsics.sqrt(f32(r*r + i*i + j*j + k*k)))
  817. }
  818. abs_quaternion128 :: #force_inline proc "contextless" (x: quaternion128) -> f32 {
  819. r, i, j, k := real(x), imag(x), jmag(x), kmag(x)
  820. return intrinsics.sqrt(r*r + i*i + j*j + k*k)
  821. }
  822. abs_quaternion256 :: #force_inline proc "contextless" (x: quaternion256) -> f64 {
  823. r, i, j, k := real(x), imag(x), jmag(x), kmag(x)
  824. return intrinsics.sqrt(r*r + i*i + j*j + k*k)
  825. }
  826. quo_complex32 :: proc "contextless" (n, m: complex32) -> complex32 {
  827. nr, ni := f32(real(n)), f32(imag(n))
  828. mr, mi := f32(real(m)), f32(imag(m))
  829. e, f: f32
  830. if abs(mr) >= abs(mi) {
  831. ratio := mi / mr
  832. denom := mr + ratio*mi
  833. e = (nr + ni*ratio) / denom
  834. f = (ni - nr*ratio) / denom
  835. } else {
  836. ratio := mr / mi
  837. denom := mi + ratio*mr
  838. e = (nr*ratio + ni) / denom
  839. f = (ni*ratio - nr) / denom
  840. }
  841. return complex(f16(e), f16(f))
  842. }
  843. quo_complex64 :: proc "contextless" (n, m: complex64) -> complex64 {
  844. e, f: f32
  845. if abs(real(m)) >= abs(imag(m)) {
  846. ratio := imag(m) / real(m)
  847. denom := real(m) + ratio*imag(m)
  848. e = (real(n) + imag(n)*ratio) / denom
  849. f = (imag(n) - real(n)*ratio) / denom
  850. } else {
  851. ratio := real(m) / imag(m)
  852. denom := imag(m) + ratio*real(m)
  853. e = (real(n)*ratio + imag(n)) / denom
  854. f = (imag(n)*ratio - real(n)) / denom
  855. }
  856. return complex(e, f)
  857. }
  858. quo_complex128 :: proc "contextless" (n, m: complex128) -> complex128 {
  859. e, f: f64
  860. if abs(real(m)) >= abs(imag(m)) {
  861. ratio := imag(m) / real(m)
  862. denom := real(m) + ratio*imag(m)
  863. e = (real(n) + imag(n)*ratio) / denom
  864. f = (imag(n) - real(n)*ratio) / denom
  865. } else {
  866. ratio := real(m) / imag(m)
  867. denom := imag(m) + ratio*real(m)
  868. e = (real(n)*ratio + imag(n)) / denom
  869. f = (imag(n)*ratio - real(n)) / denom
  870. }
  871. return complex(e, f)
  872. }
  873. mul_quaternion64 :: proc "contextless" (q, r: quaternion64) -> quaternion64 {
  874. q0, q1, q2, q3 := f32(real(q)), f32(imag(q)), f32(jmag(q)), f32(kmag(q))
  875. r0, r1, r2, r3 := f32(real(r)), f32(imag(r)), f32(jmag(r)), f32(kmag(r))
  876. t0 := r0*q0 - r1*q1 - r2*q2 - r3*q3
  877. t1 := r0*q1 + r1*q0 - r2*q3 + r3*q2
  878. t2 := r0*q2 + r1*q3 + r2*q0 - r3*q1
  879. t3 := r0*q3 - r1*q2 + r2*q1 + r3*q0
  880. return quaternion(w=f16(t0), x=f16(t1), y=f16(t2), z=f16(t3))
  881. }
  882. mul_quaternion128 :: proc "contextless" (q, r: quaternion128) -> quaternion128 {
  883. q0, q1, q2, q3 := real(q), imag(q), jmag(q), kmag(q)
  884. r0, r1, r2, r3 := real(r), imag(r), jmag(r), kmag(r)
  885. t0 := r0*q0 - r1*q1 - r2*q2 - r3*q3
  886. t1 := r0*q1 + r1*q0 - r2*q3 + r3*q2
  887. t2 := r0*q2 + r1*q3 + r2*q0 - r3*q1
  888. t3 := r0*q3 - r1*q2 + r2*q1 + r3*q0
  889. return quaternion(w=t0, x=t1, y=t2, z=t3)
  890. }
  891. mul_quaternion256 :: proc "contextless" (q, r: quaternion256) -> quaternion256 {
  892. q0, q1, q2, q3 := real(q), imag(q), jmag(q), kmag(q)
  893. r0, r1, r2, r3 := real(r), imag(r), jmag(r), kmag(r)
  894. t0 := r0*q0 - r1*q1 - r2*q2 - r3*q3
  895. t1 := r0*q1 + r1*q0 - r2*q3 + r3*q2
  896. t2 := r0*q2 + r1*q3 + r2*q0 - r3*q1
  897. t3 := r0*q3 - r1*q2 + r2*q1 + r3*q0
  898. return quaternion(w=t0, x=t1, y=t2, z=t3)
  899. }
  900. quo_quaternion64 :: proc "contextless" (q, r: quaternion64) -> quaternion64 {
  901. q0, q1, q2, q3 := f32(real(q)), f32(imag(q)), f32(jmag(q)), f32(kmag(q))
  902. r0, r1, r2, r3 := f32(real(r)), f32(imag(r)), f32(jmag(r)), f32(kmag(r))
  903. invmag2 := 1.0 / (r0*r0 + r1*r1 + r2*r2 + r3*r3)
  904. t0 := (r0*q0 + r1*q1 + r2*q2 + r3*q3) * invmag2
  905. t1 := (r0*q1 - r1*q0 - r2*q3 - r3*q2) * invmag2
  906. t2 := (r0*q2 - r1*q3 - r2*q0 + r3*q1) * invmag2
  907. t3 := (r0*q3 + r1*q2 + r2*q1 - r3*q0) * invmag2
  908. return quaternion(w=f16(t0), x=f16(t1), y=f16(t2), z=f16(t3))
  909. }
  910. quo_quaternion128 :: proc "contextless" (q, r: quaternion128) -> quaternion128 {
  911. q0, q1, q2, q3 := real(q), imag(q), jmag(q), kmag(q)
  912. r0, r1, r2, r3 := real(r), imag(r), jmag(r), kmag(r)
  913. invmag2 := 1.0 / (r0*r0 + r1*r1 + r2*r2 + r3*r3)
  914. t0 := (r0*q0 + r1*q1 + r2*q2 + r3*q3) * invmag2
  915. t1 := (r0*q1 - r1*q0 - r2*q3 - r3*q2) * invmag2
  916. t2 := (r0*q2 - r1*q3 - r2*q0 + r3*q1) * invmag2
  917. t3 := (r0*q3 + r1*q2 + r2*q1 - r3*q0) * invmag2
  918. return quaternion(w=t0, x=t1, y=t2, z=t3)
  919. }
  920. quo_quaternion256 :: proc "contextless" (q, r: quaternion256) -> quaternion256 {
  921. q0, q1, q2, q3 := real(q), imag(q), jmag(q), kmag(q)
  922. r0, r1, r2, r3 := real(r), imag(r), jmag(r), kmag(r)
  923. invmag2 := 1.0 / (r0*r0 + r1*r1 + r2*r2 + r3*r3)
  924. t0 := (r0*q0 + r1*q1 + r2*q2 + r3*q3) * invmag2
  925. t1 := (r0*q1 - r1*q0 - r2*q3 - r3*q2) * invmag2
  926. t2 := (r0*q2 - r1*q3 - r2*q0 + r3*q1) * invmag2
  927. t3 := (r0*q3 + r1*q2 + r2*q1 - r3*q0) * invmag2
  928. return quaternion(w=t0, x=t1, y=t2, z=t3)
  929. }
// Compiler support routine (libgcc/compiler-rt ABI): truncates an
// IEEE-754 binary32 value to binary16 (__float16), handling denormal,
// infinity, NaN, and overflow cases.
@(link_name="__truncsfhf2", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
truncsfhf2 :: proc "c" (value: f32) -> __float16 {
	v: struct #raw_union { i: u32, f: f32 }
	i, s, e, m: i32
	v.f = value
	i = i32(v.i)
	s = (i >> 16) & 0x00008000                // sign bit, already in f16 position
	e = ((i >> 23) & 0x000000ff) - (127 - 15) // exponent rebased to the f16 bias
	m = i & 0x007fffff                        // 23-bit mantissa
	if e <= 0 {
		// Result is zero or an f16 denormal.
		if e < -10 {
			// Magnitude too small even for an f16 denormal: signed zero.
			return transmute(__float16)u16(s)
		}
		// Restore the implicit bit, then shift into denormal position.
		m = (m | 0x00800000) >> u32(1 - e)
		// Round up when the guard bit (bit 12, highest bit shifted out) is set.
		if m & 0x00001000 != 0 {
			m += 0x00002000
		}
		return transmute(__float16)u16(s | (m >> 13))
	} else if e == 0xff - (127 - 15) {
		// Source exponent was all ones: infinity or NaN.
		if m == 0 {
			return transmute(__float16)u16(s | 0x7c00) /* NOTE(bill): infinity */
		} else {
			/* NOTE(bill): NAN */
			m >>= 13
			// `| i32(m == 0)` keeps the payload non-zero so the NaN does not
			// collapse to infinity when the low payload bits are shifted away.
			return transmute(__float16)u16(s | 0x7c00 | m | i32(m == 0))
		}
	} else {
		// Normal result: round on the guard bit, re-checking for mantissa
		// overflow into the exponent.
		if m & 0x00001000 != 0 {
			m += 0x00002000
			if (m & 0x00800000) != 0 {
				m = 0  // mantissa carried out; bump the exponent instead
				e += 1
			}
		}
		if e > 30 {
			// Exponent overflow: result is infinity.
			// NOTE(review): this volatile loop presumably exists to produce an
			// observable overflow side effect the optimizer cannot remove —
			// confirm against the upstream soft-float sources.
			f := i64(1e12)
			for j := 0; j < 10; j += 1 {
				/* NOTE(bill): Cause overflow */
				g := intrinsics.volatile_load(&f)
				g *= g
				intrinsics.volatile_store(&f, g)
			}
			return transmute(__float16)u16(s | 0x7c00)
		}
		return transmute(__float16)u16(s | (e << 10) | (m >> 13))
	}
}
// ARM EABI entry point: f64 -> f16, implemented by truncating to f32 first.
// NOTE(review): the f64->f32->f16 double rounding can differ from a direct
// f64->f16 conversion in rare cases — presumably acceptable here; confirm.
@(link_name="__aeabi_d2h", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
aeabi_d2h :: proc "c" (value: f64) -> __float16 {
	return truncsfhf2(f32(value))
}
// libgcc-style entry point: f64 -> f16, implemented by truncating to f32
// first (same double-rounding caveat as __aeabi_d2h).
@(link_name="__truncdfhf2", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
truncdfhf2 :: proc "c" (value: f64) -> __float16 {
	return truncsfhf2(f32(value))
}
// Extends a binary16 (__float16) to binary32 using the "magic multiply"
// technique: the f16 exponent+mantissa are shifted below the f32 mantissa
// slot and rescaled with one multiply, which also normalizes f16 denormals.
@(link_name="__gnu_h2f_ieee", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
gnu_h2f_ieee :: proc "c" (value_: __float16) -> f32 {
	fp32 :: struct #raw_union { u: u32, f: f32 }
	value := transmute(u16)value_
	v: fp32
	magic, inf_or_nan: fp32
	magic.u = u32((254 - 15) << 23)      // f32 with exponent field 239, i.e. 2^112
	inf_or_nan.u = u32((127 + 16) << 23) // first f32 value that marks f16 inf/NaN
	v.u = u32(value & 0x7fff) << 13      // move exponent+mantissa into f32 position
	v.f *= magic.f                       // rescale; normalizes denormal inputs
	if v.f >= inf_or_nan.f {
		v.u |= 255 << 23                 // force an all-ones exponent (inf/NaN)
	}
	v.u |= u32(value & 0x8000) << 16     // reapply the sign bit
	return v.f
}
// GNU alias for f32 -> f16 truncation; forwards to truncsfhf2.
@(link_name="__gnu_f2h_ieee", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
gnu_f2h_ieee :: proc "c" (value: f32) -> __float16 {
	return truncsfhf2(value)
}
// libgcc-style alias for f16 -> f32 extension; forwards to gnu_h2f_ieee.
@(link_name="__extendhfsf2", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
extendhfsf2 :: proc "c" (value: __float16) -> f32 {
	return gnu_h2f_ieee(value)
}
// Converts a signed 128-bit integer to f64 with round-to-nearest-even,
// following the compiler-rt __floattidf algorithm.
@(link_name="__floattidf", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
floattidf :: proc "c" (a: i128) -> f64 {
	DBL_MANT_DIG :: 53
	if a == 0 {
		return 0.0
	}
	a := a
	N :: size_of(i128) * 8
	s := a >> (N-1)  // -1 if negative, 0 otherwise (`~` is xor in Odin)
	a = (a ~ s) - s  // absolute value
	sd: = N - intrinsics.count_leading_zeros(a) // number of significant digits
	e := i32(sd - 1) // exponent
	if sd > DBL_MANT_DIG {
		// More bits than the mantissa holds: reduce to DBL_MANT_DIG+2 bits
		// (mantissa + round bit + sticky bit), then round to nearest even.
		switch sd {
		case DBL_MANT_DIG + 1:
			a <<= 1
		case DBL_MANT_DIG + 2:
			// okay
		case:
			// Shift down, folding every dropped bit into the sticky bit.
			a = i128(u128(a) >> u128(sd - (DBL_MANT_DIG+2))) |
			    i128(u128(a) & (~u128(0) >> u128(N + DBL_MANT_DIG+2 - sd)) != 0)
		}
		a |= i128((a & 4) != 0) // sticky |= lowest kept mantissa bit (ties-to-even)
		a += 1                  // round up on the round bit
		a >>= 2                 // drop round + sticky bits
		// Rounding may have carried into one extra bit.
		if a & (i128(1) << DBL_MANT_DIG) != 0 {
			a >>= 1
			e += 1
		}
	} else {
		// Value fits the mantissa exactly: left-align it.
		a <<= u128(DBL_MANT_DIG - sd) & 127
	}
	// Assemble the IEEE-754 binary64 bit pattern.
	// NOTE(review): fb[0] = low word assumes a little-endian [2]u32 layout
	// under transmute — confirm for big-endian targets.
	fb: [2]u32
	fb[1] = (u32(s) & 0x80000000) |          // sign
	        (u32(e + 1023) << 20) |          // exponent
	        u32((u64(a) >> 32) & 0x000FFFFF) // mantissa-high
	fb[0] = u32(a) // mantissa-low
	return transmute(f64)fb
}
// Converts an unsigned 128-bit integer to f64 with round-to-nearest-even;
// unsigned counterpart of floattidf (same compiler-rt algorithm, no sign).
@(link_name="__floattidf_unsigned", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
floattidf_unsigned :: proc "c" (a: u128) -> f64 {
	DBL_MANT_DIG :: 53
	if a == 0 {
		return 0.0
	}
	a := a
	N :: size_of(u128) * 8
	sd: = N - intrinsics.count_leading_zeros(a) // number of significant digits
	e := i32(sd - 1) // exponent
	if sd > DBL_MANT_DIG {
		// More bits than the mantissa holds: reduce to DBL_MANT_DIG+2 bits
		// (mantissa + round bit + sticky bit), then round to nearest even.
		switch sd {
		case DBL_MANT_DIG + 1:
			a <<= 1
		case DBL_MANT_DIG + 2:
			// okay
		case:
			// Shift down, folding every dropped bit into the sticky bit.
			a = u128(u128(a) >> u128(sd - (DBL_MANT_DIG+2))) |
			    u128(u128(a) & (~u128(0) >> u128(N + DBL_MANT_DIG+2 - sd)) != 0)
		}
		a |= u128((a & 4) != 0) // sticky |= lowest kept mantissa bit (ties-to-even)
		a += 1                  // round up on the round bit
		a >>= 2                 // drop round + sticky bits
		// Rounding may have carried into one extra bit.
		if a & (1 << DBL_MANT_DIG) != 0 {
			a >>= 1
			e += 1
		}
	} else {
		// Value fits the mantissa exactly: left-align it.
		a <<= u128(DBL_MANT_DIG - sd)
	}
	// Assemble the IEEE-754 binary64 bit pattern (sign is always 0).
	// NOTE(review): fb[0] = low word assumes a little-endian [2]u32 layout
	// under transmute — confirm for big-endian targets.
	fb: [2]u32
	fb[1] = (0) |                            // sign
	        u32((e + 1023) << 20) |          // exponent
	        u32((u64(a) >> 32) & 0x000FFFFF) // mantissa-high
	fb[0] = u32(a) // mantissa-low
	return transmute(f64)fb
}
  1085. @(link_name="__fixunsdfti", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
  1086. fixunsdfti :: #force_no_inline proc "c" (a: f64) -> u128 {
  1087. // TODO(bill): implement `fixunsdfti` correctly
  1088. x := u64(a)
  1089. return u128(x)
  1090. }
// Stub conversion routine exported as __fixunsdfdi.
// NOTE(review): the symbol name suggests f64 -> 64-bit unsigned, yet the
// declared return type is i128 and the value is produced via i64(a) — the
// intended contract is unclear from this file; verify against callers
// before changing. Currently wrong/UB for values outside i64 range.
@(link_name="__fixunsdfdi", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
fixunsdfdi :: #force_no_inline proc "c" (a: f64) -> i128 {
	// TODO(bill): implement `fixunsdfdi` correctly
	x := i64(a)
	return i128(x)
}
// 128-bit unsigned remainder (libgcc-style __umodti3): returns a % b.
@(link_name="__umodti3", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
umodti3 :: proc "c" (a, b: u128) -> u128 {
	r: u128 = --- // deliberately uninitialized; udivmod128 writes it
	_ = udivmod128(a, b, &r) // quotient discarded
	return r
}
// 128-bit unsigned divmod (libgcc-style __udivmodti4): returns a / b and,
// when `rem` is non-nil, stores a % b through it (udivti3 below passes nil).
@(link_name="__udivmodti4", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
udivmodti4 :: proc "c" (a, b: u128, rem: ^u128) -> u128 {
	return udivmod128(a, b, rem)
}
// 128-bit unsigned division (libgcc-style __udivti3): returns a / b.
// Not emitted on WASM targets.
when !IS_WASM {
	@(link_name="__udivti3", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
	udivti3 :: proc "c" (a, b: u128) -> u128 {
		return udivmodti4(a, b, nil)
	}
}
// 128-bit signed remainder (libgcc-style __modti3): returns a % b with the
// sign of the dividend, via unsigned divmod on absolute values.
@(link_name="__modti3", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
modti3 :: proc "c" (a, b: i128) -> i128 {
	s_a := a >> (128 - 1)         // -1 if a is negative, 0 otherwise
	s_b := b >> (128 - 1)         // -1 if b is negative, 0 otherwise
	an := (a ~ s_a) - s_a         // |a| (`~` is xor: two's-complement negate when s == -1)
	bn := (b ~ s_b) - s_b         // |b|
	r: u128 = ---                 // written by udivmod128
	_ = udivmod128(u128(an), u128(bn), &r)
	return (i128(r) ~ s_a) - s_a  // reapply the dividend's sign
}
// 128-bit signed divmod (libgcc-style __divmodti4): returns a / b and
// stores a % b (sign of the dividend) through `rem`.
@(link_name="__divmodti4", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
divmodti4 :: proc "c" (a, b: i128, rem: ^i128) -> i128 {
	s_a := a >> (128 - 1) // -1 if negative or 0
	s_b := b >> (128 - 1)
	an := (a ~ s_a) - s_a // absolute
	bn := (b ~ s_b) - s_b
	s_b ~= s_a // quotient sign
	u_s_b := u128(s_b)
	u_s_a := u128(s_a)
	r: u128 = --- // written by udivmodti4
	u := i128((udivmodti4(u128(an), u128(bn), &r) ~ u_s_b) - u_s_b) // negate if negative
	rem^ = i128((r ~ u_s_a) - u_s_a) // remainder takes the dividend's sign
	return u
}
// 128-bit signed division (libgcc-style __divti3): returns a / b, computed
// as unsigned division of absolute values with the sign reapplied.
@(link_name="__divti3", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
divti3 :: proc "c" (a, b: i128) -> i128 {
	s_a := a >> (128 - 1) // -1 if negative or 0
	s_b := b >> (128 - 1)
	an := (a ~ s_a) - s_a // absolute
	bn := (b ~ s_b) - s_b
	s_a ~= s_b // quotient sign
	u_s_a := u128(s_a)
	return i128((udivmodti4(u128(an), u128(bn), nil) ~ u_s_a) - u_s_a) // negate if negative
}
  1147. @(link_name="__fixdfti", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
  1148. fixdfti :: proc "c" (a: u64) -> i128 {
  1149. significandBits :: 52
  1150. typeWidth :: (size_of(u64)*8)
  1151. exponentBits :: (typeWidth - significandBits - 1)
  1152. maxExponent :: ((1 << exponentBits) - 1)
  1153. exponentBias :: (maxExponent >> 1)
  1154. implicitBit :: (u64(1) << significandBits)
  1155. significandMask :: (implicitBit - 1)
  1156. signBit :: (u64(1) << (significandBits + exponentBits))
  1157. absMask :: (signBit - 1)
  1158. exponentMask :: (absMask ~ significandMask)
  1159. // Break a into sign, exponent, significand
  1160. aRep := a
  1161. aAbs := aRep & absMask
  1162. sign := i128(-1 if aRep & signBit != 0 else 1)
  1163. exponent := u64((aAbs >> significandBits) - exponentBias)
  1164. significand := u64((aAbs & significandMask) | implicitBit)
  1165. // If exponent is negative, the result is zero.
  1166. if exponent < 0 {
  1167. return 0
  1168. }
  1169. // If the value is too large for the integer type, saturate.
  1170. if exponent >= size_of(i128) * 8 {
  1171. return max(i128) if sign == 1 else min(i128)
  1172. }
  1173. // If 0 <= exponent < significandBits, right shift to get the result.
  1174. // Otherwise, shift left.
  1175. if exponent < significandBits {
  1176. return sign * i128(significand >> (significandBits - exponent))
  1177. } else {
  1178. return sign * (i128(significand) << (exponent - significandBits))
  1179. }
  1180. }
// Copies `size` bits from the start of `src` into `dst` beginning at bit
// `offset`. Bits are addressed LSB-first within each byte; bytes of `dst`
// outside the written bit range are left untouched.
__write_bits :: proc "contextless" (dst, src: [^]byte, offset: uintptr, size: uintptr) {
	for i in 0..<size {
		j := offset+i
		// Bit i of src, as 0 or 1.
		the_bit := byte((src[i>>3]) & (1<<(i&7)) != 0)
		// Clear then set bit j of dst.
		dst[j>>3] &~= 1<<(j&7)
		dst[j>>3] |= the_bit<<(j&7)
	}
}
// Copies `size` bits from `src` beginning at bit `offset` into the start
// of `dst` (the inverse of __write_bits). Bits are addressed LSB-first
// within each byte.
__read_bits :: proc "contextless" (dst, src: [^]byte, offset: uintptr, size: uintptr) {
	for j in 0..<size {
		i := offset+j
		// Bit (offset+j) of src, as 0 or 1.
		the_bit := byte((src[i>>3]) & (1<<(i&7)) != 0)
		// Clear then set bit j of dst.
		dst[j>>3] &~= 1<<(j&7)
		dst[j>>3] |= the_bit<<(j&7)
	}
}
// When compiling with AddressSanitizer, bind the ASan runtime hook used to
// mark a memory region as addressable again after it was poisoned.
when .Address in ODIN_SANITIZER_FLAGS {
	foreign {
		@(require)
		__asan_unpoison_memory_region :: proc "system" (address: rawptr, size: uint) ---
	}
}