3 роки тому · 0a87ffe0e6
--- a/core/c/libc/complex.odin
+++ b/core/c/libc/complex.odin
@@ -4,6 +4,8 @@ package libc
 
				 
			
 
				 when ODIN_OS == "windows" {
			
 
				 	foreign import libc "system:libucrt.lib"
			
 
				+} else when ODIN_OS == "darwin" {
			
 
				+	foreign import libc "system:System.framework"
			
 
				 } else {
			
 
				 	foreign import libc "system:c"
			
 
				 }
			
--- a/core/c/libc/ctype.odin
+++ b/core/c/libc/ctype.odin
@@ -2,6 +2,8 @@ package libc
 
				 
			
 
				 when ODIN_OS == "windows" {
			
 
				 	foreign import libc "system:libucrt.lib"
			
 
				+} else when ODIN_OS == "darwin" {
			
 
				+	foreign import libc "system:System.framework"
			
 
				 } else {
			
 
				 	foreign import libc "system:c"
			
 
				 }
			
--- a/core/c/libc/errno.odin
+++ b/core/c/libc/errno.odin
@@ -4,6 +4,8 @@ package libc
 
				 
			
 
				 when ODIN_OS == "windows" {
			
 
				 	foreign import libc "system:libucrt.lib"
			
 
				+} else when ODIN_OS == "darwin" {
			
 
				+	foreign import libc "system:System.framework"
			
 
				 } else {
			
 
				 	foreign import libc "system:c"
			
 
				 }
			
@@ -38,6 +40,20 @@ when ODIN_OS == "windows" {
 
				 	ERANGE :: 34
			
 
				 }
			
 
				 
			
 
				+// Darwin: binds the errno accessor and the three errno values exposed by
				+// this file.
				+when ODIN_OS == "darwin" {
				+	// File-private binding to the libc symbol `__error`, which returns a
				+	// pointer to an `int`.
				+	// NOTE(review): presumably this is the address of the calling thread's
				+	// errno - confirm against Darwin's <sys/errno.h>.
				+	@(private="file")
				+	@(default_calling_convention="c")
				+	foreign libc {
				+		@(link_name="__error")
				+		_get_errno :: proc() -> ^int ---
				+	}
				+
				+	// NOTE(review): this comment originally read just "Unknown"; it is
				+	// unclear what was unknown.  Confirm the values below against
				+	// Darwin's <sys/errno.h>.
				+	EDOM   :: 33
				+	EILSEQ :: 92
				+	ERANGE :: 34
				+}
			
 
				+
			
 
				 // Odin has no way to make an identifier "errno" behave as a function call to
			
 
				 // read the value, or to produce an lvalue such that you can assign a different
			
 
				 // error value to errno. To work around this, just expose it as a function like
			
--- a/core/c/libc/math.odin
+++ b/core/c/libc/math.odin
@@ -6,6 +6,8 @@ import "core:intrinsics"
 
				 
			
 
				 when ODIN_OS == "windows" {
			
 
				 	foreign import libc "system:libucrt.lib"
			
 
				+} else when ODIN_OS == "darwin" {
			
 
				+	foreign import libc "system:System.framework"
			
 
				 } else {
			
 
				 	foreign import libc "system:c"
			
 
				 }
			
--- a/core/c/libc/setjmp.odin
+++ b/core/c/libc/setjmp.odin
@@ -4,10 +4,11 @@ package libc
 
				 
			
 
				 when ODIN_OS == "windows" {
			
 
				 	foreign import libc "system:libucrt.lib"
			
 
				+} else when ODIN_OS == "darwin" {
			
 
				+	foreign import libc "system:System.framework"
			
 
				 } else {
			
 
				 	foreign import libc "system:c"
			
 
				 }
			
 
				-
			
 
				 when ODIN_OS == "windows" {
			
 
				 	@(default_calling_convention="c")
			
 
				 	foreign libc {
			
--- a/core/c/libc/signal.odin
+++ b/core/c/libc/signal.odin
@@ -4,6 +4,8 @@ package libc
 
				 
			
 
				 when ODIN_OS == "windows" {
			
 
				 	foreign import libc "system:libucrt.lib"
			
 
				+} else when ODIN_OS == "darwin" {
			
 
				+	foreign import libc "system:System.framework"
			
 
				 } else {
			
 
				 	foreign import libc "system:c"
			
 
				 }
			
@@ -32,7 +34,20 @@ when ODIN_OS == "windows" {
 
				 	SIGTERM :: 15
			
 
				 }
			
 
				 
			
 
				-when ODIN_OS == "linux" || ODIN_OS == "freebsd" || ODIN_OS == "darwin" {
			
 
				+// Linux and FreeBSD: sentinel handler values and the signal numbers
				+// exposed by this file.
				+when ODIN_OS == "linux" || ODIN_OS == "freebsd" {
				+	// Sentinel handler values, encoded as pointers: all-ones, 0 and 1.
				+	SIG_ERR  :: rawptr(~uintptr(0))
				+	SIG_DFL  :: rawptr(uintptr(0))
				+	SIG_IGN  :: rawptr(uintptr(1)) 
				+
				+	SIGABRT  :: 6
				+	SIGFPE   :: 8
				+	SIGILL   :: 4
				+	SIGINT   :: 2
				+	SIGSEGV  :: 11
				+	SIGTERM  :: 15
				+}
			
 
				+
			
 
				+when ODIN_OS == "darwin" {
			
 
				 	SIG_ERR  :: rawptr(~uintptr(0))
			
 
				 	SIG_DFL  :: rawptr(uintptr(0))
			
 
				 	SIG_IGN  :: rawptr(uintptr(1)) 
			
--- a/core/c/libc/stdio.odin
+++ b/core/c/libc/stdio.odin
@@ -2,6 +2,8 @@ package libc
 
				 
			
 
				 when ODIN_OS == "windows" {
			
 
				 	foreign import libc "system:libucrt.lib"
			
 
				+} else when ODIN_OS == "darwin" {
			
 
				+	foreign import libc "system:System.framework"
			
 
				 } else {
			
 
				 	foreign import libc "system:c"
			
 
				 }
			
@@ -67,7 +69,7 @@ when ODIN_OS == "linux" {
 
				 	SEEK_CUR      :: 1
			
 
				 	SEEK_END      :: 2
			
 
				 
			
 
				-	TMP_MAX       :: 10000
			
 
				+	TMP_MAX       :: 308915776
			
 
				 
			
 
				 	foreign libc {
			
 
				 		stderr: ^FILE
			
@@ -76,6 +78,36 @@ when ODIN_OS == "linux" {
 
				 	}
			
 
				 }
			
 
				 
			
 
				+// Darwin: stdio types, limits and the standard stream bindings.
				+// NOTE(review): the numeric values are assumed to mirror Darwin's
				+// <stdio.h> - confirm when updating.
				+when ODIN_OS == "darwin" {
				+	fpos_t :: distinct i64
				+	
				+	// Buffering modes.
				+	_IOFBF        :: 0
				+	_IOLBF        :: 1
				+	_IONBF        :: 2
				+
				+	BUFSIZ        :: 1024
				+
				+	EOF           :: int(-1)
				+
				+	FOPEN_MAX     :: 20
				+
				+	FILENAME_MAX  :: 1024
				+
				+	L_tmpnam      :: 1024
				+
				+	// Seek origins.
				+	SEEK_SET      :: 0
				+	SEEK_CUR      :: 1
				+	SEEK_END      :: 2
				+
				+	TMP_MAX       :: 308915776
				+
				+	// The standard streams are bound to the `__stderrp`, `__stdinp` and
				+	// `__stdoutp` symbols rather than to symbols named after the streams.
				+	foreign libc {
				+		@(link_name="__stderrp") stderr: ^FILE
				+		@(link_name="__stdinp")  stdin:  ^FILE
				+		@(link_name="__stdoutp") stdout: ^FILE
				+	}
				+}
			
 
				+
			
 
				 @(default_calling_convention="c")
			
 
				 foreign libc {
			
 
				 	// 7.21.4 Operations on files
			
--- a/core/c/libc/stdlib.odin
+++ b/core/c/libc/stdlib.odin
@@ -4,6 +4,8 @@ package libc
 
				 
			
 
				 when ODIN_OS == "windows" {
			
 
				 	foreign import libc "system:libucrt.lib"
			
 
				+} else when ODIN_OS == "darwin" {
			
 
				+	foreign import libc "system:System.framework"
			
 
				 } else {
			
 
				 	foreign import libc "system:c"
			
 
				 }
			
@@ -33,7 +35,23 @@ when ODIN_OS == "linux" {
 
				 	}
			
 
				 
			
 
				 	MB_CUR_MAX :: #force_inline proc() -> size_t {
			
 
				-		return __ctype_get_mb_cur_max()
			
 
				+		return size_t(__ctype_get_mb_cur_max())
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+
			
 
				+// Darwin: RAND_MAX and the MB_CUR_MAX accessor.
				+when ODIN_OS == "darwin" {
				+	RAND_MAX :: 0x7fffffff
				+
				+	// File-private libc helper backing MB_CUR_MAX on this platform.
				+	// NOTE(review): the previous comment here said "GLIBC and MUSL only",
				+	// which cannot apply inside a darwin-gated block; the symbol name
				+	// `___mb_cur_max` should be confirmed against Darwin's <stdlib.h>.
				+	@(private="file")
				+	@(default_calling_convention="c")
				+	foreign libc {
				+		___mb_cur_max :: proc() -> int ---
				+	}
				+
				+	// MB_CUR_MAX is exposed as a procedure; it converts the helper's
				+	// `int` result to `size_t`.
				+	MB_CUR_MAX :: #force_inline proc() -> size_t {
				+		return size_t(___mb_cur_max())
				 	}
				 }
			
 
				 
			
--- a/core/c/libc/string.odin
+++ b/core/c/libc/string.odin
@@ -6,6 +6,8 @@ import "core:runtime"
 
				 
			
 
				 when ODIN_OS == "windows" {
			
 
				 	foreign import libc "system:libucrt.lib"
			
 
				+} else when ODIN_OS == "darwin" {
			
 
				+	foreign import libc "system:System.framework"
			
 
				 } else {
			
 
				 	foreign import libc "system:c"
			
 
				 }
			
--- a/core/c/libc/threads.odin
+++ b/core/c/libc/threads.odin
@@ -136,3 +136,8 @@ when ODIN_OS == "linux" {
 
				 		tss_set       :: proc(key: tss_t, val: rawptr) -> int ---
			
 
				 	}
			
 
				 }
			
 
				+
			
 
				+
			
 
				+when ODIN_OS == "darwin" {
			
 
				+	// TODO: find out what this is meant to be!
			
 
				+}
			
--- a/core/c/libc/time.odin
+++ b/core/c/libc/time.odin
@@ -4,6 +4,8 @@ package libc
 
				 
			
 
				 when ODIN_OS == "windows" {
			
 
				 	foreign import libc "system:libucrt.lib"
			
 
				+} else when ODIN_OS == "darwin" {
			
 
				+	foreign import libc "system:System.framework"
			
 
				 } else {
			
 
				 	foreign import libc "system:c"
			
 
				 }
			
@@ -43,7 +45,7 @@ when ODIN_OS == "windows" {
 
				 	}
			
 
				 }
			
 
				 
			
 
				-when ODIN_OS == "linux" || ODIN_OS == "freebsd" {
			
 
				+when ODIN_OS == "linux" || ODIN_OS == "freebsd" || ODIN_OS == "darwin" {
			
 
				 	@(default_calling_convention="c")
			
 
				 	foreign libc {
			
 
				 		// 7.27.2 Time manipulation functions
			
@@ -75,7 +77,7 @@ when ODIN_OS == "linux" || ODIN_OS == "freebsd" {
 
				 
			
 
				 	tm :: struct {
			
 
				 		tm_sec, tm_min, tm_hour, tm_mday, tm_mon, tm_year, tm_wday, tm_yday, tm_isdst: int,
			
 
				-		_: long,
			
 
				-		_: rawptr,
			
 
				+		tm_gmtoff: long,
			
 
				+		tm_zone: rawptr,
			
 
				 	}
			
 
				 }
			
--- a/core/c/libc/uchar.odin
+++ b/core/c/libc/uchar.odin
@@ -4,6 +4,8 @@ package libc
 
				 
			
 
				 when ODIN_OS == "windows" {
			
 
				 	foreign import libc "system:libucrt.lib"
			
 
				+} else when ODIN_OS == "darwin" {
			
 
				+	foreign import libc "system:System.framework"
			
 
				 } else {
			
 
				 	foreign import libc "system:c"
			
 
				 }
			
--- a/core/c/libc/wchar.odin
+++ b/core/c/libc/wchar.odin
@@ -4,6 +4,8 @@ package libc
 
				 
			
 
				 when ODIN_OS == "windows" {
			
 
				 	foreign import libc "system:libucrt.lib"
			
 
				+} else when ODIN_OS == "darwin" {
			
 
				+	foreign import libc "system:System.framework"
			
 
				 } else {
			
 
				 	foreign import libc "system:c"
			
 
				 }
			
--- a/core/c/libc/wctype.odin
+++ b/core/c/libc/wctype.odin
@@ -4,6 +4,8 @@ package libc
 
				 
			
 
				 when ODIN_OS == "windows" {
			
 
				 	foreign import libc "system:libucrt.lib"
			
 
				+} else when ODIN_OS == "darwin" {
			
 
				+	foreign import libc "system:System.framework"
			
 
				 } else {
			
 
				 	foreign import libc "system:c"
			
 
				 }
			
@@ -14,10 +16,15 @@ when ODIN_OS == "windows" {
 
				 }
			
 
				 
			
 
				 when ODIN_OS == "linux" {
			
 
				-	wctrans_t :: distinct rawptr
			
 
				+	wctrans_t :: distinct intptr_t
			
 
				 	wctype_t  :: distinct ulong
			
 
				 }
			
 
				 
			
 
				+// Darwin: both handle types are plain integers here.
				+// NOTE(review): widths assumed to match Darwin's <wctype.h> - confirm.
				+when ODIN_OS == "darwin" {
				+	wctrans_t :: distinct int
				+	wctype_t  :: distinct u32
				+}
			
 
				+
			
 
				 @(default_calling_convention="c")
			
 
				 foreign libc {
			
 
				 	// 7.30.2.1 Wide character classification functions
			
--- a/core/crypto/_fiat/README.md
+++ b/core/crypto/_fiat/README.md
@@ -0,0 +1,35 @@
 
				+# fiat
			
 
				+
			
 
				+This package contains low level arithmetic required to implement certain
			
 
				+cryptographic primitives, ported from the [fiat-crypto project][1]
			
 
				+along with some higher-level helpers.
			
 
				+
			
 
				+## Notes
			
 
				+
			
 
				+fiat-crypto gives the choice of 3 licenses for derived works.  The 1-Clause
			
 
				+BSD license is chosen as it is compatible with Odin's existing licensing.
			
 
				+
			
 
				+The routines are intended to be timing-safe, as long as the underlying
			
 
				+integer arithmetic is constant time.  This is true on most systems commonly
			
 
				+used today, with the notable exception of WASM.
			
 
				+
			
 
				+While fiat-crypto provides output targeting both 32-bit and 64-bit
			
 
				+architectures, only the 64-bit versions were used, as 32-bit architectures
			
 
				+are becoming increasingly uncommon and irrelevant.
			
 
				+
			
 
				+With the current Odin syntax, the Go output is trivially ported in most
			
 
				+cases and was used as the basis of the port.
			
 
				+
			
 
				+In the future, it would be better to auto-generate Odin either directly
			
 
				+by adding an appropriate code-gen backend written in Coq, or perhaps by
			
 
				+parsing the JSON output.
			
 
				+
			
 
				+As this is a port rather than autogenerated output, none of fiat-crypto's
			
 
				+formal verification guarantees apply, unless it is possible to prove binary
			
 
				+equivalence.
			
 
				+
			
 
				+For the most part, alterations to the base fiat-crypto generated code were
			
 
				+kept to a minimum, to aid auditability.  This results in a somewhat
			
 
				+idiosyncratic style, and in some cases minor performance penalties.
			
 
				+
			
 
				+[1]: https://github.com/mit-plv/fiat-crypto
			
--- a/core/crypto/_fiat/fiat.odin
+++ b/core/crypto/_fiat/fiat.odin
@@ -0,0 +1,24 @@
 
				+package fiat
			
 
				+
			
 
				+// This package provides various helpers and types common to all of the
			
 
				+// fiat-crypto derived backends.
			
 
				+
			
 
				+// This code only works on a two's complement system.
			
 
				+#assert((-1 & 3) == 3)
			
 
				+
			
 
				+u1 :: distinct u8
			
 
				+i1 :: distinct i8
			
 
				+
			
 
				+// cmovznz_u64 is a branch-free select: it yields `arg2` when `arg1` is 0
				+// and `arg3` when `arg1` is 1.
				+cmovznz_u64 :: #force_inline proc "contextless" (arg1: u1, arg2, arg3: u64) -> (out1: u64) {
				+	// All ones when arg1 == 1, all zeroes when arg1 == 0.
				+	mask := u64(arg1) * 0xffffffffffffffff
				+	out1 = (mask & arg3) | ((~mask) & arg2)
				+	return
				+}
			
 
				+
			
 
				+// cmovznz_u32 is a branch-free select: it yields `arg2` when `arg1` is 0
				+// and `arg3` when `arg1` is 1.
				+cmovznz_u32 :: #force_inline proc "contextless" (arg1: u1, arg2, arg3: u32) -> (out1: u32) {
				+	// All ones when arg1 == 1, all zeroes when arg1 == 0.
				+	mask := u32(arg1) * 0xffffffff
				+	out1 = (mask & arg3) | ((~mask) & arg2)
				+	return
				+}
			
--- a/core/crypto/_fiat/field_curve25519/field.odin
+++ b/core/crypto/_fiat/field_curve25519/field.odin
@@ -0,0 +1,138 @@
 
				+package field_curve25519
			
 
				+
			
 
				+import "core:crypto"
			
 
				+import "core:mem"
			
 
				+
			
 
				+// fe_relax_cast reinterprets a pointer to a tight field element as a
				+// pointer to a loose one.  No data is copied or modified.
				+fe_relax_cast :: #force_inline proc "contextless" (arg1: ^Tight_Field_Element) -> ^Loose_Field_Element {
				+	loose := transmute(^Loose_Field_Element)(arg1)
				+	return loose
				+}
			
 
				+
			
 
				+// fe_tighten_cast reinterprets a pointer to a loose field element as a
				+// pointer to a tight one.  No data is copied, modified or reduced.
				+fe_tighten_cast :: #force_inline proc "contextless" (arg1: ^Loose_Field_Element) -> ^Tight_Field_Element {
				+	tight := transmute(^Tight_Field_Element)(arg1)
				+	return tight
				+}
			
 
				+
			
 
				+// fe_from_bytes deserializes the 32-byte encoding `arg1` into `out1`,
				+// ignoring the top bit of the last byte.  The caller's buffer is not
				+// modified.
				+fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte) {
				+	// Work on a private copy so the unused bit can be cleared, and wipe
				+	// that copy on the way out.
				+	masked: [32]byte = ---
				+	defer mem.zero_explicit(&masked, size_of(masked))
				+
				+	copy_slice(masked[:], arg1[:])
				+	masked[31] &= 127
				+
				+	_fe_from_bytes(out1, &masked)
				+}
			
 
				+
			
 
				+// fe_equal compares two tight field elements by serializing `arg2` and
				+// delegating to fe_equal_bytes; it returns that procedure's result.
				+fe_equal :: proc "contextless" (arg1, arg2: ^Tight_Field_Element) -> int {
				+	encoded: [32]byte = ---
				+	// Wipe the serialized copy once the comparison result is computed.
				+	defer mem.zero_explicit(&encoded, size_of(encoded))
				+
				+	fe_to_bytes(&encoded, arg2)
				+	return fe_equal_bytes(arg1, &encoded)
				+}
			
 
				+
			
 
				+// fe_equal_bytes serializes `arg1` and compares the result against the
				+// 32-byte buffer `arg2` using crypto.compare_constant_time, returning
				+// that procedure's result.
				+fe_equal_bytes :: proc "contextless" (arg1: ^Tight_Field_Element, arg2: ^[32]byte) -> int {
				+	encoded: [32]byte = ---
				+	// Wipe the serialized copy once the comparison result is computed.
				+	defer mem.zero_explicit(&encoded, size_of(encoded))
				+
				+	fe_to_bytes(&encoded, arg1)
				+	return crypto.compare_constant_time(encoded[:], arg2[:])
				+}
			
 
				+
			
 
				+// fe_carry_pow2k sets `out1 = arg1^(2^arg2)`, i.e. it squares `arg1`
				+// `arg2` times, carrying after every squaring.
				+//
				+// `out1` may alias `arg1`; fe_carry_invsqrt calls it that way.
				+fe_carry_pow2k :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element, arg2: uint) {
				+	// Special case: zero squarings is `arg1^(2^0) = arg1`, so just carry
				+	// the loose input into a tight element.  None of the callers visible
				+	// in this package pass 0.
				+	if arg2 == 0 {
				+		fe_carry(out1, arg1)
				+		return
				+	}
				+
				+	// The first squaring consumes the loose input; the remaining
				+	// `arg2 - 1` squarings are applied to the running result in place.
				+	fe_carry_square(out1, arg1)
				+	for _ in 1..<arg2 {
				+		fe_carry_square(out1, fe_relax_cast(out1))
				+	}
				+}
			
 
				+
			
 
				+// fe_carry_opp applies fe_opp to `arg1` and then carries the result, so
				+// that `out1` ends up as a tight element.  `out1` doubles as the scratch
				+// space for the intermediate loose value.
				+fe_carry_opp :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
				+	scratch := fe_relax_cast(out1)
				+	fe_opp(scratch, arg1)
				+	fe_carry(out1, scratch)
				+}
			
 
				+
			
 
				+// fe_carry_invsqrt writes a candidate inverse square root of `arg1`
				+// (called `x` below) to `out1` and returns `p1 | m1`, the flags for
				+// `quartic == 1` and `quartic == -1`.
				+// NOTE(review): presumably a return of 1 means `x` was a square, so that
				+// `out1` is a true inverse square root - confirm against Monocypher.
				+//
				+// `out1` may alias `arg1` (fe_carry_inv calls it that way): `out1` is
				+// only written after the last read of `arg1`.
				+fe_carry_invsqrt :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) -> int {
				+	// Inverse square root taken from Monocypher.
				+
				+	tmp1, tmp2, tmp3: Tight_Field_Element = ---, ---, ---
				+
				+	// t0 = x^((p-5)/8)
				+	// Can be achieved with a simple double & add ladder,
				+	// but it would be slower.
				+	// The result of this fixed addition chain ends up in tmp1.
				+	fe_carry_pow2k(&tmp1, arg1, 1)
				+	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 2)
				+	fe_carry_mul(&tmp2, arg1, fe_relax_cast(&tmp2))
				+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), fe_relax_cast(&tmp2))
				+	fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 1)
				+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
				+	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 5)
				+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
				+	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 10)
				+	fe_carry_mul(&tmp2, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
				+	fe_carry_pow2k(&tmp3, fe_relax_cast(&tmp2), 20)
				+	fe_carry_mul(&tmp2, fe_relax_cast(&tmp3), fe_relax_cast(&tmp2))
				+	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp2), 10)
				+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
				+	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 50)
				+	fe_carry_mul(&tmp2, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
				+	fe_carry_pow2k(&tmp3, fe_relax_cast(&tmp2), 100)
				+	fe_carry_mul(&tmp2, fe_relax_cast(&tmp3), fe_relax_cast(&tmp2))
				+	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp2), 50)
				+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
				+	fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 2)
				+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), arg1)
				+
				+	// quartic = x^((p-1)/4), computed as t0^2 * x.  It reuses tmp2.
				+	quartic := &tmp2
				+	fe_carry_square(quartic, fe_relax_cast(&tmp1))
				+	fe_carry_mul(quartic, fe_relax_cast(quartic), arg1)
				+
				+	// Serialize quartic once to save on repeated serialization/sanitization.
				+	quartic_buf: [32]byte = ---
				+	fe_to_bytes(&quartic_buf, quartic)
				+	check := &tmp3
				+
				+	// Compare quartic against 1, -1 and -sqrt(-1) in turn, reusing `check`
				+	// for each constant.
				+	fe_one(check)
				+	p1 := fe_equal_bytes(check, &quartic_buf)
				+	fe_carry_opp(check, check)
				+	m1 := fe_equal_bytes(check, &quartic_buf)
				+	fe_carry_opp(check, &SQRT_M1)
				+	ms := fe_equal_bytes(check, &quartic_buf)
				+
				+	// if quartic == -1 or -sqrt(-1)
				+	// then  isr = t0 * sqrt(-1)
				+	// else  isr = t0
				+	// where t0 = x^((p-5)/8) is held in tmp1.  The product is computed
				+	// unconditionally; `(m1|ms) ~ 1` is (m1|ms) XOR 1.
				+	// NOTE(review): presumably fe_cond_assign overwrites out1 with tmp1
				+	// when its last argument is 1 - confirm against its definition.
				+	fe_carry_mul(out1, fe_relax_cast(&tmp1), fe_relax_cast(&SQRT_M1))
				+	fe_cond_assign(out1, &tmp1, (m1|ms) ~ 1)
				+
				+	// Wipe all intermediates before returning.
				+	mem.zero_explicit(&tmp1, size_of(tmp1))
				+	mem.zero_explicit(&tmp2, size_of(tmp2))
				+	mem.zero_explicit(&tmp3, size_of(tmp3))
				+	mem.zero_explicit(&quartic_buf, size_of(quartic_buf))
				+
				+	return p1 | m1
				+}
			
 
				+
			
 
				+// fe_carry_inv computes `out1` from `arg1` (called `x` below) by
				+// squaring it, taking fe_carry_invsqrt of the square, squaring that
				+// result, and multiplying by `x`.
				+// NOTE(review): presumably this yields 1/x, since invsqrt(x^2)^2 * x
				+// = x / x^2 - confirm.
				+fe_carry_inv :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
				+	acc: Tight_Field_Element
				+	// Wipe the intermediate once the final product has been written.
				+	defer mem.zero_explicit(&acc, size_of(acc))
				+
				+	acc_loose := fe_relax_cast(&acc)
				+
				+	fe_carry_square(&acc, arg1)
				+	// The status flag is deliberately ignored.
				+	_ = fe_carry_invsqrt(&acc, acc_loose)
				+	fe_carry_square(&acc, acc_loose)
				+	fe_carry_mul(out1, acc_loose, arg1)
				+}
			
--- a/core/crypto/_fiat/field_curve25519/field51.odin
+++ b/core/crypto/_fiat/field_curve25519/field51.odin
@@ -0,0 +1,616 @@
 
				+// The BSD 1-Clause License (BSD-1-Clause)
			
 
				+//
			
 
				+// Copyright (c) 2015-2020 the fiat-crypto authors (see the AUTHORS file)
			
 
				+// All rights reserved.
			
 
				+//
			
 
				+// Redistribution and use in source and binary forms, with or without
			
 
				+// modification, are permitted provided that the following conditions are
			
 
				+// met:
			
 
				+//
			
 
				+//     1. Redistributions of source code must retain the above copyright
			
 
				+//        notice, this list of conditions and the following disclaimer.
			
 
				+//
			
 
				+// THIS SOFTWARE IS PROVIDED BY the fiat-crypto authors "AS IS"
			
 
				+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
			
 
				+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
			
 
				+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Berkeley Software Design,
			
 
				+// Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
			
 
				+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
			
 
				+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
			
 
				+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
			
 
				+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
			
 
				+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
			
 
				+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				+
			
 
				+package field_curve25519
			
 
				+
			
 
				+// The file provides arithmetic on the field Z/(2^255-19) using
			
 
				+// unsaturated 64-bit integer arithmetic.  It is derived primarily
			
 
				+// from the machine generated Golang output from the fiat-crypto project.
			
 
				+//
			
 
				+// While the base implementation is provably correct, this implementation
			
 
				+// makes no such claims as the port and optimizations were done by hand.
			
 
				+// At some point, it may be worth adding support to fiat-crypto for
			
 
				+// generating Odin output.
			
 
				+//
			
 
				+// TODO:
			
 
				+//  * When fiat-crypto supports it, using saturated 64-bit limbs
			
 
				+//    instead of 51-bit limbs will be faster, though the gains are
			
 
				+//    minimal unless adcx/adox/mulx are used.
			
 
				+
			
 
				+import fiat "core:crypto/_fiat"
			
 
				+import "core:math/bits"
			
 
				+
			
 
				+Loose_Field_Element :: distinct [5]u64
			
 
				+Tight_Field_Element :: distinct [5]u64
			
 
				+
			
 
				+// SQRT_M1 is the field constant used as sqrt(-1) by fe_carry_invsqrt,
				+// stored as five limbs.
				+// It is declared with `:=` rather than `::` because fe_carry_invsqrt
				+// takes its address (`&SQRT_M1`); it is not meant to be modified.
				+SQRT_M1 := Tight_Field_Element{
				+	1718705420411056,
				+	234908883556509,
				+	2233514472574048,
				+	2117202627021982,
				+	765476049583133,
				+}
			
 
				+
			
 
				+// _addcarryx_u51 adds `arg2`, `arg3` and the incoming carry `arg1`.
				+// `out1` is the low 51 bits of the sum; `out2` is the sum shifted right
				+// by 51 bits, converted to a `fiat.u1`.
				+_addcarryx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
				+	sum := (u64(arg1) + arg2) + arg3
				+	out1 = sum & 0x7ffffffffffff
				+	out2 = fiat.u1(sum >> 51)
				+	return
				+}
			
 
				+
			
 
				+// _subborrowx_u51 computes `arg2 - arg1 - arg3` in signed 64-bit
				+// arithmetic, where `arg1` is the incoming borrow.  `out1` is the low 51
				+// bits of the difference; `out2` is the negated value of the difference
				+// shifted right by 51 bits, i.e. the outgoing borrow.
				+_subborrowx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
				+	diff := (i64(arg2) - i64(arg1)) - i64(arg3)
				+	// The shift is on a signed value, so a negative difference yields -1.
				+	sign := fiat.i1(diff >> 51)
				+	out1 = u64(diff) & 0x7ffffffffffff
				+	out2 = 0x0 - fiat.u1(sign)
				+	return
				+}
			
 
				+
			
 
				+// fe_carry_mul multiplies the loose field elements `arg1` and `arg2` and
				+// writes the carried (tight) product to `out1`.
				+//
				+// All inputs are read before any element of `out1` is written, so `out1`
				+// may alias either argument (callers in this package rely on that).
				+// The statement order follows the fiat-crypto generated code this file
				+// says it is derived from; keep it as-is to aid auditing.
				+fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
				+	// 5x5 schoolbook products, each kept as a 128-bit result split across
				+	// two 64-bit values.  Products whose limb indices sum to 5 or more
				+	// wrap around and are pre-multiplied by 0x13 (19), following
				+	// 2^255 = 19 (mod 2^255-19).
				+	// NOTE(review): bits.mul_u64 presumably returns (hi, lo) - confirm.
				+	x2, x1 := bits.mul_u64(arg1[4], (arg2[4] * 0x13))
				+	x4, x3 := bits.mul_u64(arg1[4], (arg2[3] * 0x13))
				+	x6, x5 := bits.mul_u64(arg1[4], (arg2[2] * 0x13))
				+	x8, x7 := bits.mul_u64(arg1[4], (arg2[1] * 0x13))
				+	x10, x9 := bits.mul_u64(arg1[3], (arg2[4] * 0x13))
				+	x12, x11 := bits.mul_u64(arg1[3], (arg2[3] * 0x13))
				+	x14, x13 := bits.mul_u64(arg1[3], (arg2[2] * 0x13))
				+	x16, x15 := bits.mul_u64(arg1[2], (arg2[4] * 0x13))
				+	x18, x17 := bits.mul_u64(arg1[2], (arg2[3] * 0x13))
				+	x20, x19 := bits.mul_u64(arg1[1], (arg2[4] * 0x13))
				+	x22, x21 := bits.mul_u64(arg1[4], arg2[0])
				+	x24, x23 := bits.mul_u64(arg1[3], arg2[1])
				+	x26, x25 := bits.mul_u64(arg1[3], arg2[0])
				+	x28, x27 := bits.mul_u64(arg1[2], arg2[2])
				+	x30, x29 := bits.mul_u64(arg1[2], arg2[1])
				+	x32, x31 := bits.mul_u64(arg1[2], arg2[0])
				+	x34, x33 := bits.mul_u64(arg1[1], arg2[3])
				+	x36, x35 := bits.mul_u64(arg1[1], arg2[2])
				+	x38, x37 := bits.mul_u64(arg1[1], arg2[1])
				+	x40, x39 := bits.mul_u64(arg1[1], arg2[0])
				+	x42, x41 := bits.mul_u64(arg1[0], arg2[4])
				+	x44, x43 := bits.mul_u64(arg1[0], arg2[3])
				+	x46, x45 := bits.mul_u64(arg1[0], arg2[2])
				+	x48, x47 := bits.mul_u64(arg1[0], arg2[1])
				+	x50, x49 := bits.mul_u64(arg1[0], arg2[0])
				+	// Limb 0: 128-bit sum of the five products contributing to it
				+	// (x63 low half, x65 high half).
				+	x51, x52 := bits.add_u64(x13, x7, u64(0x0))
				+	x53, _ := bits.add_u64(x14, x8, u64(fiat.u1(x52)))
				+	x55, x56 := bits.add_u64(x17, x51, u64(0x0))
				+	x57, _ := bits.add_u64(x18, x53, u64(fiat.u1(x56)))
				+	x59, x60 := bits.add_u64(x19, x55, u64(0x0))
				+	x61, _ := bits.add_u64(x20, x57, u64(fiat.u1(x60)))
				+	x63, x64 := bits.add_u64(x49, x59, u64(0x0))
				+	x65, _ := bits.add_u64(x50, x61, u64(fiat.u1(x64)))
				+	// x67 = limb-0 sum >> 51 (carry into limb 1); x68 = its low 51 bits.
				+	x67 := ((x63 >> 51) | ((x65 << 13) & 0xffffffffffffffff))
				+	x68 := (x63 & 0x7ffffffffffff)
				+	// Limb 4 sum (x81 low, x83 high).
				+	x69, x70 := bits.add_u64(x23, x21, u64(0x0))
				+	x71, _ := bits.add_u64(x24, x22, u64(fiat.u1(x70)))
				+	x73, x74 := bits.add_u64(x27, x69, u64(0x0))
				+	x75, _ := bits.add_u64(x28, x71, u64(fiat.u1(x74)))
				+	x77, x78 := bits.add_u64(x33, x73, u64(0x0))
				+	x79, _ := bits.add_u64(x34, x75, u64(fiat.u1(x78)))
				+	x81, x82 := bits.add_u64(x41, x77, u64(0x0))
				+	x83, _ := bits.add_u64(x42, x79, u64(fiat.u1(x82)))
				+	// Limb 3 sum (x97 low, x99 high).
				+	x85, x86 := bits.add_u64(x25, x1, u64(0x0))
				+	x87, _ := bits.add_u64(x26, x2, u64(fiat.u1(x86)))
				+	x89, x90 := bits.add_u64(x29, x85, u64(0x0))
				+	x91, _ := bits.add_u64(x30, x87, u64(fiat.u1(x90)))
				+	x93, x94 := bits.add_u64(x35, x89, u64(0x0))
				+	x95, _ := bits.add_u64(x36, x91, u64(fiat.u1(x94)))
				+	x97, x98 := bits.add_u64(x43, x93, u64(0x0))
				+	x99, _ := bits.add_u64(x44, x95, u64(fiat.u1(x98)))
				+	// Limb 2 sum (x113 low, x115 high).
				+	x101, x102 := bits.add_u64(x9, x3, u64(0x0))
				+	x103, _ := bits.add_u64(x10, x4, u64(fiat.u1(x102)))
				+	x105, x106 := bits.add_u64(x31, x101, u64(0x0))
				+	x107, _ := bits.add_u64(x32, x103, u64(fiat.u1(x106)))
				+	x109, x110 := bits.add_u64(x37, x105, u64(0x0))
				+	x111, _ := bits.add_u64(x38, x107, u64(fiat.u1(x110)))
				+	x113, x114 := bits.add_u64(x45, x109, u64(0x0))
				+	x115, _ := bits.add_u64(x46, x111, u64(fiat.u1(x114)))
				+	// Limb 1 sum (x129 low, x131 high).
				+	x117, x118 := bits.add_u64(x11, x5, u64(0x0))
				+	x119, _ := bits.add_u64(x12, x6, u64(fiat.u1(x118)))
				+	x121, x122 := bits.add_u64(x15, x117, u64(0x0))
				+	x123, _ := bits.add_u64(x16, x119, u64(fiat.u1(x122)))
				+	x125, x126 := bits.add_u64(x39, x121, u64(0x0))
				+	x127, _ := bits.add_u64(x40, x123, u64(fiat.u1(x126)))
				+	x129, x130 := bits.add_u64(x47, x125, u64(0x0))
				+	x131, _ := bits.add_u64(x48, x127, u64(fiat.u1(x130)))
				+	// Carry chain: add the carry from the previous limb, then split the
				+	// 128-bit sum into a 51-bit limb and the carry into the next limb.
				+	x133, x134 := bits.add_u64(x67, x129, u64(0x0))
				+	x135 := (u64(fiat.u1(x134)) + x131)
				+	x136 := ((x133 >> 51) | ((x135 << 13) & 0xffffffffffffffff))
				+	x137 := (x133 & 0x7ffffffffffff)
				+	x138, x139 := bits.add_u64(x136, x113, u64(0x0))
				+	x140 := (u64(fiat.u1(x139)) + x115)
				+	x141 := ((x138 >> 51) | ((x140 << 13) & 0xffffffffffffffff))
				+	x142 := (x138 & 0x7ffffffffffff)
				+	x143, x144 := bits.add_u64(x141, x97, u64(0x0))
				+	x145 := (u64(fiat.u1(x144)) + x99)
				+	x146 := ((x143 >> 51) | ((x145 << 13) & 0xffffffffffffffff))
				+	x147 := (x143 & 0x7ffffffffffff)
				+	x148, x149 := bits.add_u64(x146, x81, u64(0x0))
				+	x150 := (u64(fiat.u1(x149)) + x83)
				+	x151 := ((x148 >> 51) | ((x150 << 13) & 0xffffffffffffffff))
				+	x152 := (x148 & 0x7ffffffffffff)
				+	// The carry out of limb 4 wraps to limb 0 multiplied by 19, followed
				+	// by a short final carry through limbs 0 -> 1 -> 2.
				+	x153 := (x151 * 0x13)
				+	x154 := (x68 + x153)
				+	x155 := (x154 >> 51)
				+	x156 := (x154 & 0x7ffffffffffff)
				+	x157 := (x155 + x137)
				+	x158 := fiat.u1((x157 >> 51))
				+	x159 := (x157 & 0x7ffffffffffff)
				+	x160 := (u64(x158) + x142)
				+	out1[0] = x156
				+	out1[1] = x159
				+	out1[2] = x160
				+	out1[3] = x147
				+	out1[4] = x152
				+}
			
 
				+
			
 
				+// fe_carry_square squares the loose field element `arg1` and writes the
				+// carried (tight) result to `out1`.
				+//
				+// All of `arg1` is read before any element of `out1` is written, so
				+// `out1` may alias `arg1` (fe_carry_pow2k relies on that).
				+// The statement order follows the fiat-crypto generated code this file
				+// says it is derived from; keep it as-is to aid auditing.
				+fe_carry_square :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
				+	// Pre-scaled limbs: factors of 2 cover the symmetric cross terms,
				+	// factors of 0x13 (19) cover terms that wrap past limb 4.
				+	x1 := (arg1[4] * 0x13)
				+	x2 := (x1 * 0x2)
				+	x3 := (arg1[4] * 0x2)
				+	x4 := (arg1[3] * 0x13)
				+	x5 := (x4 * 0x2)
				+	x6 := (arg1[3] * 0x2)
				+	x7 := (arg1[2] * 0x2)
				+	x8 := (arg1[1] * 0x2)
				+	// The 15 distinct products, each kept as a 128-bit result split
				+	// across two 64-bit values.
				+	// NOTE(review): bits.mul_u64 presumably returns (hi, lo) - confirm.
				+	x10, x9 := bits.mul_u64(arg1[4], x1)
				+	x12, x11 := bits.mul_u64(arg1[3], x2)
				+	x14, x13 := bits.mul_u64(arg1[3], x4)
				+	x16, x15 := bits.mul_u64(arg1[2], x2)
				+	x18, x17 := bits.mul_u64(arg1[2], x5)
				+	x20, x19 := bits.mul_u64(arg1[2], arg1[2])
				+	x22, x21 := bits.mul_u64(arg1[1], x2)
				+	x24, x23 := bits.mul_u64(arg1[1], x6)
				+	x26, x25 := bits.mul_u64(arg1[1], x7)
				+	x28, x27 := bits.mul_u64(arg1[1], arg1[1])
				+	x30, x29 := bits.mul_u64(arg1[0], x3)
				+	x32, x31 := bits.mul_u64(arg1[0], x6)
				+	x34, x33 := bits.mul_u64(arg1[0], x7)
				+	x36, x35 := bits.mul_u64(arg1[0], x8)
				+	x38, x37 := bits.mul_u64(arg1[0], arg1[0])
				+	// Limb 0 sum (x43 low, x45 high).
				+	x39, x40 := bits.add_u64(x21, x17, u64(0x0))
				+	x41, _ := bits.add_u64(x22, x18, u64(fiat.u1(x40)))
				+	x43, x44 := bits.add_u64(x37, x39, u64(0x0))
				+	x45, _ := bits.add_u64(x38, x41, u64(fiat.u1(x44)))
				+	// x47 = limb-0 sum >> 51 (carry into limb 1); x48 = its low 51 bits.
				+	x47 := ((x43 >> 51) | ((x45 << 13) & 0xffffffffffffffff))
				+	x48 := (x43 & 0x7ffffffffffff)
				+	// Limb 4 sum (x53 low, x55 high).
				+	x49, x50 := bits.add_u64(x23, x19, u64(0x0))
				+	x51, _ := bits.add_u64(x24, x20, u64(fiat.u1(x50)))
				+	x53, x54 := bits.add_u64(x29, x49, u64(0x0))
				+	x55, _ := bits.add_u64(x30, x51, u64(fiat.u1(x54)))
				+	// Limb 3 sum (x61 low, x63 high).
				+	x57, x58 := bits.add_u64(x25, x9, u64(0x0))
				+	x59, _ := bits.add_u64(x26, x10, u64(fiat.u1(x58)))
				+	x61, x62 := bits.add_u64(x31, x57, u64(0x0))
				+	x63, _ := bits.add_u64(x32, x59, u64(fiat.u1(x62)))
				+	// Limb 2 sum (x69 low, x71 high).
				+	x65, x66 := bits.add_u64(x27, x11, u64(0x0))
				+	x67, _ := bits.add_u64(x28, x12, u64(fiat.u1(x66)))
				+	x69, x70 := bits.add_u64(x33, x65, u64(0x0))
				+	x71, _ := bits.add_u64(x34, x67, u64(fiat.u1(x70)))
				+	// Limb 1 sum (x77 low, x79 high).
				+	x73, x74 := bits.add_u64(x15, x13, u64(0x0))
				+	x75, _ := bits.add_u64(x16, x14, u64(fiat.u1(x74)))
				+	x77, x78 := bits.add_u64(x35, x73, u64(0x0))
				+	x79, _ := bits.add_u64(x36, x75, u64(fiat.u1(x78)))
				+	// Carry chain: add the carry from the previous limb, then split the
				+	// 128-bit sum into a 51-bit limb and the carry into the next limb.
				+	x81, x82 := bits.add_u64(x47, x77, u64(0x0))
				+	x83 := (u64(fiat.u1(x82)) + x79)
				+	x84 := ((x81 >> 51) | ((x83 << 13) & 0xffffffffffffffff))
				+	x85 := (x81 & 0x7ffffffffffff)
				+	x86, x87 := bits.add_u64(x84, x69, u64(0x0))
				+	x88 := (u64(fiat.u1(x87)) + x71)
				+	x89 := ((x86 >> 51) | ((x88 << 13) & 0xffffffffffffffff))
				+	x90 := (x86 & 0x7ffffffffffff)
				+	x91, x92 := bits.add_u64(x89, x61, u64(0x0))
				+	x93 := (u64(fiat.u1(x92)) + x63)
				+	x94 := ((x91 >> 51) | ((x93 << 13) & 0xffffffffffffffff))
				+	x95 := (x91 & 0x7ffffffffffff)
				+	x96, x97 := bits.add_u64(x94, x53, u64(0x0))
				+	x98 := (u64(fiat.u1(x97)) + x55)
				+	x99 := ((x96 >> 51) | ((x98 << 13) & 0xffffffffffffffff))
				+	x100 := (x96 & 0x7ffffffffffff)
				+	// The carry out of limb 4 wraps to limb 0 multiplied by 19, followed
				+	// by a short final carry through limbs 0 -> 1 -> 2.
				+	x101 := (x99 * 0x13)
				+	x102 := (x48 + x101)
				+	x103 := (x102 >> 51)
				+	x104 := (x102 & 0x7ffffffffffff)
				+	x105 := (x103 + x85)
				+	x106 := fiat.u1((x105 >> 51))
				+	x107 := (x105 & 0x7ffffffffffff)
				+	x108 := (u64(x106) + x90)
				+	out1[0] = x104
				+	out1[1] = x107
				+	out1[2] = x108
				+	out1[3] = x95
				+	out1[4] = x100
				+}
			
 
				+
			
 
				+// fe_carry propagates carries through the five limbs of the loose
				+// element `arg1` and writes the result to `out1`.  The carry out of the
				+// top limb is folded back into limb 0 multiplied by 0x13 (19).
				+//
				+// Every limb of `arg1` is read before `out1` is written, so the two may
				+// alias (fe_carry_opp calls it that way).
				+fe_carry :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
				+	LIMB_MASK :: 0x7ffffffffffff
				+
				+	// First pass: ripple the carries upwards from limb 0 to limb 4.
				+	l0 := arg1[0]
				+	l1 := (l0 >> 51) + arg1[1]
				+	l2 := (l1 >> 51) + arg1[2]
				+	l3 := (l2 >> 51) + arg1[3]
				+	l4 := (l3 >> 51) + arg1[4]
				+
				+	// Second pass: wrap the top carry into limb 0, then carry once more
				+	// through limbs 0 -> 1 -> 2.
				+	r0 := (l0 & LIMB_MASK) + ((l4 >> 51) * 0x13)
				+	r1 := u64(fiat.u1(r0 >> 51)) + (l1 & LIMB_MASK)
				+	r2 := u64(fiat.u1(r1 >> 51)) + (l2 & LIMB_MASK)
				+
				+	out1[0] = r0 & LIMB_MASK
				+	out1[1] = r1 & LIMB_MASK
				+	out1[2] = r2
				+	out1[3] = l3 & LIMB_MASK
				+	out1[4] = l4 & LIMB_MASK
				+}
			
 
				+
			
 
				+// fe_add stores the limb-wise sum of arg1 and arg2 in out1.  No carry
				+// propagation is performed.
				+fe_add :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) {
				+	// Snapshot both operands so every load precedes the first store.
				+	a, b := arg1^, arg2^
				+
				+	out1[0] = a[0] + b[0]
				+	out1[1] = a[1] + b[1]
				+	out1[2] = a[2] + b[2]
				+	out1[3] = a[3] + b[3]
				+	out1[4] = a[4] + b[4]
				+}
			
 
				+
			
 
				+// fe_sub stores (bias + arg1) - arg2 in out1, limb by limb, with no carry
				+// propagation.
				+//
				+// The per-limb bias is 0xfffffffffffda for limb 0 and 0xffffffffffffe for
				+// the others, i.e. twice (2^51 - 19) and twice (2^51 - 1).
				+fe_sub :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) {
				+	// Snapshot both operands so every load precedes the first store.
				+	a, b := arg1^, arg2^
				+
				+	out1[0] = (0xfffffffffffda + a[0]) - b[0]
				+	out1[1] = (0xffffffffffffe + a[1]) - b[1]
				+	out1[2] = (0xffffffffffffe + a[2]) - b[2]
				+	out1[3] = (0xffffffffffffe + a[3]) - b[3]
				+	out1[4] = (0xffffffffffffe + a[4]) - b[4]
				+}
			
 
				+
			
 
				+// fe_opp stores bias - arg1 in out1, limb by limb, with no carry
				+// propagation.
				+//
				+// The per-limb bias is 0xfffffffffffda for limb 0 and 0xffffffffffffe for
				+// the others, i.e. twice (2^51 - 19) and twice (2^51 - 1).
				+fe_opp :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) {
				+	// Snapshot the operand so every load precedes the first store.
				+	a := arg1^
				+
				+	out1[0] = 0xfffffffffffda - a[0]
				+	out1[1] = 0xffffffffffffe - a[1]
				+	out1[2] = 0xffffffffffffe - a[2]
				+	out1[3] = 0xffffffffffffe - a[3]
				+	out1[4] = 0xffffffffffffe - a[4]
				+}
			
 
				+
			
 
				+// fe_cond_assign passes each limb pair (out1[i], arg1[i]) through
				+// fiat.cmovznz_u64 with arg2 as the selector and stores the result back
				+// into out1.
				+//
				+// NOTE(review): judging by how fe_to_bytes uses cmovznz_u64, a non-zero
				+// selector picks the arg1 limb and zero keeps out1 - confirm in _fiat.
				+fe_cond_assign :: proc "contextless" (out1, arg1: ^Tight_Field_Element, arg2: int) {
				+	sel := fiat.u1(arg2)
				+
				+	// Snapshot both elements so every load precedes the first store.
				+	cur, src := out1^, arg1^
				+
				+	out1[0] = fiat.cmovznz_u64(sel, cur[0], src[0])
				+	out1[1] = fiat.cmovznz_u64(sel, cur[1], src[1])
				+	out1[2] = fiat.cmovznz_u64(sel, cur[2], src[2])
				+	out1[3] = fiat.cmovznz_u64(sel, cur[3], src[3])
				+	out1[4] = fiat.cmovznz_u64(sel, cur[4], src[4])
				+}
			
 
				+
			
 
				+fe_to_bytes :: proc "contextless" (out1: ^[32]byte, arg1: ^Tight_Field_Element) { // Subtracts p = 2^255 - 19 from arg1 once, adds it back if that underflowed, and writes the result to out1 as 32 little-endian bytes.
			
 
				+	x1, x2 := _subborrowx_u51(0x0, arg1[0], 0x7ffffffffffed) // limb-wise arg1 - p; the constants are p split into 51-bit limbs
			
 
				+	x3, x4 := _subborrowx_u51(x2, arg1[1], 0x7ffffffffffff)
			
 
				+	x5, x6 := _subborrowx_u51(x4, arg1[2], 0x7ffffffffffff)
			
 
				+	x7, x8 := _subborrowx_u51(x6, arg1[3], 0x7ffffffffffff)
			
 
				+	x9, x10 := _subborrowx_u51(x8, arg1[4], 0x7ffffffffffff)
			
 
				+	x11 := fiat.cmovznz_u64(x10, u64(0x0), 0xffffffffffffffff) // mask selected by the final borrow x10; presumably all-ones when the subtraction borrowed
			
 
				+	x12, x13 := _addcarryx_u51(0x0, x1, (x11 & 0x7ffffffffffed)) // add (p & mask) back, limb by limb
			
 
				+	x14, x15 := _addcarryx_u51(x13, x3, (x11 & 0x7ffffffffffff))
			
 
				+	x16, x17 := _addcarryx_u51(x15, x5, (x11 & 0x7ffffffffffff))
			
 
				+	x18, x19 := _addcarryx_u51(x17, x7, (x11 & 0x7ffffffffffff))
			
 
				+	x20, _ := _addcarryx_u51(x19, x9, (x11 & 0x7ffffffffffff))
			
 
				+	x22 := (x20 << 4) // limb 4 starts at bit 204, i.e. bit 4 of byte 25
			
 
				+	x23 := (x18 * u64(0x2)) // limb 3 starts at bit 153, i.e. bit 1 of byte 19
			
 
				+	x24 := (x16 << 6) // limb 2 starts at bit 102, i.e. bit 6 of byte 12
			
 
				+	x25 := (x14 << 3) // limb 1 starts at bit 51, i.e. bit 3 of byte 6
			
 
				+	x26 := (u8(x12) & 0xff) // peel limb 0 off one byte at a time, least significant first
			
 
				+	x27 := (x12 >> 8)
			
 
				+	x28 := (u8(x27) & 0xff)
			
 
				+	x29 := (x27 >> 8)
			
 
				+	x30 := (u8(x29) & 0xff)
			
 
				+	x31 := (x29 >> 8)
			
 
				+	x32 := (u8(x31) & 0xff)
			
 
				+	x33 := (x31 >> 8)
			
 
				+	x34 := (u8(x33) & 0xff)
			
 
				+	x35 := (x33 >> 8)
			
 
				+	x36 := (u8(x35) & 0xff)
			
 
				+	x37 := u8((x35 >> 8))
			
 
				+	x38 := (x25 + u64(x37)) // leftover top bits of limb 0 share byte 6 with the shifted limb 1
			
 
				+	x39 := (u8(x38) & 0xff)
			
 
				+	x40 := (x38 >> 8)
			
 
				+	x41 := (u8(x40) & 0xff)
			
 
				+	x42 := (x40 >> 8)
			
 
				+	x43 := (u8(x42) & 0xff)
			
 
				+	x44 := (x42 >> 8)
			
 
				+	x45 := (u8(x44) & 0xff)
			
 
				+	x46 := (x44 >> 8)
			
 
				+	x47 := (u8(x46) & 0xff)
			
 
				+	x48 := (x46 >> 8)
			
 
				+	x49 := (u8(x48) & 0xff)
			
 
				+	x50 := u8((x48 >> 8))
			
 
				+	x51 := (x24 + u64(x50)) // leftover bits of limb 1 share byte 12 with the shifted limb 2
			
 
				+	x52 := (u8(x51) & 0xff)
			
 
				+	x53 := (x51 >> 8)
			
 
				+	x54 := (u8(x53) & 0xff)
			
 
				+	x55 := (x53 >> 8)
			
 
				+	x56 := (u8(x55) & 0xff)
			
 
				+	x57 := (x55 >> 8)
			
 
				+	x58 := (u8(x57) & 0xff)
			
 
				+	x59 := (x57 >> 8)
			
 
				+	x60 := (u8(x59) & 0xff)
			
 
				+	x61 := (x59 >> 8)
			
 
				+	x62 := (u8(x61) & 0xff)
			
 
				+	x63 := (x61 >> 8)
			
 
				+	x64 := (u8(x63) & 0xff)
			
 
				+	x65 := fiat.u1((x63 >> 8))
			
 
				+	x66 := (x23 + u64(x65)) // leftover bit of limb 2 shares byte 19 with the shifted limb 3
			
 
				+	x67 := (u8(x66) & 0xff)
			
 
				+	x68 := (x66 >> 8)
			
 
				+	x69 := (u8(x68) & 0xff)
			
 
				+	x70 := (x68 >> 8)
			
 
				+	x71 := (u8(x70) & 0xff)
			
 
				+	x72 := (x70 >> 8)
			
 
				+	x73 := (u8(x72) & 0xff)
			
 
				+	x74 := (x72 >> 8)
			
 
				+	x75 := (u8(x74) & 0xff)
			
 
				+	x76 := (x74 >> 8)
			
 
				+	x77 := (u8(x76) & 0xff)
			
 
				+	x78 := u8((x76 >> 8))
			
 
				+	x79 := (x22 + u64(x78)) // leftover bits of limb 3 share byte 25 with the shifted limb 4
			
 
				+	x80 := (u8(x79) & 0xff)
			
 
				+	x81 := (x79 >> 8)
			
 
				+	x82 := (u8(x81) & 0xff)
			
 
				+	x83 := (x81 >> 8)
			
 
				+	x84 := (u8(x83) & 0xff)
			
 
				+	x85 := (x83 >> 8)
			
 
				+	x86 := (u8(x85) & 0xff)
			
 
				+	x87 := (x85 >> 8)
			
 
				+	x88 := (u8(x87) & 0xff)
			
 
				+	x89 := (x87 >> 8)
			
 
				+	x90 := (u8(x89) & 0xff)
			
 
				+	x91 := u8((x89 >> 8))
			
 
				+	out1[0] = x26 // all stores happen only after every byte has been computed
			
 
				+	out1[1] = x28
			
 
				+	out1[2] = x30
			
 
				+	out1[3] = x32
			
 
				+	out1[4] = x34
			
 
				+	out1[5] = x36
			
 
				+	out1[6] = x39
			
 
				+	out1[7] = x41
			
 
				+	out1[8] = x43
			
 
				+	out1[9] = x45
			
 
				+	out1[10] = x47
			
 
				+	out1[11] = x49
			
 
				+	out1[12] = x52
			
 
				+	out1[13] = x54
			
 
				+	out1[14] = x56
			
 
				+	out1[15] = x58
			
 
				+	out1[16] = x60
			
 
				+	out1[17] = x62
			
 
				+	out1[18] = x64
			
 
				+	out1[19] = x67
			
 
				+	out1[20] = x69
			
 
				+	out1[21] = x71
			
 
				+	out1[22] = x73
			
 
				+	out1[23] = x75
			
 
				+	out1[24] = x77
			
 
				+	out1[25] = x80
			
 
				+	out1[26] = x82
			
 
				+	out1[27] = x84
			
 
				+	out1[28] = x86
			
 
				+	out1[29] = x88
			
 
				+	out1[30] = x90
			
 
				+	out1[31] = x91
			
 
				+}
			
 
				+
			
 
				+_fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte) { // Packs the 32 little-endian bytes of arg1 into five limbs of 51 bits each; the value is not reduced here.
			
 
				+	x1 := (u64(arg1[31]) << 44) // shift every byte to its bit offset inside the limb it contributes to (limb 4: bytes 26..31)
			
 
				+	x2 := (u64(arg1[30]) << 36)
			
 
				+	x3 := (u64(arg1[29]) << 28)
			
 
				+	x4 := (u64(arg1[28]) << 20)
			
 
				+	x5 := (u64(arg1[27]) << 12)
			
 
				+	x6 := (u64(arg1[26]) << 4)
			
 
				+	x7 := (u64(arg1[25]) << 47) // limb 3: bytes 20..25
			
 
				+	x8 := (u64(arg1[24]) << 39)
			
 
				+	x9 := (u64(arg1[23]) << 31)
			
 
				+	x10 := (u64(arg1[22]) << 23)
			
 
				+	x11 := (u64(arg1[21]) << 15)
			
 
				+	x12 := (u64(arg1[20]) << 7)
			
 
				+	x13 := (u64(arg1[19]) << 50) // limb 2: bytes 13..19
			
 
				+	x14 := (u64(arg1[18]) << 42)
			
 
				+	x15 := (u64(arg1[17]) << 34)
			
 
				+	x16 := (u64(arg1[16]) << 26)
			
 
				+	x17 := (u64(arg1[15]) << 18)
			
 
				+	x18 := (u64(arg1[14]) << 10)
			
 
				+	x19 := (u64(arg1[13]) << 2)
			
 
				+	x20 := (u64(arg1[12]) << 45) // limb 1: bytes 7..12
			
 
				+	x21 := (u64(arg1[11]) << 37)
			
 
				+	x22 := (u64(arg1[10]) << 29)
			
 
				+	x23 := (u64(arg1[9]) << 21)
			
 
				+	x24 := (u64(arg1[8]) << 13)
			
 
				+	x25 := (u64(arg1[7]) << 5)
			
 
				+	x26 := (u64(arg1[6]) << 48) // limb 0: bytes 0..6
			
 
				+	x27 := (u64(arg1[5]) << 40)
			
 
				+	x28 := (u64(arg1[4]) << 32)
			
 
				+	x29 := (u64(arg1[3]) << 24)
			
 
				+	x30 := (u64(arg1[2]) << 16)
			
 
				+	x31 := (u64(arg1[1]) << 8)
			
 
				+	x32 := arg1[0]
			
 
				+	x33 := (x31 + u64(x32)) // accumulate the bytes of limb 0
			
 
				+	x34 := (x30 + x33)
			
 
				+	x35 := (x29 + x34)
			
 
				+	x36 := (x28 + x35)
			
 
				+	x37 := (x27 + x36)
			
 
				+	x38 := (x26 + x37)
			
 
				+	x39 := (x38 & 0x7ffffffffffff) // low 51 bits become limb 0
			
 
				+	x40 := u8((x38 >> 51)) // bits above 51 spill into limb 1
			
 
				+	x41 := (x25 + u64(x40))
			
 
				+	x42 := (x24 + x41)
			
 
				+	x43 := (x23 + x42)
			
 
				+	x44 := (x22 + x43)
			
 
				+	x45 := (x21 + x44)
			
 
				+	x46 := (x20 + x45)
			
 
				+	x47 := (x46 & 0x7ffffffffffff)
			
 
				+	x48 := u8((x46 >> 51))
			
 
				+	x49 := (x19 + u64(x48))
			
 
				+	x50 := (x18 + x49)
			
 
				+	x51 := (x17 + x50)
			
 
				+	x52 := (x16 + x51)
			
 
				+	x53 := (x15 + x52)
			
 
				+	x54 := (x14 + x53)
			
 
				+	x55 := (x13 + x54)
			
 
				+	x56 := (x55 & 0x7ffffffffffff)
			
 
				+	x57 := u8((x55 >> 51))
			
 
				+	x58 := (x12 + u64(x57))
			
 
				+	x59 := (x11 + x58)
			
 
				+	x60 := (x10 + x59)
			
 
				+	x61 := (x9 + x60)
			
 
				+	x62 := (x8 + x61)
			
 
				+	x63 := (x7 + x62)
			
 
				+	x64 := (x63 & 0x7ffffffffffff)
			
 
				+	x65 := u8((x63 >> 51))
			
 
				+	x66 := (x6 + u64(x65))
			
 
				+	x67 := (x5 + x66)
			
 
				+	x68 := (x4 + x67)
			
 
				+	x69 := (x3 + x68)
			
 
				+	x70 := (x2 + x69)
			
 
				+	x71 := (x1 + x70)
			
 
				+	out1[0] = x39
			
 
				+	out1[1] = x47
			
 
				+	out1[2] = x56
			
 
				+	out1[3] = x64
			
 
				+	out1[4] = x71 // the top limb is stored unmasked, so the input's most significant bit is kept
			
 
				+}
			
 
				+
			
 
				+// fe_relax copies the limbs of the tight element arg1 into the loose
				+// element out1 without modifying them.
				+fe_relax :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) {
				+	// Snapshot the source so every load precedes the first store.
				+	src := arg1^
				+
				+	out1[0] = src[0]
				+	out1[1] = src[1]
				+	out1[2] = src[2]
				+	out1[3] = src[3]
				+	out1[4] = src[4]
				+}
			
 
				+
			
 
				+fe_carry_scmul_121666 :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { // Multiplies every limb of arg1 by the constant 121666 (0x1db42) and carries the products into out1.
			
 
				+	x2, x1 := bits.mul_u64(0x1db42, arg1[4]) // 128-bit products; the first result is used as the high word, the second as the low word
			
 
				+	x4, x3 := bits.mul_u64(0x1db42, arg1[3])
			
 
				+	x6, x5 := bits.mul_u64(0x1db42, arg1[2])
			
 
				+	x8, x7 := bits.mul_u64(0x1db42, arg1[1])
			
 
				+	x10, x9 := bits.mul_u64(0x1db42, arg1[0])
			
 
				+	x11 := ((x9 >> 51) | ((x10 << 13) & 0xffffffffffffffff)) // bits 51 and up of the limb-0 product, carried into limb 1
			
 
				+	x12 := (x9 & 0x7ffffffffffff)
			
 
				+	x13, x14 := bits.add_u64(x11, x7, u64(0x0))
			
 
				+	x15 := (u64(fiat.u1(x14)) + x8)
			
 
				+	x16 := ((x13 >> 51) | ((x15 << 13) & 0xffffffffffffffff))
			
 
				+	x17 := (x13 & 0x7ffffffffffff)
			
 
				+	x18, x19 := bits.add_u64(x16, x5, u64(0x0))
			
 
				+	x20 := (u64(fiat.u1(x19)) + x6)
			
 
				+	x21 := ((x18 >> 51) | ((x20 << 13) & 0xffffffffffffffff))
			
 
				+	x22 := (x18 & 0x7ffffffffffff)
			
 
				+	x23, x24 := bits.add_u64(x21, x3, u64(0x0))
			
 
				+	x25 := (u64(fiat.u1(x24)) + x4)
			
 
				+	x26 := ((x23 >> 51) | ((x25 << 13) & 0xffffffffffffffff))
			
 
				+	x27 := (x23 & 0x7ffffffffffff)
			
 
				+	x28, x29 := bits.add_u64(x26, x1, u64(0x0))
			
 
				+	x30 := (u64(fiat.u1(x29)) + x2)
			
 
				+	x31 := ((x28 >> 51) | ((x30 << 13) & 0xffffffffffffffff))
			
 
				+	x32 := (x28 & 0x7ffffffffffff)
			
 
				+	x33 := (x31 * 0x13) // the carry out of the top limb re-enters limb 0 multiplied by 19
			
 
				+	x34 := (x12 + x33)
			
 
				+	x35 := fiat.u1((x34 >> 51))
			
 
				+	x36 := (x34 & 0x7ffffffffffff)
			
 
				+	x37 := (u64(x35) + x17)
			
 
				+	x38 := fiat.u1((x37 >> 51))
			
 
				+	x39 := (x37 & 0x7ffffffffffff)
			
 
				+	x40 := (u64(x38) + x22)
			
 
				+	out1[0] = x36
			
 
				+	out1[1] = x39
			
 
				+	out1[2] = x40
			
 
				+	out1[3] = x27
			
 
				+	out1[4] = x32
			
 
				+}
			
 
				+
			
 
				+// The following routines were added by hand, and do not come from fiat-crypto.
			
 
				+
			
 
				+// fe_zero sets every limb of out1 to 0.
				+fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
				+	out1[0], out1[1], out1[2], out1[3], out1[4] = 0, 0, 0, 0, 0
				+}
			
 
				+
			
 
				+// fe_one sets out1 to the value 1: limb 0 is 1 and all other limbs are 0.
				+fe_one :: proc "contextless" (out1: ^Tight_Field_Element) {
				+	out1[0], out1[1], out1[2], out1[3], out1[4] = 1, 0, 0, 0, 0
				+}
			
 
				+
			
 
				+// fe_set copies the limbs of arg1 into out1.
				+fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) {
				+	// Snapshot the source so every load precedes the first store.
				+	src := arg1^
				+
				+	out1[0] = src[0]
				+	out1[1] = src[1]
				+	out1[2] = src[2]
				+	out1[3] = src[3]
				+	out1[4] = src[4]
				+}
			
 
				+
			
 
				+// fe_cond_swap exchanges out1 and out2 when arg1 is 1 and leaves both
				+// unchanged when arg1 is 0, without branching on arg1.
				+//
				+// The limb-wise XOR difference is ANDed with mask = -u64(arg1) (all-ones
				+// for 1, zero for 0) and then XORed into both operands.  Other values of
				+// arg1 yield a partial mask; presumably callers only pass 0 or 1.
				+fe_cond_swap :: proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) {
				+	mask := -u64(arg1)
				+
				+	// Snapshot both elements so every load precedes the first store.
				+	a, b := out1^, out2^
				+
				+	d0 := (a[0] ~ b[0]) & mask
				+	d1 := (a[1] ~ b[1]) & mask
				+	d2 := (a[2] ~ b[2]) & mask
				+	d3 := (a[3] ~ b[3]) & mask
				+	d4 := (a[4] ~ b[4]) & mask
				+
				+	out1[0], out2[0] = a[0] ~ d0, b[0] ~ d0
				+	out1[1], out2[1] = a[1] ~ d1, b[1] ~ d1
				+	out1[2], out2[2] = a[2] ~ d2, b[2] ~ d2
				+	out1[3], out2[3] = a[3] ~ d3, b[3] ~ d3
				+	out1[4], out2[4] = a[4] ~ d4, b[4] ~ d4
				+}
			
--- a/core/crypto/_fiat/field_poly1305/field.odin
+++ b/core/crypto/_fiat/field_poly1305/field.odin
@@ -0,0 +1,66 @@
 
				+package field_poly1305
			
 
				+
			
 
				+import "core:crypto/util"
			
 
				+import "core:mem"
			
 
				+
			
 
				+// fe_relax_cast reinterprets a pointer to a tight element as a pointer to
				+// a loose element.  Nothing is copied: the result aliases arg1.
				+fe_relax_cast :: #force_inline proc "contextless" (arg1: ^Tight_Field_Element) -> ^Loose_Field_Element {
				+	loose := transmute(^Loose_Field_Element)(arg1)
				+	return loose
				+}
			
 
				+
			
 
				+// fe_tighten_cast reinterprets a pointer to a loose element as a pointer
				+// to a tight element.  Nothing is copied or carried: the result aliases
				+// arg1, so the limbs must presumably already be within the tight bounds.
				+fe_tighten_cast :: #force_inline proc "contextless" (arg1: ^Loose_Field_Element) -> ^Tight_Field_Element {
				+	tight := transmute(^Tight_Field_Element)(arg1)
				+	return tight
				+}
			
 
				+
			
 
				+fe_from_bytes :: #force_inline proc (out1: ^Tight_Field_Element, arg1: []byte, arg2: byte, sanitize: bool = true) {
			
 
				+	// Deserializes the 16-byte little-endian block `arg1` into `out1`,
			
 
				+	// with `arg2` supplying the byte that follows it (bit 128 and up).
			
 
				+	//
			
 
				+	// fiat-crypto's deserialization routine effectively processes a
			
 
				+	// single byte at a time, and wants 256 bits of input for a value
			
 
				+	// that is only 128 or 129 bits wide, so this wrapper keeps the MAC
			
 
				+	// block processing considerably neater.
			
 
				+
			
 
				+	assert(len(arg1) == 16) // the block must be exactly 16 bytes long
			
 
				+
			
 
				+	when ODIN_ARCH == "386" || ODIN_ARCH == "amd64" { // fast path: no stack copy is made, so `sanitize` has nothing to wipe here
			
 
				+		// While it may be unwise to do deserialization here on our
			
 
				+		// own when fiat-crypto provides equivalent functionality,
			
 
				+		// doing it this way provides a little under 3x performance
			
 
				+		// improvement when optimization is enabled.
			
 
				+		src_p := transmute(^[2]u64)(&arg1[0]) // view the block as two u64 words in native byte order
			
 
				+		lo := src_p[0]
			
 
				+		hi := src_p[1]
			
 
				+
			
 
				+		// This is inspired by poly1305-donna, though adjustments were
			
 
				+		// made since a Tight_Field_Element's limbs are 44-bits, 43-bits,
			
 
				+		// and 43-bits wide.
			
 
				+		//
			
 
				+		// Note: This could be transplanted into fe_from_u64s, but that
			
 
				+		// code is called once per MAC, and is not on the critical path.
			
 
				+		hibit := u64(arg2) << 41 // arg2 << 128
			
 
				+		out1[0] = lo & 0xfffffffffff
			
 
				+		out1[1] = ((lo >> 44) | (hi << 20)) & 0x7ffffffffff
			
 
				+		out1[2] = ((hi >> 23) & 0x7ffffffffff) | hibit
			
 
				+	} else {
			
 
				+		tmp: [32]byte // zero-initialized scratch buffer of the size _fe_from_bytes expects
			
 
				+		copy_slice(tmp[0:16], arg1[:])
			
 
				+		tmp[16] = arg2
			
 
				+
			
 
				+		_fe_from_bytes(out1, &tmp)
			
 
				+		if sanitize {
			
 
				+			// This is used to deserialize `s` which is confidential.
			
 
				+			mem.zero_explicit(&tmp, size_of(tmp))
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+// fe_from_u64s deserializes the 128-bit value hi:lo into out1.
				+//
				+// The two words are written little-endian into the first 16 bytes of a
				+// zeroed 32-byte scratch buffer, which is then handed to _fe_from_bytes.
				+fe_from_u64s :: proc "contextless" (out1: ^Tight_Field_Element, lo, hi: u64) {
				+	scratch: [32]byte
				+
				+	// This routine is only used to deserialize `r` which is confidential,
				+	// so the scratch copy is wiped on the way out.
				+	defer mem.zero_explicit(&scratch, size_of(scratch))
				+
				+	util.PUT_U64_LE(scratch[0:8], lo)
				+	util.PUT_U64_LE(scratch[8:16], hi)
				+
				+	_fe_from_bytes(out1, &scratch)
				+}
			
--- a/core/crypto/_fiat/field_poly1305/field4344.odin
+++ b/core/crypto/_fiat/field_poly1305/field4344.odin
@@ -0,0 +1,356 @@
 
				+// The BSD 1-Clause License (BSD-1-Clause)
			
 
				+//
			
 
				+// Copyright (c) 2015-2020 the fiat-crypto authors (see the AUTHORS file)
			
 
				+// All rights reserved.
			
 
				+//
			
 
				+// Redistribution and use in source and binary forms, with or without
			
 
				+// modification, are permitted provided that the following conditions are
			
 
				+// met:
			
 
				+//
			
 
				+//     1. Redistributions of source code must retain the above copyright
			
 
				+//        notice, this list of conditions and the following disclaimer.
			
 
				+//
			
 
				+// THIS SOFTWARE IS PROVIDED BY the fiat-crypto authors "AS IS"
			
 
				+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
			
 
				+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
			
 
				+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Berkeley Software Design,
			
 
				+// Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
			
 
				+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
			
 
				+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
			
 
				+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
			
 
				+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
			
 
				+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
			
 
				+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				+
			
 
				+package field_poly1305
			
 
				+
			
 
				+// This file provides arithmetic on the field Z/(2^130 - 5) using
			
 
				+// unsaturated 64-bit integer arithmetic.  It is derived primarily
			
 
				+// from the machine-generated Golang output from the fiat-crypto project.
			
 
				+//
			
 
				+// While the base implementation is provably correct, this implementation
			
 
				+// makes no such claims as the port and optimizations were done by hand.
			
 
				+// At some point, it may be worth adding support to fiat-crypto for
			
 
				+// generating Odin output.
			
 
				+
			
 
				+import fiat "core:crypto/_fiat"
			
 
				+import "core:math/bits"
			
 
				+
			
 
				+Loose_Field_Element :: distinct [3]u64 // three limbs, least significant first; produced by fe_add/fe_sub/fe_opp/fe_relax, i.e. limbs that have not been carried
			
 
				+Tight_Field_Element :: distinct [3]u64 // three limbs, least significant first; fe_carry masks limbs 0 and 1 to 44 and 43 bits
			
 
				+
			
 
				+// _addcarryx_u44 adds arg2, arg3 and the incoming carry arg1.
				+//
				+// Returns the low 44 bits of the sum as out1 and the bits above them,
				+// narrowed to fiat.u1, as the outgoing carry out2.
				+_addcarryx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
				+	sum := (u64(arg1) + arg2) + arg3
				+	return sum & 0xfffffffffff, fiat.u1((sum >> 44))
				+}
			
 
				+
			
 
				+// _subborrowx_u44 computes arg2 - arg1 - arg3 in signed 64-bit arithmetic,
				+// where arg1 is the incoming borrow.
				+//
				+// Returns the low 44 bits of the difference as out1.  The outgoing borrow
				+// out2 is derived from the arithmetic shift of the difference by 44,
				+// narrowed to fiat.i1 and negated.
				+_subborrowx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
				+	diff := (i64(arg2) - i64(arg1)) - i64(arg3)
				+	sign := fiat.i1((diff >> 44))
				+	return u64(diff) & 0xfffffffffff, 0x0 - fiat.u1(sign)
				+}
			
 
				+
			
 
				+// _addcarryx_u43 adds arg2, arg3 and the incoming carry arg1.
				+//
				+// Returns the low 43 bits of the sum as out1 and the bits above them,
				+// narrowed to fiat.u1, as the outgoing carry out2.
				+_addcarryx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
				+	sum := (u64(arg1) + arg2) + arg3
				+	return sum & 0x7ffffffffff, fiat.u1((sum >> 43))
				+}
			
 
				+
			
 
				+// _subborrowx_u43 computes arg2 - arg1 - arg3 in signed 64-bit arithmetic,
				+// where arg1 is the incoming borrow.
				+//
				+// Returns the low 43 bits of the difference as out1.  The outgoing borrow
				+// out2 is derived from the arithmetic shift of the difference by 43,
				+// narrowed to fiat.i1 and negated.
				+_subborrowx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
				+	diff := (i64(arg2) - i64(arg1)) - i64(arg3)
				+	sign := fiat.i1((diff >> 43))
				+	return u64(diff) & 0x7ffffffffff, 0x0 - fiat.u1(sign)
				+}
			
 
				+
			
 
				+fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) { // Multiplies arg1 by arg2 limb by limb, folds the overflow back in with factors of 5, and carries the result into out1.
			
 
				+	x2, x1 := bits.mul_u64(arg1[2], (arg2[2] * 0x5)) // partial products as (high, low) words; NOTE(review): the 0x5/0xa/0x2 factors look like the 2^130 = 5 wrap-around plus limb-width alignment - confirm against fiat-crypto
			
 
				+	x4, x3 := bits.mul_u64(arg1[2], (arg2[1] * 0xa))
			
 
				+	x6, x5 := bits.mul_u64(arg1[1], (arg2[2] * 0xa))
			
 
				+	x8, x7 := bits.mul_u64(arg1[2], arg2[0])
			
 
				+	x10, x9 := bits.mul_u64(arg1[1], (arg2[1] * 0x2))
			
 
				+	x12, x11 := bits.mul_u64(arg1[1], arg2[0])
			
 
				+	x14, x13 := bits.mul_u64(arg1[0], arg2[2])
			
 
				+	x16, x15 := bits.mul_u64(arg1[0], arg2[1])
			
 
				+	x18, x17 := bits.mul_u64(arg1[0], arg2[0])
			
 
				+	x19, x20 := bits.add_u64(x5, x3, u64(0x0)) // 128-bit sums: low words first, then high words plus the carry
			
 
				+	x21, _ := bits.add_u64(x6, x4, u64(fiat.u1(x20)))
			
 
				+	x23, x24 := bits.add_u64(x17, x19, u64(0x0))
			
 
				+	x25, _ := bits.add_u64(x18, x21, u64(fiat.u1(x24)))
			
 
				+	x27 := ((x23 >> 44) | ((x25 << 20) & 0xffffffffffffffff)) // everything above the low 44 bits of the limb-0 sum, carried into limb 1
			
 
				+	x28 := (x23 & 0xfffffffffff)
			
 
				+	x29, x30 := bits.add_u64(x9, x7, u64(0x0))
			
 
				+	x31, _ := bits.add_u64(x10, x8, u64(fiat.u1(x30)))
			
 
				+	x33, x34 := bits.add_u64(x13, x29, u64(0x0))
			
 
				+	x35, _ := bits.add_u64(x14, x31, u64(fiat.u1(x34)))
			
 
				+	x37, x38 := bits.add_u64(x11, x1, u64(0x0))
			
 
				+	x39, _ := bits.add_u64(x12, x2, u64(fiat.u1(x38)))
			
 
				+	x41, x42 := bits.add_u64(x15, x37, u64(0x0))
			
 
				+	x43, _ := bits.add_u64(x16, x39, u64(fiat.u1(x42)))
			
 
				+	x45, x46 := bits.add_u64(x27, x41, u64(0x0))
			
 
				+	x47 := (u64(fiat.u1(x46)) + x43)
			
 
				+	x48 := ((x45 >> 43) | ((x47 << 21) & 0xffffffffffffffff))
			
 
				+	x49 := (x45 & 0x7ffffffffff)
			
 
				+	x50, x51 := bits.add_u64(x48, x33, u64(0x0))
			
 
				+	x52 := (u64(fiat.u1(x51)) + x35)
			
 
				+	x53 := ((x50 >> 43) | ((x52 << 21) & 0xffffffffffffffff))
			
 
				+	x54 := (x50 & 0x7ffffffffff)
			
 
				+	x55 := (x53 * 0x5) // the carry out of the top limb re-enters limb 0 multiplied by 5
			
 
				+	x56 := (x28 + x55)
			
 
				+	x57 := (x56 >> 44)
			
 
				+	x58 := (x56 & 0xfffffffffff)
			
 
				+	x59 := (x57 + x49)
			
 
				+	x60 := fiat.u1((x59 >> 43))
			
 
				+	x61 := (x59 & 0x7ffffffffff)
			
 
				+	x62 := (u64(x60) + x54)
			
 
				+	out1[0] = x58
			
 
				+	out1[1] = x61
			
 
				+	out1[2] = x62
			
 
				+}
			
 
				+
			
 
				+fe_carry_square :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { // Squares arg1 limb by limb, folds the overflow back in with factors of 5, and carries the result into out1.
			
 
				+	x1 := (arg1[2] * 0x5) // pre-scaled limbs reused by the partial products below
			
 
				+	x2 := (x1 * 0x2)
			
 
				+	x3 := (arg1[2] * 0x2)
			
 
				+	x4 := (arg1[1] * 0x2)
			
 
				+	x6, x5 := bits.mul_u64(arg1[2], x1) // partial products as (high, low) words
			
 
				+	x8, x7 := bits.mul_u64(arg1[1], (x2 * 0x2))
			
 
				+	x10, x9 := bits.mul_u64(arg1[1], (arg1[1] * 0x2))
			
 
				+	x12, x11 := bits.mul_u64(arg1[0], x3)
			
 
				+	x14, x13 := bits.mul_u64(arg1[0], x4)
			
 
				+	x16, x15 := bits.mul_u64(arg1[0], arg1[0])
			
 
				+	x17, x18 := bits.add_u64(x15, x7, u64(0x0)) // 128-bit sums: low words first, then high words plus the carry
			
 
				+	x19, _ := bits.add_u64(x16, x8, u64(fiat.u1(x18)))
			
 
				+	x21 := ((x17 >> 44) | ((x19 << 20) & 0xffffffffffffffff)) // everything above the low 44 bits of the limb-0 sum, carried into limb 1
			
 
				+	x22 := (x17 & 0xfffffffffff)
			
 
				+	x23, x24 := bits.add_u64(x11, x9, u64(0x0))
			
 
				+	x25, _ := bits.add_u64(x12, x10, u64(fiat.u1(x24)))
			
 
				+	x27, x28 := bits.add_u64(x13, x5, u64(0x0))
			
 
				+	x29, _ := bits.add_u64(x14, x6, u64(fiat.u1(x28)))
			
 
				+	x31, x32 := bits.add_u64(x21, x27, u64(0x0))
			
 
				+	x33 := (u64(fiat.u1(x32)) + x29)
			
 
				+	x34 := ((x31 >> 43) | ((x33 << 21) & 0xffffffffffffffff))
			
 
				+	x35 := (x31 & 0x7ffffffffff)
			
 
				+	x36, x37 := bits.add_u64(x34, x23, u64(0x0))
			
 
				+	x38 := (u64(fiat.u1(x37)) + x25)
			
 
				+	x39 := ((x36 >> 43) | ((x38 << 21) & 0xffffffffffffffff))
			
 
				+	x40 := (x36 & 0x7ffffffffff)
			
 
				+	x41 := (x39 * 0x5) // the carry out of the top limb re-enters limb 0 multiplied by 5
			
 
				+	x42 := (x22 + x41)
			
 
				+	x43 := (x42 >> 44)
			
 
				+	x44 := (x42 & 0xfffffffffff)
			
 
				+	x45 := (x43 + x35)
			
 
				+	x46 := fiat.u1((x45 >> 43))
			
 
				+	x47 := (x45 & 0x7ffffffffff)
			
 
				+	x48 := (u64(x46) + x40)
			
 
				+	out1[0] = x44
			
 
				+	out1[1] = x47
			
 
				+	out1[2] = x48
			
 
				+}
			
 
				+
			
 
				+// fe_carry reduces the limbs of arg1 by propagating carries and stores the
				+// result in out1.
				+//
				+// Limb 0 keeps its low 44 bits and limbs 1 and 2 their low 43 bits; the
				+// bits above are handed to the next limb.  The carry out of limb 2 is
				+// multiplied by 5 and added to limb 0, after which the carries out of
				+// limbs 0 and 1 are moved up one limb.
				+fe_carry :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
				+	MASK_44 :: 0xfffffffffff
				+	MASK_43 :: 0x7ffffffffff
				+
				+	// Ripple the carries upwards.  All of arg1 is read before the first
				+	// store to out1.
				+	l0 := arg1[0]
				+	l1 := (l0 >> 44) + arg1[1]
				+	l2 := (l1 >> 43) + arg1[2]
				+
				+	// Fold the carry out of the top limb back into limb 0, then push the
				+	// resulting carries (narrowed through fiat.u1) into limbs 1 and 2.
				+	r0 := (l0 & MASK_44) + ((l2 >> 43) * 0x5)
				+	r1 := u64(fiat.u1((r0 >> 44))) + (l1 & MASK_43)
				+	r2 := u64(fiat.u1((r1 >> 43))) + (l2 & MASK_43)
				+
				+	out1[0] = r0 & MASK_44
				+	out1[1] = r1 & MASK_43
				+	out1[2] = r2
				+}
			
 
				+
			
 
				+// fe_add stores the limb-wise sum of arg1 and arg2 in out1.  No carry
				+// propagation is performed.
				+fe_add :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) {
				+	// Snapshot both operands so every load precedes the first store.
				+	a, b := arg1^, arg2^
				+
				+	out1[0] = a[0] + b[0]
				+	out1[1] = a[1] + b[1]
				+	out1[2] = a[2] + b[2]
				+}
			
 
				+
			
 
				+// fe_sub stores (bias + arg1) - arg2 in out1, limb by limb, with no carry
				+// propagation.
				+//
				+// The per-limb bias is 0x1ffffffffff6 for limb 0 and 0xffffffffffe for the
				+// others, i.e. twice (2^44 - 5) and twice (2^43 - 1).
				+fe_sub :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) {
				+	// Snapshot both operands so every load precedes the first store.
				+	a, b := arg1^, arg2^
				+
				+	out1[0] = (0x1ffffffffff6 + a[0]) - b[0]
				+	out1[1] = (0xffffffffffe + a[1]) - b[1]
				+	out1[2] = (0xffffffffffe + a[2]) - b[2]
				+}
			
 
				+
			
 
				+// fe_opp stores bias - arg1 in out1, limb by limb, with no carry
				+// propagation.
				+//
				+// The per-limb bias is 0x1ffffffffff6 for limb 0 and 0xffffffffffe for the
				+// others, i.e. twice (2^44 - 5) and twice (2^43 - 1).
				+fe_opp :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) {
				+	// Snapshot the operand so every load precedes the first store.
				+	a := arg1^
				+
				+	out1[0] = 0x1ffffffffff6 - a[0]
				+	out1[1] = 0xffffffffffe - a[1]
				+	out1[2] = 0xffffffffffe - a[2]
				+}
			
 
				+
			
 
				+// fe_cond_assign passes each limb pair (out1[i], arg1[i]) through
				+// fiat.cmovznz_u64 with arg2 as the selector and stores the result back
				+// into out1.
				+//
				+// NOTE(review): judging by how fe_to_bytes uses cmovznz_u64, a true
				+// selector picks the arg1 limb and false keeps out1 - confirm in _fiat.
				+fe_cond_assign :: proc "contextless" (out1, arg1: ^Tight_Field_Element, arg2: bool) {
				+	sel := fiat.u1(arg2)
				+
				+	// Snapshot both elements so every load precedes the first store.
				+	cur, src := out1^, arg1^
				+
				+	out1[0] = fiat.cmovznz_u64(sel, cur[0], src[0])
				+	out1[1] = fiat.cmovznz_u64(sel, cur[1], src[1])
				+	out1[2] = fiat.cmovznz_u64(sel, cur[2], src[2])
				+}
			
 
				+
			
 
				+fe_to_bytes :: proc "contextless" (out1: ^[32]byte, arg1: ^Tight_Field_Element) { // Subtracts p = 2^130 - 5 from arg1 once, adds it back if that underflowed, and writes the result to out1[0..16] in little-endian order; out1[17..31] are not written.
			
 
				+	x1, x2 := _subborrowx_u44(0x0, arg1[0], 0xffffffffffb) // limb-wise arg1 - p; the constants are p split into 44/43/43-bit limbs
			
 
				+	x3, x4 := _subborrowx_u43(x2, arg1[1], 0x7ffffffffff)
			
 
				+	x5, x6 := _subborrowx_u43(x4, arg1[2], 0x7ffffffffff)
			
 
				+	x7 := fiat.cmovznz_u64(x6, u64(0x0), 0xffffffffffffffff) // mask selected by the final borrow x6; presumably all-ones when the subtraction borrowed
			
 
				+	x8, x9 := _addcarryx_u44(0x0, x1, (x7 & 0xffffffffffb)) // add (p & mask) back, limb by limb
			
 
				+	x10, x11 := _addcarryx_u43(x9, x3, (x7 & 0x7ffffffffff))
			
 
				+	x12, _ := _addcarryx_u43(x11, x5, (x7 & 0x7ffffffffff))
			
 
				+	x14 := (x12 << 7) // limb 2 starts at bit 87, i.e. bit 7 of byte 10
			
 
				+	x15 := (x10 << 4) // limb 1 starts at bit 44, i.e. bit 4 of byte 5
			
 
				+	x16 := (u8(x8) & 0xff) // peel limb 0 off one byte at a time, least significant first
			
 
				+	x17 := (x8 >> 8)
			
 
				+	x18 := (u8(x17) & 0xff)
			
 
				+	x19 := (x17 >> 8)
			
 
				+	x20 := (u8(x19) & 0xff)
			
 
				+	x21 := (x19 >> 8)
			
 
				+	x22 := (u8(x21) & 0xff)
			
 
				+	x23 := (x21 >> 8)
			
 
				+	x24 := (u8(x23) & 0xff)
			
 
				+	x25 := u8((x23 >> 8))
			
 
				+	x26 := (x15 + u64(x25)) // leftover top bits of limb 0 share byte 5 with the shifted limb 1
			
 
				+	x27 := (u8(x26) & 0xff)
			
 
				+	x28 := (x26 >> 8)
			
 
				+	x29 := (u8(x28) & 0xff)
			
 
				+	x30 := (x28 >> 8)
			
 
				+	x31 := (u8(x30) & 0xff)
			
 
				+	x32 := (x30 >> 8)
			
 
				+	x33 := (u8(x32) & 0xff)
			
 
				+	x34 := (x32 >> 8)
			
 
				+	x35 := (u8(x34) & 0xff)
			
 
				+	x36 := u8((x34 >> 8))
			
 
				+	x37 := (x14 + u64(x36)) // leftover bits of limb 1 share byte 10 with the shifted limb 2
			
 
				+	x38 := (u8(x37) & 0xff)
			
 
				+	x39 := (x37 >> 8)
			
 
				+	x40 := (u8(x39) & 0xff)
			
 
				+	x41 := (x39 >> 8)
			
 
				+	x42 := (u8(x41) & 0xff)
			
 
				+	x43 := (x41 >> 8)
			
 
				+	x44 := (u8(x43) & 0xff)
			
 
				+	x45 := (x43 >> 8)
			
 
				+	x46 := (u8(x45) & 0xff)
			
 
				+	x47 := (x45 >> 8)
			
 
				+	x48 := (u8(x47) & 0xff)
			
 
				+	x49 := u8((x47 >> 8))
			
 
				+	out1[0] = x16 // all stores happen only after every byte has been computed
			
 
				+	out1[1] = x18
			
 
				+	out1[2] = x20
			
 
				+	out1[3] = x22
			
 
				+	out1[4] = x24
			
 
				+	out1[5] = x27
			
 
				+	out1[6] = x29
			
 
				+	out1[7] = x31
			
 
				+	out1[8] = x33
			
 
				+	out1[9] = x35
			
 
				+	out1[10] = x38
			
 
				+	out1[11] = x40
			
 
				+	out1[12] = x42
			
 
				+	out1[13] = x44
			
 
				+	out1[14] = x46
			
 
				+	out1[15] = x48
			
 
				+	out1[16] = x49
			
 
				+}
			
 
				+
			
 
				+_fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte) { // Packs arg1[0..16] (little-endian) into three limbs of 44, 43 and 43 bits; arg1[17..31] are not read and the value is not reduced here.
			
 
				+	x1 := (u64(arg1[16]) << 41) // shift every byte to its bit offset inside the limb it contributes to (limb 2: bytes 11..16)
			
 
				+	x2 := (u64(arg1[15]) << 33)
			
 
				+	x3 := (u64(arg1[14]) << 25)
			
 
				+	x4 := (u64(arg1[13]) << 17)
			
 
				+	x5 := (u64(arg1[12]) << 9)
			
 
				+	x6 := (u64(arg1[11]) * u64(0x2))
			
 
				+	x7 := (u64(arg1[10]) << 36) // limb 1: bytes 6..10
			
 
				+	x8 := (u64(arg1[9]) << 28)
			
 
				+	x9 := (u64(arg1[8]) << 20)
			
 
				+	x10 := (u64(arg1[7]) << 12)
			
 
				+	x11 := (u64(arg1[6]) << 4)
			
 
				+	x12 := (u64(arg1[5]) << 40) // limb 0: bytes 0..5
			
 
				+	x13 := (u64(arg1[4]) << 32)
			
 
				+	x14 := (u64(arg1[3]) << 24)
			
 
				+	x15 := (u64(arg1[2]) << 16)
			
 
				+	x16 := (u64(arg1[1]) << 8)
			
 
				+	x17 := arg1[0]
			
 
				+	x18 := (x16 + u64(x17)) // accumulate the bytes of limb 0
			
 
				+	x19 := (x15 + x18)
			
 
				+	x20 := (x14 + x19)
			
 
				+	x21 := (x13 + x20)
			
 
				+	x22 := (x12 + x21)
			
 
				+	x23 := (x22 & 0xfffffffffff) // low 44 bits become limb 0
			
 
				+	x24 := u8((x22 >> 44)) // bits above 44 spill into limb 1
			
 
				+	x25 := (x11 + u64(x24))
			
 
				+	x26 := (x10 + x25)
			
 
				+	x27 := (x9 + x26)
			
 
				+	x28 := (x8 + x27)
			
 
				+	x29 := (x7 + x28)
			
 
				+	x30 := (x29 & 0x7ffffffffff)
			
 
				+	x31 := fiat.u1((x29 >> 43))
			
 
				+	x32 := (x6 + u64(x31))
			
 
				+	x33 := (x5 + x32)
			
 
				+	x34 := (x4 + x33)
			
 
				+	x35 := (x3 + x34)
			
 
				+	x36 := (x2 + x35)
			
 
				+	x37 := (x1 + x36)
			
 
				+	out1[0] = x23
			
 
				+	out1[1] = x30
			
 
				+	out1[2] = x37 // the top limb is stored unmasked, so every bit of arg1[16] is kept
			
 
				+}
			
 
				+
			
 
				+// fe_relax copies the limbs of the tight element arg1 into the loose
				+// element out1 without modifying them.
				+fe_relax :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) {
				+	// Snapshot the source so every load precedes the first store.
				+	src := arg1^
				+
				+	out1[0] = src[0]
				+	out1[1] = src[1]
				+	out1[2] = src[2]
				+}
			
 
				+
			
 
				+// The following routines were added by hand, and do not come from fiat-crypto.
			
 
				+
			
 
				+// fe_zero sets every limb of out1 to 0.
				+fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
				+	out1[0], out1[1], out1[2] = 0, 0, 0
				+}
			
 
				+
			
 
				+// fe_set copies the limbs of arg1 into out1.
				+fe_set :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
				+	// Snapshot the source so every load precedes the first store.
				+	src := arg1^
				+
				+	out1[0] = src[0]
				+	out1[1] = src[1]
				+	out1[2] = src[2]
				+}
			
 
				+
			
 
				+// fe_cond_swap exchanges out1 and out2 when arg1 is true and leaves both
				+// unchanged when it is false, without branching on arg1.
				+//
				+// The limb-wise XOR difference is ANDed with mask = -u64(arg1) (all-ones
				+// for true, zero for false) and then XORed into both operands.
				+fe_cond_swap :: proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: bool) {
				+	mask := -u64(arg1)
				+
				+	// Snapshot both elements so every load precedes the first store.
				+	a, b := out1^, out2^
				+
				+	d0 := (a[0] ~ b[0]) & mask
				+	d1 := (a[1] ~ b[1]) & mask
				+	d2 := (a[2] ~ b[2]) & mask
				+
				+	out1[0], out2[0] = a[0] ~ d0, b[0] ~ d0
				+	out1[1], out2[1] = a[1] ~ d1, b[1] ~ d1
				+	out1[2], out2[2] = a[2] ~ d2, b[2] ~ d2
				+}
			
--- a/core/crypto/chacha20/chacha20.odin
+++ b/core/crypto/chacha20/chacha20.odin
@@ -0,0 +1,581 @@
 
				+package chacha20
			
 
				+
			
 
				+import "core:crypto/util"
			
 
				+import "core:math/bits"
			
 
				+import "core:mem"
			
 
				+
			
 
				+// Sizes in bytes of the key and of the two nonce lengths accepted by init.
			
 
				+KEY_SIZE :: 32
			
 
				+NONCE_SIZE :: 12
			
 
				+XNONCE_SIZE :: 24
			
 
				+
			
 
				+// Largest block number seek accepts for a context in the IETF flavor.
			
 
				+_MAX_CTR_IETF :: 0xffffffff
			
 
				+
			
 
				+// Keystream block size in bytes, state size in 32-bit words, and the round
			
 
				+// count (the round loops step by two, so they run _ROUNDS/2 times).
			
 
				+_BLOCK_SIZE :: 64
			
 
				+_STATE_SIZE_U32 :: 16
			
 
				+_ROUNDS :: 20
			
 
				+
			
 
				+// The first four state words.  Written out as little-endian bytes they
			
 
				+// spell the ASCII string "expand 32-byte k".
			
 
				+_SIGMA_0 : u32 : 0x61707865
			
 
				+_SIGMA_1 : u32 : 0x3320646e
			
 
				+_SIGMA_2 : u32 : 0x79622d32
			
 
				+_SIGMA_3 : u32 : 0x6b206574
			
 
				+
			
 
				+// Context holds the cipher state for one key/nonce pair.  init fills it
			
 
				+// in, and reset zeroes _s and _buffer and clears _is_initialized.
			
 
				+Context :: struct {
			
 
				+	// The 16-word state: constants, key, block counter and nonce.
			
 
				+	_s: [_STATE_SIZE_U32]u32,
			
 
				+
			
 
				+	// One block of buffered keystream, and the offset of the first unused
			
 
				+	// byte in it; _off == _BLOCK_SIZE means nothing is buffered.
			
 
				+	_buffer: [_BLOCK_SIZE]byte,
			
 
				+	_off: int,
			
 
				+
			
 
				+	// True when init was given a NONCE_SIZE nonce, false for XNONCE_SIZE.
			
 
				+	_is_ietf_flavor: bool,
			
 
				+	// Set by init, cleared by reset, asserted by seek and the keystream procs.
			
 
				+	_is_initialized: bool,
			
 
				+}
			
 
				+
			
 
				+// init prepares ctx for use with the given key and nonce.  A NONCE_SIZE
			
 
				+// nonce selects ChaCha20 with the 32-bit IETF counter; an XNONCE_SIZE nonce
			
 
				+// selects XChaCha20.  Panics if key or nonce has any other length.
			
 
				+init :: proc (ctx: ^Context, key, nonce: []byte) {
			
 
				+	if len(key) != KEY_SIZE {
			
 
				+		panic("crypto/chacha20: invalid ChaCha20 key size")
			
 
				+	}
			
 
				+	nonce_len := len(nonce)
			
 
				+	if !(nonce_len == NONCE_SIZE || nonce_len == XNONCE_SIZE) {
			
 
				+		panic("crypto/chacha20: invalid (X)ChaCha20 nonce size")
			
 
				+	}
			
 
				+	is_xchacha := nonce_len == XNONCE_SIZE
			
 
				+
			
 
				+	// For XChaCha20 the cipher is keyed with a sub-key derived via
			
 
				+	// HChaCha20 (parked in the keystream buffer for now) and uses the
			
 
				+	// last 8 bytes of the nonce.
			
 
				+	eff_key, eff_nonce := key, nonce
			
 
				+	if is_xchacha {
			
 
				+		derived := ctx._buffer[:KEY_SIZE]
			
 
				+		_hchacha20(derived, key, nonce)
			
 
				+		eff_key, eff_nonce = derived, nonce[16:24]
			
 
				+	}
			
 
				+
			
 
				+	// Words 0-3: constants.  Words 4-11: key.  Word 12: block counter.
			
 
				+	ctx._s[0], ctx._s[1], ctx._s[2], ctx._s[3] = _SIGMA_0, _SIGMA_1, _SIGMA_2, _SIGMA_3
			
 
				+	for i in 0..<8 {
			
 
				+		ctx._s[4+i] = util.U32_LE(eff_key[4*i:4*i+4])
			
 
				+	}
			
 
				+	ctx._s[12] = 0
			
 
				+
			
 
				+	if is_xchacha {
			
 
				+		ctx._s[13] = 0
			
 
				+		ctx._s[14] = util.U32_LE(eff_nonce[0:4])
			
 
				+		ctx._s[15] = util.U32_LE(eff_nonce[4:8])
			
 
				+
			
 
				+		// The sub-key has been loaded into the state, so wipe the copy
			
 
				+		// left in the keystream buffer now rather than waiting for it
			
 
				+		// to be overwritten.
			
 
				+		mem.zero_explicit(&ctx._buffer, KEY_SIZE)
			
 
				+	} else {
			
 
				+		ctx._s[13] = util.U32_LE(eff_nonce[0:4])
			
 
				+		ctx._s[14] = util.U32_LE(eff_nonce[4:8])
			
 
				+		ctx._s[15] = util.U32_LE(eff_nonce[8:12])
			
 
				+	}
			
 
				+
			
 
				+	// Mark the keystream buffer as empty.
			
 
				+	ctx._off = _BLOCK_SIZE
			
 
				+	ctx._is_ietf_flavor = !is_xchacha
			
 
				+	ctx._is_initialized = true
			
 
				+}
			
 
				+
			
 
				+// seek sets the block counter to block_nr and discards any buffered
			
 
				+// keystream.  In the IETF flavor it panics if block_nr is larger than
			
 
				+// _MAX_CTR_IETF.
			
 
				+seek :: proc (ctx: ^Context, block_nr: u64) {
			
 
				+	assert(ctx._is_initialized)
			
 
				+
			
 
				+	if ctx._is_ietf_flavor && block_nr > _MAX_CTR_IETF {
			
 
				+		panic("crypto/chacha20: attempted to seek past maximum counter")
			
 
				+	}
			
 
				+
			
 
				+	if !ctx._is_ietf_flavor {
			
 
				+		// Outside the IETF flavor word 13 holds the upper counter half.
			
 
				+		ctx._s[13] = u32(block_nr >> 32)
			
 
				+	}
			
 
				+	ctx._s[12] = u32(block_nr)
			
 
				+
			
 
				+	// Mark the keystream buffer as empty.
			
 
				+	ctx._off = _BLOCK_SIZE
			
 
				+}
			
 
				+
			
 
				+// xor_bytes XORs src with keystream and writes the result to dst.  When
			
 
				+// dst is shorter than src only len(dst) bytes are processed.
			
 
				+xor_bytes :: proc (ctx: ^Context, dst, src: []byte) {
			
 
				+	assert(ctx._is_initialized)
			
 
				+
			
 
				+	// TODO: Enforcing that dst and src alias exactly or not at all
			
 
				+	// is a good idea, though odd aliasing should be extremely uncommon.
			
 
				+
			
 
				+	out, input := dst, src
			
 
				+	if len(out) < len(input) {
			
 
				+		input = input[:len(out)]
			
 
				+	}
			
 
				+
			
 
				+	left := len(input)
			
 
				+	for left > 0 {
			
 
				+		if ctx._off == _BLOCK_SIZE {
			
 
				+			// Nothing is buffered: handle all whole blocks in one call.
			
 
				+			whole_blocks := left / _BLOCK_SIZE
			
 
				+			if whole_blocks > 0 {
			
 
				+				bulk_len := whole_blocks * _BLOCK_SIZE
			
 
				+				_do_blocks(ctx, out, input, whole_blocks)
			
 
				+				left -= bulk_len
			
 
				+				if left == 0 {
			
 
				+					return
			
 
				+				}
			
 
				+				out, input = out[bulk_len:], input[bulk_len:]
			
 
				+			}
			
 
				+
			
 
				+			// A partial block remains, so generate and buffer one
			
 
				+			// block worth of keystream for it.
			
 
				+			_do_blocks(ctx, ctx._buffer[:], nil, 1)
			
 
				+			ctx._off = 0
			
 
				+		}
			
 
				+
			
 
				+		// Consume as much of the buffered keystream as is needed.
			
 
				+		chunk := min(_BLOCK_SIZE - ctx._off, left)
			
 
				+		keystream := ctx._buffer[ctx._off:]
			
 
				+		for i in 0..<chunk {
			
 
				+			out[i] = keystream[i] ~ input[i]
			
 
				+		}
			
 
				+		ctx._off += chunk
			
 
				+		out, input = out[chunk:], input[chunk:]
			
 
				+		left -= chunk
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+// keystream_bytes fills dst with the next len(dst) bytes of keystream.
			
 
				+keystream_bytes :: proc (ctx: ^Context, dst: []byte) {
			
 
				+	assert(ctx._is_initialized)
			
 
				+
			
 
				+	out := dst
			
 
				+	left := len(out)
			
 
				+	for left > 0 {
			
 
				+		if ctx._off == _BLOCK_SIZE {
			
 
				+			// Nothing is buffered: write all whole blocks directly.
			
 
				+			whole_blocks := left / _BLOCK_SIZE
			
 
				+			if whole_blocks > 0 {
			
 
				+				bulk_len := whole_blocks * _BLOCK_SIZE
			
 
				+				_do_blocks(ctx, out, nil, whole_blocks)
			
 
				+				left -= bulk_len
			
 
				+				if left == 0 {
			
 
				+					return
			
 
				+				}
			
 
				+				out = out[bulk_len:]
			
 
				+			}
			
 
				+
			
 
				+			// A partial block remains, so generate and buffer one
			
 
				+			// block worth of keystream for it.
			
 
				+			_do_blocks(ctx, ctx._buffer[:], nil, 1)
			
 
				+			ctx._off = 0
			
 
				+		}
			
 
				+
			
 
				+		// Hand out as much of the buffered keystream as is needed.
			
 
				+		chunk := min(_BLOCK_SIZE - ctx._off, left)
			
 
				+		copy(out[:chunk], ctx._buffer[ctx._off:ctx._off+chunk])
			
 
				+		ctx._off += chunk
			
 
				+		out = out[chunk:]
			
 
				+		left -= chunk
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+// reset zeroes the state words and the keystream buffer of ctx and marks
			
 
				+// the context as uninitialized.
			
 
				+reset :: proc (ctx: ^Context) {
			
 
				+	ctx._is_initialized = false
			
 
				+
			
 
				+	mem.zero_explicit(&ctx._buffer, size_of(ctx._buffer))
			
 
				+	mem.zero_explicit(&ctx._s, size_of(ctx._s))
			
 
				+}
			
 
				+
			
 
				+// _do_blocks produces nr_blocks blocks of keystream starting at the
			
 
				+// counter currently held in the state.  When src is non-nil each block is
			
 
				+// XORed with the corresponding _BLOCK_SIZE bytes of src before being
			
 
				+// written to dst; otherwise the keystream itself is written.  The counter
			
 
				+// is advanced by one per block.  Panics if the request would exceed the
			
 
				+// per-nonce counter limit.
			
 
				+_do_blocks :: proc (ctx: ^Context, dst, src: []byte, nr_blocks: int) {
			
 
				+	// Enforce the maximum consumed keystream per nonce.
			
 
				+	//
			
 
				+	// While all modern "standard" definitions of ChaCha20 use
			
 
				+	// the IETF 32-bit counter, for XChaCha20 most common
			
 
				+	// implementations allow for a 64-bit counter.
			
 
				+	//
			
 
				+	// Honestly, the answer here is "use a MRAE primitive", but
			
 
				+	// go with common practice in the case of XChaCha20.
			
 
				+	if ctx._is_ietf_flavor {
			
 
				+		if u64(ctx._s[12]) + u64(nr_blocks) > 0xffffffff {
			
 
				+			panic("crypto/chacha20: maximum ChaCha20 keystream per nonce reached")
			
 
				+		}
			
 
				+	} else {
			
 
				+		ctr := (u64(ctx._s[13]) << 32) | u64(ctx._s[12])
			
 
				+		if _, carry := bits.add_u64(ctr, u64(nr_blocks), 0); carry != 0 {
			
 
				+			panic("crypto/chacha20: maximum XChaCha20 keystream per nonce reached")
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	dst, src := dst, src
			
 
				+	x := &ctx._s
			
 
				+	for n := 0; n < nr_blocks; n = n + 1 {
			
 
				+		x0, x1, x2, x3 := _SIGMA_0, _SIGMA_1, _SIGMA_2, _SIGMA_3
			
 
				+		x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 := x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]
			
 
				+
			
 
				+		for i := _ROUNDS; i > 0; i = i - 2 {
			
 
				+			// Even when forcing inlining, manually inlining all of
			
 
				+			// these is decently faster.
			
 
				+
			
 
				+			// quarterround(x, 0, 4, 8, 12)
			
 
				+			x0 += x4
			
 
				+			x12 ~= x0
			
 
				+			x12 = util.ROTL32(x12, 16)
			
 
				+			x8 += x12
			
 
				+			x4 ~= x8
			
 
				+			x4 = util.ROTL32(x4, 12)
			
 
				+			x0 += x4
			
 
				+			x12 ~= x0
			
 
				+			x12 = util.ROTL32(x12, 8)
			
 
				+			x8 += x12
			
 
				+			x4 ~= x8
			
 
				+			x4 = util.ROTL32(x4, 7)
			
 
				+
			
 
				+			// quarterround(x, 1, 5, 9, 13)
			
 
				+			x1 += x5
			
 
				+			x13 ~= x1
			
 
				+			x13 = util.ROTL32(x13, 16)
			
 
				+			x9 += x13
			
 
				+			x5 ~= x9
			
 
				+			x5 = util.ROTL32(x5, 12)
			
 
				+			x1 += x5
			
 
				+			x13 ~= x1
			
 
				+			x13 = util.ROTL32(x13, 8)
			
 
				+			x9 += x13
			
 
				+			x5 ~= x9
			
 
				+			x5 = util.ROTL32(x5, 7)
			
 
				+
			
 
				+			// quarterround(x, 2, 6, 10, 14)
			
 
				+			x2 += x6
			
 
				+			x14 ~= x2
			
 
				+			x14 = util.ROTL32(x14, 16)
			
 
				+			x10 += x14
			
 
				+			x6 ~= x10
			
 
				+			x6 = util.ROTL32(x6, 12)
			
 
				+			x2 += x6
			
 
				+			x14 ~= x2
			
 
				+			x14 = util.ROTL32(x14, 8)
			
 
				+			x10 += x14
			
 
				+			x6 ~= x10
			
 
				+			x6 = util.ROTL32(x6, 7)
			
 
				+
			
 
				+			// quarterround(x, 3, 7, 11, 15)
			
 
				+			x3 += x7
			
 
				+			x15 ~= x3
			
 
				+			x15 = util.ROTL32(x15, 16)
			
 
				+			x11 += x15
			
 
				+			x7 ~= x11
			
 
				+			x7 = util.ROTL32(x7, 12)
			
 
				+			x3 += x7
			
 
				+			x15 ~= x3
			
 
				+			x15 = util.ROTL32(x15, 8)
			
 
				+			x11 += x15
			
 
				+			x7 ~= x11
			
 
				+			x7 = util.ROTL32(x7, 7)
			
 
				+
			
 
				+			// quarterround(x, 0, 5, 10, 15)
			
 
				+			x0 += x5
			
 
				+			x15 ~= x0
			
 
				+			x15 = util.ROTL32(x15, 16)
			
 
				+			x10 += x15
			
 
				+			x5 ~= x10
			
 
				+			x5 = util.ROTL32(x5, 12)
			
 
				+			x0 += x5
			
 
				+			x15 ~= x0
			
 
				+			x15 = util.ROTL32(x15, 8)
			
 
				+			x10 += x15
			
 
				+			x5 ~= x10
			
 
				+			x5 = util.ROTL32(x5, 7)
			
 
				+
			
 
				+			// quarterround(x, 1, 6, 11, 12)
			
 
				+			x1 += x6
			
 
				+			x12 ~= x1
			
 
				+			x12 = util.ROTL32(x12, 16)
			
 
				+			x11 += x12
			
 
				+			x6 ~= x11
			
 
				+			x6 = util.ROTL32(x6, 12)
			
 
				+			x1 += x6
			
 
				+			x12 ~= x1
			
 
				+			x12 = util.ROTL32(x12, 8)
			
 
				+			x11 += x12
			
 
				+			x6 ~= x11
			
 
				+			x6 = util.ROTL32(x6, 7)
			
 
				+
			
 
				+			// quarterround(x, 2, 7, 8, 13)
			
 
				+			x2 += x7
			
 
				+			x13 ~= x2
			
 
				+			x13 = util.ROTL32(x13, 16)
			
 
				+			x8 += x13
			
 
				+			x7 ~= x8
			
 
				+			x7 = util.ROTL32(x7, 12)
			
 
				+			x2 += x7
			
 
				+			x13 ~= x2
			
 
				+			x13 = util.ROTL32(x13, 8)
			
 
				+			x8 += x13
			
 
				+			x7 ~= x8
			
 
				+			x7 = util.ROTL32(x7, 7)
			
 
				+
			
 
				+			// quarterround(x, 3, 4, 9, 14)
			
 
				+			x3 += x4
			
 
				+			x14 ~= x3
			
 
				+			x14 = util.ROTL32(x14, 16)
			
 
				+			x9 += x14
			
 
				+			x4 ~= x9
			
 
				+			x4 = util.ROTL32(x4, 12)
			
 
				+			x3 += x4
			
 
				+			x14 ~= x3
			
 
				+			x14 = util.ROTL32(x14, 8)
			
 
				+			x9 += x14
			
 
				+			x4 ~= x9
			
 
				+			x4 = util.ROTL32(x4, 7)
			
 
				+		}
			
 
				+
			
 
				+		x0 += _SIGMA_0
			
 
				+		x1 += _SIGMA_1
			
 
				+		x2 += _SIGMA_2
			
 
				+		x3 += _SIGMA_3
			
 
				+		x4 += x[4]
			
 
				+		x5 += x[5]
			
 
				+		x6 += x[6]
			
 
				+		x7 += x[7]
			
 
				+		x8 += x[8]
			
 
				+		x9 += x[9]
			
 
				+		x10 += x[10]
			
 
				+		x11 += x[11]
			
 
				+		x12 += x[12]
			
 
				+		x13 += x[13]
			
 
				+		x14 += x[14]
			
 
				+		x15 += x[15]
			
 
				+
			
 
				+		// While the "correct" answer to getting more performance out of
			
 
				+		// this is "use vector operations", support for that is currently
			
 
				+		// a work in progress/to be designed.
			
 
				+		//
			
 
				+		// Until dedicated assembly can be written leverage the fact that
			
 
				+		// the callers of this routine ensure that src/dst are valid.
			
 
				+
			
 
				+		when ODIN_ARCH == "386" || ODIN_ARCH == "amd64" {
			
 
				+			// util.PUT_U32_LE/util.U32_LE are not required on little-endian
			
 
				+			// systems that also happen to not be strict about aligned
			
 
				+			// memory access.
			
 
				+
			
 
				+			dst_p := transmute(^[16]u32)(&dst[0])
			
 
				+			if src != nil {
			
 
				+				src_p := transmute(^[16]u32)(&src[0])
			
 
				+				dst_p[0] = src_p[0] ~ x0
			
 
				+				dst_p[1] = src_p[1] ~ x1
			
 
				+				dst_p[2] = src_p[2] ~ x2
			
 
				+				dst_p[3] = src_p[3] ~ x3
			
 
				+				dst_p[4] = src_p[4] ~ x4
			
 
				+				dst_p[5] = src_p[5] ~ x5
			
 
				+				dst_p[6] = src_p[6] ~ x6
			
 
				+				dst_p[7] = src_p[7] ~ x7
			
 
				+				dst_p[8] = src_p[8] ~ x8
			
 
				+				dst_p[9] = src_p[9] ~ x9
			
 
				+				dst_p[10] = src_p[10] ~ x10
			
 
				+				dst_p[11] = src_p[11] ~ x11
			
 
				+				dst_p[12] = src_p[12] ~ x12
			
 
				+				dst_p[13] = src_p[13] ~ x13
			
 
				+				dst_p[14] = src_p[14] ~ x14
			
 
				+				dst_p[15] = src_p[15] ~ x15
			
 
				+				src = src[_BLOCK_SIZE:]
			
 
				+			} else {
			
 
				+				dst_p[0] = x0
			
 
				+				dst_p[1] = x1
			
 
				+				dst_p[2] = x2
			
 
				+				dst_p[3] = x3
			
 
				+				dst_p[4] = x4
			
 
				+				dst_p[5] = x5
			
 
				+				dst_p[6] = x6
			
 
				+				dst_p[7] = x7
			
 
				+				dst_p[8] = x8
			
 
				+				dst_p[9] = x9
			
 
				+				dst_p[10] = x10
			
 
				+				dst_p[11] = x11
			
 
				+				dst_p[12] = x12
			
 
				+				dst_p[13] = x13
			
 
				+				dst_p[14] = x14
			
 
				+				dst_p[15] = x15
			
 
				+			}
			
 
				+			dst = dst[_BLOCK_SIZE:]
			
 
				+		} else {
			
 
				+			#no_bounds_check {
			
 
				+				if src != nil {
			
 
				+					util.PUT_U32_LE(dst[0:4], util.U32_LE(src[0:4]) ~ x0)
			
 
				+					util.PUT_U32_LE(dst[4:8], util.U32_LE(src[4:8]) ~ x1)
			
 
				+					util.PUT_U32_LE(dst[8:12], util.U32_LE(src[8:12]) ~ x2)
			
 
				+					util.PUT_U32_LE(dst[12:16], util.U32_LE(src[12:16]) ~ x3)
			
 
				+					util.PUT_U32_LE(dst[16:20], util.U32_LE(src[16:20]) ~ x4)
			
 
				+					util.PUT_U32_LE(dst[20:24], util.U32_LE(src[20:24]) ~ x5)
			
 
				+					util.PUT_U32_LE(dst[24:28], util.U32_LE(src[24:28]) ~ x6)
			
 
				+					util.PUT_U32_LE(dst[28:32], util.U32_LE(src[28:32]) ~ x7)
			
 
				+					util.PUT_U32_LE(dst[32:36], util.U32_LE(src[32:36]) ~ x8)
			
 
				+					util.PUT_U32_LE(dst[36:40], util.U32_LE(src[36:40]) ~ x9)
			
 
				+					util.PUT_U32_LE(dst[40:44], util.U32_LE(src[40:44]) ~ x10)
			
 
				+					util.PUT_U32_LE(dst[44:48], util.U32_LE(src[44:48]) ~ x11)
			
 
				+					util.PUT_U32_LE(dst[48:52], util.U32_LE(src[48:52]) ~ x12)
			
 
				+					util.PUT_U32_LE(dst[52:56], util.U32_LE(src[52:56]) ~ x13)
			
 
				+					util.PUT_U32_LE(dst[56:60], util.U32_LE(src[56:60]) ~ x14)
			
 
				+					util.PUT_U32_LE(dst[60:64], util.U32_LE(src[60:64]) ~ x15)
			
 
				+					src = src[_BLOCK_SIZE:]
			
 
				+				} else {
			
 
				+					util.PUT_U32_LE(dst[0:4], x0)
			
 
				+					util.PUT_U32_LE(dst[4:8], x1)
			
 
				+					util.PUT_U32_LE(dst[8:12], x2)
			
 
				+					util.PUT_U32_LE(dst[12:16], x3)
			
 
				+					util.PUT_U32_LE(dst[16:20], x4)
			
 
				+					util.PUT_U32_LE(dst[20:24], x5)
			
 
				+					util.PUT_U32_LE(dst[24:28], x6)
			
 
				+					util.PUT_U32_LE(dst[28:32], x7)
			
 
				+					util.PUT_U32_LE(dst[32:36], x8)
			
 
				+					util.PUT_U32_LE(dst[36:40], x9)
			
 
				+					util.PUT_U32_LE(dst[40:44], x10)
			
 
				+					util.PUT_U32_LE(dst[44:48], x11)
			
 
				+					util.PUT_U32_LE(dst[48:52], x12)
			
 
				+					util.PUT_U32_LE(dst[52:56], x13)
			
 
				+					util.PUT_U32_LE(dst[56:60], x14)
			
 
				+					util.PUT_U32_LE(dst[60:64], x15)
			
 
				+				}
			
 
				+				dst = dst[_BLOCK_SIZE:]
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		// Increment the counter.  Overflow checking is done upon
			
 
				+		// entry into the routine, so a 64-bit increment safely
			
 
				+		// covers both cases.
			
 
				+		new_ctr := ((u64(ctx._s[13]) << 32) | u64(ctx._s[12])) + 1
			
 
				+		x[12] = u32(new_ctr)
			
 
				+		x[13] = u32(new_ctr >> 32)
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+// _hchacha20 runs the _ROUNDS-round permutation over a state built from
			
 
				+// the constants, key[0:32] and nonce[0:16], then writes words 0-3 and
			
 
				+// 12-15 of the result to dst[0:32].  Unlike _do_blocks, the initial state
			
 
				+// is not added back in after the rounds.
			
 
				+_hchacha20 :: proc (dst, key, nonce: []byte) {
			
 
				+	x0, x1, x2, x3 := _SIGMA_0, _SIGMA_1, _SIGMA_2, _SIGMA_3
			
 
				+	x4 := util.U32_LE(key[0:4])
			
 
				+	x5 := util.U32_LE(key[4:8])
			
 
				+	x6 := util.U32_LE(key[8:12])
			
 
				+	x7 := util.U32_LE(key[12:16])
			
 
				+	x8 := util.U32_LE(key[16:20])
			
 
				+	x9 := util.U32_LE(key[20:24])
			
 
				+	x10 := util.U32_LE(key[24:28])
			
 
				+	x11 := util.U32_LE(key[28:32])
			
 
				+	x12 := util.U32_LE(nonce[0:4])
			
 
				+	x13 := util.U32_LE(nonce[4:8])
			
 
				+	x14 := util.U32_LE(nonce[8:12])
			
 
				+	x15 := util.U32_LE(nonce[12:16])
			
 
				+
			
 
				+	for i := _ROUNDS; i > 0; i = i - 2 {
			
 
				+		// quarterround(x, 0, 4, 8, 12)
			
 
				+		x0 += x4
			
 
				+		x12 ~= x0
			
 
				+		x12 = util.ROTL32(x12, 16)
			
 
				+		x8 += x12
			
 
				+		x4 ~= x8
			
 
				+		x4 = util.ROTL32(x4, 12)
			
 
				+		x0 += x4
			
 
				+		x12 ~= x0
			
 
				+		x12 = util.ROTL32(x12, 8)
			
 
				+		x8 += x12
			
 
				+		x4 ~= x8
			
 
				+		x4 = util.ROTL32(x4, 7)
			
 
				+
			
 
				+		// quarterround(x, 1, 5, 9, 13)
			
 
				+		x1 += x5
			
 
				+		x13 ~= x1
			
 
				+		x13 = util.ROTL32(x13, 16)
			
 
				+		x9 += x13
			
 
				+		x5 ~= x9
			
 
				+		x5 = util.ROTL32(x5, 12)
			
 
				+		x1 += x5
			
 
				+		x13 ~= x1
			
 
				+		x13 = util.ROTL32(x13, 8)
			
 
				+		x9 += x13
			
 
				+		x5 ~= x9
			
 
				+		x5 = util.ROTL32(x5, 7)
			
 
				+
			
 
				+		// quarterround(x, 2, 6, 10, 14)
			
 
				+		x2 += x6
			
 
				+		x14 ~= x2
			
 
				+		x14 = util.ROTL32(x14, 16)
			
 
				+		x10 += x14
			
 
				+		x6 ~= x10
			
 
				+		x6 = util.ROTL32(x6, 12)
			
 
				+		x2 += x6
			
 
				+		x14 ~= x2
			
 
				+		x14 = util.ROTL32(x14, 8)
			
 
				+		x10 += x14
			
 
				+		x6 ~= x10
			
 
				+		x6 = util.ROTL32(x6, 7)
			
 
				+
			
 
				+		// quarterround(x, 3, 7, 11, 15)
			
 
				+		x3 += x7
			
 
				+		x15 ~= x3
			
 
				+		x15 = util.ROTL32(x15, 16)
			
 
				+		x11 += x15
			
 
				+		x7 ~= x11
			
 
				+		x7 = util.ROTL32(x7, 12)
			
 
				+		x3 += x7
			
 
				+		x15 ~= x3
			
 
				+		x15 = util.ROTL32(x15, 8)
			
 
				+		x11 += x15
			
 
				+		x7 ~= x11
			
 
				+		x7 = util.ROTL32(x7, 7)
			
 
				+
			
 
				+		// quarterround(x, 0, 5, 10, 15)
			
 
				+		x0 += x5
			
 
				+		x15 ~= x0
			
 
				+		x15 = util.ROTL32(x15, 16)
			
 
				+		x10 += x15
			
 
				+		x5 ~= x10
			
 
				+		x5 = util.ROTL32(x5, 12)
			
 
				+		x0 += x5
			
 
				+		x15 ~= x0
			
 
				+		x15 = util.ROTL32(x15, 8)
			
 
				+		x10 += x15
			
 
				+		x5 ~= x10
			
 
				+		x5 = util.ROTL32(x5, 7)
			
 
				+
			
 
				+		// quarterround(x, 1, 6, 11, 12)
			
 
				+		x1 += x6
			
 
				+		x12 ~= x1
			
 
				+		x12 = util.ROTL32(x12, 16)
			
 
				+		x11 += x12
			
 
				+		x6 ~= x11
			
 
				+		x6 = util.ROTL32(x6, 12)
			
 
				+		x1 += x6
			
 
				+		x12 ~= x1
			
 
				+		x12 = util.ROTL32(x12, 8)
			
 
				+		x11 += x12
			
 
				+		x6 ~= x11
			
 
				+		x6 = util.ROTL32(x6, 7)
			
 
				+
			
 
				+		// quarterround(x, 2, 7, 8, 13)
			
 
				+		x2 += x7
			
 
				+		x13 ~= x2
			
 
				+		x13 = util.ROTL32(x13, 16)
			
 
				+		x8 += x13
			
 
				+		x7 ~= x8
			
 
				+		x7 = util.ROTL32(x7, 12)
			
 
				+		x2 += x7
			
 
				+		x13 ~= x2
			
 
				+		x13 = util.ROTL32(x13, 8)
			
 
				+		x8 += x13
			
 
				+		x7 ~= x8
			
 
				+		x7 = util.ROTL32(x7, 7)
			
 
				+
			
 
				+		// quarterround(x, 3, 4, 9, 14)
			
 
				+		x3 += x4
			
 
				+		x14 ~= x3
			
 
				+		x14 = util.ROTL32(x14, 16)
			
 
				+		x9 += x14
			
 
				+		x4 ~= x9
			
 
				+		x4 = util.ROTL32(x4, 12)
			
 
				+		x3 += x4
			
 
				+		x14 ~= x3
			
 
				+		x14 = util.ROTL32(x14, 8)
			
 
				+		x9 += x14
			
 
				+		x4 ~= x9
			
 
				+		x4 = util.ROTL32(x4, 7)
			
 
				+	}
			
 
				+
			
 
				+	util.PUT_U32_LE(dst[0:4], x0)
			
 
				+	util.PUT_U32_LE(dst[4:8], x1)
			
 
				+	util.PUT_U32_LE(dst[8:12], x2)
			
 
				+	util.PUT_U32_LE(dst[12:16], x3)
			
 
				+	util.PUT_U32_LE(dst[16:20], x12)
			
 
				+	util.PUT_U32_LE(dst[20:24], x13)
			
 
				+	util.PUT_U32_LE(dst[24:28], x14)
			
 
				+	util.PUT_U32_LE(dst[28:32], x15)
			
 
				+}
			
--- a/core/crypto/chacha20poly1305/chacha20poly1305.odin
+++ b/core/crypto/chacha20poly1305/chacha20poly1305.odin
@@ -0,0 +1,146 @@
 
				+package chacha20poly1305
			
 
				+
			
 
				+import "core:crypto"
			
 
				+import "core:crypto/chacha20"
			
 
				+import "core:crypto/poly1305"
			
 
				+import "core:crypto/util"
			
 
				+import "core:mem"
			
 
				+
			
 
				+// Key, nonce and tag sizes, taken from the underlying primitives.
			
 
				+KEY_SIZE :: chacha20.KEY_SIZE
			
 
				+NONCE_SIZE :: chacha20.NONCE_SIZE
			
 
				+TAG_SIZE :: poly1305.TAG_SIZE
			
 
				+
			
 
				+// Largest text length _validate_common_slice_sizes accepts on 64-bit targets.
			
 
				+_P_MAX :: 64 * 0xffffffff // 64 * (2^32-1)
			
 
				+
			
 
				+// _validate_common_slice_sizes panics unless tag, key and nonce are exactly
			
 
				+// TAG_SIZE, KEY_SIZE and NONCE_SIZE bytes long and, where int is 64 bits
			
 
				+// wide, text is no longer than _P_MAX.  aad is accepted but not inspected.
			
 
				+_validate_common_slice_sizes :: proc (tag, key, nonce, aad, text: []byte) {
			
 
				+	switch {
			
 
				+	case len(tag) != TAG_SIZE:
			
 
				+		panic("crypto/chacha20poly1305: invalid destination tag size")
			
 
				+	case len(key) != KEY_SIZE:
			
 
				+		panic("crypto/chacha20poly1305: invalid key size")
			
 
				+	case len(nonce) != NONCE_SIZE:
			
 
				+		panic("crypto/chacha20poly1305: invalid nonce size")
			
 
				+	}
			
 
				+
			
 
				+	#assert(size_of(int) == 8 || size_of(int) <= 4)
			
 
				+	when size_of(int) == 8 {
			
 
				+		// The AAD limit (A_MAX = 2^64 - 1, from the length field) cannot
			
 
				+		// be exceeded by an int-sized length, so it is not checked.
			
 
				+		//
			
 
				+		// The text limit (P_MAX = 64 * (2^32 - 1), from the IETF
			
 
				+		// ChaCha20 counter) only needs checking where int is 64 bits.
			
 
				+		if len(text) > _P_MAX {
			
 
				+			panic("crypto/chacha20poly1305: oversized src data")
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+_PAD: [16]byte
			
 
				+// _update_mac_pad16 feeds the MAC the bytes from _PAD needed to round
			
 
				+// x_len up to a multiple of 16; it does nothing when x_len already is one.
			
 
				+_update_mac_pad16 :: #force_inline proc (ctx: ^poly1305.Context, x_len: int) {
			
 
				+	tail_len := x_len & (16-1)
			
 
				+	if tail_len != 0 {
			
 
				+		poly1305.update(ctx, _PAD[:16 - tail_len])
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+// encrypt encrypts plaintext into ciphertext under key and nonce, and
			
 
				+// writes to tag the Poly1305 tag computed over aad and the ciphertext.
			
 
				+// Panics if tag, key or nonce has the wrong size, if the text is oversized,
			
 
				+// or if ciphertext and plaintext differ in length.
			
 
				+encrypt :: proc (ciphertext, tag, key, nonce, aad, plaintext: []byte) {
			
 
				+	_validate_common_slice_sizes(tag, key, nonce, aad, plaintext)
			
 
				+	if len(ciphertext) != len(plaintext) {
			
 
				+		panic("crypto/chacha20poly1305: invalid destination ciphertext size")
			
 
				+	}
			
 
				+
			
 
				+	stream_ctx: chacha20.Context = ---
			
 
				+	chacha20.init(&stream_ctx, key, nonce)
			
 
				+
			
 
				+	// otk = poly1305_key_gen(key, nonce)
			
 
				+	otk: [poly1305.KEY_SIZE]byte = ---
			
 
				+	chacha20.keystream_bytes(&stream_ctx, otk[:])
			
 
				+	mac_ctx: poly1305.Context = ---
			
 
				+	poly1305.init(&mac_ctx, otk[:])
			
 
				+	mem.zero_explicit(&otk, size_of(otk))
			
 
				+
			
 
				+	aad_len, ciphertext_len := len(aad), len(ciphertext)
			
 
				+
			
 
				+	// There is nothing preventing aad and ciphertext from overlapping
			
 
				+	// so auth the AAD before encrypting (slightly different from the
			
 
				+	// RFC, since the RFC encrypts into a new buffer).
			
 
				+	//
			
 
				+	// mac_data = aad | pad16(aad)
			
 
				+	poly1305.update(&mac_ctx, aad)
			
 
				+	_update_mac_pad16(&mac_ctx, aad_len)
			
 
				+
			
 
				+	// ciphertext = chacha20_encrypt(key, 1, nonce, plaintext)
			
 
				+	chacha20.seek(&stream_ctx, 1)
			
 
				+	chacha20.xor_bytes(&stream_ctx, ciphertext, plaintext)
			
 
				+	chacha20.reset(&stream_ctx) // Don't need the stream context anymore.
			
 
				+
			
 
				+	// mac_data |= ciphertext | pad16(ciphertext)
			
 
				+	poly1305.update(&mac_ctx, ciphertext)
			
 
				+	_update_mac_pad16(&mac_ctx, ciphertext_len)
			
 
				+
			
 
				+	// mac_data |= num_to_8_le_bytes(aad.length)
			
 
				+	// mac_data |= num_to_8_le_bytes(ciphertext.length)
			
 
				+	l_buf := otk[0:16] // Reuse the scratch buffer.
			
 
				+	util.PUT_U64_LE(l_buf[0:8], u64(aad_len))
			
 
				+	util.PUT_U64_LE(l_buf[8:16], u64(ciphertext_len))
			
 
				+	poly1305.update(&mac_ctx, l_buf)
			
 
				+
			
 
				+	// tag = poly1305_mac(mac_data, otk)
			
 
				+	poly1305.final(&mac_ctx, tag) // Implicitly sanitizes context.
			
 
				+}
			
 
				+
			
 
				+// decrypt recomputes the tag over aad and ciphertext and compares it with
			
 
				+// tag in constant time.  On a match it decrypts ciphertext into plaintext
			
 
				+// and returns true; on a mismatch it zeroes plaintext and returns false.
			
 
				+// Panics if tag, key or nonce has the wrong size, if the text is oversized,
			
 
				+// or if ciphertext and plaintext differ in length.
			
 
				+decrypt :: proc (plaintext, tag, key, nonce, aad, ciphertext: []byte) -> bool {
			
 
				+	_validate_common_slice_sizes(tag, key, nonce, aad, ciphertext)
			
 
				+	if len(ciphertext) != len(plaintext) {
			
 
				+		panic("crypto/chacha20poly1305: invalid destination plaintext size")
			
 
				+	}
			
 
				+
			
 
				+	// Note: Unlike encrypt, this can fail early, so use defer for
			
 
				+	// sanitization rather than assuming control flow reaches certain
			
 
				+	// points where needed.
			
 
				+
			
 
				+	stream_ctx: chacha20.Context = ---
			
 
				+	chacha20.init(&stream_ctx, key, nonce)
			
 
				+
			
 
				+	// otk = poly1305_key_gen(key, nonce)
			
 
				+	otk: [poly1305.KEY_SIZE]byte = ---
			
 
				+	chacha20.keystream_bytes(&stream_ctx, otk[:])
			
 
				+	defer chacha20.reset(&stream_ctx)
			
 
				+
			
 
				+	mac_ctx: poly1305.Context = ---
			
 
				+	poly1305.init(&mac_ctx, otk[:])
			
 
				+	defer mem.zero_explicit(&otk, size_of(otk))
			
 
				+
			
 
				+	aad_len, ciphertext_len := len(aad), len(ciphertext)
			
 
				+
			
 
				+	// mac_data = aad | pad16(aad)
			
 
				+	// mac_data |= ciphertext | pad16(ciphertext)
			
 
				+	// mac_data |= num_to_8_le_bytes(aad.length)
			
 
				+	// mac_data |= num_to_8_le_bytes(ciphertext.length)
			
 
				+	poly1305.update(&mac_ctx, aad)
			
 
				+	_update_mac_pad16(&mac_ctx, aad_len)
			
 
				+	poly1305.update(&mac_ctx, ciphertext)
			
 
				+	_update_mac_pad16(&mac_ctx, ciphertext_len)
			
 
				+	l_buf := otk[0:16] // Reuse the scratch buffer.
			
 
				+	util.PUT_U64_LE(l_buf[0:8], u64(aad_len))
			
 
				+	util.PUT_U64_LE(l_buf[8:16], u64(ciphertext_len))
			
 
				+	poly1305.update(&mac_ctx, l_buf)
			
 
				+
			
 
				+	// tag = poly1305_mac(mac_data, otk)
			
 
				+	derived_tag := otk[0:poly1305.TAG_SIZE] // Reuse the scratch buffer again.
			
 
				+	poly1305.final(&mac_ctx, derived_tag) // Implicitly sanitizes context.
			
 
				+
			
 
				+	// Validate the tag in constant time.
			
 
				+	if crypto.compare_constant_time(tag, derived_tag) != 1 {
			
 
				+		// Zero out the plaintext, as a defense in depth measure.
			
 
				+		mem.zero_explicit(raw_data(plaintext), ciphertext_len)
			
 
				+		return false
			
 
				+	}
			
 
				+
			
 
				+	// plaintext = chacha20_decrypt(key, 1, nonce, ciphertext)
			
 
				+	chacha20.seek(&stream_ctx, 1)
			
 
				+	chacha20.xor_bytes(&stream_ctx, plaintext, ciphertext)
			
 
				+
			
 
				+	return true
			
 
				+}
			
--- a/core/crypto/crypto.odin
+++ b/core/crypto/crypto.odin
@@ -0,0 +1,52 @@
 
				+package crypto
			
 
				+
			
 
				+import "core:mem"
			
 
				+
			
 
				+// compare_constant_time returns 1 when a and b hold the same bytes and 0
			
 
				+// otherwise.
			
 
				+//
			
 
				+// For slices of equal length the running time does not depend on their
			
 
				+// contents.  Slices of different length return 0 immediately.
			
 
				+compare_constant_time :: proc "contextless" (a, b: []byte) -> int {
			
 
				+	// The early return reveals that the lengths differ, which is fine
			
 
				+	// for the intended inputs: things like MACs and password digests.
			
 
				+	if len(a) != len(b) {
			
 
				+		return 0
			
 
				+	}
			
 
				+
			
 
				+	return compare_byte_ptrs_constant_time(raw_data(a), raw_data(b), len(a))
			
 
				+}
			
 
				+
			
 
				+// compare_byte_ptrs_constant_time returns 1 when the n bytes at a equal
			
 
				+// the n bytes at b, and 0 otherwise.
			
 
				+//
			
 
				+// Every byte pair is always examined, so the running time does not depend
			
 
				+// on the contents of the memory being compared.
			
 
				+compare_byte_ptrs_constant_time :: proc "contextless" (a, b: ^byte, n: int) -> int {
			
 
				+	lhs := mem.slice_ptr(a, n)
			
 
				+	rhs := mem.slice_ptr(b, n)
			
 
				+
			
 
				+	// OR together the XOR of each byte pair; the result is zero only
			
 
				+	// when every pair matched.
			
 
				+	diff: byte
			
 
				+	for i := 0; i < n; i += 1 {
			
 
				+		diff |= lhs[i] ~ rhs[i]
			
 
				+	}
			
 
				+
			
 
				+	// Subtracting one wraps around only when diff is zero, which sets
			
 
				+	// the top bit; that bit is the return value.
			
 
				+	wrapped := u32(diff) - 1
			
 
				+	return int(wrapped >> 31)
			
 
				+}
			
 
				+
			
 
				+// rand_bytes overwrites dst with cryptographic entropy from the system
			
 
				+// entropy source, blocking until that source is ready.  Any failure of
			
 
				+// the entropy source is treated as catastrophic and results in a panic.
			
 
				+rand_bytes :: proc (dst: []byte) {
			
 
				+	// Clear the destination before asking the system to fill it.
			
 
				+	buf, buf_len := raw_data(dst), len(dst)
			
 
				+	mem.zero_explicit(buf, buf_len)
			
 
				+
			
 
				+	_rand_bytes(dst)
			
 
				+}
			
--- a/core/crypto/poly1305/poly1305.odin
+++ b/core/crypto/poly1305/poly1305.odin
@@ -0,0 +1,163 @@
 
				+package poly1305
			
 
				+
			
 
				+import "core:crypto"
			
 
				+import "core:crypto/util"
			
 
				+import field "core:crypto/_fiat/field_poly1305"
			
 
				+import "core:mem"
			
 
				+
			
 
				+KEY_SIZE :: 32
			
 
				+TAG_SIZE :: 16
			
 
				+
			
 
				+_BLOCK_SIZE :: 16
			
 
				+
			
 
				+// sum computes the Poly1305 tag of msg under key in a single call and
				+// writes it to dst.
				+//
				+// Panics (via init) if key is not KEY_SIZE bytes long, and (via final) if
				+// dst is not TAG_SIZE bytes long.
				+sum :: proc (dst, msg, key: []byte) {
				+	// Deliberately uninitialized (---); init fills in the state before
				+	// it is used.
				+	ctx: Context = ---
				+
				+	init(&ctx, key)
				+	update(&ctx, msg)
				+	final(&ctx, dst)
				+}
			
 
				+
			
 
				+// verify recomputes the Poly1305 tag of msg under key and returns true iff
				+// it matches tag.  The comparison is done with
				+// crypto.compare_constant_time.
				+//
				+// Panics if tag is not TAG_SIZE bytes long; init additionally panics if
				+// key is not KEY_SIZE bytes long.
				+verify :: proc (tag, msg, key: []byte) -> bool {
				+	if len(tag) != TAG_SIZE {
				+		panic("crypto/poly1305: invalid tag size")
				+	}
				+
				+	// Run the full MAC computation into a local buffer.
				+	ctx: Context = ---
				+	init(&ctx, key)
				+	update(&ctx, msg)
				+
				+	expected: [TAG_SIZE]byte = ---
				+	final(&ctx, expected[:])
				+
				+	return crypto.compare_constant_time(expected[:], tag) == 1
				+}
			
 
				+
			
 
				+// Context holds the state of an incremental Poly1305 computation.  It must
				+// be set up with init before update or final are called.
				+Context :: struct {
				+	// Clamped first half of the key; the per-block multiplier.
				+	_r: field.Tight_Field_Element,
				+	// Running accumulator; zeroed by init.
				+	_a: field.Tight_Field_Element,
				+	// Second half of the key; added to the accumulator in final.
				+	_s: field.Tight_Field_Element,
				+
				+	// Holds input bytes that do not yet form a complete block.
				+	_buffer: [_BLOCK_SIZE]byte,
				+	// Number of valid bytes currently held in _buffer.
				+	_leftover: int,
				+
				+	// Set by init and cleared by reset; update and final assert on it.
				+	_is_initialized: bool,
				+}
			
 
				+
			
 
				+// init prepares ctx for a new Poly1305 computation keyed with key.
				+//
				+// Panics if key is not KEY_SIZE bytes long.
				+init :: proc (ctx: ^Context, key: []byte) {
				+	if len(key) != KEY_SIZE {
				+		panic("crypto/poly1305: invalid key size")
				+	}
				+
				+	// The two 64-bit halves of the clamp mask
				+	// 0x0ffffffc0ffffffc0ffffffc0fffffff applied to r.
				+	CLAMP_LO :: 0x0ffffffc0fffffff
				+	CLAMP_HI :: 0x0ffffffc0ffffffc
				+
				+	r_bytes, s_bytes := key[:16], key[16:]
				+
				+	// Start from an empty accumulator with nothing buffered.
				+	field.fe_zero(&ctx._a)
				+	ctx._leftover = 0
				+
				+	// r = clamp(le_bytes_to_num(key[0..15]))
				+	r_lo := util.U64_LE(r_bytes[:8]) & CLAMP_LO
				+	r_hi := util.U64_LE(r_bytes[8:]) & CLAMP_HI
				+	field.fe_from_u64s(&ctx._r, r_lo, r_hi)
				+
				+	// s = le_bytes_to_num(key[16..31])
				+	field.fe_from_bytes(&ctx._s, s_bytes, 0)
				+
				+	ctx._is_initialized = true
				+}
			
 
				+
			
 
				+// update absorbs data into the running Poly1305 computation.  Complete
				+// 16-byte blocks are processed immediately; any trailing partial block is
				+// kept in ctx._buffer until more input arrives or final is called.
				+//
				+// Asserts that ctx has been initialized.
				+update :: proc (ctx: ^Context, data: []byte) {
				+	assert(ctx._is_initialized)
				+
				+	remaining := data
				+
				+	// Top up a previously buffered partial block first.
				+	if ctx._leftover > 0 {
				+		take := min(_BLOCK_SIZE - ctx._leftover, len(remaining))
				+		copy_slice(ctx._buffer[ctx._leftover:], remaining[:take])
				+		remaining = remaining[take:]
				+		ctx._leftover += take
				+		if ctx._leftover < _BLOCK_SIZE {
				+			// Still not a full block; wait for more input.
				+			return
				+		}
				+		_blocks(ctx, ctx._buffer[:])
				+		ctx._leftover = 0
				+	}
				+
				+	// Feed the largest multiple of the block size straight from the input.
				+	if len(remaining) >= _BLOCK_SIZE {
				+		whole := len(remaining) & (~int(_BLOCK_SIZE - 1))
				+		_blocks(ctx, remaining[:whole])
				+		remaining = remaining[whole:]
				+	}
				+
				+	// Stash whatever is left (fewer than _BLOCK_SIZE bytes).
				+	if len(remaining) > 0 {
				+		// ctx._leftover is 0 whenever this point is reached (it was
				+		// either 0 on entry or reset above); the offset is kept to
				+		// mirror the -donna reference code.
				+		copy(ctx._buffer[ctx._leftover:], remaining)
				+		ctx._leftover += len(remaining)
				+	}
				+}
			
 
				+
			
 
				+// final finishes the computation, writes the 16-byte tag to dst and then
				+// resets ctx, so the context must be re-initialized before reuse.
				+//
				+// Asserts that ctx has been initialized and panics if dst is not
				+// TAG_SIZE bytes long.
				+//
				+// NOTE(review): RFC 8439 defines the tag as (a + s) mod 2^128 with a
				+// fully reduced mod p beforehand.  Here s is added as a field element
				+// before serialization -- confirm that fe_carry/fe_to_bytes cannot apply
				+// a further reduction mod p to the sum.
				+final :: proc (ctx: ^Context, dst: []byte) {
				+	assert(ctx._is_initialized)
				+
				+	if len(dst) != TAG_SIZE {
				+		panic("poly1305: invalid destination tag size")
				+	}
				+
				+	// Absorb a trailing partial block: append a 1 byte, zero-fill the
				+	// rest of the buffer, and process it as the final block.
				+	if pending := ctx._leftover; pending > 0 {
				+		ctx._buffer[pending] = 1
				+		for i in (pending + 1)..<_BLOCK_SIZE {
				+			ctx._buffer[i] = 0
				+		}
				+		_blocks(ctx, ctx._buffer[:], true)
				+	}
				+
				+	// a += s
				+	field.fe_add(field.fe_relax_cast(&ctx._a), &ctx._a, &ctx._s) // _a unreduced
				+	field.fe_carry(&ctx._a, field.fe_relax_cast(&ctx._a)) // _a reduced
				+
				+	// return num_to_16_le_bytes(a): serialize and keep the low 16 bytes.
				+	encoded: [32]byte = ---
				+	field.fe_to_bytes(&encoded, &ctx._a)
				+	copy_slice(dst, encoded[:16])
				+
				+	reset(ctx)
				+}
			
 
				+
			
 
				+// reset wipes the key material, accumulator and buffered input held in
				+// ctx and marks it as uninitialized.  init must be called again before
				+// ctx can be used with update or final.
				+reset :: proc (ctx: ^Context) {
				+	mem.zero_explicit(&ctx._r, size_of(ctx._r))
				+	mem.zero_explicit(&ctx._a, size_of(ctx._a))
				+	mem.zero_explicit(&ctx._s, size_of(ctx._s))
				+	mem.zero_explicit(&ctx._buffer, size_of(ctx._buffer))
				+
				+	// Also drop the buffered-byte count so no per-message state
				+	// (here: the message length mod the block size) outlives the reset.
				+	ctx._leftover = 0
				+
				+	ctx._is_initialized = false
				+}
			
 
				+
			
 
				+// _blocks folds every complete _BLOCK_SIZE-byte block of msg into the
				+// accumulator: a = (a + n) * r for each block n.  Any trailing bytes that
				+// do not fill a whole block are ignored; callers buffer those themselves.
				+//
				+// final selects the extra byte handed to fe_from_bytes: 1 for ordinary
				+// blocks and 0 when final is true (the caller has already written the
				+// padding byte into the block in that case).
				+_blocks :: proc (ctx: ^Context, msg: []byte, final := false) {
				+	n: field.Tight_Field_Element = ---
				+	final_byte := byte(!final)
				+
				+	data := msg
				+	data_len := len(data)
				+	for data_len >= _BLOCK_SIZE {
				+		// n = le_bytes_to_num(msg[((i-1)*16)..(i*16)] | [0x01])
				+		field.fe_from_bytes(&n, data[:_BLOCK_SIZE], final_byte, false)
				+
				+		// a += n
				+		field.fe_add(field.fe_relax_cast(&ctx._a), &ctx._a, &n) // _a unreduced
				+
				+		// a = (r * a) % p
				+		field.fe_carry_mul(&ctx._a, field.fe_relax_cast(&ctx._a), field.fe_relax_cast(&ctx._r)) // _a reduced
				+
				+		// Advance to the next block.
				+		data = data[_BLOCK_SIZE:]
				+		data_len = data_len - _BLOCK_SIZE
				+	}
				+}
			
--- a/core/crypto/rand_generic.odin
+++ b/core/crypto/rand_generic.odin
@@ -0,0 +1,7 @@
 
				+package crypto
			
 
				+
			
 
				+// Fallback for every target other than Linux: there is no entropy source
				+// wired up here, so any call aborts via unimplemented().
				+when ODIN_OS != "linux" {
				+	_rand_bytes :: proc (dst: []byte) {
				+		unimplemented("crypto: rand_bytes not supported on this OS")
				+	}
				+}
			
--- a/core/crypto/rand_linux.odin
+++ b/core/crypto/rand_linux.odin
@@ -0,0 +1,37 @@
 
				+package crypto
			
 
				+
			
 
				+import "core:fmt"
			
 
				+import "core:os"
			
 
				+import "core:sys/unix"
			
 
				+
			
 
				+_MAX_PER_CALL_BYTES :: 33554431 // 2^25 - 1
			
 
				+
			
 
				+// _rand_bytes fills dst using the getrandom system call, issuing as many
				+// calls as needed (each capped at _MAX_PER_CALL_BYTES).  Interrupted
				+// calls are retried; every other failure panics.
				+_rand_bytes :: proc (dst: []byte) {
				+	remaining := dst
				+
				+	for len(remaining) > 0 {
				+		want := min(len(remaining), _MAX_PER_CALL_BYTES)
				+		got := unix.sys_getrandom(raw_data(remaining), want, 0)
				+		if got < 0 {
				+			errno := os.Errno(-got)
				+			if errno == os.EINTR {
				+				// Call interrupted by a signal handler, just retry
				+				// the request.
				+				continue
				+			}
				+			if errno == os.ENOSYS {
				+				// The kernel is apparently prehistoric (< 3.17 circa
				+				// 2014) and does not support getrandom.
				+				panic("crypto: getrandom not available in kernel")
				+			}
				+			// All other failures are things that should NEVER happen
				+			// unless the kernel interface changes (ie: the Linux
				+			// developers break userland).
				+			panic(fmt.tprintf("crypto: getrandom failed: %d", got))
				+		}
				+
				+		// Skip past the bytes that were just written.
				+		remaining = remaining[got:]
				+	}
				+}
			
--- a/core/crypto/x25519/x25519.odin
+++ b/core/crypto/x25519/x25519.odin
@@ -0,0 +1,126 @@
 
				+package x25519
			
 
				+
			
 
				+import field "core:crypto/_fiat/field_curve25519"
			
 
				+import "core:mem"
			
 
				+
			
 
				+SCALAR_SIZE :: 32
			
 
				+POINT_SIZE :: 32
			
 
				+
			
 
				+_BASE_POINT: [32]byte = {9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
			
 
				+
			
 
				+// _scalar_bit returns bit i of the 256-bit little-endian scalar s (bit 0
				+// is the least significant bit of s[0]).  Negative indices yield 0.
				+_scalar_bit :: #force_inline proc "contextless" (s: ^[32]byte, i: int) -> u8 {
				+	if i >= 0 {
				+		octet := s[i >> 3]
				+		shift := uint(i & 7)
				+		return (octet >> shift) & 1
				+	}
				+	return 0
				+}
			
 
				+
			
 
				+// _scalarmult multiplies the point whose encoded x-coordinate is in point
				+// by scalar using a Montgomery ladder, and writes the encoded
				+// x-coordinate of the result to out.
				+//
				+// The scalar is used as given (clamping is the caller's job).  Only bits
				+// 254 down to 0 are consumed by the ladder.  All intermediate field
				+// elements are wiped before returning.
				+_scalarmult :: proc (out, scalar, point: ^[32]byte) {
				+	// Montgomery pseudo-multiplication taken from Monocypher.
				+
				+	// decode the input point's x-coordinate
				+	x1: field.Tight_Field_Element = ---
				+	field.fe_from_bytes(&x1, point)
				+
				+	// computes the actual scalar product (the result is in x2 and z2)
				+	x2, x3, z2, z3: field.Tight_Field_Element =  ---, ---, ---, ---
				+	t0, t1: field.Loose_Field_Element = ---, ---
				+
				+	// Montgomery ladder
				+	// In projective coordinates, to avoid divisions: x = X / Z
				+	// We don't care about the y coordinate, it's only 1 bit of information
				+	field.fe_one(&x2) // "zero" point
				+	field.fe_zero(&z2)
				+	field.fe_set(&x3, &x1) // "one" point
				+	field.fe_one(&z3)
				+
				+	swap: int
				+	for pos := 255-1; pos >= 0; pos = pos - 1 	{
				+		// constant time conditional swap before ladder step
				+		b := int(_scalar_bit(scalar, pos))
				+		swap ~= b // xor trick avoids swapping at the end of the loop
				+		field.fe_cond_swap(&x2, &x3, swap)
				+		field.fe_cond_swap(&z2, &z3, swap)
				+		swap = b // anticipates one last swap after the loop
				+
				+		// Montgomery ladder step: replaces (P2, P3) by (P2*2, P2+P3)
				+		// with differential addition
				+		//
				+		// Note: This deliberately omits reductions after add/sub operations
				+		// if the result is only ever used as the input to a mul/square since
				+		// the implementations of those can deal with non-reduced inputs.
				+		//
				+		// fe_tighten_cast is only used to store a fully reduced
				+		// output in a Loose_Field_Element, or to provide such a
				+		// Loose_Field_Element as a Tight_Field_Element argument.
				+		field.fe_sub(&t0, &x3, &z3)
				+		field.fe_sub(&t1, &x2, &z2)
				+		field.fe_add(field.fe_relax_cast(&x2), &x2, &z2) // x2 - unreduced
				+		field.fe_add(field.fe_relax_cast(&z2), &x3, &z3) // z2 - unreduced
				+		field.fe_carry_mul(&z3, &t0, field.fe_relax_cast(&x2))
				+		field.fe_carry_mul(&z2, field.fe_relax_cast(&z2), &t1) // z2 - reduced
				+		field.fe_carry_square(field.fe_tighten_cast(&t0), &t1) // t0 - reduced
				+		field.fe_carry_square(field.fe_tighten_cast(&t1), field.fe_relax_cast(&x2)) // t1 - reduced
				+		field.fe_add(field.fe_relax_cast(&x3), &z3, &z2) // x3 - unreduced
				+		field.fe_sub(field.fe_relax_cast(&z2), &z3, &z2) // z2 - unreduced
				+		field.fe_carry_mul(&x2, &t1, &t0) // x2 - reduced
				+		field.fe_sub(&t1, field.fe_tighten_cast(&t1), field.fe_tighten_cast(&t0)) // safe - t1/t0 is reduced
				+		field.fe_carry_square(&z2, field.fe_relax_cast(&z2)) // z2 - reduced
				+		field.fe_carry_scmul_121666(&z3, &t1)
				+		field.fe_carry_square(&x3, field.fe_relax_cast(&x3)) // x3 - reduced
				+		field.fe_add(&t0, field.fe_tighten_cast(&t0), &z3) // safe - t0 is reduced
				+		field.fe_carry_mul(&z3, field.fe_relax_cast(&x1), field.fe_relax_cast(&z2))
				+		field.fe_carry_mul(&z2, &t1, &t0)
				+	}
				+	// last swap is necessary to compensate for the xor trick
				+	// Note: after this swap, P3 == P2 + P1.
				+	field.fe_cond_swap(&x2, &x3, swap)
				+	field.fe_cond_swap(&z2, &z3, swap)
				+
				+	// normalises the coordinates: x == X / Z
				+	field.fe_carry_inv(&z2, field.fe_relax_cast(&z2))
				+	field.fe_carry_mul(&x2, field.fe_relax_cast(&x2), field.fe_relax_cast(&z2))
				+	field.fe_to_bytes(out, &x2)
				+
				+	// wipe every intermediate value derived from the scalar
				+	mem.zero_explicit(&x1, size_of(x1))
				+	mem.zero_explicit(&x2, size_of(x2))
				+	mem.zero_explicit(&x3, size_of(x3))
				+	mem.zero_explicit(&z2, size_of(z2))
				+	mem.zero_explicit(&z3, size_of(z3))
				+	mem.zero_explicit(&t0, size_of(t0))
				+	mem.zero_explicit(&t1, size_of(t1))
				+}
			
 
				+
			
 
				+// scalarmult writes to dst the X25519 product of scalar and point.  The
				+// scalar is clamped on a private copy; the caller's slice is not
				+// modified.
				+//
				+// Panics if scalar is not SCALAR_SIZE bytes long, or if point or dst are
				+// not POINT_SIZE bytes long.
				+scalarmult :: proc (dst, scalar, point: []byte) {
				+	if len(scalar) != SCALAR_SIZE {
				+		panic("crypto/x25519: invalid scalar size")
				+	}
				+	if len(point) != POINT_SIZE {
				+		panic("crypto/x25519: invalid point size")
				+	}
				+	if len(dst) != POINT_SIZE {
				+		panic("crypto/x25519: invalid destination point size")
				+	}
				+
				+	// Work on fixed-size local copies of the inputs.
				+	clamped, u_coord, product: [32]byte = ---, ---, ---
				+	copy_slice(clamped[:], scalar)
				+	copy_slice(u_coord[:], point)
				+
				+	// "clamp" the scalar: clear the low three bits and the top bit, and
				+	// set the second-highest bit.
				+	clamped[0] &= 248
				+	clamped[31] = (clamped[31] & 127) | 64
				+
				+	_scalarmult(&product, &clamped, &u_coord)
				+	copy_slice(dst, product[:])
				+
				+	// Wipe the clamped scalar and the local copy of the result.
				+	mem.zero_explicit(&clamped, size_of(clamped))
				+	mem.zero_explicit(&product, size_of(product))
				+}
			
 
				+
			
 
				+// scalarmult_basepoint writes to dst the X25519 product of scalar and the
				+// fixed base point _BASE_POINT.  It is a thin wrapper around scalarmult
				+// and panics under the same size conditions for dst and scalar.
				+scalarmult_basepoint :: proc (dst, scalar: []byte) {
				+	// TODO/perf: Switch to using a precomputed table.
				+	scalarmult(dst, scalar, _BASE_POINT[:])
				+}
			
--- a/core/hash/hash.odin
+++ b/core/hash/hash.odin
@@ -47,8 +47,8 @@ adler32 :: proc(data: []byte, seed := u32(1)) -> u32 #no_bounds_check {
 
				 }
			
 
				 
			
 
				 @(optimization_mode="speed")
			
 
				-djb2 :: proc(data: []byte) -> u32 {
			
 
				-	hash: u32 = 5381
			
 
				+djb2 :: proc(data: []byte, seed := u32(5381)) -> u32 {
			
 
				+	hash: u32 = seed
			
 
				 	for b in data {
			
 
				 		hash = (hash << 5) + hash + u32(b) // hash * 33 + u32(b)
			
 
				 	}
			
@@ -56,8 +56,8 @@ djb2 :: proc(data: []byte) -> u32 {
 
				 }
			
 
				 
			
 
				 @(optimization_mode="speed")
			
 
				-fnv32 :: proc(data: []byte) -> u32 {
			
 
				-	h: u32 = 0x811c9dc5
			
 
				+fnv32 :: proc(data: []byte, seed := u32(0x811c9dc5)) -> u32 {
			
 
				+	h: u32 = seed
			
 
				 	for b in data {
			
 
				 		h = (h * 0x01000193) ~ u32(b)
			
 
				 	}
			
@@ -65,8 +65,8 @@ fnv32 :: proc(data: []byte) -> u32 {
 
				 }
			
 
				 
			
 
				 @(optimization_mode="speed")
			
 
				-fnv64 :: proc(data: []byte) -> u64 {
			
 
				-	h: u64 = 0xcbf29ce484222325
			
 
				+fnv64 :: proc(data: []byte, seed := u64(0xcbf29ce484222325)) -> u64 {
			
 
				+	h: u64 = seed
			
 
				 	for b in data {
			
 
				 		h = (h * 0x100000001b3) ~ u64(b)
			
 
				 	}
			
@@ -74,8 +74,8 @@ fnv64 :: proc(data: []byte) -> u64 {
 
				 }
			
 
				 
			
 
				 @(optimization_mode="speed")
			
 
				-fnv32a :: proc(data: []byte) -> u32 {
			
 
				-	h: u32 = 0x811c9dc5
			
 
				+fnv32a :: proc(data: []byte, seed := u32(0x811c9dc5)) -> u32 {
			
 
				+	h: u32 = seed
			
 
				 	for b in data {
			
 
				 		h = (h ~ u32(b)) * 0x01000193
			
 
				 	}
			
@@ -83,8 +83,8 @@ fnv32a :: proc(data: []byte) -> u32 {
 
				 }
			
 
				 
			
 
				 @(optimization_mode="speed")
			
 
				-fnv64a :: proc(data: []byte) -> u64 {
			
 
				-	h: u64 = 0xcbf29ce484222325
			
 
				+fnv64a :: proc(data: []byte, seed := u64(0xcbf29ce484222325)) -> u64 {
			
 
				+	h: u64 = seed
			
 
				 	for b in data {
			
 
				 		h = (h ~ u64(b)) * 0x100000001b3
			
 
				 	}
			
@@ -92,8 +92,8 @@ fnv64a :: proc(data: []byte) -> u64 {
 
				 }
			
 
				 
			
 
				 @(optimization_mode="speed")
			
 
				-jenkins :: proc(data: []byte) -> u32 {
			
 
				-	hash: u32 = 0
			
 
				+jenkins :: proc(data: []byte, seed := u32(0)) -> u32 {
			
 
				+	hash: u32 = seed
			
 
				 	for b in data {
			
 
				 		hash += u32(b)
			
 
				 		hash += hash << 10
			
@@ -106,11 +106,11 @@ jenkins :: proc(data: []byte) -> u32 {
 
				 }
			
 
				 
			
 
				 @(optimization_mode="speed")
			
 
				-murmur32 :: proc(data: []byte) -> u32 {
			
 
				+murmur32 :: proc(data: []byte, seed := u32(0)) -> u32 {
			
 
				 	c1_32: u32 : 0xcc9e2d51
			
 
				 	c2_32: u32 : 0x1b873593
			
 
				 
			
 
				-	h1: u32 = 0
			
 
				+	h1: u32 = seed
			
 
				 	nblocks := len(data)/4
			
 
				 	p := raw_data(data)
			
 
				 	p1 := mem.ptr_offset(p, 4*nblocks)
			
@@ -156,14 +156,12 @@ murmur32 :: proc(data: []byte) -> u32 {
 
				 }
			
 
				 
			
 
				 @(optimization_mode="speed")
			
 
				-murmur64 :: proc(data: []byte) -> u64 {
			
 
				-	SEED :: 0x9747b28c
			
 
				-
			
 
				+murmur64 :: proc(data: []byte, seed := u64(0x9747b28c)) -> u64 {
			
 
				 	when size_of(int) == 8 {
			
 
				 		m :: 0xc6a4a7935bd1e995
			
 
				 		r :: 47
			
 
				 
			
 
				-		h: u64 = SEED ~ (u64(len(data)) * m)
			
 
				+		h: u64 = seed ~ (u64(len(data)) * m)
			
 
				 		data64 := mem.slice_ptr(cast(^u64)raw_data(data), len(data)/size_of(u64))
			
 
				 
			
 
				 		for _, i in data64 {
			
@@ -198,8 +196,8 @@ murmur64 :: proc(data: []byte) -> u64 {
 
				 		m :: 0x5bd1e995
			
 
				 		r :: 24
			
 
				 
			
 
				-		h1 := u32(SEED) ~ u32(len(data))
			
 
				-		h2 := u32(SEED) >> 32
			
 
				+		h1 := u32(seed) ~ u32(len(data))
			
 
				+		h2 := u32(seed) >> 32
			
 
				 		data32 := mem.slice_ptr(cast(^u32)raw_data(data), len(data)/size_of(u32))
			
 
				 		len := len(data)
			
 
				 		i := 0
			
@@ -262,8 +260,8 @@ murmur64 :: proc(data: []byte) -> u64 {
 
				 }
			
 
				 
			
 
				 @(optimization_mode="speed")
			
 
				-sdbm :: proc(data: []byte) -> u32 {
			
 
				-	hash: u32 = 0
			
 
				+sdbm :: proc(data: []byte, seed := u32(0)) -> u32 {
			
 
				+	hash: u32 = seed
			
 
				 	for b in data {
			
 
				 		hash = u32(b) + (hash<<6) + (hash<<16) - hash
			
 
				 	}
			
--- a/core/math/big/rat.odin
+++ b/core/math/big/rat.odin
@@ -436,7 +436,7 @@ internal_rat_to_float :: proc($T: typeid, z: ^Rat, allocator := context.allocato
 
				 	
			
 
				 	mantissa >>= 1
			
 
				 	
			
 
				-	f = T(math.ldexp(f64(mantissa), i32(exp-MSIZE1)))
			
 
				+	f = T(math.ldexp(f64(mantissa), exp-MSIZE1))
			
 
				 	if math.is_inf(f, 0) {
			
 
				 		exact = false
			
 
				 	}
			
--- a/core/math/math.odin
+++ b/core/math/math.odin
@@ -96,18 +96,6 @@ fmuladd       :: proc{
 
				 	fmuladd_f64, fmuladd_f64le, fmuladd_f64be,
			
 
				 }
			
 
				 
			
 
				-ln_f16le :: proc "contextless" (x: f16le) -> f16le { return #force_inline f16le(ln_f16(f16(x))) }
			
 
				-ln_f16be :: proc "contextless" (x: f16be) -> f16be { return #force_inline f16be(ln_f16(f16(x))) }
			
 
				-ln_f32le :: proc "contextless" (x: f32le) -> f32le { return #force_inline f32le(ln_f32(f32(x))) }
			
 
				-ln_f32be :: proc "contextless" (x: f32be) -> f32be { return #force_inline f32be(ln_f32(f32(x))) }
			
 
				-ln_f64le :: proc "contextless" (x: f64le) -> f64le { return #force_inline f64le(ln_f64(f64(x))) }
			
 
				-ln_f64be :: proc "contextless" (x: f64be) -> f64be { return #force_inline f64be(ln_f64(f64(x))) }
			
 
				-ln       :: proc{
			
 
				-	ln_f16, ln_f16le, ln_f16be,
			
 
				-	ln_f32, ln_f32le, ln_f32be,
			
 
				-	ln_f64, ln_f64le, ln_f64be,
			
 
				-}
			
 
				-
			
 
				 exp_f16le :: proc "contextless" (x: f16le) -> f16le { return #force_inline f16le(exp_f16(f16(x))) }
			
 
				 exp_f16be :: proc "contextless" (x: f16be) -> f16be { return #force_inline f16be(exp_f16(f16(x))) }
			
 
				 exp_f32le :: proc "contextless" (x: f32le) -> f32le { return #force_inline f32le(exp_f32(f32(x))) }
			
@@ -120,13 +108,60 @@ exp       :: proc{
 
				 	exp_f64, exp_f64le, exp_f64be,
			
 
				 }
			
 
				 
			
 
				-ldexp_f16le :: proc "contextless" (val: f16le, exp: i32) -> f16le { return #force_inline f16le(ldexp_f16(f16(val), exp)) }
			
 
				-ldexp_f16be :: proc "contextless" (val: f16be, exp: i32) -> f16be { return #force_inline f16be(ldexp_f16(f16(val), exp)) }
			
 
				-ldexp_f32le :: proc "contextless" (val: f32le, exp: i32) -> f32le { return #force_inline f32le(ldexp_f32(f32(val), exp)) }
			
 
				-ldexp_f32be :: proc "contextless" (val: f32be, exp: i32) -> f32be { return #force_inline f32be(ldexp_f32(f32(val), exp)) }
			
 
				-ldexp_f64le :: proc "contextless" (val: f64le, exp: i32) -> f64le { return #force_inline f64le(ldexp_f64(f64(val), exp)) }
			
 
				-ldexp_f64be :: proc "contextless" (val: f64be, exp: i32) -> f64be { return #force_inline f64be(ldexp_f64(f64(val), exp)) }
			
 
				-ldexp       :: proc{
			
 
				+
			
 
				+
			
 
				+// ldexp_f64 scales val by 2**exp by rewriting the exponent field of its
				+// bit pattern.
				+//
				+// Zero, infinity and NaN inputs are returned unchanged.  Results too
				+// small to represent become a zero carrying the sign of val; results too
				+// large become an infinity of the matching sign.
				+ldexp_f64 :: proc "contextless" (val: f64, exp: int) -> f64 {
				+	mask  :: F64_MASK
				+	shift :: F64_SHIFT
				+	bias  :: F64_BIAS
				+	
				+	switch {
				+	case val == 0:
				+		return val
				+	case is_inf(val) || is_nan(val):
				+		return val
				+	}
				+	exp := exp
				+	// Fold normalize_f64's exponent adjustment, then val's own unbiased
				+	// exponent, into the requested exponent.
				+	frac, e := normalize_f64(val)
				+	exp += e
				+	x := transmute(u64)frac
				+	exp += int(x>>shift)&mask - bias
				+	if exp < -1075 { // underflow
				+		return copy_sign(0, frac) 
				+	} else if exp > 1023 { // overflow
				+		if frac < 0 {
				+			return inf_f64(-1)
				+		}
				+		return inf_f64(+1)
				+	}
				+	
				+	m: f64 = 1
				+	if exp < -1022 { // denormal
				+		// Build the value with an exponent 53 higher, then scale it
				+		// back down by 2**-53 in the final multiplication.
				+		exp += 53
				+		m = 1.0 / (1<<53)
				+	}
				+	// Replace the exponent field with the new biased exponent.
				+	x &~= mask << shift
				+	x |= u64(exp+bias) << shift
				+	return m * transmute(f64)x	
				+}
			
 
				+// Typed variants of ldexp.  The f16 and f32 versions widen to f64, call
				+// ldexp_f64 and narrow the result; the endian-specific versions convert
				+// to the native-endian type of the same width and back.
				+ldexp_f16   :: proc "contextless" (val: f16, exp: int) -> f16 { return f16(ldexp_f64(f64(val), exp)) }
				+ldexp_f32   :: proc "contextless" (val: f32, exp: int) -> f32 { return f32(ldexp_f64(f64(val), exp)) }
				+ldexp_f16le :: proc "contextless" (val: f16le, exp: int) -> f16le { return #force_inline f16le(ldexp_f16(f16(val), exp)) }
				+ldexp_f16be :: proc "contextless" (val: f16be, exp: int) -> f16be { return #force_inline f16be(ldexp_f16(f16(val), exp)) }
				+ldexp_f32le :: proc "contextless" (val: f32le, exp: int) -> f32le { return #force_inline f32le(ldexp_f32(f32(val), exp)) }
				+ldexp_f32be :: proc "contextless" (val: f32be, exp: int) -> f32be { return #force_inline f32be(ldexp_f32(f32(val), exp)) }
				+ldexp_f64le :: proc "contextless" (val: f64le, exp: int) -> f64le { return #force_inline f64le(ldexp_f64(f64(val), exp)) }
				+ldexp_f64be :: proc "contextless" (val: f64be, exp: int) -> f64be { return #force_inline f64be(ldexp_f64(f64(val), exp)) }
			
 
				+// ldexp is the inverse of frexp
			
 
				+// it returns val * 2**exp.
			
 
				+// 
			
 
				+// Special cases:
			
 
				+// 	ldexp(+0,   exp) = +0
			
 
				+// 	ldexp(-0,   exp) = -0
			
 
				+// 	ldexp(+inf, exp) = +inf
			
 
				+// 	ldexp(-inf, exp) = -inf
			
 
				+// 	ldexp(NaN,  exp) = NaN
			
 
				+ldexp :: proc{
			
 
				 	ldexp_f16, ldexp_f16le, ldexp_f16be,
			
 
				 	ldexp_f32, ldexp_f32le, ldexp_f32be,
			
 
				 	ldexp_f64, ldexp_f64le, ldexp_f64be,
			
@@ -150,22 +185,16 @@ log       :: proc{
 
				 	log_f64, log_f64le, log_f64be,
			
 
				 }
			
 
				 
			
 
				-log2_f16   :: proc "contextless" (x: f16)   -> f16   { return ln(x)/LN2 }
			
 
				-log2_f16le :: proc "contextless" (x: f16le) -> f16le { return f16le(log2_f16(f16(x))) }
			
 
				-log2_f16be :: proc "contextless" (x: f16be) -> f16be { return f16be(log2_f16(f16(x))) }
			
 
				-
			
 
				-log2_f32   :: proc "contextless" (x: f32)   -> f32   { return ln(x)/LN2 }
			
 
				-log2_f32le :: proc "contextless" (x: f32le) -> f32le { return f32le(log2_f32(f32(x))) }
			
 
				-log2_f32be :: proc "contextless" (x: f32be) -> f32be { return f32be(log2_f32(f32(x))) }
			
 
				-
			
 
				-log2_f64   :: proc "contextless" (x: f64)   -> f64   { return ln(x)/LN2 }
			
 
				-log2_f64le :: proc "contextless" (x: f64le) -> f64le { return f64le(log2_f64(f64(x))) }
			
 
				-log2_f64be :: proc "contextless" (x: f64be) -> f64be { return f64be(log2_f64(f64(x))) }
			
 
				-log2       :: proc{
			
 
				-	log2_f16, log2_f16le, log2_f16be,
			
 
				-	log2_f32, log2_f32le, log2_f32be,
			
 
				-	log2_f64, log2_f64le, log2_f64be,
			
 
				-}
			
 
				+// log2 and its typed variants are plain aliases of the logb family.
				+//
				+// NOTE(review): if logb returns only the binary exponent of its argument
				+// (as C's logb does), these aliases are not a general base-2 logarithm
				+// for inputs that are not exact powers of two -- confirm against logb's
				+// definition.
				+log2_f16   :: logb_f16
				+log2_f16le :: logb_f16le
				+log2_f16be :: logb_f16be
				+log2_f32   :: logb_f32
				+log2_f32le :: logb_f32le
				+log2_f32be :: logb_f32be
				+log2_f64   :: logb_f64
				+log2_f64le :: logb_f64le
				+log2_f64be :: logb_f64be
				+log2       :: logb
			
 
				 
			
 
				 log10_f16   :: proc "contextless" (x: f16)   -> f16   { return ln(x)/LN10 }
			
 
				 log10_f16le :: proc "contextless" (x: f16le) -> f16le { return f16le(log10_f16(f16(x))) }
			
@@ -351,9 +380,9 @@ to_degrees       :: proc{
 
				 
			
 
				 trunc_f16   :: proc "contextless" (x: f16) -> f16 {
			
 
				 	trunc_internal :: proc "contextless" (f: f16) -> f16 {
			
 
				-		mask :: 0x1f
			
 
				-		shift :: 16 - 6
			
 
				-		bias :: 0xf
			
 
				+		mask  :: F16_MASK
			
 
				+		shift :: F16_SHIFT
			
 
				+		bias  :: F16_BIAS
			
 
				 
			
 
				 		if f < 1 {
			
 
				 			switch {
			
@@ -383,9 +412,9 @@ trunc_f16be :: proc "contextless" (x: f16be) -> f16be { return #force_inline f16
 
				 
			
 
				 trunc_f32   :: proc "contextless" (x: f32) -> f32 {
			
 
				 	trunc_internal :: proc "contextless" (f: f32) -> f32 {
			
 
				-		mask :: 0xff
			
 
				-		shift :: 32 - 9
			
 
				-		bias :: 0x7f
			
 
				+		mask  :: F32_MASK
			
 
				+		shift :: F32_SHIFT
			
 
				+		bias  :: F32_BIAS
			
 
				 
			
 
				 		if f < 1 {
			
 
				 			switch {
			
@@ -415,9 +444,9 @@ trunc_f32be :: proc "contextless" (x: f32be) -> f32be { return #force_inline f32
 
				 
			
 
				 trunc_f64   :: proc "contextless" (x: f64) -> f64 {
			
 
				 	trunc_internal :: proc "contextless" (f: f64) -> f64 {
			
 
				-		mask :: 0x7ff
			
 
				-		shift :: 64 - 12
			
 
				-		bias :: 0x3ff
			
 
				+		mask  :: F64_MASK
			
 
				+		shift :: F64_SHIFT
			
 
				+		bias  :: F64_BIAS
			
 
				 
			
 
				 		if f < 1 {
			
 
				 			switch {
			
@@ -578,9 +607,9 @@ floor_mod :: proc "contextless" (x, y: $T) -> T
 
				 }
			
 
				 
			
 
				 modf_f16   :: proc "contextless" (x: f16) -> (int: f16, frac: f16) {
			
 
				-	shift :: 16 - 5 - 1
			
 
				-	mask  :: 0x1f
			
 
				-	bias  :: 15
			
 
				+	shift :: F16_SHIFT
			
 
				+	mask  :: F16_MASK
			
 
				+	bias  :: F16_BIAS
			
 
				 
			
 
				 	if x < 1 {
			
 
				 		switch {
			
@@ -612,9 +641,9 @@ modf_f16be :: proc "contextless" (x: f16be) -> (int: f16be, frac: f16be) {
 
				 	return f16be(i), f16be(f)
			
 
				 }
			
 
				 modf_f32   :: proc "contextless" (x: f32) -> (int: f32, frac: f32) {
			
 
				-	shift :: 32 - 8 - 1
			
 
				-	mask  :: 0xff
			
 
				-	bias  :: 127
			
 
				+	shift :: F32_SHIFT
			
 
				+	mask  :: F32_MASK
			
 
				+	bias  :: F32_BIAS
			
 
				 
			
 
				 	if x < 1 {
			
 
				 		switch {
			
@@ -645,10 +674,10 @@ modf_f32be :: proc "contextless" (x: f32be) -> (int: f32be, frac: f32be) {
 
				 	i, f := #force_inline modf_f32(f32(x))
			
 
				 	return f32be(i), f32be(f)
			
 
				 }
			
 
				-modf_f64   :: proc "contextless" (x: f64) -> (int: f64, frac: f64) {
			
 
				-	shift :: 64 - 11 - 1
			
 
				-	mask  :: 0x7ff
			
 
				-	bias  :: 1023
			
 
				+modf_f64 :: proc "contextless" (x: f64) -> (int: f64, frac: f64) {
			
 
				+	shift :: F64_SHIFT
			
 
				+	mask  :: F64_MASK
			
 
				+	bias  :: F64_BIAS
			
 
				 
			
 
				 	if x < 1 {
			
 
				 		switch {
			
@@ -679,7 +708,7 @@ modf_f64be :: proc "contextless" (x: f64be) -> (int: f64be, frac: f64be) {
 
				 	i, f := #force_inline modf_f64(f64(x))
			
 
				 	return f64be(i), f64be(f)
			
 
				 }
			
 
				-modf       :: proc{
			
 
				+modf :: proc{
			
 
				 	modf_f16, modf_f16le, modf_f16be,
			
 
				 	modf_f32, modf_f32le, modf_f32be,
			
 
				 	modf_f64, modf_f64le, modf_f64be,
			
@@ -752,6 +781,44 @@ lcm :: proc "contextless" (x, y: $T) -> T
 
				 	return x / gcd(x, y) * y
			
 
				 }
			
 
				 
			
 
				+// normalize_f16 returns y and exponent such that x == y * 2**exponent.
				+// Inputs with magnitude below F16_MIN are scaled up by 2**F16_SHIFT (and
				+// exponent is -F16_SHIFT); all other inputs are returned unchanged with
				+// exponent 0.
				+normalize_f16 :: proc "contextless" (x: f16) -> (y: f16, exponent: int) {
				+	y = x
				+	if abs(x) < F16_MIN {
				+		y = x * (1<<F16_SHIFT)
				+		exponent = -F16_SHIFT
				+	}
				+	return
				+}
			
 
				+// normalize_f32 returns y and exponent such that x == y * 2**exponent.
				+// Inputs with magnitude below F32_MIN are scaled up by 2**F32_SHIFT (and
				+// exponent is -F32_SHIFT); all other inputs are returned unchanged with
				+// exponent 0.
				+normalize_f32 :: proc "contextless" (x: f32) -> (y: f32, exponent: int) {
				+	y = x
				+	if abs(x) < F32_MIN {
				+		y = x * (1<<F32_SHIFT)
				+		exponent = -F32_SHIFT
				+	}
				+	return
				+}
			
 
				+// normalize_f64 returns y and exponent such that x == y * 2**exponent.
				+// Inputs with magnitude below F64_MIN are scaled up by 2**F64_SHIFT (and
				+// exponent is -F64_SHIFT); all other inputs are returned unchanged with
				+// exponent 0.
				+normalize_f64 :: proc "contextless" (x: f64) -> (y: f64, exponent: int) {
				+	y = x
				+	if abs(x) < F64_MIN {
				+		y = x * (1<<F64_SHIFT)
				+		exponent = -F64_SHIFT
				+	}
				+	return
				+}
			
 
				+
			
 
				+// Endian-specific variants of normalize: each converts to the
				+// native-endian type of the same width, normalizes, and converts the
				+// value back; the exponent is passed through unchanged.
				+normalize_f16le :: proc "contextless" (x: f16le) -> (y: f16le, exponent: int) { y0, e := normalize_f16(f16(x)); return f16le(y0), e }
				+normalize_f16be :: proc "contextless" (x: f16be) -> (y: f16be, exponent: int) { y0, e := normalize_f16(f16(x)); return f16be(y0), e }
				+normalize_f32le :: proc "contextless" (x: f32le) -> (y: f32le, exponent: int) { y0, e := normalize_f32(f32(x)); return f32le(y0), e }
				+normalize_f32be :: proc "contextless" (x: f32be) -> (y: f32be, exponent: int) { y0, e := normalize_f32(f32(x)); return f32be(y0), e }
				+normalize_f64le :: proc "contextless" (x: f64le) -> (y: f64le, exponent: int) { y0, e := normalize_f64(f64(x)); return f64le(y0), e }
				+normalize_f64be :: proc "contextless" (x: f64be) -> (y: f64be, exponent: int) { y0, e := normalize_f64(f64(x)); return f64be(y0), e }
			
 
				+
			
 
				+// normalize is the overload set over every float width and endianness.
				+// It returns y and exponent satisfying x == y * 2**exponent, scaling up
				+// inputs whose magnitude is below the type's *_MIN constant.
				+normalize :: proc{
				+	normalize_f16,
				+	normalize_f32,
				+	normalize_f64,
				+	normalize_f16le,
				+	normalize_f16be,
				+	normalize_f32le,
				+	normalize_f32be,
				+	normalize_f64le,
				+	normalize_f64be,
				+}
			
 
				+
			
 
				 frexp_f16   :: proc "contextless" (x: f16)   -> (significand: f16,   exponent: int) {
			
 
				 	f, e := frexp_f64(f64(x))
			
 
				 	return f16(f), e
			
@@ -776,24 +843,25 @@ frexp_f32be :: proc "contextless" (x: f32be) -> (significand: f32be, exponent: i
 
				 	f, e := frexp_f64(f64(x))
			
 
				 	return f32be(f), e
			
 
				 }
			
 
				-frexp_f64 :: proc "contextless" (x: f64) -> (significand: f64, exponent: int) {
				+// frexp_f64 splits f into a significand and a power-of-two exponent by
				+// reading and rewriting the exponent field of its bit pattern, so that
				+// f == significand * 2**exponent.
				+//
				+// Zero (of either sign), infinities and NaN are returned unchanged with
				+// an exponent of 0.
				+frexp_f64 :: proc "contextless" (f: f64) -> (significand: f64, exponent: int) {
				+	mask  :: F64_MASK
				+	shift :: F64_SHIFT
				+	bias  :: F64_BIAS
				+	
				 	switch {
				-	case x == 0:
				+	case f == 0:
				-		return 0, 0
				+		// Return f itself rather than a literal 0 so that the sign of
				+		// a negative zero is preserved.
				+		return f, 0
				-	case x < 0:
				-		significand, exponent = frexp(-x)
				-		return -significand, exponent
				-	}
				-	ex := trunc(log2(x))
				-	exponent = int(ex)
				-	significand = x / pow(2.0, ex)
				-	if abs(significand) >= 1 {
				-		exponent += 1
				-		significand /= 2
				-	}
				-	if exponent == 1024 && significand == 0 {
				-		significand = 0.99999999999999988898
				+	case is_inf(f) || is_nan(f):
				+		return f, 0
				 	}
				+	f := f
				+	
				+	// Scale very small inputs up first; normalize_f64 reports the
				+	// compensating exponent.
				+	f, exponent = normalize_f64(f)
				+	x := transmute(u64)f
				+	exponent += int((x>>shift)&mask) - bias + 1
				+	// Overwrite the exponent field with bias-1, leaving the sign and
				+	// mantissa bits untouched.
				+	x &~= mask << shift
				+	x |= (-1 + bias) << shift
				+	significand = transmute(f64)x
				 	return
				 }
			
 
				 frexp_f64le :: proc "contextless" (x: f64le) -> (significand: f64le, exponent: int) {
			
@@ -804,7 +872,18 @@ frexp_f64be :: proc "contextless" (x: f64be) -> (significand: f64be, exponent: i
 
				 	f, e := frexp_f64(f64(x))
			
 
				 	return f64be(f), e
			
 
				 }
			
 
				-frexp       :: proc{
			
 
				+
			
 
				+// frexp breaks the value into a normalized fraction, and an integral power of two
			
 
				+// It returns a significand and exponent satisfying x == significand * 2**exponent
			
 
				+// with the absolute value of significand in the interval [0.5, 1).
			
 
				+//
			
 
				+// Special cases: 
			
 
				+// 	frexp(+0)   = +0,   0
			
 
				+// 	frexp(-0)   = -0,   0
			
 
				+// 	frexp(+inf) = +inf, 0
			
 
				+// 	frexp(-inf) = -inf, 0
			
 
				+// 	frexp(NaN)  = NaN,  0
			
 
				+frexp :: proc{
			
 
				 	frexp_f16, frexp_f16le, frexp_f16be,
			
 
				 	frexp_f32, frexp_f32le, frexp_f32be,
			
 
				 	frexp_f64, frexp_f64le, frexp_f64be, 
			
@@ -1048,13 +1127,11 @@ inf_f32be :: proc "contextless" (sign: int) -> f32be {
 
				 	return f32be(inf_f64(sign))
			
 
				 }
			
 
				 inf_f64   :: proc "contextless" (sign: int) -> f64 {
			
 
				-	v: u64
			
 
				 	if sign >= 0 {
			
 
				-		v = 0x7ff00000_00000000
			
 
				+		return 0h7ff00000_00000000
			
 
				 	} else {
			
 
				-		v = 0xfff00000_00000000
			
 
				+		return 0hfff00000_00000000
			
 
				 	}
			
 
				-	return transmute(f64)v
			
 
				 }
			
 
				 inf_f64le :: proc "contextless" (sign: int) -> f64le {
			
 
				 	return f64le(inf_f64(sign))
			
@@ -1082,8 +1159,7 @@ nan_f32be :: proc "contextless" () -> f32be {
 
				 	return f32be(nan_f64())
			
 
				 }
			
 
				 nan_f64   :: proc "contextless" () -> f64 {
			
 
				-	v: u64 = 0x7ff80000_00000001
			
 
				-	return transmute(f64)v
			
 
				+	return 0h7ff80000_00000001
			
 
				 }
			
 
				 nan_f64le :: proc "contextless" () -> f64le {
			
 
				 	return f64le(nan_f64())
			
@@ -1297,18 +1373,295 @@ tanh :: proc "contextless" (x: $T) -> T where intrinsics.type_is_float(T) {
 
				 	return (t - 1) / (t + 1)
			
 
				 }
			
 
				 
			
 
				-asinh :: proc "contextless" (x: $T) -> T where intrinsics.type_is_float(T) {
			
 
				-	return ln(x + sqrt(x*x + 1))
			
 
				+asinh :: proc "contextless" (y: $T) -> T where intrinsics.type_is_float(T) { // inverse hyperbolic sine of y; evaluated in f64 and converted back to T
			
 
				+	// The original C code, the long comment, and the constants
			
 
				+	// below are from FreeBSD's /usr/src/lib/msun/src/s_asinh.c
			
 
				+	// and came with this notice. 
			
 
				+	//
			
 
				+	// ====================================================
			
 
				+	// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
			
 
				+	//
			
 
				+	// Developed at SunPro, a Sun Microsystems, Inc. business.
			
 
				+	// Permission to use, copy, modify, and distribute this
			
 
				+	// software is freely granted, provided that this notice
			
 
				+	// is preserved.
			
 
				+	// ====================================================
			
 
				+	
			
 
				+	LN2       :: 0h3FE62E42FEFA39EF // ln(2), written as an f64 bit pattern
			
 
				+	NEAR_ZERO :: 1.0 / (1 << 28) // 2**-28
			
 
				+	LARGE     :: 1 << 28 // 2**28
			
 
				+	
			
 
				+	x := f64(y) // all arithmetic below is done in f64 regardless of T
			
 
				+	
			
 
				+	if is_nan(x) || is_inf(x) { // NaN and infinite inputs are returned unchanged
			
 
				+		return T(x)
			
 
				+	}
			
 
				+	sign := false
			
 
				+	if x < 0 { // work on |x| and restore the sign at the end
			
 
				+		x = -x
			
 
				+		sign = true
			
 
				+	}
			
 
				+	temp: f64
			
 
				+	switch {
			
 
				+	case x > LARGE: // |x| > 2**28: uses ln(x) + ln(2), i.e. ln(2x)
			
 
				+		temp = ln(x) + LN2
			
 
				+	case x > 2: // 2 < |x| <= 2**28
			
 
				+		temp = ln(2*x + 1/(sqrt(x*x + 1) + x))
			
 
				+	case x < NEAR_ZERO: // |x| < 2**-28: the result is x itself
			
 
				+		temp = x
			
 
				+	case: // 2**-28 <= |x| <= 2: log1p form
			
 
				+		temp = log1p(x + x*x/(1 + sqrt(1 + x*x)))
			
 
				+	}
			
 
				+	
			
 
				+	if sign { // reapply the sign stripped above
			
 
				+		temp = -temp
			
 
				+	}
			
 
				+	return T(temp)
			
 
				+}
			
 
				+
			
 
				+acosh :: proc "contextless" (y: $T) -> T where intrinsics.type_is_float(T) { // inverse hyperbolic cosine of y; evaluated in f64 and converted back to T
			
 
				+	// The original C code, the long comment, and the constants
			
 
				+	// below are from FreeBSD's /usr/src/lib/msun/src/e_acosh.c
			
 
				+	// and came with this notice. 
			
 
				+	//
			
 
				+	// ====================================================
			
 
				+	// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
			
 
				+	//
			
 
				+	// Developed at SunPro, a Sun Microsystems, Inc. business.
			
 
				+	// Permission to use, copy, modify, and distribute this
			
 
				+	// software is freely granted, provided that this notice
			
 
				+	// is preserved.
			
 
				+	// ====================================================
			
 
				+	
			
 
				+	LARGE :: 1<<28 // 2**28
			
 
				+	LN2 :: 0h3FE62E42FEFA39EF // ln(2), written as an f64 bit pattern
			
 
				+	x := f64(y) // all arithmetic below is done in f64 regardless of T
			
 
				+	switch {
			
 
				+	case x < 1 || is_nan(x): // below the domain [1, +inf) or NaN: result is NaN
			
 
				+		return T(nan_f64())
			
 
				+	case x == 1: // acosh(1) is exactly 0
			
 
				+		return 0
			
 
				+	case x >= LARGE: // x >= 2**28: uses ln(x) + ln(2), i.e. ln(2x)
			
 
				+		return T(ln(x) + LN2)
			
 
				+	case x > 2: // 2 < x < 2**28
			
 
				+		return T(ln(2*x - 1/(x+sqrt(x*x-1))))
			
 
				+	}
			
 
				+	t := x-1 // remaining range 1 < x <= 2: log1p form in terms of t = x-1
			
 
				+	return T(log1p(t + sqrt(2*t + t*t)))
			
 
				+}
			
 
				+
			
 
				+atanh :: proc "contextless" (y: $T) -> T where intrinsics.type_is_float(T) { // inverse hyperbolic tangent of y; evaluated in f64 and converted back to T
			
 
				+	// The original C code, the long comment, and the constants
			
 
				+	// below are from FreeBSD's /usr/src/lib/msun/src/e_atanh.c
			
 
				+	// and came with this notice. 
			
 
				+	//
			
 
				+	// ====================================================
			
 
				+	// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
			
 
				+	//
			
 
				+	// Developed at SunPro, a Sun Microsystems, Inc. business.
			
 
				+	// Permission to use, copy, modify, and distribute this
			
 
				+	// software is freely granted, provided that this notice
			
 
				+	// is preserved.
			
 
				+	// ====================================================
			
 
				+	NEAR_ZERO :: 1.0 / (1 << 28) // 2**-28
			
 
				+	x := f64(y) // all arithmetic below is done in f64 regardless of T
			
 
				+	switch {
			
 
				+	case x < -1 || x > 1 || is_nan(x): // outside [-1, 1] or NaN: result is NaN
			
 
				+		return T(nan_f64())
			
 
				+	case x == 1: // atanh(1) = +Inf
			
 
				+		return T(inf_f64(1))
			
 
				+	case x == -1: // atanh(-1) = -Inf
			
 
				+		return T(inf_f64(-1))
			
 
				+	}
			
 
				+	sign := false
			
 
				+	if x < 0 { // work on |x| and restore the sign at the end
			
 
				+		x = -x
			
 
				+		sign = true
			
 
				+	}
			
 
				+	temp: f64
			
 
				+	switch {
			
 
				+	case x < NEAR_ZERO: // |x| < 2**-28: the result is x itself
			
 
				+		temp = x
			
 
				+	case x < 0.5: // 2**-28 <= |x| < 0.5
			
 
				+		temp = x + x // temp = 2x, reused in the next line
			
 
				+		temp = 0.5 * log1p(temp + temp*x/(1-x))
			
 
				+	case: // 0.5 <= |x| < 1
			
 
				+		temp = 0.5 * log1p((x+x)/(1-x))
			
 
				+	}
			
 
				+	if sign { // reapply the sign stripped above
			
 
				+		temp = -temp
			
 
				+	}
			
 
				+	return T(temp)
			
 
				+}
			
 
				+
			
 
				+ilogb_f16 :: proc "contextless" (val: f16) -> int { // binary exponent of val, returned as an int
			
 
				+	switch {
			
 
				+	case val == 0:    return int(min(i32)) // zero maps to the sentinel min(i32)
			
 
				+	case is_nan(val): return int(max(i32)) // NaN maps to the sentinel max(i32)
			
 
				+	case is_inf(val): return int(max(i32)) // infinity maps to the sentinel max(i32)
			
 
				+	}
			
 
				+	x, exp := normalize_f16(val) // normalize_f16 is defined outside this view; presumably rescales subnormals and returns the exponent adjustment (confirm)
			
 
				+	return int(((transmute(u16)x)>>F16_SHIFT)&F16_MASK) - F16_BIAS + exp // extract the biased exponent field, remove the bias, add the adjustment
			
 
				+}
			
 
				+ilogb_f32 :: proc "contextless" (val: f32) -> int { // binary exponent of val, returned as an int
			
 
				+	switch {
			
 
				+	case val == 0:    return int(min(i32)) // zero maps to the sentinel min(i32)
			
 
				+	case is_nan(val): return int(max(i32)) // NaN maps to the sentinel max(i32)
			
 
				+	case is_inf(val): return int(max(i32)) // infinity maps to the sentinel max(i32)
			
 
				+	}
			
 
				+	x, exp := normalize_f32(val) // normalize_f32 is defined outside this view; presumably rescales subnormals and returns the exponent adjustment (confirm)
			
 
				+	return int(((transmute(u32)x)>>F32_SHIFT)&F32_MASK) - F32_BIAS + exp // extract the biased exponent field, remove the bias, add the adjustment
			
 
				+}
			
 
				+ilogb_f64 :: proc "contextless" (val: f64) -> int { // binary exponent of val, returned as an int
			
 
				+	switch {
			
 
				+	case val == 0:    return int(min(i32)) // zero maps to the sentinel min(i32)
			
 
				+	case is_nan(val): return int(max(i32)) // NaN maps to the sentinel max(i32)
			
 
				+	case is_inf(val): return int(max(i32)) // infinity maps to the sentinel max(i32)
			
 
				+	}
			
 
				+	x, exp := normalize_f64(val) // normalize_f64 is defined outside this view; presumably rescales subnormals and returns the exponent adjustment (confirm)
			
 
				+	return int(((transmute(u64)x)>>F64_SHIFT)&F64_MASK) - F64_BIAS + exp // extract the biased exponent field, remove the bias, add the adjustment
			
 
				+}
			
 
				+ilogb_f16le :: proc "contextless" (value: f16le) -> int { return ilogb_f16(f16(value)) }
			
 
				+ilogb_f16be :: proc "contextless" (value: f16be) -> int { return ilogb_f16(f16(value)) }
			
 
				+ilogb_f32le :: proc "contextless" (value: f32le) -> int { return ilogb_f32(f32(value)) }
			
 
				+ilogb_f32be :: proc "contextless" (value: f32be) -> int { return ilogb_f32(f32(value)) }
			
 
				+ilogb_f64le :: proc "contextless" (value: f64le) -> int { return ilogb_f64(f64(value)) }
			
 
				+ilogb_f64be :: proc "contextless" (value: f64be) -> int { return ilogb_f64(f64(value)) }
			
 
				+ilogb :: proc {
			
 
				+	ilogb_f16,
			
 
				+	ilogb_f32,
			
 
				+	ilogb_f64,
			
 
				+	ilogb_f16le,
			
 
				+	ilogb_f16be,
			
 
				+	ilogb_f32le,
			
 
				+	ilogb_f32be,
			
 
				+	ilogb_f64le,
			
 
				+	ilogb_f64be,
			
 
				+}
			
 
				+
			
 
				+logb_f16 :: proc "contextless" (val: f16) -> f16 { // binary exponent of val, returned as a float
			
 
				+	switch {
			
 
				+	case val == 0:    return inf_f16(-1) // zero gives negative infinity
			
 
				+	case is_inf(val): return inf_f16(+1) // an infinite input gives positive infinity
			
 
				+	case is_nan(val): return val // NaN is passed through
			
 
				+	}
			
 
				+	return f16(ilogb(val)) // finite non-zero input: the integer exponent from ilogb, converted to f16
			
 
				+}
			
 
				+logb_f32 :: proc "contextless" (val: f32) -> f32 { // binary exponent of val, returned as a float
			
 
				+	switch {
			
 
				+	case val == 0:    return inf_f32(-1) // zero gives negative infinity
			
 
				+	case is_inf(val): return inf_f32(+1) // an infinite input gives positive infinity
			
 
				+	case is_nan(val): return val // NaN is passed through
			
 
				+	}
			
 
				+	return f32(ilogb(val)) // finite non-zero input: the integer exponent from ilogb, converted to f32
			
 
				+}
			
 
				+logb_f64 :: proc "contextless" (val: f64) -> f64 { // binary exponent of val, returned as a float
			
 
				+	switch {
			
 
				+	case val == 0:    return inf_f64(-1) // zero gives negative infinity
			
 
				+	case is_inf(val): return inf_f64(+1) // an infinite input gives positive infinity
			
 
				+	case is_nan(val): return val // NaN is passed through
			
 
				+	}
			
 
				+	return f64(ilogb(val)) // finite non-zero input: the integer exponent from ilogb, converted to f64
			
 
				+}
			
 
				+logb_f16le :: proc "contextless" (value: f16le) -> f16le { return f16le(logb_f16(f16(value))) }
			
 
				+logb_f16be :: proc "contextless" (value: f16be) -> f16be { return f16be(logb_f16(f16(value))) }
			
 
				+logb_f32le :: proc "contextless" (value: f32le) -> f32le { return f32le(logb_f32(f32(value))) }
			
 
				+logb_f32be :: proc "contextless" (value: f32be) -> f32be { return f32be(logb_f32(f32(value))) }
			
 
				+logb_f64le :: proc "contextless" (value: f64le) -> f64le { return f64le(logb_f64(f64(value))) }
			
 
				+logb_f64be :: proc "contextless" (value: f64be) -> f64be { return f64be(logb_f64(f64(value))) }
			
 
				+logb :: proc {
			
 
				+	logb_f16,
			
 
				+	logb_f32,
			
 
				+	logb_f64,
			
 
				+	logb_f16le,
			
 
				+	logb_f16be,
			
 
				+	logb_f32le,
			
 
				+	logb_f32be,
			
 
				+	logb_f64le,
			
 
				+	logb_f64be,
			
 
				+}
			
 
				+
			
 
				+// nextafter_f16 returns the next representable f16 value after x in the direction of y.
			
 
				+//
			
 
				+// Special cases:
			
 
				+// 	nextafter(x, x)   = x
			
 
				+// 	nextafter(NaN, y) = NaN
			
 
				+// 	nextafter(x, NaN) = NaN
			
 
				+// 	nextafter(0, y)   = smallest subnormal with the sign of y (for y != 0)
			
 
				+nextafter_f16 :: proc "contextless" (x, y: f16) -> (r: f16) {
			
 
				+	switch {
			
 
				+	case is_nan(x) || is_nan(y):
			
 
				+		r = nan_f16()
			
 
				+	case x == y:
			
 
				+		r = x
			
 
				+	case x == 0:
			
 
				+		// The neighbour of zero is the smallest subnormal (bit pattern 1)
			
 
				+		// carrying the sign of y, not the value 1.0.
			
 
				+		smallest: u16 = 1
			
 
				+		r = copy_sign_f16(transmute(f16)smallest, y)
			
 
				+	case (y > x) == (x > 0):
			
 
				+		// Moving away from zero: the next value has the next larger bit pattern.
			
 
				+		r = transmute(f16)(transmute(u16)x + 1)
			
 
				+	case:
			
 
				+		// Moving towards zero: the next value has the next smaller bit pattern.
			
 
				+		r = transmute(f16)(transmute(u16)x - 1)
			
 
				+	}
			
 
				+	return
			
 
				+}
			
 
				+// nextafter_f32 returns the next representable f32 value after x in the direction of y.
			
 
				+//
			
 
				+// Special cases:
			
 
				+// 	nextafter(x, x)   = x
			
 
				+// 	nextafter(NaN, y) = NaN
			
 
				+// 	nextafter(x, NaN) = NaN
			
 
				+// 	nextafter(0, y)   = smallest subnormal with the sign of y (for y != 0)
			
 
				+nextafter_f32 :: proc "contextless" (x, y: f32) -> (r: f32) {
			
 
				+	switch {
			
 
				+	case is_nan(x) || is_nan(y):
			
 
				+		r = nan_f32()
			
 
				+	case x == y:
			
 
				+		r = x
			
 
				+	case x == 0:
			
 
				+		// The neighbour of zero is the smallest subnormal (bit pattern 1)
			
 
				+		// carrying the sign of y, not the value 1.0.
			
 
				+		smallest: u32 = 1
			
 
				+		r = copy_sign_f32(transmute(f32)smallest, y)
			
 
				+	case (y > x) == (x > 0):
			
 
				+		// Moving away from zero: the next value has the next larger bit pattern.
			
 
				+		r = transmute(f32)(transmute(u32)x + 1)
			
 
				+	case:
			
 
				+		// Moving towards zero: the next value has the next smaller bit pattern.
			
 
				+		r = transmute(f32)(transmute(u32)x - 1)
			
 
				+	}
			
 
				+	return
			
 
				+}
			
 
				+// nextafter_f64 returns the next representable f64 value after x in the direction of y.
			
 
				+//
			
 
				+// Special cases:
			
 
				+// 	nextafter(x, x)   = x
			
 
				+// 	nextafter(NaN, y) = NaN
			
 
				+// 	nextafter(x, NaN) = NaN
			
 
				+// 	nextafter(0, y)   = smallest subnormal with the sign of y (for y != 0)
			
 
				+nextafter_f64 :: proc "contextless" (x, y: f64) -> (r: f64) {
			
 
				+	switch {
			
 
				+	case is_nan(x) || is_nan(y):
			
 
				+		r = nan_f64()
			
 
				+	case x == y:
			
 
				+		r = x
			
 
				+	case x == 0:
			
 
				+		// The neighbour of zero is the smallest subnormal (bit pattern 1)
			
 
				+		// carrying the sign of y, not the value 1.0.
			
 
				+		smallest: u64 = 1
			
 
				+		r = copy_sign_f64(transmute(f64)smallest, y)
			
 
				+	case (y > x) == (x > 0):
			
 
				+		// Moving away from zero: the next value has the next larger bit pattern.
			
 
				+		r = transmute(f64)(transmute(u64)x + 1)
			
 
				+	case:
			
 
				+		// Moving towards zero: the next value has the next smaller bit pattern.
			
 
				+		r = transmute(f64)(transmute(u64)x - 1)
			
 
				+	}
			
 
				+	return
			
 
				+}
			
 
				+nextafter_f16le :: proc "contextless" (x, y: f16le) -> (r: f16le) { return f16le(nextafter_f16(f16(x), f16(y))) }
			
 
				+nextafter_f16be :: proc "contextless" (x, y: f16be) -> (r: f16be) { return f16be(nextafter_f16(f16(x), f16(y))) }
			
 
				+nextafter_f32le :: proc "contextless" (x, y: f32le) -> (r: f32le) { return f32le(nextafter_f32(f32(x), f32(y))) }
			
 
				+nextafter_f32be :: proc "contextless" (x, y: f32be) -> (r: f32be) { return f32be(nextafter_f32(f32(x), f32(y))) }
			
 
				+nextafter_f64le :: proc "contextless" (x, y: f64le) -> (r: f64le) { return f64le(nextafter_f64(f64(x), f64(y))) }
			
 
				+nextafter_f64be :: proc "contextless" (x, y: f64be) -> (r: f64be) { return f64be(nextafter_f64(f64(x), f64(y))) }
			
 
				+
			
 
				+nextafter :: proc{
			
 
				+	nextafter_f16, nextafter_f16le, nextafter_f16be,
			
 
				+	nextafter_f32, nextafter_f32le, nextafter_f32be,
			
 
				+	nextafter_f64, nextafter_f64le, nextafter_f64be,
			
 
				 }
			
 
				 
			
 
				-acosh :: proc "contextless" (x: $T) -> T where intrinsics.type_is_float(T) {
			
 
				-	return ln(x + sqrt(x*x - 1))
			
 
				+signbit_f16 :: proc "contextless" (x: f16) -> bool { // reports whether the top bit of x's bit pattern is set
			
 
				+	return (transmute(u16)x)&(1<<15) != 0 // bit 15 is the most significant bit of the 16-bit pattern
			
 
				+}
			
 
				+signbit_f32 :: proc "contextless" (x: f32) -> bool { // reports whether the top bit of x's bit pattern is set
			
 
				+	return (transmute(u32)x)&(1<<31) != 0 // bit 31 is the most significant bit of the 32-bit pattern
			
 
				+}
			
 
				+signbit_f64 :: proc "contextless" (x: f64) -> bool { // reports whether the top bit of x's bit pattern is set
			
 
				+	return (transmute(u64)x)&(1<<63) != 0 // bit 63 is the most significant bit of the 64-bit pattern
			
 
				 }
			
 
				+signbit_f16le :: proc "contextless" (x: f16le) -> bool { return signbit_f16(f16(x)) }
			
 
				+signbit_f32le :: proc "contextless" (x: f32le) -> bool { return signbit_f32(f32(x)) }
			
 
				+signbit_f64le :: proc "contextless" (x: f64le) -> bool { return signbit_f64(f64(x)) }
			
 
				+signbit_f16be :: proc "contextless" (x: f16be) -> bool { return signbit_f16(f16(x)) }
			
 
				+signbit_f32be :: proc "contextless" (x: f32be) -> bool { return signbit_f32(f32(x)) }
			
 
				+signbit_f64be :: proc "contextless" (x: f64be) -> bool { return signbit_f64(f64(x)) }
			
 
				 
			
 
				-atanh :: proc "contextless" (x: $T) -> T where intrinsics.type_is_float(T) {
			
 
				-	return 0.5*ln((1+x)/(1-x))
			
 
				+signbit :: proc{
			
 
				+	signbit_f16, signbit_f16le, signbit_f16be,
			
 
				+	signbit_f32, signbit_f32le, signbit_f32be,
			
 
				+	signbit_f64, signbit_f64le, signbit_f64be,
			
 
				 }
			
 
				 
			
 
				+
			
 
				 F16_DIG        :: 3
			
 
				 F16_EPSILON    :: 0.00097656
			
 
				 F16_GUARD      :: 0
			
@@ -1349,3 +1702,16 @@ F64_MIN_10_EXP :: -307                     // min decimal exponent
 
				 F64_MIN_EXP    :: -1021                    // min binary exponent
			
 
				 F64_RADIX      :: 2                        // exponent radix
			
 
				 F64_ROUNDS     :: 1                        // addition rounding: near
			
 
				+
			
 
				+
			
 
				+F16_MASK  :: 0x1f // applied after shifting right by F16_SHIFT to isolate the exponent field (see ilogb_f16)
			
 
				+F16_SHIFT :: 16 - 6 // = 10, the bit position where the exponent field starts
			
 
				+F16_BIAS  :: 0xf // subtracted from the extracted field to get the unbiased exponent
			
 
				+
			
 
				+F32_MASK  :: 0xff // applied after shifting right by F32_SHIFT to isolate the exponent field (see ilogb_f32)
			
 
				+F32_SHIFT :: 32 - 9 // = 23, the bit position where the exponent field starts
			
 
				+F32_BIAS  :: 0x7f // subtracted from the extracted field to get the unbiased exponent
			
 
				+
			
 
				+F64_MASK  :: 0x7ff // applied after shifting right by F64_SHIFT to isolate the exponent field (see ilogb_f64)
			
 
				+F64_SHIFT :: 64 - 12 // = 52, the bit position where the exponent field starts
			
 
				+F64_BIAS  :: 0x3ff // subtracted from the extracted field to get the unbiased exponent
			
--- a/core/math/math_basic.odin
+++ b/core/math/math_basic.odin
@@ -1,15 +1,10 @@
 
				 //+build !js
			
 
				 package math
			
 
				 
			
 
				+import "core:intrinsics"
			
 
				+
			
 
				 @(default_calling_convention="none")
			
 
				 foreign _ {
			
 
				-	@(link_name="llvm.sqrt.f16")
			
 
				-	sqrt_f16 :: proc(x: f16) -> f16 ---
			
 
				-	@(link_name="llvm.sqrt.f32")
			
 
				-	sqrt_f32 :: proc(x: f32) -> f32 ---
			
 
				-	@(link_name="llvm.sqrt.f64")
			
 
				-	sqrt_f64 :: proc(x: f64) -> f64 ---
			
 
				-
			
 
				 	@(link_name="llvm.sin.f16")
			
 
				 	sin_f16 :: proc(θ: f16) -> f16 ---
			
 
				 	@(link_name="llvm.sin.f32")
			
@@ -38,24 +33,137 @@ foreign _ {
 
				 	@(link_name="llvm.fmuladd.f64")
			
 
				 	fmuladd_f64 :: proc(a, b, c: f64) -> f64 ---
			
 
				 
			
 
				-	@(link_name="llvm.log.f16")
			
 
				-	ln_f16 :: proc(x: f16) -> f16 ---
			
 
				-	@(link_name="llvm.log.f32")
			
 
				-	ln_f32 :: proc(x: f32) -> f32 ---
			
 
				-	@(link_name="llvm.log.f64")
			
 
				-	ln_f64 :: proc(x: f64) -> f64 ---
			
 
				-
			
 
				 	@(link_name="llvm.exp.f16")
			
 
				 	exp_f16 :: proc(x: f16) -> f16 ---
			
 
				 	@(link_name="llvm.exp.f32")
			
 
				 	exp_f32 :: proc(x: f32) -> f32 ---
			
 
				 	@(link_name="llvm.exp.f64")
			
 
				 	exp_f64 :: proc(x: f64) -> f64 ---
			
 
				+}
			
 
				 
			
 
				-	@(link_name="llvm.ldexp.f16")
			
 
				-	ldexp_f16 :: proc(val: f16, exp: i32) -> f16 ---
			
 
				-	@(link_name="llvm.ldexp.f32")
			
 
				-	ldexp_f32 :: proc(val: f32, exp: i32) -> f32 ---
			
 
				-	@(link_name="llvm.ldexp.f64")
			
 
				-	ldexp_f64 :: proc(val: f64, exp: i32) -> f64 ---
			
 
				+sqrt_f16 :: proc "contextless" (x: f16) -> f16 {
			
 
				+	return intrinsics.sqrt(x)
			
 
				 }
			
 
				+sqrt_f32 :: proc "contextless" (x: f32) -> f32 {
			
 
				+	return intrinsics.sqrt(x)
			
 
				+}
			
 
				+sqrt_f64 :: proc "contextless" (x: f64) -> f64 {
			
 
				+	return intrinsics.sqrt(x)
			
 
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+ln_f64 :: proc "contextless" (x: f64) -> f64 {
			
 
				+	// The original C code, the long comment, and the constants
			
 
				+	// below are from FreeBSD's /usr/src/lib/msun/src/e_log.c
			
 
				+	// and came with this notice.
			
 
				+	//
			
 
				+	// ====================================================
			
 
				+	// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
			
 
				+	//
			
 
				+	// Developed at SunPro, a Sun Microsystems, Inc. business.
			
 
				+	// Permission to use, copy, modify, and distribute this
			
 
				+	// software is freely granted, provided that this notice
			
 
				+	// is preserved.
			
 
				+	// ====================================================
			
 
				+	//
			
 
				+	// __ieee754_log(x)
			
 
				+	// Return the logarithm of x
			
 
				+	//
			
 
				+	// Method :
			
 
				+	//   1. Argument Reduction: find k and f such that
			
 
				+	//			x = 2**k * (1+f),
			
 
				+	//	   where  sqrt(2)/2 < 1+f < sqrt(2) .
			
 
				+	//
			
 
				+	//   2. Approximation of log(1+f).
			
 
				+	//	Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
			
 
				+	//		 = 2s + 2/3 s**3 + 2/5 s**5 + .....,
			
 
				+	//	     	 = 2s + s*R
			
 
				+	//      We use a special Remez algorithm on [0,0.1716] to generate
			
 
				+	//	a polynomial of degree 14 to approximate R.  The maximum error
			
 
				+	//	of this polynomial approximation is bounded by 2**-58.45. In
			
 
				+	//	other words,
			
 
				+	//		        2      4      6      8      10      12      14
			
 
				+	//	    R(z) ~ L1*s +L2*s +L3*s +L4*s +L5*s  +L6*s  +L7*s
			
 
				+	//	(the values of L1 to L7 are listed in the program) and
			
 
				+	//	    |      2          14          |     -58.45
			
 
				+	//	    | L1*s +...+L7*s    -  R(z) | <= 2
			
 
				+	//	    |                             |
			
 
				+	//	Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
			
 
				+	//	In order to guarantee error in log below 1ulp, we compute log by
			
 
				+	//		log(1+f) = f - s*(f - R)		(if f is not too large)
			
 
				+	//		log(1+f) = f - (hfsq - s*(hfsq+R)).	(better accuracy)
			
 
				+	//
			
 
				+	//	3. Finally,  log(x) = k*Ln2 + log(1+f).
			
 
				+	//			    = k*Ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*Ln2_lo)))
			
 
				+	//	   Here Ln2 is split into two floating point number:
			
 
				+	//			Ln2_hi + Ln2_lo,
			
 
				+	//	   where n*Ln2_hi is always exact for |n| < 2000.
			
 
				+	//
			
 
				+	// Special cases:
			
 
				+	//	log(x) is NaN with signal if x < 0 (including -INF) ;
			
 
				+	//	log(+INF) is +INF; log(0) is -INF with signal;
			
 
				+	//	log(NaN) is that NaN with no signal.
			
 
				+	//
			
 
				+	// Accuracy:
			
 
				+	//	according to an error analysis, the error is always less than
			
 
				+	//	1 ulp (unit in the last place).
			
 
				+	//
			
 
				+	// Constants:
			
 
				+	// The hexadecimal values are the intended ones for the following
			
 
				+	// constants. The decimal values may be used, provided that the
			
 
				+	// compiler will convert from decimal to binary accurately enough
			
 
				+	// to produce the hexadecimal values shown.
			
 
				+	
			
 
				+	LN2_HI :: 0h3fe62e42_fee00000 // 6.93147180369123816490e-01
			
 
				+	LN2_LO :: 0h3dea39ef_35793c76 // 1.90821492927058770002e-10
			
 
				+	L1     :: 0h3fe55555_55555593 // 6.666666666666735130e-01
			
 
				+	L2     :: 0h3fd99999_9997fa04 // 3.999999999940941908e-01
			
 
				+	L3     :: 0h3fd24924_94229359 // 2.857142874366239149e-01
			
 
				+	L4     :: 0h3fcc71c5_1d8e78af // 2.222219843214978396e-01
			
 
				+	L5     :: 0h3fc74664_96cb03de // 1.818357216161805012e-01
			
 
				+	L6     :: 0h3fc39a09_d078c69f // 1.531383769920937332e-01
			
 
				+	L7     :: 0h3fc2f112_df3e5244 // 1.479819860511658591e-01
			
 
				+	
			
 
				+	switch {
			
 
				+	case is_nan(x) || is_inf(x, 1):
			
 
				+		return x
			
 
				+	case x < 0:
			
 
				+		return nan_f64()
			
 
				+	case x == 0:
			
 
				+		return inf_f64(-1)
			
 
				+	}
			
 
				+
			
 
				+	// reduce
			
 
				+	f1, ki := frexp(x)
			
 
				+	if f1 < SQRT_TWO/2 {
			
 
				+		f1 *= 2
			
 
				+		ki -= 1
			
 
				+	}
			
 
				+	f := f1 - 1
			
 
				+	k := f64(ki)
			
 
				+
			
 
				+	// compute
			
 
				+	s := f / (2 + f)
			
 
				+	s2 := s * s
			
 
				+	s4 := s2 * s2
			
 
				+	t1 := s2 * (L1 + s4*(L3+s4*(L5+s4*L7)))
			
 
				+	t2 := s4 * (L2 + s4*(L4+s4*L6))
			
 
				+	R := t1 + t2
			
 
				+	hfsq := 0.5 * f * f
			
 
				+	return k*LN2_HI - ((hfsq - (s*(hfsq+R) + k*LN2_LO)) - f)
			
 
				+}
			
 
				+
			
 
				+ln_f16 :: proc "contextless" (x: f16) -> f16 { return #force_inline f16(ln_f64(f64(x))) }
			
 
				+ln_f32 :: proc "contextless" (x: f32) -> f32 { return #force_inline f32(ln_f64(f64(x))) }
			
 
				+ln_f16le :: proc "contextless" (x: f16le) -> f16le { return #force_inline f16le(ln_f64(f64(x))) }
			
 
				+ln_f16be :: proc "contextless" (x: f16be) -> f16be { return #force_inline f16be(ln_f64(f64(x))) }
			
 
				+ln_f32le :: proc "contextless" (x: f32le) -> f32le { return #force_inline f32le(ln_f64(f64(x))) }
			
 
				+ln_f32be :: proc "contextless" (x: f32be) -> f32be { return #force_inline f32be(ln_f64(f64(x))) }
			
 
				+ln_f64le :: proc "contextless" (x: f64le) -> f64le { return #force_inline f64le(ln_f64(f64(x))) }
			
 
				+ln_f64be :: proc "contextless" (x: f64be) -> f64be { return #force_inline f64be(ln_f64(f64(x))) }
			
 
				+ln :: proc{
			
 
				+	ln_f16, ln_f16le, ln_f16be,
			
 
				+	ln_f32, ln_f32le, ln_f32be,
			
 
				+	ln_f64, ln_f64le, ln_f64be,
			
 
				+}
			
--- a/core/math/math_basic_js.odin
+++ b/core/math/math_basic_js.odin
@@ -1,12 +1,12 @@
 
				 //+build js
			
 
				 package math
			
 
				 
			
 
				+import "core:intrinsics"
			
 
				+
			
 
				 foreign import "odin_env"
			
 
				 
			
 
				 @(default_calling_convention="c")
			
 
				 foreign odin_env {
			
 
				-	@(link_name="sqrt")
			
 
				-	sqrt_f64 :: proc(x: f64) -> f64 ---
			
 
				 	@(link_name="sin")
			
 
				 	sin_f64 :: proc(θ: f64) -> f64 ---
			
 
				 	@(link_name="cos")
			
@@ -19,10 +19,11 @@ foreign odin_env {
 
				 	ln_f64 :: proc(x: f64) -> f64 ---
			
 
				 	@(link_name="exp")
			
 
				 	exp_f64 :: proc(x: f64) -> f64 ---
			
 
				-	@(link_name="ldexp")
			
 
				-	ldexp_f64 :: proc(val: f64, exp: i32) -> f64 ---
			
 
				 }
			
 
				 
			
 
				+sqrt_f64 :: proc "contextless" (x: f64) -> f64 {
			
 
				+	return intrinsics.sqrt(x)
			
 
				+}
			
 
				 
			
 
				 sqrt_f16    :: proc "c" (x: f16) -> f16             { return f16(sqrt_f64(f64(x)))                    }
			
 
				 sin_f16     :: proc "c" (θ: f16) -> f16             { return f16(sin_f64(f64(θ)))                     }
			
@@ -31,7 +32,6 @@ pow_f16     :: proc "c" (x, power: f16) -> f16      { return f16(pow_f64(f64(x),
 
				 fmuladd_f16 :: proc "c" (a, b, c: f16) -> f16       { return f16(fmuladd_f64(f64(a), f64(b), f64(c))) }
			
 
				 ln_f16      :: proc "c" (x: f16) -> f16             { return f16(ln_f64(f64(x)))                      }
			
 
				 exp_f16     :: proc "c" (x: f16) -> f16             { return f16(exp_f64(f64(x)))                     }
			
 
				-ldexp_f16   :: proc "c" (val: f16, exp: i32) -> f16 { return f16(ldexp_f64(f64(val), exp) )           }
			
 
				 
			
 
				 sqrt_f32    :: proc "c" (x: f32) -> f32             { return f32(sqrt_f64(f64(x)))                    }
			
 
				 sin_f32     :: proc "c" (θ: f32) -> f32             { return f32(sin_f64(f64(θ)))                     }
			
@@ -40,4 +40,15 @@ pow_f32     :: proc "c" (x, power: f32) -> f32      { return f32(pow_f64(f64(x),
 
				 fmuladd_f32 :: proc "c" (a, b, c: f32) -> f32       { return f32(fmuladd_f64(f64(a), f64(b), f64(c))) }
			
 
				 ln_f32      :: proc "c" (x: f32) -> f32             { return f32(ln_f64(f64(x)))                      }
			
 
				 exp_f32     :: proc "c" (x: f32) -> f32             { return f32(exp_f64(f64(x)))                     }
			
 
				-ldexp_f32   :: proc "c" (val: f32, exp: i32) -> f32 { return f32(ldexp_f64(f64(val), exp) )           }
			
 
				+
			
 
				+ln_f16le :: proc "contextless" (x: f16le) -> f16le { return #force_inline f16le(ln_f64(f64(x))) }
			
 
				+ln_f16be :: proc "contextless" (x: f16be) -> f16be { return #force_inline f16be(ln_f64(f64(x))) }
			
 
				+ln_f32le :: proc "contextless" (x: f32le) -> f32le { return #force_inline f32le(ln_f64(f64(x))) }
			
 
				+ln_f32be :: proc "contextless" (x: f32be) -> f32be { return #force_inline f32be(ln_f64(f64(x))) }
			
 
				+ln_f64le :: proc "contextless" (x: f64le) -> f64le { return #force_inline f64le(ln_f64(f64(x))) }
			
 
				+ln_f64be :: proc "contextless" (x: f64be) -> f64be { return #force_inline f64be(ln_f64(f64(x))) }
			
 
				+ln :: proc{
			
 
				+	ln_f16, ln_f16le, ln_f16be,
			
 
				+	ln_f32, ln_f32le, ln_f32be,
			
 
				+	ln_f64, ln_f64le, ln_f64be,
			
 
				+}
			
--- a/core/math/math_erf.odin
+++ b/core/math/math_erf.odin
@@ -0,0 +1,410 @@
 
				+package math
			
 
				+
			
 
				+// The original C code and the long comment below are
			
 
				+// from FreeBSD's /usr/src/lib/msun/src/s_erf.c and
			
 
				+// came with this notice. 
			
 
				+//
			
 
				+// ====================================================
			
 
				+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
			
 
				+//
			
 
				+// Developed at SunPro, a Sun Microsystems, Inc. business.
			
 
				+// Permission to use, copy, modify, and distribute this
			
 
				+// software is freely granted, provided that this notice
			
 
				+// is preserved.
			
 
				+// ====================================================
			
 
				+//
			
 
				+//
			
 
				+// double erf(double x)
			
 
				+// double erfc(double x)
			
 
				+//                           x
			
 
				+//                    2      |\
			
 
				+//     erf(x)  =  ---------  | exp(-t*t)dt
			
 
				+//                 sqrt(pi) \|
			
 
				+//                           0
			
 
				+//
			
 
				+//     erfc(x) =  1-erf(x)
			
 
				+//  Note that
			
 
				+//              erf(-x) = -erf(x)
			
 
				+//              erfc(-x) = 2 - erfc(x)
			
 
				+//
			
 
				+// Method:
			
 
				+//      1. For |x| in [0, 0.84375]
			
 
				+//          erf(x)  = x + x*R(x**2)
			
 
				+//          erfc(x) = 1 - erf(x)           if x in [-.84375,0.25]
			
 
				+//                  = 0.5 + ((0.5-x)-x*R)  if x in [0.25,0.84375]
			
 
				+//         where R = P/Q where P is an odd poly of degree 8 and
			
 
				+//         Q is an odd poly of degree 10.
			
 
				+//                                               -57.90
			
 
				+//                      | R - (erf(x)-x)/x | <= 2
			
 
				+//
			
 
				+//
			
 
				+//         Remark. The formula is derived by noting
			
 
				+//          erf(x) = (2/sqrt(pi))*(x - x**3/3 + x**5/10 - x**7/42 + ....)
			
 
				+//         and that
			
 
				+//          2/sqrt(pi) = 1.128379167095512573896158903121545171688
			
 
				+//         is close to one. The interval is chosen because the fix
			
 
				+//         point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
			
 
				+//         near 0.6174), and by some experiment, 0.84375 is chosen to
			
 
				+//         guarantee the error is less than one ulp for erf.
			
 
				+//
			
 
				+//      2. For |x| in [0.84375,1.25], let s = |x| - 1, and
			
 
				+//         c = 0.84506291151 rounded to single (24 bits)
			
 
				+//              erf(x)  = sign(x) * (c  + P1(s)/Q1(s))
			
 
				+//              erfc(x) = (1-c)  - P1(s)/Q1(s) if x > 0
			
 
				+//                        1+(c+P1(s)/Q1(s))    if x < 0
			
 
				+//              |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06
			
 
				+//         Remark: here we use the taylor series expansion at x=1.
			
 
				+//              erf(1+s) = erf(1) + s*Poly(s)
			
 
				+//                       = 0.845.. + P1(s)/Q1(s)
			
 
				+//         That is, we use rational approximation to approximate
			
 
				+//                      erf(1+s) - (c = (single)0.84506291151)
			
 
				+//         Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
			
 
				+//         where
			
 
				+//              P1(s) = degree 6 poly in s
			
 
				+//              Q1(s) = degree 6 poly in s
			
 
				+//
			
 
				+//      3. For x in [1.25,1/0.35(~2.857143)],
			
 
				+//              erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1)
			
 
				+//              erf(x)  = 1 - erfc(x)
			
 
				+//         where
			
 
				+//              R1(z) = degree 7 poly in z, (z=1/x**2)
			
 
				+//              S1(z) = degree 8 poly in z
			
 
				+//
			
 
				+//      4. For x in [1/0.35,28]
			
 
				+//              erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
			
 
				+//                      = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0
			
 
				+//                      = 2.0 - tiny            (if x <= -6)
			
 
				+//              erf(x)  = sign(x)*(1.0 - erfc(x)) if x < 6, else
			
 
				+//              erf(x)  = sign(x)*(1.0 - tiny)
			
 
				+//         where
			
 
				+//              R2(z) = degree 6 poly in z, (z=1/x**2)
			
 
				+//              S2(z) = degree 7 poly in z
			
 
				+//
			
 
				+//      Note1:
			
 
				+//         To compute exp(-x*x-0.5625+R/S), let s be a single
			
 
				+//         precision number and s := x; then
			
 
				+//              -x*x = -s*s + (s-x)*(s+x)
			
 
				+//              exp(-x*x-0.5625+R/S) =
			
 
				+//                      exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
			
 
				+//      Note2:
			
 
				+//         Here 4 and 5 make use of the asymptotic series
			
 
				+//                        exp(-x*x)
			
 
				+//              erfc(x) ~ ---------- * ( 1 + Poly(1/x**2) )
			
 
				+//                        x*sqrt(pi)
			
 
				+//         We use rational approximation to approximate
			
 
				+//              g(s)=f(1/x**2) = log(erfc(x)*x) - x*x + 0.5625
			
 
				+//         Here is the error bound for R1/S1 and R2/S2
			
 
				+//              |R1/S1 - f(x)|  < 2**(-62.57)
			
 
				+//              |R2/S2 - f(x)|  < 2**(-61.52)
			
 
				+//
			
 
				+//      5. For inf > x >= 28
			
 
				+//              erf(x)  = sign(x) *(1 - tiny)  (raise inexact)
			
 
				+//              erfc(x) = tiny*tiny (raise underflow) if x > 0
			
 
				+//                      = 2 - tiny if x<0
			
 
				+//
			
 
				+//      7. Special case:
			
 
				+//              erf(0)  = 0, erf(inf)  = 1, erf(-inf) = -1,
			
 
				+//              erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
			
 
				+//              erfc/erf(NaN) is NaN
			
 
				+
			
 
				+// erf is the overload set over the erf_* variants, grouped by float width.
			
 
				+erf :: proc{
			
 
				+	erf_f16, erf_f16le, erf_f16be,
			
 
				+	erf_f32, erf_f32le, erf_f32be,
			
 
				+	erf_f64,
			
 
				+}
			
 
				+
			
 
				+// Narrow-type wrappers around erf_f64: widen x to f64, evaluate, and convert
			
 
				+// the result back to the argument type.
			
 
				+erf_f16 :: proc "contextless" (x: f16) -> f16 {
			
 
				+	return f16(erf_f64(f64(x)))
			
 
				+}
			
 
				+erf_f16le :: proc "contextless" (x: f16le) -> f16le {
			
 
				+	return f16le(erf_f64(f64(x)))
			
 
				+}
			
 
				+erf_f16be :: proc "contextless" (x: f16be) -> f16be {
			
 
				+	return f16be(erf_f64(f64(x)))
			
 
				+}
			
 
				+erf_f32 :: proc "contextless" (x: f32) -> f32 {
			
 
				+	return f32(erf_f64(f64(x)))
			
 
				+}
			
 
				+erf_f32le :: proc "contextless" (x: f32le) -> f32le {
			
 
				+	return f32le(erf_f64(f64(x)))
			
 
				+}
			
 
				+erf_f32be :: proc "contextless" (x: f32be) -> f32be {
			
 
				+	return f32be(erf_f64(f64(x)))
			
 
				+}
			
 
				+
			
 
				+// erf_f64 returns the error function of x, using the piecewise rational
			
 
				+// approximations described in the method comment at the top of this file.
			
 
				+// Special cases: NaN gives NaN, +Inf gives 1, -Inf gives -1.
			
 
				+erf_f64 :: proc "contextless" (x: f64) -> f64 {
			
 
				+	erx :: 0h3FEB0AC160000000 // constant added to P/Q in the 0.84375 <= |x| < 1.25 branch
			
 
				+	// Coefficients for approximation to  erf in [0, 0.84375]
			
 
				+	efx  :: 0h3FC06EBA8214DB69
			
 
				+	efx8 :: 0h3FF06EBA8214DB69
			
 
				+	pp0  :: 0h3FC06EBA8214DB68
			
 
				+	pp1  :: 0hBFD4CD7D691CB913
			
 
				+	pp2  :: 0hBF9D2A51DBD7194F
			
 
				+	pp3  :: 0hBF77A291236668E4
			
 
				+	pp4  :: 0hBEF8EAD6120016AC
			
 
				+	qq1  :: 0h3FD97779CDDADC09
			
 
				+	qq2  :: 0h3FB0A54C5536CEBA
			
 
				+	qq3  :: 0h3F74D022C4D36B0F
			
 
				+	qq4  :: 0h3F215DC9221C1A10
			
 
				+	qq5  :: 0hBED09C4342A26120
			
 
				+	// Coefficients for approximation to  erf  in [0.84375, 1.25]
			
 
				+	pa0 :: 0hBF6359B8BEF77538
			
 
				+	pa1 :: 0h3FDA8D00AD92B34D
			
 
				+	pa2 :: 0hBFD7D240FBB8C3F1
			
 
				+	pa3 :: 0h3FD45FCA805120E4
			
 
				+	pa4 :: 0hBFBC63983D3E28EC
			
 
				+	pa5 :: 0h3FA22A36599795EB
			
 
				+	pa6 :: 0hBF61BF380A96073F
			
 
				+	qa1 :: 0h3FBB3E6618EEE323
			
 
				+	qa2 :: 0h3FE14AF092EB6F33
			
 
				+	qa3 :: 0h3FB2635CD99FE9A7
			
 
				+	qa4 :: 0h3FC02660E763351F
			
 
				+	qa5 :: 0h3F8BEDC26B51DD1C
			
 
				+	qa6 :: 0h3F888B545735151D
			
 
				+	// Coefficients for approximation to  erfc in [1.25, 1/0.35]
			
 
				+	ra0 :: 0hBF843412600D6435
			
 
				+	ra1 :: 0hBFE63416E4BA7360
			
 
				+	ra2 :: 0hC0251E0441B0E726
			
 
				+	ra3 :: 0hC04F300AE4CBA38D
			
 
				+	ra4 :: 0hC0644CB184282266
			
 
				+	ra5 :: 0hC067135CEBCCABB2
			
 
				+	ra6 :: 0hC054526557E4D2F2
			
 
				+	ra7 :: 0hC023A0EFC69AC25C
			
 
				+	sa1 :: 0h4033A6B9BD707687
			
 
				+	sa2 :: 0h4061350C526AE721
			
 
				+	sa3 :: 0h407B290DD58A1A71
			
 
				+	sa4 :: 0h40842B1921EC2868
			
 
				+	sa5 :: 0h407AD02157700314
			
 
				+	sa6 :: 0h405B28A3EE48AE2C
			
 
				+	sa7 :: 0h401A47EF8E484A93
			
 
				+	sa8 :: 0hBFAEEFF2EE749A62
			
 
				+	// Coefficients for approximation to  erfc in [1/.35, 28]
			
 
				+	rb0 :: 0hBF84341239E86F4A
			
 
				+	rb1 :: 0hBFE993BA70C285DE
			
 
				+	rb2 :: 0hC031C209555F995A
			
 
				+	rb3 :: 0hC064145D43C5ED98
			
 
				+	rb4 :: 0hC083EC881375F228
			
 
				+	rb5 :: 0hC09004616A2E5992
			
 
				+	rb6 :: 0hC07E384E9BDC383F
			
 
				+	sb1 :: 0h403E568B261D5190
			
 
				+	sb2 :: 0h40745CAE221B9F0A
			
 
				+	sb3 :: 0h409802EB189D5118
			
 
				+	sb4 :: 0h40A8FFB7688C246A
			
 
				+	sb5 :: 0h40A3F219CEDF3BE6
			
 
				+	sb6 :: 0h407DA874E79FE763
			
 
				+	sb7 :: 0hC03670E242712D62
			
 
				+	
			
 
				+	
			
 
				+	VERY_TINY :: 0h0080000000000000
			
 
				+	SMALL     :: 1.0 / (1 << 28)        // 2**-28
			
 
				+
			
 
				+	// special cases
			
 
				+	switch {
			
 
				+	case is_nan(x):
			
 
				+		return nan_f64()
			
 
				+	case is_inf(x, 1):
			
 
				+		return 1
			
 
				+	case is_inf(x, -1):
			
 
				+		return -1
			
 
				+	}
			
 
				+	x := x // mutable local copy; holds |x| once the sign has been split off below
			
 
				+	sign := false // true when the original argument was negative
			
 
				+	if x < 0 {
			
 
				+		x = -x
			
 
				+		sign = true
			
 
				+	}
			
 
				+	if x < 0.84375 { // |x| < 0.84375
			
 
				+		temp: f64
			
 
				+		if x < SMALL { // |x| < 2**-28
			
 
				+			if x < VERY_TINY {
			
 
				+				temp = 0.125 * (8.0*x + efx8*x) // avoid underflow
			
 
				+			} else {
			
 
				+				temp = x + efx*x
			
 
				+			}
			
 
				+		} else {
			
 
				+			z := x * x
			
 
				+			r := pp0 + z*(pp1+z*(pp2+z*(pp3+z*pp4)))
			
 
				+			s := 1 + z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))))
			
 
				+			y := r / s
			
 
				+			temp = x + x*y
			
 
				+		}
			
 
				+		if sign {
			
 
				+			return -temp
			
 
				+		}
			
 
				+		return temp
			
 
				+	}
			
 
				+	if x < 1.25 { // 0.84375 <= |x| < 1.25
			
 
				+		s := x - 1
			
 
				+		P := pa0 + s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))))
			
 
				+		Q := 1 + s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))))
			
 
				+		if sign {
			
 
				+			return -erx - P/Q
			
 
				+		}
			
 
				+		return erx + P/Q
			
 
				+	}
			
 
				+	if x >= 6 { // inf > |x| >= 6
			
 
				+		if sign {
			
 
				+			return -1
			
 
				+		}
			
 
				+		return 1
			
 
				+	}
			
 
				+	s := 1 / (x * x) // the R/S rational approximations below are polynomials in 1/x**2
			
 
				+	R, S: f64
			
 
				+	if x < 1/0.35 { // |x| < 1 / 0.35  ~ 2.857143
			
 
				+		R = ra0 + s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*ra7))))))
			
 
				+		S = 1 + s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+s*sa8)))))))
			
 
				+	} else { // |x| >= 1 / 0.35  ~ 2.857143
			
 
				+		R = rb0 + s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*rb6)))))
			
 
				+		S = 1 + s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7))))))
			
 
				+	}
			
 
				+	z := transmute(f64)(0xffffffff00000000 & transmute(u64)x) // pseudo-single (20-bit) precision x
			
 
				+	r := exp(-z*z-0.5625) * exp((z-x)*(z+x)+R/S)
			
 
				+	if sign {
			
 
				+		return r/x - 1
			
 
				+	}
			
 
				+	return 1 - r/x
			
 
				+}
			
 
				+
			
 
				+
			
 
				+// erfc is the overload set over the erfc_* variants, grouped by float width.
			
 
				+erfc :: proc{
			
 
				+	erfc_f16, erfc_f16le, erfc_f16be,
			
 
				+	erfc_f32, erfc_f32le, erfc_f32be,
			
 
				+	erfc_f64,
			
 
				+}
			
 
				+
			
 
				+// Narrow-type wrappers around erfc_f64: widen x to f64, evaluate, and convert
			
 
				+// the result back to the argument type.
			
 
				+erfc_f16 :: proc "contextless" (x: f16) -> f16 {
			
 
				+	return f16(erfc_f64(f64(x)))
			
 
				+}
			
 
				+erfc_f16le :: proc "contextless" (x: f16le) -> f16le {
			
 
				+	return f16le(erfc_f64(f64(x)))
			
 
				+}
			
 
				+erfc_f16be :: proc "contextless" (x: f16be) -> f16be {
			
 
				+	return f16be(erfc_f64(f64(x)))
			
 
				+}
			
 
				+erfc_f32 :: proc "contextless" (x: f32) -> f32 {
			
 
				+	return f32(erfc_f64(f64(x)))
			
 
				+}
			
 
				+erfc_f32le :: proc "contextless" (x: f32le) -> f32le {
			
 
				+	return f32le(erfc_f64(f64(x)))
			
 
				+}
			
 
				+erfc_f32be :: proc "contextless" (x: f32be) -> f32be {
			
 
				+	return f32be(erfc_f64(f64(x)))
			
 
				+}
			
 
				+
			
 
				+// erfc_f64 returns the complementary error function of x, using the piecewise
			
 
				+// rational approximations described in the method comment at the top of this file.
			
 
				+// Special cases: NaN gives NaN, +Inf gives 0, -Inf gives 2.
			
 
				+erfc_f64 :: proc "contextless" (x: f64) -> f64 {
			
 
				+	erx :: 0h3FEB0AC160000000 // constant combined with P/Q in the 0.84375 <= |x| < 1.25 branch
			
 
				+	// Coefficients for approximation to  erf in [0, 0.84375]
			
 
				+	efx  :: 0h3FC06EBA8214DB69 // not referenced in this procedure
			
 
				+	efx8 :: 0h3FF06EBA8214DB69 // not referenced in this procedure
			
 
				+	pp0  :: 0h3FC06EBA8214DB68
			
 
				+	pp1  :: 0hBFD4CD7D691CB913
			
 
				+	pp2  :: 0hBF9D2A51DBD7194F
			
 
				+	pp3  :: 0hBF77A291236668E4
			
 
				+	pp4  :: 0hBEF8EAD6120016AC
			
 
				+	qq1  :: 0h3FD97779CDDADC09
			
 
				+	qq2  :: 0h3FB0A54C5536CEBA
			
 
				+	qq3  :: 0h3F74D022C4D36B0F
			
 
				+	qq4  :: 0h3F215DC9221C1A10
			
 
				+	qq5  :: 0hBED09C4342A26120
			
 
				+	// Coefficients for approximation to  erf  in [0.84375, 1.25]
			
 
				+	pa0 :: 0hBF6359B8BEF77538
			
 
				+	pa1 :: 0h3FDA8D00AD92B34D
			
 
				+	pa2 :: 0hBFD7D240FBB8C3F1
			
 
				+	pa3 :: 0h3FD45FCA805120E4
			
 
				+	pa4 :: 0hBFBC63983D3E28EC
			
 
				+	pa5 :: 0h3FA22A36599795EB
			
 
				+	pa6 :: 0hBF61BF380A96073F
			
 
				+	qa1 :: 0h3FBB3E6618EEE323
			
 
				+	qa2 :: 0h3FE14AF092EB6F33
			
 
				+	qa3 :: 0h3FB2635CD99FE9A7
			
 
				+	qa4 :: 0h3FC02660E763351F
			
 
				+	qa5 :: 0h3F8BEDC26B51DD1C
			
 
				+	qa6 :: 0h3F888B545735151D
			
 
				+	// Coefficients for approximation to  erfc in [1.25, 1/0.35]
			
 
				+	ra0 :: 0hBF843412600D6435
			
 
				+	ra1 :: 0hBFE63416E4BA7360
			
 
				+	ra2 :: 0hC0251E0441B0E726
			
 
				+	ra3 :: 0hC04F300AE4CBA38D
			
 
				+	ra4 :: 0hC0644CB184282266
			
 
				+	ra5 :: 0hC067135CEBCCABB2
			
 
				+	ra6 :: 0hC054526557E4D2F2
			
 
				+	ra7 :: 0hC023A0EFC69AC25C
			
 
				+	sa1 :: 0h4033A6B9BD707687
			
 
				+	sa2 :: 0h4061350C526AE721
			
 
				+	sa3 :: 0h407B290DD58A1A71
			
 
				+	sa4 :: 0h40842B1921EC2868
			
 
				+	sa5 :: 0h407AD02157700314
			
 
				+	sa6 :: 0h405B28A3EE48AE2C
			
 
				+	sa7 :: 0h401A47EF8E484A93
			
 
				+	sa8 :: 0hBFAEEFF2EE749A62
			
 
				+	// Coefficients for approximation to  erfc in [1/.35, 28]
			
 
				+	rb0 :: 0hBF84341239E86F4A
			
 
				+	rb1 :: 0hBFE993BA70C285DE
			
 
				+	rb2 :: 0hC031C209555F995A
			
 
				+	rb3 :: 0hC064145D43C5ED98
			
 
				+	rb4 :: 0hC083EC881375F228
			
 
				+	rb5 :: 0hC09004616A2E5992
			
 
				+	rb6 :: 0hC07E384E9BDC383F
			
 
				+	sb1 :: 0h403E568B261D5190
			
 
				+	sb2 :: 0h40745CAE221B9F0A
			
 
				+	sb3 :: 0h409802EB189D5118
			
 
				+	sb4 :: 0h40A8FFB7688C246A
			
 
				+	sb5 :: 0h40A3F219CEDF3BE6
			
 
				+	sb6 :: 0h407DA874E79FE763
			
 
				+	sb7 :: 0hC03670E242712D62
			
 
				+	
			
 
				+	TINY :: 1.0 / (1 << 56) // 2**-56
			
 
				+	// special cases
			
 
				+	switch {
			
 
				+	case is_nan(x):
			
 
				+		return nan_f64()
			
 
				+	case is_inf(x, 1):
			
 
				+		return 0
			
 
				+	case is_inf(x, -1):
			
 
				+		return 2
			
 
				+	}
			
 
				+	x := x // mutable local copy; holds |x| once the sign has been split off below
			
 
				+	sign := false // true when the original argument was negative
			
 
				+	if x < 0 {
			
 
				+		x = -x
			
 
				+		sign = true
			
 
				+	}
			
 
				+	if x < 0.84375 { // |x| < 0.84375
			
 
				+		temp: f64
			
 
				+		if x < TINY { // |x| < 2**-56
			
 
				+			temp = x
			
 
				+		} else {
			
 
				+			z := x * x
			
 
				+			r := pp0 + z*(pp1+z*(pp2+z*(pp3+z*pp4)))
			
 
				+			s := 1 + z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))))
			
 
				+			y := r / s
			
 
				+			if x < 0.25 { // |x| < 1/4
			
 
				+				temp = x + x*y
			
 
				+			} else {
			
 
				+				temp = 0.5 + (x*y + (x - 0.5))
			
 
				+			}
			
 
				+		}
			
 
				+		if sign {
			
 
				+			return 1 + temp
			
 
				+		}
			
 
				+		return 1 - temp
			
 
				+	}
			
 
				+	if x < 1.25 { // 0.84375 <= |x| < 1.25
			
 
				+		s := x - 1
			
 
				+		P := pa0 + s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))))
			
 
				+		Q := 1 + s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))))
			
 
				+		if sign {
			
 
				+			return 1 + erx + P/Q
			
 
				+		}
			
 
				+		return 1 - erx - P/Q
			
 
				+
			
 
				+	}
			
 
				+	if x < 28 { // |x| < 28
			
 
				+		s := 1 / (x * x) // the R/S rational approximations below are polynomials in 1/x**2
			
 
				+		R, S: f64
			
 
				+		if x < 1/0.35 { // |x| < 1 / 0.35 ~ 2.857143
			
 
				+			R = ra0 + s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*ra7))))))
			
 
				+			S = 1 + s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+s*sa8)))))))
			
 
				+		} else { // |x| >= 1 / 0.35 ~ 2.857143
			
 
				+			if sign && x > 6 {
			
 
				+				return 2 // x < -6
			
 
				+			}
			
 
				+			R = rb0 + s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*rb6)))))
			
 
				+			S = 1 + s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7))))))
			
 
				+		}
			
 
				+		z := transmute(f64)(0xffffffff00000000 & transmute(u64)x) // pseudo-single (20-bit) precision x
			
 
				+		r := exp(-z*z-0.5625) * exp((z-x)*(z+x)+R/S)
			
 
				+		if sign {
			
 
				+			return 2 - r/x
			
 
				+		}
			
 
				+		return r / x
			
 
				+	}
			
 
				+	if sign { // |x| >= 28 from here on
			
 
				+		return 2
			
 
				+	}
			
 
				+	return 0
			
 
				+}
			
--- a/core/math/math_gamma.odin
+++ b/core/math/math_gamma.odin
@@ -0,0 +1,226 @@
 
				+package math
			
 
				+
			
 
				+// The original C code, the long comment, and the constants
			
 
				+// below are from http://netlib.sandia.gov/cephes/cprob/gamma.c.
			
 
				+//
			
 
				+//      tgamma.c
			
 
				+//
			
 
				+//      Gamma function
			
 
				+//
			
 
				+// SYNOPSIS:
			
 
				+//
			
 
				+// double x, y, tgamma();
			
 
				+// extern int signgam;
			
 
				+//
			
 
				+// y = tgamma( x );
			
 
				+//
			
 
				+// DESCRIPTION:
			
 
				+//
			
 
				+// Returns gamma function of the argument. The result is
			
 
				+// correctly signed, and the sign (+1 or -1) is also
			
 
				+// returned in a global (extern) variable named signgam.
			
 
				+// This variable is also filled in by the logarithmic gamma
			
 
				+// function lgamma().
			
 
				+//
			
 
				+// Arguments |x| <= 34 are reduced by recurrence and the function
			
 
				+// approximated by a rational function of degree 6/7 in the
			
 
				+// interval (2,3).  Large arguments are handled by Stirling's
			
 
				+// formula. Large negative arguments are made positive using
			
 
				+// a reflection formula.
			
 
				+//
			
 
				+// ACCURACY:
			
 
				+//
			
 
				+//                      Relative error:
			
 
				+// arithmetic   domain     # trials      peak         rms
			
 
				+//    DEC      -34, 34      10000       1.3e-16     2.5e-17
			
 
				+//    IEEE    -170,-33      20000       2.3e-15     3.3e-16
			
 
				+//    IEEE     -33,  33     20000       9.4e-16     2.2e-16
			
 
				+//    IEEE      33, 171.6   20000       2.3e-15     3.2e-16
			
 
				+//
			
 
				+// Error for arguments outside the test range will be larger
			
 
				+// owing to error amplification by the exponential function.
			
 
				+//
			
 
				+// Cephes Math Library Release 2.8:  June, 2000
			
 
				+// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
			
 
				+//
			
 
				+// The readme file at http://netlib.sandia.gov/cephes/ says:
			
 
				+//    Some software in this archive may be from the book _Methods and
			
 
				+// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
			
 
				+// International, 1989) or from the Cephes Mathematical Library, a
			
 
				+// commercial product. In either event, it is copyrighted by the author.
			
 
				+// What you see here may be used freely but it comes with no support or
			
 
				+// guarantee.
			
 
				+//
			
 
				+//   The two known misprints in the book are repaired here in the
			
 
				+// source listings for the gamma function and the incomplete beta
			
 
				+// integral.
			
 
				+//
			
 
				+//   Stephen L. Moshier
			
 
				+//   [email protected]
			
 
				+
			
 
				+// Gamma function computed by Stirling's formula.
			
 
				+// The pair of results must be multiplied together to get the actual answer.
			
 
				+// The multiplication is left to the caller so that, if careful, the caller can avoid
			
 
				+// infinity for 172 <= x <= 180.
			
 
				+// The polynomial is valid for 33 <= x <= 172; larger values are only used
			
 
				+// in reciprocal and produce denormalized floats. The lower precision there
			
 
				+// masks any imprecision in the polynomial.
			
 
				+@(private="file")
			
 
				+stirling :: proc "contextless" (x: f64) -> (f64, f64) {
			
 
				+	@(static) gamS := [?]f64{ // coefficients of the degree-4 polynomial in 1/x evaluated below
			
 
				+		+7.87311395793093628397e-04,
			
 
				+		-2.29549961613378126380e-04,
			
 
				+		-2.68132617805781232825e-03,
			
 
				+		+3.47222221605458667310e-03,
			
 
				+		+8.33333333333482257126e-02,
			
 
				+	}
			
 
				+	
			
 
				+	if x > 200 { // beyond this threshold the result pair is simply (+Inf, 1)
			
 
				+		return inf_f64(1), 1
			
 
				+	}
			
 
				+	SQRT_TWO_PI :: 0h40040d931ff62706 // 2.506628274631000502417
			
 
				+	MAX_STIRLING :: 143.01608
			
 
				+	w := 1 / x
			
 
				+	w = 1 + w*((((gamS[0]*w+gamS[1])*w+gamS[2])*w+gamS[3])*w+gamS[4]) // Horner form: 1 + w*poly(w)
			
 
				+	y1 := exp(x)
			
 
				+	y2 := 1.0
			
 
				+	if x > MAX_STIRLING { // avoid pow() overflow
			
 
				+		v := pow(x, 0.5*x-0.25) // v*v == x**(x-0.5)
			
 
				+		y1, y2 = v, v/y1 // the two factors are v and v/exp(x)
			
 
				+	} else {
			
 
				+		y1 = pow(x, x-0.5) / y1
			
 
				+	}
			
 
				+	return y1, SQRT_TWO_PI * w * y2
			
 
				+}
			
 
				+
			
 
				+// gamma_f64 returns the Gamma function of x.
			
 
				+// Special cases: negative integers, -Inf and NaN give NaN; +Inf gives +Inf;
			
 
				+// +0 gives +Inf and -0 gives -Inf.
			
 
				+gamma_f64 :: proc "contextless" (x: f64) -> f64 {
			
 
				+	is_neg_int :: proc "contextless" (x: f64) -> bool { // true when x < 0 and the second modf result is zero
			
 
				+		if x < 0 {
			
 
				+			_, xf := modf(x)
			
 
				+			return xf == 0
			
 
				+		}
			
 
				+		return false
			
 
				+	}
			
 
				+	
			
 
				+	@(static) gamP := [?]f64{ // numerator coefficients for the final rational approximation
			
 
				+		1.60119522476751861407e-04,
			
 
				+		1.19135147006586384913e-03,
			
 
				+		1.04213797561761569935e-02,
			
 
				+		4.76367800457137231464e-02,
			
 
				+		2.07448227648435975150e-01,
			
 
				+		4.94214826801497100753e-01,
			
 
				+		9.99999999999999996796e-01,
			
 
				+	}
			
 
				+	@(static) gamQ := [?]f64{ // denominator coefficients for the final rational approximation
			
 
				+		-2.31581873324120129819e-05,
			
 
				+		+5.39605580493303397842e-04,
			
 
				+		-4.45641913851797240494e-03,
			
 
				+		+1.18139785222060435552e-02,
			
 
				+		+3.58236398605498653373e-02,
			
 
				+		-2.34591795718243348568e-01,
			
 
				+		+7.14304917030273074085e-02,
			
 
				+		+1.00000000000000000320e+00,
			
 
				+	}
			
 
				+
			
 
				+	
			
 
				+	EULER :: 0.57721566490153286060651209008240243104215933593992 // A001620
			
 
				+	
			
 
				+	switch {
			
 
				+	case is_neg_int(x) || is_inf(x, -1) || is_nan(x):
			
 
				+		return nan_f64()
			
 
				+	case is_inf(x, 1):
			
 
				+		return inf_f64(1)
			
 
				+	case x == 0:
			
 
				+		if signbit(x) {
			
 
				+			return inf_f64(-1)
			
 
				+		}
			
 
				+		return inf_f64(1)
			
 
				+	}
			
 
				+	
			
 
				+	x := x // mutable local copy; reduced step by step further down
			
 
				+	q := abs(x)
			
 
				+	p := floor(q)
			
 
				+	if q > 33 { // large |x|: evaluate through stirling()
			
 
				+		if x >= 0 {
			
 
				+			y1, y2 := stirling(x)
			
 
				+			return y1 * y2
			
 
				+		}
			
 
				+		// Note: x is negative but (checked above) not a negative integer,
			
 
				+		// so x must be small enough to be in range for conversion to i64.
			
 
				+		// If |x| were >= 2⁶³ it would have to be an integer.
			
 
				+		signgam := 1 // sign of the result; -1 when floor(|x|) is even
			
 
				+		if ip := i64(p); ip&1 == 0 {
			
 
				+			signgam = -1
			
 
				+		}
			
 
				+		z := q - p
			
 
				+		if z > 0.5 {
			
 
				+			p = p + 1
			
 
				+			z = q - p
			
 
				+		}
			
 
				+		z = q * sin(PI*z)
			
 
				+		if z == 0 {
			
 
				+			return inf_f64(signgam)
			
 
				+		}
			
 
				+		sq1, sq2 := stirling(q)
			
 
				+		absz := abs(z)
			
 
				+		d := absz * sq1 * sq2
			
 
				+		if is_inf(d, 0) { // product overflowed: divide by the factors one at a time instead
			
 
				+			z = PI / absz / sq1 / sq2
			
 
				+		} else {
			
 
				+			z = PI / d
			
 
				+		}
			
 
				+		return f64(signgam) * z
			
 
				+	}
			
 
				+
			
 
				+	// Reduce argument
			
 
				+	z := 1.0
			
 
				+	for x >= 3 {
			
 
				+		x = x - 1
			
 
				+		z = z * x
			
 
				+	}
			
 
				+	for x < 0 {
			
 
				+		if x > -1e-09 { // very close to zero: same small-argument formula as in the next loop
			
 
				+			if x == 0 {
			
 
				+				return inf_f64(1)
			
 
				+			}
			
 
				+			return z / ((1 + EULER*x) * x)
			
 
				+		}
			
 
				+		z = z / x
			
 
				+		x = x + 1
			
 
				+	}
			
 
				+	for x < 2 {
			
 
				+		if x < 1e-09 { // very close to zero: small-argument formula
			
 
				+			if x == 0 {
			
 
				+				return inf_f64(1)
			
 
				+			}
			
 
				+			return z / ((1 + EULER*x) * x)
			
 
				+		}
			
 
				+		z = z / x
			
 
				+		x = x + 1
			
 
				+	}
			
 
				+
			
 
				+	if x == 2 {
			
 
				+		return z
			
 
				+	}
			
 
				+
			
 
				+	x = x - 2 // the loops above leave x in [2, 3); p and q are reused for the rational approximation in x-2
			
 
				+	p = (((((x*gamP[0]+gamP[1])*x+gamP[2])*x+gamP[3])*x+gamP[4])*x+gamP[5])*x + gamP[6]
			
 
				+	q = ((((((x*gamQ[0]+gamQ[1])*x+gamQ[2])*x+gamQ[3])*x+gamQ[4])*x+gamQ[5])*x+gamQ[6])*x + gamQ[7]
			
 
				+	return z * p / q
			
 
				+}
			
 
				+
			
 
				+
			
 
				+// Typed wrappers around gamma_f64: widen x to f64, evaluate, and convert the
			
 
				+// result back to the argument type.
			
 
				+gamma_f16 :: proc "contextless" (x: f16) -> f16 {
			
 
				+	return f16(gamma_f64(f64(x)))
			
 
				+}
			
 
				+gamma_f16le :: proc "contextless" (x: f16le) -> f16le {
			
 
				+	return f16le(gamma_f64(f64(x)))
			
 
				+}
			
 
				+gamma_f16be :: proc "contextless" (x: f16be) -> f16be {
			
 
				+	return f16be(gamma_f64(f64(x)))
			
 
				+}
			
 
				+gamma_f32 :: proc "contextless" (x: f32) -> f32 {
			
 
				+	return f32(gamma_f64(f64(x)))
			
 
				+}
			
 
				+gamma_f32le :: proc "contextless" (x: f32le) -> f32le {
			
 
				+	return f32le(gamma_f64(f64(x)))
			
 
				+}
			
 
				+gamma_f32be :: proc "contextless" (x: f32be) -> f32be {
			
 
				+	return f32be(gamma_f64(f64(x)))
			
 
				+}
			
 
				+gamma_f64le :: proc "contextless" (x: f64le) -> f64le {
			
 
				+	return f64le(gamma_f64(f64(x)))
			
 
				+}
			
 
				+gamma_f64be :: proc "contextless" (x: f64be) -> f64be {
			
 
				+	return f64be(gamma_f64(f64(x)))
			
 
				+}
			
 
				+
			
 
				+// gamma is the overload set over every gamma_* variant, one member per line.
			
 
				+gamma :: proc{
			
 
				+	gamma_f16,
			
 
				+	gamma_f16le,
			
 
				+	gamma_f16be,
			
 
				+	gamma_f32,
			
 
				+	gamma_f32le,
			
 
				+	gamma_f32be,
			
 
				+	gamma_f64,
			
 
				+	gamma_f64le,
			
 
				+	gamma_f64be,
			
 
				+}
			
--- a/core/math/math_lgamma.odin
+++ b/core/math/math_lgamma.odin
@@ -0,0 +1,361 @@
 
				+package math
			
 
				+
			
 
				+// The original C code and the long comment below are
			
 
				+// from FreeBSD's /usr/src/lib/msun/src/e_lgamma_r.c and
			
 
				+// came with this notice. 
			
 
				+//
			
 
				+// ====================================================
			
 
				+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
			
 
				+//
			
 
				+// Developed at SunPro, a Sun Microsystems, Inc. business.
			
 
				+// Permission to use, copy, modify, and distribute this
			
 
				+// software is freely granted, provided that this notice
			
 
				+// is preserved.
			
 
				+// ====================================================
			
 
				+//
			
 
				+// __ieee754_lgamma_r(x, signgamp)
			
 
				+// Reentrant version of the logarithm of the Gamma function
			
 
				+// with user provided pointer for the sign of Gamma(x).
			
 
				+//
			
 
				+// Method:
			
 
				+//   1. Argument Reduction for 0 < x <= 8
			
 
				+//      Since gamma(1+s)=s*gamma(s), for x in [0,8], we may
			
 
				+//      reduce x to a number in [1.5,2.5] by
			
 
				+//              lgamma(1+s) = log(s) + lgamma(s)
			
 
				+//      for example,
			
 
				+//              lgamma(7.3) = log(6.3) + lgamma(6.3)
			
 
				+//                          = log(6.3*5.3) + lgamma(5.3)
			
 
				+//                          = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3)
			
 
				+//   2. Polynomial approximation of lgamma around its
			
 
				+//      minimum (ymin=1.461632144968362245) to maintain monotonicity.
			
 
				+//      On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use
			
 
				+//              Let z = x-ymin;
			
 
				+//              lgamma(x) = -1.214862905358496078218 + z**2*poly(z)
			
 
				+//              poly(z) is a 14 degree polynomial.
			
 
				+//   2. Rational approximation in the primary interval [2,3]
			
 
				+//      We use the following approximation:
			
 
				+//              s = x-2.0;
			
 
				+//              lgamma(x) = 0.5*s + s*P(s)/Q(s)
			
 
				+//      with accuracy
			
 
				+//              |P/Q - (lgamma(x)-0.5s)| < 2**-61.71
			
 
				+//      Our algorithms are based on the following observation
			
 
				+//
			
 
				+//                             zeta(2)-1    2    zeta(3)-1    3
			
 
				+// lgamma(2+s) = s*(1-Euler) + --------- * s  -  --------- * s  + ...
			
 
				+//                                 2                 3
			
 
				+//
			
 
				+//      where Euler = 0.5772156649... is the Euler constant, which
			
 
				+//      is very close to 0.5.
			
 
				+//
			
 
				+//   3. For x>=8, we have
			
 
				+//      lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+....
			
 
				+//      (better formula:
			
 
				+//         lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...)
			
 
				+//      Let z = 1/x, then we approximate
			
 
				+//              f(z) = lgamma(x) - (x-0.5)(log(x)-1)
			
 
				+//      by
			
 
				+//                                  3       5             11
			
 
				+//              w = w0 + w1*z + w2*z  + w3*z  + ... + w6*z
			
 
				+//      where
			
 
				+//              |w - f(z)| < 2**-58.74
			
 
				+//
			
 
				+//   4. For negative x, since (G is gamma function)
			
 
				+//              -x*G(-x)*G(x) = pi/sin(pi*x),
			
 
				+//      we have
			
 
				+//              G(x) = pi/(sin(pi*x)*(-x)*G(-x))
			
 
				+//      since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0
			
 
				+//      Hence, for x<0, signgam = sign(sin(pi*x)) and
			
 
				+//              lgamma(x) = log(|Gamma(x)|)
			
 
				+//                        = log(pi/(|x*sin(pi*x)|)) - lgamma(-x);
			
 
				+//      Note: one should avoid computing pi*(-x) directly in the
			
 
				+//            computation of sin(pi*(-x)).
			
 
				+//
			
 
				+//   5. Special Cases
			
 
				+//              lgamma(2+s) ~ s*(1-Euler) for tiny s
			
 
				+//              lgamma(1)=lgamma(2)=0
			
 
				+//              lgamma(x) ~ -log(x) for tiny x
			
 
				+//              lgamma(0) = lgamma(inf) = inf
			
 
				+//              lgamma(-integer) = +-inf
			
 
				+//
			
 
				+//
			
 
				+
			
 
				+
			
 
				+lgamma_f64 :: proc "contextless" (x: f64) -> (lgamma: f64, sign: int) {
			
 
				+	sin_pi :: proc "contextless" (x: f64) -> f64 {
			
 
				+		if x < 0.25 {
			
 
				+			return -sin(PI * x)
			
 
				+		}
			
 
				+		x := x
			
 
				+
			
 
				+		// argument reduction
			
 
				+		z := floor(x)
			
 
				+		n: int
			
 
				+		if z != x { // inexact
			
 
				+			x = mod(x, 2)
			
 
				+			n = int(x * 4)
			
 
				+		} else {
			
 
				+			if x >= TWO_53 { // x must be even
			
 
				+				x = 0
			
 
				+				n = 0
			
 
				+			} else {
			
 
				+				if x < TWO_52 {
			
 
				+					z = x + TWO_52 // exact
			
 
				+				}
			
 
				+				n = int(1 & transmute(u64)z)
			
 
				+				x = f64(n)
			
 
				+				n <<= 2
			
 
				+			}
			
 
				+		}
			
 
				+		switch n {
			
 
				+		case 0:
			
 
				+			x = sin(PI * x)
			
 
				+		case 1, 2:
			
 
				+			x = cos(PI * (0.5 - x))
			
 
				+		case 3, 4:
			
 
				+			x = sin(PI * (1 - x))
			
 
				+		case 5, 6:
			
 
				+			x = -cos(PI * (x - 1.5))
			
 
				+		case:
			
 
				+			x = sin(PI * (x - 2))
			
 
				+		}
			
 
				+		return -x
			
 
				+	}
			
 
				+	
			
 
				+	@static lgamA := [?]f64{
			
 
				+		0h3FB3C467E37DB0C8,
			
 
				+		0h3FD4A34CC4A60FAD,
			
 
				+		0h3FB13E001A5562A7,
			
 
				+		0h3F951322AC92547B,
			
 
				+		0h3F7E404FB68FEFE8,
			
 
				+		0h3F67ADD8CCB7926B,
			
 
				+		0h3F538A94116F3F5D,
			
 
				+		0h3F40B6C689B99C00,
			
 
				+		0h3F2CF2ECED10E54D,
			
 
				+		0h3F1C5088987DFB07,
			
 
				+		0h3EFA7074428CFA52,
			
 
				+		0h3F07858E90A45837,
			
 
				+	}
			
 
				+	@static lgamR := [?]f64{
			
 
				+		1.0,
			
 
				+		0h3FF645A762C4AB74,
			
 
				+		0h3FE71A1893D3DCDC,
			
 
				+		0h3FC601EDCCFBDF27,
			
 
				+		0h3F9317EA742ED475,
			
 
				+		0h3F497DDACA41A95B,
			
 
				+		0h3EDEBAF7A5B38140,
			
 
				+	}
			
 
				+	@static lgamS := [?]f64{
			
 
				+		0hBFB3C467E37DB0C8,
			
 
				+		0h3FCB848B36E20878,
			
 
				+		0h3FD4D98F4F139F59,
			
 
				+		0h3FC2BB9CBEE5F2F7,
			
 
				+		0h3F9B481C7E939961,
			
 
				+		0h3F5E26B67368F239,
			
 
				+		0h3F00BFECDD17E945,
			
 
				+	}
			
 
				+	@static lgamT := [?]f64{
			
 
				+		0h3FDEF72BC8EE38A2,
			
 
				+		0hBFC2E4278DC6C509,
			
 
				+		0h3FB08B4294D5419B,
			
 
				+		0hBFA0C9A8DF35B713,
			
 
				+		0h3F9266E7970AF9EC,
			
 
				+		0hBF851F9FBA91EC6A,
			
 
				+		0h3F78FCE0E370E344,
			
 
				+		0hBF6E2EFFB3E914D7,
			
 
				+		0h3F6282D32E15C915,
			
 
				+		0hBF56FE8EBF2D1AF1,
			
 
				+		0h3F4CDF0CEF61A8E9,
			
 
				+		0hBF41A6109C73E0EC,
			
 
				+		0h3F34AF6D6C0EBBF7,
			
 
				+		0hBF347F24ECC38C38,
			
 
				+		0h3F35FD3EE8C2D3F4,
			
 
				+	}
			
 
				+	@static lgamU := [?]f64{
			
 
				+		0hBFB3C467E37DB0C8,
			
 
				+		0h3FE4401E8B005DFF,
			
 
				+		0h3FF7475CD119BD6F,
			
 
				+		0h3FEF497644EA8450,
			
 
				+		0h3FCD4EAEF6010924,
			
 
				+		0h3F8B678BBF2BAB09,
			
 
				+	}
			
 
				+	@static lgamV := [?]f64{
			
 
				+		1.0,
			
 
				+		0h4003A5D7C2BD619C,
			
 
				+		0h40010725A42B18F5,
			
 
				+		0h3FE89DFBE45050AF,
			
 
				+		0h3FBAAE55D6537C88,
			
 
				+		0h3F6A5ABB57D0CF61,
			
 
				+	}
			
 
				+	@static lgamW := [?]f64{
			
 
				+		0h3FDACFE390C97D69,
			
 
				+		0h3FB555555555553B,
			
 
				+		0hBF66C16C16B02E5C,
			
 
				+		0h3F4A019F98CF38B6,
			
 
				+		0hBF4380CB8C0FE741,
			
 
				+		0h3F4B67BA4CDAD5D1,
			
 
				+		0hBF5AB89D0B9E43E4,
			
 
				+	}
			
 
				+
			
 
				+	
			
 
				+	Y_MIN  :: 0h3ff762d86356be3f // 1.461632144968362245
			
 
				+	TWO_52 :: 0h4330000000000000 // ~4.5036e+15
			
 
				+	TWO_53 :: 0h4340000000000000 // ~9.0072e+15
			
 
				+	TWO_58 :: 0h4390000000000000 // ~2.8823e+17
			
 
				+	TINY   :: 0h3b90000000000000 // ~8.47033e-22
			
 
				+	Tc     :: 0h3FF762D86356BE3F
			
 
				+	Tf     :: 0hBFBF19B9BCC38A42
			
 
				+	Tt     :: 0hBC50C7CAA48A971F
			
 
				+	
			
 
				+	// special cases
			
 
				+	sign = 1
			
 
				+	switch {
			
 
				+	case is_nan(x):
			
 
				+		lgamma = x
			
 
				+		return
			
 
				+	case is_inf(x):
			
 
				+		lgamma = x
			
 
				+		return
			
 
				+	case x == 0:
			
 
				+		lgamma = inf_f64(1)
			
 
				+		return
			
 
				+	}
			
 
				+
			
 
				+	x := x
			
 
				+	neg := false
			
 
				+	if x < 0 {
			
 
				+		x = -x
			
 
				+		neg = true
			
 
				+	}
			
 
				+
			
 
				+	if x < TINY { // if |x| < 2**-70, return -log(|x|)
			
 
				+		if neg {
			
 
				+			sign = -1
			
 
				+		}
			
 
				+		lgamma = -ln(x)
			
 
				+		return
			
 
				+	}
			
 
				+	nadj: f64
			
 
				+	if neg {
			
 
				+		if x >= TWO_52 { // |x| >= 2**52, must be -integer
			
 
				+			lgamma = inf_f64(1)
			
 
				+			return
			
 
				+		}
			
 
				+		t := sin_pi(x)
			
 
				+		if t == 0 {
			
 
				+			lgamma = inf_f64(1) // -integer
			
 
				+			return
			
 
				+		}
			
 
				+		nadj = ln(PI / abs(t*x))
			
 
				+		if t < 0 {
			
 
				+			sign = -1
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	switch {
			
 
				+	case x == 1 || x == 2: // purge off 1 and 2
			
 
				+		lgamma = 0
			
 
				+		return
			
 
				+	case x < 2: // use lgamma(x) = lgamma(x+1) - log(x)
			
 
				+		y: f64
			
 
				+		i: int
			
 
				+		if x <= 0.9 {
			
 
				+			lgamma = -ln(x)
			
 
				+			switch {
			
 
				+			case x >= (Y_MIN - 1 + 0.27): // 0.7316 <= x <=  0.9
			
 
				+				y = 1 - x
			
 
				+				i = 0
			
 
				+			case x >= (Y_MIN - 1 - 0.27): // 0.2316 <= x < 0.7316
			
 
				+				y = x - (Tc - 1)
			
 
				+				i = 1
			
 
				+			case: // 0 < x < 0.2316
			
 
				+				y = x
			
 
				+				i = 2
			
 
				+			}
			
 
				+		} else {
			
 
				+			lgamma = 0
			
 
				+			switch {
			
 
				+			case x >= (Y_MIN + 0.27): // 1.7316 <= x < 2
			
 
				+				y = 2 - x
			
 
				+				i = 0
			
 
				+			case x >= (Y_MIN - 0.27): // 1.2316 <= x < 1.7316
			
 
				+				y = x - Tc
			
 
				+				i = 1
			
 
				+			case: // 0.9 < x < 1.2316
			
 
				+				y = x - 1
			
 
				+				i = 2
			
 
				+			}
			
 
				+		}
			
 
				+		switch i {
			
 
				+		case 0:
			
 
				+			z := y * y
			
 
				+			p1 := lgamA[0] + z*(lgamA[2]+z*(lgamA[4]+z*(lgamA[6]+z*(lgamA[8]+z*lgamA[10]))))
			
 
				+			p2 := z * (lgamA[1] + z*(+lgamA[3]+z*(lgamA[5]+z*(lgamA[7]+z*(lgamA[9]+z*lgamA[11])))))
			
 
				+			p := y*p1 + p2
			
 
				+			lgamma += (p - 0.5*y)
			
 
				+		case 1:
			
 
				+			z := y * y
			
 
				+			w := z * y
			
 
				+			p1 := lgamT[0] + w*(lgamT[3]+w*(lgamT[6]+w*(lgamT[9]+w*lgamT[12]))) // parallel comp
			
 
				+			p2 := lgamT[1] + w*(lgamT[4]+w*(lgamT[7]+w*(lgamT[10]+w*lgamT[13])))
			
 
				+			p3 := lgamT[2] + w*(lgamT[5]+w*(lgamT[8]+w*(lgamT[11]+w*lgamT[14])))
			
 
				+			p := z*p1 - (Tt - w*(p2+y*p3))
			
 
				+			lgamma += (Tf + p)
			
 
				+		case 2:
			
 
				+			p1 := y * (lgamU[0] + y*(lgamU[1]+y*(lgamU[2]+y*(lgamU[3]+y*(lgamU[4]+y*lgamU[5])))))
			
 
				+			p2 := 1 + y*(lgamV[1]+y*(lgamV[2]+y*(lgamV[3]+y*(lgamV[4]+y*lgamV[5]))))
			
 
				+			lgamma += (-0.5*y + p1/p2)
			
 
				+		}
			
 
				+	case x < 8: // 2 <= x < 8
			
 
				+		i := int(x)
			
 
				+		y := x - f64(i)
			
 
				+		p := y * (lgamS[0] + y*(lgamS[1]+y*(lgamS[2]+y*(lgamS[3]+y*(lgamS[4]+y*(lgamS[5]+y*lgamS[6]))))))
			
 
				+		q := 1 + y*(lgamR[1]+y*(lgamR[2]+y*(lgamR[3]+y*(lgamR[4]+y*(lgamR[5]+y*lgamR[6])))))
			
 
				+		lgamma = 0.5*y + p/q
			
 
				+		z := 1.0 // lgamma(1+s) = ln(s) + lgamma(s)
			
 
				+		switch i {
			
 
				+		case 7:
			
 
				+			z *= (y + 6)
			
 
				+			fallthrough
			
 
				+		case 6:
			
 
				+			z *= (y + 5)
			
 
				+			fallthrough
			
 
				+		case 5:
			
 
				+			z *= (y + 4)
			
 
				+			fallthrough
			
 
				+		case 4:
			
 
				+			z *= (y + 3)
			
 
				+			fallthrough
			
 
				+		case 3:
			
 
				+			z *= (y + 2)
			
 
				+			lgamma += ln(z)
			
 
				+		}
			
 
				+	case x < TWO_58: // 8 <= x < 2**58
			
 
				+		t := ln(x)
			
 
				+		z := 1 / x
			
 
				+		y := z * z
			
 
				+		w := lgamW[0] + z*(lgamW[1]+y*(lgamW[2]+y*(lgamW[3]+y*(lgamW[4]+y*(lgamW[5]+y*lgamW[6])))))
			
 
				+		lgamma = (x-0.5)*(t-1) + w
			
 
				+	case: // 2**58 <= x <= Inf
			
 
				+		lgamma = x * (ln(x) - 1)
			
 
				+	}
			
 
				+	if neg {
			
 
				+		lgamma = nadj - lgamma
			
 
				+	}
			
 
				+	return
			
 
				+}
			
 
				+
			
 
				+
			
 
				+lgamma_f16   :: proc "contextless" (x: f16)   -> (lgamma: f16, sign: int)   { r, s := lgamma_f64(f64(x)); return f16(r), s }
			
 
				+lgamma_f32   :: proc "contextless" (x: f32)   -> (lgamma: f32, sign: int)   { r, s := lgamma_f64(f64(x)); return f32(r), s }
			
 
				+lgamma_f16le :: proc "contextless" (x: f16le) -> (lgamma: f16le, sign: int) { r, s := lgamma_f64(f64(x)); return f16le(r), s }
			
 
				+lgamma_f16be :: proc "contextless" (x: f16be) -> (lgamma: f16be, sign: int) { r, s := lgamma_f64(f64(x)); return f16be(r), s }
			
 
				+lgamma_f32le :: proc "contextless" (x: f32le) -> (lgamma: f32le, sign: int) { r, s := lgamma_f64(f64(x)); return f32le(r), s }
			
 
				+lgamma_f32be :: proc "contextless" (x: f32be) -> (lgamma: f32be, sign: int) { r, s := lgamma_f64(f64(x)); return f32be(r), s }
			
 
				+lgamma_f64le :: proc "contextless" (x: f64le) -> (lgamma: f64le, sign: int) { r, s := lgamma_f64(f64(x)); return f64le(r), s }
			
 
				+lgamma_f64be :: proc "contextless" (x: f64be) -> (lgamma: f64be, sign: int) { r, s := lgamma_f64(f64(x)); return f64be(r), s }
			
 
				+
			
 
				+lgamma :: proc{
			
 
				+	lgamma_f16, lgamma_f16le, lgamma_f16be,
			
 
				+	lgamma_f32, lgamma_f32le, lgamma_f32be,
			
 
				+	lgamma_f64, lgamma_f64le, lgamma_f64be,
			
 
				+}
			
--- a/core/math/math_log1p.odin
+++ b/core/math/math_log1p.odin
@@ -0,0 +1,198 @@
 
				+package math
			
 
				+
			
 
				+// The original C code, the long comment, and the constants
			
 
				+// below are from FreeBSD's /usr/src/lib/msun/src/s_log1p.c
			
 
				+// and came with this notice. This port of the Go code is a simplified
			
 
				+// version of the original C.
			
 
				+//
			
 
				+// ====================================================
			
 
				+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
			
 
				+//
			
 
				+// Developed at SunPro, a Sun Microsystems, Inc. business.
			
 
				+// Permission to use, copy, modify, and distribute this
			
 
				+// software is freely granted, provided that this notice
			
 
				+// is preserved.
			
 
				+// ====================================================
			
 
				+//
			
 
				+//
			
 
				+// double log1p(double x)
			
 
				+//
			
 
				+// Method :
			
 
				+//   1. Argument Reduction: find k and f such that
			
 
				+//                      1+x = 2**k * (1+f),
			
 
				+//         where  sqrt(2)/2 < 1+f < sqrt(2) .
			
 
				+//
			
 
				+//      Note. If k=0, then f=x is exact. However, if k!=0, then f
			
 
				+//      may not be representable exactly. In that case, a correction
			
 
				+//      term is needed. Let u=1+x rounded. Let c = (1+x)-u, then
			
 
				+//      log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
			
 
				+//      and add back the correction term c/u.
			
 
				+//      (Note: when x > 2**53, one can simply return log(x))
			
 
				+//
			
 
				+//   2. Approximation of log1p(f).
			
 
				+//      Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
			
 
				+//               = 2s + 2/3 s**3 + 2/5 s**5 + .....,
			
 
				+//               = 2s + s*R
			
 
				+//      We use a special Remez algorithm on [0,0.1716] to generate
			
 
				+//      a polynomial of degree 14 to approximate R. The maximum error
			
 
				+//      of this polynomial approximation is bounded by 2**-58.45. In
			
 
				+//      other words,
			
 
				+//                      2      4      6      8      10      12      14
			
 
				+//          R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s  +Lp6*s  +Lp7*s
			
 
				+//      (the values of Lp1 to Lp7 are listed in the program)
			
 
				+//      and
			
 
				+//          |      2          14          |     -58.45
			
 
				+//          | Lp1*s +...+Lp7*s    -  R(z) | <= 2
			
 
				+//          |                             |
			
 
				+//      Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
			
 
				+//      In order to guarantee error in log below 1ulp, we compute log
			
 
				+//      by
			
 
				+//              log1p(f) = f - (hfsq - s*(hfsq+R)).
			
 
				+//
			
 
				+//   3. Finally, log1p(x) = k*ln2 + log1p(f).
			
 
				+//                        = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
			
 
				+//      Here ln2 is split into two floating point number:
			
 
				+//                   ln2_hi + ln2_lo,
			
 
				+//      where n*ln2_hi is always exact for |n| < 2000.
			
 
				+//
			
 
				+// Special cases:
			
 
				+//      log1p(x) is NaN with signal if x < -1 (including -INF) ;
			
 
				+//      log1p(+INF) is +INF; log1p(-1) is -INF with signal;
			
 
				+//      log1p(NaN) is that NaN with no signal.
			
 
				+//
			
 
				+// Accuracy:
			
 
				+//      according to an error analysis, the error is always less than
			
 
				+//      1 ulp (unit in the last place).
			
 
				+//
			
 
				+// Constants:
			
 
				+// The hexadecimal values are the intended ones for the following
			
 
				+// constants. The decimal values may be used, provided that the
			
 
				+// compiler will convert from decimal to binary accurately enough
			
 
				+// to produce the hexadecimal values shown.
			
 
				+//
			
 
				+// Note: Assuming log() returns an accurate answer, the following
			
 
				+//       algorithm can be used to compute log1p(x) to within a few ULP:
			
 
				+//
			
 
				+//              u = 1+x;
			
 
				+//              if(u==1.0) return x ; else
			
 
				+//                         return log(u)*(x/(u-1.0));
			
 
				+//
			
 
				+//       See HP-15C Advanced Functions Handbook, p.193.
			
 
				+
			
 
				+log1p :: proc {
			
 
				+	log1p_f16,
			
 
				+	log1p_f32,
			
 
				+	log1p_f64,
			
 
				+	log1p_f16le,
			
 
				+	log1p_f16be,
			
 
				+	log1p_f32le,
			
 
				+	log1p_f32be,
			
 
				+	log1p_f64le,
			
 
				+	log1p_f64be,
			
 
				+}
			
 
				+log1p_f16   :: proc "contextless" (x: f16)   -> f16   { return f16(log1p_f64(f64(x))) }
			
 
				+log1p_f32   :: proc "contextless" (x: f32)   -> f32   { return f32(log1p_f64(f64(x))) }
			
 
				+log1p_f16le :: proc "contextless" (x: f16le) -> f16le { return f16le(log1p_f64(f64(x))) }
			
 
				+log1p_f16be :: proc "contextless" (x: f16be) -> f16be { return f16be(log1p_f64(f64(x))) }
			
 
				+log1p_f32le :: proc "contextless" (x: f32le) -> f32le { return f32le(log1p_f64(f64(x))) }
			
 
				+log1p_f32be :: proc "contextless" (x: f32be) -> f32be { return f32be(log1p_f64(f64(x))) }
			
 
				+log1p_f64le :: proc "contextless" (x: f64le) -> f64le { return f64le(log1p_f64(f64(x))) }
			
 
				+log1p_f64be :: proc "contextless" (x: f64be) -> f64be { return f64be(log1p_f64(f64(x))) }
			
 
				+
			
 
				+log1p_f64 :: proc "contextless" (x: f64) -> f64 {
			
 
				+	SQRT2_M1      :: 0h3fda827999fcef34 // sqrt(2)-1 
			
 
				+	SQRT2_HALF_M1 :: 0hbfd2bec333018866 // sqrt(2)/2-1
			
 
				+	SMALL         :: 0h3e20000000000000 // 2**-29
			
 
				+	TINY          :: 0h3c90000000000000 // 2**-54
			
 
				+	TWO53         :: 0h4340000000000000 // 2**53
			
 
				+	LN2HI         :: 0h3fe62e42fee00000
			
 
				+	LN2LO         :: 0h3dea39ef35793c76
			
 
				+	LP1           :: 0h3FE5555555555593
			
 
				+	LP2           :: 0h3FD999999997FA04
			
 
				+	LP3           :: 0h3FD2492494229359
			
 
				+	LP4           :: 0h3FCC71C51D8E78AF
			
 
				+	LP5           :: 0h3FC7466496CB03DE
			
 
				+	LP6           :: 0h3FC39A09D078C69F
			
 
				+	LP7           :: 0h3FC2F112DF3E5244
			
 
				+	
			
 
				+	switch {
			
 
				+	case x < -1 || is_nan(x):
			
 
				+		return nan_f64()
			
 
				+	case x == -1:
			
 
				+		return inf_f64(-1)
			
 
				+	case is_inf(x, 1):
			
 
				+		return inf_f64(+1)
			
 
				+	}
			
 
				+	absx := abs(x)
			
 
				+	
			
 
				+	f: f64
			
 
				+	iu: u64
			
 
				+	k := 1
			
 
				+	if absx < SQRT2_M1 { //  |x| < sqrt(2)-1
			
 
				+		if absx < SMALL { // |x| < 2**-29
			
 
				+			if absx < TINY { // |x| < 2**-54
			
 
				+				return x
			
 
				+			}
			
 
				+			return x - x*x*0.5
			
 
				+		}
			
 
				+		if x > SQRT2_HALF_M1 { // sqrt(2)/2-1 < x
			
 
				+			// (sqrt(2)/2-1) < x < (sqrt(2)-1)
			
 
				+			k = 0
			
 
				+			f = x
			
 
				+			iu = 1
			
 
				+		}
			
 
				+	}
			
 
				+	c: f64
			
 
				+	if k != 0 {
			
 
				+		u: f64
			
 
				+		if absx < TWO53 { // 1<<53
			
 
				+			u = 1.0 + x
			
 
				+			iu = transmute(u64)u
			
 
				+			k = int((iu >> 52) - 1023)
			
 
				+			// correction term
			
 
				+			if k > 0 {
			
 
				+				c = 1.0 - (u - x)
			
 
				+			} else {
			
 
				+				c = x - (u - 1.0)
			
 
				+			}
			
 
				+			c /= u
			
 
				+		} else {
			
 
				+			u = x
			
 
				+			iu = transmute(u64)u
			
 
				+			k = int((iu >> 52) - 1023)
			
 
				+			c = 0
			
 
				+		}
			
 
				+		iu &= 0x000fffffffffffff
			
 
				+		if iu < 0x0006a09e667f3bcd { // mantissa of sqrt(2)
			
 
				+			u = transmute(f64)(iu | 0x3ff0000000000000) // normalize u
			
 
				+		} else {
			
 
				+			k += 1
			
 
				+			u = transmute(f64)(iu | 0x3fe0000000000000) // normalize u/2
			
 
				+			iu = (0x0010000000000000 - iu) >> 2
			
 
				+		}
			
 
				+		f = u - 1.0 // sqrt(2)/2 < u < sqrt(2)
			
 
				+	}
			
 
				+	hfsq := 0.5 * f * f
			
 
				+	s, R, z: f64
			
 
				+	if iu == 0 { // |f| < 2**-20
			
 
				+		if f == 0 {
			
 
				+			if k == 0 {
			
 
				+				return 0
			
 
				+			}
			
 
				+			c += f64(k) * LN2LO
			
 
				+			return f64(k)*LN2HI + c
			
 
				+		}
			
 
				+		R = hfsq * (1.0 - 0.66666666666666666*f) // avoid division
			
 
				+		if k == 0 {
			
 
				+			return f - R
			
 
				+		}
			
 
				+		return f64(k)*LN2HI - ((R - (f64(k)*LN2LO + c)) - f)
			
 
				+	}
			
 
				+	s = f / (2.0 + f)
			
 
				+	z = s * s
			
 
				+	R = z * (LP1 + z*(LP2+z*(LP3+z*(LP4+z*(LP5+z*(LP6+z*LP7))))))
			
 
				+	if k == 0 {
			
 
				+		return f - (hfsq - s*(hfsq+R))
			
 
				+	}
			
 
				+	return f64(k)*LN2HI - ((hfsq - (s*(hfsq+R) + (f64(k)*LN2LO + c))) - f)
			
 
				+}
			
--- a/core/mem/virtual/virtual_linux.odin
+++ b/core/mem/virtual/virtual_linux.odin
@@ -4,64 +4,56 @@ package mem_virtual
 
				 
			
 
				 import "core:c"
			
 
				 import "core:intrinsics"
			
 
				+import "core:sys/unix"
			
 
				 
			
 
				-when ODIN_ARCH == "amd64" {
			
 
				-	SYS_mmap     :: 9
			
 
				-	SYS_mprotect :: 10
			
 
				-	SYS_munmap   :: 11
			
 
				-	SYS_madvise  :: 28
			
 
				-	
			
 
				-	PROT_NONE  :: 0x0
			
 
				-	PROT_READ  :: 0x1
			
 
				-	PROT_WRITE :: 0x2
			
 
				-	PROT_EXEC  :: 0x4
			
 
				-	PROT_GROWSDOWN :: 0x01000000
			
 
				-	PROT_GROWSUP :: 0x02000000
			
 
				+PROT_NONE  :: 0x0
			
 
				+PROT_READ  :: 0x1
			
 
				+PROT_WRITE :: 0x2
			
 
				+PROT_EXEC  :: 0x4
			
 
				+PROT_GROWSDOWN :: 0x01000000
			
 
				+PROT_GROWSUP :: 0x02000000
			
 
				 
			
 
				-	MAP_FIXED     :: 0x1
			
 
				-	MAP_PRIVATE   :: 0x2
			
 
				-	MAP_SHARED    :: 0x4
			
 
				-	MAP_ANONYMOUS :: 0x20
			
 
				-	
			
 
				-	MADV_NORMAL      :: 0
			
 
				-	MADV_RANDOM      :: 1
			
 
				-	MADV_SEQUENTIAL  :: 2
			
 
				-	MADV_WILLNEED    :: 3
			
 
				-	MADV_DONTNEED    :: 4
			
 
				-	MADV_FREE        :: 8
			
 
				-	MADV_REMOVE      :: 9
			
 
				-	MADV_DONTFORK    :: 10
			
 
				-	MADV_DOFORK      :: 11
			
 
				-	MADV_MERGEABLE   :: 12
			
 
				-	MADV_UNMERGEABLE :: 13
			
 
				-	MADV_HUGEPAGE    :: 14
			
 
				-	MADV_NOHUGEPAGE  :: 15
			
 
				-	MADV_DONTDUMP    :: 16
			
 
				-	MADV_DODUMP      :: 17
			
 
				-	MADV_WIPEONFORK  :: 18
			
 
				-	MADV_KEEPONFORK  :: 19
			
 
				-	MADV_HWPOISON    :: 100
			
 
				-} else {
			
 
				-	#panic("Unsupported architecture")
			
 
				-}
			
 
				+MAP_FIXED     :: 0x1
			
 
				+MAP_PRIVATE   :: 0x2
			
 
				+MAP_SHARED    :: 0x4
			
 
				+MAP_ANONYMOUS :: 0x20
			
 
				+
			
 
				+MADV_NORMAL      :: 0
			
 
				+MADV_RANDOM      :: 1
			
 
				+MADV_SEQUENTIAL  :: 2
			
 
				+MADV_WILLNEED    :: 3
			
 
				+MADV_DONTNEED    :: 4
			
 
				+MADV_FREE        :: 8
			
 
				+MADV_REMOVE      :: 9
			
 
				+MADV_DONTFORK    :: 10
			
 
				+MADV_DOFORK      :: 11
			
 
				+MADV_MERGEABLE   :: 12
			
 
				+MADV_UNMERGEABLE :: 13
			
 
				+MADV_HUGEPAGE    :: 14
			
 
				+MADV_NOHUGEPAGE  :: 15
			
 
				+MADV_DONTDUMP    :: 16
			
 
				+MADV_DODUMP      :: 17
			
 
				+MADV_WIPEONFORK  :: 18
			
 
				+MADV_KEEPONFORK  :: 19
			
 
				+MADV_HWPOISON    :: 100
			
 
				 
			
 
				 mmap :: proc "contextless" (addr: rawptr, length: uint, prot: c.int, flags: c.int, fd: c.int, offset: uintptr) -> rawptr {
			
 
				-	res := intrinsics.syscall(SYS_mmap, uintptr(addr), uintptr(length), uintptr(prot), uintptr(flags), uintptr(fd), offset)
			
 
				+	res := intrinsics.syscall(unix.SYS_mmap, uintptr(addr), uintptr(length), uintptr(prot), uintptr(flags), uintptr(fd), offset)
			
 
				 	return rawptr(res)
			
 
				 }
			
 
				 
			
 
				 munmap :: proc "contextless" (addr: rawptr, length: uint) -> c.int {
			
 
				-	res := intrinsics.syscall(SYS_munmap, uintptr(addr), uintptr(length))
			
 
				+	res := intrinsics.syscall(unix.SYS_munmap, uintptr(addr), uintptr(length))
			
 
				 	return c.int(res)
			
 
				 }
			
 
				 
			
 
				 mprotect :: proc "contextless" (addr: rawptr, length: uint, prot: c.int) -> c.int {
			
 
				-	res := intrinsics.syscall(SYS_mprotect, uintptr(addr), uintptr(length), uint(prot))
			
 
				+	res := intrinsics.syscall(unix.SYS_mprotect, uintptr(addr), uintptr(length), uintptr(prot)) // every syscall argument is passed as uintptr, matching mmap/munmap/madvise
			
 
				 	return c.int(res)
			
 
				 }
			
 
				 
			
 
				 madvise :: proc "contextless" (addr: rawptr, length: uint, advice: c.int) -> c.int {
			
 
				-	res := intrinsics.syscall(SYS_madvise, uintptr(addr), uintptr(length), uintptr(advice))
			
 
				+	res := intrinsics.syscall(unix.SYS_madvise, uintptr(addr), uintptr(length), uintptr(advice))
			
 
				 	return c.int(res)
			
 
				 }
			
 
				 
			
--- a/core/odin/doc-format/doc_format.odin
+++ b/core/odin/doc-format/doc_format.odin
@@ -11,7 +11,7 @@ String :: distinct Array(byte)
 
				 
			
 
				 Version_Type_Major :: 0
			
 
				 Version_Type_Minor :: 2
			
 
				-Version_Type_Patch :: 0
			
 
				+Version_Type_Patch :: 1
			
 
				 
			
 
				 Version_Type :: struct {
			
 
				 	major, minor, patch: u8,
			
--- a/core/os/os_linux.odin
+++ b/core/os/os_linux.odin
@@ -8,6 +8,7 @@ import "core:strings"
 
				 import "core:c"
			
 
				 import "core:strconv"
			
 
				 import "core:intrinsics"
			
 
				+import "core:sys/unix"
			
 
				 
			
 
				 Handle    :: distinct i32
			
 
				 File_Time :: distinct u64
			
@@ -265,8 +266,6 @@ X_OK :: 1 // Test for execute permission
 
				 W_OK :: 2 // Test for write permission
			
 
				 R_OK :: 4 // Test for read permission
			
 
				 
			
 
				-SYS_GETTID :: 186
			
 
				-
			
 
				 foreign libc {
			
 
				 	@(link_name="__errno_location") __errno_location    :: proc() -> ^int ---
			
 
				 
			
@@ -594,7 +593,7 @@ exit :: proc "contextless" (code: int) -> ! {
 
				 }
			
 
				 
			
 
				 current_thread_id :: proc "contextless" () -> int {
			
 
				-	return cast(int)intrinsics.syscall(SYS_GETTID)
			
 
				+	return unix.sys_gettid()
			
 
				 }
			
 
				 
			
 
				 dlopen :: proc(filename: string, flags: int) -> rawptr {
			
--- a/core/path/path.odin
+++ b/core/path/path.odin
@@ -150,7 +150,7 @@ join :: proc(elems: ..string, allocator := context.allocator) -> string {
 
				 	context.allocator = allocator

			
 
				 	for elem, i in elems {

			
 
				 		if elem != "" {

			
 
				-			s := strings.join(elems[i:], "/")

			
 
				+			s := strings.join(elems[i:], "/", context.temp_allocator)

			
 
				 			return clean(s)

			
 
				 		}

			
 
				 	}

			
--- a/core/runtime/procs.odin
+++ b/core/runtime/procs.odin
@@ -42,7 +42,6 @@ when ODIN_NO_CRT && ODIN_OS == "windows" {
 
				 	memmove :: proc "c" (dst, src: rawptr, len: int) -> rawptr {
			
 
				 		if dst != src {
			
 
				 			d, s := ([^]byte)(dst), ([^]byte)(src)
			
 
				-			d_end, s_end := d[len:], s[len:]
			
 
				 			for i := len-1; i >= 0; i -= 1 {
			
 
				 				d[i] = s[i]
			
 
				 			}
			
@@ -54,7 +53,6 @@ when ODIN_NO_CRT && ODIN_OS == "windows" {
 
				 	memcpy :: proc "c" (dst, src: rawptr, len: int) -> rawptr {
			
 
				 		if dst != src {
			
 
				 			d, s := ([^]byte)(dst), ([^]byte)(src)
			
 
				-			d_end, s_end := d[len:], s[len:]
			
 
				 			for i := len-1; i >= 0; i -= 1 {
			
 
				 				d[i] = s[i]
			
 
				 			}
			
--- a/core/slice/slice.odin
+++ b/core/slice/slice.odin
@@ -20,7 +20,7 @@ swap :: proc(array: $T/[]$E, a, b: int) {
 
				 }
			
 
				 
			
 
				 swap_between :: proc(a, b: $T/[]$E) {
			
 
				-	n := min(len(a), len(b))
			
 
				+	n := builtin.min(len(a), len(b))
			
 
				 	if n >= 0 {
			
 
				 		ptr_swap_overlapping(&a[0], &b[0], size_of(E)*n)
			
 
				 	}	
			
--- a/core/sync/sync2/futex_linux.odin
+++ b/core/sync/sync2/futex_linux.odin
@@ -5,6 +5,7 @@ package sync2
 
				 import "core:c"
			
 
				 import "core:time"
			
 
				 import "core:intrinsics"
			
 
				+import "core:sys/unix"
			
 
				 
			
 
				 FUTEX_WAIT :: 0
			
 
				 FUTEX_WAKE :: 1
			
@@ -34,7 +35,7 @@ get_errno :: proc(r: int) -> int {
 
				 }
			
 
				 
			
 
				 internal_futex :: proc(f: ^Futex, op: c.int, val: u32, timeout: rawptr) -> int {
			
 
				-	code := int(intrinsics.syscall(202, uintptr(f), uintptr(op), uintptr(val), uintptr(timeout), 0, 0))
			
 
				+	code := int(intrinsics.syscall(unix.SYS_futex, uintptr(f), uintptr(op), uintptr(val), uintptr(timeout), 0, 0))
			
 
				 	return get_errno(code)
			
 
				 }
			
 
				 
			
--- a/core/sync/sync2/primitives_linux.odin
+++ b/core/sync/sync2/primitives_linux.odin
@@ -2,9 +2,8 @@
 
				 //+private
			
 
				 package sync2
			
 
				 
			
 
				-import "core:intrinsics"
			
 
				+import "core:sys/unix"
			
 
				 
			
 
				 _current_thread_id :: proc "contextless" () -> int {
			
 
				-	SYS_GETTID :: 186
			
 
				-	return int(intrinsics.syscall(SYS_GETTID))
			
 
				+	return unix.sys_gettid()
			
 
				 }
			
--- a/core/sync/sync_linux.odin
+++ b/core/sync/sync_linux.odin
@@ -1,11 +1,9 @@
 
				 package sync
			
 
				 
			
 
				 import "core:sys/unix"
			
 
				-import "core:intrinsics"
			
 
				 
			
 
				 current_thread_id :: proc "contextless" () -> int {
			
 
				-	SYS_GETTID :: 186
			
 
				-	return int(intrinsics.syscall(SYS_GETTID))
			
 
				+	return unix.sys_gettid()
			
 
				 }
			
 
				 
			
 
				 
			
--- a/core/sys/unix/syscalls_linux.odin
+++ b/core/sys/unix/syscalls_linux.odin
@@ -0,0 +1,60 @@
 
				+package unix
			
 
				+
			
 
				+import "core:intrinsics"
			
 
				+
			
 
				+// Linux has inconsistent system call numbering across architectures,
			
 
				+// for largely historical reasons.  This attempts to provide a unified
			
 
				+// Odin-side interface for system calls that are required for the core
			
 
				+// library to work.
			
 
				+
			
 
				+// For authoritative system call numbers, the following files in the kernel
			
 
				+// source can be used:
			
 
				+//
			
 
				+//  amd64: arch/x86/entry/syscalls/syscall_64.tbl
			
 
				+//  arm64: include/uapi/asm-generic/unistd.h
			
 
				+//  386: arch/x86/entry/syscalls/syscall_32.tbl
			
 
				+//  arm: arch/arm/tools/syscall.tbl
			
 
				+
			
 
				+when ODIN_ARCH == "amd64" {
			
 
				+	SYS_mmap : uintptr : 9
			
 
				+	SYS_mprotect : uintptr : 10
			
 
				+	SYS_munmap : uintptr : 11
			
 
				+	SYS_madvise : uintptr : 28
			
 
				+	SYS_futex : uintptr : 202
			
 
				+	SYS_gettid : uintptr : 186
			
 
				+	SYS_getrandom : uintptr : 318
			
 
				+} else when ODIN_ARCH == "arm64" {
			
 
				+	SYS_mmap : uintptr : 222
			
 
				+	SYS_mprotect : uintptr : 226
			
 
				+	SYS_munmap : uintptr : 215
			
 
				+	SYS_madvise : uintptr : 233
			
 
				+	SYS_futex : uintptr : 98
			
 
				+	SYS_gettid : uintptr : 178
			
 
				+	SYS_getrandom : uintptr : 278
			
 
				+} else when ODIN_ARCH == "386" {
			
 
				+	SYS_mmap : uintptr : 192 // 90 is "sys_old_mmap", we want mmap2
			
 
				+	SYS_mprotect : uintptr : 125
			
 
				+	SYS_munmap : uintptr : 91
			
 
				+	SYS_madvise : uintptr : 219
			
 
				+	SYS_futex : uintptr : 240
			
 
				+	SYS_gettid : uintptr : 224
			
 
				+	SYS_getrandom : uintptr : 355
			
 
				+} else when ODIN_ARCH == "arm" {
			
 
				+	SYS_mmap : uintptr : 192 // 90 is "sys_old_mmap", we want mmap2
			
 
				+	SYS_mprotect : uintptr : 125
			
 
				+	SYS_munmap: uintptr : 91
			
 
				+	SYS_madvise: uintptr : 220
			
 
				+	SYS_futex : uintptr : 240
			
 
				+	SYS_gettid : uintptr: 224
			
 
				+	SYS_getrandom : uintptr : 384
			
 
				+} else {
			
 
				+	#panic("Unsupported architecture")
			
 
				+}
			
 
				+
			
 
				+sys_gettid :: proc "contextless" () -> int {
			
 
				+	return cast(int)intrinsics.syscall(SYS_gettid)
			
 
				+}
			
 
				+
			
 
				+sys_getrandom :: proc "contextless" (buf: ^byte, buflen: int, flags: uint) -> int {
			
 
				+	return cast(int)intrinsics.syscall(SYS_getrandom, buf, cast(uintptr)(buflen), cast(uintptr)(flags))
			
 
				+}
			
--- a/src/check_expr.cpp
+++ b/src/check_expr.cpp
@@ -2453,6 +2453,9 @@ bool check_is_castable_to(CheckerContext *c, Operand *operand, Type *y) {
 
				 		return true;
			
 
				 	}
			
 
				 
			
 
				+	if (is_type_float(src) && is_type_complex(dst)) {
			
 
				+		return true;
			
 
				+	}
			
 
				 	if (is_type_float(src) && is_type_quaternion(dst)) {
			
 
				 		return true;
			
 
				 	}
			
--- a/src/checker.cpp
+++ b/src/checker.cpp
@@ -2603,11 +2603,18 @@ ExactValue check_decl_attribute_value(CheckerContext *c, Ast *value) {
 
				 	return ev;
			
 
				 }
			
 
				 
			
 
				+#define ATTRIBUTE_USER_TAG_NAME "tag"
			
 
				+
			
 
				 
			
 
				 DECL_ATTRIBUTE_PROC(foreign_block_decl_attribute) {
			
 
				 	ExactValue ev = check_decl_attribute_value(c, value);
			
 
				 
			
 
				-	if (name == "default_calling_convention") {
			
 
				+	if (name == ATTRIBUTE_USER_TAG_NAME) {
			
 
				+		if (ev.kind != ExactValue_String) {
			
 
				+			error(elem, "Expected a string value for '%.*s'", LIT(name));
			
 
				+		}
			
 
				+		return true;
			
 
				+	} else if (name == "default_calling_convention") {
			
 
				 		if (ev.kind == ExactValue_String) {
			
 
				 			auto cc = string_to_calling_convention(ev.value_string);
			
 
				 			if (cc == ProcCC_Invalid) {
			
@@ -2655,7 +2662,13 @@ DECL_ATTRIBUTE_PROC(foreign_block_decl_attribute) {
 
				 }
			
 
				 
			
 
				 DECL_ATTRIBUTE_PROC(proc_decl_attribute) {
			
 
				-	if (name == "test") {
			
 
				+	if (name == ATTRIBUTE_USER_TAG_NAME) {
			
 
				+		ExactValue ev = check_decl_attribute_value(c, value);
			
 
				+		if (ev.kind != ExactValue_String) {
			
 
				+			error(elem, "Expected a string value for '%.*s'", LIT(name));
			
 
				+		}
			
 
				+		return true;
			
 
				+	} else if (name == "test") {
			
 
				 		if (value != nullptr) {
			
 
				 			error(value, "'%.*s' expects no parameter, or a string literal containing \"file\" or \"package\"", LIT(name));
			
 
				 		}
			
@@ -2896,7 +2909,12 @@ DECL_ATTRIBUTE_PROC(proc_decl_attribute) {
 
				 DECL_ATTRIBUTE_PROC(var_decl_attribute) {
			
 
				 	ExactValue ev = check_decl_attribute_value(c, value);
			
 
				 
			
 
				-	if (name == "static") {
			
 
				+	if (name == ATTRIBUTE_USER_TAG_NAME) {
			
 
				+		if (ev.kind != ExactValue_String) {
			
 
				+			error(elem, "Expected a string value for '%.*s'", LIT(name));
			
 
				+		}
			
 
				+		return true;
			
 
				+	} else if (name == "static") {
			
 
				 		if (value != nullptr) {
			
 
				 			error(elem, "'static' does not have any parameters");
			
 
				 		}
			
@@ -3011,7 +3029,13 @@ DECL_ATTRIBUTE_PROC(var_decl_attribute) {
 
				 }
			
 
				 
			
 
				 DECL_ATTRIBUTE_PROC(const_decl_attribute) {
			
 
				-	if (name == "private") {
			
 
				+	if (name == ATTRIBUTE_USER_TAG_NAME) {
			
 
				+		ExactValue ev = check_decl_attribute_value(c, value);
			
 
				+		if (ev.kind != ExactValue_String) {
			
 
				+			error(elem, "Expected a string value for '%.*s'", LIT(name));
			
 
				+		}
			
 
				+		return true;
			
 
				+	} else if (name == "private") {
			
 
				 		// NOTE(bill): Handled elsewhere `check_collect_value_decl`
			
 
				 		return true;
			
 
				 	}
			
@@ -3019,7 +3043,13 @@ DECL_ATTRIBUTE_PROC(const_decl_attribute) {
 
				 }
			
 
				 
			
 
				 DECL_ATTRIBUTE_PROC(type_decl_attribute) {
			
 
				-	if (name == "private") {
			
 
				+	if (name == ATTRIBUTE_USER_TAG_NAME) {
			
 
				+		ExactValue ev = check_decl_attribute_value(c, value);
			
 
				+		if (ev.kind != ExactValue_String) {
			
 
				+			error(elem, "Expected a string value for '%.*s'", LIT(name));
			
 
				+		}
			
 
				+		return true;
			
 
				+	} else if (name == "private") {
			
 
				 		// NOTE(bill): Handled elsewhere `check_collect_value_decl`
			
 
				 		return true;
			
 
				 	}
			
@@ -4020,7 +4050,13 @@ void check_add_import_decl(CheckerContext *ctx, Ast *decl) {
 
				 }
			
 
				 
			
 
				 DECL_ATTRIBUTE_PROC(foreign_import_decl_attribute) {
			
 
				-	if (name == "force" || name == "require") {
			
 
				+	if (name == ATTRIBUTE_USER_TAG_NAME) {
			
 
				+		ExactValue ev = check_decl_attribute_value(c, value);
			
 
				+		if (ev.kind != ExactValue_String) {
			
 
				+			error(elem, "Expected a string value for '%.*s'", LIT(name));
			
 
				+		}
			
 
				+		return true;
			
 
				+	} else if (name == "force" || name == "require") {
			
 
				 		if (value != nullptr) {
			
 
				 			error(elem, "Expected no parameter for '%.*s'", LIT(name));
			
 
				 		} else if (name == "force") {
			
--- a/src/docs_format.cpp
+++ b/src/docs_format.cpp
@@ -15,7 +15,7 @@ struct OdinDocVersionType {
 
				 
			
 
				 #define OdinDocVersionType_Major 0
			
 
				 #define OdinDocVersionType_Minor 2
			
 
				-#define OdinDocVersionType_Patch 0
			
 
				+#define OdinDocVersionType_Patch 1
			
 
				 
			
 
				 struct OdinDocHeaderBase {
			
 
				 	u8                 magic[8];
			
@@ -175,7 +175,8 @@ enum OdinDocEntityFlag : u64 {
 
				 
			
 
				 struct OdinDocEntity {
			
 
				 	OdinDocEntityKind  kind;
			
 
				-	u32                flags;
			
 
				+	u32                reserved;
			
 
				+	u64                flags;
			
 
				 	OdinDocPosition    pos;
			
 
				 	OdinDocString      name;
			
 
				 	OdinDocTypeIndex   type;
			
--- a/src/llvm_backend_expr.cpp
+++ b/src/llvm_backend_expr.cpp
@@ -1369,7 +1369,7 @@ lbValue lb_build_binary_expr(lbProcedure *p, Ast *expr) {
 
				 			Type *rt = base_type(right.type);
			
 
				 			if (is_type_pointer(rt)) {
			
 
				 				right = lb_emit_load(p, right);
			
 
				-				rt = type_deref(rt);
			
 
				+				rt = base_type(type_deref(rt));
			
 
				 			}
			
 
				 
			
 
				 			switch (rt->kind) {
			
@@ -1841,7 +1841,6 @@ lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
 
				 		return res;
			
 
				 	}
			
 
				 
			
 
				-	#if 1
			
 
				 	if (is_type_union(dst)) {
			
 
				 		for_array(i, dst->Union.variants) {
			
 
				 			Type *vt = dst->Union.variants[i];
			
@@ -1852,7 +1851,6 @@ lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
 
				 			}
			
 
				 		}
			
 
				 	}
			
 
				-	#endif
			
 
				 
			
 
				 	// NOTE(bill): This has to be done before 'Pointer <-> Pointer' as it's
			
 
				 	// subtype polymorphism casting
			
--- a/src/llvm_backend_general.cpp
+++ b/src/llvm_backend_general.cpp
@@ -1142,7 +1142,7 @@ lbValue lb_emit_union_tag_ptr(lbProcedure *p, lbValue u) {
 
				 
			
 
				 	LLVMTypeRef uvt = LLVMGetElementType(LLVMTypeOf(u.value));
			
 
				 	unsigned element_count = LLVMCountStructElementTypes(uvt);
			
 
				-	GB_ASSERT_MSG(element_count == 2, "element_count=%u (%s) != (%s)", element_count, type_to_string(ut), LLVMPrintTypeToString(uvt));
			
 
				+	GB_ASSERT_MSG(element_count >= 2, "element_count=%u (%s) != (%s)", element_count, type_to_string(ut), LLVMPrintTypeToString(uvt));
			
 
				 
			
 
				 	lbValue tag_ptr = {};
			
 
				 	tag_ptr.value = LLVMBuildStructGEP(p->builder, u.value, 1, "");
			
@@ -1795,7 +1795,7 @@ LLVMTypeRef lb_type_internal(lbModule *m, Type *type) {
 
				 
			
 
				 			unsigned block_size = cast(unsigned)type->Union.variant_block_size;
			
 
				 
			
 
				-			auto fields = array_make<LLVMTypeRef>(temporary_allocator(), 0, 2);
			
 
				+			auto fields = array_make<LLVMTypeRef>(temporary_allocator(), 0, 3);
			
 
				 			if (is_type_union_maybe_pointer(type)) {
			
 
				 				LLVMTypeRef variant = lb_type(m, type->Union.variants[0]);
			
 
				 				array_add(&fields, variant);
			
@@ -1804,7 +1804,12 @@ LLVMTypeRef lb_type_internal(lbModule *m, Type *type) {
 
				 				LLVMTypeRef tag_type   = lb_type(m, union_tag_type(type));
			
 
				 				array_add(&fields, block_type);
			
 
				 				array_add(&fields, tag_type);
			
 
				-				
			
 
				+				i64 used_size = lb_sizeof(block_type) + lb_sizeof(tag_type);
			
 
				+				i64 padding = size - used_size;
			
 
				+				if (padding > 0) {
			
 
				+					LLVMTypeRef padding_type = lb_type_padding_filler(m, padding, align);
			
 
				+					array_add(&fields, padding_type);
			
 
				+				}
			
 
				 			}
			
 
				 			
			
 
				 			return LLVMStructTypeInContext(ctx, fields.data, cast(unsigned)fields.count, false);
			
--- a/src/llvm_backend_stmt.cpp
+++ b/src/llvm_backend_stmt.cpp
@@ -1485,7 +1485,14 @@ void lb_build_return_stmt_internal(lbProcedure *p, lbValue const &res) {
 
				 
			
 
				 	if (return_by_pointer) {
			
 
				 		if (res.value != nullptr) {
			
 
				-			LLVMBuildStore(p->builder, res.value, p->return_ptr.addr.value);
			
 
				+			LLVMValueRef res_val = res.value;
			
 
				+			i64 sz = type_size_of(res.type);
			
 
				+			if (LLVMIsALoadInst(res_val) && sz > build_context.word_size) {
			
 
				+				lbValue ptr = lb_address_from_load_or_generate_local(p, res);
			
 
				+				lb_mem_copy_non_overlapping(p, p->return_ptr.addr, ptr, lb_const_int(p->module, t_int, sz));
			
 
				+			} else {
			
 
				+				LLVMBuildStore(p->builder, res_val, p->return_ptr.addr.value);
			
 
				+			}
			
 
				 		} else {
			
 
				 			LLVMBuildStore(p->builder, LLVMConstNull(p->abi_function_type->ret.type), p->return_ptr.addr.value);
			
 
				 		}
			
--- a/src/llvm_backend_utility.cpp
+++ b/src/llvm_backend_utility.cpp
@@ -179,32 +179,24 @@ lbValue lb_emit_transmute(lbProcedure *p, lbValue value, Type *t) {
 
				 	GB_ASSERT_MSG(sz == dz, "Invalid transmute conversion: '%s' to '%s'", type_to_string(src_type), type_to_string(t));
			
 
				 
			
 
				 	// NOTE(bill): Casting between an integer and a pointer cannot be done through a bitcast
			
 
				-	if (is_type_uintptr(src) && is_type_pointer(dst)) {
			
 
				+	if (is_type_uintptr(src) && is_type_internally_pointer_like(dst)) {
			
 
				 		res.value = LLVMBuildIntToPtr(p->builder, value.value, lb_type(m, t), "");
			
 
				 		return res;
			
 
				 	}
			
 
				-	if (is_type_pointer(src) && is_type_uintptr(dst)) {
			
 
				-		res.value = LLVMBuildPtrToInt(p->builder, value.value, lb_type(m, t), "");
			
 
				-		return res;
			
 
				-	}
			
 
				-	if (is_type_uintptr(src) && is_type_proc(dst)) {
			
 
				-		res.value = LLVMBuildIntToPtr(p->builder, value.value, lb_type(m, t), "");
			
 
				-		return res;
			
 
				-	}
			
 
				-	if (is_type_proc(src) && is_type_uintptr(dst)) {
			
 
				+	if (is_type_internally_pointer_like(src) && is_type_uintptr(dst)) {
			
 
				 		res.value = LLVMBuildPtrToInt(p->builder, value.value, lb_type(m, t), "");
			
 
				 		return res;
			
 
				 	}
			
 
				 
			
 
				-	if (is_type_integer(src) && (is_type_pointer(dst) || is_type_cstring(dst))) {
			
 
				+	if (is_type_integer(src) && is_type_internally_pointer_like(dst)) {
			
 
				 		res.value = LLVMBuildIntToPtr(p->builder, value.value, lb_type(m, t), "");
			
 
				 		return res;
			
 
				-	} else if ((is_type_pointer(src) || is_type_cstring(src)) && is_type_integer(dst)) {
			
 
				+	} else if (is_type_internally_pointer_like(src) && is_type_integer(dst)) {
			
 
				 		res.value = LLVMBuildPtrToInt(p->builder, value.value, lb_type(m, t), "");
			
 
				 		return res;
			
 
				 	}
			
 
				 
			
 
				-	if (is_type_pointer(src) && is_type_pointer(dst)) {
			
 
				+	if (is_type_internally_pointer_like(src) && is_type_internally_pointer_like(dst)) {
			
 
				 		res.value = LLVMBuildPointerCast(p->builder, value.value, lb_type(p->module, t), "");
			
 
				 		return res;
			
 
				 	}
			
--- a/src/ptr_map.cpp
+++ b/src/ptr_map.cpp
@@ -28,11 +28,13 @@ struct PtrMap {
 
				 
			
 
				 u32 ptr_map_hash_key(uintptr key) {
			
 
				 #if defined(GB_ARCH_64_BIT)
			
 
				-	u64 x = (u64)key;
			
 
				-	u8 count = (u8)(x >> 59);
			
 
				-	x ^= x >> (5 + count);
			
 
				-	x *= 12605985483714917081ull;
			
 
				-	return (u32)(x ^ (x >> 43));
			
 
				+	key = (~key) + (key << 21);
			
 
				+	key = key ^ (key >> 24);
			
 
				+	key = (key + (key << 3)) + (key << 8);
			
 
				+	key = key ^ (key >> 14);
			
 
				+	key = (key + (key << 2)) + (key << 4);
			
 
				+	key = key ^ (key << 28);
			
 
				+	return cast(u32)key;
			
 
				 #elif defined(GB_ARCH_32_BIT)
			
 
				 	u32 state = ((u32)key) * 747796405u + 2891336453u;
			
 
				 	u32 word = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u;
			
--- a/src/types.cpp
+++ b/src/types.cpp
@@ -697,6 +697,7 @@ Type *   bit_set_to_int(Type *t);
 
				 bool are_types_identical(Type *x, Type *y);
			
 
				 
			
 
				 bool is_type_pointer(Type *t);
			
 
				+bool is_type_proc(Type *t);
			
 
				 bool is_type_slice(Type *t);
			
 
				 bool is_type_integer(Type *t);
			
 
				 bool type_set_offsets(Type *t);
			
@@ -1284,6 +1285,10 @@ bool is_type_multi_pointer(Type *t) {
 
				 	t = base_type(t);
			
 
				 	return t->kind == Type_MultiPointer;
			
 
				 }
			
 
				+bool is_type_internally_pointer_like(Type *t) {
			
 
				+	return is_type_pointer(t) || is_type_multi_pointer(t) || is_type_cstring(t) || is_type_proc(t);
			
 
				+}
			
 
				+
			
 
				 bool is_type_tuple(Type *t) {
			
 
				 	t = base_type(t);
			
 
				 	return t->kind == Type_Tuple;
			
@@ -4019,7 +4024,13 @@ gbString write_type_to_string(gbString str, Type *type) {
 
				 
			
 
				 	case Type_BitSet:
			
 
				 		str = gb_string_appendc(str, "bit_set[");
			
 
				-		str = write_type_to_string(str, type->BitSet.elem);
			
 
				+		if (is_type_enum(type->BitSet.elem)) {
			
 
				+			str = write_type_to_string(str, type->BitSet.elem);
			
 
				+		} else {
			
 
				+			str = gb_string_append_fmt(str, "%lld", type->BitSet.lower);
			
 
				+			str = gb_string_append_fmt(str, "..=");
			
 
				+			str = gb_string_append_fmt(str, "%lld", type->BitSet.upper);
			
 
				+		}
			
 
				 		if (type->BitSet.underlying != nullptr) {
			
 
				 			str = gb_string_appendc(str, "; ");
			
 
				 			str = write_type_to_string(str, type->BitSet.underlying);
			
--- a/tests/core/crypto/test_core_crypto.odin
+++ b/tests/core/crypto/test_core_crypto.odin
@@ -115,6 +115,15 @@ main :: proc() {
 
				     test_haval_224(&t)
			
 
				     test_haval_256(&t)
			
 
				 
			
 
				+    // "modern" crypto tests
			
 
				+    test_chacha20(&t)
			
 
				+    test_poly1305(&t)
			
 
				+    test_chacha20poly1305(&t)
			
 
				+    test_x25519(&t)
			
 
				+    test_rand_bytes(&t)
			
 
				+
			
 
				+    bench_modern(&t)
			
 
				+
			
 
				     fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
			
 
				 }
			
 
				 
			
--- a/tests/core/crypto/test_core_crypto_modern.odin
+++ b/tests/core/crypto/test_core_crypto_modern.odin
@@ -0,0 +1,535 @@
 
				+package test_core_crypto
			
 
				+
			
 
				+import "core:testing"
			
 
				+import "core:fmt"
			
 
				+import "core:mem"
			
 
				+import "core:time"
			
 
				+import "core:crypto"
			
 
				+
			
 
				+import "core:crypto/chacha20"
			
 
				+import "core:crypto/chacha20poly1305"
			
 
				+import "core:crypto/poly1305"
			
 
				+import "core:crypto/x25519"
			
 
				+
			
 
				+_digit_value :: proc(r: rune) -> int {
			
 
				+	ri := int(r)
			
 
				+	v: int = 16
			
 
				+	switch r {
			
 
				+	case '0'..='9': v = ri-'0'
			
 
				+	case 'a'..='z': v = ri-'a'+10
			
 
				+	case 'A'..='Z': v = ri-'A'+10
			
 
				+	}
			
 
				+	return v
			
 
				+}
			
 
				+
			
 
				+_decode_hex32 :: proc(s: string) -> [32]byte{
			
 
				+	b: [32]byte
			
 
				+	for i := 0; i < len(s); i = i + 2 {
			
 
				+		hi := _digit_value(rune(s[i]))
			
 
				+		lo := _digit_value(rune(s[i+1]))
			
 
				+		b[i/2] = byte(hi << 4 | lo)
			
 
				+	}
			
 
				+	return b
			
 
				+}
			
 
				+
			
 
				+_PLAINTEXT_SUNSCREEN_STR := "Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it."
			
 
				+
			
 
				+@(test)
			
 
				+test_chacha20 :: proc(t: ^testing.T) {
			
 
				+	log(t, "Testing (X)ChaCha20")
			
 
				+
			
 
				+	// Test cases taken from RFC 8439, and draft-irtf-cfrg-xchacha-03
			
 
				+	plaintext := transmute([]byte)(_PLAINTEXT_SUNSCREEN_STR)
			
 
				+
			
 
				+	key := [chacha20.KEY_SIZE]byte{
			
 
				+		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
			
 
				+		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
			
 
				+		0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
			
 
				+		0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
			
 
				+	}
			
 
				+
			
 
				+	nonce := [chacha20.NONCE_SIZE]byte{
			
 
				+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4a,
			
 
				+		0x00, 0x00, 0x00, 0x00,
			
 
				+	}
			
 
				+
			
 
				+	ciphertext := [114]byte{
			
 
				+		0x6e, 0x2e, 0x35, 0x9a, 0x25, 0x68, 0xf9, 0x80,
			
 
				+		0x41, 0xba, 0x07, 0x28, 0xdd, 0x0d, 0x69, 0x81,
			
 
				+		0xe9, 0x7e, 0x7a, 0xec, 0x1d, 0x43, 0x60, 0xc2,
			
 
				+		0x0a, 0x27, 0xaf, 0xcc, 0xfd, 0x9f, 0xae, 0x0b,
			
 
				+		0xf9, 0x1b, 0x65, 0xc5, 0x52, 0x47, 0x33, 0xab,
			
 
				+		0x8f, 0x59, 0x3d, 0xab, 0xcd, 0x62, 0xb3, 0x57,
			
 
				+		0x16, 0x39, 0xd6, 0x24, 0xe6, 0x51, 0x52, 0xab,
			
 
				+		0x8f, 0x53, 0x0c, 0x35, 0x9f, 0x08, 0x61, 0xd8,
			
 
				+		0x07, 0xca, 0x0d, 0xbf, 0x50, 0x0d, 0x6a, 0x61,
			
 
				+		0x56, 0xa3, 0x8e, 0x08, 0x8a, 0x22, 0xb6, 0x5e,
			
 
				+		0x52, 0xbc, 0x51, 0x4d, 0x16, 0xcc, 0xf8, 0x06,
			
 
				+		0x81, 0x8c, 0xe9, 0x1a, 0xb7, 0x79, 0x37, 0x36,
			
 
				+		0x5a, 0xf9, 0x0b, 0xbf, 0x74, 0xa3, 0x5b, 0xe6,
			
 
				+		0xb4, 0x0b, 0x8e, 0xed, 0xf2, 0x78, 0x5e, 0x42,
			
 
				+		0x87, 0x4d,
			
 
				+	}
			
 
				+	ciphertext_str := hex_string(ciphertext[:])
			
 
				+
			
 
				+	derived_ciphertext: [114]byte
			
 
				+	ctx: chacha20.Context = ---
			
 
				+	chacha20.init(&ctx, key[:], nonce[:])
			
 
				+	chacha20.seek(&ctx, 1) // The test vectors start the counter at 1.
			
 
				+	chacha20.xor_bytes(&ctx, derived_ciphertext[:], plaintext[:])
			
 
				+
			
 
				+	derived_ciphertext_str := hex_string(derived_ciphertext[:])
			
 
				+	expect(t, derived_ciphertext_str == ciphertext_str, fmt.tprintf("Expected %s for xor_bytes(plaintext_str), but got %s instead", ciphertext_str, derived_ciphertext_str))
			
 
				+
			
 
				+	xkey := [chacha20.KEY_SIZE]byte{
			
 
				+		0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
			
 
				+		0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
			
 
				+		0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
			
 
				+		0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
			
 
				+	}
			
 
				+
			
 
				+	xnonce := [chacha20.XNONCE_SIZE]byte{
			
 
				+		0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
			
 
				+		0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
			
 
				+		0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
			
 
				+	}
			
 
				+
			
 
				+	xciphertext := [114]byte{
			
 
				+		0xbd, 0x6d, 0x17, 0x9d, 0x3e, 0x83, 0xd4, 0x3b,
			
 
				+		0x95, 0x76, 0x57, 0x94, 0x93, 0xc0, 0xe9, 0x39,
			
 
				+		0x57, 0x2a, 0x17, 0x00, 0x25, 0x2b, 0xfa, 0xcc,
			
 
				+		0xbe, 0xd2, 0x90, 0x2c, 0x21, 0x39, 0x6c, 0xbb,
			
 
				+		0x73, 0x1c, 0x7f, 0x1b, 0x0b, 0x4a, 0xa6, 0x44,
			
 
				+		0x0b, 0xf3, 0xa8, 0x2f, 0x4e, 0xda, 0x7e, 0x39,
			
 
				+		0xae, 0x64, 0xc6, 0x70, 0x8c, 0x54, 0xc2, 0x16,
			
 
				+		0xcb, 0x96, 0xb7, 0x2e, 0x12, 0x13, 0xb4, 0x52,
			
 
				+		0x2f, 0x8c, 0x9b, 0xa4, 0x0d, 0xb5, 0xd9, 0x45,
			
 
				+		0xb1, 0x1b, 0x69, 0xb9, 0x82, 0xc1, 0xbb, 0x9e,
			
 
				+		0x3f, 0x3f, 0xac, 0x2b, 0xc3, 0x69, 0x48, 0x8f,
			
 
				+		0x76, 0xb2, 0x38, 0x35, 0x65, 0xd3, 0xff, 0xf9,
			
 
				+		0x21, 0xf9, 0x66, 0x4c, 0x97, 0x63, 0x7d, 0xa9,
			
 
				+		0x76, 0x88, 0x12, 0xf6, 0x15, 0xc6, 0x8b, 0x13,
			
 
				+		0xb5, 0x2e,
			
 
				+	}
			
 
				+	xciphertext_str := hex_string(xciphertext[:])
			
 
				+
			
 
				+	chacha20.init(&ctx, xkey[:], xnonce[:])
			
 
				+	chacha20.seek(&ctx, 1)
			
 
				+	chacha20.xor_bytes(&ctx, derived_ciphertext[:], plaintext[:])
			
 
				+
			
 
				+	derived_ciphertext_str = hex_string(derived_ciphertext[:])
			
 
				+	expect(t, derived_ciphertext_str == xciphertext_str, fmt.tprintf("Expected %s for xor_bytes(plaintext_str), but got %s instead", xciphertext_str, derived_ciphertext_str))
			
 
				+}
			
 
				+
			
 
				+@(test)
			
 
				+test_poly1305 :: proc(t: ^testing.T) {
			
 
				+	log(t, "Testing poly1305")
			
 
				+
			
 
				+	// Test cases taken from poly1305-donna.
			
 
				+	key := [poly1305.KEY_SIZE]byte{
			
 
				+		0xee,0xa6,0xa7,0x25,0x1c,0x1e,0x72,0x91,
			
 
				+		0x6d,0x11,0xc2,0xcb,0x21,0x4d,0x3c,0x25,
			
 
				+		0x25,0x39,0x12,0x1d,0x8e,0x23,0x4e,0x65,
			
 
				+		0x2d,0x65,0x1f,0xa4,0xc8,0xcf,0xf8,0x80,
			
 
				+	}
			
 
				+
			
 
				+	msg := [131]byte{
			
 
				+		0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73,
			
 
				+		0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce,
			
 
				+		0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4,
			
 
				+		0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a,
			
 
				+		0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b,
			
 
				+		0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72,
			
 
				+		0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2,
			
 
				+		0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38,
			
 
				+		0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a,
			
 
				+		0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae,
			
 
				+		0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea,
			
 
				+		0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda,
			
 
				+		0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde,
			
 
				+		0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3,
			
 
				+		0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6,
			
 
				+		0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74,
			
 
				+		0xe3,0x55,0xa5,
			
 
				+	}
			
 
				+
			
 
				+	tag := [poly1305.TAG_SIZE]byte{
			
 
				+		0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5,
			
 
				+		0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9,
			
 
				+	}
			
 
				+	tag_str := hex_string(tag[:])
			
 
				+
			
 
				+	// Verify - oneshot + compare
			
 
				+	ok := poly1305.verify(tag[:], msg[:], key[:])
			
 
				+	expect(t, ok, "oneshot verify call failed")
			
 
				+
			
 
				+	// Sum - oneshot
			
 
				+	derived_tag: [poly1305.TAG_SIZE]byte
			
 
				+	poly1305.sum(derived_tag[:], msg[:], key[:])
			
 
				+	derived_tag_str := hex_string(derived_tag[:])
			
 
				+	expect(t, derived_tag_str == tag_str, fmt.tprintf("Expected %s for sum(msg, key), but got %s instead", tag_str, derived_tag_str))
			
 
				+
			
 
				+	// Incremental
			
 
				+	mem.zero(&derived_tag, size_of(derived_tag))
			
 
				+	ctx: poly1305.Context = ---
			
 
				+	poly1305.init(&ctx, key[:])
			
 
				+	read_lengths := [11]int{32, 64, 16, 8, 4, 2, 1, 1, 1, 1, 1}
			
 
				+	off := 0
			
 
				+	for read_length in read_lengths {
			
 
				+		to_read := msg[off:off+read_length]
			
 
				+		poly1305.update(&ctx, to_read)
			
 
				+		off = off + read_length
			
 
				+	}
			
 
				+	poly1305.final(&ctx, derived_tag[:])
			
 
				+	derived_tag_str = hex_string(derived_tag[:])
			
 
				+	expect(t, derived_tag_str == tag_str, fmt.tprintf("Expected %s for init/update/final - incremental, but got %s instead", tag_str, derived_tag_str))
			
 
				+}
			
 
				+
			
 
				+@(test)
			
 
				+test_chacha20poly1305 :: proc(t: ^testing.T) {
			
 
				+	log(t, "Testing chacha20poly1205")
			
 
				+
			
 
				+	plaintext := transmute([]byte)(_PLAINTEXT_SUNSCREEN_STR)
			
 
				+
			
 
				+	aad := [12]byte{
			
 
				+		0x50, 0x51, 0x52, 0x53, 0xc0, 0xc1, 0xc2, 0xc3,
			
 
				+		0xc4, 0xc5, 0xc6, 0xc7,
			
 
				+	}
			
 
				+
			
 
				+	key := [chacha20poly1305.KEY_SIZE]byte{
			
 
				+		0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
			
 
				+		0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
			
 
				+		0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
			
 
				+		0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
			
 
				+	}
			
 
				+
			
 
				+	nonce := [chacha20poly1305.NONCE_SIZE]byte{
			
 
				+		0x07, 0x00, 0x00, 0x00,
			
 
				+		0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
			
 
				+	}
			
 
				+
			
 
				+	ciphertext := [114]byte{
			
 
				+		0xd3, 0x1a, 0x8d, 0x34, 0x64, 0x8e, 0x60, 0xdb,
			
 
				+		0x7b, 0x86, 0xaf, 0xbc, 0x53, 0xef, 0x7e, 0xc2,
			
 
				+		0xa4, 0xad, 0xed, 0x51, 0x29, 0x6e, 0x08, 0xfe,
			
 
				+		0xa9, 0xe2, 0xb5, 0xa7, 0x36, 0xee, 0x62, 0xd6,
			
 
				+		0x3d, 0xbe, 0xa4, 0x5e, 0x8c, 0xa9, 0x67, 0x12,
			
 
				+		0x82, 0xfa, 0xfb, 0x69, 0xda, 0x92, 0x72, 0x8b,
			
 
				+		0x1a, 0x71, 0xde, 0x0a, 0x9e, 0x06, 0x0b, 0x29,
			
 
				+		0x05, 0xd6, 0xa5, 0xb6, 0x7e, 0xcd, 0x3b, 0x36,
			
 
				+		0x92, 0xdd, 0xbd, 0x7f, 0x2d, 0x77, 0x8b, 0x8c,
			
 
				+		0x98, 0x03, 0xae, 0xe3, 0x28, 0x09, 0x1b, 0x58,
			
 
				+		0xfa, 0xb3, 0x24, 0xe4, 0xfa, 0xd6, 0x75, 0x94,
			
 
				+		0x55, 0x85, 0x80, 0x8b, 0x48, 0x31, 0xd7, 0xbc,
			
 
				+		0x3f, 0xf4, 0xde, 0xf0, 0x8e, 0x4b, 0x7a, 0x9d,
			
 
				+		0xe5, 0x76, 0xd2, 0x65, 0x86, 0xce, 0xc6, 0x4b,
			
 
				+		0x61, 0x16,
			
 
				+	}
			
 
				+	ciphertext_str := hex_string(ciphertext[:])
			
 
				+
			
 
				+	tag := [chacha20poly1305.TAG_SIZE]byte{
			
 
				+		0x1a, 0xe1, 0x0b, 0x59, 0x4f, 0x09, 0xe2, 0x6a,
			
 
				+		0x7e, 0x90, 0x2e, 0xcb, 0xd0, 0x60, 0x06, 0x91,
			
 
				+	}
			
 
				+	tag_str := hex_string(tag[:])
			
 
				+
			
 
				+	derived_tag: [chacha20poly1305.TAG_SIZE]byte
			
 
				+	derived_ciphertext: [114]byte
			
 
				+
			
 
				+	chacha20poly1305.encrypt(derived_ciphertext[:], derived_tag[:], key[:], nonce[:], aad[:], plaintext)
			
 
				+
			
 
				+	derived_ciphertext_str := hex_string(derived_ciphertext[:])
			
 
				+	expect(t, derived_ciphertext_str == ciphertext_str, fmt.tprintf("Expected ciphertext %s for encrypt(aad, plaintext), but got %s instead", ciphertext_str, derived_ciphertext_str))
			
 
				+
			
 
				+	derived_tag_str := hex_string(derived_tag[:])
			
 
				+	expect(t, derived_tag_str == tag_str, fmt.tprintf("Expected tag %s for encrypt(aad, plaintext), but got %s instead", tag_str, derived_tag_str))
			
 
				+
			
 
				+	derived_plaintext: [114]byte
			
 
				+	ok := chacha20poly1305.decrypt(derived_plaintext[:], tag[:], key[:], nonce[:], aad[:], ciphertext[:])
			
 
				+	derived_plaintext_str := string(derived_plaintext[:])
			
 
				+	expect(t, ok, "Expected true for decrypt(tag, aad, ciphertext)")
			
 
				+	expect(t, derived_plaintext_str == _PLAINTEXT_SUNSCREEN_STR, fmt.tprintf("Expected plaintext %s for decrypt(tag, aad, ciphertext), but got %s instead", _PLAINTEXT_SUNSCREEN_STR, derived_plaintext_str))
			
 
				+
			
 
				+	derived_ciphertext[0] ~= 0xa5
			
 
				+	ok = chacha20poly1305.decrypt(derived_plaintext[:], tag[:], key[:], nonce[:], aad[:], derived_ciphertext[:])
			
 
				+	expect(t, !ok, "Expected false for decrypt(tag, aad, corrupted_ciphertext)")
			
 
				+
			
 
				+	aad[0] ~= 0xa5
			
 
				+	ok = chacha20poly1305.decrypt(derived_plaintext[:], tag[:], key[:], nonce[:], aad[:], ciphertext[:])
			
 
				+	expect(t, !ok, "Expected false for decrypt(tag, corrupted_aad, ciphertext)")
			
 
				+}
			
 
				+
			
 
				+TestECDH :: struct {
			
 
				+	scalar:  string,
			
 
				+	point:   string,
			
 
				+	product: string,
			
 
				+}
			
 
				+
			
 
				+@(test)
			
 
				+test_x25519 :: proc(t: ^testing.T) {
			
 
				+	log(t, "Testing X25519")
			
 
				+
			
 
				+	test_vectors := [?]TestECDH {
			
 
				+		// Test vectors from RFC 7748
			
 
				+		TestECDH{
			
 
				+			"a546e36bf0527c9d3b16154b82465edd62144c0ac1fc5a18506a2244ba449ac4",
			
 
				+			"e6db6867583030db3594c1a424b15f7c726624ec26b3353b10a903a6d0ab1c4c",
			
 
				+			"c3da55379de9c6908e94ea4df28d084f32eccf03491c71f754b4075577a28552",
			
 
				+		},
			
 
				+		TestECDH{
			
 
				+			"4b66e9d4d1b4673c5ad22691957d6af5c11b6421e0ea01d42ca4169e7918ba0d",
			
 
				+			"e5210f12786811d3f4b7959d0538ae2c31dbe7106fc03c3efc4cd549c715a493",
			
 
				+			"95cbde9476e8907d7aade45cb4b873f88b595a68799fa152e6f8f7647aac7957",
			
 
				+		},
			
 
				+	}
			
 
				+	for v, _ in test_vectors {
			
 
				+		scalar := _decode_hex32(v.scalar)
			
 
				+		point := _decode_hex32(v.point)
			
 
				+
			
 
				+		derived_point: [x25519.POINT_SIZE]byte
			
 
				+		x25519.scalarmult(derived_point[:], scalar[:], point[:])
			
 
				+		derived_point_str := hex_string(derived_point[:])
			
 
				+
			
 
				+		expect(t, derived_point_str == v.product, fmt.tprintf("Expected %s for %s * %s, but got %s instead", v.product, v.scalar, v.point, derived_point_str))
			
 
				+
			
 
				+		// Abuse the test vectors to sanity-check the scalar-basepoint multiply.
			
 
				+		p1, p2: [x25519.POINT_SIZE]byte
			
 
				+		x25519.scalarmult_basepoint(p1[:], scalar[:])
			
 
				+		x25519.scalarmult(p2[:], scalar[:], x25519._BASE_POINT[:])
			
 
				+		p1_str, p2_str := hex_string(p1[:]), hex_string(p2[:])
			
 
				+		expect(t, p1_str == p2_str, fmt.tprintf("Expected %s for %s * basepoint, but got %s instead", p2_str, v.scalar, p1_str))
			
 
				+	}
			
 
				+
			
 
				+    // TODO/tests: Run the wycheproof test vectors, once I figure out
			
 
				+    // how to work with JSON.
			
 
				+}
			
 
				+
			
 
				+@(test)
				+test_rand_bytes :: proc(t: ^testing.T) {
				+	// Smoke test for crypto.rand_bytes: fill a large buffer and check that
				+	// both its first and its last byte end up non-zero.
				+	log(t, "Testing rand_bytes")
				+
				+	// Only exercised on linux; every other target logs a notice and bails.
				+	if ODIN_OS != "linux" {
				+		log(t, "rand_bytes not supported - skipping")
				+		return
				+	}
				+
				+	// Verifying a CSPRNG properly is far outside what this test can do;
				+	// correctness is delegated to the OS. The buffer is 1 << 25 bytes so
				+	// that a single call exceeds both:
				+	//  * the maximum getentropy request size (256 bytes), and
				+	//  * the maximum getrandom request size (2^25 - 1 bytes).
				+	scratch := make([]byte, 1 << 25, context.allocator)
				+	defer delete(scratch)
				+
				+	// A zero byte at either end is a legitimate random outcome, so retry
				+	// a bounded number of times before declaring the generator broken.
				+	randomized := false
				+	for _ in 0..<256 {
				+		mem.zero_explicit(raw_data(scratch), len(scratch))
				+		crypto.rand_bytes(scratch)
				+
				+		head, tail := scratch[0], scratch[len(scratch)-1]
				+		if head == 0 || tail == 0 {
				+			continue
				+		}
				+		randomized = true
				+		break
				+	}
				+
				+	expect(t, randomized, "Expected to randomize the head and tail of the buffer within a handful of attempts")
				+}
			
 
				+
			
 
				+@(test)
				+bench_modern :: proc(t: ^testing.T) {
				+	// Test-runner entry point for the benchmarks: prints a banner, then
				+	// runs each primitive's benchmark in a fixed order.
				+	fmt.println("Starting benchmarks:")
				+
				+	bench_chacha20(t)
				+	bench_poly1305(t)
				+	bench_chacha20poly1305(t)
				+	bench_x25519(t)
				+}
			
 
				+
			
 
				+_setup_sized_buf :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
				+	// Benchmark setup hook: allocates an options.bytes-long buffer from
				+	// `allocator` and stores it in options.input.
				+	//
				+	// Yields .Allocation_Error when the resulting slice does not have the
				+	// requested length, nil otherwise.
				+	assert(options != nil)
				+
				+	options.input = make([]u8, options.bytes, allocator)
				+	if len(options.input) != options.bytes {
				+		return .Allocation_Error
				+	}
				+	return nil
				+}
			
 
				+
			
 
				+_teardown_sized_buf :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
				+	// Benchmark teardown hook: releases the buffer held in options.input.
				+	//
				+	// The buffer is freed through `allocator`, the same parameter that
				+	// _setup_sized_buf allocates from, so a caller that supplies a
				+	// non-default allocator gets a matching make/delete pair.
				+	//
				+	// Always returns nil.
				+	assert(options != nil)
				+
				+	delete(options.input, allocator)
				+	return nil
				+}
			
 
				+
			
 
				+_benchmark_chacha20 :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
				+	// Benchmark body for ChaCha20: XORs options.input with the keystream
				+	// options.rounds times, passing the buffer as both arguments to
				+	// xor_bytes, using a fixed key and an all-zero nonce.
				+	//
				+	// Writes options.count and options.processed; always returns nil.
				+	// `allocator` is part of the hook signature and is not used here.
				+	buf := options.input
				+	key := [chacha20.KEY_SIZE]byte{
				+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
				+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
				+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
				+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
				+	}
				+	nonce := [chacha20.NONCE_SIZE]byte{
				+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
				+		0x00, 0x00, 0x00, 0x00,
				+	}
				+
				+	ctx: chacha20.Context = ---
				+	chacha20.init(&ctx, key[:], nonce[:])
				+
				+	// Half-open range: exactly options.rounds iterations, so the work
				+	// performed matches the count/processed figures reported below.
				+	for _ in 0..<options.rounds {
				+		chacha20.xor_bytes(&ctx, buf, buf)
				+	}
				+	options.count     = options.rounds
				+	options.processed = options.rounds * options.bytes
				+	return nil
				+}
			
 
				+
			
 
				+_benchmark_poly1305 :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
				+	// Benchmark body for Poly1305: computes the tag of options.input
				+	// options.rounds times under a fixed key. The tag itself is discarded.
				+	//
				+	// Writes options.count and options.processed; always returns nil.
				+	// `allocator` is part of the hook signature and is not used here.
				+	buf := options.input
				+	key := [poly1305.KEY_SIZE]byte{
				+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
				+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
				+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
				+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
				+	}
				+
				+	tag: [poly1305.TAG_SIZE]byte = ---
				+	// Half-open range: exactly options.rounds iterations, so the work
				+	// performed matches the count/processed figures reported below.
				+	for _ in 0..<options.rounds {
				+		poly1305.sum(tag[:], buf, key[:])
				+	}
				+	options.count     = options.rounds
				+	options.processed = options.rounds * options.bytes
				+	return nil
				+}
			
 
				+
			
 
				+_benchmark_chacha20poly1305 :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
				+	// Benchmark body for chacha20poly1305: runs encrypt over options.input
				+	// options.rounds times with a fixed key, an all-zero nonce and a nil
				+	// fifth argument. The buffer is passed as both the first and the last
				+	// argument, and the tag is discarded.
				+	//
				+	// Writes options.count and options.processed; always returns nil.
				+	// `allocator` is part of the hook signature and is not used here.
				+	buf := options.input
				+	key := [chacha20.KEY_SIZE]byte{
				+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
				+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
				+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
				+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
				+	}
				+	nonce := [chacha20.NONCE_SIZE]byte{
				+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
				+		0x00, 0x00, 0x00, 0x00,
				+	}
				+
				+	tag: [chacha20poly1305.TAG_SIZE]byte = ---
				+
				+	// Half-open range: exactly options.rounds iterations, so the work
				+	// performed matches the count/processed figures reported below.
				+	for _ in 0..<options.rounds {
				+		chacha20poly1305.encrypt(buf, tag[:], key[:], nonce[:], nil, buf)
				+	}
				+	options.count     = options.rounds
				+	options.processed = options.rounds * options.bytes
				+	return nil
				+}
			
 
				+
			
 
				+benchmark_print :: proc(name: string, options: ^time.Benchmark_Options) {
				+	// Prints a two-line summary of one benchmark run: the label, round
				+	// count, bytes processed and elapsed nanoseconds, followed by the
				+	// rounds-per-second and megabytes-per-second figures from `options`.
				+	elapsed_ns := time.duration_nanoseconds(options.duration)
				+
				+	fmt.printf(
				+		"\t[%v] %v rounds, %v bytes processed in %v ns\n\t\t%5.3f rounds/s, %5.3f MiB/s\n",
				+		name, options.rounds, options.processed, elapsed_ns,
				+		options.rounds_per_second, options.megabytes_per_second,
				+	)
				+}
			
 
				+
			
 
				+bench_chacha20 :: proc(t: ^testing.T) {
				+	// Runs the ChaCha20 benchmark at three input sizes, reusing a single
				+	// options struct, and prints a report after each run.
				+	Case :: struct {
				+		name:  string,
				+		bytes: int,
				+	}
				+	cases := [?]Case{
				+		{"ChaCha20 64 bytes", 64},
				+		{"ChaCha20 1024 bytes", 1024},
				+		{"ChaCha20 65536 bytes", 65536},
				+	}
				+
				+	options := &time.Benchmark_Options{
				+		rounds   = 1_000,
				+		setup    = _setup_sized_buf,
				+		bench    = _benchmark_chacha20,
				+		teardown = _teardown_sized_buf,
				+	}
				+
				+	for c in cases {
				+		options.bytes = c.bytes
				+		err := time.benchmark(options, context.allocator)
				+		expect(t, err == nil, c.name)
				+		benchmark_print(c.name, options)
				+	}
				+}
			
 
				+
			
 
				+bench_poly1305 :: proc(t: ^testing.T) {
				+	// Runs the Poly1305 benchmark at two input sizes, reusing a single
				+	// options struct, and prints a report after each run.
				+	Case :: struct {
				+		name:  string,
				+		bytes: int,
				+	}
				+	cases := [?]Case{
				+		{"Poly1305 64 zero bytes", 64},
				+		{"Poly1305 1024 zero bytes", 1024},
				+	}
				+
				+	options := &time.Benchmark_Options{
				+		rounds   = 1_000,
				+		setup    = _setup_sized_buf,
				+		bench    = _benchmark_poly1305,
				+		teardown = _teardown_sized_buf,
				+	}
				+
				+	for c in cases {
				+		options.bytes = c.bytes
				+		err := time.benchmark(options, context.allocator)
				+		expect(t, err == nil, c.name)
				+		benchmark_print(c.name, options)
				+	}
				+}
			
 
				+
			
 
				+bench_chacha20poly1305 :: proc(t: ^testing.T) {
				+	// Runs the chacha20poly1305 benchmark at three input sizes, reusing a
				+	// single options struct, and prints a report after each run.
				+	Case :: struct {
				+		name:  string,
				+		bytes: int,
				+	}
				+	cases := [?]Case{
				+		{"chacha20poly1305 64 bytes", 64},
				+		{"chacha20poly1305 1024 bytes", 1024},
				+		{"chacha20poly1305 65536 bytes", 65536},
				+	}
				+
				+	options := &time.Benchmark_Options{
				+		rounds   = 1_000,
				+		setup    = _setup_sized_buf,
				+		bench    = _benchmark_chacha20poly1305,
				+		teardown = _teardown_sized_buf,
				+	}
				+
				+	for c in cases {
				+		options.bytes = c.bytes
				+		err := time.benchmark(options, context.allocator)
				+		expect(t, err == nil, c.name)
				+		benchmark_print(c.name, options)
				+	}
				+}
			
 
				+
			
 
				+bench_x25519 :: proc(t: ^testing.T) {
				+	// Times ITERATIONS back-to-back x25519.scalarmult calls on fixed
				+	// inputs and logs the mean cost per call in microseconds.
				+	ITERATIONS :: 10000
				+
				+	pt := _decode_hex32("deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef")
				+	k := _decode_hex32("cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe")
				+	result: [x25519.POINT_SIZE]byte = ---
				+
				+	began := time.now()
				+	for _ in 0..<ITERATIONS {
				+		x25519.scalarmult(result[:], k[:], pt[:])
				+	}
				+	total := time.since(began)
				+
				+	per_op_us := time.duration_microseconds(total) / ITERATIONS
				+	log(t, fmt.tprintf("x25519.scalarmult: ~%f us/op", per_op_us))
				+}
			
--- a/vendor/OpenGL/constants.odin
+++ b/vendor/OpenGL/constants.odin
@@ -1409,4 +1409,4 @@ TRANSFORM_FEEDBACK_OVERFLOW    :: 0x82EC
 
				 TRANSFORM_FEEDBACK_STREAM_OVERFLOW :: 0x82ED
			
 
				 
			
 
				 // Extensions, extended as necessary
			
 
				-DEVICE_LUID_EXT :: 0x9599;
			
 
				+DEVICE_LUID_EXT :: 0x9599
			
--- a/vendor/OpenGL/wrappers.odin
+++ b/vendor/OpenGL/wrappers.odin
--- a/vendor/glfw/bindings/bindings.odin
+++ b/vendor/glfw/bindings/bindings.odin
@@ -4,6 +4,7 @@ import "core:c"
 
				 import vk "vendor:vulkan"
			
 
				 
			
 
				 when ODIN_OS == "linux"   { foreign import glfw "system:glfw" } // TODO: Add the billion-or-so static libs to link to in linux
			
 
				+when ODIN_OS == "darwin"  { foreign import glfw "system:glfw" }
			
 
				 when ODIN_OS == "windows" { 
			
 
				 	foreign import glfw { 
			
 
				 		"../lib/glfw3_mt.lib",
			
--- a/vendor/portmidi/portmidi.odin
+++ b/vendor/portmidi/portmidi.odin
@@ -3,7 +3,13 @@ package portmidi
 
				 import "core:c"
			
 
				 import "core:strings"
			
 
				 
			
 
				-when ODIN_OS == "windows" { foreign import lib "portmidi.lib" }
			
 
				+when ODIN_OS == "windows" { 
			
 
				+	foreign import lib {
			
 
				+		"portmidi_s.lib",
			
 
				+		"system:Winmm.lib",
			
 
				+		"system:Advapi32.lib",
			
 
				+	}
			
 
				+}
			
 
				 
			
 
				 #assert(size_of(b32) == size_of(c.int))
			
 
				 
			
@@ -15,17 +21,17 @@ Error :: enum c.int {
 
				 	GotData = 1, /**< A "no error" return that also indicates data available */
			
 
				 	HostError = -10000,
			
 
				 	InvalidDeviceId, /** out of range or 
			
 
				-		            * output device when input is requested or 
			
 
				-		            * input device when output is requested or
			
 
				-		            * device is already opened 
			
 
				-		            */
			
 
				+	                   * output device when input is requested or 
			
 
				+	                   * input device when output is requested or
			
 
				+	                   * device is already opened 
			
 
				+	                   */
			
 
				 	InsufficientMemory,
			
 
				 	BufferTooSmall,
			
 
				 	BufferOverflow,
			
 
				 	BadPtr, /* Stream parameter is nil or
			
 
				-		     * stream is not opened or
			
 
				-		     * stream is output when input is required or
			
 
				-		     * stream is input when output is required */
			
 
				+	         * stream is not opened or
			
 
				+	         * stream is output when input is required or
			
 
				+	         * stream is input when output is required */
			
 
				 	BadData, /** illegal midi data, e.g. missing EOX */
			
 
				 	InternalError,
			
 
				 	BufferMaxSize, /** buffer is already as large as it can be */
			
@@ -38,30 +44,30 @@ Stream :: distinct rawptr
 
				 @(default_calling_convention="c", link_prefix="Pm_")
			
 
				 foreign lib {
			
 
				 	/**
			
 
				-	    Initialize() is the library initialisation function - call this before
			
 
				-	    using the library.
			
 
				+		Initialize() is the library initialisation function - call this before
			
 
				+		using the library.
			
 
				 	*/
			
 
				 	Initialize :: proc() -> Error ---
			
 
				 	
			
 
				 	/**
			
 
				-	    Terminate() is the library termination function - call this after
			
 
				-	    using the library.
			
 
				+		Terminate() is the library termination function - call this after
			
 
				+		using the library.
			
 
				 	*/
			
 
				 	Terminate  :: proc() -> Error ---
			
 
				 	
			
 
				 	/**
			
 
				-	    Test whether stream has a pending host error. Normally, the client finds
			
 
				-	    out about errors through returned error codes, but some errors can occur
			
 
				-	    asynchronously where the client does not
			
 
				-	    explicitly call a function, and therefore cannot receive an error code.
			
 
				-	    The client can test for a pending error using HasHostError(). If true,
			
 
				-	    the error can be accessed and cleared by calling GetErrorText(). 
			
 
				-	    Errors are also cleared by calling other functions that can return
			
 
				-	    errors, e.g. OpenInput(), OpenOutput(), Read(), Write(). The
			
 
				-	    client does not need to call HasHostError(). Any pending error will be
			
 
				-	    reported the next time the client performs an explicit function call on 
			
 
				-	    the stream, e.g. an input or output operation. Until the error is cleared,
			
 
				-	    no new error codes will be obtained, even for a different stream.
			
 
				+		Test whether stream has a pending host error. Normally, the client finds
			
 
				+		out about errors through returned error codes, but some errors can occur
			
 
				+		asynchronously where the client does not
			
 
				+		explicitly call a function, and therefore cannot receive an error code.
			
 
				+		The client can test for a pending error using HasHostError(). If true,
			
 
				+		the error can be accessed and cleared by calling GetErrorText(). 
			
 
				+		Errors are also cleared by calling other functions that can return
			
 
				+		errors, e.g. OpenInput(), OpenOutput(), Read(), Write(). The
			
 
				+		client does not need to call HasHostError(). Any pending error will be
			
 
				+		reported the next time the client performs an explicit function call on 
			
 
				+		the stream, e.g. an input or output operation. Until the error is cleared,
			
 
				+		no new error codes will be obtained, even for a different stream.
			
 
				 	*/
			
 
				 	HasHostError :: proc(stream: Stream) -> b32 ---	
			
 
				 }
			
@@ -103,8 +109,8 @@ DeviceInfo :: struct {
 
				 	structVersion: c.int,   /**< this internal structure version */ 
			
 
				 	interf:        cstring, /**< underlying MIDI API, e.g. MMSystem or DirectX */
			
 
				 	name:          cstring, /**< device name, e.g. USB MidiSport 1x1 */
			
 
				-	input:         c.int,   /**< true iff input is available */
			
 
				-	output:        c.int,   /**< true iff output is available */
			
 
				+	input:         b32,     /**< true iff input is available */
			
 
				+	output:        b32,     /**< true iff output is available */
			
 
				 	opened:        b32,     /**< used by generic PortMidi code to do error checking on arguments */
			
 
				 }
			
 
				 
			
@@ -132,79 +138,78 @@ Before :: #force_inline proc "c" (t1, t2: Timestamp) -> b32 {
 
				 @(default_calling_convention="c", link_prefix="Pm_")
			
 
				 foreign lib {
			
 
				 	/**
			
 
				-	    GetDeviceInfo() returns a pointer to a DeviceInfo structure
			
 
				-	    referring to the device specified by id.
			
 
				-	    If id is out of range the function returns nil.
			
 
				+		GetDeviceInfo() returns a pointer to a DeviceInfo structure
			
 
				+		referring to the device specified by id.
			
 
				+		If id is out of range the function returns nil.
			
 
				 
			
 
				-	    The returned structure is owned by the PortMidi implementation and must
			
 
				-	    not be manipulated or freed. The pointer is guaranteed to be valid
			
 
				-	    between calls to Initialize() and Terminate().
			
 
				+		The returned structure is owned by the PortMidi implementation and must
			
 
				+		not be manipulated or freed. The pointer is guaranteed to be valid
			
 
				+		between calls to Initialize() and Terminate().
			
 
				 	*/
			
 
				-	GetDeviceInfo :: proc(id: DeviceID) -> DeviceInfo ---
			
 
				+	GetDeviceInfo :: proc(id: DeviceID) -> ^DeviceInfo ---
			
 
				 	
			
 
				 	/**
			
 
				-	    OpenInput() and OpenOutput() open devices.
			
 
				-
			
 
				-	    stream is the address of a Stream pointer which will receive
			
 
				-	    a pointer to the newly opened stream.
			
 
				-
			
 
				-	    inputDevice is the id of the device used for input (see DeviceID above).
			
 
				-
			
 
				-	    inputDriverInfo is a pointer to an optional driver specific data structure
			
 
				-	    containing additional information for device setup or handle processing.
			
 
				-	    inputDriverInfo is never required for correct operation. If not used
			
 
				-	    inputDriverInfo should be nil.
			
 
				-
			
 
				-	    outputDevice is the id of the device used for output (see DeviceID above.)
			
 
				-
			
 
				-	    outputDriverInfo is a pointer to an optional driver specific data structure
			
 
				-	    containing additional information for device setup or handle processing.
			
 
				-	    outputDriverInfo is never required for correct operation. If not used
			
 
				-	    outputDriverInfo should be nil.
			
 
				-
			
 
				-	    For input, the buffersize specifies the number of input events to be 
			
 
				-	    buffered waiting to be read using Read(). For output, buffersize 
			
 
				-	    specifies the number of output events to be buffered waiting for output. 
			
 
				-	    (In some cases -- see below -- PortMidi does not buffer output at all
			
 
				-	    and merely passes data to a lower-level API, in which case buffersize
			
 
				-	    is ignored.)
			
 
				-	    
			
 
				-	    latency is the delay in milliseconds applied to timestamps to determine 
			
 
				-	    when the output should actually occur. (If latency is < 0, 0 is assumed.) 
			
 
				-	    If latency is zero, timestamps are ignored and all output is delivered
			
 
				-	    immediately. If latency is greater than zero, output is delayed until the
			
 
				-	    message timestamp plus the latency. (NOTE: the time is measured relative 
			
 
				-	    to the time source indicated by time_proc. Timestamps are absolute,
			
 
				-	    not relative delays or offsets.) In some cases, PortMidi can obtain
			
 
				-	    better timing than your application by passing timestamps along to the
			
 
				-	    device driver or hardware. Latency may also help you to synchronize midi
			
 
				-	    data to audio data by matching midi latency to the audio buffer latency.
			
 
				-
			
 
				-	    time_proc is a pointer to a procedure that returns time in milliseconds. It
			
 
				-	    may be nil, in which case a default millisecond timebase (PortTime) is 
			
 
				-	    used. If the application wants to use PortTime, it should start the timer
			
 
				-	    (call Pt_Start) before calling OpenInput or OpenOutput. If the
			
 
				-	    application tries to start the timer *after* OpenInput or OpenOutput,
			
 
				-	    it may get a ptAlreadyStarted error from Pt_Start, and the application's
			
 
				-	    preferred time resolution and callback function will be ignored.
			
 
				-	    time_proc result values are appended to incoming MIDI data, and time_proc
			
 
				-	    times are used to schedule outgoing MIDI data (when latency is non-zero).
			
 
				-
			
 
				-	    time_info is a pointer passed to time_proc.
			
 
				-
			
 
				-	    Example: If I provide a timestamp of 5000, latency is 1, and time_proc
			
 
				-	    returns 4990, then the desired output time will be when time_proc returns
			
 
				-	    timestamp+latency = 5001. This will be 5001-4990 = 11ms from now.
			
 
				-
			
 
				-	    return value:
			
 
				-	    Upon success Open() returns NoError and places a pointer to a
			
 
				-	    valid Stream in the stream argument.
			
 
				-	    If a call to Open() fails a nonzero error code is returned (see
			
 
				-	    PMError above) and the value of port is invalid.
			
 
				-
			
 
				-	    Any stream that is successfully opened should eventually be closed
			
 
				-	    by calling Close().
			
 
				-
			
 
				+		OpenInput() and OpenOutput() open devices.
			
 
				+
			
 
				+		stream is the address of a Stream pointer which will receive
			
 
				+		a pointer to the newly opened stream.
			
 
				+
			
 
				+		inputDevice is the id of the device used for input (see DeviceID above).
			
 
				+
			
 
				+		inputDriverInfo is a pointer to an optional driver specific data structure
			
 
				+		containing additional information for device setup or handle processing.
			
 
				+		inputDriverInfo is never required for correct operation. If not used
			
 
				+		inputDriverInfo should be nil.
			
 
				+
			
 
				+		outputDevice is the id of the device used for output (see DeviceID above.)
			
 
				+
			
 
				+		outputDriverInfo is a pointer to an optional driver specific data structure
			
 
				+		containing additional information for device setup or handle processing.
			
 
				+		outputDriverInfo is never required for correct operation. If not used
			
 
				+		outputDriverInfo should be nil.
			
 
				+
			
 
				+		For input, the buffersize specifies the number of input events to be 
			
 
				+		buffered waiting to be read using Read(). For output, buffersize 
			
 
				+		specifies the number of output events to be buffered waiting for output. 
			
 
				+		(In some cases -- see below -- PortMidi does not buffer output at all
			
 
				+		and merely passes data to a lower-level API, in which case buffersize
			
 
				+		is ignored.)
			
 
				+
			
 
				+		latency is the delay in milliseconds applied to timestamps to determine 
			
 
				+		when the output should actually occur. (If latency is < 0, 0 is assumed.) 
			
 
				+		If latency is zero, timestamps are ignored and all output is delivered
			
 
				+		immediately. If latency is greater than zero, output is delayed until the
			
 
				+		message timestamp plus the latency. (NOTE: the time is measured relative 
			
 
				+		to the time source indicated by time_proc. Timestamps are absolute,
			
 
				+		not relative delays or offsets.) In some cases, PortMidi can obtain
			
 
				+		better timing than your application by passing timestamps along to the
			
 
				+		device driver or hardware. Latency may also help you to synchronize midi
			
 
				+		data to audio data by matching midi latency to the audio buffer latency.
			
 
				+
			
 
				+		time_proc is a pointer to a procedure that returns time in milliseconds. It
			
 
				+		may be nil, in which case a default millisecond timebase (PortTime) is 
			
 
				+		used. If the application wants to use PortTime, it should start the timer
			
 
				+		(call Pt_Start) before calling OpenInput or OpenOutput. If the
			
 
				+		application tries to start the timer *after* OpenInput or OpenOutput,
			
 
				+		it may get a ptAlreadyStarted error from Pt_Start, and the application's
			
 
				+		preferred time resolution and callback function will be ignored.
			
 
				+		time_proc result values are appended to incoming MIDI data, and time_proc
			
 
				+		times are used to schedule outgoing MIDI data (when latency is non-zero).
			
 
				+
			
 
				+		time_info is a pointer passed to time_proc.
			
 
				+
			
 
				+		Example: If I provide a timestamp of 5000, latency is 1, and time_proc
			
 
				+		returns 4990, then the desired output time will be when time_proc returns
			
 
				+		timestamp+latency = 5001. This will be 5001-4990 = 11ms from now.
			
 
				+
			
 
				+		return value:
			
 
				+		Upon success Open() returns NoError and places a pointer to a
			
 
				+		valid Stream in the stream argument.
			
 
				+		If a call to Open() fails a nonzero error code is returned (see
			
 
				+		PMError above) and the value of port is invalid.
			
 
				+
			
 
				+		Any stream that is successfully opened should eventually be closed
			
 
				+		by calling Close().
			
 
				 	*/
			
 
				 	OpenInput :: proc(stream: ^Stream,
			
 
				 	                  inputDevice: DeviceID,
			
@@ -373,71 +378,80 @@ MessageData2  :: #force_inline proc "c" (msg: Message) -> c.int {
 
				 	return c.int((msg >> 16) & 0xFF)
			
 
				 }
			
 
				 
			
 
				+MessageCompose :: MessageMake
			
 
				+// Splits a packed Message into its three byte-wide fields: status from
				+// bits 0-7, data1 from bits 8-15 and data2 from bits 16-23.
				+MessageDecompose :: #force_inline proc "c" (msg: Message) -> (status, data1, data2: c.int) {
				+	return c.int(msg & 0xFF), c.int((msg >> 8) & 0xFF), c.int((msg >> 16) & 0xFF)
				+}
			
 
				+
			
 
				+
			
 
				 Message :: distinct i32
			
 
				 /**
			
 
				-   All midi data comes in the form of Event structures. A sysex
			
 
				-   message is encoded as a sequence of Event structures, with each
			
 
				-   structure carrying 4 bytes of the message, i.e. only the first
			
 
				-   Event carries the status byte.
			
 
				-
			
 
				-   Note that MIDI allows nested messages: the so-called "real-time" MIDI 
			
 
				-   messages can be inserted into the MIDI byte stream at any location, 
			
 
				-   including within a sysex message. MIDI real-time messages are one-byte
			
 
				-   messages used mainly for timing (see the MIDI spec). PortMidi retains 
			
 
				-   the order of non-real-time MIDI messages on both input and output, but 
			
 
				-   it does not specify exactly how real-time messages are processed. This
			
 
				-   is particulary problematic for MIDI input, because the input parser 
			
 
				-   must either prepare to buffer an unlimited number of sysex message 
			
 
				-   bytes or to buffer an unlimited number of real-time messages that 
			
 
				-   arrive embedded in a long sysex message. To simplify things, the input
			
 
				-   parser is allowed to pass real-time MIDI messages embedded within a 
			
 
				-   sysex message, and it is up to the client to detect, process, and 
			
 
				-   remove these messages as they arrive.
			
 
				-
			
 
				-   When receiving sysex messages, the sysex message is terminated
			
 
				-   by either an EOX status byte (anywhere in the 4 byte messages) or
			
 
				-   by a non-real-time status byte in the low order byte of the message.
			
 
				-   If you get a non-real-time status byte but there was no EOX byte, it 
			
 
				-   means the sysex message was somehow truncated. This is not
			
 
				-   considered an error; e.g., a missing EOX can result from the user
			
 
				-   disconnecting a MIDI cable during sysex transmission.
			
 
				-
			
 
				-   A real-time message can occur within a sysex message. A real-time 
			
 
				-   message will always occupy a full Event with the status byte in 
			
 
				-   the low-order byte of the Event message field. (This implies that
			
 
				-   the byte-order of sysex bytes and real-time message bytes may not
			
 
				-   be preserved -- for example, if a real-time message arrives after
			
 
				-   3 bytes of a sysex message, the real-time message will be delivered
			
 
				-   first. The first word of the sysex message will be delivered only
			
 
				-   after the 4th byte arrives, filling the 4-byte Event message field.
			
 
				-   
			
 
				-   The timestamp field is observed when the output port is opened with
			
 
				-   a non-zero latency. A timestamp of zero means "use the current time",
			
 
				-   which in turn means to deliver the message with a delay of
			
 
				-   latency (the latency parameter used when opening the output port.)
			
 
				-   Do not expect PortMidi to sort data according to timestamps -- 
			
 
				-   messages should be sent in the correct order, and timestamps MUST 
			
 
				-   be non-decreasing. See also "Example" for OpenOutput() above.
			
 
				-
			
 
				-   A sysex message will generally fill many Event structures. On 
			
 
				-   output to a Stream with non-zero latency, the first timestamp
			
 
				-   on sysex message data will determine the time to begin sending the 
			
 
				-   message. PortMidi implementations may ignore timestamps for the 
			
 
				-   remainder of the sysex message. 
			
 
				-   
			
 
				-   On input, the timestamp ideally denotes the arrival time of the 
			
 
				-   status byte of the message. The first timestamp on sysex message 
			
 
				-   data will be valid. Subsequent timestamps may denote 
			
 
				-   when message bytes were actually received, or they may be simply 
			
 
				-   copies of the first timestamp.
			
 
				-
			
 
				-   Timestamps for nested messages: If a real-time message arrives in 
			
 
				-   the middle of some other message, it is enqueued immediately with 
			
 
				-   the timestamp corresponding to its arrival time. The interrupted 
			
 
				-   non-real-time message or 4-byte packet of sysex data will be enqueued 
			
 
				-   later. The timestamp of interrupted data will be equal to that of
			
 
				-   the interrupting real-time message to insure that timestamps are
			
 
				-   non-decreasing.
			
 
				+	All midi data comes in the form of Event structures. A sysex
			
 
				+	message is encoded as a sequence of Event structures, with each
			
 
				+	structure carrying 4 bytes of the message, i.e. only the first
			
 
				+	Event carries the status byte.
			
 
				+
			
 
				+	Note that MIDI allows nested messages: the so-called "real-time" MIDI 
			
 
				+	messages can be inserted into the MIDI byte stream at any location, 
			
 
				+	including within a sysex message. MIDI real-time messages are one-byte
			
 
				+	messages used mainly for timing (see the MIDI spec). PortMidi retains 
			
 
				+	the order of non-real-time MIDI messages on both input and output, but 
			
 
				+	it does not specify exactly how real-time messages are processed. This
			
 
				+	is particularly problematic for MIDI input, because the input parser 
			
 
				+	must either prepare to buffer an unlimited number of sysex message 
			
 
				+	bytes or to buffer an unlimited number of real-time messages that 
			
 
				+	arrive embedded in a long sysex message. To simplify things, the input
			
 
				+	parser is allowed to pass real-time MIDI messages embedded within a 
			
 
				+	sysex message, and it is up to the client to detect, process, and 
			
 
				+	remove these messages as they arrive.
			
 
				+
			
 
				+	When receiving sysex messages, the sysex message is terminated
			
 
				+	by either an EOX status byte (anywhere in the 4 byte messages) or
			
 
				+	by a non-real-time status byte in the low order byte of the message.
			
 
				+	If you get a non-real-time status byte but there was no EOX byte, it 
			
 
				+	means the sysex message was somehow truncated. This is not
			
 
				+	considered an error; e.g., a missing EOX can result from the user
			
 
				+	disconnecting a MIDI cable during sysex transmission.
			
 
				+
			
 
				+	A real-time message can occur within a sysex message. A real-time 
			
 
				+	message will always occupy a full Event with the status byte in 
			
 
				+	the low-order byte of the Event message field. (This implies that
			
 
				+	the byte-order of sysex bytes and real-time message bytes may not
			
 
				+	be preserved -- for example, if a real-time message arrives after
			
 
				+	3 bytes of a sysex message, the real-time message will be delivered
			
 
				+	first. The first word of the sysex message will be delivered only
			
 
				+	after the 4th byte arrives, filling the 4-byte Event message field.)
			
 
				+
			
 
				+	The timestamp field is observed when the output port is opened with
			
 
				+	a non-zero latency. A timestamp of zero means "use the current time",
			
 
				+	which in turn means to deliver the message with a delay of
			
 
				+	latency (the latency parameter used when opening the output port.)
			
 
				+	Do not expect PortMidi to sort data according to timestamps -- 
			
 
				+	messages should be sent in the correct order, and timestamps MUST 
			
 
				+	be non-decreasing. See also "Example" for OpenOutput() above.
			
 
				+
			
 
				+	A sysex message will generally fill many Event structures. On 
			
 
				+	output to a Stream with non-zero latency, the first timestamp
			
 
				+	on sysex message data will determine the time to begin sending the 
			
 
				+	message. PortMidi implementations may ignore timestamps for the 
			
 
				+	remainder of the sysex message. 
			
 
				+
			
 
				+	On input, the timestamp ideally denotes the arrival time of the 
			
 
				+	status byte of the message. The first timestamp on sysex message 
			
 
				+	data will be valid. Subsequent timestamps may denote 
			
 
				+	when message bytes were actually received, or they may be simply 
			
 
				+	copies of the first timestamp.
			
 
				+
			
 
				+	Timestamps for nested messages: If a real-time message arrives in 
			
 
				+	the middle of some other message, it is enqueued immediately with 
			
 
				+	the timestamp corresponding to its arrival time. The interrupted 
			
 
				+	non-real-time message or 4-byte packet of sysex data will be enqueued 
			
 
				+	later. The timestamp of interrupted data will be equal to that of
			
 
				+	the interrupting real-time message to ensure that timestamps are
			
 
				+	non-decreasing.
			
 
				  */
			
 
				 Event :: struct {
			
 
				 	message:   Message,
			
@@ -480,18 +494,18 @@ foreign lib {
 
				 	
			
 
				 	/** 
			
 
				 		Write() writes midi data from a buffer. This may contain:
			
 
				-		    - short messages 
			
 
				+			- short messages 
			
 
				 		or 
			
 
				-		    - sysex messages that are converted into a sequence of Event
			
 
				-		      structures, e.g. sending data from a file or forwarding them
			
 
				-		      from midi input.
			
 
				+			- sysex messages that are converted into a sequence of Event
			
 
				+			  structures, e.g. sending data from a file or forwarding them
			
 
				+			  from midi input.
			
 
				 
			
 
				 		Use WriteSysEx() to write a sysex message stored as a contiguous 
			
 
				 		array of bytes.
			
 
				 
			
 
				 		Sysex data may contain embedded real-time messages.
			
 
				 	*/
			
 
				-	Write      :: proc(stream: Stream, buffer: [^]Event, length: i32) -> Error ---
			
 
				+	Write :: proc(stream: Stream, buffer: [^]Event, length: i32) -> Error ---
			
 
				 	
			
 
				 	/**
			
 
				 		WriteShort() writes a timestamped non-system-exclusive midi message.
			
--- a/vendor/portmidi/portmidi_s.lib
+++ b/vendor/portmidi/portmidi_s.lib
--- a/vendor/raylib/raylib.odin
+++ b/vendor/raylib/raylib.odin
@@ -1077,6 +1077,7 @@ foreign lib {
 
				 	GetMouseX             :: proc() -> c.int ---                      // Returns mouse position X
			
 
				 	GetMouseY             :: proc() -> c.int ---                      // Returns mouse position Y
			
 
				 	GetMousePosition      :: proc() -> Vector2 ---                    // Returns mouse position XY
			
 
				+	GetMouseDelta         :: proc() -> Vector2 ---                    // Returns mouse delta XY
			
 
				 	SetMousePosition      :: proc(x, y: c.int) ---                    // Set mouse position XY
			
 
				 	SetMouseOffset        :: proc(offsetX, offsetY: c.int) ---        // Set mouse offset
			
 
				 	SetMouseScale         :: proc(scaleX, scaleY: f32) ---            // Set mouse scaling
			
@@ -1568,4 +1569,4 @@ MemAllocatorProc :: proc(allocator_data: rawptr, mode: mem.Allocator_Mode,
 
				 		return nil, .Mode_Not_Implemented
			
 
				 	}	
			
 
				 	return nil, .Mode_Not_Implemented
			
 
				-}
			
 
				+}