
Merge branch 'master' of https://github.com/odin-lang/Odin

gingerBill 3 years ago
parent
commit
bc775afccb

+ 35 - 0
core/crypto/_fiat/README.md

@@ -0,0 +1,35 @@
+# fiat
+
+This package contains the low-level arithmetic required to implement certain
+cryptographic primitives, ported from the [fiat-crypto project][1],
+along with some higher-level helpers.
+
+## Notes
+
+fiat-crypto offers a choice of 3 licenses for derived works.  The 1-Clause
+BSD license was chosen, as it is compatible with Odin's existing licensing.
+
+The routines are intended to be timing-safe, as long as the underlying
+integer arithmetic is constant time.  This is true on most systems commonly
+used today, with the notable exception of WASM.
+
+While fiat-crypto provides output targeting both 32-bit and 64-bit
+architectures, only the 64-bit versions were used, as 32-bit architectures
+are becoming increasingly uncommon and irrelevant.
+
+With the current Odin syntax, the Go output is trivial to port in most
+cases, and it was used as the basis of this port.
+
+In the future, it would be better to auto-generate the Odin code, either
+directly by adding an appropriate code-gen backend written in Coq, or
+perhaps by parsing the JSON output.
+
+As this is a port rather than autogenerated output, none of fiat-crypto's
+formal verification guarantees apply, unless it is possible to prove binary
+equivalence.
+
+For the most part, alterations to the base fiat-crypto generated code were
+kept to a minimum, to aid auditability.  This results in a somewhat
+idiosyncratic style, and in some cases minor performance penalties.
+
+[1]: https://github.com/mit-plv/fiat-crypto
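
As an editorial aside on the "trivial to port" claim above: the generated Go code is built almost entirely from wide-multiply and add-with-carry intrinsics, and Odin's `core:math/bits` exposes the same primitives (`bits.mul_u64`, `bits.add_u64`) that the ported files below use throughout.  The following is a minimal sketch of that correspondence, not an excerpt from fiat-crypto's output:

```odin
package fiat_port_sketch

import "core:math/bits"

// Add two 128-bit values expressed as (hi, lo) limb pairs.  The Go output
// expresses this shape of code with bits.Mul64/bits.Add64; in Odin it becomes
// bits.mul_u64/bits.add_u64, so the port is largely a mechanical rename.
add_u128 :: proc "contextless" (a_hi, a_lo, b_hi, b_lo: u64) -> (hi, lo: u64) {
	carry: u64
	lo, carry = bits.add_u64(a_lo, b_lo, 0)
	hi, _ = bits.add_u64(a_hi, b_hi, carry)
	return
}
```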

+ 24 - 0
core/crypto/_fiat/fiat.odin

@@ -0,0 +1,24 @@
+package fiat
+
+// This package provides various helpers and types common to all of the
+// fiat-crypto derived backends.
+
+// This code only works on a two's complement system.
+#assert((-1 & 3) == 3)
+
+u1 :: distinct u8
+i1 :: distinct i8
+
+cmovznz_u64 :: #force_inline proc "contextless" (arg1: u1, arg2, arg3: u64) -> (out1: u64) {
+	x1 := (u64(arg1) * 0xffffffffffffffff)
+	x2 := ((x1 & arg3) | ((~x1) & arg2))
+	out1 = x2
+	return
+}
+
+cmovznz_u32 :: #force_inline proc "contextless" (arg1: u1, arg2, arg3: u32) -> (out1: u32) {
+	x1 := (u32(arg1) * 0xffffffff)
+	x2 := ((x1 & arg3) | ((~x1) & arg2))
+	out1 = x2
+	return
+}
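
For orientation (an editorial sketch, not part of the commit): `cmovznz_u64` is a branchless select, returning `arg3` when the flag is 1 and `arg2` when it is 0, which is what lets the field code do conditional assignment without data-dependent branches.  A minimal usage sketch, using the import path this commit establishes:

```odin
package cmovznz_usage_sketch

import fiat "core:crypto/_fiat"

// Conditionally overwrite dst with src when ctrl is 1, in constant time.
// This mirrors how fe_cond_assign in the field packages uses the helper.
cond_assign_limbs :: proc "contextless" (dst, src: ^[4]u64, ctrl: fiat.u1) {
	for i in 0..<4 {
		dst[i] = fiat.cmovznz_u64(ctrl, dst[i], src[i])
	}
}
```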

+ 138 - 0
core/crypto/_fiat/field_curve25519/field.odin

@@ -0,0 +1,138 @@
+package field_curve25519
+
+import "core:crypto"
+import "core:mem"
+
+fe_relax_cast :: #force_inline proc "contextless" (arg1: ^Tight_Field_Element) -> ^Loose_Field_Element {
+	return transmute(^Loose_Field_Element)(arg1)
+}
+
+fe_tighten_cast :: #force_inline proc "contextless" (arg1: ^Loose_Field_Element) -> ^Tight_Field_Element {
+	return transmute(^Tight_Field_Element)(arg1)
+}
+
+fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte) {
+	// Ignore the unused bit by copying the input and masking the bit off
+	// prior to deserialization.
+	tmp1: [32]byte = ---
+	copy_slice(tmp1[:], arg1[:])
+	tmp1[31] &= 127
+
+	_fe_from_bytes(out1, &tmp1)
+
+	mem.zero_explicit(&tmp1, size_of(tmp1))
+}
+
+fe_equal :: proc "contextless" (arg1, arg2: ^Tight_Field_Element) -> int {
+	tmp2: [32]byte = ---
+
+	fe_to_bytes(&tmp2, arg2)
+	ret := fe_equal_bytes(arg1, &tmp2)
+
+	mem.zero_explicit(&tmp2, size_of(tmp2))
+
+	return ret
+}
+
+fe_equal_bytes :: proc "contextless" (arg1: ^Tight_Field_Element, arg2: ^[32]byte) -> int {
+	tmp1: [32]byte = ---
+
+	fe_to_bytes(&tmp1, arg1)
+
+	ret := crypto.compare_constant_time(tmp1[:], arg2[:])
+
+	mem.zero_explicit(&tmp1, size_of(tmp1))
+
+	return ret
+}
+
+fe_carry_pow2k :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element, arg2: uint) {
+	// Special case: `arg2 == 0` is treated as `arg1^0 = 1`, though this
+	// should never happen.
+	if arg2 == 0 {
+		fe_one(out1)
+		return
+	}
+
+	fe_carry_square(out1, arg1)
+	for _ in 1..<arg2 {
+		fe_carry_square(out1, fe_relax_cast(out1))
+	}
+}
+
+fe_carry_opp :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
+	fe_opp(fe_relax_cast(out1), arg1)
+	fe_carry(out1, fe_relax_cast(out1))
+}
+
+fe_carry_invsqrt :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) -> int {
+	// Inverse square root taken from Monocypher.
+
+	tmp1, tmp2, tmp3: Tight_Field_Element = ---, ---, ---
+
+	// t0 = x^((p-5)/8)
+	// Can be achieved with a simple double & add ladder,
+	// but it would be slower.
+	fe_carry_pow2k(&tmp1, arg1, 1)
+	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 2)
+	fe_carry_mul(&tmp2, arg1, fe_relax_cast(&tmp2))
+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), fe_relax_cast(&tmp2))
+	fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 1)
+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
+	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 5)
+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
+	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 10)
+	fe_carry_mul(&tmp2, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
+	fe_carry_pow2k(&tmp3, fe_relax_cast(&tmp2), 20)
+	fe_carry_mul(&tmp2, fe_relax_cast(&tmp3), fe_relax_cast(&tmp2))
+	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp2), 10)
+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
+	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 50)
+	fe_carry_mul(&tmp2, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
+	fe_carry_pow2k(&tmp3, fe_relax_cast(&tmp2), 100)
+	fe_carry_mul(&tmp2, fe_relax_cast(&tmp3), fe_relax_cast(&tmp2))
+	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp2), 50)
+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
+	fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 2)
+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), arg1)
+
+	// quartic = x^((p-1)/4)
+	quartic := &tmp2
+	fe_carry_square(quartic, fe_relax_cast(&tmp1))
+	fe_carry_mul(quartic, fe_relax_cast(quartic), arg1)
+
+	// Serialize quartic once to save on repeated serialization/sanitization.
+	quartic_buf: [32]byte = ---
+	fe_to_bytes(&quartic_buf, quartic)
+	check := &tmp3
+
+	fe_one(check)
+	p1 := fe_equal_bytes(check, &quartic_buf)
+	fe_carry_opp(check, check)
+	m1 := fe_equal_bytes(check, &quartic_buf)
+	fe_carry_opp(check, &SQRT_M1)
+	ms := fe_equal_bytes(check, &quartic_buf)
+
+	// if quartic == -1 or sqrt(-1)
+	// then  isr = x^((p-1)/4) * sqrt(-1)
+	// else  isr = x^((p-1)/4)
+	fe_carry_mul(out1, fe_relax_cast(&tmp1), fe_relax_cast(&SQRT_M1))
+	fe_cond_assign(out1, &tmp1, (m1|ms) ~ 1)
+
+	mem.zero_explicit(&tmp1, size_of(tmp1))
+	mem.zero_explicit(&tmp2, size_of(tmp2))
+	mem.zero_explicit(&tmp3, size_of(tmp3))
+	mem.zero_explicit(&quartic_buf, size_of(quartic_buf))
+
+	return p1 | m1
+}
+
+fe_carry_inv :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+	tmp1: Tight_Field_Element
+
+	fe_carry_square(&tmp1, arg1)
+	_ = fe_carry_invsqrt(&tmp1, fe_relax_cast(&tmp1))
+	fe_carry_square(&tmp1, fe_relax_cast(&tmp1))
+	fe_carry_mul(out1, fe_relax_cast(&tmp1), arg1)
+
+	mem.zero_explicit(&tmp1, size_of(tmp1))
+}
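
An editorial usage sketch (not part of the commit) tying the helpers above together: it deserializes a non-zero element and checks that `fe_carry_inv` really produces a multiplicative inverse, exercising the relax/tighten cast convention along the way.  The import path is the one implied by this commit's file layout; the leading underscore marks the package as internal, so code outside `core:crypto` would not normally import it.

```odin
package field_curve25519_sketch

import field "core:crypto/_fiat/field_curve25519"

// Returns true when z * z^-1 == 1 for a sample element (the X25519 base
// point u-coordinate, 9); any non-zero element would do.
inverse_round_trips :: proc () -> bool {
	z_bytes: [32]byte
	z_bytes[0] = 9

	z, z_inv, prod, one: field.Tight_Field_Element
	field.fe_from_bytes(&z, &z_bytes)
	field.fe_carry_inv(&z_inv, field.fe_relax_cast(&z))
	field.fe_carry_mul(&prod, field.fe_relax_cast(&z), field.fe_relax_cast(&z_inv))
	field.fe_one(&one)
	return field.fe_equal(&prod, &one) == 1
}
```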

+ 616 - 0
core/crypto/_fiat/field_curve25519/field51.odin

@@ -0,0 +1,616 @@
+// The BSD 1-Clause License (BSD-1-Clause)
+//
+// Copyright (c) 2015-2020 the fiat-crypto authors (see the AUTHORS file)
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     1. Redistributions of source code must retain the above copyright
+//        notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY the fiat-crypto authors "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Berkeley Software Design,
+// Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package field_curve25519
+
+// This file provides arithmetic on the field Z/(2^255-19) using
+// unsaturated 64-bit integer arithmetic.  It is derived primarily
+// from the machine-generated Go output from the fiat-crypto project.
+//
+// While the base implementation is provably correct, this implementation
+// makes no such claims as the port and optimizations were done by hand.
+// At some point, it may be worth adding support to fiat-crypto for
+// generating Odin output.
+//
+// TODO:
+//  * When fiat-crypto supports it, using saturated 64-bit limbs
+//    instead of 51-bit limbs will be faster, though the gains are
+//    minimal unless adcx/adox/mulx are used.
+
+import fiat "core:crypto/_fiat"
+import "core:math/bits"
+
+Loose_Field_Element :: distinct [5]u64
+Tight_Field_Element :: distinct [5]u64
+
+SQRT_M1 := Tight_Field_Element{
+	1718705420411056,
+	234908883556509,
+	2233514472574048,
+	2117202627021982,
+	765476049583133,
+}
+
+_addcarryx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+	x1 := ((u64(arg1) + arg2) + arg3)
+	x2 := (x1 & 0x7ffffffffffff)
+	x3 := fiat.u1((x1 >> 51))
+	out1 = x2
+	out2 = x3
+	return
+}
+
+_subborrowx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3))
+	x2 := fiat.i1((x1 >> 51))
+	x3 := (u64(x1) & 0x7ffffffffffff)
+	out1 = x3
+	out2 = (0x0 - fiat.u1(x2))
+	return
+}
+
+fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
+	x2, x1 := bits.mul_u64(arg1[4], (arg2[4] * 0x13))
+	x4, x3 := bits.mul_u64(arg1[4], (arg2[3] * 0x13))
+	x6, x5 := bits.mul_u64(arg1[4], (arg2[2] * 0x13))
+	x8, x7 := bits.mul_u64(arg1[4], (arg2[1] * 0x13))
+	x10, x9 := bits.mul_u64(arg1[3], (arg2[4] * 0x13))
+	x12, x11 := bits.mul_u64(arg1[3], (arg2[3] * 0x13))
+	x14, x13 := bits.mul_u64(arg1[3], (arg2[2] * 0x13))
+	x16, x15 := bits.mul_u64(arg1[2], (arg2[4] * 0x13))
+	x18, x17 := bits.mul_u64(arg1[2], (arg2[3] * 0x13))
+	x20, x19 := bits.mul_u64(arg1[1], (arg2[4] * 0x13))
+	x22, x21 := bits.mul_u64(arg1[4], arg2[0])
+	x24, x23 := bits.mul_u64(arg1[3], arg2[1])
+	x26, x25 := bits.mul_u64(arg1[3], arg2[0])
+	x28, x27 := bits.mul_u64(arg1[2], arg2[2])
+	x30, x29 := bits.mul_u64(arg1[2], arg2[1])
+	x32, x31 := bits.mul_u64(arg1[2], arg2[0])
+	x34, x33 := bits.mul_u64(arg1[1], arg2[3])
+	x36, x35 := bits.mul_u64(arg1[1], arg2[2])
+	x38, x37 := bits.mul_u64(arg1[1], arg2[1])
+	x40, x39 := bits.mul_u64(arg1[1], arg2[0])
+	x42, x41 := bits.mul_u64(arg1[0], arg2[4])
+	x44, x43 := bits.mul_u64(arg1[0], arg2[3])
+	x46, x45 := bits.mul_u64(arg1[0], arg2[2])
+	x48, x47 := bits.mul_u64(arg1[0], arg2[1])
+	x50, x49 := bits.mul_u64(arg1[0], arg2[0])
+	x51, x52 := bits.add_u64(x13, x7, u64(0x0))
+	x53, _ := bits.add_u64(x14, x8, u64(fiat.u1(x52)))
+	x55, x56 := bits.add_u64(x17, x51, u64(0x0))
+	x57, _ := bits.add_u64(x18, x53, u64(fiat.u1(x56)))
+	x59, x60 := bits.add_u64(x19, x55, u64(0x0))
+	x61, _ := bits.add_u64(x20, x57, u64(fiat.u1(x60)))
+	x63, x64 := bits.add_u64(x49, x59, u64(0x0))
+	x65, _ := bits.add_u64(x50, x61, u64(fiat.u1(x64)))
+	x67 := ((x63 >> 51) | ((x65 << 13) & 0xffffffffffffffff))
+	x68 := (x63 & 0x7ffffffffffff)
+	x69, x70 := bits.add_u64(x23, x21, u64(0x0))
+	x71, _ := bits.add_u64(x24, x22, u64(fiat.u1(x70)))
+	x73, x74 := bits.add_u64(x27, x69, u64(0x0))
+	x75, _ := bits.add_u64(x28, x71, u64(fiat.u1(x74)))
+	x77, x78 := bits.add_u64(x33, x73, u64(0x0))
+	x79, _ := bits.add_u64(x34, x75, u64(fiat.u1(x78)))
+	x81, x82 := bits.add_u64(x41, x77, u64(0x0))
+	x83, _ := bits.add_u64(x42, x79, u64(fiat.u1(x82)))
+	x85, x86 := bits.add_u64(x25, x1, u64(0x0))
+	x87, _ := bits.add_u64(x26, x2, u64(fiat.u1(x86)))
+	x89, x90 := bits.add_u64(x29, x85, u64(0x0))
+	x91, _ := bits.add_u64(x30, x87, u64(fiat.u1(x90)))
+	x93, x94 := bits.add_u64(x35, x89, u64(0x0))
+	x95, _ := bits.add_u64(x36, x91, u64(fiat.u1(x94)))
+	x97, x98 := bits.add_u64(x43, x93, u64(0x0))
+	x99, _ := bits.add_u64(x44, x95, u64(fiat.u1(x98)))
+	x101, x102 := bits.add_u64(x9, x3, u64(0x0))
+	x103, _ := bits.add_u64(x10, x4, u64(fiat.u1(x102)))
+	x105, x106 := bits.add_u64(x31, x101, u64(0x0))
+	x107, _ := bits.add_u64(x32, x103, u64(fiat.u1(x106)))
+	x109, x110 := bits.add_u64(x37, x105, u64(0x0))
+	x111, _ := bits.add_u64(x38, x107, u64(fiat.u1(x110)))
+	x113, x114 := bits.add_u64(x45, x109, u64(0x0))
+	x115, _ := bits.add_u64(x46, x111, u64(fiat.u1(x114)))
+	x117, x118 := bits.add_u64(x11, x5, u64(0x0))
+	x119, _ := bits.add_u64(x12, x6, u64(fiat.u1(x118)))
+	x121, x122 := bits.add_u64(x15, x117, u64(0x0))
+	x123, _ := bits.add_u64(x16, x119, u64(fiat.u1(x122)))
+	x125, x126 := bits.add_u64(x39, x121, u64(0x0))
+	x127, _ := bits.add_u64(x40, x123, u64(fiat.u1(x126)))
+	x129, x130 := bits.add_u64(x47, x125, u64(0x0))
+	x131, _ := bits.add_u64(x48, x127, u64(fiat.u1(x130)))
+	x133, x134 := bits.add_u64(x67, x129, u64(0x0))
+	x135 := (u64(fiat.u1(x134)) + x131)
+	x136 := ((x133 >> 51) | ((x135 << 13) & 0xffffffffffffffff))
+	x137 := (x133 & 0x7ffffffffffff)
+	x138, x139 := bits.add_u64(x136, x113, u64(0x0))
+	x140 := (u64(fiat.u1(x139)) + x115)
+	x141 := ((x138 >> 51) | ((x140 << 13) & 0xffffffffffffffff))
+	x142 := (x138 & 0x7ffffffffffff)
+	x143, x144 := bits.add_u64(x141, x97, u64(0x0))
+	x145 := (u64(fiat.u1(x144)) + x99)
+	x146 := ((x143 >> 51) | ((x145 << 13) & 0xffffffffffffffff))
+	x147 := (x143 & 0x7ffffffffffff)
+	x148, x149 := bits.add_u64(x146, x81, u64(0x0))
+	x150 := (u64(fiat.u1(x149)) + x83)
+	x151 := ((x148 >> 51) | ((x150 << 13) & 0xffffffffffffffff))
+	x152 := (x148 & 0x7ffffffffffff)
+	x153 := (x151 * 0x13)
+	x154 := (x68 + x153)
+	x155 := (x154 >> 51)
+	x156 := (x154 & 0x7ffffffffffff)
+	x157 := (x155 + x137)
+	x158 := fiat.u1((x157 >> 51))
+	x159 := (x157 & 0x7ffffffffffff)
+	x160 := (u64(x158) + x142)
+	out1[0] = x156
+	out1[1] = x159
+	out1[2] = x160
+	out1[3] = x147
+	out1[4] = x152
+}
+
+fe_carry_square :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+	x1 := (arg1[4] * 0x13)
+	x2 := (x1 * 0x2)
+	x3 := (arg1[4] * 0x2)
+	x4 := (arg1[3] * 0x13)
+	x5 := (x4 * 0x2)
+	x6 := (arg1[3] * 0x2)
+	x7 := (arg1[2] * 0x2)
+	x8 := (arg1[1] * 0x2)
+	x10, x9 := bits.mul_u64(arg1[4], x1)
+	x12, x11 := bits.mul_u64(arg1[3], x2)
+	x14, x13 := bits.mul_u64(arg1[3], x4)
+	x16, x15 := bits.mul_u64(arg1[2], x2)
+	x18, x17 := bits.mul_u64(arg1[2], x5)
+	x20, x19 := bits.mul_u64(arg1[2], arg1[2])
+	x22, x21 := bits.mul_u64(arg1[1], x2)
+	x24, x23 := bits.mul_u64(arg1[1], x6)
+	x26, x25 := bits.mul_u64(arg1[1], x7)
+	x28, x27 := bits.mul_u64(arg1[1], arg1[1])
+	x30, x29 := bits.mul_u64(arg1[0], x3)
+	x32, x31 := bits.mul_u64(arg1[0], x6)
+	x34, x33 := bits.mul_u64(arg1[0], x7)
+	x36, x35 := bits.mul_u64(arg1[0], x8)
+	x38, x37 := bits.mul_u64(arg1[0], arg1[0])
+	x39, x40 := bits.add_u64(x21, x17, u64(0x0))
+	x41, _ := bits.add_u64(x22, x18, u64(fiat.u1(x40)))
+	x43, x44 := bits.add_u64(x37, x39, u64(0x0))
+	x45, _ := bits.add_u64(x38, x41, u64(fiat.u1(x44)))
+	x47 := ((x43 >> 51) | ((x45 << 13) & 0xffffffffffffffff))
+	x48 := (x43 & 0x7ffffffffffff)
+	x49, x50 := bits.add_u64(x23, x19, u64(0x0))
+	x51, _ := bits.add_u64(x24, x20, u64(fiat.u1(x50)))
+	x53, x54 := bits.add_u64(x29, x49, u64(0x0))
+	x55, _ := bits.add_u64(x30, x51, u64(fiat.u1(x54)))
+	x57, x58 := bits.add_u64(x25, x9, u64(0x0))
+	x59, _ := bits.add_u64(x26, x10, u64(fiat.u1(x58)))
+	x61, x62 := bits.add_u64(x31, x57, u64(0x0))
+	x63, _ := bits.add_u64(x32, x59, u64(fiat.u1(x62)))
+	x65, x66 := bits.add_u64(x27, x11, u64(0x0))
+	x67, _ := bits.add_u64(x28, x12, u64(fiat.u1(x66)))
+	x69, x70 := bits.add_u64(x33, x65, u64(0x0))
+	x71, _ := bits.add_u64(x34, x67, u64(fiat.u1(x70)))
+	x73, x74 := bits.add_u64(x15, x13, u64(0x0))
+	x75, _ := bits.add_u64(x16, x14, u64(fiat.u1(x74)))
+	x77, x78 := bits.add_u64(x35, x73, u64(0x0))
+	x79, _ := bits.add_u64(x36, x75, u64(fiat.u1(x78)))
+	x81, x82 := bits.add_u64(x47, x77, u64(0x0))
+	x83 := (u64(fiat.u1(x82)) + x79)
+	x84 := ((x81 >> 51) | ((x83 << 13) & 0xffffffffffffffff))
+	x85 := (x81 & 0x7ffffffffffff)
+	x86, x87 := bits.add_u64(x84, x69, u64(0x0))
+	x88 := (u64(fiat.u1(x87)) + x71)
+	x89 := ((x86 >> 51) | ((x88 << 13) & 0xffffffffffffffff))
+	x90 := (x86 & 0x7ffffffffffff)
+	x91, x92 := bits.add_u64(x89, x61, u64(0x0))
+	x93 := (u64(fiat.u1(x92)) + x63)
+	x94 := ((x91 >> 51) | ((x93 << 13) & 0xffffffffffffffff))
+	x95 := (x91 & 0x7ffffffffffff)
+	x96, x97 := bits.add_u64(x94, x53, u64(0x0))
+	x98 := (u64(fiat.u1(x97)) + x55)
+	x99 := ((x96 >> 51) | ((x98 << 13) & 0xffffffffffffffff))
+	x100 := (x96 & 0x7ffffffffffff)
+	x101 := (x99 * 0x13)
+	x102 := (x48 + x101)
+	x103 := (x102 >> 51)
+	x104 := (x102 & 0x7ffffffffffff)
+	x105 := (x103 + x85)
+	x106 := fiat.u1((x105 >> 51))
+	x107 := (x105 & 0x7ffffffffffff)
+	x108 := (u64(x106) + x90)
+	out1[0] = x104
+	out1[1] = x107
+	out1[2] = x108
+	out1[3] = x95
+	out1[4] = x100
+}
+
+fe_carry :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+	x1 := arg1[0]
+	x2 := ((x1 >> 51) + arg1[1])
+	x3 := ((x2 >> 51) + arg1[2])
+	x4 := ((x3 >> 51) + arg1[3])
+	x5 := ((x4 >> 51) + arg1[4])
+	x6 := ((x1 & 0x7ffffffffffff) + ((x5 >> 51) * 0x13))
+	x7 := (u64(fiat.u1((x6 >> 51))) + (x2 & 0x7ffffffffffff))
+	x8 := (x6 & 0x7ffffffffffff)
+	x9 := (x7 & 0x7ffffffffffff)
+	x10 := (u64(fiat.u1((x7 >> 51))) + (x3 & 0x7ffffffffffff))
+	x11 := (x4 & 0x7ffffffffffff)
+	x12 := (x5 & 0x7ffffffffffff)
+	out1[0] = x8
+	out1[1] = x9
+	out1[2] = x10
+	out1[3] = x11
+	out1[4] = x12
+}
+
+fe_add :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) {
+	x1 := (arg1[0] + arg2[0])
+	x2 := (arg1[1] + arg2[1])
+	x3 := (arg1[2] + arg2[2])
+	x4 := (arg1[3] + arg2[3])
+	x5 := (arg1[4] + arg2[4])
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+}
+
+fe_sub :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) {
+	x1 := ((0xfffffffffffda + arg1[0]) - arg2[0])
+	x2 := ((0xffffffffffffe + arg1[1]) - arg2[1])
+	x3 := ((0xffffffffffffe + arg1[2]) - arg2[2])
+	x4 := ((0xffffffffffffe + arg1[3]) - arg2[3])
+	x5 := ((0xffffffffffffe + arg1[4]) - arg2[4])
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+}
+
+fe_opp :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) {
+	x1 := (0xfffffffffffda - arg1[0])
+	x2 := (0xffffffffffffe - arg1[1])
+	x3 := (0xffffffffffffe - arg1[2])
+	x4 := (0xffffffffffffe - arg1[3])
+	x5 := (0xffffffffffffe - arg1[4])
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+}
+
+fe_cond_assign :: proc "contextless" (out1, arg1: ^Tight_Field_Element, arg2: int) {
+	x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0])
+	x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1])
+	x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2])
+	x4 := fiat.cmovznz_u64(fiat.u1(arg2), out1[3], arg1[3])
+	x5 := fiat.cmovznz_u64(fiat.u1(arg2), out1[4], arg1[4])
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+}
+
+fe_to_bytes :: proc "contextless" (out1: ^[32]byte, arg1: ^Tight_Field_Element) {
+	x1, x2 := _subborrowx_u51(0x0, arg1[0], 0x7ffffffffffed)
+	x3, x4 := _subborrowx_u51(x2, arg1[1], 0x7ffffffffffff)
+	x5, x6 := _subborrowx_u51(x4, arg1[2], 0x7ffffffffffff)
+	x7, x8 := _subborrowx_u51(x6, arg1[3], 0x7ffffffffffff)
+	x9, x10 := _subborrowx_u51(x8, arg1[4], 0x7ffffffffffff)
+	x11 := fiat.cmovznz_u64(x10, u64(0x0), 0xffffffffffffffff)
+	x12, x13 := _addcarryx_u51(0x0, x1, (x11 & 0x7ffffffffffed))
+	x14, x15 := _addcarryx_u51(x13, x3, (x11 & 0x7ffffffffffff))
+	x16, x17 := _addcarryx_u51(x15, x5, (x11 & 0x7ffffffffffff))
+	x18, x19 := _addcarryx_u51(x17, x7, (x11 & 0x7ffffffffffff))
+	x20, _ := _addcarryx_u51(x19, x9, (x11 & 0x7ffffffffffff))
+	x22 := (x20 << 4)
+	x23 := (x18 * u64(0x2))
+	x24 := (x16 << 6)
+	x25 := (x14 << 3)
+	x26 := (u8(x12) & 0xff)
+	x27 := (x12 >> 8)
+	x28 := (u8(x27) & 0xff)
+	x29 := (x27 >> 8)
+	x30 := (u8(x29) & 0xff)
+	x31 := (x29 >> 8)
+	x32 := (u8(x31) & 0xff)
+	x33 := (x31 >> 8)
+	x34 := (u8(x33) & 0xff)
+	x35 := (x33 >> 8)
+	x36 := (u8(x35) & 0xff)
+	x37 := u8((x35 >> 8))
+	x38 := (x25 + u64(x37))
+	x39 := (u8(x38) & 0xff)
+	x40 := (x38 >> 8)
+	x41 := (u8(x40) & 0xff)
+	x42 := (x40 >> 8)
+	x43 := (u8(x42) & 0xff)
+	x44 := (x42 >> 8)
+	x45 := (u8(x44) & 0xff)
+	x46 := (x44 >> 8)
+	x47 := (u8(x46) & 0xff)
+	x48 := (x46 >> 8)
+	x49 := (u8(x48) & 0xff)
+	x50 := u8((x48 >> 8))
+	x51 := (x24 + u64(x50))
+	x52 := (u8(x51) & 0xff)
+	x53 := (x51 >> 8)
+	x54 := (u8(x53) & 0xff)
+	x55 := (x53 >> 8)
+	x56 := (u8(x55) & 0xff)
+	x57 := (x55 >> 8)
+	x58 := (u8(x57) & 0xff)
+	x59 := (x57 >> 8)
+	x60 := (u8(x59) & 0xff)
+	x61 := (x59 >> 8)
+	x62 := (u8(x61) & 0xff)
+	x63 := (x61 >> 8)
+	x64 := (u8(x63) & 0xff)
+	x65 := fiat.u1((x63 >> 8))
+	x66 := (x23 + u64(x65))
+	x67 := (u8(x66) & 0xff)
+	x68 := (x66 >> 8)
+	x69 := (u8(x68) & 0xff)
+	x70 := (x68 >> 8)
+	x71 := (u8(x70) & 0xff)
+	x72 := (x70 >> 8)
+	x73 := (u8(x72) & 0xff)
+	x74 := (x72 >> 8)
+	x75 := (u8(x74) & 0xff)
+	x76 := (x74 >> 8)
+	x77 := (u8(x76) & 0xff)
+	x78 := u8((x76 >> 8))
+	x79 := (x22 + u64(x78))
+	x80 := (u8(x79) & 0xff)
+	x81 := (x79 >> 8)
+	x82 := (u8(x81) & 0xff)
+	x83 := (x81 >> 8)
+	x84 := (u8(x83) & 0xff)
+	x85 := (x83 >> 8)
+	x86 := (u8(x85) & 0xff)
+	x87 := (x85 >> 8)
+	x88 := (u8(x87) & 0xff)
+	x89 := (x87 >> 8)
+	x90 := (u8(x89) & 0xff)
+	x91 := u8((x89 >> 8))
+	out1[0] = x26
+	out1[1] = x28
+	out1[2] = x30
+	out1[3] = x32
+	out1[4] = x34
+	out1[5] = x36
+	out1[6] = x39
+	out1[7] = x41
+	out1[8] = x43
+	out1[9] = x45
+	out1[10] = x47
+	out1[11] = x49
+	out1[12] = x52
+	out1[13] = x54
+	out1[14] = x56
+	out1[15] = x58
+	out1[16] = x60
+	out1[17] = x62
+	out1[18] = x64
+	out1[19] = x67
+	out1[20] = x69
+	out1[21] = x71
+	out1[22] = x73
+	out1[23] = x75
+	out1[24] = x77
+	out1[25] = x80
+	out1[26] = x82
+	out1[27] = x84
+	out1[28] = x86
+	out1[29] = x88
+	out1[30] = x90
+	out1[31] = x91
+}
+
+_fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte) {
+	x1 := (u64(arg1[31]) << 44)
+	x2 := (u64(arg1[30]) << 36)
+	x3 := (u64(arg1[29]) << 28)
+	x4 := (u64(arg1[28]) << 20)
+	x5 := (u64(arg1[27]) << 12)
+	x6 := (u64(arg1[26]) << 4)
+	x7 := (u64(arg1[25]) << 47)
+	x8 := (u64(arg1[24]) << 39)
+	x9 := (u64(arg1[23]) << 31)
+	x10 := (u64(arg1[22]) << 23)
+	x11 := (u64(arg1[21]) << 15)
+	x12 := (u64(arg1[20]) << 7)
+	x13 := (u64(arg1[19]) << 50)
+	x14 := (u64(arg1[18]) << 42)
+	x15 := (u64(arg1[17]) << 34)
+	x16 := (u64(arg1[16]) << 26)
+	x17 := (u64(arg1[15]) << 18)
+	x18 := (u64(arg1[14]) << 10)
+	x19 := (u64(arg1[13]) << 2)
+	x20 := (u64(arg1[12]) << 45)
+	x21 := (u64(arg1[11]) << 37)
+	x22 := (u64(arg1[10]) << 29)
+	x23 := (u64(arg1[9]) << 21)
+	x24 := (u64(arg1[8]) << 13)
+	x25 := (u64(arg1[7]) << 5)
+	x26 := (u64(arg1[6]) << 48)
+	x27 := (u64(arg1[5]) << 40)
+	x28 := (u64(arg1[4]) << 32)
+	x29 := (u64(arg1[3]) << 24)
+	x30 := (u64(arg1[2]) << 16)
+	x31 := (u64(arg1[1]) << 8)
+	x32 := arg1[0]
+	x33 := (x31 + u64(x32))
+	x34 := (x30 + x33)
+	x35 := (x29 + x34)
+	x36 := (x28 + x35)
+	x37 := (x27 + x36)
+	x38 := (x26 + x37)
+	x39 := (x38 & 0x7ffffffffffff)
+	x40 := u8((x38 >> 51))
+	x41 := (x25 + u64(x40))
+	x42 := (x24 + x41)
+	x43 := (x23 + x42)
+	x44 := (x22 + x43)
+	x45 := (x21 + x44)
+	x46 := (x20 + x45)
+	x47 := (x46 & 0x7ffffffffffff)
+	x48 := u8((x46 >> 51))
+	x49 := (x19 + u64(x48))
+	x50 := (x18 + x49)
+	x51 := (x17 + x50)
+	x52 := (x16 + x51)
+	x53 := (x15 + x52)
+	x54 := (x14 + x53)
+	x55 := (x13 + x54)
+	x56 := (x55 & 0x7ffffffffffff)
+	x57 := u8((x55 >> 51))
+	x58 := (x12 + u64(x57))
+	x59 := (x11 + x58)
+	x60 := (x10 + x59)
+	x61 := (x9 + x60)
+	x62 := (x8 + x61)
+	x63 := (x7 + x62)
+	x64 := (x63 & 0x7ffffffffffff)
+	x65 := u8((x63 >> 51))
+	x66 := (x6 + u64(x65))
+	x67 := (x5 + x66)
+	x68 := (x4 + x67)
+	x69 := (x3 + x68)
+	x70 := (x2 + x69)
+	x71 := (x1 + x70)
+	out1[0] = x39
+	out1[1] = x47
+	out1[2] = x56
+	out1[3] = x64
+	out1[4] = x71
+}
+
+fe_relax :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) {
+	x1 := arg1[0]
+	x2 := arg1[1]
+	x3 := arg1[2]
+	x4 := arg1[3]
+	x5 := arg1[4]
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+}
+
+fe_carry_scmul_121666 :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+	x2, x1 := bits.mul_u64(0x1db42, arg1[4])
+	x4, x3 := bits.mul_u64(0x1db42, arg1[3])
+	x6, x5 := bits.mul_u64(0x1db42, arg1[2])
+	x8, x7 := bits.mul_u64(0x1db42, arg1[1])
+	x10, x9 := bits.mul_u64(0x1db42, arg1[0])
+	x11 := ((x9 >> 51) | ((x10 << 13) & 0xffffffffffffffff))
+	x12 := (x9 & 0x7ffffffffffff)
+	x13, x14 := bits.add_u64(x11, x7, u64(0x0))
+	x15 := (u64(fiat.u1(x14)) + x8)
+	x16 := ((x13 >> 51) | ((x15 << 13) & 0xffffffffffffffff))
+	x17 := (x13 & 0x7ffffffffffff)
+	x18, x19 := bits.add_u64(x16, x5, u64(0x0))
+	x20 := (u64(fiat.u1(x19)) + x6)
+	x21 := ((x18 >> 51) | ((x20 << 13) & 0xffffffffffffffff))
+	x22 := (x18 & 0x7ffffffffffff)
+	x23, x24 := bits.add_u64(x21, x3, u64(0x0))
+	x25 := (u64(fiat.u1(x24)) + x4)
+	x26 := ((x23 >> 51) | ((x25 << 13) & 0xffffffffffffffff))
+	x27 := (x23 & 0x7ffffffffffff)
+	x28, x29 := bits.add_u64(x26, x1, u64(0x0))
+	x30 := (u64(fiat.u1(x29)) + x2)
+	x31 := ((x28 >> 51) | ((x30 << 13) & 0xffffffffffffffff))
+	x32 := (x28 & 0x7ffffffffffff)
+	x33 := (x31 * 0x13)
+	x34 := (x12 + x33)
+	x35 := fiat.u1((x34 >> 51))
+	x36 := (x34 & 0x7ffffffffffff)
+	x37 := (u64(x35) + x17)
+	x38 := fiat.u1((x37 >> 51))
+	x39 := (x37 & 0x7ffffffffffff)
+	x40 := (u64(x38) + x22)
+	out1[0] = x36
+	out1[1] = x39
+	out1[2] = x40
+	out1[3] = x27
+	out1[4] = x32
+}
+
+// The following routines were added by hand, and do not come from fiat-crypto.
+
+fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
+	out1[0] = 0
+	out1[1] = 0
+	out1[2] = 0
+	out1[3] = 0
+	out1[4] = 0
+}
+
+fe_one :: proc "contextless" (out1: ^Tight_Field_Element) {
+	out1[0] = 1
+	out1[1] = 0
+	out1[2] = 0
+	out1[3] = 0
+	out1[4] = 0
+}
+
+fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) {
+	x1 := arg1[0]
+	x2 := arg1[1]
+	x3 := arg1[2]
+	x4 := arg1[3]
+	x5 := arg1[4]
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+}
+
+fe_cond_swap :: proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) {
+	mask := -u64(arg1)
+	x := (out1[0] ~ out2[0]) & mask
+	x1, y1 := out1[0] ~ x, out2[0] ~ x
+	x = (out1[1] ~ out2[1]) & mask
+	x2, y2 := out1[1] ~ x, out2[1] ~ x
+	x = (out1[2] ~ out2[2]) & mask
+	x3, y3 := out1[2] ~ x, out2[2] ~ x
+	x = (out1[3] ~ out2[3]) & mask
+	x4, y4 := out1[3] ~ x, out2[3] ~ x
+	x = (out1[4] ~ out2[4]) & mask
+	x5, y5 := out1[4] ~ x, out2[4] ~ x
+	out1[0], out2[0] = x1, y1
+	out1[1], out2[1] = x2, y2
+	out1[2], out2[2] = x3, y3
+	out1[3], out2[3] = x4, y4
+	out1[4], out2[4] = x5, y5
+}
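
For orientation (editorial note, not part of the commit): a field element above holds the value

    v = a0 + a1*2^51 + a2*2^102 + a3*2^153 + a4*2^204  (mod p),  p = 2^255 - 19

and because 2^255 ≡ 19 (mod p), any carry out of the top limb can be folded back into the bottom limb after multiplying by 19, which is the `0x13` factor that appears throughout `fe_carry_mul`, `fe_carry_square`, and `fe_carry`; likewise `fe_carry_scmul_121666` multiplies by `0x1db42` = 121666.  The `Tight_Field_Element`/`Loose_Field_Element` split tracks limb bounds: `fe_add`, `fe_sub`, and `fe_opp` produce loosely bounded limbs, and the `fe_carry*` routines reduce them back to tight bounds.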

+ 66 - 0
core/crypto/_fiat/field_poly1305/field.odin

@@ -0,0 +1,66 @@
+package field_poly1305
+
+import "core:crypto/util"
+import "core:mem"
+
+fe_relax_cast :: #force_inline proc "contextless" (arg1: ^Tight_Field_Element) -> ^Loose_Field_Element {
+	return transmute(^Loose_Field_Element)(arg1)
+}
+
+fe_tighten_cast :: #force_inline proc "contextless" (arg1: ^Loose_Field_Element) -> ^Tight_Field_Element {
+	return transmute(^Tight_Field_Element)(arg1)
+}
+
+fe_from_bytes :: #force_inline proc (out1: ^Tight_Field_Element, arg1: []byte, arg2: byte, sanitize: bool = true) {
+	// fiat-crypto's deserialization routine effectively processes a
+	// single byte at a time, and wants 256 bits of input for a value
+	// that will be 128 or 129 bits.
+	//
+	// This is somewhat cumbersome to use, so at a minimum a wrapper
+	// makes implementing the actual MAC block processing considerably
+	// neater.
+
+	assert(len(arg1) == 16)
+
+	when ODIN_ARCH == "386" || ODIN_ARCH == "amd64" {
+		// While it may be unwise to do deserialization here on our
+		// own when fiat-crypto provides equivalent functionality,
+		// doing it this way provides a little under 3x performance
+		// improvement when optimization is enabled.
+		src_p := transmute(^[2]u64)(&arg1[0])
+		lo := src_p[0]
+		hi := src_p[1]
+
+		// This is inspired by poly1305-donna, though adjustments were
+		// made since a Tight_Field_Element's limbs are 44, 43, and
+		// 43 bits wide.
+		//
+		// Note: This could be transplanted into fe_from_u64s, but that
+		// code is called once per MAC, and is not on the critical path.
+		hibit := u64(arg2) << 41 // arg2 << 128
+		out1[0] = lo & 0xfffffffffff
+		out1[1] = ((lo >> 44) | (hi << 20)) & 0x7ffffffffff
+		out1[2] = ((hi >> 23) & 0x7ffffffffff) | hibit
+	} else {
+		tmp: [32]byte
+		copy_slice(tmp[0:16], arg1[:])
+		tmp[16] = arg2
+
+		_fe_from_bytes(out1, &tmp)
+		if sanitize {
+			// This is used to deserialize `s` which is confidential.
+			mem.zero_explicit(&tmp, size_of(tmp))
+		}
+	}
+}
+
+fe_from_u64s :: proc "contextless" (out1: ^Tight_Field_Element, lo, hi: u64) {
+	tmp: [32]byte
+	util.PUT_U64_LE(tmp[0:8], lo)
+	util.PUT_U64_LE(tmp[8:16], hi)
+
+	_fe_from_bytes(out1, &tmp)
+
+	// This routine is only used to deserialize `r` which is confidential.
+	mem.zero_explicit(&tmp, size_of(tmp))
+}
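
An editorial sketch (not part of the commit) of the MAC block processing the comment above alludes to.  The accumulator update shown is the textbook Poly1305 step, a = (a + block + 2^128) * r mod 2^130 - 5, and is not a quote of the eventual poly1305 package:

```odin
package poly1305_block_sketch

import field "core:crypto/_fiat/field_poly1305"

// One full 16-byte Poly1305 block: a = (a + (block | 2^128)) * r.
// `a` is the running accumulator and `r` the (already clamped) key half.
block_step :: proc (a, r: ^field.Tight_Field_Element, block: []byte) {
	assert(len(block) == 16)

	m: field.Tight_Field_Element
	field.fe_from_bytes(&m, block, 1) // arg2 = 1 sets the 2^128 bit.

	sum: field.Loose_Field_Element
	field.fe_add(&sum, a, &m)
	field.fe_carry_mul(a, &sum, field.fe_relax_cast(r))
}
```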

+ 356 - 0
core/crypto/_fiat/field_poly1305/field4344.odin

@@ -0,0 +1,356 @@
+// The BSD 1-Clause License (BSD-1-Clause)
+//
+// Copyright (c) 2015-2020 the fiat-crypto authors (see the AUTHORS file)
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     1. Redistributions of source code must retain the above copyright
+//        notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY the fiat-crypto authors "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Berkeley Software Design,
+// Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package field_poly1305
+
+// This file provides arithmetic on the field Z/(2^130 - 5) using
+// unsaturated 64-bit integer arithmetic.  It is derived primarily
+// from the machine-generated Go output from the fiat-crypto project.
+//
+// While the base implementation is provably correct, this implementation
+// makes no such claims as the port and optimizations were done by hand.
+// At some point, it may be worth adding support to fiat-crypto for
+// generating Odin output.
+
+import fiat "core:crypto/_fiat"
+import "core:math/bits"
+
+Loose_Field_Element :: distinct [3]u64
+Tight_Field_Element :: distinct [3]u64
+
+_addcarryx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+	x1 := ((u64(arg1) + arg2) + arg3)
+	x2 := (x1 & 0xfffffffffff)
+	x3 := fiat.u1((x1 >> 44))
+	out1 = x2
+	out2 = x3
+	return
+}
+
+_subborrowx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3))
+	x2 := fiat.i1((x1 >> 44))
+	x3 := (u64(x1) & 0xfffffffffff)
+	out1 = x3
+	out2 = (0x0 - fiat.u1(x2))
+	return
+}
+
+_addcarryx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+	x1 := ((u64(arg1) + arg2) + arg3)
+	x2 := (x1 & 0x7ffffffffff)
+	x3 := fiat.u1((x1 >> 43))
+	out1 = x2
+	out2 = x3
+	return
+}
+
+_subborrowx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3))
+	x2 := fiat.i1((x1 >> 43))
+	x3 := (u64(x1) & 0x7ffffffffff)
+	out1 = x3
+	out2 = (0x0 - fiat.u1(x2))
+	return
+}
+
+fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
+	x2, x1 := bits.mul_u64(arg1[2], (arg2[2] * 0x5))
+	x4, x3 := bits.mul_u64(arg1[2], (arg2[1] * 0xa))
+	x6, x5 := bits.mul_u64(arg1[1], (arg2[2] * 0xa))
+	x8, x7 := bits.mul_u64(arg1[2], arg2[0])
+	x10, x9 := bits.mul_u64(arg1[1], (arg2[1] * 0x2))
+	x12, x11 := bits.mul_u64(arg1[1], arg2[0])
+	x14, x13 := bits.mul_u64(arg1[0], arg2[2])
+	x16, x15 := bits.mul_u64(arg1[0], arg2[1])
+	x18, x17 := bits.mul_u64(arg1[0], arg2[0])
+	x19, x20 := bits.add_u64(x5, x3, u64(0x0))
+	x21, _ := bits.add_u64(x6, x4, u64(fiat.u1(x20)))
+	x23, x24 := bits.add_u64(x17, x19, u64(0x0))
+	x25, _ := bits.add_u64(x18, x21, u64(fiat.u1(x24)))
+	x27 := ((x23 >> 44) | ((x25 << 20) & 0xffffffffffffffff))
+	x28 := (x23 & 0xfffffffffff)
+	x29, x30 := bits.add_u64(x9, x7, u64(0x0))
+	x31, _ := bits.add_u64(x10, x8, u64(fiat.u1(x30)))
+	x33, x34 := bits.add_u64(x13, x29, u64(0x0))
+	x35, _ := bits.add_u64(x14, x31, u64(fiat.u1(x34)))
+	x37, x38 := bits.add_u64(x11, x1, u64(0x0))
+	x39, _ := bits.add_u64(x12, x2, u64(fiat.u1(x38)))
+	x41, x42 := bits.add_u64(x15, x37, u64(0x0))
+	x43, _ := bits.add_u64(x16, x39, u64(fiat.u1(x42)))
+	x45, x46 := bits.add_u64(x27, x41, u64(0x0))
+	x47 := (u64(fiat.u1(x46)) + x43)
+	x48 := ((x45 >> 43) | ((x47 << 21) & 0xffffffffffffffff))
+	x49 := (x45 & 0x7ffffffffff)
+	x50, x51 := bits.add_u64(x48, x33, u64(0x0))
+	x52 := (u64(fiat.u1(x51)) + x35)
+	x53 := ((x50 >> 43) | ((x52 << 21) & 0xffffffffffffffff))
+	x54 := (x50 & 0x7ffffffffff)
+	x55 := (x53 * 0x5)
+	x56 := (x28 + x55)
+	x57 := (x56 >> 44)
+	x58 := (x56 & 0xfffffffffff)
+	x59 := (x57 + x49)
+	x60 := fiat.u1((x59 >> 43))
+	x61 := (x59 & 0x7ffffffffff)
+	x62 := (u64(x60) + x54)
+	out1[0] = x58
+	out1[1] = x61
+	out1[2] = x62
+}
+
+fe_carry_square :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+	x1 := (arg1[2] * 0x5)
+	x2 := (x1 * 0x2)
+	x3 := (arg1[2] * 0x2)
+	x4 := (arg1[1] * 0x2)
+	x6, x5 := bits.mul_u64(arg1[2], x1)
+	x8, x7 := bits.mul_u64(arg1[1], (x2 * 0x2))
+	x10, x9 := bits.mul_u64(arg1[1], (arg1[1] * 0x2))
+	x12, x11 := bits.mul_u64(arg1[0], x3)
+	x14, x13 := bits.mul_u64(arg1[0], x4)
+	x16, x15 := bits.mul_u64(arg1[0], arg1[0])
+	x17, x18 := bits.add_u64(x15, x7, u64(0x0))
+	x19, _ := bits.add_u64(x16, x8, u64(fiat.u1(x18)))
+	x21 := ((x17 >> 44) | ((x19 << 20) & 0xffffffffffffffff))
+	x22 := (x17 & 0xfffffffffff)
+	x23, x24 := bits.add_u64(x11, x9, u64(0x0))
+	x25, _ := bits.add_u64(x12, x10, u64(fiat.u1(x24)))
+	x27, x28 := bits.add_u64(x13, x5, u64(0x0))
+	x29, _ := bits.add_u64(x14, x6, u64(fiat.u1(x28)))
+	x31, x32 := bits.add_u64(x21, x27, u64(0x0))
+	x33 := (u64(fiat.u1(x32)) + x29)
+	x34 := ((x31 >> 43) | ((x33 << 21) & 0xffffffffffffffff))
+	x35 := (x31 & 0x7ffffffffff)
+	x36, x37 := bits.add_u64(x34, x23, u64(0x0))
+	x38 := (u64(fiat.u1(x37)) + x25)
+	x39 := ((x36 >> 43) | ((x38 << 21) & 0xffffffffffffffff))
+	x40 := (x36 & 0x7ffffffffff)
+	x41 := (x39 * 0x5)
+	x42 := (x22 + x41)
+	x43 := (x42 >> 44)
+	x44 := (x42 & 0xfffffffffff)
+	x45 := (x43 + x35)
+	x46 := fiat.u1((x45 >> 43))
+	x47 := (x45 & 0x7ffffffffff)
+	x48 := (u64(x46) + x40)
+	out1[0] = x44
+	out1[1] = x47
+	out1[2] = x48
+}
+
+fe_carry :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+	x1 := arg1[0]
+	x2 := ((x1 >> 44) + arg1[1])
+	x3 := ((x2 >> 43) + arg1[2])
+	x4 := ((x1 & 0xfffffffffff) + ((x3 >> 43) * 0x5))
+	x5 := (u64(fiat.u1((x4 >> 44))) + (x2 & 0x7ffffffffff))
+	x6 := (x4 & 0xfffffffffff)
+	x7 := (x5 & 0x7ffffffffff)
+	x8 := (u64(fiat.u1((x5 >> 43))) + (x3 & 0x7ffffffffff))
+	out1[0] = x6
+	out1[1] = x7
+	out1[2] = x8
+}
+
+fe_add :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) {
+	x1 := (arg1[0] + arg2[0])
+	x2 := (arg1[1] + arg2[1])
+	x3 := (arg1[2] + arg2[2])
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+}
+
+fe_sub :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) {
+	x1 := ((0x1ffffffffff6 + arg1[0]) - arg2[0])
+	x2 := ((0xffffffffffe + arg1[1]) - arg2[1])
+	x3 := ((0xffffffffffe + arg1[2]) - arg2[2])
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+}
+
+fe_opp :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) {
+	x1 := (0x1ffffffffff6 - arg1[0])
+	x2 := (0xffffffffffe - arg1[1])
+	x3 := (0xffffffffffe - arg1[2])
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+}
+
+fe_cond_assign :: proc "contextless" (out1, arg1: ^Tight_Field_Element, arg2: bool) {
+	x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0])
+	x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1])
+	x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2])
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+}
+
+fe_to_bytes :: proc "contextless" (out1: ^[32]byte, arg1: ^Tight_Field_Element) {
+	x1, x2 := _subborrowx_u44(0x0, arg1[0], 0xffffffffffb)
+	x3, x4 := _subborrowx_u43(x2, arg1[1], 0x7ffffffffff)
+	x5, x6 := _subborrowx_u43(x4, arg1[2], 0x7ffffffffff)
+	x7 := fiat.cmovznz_u64(x6, u64(0x0), 0xffffffffffffffff)
+	x8, x9 := _addcarryx_u44(0x0, x1, (x7 & 0xffffffffffb))
+	x10, x11 := _addcarryx_u43(x9, x3, (x7 & 0x7ffffffffff))
+	x12, _ := _addcarryx_u43(x11, x5, (x7 & 0x7ffffffffff))
+	x14 := (x12 << 7)
+	x15 := (x10 << 4)
+	x16 := (u8(x8) & 0xff)
+	x17 := (x8 >> 8)
+	x18 := (u8(x17) & 0xff)
+	x19 := (x17 >> 8)
+	x20 := (u8(x19) & 0xff)
+	x21 := (x19 >> 8)
+	x22 := (u8(x21) & 0xff)
+	x23 := (x21 >> 8)
+	x24 := (u8(x23) & 0xff)
+	x25 := u8((x23 >> 8))
+	x26 := (x15 + u64(x25))
+	x27 := (u8(x26) & 0xff)
+	x28 := (x26 >> 8)
+	x29 := (u8(x28) & 0xff)
+	x30 := (x28 >> 8)
+	x31 := (u8(x30) & 0xff)
+	x32 := (x30 >> 8)
+	x33 := (u8(x32) & 0xff)
+	x34 := (x32 >> 8)
+	x35 := (u8(x34) & 0xff)
+	x36 := u8((x34 >> 8))
+	x37 := (x14 + u64(x36))
+	x38 := (u8(x37) & 0xff)
+	x39 := (x37 >> 8)
+	x40 := (u8(x39) & 0xff)
+	x41 := (x39 >> 8)
+	x42 := (u8(x41) & 0xff)
+	x43 := (x41 >> 8)
+	x44 := (u8(x43) & 0xff)
+	x45 := (x43 >> 8)
+	x46 := (u8(x45) & 0xff)
+	x47 := (x45 >> 8)
+	x48 := (u8(x47) & 0xff)
+	x49 := u8((x47 >> 8))
+	out1[0] = x16
+	out1[1] = x18
+	out1[2] = x20
+	out1[3] = x22
+	out1[4] = x24
+	out1[5] = x27
+	out1[6] = x29
+	out1[7] = x31
+	out1[8] = x33
+	out1[9] = x35
+	out1[10] = x38
+	out1[11] = x40
+	out1[12] = x42
+	out1[13] = x44
+	out1[14] = x46
+	out1[15] = x48
+	out1[16] = x49
+}
+
+_fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte) {
+	x1 := (u64(arg1[16]) << 41)
+	x2 := (u64(arg1[15]) << 33)
+	x3 := (u64(arg1[14]) << 25)
+	x4 := (u64(arg1[13]) << 17)
+	x5 := (u64(arg1[12]) << 9)
+	x6 := (u64(arg1[11]) * u64(0x2))
+	x7 := (u64(arg1[10]) << 36)
+	x8 := (u64(arg1[9]) << 28)
+	x9 := (u64(arg1[8]) << 20)
+	x10 := (u64(arg1[7]) << 12)
+	x11 := (u64(arg1[6]) << 4)
+	x12 := (u64(arg1[5]) << 40)
+	x13 := (u64(arg1[4]) << 32)
+	x14 := (u64(arg1[3]) << 24)
+	x15 := (u64(arg1[2]) << 16)
+	x16 := (u64(arg1[1]) << 8)
+	x17 := arg1[0]
+	x18 := (x16 + u64(x17))
+	x19 := (x15 + x18)
+	x20 := (x14 + x19)
+	x21 := (x13 + x20)
+	x22 := (x12 + x21)
+	x23 := (x22 & 0xfffffffffff)
+	x24 := u8((x22 >> 44))
+	x25 := (x11 + u64(x24))
+	x26 := (x10 + x25)
+	x27 := (x9 + x26)
+	x28 := (x8 + x27)
+	x29 := (x7 + x28)
+	x30 := (x29 & 0x7ffffffffff)
+	x31 := fiat.u1((x29 >> 43))
+	x32 := (x6 + u64(x31))
+	x33 := (x5 + x32)
+	x34 := (x4 + x33)
+	x35 := (x3 + x34)
+	x36 := (x2 + x35)
+	x37 := (x1 + x36)
+	out1[0] = x23
+	out1[1] = x30
+	out1[2] = x37
+}
+
+fe_relax :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) {
+	x1 := arg1[0]
+	x2 := arg1[1]
+	x3 := arg1[2]
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+}
+
+// The following routines were added by hand, and do not come from fiat-crypto.
+
+fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
+	out1[0] = 0
+	out1[1] = 0
+	out1[2] = 0
+}
+
+fe_set :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
+	x1 := arg1[0]
+	x2 := arg1[1]
+	x3 := arg1[2]
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+}
+
+fe_cond_swap :: proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: bool) {
+	mask := -u64(arg1)
+	x := (out1[0] ~ out2[0]) & mask
+	x1, y1 := out1[0] ~ x, out2[0] ~ x
+	x = (out1[1] ~ out2[1]) & mask
+	x2, y2 := out1[1] ~ x, out2[1] ~ x
+	x = (out1[2] ~ out2[2]) & mask
+	x3, y3 := out1[2] ~ x, out2[2] ~ x
+	out1[0], out2[0] = x1, y1
+	out1[1], out2[1] = x2, y2
+	out1[2], out2[2] = x3, y3
+}

+ 581 - 0
core/crypto/chacha20/chacha20.odin

@@ -0,0 +1,581 @@
+package chacha20
+
+import "core:crypto/util"
+import "core:math/bits"
+import "core:mem"
+
+KEY_SIZE :: 32
+NONCE_SIZE :: 12
+XNONCE_SIZE :: 24
+
+_MAX_CTR_IETF :: 0xffffffff
+
+_BLOCK_SIZE :: 64
+_STATE_SIZE_U32 :: 16
+_ROUNDS :: 20
+
+_SIGMA_0 : u32 : 0x61707865
+_SIGMA_1 : u32 : 0x3320646e
+_SIGMA_2 : u32 : 0x79622d32
+_SIGMA_3 : u32 : 0x6b206574
+
+Context :: struct {
+	_s: [_STATE_SIZE_U32]u32,
+
+	_buffer: [_BLOCK_SIZE]byte,
+	_off: int,
+
+	_is_ietf_flavor: bool,
+	_is_initialized: bool,
+}
+
+init :: proc (ctx: ^Context, key, nonce: []byte) {
+	if len(key) != KEY_SIZE {
+		panic("crypto/chacha20: invalid ChaCha20 key size")
+	}
+	if n_len := len(nonce); n_len != NONCE_SIZE && n_len != XNONCE_SIZE {
+		panic("crypto/chacha20: invalid (X)ChaCha20 nonce size")
+	}
+
+	k, n := key, nonce
+
+	// Derive the XChaCha20 subkey and sub-nonce via HChaCha20.
+	is_xchacha := len(nonce) == XNONCE_SIZE
+	if is_xchacha {
+		sub_key := ctx._buffer[:KEY_SIZE]
+		_hchacha20(sub_key, k, n)
+		k = sub_key
+		n = n[16:24]
+	}
+
+	ctx._s[0] = _SIGMA_0
+	ctx._s[1] = _SIGMA_1
+	ctx._s[2] = _SIGMA_2
+	ctx._s[3] = _SIGMA_3
+	ctx._s[4] = util.U32_LE(k[0:4])
+	ctx._s[5] = util.U32_LE(k[4:8])
+	ctx._s[6] = util.U32_LE(k[8:12])
+	ctx._s[7] = util.U32_LE(k[12:16])
+	ctx._s[8] = util.U32_LE(k[16:20])
+	ctx._s[9] = util.U32_LE(k[20:24])
+	ctx._s[10] = util.U32_LE(k[24:28])
+	ctx._s[11] = util.U32_LE(k[28:32])
+	ctx._s[12] = 0
+	if !is_xchacha {
+		ctx._s[13] = util.U32_LE(n[0:4])
+		ctx._s[14] = util.U32_LE(n[4:8])
+		ctx._s[15] = util.U32_LE(n[8:12])
+	} else {
+		ctx._s[13] = 0
+		ctx._s[14] = util.U32_LE(n[0:4])
+		ctx._s[15] = util.U32_LE(n[4:8])
+
+		// The sub-key is stored in the keystream buffer.  While
+		// this will be overwritten in most circumstances, explicitly
+		// clear it out early.
+		mem.zero_explicit(&ctx._buffer, KEY_SIZE)
+	}
+
+	ctx._off = _BLOCK_SIZE
+	ctx._is_ietf_flavor = !is_xchacha
+	ctx._is_initialized = true
+}
+
+seek :: proc (ctx: ^Context, block_nr: u64) {
+	assert(ctx._is_initialized)
+
+	if ctx._is_ietf_flavor {
+		if block_nr > _MAX_CTR_IETF {
+			panic("crypto/chacha20: attempted to seek past maximum counter")
+		}
+	} else {
+		ctx._s[13] = u32(block_nr >> 32)
+	}
+	ctx._s[12] = u32(block_nr)
+	ctx._off = _BLOCK_SIZE
+}
+
+xor_bytes :: proc (ctx: ^Context, dst, src: []byte) {
+	assert(ctx._is_initialized)
+
+	// TODO: Enforcing that dst and src alias exactly or not at all
+	// is a good idea, though odd aliasing should be extremely uncommon.
+
+	src, dst := src, dst
+	if dst_len := len(dst); dst_len < len(src) {
+		src = src[:dst_len]
+	}
+
+	for remaining := len(src); remaining > 0; {
+		// Process multiple blocks at once
+		if ctx._off == _BLOCK_SIZE {
+			if nr_blocks := remaining / _BLOCK_SIZE; nr_blocks > 0 {
+				direct_bytes := nr_blocks * _BLOCK_SIZE
+				_do_blocks(ctx, dst, src, nr_blocks)
+				remaining -= direct_bytes
+				if remaining == 0 {
+					return
+				}
+				dst = dst[direct_bytes:]
+				src = src[direct_bytes:]
+			}
+
+			// If there is a partial block, generate and buffer 1 block
+			// worth of keystream.
+			_do_blocks(ctx, ctx._buffer[:], nil, 1)
+			ctx._off = 0
+		}
+
+		// Process partial blocks from the buffered keystream.
+		to_xor := min(_BLOCK_SIZE - ctx._off, remaining)
+		buffered_keystream := ctx._buffer[ctx._off:]
+		for i := 0; i < to_xor; i = i + 1 {
+			dst[i] = buffered_keystream[i] ~ src[i]
+		}
+		ctx._off += to_xor
+		dst = dst[to_xor:]
+		src = src[to_xor:]
+		remaining -= to_xor
+	}
+}
+
+keystream_bytes :: proc (ctx: ^Context, dst: []byte) {
+	assert(ctx._is_initialized)
+
+	dst := dst
+	for remaining := len(dst); remaining > 0; {
+		// Process multiple blocks at once
+		if ctx._off == _BLOCK_SIZE {
+			if nr_blocks := remaining / _BLOCK_SIZE; nr_blocks > 0 {
+				direct_bytes := nr_blocks * _BLOCK_SIZE
+				_do_blocks(ctx, dst, nil, nr_blocks)
+				remaining -= direct_bytes
+				if remaining == 0 {
+					return
+				}
+				dst = dst[direct_bytes:]
+			}
+
+			// If there is a partial block, generate and buffer 1 block
+			// worth of keystream.
+			_do_blocks(ctx, ctx._buffer[:], nil, 1)
+			ctx._off = 0
+		}
+
+		// Process partial blocks from the buffered keystream.
+		to_copy := min(_BLOCK_SIZE - ctx._off, remaining)
+		buffered_keystream := ctx._buffer[ctx._off:]
+		copy(dst[:to_copy], buffered_keystream[:to_copy])
+		ctx._off += to_copy
+		dst = dst[to_copy:]
+		remaining -= to_copy
+	}
+}
+
+reset :: proc (ctx: ^Context) {
+	mem.zero_explicit(&ctx._s, size_of(ctx._s))
+	mem.zero_explicit(&ctx._buffer, size_of(ctx._buffer))
+
+	ctx._is_initialized = false
+}
+
+_do_blocks :: proc (ctx: ^Context, dst, src: []byte, nr_blocks: int) {
+	// Enforce the maximum consumed keystream per nonce.
+	//
+	// While all modern "standard" definitions of ChaCha20 use
+	// the IETF 32-bit counter, for XChaCha20 most common
+	// implementations allow for a 64-bit counter.
+	//
+	// Honestly, the answer here is "use an MRAE primitive", but
+	// go with common practice in the case of XChaCha20.
+	if ctx._is_ietf_flavor {
+		if u64(ctx._s[12]) + u64(nr_blocks) > 0xffffffff {
+			panic("crypto/chacha20: maximum ChaCha20 keystream per nonce reached")
+		}
+	} else {
+		ctr := (u64(ctx._s[13]) << 32) | u64(ctx._s[12])
+		if _, carry := bits.add_u64(ctr, u64(nr_blocks), 0); carry != 0 {
+			panic("crypto/chacha20: maximum XChaCha20 keystream per nonce reached")
+		}
+	}
+
+	dst, src := dst, src
+	x := &ctx._s
+	for n := 0; n < nr_blocks; n = n + 1 {
+		x0, x1, x2, x3 := _SIGMA_0, _SIGMA_1, _SIGMA_2, _SIGMA_3
+		x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 := x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]
+
+		for i := _ROUNDS; i > 0; i = i - 2 {
+			// Even when forcing inlining, manually inlining all of
+			// these is decently faster.
+
+			// quarterround(x, 0, 4, 8, 12)
+			x0 += x4
+			x12 ~= x0
+			x12 = util.ROTL32(x12, 16)
+			x8 += x12
+			x4 ~= x8
+			x4 = util.ROTL32(x4, 12)
+			x0 += x4
+			x12 ~= x0
+			x12 = util.ROTL32(x12, 8)
+			x8 += x12
+			x4 ~= x8
+			x4 = util.ROTL32(x4, 7)
+
+			// quarterround(x, 1, 5, 9, 13)
+			x1 += x5
+			x13 ~= x1
+			x13 = util.ROTL32(x13, 16)
+			x9 += x13
+			x5 ~= x9
+			x5 = util.ROTL32(x5, 12)
+			x1 += x5
+			x13 ~= x1
+			x13 = util.ROTL32(x13, 8)
+			x9 += x13
+			x5 ~= x9
+			x5 = util.ROTL32(x5, 7)
+
+			// quarterround(x, 2, 6, 10, 14)
+			x2 += x6
+			x14 ~= x2
+			x14 = util.ROTL32(x14, 16)
+			x10 += x14
+			x6 ~= x10
+			x6 = util.ROTL32(x6, 12)
+			x2 += x6
+			x14 ~= x2
+			x14 = util.ROTL32(x14, 8)
+			x10 += x14
+			x6 ~= x10
+			x6 = util.ROTL32(x6, 7)
+
+			// quarterround(x, 3, 7, 11, 15)
+			x3 += x7
+			x15 ~= x3
+			x15 = util.ROTL32(x15, 16)
+			x11 += x15
+			x7 ~= x11
+			x7 = util.ROTL32(x7, 12)
+			x3 += x7
+			x15 ~= x3
+			x15 = util.ROTL32(x15, 8)
+			x11 += x15
+			x7 ~= x11
+			x7 = util.ROTL32(x7, 7)
+
+			// quarterround(x, 0, 5, 10, 15)
+			x0 += x5
+			x15 ~= x0
+			x15 = util.ROTL32(x15, 16)
+			x10 += x15
+			x5 ~= x10
+			x5 = util.ROTL32(x5, 12)
+			x0 += x5
+			x15 ~= x0
+			x15 = util.ROTL32(x15, 8)
+			x10 += x15
+			x5 ~= x10
+			x5 = util.ROTL32(x5, 7)
+
+			// quarterround(x, 1, 6, 11, 12)
+			x1 += x6
+			x12 ~= x1
+			x12 = util.ROTL32(x12, 16)
+			x11 += x12
+			x6 ~= x11
+			x6 = util.ROTL32(x6, 12)
+			x1 += x6
+			x12 ~= x1
+			x12 = util.ROTL32(x12, 8)
+			x11 += x12
+			x6 ~= x11
+			x6 = util.ROTL32(x6, 7)
+
+			// quarterround(x, 2, 7, 8, 13)
+			x2 += x7
+			x13 ~= x2
+			x13 = util.ROTL32(x13, 16)
+			x8 += x13
+			x7 ~= x8
+			x7 = util.ROTL32(x7, 12)
+			x2 += x7
+			x13 ~= x2
+			x13 = util.ROTL32(x13, 8)
+			x8 += x13
+			x7 ~= x8
+			x7 = util.ROTL32(x7, 7)
+
+			// quarterround(x, 3, 4, 9, 14)
+			x3 += x4
+			x14 ~= x3
+			x14 = util.ROTL32(x14, 16)
+			x9 += x14
+			x4 ~= x9
+			x4 = util.ROTL32(x4, 12)
+			x3 += x4
+			x14 ~= x3
+			x14 = util.ROTL32(x14, 8)
+			x9 += x14
+			x4 ~= x9
+			x4 = util.ROTL32(x4, 7)
+		}
+
+		x0 += _SIGMA_0
+		x1 += _SIGMA_1
+		x2 += _SIGMA_2
+		x3 += _SIGMA_3
+		x4 += x[4]
+		x5 += x[5]
+		x6 += x[6]
+		x7 += x[7]
+		x8 += x[8]
+		x9 += x[9]
+		x10 += x[10]
+		x11 += x[11]
+		x12 += x[12]
+		x13 += x[13]
+		x14 += x[14]
+		x15 += x[15]
+
+		// While the "correct" answer to getting more performance out of
+		// this is "use vector operations", support for that is currently
+		// a work in progress/to be designed.
+		//
+		// Until dedicated assembly can be written, leverage the fact that
+		// the callers of this routine ensure that src/dst are valid.
+
+		when ODIN_ARCH == "386" || ODIN_ARCH == "amd64" {
+			// util.PUT_U32_LE/util.U32_LE are not required on little-endian
+			// systems that also happen to not be strict about aligned
+			// memory access.
+
+			dst_p := transmute(^[16]u32)(&dst[0])
+			if src != nil {
+				src_p := transmute(^[16]u32)(&src[0])
+				dst_p[0] = src_p[0] ~ x0
+				dst_p[1] = src_p[1] ~ x1
+				dst_p[2] = src_p[2] ~ x2
+				dst_p[3] = src_p[3] ~ x3
+				dst_p[4] = src_p[4] ~ x4
+				dst_p[5] = src_p[5] ~ x5
+				dst_p[6] = src_p[6] ~ x6
+				dst_p[7] = src_p[7] ~ x7
+				dst_p[8] = src_p[8] ~ x8
+				dst_p[9] = src_p[9] ~ x9
+				dst_p[10] = src_p[10] ~ x10
+				dst_p[11] = src_p[11] ~ x11
+				dst_p[12] = src_p[12] ~ x12
+				dst_p[13] = src_p[13] ~ x13
+				dst_p[14] = src_p[14] ~ x14
+				dst_p[15] = src_p[15] ~ x15
+				src = src[_BLOCK_SIZE:]
+			} else {
+				dst_p[0] = x0
+				dst_p[1] = x1
+				dst_p[2] = x2
+				dst_p[3] = x3
+				dst_p[4] = x4
+				dst_p[5] = x5
+				dst_p[6] = x6
+				dst_p[7] = x7
+				dst_p[8] = x8
+				dst_p[9] = x9
+				dst_p[10] = x10
+				dst_p[11] = x11
+				dst_p[12] = x12
+				dst_p[13] = x13
+				dst_p[14] = x14
+				dst_p[15] = x15
+			}
+			dst = dst[_BLOCK_SIZE:]
+		} else {
+			#no_bounds_check {
+				if src != nil {
+					util.PUT_U32_LE(dst[0:4], util.U32_LE(src[0:4]) ~ x0)
+					util.PUT_U32_LE(dst[4:8], util.U32_LE(src[4:8]) ~ x1)
+					util.PUT_U32_LE(dst[8:12], util.U32_LE(src[8:12]) ~ x2)
+					util.PUT_U32_LE(dst[12:16], util.U32_LE(src[12:16]) ~ x3)
+					util.PUT_U32_LE(dst[16:20], util.U32_LE(src[16:20]) ~ x4)
+					util.PUT_U32_LE(dst[20:24], util.U32_LE(src[20:24]) ~ x5)
+					util.PUT_U32_LE(dst[24:28], util.U32_LE(src[24:28]) ~ x6)
+					util.PUT_U32_LE(dst[28:32], util.U32_LE(src[28:32]) ~ x7)
+					util.PUT_U32_LE(dst[32:36], util.U32_LE(src[32:36]) ~ x8)
+					util.PUT_U32_LE(dst[36:40], util.U32_LE(src[36:40]) ~ x9)
+					util.PUT_U32_LE(dst[40:44], util.U32_LE(src[40:44]) ~ x10)
+					util.PUT_U32_LE(dst[44:48], util.U32_LE(src[44:48]) ~ x11)
+					util.PUT_U32_LE(dst[48:52], util.U32_LE(src[48:52]) ~ x12)
+					util.PUT_U32_LE(dst[52:56], util.U32_LE(src[52:56]) ~ x13)
+					util.PUT_U32_LE(dst[56:60], util.U32_LE(src[56:60]) ~ x14)
+					util.PUT_U32_LE(dst[60:64], util.U32_LE(src[60:64]) ~ x15)
+					src = src[_BLOCK_SIZE:]
+				} else {
+					util.PUT_U32_LE(dst[0:4], x0)
+					util.PUT_U32_LE(dst[4:8], x1)
+					util.PUT_U32_LE(dst[8:12], x2)
+					util.PUT_U32_LE(dst[12:16], x3)
+					util.PUT_U32_LE(dst[16:20], x4)
+					util.PUT_U32_LE(dst[20:24], x5)
+					util.PUT_U32_LE(dst[24:28], x6)
+					util.PUT_U32_LE(dst[28:32], x7)
+					util.PUT_U32_LE(dst[32:36], x8)
+					util.PUT_U32_LE(dst[36:40], x9)
+					util.PUT_U32_LE(dst[40:44], x10)
+					util.PUT_U32_LE(dst[44:48], x11)
+					util.PUT_U32_LE(dst[48:52], x12)
+					util.PUT_U32_LE(dst[52:56], x13)
+					util.PUT_U32_LE(dst[56:60], x14)
+					util.PUT_U32_LE(dst[60:64], x15)
+				}
+				dst = dst[_BLOCK_SIZE:]
+			}
+		}
+
+		// Increment the counter.  Overflow checking is done upon
+		// entry into the routine, so a 64-bit increment safely
+		// covers both cases.
+		new_ctr := ((u64(ctx._s[13]) << 32) | u64(ctx._s[12])) + 1
+		x[12] = u32(new_ctr)
+		x[13] = u32(new_ctr >> 32)
+	}
+}
+
+_hchacha20 :: proc (dst, key, nonce: []byte) {
+	x0, x1, x2, x3 := _SIGMA_0, _SIGMA_1, _SIGMA_2, _SIGMA_3
+	x4 := util.U32_LE(key[0:4])
+	x5 := util.U32_LE(key[4:8])
+	x6 := util.U32_LE(key[8:12])
+	x7 := util.U32_LE(key[12:16])
+	x8 := util.U32_LE(key[16:20])
+	x9 := util.U32_LE(key[20:24])
+	x10 := util.U32_LE(key[24:28])
+	x11 := util.U32_LE(key[28:32])
+	x12 := util.U32_LE(nonce[0:4])
+	x13 := util.U32_LE(nonce[4:8])
+	x14 := util.U32_LE(nonce[8:12])
+	x15 := util.U32_LE(nonce[12:16])
+
+	for i := _ROUNDS; i > 0; i = i - 2 {
+		// quarterround(x, 0, 4, 8, 12)
+		x0 += x4
+		x12 ~= x0
+		x12 = util.ROTL32(x12, 16)
+		x8 += x12
+		x4 ~= x8
+		x4 = util.ROTL32(x4, 12)
+		x0 += x4
+		x12 ~= x0
+		x12 = util.ROTL32(x12, 8)
+		x8 += x12
+		x4 ~= x8
+		x4 = util.ROTL32(x4, 7)
+
+		// quarterround(x, 1, 5, 9, 13)
+		x1 += x5
+		x13 ~= x1
+		x13 = util.ROTL32(x13, 16)
+		x9 += x13
+		x5 ~= x9
+		x5 = util.ROTL32(x5, 12)
+		x1 += x5
+		x13 ~= x1
+		x13 = util.ROTL32(x13, 8)
+		x9 += x13
+		x5 ~= x9
+		x5 = util.ROTL32(x5, 7)
+
+		// quarterround(x, 2, 6, 10, 14)
+		x2 += x6
+		x14 ~= x2
+		x14 = util.ROTL32(x14, 16)
+		x10 += x14
+		x6 ~= x10
+		x6 = util.ROTL32(x6, 12)
+		x2 += x6
+		x14 ~= x2
+		x14 = util.ROTL32(x14, 8)
+		x10 += x14
+		x6 ~= x10
+		x6 = util.ROTL32(x6, 7)
+
+		// quarterround(x, 3, 7, 11, 15)
+		x3 += x7
+		x15 ~= x3
+		x15 = util.ROTL32(x15, 16)
+		x11 += x15
+		x7 ~= x11
+		x7 = util.ROTL32(x7, 12)
+		x3 += x7
+		x15 ~= x3
+		x15 = util.ROTL32(x15, 8)
+		x11 += x15
+		x7 ~= x11
+		x7 = util.ROTL32(x7, 7)
+
+		// quarterround(x, 0, 5, 10, 15)
+		x0 += x5
+		x15 ~= x0
+		x15 = util.ROTL32(x15, 16)
+		x10 += x15
+		x5 ~= x10
+		x5 = util.ROTL32(x5, 12)
+		x0 += x5
+		x15 ~= x0
+		x15 = util.ROTL32(x15, 8)
+		x10 += x15
+		x5 ~= x10
+		x5 = util.ROTL32(x5, 7)
+
+		// quarterround(x, 1, 6, 11, 12)
+		x1 += x6
+		x12 ~= x1
+		x12 = util.ROTL32(x12, 16)
+		x11 += x12
+		x6 ~= x11
+		x6 = util.ROTL32(x6, 12)
+		x1 += x6
+		x12 ~= x1
+		x12 = util.ROTL32(x12, 8)
+		x11 += x12
+		x6 ~= x11
+		x6 = util.ROTL32(x6, 7)
+
+		// quarterround(x, 2, 7, 8, 13)
+		x2 += x7
+		x13 ~= x2
+		x13 = util.ROTL32(x13, 16)
+		x8 += x13
+		x7 ~= x8
+		x7 = util.ROTL32(x7, 12)
+		x2 += x7
+		x13 ~= x2
+		x13 = util.ROTL32(x13, 8)
+		x8 += x13
+		x7 ~= x8
+		x7 = util.ROTL32(x7, 7)
+
+		// quarterround(x, 3, 4, 9, 14)
+		x3 += x4
+		x14 ~= x3
+		x14 = util.ROTL32(x14, 16)
+		x9 += x14
+		x4 ~= x9
+		x4 = util.ROTL32(x4, 12)
+		x3 += x4
+		x14 ~= x3
+		x14 = util.ROTL32(x14, 8)
+		x9 += x14
+		x4 ~= x9
+		x4 = util.ROTL32(x4, 7)
+	}
+
+	util.PUT_U32_LE(dst[0:4], x0)
+	util.PUT_U32_LE(dst[4:8], x1)
+	util.PUT_U32_LE(dst[8:12], x2)
+	util.PUT_U32_LE(dst[12:16], x3)
+	util.PUT_U32_LE(dst[16:20], x12)
+	util.PUT_U32_LE(dst[20:24], x13)
+	util.PUT_U32_LE(dst[24:28], x14)
+	util.PUT_U32_LE(dst[28:32], x15)
+}
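
An editorial usage sketch of the public API defined above (not part of the commit): `key` must be `KEY_SIZE` (32) bytes and `nonce` either `NONCE_SIZE` (12) or `XNONCE_SIZE` (24) bytes, or `init` panics.

```odin
package chacha20_usage_sketch

import "core:crypto/chacha20"

encrypt_in_place :: proc (key, nonce, data: []byte) {
	ctx: chacha20.Context
	chacha20.init(&ctx, key, nonce)
	chacha20.seek(&ctx, 1)               // Optional: start at block 1, as chacha20poly1305 does.
	chacha20.xor_bytes(&ctx, data, data) // Encrypting and decrypting are the same operation.
	chacha20.reset(&ctx)                 // Wipe the key material from the context.
}
```

Passing `data` as both `dst` and `src` relies on exact aliasing, which is the pattern the `xor_bytes` TODO comment above treats as supported.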

+ 146 - 0
core/crypto/chacha20poly1305/chacha20poly1305.odin

@@ -0,0 +1,146 @@
+package chacha20poly1305
+
+import "core:crypto"
+import "core:crypto/chacha20"
+import "core:crypto/poly1305"
+import "core:crypto/util"
+import "core:mem"
+
+KEY_SIZE :: chacha20.KEY_SIZE
+NONCE_SIZE :: chacha20.NONCE_SIZE
+TAG_SIZE :: poly1305.TAG_SIZE
+
+_P_MAX :: 64 * 0xffffffff // 64 * (2^32-1)
+
+_validate_common_slice_sizes :: proc (tag, key, nonce, aad, text: []byte) {
+	if len(tag) != TAG_SIZE {
+		panic("crypto/chacha20poly1305: invalid destination tag size")
+	}
+	if len(key) != KEY_SIZE {
+		panic("crypto/chacha20poly1305: invalid key size")
+	}
+	if len(nonce) != NONCE_SIZE {
+		panic("crypto/chacha20poly1305: invalid nonce size")
+	}
+
+	#assert(size_of(int) == 8 || size_of(int) <= 4)
+	when size_of(int) == 8 {
+		// A_MAX = 2^64 - 1 due to the length field limit.
+		// P_MAX = 64 * (2^32 - 1) due to the IETF ChaCha20 counter limit.
+		//
+		// A_MAX is limited by size_of(int), so there is no need to
+		// enforce it. P_MAX only needs to be checked on 64-bit targets,
+		// for reasons that should be obvious.
+		if text_len := len(text); text_len > _P_MAX {
+			panic("crypto/chacha20poly1305: oversized src data")
+		}
+	}
+}
+
+_PAD: [16]byte
+_update_mac_pad16 :: #force_inline proc (ctx: ^poly1305.Context, x_len: int) {
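+	// e.g. x_len = 114 gives pad_len = 16 - (114 & 15) = 14 zero bytes of padding,
+	// while an x_len that is a multiple of 16 gives pad_len = 16 and no padding.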
+	if pad_len := 16 - (x_len & (16-1)); pad_len != 16 {
+		poly1305.update(ctx, _PAD[:pad_len])
+	}
+}
+
+encrypt :: proc (ciphertext, tag, key, nonce, aad, plaintext: []byte) {
+	_validate_common_slice_sizes(tag, key, nonce, aad, plaintext)
+	if len(ciphertext) != len(plaintext) {
+		panic("crypto/chacha20poly1305: invalid destination ciphertext size")
+	}
+
+	stream_ctx: chacha20.Context = ---
+	chacha20.init(&stream_ctx, key, nonce)
+
+	// otk = poly1305_key_gen(key, nonce)
+	otk: [poly1305.KEY_SIZE]byte = ---
+	chacha20.keystream_bytes(&stream_ctx, otk[:])
+	mac_ctx: poly1305.Context = ---
+	poly1305.init(&mac_ctx, otk[:])
+	mem.zero_explicit(&otk, size_of(otk))
+
+	aad_len, ciphertext_len := len(aad), len(ciphertext)
+
+	// There is nothing preventing aad and ciphertext from overlapping
+	// so auth the AAD before encrypting (slightly different from the
+	// RFC, since the RFC encrypts into a new buffer).
+	//
+	// mac_data = aad | pad16(aad)
+	poly1305.update(&mac_ctx, aad)
+	_update_mac_pad16(&mac_ctx, aad_len)
+
+	// ciphertext = chacha20_encrypt(key, 1, nonce, plaintext)
+	chacha20.seek(&stream_ctx, 1)
+	chacha20.xor_bytes(&stream_ctx, ciphertext, plaintext)
+	chacha20.reset(&stream_ctx) // Don't need the stream context anymore.
+
+	// mac_data |= ciphertext | pad16(ciphertext)
+	poly1305.update(&mac_ctx, ciphertext)
+	_update_mac_pad16(&mac_ctx, ciphertext_len)
+
+	// mac_data |= num_to_8_le_bytes(aad.length)
+	// mac_data |= num_to_8_le_bytes(ciphertext.length)
+	l_buf := otk[0:16] // Reuse the scratch buffer.
+	util.PUT_U64_LE(l_buf[0:8], u64(aad_len))
+	util.PUT_U64_LE(l_buf[8:16], u64(ciphertext_len))
+	poly1305.update(&mac_ctx, l_buf)
+
+	// tag = poly1305_mac(mac_data, otk)
+	poly1305.final(&mac_ctx, tag) // Implicitly sanitizes context.
+}
+
+decrypt :: proc (plaintext, tag, key, nonce, aad, ciphertext: []byte) -> bool {
+	_validate_common_slice_sizes(tag, key, nonce, aad, ciphertext)
+	if len(ciphertext) != len(plaintext) {
+		panic("crypto/chacha20poly1305: invalid destination plaintext size")
+	}
+
+	// Note: Unlike encrypt, this can fail early, so use defer for
+	// sanitization rather than assuming control flow reaches certain
+	// points where needed.
+
+	stream_ctx: chacha20.Context = ---
+	chacha20.init(&stream_ctx, key, nonce)
+
+	// otk = poly1305_key_gen(key, nonce)
+	otk: [poly1305.KEY_SIZE]byte = ---
+	chacha20.keystream_bytes(&stream_ctx, otk[:])
+	defer chacha20.reset(&stream_ctx)
+
+	mac_ctx: poly1305.Context = ---
+	poly1305.init(&mac_ctx, otk[:])
+	defer mem.zero_explicit(&otk, size_of(otk))
+
+	aad_len, ciphertext_len := len(aad), len(ciphertext)
+
+	// mac_data = aad | pad16(aad)
+	// mac_data |= ciphertext | pad16(ciphertext)
+	// mac_data |= num_to_8_le_bytes(aad.length)
+	// mac_data |= num_to_8_le_bytes(ciphertext.length)
+	poly1305.update(&mac_ctx, aad)
+	_update_mac_pad16(&mac_ctx, aad_len)
+	poly1305.update(&mac_ctx, ciphertext)
+	_update_mac_pad16(&mac_ctx, ciphertext_len)
+	l_buf := otk[0:16] // Reuse the scratch buffer.
+	util.PUT_U64_LE(l_buf[0:8], u64(aad_len))
+	util.PUT_U64_LE(l_buf[8:16], u64(ciphertext_len))
+	poly1305.update(&mac_ctx, l_buf)
+
+	// tag = poly1305_mac(mac_data, otk)
+	derived_tag := otk[0:poly1305.TAG_SIZE] // Reuse the scratch buffer again.
+	poly1305.final(&mac_ctx, derived_tag) // Implicitly sanitizes context.
+
+	// Validate the tag in constant time.
+	if crypto.compare_constant_time(tag, derived_tag) != 1 {
+		// Zero out the plaintext, as a defense in depth measure.
+		mem.zero_explicit(raw_data(plaintext), ciphertext_len)
+		return false
+	}
+
+	// plaintext = chacha20_decrypt(key, 1, nonce, ciphertext)
+	chacha20.seek(&stream_ctx, 1)
+	chacha20.xor_bytes(&stream_ctx, plaintext, ciphertext)
+
+	return true
+}
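Note: as a usage sketch only (not part of the commit; key/nonce generation and buffer management are assumed), the AEAD interface added above is driven roughly like this:

	import "core:crypto/chacha20poly1305"

	key:   [chacha20poly1305.KEY_SIZE]byte   // e.g. derived via a KDF or crypto.rand_bytes
	nonce: [chacha20poly1305.NONCE_SIZE]byte // MUST be unique per message under a given key
	tag:   [chacha20poly1305.TAG_SIZE]byte

	msg_str := "a secret message"
	msg := transmute([]byte)(msg_str)
	ct := make([]byte, len(msg))
	chacha20poly1305.encrypt(ct, tag[:], key[:], nonce[:], nil, msg)

	pt := make([]byte, len(ct))
	ok := chacha20poly1305.decrypt(pt, tag[:], key[:], nonce[:], nil, ct)
	assert(ok) // false means the tag failed to authenticate the AAD/ciphertext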

+ 52 - 0
core/crypto/crypto.odin

@@ -0,0 +1,52 @@
+package crypto
+
+import "core:mem"
+
+// compare_constant_time returns 1 iff a and b are equal, 0 otherwise.
+//
+// The execution time of this routine is constant regardless of the contents
+// of the slices being compared, as long as the length of the slices is equal.
+// If the length of the two slices is different, it will early-return 0.
+compare_constant_time :: proc "contextless" (a, b: []byte) -> int {
+	// If the length of the slices is different, early return.
+	//
+	// This leaks the fact that the slices have a different length,
+	// but the routine is primarily intended for comparing things
+	// like MACS and password digests.
+	n := len(a)
+	if n != len(b) {
+		return 0
+	}
+
+	return compare_byte_ptrs_constant_time(raw_data(a), raw_data(b), n)
+}
+
+// compare_byte_ptrs_constant_time returns 1 iff the bytes pointed to by
+// a and b are equal, 0 otherwise.
+//
+// The execution time of this routine is constant regardless of the
+// contents of the memory being compared.
+compare_byte_ptrs_constant_time :: proc "contextless" (a, b: ^byte, n: int) -> int {
+	x := mem.slice_ptr(a, n)
+	y := mem.slice_ptr(b, n)
+
+	v: byte
+	for i in 0..<n {
+		v |= x[i] ~ y[i]
+	}
+
+	// After the loop, v == 0 iff a == b.  The subtraction will underflow
+	// iff v == 0, setting the sign-bit, which gets returned.
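+	// e.g. v == 0 gives u32(0) - 1 == 0xffffffff, bit 31 set, so 1 is returned;
+	// any v in 1..=255 gives u32(v) - 1 <= 254, bit 31 clear, so 0 is returned.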
+	return int((u32(v)-1) >> 31)
+}
+
+// rand_bytes fills the dst buffer with cryptographic entropy taken from
+// the system entropy source.  This routine will block if the system entropy
+// source is not ready yet.  All system entropy source failures are treated
+// as catastrophic, resulting in a panic.
+rand_bytes :: proc (dst: []byte) {
+	// zero-fill the buffer first
+	mem.zero_explicit(raw_data(dst), len(dst))
+
+	_rand_bytes(dst)
+}
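Note: for illustration only (not part of the commit; received_tag and computed_tag are placeholder []byte values of equal length), typical callers look like:

	import "core:crypto"

	mac_ok := crypto.compare_constant_time(received_tag, computed_tag) == 1 // 1 means equal

	seed: [32]byte
	crypto.rand_bytes(seed[:]) // panics if the system entropy source fails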

+ 163 - 0
core/crypto/poly1305/poly1305.odin

@@ -0,0 +1,163 @@
+package poly1305
+
+import "core:crypto"
+import "core:crypto/util"
+import field "core:crypto/_fiat/field_poly1305"
+import "core:mem"
+
+KEY_SIZE :: 32
+TAG_SIZE :: 16
+
+_BLOCK_SIZE :: 16
+
+sum :: proc (dst, msg, key: []byte) {
+	ctx: Context = ---
+
+	init(&ctx, key)
+	update(&ctx, msg)
+	final(&ctx, dst)
+}
+
+verify :: proc (tag, msg, key: []byte) -> bool {
+	ctx: Context = ---
+	derived_tag: [16]byte = ---
+
+	if len(tag) != TAG_SIZE {
+		panic("crypto/poly1305: invalid tag size")
+	}
+
+	init(&ctx, key)
+	update(&ctx, msg)
+	final(&ctx, derived_tag[:])
+
+	return crypto.compare_constant_time(derived_tag[:], tag) == 1
+}
+
+Context :: struct {
+	_r: field.Tight_Field_Element,
+	_a: field.Tight_Field_Element,
+	_s: field.Tight_Field_Element,
+
+	_buffer: [_BLOCK_SIZE]byte,
+	_leftover: int,
+
+	_is_initialized: bool,
+}
+
+init :: proc (ctx: ^Context, key: []byte) {
+	if len(key) != KEY_SIZE {
+		panic("crypto/poly1305: invalid key size")
+	}
+
+	// r = le_bytes_to_num(key[0..15])
+	// r = clamp(r) (r &= 0x0ffffffc0ffffffc0ffffffc0fffffff)
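+	// The 128-bit clamp mask splits into a low half of 0x0ffffffc0fffffff and
+	// a high half of 0x0ffffffc0ffffffc, matching the two u64 masks below.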
+	tmp_lo := util.U64_LE(key[0:8]) & 0x0ffffffc0fffffff
+	tmp_hi := util.U64_LE(key[8:16]) & 0xffffffc0ffffffc
+	field.fe_from_u64s(&ctx._r, tmp_lo, tmp_hi)
+
+	// s = le_bytes_to_num(key[16..31])
+	field.fe_from_bytes(&ctx._s, key[16:32], 0)
+
+	// a = 0
+	field.fe_zero(&ctx._a)
+
+	// No leftover in buffer
+	ctx._leftover = 0
+
+	ctx._is_initialized = true
+}
+
+update :: proc (ctx: ^Context, data: []byte) {
+	assert(ctx._is_initialized)
+
+	msg := data
+	msg_len := len(data)
+
+	// Handle leftover
+	if ctx._leftover > 0 {
+		want := min(_BLOCK_SIZE - ctx._leftover, msg_len)
+		copy_slice(ctx._buffer[ctx._leftover:], msg[:want])
+		msg_len = msg_len - want
+		msg = msg[want:]
+		ctx._leftover = ctx._leftover + want
+		if ctx._leftover < _BLOCK_SIZE {
+			return
+		}
+		_blocks(ctx, ctx._buffer[:])
+		ctx._leftover = 0
+	}
+
+	// Process full blocks
+	if msg_len >= _BLOCK_SIZE {
+		want := msg_len & (~int(_BLOCK_SIZE - 1))
+		_blocks(ctx, msg[:want])
+		msg = msg[want:]
+		msg_len = msg_len - want
+	}
+
+	// Store leftover
+	if msg_len > 0 {
+		// TODO: While poly1305-donna does it this way, I'm fairly sure that
+		// `ctx._leftover == 0` is an invariant at this point.
+		copy(ctx._buffer[ctx._leftover:], msg)
+		ctx._leftover = ctx._leftover + msg_len
+	}
+}
+
+final :: proc (ctx: ^Context, dst: []byte) {
+	assert(ctx._is_initialized)
+
+	if len(dst) != TAG_SIZE {
+		panic("poly1305: invalid destination tag size")
+	}
+
+	// Process remaining block
+	if ctx._leftover > 0 {
+		ctx._buffer[ctx._leftover] = 1
+		for i := ctx._leftover + 1; i < _BLOCK_SIZE; i = i + 1 {
+			ctx._buffer[i] = 0
+		}
+		_blocks(ctx, ctx._buffer[:], true)
+	}
+
+	// a += s
+	field.fe_add(field.fe_relax_cast(&ctx._a), &ctx._a, &ctx._s) // _a unreduced
+	field.fe_carry(&ctx._a, field.fe_relax_cast(&ctx._a)) // _a reduced
+
+	// return num_to_16_le_bytes(a)
+	tmp: [32]byte = ---
+	field.fe_to_bytes(&tmp, &ctx._a)
+	copy_slice(dst, tmp[0:16])
+
+	reset(ctx)
+}
+
+reset :: proc (ctx: ^Context) {
+	mem.zero_explicit(&ctx._r, size_of(ctx._r))
+	mem.zero_explicit(&ctx._a, size_of(ctx._a))
+	mem.zero_explicit(&ctx._s, size_of(ctx._s))
+	mem.zero_explicit(&ctx._buffer, size_of(ctx._buffer))
+
+	ctx._is_initialized = false
+}
+
+_blocks :: proc (ctx: ^Context, msg: []byte, final := false) {
+	n: field.Tight_Field_Element = ---
+	final_byte := byte(!final)
+
+	data := msg
+	data_len := len(data)
+	for data_len >= _BLOCK_SIZE {
+		// n = le_bytes_to_num(msg[((i-1)*16)..(i*16)] | [0x01])
+		field.fe_from_bytes(&n, data[:_BLOCK_SIZE], final_byte, false)
+
+		// a += n
+		field.fe_add(field.fe_relax_cast(&ctx._a), &ctx._a, &n) // _a unreduced
+
+		// a = (r * a) % p
+		field.fe_carry_mul(&ctx._a, field.fe_relax_cast(&ctx._a), field.fe_relax_cast(&ctx._r)) // _a reduced
+
+		data = data[_BLOCK_SIZE:]
+		data_len = data_len - _BLOCK_SIZE
+	}
+}
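Note: a brief usage sketch of the one-shot and incremental interfaces above (illustrative only, not part of the commit; msg is a placeholder []byte and the key MUST be single-use):

	import "core:crypto/poly1305"

	key: [poly1305.KEY_SIZE]byte // one-time key
	tag: [poly1305.TAG_SIZE]byte
	poly1305.sum(tag[:], msg, key[:])          // one-shot MAC over msg
	ok := poly1305.verify(tag[:], msg, key[:]) // constant-time tag check

	// Or incrementally:
	ctx: poly1305.Context
	poly1305.init(&ctx, key[:])
	poly1305.update(&ctx, msg)
	poly1305.final(&ctx, tag[:])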

+ 7 - 0
core/crypto/rand_generic.odin

@@ -0,0 +1,7 @@
+package crypto
+
+when ODIN_OS != "linux" {
+	_rand_bytes :: proc (dst: []byte) {
+		unimplemented("crypto: rand_bytes not supported on this OS")
+	}
+}

+ 37 - 0
core/crypto/rand_linux.odin

@@ -0,0 +1,37 @@
+package crypto
+
+import "core:fmt"
+import "core:os"
+import "core:sys/unix"
+
+_MAX_PER_CALL_BYTES :: 33554431 // 2^25 - 1
+
+_rand_bytes :: proc (dst: []byte) {
+	dst := dst
+	l := len(dst)
+
+	for l > 0 {
+		to_read := min(l, _MAX_PER_CALL_BYTES)
+		ret := unix.sys_getrandom(raw_data(dst), to_read, 0)
+		if ret < 0 {
+			switch os.Errno(-ret) {
+			case os.EINTR:
+				// Call interrupted by a signal handler, just retry the
+				// request.
+				continue
+			case os.ENOSYS:
+				// The kernel is apparently prehistoric (< 3.17 circa 2014)
+				// and does not support getrandom.
+				panic("crypto: getrandom not available in kernel")
+			case:
+				// All other failures are things that should NEVER happen
+				// unless the kernel interface changes (ie: the Linux
+				// developers break userland).
+				panic(fmt.tprintf("crypto: getrandom failed: %d", ret))
+			}
+		}
+
+		l -= ret
+		dst = dst[ret:]
+	}
+}
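Note: as a worked example of the chunking above, a single 2^25 byte request (the size exercised by the test added in this commit) is satisfied by one getrandom call of 33,554,431 bytes followed by a second call for the remaining 1 byte.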

+ 126 - 0
core/crypto/x25519/x25519.odin

@@ -0,0 +1,126 @@
+package x25519
+
+import field "core:crypto/_fiat/field_curve25519"
+import "core:mem"
+
+SCALAR_SIZE :: 32
+POINT_SIZE :: 32
+
+_BASE_POINT: [32]byte = {9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+
+_scalar_bit :: #force_inline proc "contextless" (s: ^[32]byte, i: int) -> u8 {
+	if i < 0 {
+		return 0
+	}
+	return (s[i>>3] >> uint(i&7)) & 1
+}
+
+_scalarmult :: proc (out, scalar, point: ^[32]byte) {
+	// Montgomery pseudo-multiplication taken from Monocypher.
+
+	// computes the scalar product
+	x1: field.Tight_Field_Element = ---
+	field.fe_from_bytes(&x1, point)
+
+	// computes the actual scalar product (the result is in x2 and z2)
+	x2, x3, z2, z3: field.Tight_Field_Element =  ---, ---, ---, ---
+	t0, t1: field.Loose_Field_Element = ---, ---
+
+	// Montgomery ladder
+	// In projective coordinates, to avoid divisions: x = X / Z
+	// We don't care about the y coordinate, it's only 1 bit of information
+	field.fe_one(&x2) // "zero" point
+	field.fe_zero(&z2)
+	field.fe_set(&x3, &x1) // "one" point
+	field.fe_one(&z3)
+
+	swap: int
+	for pos := 255-1; pos >= 0; pos = pos - 1 {
+		// constant time conditional swap before ladder step
+		b := int(_scalar_bit(scalar, pos))
+		swap ~= b // xor trick avoids swapping at the end of the loop
+		field.fe_cond_swap(&x2, &x3, swap)
+		field.fe_cond_swap(&z2, &z3, swap)
+		swap = b // anticipates one last swap after the loop
+
+		// Montgomery ladder step: replaces (P2, P3) by (P2*2, P2+P3)
+		// with differential addition
+		//
+		// Note: This deliberately omits reductions after add/sub operations
+		// if the result is only ever used as the input to a mul/square since
+		// the implementations of those can deal with non-reduced inputs.
+		//
+		// fe_tighten_cast is only used to store a fully reduced
+		// output in a Loose_Field_Element, or to provide such a
+		// Loose_Field_Element as a Tight_Field_Element argument.
+		field.fe_sub(&t0, &x3, &z3)
+		field.fe_sub(&t1, &x2, &z2)
+		field.fe_add(field.fe_relax_cast(&x2), &x2, &z2) // x2 - unreduced
+		field.fe_add(field.fe_relax_cast(&z2), &x3, &z3) // z2 - unreduced
+		field.fe_carry_mul(&z3, &t0, field.fe_relax_cast(&x2))
+		field.fe_carry_mul(&z2, field.fe_relax_cast(&z2), &t1) // z2 - reduced
+		field.fe_carry_square(field.fe_tighten_cast(&t0), &t1) // t0 - reduced
+		field.fe_carry_square(field.fe_tighten_cast(&t1), field.fe_relax_cast(&x2)) // t1 - reduced
+		field.fe_add(field.fe_relax_cast(&x3), &z3, &z2) // x3 - unreduced
+		field.fe_sub(field.fe_relax_cast(&z2), &z3, &z2) // z2 - unreduced
+		field.fe_carry_mul(&x2, &t1, &t0) // x2 - reduced
+		field.fe_sub(&t1, field.fe_tighten_cast(&t1), field.fe_tighten_cast(&t0)) // safe - t1/t0 is reduced
+		field.fe_carry_square(&z2, field.fe_relax_cast(&z2)) // z2 - reduced
+		field.fe_carry_scmul_121666(&z3, &t1)
+		field.fe_carry_square(&x3, field.fe_relax_cast(&x3)) // x3 - reduced
+		field.fe_add(&t0, field.fe_tighten_cast(&t0), &z3) // safe - t0 is reduced
+		field.fe_carry_mul(&z3, field.fe_relax_cast(&x1), field.fe_relax_cast(&z2))
+		field.fe_carry_mul(&z2, &t1, &t0)
+	}
+	// last swap is necessary to compensate for the xor trick
+	// Note: after this swap, P3 == P2 + P1.
+	field.fe_cond_swap(&x2, &x3, swap)
+	field.fe_cond_swap(&z2, &z3, swap)
+
+	// normalises the coordinates: x == X / Z
+	field.fe_carry_inv(&z2, field.fe_relax_cast(&z2))
+	field.fe_carry_mul(&x2, field.fe_relax_cast(&x2), field.fe_relax_cast(&z2))
+	field.fe_to_bytes(out, &x2)
+
+	mem.zero_explicit(&x1, size_of(x1))
+	mem.zero_explicit(&x2, size_of(x2))
+	mem.zero_explicit(&x3, size_of(x3))
+	mem.zero_explicit(&z2, size_of(z2))
+	mem.zero_explicit(&z3, size_of(z3))
+	mem.zero_explicit(&t0, size_of(t0))
+	mem.zero_explicit(&t1, size_of(t1))
+}
+
+scalarmult :: proc (dst, scalar, point: []byte) {
+	if len(scalar) != SCALAR_SIZE {
+		panic("crypto/x25519: invalid scalar size")
+	}
+	if len(point) != POINT_SIZE {
+		panic("crypto/x25519: invalid point size")
+	}
+	if len(dst) != POINT_SIZE {
+		panic("crypto/x25519: invalid destination point size")
+	}
+
+	// "clamp" the scalar
+	e: [32]byte = ---
+	copy_slice(e[:], scalar)
+	e[0] &= 248
+	e[31] &= 127
+	e[31] |= 64
+
+	p: [32]byte = ---
+	copy_slice(p[:], point)
+
+	d: [32]byte = ---
+	_scalarmult(&d, &e, &p)
+	copy_slice(dst, d[:])
+
+	mem.zero_explicit(&e, size_of(e))
+	mem.zero_explicit(&d, size_of(d))
+}
+
+scalarmult_basepoint :: proc (dst, scalar: []byte) {
+	// TODO/perf: Switch to using a precomputed table.
+	scalarmult(dst, scalar, _BASE_POINT[:])
+}
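Note: an illustrative sketch of a Diffie-Hellman exchange with this API (not part of the commit; peer_pub is a placeholder for the other party's public key, and in practice the raw shared secret is normally passed through a KDF before use):

	import "core:crypto"
	import "core:crypto/x25519"

	// Each party picks a random 32-byte scalar as its private key...
	priv: [x25519.SCALAR_SIZE]byte
	crypto.rand_bytes(priv[:])

	// ...derives its public key...
	pub: [x25519.POINT_SIZE]byte
	x25519.scalarmult_basepoint(pub[:], priv[:])

	// ...and computes the shared secret from the peer's public key.
	shared: [x25519.POINT_SIZE]byte
	x25519.scalarmult(shared[:], priv[:], peer_pub[:])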

+ 34 - 42
core/mem/virtual/virtual_linux.odin

@@ -4,64 +4,56 @@ package mem_virtual
 
 import "core:c"
 import "core:intrinsics"
+import "core:sys/unix"
 
-when ODIN_ARCH == "amd64" {
-	SYS_mmap     :: 9
-	SYS_mprotect :: 10
-	SYS_munmap   :: 11
-	SYS_madvise  :: 28
-	
-	PROT_NONE  :: 0x0
-	PROT_READ  :: 0x1
-	PROT_WRITE :: 0x2
-	PROT_EXEC  :: 0x4
-	PROT_GROWSDOWN :: 0x01000000
-	PROT_GROWSUP :: 0x02000000
+PROT_NONE  :: 0x0
+PROT_READ  :: 0x1
+PROT_WRITE :: 0x2
+PROT_EXEC  :: 0x4
+PROT_GROWSDOWN :: 0x01000000
+PROT_GROWSUP :: 0x02000000
 
 
-	MAP_FIXED     :: 0x1
-	MAP_PRIVATE   :: 0x2
-	MAP_SHARED    :: 0x4
-	MAP_ANONYMOUS :: 0x20
-	
-	MADV_NORMAL      :: 0
-	MADV_RANDOM      :: 1
-	MADV_SEQUENTIAL  :: 2
-	MADV_WILLNEED    :: 3
-	MADV_DONTNEED    :: 4
-	MADV_FREE        :: 8
-	MADV_REMOVE      :: 9
-	MADV_DONTFORK    :: 10
-	MADV_DOFORK      :: 11
-	MADV_MERGEABLE   :: 12
-	MADV_UNMERGEABLE :: 13
-	MADV_HUGEPAGE    :: 14
-	MADV_NOHUGEPAGE  :: 15
-	MADV_DONTDUMP    :: 16
-	MADV_DODUMP      :: 17
-	MADV_WIPEONFORK  :: 18
-	MADV_KEEPONFORK  :: 19
-	MADV_HWPOISON    :: 100
-} else {
-	#panic("Unsupported architecture")
-}
+MAP_FIXED     :: 0x1
+MAP_PRIVATE   :: 0x2
+MAP_SHARED    :: 0x4
+MAP_ANONYMOUS :: 0x20
+
+MADV_NORMAL      :: 0
+MADV_RANDOM      :: 1
+MADV_SEQUENTIAL  :: 2
+MADV_WILLNEED    :: 3
+MADV_DONTNEED    :: 4
+MADV_FREE        :: 8
+MADV_REMOVE      :: 9
+MADV_DONTFORK    :: 10
+MADV_DOFORK      :: 11
+MADV_MERGEABLE   :: 12
+MADV_UNMERGEABLE :: 13
+MADV_HUGEPAGE    :: 14
+MADV_NOHUGEPAGE  :: 15
+MADV_DONTDUMP    :: 16
+MADV_DODUMP      :: 17
+MADV_WIPEONFORK  :: 18
+MADV_KEEPONFORK  :: 19
+MADV_HWPOISON    :: 100
 
 mmap :: proc "contextless" (addr: rawptr, length: uint, prot: c.int, flags: c.int, fd: c.int, offset: uintptr) -> rawptr {
-	res := intrinsics.syscall(SYS_mmap, uintptr(addr), uintptr(length), uintptr(prot), uintptr(flags), uintptr(fd), offset)
+	res := intrinsics.syscall(unix.SYS_mmap, uintptr(addr), uintptr(length), uintptr(prot), uintptr(flags), uintptr(fd), offset)
 	return rawptr(res)
 }
 
 munmap :: proc "contextless" (addr: rawptr, length: uint) -> c.int {
-	res := intrinsics.syscall(SYS_munmap, uintptr(addr), uintptr(length))
+	res := intrinsics.syscall(unix.SYS_munmap, uintptr(addr), uintptr(length))
 	return c.int(res)
 }
 
 mprotect :: proc "contextless" (addr: rawptr, length: uint, prot: c.int) -> c.int {
-	res := intrinsics.syscall(SYS_mprotect, uintptr(addr), uintptr(length), uint(prot))
+	res := intrinsics.syscall(unix.SYS_mprotect, uintptr(addr), uintptr(length), uint(prot))
 	return c.int(res)
 }
 
 madvise :: proc "contextless" (addr: rawptr, length: uint, advice: c.int) -> c.int {
-	res := intrinsics.syscall(SYS_madvise, uintptr(addr), uintptr(length), uintptr(advice))
+	res := intrinsics.syscall(unix.SYS_madvise, uintptr(addr), uintptr(length), uintptr(advice))
 	return c.int(res)
 }
 

+ 2 - 3
core/os/os_linux.odin

@@ -8,6 +8,7 @@ import "core:strings"
 import "core:c"
 import "core:strconv"
 import "core:intrinsics"
+import "core:sys/unix"
 
 Handle    :: distinct i32
 File_Time :: distinct u64
@@ -265,8 +266,6 @@ X_OK :: 1 // Test for execute permission
 W_OK :: 2 // Test for write permission
 R_OK :: 4 // Test for read permission
 
-SYS_GETTID :: 186
-
 foreign libc {
 	@(link_name="__errno_location") __errno_location    :: proc() -> ^int ---
 
@@ -594,7 +593,7 @@ exit :: proc "contextless" (code: int) -> ! {
 }
 
 current_thread_id :: proc "contextless" () -> int {
-	return cast(int)intrinsics.syscall(SYS_GETTID)
+	return unix.sys_gettid()
 }
 
 dlopen :: proc(filename: string, flags: int) -> rawptr {

+ 2 - 1
core/sync/sync2/futex_linux.odin

@@ -5,6 +5,7 @@ package sync2
 import "core:c"
 import "core:time"
 import "core:intrinsics"
+import "core:sys/unix"
 
 FUTEX_WAIT :: 0
 FUTEX_WAKE :: 1
@@ -34,7 +35,7 @@ get_errno :: proc(r: int) -> int {
 }
 
 internal_futex :: proc(f: ^Futex, op: c.int, val: u32, timeout: rawptr) -> int {
-	code := int(intrinsics.syscall(202, uintptr(f), uintptr(op), uintptr(val), uintptr(timeout), 0, 0))
+	code := int(intrinsics.syscall(unix.SYS_futex, uintptr(f), uintptr(op), uintptr(val), uintptr(timeout), 0, 0))
 	return get_errno(code)
 }
 

+ 2 - 3
core/sync/sync2/primitives_linux.odin

@@ -2,9 +2,8 @@
 //+private
 package sync2
 
-import "core:intrinsics"
+import "core:sys/unix"
 
 _current_thread_id :: proc "contextless" () -> int {
-	SYS_GETTID :: 186
-	return int(intrinsics.syscall(SYS_GETTID))
+	return unix.sys_gettid()
 }

+ 1 - 3
core/sync/sync_linux.odin

@@ -1,11 +1,9 @@
 package sync
 
 import "core:sys/unix"
-import "core:intrinsics"
 
 current_thread_id :: proc "contextless" () -> int {
-	SYS_GETTID :: 186
-	return int(intrinsics.syscall(SYS_GETTID))
+	return unix.sys_gettid()
 }
 
 

+ 60 - 0
core/sys/unix/syscalls_linux.odin

@@ -0,0 +1,60 @@
+package unix
+
+import "core:intrinsics"
+
+// Linux has inconsistent system call numbering across architectures,
+// for largely historical reasons.  This attempts to provide a unified
+// Odin-side interface for system calls that are required for the core
+// library to work.
+
+// For authorative system call numbers, the following files in the kernel
+// source can be used:
+//
+//  amd64: arch/x86/entry/syscalls/syscall_64.tbl
+//  arm64: include/uapi/asm-generic/unistd.h
+//  386: arch/x86/entry/syscalls/syscall_32.tbl
+//  arm: arch/arm/tools/syscall.tbl
+
+when ODIN_ARCH == "amd64" {
+	SYS_mmap : uintptr : 9
+	SYS_mprotect : uintptr : 10
+	SYS_munmap : uintptr : 11
+	SYS_madvise : uintptr : 28
+	SYS_futex : uintptr : 202
+	SYS_gettid : uintptr : 186
+	SYS_getrandom : uintptr : 318
+} else when ODIN_ARCH == "arm64" {
+	SYS_mmap : uintptr : 222
+	SYS_mprotect : uintptr : 226
+	SYS_munmap : uintptr : 215
+	SYS_madvise : uintptr : 233
+	SYS_futex : uintptr : 98
+	SYS_gettid : uintptr : 178
+	SYS_getrandom : uintptr : 278
+} else when ODIN_ARCH == "386" {
+	SYS_mmap : uintptr : 192 // 90 is "sys_old_mmap", we want mmap2
+	SYS_mprotect : uintptr : 125
+	SYS_munmap : uintptr : 91
+	SYS_madvise : uintptr : 219
+	SYS_futex : uintptr : 240
+	SYS_gettid : uintptr : 224
+	SYS_getrandom : uintptr : 355
+} else when ODIN_ARCH == "arm" {
+	SYS_mmap : uintptr : 192 // 90 is "sys_old_mmap", we want mmap2
+	SYS_mprotect : uintptr : 125
+	SYS_munmap : uintptr : 91
+	SYS_madvise : uintptr : 220
+	SYS_futex : uintptr : 240
+	SYS_gettid : uintptr : 224
+	SYS_getrandom : uintptr : 384
+} else {
+	#panic("Unsupported architecture")
+}
+
+sys_gettid :: proc "contextless" () -> int {
+	return cast(int)intrinsics.syscall(SYS_gettid)
+}
+
+sys_getrandom :: proc "contextless" (buf: ^byte, buflen: int, flags: uint) -> int {
+	return cast(int)intrinsics.syscall(SYS_getrandom, buf, cast(uintptr)(buflen), cast(uintptr)(flags))
+}

+ 2 - 2
src/main.cpp

@@ -545,8 +545,8 @@ void usage(String argv0) {
 	print_usage_line(1, "version   print version");
 	print_usage_line(1, "report    print information useful to reporting a bug");
 	print_usage_line(0, "");
-	print_usage_line(0, "For more information of flags, apply the flag to see what is possible");
-	print_usage_line(1, "-help");
+	print_usage_line(0, "For further details on a command, use -help after the command name");
+	print_usage_line(1, "e.g. odin build -help");
 }
 
 

+ 9 - 0
tests/core/crypto/test_core_crypto.odin

@@ -115,6 +115,15 @@ main :: proc() {
     test_haval_224(&t)
     test_haval_256(&t)
 
+    // "modern" crypto tests
+    test_chacha20(&t)
+    test_poly1305(&t)
+    test_chacha20poly1305(&t)
+    test_x25519(&t)
+    test_rand_bytes(&t)
+
+    bench_modern(&t)
+
     fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
 }
 

+ 535 - 0
tests/core/crypto/test_core_crypto_modern.odin

@@ -0,0 +1,535 @@
+package test_core_crypto
+
+import "core:testing"
+import "core:fmt"
+import "core:mem"
+import "core:time"
+import "core:crypto"
+
+import "core:crypto/chacha20"
+import "core:crypto/chacha20poly1305"
+import "core:crypto/poly1305"
+import "core:crypto/x25519"
+
+_digit_value :: proc(r: rune) -> int {
+	ri := int(r)
+	v: int = 16
+	switch r {
+	case '0'..='9': v = ri-'0'
+	case 'a'..='z': v = ri-'a'+10
+	case 'A'..='Z': v = ri-'A'+10
+	}
+	return v
+}
+
+_decode_hex32 :: proc(s: string) -> [32]byte {
+	b: [32]byte
+	for i := 0; i < len(s); i = i + 2 {
+		hi := _digit_value(rune(s[i]))
+		lo := _digit_value(rune(s[i+1]))
+		b[i/2] = byte(hi << 4 | lo)
+	}
+	return b
+}
+
+_PLAINTEXT_SUNSCREEN_STR := "Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it."
+
+@(test)
+test_chacha20 :: proc(t: ^testing.T) {
+	log(t, "Testing (X)ChaCha20")
+
+	// Test cases taken from RFC 8439, and draft-irtf-cfrg-xchacha-03
+	plaintext := transmute([]byte)(_PLAINTEXT_SUNSCREEN_STR)
+
+	key := [chacha20.KEY_SIZE]byte{
+		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+		0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+		0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+	}
+
+	nonce := [chacha20.NONCE_SIZE]byte{
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4a,
+		0x00, 0x00, 0x00, 0x00,
+	}
+
+	ciphertext := [114]byte{
+		0x6e, 0x2e, 0x35, 0x9a, 0x25, 0x68, 0xf9, 0x80,
+		0x41, 0xba, 0x07, 0x28, 0xdd, 0x0d, 0x69, 0x81,
+		0xe9, 0x7e, 0x7a, 0xec, 0x1d, 0x43, 0x60, 0xc2,
+		0x0a, 0x27, 0xaf, 0xcc, 0xfd, 0x9f, 0xae, 0x0b,
+		0xf9, 0x1b, 0x65, 0xc5, 0x52, 0x47, 0x33, 0xab,
+		0x8f, 0x59, 0x3d, 0xab, 0xcd, 0x62, 0xb3, 0x57,
+		0x16, 0x39, 0xd6, 0x24, 0xe6, 0x51, 0x52, 0xab,
+		0x8f, 0x53, 0x0c, 0x35, 0x9f, 0x08, 0x61, 0xd8,
+		0x07, 0xca, 0x0d, 0xbf, 0x50, 0x0d, 0x6a, 0x61,
+		0x56, 0xa3, 0x8e, 0x08, 0x8a, 0x22, 0xb6, 0x5e,
+		0x52, 0xbc, 0x51, 0x4d, 0x16, 0xcc, 0xf8, 0x06,
+		0x81, 0x8c, 0xe9, 0x1a, 0xb7, 0x79, 0x37, 0x36,
+		0x5a, 0xf9, 0x0b, 0xbf, 0x74, 0xa3, 0x5b, 0xe6,
+		0xb4, 0x0b, 0x8e, 0xed, 0xf2, 0x78, 0x5e, 0x42,
+		0x87, 0x4d,
+	}
+	ciphertext_str := hex_string(ciphertext[:])
+
+	derived_ciphertext: [114]byte
+	ctx: chacha20.Context = ---
+	chacha20.init(&ctx, key[:], nonce[:])
+	chacha20.seek(&ctx, 1) // The test vectors start the counter at 1.
+	chacha20.xor_bytes(&ctx, derived_ciphertext[:], plaintext[:])
+
+	derived_ciphertext_str := hex_string(derived_ciphertext[:])
+	expect(t, derived_ciphertext_str == ciphertext_str, fmt.tprintf("Expected %s for xor_bytes(plaintext_str), but got %s instead", ciphertext_str, derived_ciphertext_str))
+
+	xkey := [chacha20.KEY_SIZE]byte{
+		0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+		0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+		0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+		0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+	}
+
+	xnonce := [chacha20.XNONCE_SIZE]byte{
+		0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+		0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
+		0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+	}
+
+	xciphertext := [114]byte{
+		0xbd, 0x6d, 0x17, 0x9d, 0x3e, 0x83, 0xd4, 0x3b,
+		0x95, 0x76, 0x57, 0x94, 0x93, 0xc0, 0xe9, 0x39,
+		0x57, 0x2a, 0x17, 0x00, 0x25, 0x2b, 0xfa, 0xcc,
+		0xbe, 0xd2, 0x90, 0x2c, 0x21, 0x39, 0x6c, 0xbb,
+		0x73, 0x1c, 0x7f, 0x1b, 0x0b, 0x4a, 0xa6, 0x44,
+		0x0b, 0xf3, 0xa8, 0x2f, 0x4e, 0xda, 0x7e, 0x39,
+		0xae, 0x64, 0xc6, 0x70, 0x8c, 0x54, 0xc2, 0x16,
+		0xcb, 0x96, 0xb7, 0x2e, 0x12, 0x13, 0xb4, 0x52,
+		0x2f, 0x8c, 0x9b, 0xa4, 0x0d, 0xb5, 0xd9, 0x45,
+		0xb1, 0x1b, 0x69, 0xb9, 0x82, 0xc1, 0xbb, 0x9e,
+		0x3f, 0x3f, 0xac, 0x2b, 0xc3, 0x69, 0x48, 0x8f,
+		0x76, 0xb2, 0x38, 0x35, 0x65, 0xd3, 0xff, 0xf9,
+		0x21, 0xf9, 0x66, 0x4c, 0x97, 0x63, 0x7d, 0xa9,
+		0x76, 0x88, 0x12, 0xf6, 0x15, 0xc6, 0x8b, 0x13,
+		0xb5, 0x2e,
+	}
+	xciphertext_str := hex_string(xciphertext[:])
+
+	chacha20.init(&ctx, xkey[:], xnonce[:])
+	chacha20.seek(&ctx, 1)
+	chacha20.xor_bytes(&ctx, derived_ciphertext[:], plaintext[:])
+
+	derived_ciphertext_str = hex_string(derived_ciphertext[:])
+	expect(t, derived_ciphertext_str == xciphertext_str, fmt.tprintf("Expected %s for xor_bytes(plaintext_str), but got %s instead", xciphertext_str, derived_ciphertext_str))
+}
+
+@(test)
+test_poly1305 :: proc(t: ^testing.T) {
+	log(t, "Testing poly1305")
+
+	// Test cases taken from poly1305-donna.
+	key := [poly1305.KEY_SIZE]byte{
+		0xee,0xa6,0xa7,0x25,0x1c,0x1e,0x72,0x91,
+		0x6d,0x11,0xc2,0xcb,0x21,0x4d,0x3c,0x25,
+		0x25,0x39,0x12,0x1d,0x8e,0x23,0x4e,0x65,
+		0x2d,0x65,0x1f,0xa4,0xc8,0xcf,0xf8,0x80,
+	}
+
+	msg := [131]byte{
+		0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73,
+		0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce,
+		0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4,
+		0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a,
+		0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b,
+		0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72,
+		0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2,
+		0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38,
+		0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a,
+		0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae,
+		0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea,
+		0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda,
+		0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde,
+		0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3,
+		0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6,
+		0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74,
+		0xe3,0x55,0xa5,
+	}
+
+	tag := [poly1305.TAG_SIZE]byte{
+		0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5,
+		0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9,
+	}
+	tag_str := hex_string(tag[:])
+
+	// Verify - oneshot + compare
+	ok := poly1305.verify(tag[:], msg[:], key[:])
+	expect(t, ok, "oneshot verify call failed")
+
+	// Sum - oneshot
+	derived_tag: [poly1305.TAG_SIZE]byte
+	poly1305.sum(derived_tag[:], msg[:], key[:])
+	derived_tag_str := hex_string(derived_tag[:])
+	expect(t, derived_tag_str == tag_str, fmt.tprintf("Expected %s for sum(msg, key), but got %s instead", tag_str, derived_tag_str))
+
+	// Incremental
+	mem.zero(&derived_tag, size_of(derived_tag))
+	ctx: poly1305.Context = ---
+	poly1305.init(&ctx, key[:])
+	read_lengths := [11]int{32, 64, 16, 8, 4, 2, 1, 1, 1, 1, 1}
+	off := 0
+	for read_length in read_lengths {
+		to_read := msg[off:off+read_length]
+		poly1305.update(&ctx, to_read)
+		off = off + read_length
+	}
+	poly1305.final(&ctx, derived_tag[:])
+	derived_tag_str = hex_string(derived_tag[:])
+	expect(t, derived_tag_str == tag_str, fmt.tprintf("Expected %s for init/update/final - incremental, but got %s instead", tag_str, derived_tag_str))
+}
+
+@(test)
+test_chacha20poly1305 :: proc(t: ^testing.T) {
+	log(t, "Testing chacha20poly1305")
+
+	plaintext := transmute([]byte)(_PLAINTEXT_SUNSCREEN_STR)
+
+	aad := [12]byte{
+		0x50, 0x51, 0x52, 0x53, 0xc0, 0xc1, 0xc2, 0xc3,
+		0xc4, 0xc5, 0xc6, 0xc7,
+	}
+
+	key := [chacha20poly1305.KEY_SIZE]byte{
+		0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+		0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+		0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+		0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+	}
+
+	nonce := [chacha20poly1305.NONCE_SIZE]byte{
+		0x07, 0x00, 0x00, 0x00,
+		0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+	}
+
+	ciphertext := [114]byte{
+		0xd3, 0x1a, 0x8d, 0x34, 0x64, 0x8e, 0x60, 0xdb,
+		0x7b, 0x86, 0xaf, 0xbc, 0x53, 0xef, 0x7e, 0xc2,
+		0xa4, 0xad, 0xed, 0x51, 0x29, 0x6e, 0x08, 0xfe,
+		0xa9, 0xe2, 0xb5, 0xa7, 0x36, 0xee, 0x62, 0xd6,
+		0x3d, 0xbe, 0xa4, 0x5e, 0x8c, 0xa9, 0x67, 0x12,
+		0x82, 0xfa, 0xfb, 0x69, 0xda, 0x92, 0x72, 0x8b,
+		0x1a, 0x71, 0xde, 0x0a, 0x9e, 0x06, 0x0b, 0x29,
+		0x05, 0xd6, 0xa5, 0xb6, 0x7e, 0xcd, 0x3b, 0x36,
+		0x92, 0xdd, 0xbd, 0x7f, 0x2d, 0x77, 0x8b, 0x8c,
+		0x98, 0x03, 0xae, 0xe3, 0x28, 0x09, 0x1b, 0x58,
+		0xfa, 0xb3, 0x24, 0xe4, 0xfa, 0xd6, 0x75, 0x94,
+		0x55, 0x85, 0x80, 0x8b, 0x48, 0x31, 0xd7, 0xbc,
+		0x3f, 0xf4, 0xde, 0xf0, 0x8e, 0x4b, 0x7a, 0x9d,
+		0xe5, 0x76, 0xd2, 0x65, 0x86, 0xce, 0xc6, 0x4b,
+		0x61, 0x16,
+	}
+	ciphertext_str := hex_string(ciphertext[:])
+
+	tag := [chacha20poly1305.TAG_SIZE]byte{
+		0x1a, 0xe1, 0x0b, 0x59, 0x4f, 0x09, 0xe2, 0x6a,
+		0x7e, 0x90, 0x2e, 0xcb, 0xd0, 0x60, 0x06, 0x91,
+	}
+	tag_str := hex_string(tag[:])
+
+	derived_tag: [chacha20poly1305.TAG_SIZE]byte
+	derived_ciphertext: [114]byte
+
+	chacha20poly1305.encrypt(derived_ciphertext[:], derived_tag[:], key[:], nonce[:], aad[:], plaintext)
+
+	derived_ciphertext_str := hex_string(derived_ciphertext[:])
+	expect(t, derived_ciphertext_str == ciphertext_str, fmt.tprintf("Expected ciphertext %s for encrypt(aad, plaintext), but got %s instead", ciphertext_str, derived_ciphertext_str))
+
+	derived_tag_str := hex_string(derived_tag[:])
+	expect(t, derived_tag_str == tag_str, fmt.tprintf("Expected tag %s for encrypt(aad, plaintext), but got %s instead", tag_str, derived_tag_str))
+
+	derived_plaintext: [114]byte
+	ok := chacha20poly1305.decrypt(derived_plaintext[:], tag[:], key[:], nonce[:], aad[:], ciphertext[:])
+	derived_plaintext_str := string(derived_plaintext[:])
+	expect(t, ok, "Expected true for decrypt(tag, aad, ciphertext)")
+	expect(t, derived_plaintext_str == _PLAINTEXT_SUNSCREEN_STR, fmt.tprintf("Expected plaintext %s for decrypt(tag, aad, ciphertext), but got %s instead", _PLAINTEXT_SUNSCREEN_STR, derived_plaintext_str))
+
+	derived_ciphertext[0] ~= 0xa5
+	ok = chacha20poly1305.decrypt(derived_plaintext[:], tag[:], key[:], nonce[:], aad[:], derived_ciphertext[:])
+	expect(t, !ok, "Expected false for decrypt(tag, aad, corrupted_ciphertext)")
+
+	aad[0] ~= 0xa5
+	ok = chacha20poly1305.decrypt(derived_plaintext[:], tag[:], key[:], nonce[:], aad[:], ciphertext[:])
+	expect(t, !ok, "Expected false for decrypt(tag, corrupted_aad, ciphertext)")
+}
+
+TestECDH :: struct {
+	scalar:  string,
+	point:   string,
+	product: string,
+}
+
+@(test)
+test_x25519 :: proc(t: ^testing.T) {
+	log(t, "Testing X25519")
+
+	test_vectors := [?]TestECDH {
+		// Test vectors from RFC 7748
+		TestECDH{
+			"a546e36bf0527c9d3b16154b82465edd62144c0ac1fc5a18506a2244ba449ac4",
+			"e6db6867583030db3594c1a424b15f7c726624ec26b3353b10a903a6d0ab1c4c",
+			"c3da55379de9c6908e94ea4df28d084f32eccf03491c71f754b4075577a28552",
+		},
+		TestECDH{
+			"4b66e9d4d1b4673c5ad22691957d6af5c11b6421e0ea01d42ca4169e7918ba0d",
+			"e5210f12786811d3f4b7959d0538ae2c31dbe7106fc03c3efc4cd549c715a493",
+			"95cbde9476e8907d7aade45cb4b873f88b595a68799fa152e6f8f7647aac7957",
+		},
+	}
+	for v, _ in test_vectors {
+		scalar := _decode_hex32(v.scalar)
+		point := _decode_hex32(v.point)
+
+		derived_point: [x25519.POINT_SIZE]byte
+		x25519.scalarmult(derived_point[:], scalar[:], point[:])
+		derived_point_str := hex_string(derived_point[:])
+
+		expect(t, derived_point_str == v.product, fmt.tprintf("Expected %s for %s * %s, but got %s instead", v.product, v.scalar, v.point, derived_point_str))
+
+		// Abuse the test vectors to sanity-check the scalar-basepoint multiply.
+		p1, p2: [x25519.POINT_SIZE]byte
+		x25519.scalarmult_basepoint(p1[:], scalar[:])
+		x25519.scalarmult(p2[:], scalar[:], x25519._BASE_POINT[:])
+		p1_str, p2_str := hex_string(p1[:]), hex_string(p2[:])
+		expect(t, p1_str == p2_str, fmt.tprintf("Expected %s for %s * basepoint, but got %s instead", p2_str, v.scalar, p1_str))
+	}
+
+	// TODO/tests: Run the wycheproof test vectors, once I figure out
+	// how to work with JSON.
+}
+
+@(test)
+test_rand_bytes :: proc(t: ^testing.T) {
+	log(t, "Testing rand_bytes")
+
+	if ODIN_OS != "linux" {
+		log(t, "rand_bytes not supported - skipping")
+		return
+	}
+
+	allocator := context.allocator
+
+	buf := make([]byte, 1 << 25, allocator)
+	defer delete(buf)
+
+	// Testing a CSPRNG for correctness is incredibly involved and
+	// beyond the scope of an implementation that offloads
+	// responsibility for correctness to the OS.
+	//
+	// Just attempt to randomize a sufficiently large buffer, where
+	// sufficiently large is:
+	//  * Larger than the maximum getentropy request size (256 bytes).
+	//  * Larger than the maximum getrandom request size (2^25 - 1 bytes).
+	//
+	// While theoretically non-deterministic, if this fails, chances
+	// are the CSPRNG is busted.
+	seems_ok := false
+	for i := 0; i < 256; i = i + 1 {
+		mem.zero_explicit(raw_data(buf), len(buf))
+		crypto.rand_bytes(buf)
+
+		if buf[0] != 0 && buf[len(buf)-1] != 0 {
+			seems_ok = true
+			break
+		}
+	}
+
+	expect(t, seems_ok, "Expected to randomize the head and tail of the buffer within a handful of attempts")
+}
+
+@(test)
+bench_modern :: proc(t: ^testing.T) {
+	fmt.println("Starting benchmarks:")
+
+	bench_chacha20(t)
+	bench_poly1305(t)
+	bench_chacha20poly1305(t)
+	bench_x25519(t)
+}
+
+_setup_sized_buf :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
+	assert(options != nil)
+
+	options.input = make([]u8, options.bytes, allocator)
+	return nil if len(options.input) == options.bytes else .Allocation_Error
+}
+
+_teardown_sized_buf :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
+	assert(options != nil)
+
+	delete(options.input)
+	return nil
+}
+
+_benchmark_chacha20 :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
+	buf := options.input
+	key := [chacha20.KEY_SIZE]byte{
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+	}
+	nonce := [chacha20.NONCE_SIZE]byte{
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00,
+	}
+
+	ctx: chacha20.Context = ---
+	chacha20.init(&ctx, key[:], nonce[:])
+
+	for _ in 0..=options.rounds {
+		chacha20.xor_bytes(&ctx, buf, buf)
+	}
+	options.count     = options.rounds
+	options.processed = options.rounds * options.bytes
+	return nil
+}
+
+_benchmark_poly1305 :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
+	buf := options.input
+	key := [poly1305.KEY_SIZE]byte{
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+	}
+
+	tag: [poly1305.TAG_SIZE]byte = ---
+	for _ in 0..=options.rounds {
+		poly1305.sum(tag[:], buf, key[:])
+	}
+	options.count     = options.rounds
+	options.processed = options.rounds * options.bytes
+	//options.hash      = u128(h)
+	return nil
+}
+
+_benchmark_chacha20poly1305 :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
+	buf := options.input
+	key := [chacha20.KEY_SIZE]byte{
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+	}
+	nonce := [chacha20.NONCE_SIZE]byte{
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00,
+	}
+
+	tag: [chacha20poly1305.TAG_SIZE]byte = ---
+
+	for _ in 0..=options.rounds {
+		chacha20poly1305.encrypt(buf, tag[:], key[:], nonce[:], nil, buf)
+	}
+	options.count     = options.rounds
+	options.processed = options.rounds * options.bytes
+	return nil
+}
+
+benchmark_print :: proc(name: string, options: ^time.Benchmark_Options) {
+	fmt.printf("\t[%v] %v rounds, %v bytes processed in %v ns\n\t\t%5.3f rounds/s, %5.3f MiB/s\n",
+		name,
+		options.rounds,
+		options.processed,
+		time.duration_nanoseconds(options.duration),
+		options.rounds_per_second,
+		options.megabytes_per_second,
+	)
+}
+
+bench_chacha20 :: proc(t: ^testing.T) {
+	name    := "ChaCha20 64 bytes"
+	options := &time.Benchmark_Options{
+		rounds   = 1_000,
+		bytes    = 64,
+		setup    = _setup_sized_buf,
+		bench    = _benchmark_chacha20,
+		teardown = _teardown_sized_buf,
+	}
+
+	err  := time.benchmark(options, context.allocator)
+	expect(t, err == nil, name)
+	benchmark_print(name, options)
+
+	name = "ChaCha20 1024 bytes"
+	options.bytes = 1024
+	err = time.benchmark(options, context.allocator)
+	expect(t, err == nil, name)
+	benchmark_print(name, options)
+
+	name = "ChaCha20 65536 bytes"
+	options.bytes = 65536
+	err = time.benchmark(options, context.allocator)
+	expect(t, err == nil, name)
+	benchmark_print(name, options)
+}
+
+bench_poly1305 :: proc(t: ^testing.T) {
+	name    := "Poly1305 64 zero bytes"
+	options := &time.Benchmark_Options{
+		rounds   = 1_000,
+		bytes    = 64,
+		setup    = _setup_sized_buf,
+		bench    = _benchmark_poly1305,
+		teardown = _teardown_sized_buf,
+	}
+
+	err  := time.benchmark(options, context.allocator)
+	expect(t, err == nil, name)
+	benchmark_print(name, options)
+
+	name = "Poly1305 1024 zero bytes"
+	options.bytes = 1024
+	err = time.benchmark(options, context.allocator)
+	expect(t, err == nil, name)
+	benchmark_print(name, options)
+}
+
+bench_chacha20poly1305 :: proc(t: ^testing.T) {
+	name    := "chacha20poly1305 64 bytes"
+	options := &time.Benchmark_Options{
+		rounds   = 1_000,
+		bytes    = 64,
+		setup    = _setup_sized_buf,
+		bench    = _benchmark_chacha20poly1305,
+		teardown = _teardown_sized_buf,
+	}
+
+	err  := time.benchmark(options, context.allocator)
+	expect(t, err == nil, name)
+	benchmark_print(name, options)
+
+	name = "chacha20poly1305 1024 bytes"
+	options.bytes = 1024
+	err = time.benchmark(options, context.allocator)
+	expect(t, err == nil, name)
+	benchmark_print(name, options)
+
+	name = "chacha20poly1305 65536 bytes"
+	options.bytes = 65536
+	err = time.benchmark(options, context.allocator)
+	expect(t, err == nil, name)
+	benchmark_print(name, options)
+}
+
+bench_x25519 :: proc(t: ^testing.T) {
+	point := _decode_hex32("deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef")
+	scalar := _decode_hex32("cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe")
+	out: [x25519.POINT_SIZE]byte = ---
+
+	iters :: 10000
+	start := time.now()
+	for i := 0; i < iters; i = i + 1 {
+		x25519.scalarmult(out[:], scalar[:], point[:])
+	}
+	elapsed := time.since(start)
+
+	log(t, fmt.tprintf("x25519.scalarmult: ~%f us/op", time.duration_microseconds(elapsed) / iters))
+}