3 năm trước cách đây · 1a7a6a9116
--- a/core/crypto/_fiat/README.md
+++ b/core/crypto/_fiat/README.md
@@ -0,0 +1,35 @@
 
				+# fiat
			
 
				+
			
 
				+This package contains low level arithmetic required to implement certain
			
 
				+cryptographic primitives, ported from the [fiat-crypto project][1]
			
 
				+along with some higher-level helpers.
			
 
				+
			
 
				+## Notes
			
 
				+
			
 
				+fiat-crypto gives the choice of 3 licenses for derived works.  The 1-Clause
			
 
				+BSD license is chosen as it is compatible with Odin's existing licensing.
			
 
				+
			
 
				+The routines are intended to be timing-safe, as long as the underlying
			
 
				+integer arithmetic is constant time.  This is true on most systems commonly
			
 
				+used today, with the notable exception of WASM.
			
 
				+
			
 
				+While fiat-crypto provides both output targeting both 32-bit and 64-bit
			
 
				+architectures, only the 64-bit versions were used, as 32-bit architectures
			
 
				+are becoming increasingly uncommon and irrelevant.
			
 
				+
			
 
				+With the current Odin syntax, the Go output is trivially ported in most
			
 
				+cases and was used as the basis of the port.
			
 
				+
			
 
				+In the future, it would be better to auto-generate Odin either directly
			
 
				+by adding an appropriate code-gen backend written in Coq, or perhaps by
			
 
				+parsing the JSON output.
			
 
				+
			
 
				+As this is a port rather than autogenerated output, none of fiat-crypto's
			
 
				+formal verification guarantees apply, unless it is possible to prove binary
			
 
				+equivalence.
			
 
				+
			
 
				+For the most part, alterations to the base fiat-crypto generated code was
			
 
				+kept to a minimum, to aid auditability.  This results in a somewhat
			
 
				+ideosyncratic style, and in some cases minor performance penalties.
			
 
				+
			
 
				+[1]: https://github.com/mit-plv/fiat-crypto
			
--- a/core/crypto/_fiat/fiat.odin
+++ b/core/crypto/_fiat/fiat.odin
@@ -0,0 +1,24 @@
 
				+package fiat
			
 
				+
			
 
				+// This package provides various helpers and types common to all of the
			
 
				+// fiat-crypto derived backends.
			
 
				+
			
 
				+// This code only works on a two's complement system.
			
 
				+#assert((-1 & 3) == 3)
			
 
				+
			
 
				+u1 :: distinct u8
			
 
				+i1 :: distinct i8
			
 
				+
			
 
				+cmovznz_u64 :: #force_inline proc "contextless" (arg1: u1, arg2, arg3: u64) -> (out1: u64) {
			
 
				+	x1 := (u64(arg1) * 0xffffffffffffffff)
			
 
				+	x2 := ((x1 & arg3) | ((~x1) & arg2))
			
 
				+	out1 = x2
			
 
				+	return
			
 
				+}
			
 
				+
			
 
				+cmovznz_u32 :: #force_inline proc "contextless" (arg1: u1, arg2, arg3: u32) -> (out1: u32) {
			
 
				+	x1 := (u32(arg1) * 0xffffffff)
			
 
				+	x2 := ((x1 & arg3) | ((~x1) & arg2))
			
 
				+	out1 = x2
			
 
				+	return
			
 
				+}
			
--- a/core/crypto/_fiat/field_curve25519/field.odin
+++ b/core/crypto/_fiat/field_curve25519/field.odin
@@ -0,0 +1,138 @@
 
				+package field_curve25519
			
 
				+
			
 
				+import "core:crypto"
			
 
				+import "core:mem"
			
 
				+
			
 
				+fe_relax_cast :: #force_inline proc "contextless" (arg1: ^Tight_Field_Element) -> ^Loose_Field_Element {
			
 
				+	return transmute(^Loose_Field_Element)(arg1)
			
 
				+}
			
 
				+
			
 
				+fe_tighten_cast :: #force_inline proc "contextless" (arg1: ^Loose_Field_Element) -> ^Tight_Field_Element {
			
 
				+	return transmute(^Tight_Field_Element)(arg1)
			
 
				+}
			
 
				+
			
 
				+fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte) {
			
 
				+	// Ignore the unused bit by copying the input and masking the bit off
			
 
				+	// prior to deserialization.
			
 
				+	tmp1: [32]byte = ---
			
 
				+	copy_slice(tmp1[:], arg1[:])
			
 
				+	tmp1[31] &= 127
			
 
				+
			
 
				+	_fe_from_bytes(out1, &tmp1)
			
 
				+
			
 
				+	mem.zero_explicit(&tmp1, size_of(tmp1))
			
 
				+}
			
 
				+
			
 
				+fe_equal :: proc "contextless" (arg1, arg2: ^Tight_Field_Element) -> int {
			
 
				+	tmp2: [32]byte = ---
			
 
				+
			
 
				+	fe_to_bytes(&tmp2, arg2)
			
 
				+	ret := fe_equal_bytes(arg1, &tmp2)
			
 
				+
			
 
				+	mem.zero_explicit(&tmp2, size_of(tmp2))
			
 
				+
			
 
				+	return ret
			
 
				+}
			
 
				+
			
 
				+fe_equal_bytes :: proc "contextless" (arg1: ^Tight_Field_Element, arg2: ^[32]byte) -> int {
			
 
				+	tmp1: [32]byte = ---
			
 
				+
			
 
				+	fe_to_bytes(&tmp1, arg1)
			
 
				+
			
 
				+	ret := crypto.compare_constant_time(tmp1[:], arg2[:])
			
 
				+
			
 
				+	mem.zero_explicit(&tmp1, size_of(tmp1))
			
 
				+
			
 
				+	return ret
			
 
				+}
			
 
				+
			
 
				+fe_carry_pow2k :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element, arg2: uint) {
			
 
				+	// Special case: `arg1^(2 * 0) = 1`, though this should never happen.
			
 
				+	if arg2 == 0 {
			
 
				+		fe_one(out1)
			
 
				+		return
			
 
				+	}
			
 
				+
			
 
				+	fe_carry_square(out1, arg1)
			
 
				+	for _ in 1..<arg2 {
			
 
				+		fe_carry_square(out1, fe_relax_cast(out1))
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+fe_carry_opp :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
			
 
				+	fe_opp(fe_relax_cast(out1), arg1)
			
 
				+	fe_carry(out1, fe_relax_cast(out1))
			
 
				+}
			
 
				+
			
 
				+fe_carry_invsqrt :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) -> int {
			
 
				+	// Inverse square root taken from Monocypher.
			
 
				+
			
 
				+	tmp1, tmp2, tmp3: Tight_Field_Element = ---, ---, ---
			
 
				+
			
 
				+	// t0 = x^((p-5)/8)
			
 
				+	// Can be achieved with a simple double & add ladder,
			
 
				+	// but it would be slower.
			
 
				+	fe_carry_pow2k(&tmp1, arg1, 1)
			
 
				+	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 2)
			
 
				+	fe_carry_mul(&tmp2, arg1, fe_relax_cast(&tmp2))
			
 
				+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), fe_relax_cast(&tmp2))
			
 
				+	fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 1)
			
 
				+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
			
 
				+	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 5)
			
 
				+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
			
 
				+	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 10)
			
 
				+	fe_carry_mul(&tmp2, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
			
 
				+	fe_carry_pow2k(&tmp3, fe_relax_cast(&tmp2), 20)
			
 
				+	fe_carry_mul(&tmp2, fe_relax_cast(&tmp3), fe_relax_cast(&tmp2))
			
 
				+	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp2), 10)
			
 
				+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
			
 
				+	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 50)
			
 
				+	fe_carry_mul(&tmp2, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
			
 
				+	fe_carry_pow2k(&tmp3, fe_relax_cast(&tmp2), 100)
			
 
				+	fe_carry_mul(&tmp2, fe_relax_cast(&tmp3), fe_relax_cast(&tmp2))
			
 
				+	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp2), 50)
			
 
				+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
			
 
				+	fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 2)
			
 
				+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), arg1)
			
 
				+
			
 
				+	// quartic = x^((p-1)/4)
			
 
				+	quartic := &tmp2
			
 
				+	fe_carry_square(quartic, fe_relax_cast(&tmp1))
			
 
				+	fe_carry_mul(quartic, fe_relax_cast(quartic), arg1)
			
 
				+
			
 
				+	// Serialize quartic once to save on repeated serialization/sanitization.
			
 
				+	quartic_buf: [32]byte = ---
			
 
				+	fe_to_bytes(&quartic_buf, quartic)
			
 
				+	check := &tmp3
			
 
				+
			
 
				+	fe_one(check)
			
 
				+	p1 := fe_equal_bytes(check, &quartic_buf)
			
 
				+	fe_carry_opp(check, check)
			
 
				+	m1 := fe_equal_bytes(check, &quartic_buf)
			
 
				+	fe_carry_opp(check, &SQRT_M1)
			
 
				+	ms := fe_equal_bytes(check, &quartic_buf)
			
 
				+
			
 
				+	// if quartic == -1 or sqrt(-1)
			
 
				+	// then  isr = x^((p-1)/4) * sqrt(-1)
			
 
				+	// else  isr = x^((p-1)/4)
			
 
				+	fe_carry_mul(out1, fe_relax_cast(&tmp1), fe_relax_cast(&SQRT_M1))
			
 
				+	fe_cond_assign(out1, &tmp1, (m1|ms) ~ 1)
			
 
				+
			
 
				+	mem.zero_explicit(&tmp1, size_of(tmp1))
			
 
				+	mem.zero_explicit(&tmp2, size_of(tmp2))
			
 
				+	mem.zero_explicit(&tmp3, size_of(tmp3))
			
 
				+	mem.zero_explicit(&quartic_buf, size_of(quartic_buf))
			
 
				+
			
 
				+	return p1 | m1
			
 
				+}
			
 
				+
			
 
				+fe_carry_inv :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
			
 
				+	tmp1: Tight_Field_Element
			
 
				+
			
 
				+	fe_carry_square(&tmp1, arg1)
			
 
				+	_ = fe_carry_invsqrt(&tmp1, fe_relax_cast(&tmp1))
			
 
				+	fe_carry_square(&tmp1, fe_relax_cast(&tmp1))
			
 
				+	fe_carry_mul(out1, fe_relax_cast(&tmp1), arg1)
			
 
				+
			
 
				+	mem.zero_explicit(&tmp1, size_of(tmp1))
			
 
				+}
			
--- a/core/crypto/_fiat/field_curve25519/field51.odin
+++ b/core/crypto/_fiat/field_curve25519/field51.odin
@@ -0,0 +1,616 @@
 
				+// The BSD 1-Clause License (BSD-1-Clause)
			
 
				+//
			
 
				+// Copyright (c) 2015-2020 the fiat-crypto authors (see the AUTHORS file)
			
 
				+// All rights reserved.
			
 
				+//
			
 
				+// Redistribution and use in source and binary forms, with or without
			
 
				+// modification, are permitted provided that the following conditions are
			
 
				+// met:
			
 
				+//
			
 
				+//     1. Redistributions of source code must retain the above copyright
			
 
				+//        notice, this list of conditions and the following disclaimer.
			
 
				+//
			
 
				+// THIS SOFTWARE IS PROVIDED BY the fiat-crypto authors "AS IS"
			
 
				+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
			
 
				+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
			
 
				+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Berkeley Software Design,
			
 
				+// Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
			
 
				+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
			
 
				+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
			
 
				+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
			
 
				+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
			
 
				+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
			
 
				+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				+
			
 
				+package field_curve25519
			
 
				+
			
 
				+// The file provides arithmetic on the field Z/(2^255-19) using
			
 
				+// unsaturated 64-bit integer arithmetic.  It is derived primarily
			
 
				+// from the machine generated Golang output from the fiat-crypto project.
			
 
				+//
			
 
				+// While the base implementation is provably correct, this implementation
			
 
				+// makes no such claims as the port and optimizations were done by hand.
			
 
				+// At some point, it may be worth adding support to fiat-crypto for
			
 
				+// generating Odin output.
			
 
				+//
			
 
				+// TODO:
			
 
				+//  * When fiat-crypto supports it, using a saturated 64-bit limbs
			
 
				+//    instead of 51-bit limbs will be faster, though the gains are
			
 
				+//    minimal unless adcx/adox/mulx are used.
			
 
				+
			
 
				+import fiat "core:crypto/_fiat"
			
 
				+import "core:math/bits"
			
 
				+
			
 
				+Loose_Field_Element :: distinct [5]u64
			
 
				+Tight_Field_Element :: distinct [5]u64
			
 
				+
			
 
				+SQRT_M1 := Tight_Field_Element{
			
 
				+	1718705420411056,
			
 
				+	234908883556509,
			
 
				+	2233514472574048,
			
 
				+	2117202627021982,
			
 
				+	765476049583133,
			
 
				+}
			
 
				+
			
 
				+_addcarryx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
			
 
				+	x1 := ((u64(arg1) + arg2) + arg3)
			
 
				+	x2 := (x1 & 0x7ffffffffffff)
			
 
				+	x3 := fiat.u1((x1 >> 51))
			
 
				+	out1 = x2
			
 
				+	out2 = x3
			
 
				+	return
			
 
				+}
			
 
				+
			
 
				+_subborrowx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
			
 
				+	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3))
			
 
				+	x2 := fiat.i1((x1 >> 51))
			
 
				+	x3 := (u64(x1) & 0x7ffffffffffff)
			
 
				+	out1 = x3
			
 
				+	out2 = (0x0 - fiat.u1(x2))
			
 
				+	return
			
 
				+}
			
 
				+
			
 
				+fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
			
 
				+	x2, x1 := bits.mul_u64(arg1[4], (arg2[4] * 0x13))
			
 
				+	x4, x3 := bits.mul_u64(arg1[4], (arg2[3] * 0x13))
			
 
				+	x6, x5 := bits.mul_u64(arg1[4], (arg2[2] * 0x13))
			
 
				+	x8, x7 := bits.mul_u64(arg1[4], (arg2[1] * 0x13))
			
 
				+	x10, x9 := bits.mul_u64(arg1[3], (arg2[4] * 0x13))
			
 
				+	x12, x11 := bits.mul_u64(arg1[3], (arg2[3] * 0x13))
			
 
				+	x14, x13 := bits.mul_u64(arg1[3], (arg2[2] * 0x13))
			
 
				+	x16, x15 := bits.mul_u64(arg1[2], (arg2[4] * 0x13))
			
 
				+	x18, x17 := bits.mul_u64(arg1[2], (arg2[3] * 0x13))
			
 
				+	x20, x19 := bits.mul_u64(arg1[1], (arg2[4] * 0x13))
			
 
				+	x22, x21 := bits.mul_u64(arg1[4], arg2[0])
			
 
				+	x24, x23 := bits.mul_u64(arg1[3], arg2[1])
			
 
				+	x26, x25 := bits.mul_u64(arg1[3], arg2[0])
			
 
				+	x28, x27 := bits.mul_u64(arg1[2], arg2[2])
			
 
				+	x30, x29 := bits.mul_u64(arg1[2], arg2[1])
			
 
				+	x32, x31 := bits.mul_u64(arg1[2], arg2[0])
			
 
				+	x34, x33 := bits.mul_u64(arg1[1], arg2[3])
			
 
				+	x36, x35 := bits.mul_u64(arg1[1], arg2[2])
			
 
				+	x38, x37 := bits.mul_u64(arg1[1], arg2[1])
			
 
				+	x40, x39 := bits.mul_u64(arg1[1], arg2[0])
			
 
				+	x42, x41 := bits.mul_u64(arg1[0], arg2[4])
			
 
				+	x44, x43 := bits.mul_u64(arg1[0], arg2[3])
			
 
				+	x46, x45 := bits.mul_u64(arg1[0], arg2[2])
			
 
				+	x48, x47 := bits.mul_u64(arg1[0], arg2[1])
			
 
				+	x50, x49 := bits.mul_u64(arg1[0], arg2[0])
			
 
				+	x51, x52 := bits.add_u64(x13, x7, u64(0x0))
			
 
				+	x53, _ := bits.add_u64(x14, x8, u64(fiat.u1(x52)))
			
 
				+	x55, x56 := bits.add_u64(x17, x51, u64(0x0))
			
 
				+	x57, _ := bits.add_u64(x18, x53, u64(fiat.u1(x56)))
			
 
				+	x59, x60 := bits.add_u64(x19, x55, u64(0x0))
			
 
				+	x61, _ := bits.add_u64(x20, x57, u64(fiat.u1(x60)))
			
 
				+	x63, x64 := bits.add_u64(x49, x59, u64(0x0))
			
 
				+	x65, _ := bits.add_u64(x50, x61, u64(fiat.u1(x64)))
			
 
				+	x67 := ((x63 >> 51) | ((x65 << 13) & 0xffffffffffffffff))
			
 
				+	x68 := (x63 & 0x7ffffffffffff)
			
 
				+	x69, x70 := bits.add_u64(x23, x21, u64(0x0))
			
 
				+	x71, _ := bits.add_u64(x24, x22, u64(fiat.u1(x70)))
			
 
				+	x73, x74 := bits.add_u64(x27, x69, u64(0x0))
			
 
				+	x75, _ := bits.add_u64(x28, x71, u64(fiat.u1(x74)))
			
 
				+	x77, x78 := bits.add_u64(x33, x73, u64(0x0))
			
 
				+	x79, _ := bits.add_u64(x34, x75, u64(fiat.u1(x78)))
			
 
				+	x81, x82 := bits.add_u64(x41, x77, u64(0x0))
			
 
				+	x83, _ := bits.add_u64(x42, x79, u64(fiat.u1(x82)))
			
 
				+	x85, x86 := bits.add_u64(x25, x1, u64(0x0))
			
 
				+	x87, _ := bits.add_u64(x26, x2, u64(fiat.u1(x86)))
			
 
				+	x89, x90 := bits.add_u64(x29, x85, u64(0x0))
			
 
				+	x91, _ := bits.add_u64(x30, x87, u64(fiat.u1(x90)))
			
 
				+	x93, x94 := bits.add_u64(x35, x89, u64(0x0))
			
 
				+	x95, _ := bits.add_u64(x36, x91, u64(fiat.u1(x94)))
			
 
				+	x97, x98 := bits.add_u64(x43, x93, u64(0x0))
			
 
				+	x99, _ := bits.add_u64(x44, x95, u64(fiat.u1(x98)))
			
 
				+	x101, x102 := bits.add_u64(x9, x3, u64(0x0))
			
 
				+	x103, _ := bits.add_u64(x10, x4, u64(fiat.u1(x102)))
			
 
				+	x105, x106 := bits.add_u64(x31, x101, u64(0x0))
			
 
				+	x107, _ := bits.add_u64(x32, x103, u64(fiat.u1(x106)))
			
 
				+	x109, x110 := bits.add_u64(x37, x105, u64(0x0))
			
 
				+	x111, _ := bits.add_u64(x38, x107, u64(fiat.u1(x110)))
			
 
				+	x113, x114 := bits.add_u64(x45, x109, u64(0x0))
			
 
				+	x115, _ := bits.add_u64(x46, x111, u64(fiat.u1(x114)))
			
 
				+	x117, x118 := bits.add_u64(x11, x5, u64(0x0))
			
 
				+	x119, _ := bits.add_u64(x12, x6, u64(fiat.u1(x118)))
			
 
				+	x121, x122 := bits.add_u64(x15, x117, u64(0x0))
			
 
				+	x123, _ := bits.add_u64(x16, x119, u64(fiat.u1(x122)))
			
 
				+	x125, x126 := bits.add_u64(x39, x121, u64(0x0))
			
 
				+	x127, _ := bits.add_u64(x40, x123, u64(fiat.u1(x126)))
			
 
				+	x129, x130 := bits.add_u64(x47, x125, u64(0x0))
			
 
				+	x131, _ := bits.add_u64(x48, x127, u64(fiat.u1(x130)))
			
 
				+	x133, x134 := bits.add_u64(x67, x129, u64(0x0))
			
 
				+	x135 := (u64(fiat.u1(x134)) + x131)
			
 
				+	x136 := ((x133 >> 51) | ((x135 << 13) & 0xffffffffffffffff))
			
 
				+	x137 := (x133 & 0x7ffffffffffff)
			
 
				+	x138, x139 := bits.add_u64(x136, x113, u64(0x0))
			
 
				+	x140 := (u64(fiat.u1(x139)) + x115)
			
 
				+	x141 := ((x138 >> 51) | ((x140 << 13) & 0xffffffffffffffff))
			
 
				+	x142 := (x138 & 0x7ffffffffffff)
			
 
				+	x143, x144 := bits.add_u64(x141, x97, u64(0x0))
			
 
				+	x145 := (u64(fiat.u1(x144)) + x99)
			
 
				+	x146 := ((x143 >> 51) | ((x145 << 13) & 0xffffffffffffffff))
			
 
				+	x147 := (x143 & 0x7ffffffffffff)
			
 
				+	x148, x149 := bits.add_u64(x146, x81, u64(0x0))
			
 
				+	x150 := (u64(fiat.u1(x149)) + x83)
			
 
				+	x151 := ((x148 >> 51) | ((x150 << 13) & 0xffffffffffffffff))
			
 
				+	x152 := (x148 & 0x7ffffffffffff)
			
 
				+	x153 := (x151 * 0x13)
			
 
				+	x154 := (x68 + x153)
			
 
				+	x155 := (x154 >> 51)
			
 
				+	x156 := (x154 & 0x7ffffffffffff)
			
 
				+	x157 := (x155 + x137)
			
 
				+	x158 := fiat.u1((x157 >> 51))
			
 
				+	x159 := (x157 & 0x7ffffffffffff)
			
 
				+	x160 := (u64(x158) + x142)
			
 
				+	out1[0] = x156
			
 
				+	out1[1] = x159
			
 
				+	out1[2] = x160
			
 
				+	out1[3] = x147
			
 
				+	out1[4] = x152
			
 
				+}
			
 
				+
			
 
				+fe_carry_square :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
			
 
				+	x1 := (arg1[4] * 0x13)
			
 
				+	x2 := (x1 * 0x2)
			
 
				+	x3 := (arg1[4] * 0x2)
			
 
				+	x4 := (arg1[3] * 0x13)
			
 
				+	x5 := (x4 * 0x2)
			
 
				+	x6 := (arg1[3] * 0x2)
			
 
				+	x7 := (arg1[2] * 0x2)
			
 
				+	x8 := (arg1[1] * 0x2)
			
 
				+	x10, x9 := bits.mul_u64(arg1[4], x1)
			
 
				+	x12, x11 := bits.mul_u64(arg1[3], x2)
			
 
				+	x14, x13 := bits.mul_u64(arg1[3], x4)
			
 
				+	x16, x15 := bits.mul_u64(arg1[2], x2)
			
 
				+	x18, x17 := bits.mul_u64(arg1[2], x5)
			
 
				+	x20, x19 := bits.mul_u64(arg1[2], arg1[2])
			
 
				+	x22, x21 := bits.mul_u64(arg1[1], x2)
			
 
				+	x24, x23 := bits.mul_u64(arg1[1], x6)
			
 
				+	x26, x25 := bits.mul_u64(arg1[1], x7)
			
 
				+	x28, x27 := bits.mul_u64(arg1[1], arg1[1])
			
 
				+	x30, x29 := bits.mul_u64(arg1[0], x3)
			
 
				+	x32, x31 := bits.mul_u64(arg1[0], x6)
			
 
				+	x34, x33 := bits.mul_u64(arg1[0], x7)
			
 
				+	x36, x35 := bits.mul_u64(arg1[0], x8)
			
 
				+	x38, x37 := bits.mul_u64(arg1[0], arg1[0])
			
 
				+	x39, x40 := bits.add_u64(x21, x17, u64(0x0))
			
 
				+	x41, _ := bits.add_u64(x22, x18, u64(fiat.u1(x40)))
			
 
				+	x43, x44 := bits.add_u64(x37, x39, u64(0x0))
			
 
				+	x45, _ := bits.add_u64(x38, x41, u64(fiat.u1(x44)))
			
 
				+	x47 := ((x43 >> 51) | ((x45 << 13) & 0xffffffffffffffff))
			
 
				+	x48 := (x43 & 0x7ffffffffffff)
			
 
				+	x49, x50 := bits.add_u64(x23, x19, u64(0x0))
			
 
				+	x51, _ := bits.add_u64(x24, x20, u64(fiat.u1(x50)))
			
 
				+	x53, x54 := bits.add_u64(x29, x49, u64(0x0))
			
 
				+	x55, _ := bits.add_u64(x30, x51, u64(fiat.u1(x54)))
			
 
				+	x57, x58 := bits.add_u64(x25, x9, u64(0x0))
			
 
				+	x59, _ := bits.add_u64(x26, x10, u64(fiat.u1(x58)))
			
 
				+	x61, x62 := bits.add_u64(x31, x57, u64(0x0))
			
 
				+	x63, _ := bits.add_u64(x32, x59, u64(fiat.u1(x62)))
			
 
				+	x65, x66 := bits.add_u64(x27, x11, u64(0x0))
			
 
				+	x67, _ := bits.add_u64(x28, x12, u64(fiat.u1(x66)))
			
 
				+	x69, x70 := bits.add_u64(x33, x65, u64(0x0))
			
 
				+	x71, _ := bits.add_u64(x34, x67, u64(fiat.u1(x70)))
			
 
				+	x73, x74 := bits.add_u64(x15, x13, u64(0x0))
			
 
				+	x75, _ := bits.add_u64(x16, x14, u64(fiat.u1(x74)))
			
 
				+	x77, x78 := bits.add_u64(x35, x73, u64(0x0))
			
 
				+	x79, _ := bits.add_u64(x36, x75, u64(fiat.u1(x78)))
			
 
				+	x81, x82 := bits.add_u64(x47, x77, u64(0x0))
			
 
				+	x83 := (u64(fiat.u1(x82)) + x79)
			
 
				+	x84 := ((x81 >> 51) | ((x83 << 13) & 0xffffffffffffffff))
			
 
				+	x85 := (x81 & 0x7ffffffffffff)
			
 
				+	x86, x87 := bits.add_u64(x84, x69, u64(0x0))
			
 
				+	x88 := (u64(fiat.u1(x87)) + x71)
			
 
				+	x89 := ((x86 >> 51) | ((x88 << 13) & 0xffffffffffffffff))
			
 
				+	x90 := (x86 & 0x7ffffffffffff)
			
 
				+	x91, x92 := bits.add_u64(x89, x61, u64(0x0))
			
 
				+	x93 := (u64(fiat.u1(x92)) + x63)
			
 
				+	x94 := ((x91 >> 51) | ((x93 << 13) & 0xffffffffffffffff))
			
 
				+	x95 := (x91 & 0x7ffffffffffff)
			
 
				+	x96, x97 := bits.add_u64(x94, x53, u64(0x0))
			
 
				+	x98 := (u64(fiat.u1(x97)) + x55)
			
 
				+	x99 := ((x96 >> 51) | ((x98 << 13) & 0xffffffffffffffff))
			
 
				+	x100 := (x96 & 0x7ffffffffffff)
			
 
				+	x101 := (x99 * 0x13)
			
 
				+	x102 := (x48 + x101)
			
 
				+	x103 := (x102 >> 51)
			
 
				+	x104 := (x102 & 0x7ffffffffffff)
			
 
				+	x105 := (x103 + x85)
			
 
				+	x106 := fiat.u1((x105 >> 51))
			
 
				+	x107 := (x105 & 0x7ffffffffffff)
			
 
				+	x108 := (u64(x106) + x90)
			
 
				+	out1[0] = x104
			
 
				+	out1[1] = x107
			
 
				+	out1[2] = x108
			
 
				+	out1[3] = x95
			
 
				+	out1[4] = x100
			
 
				+}
			
 
				+
			
 
				+fe_carry :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
			
 
				+	x1 := arg1[0]
			
 
				+	x2 := ((x1 >> 51) + arg1[1])
			
 
				+	x3 := ((x2 >> 51) + arg1[2])
			
 
				+	x4 := ((x3 >> 51) + arg1[3])
			
 
				+	x5 := ((x4 >> 51) + arg1[4])
			
 
				+	x6 := ((x1 & 0x7ffffffffffff) + ((x5 >> 51) * 0x13))
			
 
				+	x7 := (u64(fiat.u1((x6 >> 51))) + (x2 & 0x7ffffffffffff))
			
 
				+	x8 := (x6 & 0x7ffffffffffff)
			
 
				+	x9 := (x7 & 0x7ffffffffffff)
			
 
				+	x10 := (u64(fiat.u1((x7 >> 51))) + (x3 & 0x7ffffffffffff))
			
 
				+	x11 := (x4 & 0x7ffffffffffff)
			
 
				+	x12 := (x5 & 0x7ffffffffffff)
			
 
				+	out1[0] = x8
			
 
				+	out1[1] = x9
			
 
				+	out1[2] = x10
			
 
				+	out1[3] = x11
			
 
				+	out1[4] = x12
			
 
				+}
			
 
				+
			
 
				+fe_add :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) {
			
 
				+	x1 := (arg1[0] + arg2[0])
			
 
				+	x2 := (arg1[1] + arg2[1])
			
 
				+	x3 := (arg1[2] + arg2[2])
			
 
				+	x4 := (arg1[3] + arg2[3])
			
 
				+	x5 := (arg1[4] + arg2[4])
			
 
				+	out1[0] = x1
			
 
				+	out1[1] = x2
			
 
				+	out1[2] = x3
			
 
				+	out1[3] = x4
			
 
				+	out1[4] = x5
			
 
				+}
			
 
				+
			
 
				+fe_sub :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) {
			
 
				+	x1 := ((0xfffffffffffda + arg1[0]) - arg2[0])
			
 
				+	x2 := ((0xffffffffffffe + arg1[1]) - arg2[1])
			
 
				+	x3 := ((0xffffffffffffe + arg1[2]) - arg2[2])
			
 
				+	x4 := ((0xffffffffffffe + arg1[3]) - arg2[3])
			
 
				+	x5 := ((0xffffffffffffe + arg1[4]) - arg2[4])
			
 
				+	out1[0] = x1
			
 
				+	out1[1] = x2
			
 
				+	out1[2] = x3
			
 
				+	out1[3] = x4
			
 
				+	out1[4] = x5
			
 
				+}
			
 
				+
			
 
				+fe_opp :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) {
			
 
				+	x1 := (0xfffffffffffda - arg1[0])
			
 
				+	x2 := (0xffffffffffffe - arg1[1])
			
 
				+	x3 := (0xffffffffffffe - arg1[2])
			
 
				+	x4 := (0xffffffffffffe - arg1[3])
			
 
				+	x5 := (0xffffffffffffe - arg1[4])
			
 
				+	out1[0] = x1
			
 
				+	out1[1] = x2
			
 
				+	out1[2] = x3
			
 
				+	out1[3] = x4
			
 
				+	out1[4] = x5
			
 
				+}
			
 
				+
			
 
				+fe_cond_assign :: proc "contextless" (out1, arg1: ^Tight_Field_Element, arg2: int) {
			
 
				+	x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0])
			
 
				+	x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1])
			
 
				+	x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2])
			
 
				+	x4 := fiat.cmovznz_u64(fiat.u1(arg2), out1[3], arg1[3])
			
 
				+	x5 := fiat.cmovznz_u64(fiat.u1(arg2), out1[4], arg1[4])
			
 
				+	out1[0] = x1
			
 
				+	out1[1] = x2
			
 
				+	out1[2] = x3
			
 
				+	out1[3] = x4
			
 
				+	out1[4] = x5
			
 
				+}
			
 
				+
			
 
				+fe_to_bytes :: proc "contextless" (out1: ^[32]byte, arg1: ^Tight_Field_Element) {
			
 
				+	x1, x2 := _subborrowx_u51(0x0, arg1[0], 0x7ffffffffffed)
			
 
				+	x3, x4 := _subborrowx_u51(x2, arg1[1], 0x7ffffffffffff)
			
 
				+	x5, x6 := _subborrowx_u51(x4, arg1[2], 0x7ffffffffffff)
			
 
				+	x7, x8 := _subborrowx_u51(x6, arg1[3], 0x7ffffffffffff)
			
 
				+	x9, x10 := _subborrowx_u51(x8, arg1[4], 0x7ffffffffffff)
			
 
				+	x11 := fiat.cmovznz_u64(x10, u64(0x0), 0xffffffffffffffff)
			
 
				+	x12, x13 := _addcarryx_u51(0x0, x1, (x11 & 0x7ffffffffffed))
			
 
				+	x14, x15 := _addcarryx_u51(x13, x3, (x11 & 0x7ffffffffffff))
			
 
				+	x16, x17 := _addcarryx_u51(x15, x5, (x11 & 0x7ffffffffffff))
			
 
				+	x18, x19 := _addcarryx_u51(x17, x7, (x11 & 0x7ffffffffffff))
			
 
				+	x20, _ := _addcarryx_u51(x19, x9, (x11 & 0x7ffffffffffff))
			
 
				+	x22 := (x20 << 4)
			
 
				+	x23 := (x18 * u64(0x2))
			
 
				+	x24 := (x16 << 6)
			
 
				+	x25 := (x14 << 3)
			
 
				+	x26 := (u8(x12) & 0xff)
			
 
				+	x27 := (x12 >> 8)
			
 
				+	x28 := (u8(x27) & 0xff)
			
 
				+	x29 := (x27 >> 8)
			
 
				+	x30 := (u8(x29) & 0xff)
			
 
				+	x31 := (x29 >> 8)
			
 
				+	x32 := (u8(x31) & 0xff)
			
 
				+	x33 := (x31 >> 8)
			
 
				+	x34 := (u8(x33) & 0xff)
			
 
				+	x35 := (x33 >> 8)
			
 
				+	x36 := (u8(x35) & 0xff)
			
 
				+	x37 := u8((x35 >> 8))
			
 
				+	x38 := (x25 + u64(x37))
			
 
				+	x39 := (u8(x38) & 0xff)
			
 
				+	x40 := (x38 >> 8)
			
 
				+	x41 := (u8(x40) & 0xff)
			
 
				+	x42 := (x40 >> 8)
			
 
				+	x43 := (u8(x42) & 0xff)
			
 
				+	x44 := (x42 >> 8)
			
 
				+	x45 := (u8(x44) & 0xff)
			
 
				+	x46 := (x44 >> 8)
			
 
				+	x47 := (u8(x46) & 0xff)
			
 
				+	x48 := (x46 >> 8)
			
 
				+	x49 := (u8(x48) & 0xff)
			
 
				+	x50 := u8((x48 >> 8))
			
 
				+	x51 := (x24 + u64(x50))
			
 
				+	x52 := (u8(x51) & 0xff)
			
 
				+	x53 := (x51 >> 8)
			
 
				+	x54 := (u8(x53) & 0xff)
			
 
				+	x55 := (x53 >> 8)
			
 
				+	x56 := (u8(x55) & 0xff)
			
 
				+	x57 := (x55 >> 8)
			
 
				+	x58 := (u8(x57) & 0xff)
			
 
				+	x59 := (x57 >> 8)
			
 
				+	x60 := (u8(x59) & 0xff)
			
 
				+	x61 := (x59 >> 8)
			
 
				+	x62 := (u8(x61) & 0xff)
			
 
				+	x63 := (x61 >> 8)
			
 
				+	x64 := (u8(x63) & 0xff)
			
 
				+	x65 := fiat.u1((x63 >> 8))
			
 
				+	x66 := (x23 + u64(x65))
			
 
				+	x67 := (u8(x66) & 0xff)
			
 
				+	x68 := (x66 >> 8)
			
 
				+	x69 := (u8(x68) & 0xff)
			
 
				+	x70 := (x68 >> 8)
			
 
				+	x71 := (u8(x70) & 0xff)
			
 
				+	x72 := (x70 >> 8)
			
 
				+	x73 := (u8(x72) & 0xff)
			
 
				+	x74 := (x72 >> 8)
			
 
				+	x75 := (u8(x74) & 0xff)
			
 
				+	x76 := (x74 >> 8)
			
 
				+	x77 := (u8(x76) & 0xff)
			
 
				+	x78 := u8((x76 >> 8))
			
 
				+	x79 := (x22 + u64(x78))
			
 
				+	x80 := (u8(x79) & 0xff)
			
 
				+	x81 := (x79 >> 8)
			
 
				+	x82 := (u8(x81) & 0xff)
			
 
				+	x83 := (x81 >> 8)
			
 
				+	x84 := (u8(x83) & 0xff)
			
 
				+	x85 := (x83 >> 8)
			
 
				+	x86 := (u8(x85) & 0xff)
			
 
				+	x87 := (x85 >> 8)
			
 
				+	x88 := (u8(x87) & 0xff)
			
 
				+	x89 := (x87 >> 8)
			
 
				+	x90 := (u8(x89) & 0xff)
			
 
				+	x91 := u8((x89 >> 8))
			
 
				+	out1[0] = x26
			
 
				+	out1[1] = x28
			
 
				+	out1[2] = x30
			
 
				+	out1[3] = x32
			
 
				+	out1[4] = x34
			
 
				+	out1[5] = x36
			
 
				+	out1[6] = x39
			
 
				+	out1[7] = x41
			
 
				+	out1[8] = x43
			
 
				+	out1[9] = x45
			
 
				+	out1[10] = x47
			
 
				+	out1[11] = x49
			
 
				+	out1[12] = x52
			
 
				+	out1[13] = x54
			
 
				+	out1[14] = x56
			
 
				+	out1[15] = x58
			
 
				+	out1[16] = x60
			
 
				+	out1[17] = x62
			
 
				+	out1[18] = x64
			
 
				+	out1[19] = x67
			
 
				+	out1[20] = x69
			
 
				+	out1[21] = x71
			
 
				+	out1[22] = x73
			
 
				+	out1[23] = x75
			
 
				+	out1[24] = x77
			
 
				+	out1[25] = x80
			
 
				+	out1[26] = x82
			
 
				+	out1[27] = x84
			
 
				+	out1[28] = x86
			
 
				+	out1[29] = x88
			
 
				+	out1[30] = x90
			
 
				+	out1[31] = x91
			
 
				+}
			
 
				+
			
 
				+_fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte) {
			
 
				+	x1 := (u64(arg1[31]) << 44)
			
 
				+	x2 := (u64(arg1[30]) << 36)
			
 
				+	x3 := (u64(arg1[29]) << 28)
			
 
				+	x4 := (u64(arg1[28]) << 20)
			
 
				+	x5 := (u64(arg1[27]) << 12)
			
 
				+	x6 := (u64(arg1[26]) << 4)
			
 
				+	x7 := (u64(arg1[25]) << 47)
			
 
				+	x8 := (u64(arg1[24]) << 39)
			
 
				+	x9 := (u64(arg1[23]) << 31)
			
 
				+	x10 := (u64(arg1[22]) << 23)
			
 
				+	x11 := (u64(arg1[21]) << 15)
			
 
				+	x12 := (u64(arg1[20]) << 7)
			
 
				+	x13 := (u64(arg1[19]) << 50)
			
 
				+	x14 := (u64(arg1[18]) << 42)
			
 
				+	x15 := (u64(arg1[17]) << 34)
			
 
				+	x16 := (u64(arg1[16]) << 26)
			
 
				+	x17 := (u64(arg1[15]) << 18)
			
 
				+	x18 := (u64(arg1[14]) << 10)
			
 
				+	x19 := (u64(arg1[13]) << 2)
			
 
				+	x20 := (u64(arg1[12]) << 45)
			
 
				+	x21 := (u64(arg1[11]) << 37)
			
 
				+	x22 := (u64(arg1[10]) << 29)
			
 
				+	x23 := (u64(arg1[9]) << 21)
			
 
				+	x24 := (u64(arg1[8]) << 13)
			
 
				+	x25 := (u64(arg1[7]) << 5)
			
 
				+	x26 := (u64(arg1[6]) << 48)
			
 
				+	x27 := (u64(arg1[5]) << 40)
			
 
				+	x28 := (u64(arg1[4]) << 32)
			
 
				+	x29 := (u64(arg1[3]) << 24)
			
 
				+	x30 := (u64(arg1[2]) << 16)
			
 
				+	x31 := (u64(arg1[1]) << 8)
			
 
				+	x32 := arg1[0]
			
 
				+	x33 := (x31 + u64(x32))
			
 
				+	x34 := (x30 + x33)
			
 
				+	x35 := (x29 + x34)
			
 
				+	x36 := (x28 + x35)
			
 
				+	x37 := (x27 + x36)
			
 
				+	x38 := (x26 + x37)
			
 
				+	x39 := (x38 & 0x7ffffffffffff)
			
 
				+	x40 := u8((x38 >> 51))
			
 
				+	x41 := (x25 + u64(x40))
			
 
				+	x42 := (x24 + x41)
			
 
				+	x43 := (x23 + x42)
			
 
				+	x44 := (x22 + x43)
			
 
				+	x45 := (x21 + x44)
			
 
				+	x46 := (x20 + x45)
			
 
				+	x47 := (x46 & 0x7ffffffffffff)
			
 
				+	x48 := u8((x46 >> 51))
			
 
				+	x49 := (x19 + u64(x48))
			
 
				+	x50 := (x18 + x49)
			
 
				+	x51 := (x17 + x50)
			
 
				+	x52 := (x16 + x51)
			
 
				+	x53 := (x15 + x52)
			
 
				+	x54 := (x14 + x53)
			
 
				+	x55 := (x13 + x54)
			
 
				+	x56 := (x55 & 0x7ffffffffffff)
			
 
				+	x57 := u8((x55 >> 51))
			
 
				+	x58 := (x12 + u64(x57))
			
 
				+	x59 := (x11 + x58)
			
 
				+	x60 := (x10 + x59)
			
 
				+	x61 := (x9 + x60)
			
 
				+	x62 := (x8 + x61)
			
 
				+	x63 := (x7 + x62)
			
 
				+	x64 := (x63 & 0x7ffffffffffff)
			
 
				+	x65 := u8((x63 >> 51))
			
 
				+	x66 := (x6 + u64(x65))
			
 
				+	x67 := (x5 + x66)
			
 
				+	x68 := (x4 + x67)
			
 
				+	x69 := (x3 + x68)
			
 
				+	x70 := (x2 + x69)
			
 
				+	x71 := (x1 + x70)
			
 
				+	out1[0] = x39
			
 
				+	out1[1] = x47
			
 
				+	out1[2] = x56
			
 
				+	out1[3] = x64
			
 
				+	out1[4] = x71
			
 
				+}
			
 
				+
			
 
				+fe_relax :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) {
			
 
				+	x1 := arg1[0]
			
 
				+	x2 := arg1[1]
			
 
				+	x3 := arg1[2]
			
 
				+	x4 := arg1[3]
			
 
				+	x5 := arg1[4]
			
 
				+	out1[0] = x1
			
 
				+	out1[1] = x2
			
 
				+	out1[2] = x3
			
 
				+	out1[3] = x4
			
 
				+	out1[4] = x5
			
 
				+}
			
 
				+
			
 
				+fe_carry_scmul_121666 :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
			
 
				+	x2, x1 := bits.mul_u64(0x1db42, arg1[4])
			
 
				+	x4, x3 := bits.mul_u64(0x1db42, arg1[3])
			
 
				+	x6, x5 := bits.mul_u64(0x1db42, arg1[2])
			
 
				+	x8, x7 := bits.mul_u64(0x1db42, arg1[1])
			
 
				+	x10, x9 := bits.mul_u64(0x1db42, arg1[0])
			
 
				+	x11 := ((x9 >> 51) | ((x10 << 13) & 0xffffffffffffffff))
			
 
				+	x12 := (x9 & 0x7ffffffffffff)
			
 
				+	x13, x14 := bits.add_u64(x11, x7, u64(0x0))
			
 
				+	x15 := (u64(fiat.u1(x14)) + x8)
			
 
				+	x16 := ((x13 >> 51) | ((x15 << 13) & 0xffffffffffffffff))
			
 
				+	x17 := (x13 & 0x7ffffffffffff)
			
 
				+	x18, x19 := bits.add_u64(x16, x5, u64(0x0))
			
 
				+	x20 := (u64(fiat.u1(x19)) + x6)
			
 
				+	x21 := ((x18 >> 51) | ((x20 << 13) & 0xffffffffffffffff))
			
 
				+	x22 := (x18 & 0x7ffffffffffff)
			
 
				+	x23, x24 := bits.add_u64(x21, x3, u64(0x0))
			
 
				+	x25 := (u64(fiat.u1(x24)) + x4)
			
 
				+	x26 := ((x23 >> 51) | ((x25 << 13) & 0xffffffffffffffff))
			
 
				+	x27 := (x23 & 0x7ffffffffffff)
			
 
				+	x28, x29 := bits.add_u64(x26, x1, u64(0x0))
			
 
				+	x30 := (u64(fiat.u1(x29)) + x2)
			
 
				+	x31 := ((x28 >> 51) | ((x30 << 13) & 0xffffffffffffffff))
			
 
				+	x32 := (x28 & 0x7ffffffffffff)
			
 
				+	x33 := (x31 * 0x13)
			
 
				+	x34 := (x12 + x33)
			
 
				+	x35 := fiat.u1((x34 >> 51))
			
 
				+	x36 := (x34 & 0x7ffffffffffff)
			
 
				+	x37 := (u64(x35) + x17)
			
 
				+	x38 := fiat.u1((x37 >> 51))
			
 
				+	x39 := (x37 & 0x7ffffffffffff)
			
 
				+	x40 := (u64(x38) + x22)
			
 
				+	out1[0] = x36
			
 
				+	out1[1] = x39
			
 
				+	out1[2] = x40
			
 
				+	out1[3] = x27
			
 
				+	out1[4] = x32
			
 
				+}
			
 
				+
			
 
				+// The following routines were added by hand, and do not come from fiat-crypto.
			
 
				+
			
 
				+fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
			
 
				+	out1[0] = 0
			
 
				+	out1[1] = 0
			
 
				+	out1[2] = 0
			
 
				+	out1[3] = 0
			
 
				+	out1[4] = 0
			
 
				+}
			
 
				+
			
 
				+fe_one :: proc "contextless" (out1: ^Tight_Field_Element) {
			
 
				+	out1[0] = 1
			
 
				+	out1[1] = 0
			
 
				+	out1[2] = 0
			
 
				+	out1[3] = 0
			
 
				+	out1[4] = 0
			
 
				+}
			
 
				+
			
 
				+fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) {
			
 
				+	x1 := arg1[0]
			
 
				+	x2 := arg1[1]
			
 
				+	x3 := arg1[2]
			
 
				+	x4 := arg1[3]
			
 
				+	x5 := arg1[4]
			
 
				+	out1[0] = x1
			
 
				+	out1[1] = x2
			
 
				+	out1[2] = x3
			
 
				+	out1[3] = x4
			
 
				+	out1[4] = x5
			
 
				+}
			
 
				+
			
 
				+fe_cond_swap :: proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) {
			
 
				+	mask := -u64(arg1)
			
 
				+	x := (out1[0] ~ out2[0]) & mask
			
 
				+	x1, y1 := out1[0] ~ x, out2[0] ~ x
			
 
				+	x = (out1[1] ~ out2[1]) & mask
			
 
				+	x2, y2 := out1[1] ~ x, out2[1] ~ x
			
 
				+	x = (out1[2] ~ out2[2]) & mask
			
 
				+	x3, y3 := out1[2] ~ x, out2[2] ~ x
			
 
				+	x = (out1[3] ~ out2[3]) & mask
			
 
				+	x4, y4 := out1[3] ~ x, out2[3] ~ x
			
 
				+	x = (out1[4] ~ out2[4]) & mask
			
 
				+	x5, y5 := out1[4] ~ x, out2[4] ~ x
			
 
				+	out1[0], out2[0] = x1, y1
			
 
				+	out1[1], out2[1] = x2, y2
			
 
				+	out1[2], out2[2] = x3, y3
			
 
				+	out1[3], out2[3] = x4, y4
			
 
				+	out1[4], out2[4] = x5, y5
			
 
				+}
			
--- a/core/crypto/x25519/x25519.odin
+++ b/core/crypto/x25519/x25519.odin
@@ -0,0 +1,126 @@
 
				+package x25519
			
 
				+
			
 
				+import field "core:crypto/_fiat/field_curve25519"
			
 
				+import "core:mem"
			
 
				+
			
 
				+SCALAR_SIZE :: 32
			
 
				+POINT_SIZE :: 32
			
 
				+
			
 
				+_BASE_POINT: [32]byte = {9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
			
 
				+
			
 
				+_scalar_bit :: #force_inline proc "contextless" (s: ^[32]byte, i: int) -> u8 {
			
 
				+	if i < 0 {
			
 
				+		return 0
			
 
				+	}
			
 
				+	return (s[i>>3] >> uint(i&7)) & 1
			
 
				+}
			
 
				+
			
 
				+_scalarmult :: proc (out, scalar, point: ^[32]byte) {
			
 
				+	// Montgomery pseduo-multiplication taken from Monocypher.
			
 
				+
			
 
				+	// computes the scalar product
			
 
				+	x1: field.Tight_Field_Element = ---
			
 
				+	field.fe_from_bytes(&x1, point)
			
 
				+
			
 
				+	// computes the actual scalar product (the result is in x2 and z2)
			
 
				+	x2, x3, z2, z3: field.Tight_Field_Element =  ---, ---, ---, ---
			
 
				+	t0, t1: field.Loose_Field_Element = ---, ---
			
 
				+
			
 
				+	// Montgomery ladder
			
 
				+	// In projective coordinates, to avoid divisions: x = X / Z
			
 
				+	// We don't care about the y coordinate, it's only 1 bit of information
			
 
				+	field.fe_one(&x2) // "zero" point
			
 
				+	field.fe_zero(&z2)
			
 
				+	field.fe_set(&x3, &x1) // "one" point
			
 
				+	field.fe_one(&z3)
			
 
				+
			
 
				+	swap: int
			
 
				+	for pos := 255-1; pos >= 0; pos = pos - 1 	{
			
 
				+		// constant time conditional swap before ladder step
			
 
				+		b := int(_scalar_bit(scalar, pos))
			
 
				+		swap ~= b // xor trick avoids swapping at the end of the loop
			
 
				+		field.fe_cond_swap(&x2, &x3, swap)
			
 
				+		field.fe_cond_swap(&z2, &z3, swap)
			
 
				+		swap = b // anticipates one last swap after the loop
			
 
				+
			
 
				+		// Montgomery ladder step: replaces (P2, P3) by (P2*2, P2+P3)
			
 
				+		// with differential addition
			
 
				+		//
			
 
				+		// Note: This deliberately omits reductions after add/sub operations
			
 
				+		// if the result is only ever used as the input to a mul/square since
			
 
				+		// the implementations of those can deal with non-reduced inputs.
			
 
				+		//
			
 
				+		// fe_tighten_cast is only used to store a fully reduced
			
 
				+		// output in a Loose_Field_Element, or to provide such a
			
 
				+		// Loose_Field_Element as a Tight_Field_Element argument.
			
 
				+		field.fe_sub(&t0, &x3, &z3)
			
 
				+		field.fe_sub(&t1, &x2, &z2)
			
 
				+		field.fe_add(field.fe_relax_cast(&x2), &x2, &z2) // x2 - unreduced
			
 
				+		field.fe_add(field.fe_relax_cast(&z2), &x3, &z3) // z2 - unreduced
			
 
				+		field.fe_carry_mul(&z3, &t0, field.fe_relax_cast(&x2))
			
 
				+		field.fe_carry_mul(&z2, field.fe_relax_cast(&z2), &t1) // z2 - reduced
			
 
				+		field.fe_carry_square(field.fe_tighten_cast(&t0), &t1) // t0 - reduced
			
 
				+		field.fe_carry_square(field.fe_tighten_cast(&t1), field.fe_relax_cast(&x2)) // t1 - reduced
			
 
				+		field.fe_add(field.fe_relax_cast(&x3), &z3, &z2) // x3 - unreduced
			
 
				+		field.fe_sub(field.fe_relax_cast(&z2), &z3, &z2) // z2 - unreduced
			
 
				+		field.fe_carry_mul(&x2, &t1, &t0) // x2 - reduced
			
 
				+		field.fe_sub(&t1, field.fe_tighten_cast(&t1), field.fe_tighten_cast(&t0)) // safe - t1/t0 is reduced
			
 
				+		field.fe_carry_square(&z2, field.fe_relax_cast(&z2)) // z2 - reduced
			
 
				+		field.fe_carry_scmul_121666(&z3, &t1)
			
 
				+		field.fe_carry_square(&x3, field.fe_relax_cast(&x3)) // x3 - reduced
			
 
				+		field.fe_add(&t0, field.fe_tighten_cast(&t0), &z3) // safe - t0 is reduced
			
 
				+		field.fe_carry_mul(&z3, field.fe_relax_cast(&x1), field.fe_relax_cast(&z2))
			
 
				+		field.fe_carry_mul(&z2, &t1, &t0)
			
 
				+	}
			
 
				+	// last swap is necessary to compensate for the xor trick
			
 
				+	// Note: after this swap, P3 == P2 + P1.
			
 
				+	field.fe_cond_swap(&x2, &x3, swap)
			
 
				+	field.fe_cond_swap(&z2, &z3, swap)
			
 
				+
			
 
				+	// normalises the coordinates: x == X / Z
			
 
				+	field.fe_carry_inv(&z2, field.fe_relax_cast(&z2))
			
 
				+	field.fe_carry_mul(&x2, field.fe_relax_cast(&x2), field.fe_relax_cast(&z2))
			
 
				+	field.fe_to_bytes(out, &x2)
			
 
				+
			
 
				+	mem.zero_explicit(&x1, size_of(x1))
			
 
				+	mem.zero_explicit(&x2, size_of(x2))
			
 
				+	mem.zero_explicit(&x3, size_of(x3))
			
 
				+	mem.zero_explicit(&z2, size_of(z2))
			
 
				+	mem.zero_explicit(&z3, size_of(z3))
			
 
				+	mem.zero_explicit(&t0, size_of(t0))
			
 
				+	mem.zero_explicit(&t1, size_of(t1))
			
 
				+}
			
 
				+
			
 
				+scalarmult :: proc (dst, scalar, point: []byte) {
			
 
				+	if len(scalar) != SCALAR_SIZE {
			
 
				+		panic("crypto/x25519: invalid scalar size")
			
 
				+	}
			
 
				+	if len(point) != POINT_SIZE {
			
 
				+		panic("crypto/x25519: invalid point size")
			
 
				+	}
			
 
				+	if len(dst) != POINT_SIZE {
			
 
				+		panic("crypto/x25519: invalid destination point size")
			
 
				+	}
			
 
				+
			
 
				+	// "clamp" the scalar
			
 
				+	e: [32]byte = ---
			
 
				+	copy_slice(e[:], scalar)
			
 
				+	e[0] &= 248
			
 
				+	e[31] &= 127
			
 
				+	e[31] |= 64
			
 
				+
			
 
				+	p: [32]byte = ---
			
 
				+	copy_slice(p[:], point)
			
 
				+
			
 
				+	d: [32]byte = ---
			
 
				+	_scalarmult(&d, &e, &p)
			
 
				+	copy_slice(dst, d[:])
			
 
				+
			
 
				+	mem.zero_explicit(&e, size_of(e))
			
 
				+	mem.zero_explicit(&d, size_of(d))
			
 
				+}
			
 
				+
			
 
				+scalarmult_basepoint :: proc (dst, scalar: []byte) {
			
 
				+	// TODO/perf: Switch to using a precomputed table.
			
 
				+	scalarmult(dst, scalar, _BASE_POINT[:])
			
 
				+}
			
--- a/tests/core/crypto/test_core_crypto.odin
+++ b/tests/core/crypto/test_core_crypto.odin
@@ -115,6 +115,11 @@ main :: proc() {
 
				     test_haval_224(&t)
			
 
				     test_haval_256(&t)
			
 
				 
			
 
				+    // "modern" crypto tests
			
 
				+    test_x25519(&t)
			
 
				+
			
 
				+    bench_modern(&t)
			
 
				+
			
 
				     fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
			
 
				 }
			
 
				 
			
--- a/tests/core/crypto/test_core_crypto_modern.odin
+++ b/tests/core/crypto/test_core_crypto_modern.odin
@@ -0,0 +1,95 @@
 
				+package test_core_crypto
			
 
				+
			
 
				+import "core:testing"
			
 
				+import "core:fmt"
			
 
				+import "core:time"
			
 
				+
			
 
				+import "core:crypto/x25519"
			
 
				+
			
 
				+_digit_value :: proc(r: rune) -> int {
			
 
				+	ri := int(r)
			
 
				+	v: int = 16
			
 
				+	switch r {
			
 
				+	case '0'..='9': v = ri-'0'
			
 
				+	case 'a'..='z': v = ri-'a'+10
			
 
				+	case 'A'..='Z': v = ri-'A'+10
			
 
				+	}
			
 
				+	return v
			
 
				+}
			
 
				+
			
 
				+_decode_hex32 :: proc(s: string) -> [32]byte{
			
 
				+	b: [32]byte
			
 
				+	for i := 0; i < len(s); i = i + 2 {
			
 
				+		hi := _digit_value(rune(s[i]))
			
 
				+		lo := _digit_value(rune(s[i+1]))
			
 
				+		b[i/2] = byte(hi << 4 | lo)
			
 
				+	}
			
 
				+	return b
			
 
				+}
			
 
				+
			
 
				+TestECDH :: struct {
			
 
				+	scalar:  string,
			
 
				+	point:   string,
			
 
				+	product: string,
			
 
				+}
			
 
				+
			
 
				+@(test)
			
 
				+test_x25519 :: proc(t: ^testing.T) {
			
 
				+	log(t, "Testing X25519")
			
 
				+
			
 
				+	test_vectors := [?]TestECDH {
			
 
				+		// Test vectors from RFC 7748
			
 
				+		TestECDH{
			
 
				+			"a546e36bf0527c9d3b16154b82465edd62144c0ac1fc5a18506a2244ba449ac4",
			
 
				+			"e6db6867583030db3594c1a424b15f7c726624ec26b3353b10a903a6d0ab1c4c",
			
 
				+			"c3da55379de9c6908e94ea4df28d084f32eccf03491c71f754b4075577a28552",
			
 
				+		},
			
 
				+		TestECDH{
			
 
				+			"4b66e9d4d1b4673c5ad22691957d6af5c11b6421e0ea01d42ca4169e7918ba0d",
			
 
				+			"e5210f12786811d3f4b7959d0538ae2c31dbe7106fc03c3efc4cd549c715a493",
			
 
				+			"95cbde9476e8907d7aade45cb4b873f88b595a68799fa152e6f8f7647aac7957",
			
 
				+		},
			
 
				+	}
			
 
				+	for v, _ in test_vectors {
			
 
				+		scalar := _decode_hex32(v.scalar)
			
 
				+		point := _decode_hex32(v.point)
			
 
				+
			
 
				+		derived_point: [x25519.POINT_SIZE]byte
			
 
				+		x25519.scalarmult(derived_point[:], scalar[:], point[:])
			
 
				+		derived_point_str := hex_string(derived_point[:])
			
 
				+
			
 
				+		expect(t, derived_point_str == v.product, fmt.tprintf("Expected %s for %s * %s, but got %s instead", v.product, v.scalar, v.point, derived_point_str))
			
 
				+
			
 
				+		// Abuse the test vectors to sanity-check the scalar-basepoint multiply.
			
 
				+		p1, p2: [x25519.POINT_SIZE]byte
			
 
				+		x25519.scalarmult_basepoint(p1[:], scalar[:])
			
 
				+		x25519.scalarmult(p2[:], scalar[:], x25519._BASE_POINT[:])
			
 
				+		p1_str, p2_str := hex_string(p1[:]), hex_string(p2[:])
			
 
				+		expect(t, p1_str == p2_str, fmt.tprintf("Expected %s for %s * basepoint, but got %s instead", p2_str, v.scalar, p1_str))
			
 
				+	}
			
 
				+
			
 
				+    // TODO/tests: Run the wycheproof test vectors, once I figure out
			
 
				+    // how to work with JSON.
			
 
				+}
			
 
				+
			
 
				+@(test)
			
 
				+bench_modern :: proc(t: ^testing.T) {
			
 
				+	fmt.println("Starting benchmarks:")
			
 
				+
			
 
				+	bench_x25519(t)
			
 
				+}
			
 
				+
			
 
				+bench_x25519 :: proc(t: ^testing.T) {
			
 
				+	point := _decode_hex32("deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef")
			
 
				+	scalar := _decode_hex32("cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe")
			
 
				+	out: [x25519.POINT_SIZE]byte = ---
			
 
				+
			
 
				+	iters :: 10000
			
 
				+	start := time.now()
			
 
				+	for i := 0; i < iters; i = i + 1 {
			
 
				+		x25519.scalarmult(out[:], scalar[:], point[:])
			
 
				+	}
			
 
				+	elapsed := time.since(start)
			
 
				+
			
 
				+	log(t, fmt.tprintf("x25519.scalarmult: ~%f us/op", time.duration_microseconds(elapsed) / iters))
			
 
				+}