Browse Source

Merge pull request #3288 from Yawning/feature/even-moar-crypto

core/crypto: Even more improvments
Jeroen van Rijn 1 year ago
parent
commit
2ba2bc1fec
31 changed files with 3567 additions and 234 deletions
  1. 8 0
      core/crypto/README.md
  2. 428 0
      core/crypto/_edwards25519/edwards25519.odin
  3. 61 0
      core/crypto/_edwards25519/edwards25519_scalar.odin
  4. 288 0
      core/crypto/_edwards25519/edwards25519_scalar_mul.odin
  5. 2 2
      core/crypto/_fiat/fiat.odin
  6. 175 46
      core/crypto/_fiat/field_curve25519/field.odin
  7. 29 61
      core/crypto/_fiat/field_curve25519/field51.odin
  8. 47 4
      core/crypto/_fiat/field_poly1305/field.odin
  9. 35 39
      core/crypto/_fiat/field_poly1305/field4344.odin
  10. 153 0
      core/crypto/_fiat/field_scalar25519/field.odin
  11. 535 0
      core/crypto/_fiat/field_scalar25519/field64.odin
  12. 10 0
      core/crypto/crypto.odin
  13. 314 0
      core/crypto/ed25519/ed25519.odin
  14. 1 1
      core/crypto/poly1305/poly1305.odin
  15. 4 0
      core/crypto/rand_bsd.odin
  16. 4 0
      core/crypto/rand_darwin.odin
  17. 4 0
      core/crypto/rand_generic.odin
  18. 4 0
      core/crypto/rand_js.odin
  19. 4 0
      core/crypto/rand_linux.odin
  20. 4 0
      core/crypto/rand_windows.odin
  21. 510 0
      core/crypto/ristretto255/ristretto255.odin
  22. 97 0
      core/crypto/ristretto255/ristretto255_scalar.odin
  23. 3 9
      core/crypto/x25519/x25519.odin
  24. 4 0
      examples/all/all_main.odin
  25. 2 72
      tests/core/crypto/test_core_crypto.odin
  26. 766 0
      tests/core/crypto/test_core_crypto_ecc25519.odin
  27. 3 0
      tests/core/crypto/test_core_crypto_hash.odin
  28. 3 0
      tests/core/crypto/test_core_crypto_kdf.odin
  29. 3 0
      tests/core/crypto/test_core_crypto_mac.odin
  30. 3 0
      tests/core/crypto/test_core_crypto_sha3_variants.odin
  31. 63 0
      tests/core/crypto/test_crypto_benchmark.odin

+ 8 - 0
core/crypto/README.md

@@ -14,6 +14,14 @@ constant-time byte comparison.
 - Best-effort is made to mitigate timing side-channels on reasonable
 - Best-effort is made to mitigate timing side-channels on reasonable
   architectures.  Architectures that are known to be unreasonable include
   architectures.  Architectures that are known to be unreasonable include
   but are not limited to i386, i486, and WebAssembly.
   but are not limited to i386, i486, and WebAssembly.
+- Implementations assume a 64-bit architecture (64-bit integer arithmetic
+  is fast, and includes add-with-carry, sub-with-borrow, and full-result
+  multiply).
+- Hardware sidechannels are explicitly out of scope for this package.
+  Notable examples include but are not limited to:
+  - Power/RF side-channels etc.
+  - Fault injection attacks etc.
+  - Hardware vulnerabilities ("apply mitigations or buy a new CPU").
 - The packages attempt to sanitize sensitive data, however this is, and
 - The packages attempt to sanitize sensitive data, however this is, and
   will remain a "best-effort" implementation decision.  As Thomas Pornin
   will remain a "best-effort" implementation decision.  As Thomas Pornin
   puts it "In general, such memory cleansing is a fool's quest."
   puts it "In general, such memory cleansing is a fool's quest."

+ 428 - 0
core/crypto/_edwards25519/edwards25519.odin

@@ -0,0 +1,428 @@
+package _edwards25519
+
+/*
+This implements the edwards25519 composite-order group, primarily for
+the purpose of implementing X25519, Ed25519, and ristretto255.  Use of
+this package for other purposes is NOT RECOMMENDED.
+
+See:
+- https://eprint.iacr.org/2011/368.pdf
+- https://datatracker.ietf.org/doc/html/rfc8032
+- https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html
+*/
+
+import "base:intrinsics"
+import "core:crypto"
+import field "core:crypto/_fiat/field_curve25519"
+import "core:mem"
+
+// Group_Element is an edwards25519 group element, as extended homogeneous
+// coordinates, which represents the affine point `(x, y)` as `(X, Y, Z, T)`,
+// with the relations `x = X/Z`, `y = Y/Z`, and `x * y = T/Z`.
+//
+// d = -121665/121666 = 37095705934669439343138083508754565189542113879843219016388785533085940283555
+// a = -1
+//
+// Notes:
+// - There is considerable scope for optimization, however that
+//   will not change the external API, and this is simple and reasonably
+//   performant.
+// - The API deliberately makes it hard to create arbitrary group
+//   elements that are not on the curve.
+// - The group element decoding routine takes the opinionated stance of
+//   rejecting non-canonical encodings.
+
+// FE_D is the curve constant `d` (see the `d = -121665/121666` note above),
+// stored as a five-limb field element.  `ge_set_bytes` multiplies it into
+// y^2 when recovering the x-coordinate.
+FE_D := field.Tight_Field_Element {
+	929955233495203,
+	466365720129213,
+	1662059464998953,
+	2033849074728123,
+	1442794654840575,
+}
+// FE_A is the curve constant `a = -1` as a field element; `ge_double`
+// multiplies it into X1^2 for the `D = a*A` step.
+@(private)
+FE_A := field.Tight_Field_Element {
+	2251799813685228,
+	2251799813685247,
+	2251799813685247,
+	2251799813685247,
+	2251799813685247,
+}
+// FE_D2 is the constant `k` of the addition formula (`k = 2*d`, per the
+// assumptions listed in `ge_add_addend`).  `ge_addend_set` multiplies it
+// into T to precompute `k_times_t2`.
+@(private)
+FE_D2 := field.Tight_Field_Element {
+	1859910466990425,
+	932731440258426,
+	1072319116312658,
+	1815898335770999,
+	633789495995903,
+}
+// GE_BASEPOINT is the group element that `ge_generator` copies out and
+// that `ge_scalarmult_basepoint` multiplies.  The four field elements are
+// given in `Group_Element` field order (x, y, z, t), with z = 1.
+@(private)
+GE_BASEPOINT := Group_Element {
+	field.Tight_Field_Element {
+		1738742601995546,
+		1146398526822698,
+		2070867633025821,
+		562264141797630,
+		587772402128613,
+	},
+	field.Tight_Field_Element {
+		1801439850948184,
+		1351079888211148,
+		450359962737049,
+		900719925474099,
+		1801439850948198,
+	},
+	field.Tight_Field_Element{1, 0, 0, 0, 0},
+	field.Tight_Field_Element {
+		1841354044333475,
+		16398895984059,
+		755974180946558,
+		900171276175154,
+		1821297809914039,
+	},
+}
+// GE_IDENTITY is the element (x, y, z, t) = (0, 1, 1, 0), the same value
+// that `ge_identity` writes.  `ge_is_small_order` and
+// `ge_in_prime_order_subgroup_vartime` compare their results against it.
+GE_IDENTITY := Group_Element {
+	field.Tight_Field_Element{0, 0, 0, 0, 0},
+	field.Tight_Field_Element{1, 0, 0, 0, 0},
+	field.Tight_Field_Element{1, 0, 0, 0, 0},
+	field.Tight_Field_Element{0, 0, 0, 0, 0},
+}
+
+// Group_Element stores the four extended coordinates (X, Y, Z, T) of a
+// point, each as a tight field element.
+Group_Element :: struct {
+	x: field.Tight_Field_Element, // X, with affine x = X/Z
+	y: field.Tight_Field_Element, // Y, with affine y = Y/Z
+	z: field.Tight_Field_Element, // Z, the common denominator
+	t: field.Tight_Field_Element, // T, with x * y = T/Z
+}
+
+// ge_clear wipes every coordinate of `ge` via `mem.zero_explicit`.
+ge_clear :: proc "contextless" (ge: ^Group_Element) {
+	mem.zero_explicit(ge, size_of(ge^))
+}
+
+// ge_set copies `a` into `ge`, one coordinate at a time.
+ge_set :: proc "contextless" (ge, a: ^Group_Element) {
+	field.fe_set(&ge.x, &a.x)
+	field.fe_set(&ge.y, &a.y)
+	field.fe_set(&ge.z, &a.z)
+	field.fe_set(&ge.t, &a.t)
+}
+
+// ge_set_bytes decodes the 32-byte encoding `b` (y-coordinate plus the
+// sign of x in the top bit of the last byte) into `ge`.
+//
+// Returns true on success.  Returns false, leaving `ge` unchanged, when
+// no x-coordinate exists for the encoded y, or when re-encoding the
+// decoded point does not reproduce `b` (a non-canonical encoding).
+//
+// Traps if `len(b) != 32`.
+@(require_results)
+ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool {
+	if len(b) != 32 {
+		intrinsics.trap()
+	}
+	b_ := transmute(^[32]byte)(raw_data(b))
+
+	// Do the work in a scratch element, so that ge is unchanged on
+	// failure.
+	tmp: Group_Element = ---
+	defer ge_clear(&tmp)
+	field.fe_one(&tmp.z) // Z = 1
+
+	// The encoding is the y-coordinate, with the x-coordinate polarity
+	// (odd/even) encoded in the MSB.
+	field.fe_from_bytes(&tmp.y, b_) // ignores high bit
+
+	// Recover the candidate x-coordinate via the curve equation:
+	// x^2 = (y^2 - 1) / (d * y^2 + 1) (mod p)
+
+	fe_tmp := &tmp.t // Use this to store intermediaries.
+	fe_one := &tmp.z
+
+	// x = num = y^2 - 1
+	field.fe_carry_square(fe_tmp, field.fe_relax_cast(&tmp.y)) // fe_tmp = y^2
+	field.fe_carry_sub(&tmp.x, fe_tmp, fe_one)
+
+	// den = d * y^2 + 1
+	field.fe_carry_mul(fe_tmp, field.fe_relax_cast(fe_tmp), field.fe_relax_cast(&FE_D))
+	field.fe_carry_add(fe_tmp, fe_tmp, fe_one)
+
+	// x = invsqrt(den/num)
+	is_square := field.fe_carry_sqrt_ratio_m1(
+		&tmp.x,
+		field.fe_relax_cast(&tmp.x),
+		field.fe_relax_cast(fe_tmp),
+	)
+	if is_square == 0 {
+		return false
+	}
+
+	// Pick the right x-coordinate.
+	field.fe_cond_negate(&tmp.x, &tmp.x, int(b[31] >> 7))
+
+	// t = x * y
+	field.fe_carry_mul(&tmp.t, field.fe_relax_cast(&tmp.x), field.fe_relax_cast(&tmp.y))
+
+	// Reject non-canonical encodings of ge.
+	buf: [32]byte = ---
+	field.fe_to_bytes(&buf, &tmp.y)
+	buf[31] |= byte(field.fe_is_negative(&tmp.x)) << 7
+	is_canonical := crypto.compare_constant_time(b, buf[:])
+
+	ge_cond_assign(ge, &tmp, is_canonical)
+
+	mem.zero_explicit(&buf, size_of(buf))
+
+	return is_canonical == 1
+}
+
+// ge_bytes writes the 32-byte encoding of `ge` to `dst`: the affine
+// y-coordinate as produced by `field.fe_to_bytes`, with the sign (parity)
+// of the affine x-coordinate stored in the top bit of the last byte.
+// This is the format that `ge_set_bytes` parses.
+//
+// Traps if `len(dst) != 32`.
+ge_bytes :: proc "contextless" (ge: ^Group_Element, dst: []byte) {
+	if len(dst) != 32 {
+		intrinsics.trap()
+	}
+	dst_ := transmute(^[32]byte)(raw_data(dst))
+
+	// Convert the element to affine (x, y) representation.
+	x, y, z_inv: field.Tight_Field_Element = ---, ---, ---
+	field.fe_carry_inv(&z_inv, field.fe_relax_cast(&ge.z))
+	field.fe_carry_mul(&x, field.fe_relax_cast(&ge.x), field.fe_relax_cast(&z_inv))
+	field.fe_carry_mul(&y, field.fe_relax_cast(&ge.y), field.fe_relax_cast(&z_inv))
+
+	// Encode the y-coordinate.
+	field.fe_to_bytes(dst_, &y)
+
+	// Copy the sign of the x-coordinate to the most significant bit of
+	// the encoded y-coordinate.  The sign is taken from the serialized
+	// form of x via `fe_is_negative`, exactly as `ge_set_bytes` does when
+	// it re-encodes a point.  The low bit of the in-memory limb `x[0]`
+	// must not be used: the limbs are not guaranteed to hold the
+	// canonical representative, so that bit can differ from the parity
+	// of the encoded value.
+	dst_[31] |= byte(field.fe_is_negative(&x)) << 7
+
+	field.fe_clear_vec([]^field.Tight_Field_Element{&x, &y, &z_inv})
+}
+
+// ge_identity sets `ge` to (x, y, z, t) = (0, 1, 1, 0).
+ge_identity :: proc "contextless" (ge: ^Group_Element) {
+	field.fe_zero(&ge.x)
+	field.fe_one(&ge.y)
+	field.fe_one(&ge.z)
+	field.fe_zero(&ge.t)
+}
+
+// ge_generator sets `ge` to a copy of `GE_BASEPOINT`.
+ge_generator :: proc "contextless" (ge: ^Group_Element) {
+	ge_set(ge, &GE_BASEPOINT)
+}
+
+// Addend_Group_Element is the precomputed form of the second operand of
+// an addition.  It holds the intermediates t1, t3, t4 and t5 of the
+// formula in `ge_add_addend`, which depend only on that operand, and is
+// built from a `Group_Element` by `ge_addend_set`.
+@(private)
+Addend_Group_Element :: struct {
+	y2_minus_x2:  field.Loose_Field_Element, // t1
+	y2_plus_x2:   field.Loose_Field_Element, // t3
+	k_times_t2:   field.Tight_Field_Element, // t4
+	two_times_z2: field.Loose_Field_Element, // t5
+}
+
+// ge_addend_set converts `ge` into precomputed addend form, writing the
+// result to `ge_a`.
+@(private)
+ge_addend_set :: proc "contextless" (ge_a: ^Addend_Group_Element, ge: ^Group_Element) {
+	field.fe_sub(&ge_a.y2_minus_x2, &ge.y, &ge.x) // t1 = Y - X
+	field.fe_add(&ge_a.y2_plus_x2, &ge.y, &ge.x) // t3 = Y + X
+	field.fe_carry_mul(&ge_a.k_times_t2, field.fe_relax_cast(&FE_D2), field.fe_relax_cast(&ge.t)) // t4 = k * T
+	field.fe_add(&ge_a.two_times_z2, &ge.z, &ge.z) // t5 = 2 * Z
+}
+
+// ge_addend_conditional_assign applies `field.fe_cond_select` to each of
+// the four members, choosing between the current value of `ge_a` and `a`
+// under `ctrl`.  `mul_tbl_add` passes ctrl = 1 for the table entry it
+// wants copied into `ge_a` and ctrl = 0 for every other entry.
+@(private)
+ge_addend_conditional_assign :: proc "contextless" (ge_a, a: ^Addend_Group_Element, ctrl: int) {
+	field.fe_cond_select(&ge_a.y2_minus_x2, &ge_a.y2_minus_x2, &a.y2_minus_x2, ctrl)
+	field.fe_cond_select(&ge_a.y2_plus_x2, &ge_a.y2_plus_x2, &a.y2_plus_x2, ctrl)
+	field.fe_cond_select(&ge_a.k_times_t2, &ge_a.k_times_t2, &a.k_times_t2, ctrl)
+	field.fe_cond_select(&ge_a.two_times_z2, &ge_a.two_times_z2, &a.two_times_z2, ctrl)
+}
+
+// Add_Scratch holds the intermediates of `ge_add_addend`, named after the
+// variables of the addition formula.  Callers own it so that it can be
+// reused across additions and wiped once at the end.
+@(private)
+Add_Scratch :: struct {
+	A, B, C, D: field.Tight_Field_Element,
+	E, F, G, H: field.Loose_Field_Element,
+	t0, t2:     field.Loose_Field_Element,
+}
+
+// ge_add sets `ge = a + b`.  `b` is first converted to addend form, then
+// the addition is delegated to `ge_add_addend`; both temporaries are
+// wiped before returning.
+ge_add :: proc "contextless" (ge, a, b: ^Group_Element) {
+	addend: Addend_Group_Element = ---
+	defer mem.zero_explicit(&addend, size_of(Addend_Group_Element))
+
+	work: Add_Scratch = ---
+	defer mem.zero_explicit(&work, size_of(Add_Scratch))
+
+	ge_addend_set(&addend, b)
+	ge_add_addend(ge, a, &addend, &work)
+}
+
+// ge_add_addend sets `ge = a + b`, where `b` is already in precomputed
+// addend form and `scratch` provides storage for the intermediates (the
+// caller is responsible for wiping it).
+//
+// `ge` may alias `a` (as `mul_tbl_add` does): every coordinate of `a` is
+// read before any coordinate of `ge` is written.
+@(private)
+ge_add_addend :: proc "contextless" (
+	ge, a: ^Group_Element,
+	b: ^Addend_Group_Element,
+	scratch: ^Add_Scratch,
+) {
+	// https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#addition-add-2008-hwcd-3
+	// Assumptions: k=2*d.
+	//
+	// t0 = Y1-X1
+	// t1 = Y2-X2
+	// A = t0*t1
+	// t2 = Y1+X1
+	// t3 = Y2+X2
+	// B = t2*t3
+	// t4 = k*T2
+	// C = T1*t4
+	// t5 = 2*Z2
+	// D = Z1*t5
+	// E = B-A
+	// F = D-C
+	// G = D+C
+	// H = B+A
+	// X3 = E*F
+	// Y3 = G*H
+	// T3 = E*H
+	// Z3 = F*G
+	//
+	// In order to make the scalar multiply faster, the addend is provided
+	// as a `Addend_Group_Element` with t1, t3, t4, and t5 precomputed, as
+	// it is trivially obvious that those are the only values used by the
+	// formula that are directly dependent on `b`, and are only dependent
+	// on `b` and constants.  This saves 1 sub, 2 adds, and 1 multiply,
+	// each time the intermediate representation can be reused.
+
+	A, B, C, D := &scratch.A, &scratch.B, &scratch.C, &scratch.D
+	E, F, G, H := &scratch.E, &scratch.F, &scratch.G, &scratch.H
+	t0, t2 := &scratch.t0, &scratch.t2
+
+	field.fe_sub(t0, &a.y, &a.x)
+	t1 := &b.y2_minus_x2
+	field.fe_carry_mul(A, t0, t1)
+	field.fe_add(t2, &a.y, &a.x)
+	t3 := &b.y2_plus_x2
+	field.fe_carry_mul(B, t2, t3)
+	t4 := &b.k_times_t2
+	field.fe_carry_mul(C, field.fe_relax_cast(&a.t), field.fe_relax_cast(t4))
+	t5 := &b.two_times_z2
+	field.fe_carry_mul(D, field.fe_relax_cast(&a.z), t5)
+	field.fe_sub(E, B, A)
+	field.fe_sub(F, D, C)
+	field.fe_add(G, D, C)
+	field.fe_add(H, B, A)
+	field.fe_carry_mul(&ge.x, E, F)
+	field.fe_carry_mul(&ge.y, G, H)
+	field.fe_carry_mul(&ge.t, E, H)
+	field.fe_carry_mul(&ge.z, F, G)
+}
+
+// Double_Scratch holds the intermediates of `ge_double`, named after the
+// variables of the doubling formula.
+@(private)
+Double_Scratch :: struct {
+	A, B, C, D, G: field.Tight_Field_Element,
+	t0, t2, t3:    field.Tight_Field_Element,
+	E, F, H:       field.Loose_Field_Element,
+	t1:            field.Loose_Field_Element,
+}
+
+// ge_double sets `ge = 2 * a`.
+//
+// `scratch` is optional.  When it is nil, a local scratch area is used
+// and wiped before returning; when the caller supplies one, it is left
+// holding the intermediates and the caller is responsible for wiping it.
+//
+// `ge` may alias `a`: every coordinate of `a` is read before any
+// coordinate of `ge` is written.
+ge_double :: proc "contextless" (ge, a: ^Group_Element, scratch: ^Double_Scratch = nil) {
+	// https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#doubling-dbl-2008-hwcd
+	//
+	// A = X1^2
+	// B = Y1^2
+	// t0 = Z1^2
+	// C = 2*t0
+	// D = a*A
+	// t1 = X1+Y1
+	// t2 = t1^2
+	// t3 = t2-A
+	// E = t3-B
+	// G = D+B
+	// F = G-C
+	// H = D-B
+	// X3 = E*F
+	// Y3 = G*H
+	// T3 = E*H
+	// Z3 = F*G
+
+	// Only a locally allocated scratch area is sanitized on exit.
+	sanitize, scratch := scratch == nil, scratch
+	if sanitize {
+		tmp: Double_Scratch = ---
+		scratch = &tmp
+	}
+
+	A, B, C, D, G := &scratch.A, &scratch.B, &scratch.C, &scratch.D, &scratch.G
+	t0, t2, t3 := &scratch.t0, &scratch.t2, &scratch.t3
+	E, F, H := &scratch.E, &scratch.F, &scratch.H
+	t1 := &scratch.t1
+
+	field.fe_carry_square(A, field.fe_relax_cast(&a.x))
+	field.fe_carry_square(B, field.fe_relax_cast(&a.y))
+	field.fe_carry_square(t0, field.fe_relax_cast(&a.z))
+	field.fe_carry_add(C, t0, t0)
+	field.fe_carry_mul(D, field.fe_relax_cast(&FE_A), field.fe_relax_cast(A))
+	field.fe_add(t1, &a.x, &a.y)
+	field.fe_carry_square(t2, t1)
+	field.fe_carry_sub(t3, t2, A)
+	field.fe_sub(E, t3, B)
+	field.fe_carry_add(G, D, B)
+	field.fe_sub(F, G, C)
+	field.fe_sub(H, D, B)
+	G_ := field.fe_relax_cast(G) // G is tight; the multiplies below take loose inputs.
+	field.fe_carry_mul(&ge.x, E, F)
+	field.fe_carry_mul(&ge.y, G_, H)
+	field.fe_carry_mul(&ge.t, E, H)
+	field.fe_carry_mul(&ge.z, F, G_)
+
+	if sanitize {
+		mem.zero_explicit(scratch, size_of(Double_Scratch))
+	}
+}
+
+// ge_negate sets `ge = -a`: x and t are negated with `fe_carry_opp`,
+// while y and z are copied unchanged.
+ge_negate :: proc "contextless" (ge, a: ^Group_Element) {
+	field.fe_carry_opp(&ge.x, &a.x)
+	field.fe_set(&ge.y, &a.y)
+	field.fe_set(&ge.z, &a.z)
+	field.fe_carry_opp(&ge.t, &a.t)
+}
+
+// ge_cond_negate computes `-a` into a scratch element and passes it to
+// `ge_cond_assign(ge, ..., ctrl)`.  The scratch element is wiped before
+// returning.
+ge_cond_negate :: proc "contextless" (ge, a: ^Group_Element, ctrl: int) {
+	negated: Group_Element = ---
+	defer ge_clear(&negated)
+
+	ge_negate(&negated, a)
+	ge_cond_assign(ge, &negated, ctrl)
+}
+
+// ge_cond_assign applies `field.fe_cond_assign` with `ctrl` to each
+// coordinate pair of `ge` and `a`.  `ge_set_bytes` relies on it to copy
+// `a` into `ge` when ctrl == 1 and to leave `ge` unchanged when
+// ctrl == 0.
+ge_cond_assign :: proc "contextless" (ge, a: ^Group_Element, ctrl: int) {
+	field.fe_cond_assign(&ge.x, &a.x, ctrl)
+	field.fe_cond_assign(&ge.y, &a.y, ctrl)
+	field.fe_cond_assign(&ge.z, &a.z, ctrl)
+	field.fe_cond_assign(&ge.t, &a.t, ctrl)
+}
+
+// ge_cond_select applies `field.fe_cond_select` with `ctrl` to each
+// coordinate, writing the selection between `a` and `b` to `ge`.
+// NOTE(review): presumably ctrl == 0 yields `a` and ctrl == 1 yields `b`;
+// confirm against `field.fe_cond_select`.
+ge_cond_select :: proc "contextless" (ge, a, b: ^Group_Element, ctrl: int) {
+	field.fe_cond_select(&ge.x, &a.x, &b.x, ctrl)
+	field.fe_cond_select(&ge.y, &a.y, &b.y, ctrl)
+	field.fe_cond_select(&ge.z, &a.z, &b.z, ctrl)
+	field.fe_cond_select(&ge.t, &a.t, &b.t, ctrl)
+}
+
+// ge_equal returns 1 iff `a` and `b` represent the same affine point,
+// and 0 otherwise.  The projective coordinates are compared by
+// cross-multiplication, so no inversion is needed:
+//
+//   X/Z == X'/Z'  <=>  X*Z' == X'*Z
+//   Y/Z == Y'/Z'  <=>  Y*Z' == Y'*Z
+@(require_results)
+ge_equal :: proc "contextless" (a, b: ^Group_Element) -> int {
+	lhs_x, rhs_x, lhs_y, rhs_y: field.Tight_Field_Element = ---, ---, ---, ---
+
+	a_z, b_z := field.fe_relax_cast(&a.z), field.fe_relax_cast(&b.z)
+	field.fe_carry_mul(&lhs_x, field.fe_relax_cast(&a.x), b_z)
+	field.fe_carry_mul(&rhs_x, field.fe_relax_cast(&b.x), a_z)
+	field.fe_carry_mul(&lhs_y, field.fe_relax_cast(&a.y), b_z)
+	field.fe_carry_mul(&rhs_y, field.fe_relax_cast(&b.y), a_z)
+
+	// Both comparisons are always evaluated and combined bitwise.
+	x_matches := field.fe_equal(&lhs_x, &rhs_x)
+	y_matches := field.fe_equal(&lhs_y, &rhs_y)
+
+	field.fe_clear_vec([]^field.Tight_Field_Element{&lhs_x, &lhs_y, &rhs_x, &rhs_y})
+
+	return x_matches & y_matches
+}
+
+// ge_is_small_order reports whether 8 * ge is the identity, by doubling
+// three times and comparing the result against `GE_IDENTITY`.
+@(require_results)
+ge_is_small_order :: proc "contextless" (ge: ^Group_Element) -> bool {
+	acc: Group_Element = ---
+
+	// acc = 2 * ge, then two more in-place doublings give 8 * ge.
+	ge_double(&acc, ge)
+	for _ in 0 ..< 2 {
+		ge_double(&acc, &acc)
+	}
+
+	is_identity := ge_equal(&acc, &GE_IDENTITY)
+	return is_identity == 1
+}
+
+// ge_in_prime_order_subgroup_vartime reports whether multiplying `ge` by
+// `SC_ELL` yields the identity.  The multiplication is done in variable
+// time, so `ge` must not be secret.
+@(require_results)
+ge_in_prime_order_subgroup_vartime :: proc "contextless" (ge: ^Group_Element) -> bool {
+	// This is currently *very* expensive.  The faster method would be
+	// something like (https://eprint.iacr.org/2022/1164.pdf), however
+	// that is a ~50% speedup, and a lot of added complexity for something
+	// that is better solved by "just use ristretto255".
+	tmp: Group_Element = ---
+	_ge_scalarmult(&tmp, ge, &SC_ELL, true)
+	return ge_equal(&tmp, &GE_IDENTITY) == 1
+}

+ 61 - 0
core/crypto/_edwards25519/edwards25519_scalar.odin

@@ -0,0 +1,61 @@
+package _edwards25519
+
+import "base:intrinsics"
+import field "core:crypto/_fiat/field_scalar25519"
+import "core:mem"
+
+// Scalar is a scalar field element, held in the Montgomery domain.
+Scalar :: field.Montgomery_Domain_Field_Element
+
+// SC_ELL is the four limbs of `field.ELL`, held outside the Montgomery
+// domain so that it can be passed directly to `_ge_scalarmult`.
+//
+// WARNING: This is non-canonical and only to be used when checking if
+// a group element is on the prime-order subgroup.
+@(private)
+SC_ELL := field.Non_Montgomery_Domain_Field_Element {
+	field.ELL[0],
+	field.ELL[1],
+	field.ELL[2],
+	field.ELL[3],
+}
+
+// sc_set_u64 sets `sc` to the integer `i`.  The value is placed in the
+// first limb of a non-Montgomery element, converted into the Montgomery
+// domain, and the intermediate is wiped.
+sc_set_u64 :: proc "contextless" (sc: ^Scalar, i: u64) {
+	plain := field.Non_Montgomery_Domain_Field_Element{i, 0, 0, 0}
+	defer mem.zero_explicit(&plain, size_of(plain))
+
+	field.fe_to_montgomery(sc, &plain)
+}
+
+// sc_set_bytes sets `sc` from the 32-byte encoding `b` and returns the
+// result of `field.fe_from_bytes`.
+// NOTE(review): presumably false means `b` was not a canonical (fully
+// reduced) scalar; confirm against `field.fe_from_bytes`.
+//
+// Traps if `len(b) != 32`.
+@(require_results)
+sc_set_bytes :: proc "contextless" (sc: ^Scalar, b: []byte) -> bool {
+	if len(b) != 32 {
+		intrinsics.trap()
+	}
+	b_ := transmute(^[32]byte)(raw_data(b))
+	return field.fe_from_bytes(sc, b_)
+}
+
+// sc_set_bytes_rfc8032 sets `sc` from the 32-byte string `b` via
+// `field.fe_from_bytes_rfc8032`.  Unlike `sc_set_bytes` it has no
+// failure result.
+//
+// Traps if `len(b) != 32`.
+sc_set_bytes_rfc8032 :: proc "contextless" (sc: ^Scalar, b: []byte) {
+	if len(b) != 32 {
+		intrinsics.trap()
+	}
+	b_ := transmute(^[32]byte)(raw_data(b))
+	field.fe_from_bytes_rfc8032(sc, b_)
+}
+
+// sc_clear wipes `sc` via `mem.zero_explicit`.
+sc_clear :: proc "contextless" (sc: ^Scalar) {
+	mem.zero_explicit(sc, size_of(sc^))
+}
+
+// The remaining scalar operations are direct aliases of the scalar field
+// routines.
+
+// Assignment and (de)serialization.
+sc_set :: field.fe_set
+sc_set_bytes_wide :: field.fe_from_bytes_wide
+sc_bytes :: field.fe_to_bytes
+
+// Constants.
+sc_zero :: field.fe_zero
+sc_one :: field.fe_one
+
+// Arithmetic.
+sc_add :: field.fe_add
+sc_sub :: field.fe_sub
+sc_negate :: field.fe_opp
+sc_mul :: field.fe_mul
+sc_square :: field.fe_square
+
+// Conditional assignment and comparison.
+sc_cond_assign :: field.fe_cond_assign
+sc_equal :: field.fe_equal

+ 288 - 0
core/crypto/_edwards25519/edwards25519_scalar_mul.odin

@@ -0,0 +1,288 @@
+package _edwards25519
+
+import field "core:crypto/_fiat/field_scalar25519"
+import "core:math/bits"
+import "core:mem"
+
+// GE_BASEPOINT_TABLE is 1 * G, ... 15 * G, in precomputed format.
+//
+// Note: When generating, the values were reduced to Tight_Field_Element
+// ranges, even though that is not required.
+@(private)
+GE_BASEPOINT_TABLE := Multiply_Table {
+	{
+		{62697248952638, 204681361388450, 631292143396476, 338455783676468, 1213667448819585},
+		{1288382639258501, 245678601348599, 269427782077623, 1462984067271730, 137412439391563},
+		{301289933810280, 1259582250014073, 1422107436869536, 796239922652654, 1953934009299142},
+		{2, 0, 0, 0, 0},
+	},
+	{
+		{1519297034332653, 1098796920435767, 1823476547744119, 808144629470969, 2110930855619772},
+		{338005982828284, 1667856962156925, 100399270107451, 1604566703601691, 1950338038771369},
+		{1920505767731247, 1443759578976892, 1659852098357048, 1484431291070208, 275018744912646},
+		{763163817085987, 2195095074806923, 2167883174351839, 1868059999999762, 911071066608705},
+	},
+	{
+		{960627541894068, 1314966688943942, 1126875971034044, 2059608312958945, 605975666152586},
+		{1714478358025626, 2209607666607510, 1600912834284834, 496072478982142, 481970031861896},
+		{851735079403194, 1088965826757164, 141569479297499, 602804610059257, 2004026468601520},
+		{197585529552380, 324719066578543, 564481854250498, 1173818332764578, 35452976395676},
+	},
+	{
+		{1152980410747203, 2196804280851952, 25745194962557, 1915167295473129, 1266299690309224},
+		{809905889679060, 979732230071345, 1509972345538142, 188492426534402, 818965583123815},
+		{997685409185036, 1451818320876327, 2126681166774509, 2000509606057528, 235432372486854},
+		{887734189279642, 1460338685162044, 877378220074262, 102436391401299, 153369156847490},
+	},
+	{
+		{2056621900836770, 1821657694132497, 1627986892909426, 1163363868678833, 1108873376459226},
+		{1187697490593623, 1066539945237335, 885654531892000, 1357534489491782, 359370291392448},
+		{1509033452137525, 1305318174298508, 613642471748944, 1987256352550234, 1044283663101541},
+		{220105720697037, 387661783287620, 328296827867762, 360035589590664, 795213236824054},
+	},
+	{
+		{1820794733038396, 1612235121681074, 757405923441402, 1094031020892801, 231025333128907},
+		{1639067873254194, 1484176557946322, 300800382144789, 1329915446659183, 1211704578730455},
+		{641900794791527, 1711751746971612, 179044712319955, 576455585963824, 1852617592509865},
+		{743549047192397, 685091042550147, 1952415336873496, 1965124675654685, 513364998442917},
+	},
+	{
+		{1004557076870448, 1762911374844520, 1330807633622723, 384072910939787, 953849032243810},
+		{2178275058221458, 257933183722891, 376684351537894, 2010189102001786, 1981824297484148},
+		{1332915663881114, 1286540505502549, 1741691283561518, 977214932156314, 1764059494778091},
+		{429702949064027, 1368332611650677, 2019867176450999, 2212258376161746, 526160996742554},
+	},
+	{
+		{2098932988258576, 2203688382075948, 2120400160059479, 1748488020948146, 1203264167282624},
+		{677131386735829, 1850249298025188, 672782146532031, 2144145693078904, 2088656272813787},
+		{1065622343976192, 1573853211848116, 223560413590068, 333846833073379, 27832122205830},
+		{1781008836504573, 917619542051793, 544322748939913, 882577394308384, 1720521246471195},
+	},
+	{
+		{660120928379860, 2081944024858618, 1878411111349191, 424587356517195, 2111317439894005},
+		{1834193977811532, 1864164086863319, 797334633289424, 150410812403062, 2085177078466389},
+		{1438117271371866, 783915531014482, 388731514584658, 292113935417795, 1945855002546714},
+		{1678140823166658, 679103239148744, 614102761596238, 1052962498997885, 1863983323810390},
+	},
+	{
+		{1690309392496233, 1116333140326275, 1377242323631039, 717196888780674, 82724646713353},
+		{1722370213432106, 74265192976253, 264239578448472, 1714909985012994, 2216984958602173},
+		{2010482366920922, 1294036471886319, 566466395005815, 1631955803657320, 1751698647538458},
+		{1073230604155753, 1159087041338551, 1664057985455483, 127472702826203, 1339591128522371},
+	},
+	{
+		{478053307175577, 2179515791720985, 21146535423512, 1831683844029536, 462805561553981},
+		{1945267486565588, 1298536818409655, 2214511796262989, 1904981051429012, 252904800782086},
+		{268945954671210, 222740425595395, 1208025911856230, 1080418823003555, 75929831922483},
+		{1884784014268948, 643868448202966, 978736549726821, 46385971089796, 1296884812292320},
+	},
+	{
+		{1861159462859103, 7077532564710, 963010365896826, 1938780006785270, 766241051941647},
+		{1778966986051906, 1713995999765361, 1394565822271816, 1366699246468722, 1213407027149475},
+		{1978989286560907, 2135084162045594, 1951565508865477, 671788336314416, 293123929458176},
+		{902608944504080, 2167765718046481, 1285718473078022, 1222562171329269, 492109027844479},
+	},
+	{
+		{1820807832746213, 1029220580458586, 1101997555432203, 1039081975563572, 202477981158221},
+		{1866134980680205, 2222325502763386, 1830284629571201, 1046966214478970, 418381946936795},
+		{1783460633291322, 1719505443254998, 1810489639976220, 877049370713018, 2187801198742619},
+		{197118243000763, 305493867565736, 518814410156522, 1656246186645170, 901894734874934},
+	},
+	{
+		{225454942125915, 478410476654509, 600524586037746, 643450007230715, 1018615928259319},
+		{1733330584845708, 881092297970296, 507039890129464, 496397090721598, 2230888519577628},
+		{690155664737246, 1010454785646677, 753170144375012, 1651277613844874, 1622648796364156},
+		{1321310321891618, 1089655277873603, 235891750867089, 815878279563688, 1709264240047556},
+	},
+	{
+		{805027036551342, 1387174275567452, 1156538511461704, 1465897486692171, 1208567094120903},
+		{2228417017817483, 202885584970535, 2182114782271881, 2077405042592934, 1029684358182774},
+		{460447547653983, 627817697755692, 524899434670834, 1228019344939427, 740684787777653},
+		{849757462467675, 447476306919899, 422618957298818, 302134659227815, 675831828440895},
+	},
+}
+
+// ge_scalarmult sets `ge = sc * p`.  The scalar is converted out of the
+// Montgomery domain for the multiplication, and the converted copy is
+// wiped before returning.
+ge_scalarmult :: proc "contextless" (ge, p: ^Group_Element, sc: ^Scalar) {
+	sc_plain: field.Non_Montgomery_Domain_Field_Element
+	defer mem.zero_explicit(&sc_plain, size_of(sc_plain))
+
+	field.fe_from_montgomery(&sc_plain, sc)
+	_ge_scalarmult(ge, p, &sc_plain)
+}
+
+// ge_scalarmult_basepoint sets `ge = sc * GE_BASEPOINT`, using the
+// generic `ge_scalarmult` routine.
+ge_scalarmult_basepoint :: proc "contextless" (ge: ^Group_Element, sc: ^Scalar) {
+	// Something like the comb method from "Fast and compact elliptic-curve
+	// cryptography" Section 3.3, would be more performant, but more
+	// complex.
+	//
+	// - https://eprint.iacr.org/2012/309
+	ge_scalarmult(ge, &GE_BASEPOINT, sc)
+}
+
+// ge_scalarmult_vartime sets `ge = sc * p` in variable time.  Unlike
+// `ge_scalarmult`, the converted scalar is not wiped afterwards, so this
+// must only be used when `sc` is not secret.
+ge_scalarmult_vartime :: proc "contextless" (ge, p: ^Group_Element, sc: ^Scalar) {
+	tmp: field.Non_Montgomery_Domain_Field_Element
+	field.fe_from_montgomery(&tmp, sc)
+
+	_ge_scalarmult(ge, p, &tmp, true)
+}
+
+// ge_double_scalarmult_basepoint_vartime sets `ge = a * A + b * G`, where
+// G is the basepoint whose multiples are stored in `GE_BASEPOINT_TABLE`.
+//
+// Both scalars are processed in variable time and no temporaries are
+// wiped, so neither `a`, `b`, nor `A` may be secret.
+ge_double_scalarmult_basepoint_vartime :: proc "contextless" (
+	ge: ^Group_Element,
+	a: ^Scalar,
+	A: ^Group_Element,
+	b: ^Scalar,
+) {
+	// Strauss-Shamir, commonly referred to as the "Shamir trick",
+	// saves half the doublings, relative to doing this the naive way.
+	//
+	// ABGLSV-Pornin (https://eprint.iacr.org/2020/454) is faster,
+	// but significantly more complex, and has incompatibilities with
+	// mixed-order group elements.
+
+	tmp_add: Add_Scratch = ---
+	tmp_addend: Addend_Group_Element = ---
+	tmp_dbl: Double_Scratch = ---
+	tmp: Group_Element = ---
+
+	// Precompute 1 * A, ..., 15 * A; the basepoint table is static.
+	A_tbl: Multiply_Table = ---
+	mul_tbl_set(&A_tbl, A, &tmp_add)
+
+	sc_a, sc_b: field.Non_Montgomery_Domain_Field_Element
+	field.fe_from_montgomery(&sc_a, a)
+	field.fe_from_montgomery(&sc_b, b)
+
+	// Walk both scalars one byte at a time, from byte 31 down to byte 0,
+	// consuming the high nibble and then the low nibble of each.  The
+	// accumulator is shared, so each group of four doublings serves both
+	// scalars.
+	ge_identity(&tmp)
+	for i := 31; i >= 0; i = i - 1 {
+		limb := i / 8
+		shift := uint(i & 7) * 8
+
+		limb_byte_a := sc_a[limb] >> shift
+		limb_byte_b := sc_b[limb] >> shift
+
+		hi_a, lo_a := (limb_byte_a >> 4) & 0x0f, limb_byte_a & 0x0f
+		hi_b, lo_b := (limb_byte_b >> 4) & 0x0f, limb_byte_b & 0x0f
+
+		// The accumulator is still the identity on the first iteration,
+		// so the leading doublings are skipped.
+		if i != 31 {
+			ge_double(&tmp, &tmp, &tmp_dbl)
+			ge_double(&tmp, &tmp, &tmp_dbl)
+			ge_double(&tmp, &tmp, &tmp_dbl)
+			ge_double(&tmp, &tmp, &tmp_dbl)
+		}
+		mul_tbl_add(&tmp, &A_tbl, hi_a, &tmp_add, &tmp_addend, true)
+		mul_tbl_add(&tmp, &GE_BASEPOINT_TABLE, hi_b, &tmp_add, &tmp_addend, true)
+
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		mul_tbl_add(&tmp, &A_tbl, lo_a, &tmp_add, &tmp_addend, true)
+		mul_tbl_add(&tmp, &GE_BASEPOINT_TABLE, lo_b, &tmp_add, &tmp_addend, true)
+	}
+
+	ge_set(ge, &tmp)
+}
+
+// _ge_scalarmult sets `ge = sc * p`, where `sc` is a scalar outside the
+// Montgomery domain.
+//
+// With `unsafe_is_vartime` false (the default), table lookups go through
+// the constant-time path of `mul_tbl_add` and the accumulator and
+// scratch areas are wiped before returning.  With it true, lookups are
+// variable time and nothing is wiped, so `sc` must not be secret.
+@(private)
+_ge_scalarmult :: proc "contextless" (
+	ge, p: ^Group_Element,
+	sc: ^field.Non_Montgomery_Domain_Field_Element,
+	unsafe_is_vartime := false,
+) {
+	// Do the simplest possible thing that works and provides adequate
+	// performance, which is a windowed (4-bit) double-and-add.
+
+	tmp_add: Add_Scratch = ---
+	tmp_addend: Addend_Group_Element = ---
+	tmp_dbl: Double_Scratch = ---
+	tmp: Group_Element = ---
+
+	// Precompute 1 * p, ..., 15 * p.
+	p_tbl: Multiply_Table = ---
+	mul_tbl_set(&p_tbl, p, &tmp_add)
+
+	// Walk the scalar one byte at a time, from byte 31 down to byte 0,
+	// consuming the high nibble and then the low nibble of each.
+	ge_identity(&tmp)
+	for i := 31; i >= 0; i = i - 1 {
+		limb := i / 8
+		shift := uint(i & 7) * 8
+		limb_byte := sc[limb] >> shift
+
+		hi, lo := (limb_byte >> 4) & 0x0f, limb_byte & 0x0f
+
+		// The accumulator is still the identity on the first iteration,
+		// so the leading doublings are skipped.
+		if i != 31 {
+			ge_double(&tmp, &tmp, &tmp_dbl)
+			ge_double(&tmp, &tmp, &tmp_dbl)
+			ge_double(&tmp, &tmp, &tmp_dbl)
+			ge_double(&tmp, &tmp, &tmp_dbl)
+		}
+		mul_tbl_add(&tmp, &p_tbl, hi, &tmp_add, &tmp_addend, unsafe_is_vartime)
+
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		mul_tbl_add(&tmp, &p_tbl, lo, &tmp_add, &tmp_addend, unsafe_is_vartime)
+	}
+
+	ge_set(ge, &tmp)
+
+	if !unsafe_is_vartime {
+		ge_clear(&tmp)
+		mem.zero_explicit(&tmp_add, size_of(Add_Scratch))
+		mem.zero_explicit(&tmp_addend, size_of(Addend_Group_Element))
+		mem.zero_explicit(&tmp_dbl, size_of(Double_Scratch))
+	}
+}
+
+// Multiply_Table holds the multiples 1 * P, ..., 15 * P of a point in
+// addend form; entry `i` is `(i + 1) * P`.
+@(private)
+Multiply_Table :: [15]Addend_Group_Element // 0 = inf, which is implicit.
+
+// mul_tbl_set fills `tbl` with 1 * ge, 2 * ge, ..., 15 * ge in addend
+// form.  `tmp_add` is caller-provided scratch for the additions; the
+// running multiple kept locally is wiped before returning.
+@(private)
+mul_tbl_set :: proc "contextless" (
+	tbl: ^Multiply_Table,
+	ge: ^Group_Element,
+	tmp_add: ^Add_Scratch,
+) {
+	// Running multiple of ge, starting at 1 * ge.
+	multiple: Group_Element = ---
+	defer ge_clear(&multiple)
+	ge_set(&multiple, ge)
+
+	base := &tbl[0]
+	ge_addend_set(base, ge)
+	for slot in 1 ..< 15 {
+		// multiple += ge, stored as entry `slot`, i.e. (slot + 1) * ge.
+		ge_add_addend(&multiple, &multiple, base, tmp_add)
+		ge_addend_set(&tbl[slot], &multiple)
+	}
+}
+
+// mul_tbl_add sets `ge = ge + idx * P`, where `tbl` holds the multiples
+// of P and `idx` is a 4-bit window value (0 to 15); idx == 0 adds the
+// point at infinity.
+//
+// `tmp_add` and `tmp_addend` are caller-provided scratch.  When
+// `unsafe_is_vartime` is true the table is indexed directly and the
+// addition is skipped for idx == 0.  Otherwise every table entry is
+// visited and the addition is always performed.
+@(private)
+mul_tbl_add :: proc "contextless" (
+	ge: ^Group_Element,
+	tbl: ^Multiply_Table,
+	idx: u64,
+	tmp_add: ^Add_Scratch,
+	tmp_addend: ^Addend_Group_Element,
+	unsafe_is_vartime: bool,
+) {
+	// Variable time lookup, with the addition omitted entirely if idx == 0.
+	if unsafe_is_vartime {
+		// Skip adding the point at infinity.
+		if idx != 0 {
+			ge_add_addend(ge, ge, &tbl[idx - 1], tmp_add)
+		}
+		return
+	}
+
+	// Constant time lookup.
+	tmp_addend^ = {
+		// Point at infinity (0, 1, 1, 0) in precomputed form
+		{1, 0, 0, 0, 0}, // y - x
+		{1, 0, 0, 0, 0}, // y + x
+		{0, 0, 0, 0, 0}, // t * 2d
+		{2, 0, 0, 0, 0}, // z * 2
+	}
+	for i := u64(1); i < 16; i = i + 1 {
+		// `0 - (i ~ idx)` borrows exactly when i != idx, so the inverted
+		// borrow bit is 1 only for the entry being looked up.
+		_, ctrl := bits.sub_u64(0, (i ~ idx), 0)
+		ge_addend_conditional_assign(tmp_addend, &tbl[i - 1], int(~ctrl) & 1)
+	}
+	ge_add_addend(ge, ge, tmp_addend, tmp_add)
+}

+ 2 - 2
core/crypto/_fiat/fiat.odin

@@ -9,7 +9,7 @@ package fiat
 u1 :: distinct u8
 u1 :: distinct u8
 i1 :: distinct i8
 i1 :: distinct i8
 
 
-@(optimization_mode="none")
+@(optimization_mode = "none")
 cmovznz_u64 :: proc "contextless" (arg1: u1, arg2, arg3: u64) -> (out1: u64) {
 cmovznz_u64 :: proc "contextless" (arg1: u1, arg2, arg3: u64) -> (out1: u64) {
 	x1 := (u64(arg1) * 0xffffffffffffffff)
 	x1 := (u64(arg1) * 0xffffffffffffffff)
 	x2 := ((x1 & arg3) | ((~x1) & arg2))
 	x2 := ((x1 & arg3) | ((~x1) & arg2))
@@ -17,7 +17,7 @@ cmovznz_u64 :: proc "contextless" (arg1: u1, arg2, arg3: u64) -> (out1: u64) {
 	return
 	return
 }
 }
 
 
-@(optimization_mode="none")
+@(optimization_mode = "none")
 cmovznz_u32 :: proc "contextless" (arg1: u1, arg2, arg3: u32) -> (out1: u32) {
 cmovznz_u32 :: proc "contextless" (arg1: u1, arg2, arg3: u32) -> (out1: u32) {
 	x1 := (u32(arg1) * 0xffffffff)
 	x1 := (u32(arg1) * 0xffffffff)
 	x2 := ((x1 & arg3) | ((~x1) & arg2))
 	x2 := ((x1 & arg3) | ((~x1) & arg2))

+ 175 - 46
core/crypto/_fiat/field_curve25519/field.odin

@@ -3,14 +3,32 @@ package field_curve25519
 import "core:crypto"
 import "core:crypto"
 import "core:mem"
 import "core:mem"
 
 
-fe_relax_cast :: #force_inline proc "contextless" (arg1: ^Tight_Field_Element) -> ^Loose_Field_Element {
+fe_relax_cast :: #force_inline proc "contextless" (
+	arg1: ^Tight_Field_Element,
+) -> ^Loose_Field_Element {
 	return transmute(^Loose_Field_Element)(arg1)
 	return transmute(^Loose_Field_Element)(arg1)
 }
 }
 
 
-fe_tighten_cast :: #force_inline proc "contextless" (arg1: ^Loose_Field_Element) -> ^Tight_Field_Element {
+fe_tighten_cast :: #force_inline proc "contextless" (
+	arg1: ^Loose_Field_Element,
+) -> ^Tight_Field_Element {
 	return transmute(^Tight_Field_Element)(arg1)
 	return transmute(^Tight_Field_Element)(arg1)
 }
 }
 
 
+// fe_clear wipes the field element that `arg1` points to, using
+// `mem.zero_explicit`.  It accepts a pointer to either a tight or a
+// loose field element.
+fe_clear :: proc "contextless" (
+	arg1: $T,
+) where T == ^Tight_Field_Element || T == ^Loose_Field_Element {
+	mem.zero_explicit(arg1, size_of(arg1^))
+}
+
+// fe_clear_vec calls `fe_clear` on every pointer in the slice `arg1`.
+// All elements of the slice must be of the same kind (tight or loose).
+fe_clear_vec :: proc "contextless" (
+	arg1: $T,
+) where T == []^Tight_Field_Element || T == []^Loose_Field_Element {
+	for fe in arg1 {
+		fe_clear(fe)
+	}
+}
+
 fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte) {
 fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte) {
 	// Ignore the unused bit by copying the input and masking the bit off
 	// Ignore the unused bit by copying the input and masking the bit off
 	// prior to deserialization.
 	// prior to deserialization.
@@ -23,12 +41,25 @@ fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte
 	mem.zero_explicit(&tmp1, size_of(tmp1))
 	mem.zero_explicit(&tmp1, size_of(tmp1))
 }
 }
 
 
+fe_is_negative :: proc "contextless" (arg1: ^Tight_Field_Element) -> int {
+	tmp1: [32]byte = ---
+
+	fe_to_bytes(&tmp1, arg1)
+	ret := tmp1[0] & 1
+
+	mem.zero_explicit(&tmp1, size_of(tmp1))
+
+	return int(ret)
+}
+
 fe_equal :: proc "contextless" (arg1, arg2: ^Tight_Field_Element) -> int {
 fe_equal :: proc "contextless" (arg1, arg2: ^Tight_Field_Element) -> int {
-	tmp2: [32]byte = ---
+	tmp1, tmp2: [32]byte = ---, ---
 
 
+	fe_to_bytes(&tmp1, arg1)
 	fe_to_bytes(&tmp2, arg2)
 	fe_to_bytes(&tmp2, arg2)
-	ret := fe_equal_bytes(arg1, &tmp2)
+	ret := crypto.compare_constant_time(tmp1[:], tmp2[:])
 
 
+	mem.zero_explicit(&tmp1, size_of(tmp1))
 	mem.zero_explicit(&tmp2, size_of(tmp2))
 	mem.zero_explicit(&tmp2, size_of(tmp2))
 
 
 	return ret
 	return ret
@@ -46,7 +77,11 @@ fe_equal_bytes :: proc "contextless" (arg1: ^Tight_Field_Element, arg2: ^[32]byt
 	return ret
 	return ret
 }
 }
 
 
-fe_carry_pow2k :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element, arg2: uint) {
+fe_carry_pow2k :: proc "contextless" (
+	out1: ^Tight_Field_Element,
+	arg1: ^Loose_Field_Element,
+	arg2: uint,
+) {
 	// Special case: `arg1^(2 * 0) = 1`, though this should never happen.
 	// Special case: `arg1^(2 * 0) = 1`, though this should never happen.
 	if arg2 == 0 {
 	if arg2 == 0 {
 		fe_one(out1)
 		fe_one(out1)
@@ -54,27 +89,46 @@ fe_carry_pow2k :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element,
 	}
 	}
 
 
 	fe_carry_square(out1, arg1)
 	fe_carry_square(out1, arg1)
-	for _ in 1..<arg2 {
+	for _ in 1 ..< arg2 {
 		fe_carry_square(out1, fe_relax_cast(out1))
 		fe_carry_square(out1, fe_relax_cast(out1))
 	}
 	}
 }
 }
 
 
+fe_carry_add :: #force_inline proc "contextless" (out1, arg1, arg2: ^Tight_Field_Element) {
+	fe_add(fe_relax_cast(out1), arg1, arg2)
+	fe_carry(out1, fe_relax_cast(out1))
+}
+
+fe_carry_sub :: #force_inline proc "contextless" (out1, arg1, arg2: ^Tight_Field_Element) {
+	fe_sub(fe_relax_cast(out1), arg1, arg2)
+	fe_carry(out1, fe_relax_cast(out1))
+}
+
 fe_carry_opp :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
 fe_carry_opp :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
 	fe_opp(fe_relax_cast(out1), arg1)
 	fe_opp(fe_relax_cast(out1), arg1)
 	fe_carry(out1, fe_relax_cast(out1))
 	fe_carry(out1, fe_relax_cast(out1))
 }
 }
 
 
-fe_carry_invsqrt :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) -> int {
-	// Inverse square root taken from Monocypher.
+fe_carry_abs :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
+	fe_cond_negate(out1, arg1, fe_is_negative(arg1))
+}
 
 
-	tmp1, tmp2, tmp3: Tight_Field_Element = ---, ---, ---
+fe_carry_sqrt_ratio_m1 :: proc "contextless" (
+	out1: ^Tight_Field_Element,
+	arg1: ^Loose_Field_Element, // u
+	arg2: ^Loose_Field_Element, // v
+) -> int {
+	// SQRT_RATIO_M1(u, v) from RFC 9496 - 4.2, based on the inverse
+	// square root from Monocypher.
+
+	w: Tight_Field_Element = ---
+	fe_carry_mul(&w, arg1, arg2) // u * v
 
 
-	// t0 = x^((p-5)/8)
-	// Can be achieved with a simple double & add ladder,
-	// but it would be slower.
-	fe_carry_pow2k(&tmp1, arg1, 1)
+	// r = tmp1 = u * w^((p-5)/8)
+	tmp1, tmp2, tmp3: Tight_Field_Element = ---, ---, ---
+	fe_carry_pow2k(&tmp1, fe_relax_cast(&w), 1)
 	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 2)
 	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 2)
-	fe_carry_mul(&tmp2, arg1, fe_relax_cast(&tmp2))
+	fe_carry_mul(&tmp2, fe_relax_cast(&w), fe_relax_cast(&tmp2))
 	fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), fe_relax_cast(&tmp2))
 	fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), fe_relax_cast(&tmp2))
 	fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 1)
 	fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 1)
 	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
 	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
@@ -93,46 +147,121 @@ fe_carry_invsqrt :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element
 	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp2), 50)
 	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp2), 50)
 	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
 	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
 	fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 2)
 	fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 2)
-	fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), arg1)
-
-	// quartic = x^((p-1)/4)
-	quartic := &tmp2
-	fe_carry_square(quartic, fe_relax_cast(&tmp1))
-	fe_carry_mul(quartic, fe_relax_cast(quartic), arg1)
-
-	// Serialize quartic once to save on repeated serialization/sanitization.
-	quartic_buf: [32]byte = ---
-	fe_to_bytes(&quartic_buf, quartic)
-	check := &tmp3
-
-	fe_one(check)
-	p1 := fe_equal_bytes(check, &quartic_buf)
-	fe_carry_opp(check, check)
-	m1 := fe_equal_bytes(check, &quartic_buf)
-	fe_carry_opp(check, &SQRT_M1)
-	ms := fe_equal_bytes(check, &quartic_buf)
-
-	// if quartic == -1 or sqrt(-1)
-	// then  isr = x^((p-1)/4) * sqrt(-1)
-	// else  isr = x^((p-1)/4)
-	fe_carry_mul(out1, fe_relax_cast(&tmp1), fe_relax_cast(&SQRT_M1))
-	fe_cond_assign(out1, &tmp1, (m1|ms) ~ 1)
+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), fe_relax_cast(&w)) // w^((p-5)/8)
 
 
-	mem.zero_explicit(&tmp1, size_of(tmp1))
-	mem.zero_explicit(&tmp2, size_of(tmp2))
-	mem.zero_explicit(&tmp3, size_of(tmp3))
-	mem.zero_explicit(&quartic_buf, size_of(quartic_buf))
+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), arg1) // u * w^((p-5)/8)
+
+	// Serialize `check` once to save on repeated serialization.
+	r, check := &tmp1, &tmp2
+	b: [32]byte = ---
+	fe_carry_square(check, fe_relax_cast(r))
+	fe_carry_mul(check, fe_relax_cast(check), arg2) // check * v
+	fe_to_bytes(&b, check)
+
+	u, neg_u, neg_u_i := &tmp3, &w, check
+	fe_carry(u, arg1)
+	fe_carry_opp(neg_u, u)
+	fe_carry_mul(neg_u_i, fe_relax_cast(neg_u), fe_relax_cast(&FE_SQRT_M1))
+
+	correct_sign_sqrt := fe_equal_bytes(u, &b)
+	flipped_sign_sqrt := fe_equal_bytes(neg_u, &b)
+	flipped_sign_sqrt_i := fe_equal_bytes(neg_u_i, &b)
 
 
-	return p1 | m1
+	r_prime := check
+	fe_carry_mul(r_prime, fe_relax_cast(r), fe_relax_cast(&FE_SQRT_M1))
+	fe_cond_assign(r, r_prime, flipped_sign_sqrt | flipped_sign_sqrt_i)
+
+	// Pick the non-negative square root.
+	fe_carry_abs(out1, r)
+
+	fe_clear_vec([]^Tight_Field_Element{&w, &tmp1, &tmp2, &tmp3})
+	mem.zero_explicit(&b, size_of(b))
+
+	return correct_sign_sqrt | flipped_sign_sqrt
 }
 }
 
 
-fe_carry_inv :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+fe_carry_inv :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
 	tmp1: Tight_Field_Element
 	tmp1: Tight_Field_Element
 
 
 	fe_carry_square(&tmp1, arg1)
 	fe_carry_square(&tmp1, arg1)
-	_ = fe_carry_invsqrt(&tmp1, fe_relax_cast(&tmp1))
+	_ = fe_carry_sqrt_ratio_m1(&tmp1, fe_relax_cast(&FE_ONE), fe_relax_cast(&tmp1))
 	fe_carry_square(&tmp1, fe_relax_cast(&tmp1))
 	fe_carry_square(&tmp1, fe_relax_cast(&tmp1))
 	fe_carry_mul(out1, fe_relax_cast(&tmp1), arg1)
 	fe_carry_mul(out1, fe_relax_cast(&tmp1), arg1)
 
 
-	mem.zero_explicit(&tmp1, size_of(tmp1))
+	fe_clear(&tmp1)
+}
+
+fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
+	out1[0] = 0
+	out1[1] = 0
+	out1[2] = 0
+	out1[3] = 0
+	out1[4] = 0
+}
+
+fe_one :: proc "contextless" (out1: ^Tight_Field_Element) {
+	out1[0] = 1
+	out1[1] = 0
+	out1[2] = 0
+	out1[3] = 0
+	out1[4] = 0
+}
+
+fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) {
+	x1 := arg1[0]
+	x2 := arg1[1]
+	x3 := arg1[2]
+	x4 := arg1[3]
+	x5 := arg1[4]
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+}
+
+@(optimization_mode = "none")
+fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) {
+	mask := (u64(arg1) * 0xffffffffffffffff)
+	x := (out1[0] ~ out2[0]) & mask
+	x1, y1 := out1[0] ~ x, out2[0] ~ x
+	x = (out1[1] ~ out2[1]) & mask
+	x2, y2 := out1[1] ~ x, out2[1] ~ x
+	x = (out1[2] ~ out2[2]) & mask
+	x3, y3 := out1[2] ~ x, out2[2] ~ x
+	x = (out1[3] ~ out2[3]) & mask
+	x4, y4 := out1[3] ~ x, out2[3] ~ x
+	x = (out1[4] ~ out2[4]) & mask
+	x5, y5 := out1[4] ~ x, out2[4] ~ x
+	out1[0], out2[0] = x1, y1
+	out1[1], out2[1] = x2, y2
+	out1[2], out2[2] = x3, y3
+	out1[3], out2[3] = x4, y4
+	out1[4], out2[4] = x5, y5
+}
+
+@(optimization_mode = "none")
+fe_cond_select :: #force_no_inline proc "contextless" (
+	out1, arg1, arg2: $T,
+	arg3: int,
+) where T == ^Tight_Field_Element || T == ^Loose_Field_Element {
+	mask := (u64(arg3) * 0xffffffffffffffff)
+	x1 := ((mask & arg2[0]) | ((~mask) & arg1[0]))
+	x2 := ((mask & arg2[1]) | ((~mask) & arg1[1]))
+	x3 := ((mask & arg2[2]) | ((~mask) & arg1[2]))
+	x4 := ((mask & arg2[3]) | ((~mask) & arg1[3]))
+	x5 := ((mask & arg2[4]) | ((~mask) & arg1[4]))
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+}
+
+fe_cond_negate :: proc "contextless" (out1, arg1: ^Tight_Field_Element, ctrl: int) {
+	tmp1: Tight_Field_Element = ---
+	fe_carry_opp(&tmp1, arg1)
+	fe_cond_select(out1, arg1, &tmp1, ctrl)
+
+	fe_clear(&tmp1)
 }
 }

+ 29 - 61
core/crypto/_fiat/field_curve25519/field51.odin

@@ -30,8 +30,6 @@ package field_curve25519
 //
 //
 // While the base implementation is provably correct, this implementation
 // While the base implementation is provably correct, this implementation
 // makes no such claims as the port and optimizations were done by hand.
 // makes no such claims as the port and optimizations were done by hand.
-// At some point, it may be worth adding support to fiat-crypto for
-// generating Odin output.
 //
 //
 // TODO:
 // TODO:
 //  * When fiat-crypto supports it, using a saturated 64-bit limbs
 //  * When fiat-crypto supports it, using a saturated 64-bit limbs
@@ -44,7 +42,10 @@ import "core:math/bits"
 Loose_Field_Element :: distinct [5]u64
 Loose_Field_Element :: distinct [5]u64
 Tight_Field_Element :: distinct [5]u64
 Tight_Field_Element :: distinct [5]u64
 
 
-SQRT_M1 := Tight_Field_Element{
+FE_ZERO := Tight_Field_Element{0, 0, 0, 0, 0}
+FE_ONE := Tight_Field_Element{1, 0, 0, 0, 0}
+
+FE_SQRT_M1 := Tight_Field_Element {
 	1718705420411056,
 	1718705420411056,
 	234908883556509,
 	234908883556509,
 	2233514472574048,
 	2233514472574048,
@@ -52,7 +53,13 @@ SQRT_M1 := Tight_Field_Element{
 	765476049583133,
 	765476049583133,
 }
 }
 
 
-_addcarryx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+_addcarryx_u51 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
 	x1 := ((u64(arg1) + arg2) + arg3)
 	x1 := ((u64(arg1) + arg2) + arg3)
 	x2 := (x1 & 0x7ffffffffffff)
 	x2 := (x1 & 0x7ffffffffffff)
 	x3 := fiat.u1((x1 >> 51))
 	x3 := fiat.u1((x1 >> 51))
@@ -61,7 +68,13 @@ _addcarryx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u
 	return
 	return
 }
 }
 
 
-_subborrowx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+_subborrowx_u51 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
 	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3))
 	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3))
 	x2 := fiat.i1((x1 >> 51))
 	x2 := fiat.i1((x1 >> 51))
 	x3 := (u64(x1) & 0x7ffffffffffff)
 	x3 := (u64(x1) & 0x7ffffffffffff)
@@ -70,7 +83,7 @@ _subborrowx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3:
 	return
 	return
 }
 }
 
 
-fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
+fe_carry_mul :: proc "contextless" (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
 	x2, x1 := bits.mul_u64(arg1[4], (arg2[4] * 0x13))
 	x2, x1 := bits.mul_u64(arg1[4], (arg2[4] * 0x13))
 	x4, x3 := bits.mul_u64(arg1[4], (arg2[3] * 0x13))
 	x4, x3 := bits.mul_u64(arg1[4], (arg2[3] * 0x13))
 	x6, x5 := bits.mul_u64(arg1[4], (arg2[2] * 0x13))
 	x6, x5 := bits.mul_u64(arg1[4], (arg2[2] * 0x13))
@@ -169,7 +182,7 @@ fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Eleme
 	out1[4] = x152
 	out1[4] = x152
 }
 }
 
 
-fe_carry_square :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+fe_carry_square :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
 	x1 := (arg1[4] * 0x13)
 	x1 := (arg1[4] * 0x13)
 	x2 := (x1 * 0x2)
 	x2 := (x1 * 0x2)
 	x3 := (arg1[4] * 0x2)
 	x3 := (arg1[4] * 0x2)
@@ -305,8 +318,11 @@ fe_opp :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Ele
 	out1[4] = x5
 	out1[4] = x5
 }
 }
 
 
-@(optimization_mode="none")
-fe_cond_assign :: #force_no_inline proc "contextless" (out1, arg1: ^Tight_Field_Element, arg2: int) {
+@(optimization_mode = "none")
+fe_cond_assign :: #force_no_inline proc "contextless" (
+	out1, arg1: ^Tight_Field_Element,
+	arg2: int,
+) {
 	x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0])
 	x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0])
 	x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1])
 	x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1])
 	x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2])
 	x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2])
@@ -527,7 +543,10 @@ fe_relax :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_E
 	out1[4] = x5
 	out1[4] = x5
 }
 }
 
 
-fe_carry_scmul_121666 :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+fe_carry_scmul_121666 :: proc "contextless" (
+	out1: ^Tight_Field_Element,
+	arg1: ^Loose_Field_Element,
+) {
 	x2, x1 := bits.mul_u64(0x1db42, arg1[4])
 	x2, x1 := bits.mul_u64(0x1db42, arg1[4])
 	x4, x3 := bits.mul_u64(0x1db42, arg1[3])
 	x4, x3 := bits.mul_u64(0x1db42, arg1[3])
 	x6, x5 := bits.mul_u64(0x1db42, arg1[2])
 	x6, x5 := bits.mul_u64(0x1db42, arg1[2])
@@ -565,54 +584,3 @@ fe_carry_scmul_121666 :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_El
 	out1[3] = x27
 	out1[3] = x27
 	out1[4] = x32
 	out1[4] = x32
 }
 }
-
-// The following routines were added by hand, and do not come from fiat-crypto.
-
-fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
-	out1[0] = 0
-	out1[1] = 0
-	out1[2] = 0
-	out1[3] = 0
-	out1[4] = 0
-}
-
-fe_one :: proc "contextless" (out1: ^Tight_Field_Element) {
-	out1[0] = 1
-	out1[1] = 0
-	out1[2] = 0
-	out1[3] = 0
-	out1[4] = 0
-}
-
-fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) {
-	x1 := arg1[0]
-	x2 := arg1[1]
-	x3 := arg1[2]
-	x4 := arg1[3]
-	x5 := arg1[4]
-	out1[0] = x1
-	out1[1] = x2
-	out1[2] = x3
-	out1[3] = x4
-	out1[4] = x5
-}
-
-@(optimization_mode="none")
-fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) {
-	mask := -u64(arg1)
-	x := (out1[0] ~ out2[0]) & mask
-	x1, y1 := out1[0] ~ x, out2[0] ~ x
-	x = (out1[1] ~ out2[1]) & mask
-	x2, y2 := out1[1] ~ x, out2[1] ~ x
-	x = (out1[2] ~ out2[2]) & mask
-	x3, y3 := out1[2] ~ x, out2[2] ~ x
-	x = (out1[3] ~ out2[3]) & mask
-	x4, y4 := out1[3] ~ x, out2[3] ~ x
-	x = (out1[4] ~ out2[4]) & mask
-	x5, y5 := out1[4] ~ x, out2[4] ~ x
-	out1[0], out2[0] = x1, y1
-	out1[1], out2[1] = x2, y2
-	out1[2], out2[2] = x3, y3
-	out1[3], out2[3] = x4, y4
-	out1[4], out2[4] = x5, y5
-}

+ 47 - 4
core/crypto/_fiat/field_poly1305/field.odin

@@ -1,17 +1,26 @@
 package field_poly1305
 package field_poly1305
 
 
+import "base:intrinsics"
 import "core:encoding/endian"
 import "core:encoding/endian"
 import "core:mem"
 import "core:mem"
 
 
-fe_relax_cast :: #force_inline proc "contextless" (arg1: ^Tight_Field_Element) -> ^Loose_Field_Element {
+fe_relax_cast :: #force_inline proc "contextless" (
+	arg1: ^Tight_Field_Element,
+) -> ^Loose_Field_Element {
 	return transmute(^Loose_Field_Element)(arg1)
 	return transmute(^Loose_Field_Element)(arg1)
 }
 }
 
 
-fe_tighten_cast :: #force_inline proc "contextless" (arg1: ^Loose_Field_Element) -> ^Tight_Field_Element {
+fe_tighten_cast :: #force_inline proc "contextless" (
+	arg1: ^Loose_Field_Element,
+) -> ^Tight_Field_Element {
 	return transmute(^Tight_Field_Element)(arg1)
 	return transmute(^Tight_Field_Element)(arg1)
 }
 }
 
 
-fe_from_bytes :: #force_inline proc (out1: ^Tight_Field_Element, arg1: []byte, arg2: byte) {
+fe_from_bytes :: #force_inline proc "contextless" (
+	out1: ^Tight_Field_Element,
+	arg1: []byte,
+	arg2: byte,
+) {
 	// fiat-crypto's deserialization routine effectively processes a
 	// fiat-crypto's deserialization routine effectively processes a
 	// single byte at a time, and wants 256-bits of input for a value
 	// single byte at a time, and wants 256-bits of input for a value
 	// that will be 128-bits or 129-bits.
 	// that will be 128-bits or 129-bits.
@@ -20,7 +29,9 @@ fe_from_bytes :: #force_inline proc (out1: ^Tight_Field_Element, arg1: []byte, a
 	// makes implementing the actual MAC block processing considerably
 	// makes implementing the actual MAC block processing considerably
 	// neater.
 	// neater.
 
 
-	assert(len(arg1) == 16)
+	if len(arg1) != 16 {
+		intrinsics.trap()
+	}
 
 
 	// While it may be unwise to do deserialization here on our
 	// While it may be unwise to do deserialization here on our
 	// own when fiat-crypto provides equivalent functionality,
 	// own when fiat-crypto provides equivalent functionality,
@@ -51,3 +62,35 @@ fe_from_u64s :: proc "contextless" (out1: ^Tight_Field_Element, lo, hi: u64) {
 	// This routine is only used to deserialize `r` which is confidential.
 	// This routine is only used to deserialize `r` which is confidential.
 	mem.zero_explicit(&tmp, size_of(tmp))
 	mem.zero_explicit(&tmp, size_of(tmp))
 }
 }
+
+fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
+	out1[0] = 0
+	out1[1] = 0
+	out1[2] = 0
+}
+
+fe_set :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
+	x1 := arg1[0]
+	x2 := arg1[1]
+	x3 := arg1[2]
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+}
+
+@(optimization_mode = "none")
+fe_cond_swap :: #force_no_inline proc "contextless" (
+	out1, out2: ^Tight_Field_Element,
+	arg1: bool,
+) {
+	mask := (u64(arg1) * 0xffffffffffffffff)
+	x := (out1[0] ~ out2[0]) & mask
+	x1, y1 := out1[0] ~ x, out2[0] ~ x
+	x = (out1[1] ~ out2[1]) & mask
+	x2, y2 := out1[1] ~ x, out2[1] ~ x
+	x = (out1[2] ~ out2[2]) & mask
+	x3, y3 := out1[2] ~ x, out2[2] ~ x
+	out1[0], out2[0] = x1, y1
+	out1[1], out2[1] = x2, y2
+	out1[2], out2[2] = x3, y3
+}

+ 35 - 39
core/crypto/_fiat/field_poly1305/field4344.odin

@@ -39,7 +39,13 @@ import "core:math/bits"
 Loose_Field_Element :: distinct [3]u64
 Loose_Field_Element :: distinct [3]u64
 Tight_Field_Element :: distinct [3]u64
 Tight_Field_Element :: distinct [3]u64
 
 
-_addcarryx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+_addcarryx_u44 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
 	x1 := ((u64(arg1) + arg2) + arg3)
 	x1 := ((u64(arg1) + arg2) + arg3)
 	x2 := (x1 & 0xfffffffffff)
 	x2 := (x1 & 0xfffffffffff)
 	x3 := fiat.u1((x1 >> 44))
 	x3 := fiat.u1((x1 >> 44))
@@ -48,7 +54,13 @@ _addcarryx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u
 	return
 	return
 }
 }
 
 
-_subborrowx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+_subborrowx_u44 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
 	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3))
 	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3))
 	x2 := fiat.i1((x1 >> 44))
 	x2 := fiat.i1((x1 >> 44))
 	x3 := (u64(x1) & 0xfffffffffff)
 	x3 := (u64(x1) & 0xfffffffffff)
@@ -57,7 +69,13 @@ _subborrowx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3:
 	return
 	return
 }
 }
 
 
-_addcarryx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+_addcarryx_u43 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
 	x1 := ((u64(arg1) + arg2) + arg3)
 	x1 := ((u64(arg1) + arg2) + arg3)
 	x2 := (x1 & 0x7ffffffffff)
 	x2 := (x1 & 0x7ffffffffff)
 	x3 := fiat.u1((x1 >> 43))
 	x3 := fiat.u1((x1 >> 43))
@@ -66,7 +84,13 @@ _addcarryx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u
 	return
 	return
 }
 }
 
 
-_subborrowx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+_subborrowx_u43 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
 	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3))
 	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3))
 	x2 := fiat.i1((x1 >> 43))
 	x2 := fiat.i1((x1 >> 43))
 	x3 := (u64(x1) & 0x7ffffffffff)
 	x3 := (u64(x1) & 0x7ffffffffff)
@@ -75,7 +99,7 @@ _subborrowx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3:
 	return
 	return
 }
 }
 
 
-fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
+fe_carry_mul :: proc "contextless" (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
 	x2, x1 := bits.mul_u64(arg1[2], (arg2[2] * 0x5))
 	x2, x1 := bits.mul_u64(arg1[2], (arg2[2] * 0x5))
 	x4, x3 := bits.mul_u64(arg1[2], (arg2[1] * 0xa))
 	x4, x3 := bits.mul_u64(arg1[2], (arg2[1] * 0xa))
 	x6, x5 := bits.mul_u64(arg1[1], (arg2[2] * 0xa))
 	x6, x5 := bits.mul_u64(arg1[1], (arg2[2] * 0xa))
@@ -120,7 +144,7 @@ fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Eleme
 	out1[2] = x62
 	out1[2] = x62
 }
 }
 
 
-fe_carry_square :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+fe_carry_square :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
 	x1 := (arg1[2] * 0x5)
 	x1 := (arg1[2] * 0x5)
 	x2 := (x1 * 0x2)
 	x2 := (x1 * 0x2)
 	x3 := (arg1[2] * 0x2)
 	x3 := (arg1[2] * 0x2)
@@ -201,8 +225,11 @@ fe_opp :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Ele
 	out1[2] = x3
 	out1[2] = x3
 }
 }
 
 
-@(optimization_mode="none")
-fe_cond_assign :: #force_no_inline proc "contextless" (out1, arg1: ^Tight_Field_Element, arg2: bool) {
+@(optimization_mode = "none")
+fe_cond_assign :: #force_no_inline proc "contextless" (
+	out1, arg1: ^Tight_Field_Element,
+	arg2: bool,
+) {
 	x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0])
 	x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0])
 	x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1])
 	x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1])
 	x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2])
 	x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2])
@@ -325,34 +352,3 @@ fe_relax :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_E
 	out1[1] = x2
 	out1[1] = x2
 	out1[2] = x3
 	out1[2] = x3
 }
 }
-
-// The following routines were added by hand, and do not come from fiat-crypto.
-
-fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
-	out1[0] = 0
-	out1[1] = 0
-	out1[2] = 0
-}
-
-fe_set :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
-	x1 := arg1[0]
-	x2 := arg1[1]
-	x3 := arg1[2]
-	out1[0] = x1
-	out1[1] = x2
-	out1[2] = x3
-}
-
-@(optimization_mode="none")
-fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: bool) {
-	mask := -u64(arg1)
-	x := (out1[0] ~ out2[0]) & mask
-	x1, y1 := out1[0] ~ x, out2[0] ~ x
-	x = (out1[1] ~ out2[1]) & mask
-	x2, y2 := out1[1] ~ x, out2[1] ~ x
-	x = (out1[2] ~ out2[2]) & mask
-	x3, y3 := out1[2] ~ x, out2[2] ~ x
-	out1[0], out2[0] = x1, y1
-	out1[1], out2[1] = x2, y2
-	out1[2], out2[2] = x3, y3
-}

+ 153 - 0
core/crypto/_fiat/field_scalar25519/field.odin

@@ -0,0 +1,153 @@
+package field_scalar25519
+
+import "base:intrinsics"
+import "core:encoding/endian"
+import "core:math/bits"
+import "core:mem"
+
+@(private)
+_TWO_168 := Montgomery_Domain_Field_Element {
+	0x5b8ab432eac74798,
+	0x38afddd6de59d5d7,
+	0xa2c131b399411b7c,
+	0x6329a7ed9ce5a30,
+}
+@(private)
+_TWO_336 := Montgomery_Domain_Field_Element {
+	0xbd3d108e2b35ecc5,
+	0x5c3a3718bdf9c90b,
+	0x63aa97a331b4f2ee,
+	0x3d217f5be65cb5c,
+}
+
+fe_clear :: proc "contextless" (arg1: ^Montgomery_Domain_Field_Element) {
+	mem.zero_explicit(arg1, size_of(Montgomery_Domain_Field_Element))
+}
+
+fe_from_bytes :: proc "contextless" (
+	out1: ^Montgomery_Domain_Field_Element,
+	arg1: ^[32]byte,
+	unsafe_assume_canonical := false,
+) -> bool {
+	tmp := Non_Montgomery_Domain_Field_Element {
+		endian.unchecked_get_u64le(arg1[0:]),
+		endian.unchecked_get_u64le(arg1[8:]),
+		endian.unchecked_get_u64le(arg1[16:]),
+		endian.unchecked_get_u64le(arg1[24:]),
+	}
+	defer mem.zero_explicit(&tmp, size_of(tmp))
+
+	// Check that tmp is in the range [0, ELL).
+	if !unsafe_assume_canonical {
+		_, borrow := bits.sub_u64(ELL[0] - 1, tmp[0], 0)
+		_, borrow = bits.sub_u64(ELL[1], tmp[1], borrow)
+		_, borrow = bits.sub_u64(ELL[2], tmp[2], borrow)
+		_, borrow = bits.sub_u64(ELL[3], tmp[3], borrow)
+		if borrow != 0 {
+			return false
+		}
+	}
+
+	fe_to_montgomery(out1, &tmp)
+
+	return true
+}
+
+fe_from_bytes_rfc8032 :: proc "contextless" (
+	out1: ^Montgomery_Domain_Field_Element,
+	arg1: ^[32]byte,
+) {
+	tmp: [64]byte
+	copy(tmp[:], arg1[:])
+
+	// Apply "clamping" as in RFC 8032.
+	tmp[0] &= 248
+	tmp[31] &= 127
+	tmp[31] |= 64 // Sets the 254th bit, so the encoding is non-canonical.
+
+	fe_from_bytes_wide(out1, &tmp)
+
+	mem.zero_explicit(&tmp, size_of(tmp))
+}
+
+fe_from_bytes_wide :: proc "contextless" (
+	out1: ^Montgomery_Domain_Field_Element,
+	arg1: ^[64]byte,
+) {
+	tmp: Montgomery_Domain_Field_Element
+	// Use Frank Denis' trick, as documented by Filippo Valsorda
+	// at https://words.filippo.io/dispatches/wide-reduction/
+	//
+	// x = c * 2^336 + b * 2^168 + a  mod l
+	_fe_from_bytes_short(out1, arg1[:21]) // a
+
+	_fe_from_bytes_short(&tmp, arg1[21:42]) // b
+	fe_mul(&tmp, &tmp, &_TWO_168) // b * 2^168
+	fe_add(out1, out1, &tmp) // a + b * 2^168
+
+	_fe_from_bytes_short(&tmp, arg1[42:]) // c
+	fe_mul(&tmp, &tmp, &_TWO_336) // c * 2^336
+	fe_add(out1, out1, &tmp) // a + b * 2^168 + c * 2^336
+
+	fe_clear(&tmp)
+}
+
+@(private)
+_fe_from_bytes_short :: proc "contextless" (out1: ^Montgomery_Domain_Field_Element, arg1: []byte) {
+	// INVARIANT: len(arg1) < 32.
+	if len(arg1) >= 32 {
+		intrinsics.trap()
+	}
+	tmp: [32]byte
+	copy(tmp[:], arg1)
+
+	_ = fe_from_bytes(out1, &tmp, true)
+	mem.zero_explicit(&tmp, size_of(tmp))
+}
+
+fe_to_bytes :: proc "contextless" (out1: []byte, arg1: ^Montgomery_Domain_Field_Element) {
+	if len(out1) != 32 {
+		intrinsics.trap()
+	}
+
+	tmp: Non_Montgomery_Domain_Field_Element
+	fe_from_montgomery(&tmp, arg1)
+
+	endian.unchecked_put_u64le(out1[0:], tmp[0])
+	endian.unchecked_put_u64le(out1[8:], tmp[1])
+	endian.unchecked_put_u64le(out1[16:], tmp[2])
+	endian.unchecked_put_u64le(out1[24:], tmp[3])
+
+	mem.zero_explicit(&tmp, size_of(tmp))
+}
+
+fe_equal :: proc "contextless" (arg1, arg2: ^Montgomery_Domain_Field_Element) -> int {
+	tmp: Montgomery_Domain_Field_Element
+	fe_sub(&tmp, arg1, arg2)
+
+	// The subtraction underflows iff arg1 == arg2 (i.e. tmp is zero), in
+	// which case the borrow we return is 1; otherwise it is 0.
+	_, borrow := bits.sub_u64(fe_non_zero(&tmp), 1, 0)
+
+	fe_clear(&tmp)
+
+	return int(borrow)
+}
+
+fe_zero :: proc "contextless" (out1: ^Montgomery_Domain_Field_Element) {
+	out1[0] = 0
+	out1[1] = 0
+	out1[2] = 0
+	out1[3] = 0
+}
+
+fe_set :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element) {
+	x1 := arg1[0]
+	x2 := arg1[1]
+	x3 := arg1[2]
+	x4 := arg1[3]
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+}

+ 535 - 0
core/crypto/_fiat/field_scalar25519/field64.odin

@@ -0,0 +1,535 @@
+// The BSD 1-Clause License (BSD-1-Clause)
+//
+// Copyright (c) 2015-2020 the fiat-crypto authors (see the AUTHORS file)
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     1. Redistributions of source code must retain the above copyright
+//        notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY the fiat-crypto authors "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Berkeley Software Design,
+// Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package field_scalar25519
+
+// The file provides arithmetic on the field Z/(2^252+27742317777372353535851937790883648493)
+// using a 64-bit Montgomery form internal representation.  It is derived
+// primarily from the machine generated Golang output from the fiat-crypto
+// project.
+//
+// While the base implementation is provably correct, this implementation
+// makes no such claims as the port and optimizations were done by hand.
+
+import fiat "core:crypto/_fiat"
+import "core:math/bits"
+
+// ELL is the saturated representation of the field order, least-significant
+// limb first.
+ELL :: [4]u64{0x5812631a5cf5d3ed, 0x14def9dea2f79cd6, 0x0, 0x1000000000000000}
+
+// Montgomery_Domain_Field_Element is a field element stored as 4 64-bit
+// limbs in Montgomery form; this is the representation that fe_mul,
+// fe_add, and the other arithmetic routines operate on.
+Montgomery_Domain_Field_Element :: distinct [4]u64
+// Non_Montgomery_Domain_Field_Element is a field element stored as 4
+// 64-bit limbs outside the Montgomery domain, as produced by
+// fe_from_montgomery and consumed by fe_to_montgomery.
+Non_Montgomery_Domain_Field_Element :: distinct [4]u64
+
+// fe_mul sets out1 = arg1 * arg2, with all values in the Montgomery
+// domain.  out1 is only written at the very end, after every limb of both
+// arguments has been read.
+//
+// The body is an unrolled, word-by-word multiplication: for each limb of
+// arg1 (arg1[0] first) the 4-limb product with arg2 is accumulated, then
+// a multiple of ELL is added such that the low limb of the accumulator is
+// discarded, and finally ELL is conditionally subtracted.
+fe_mul :: proc "contextless" (out1, arg1, arg2: ^Montgomery_Domain_Field_Element) {
+	x1 := arg1[1]
+	x2 := arg1[2]
+	x3 := arg1[3]
+	x4 := arg1[0]
+	// Round 0: arg1[0] * arg2.
+	x6, x5 := bits.mul_u64(x4, arg2[3])
+	x8, x7 := bits.mul_u64(x4, arg2[2])
+	x10, x9 := bits.mul_u64(x4, arg2[1])
+	x12, x11 := bits.mul_u64(x4, arg2[0])
+	x13, x14 := bits.add_u64(x12, x9, u64(0x0))
+	x15, x16 := bits.add_u64(x10, x7, u64(fiat.u1(x14)))
+	x17, x18 := bits.add_u64(x8, x5, u64(fiat.u1(x16)))
+	x19 := (u64(fiat.u1(x18)) + x6)
+	// Reduction: x20 is the low limb times 0xd2b51da312547e1b (mod 2^64),
+	// and x20 * ELL is added to the accumulator.  ELL[2] is 0, so only 3
+	// products are needed.
+	// NOTE(review): the constant is presumably -ELL^-1 mod 2^64 - confirm
+	// against the fiat-crypto output.
+	_, x20 := bits.mul_u64(x11, 0xd2b51da312547e1b)
+	x23, x22 := bits.mul_u64(x20, 0x1000000000000000)
+	x25, x24 := bits.mul_u64(x20, 0x14def9dea2f79cd6)
+	x27, x26 := bits.mul_u64(x20, 0x5812631a5cf5d3ed)
+	x28, x29 := bits.add_u64(x27, x24, u64(0x0))
+	x30 := (u64(fiat.u1(x29)) + x25)
+	_, x32 := bits.add_u64(x11, x26, u64(0x0))
+	x33, x34 := bits.add_u64(x13, x28, u64(fiat.u1(x32)))
+	x35, x36 := bits.add_u64(x15, x30, u64(fiat.u1(x34)))
+	x37, x38 := bits.add_u64(x17, x22, u64(fiat.u1(x36)))
+	x39, x40 := bits.add_u64(x19, x23, u64(fiat.u1(x38)))
+	// Round 1: accumulate arg1[1] * arg2, then reduce as above.
+	x42, x41 := bits.mul_u64(x1, arg2[3])
+	x44, x43 := bits.mul_u64(x1, arg2[2])
+	x46, x45 := bits.mul_u64(x1, arg2[1])
+	x48, x47 := bits.mul_u64(x1, arg2[0])
+	x49, x50 := bits.add_u64(x48, x45, u64(0x0))
+	x51, x52 := bits.add_u64(x46, x43, u64(fiat.u1(x50)))
+	x53, x54 := bits.add_u64(x44, x41, u64(fiat.u1(x52)))
+	x55 := (u64(fiat.u1(x54)) + x42)
+	x56, x57 := bits.add_u64(x33, x47, u64(0x0))
+	x58, x59 := bits.add_u64(x35, x49, u64(fiat.u1(x57)))
+	x60, x61 := bits.add_u64(x37, x51, u64(fiat.u1(x59)))
+	x62, x63 := bits.add_u64(x39, x53, u64(fiat.u1(x61)))
+	x64, x65 := bits.add_u64(u64(fiat.u1(x40)), x55, u64(fiat.u1(x63)))
+	_, x66 := bits.mul_u64(x56, 0xd2b51da312547e1b)
+	x69, x68 := bits.mul_u64(x66, 0x1000000000000000)
+	x71, x70 := bits.mul_u64(x66, 0x14def9dea2f79cd6)
+	x73, x72 := bits.mul_u64(x66, 0x5812631a5cf5d3ed)
+	x74, x75 := bits.add_u64(x73, x70, u64(0x0))
+	x76 := (u64(fiat.u1(x75)) + x71)
+	_, x78 := bits.add_u64(x56, x72, u64(0x0))
+	x79, x80 := bits.add_u64(x58, x74, u64(fiat.u1(x78)))
+	x81, x82 := bits.add_u64(x60, x76, u64(fiat.u1(x80)))
+	x83, x84 := bits.add_u64(x62, x68, u64(fiat.u1(x82)))
+	x85, x86 := bits.add_u64(x64, x69, u64(fiat.u1(x84)))
+	x87 := (u64(fiat.u1(x86)) + u64(fiat.u1(x65)))
+	// Round 2: accumulate arg1[2] * arg2, then reduce.
+	x89, x88 := bits.mul_u64(x2, arg2[3])
+	x91, x90 := bits.mul_u64(x2, arg2[2])
+	x93, x92 := bits.mul_u64(x2, arg2[1])
+	x95, x94 := bits.mul_u64(x2, arg2[0])
+	x96, x97 := bits.add_u64(x95, x92, u64(0x0))
+	x98, x99 := bits.add_u64(x93, x90, u64(fiat.u1(x97)))
+	x100, x101 := bits.add_u64(x91, x88, u64(fiat.u1(x99)))
+	x102 := (u64(fiat.u1(x101)) + x89)
+	x103, x104 := bits.add_u64(x79, x94, u64(0x0))
+	x105, x106 := bits.add_u64(x81, x96, u64(fiat.u1(x104)))
+	x107, x108 := bits.add_u64(x83, x98, u64(fiat.u1(x106)))
+	x109, x110 := bits.add_u64(x85, x100, u64(fiat.u1(x108)))
+	x111, x112 := bits.add_u64(x87, x102, u64(fiat.u1(x110)))
+	_, x113 := bits.mul_u64(x103, 0xd2b51da312547e1b)
+	x116, x115 := bits.mul_u64(x113, 0x1000000000000000)
+	x118, x117 := bits.mul_u64(x113, 0x14def9dea2f79cd6)
+	x120, x119 := bits.mul_u64(x113, 0x5812631a5cf5d3ed)
+	x121, x122 := bits.add_u64(x120, x117, u64(0x0))
+	x123 := (u64(fiat.u1(x122)) + x118)
+	_, x125 := bits.add_u64(x103, x119, u64(0x0))
+	x126, x127 := bits.add_u64(x105, x121, u64(fiat.u1(x125)))
+	x128, x129 := bits.add_u64(x107, x123, u64(fiat.u1(x127)))
+	x130, x131 := bits.add_u64(x109, x115, u64(fiat.u1(x129)))
+	x132, x133 := bits.add_u64(x111, x116, u64(fiat.u1(x131)))
+	x134 := (u64(fiat.u1(x133)) + u64(fiat.u1(x112)))
+	// Round 3: accumulate arg1[3] * arg2, then reduce.
+	x136, x135 := bits.mul_u64(x3, arg2[3])
+	x138, x137 := bits.mul_u64(x3, arg2[2])
+	x140, x139 := bits.mul_u64(x3, arg2[1])
+	x142, x141 := bits.mul_u64(x3, arg2[0])
+	x143, x144 := bits.add_u64(x142, x139, u64(0x0))
+	x145, x146 := bits.add_u64(x140, x137, u64(fiat.u1(x144)))
+	x147, x148 := bits.add_u64(x138, x135, u64(fiat.u1(x146)))
+	x149 := (u64(fiat.u1(x148)) + x136)
+	x150, x151 := bits.add_u64(x126, x141, u64(0x0))
+	x152, x153 := bits.add_u64(x128, x143, u64(fiat.u1(x151)))
+	x154, x155 := bits.add_u64(x130, x145, u64(fiat.u1(x153)))
+	x156, x157 := bits.add_u64(x132, x147, u64(fiat.u1(x155)))
+	x158, x159 := bits.add_u64(x134, x149, u64(fiat.u1(x157)))
+	_, x160 := bits.mul_u64(x150, 0xd2b51da312547e1b)
+	x163, x162 := bits.mul_u64(x160, 0x1000000000000000)
+	x165, x164 := bits.mul_u64(x160, 0x14def9dea2f79cd6)
+	x167, x166 := bits.mul_u64(x160, 0x5812631a5cf5d3ed)
+	x168, x169 := bits.add_u64(x167, x164, u64(0x0))
+	x170 := (u64(fiat.u1(x169)) + x165)
+	_, x172 := bits.add_u64(x150, x166, u64(0x0))
+	x173, x174 := bits.add_u64(x152, x168, u64(fiat.u1(x172)))
+	x175, x176 := bits.add_u64(x154, x170, u64(fiat.u1(x174)))
+	x177, x178 := bits.add_u64(x156, x162, u64(fiat.u1(x176)))
+	x179, x180 := bits.add_u64(x158, x163, u64(fiat.u1(x178)))
+	x181 := (u64(fiat.u1(x180)) + u64(fiat.u1(x159)))
+	// Final step: subtract ELL from the 5-limb accumulator, and use the
+	// last borrow (x191) to select, limb by limb, between the difference
+	// and the unsubtracted value.
+	x182, x183 := bits.sub_u64(x173, 0x5812631a5cf5d3ed, u64(0x0))
+	x184, x185 := bits.sub_u64(x175, 0x14def9dea2f79cd6, u64(fiat.u1(x183)))
+	x186, x187 := bits.sub_u64(x177, u64(0x0), u64(fiat.u1(x185)))
+	x188, x189 := bits.sub_u64(x179, 0x1000000000000000, u64(fiat.u1(x187)))
+	_, x191 := bits.sub_u64(x181, u64(0x0), u64(fiat.u1(x189)))
+	x192 := fiat.cmovznz_u64(fiat.u1(x191), x182, x173)
+	x193 := fiat.cmovznz_u64(fiat.u1(x191), x184, x175)
+	x194 := fiat.cmovznz_u64(fiat.u1(x191), x186, x177)
+	x195 := fiat.cmovznz_u64(fiat.u1(x191), x188, x179)
+	out1[0] = x192
+	out1[1] = x193
+	out1[2] = x194
+	out1[3] = x195
+}
+
+// fe_square sets out1 = arg1 * arg1, with all values in the Montgomery
+// domain.  out1 is only written at the very end, after every limb of arg1
+// has been read.
+//
+// The body has the same shape as fe_mul with both operands set to arg1:
+// 4 rounds of "accumulate one limb's product, then add a multiple of ELL
+// so the low limb is discarded", followed by a conditional subtraction of
+// ELL.
+fe_square :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element) {
+	x1 := arg1[1]
+	x2 := arg1[2]
+	x3 := arg1[3]
+	x4 := arg1[0]
+	// Round 0: arg1[0] * arg1.
+	x6, x5 := bits.mul_u64(x4, arg1[3])
+	x8, x7 := bits.mul_u64(x4, arg1[2])
+	x10, x9 := bits.mul_u64(x4, arg1[1])
+	x12, x11 := bits.mul_u64(x4, arg1[0])
+	x13, x14 := bits.add_u64(x12, x9, u64(0x0))
+	x15, x16 := bits.add_u64(x10, x7, u64(fiat.u1(x14)))
+	x17, x18 := bits.add_u64(x8, x5, u64(fiat.u1(x16)))
+	x19 := (u64(fiat.u1(x18)) + x6)
+	// Reduction: x20 is the low limb times 0xd2b51da312547e1b (mod 2^64),
+	// and x20 * ELL is added to the accumulator.  ELL[2] is 0, so only 3
+	// products are needed.
+	// NOTE(review): the constant is presumably -ELL^-1 mod 2^64 - confirm
+	// against the fiat-crypto output.
+	_, x20 := bits.mul_u64(x11, 0xd2b51da312547e1b)
+	x23, x22 := bits.mul_u64(x20, 0x1000000000000000)
+	x25, x24 := bits.mul_u64(x20, 0x14def9dea2f79cd6)
+	x27, x26 := bits.mul_u64(x20, 0x5812631a5cf5d3ed)
+	x28, x29 := bits.add_u64(x27, x24, u64(0x0))
+	x30 := (u64(fiat.u1(x29)) + x25)
+	_, x32 := bits.add_u64(x11, x26, u64(0x0))
+	x33, x34 := bits.add_u64(x13, x28, u64(fiat.u1(x32)))
+	x35, x36 := bits.add_u64(x15, x30, u64(fiat.u1(x34)))
+	x37, x38 := bits.add_u64(x17, x22, u64(fiat.u1(x36)))
+	x39, x40 := bits.add_u64(x19, x23, u64(fiat.u1(x38)))
+	// Round 1: accumulate arg1[1] * arg1, then reduce as above.
+	x42, x41 := bits.mul_u64(x1, arg1[3])
+	x44, x43 := bits.mul_u64(x1, arg1[2])
+	x46, x45 := bits.mul_u64(x1, arg1[1])
+	x48, x47 := bits.mul_u64(x1, arg1[0])
+	x49, x50 := bits.add_u64(x48, x45, u64(0x0))
+	x51, x52 := bits.add_u64(x46, x43, u64(fiat.u1(x50)))
+	x53, x54 := bits.add_u64(x44, x41, u64(fiat.u1(x52)))
+	x55 := (u64(fiat.u1(x54)) + x42)
+	x56, x57 := bits.add_u64(x33, x47, u64(0x0))
+	x58, x59 := bits.add_u64(x35, x49, u64(fiat.u1(x57)))
+	x60, x61 := bits.add_u64(x37, x51, u64(fiat.u1(x59)))
+	x62, x63 := bits.add_u64(x39, x53, u64(fiat.u1(x61)))
+	x64, x65 := bits.add_u64(u64(fiat.u1(x40)), x55, u64(fiat.u1(x63)))
+	_, x66 := bits.mul_u64(x56, 0xd2b51da312547e1b)
+	x69, x68 := bits.mul_u64(x66, 0x1000000000000000)
+	x71, x70 := bits.mul_u64(x66, 0x14def9dea2f79cd6)
+	x73, x72 := bits.mul_u64(x66, 0x5812631a5cf5d3ed)
+	x74, x75 := bits.add_u64(x73, x70, u64(0x0))
+	x76 := (u64(fiat.u1(x75)) + x71)
+	_, x78 := bits.add_u64(x56, x72, u64(0x0))
+	x79, x80 := bits.add_u64(x58, x74, u64(fiat.u1(x78)))
+	x81, x82 := bits.add_u64(x60, x76, u64(fiat.u1(x80)))
+	x83, x84 := bits.add_u64(x62, x68, u64(fiat.u1(x82)))
+	x85, x86 := bits.add_u64(x64, x69, u64(fiat.u1(x84)))
+	x87 := (u64(fiat.u1(x86)) + u64(fiat.u1(x65)))
+	// Round 2: accumulate arg1[2] * arg1, then reduce.
+	x89, x88 := bits.mul_u64(x2, arg1[3])
+	x91, x90 := bits.mul_u64(x2, arg1[2])
+	x93, x92 := bits.mul_u64(x2, arg1[1])
+	x95, x94 := bits.mul_u64(x2, arg1[0])
+	x96, x97 := bits.add_u64(x95, x92, u64(0x0))
+	x98, x99 := bits.add_u64(x93, x90, u64(fiat.u1(x97)))
+	x100, x101 := bits.add_u64(x91, x88, u64(fiat.u1(x99)))
+	x102 := (u64(fiat.u1(x101)) + x89)
+	x103, x104 := bits.add_u64(x79, x94, u64(0x0))
+	x105, x106 := bits.add_u64(x81, x96, u64(fiat.u1(x104)))
+	x107, x108 := bits.add_u64(x83, x98, u64(fiat.u1(x106)))
+	x109, x110 := bits.add_u64(x85, x100, u64(fiat.u1(x108)))
+	x111, x112 := bits.add_u64(x87, x102, u64(fiat.u1(x110)))
+	_, x113 := bits.mul_u64(x103, 0xd2b51da312547e1b)
+	x116, x115 := bits.mul_u64(x113, 0x1000000000000000)
+	x118, x117 := bits.mul_u64(x113, 0x14def9dea2f79cd6)
+	x120, x119 := bits.mul_u64(x113, 0x5812631a5cf5d3ed)
+	x121, x122 := bits.add_u64(x120, x117, u64(0x0))
+	x123 := (u64(fiat.u1(x122)) + x118)
+	_, x125 := bits.add_u64(x103, x119, u64(0x0))
+	x126, x127 := bits.add_u64(x105, x121, u64(fiat.u1(x125)))
+	x128, x129 := bits.add_u64(x107, x123, u64(fiat.u1(x127)))
+	x130, x131 := bits.add_u64(x109, x115, u64(fiat.u1(x129)))
+	x132, x133 := bits.add_u64(x111, x116, u64(fiat.u1(x131)))
+	x134 := (u64(fiat.u1(x133)) + u64(fiat.u1(x112)))
+	// Round 3: accumulate arg1[3] * arg1, then reduce.
+	x136, x135 := bits.mul_u64(x3, arg1[3])
+	x138, x137 := bits.mul_u64(x3, arg1[2])
+	x140, x139 := bits.mul_u64(x3, arg1[1])
+	x142, x141 := bits.mul_u64(x3, arg1[0])
+	x143, x144 := bits.add_u64(x142, x139, u64(0x0))
+	x145, x146 := bits.add_u64(x140, x137, u64(fiat.u1(x144)))
+	x147, x148 := bits.add_u64(x138, x135, u64(fiat.u1(x146)))
+	x149 := (u64(fiat.u1(x148)) + x136)
+	x150, x151 := bits.add_u64(x126, x141, u64(0x0))
+	x152, x153 := bits.add_u64(x128, x143, u64(fiat.u1(x151)))
+	x154, x155 := bits.add_u64(x130, x145, u64(fiat.u1(x153)))
+	x156, x157 := bits.add_u64(x132, x147, u64(fiat.u1(x155)))
+	x158, x159 := bits.add_u64(x134, x149, u64(fiat.u1(x157)))
+	_, x160 := bits.mul_u64(x150, 0xd2b51da312547e1b)
+	x163, x162 := bits.mul_u64(x160, 0x1000000000000000)
+	x165, x164 := bits.mul_u64(x160, 0x14def9dea2f79cd6)
+	x167, x166 := bits.mul_u64(x160, 0x5812631a5cf5d3ed)
+	x168, x169 := bits.add_u64(x167, x164, u64(0x0))
+	x170 := (u64(fiat.u1(x169)) + x165)
+	_, x172 := bits.add_u64(x150, x166, u64(0x0))
+	x173, x174 := bits.add_u64(x152, x168, u64(fiat.u1(x172)))
+	x175, x176 := bits.add_u64(x154, x170, u64(fiat.u1(x174)))
+	x177, x178 := bits.add_u64(x156, x162, u64(fiat.u1(x176)))
+	x179, x180 := bits.add_u64(x158, x163, u64(fiat.u1(x178)))
+	x181 := (u64(fiat.u1(x180)) + u64(fiat.u1(x159)))
+	// Final step: subtract ELL from the 5-limb accumulator, and use the
+	// last borrow (x191) to select, limb by limb, between the difference
+	// and the unsubtracted value.
+	x182, x183 := bits.sub_u64(x173, 0x5812631a5cf5d3ed, u64(0x0))
+	x184, x185 := bits.sub_u64(x175, 0x14def9dea2f79cd6, u64(fiat.u1(x183)))
+	x186, x187 := bits.sub_u64(x177, u64(0x0), u64(fiat.u1(x185)))
+	x188, x189 := bits.sub_u64(x179, 0x1000000000000000, u64(fiat.u1(x187)))
+	_, x191 := bits.sub_u64(x181, u64(0x0), u64(fiat.u1(x189)))
+	x192 := fiat.cmovznz_u64(fiat.u1(x191), x182, x173)
+	x193 := fiat.cmovznz_u64(fiat.u1(x191), x184, x175)
+	x194 := fiat.cmovznz_u64(fiat.u1(x191), x186, x177)
+	x195 := fiat.cmovznz_u64(fiat.u1(x191), x188, x179)
+	out1[0] = x192
+	out1[1] = x193
+	out1[2] = x194
+	out1[3] = x195
+}
+
+// fe_add sets out1 = arg1 + arg2, followed by a conditional subtraction
+// of ELL.  out1 is only written after both arguments have been fully
+// read.
+fe_add :: proc "contextless" (out1, arg1, arg2: ^Montgomery_Domain_Field_Element) {
+	// 4-limb add with carry propagation; x8 is the carry out.
+	x1, x2 := bits.add_u64(arg1[0], arg2[0], u64(0x0))
+	x3, x4 := bits.add_u64(arg1[1], arg2[1], u64(fiat.u1(x2)))
+	x5, x6 := bits.add_u64(arg1[2], arg2[2], u64(fiat.u1(x4)))
+	x7, x8 := bits.add_u64(arg1[3], arg2[3], u64(fiat.u1(x6)))
+	// Subtract ELL from the (5-limb, including the carry) sum.
+	x9, x10 := bits.sub_u64(x1, 0x5812631a5cf5d3ed, u64(0x0))
+	x11, x12 := bits.sub_u64(x3, 0x14def9dea2f79cd6, u64(fiat.u1(x10)))
+	x13, x14 := bits.sub_u64(x5, u64(0x0), u64(fiat.u1(x12)))
+	x15, x16 := bits.sub_u64(x7, 0x1000000000000000, u64(fiat.u1(x14)))
+	_, x18 := bits.sub_u64(u64(fiat.u1(x8)), u64(0x0), u64(fiat.u1(x16)))
+	// The final borrow (x18) selects between the difference and the
+	// unsubtracted sum.
+	x19 := fiat.cmovznz_u64(fiat.u1(x18), x9, x1)
+	x20 := fiat.cmovznz_u64(fiat.u1(x18), x11, x3)
+	x21 := fiat.cmovznz_u64(fiat.u1(x18), x13, x5)
+	x22 := fiat.cmovznz_u64(fiat.u1(x18), x15, x7)
+	out1[0] = x19
+	out1[1] = x20
+	out1[2] = x21
+	out1[3] = x22
+}
+
+// fe_sub sets out1 = arg1 - arg2.  The limbs are subtracted with borrow
+// propagation, and ELL, masked by the final borrow, is then added to the
+// result.  out1 is only written after both arguments have been fully
+// read.
+fe_sub :: proc "contextless" (out1, arg1, arg2: ^Montgomery_Domain_Field_Element) {
+	x1, x2 := bits.sub_u64(arg1[0], arg2[0], u64(0x0))
+	x3, x4 := bits.sub_u64(arg1[1], arg2[1], u64(fiat.u1(x2)))
+	x5, x6 := bits.sub_u64(arg1[2], arg2[2], u64(fiat.u1(x4)))
+	x7, x8 := bits.sub_u64(arg1[3], arg2[3], u64(fiat.u1(x6)))
+	// x9 is a mask (0 or all-ones) chosen by the final borrow x8; ANDing
+	// it with each limb of ELL makes the addition below a no-op when the
+	// mask is 0.  ELL[2] is 0, hence the literal 0 addend for that limb.
+	x9 := fiat.cmovznz_u64(fiat.u1(x8), u64(0x0), 0xffffffffffffffff)
+	x10, x11 := bits.add_u64(x1, (x9 & 0x5812631a5cf5d3ed), u64(0x0))
+	x12, x13 := bits.add_u64(x3, (x9 & 0x14def9dea2f79cd6), u64(fiat.u1(x11)))
+	x14, x15 := bits.add_u64(x5, u64(0x0), u64(fiat.u1(x13)))
+	x16, _ := bits.add_u64(x7, (x9 & 0x1000000000000000), u64(fiat.u1(x15)))
+	out1[0] = x10
+	out1[1] = x12
+	out1[2] = x14
+	out1[3] = x16
+}
+
+// fe_opp sets out1 = -arg1, computed as 0 - arg1 in the same manner as
+// fe_sub: a limb-wise subtraction with borrow, then an addition of ELL
+// masked by the final borrow.
+fe_opp :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element) {
+	x1, x2 := bits.sub_u64(u64(0x0), arg1[0], u64(0x0))
+	x3, x4 := bits.sub_u64(u64(0x0), arg1[1], u64(fiat.u1(x2)))
+	x5, x6 := bits.sub_u64(u64(0x0), arg1[2], u64(fiat.u1(x4)))
+	x7, x8 := bits.sub_u64(u64(0x0), arg1[3], u64(fiat.u1(x6)))
+	// x9 is a mask (0 or all-ones) chosen by the final borrow x8.
+	x9 := fiat.cmovznz_u64(fiat.u1(x8), u64(0x0), 0xffffffffffffffff)
+	x10, x11 := bits.add_u64(x1, (x9 & 0x5812631a5cf5d3ed), u64(0x0))
+	x12, x13 := bits.add_u64(x3, (x9 & 0x14def9dea2f79cd6), u64(fiat.u1(x11)))
+	x14, x15 := bits.add_u64(x5, u64(0x0), u64(fiat.u1(x13)))
+	x16, _ := bits.add_u64(x7, (x9 & 0x1000000000000000), u64(fiat.u1(x15)))
+	out1[0] = x10
+	out1[1] = x12
+	out1[2] = x14
+	out1[3] = x16
+}
+
+// fe_one sets out1 to the Montgomery domain representation of 1.
+fe_one :: proc "contextless" (out1: ^Montgomery_Domain_Field_Element) {
+	out1^ = {
+		0xd6ec31748d98951d,
+		0xc6ef5bf4737dcf70,
+		0xfffffffffffffffe,
+		0xfffffffffffffff,
+	}
+}
+
+// fe_non_zero returns the bitwise OR of all limbs of arg1, ie: 0 iff
+// every limb of arg1 is 0.
+fe_non_zero :: proc "contextless" (arg1: ^Montgomery_Domain_Field_Element) -> u64 {
+	acc := arg1[3]
+	acc |= arg1[2]
+	acc |= arg1[1]
+	acc |= arg1[0]
+	return acc
+}
+
+// fe_cond_assign conditionally replaces out1 with arg1, selecting each
+// limb with fiat.cmovznz_u64 keyed on arg2 (truncated to a single bit by
+// fiat.u1).
+//
+// NOTE(review): with the usual fiat-crypto cmovznz semantics this leaves
+// out1 unchanged when arg2 == 0 and sets out1 = arg1 when arg2 == 1 -
+// confirm against fiat.cmovznz_u64.  The optimization_mode/no-inline
+// attributes are presumably there to keep the compiler from turning the
+// select into a branch.
+@(optimization_mode = "none")
+fe_cond_assign :: #force_no_inline proc "contextless" (
+	out1, arg1: ^Montgomery_Domain_Field_Element,
+	arg2: int,
+) {
+	x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0])
+	x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1])
+	x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2])
+	x4 := fiat.cmovznz_u64(fiat.u1(arg2), out1[3], arg1[3])
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+}
+
+// fe_from_montgomery converts arg1 out of the Montgomery domain, and
+// writes the result to out1.
+//
+// The body runs 4 reduction rounds, one per limb of arg1 (arg1[0] first):
+// each round multiplies the current low limb by 0xd2b51da312547e1b, adds
+// that multiple of ELL so the low limb is discarded, and folds in the
+// next limb of arg1.  A conditional subtraction of ELL finishes the
+// conversion.
+fe_from_montgomery :: proc "contextless" (
+	out1: ^Non_Montgomery_Domain_Field_Element,
+	arg1: ^Montgomery_Domain_Field_Element,
+) {
+	// Round 0: reduce arg1[0].
+	x1 := arg1[0]
+	_, x2 := bits.mul_u64(x1, 0xd2b51da312547e1b)
+	x5, x4 := bits.mul_u64(x2, 0x1000000000000000)
+	x7, x6 := bits.mul_u64(x2, 0x14def9dea2f79cd6)
+	x9, x8 := bits.mul_u64(x2, 0x5812631a5cf5d3ed)
+	x10, x11 := bits.add_u64(x9, x6, u64(0x0))
+	_, x13 := bits.add_u64(x1, x8, u64(0x0))
+	x14, x15 := bits.add_u64(u64(0x0), x10, u64(fiat.u1(x13)))
+	// Round 1: fold in arg1[1], then reduce.
+	x16, x17 := bits.add_u64(x14, arg1[1], u64(0x0))
+	_, x18 := bits.mul_u64(x16, 0xd2b51da312547e1b)
+	x21, x20 := bits.mul_u64(x18, 0x1000000000000000)
+	x23, x22 := bits.mul_u64(x18, 0x14def9dea2f79cd6)
+	x25, x24 := bits.mul_u64(x18, 0x5812631a5cf5d3ed)
+	x26, x27 := bits.add_u64(x25, x22, u64(0x0))
+	_, x29 := bits.add_u64(x16, x24, u64(0x0))
+	x30, x31 := bits.add_u64(
+		(u64(fiat.u1(x17)) + (u64(fiat.u1(x15)) + (u64(fiat.u1(x11)) + x7))),
+		x26,
+		u64(fiat.u1(x29)),
+	)
+	x32, x33 := bits.add_u64(x4, (u64(fiat.u1(x27)) + x23), u64(fiat.u1(x31)))
+	x34, x35 := bits.add_u64(x5, x20, u64(fiat.u1(x33)))
+	// Round 2: fold in arg1[2], then reduce.
+	x36, x37 := bits.add_u64(x30, arg1[2], u64(0x0))
+	x38, x39 := bits.add_u64(x32, u64(0x0), u64(fiat.u1(x37)))
+	x40, x41 := bits.add_u64(x34, u64(0x0), u64(fiat.u1(x39)))
+	_, x42 := bits.mul_u64(x36, 0xd2b51da312547e1b)
+	x45, x44 := bits.mul_u64(x42, 0x1000000000000000)
+	x47, x46 := bits.mul_u64(x42, 0x14def9dea2f79cd6)
+	x49, x48 := bits.mul_u64(x42, 0x5812631a5cf5d3ed)
+	x50, x51 := bits.add_u64(x49, x46, u64(0x0))
+	_, x53 := bits.add_u64(x36, x48, u64(0x0))
+	x54, x55 := bits.add_u64(x38, x50, u64(fiat.u1(x53)))
+	x56, x57 := bits.add_u64(x40, (u64(fiat.u1(x51)) + x47), u64(fiat.u1(x55)))
+	x58, x59 := bits.add_u64(
+		(u64(fiat.u1(x41)) + (u64(fiat.u1(x35)) + x21)),
+		x44,
+		u64(fiat.u1(x57)),
+	)
+	// Round 3: fold in arg1[3], then reduce.
+	x60, x61 := bits.add_u64(x54, arg1[3], u64(0x0))
+	x62, x63 := bits.add_u64(x56, u64(0x0), u64(fiat.u1(x61)))
+	x64, x65 := bits.add_u64(x58, u64(0x0), u64(fiat.u1(x63)))
+	_, x66 := bits.mul_u64(x60, 0xd2b51da312547e1b)
+	x69, x68 := bits.mul_u64(x66, 0x1000000000000000)
+	x71, x70 := bits.mul_u64(x66, 0x14def9dea2f79cd6)
+	x73, x72 := bits.mul_u64(x66, 0x5812631a5cf5d3ed)
+	x74, x75 := bits.add_u64(x73, x70, u64(0x0))
+	_, x77 := bits.add_u64(x60, x72, u64(0x0))
+	x78, x79 := bits.add_u64(x62, x74, u64(fiat.u1(x77)))
+	x80, x81 := bits.add_u64(x64, (u64(fiat.u1(x75)) + x71), u64(fiat.u1(x79)))
+	x82, x83 := bits.add_u64(
+		(u64(fiat.u1(x65)) + (u64(fiat.u1(x59)) + x45)),
+		x68,
+		u64(fiat.u1(x81)),
+	)
+	x84 := (u64(fiat.u1(x83)) + x69)
+	// Final step: subtract ELL, and use the last borrow (x94) to select
+	// between the difference and the unsubtracted value.
+	x85, x86 := bits.sub_u64(x78, 0x5812631a5cf5d3ed, u64(0x0))
+	x87, x88 := bits.sub_u64(x80, 0x14def9dea2f79cd6, u64(fiat.u1(x86)))
+	x89, x90 := bits.sub_u64(x82, u64(0x0), u64(fiat.u1(x88)))
+	x91, x92 := bits.sub_u64(x84, 0x1000000000000000, u64(fiat.u1(x90)))
+	_, x94 := bits.sub_u64(u64(0x0), u64(0x0), u64(fiat.u1(x92)))
+	x95 := fiat.cmovznz_u64(fiat.u1(x94), x85, x78)
+	x96 := fiat.cmovznz_u64(fiat.u1(x94), x87, x80)
+	x97 := fiat.cmovznz_u64(fiat.u1(x94), x89, x82)
+	x98 := fiat.cmovznz_u64(fiat.u1(x94), x91, x84)
+	out1[0] = x95
+	out1[1] = x96
+	out1[2] = x97
+	out1[3] = x98
+}
+
+// fe_to_montgomery converts arg1 into the Montgomery domain, and writes
+// the result to out1.
+//
+// The body has the shape of a Montgomery multiplication of arg1 by the
+// fixed 4-limb constant {0xa40611e3449c0f01, 0xd00e1ba768859347,
+// 0xceec73d217f5be65, 0x399411b7c309a3d} (least-significant limb first):
+// one multiply-and-reduce round per limb of arg1, followed by a
+// conditional subtraction of ELL.
+//
+// NOTE(review): the constant is presumably R^2 mod ELL for R = 2^256 -
+// confirm against the fiat-crypto output.
+fe_to_montgomery :: proc "contextless" (
+	out1: ^Montgomery_Domain_Field_Element,
+	arg1: ^Non_Montgomery_Domain_Field_Element,
+) {
+	x1 := arg1[1]
+	x2 := arg1[2]
+	x3 := arg1[3]
+	x4 := arg1[0]
+	// Round 0: arg1[0] times the constant, then reduce.
+	x6, x5 := bits.mul_u64(x4, 0x399411b7c309a3d)
+	x8, x7 := bits.mul_u64(x4, 0xceec73d217f5be65)
+	x10, x9 := bits.mul_u64(x4, 0xd00e1ba768859347)
+	x12, x11 := bits.mul_u64(x4, 0xa40611e3449c0f01)
+	x13, x14 := bits.add_u64(x12, x9, u64(0x0))
+	x15, x16 := bits.add_u64(x10, x7, u64(fiat.u1(x14)))
+	x17, x18 := bits.add_u64(x8, x5, u64(fiat.u1(x16)))
+	_, x19 := bits.mul_u64(x11, 0xd2b51da312547e1b)
+	x22, x21 := bits.mul_u64(x19, 0x1000000000000000)
+	x24, x23 := bits.mul_u64(x19, 0x14def9dea2f79cd6)
+	x26, x25 := bits.mul_u64(x19, 0x5812631a5cf5d3ed)
+	x27, x28 := bits.add_u64(x26, x23, u64(0x0))
+	_, x30 := bits.add_u64(x11, x25, u64(0x0))
+	x31, x32 := bits.add_u64(x13, x27, u64(fiat.u1(x30)))
+	x33, x34 := bits.add_u64(x15, (u64(fiat.u1(x28)) + x24), u64(fiat.u1(x32)))
+	x35, x36 := bits.add_u64(x17, x21, u64(fiat.u1(x34)))
+	// Round 1: accumulate arg1[1] times the constant, then reduce.
+	x38, x37 := bits.mul_u64(x1, 0x399411b7c309a3d)
+	x40, x39 := bits.mul_u64(x1, 0xceec73d217f5be65)
+	x42, x41 := bits.mul_u64(x1, 0xd00e1ba768859347)
+	x44, x43 := bits.mul_u64(x1, 0xa40611e3449c0f01)
+	x45, x46 := bits.add_u64(x44, x41, u64(0x0))
+	x47, x48 := bits.add_u64(x42, x39, u64(fiat.u1(x46)))
+	x49, x50 := bits.add_u64(x40, x37, u64(fiat.u1(x48)))
+	x51, x52 := bits.add_u64(x31, x43, u64(0x0))
+	x53, x54 := bits.add_u64(x33, x45, u64(fiat.u1(x52)))
+	x55, x56 := bits.add_u64(x35, x47, u64(fiat.u1(x54)))
+	x57, x58 := bits.add_u64(
+		((u64(fiat.u1(x36)) + (u64(fiat.u1(x18)) + x6)) + x22),
+		x49,
+		u64(fiat.u1(x56)),
+	)
+	_, x59 := bits.mul_u64(x51, 0xd2b51da312547e1b)
+	x62, x61 := bits.mul_u64(x59, 0x1000000000000000)
+	x64, x63 := bits.mul_u64(x59, 0x14def9dea2f79cd6)
+	x66, x65 := bits.mul_u64(x59, 0x5812631a5cf5d3ed)
+	x67, x68 := bits.add_u64(x66, x63, u64(0x0))
+	_, x70 := bits.add_u64(x51, x65, u64(0x0))
+	x71, x72 := bits.add_u64(x53, x67, u64(fiat.u1(x70)))
+	x73, x74 := bits.add_u64(x55, (u64(fiat.u1(x68)) + x64), u64(fiat.u1(x72)))
+	x75, x76 := bits.add_u64(x57, x61, u64(fiat.u1(x74)))
+	// Round 2: accumulate arg1[2] times the constant, then reduce.
+	x78, x77 := bits.mul_u64(x2, 0x399411b7c309a3d)
+	x80, x79 := bits.mul_u64(x2, 0xceec73d217f5be65)
+	x82, x81 := bits.mul_u64(x2, 0xd00e1ba768859347)
+	x84, x83 := bits.mul_u64(x2, 0xa40611e3449c0f01)
+	x85, x86 := bits.add_u64(x84, x81, u64(0x0))
+	x87, x88 := bits.add_u64(x82, x79, u64(fiat.u1(x86)))
+	x89, x90 := bits.add_u64(x80, x77, u64(fiat.u1(x88)))
+	x91, x92 := bits.add_u64(x71, x83, u64(0x0))
+	x93, x94 := bits.add_u64(x73, x85, u64(fiat.u1(x92)))
+	x95, x96 := bits.add_u64(x75, x87, u64(fiat.u1(x94)))
+	x97, x98 := bits.add_u64(
+		((u64(fiat.u1(x76)) + (u64(fiat.u1(x58)) + (u64(fiat.u1(x50)) + x38))) + x62),
+		x89,
+		u64(fiat.u1(x96)),
+	)
+	_, x99 := bits.mul_u64(x91, 0xd2b51da312547e1b)
+	x102, x101 := bits.mul_u64(x99, 0x1000000000000000)
+	x104, x103 := bits.mul_u64(x99, 0x14def9dea2f79cd6)
+	x106, x105 := bits.mul_u64(x99, 0x5812631a5cf5d3ed)
+	x107, x108 := bits.add_u64(x106, x103, u64(0x0))
+	_, x110 := bits.add_u64(x91, x105, u64(0x0))
+	x111, x112 := bits.add_u64(x93, x107, u64(fiat.u1(x110)))
+	x113, x114 := bits.add_u64(x95, (u64(fiat.u1(x108)) + x104), u64(fiat.u1(x112)))
+	x115, x116 := bits.add_u64(x97, x101, u64(fiat.u1(x114)))
+	// Round 3: accumulate arg1[3] times the constant, then reduce.
+	x118, x117 := bits.mul_u64(x3, 0x399411b7c309a3d)
+	x120, x119 := bits.mul_u64(x3, 0xceec73d217f5be65)
+	x122, x121 := bits.mul_u64(x3, 0xd00e1ba768859347)
+	x124, x123 := bits.mul_u64(x3, 0xa40611e3449c0f01)
+	x125, x126 := bits.add_u64(x124, x121, u64(0x0))
+	x127, x128 := bits.add_u64(x122, x119, u64(fiat.u1(x126)))
+	x129, x130 := bits.add_u64(x120, x117, u64(fiat.u1(x128)))
+	x131, x132 := bits.add_u64(x111, x123, u64(0x0))
+	x133, x134 := bits.add_u64(x113, x125, u64(fiat.u1(x132)))
+	x135, x136 := bits.add_u64(x115, x127, u64(fiat.u1(x134)))
+	x137, x138 := bits.add_u64(
+		((u64(fiat.u1(x116)) + (u64(fiat.u1(x98)) + (u64(fiat.u1(x90)) + x78))) + x102),
+		x129,
+		u64(fiat.u1(x136)),
+	)
+	_, x139 := bits.mul_u64(x131, 0xd2b51da312547e1b)
+	x142, x141 := bits.mul_u64(x139, 0x1000000000000000)
+	x144, x143 := bits.mul_u64(x139, 0x14def9dea2f79cd6)
+	x146, x145 := bits.mul_u64(x139, 0x5812631a5cf5d3ed)
+	x147, x148 := bits.add_u64(x146, x143, u64(0x0))
+	_, x150 := bits.add_u64(x131, x145, u64(0x0))
+	x151, x152 := bits.add_u64(x133, x147, u64(fiat.u1(x150)))
+	x153, x154 := bits.add_u64(x135, (u64(fiat.u1(x148)) + x144), u64(fiat.u1(x152)))
+	x155, x156 := bits.add_u64(x137, x141, u64(fiat.u1(x154)))
+	x157 := ((u64(fiat.u1(x156)) + (u64(fiat.u1(x138)) + (u64(fiat.u1(x130)) + x118))) + x142)
+	// Final step: subtract ELL, and use the last borrow (x167) to select
+	// between the difference and the unsubtracted value.
+	x158, x159 := bits.sub_u64(x151, 0x5812631a5cf5d3ed, u64(0x0))
+	x160, x161 := bits.sub_u64(x153, 0x14def9dea2f79cd6, u64(fiat.u1(x159)))
+	x162, x163 := bits.sub_u64(x155, u64(0x0), u64(fiat.u1(x161)))
+	x164, x165 := bits.sub_u64(x157, 0x1000000000000000, u64(fiat.u1(x163)))
+	_, x167 := bits.sub_u64(u64(0x0), u64(0x0), u64(fiat.u1(x165)))
+	x168 := fiat.cmovznz_u64(fiat.u1(x167), x158, x151)
+	x169 := fiat.cmovznz_u64(fiat.u1(x167), x160, x153)
+	x170 := fiat.cmovznz_u64(fiat.u1(x167), x162, x155)
+	x171 := fiat.cmovznz_u64(fiat.u1(x167), x164, x157)
+	out1[0] = x168
+	out1[1] = x169
+	out1[2] = x170
+	out1[3] = x171
+}

+ 10 - 0
core/crypto/crypto.odin

@@ -1,3 +1,7 @@
+/*
+package crypto implements a selection of cryptography algorithms and useful
+helper routines.
+*/
 package crypto
 package crypto
 
 
 import "core:mem"
 import "core:mem"
@@ -51,3 +55,9 @@ rand_bytes :: proc (dst: []byte) {
 
 
 	_rand_bytes(dst)
 	_rand_bytes(dst)
 }
 }
+
+// has_rand_bytes returns true iff the target has support for accessing the
+// system entropy source.
+has_rand_bytes :: proc () -> bool {
+	return _has_rand_bytes()
+}

+ 314 - 0
core/crypto/ed25519/ed25519.odin

@@ -0,0 +1,314 @@
+/*
+package ed25519 implements the Ed25519 EdDSA signature algorithm.
+
+See:
+- https://datatracker.ietf.org/doc/html/rfc8032
+- https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.186-5.pdf
+- https://eprint.iacr.org/2020/1244.pdf
+*/
+package ed25519
+
+import "core:crypto"
+import grp "core:crypto/_edwards25519"
+import "core:crypto/sha2"
+import "core:mem"
+
+// PRIVATE_KEY_SIZE is the byte-encoded private key size.
+PRIVATE_KEY_SIZE :: 32
+// PUBLIC_KEY_SIZE is the byte-encoded public key size.
+PUBLIC_KEY_SIZE :: 32
+// SIGNATURE_SIZE is the byte-encoded signature size.
+SIGNATURE_SIZE :: 64
+
+// NONCE_SIZE is the size in bytes of the per-key nonce prefix stored in
+// a Private_Key, ie: the second half of the SHA-512 digest of the
+// byte-encoded private key.
+@(private)
+NONCE_SIZE :: 32
+// Private_Key is an Ed25519 private key.
+Private_Key :: struct {
+	// WARNING: All of the members are to be treated as internal (ie:
+	// the Private_Key structure is intended to be opaque).  There are
+	// subtle vulnerabilities that can be introduced if the internal
+	// values are allowed to be altered.
+	//
+	// See: https://github.com/MystenLabs/ed25519-unsafe-libs
+
+	// _b is the byte-encoded private key, as passed to
+	// private_key_set_bytes.
+	_b:              [PRIVATE_KEY_SIZE]byte,
+	// _s is the secret scalar, derived from the first half of
+	// SHA-512(_b).
+	_s:              grp.Scalar,
+	// _nonce is the second half of SHA-512(_b), hashed together with
+	// the message when signing.
+	_nonce:          [NONCE_SIZE]byte,
+	// _pub_key is the public key corresponding to _s.
+	_pub_key:        Public_Key,
+	// _is_initialized is set once private_key_set_bytes succeeds.
+	_is_initialized: bool,
+}
+
+// Public_Key is an Ed25519 public key.
+Public_Key :: struct {
+	// WARNING: All of the members are to be treated as internal (ie:
+	// the Public_Key structure is intended to be opaque).
+
+	// _b is the byte-encoded public key.
+	_b:              [PUBLIC_KEY_SIZE]byte,
+	// _neg_A is the negation of the decoded public key point A, cached
+	// as that is the form used by verify.
+	_neg_A:          grp.Group_Element,
+	// _is_valid is false iff A is a small-order point.
+	_is_valid:       bool,
+	// _is_initialized is set once the key has been successfully set.
+	_is_initialized: bool,
+}
+
+// private_key_set_bytes decodes a byte-encoded private key, and returns
+// true iff the operation was successful.
+//
+// The only failure mode is len(b) != PRIVATE_KEY_SIZE, in which case
+// priv_key is left untouched.  On success the secret scalar, the nonce
+// prefix, and the corresponding public key are all derived and cached in
+// priv_key.
+private_key_set_bytes :: proc(priv_key: ^Private_Key, b: []byte) -> bool {
+	if len(b) != PRIVATE_KEY_SIZE {
+		return false
+	}
+
+	// Derive the private key.
+	ctx: sha2.Context_512 = ---
+	h_bytes: [sha2.DIGEST_SIZE_512]byte = ---
+	sha2.init_512(&ctx)
+	sha2.update(&ctx, b)
+	sha2.final(&ctx, h_bytes[:])
+
+	copy(priv_key._b[:], b)
+	copy(priv_key._nonce[:], h_bytes[32:])
+	grp.sc_set_bytes_rfc8032(&priv_key._s, h_bytes[:32])
+
+	// h_bytes holds the secret scalar and nonce material, and everything
+	// needed from it has been copied out, so do not leave it on the stack.
+	mem.zero_explicit(&h_bytes, size_of(h_bytes))
+
+	// Derive the corresponding public key.
+	A: grp.Group_Element = ---
+	grp.ge_scalarmult_basepoint(&A, &priv_key._s)
+	grp.ge_bytes(&A, priv_key._pub_key._b[:])
+	// The public key caches -A rather than A (see Public_Key._neg_A).
+	grp.ge_negate(&priv_key._pub_key._neg_A, &A)
+	priv_key._pub_key._is_valid = !grp.ge_is_small_order(&A)
+	priv_key._pub_key._is_initialized = true
+
+	priv_key._is_initialized = true
+
+	return true
+}
+
+// private_key_bytes writes the byte-encoding of priv_key to dst.
+//
+// Panics if priv_key is uninitialized, or if len(dst) is not
+// PRIVATE_KEY_SIZE.
+private_key_bytes :: proc(priv_key: ^Private_Key, dst: []byte) {
+	switch {
+	case !priv_key._is_initialized:
+		panic("crypto/ed25519: uninitialized private key")
+	case len(dst) != PRIVATE_KEY_SIZE:
+		panic("crypto/ed25519: invalid destination size")
+	}
+
+	copy(dst, priv_key._b[:])
+}
+
+// private_key_clear wipes every byte of priv_key, which returns it to the
+// uninitialized state.
+private_key_clear :: proc "contextless" (priv_key: ^Private_Key) {
+	mem.zero_explicit(priv_key, size_of(priv_key^))
+}
+
+// sign writes the signature by priv_key over msg to sig.
+//
+// Panics if priv_key is uninitialized, or if len(sig) is not
+// SIGNATURE_SIZE.  No randomness is consumed: the scalar r is derived by
+// hashing the key's stored nonce prefix together with msg.
+sign :: proc(priv_key: ^Private_Key, msg, sig: []byte) {
+	if !priv_key._is_initialized {
+		panic("crypto/ed25519: uninitialized private key")
+	}
+	if len(sig) != SIGNATURE_SIZE {
+		panic("crypto/ed25519: invalid destination size")
+	}
+
+	// 1. Compute the hash of the private key d, H(d) = (h_0, h_1, ..., h_2b-1)
+	// using SHA-512 for Ed25519.  H(d) may be precomputed.
+	//
+	// (This implementation precomputes it in private_key_set_bytes.)
+	//
+	// 2. Using the second half of the digest hdigest2 = hb || ... || h2b-1,
+	// define:
+	//
+	// 2.1 For Ed25519, r = SHA-512(hdigest2 || M); Interpret r as a
+	// 64-octet little-endian integer.
+	ctx: sha2.Context_512 = ---
+	digest_bytes: [sha2.DIGEST_SIZE_512]byte = ---
+	sha2.init_512(&ctx)
+	sha2.update(&ctx, priv_key._nonce[:])
+	sha2.update(&ctx, msg)
+	sha2.final(&ctx, digest_bytes[:])
+
+	r: grp.Scalar = ---
+	grp.sc_set_bytes_wide(&r, &digest_bytes)
+
+	// 3. Compute the point [r]G. The octet string R is the encoding of
+	// the point [r]G.
+	R: grp.Group_Element = ---
+	R_bytes := sig[:32] // R is encoded directly into the output.
+	grp.ge_scalarmult_basepoint(&R, &r)
+	grp.ge_bytes(&R, R_bytes)
+
+	// 4. Derive s from H(d) as in the key pair generation algorithm.
+	// Use octet strings R, Q, and M to define:
+	//
+	// 4.1 For Ed25519, digest = SHA-512(R || Q || M).
+	// Interpret digest as a little-endian integer.
+	//
+	// Note: This overwrites the r-derived contents of digest_bytes.
+	sha2.init_512(&ctx)
+	sha2.update(&ctx, R_bytes)
+	sha2.update(&ctx, priv_key._pub_key._b[:]) // Q in NIST terminology.
+	sha2.update(&ctx, msg)
+	sha2.final(&ctx, digest_bytes[:])
+
+	sc: grp.Scalar = --- // `digest` in NIST terminology.
+	grp.sc_set_bytes_wide(&sc, &digest_bytes)
+
+	// 5. Compute S = (r + digest × s) mod n. The octet string S is the
+	// encoding of the resultant integer.
+	grp.sc_mul(&sc, &sc, &priv_key._s)
+	grp.sc_add(&sc, &sc, &r)
+
+	// 6. Form the signature as the concatenation of the octet strings
+	// R and S.
+	grp.sc_bytes(sig[32:], &sc)
+
+	// r must stay secret: S = r + digest * s, so r together with the
+	// signature determines the private scalar s.
+	grp.sc_clear(&r)
+}
+
+// public_key_set_bytes decodes the byte-encoded public key b into
+// pub_key, and returns true iff the operation was successful.
+//
+// Decoding fails if len(b) is not PUBLIC_KEY_SIZE, or if grp.ge_set_bytes
+// rejects the encoding.  On failure pub_key is left untouched.
+public_key_set_bytes :: proc "contextless" (pub_key: ^Public_Key, b: []byte) -> bool {
+	if len(b) != PUBLIC_KEY_SIZE {
+		return false
+	}
+
+	// Decode into a temporary, so that pub_key is only written once the
+	// encoding is known to be acceptable.
+	point: grp.Group_Element = ---
+	if ok := grp.ge_set_bytes(&point, b); !ok {
+		return false
+	}
+
+	grp.ge_negate(&pub_key._neg_A, &point)
+	pub_key._is_valid = !grp.ge_is_small_order(&point)
+	copy(pub_key._b[:], b)
+	pub_key._is_initialized = true
+
+	return true
+}
+
+// public_key_set_priv sets pub_key to the public component of priv_key.
+//
+// Panics if priv_key is uninitialized.
+public_key_set_priv :: proc(pub_key: ^Public_Key, priv_key: ^Private_Key) {
+	if !priv_key._is_initialized {
+		// It is the private key that is being checked here, so say so.
+		panic("crypto/ed25519: uninitialized private key")
+	}
+
+	// The public key was derived and cached by private_key_set_bytes, so
+	// this is a member-wise copy.
+	src := &priv_key._pub_key
+	copy(pub_key._b[:], src._b[:])
+	grp.ge_set(&pub_key._neg_A, &src._neg_A)
+	pub_key._is_valid = src._is_valid
+	pub_key._is_initialized = src._is_initialized
+}
+
+// public_key_bytes writes the byte-encoding of pub_key to dst.
+//
+// Panics if pub_key is uninitialized, or if len(dst) is not
+// PUBLIC_KEY_SIZE.
+public_key_bytes :: proc(pub_key: ^Public_Key, dst: []byte) {
+	switch {
+	case !pub_key._is_initialized:
+		panic("crypto/ed25519: uninitialized public key")
+	case len(dst) != PUBLIC_KEY_SIZE:
+		panic("crypto/ed25519: invalid destination size")
+	}
+
+	copy(dst, pub_key._b[:])
+}
+
+// public_key_equal returns true iff the byte-encodings of pub_key and
+// other are equal.  Panics if either key is uninitialized.
+public_key_equal :: proc(pub_key, other: ^Public_Key) -> bool {
+	both_initialized := pub_key._is_initialized && other._is_initialized
+	if !both_initialized {
+		panic("crypto/ed25519: uninitialized public key")
+	}
+
+	is_equal := crypto.compare_constant_time(pub_key._b[:], other._b[:])
+	return is_equal == 1
+}
+
+// verify returns true iff sig is a valid signature by pub_key over msg.
+//
+// An uninitialized pub_key or a sig that is not SIGNATURE_SIZE bytes
+// results in false being returned, rather than a panic.
+//
+// The optional `allow_small_order_A` parameter will make this
+// implementation strictly compatible with FIPS 186-5, at the expense of
+// SBS-security.  Doing so is NOT recommended, and the disallowed
+// public keys all have a known discrete-log.
+verify :: proc(pub_key: ^Public_Key, msg, sig: []byte, allow_small_order_A := false) -> bool {
+	switch {
+	case !pub_key._is_initialized:
+		return false
+	case len(sig) != SIGNATURE_SIZE:
+		return false
+	}
+
+	// TLDR: Just use ristretto255.
+	//
+	// While there are two "standards" for EdDSA, existing implementations
+	// diverge (sometimes dramatically).  This implementation opts for
+	// "Algorithm 2" from "Taming the Many EdDSAs", which provides the
+	// strongest notion of security (SUF-CMA + SBS).
+	//
+	// The relevant properties are:
+	// - Reject non-canonical S.
+	// - Reject non-canonical A/R.
+	// - Reject small-order A (Extra non-standard check).
+	// - Cofactored verification equation.
+	//
+	// There are 19 possible non-canonical group element encodings of
+	// which:
+	// - 2 are small order
+	// - 10 are mixed order
+	// - 7 are not on the curve
+	//
+	// While historical implementations have been lax about enforcing
+	// that A/R are canonically encoded, that behavior is mandated by
+	// both the RFC and FIPS specification.  No valid key generation
+	// or sign implementation will ever produce non-canonically encoded
+	// public keys or signatures.
+	//
+	// There are 8 small-order group elements, 1 of which is in the
+	// prime-order sub-group, and thus the probability that a properly
+	// generated A is small-order is cryptographically insignificant.
+	//
+	// Both the RFC and FIPS standard allow for either the cofactored
+	// or non-cofactored equation, and it is possible to artificially
+	// produce signatures that are valid for the former but not the
+	// latter.  This will NEVER occur with a valid sign implementation.
+	// The cofactored equation is used here (see step 5), to be
+	// compatible with ABGLSV-Pornin, batch verification, and FROST
+	// (among other things).
+
+	// The signature is R || S.
+	s_bytes, r_bytes := sig[32:], sig[:32]
+
+	// 1. Reject the signature if S is not in the range [0, L).
+	s: grp.Scalar = ---
+	if !grp.sc_set_bytes(&s, s_bytes) {
+		return false
+	}
+
+	// 2. Reject the signature if the public key A is one of 8 small
+	// order points.
+	//
+	// As this check is optional and not part of the standard, we allow
+	// the caller to bypass it if desired.  Disabling the check makes
+	// the scheme NOT SBS-secure.
+	if !pub_key._is_valid && !allow_small_order_A {
+		return false
+	}
+
+	// 3. Reject the signature if A or R are non-canonical.
+	//
+	// Note: All initialized public keys are guaranteed to be canonical.
+	neg_R: grp.Group_Element = ---
+	if !grp.ge_set_bytes(&neg_R, r_bytes) {
+		return false
+	}
+	grp.ge_negate(&neg_R, &neg_R)
+
+	// 4. Compute the hash SHA512(R||A||M) and reduce it mod L to get a
+	// scalar h.
+	ctx: sha2.Context_512 = ---
+	h_bytes: [sha2.DIGEST_SIZE_512]byte = ---
+	sha2.init_512(&ctx)
+	sha2.update(&ctx, r_bytes)
+	sha2.update(&ctx, pub_key._b[:])
+	sha2.update(&ctx, msg)
+	sha2.final(&ctx, h_bytes[:])
+
+	h: grp.Scalar = ---
+	grp.sc_set_bytes_wide(&h, &h_bytes)
+
+	// 5. Accept if 8(s * G) - 8R - 8(h * A) = 0
+	//
+	// > first compute V = SB − R − hA and then accept if V is one of
+	// > 8 small order points (or alternatively compute 8V with 3
+	// > doublings and check against the neutral element)
+	//
+	// The subtractions are done by adding the negated points: the
+	// public key caches -A, and R was negated in step 3.
+	V: grp.Group_Element = ---
+	grp.ge_double_scalarmult_basepoint_vartime(&V, &h, &pub_key._neg_A, &s)
+	grp.ge_add(&V, &V, &neg_R)
+
+	return grp.ge_is_small_order(&V)
+}

+ 1 - 1
core/crypto/poly1305/poly1305.odin

@@ -168,7 +168,7 @@ reset :: proc(ctx: ^Context) {
 }
 }
 
 
 @(private)
 @(private)
-_blocks :: proc(ctx: ^Context, msg: []byte, final := false) {
+_blocks :: proc "contextless" (ctx: ^Context, msg: []byte, final := false) {
 	n: field.Tight_Field_Element = ---
 	n: field.Tight_Field_Element = ---
 	final_byte := byte(!final)
 	final_byte := byte(!final)
 
 

+ 4 - 0
core/crypto/rand_bsd.odin

@@ -10,3 +10,7 @@ foreign libc {
 _rand_bytes :: proc(dst: []byte) {
 _rand_bytes :: proc(dst: []byte) {
 	arc4random_buf(raw_data(dst), len(dst))
 	arc4random_buf(raw_data(dst), len(dst))
 }
 }
+
+// _has_rand_bytes reports that this target provides a `_rand_bytes`
+// implementation (backed by `arc4random_buf` in this file).
+_has_rand_bytes :: proc () -> bool {
+	return true
+}

+ 4 - 0
core/crypto/rand_darwin.odin

@@ -10,3 +10,7 @@ _rand_bytes :: proc(dst: []byte) {
 		panic(fmt.tprintf("crypto/rand_bytes: SecRandomCopyBytes returned non-zero result: %v %s", res, msg))
 		panic(fmt.tprintf("crypto/rand_bytes: SecRandomCopyBytes returned non-zero result: %v %s", res, msg))
 	}
 	}
 }
 }
+
+// _has_rand_bytes reports that this target provides a `_rand_bytes`
+// implementation.
+_has_rand_bytes :: proc () -> bool {
+	return true
+}

+ 4 - 0
core/crypto/rand_generic.odin

@@ -9,3 +9,7 @@ package crypto
 _rand_bytes :: proc(dst: []byte) {
 _rand_bytes :: proc(dst: []byte) {
 	unimplemented("crypto: rand_bytes not supported on this OS")
 	unimplemented("crypto: rand_bytes not supported on this OS")
 }
 }
+
+// _has_rand_bytes returns false: the generic `_rand_bytes` in this file
+// is `unimplemented`, so callers should check this before using it.
+_has_rand_bytes :: proc () -> bool {
+	return false
+}

+ 4 - 0
core/crypto/rand_js.odin

@@ -18,3 +18,7 @@ _rand_bytes :: proc(dst: []byte) {
 		dst = dst[to_read:]
 		dst = dst[to_read:]
 	}
 	}
 }
 }
+
+// _has_rand_bytes reports that this target provides a `_rand_bytes`
+// implementation.
+_has_rand_bytes :: proc () -> bool {
+	return true
+}

+ 4 - 0
core/crypto/rand_linux.odin

@@ -34,3 +34,7 @@ _rand_bytes :: proc (dst: []byte) {
 		dst = dst[n_read:]
 		dst = dst[n_read:]
 	}
 	}
 }
 }
+
+// _has_rand_bytes reports that this target provides a `_rand_bytes`
+// implementation.
+_has_rand_bytes :: proc () -> bool {
+	return true
+}

+ 4 - 0
core/crypto/rand_windows.odin

@@ -21,3 +21,7 @@ _rand_bytes :: proc(dst: []byte) {
 		}
 		}
 	}
 	}
 }
 }
+
+// _has_rand_bytes reports that this target provides a `_rand_bytes`
+// implementation.
+_has_rand_bytes :: proc () -> bool {
+	return true
+}

+ 510 - 0
core/crypto/ristretto255/ristretto255.odin

@@ -0,0 +1,510 @@
+/*
+package ristretto255 implements the ristretto255 prime-order group.
+
+See:
+- https://www.rfc-editor.org/rfc/rfc9496
+*/
+package ristretto255
+
+import grp "core:crypto/_edwards25519"
+import field "core:crypto/_fiat/field_curve25519"
+import "core:mem"
+
+// ELEMENT_SIZE is the size of a byte-encoded ristretto255 group element.
+ELEMENT_SIZE :: 32
+// WIDE_ELEMENT_SIZE is the size of a wide byte-encoded ristretto255
+// group element.
+WIDE_ELEMENT_SIZE :: 64
+
+// Precomputed field constants used by the encode/decode/map routines
+// in this file.  Each constant is stored as five limbs; FE_NEG_ONE's
+// limbs are (2^51 - 20) followed by four (2^51 - 1), i.e. p - 1 for
+// p = 2^255 - 19 in radix 2^51.  The remaining names mirror the
+// constants referenced in the RFC 9496 pseudocode quoted below.
+
+// FE_NEG_ONE is -1 (mod p); selected as `c` in ge_map.
+@(private)
+FE_NEG_ONE := field.Tight_Field_Element {
+	2251799813685228,
+	2251799813685247,
+	2251799813685247,
+	2251799813685247,
+	2251799813685247,
+}
+// FE_INVSQRT_A_MINUS_D is INVSQRT_A_MINUS_D; used by ge_bytes to build
+// the "enchanted denominator".
+@(private)
+FE_INVSQRT_A_MINUS_D := field.Tight_Field_Element {
+	278908739862762,
+	821645201101625,
+	8113234426968,
+	1777959178193151,
+	2118520810568447,
+}
+// FE_ONE_MINUS_D_SQ is ONE_MINUS_D_SQ; used by ge_map to compute `u`.
+@(private)
+FE_ONE_MINUS_D_SQ := field.Tight_Field_Element {
+	1136626929484150,
+	1998550399581263,
+	496427632559748,
+	118527312129759,
+	45110755273534,
+}
+// FE_D_MINUS_ONE_SQUARED is D_MINUS_ONE_SQ; used by ge_map to compute `N`.
+@(private)
+FE_D_MINUS_ONE_SQUARED := field.Tight_Field_Element {
+	1507062230895904,
+	1572317787530805,
+	683053064812840,
+	317374165784489,
+	1572899562415810,
+}
+// FE_SQRT_AD_MINUS_ONE is SQRT_AD_MINUS_ONE; used by ge_map to compute `w1`.
+@(private)
+FE_SQRT_AD_MINUS_ONE := field.Tight_Field_Element {
+	2241493124984347,
+	425987919032274,
+	2207028919301688,
+	1220490630685848,
+	974799131293748,
+}
+// GE_IDENTITY is the identity element, pre-marked as initialized so it
+// can be passed to ge_equal (see ge_is_identity).
+@(private)
+GE_IDENTITY := Group_Element{grp.GE_IDENTITY, true}
+
+// Group_Element is a ristretto255 group element.  The zero-initialized
+// value is invalid.
+Group_Element :: struct {
+	// WARNING: While the internal representation is an Edwards25519
+	// group element, this is not guaranteed to always be the case,
+	// and your code *WILL* break if you mess with `_p`.
+	_p:              grp.Group_Element,
+	// Set to true by every routine in this package that writes a valid
+	// element; routines that read an element panic when it is false.
+	_is_initialized: bool,
+}
+
+// ge_clear clears ge to the uninitialized state by explicitly zeroing
+// the entire structure, which also resets the initialized flag.
+ge_clear :: proc "contextless" (ge: ^Group_Element) {
+	mem.zero_explicit(ge, size_of(Group_Element))
+}
+
+// ge_set sets `ge = a`.  Panics if a is uninitialized.
+ge_set :: proc(ge, a: ^Group_Element) {
+	_ge_assert_initialized([]^Group_Element{a})
+
+	grp.ge_set(&ge._p, &a._p)
+	ge._is_initialized = true
+}
+
+// ge_identity sets ge to the identity (neutral) element and marks it
+// initialized.
+ge_identity :: proc "contextless" (ge: ^Group_Element) {
+	grp.ge_identity(&ge._p)
+	ge._is_initialized = true
+}
+
+// ge_generator sets ge to the group generator and marks it initialized.
+ge_generator :: proc "contextless" (ge: ^Group_Element) {
+	grp.ge_generator(&ge._p)
+	ge._is_initialized = true
+}
+
+// ge_set_bytes sets ge to the result of decoding b as a ristretto255
+// group element, and returns true on success.
+//
+// Decoding fails (returns false) if b is not ELEMENT_SIZE bytes, if the
+// encoded field element is non-canonical or negative, or if any of the
+// checks in step 4 below reject it.  ge is only written once every
+// check has passed, so it is left untouched on failure.
+@(require_results)
+ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool {
+	// 1.  Interpret the string as an unsigned integer s in little-endian
+	//     representation.  If the length of the string is not 32 bytes or
+	//     if the resulting value is >= p, decoding fails.
+	//
+	// 2.  If IS_NEGATIVE(s) returns TRUE, decoding fails.
+
+	if len(b) != ELEMENT_SIZE {
+		return false
+	}
+	if b[31] & 128 != 0 || b[0] & 1 != 0 {
+		// Fail early if b is clearly > p, or negative.
+		return false
+	}
+
+	// Safe to view b as a fixed-size array: the length was checked above.
+	b_ := transmute(^[32]byte)(raw_data(b))
+
+	s: field.Tight_Field_Element = ---
+	defer field.fe_clear(&s)
+
+	field.fe_from_bytes(&s, b_)
+	if field.fe_equal_bytes(&s, b_) != 1 {
+		// Reject non-canonical encodings of s.
+		return false
+	}
+
+	// 3.  Process s as follows:
+	v, u1, u2: field.Loose_Field_Element = ---, ---, ---
+	tmp, u2_sqr: field.Tight_Field_Element = ---, ---
+
+	// ss = s^2
+	// u1 = 1 - ss
+	// u2 = 1 + ss
+	// u2_sqr = u2^2
+	field.fe_carry_square(&tmp, field.fe_relax_cast(&s))
+	field.fe_sub(&u1, &field.FE_ONE, &tmp)
+	field.fe_add(&u2, &field.FE_ONE, &tmp)
+	field.fe_carry_square(&u2_sqr, &u2)
+
+	// v = -(D * u1^2) - u2_sqr
+	field.fe_carry_square(&tmp, &u1)
+	field.fe_carry_mul(&tmp, field.fe_relax_cast(&grp.FE_D), field.fe_relax_cast(&tmp))
+	field.fe_carry_add(&tmp, &tmp, &u2_sqr)
+	field.fe_opp(&v, &tmp)
+
+	// (was_square, invsqrt) = SQRT_RATIO_M1(1, v * u2_sqr)
+	// Note: tmp is reused to hold invsqrt from here on.
+	field.fe_carry_mul(&tmp, &v, field.fe_relax_cast(&u2_sqr))
+	was_square := field.fe_carry_sqrt_ratio_m1(
+		&tmp,
+		field.fe_relax_cast(&field.FE_ONE),
+		field.fe_relax_cast(&tmp),
+	)
+
+	// den_x = invsqrt * u2
+	// den_y = invsqrt * den_x * v
+	x, y, t: field.Tight_Field_Element = ---, ---, ---
+	field.fe_carry_mul(&x, field.fe_relax_cast(&tmp), &u2)
+	field.fe_carry_mul(&y, field.fe_relax_cast(&tmp), field.fe_relax_cast(&x))
+	field.fe_carry_mul(&y, field.fe_relax_cast(&y), &v)
+
+	// x = CT_ABS(2 * s * den_x)
+	field.fe_carry_mul(&x, field.fe_relax_cast(&s), field.fe_relax_cast(&x))
+	field.fe_carry_add(&x, &x, &x)
+	field.fe_carry_abs(&x, &x)
+
+	// y = u1 * den_y
+	field.fe_carry_mul(&y, &u1, field.fe_relax_cast(&y))
+
+	// t = x * y
+	field.fe_carry_mul(&t, field.fe_relax_cast(&x), field.fe_relax_cast(&y))
+
+	// The temporaries are no longer needed; x, y and t are still read
+	// below, so their clearing is deferred to procedure exit.
+	field.fe_clear_vec([]^field.Loose_Field_Element{&v, &u1, &u2})
+	field.fe_clear_vec([]^field.Tight_Field_Element{&tmp, &u2_sqr})
+	defer field.fe_clear_vec([]^field.Tight_Field_Element{&x, &y, &t})
+
+	// 4.  If was_square is FALSE, IS_NEGATIVE(t) returns TRUE, or y = 0,
+	// decoding fails.  Otherwise, return the group element represented
+	// by the internal representation (x, y, 1, t) as the result of
+	// decoding.
+
+	switch {
+	case was_square == 0:
+		// Not sure why the RFC doesn't have this just fail early.
+		return false
+	case field.fe_is_negative(&t) != 0:
+		return false
+	case field.fe_equal(&y, &field.FE_ZERO) != 0:
+		return false
+	}
+
+	field.fe_set(&ge._p.x, &x)
+	field.fe_set(&ge._p.y, &y)
+	field.fe_one(&ge._p.z)
+	field.fe_set(&ge._p.t, &t)
+	ge._is_initialized = true
+
+	return true
+}
+
+// ge_set_wide_bytes sets ge to the result of deriving a ristretto255
+// group element, from a wide (512-bit) byte string.
+//
+// Panics if b is not exactly WIDE_ELEMENT_SIZE bytes.
+ge_set_wide_bytes :: proc(ge: ^Group_Element, b: []byte) {
+	if len(b) != WIDE_ELEMENT_SIZE {
+		panic("crypto/ristretto255: invalid wide input size")
+	}
+
+	// The element derivation function on an input string b proceeds as
+	// follows:
+	//
+	// 1.  Compute P1 as MAP(b[0:32]).
+	// 2.  Compute P2 as MAP(b[32:64]).
+	// 3.  Return P1 + P2.
+
+	p1, p2: Group_Element = ---, ---
+	ge_map(&p1, b[0:32])
+	ge_map(&p2, b[32:64])
+
+	ge_add(ge, &p1, &p2)
+
+	// Wipe the intermediate points now that their sum is stored in ge.
+	ge_clear(&p1)
+	ge_clear(&p2)
+}
+
+// ge_bytes sets dst to the canonical encoding of ge.
+//
+// Panics if ge is uninitialized or if dst is not exactly ELEMENT_SIZE
+// bytes.  All intermediate field elements are cleared before returning.
+ge_bytes :: proc(ge: ^Group_Element, dst: []byte) {
+	_ge_assert_initialized([]^Group_Element{ge})
+	if len(dst) != ELEMENT_SIZE {
+		panic("crypto/ristretto255: invalid destination size")
+	}
+
+	x0, y0, z0, t0 := &ge._p.x, &ge._p.y, &ge._p.z, &ge._p.t
+
+	// 1.  Process the internal representation into a field element s as
+	// follows:
+
+	// u1 = (z0 + y0) * (z0 - y0)
+	// u2 = x0 * y0
+	u1, u2: field.Tight_Field_Element = ---, ---
+	tmp1, tmp2: field.Loose_Field_Element = ---, ---
+	field.fe_add(&tmp1, z0, y0)
+	field.fe_sub(&tmp2, z0, y0)
+	field.fe_carry_mul(&u1, &tmp1, &tmp2)
+	field.fe_carry_mul(&u2, field.fe_relax_cast(x0), field.fe_relax_cast(y0))
+
+	// Ignore was_square since this is always square.
+	// (_, invsqrt) = SQRT_RATIO_M1(1, u1 * u2^2)
+	tmp: field.Tight_Field_Element = ---
+	field.fe_carry_square(&tmp, field.fe_relax_cast(&u2))
+	field.fe_carry_mul(&tmp, field.fe_relax_cast(&u1), field.fe_relax_cast(&tmp))
+	_ = field.fe_carry_sqrt_ratio_m1(
+		&tmp,
+		field.fe_relax_cast(&field.FE_ONE),
+		field.fe_relax_cast(&tmp),
+	)
+
+	// den1 = invsqrt * u1
+	// den2 = invsqrt * u2
+	// z_inv = den1 * den2 * t0
+	// Note: den1/den2 alias u1/u2, which are overwritten in place.
+	den1, den2 := &u1, &u2
+	z_inv: field.Tight_Field_Element = ---
+	field.fe_carry_mul(den1, field.fe_relax_cast(&tmp), field.fe_relax_cast(&u1))
+	field.fe_carry_mul(den2, field.fe_relax_cast(&tmp), field.fe_relax_cast(&u2))
+	field.fe_carry_mul(&z_inv, field.fe_relax_cast(den1), field.fe_relax_cast(den2))
+	field.fe_carry_mul(&z_inv, field.fe_relax_cast(&z_inv), field.fe_relax_cast(t0))
+
+	// rotate = IS_NEGATIVE(t0 * z_inv)
+	// Note: Reordered from the RFC because invsqrt is no longer needed.
+	field.fe_carry_mul(&tmp, field.fe_relax_cast(t0), field.fe_relax_cast(&z_inv))
+	rotate := field.fe_is_negative(&tmp)
+
+	// ix0 = x0 * SQRT_M1
+	// iy0 = y0 * SQRT_M1
+	// enchanted_denominator = den1 * INVSQRT_A_MINUS_D
+	ix0, iy0: field.Tight_Field_Element = ---, ---
+	field.fe_carry_mul(&ix0, field.fe_relax_cast(x0), field.fe_relax_cast(&field.FE_SQRT_M1))
+	field.fe_carry_mul(&iy0, field.fe_relax_cast(y0), field.fe_relax_cast(&field.FE_SQRT_M1))
+	field.fe_carry_mul(&tmp, field.fe_relax_cast(den1), field.fe_relax_cast(&FE_INVSQRT_A_MINUS_D))
+
+	// Conditionally rotate x and y.
+	// x = CT_SELECT(iy0 IF rotate ELSE x0)
+	// y = CT_SELECT(ix0 IF rotate ELSE y0)
+	// z = z0
+	// den_inv = CT_SELECT(enchanted_denominator IF rotate ELSE den2)
+	// Note: from here on tmp holds den_inv.
+	x, y: field.Tight_Field_Element = ---, ---
+	field.fe_cond_select(&x, x0, &iy0, rotate)
+	field.fe_cond_select(&y, y0, &ix0, rotate)
+	field.fe_cond_select(&tmp, den2, &tmp, rotate)
+
+	// y = CT_SELECT(-y IF IS_NEGATIVE(x * z_inv) ELSE y)
+	field.fe_carry_mul(&x, field.fe_relax_cast(&x), field.fe_relax_cast(&z_inv))
+	field.fe_cond_negate(&y, &y, field.fe_is_negative(&x))
+
+	// s = CT_ABS(den_inv * (z - y))
+	field.fe_sub(&tmp1, z0, &y)
+	field.fe_carry_mul(&tmp, field.fe_relax_cast(&tmp), &tmp1)
+	field.fe_carry_abs(&tmp, &tmp)
+
+	// 2.  Return the 32-byte little-endian encoding of s.  More
+	// specifically, this is the encoding of the canonical
+	// representation of s as an integer between 0 and p-1, inclusive.
+	dst_ := transmute(^[32]byte)(raw_data(dst))
+	field.fe_to_bytes(dst_, &tmp)
+
+	field.fe_clear_vec([]^field.Tight_Field_Element{&u1, &u2, &tmp, &z_inv, &ix0, &iy0, &x, &y})
+	field.fe_clear_vec([]^field.Loose_Field_Element{&tmp1, &tmp2})
+}
+
+// ge_add sets `ge = a + b`.  Panics if a or b is uninitialized.
+ge_add :: proc(ge, a, b: ^Group_Element) {
+	_ge_assert_initialized([]^Group_Element{a, b})
+
+	grp.ge_add(&ge._p, &a._p, &b._p)
+	ge._is_initialized = true
+}
+
+// ge_double sets `ge = a + a`.  Panics if a is uninitialized.
+ge_double :: proc(ge, a: ^Group_Element) {
+	_ge_assert_initialized([]^Group_Element{a})
+
+	grp.ge_double(&ge._p, &a._p)
+	ge._is_initialized = true
+}
+
+// ge_negate sets `ge = -a`.  Panics if a is uninitialized.
+ge_negate :: proc(ge, a: ^Group_Element) {
+	_ge_assert_initialized([]^Group_Element{a})
+
+	grp.ge_negate(&ge._p, &a._p)
+	ge._is_initialized = true
+}
+
+// ge_scalarmult sets `ge = A * sc`.  Panics if A is uninitialized.
+ge_scalarmult :: proc(ge, A: ^Group_Element, sc: ^Scalar) {
+	_ge_assert_initialized([]^Group_Element{A})
+
+	grp.ge_scalarmult(&ge._p, &A._p, sc)
+	ge._is_initialized = true
+}
+
+// ge_scalarmult_generator sets `ge = G * sc`, where G is the group
+// generator (the underlying basepoint multiply is used).
+ge_scalarmult_generator :: proc "contextless" (ge: ^Group_Element, sc: ^Scalar) {
+	grp.ge_scalarmult_basepoint(&ge._p, sc)
+	ge._is_initialized = true
+}
+
+// ge_scalarmult_vartime sets `ge = A * sc` in variable time.  Panics if
+// A is uninitialized.
+ge_scalarmult_vartime :: proc(ge, A: ^Group_Element, sc: ^Scalar) {
+	_ge_assert_initialized([]^Group_Element{A})
+
+	grp.ge_scalarmult_vartime(&ge._p, &A._p, sc)
+	ge._is_initialized = true
+}
+
+// ge_double_scalarmult_generator_vartime sets `ge = A * a + G * b` in variable
+// time.  Panics if A is uninitialized.
+ge_double_scalarmult_generator_vartime :: proc(
+	ge: ^Group_Element,
+	a: ^Scalar,
+	A: ^Group_Element,
+	b: ^Scalar,
+) {
+	_ge_assert_initialized([]^Group_Element{A})
+
+	grp.ge_double_scalarmult_basepoint_vartime(&ge._p, a, &A._p, b)
+	ge._is_initialized = true
+}
+
+// ge_cond_negate sets `ge = a` iff `ctrl == 0` and `ge = -a` iff `ctrl == 1`.
+// Behavior for all other values of ctrl is undefined.  Panics if a is
+// uninitialized.
+ge_cond_negate :: proc(ge, a: ^Group_Element, ctrl: int) {
+	_ge_assert_initialized([]^Group_Element{a})
+
+	grp.ge_cond_negate(&ge._p, &a._p, ctrl)
+	ge._is_initialized = true
+}
+
+// ge_cond_assign sets `ge = ge` iff `ctrl == 0` and `ge = a` iff `ctrl == 1`.
+// Behavior for all other values of ctrl is undefined.  Panics if ge or
+// a is uninitialized (ge must already hold a valid element, since it may
+// be kept as-is).
+ge_cond_assign :: proc(ge, a: ^Group_Element, ctrl: int) {
+	_ge_assert_initialized([]^Group_Element{ge, a})
+
+	grp.ge_cond_assign(&ge._p, &a._p, ctrl)
+}
+
+// ge_cond_select sets `ge = a` iff `ctrl == 0` and `ge = b` iff `ctrl == 1`.
+// Behavior for all other values of ctrl is undefined.  Panics if a or b
+// is uninitialized.
+ge_cond_select :: proc(ge, a, b: ^Group_Element, ctrl: int) {
+	_ge_assert_initialized([]^Group_Element{a, b})
+
+	grp.ge_cond_select(&ge._p, &a._p, &b._p, ctrl)
+	ge._is_initialized = true
+}
+
+// ge_equal returns 1 iff `a == b`, and 0 otherwise.  Panics if a or b is
+// uninitialized.
+//
+// The comparison is done on cross-products of the x and y coordinates
+// rather than on encodings, and the temporaries are cleared afterwards.
+@(require_results)
+ge_equal :: proc(a, b: ^Group_Element) -> int {
+	_ge_assert_initialized([]^Group_Element{a, b})
+
+	// CT_EQ(x1 * y2, y1 * x2) | CT_EQ(y1 * y2, x1 * x2)
+	ax_by, ay_bx, ay_by, ax_bx: field.Tight_Field_Element = ---, ---, ---, ---
+	field.fe_carry_mul(&ax_by, field.fe_relax_cast(&a._p.x), field.fe_relax_cast(&b._p.y))
+	field.fe_carry_mul(&ay_bx, field.fe_relax_cast(&a._p.y), field.fe_relax_cast(&b._p.x))
+	field.fe_carry_mul(&ay_by, field.fe_relax_cast(&a._p.y), field.fe_relax_cast(&b._p.y))
+	field.fe_carry_mul(&ax_bx, field.fe_relax_cast(&a._p.x), field.fe_relax_cast(&b._p.x))
+
+	ret := field.fe_equal(&ax_by, &ay_bx) | field.fe_equal(&ay_by, &ax_bx)
+
+	field.fe_clear_vec([]^field.Tight_Field_Element{&ax_by, &ay_bx, &ay_by, &ax_bx})
+
+	return ret
+}
+
+// ge_is_identity returns 1 iff `ge` is the identity element, and 0 otherwise.
+// Implemented as ge_equal against GE_IDENTITY, so it panics if ge is
+// uninitialized.
+@(require_results)
+ge_is_identity :: proc(ge: ^Group_Element) -> int {
+	return ge_equal(ge, &GE_IDENTITY)
+}
+
+// ge_map implements the MAP step of the element derivation function
+// quoted below, writing the resulting element to ge and marking it
+// initialized.
+//
+// b is reinterpreted as a 32-byte array without a length check; the
+// caller (ge_set_wide_bytes) always passes exact 32-byte sub-slices.
+// Every intermediate field element, including the decoded input t, is
+// cleared before returning.
+@(private)
+ge_map :: proc "contextless" (ge: ^Group_Element, b: []byte) {
+	b_ := transmute(^[32]byte)(raw_data(b))
+
+	// The MAP function is defined on 32-byte strings as:
+	//
+	// 1.  Mask the most significant bit in the final byte of the string,
+	// and interpret the string as an unsigned integer r in little-
+	// endian representation.  Reduce r modulo p to obtain a field
+	// element t.
+	// *  Masking the most significant bit is equivalent to interpreting
+	// the whole string as an unsigned integer in little-endian
+	// representation and then reducing it modulo 2^255.
+	t: field.Tight_Field_Element = ---
+	field.fe_from_bytes(&t, b_)
+
+	// 2.  Process t as follows:
+	//
+	// r = SQRT_M1 * t^2
+	// u = (r + 1) * ONE_MINUS_D_SQ
+	// v = (-1 - r*D) * (r + D)
+	tmp1: field.Loose_Field_Element = ---
+	r, u, v: field.Tight_Field_Element = ---, ---, ---
+
+	field.fe_carry_square(&r, field.fe_relax_cast(&t))
+	field.fe_carry_mul(&r, field.fe_relax_cast(&field.FE_SQRT_M1), field.fe_relax_cast(&r))
+
+	field.fe_add(&tmp1, &field.FE_ONE, &r)
+	field.fe_carry_mul(&u, &tmp1, field.fe_relax_cast(&FE_ONE_MINUS_D_SQ))
+
+	field.fe_carry_mul(&v, field.fe_relax_cast(&r), field.fe_relax_cast(&grp.FE_D))
+	field.fe_carry_add(&v, &field.FE_ONE, &v)
+	field.fe_carry_opp(&v, &v)
+	field.fe_add(&tmp1, &r, &grp.FE_D)
+	field.fe_carry_mul(&v, field.fe_relax_cast(&v), &tmp1)
+
+	// (was_square, s) = SQRT_RATIO_M1(u, v)
+	// s_prime = -CT_ABS(s*t)
+	// s = CT_SELECT(s IF was_square ELSE s_prime)
+	// c = CT_SELECT(-1 IF was_square ELSE r)
+	s, s_prime, c: field.Tight_Field_Element = ---, ---, ---
+	was_square := field.fe_carry_sqrt_ratio_m1(
+		&s,
+		field.fe_relax_cast(&u),
+		field.fe_relax_cast(&v),
+	)
+	field.fe_carry_mul(&s_prime, field.fe_relax_cast(&s), field.fe_relax_cast(&t))
+	field.fe_carry_abs(&s_prime, &s_prime)
+	field.fe_carry_opp(&s_prime, &s_prime)
+	field.fe_cond_select(&s, &s_prime, &s, was_square)
+	field.fe_cond_select(&c, &r, &FE_NEG_ONE, was_square)
+
+	// N = c * (r - 1) * D_MINUS_ONE_SQ - v
+	N: field.Tight_Field_Element = ---
+	field.fe_sub(&tmp1, &r, &field.FE_ONE)
+	field.fe_carry_mul(&N, field.fe_relax_cast(&c), &tmp1)
+	field.fe_carry_mul(&N, field.fe_relax_cast(&N), field.fe_relax_cast(&FE_D_MINUS_ONE_SQUARED))
+	field.fe_carry_sub(&N, &N, &v)
+
+	// w0 = 2 * s * v
+	// w1 = N * SQRT_AD_MINUS_ONE
+	// w2 = 1 - s^2
+	// w3 = 1 + s^2
+	w0, w1: field.Tight_Field_Element = ---, ---
+	w2, w3: field.Loose_Field_Element = ---, ---
+	field.fe_carry_mul(&w0, field.fe_relax_cast(&s), field.fe_relax_cast(&v))
+	field.fe_carry_add(&w0, &w0, &w0)
+	field.fe_carry_mul(&w1, field.fe_relax_cast(&N), field.fe_relax_cast(&FE_SQRT_AD_MINUS_ONE))
+	// s is overwritten with s^2 here; the original s is no longer needed.
+	field.fe_carry_square(&s, field.fe_relax_cast(&s))
+	field.fe_sub(&w2, &field.FE_ONE, &s)
+	field.fe_add(&w3, &field.FE_ONE, &s)
+
+	// 3.  Return the group element represented by the internal
+	// representation (w0*w3, w2*w1, w1*w3, w0*w2).
+
+	field.fe_carry_mul(&ge._p.x, field.fe_relax_cast(&w0), &w3)
+	field.fe_carry_mul(&ge._p.y, &w2, field.fe_relax_cast(&w1))
+	field.fe_carry_mul(&ge._p.z, field.fe_relax_cast(&w1), &w3)
+	field.fe_carry_mul(&ge._p.t, field.fe_relax_cast(&w0), &w2)
+	ge._is_initialized = true
+
+	// Clear every temporary.  t is included because it is the decoded
+	// form of the (potentially secret) input, mirroring how ge_set_bytes
+	// clears its decoded input.
+	field.fe_clear_vec([]^field.Tight_Field_Element{&t, &r, &u, &v, &s, &s_prime, &c, &N, &w0, &w1})
+	field.fe_clear_vec([]^field.Loose_Field_Element{&tmp1, &w2, &w3})
+}
+
+// _ge_assert_initialized panics if any element in ges has not been
+// marked as initialized.
+@(private)
+_ge_assert_initialized :: proc(ges: []^Group_Element) {
+	for i in 0 ..< len(ges) {
+		if ges[i]._is_initialized {
+			continue
+		}
+		panic("crypto/ristretto255: uninitialized group element")
+	}
+}

+ 97 - 0
core/crypto/ristretto255/ristretto255_scalar.odin

@@ -0,0 +1,97 @@
+package ristretto255
+
+import grp "core:crypto/_edwards25519"
+
+// SCALAR_SIZE is the size of a byte-encoded ristretto255 scalar.
+SCALAR_SIZE :: 32
+// WIDE_SCALAR_SIZE is the size of a wide byte-encoded ristretto255
+// scalar.
+WIDE_SCALAR_SIZE :: 64
+
+// Scalar is a ristretto255 scalar.  The zero-initialized value is valid,
+// and represents `0`.
+//
+// It is an alias of the shared edwards25519 scalar type, and the
+// routines in this file delegate to that package.
+Scalar :: grp.Scalar
+
+// sc_clear clears sc to the uninitialized state.
+//
+// NOTE(review): Scalar's zero value is documented as valid (`0`), so
+// "uninitialized" presumably just means wiped - confirm against
+// grp.sc_clear.
+sc_clear :: proc "contextless" (sc: ^Scalar) {
+	grp.sc_clear(sc)
+}
+
+// sc_set sets `sc = a` (delegates to the edwards25519 scalar code).
+sc_set :: proc "contextless" (sc, a: ^Scalar) {
+	grp.sc_set(sc, a)
+}
+
+// sc_set_u64 sets `sc = i` (delegates to the edwards25519 scalar code).
+sc_set_u64 :: proc "contextless" (sc: ^Scalar, i: u64) {
+	grp.sc_set_u64(sc, i)
+}
+
+// sc_set_bytes decodes b as a ristretto255 scalar into sc.  It returns
+// true on success, and false if b is not exactly SCALAR_SIZE bytes or
+// the underlying decode rejects the encoding.
+@(require_results)
+sc_set_bytes :: proc(sc: ^Scalar, b: []byte) -> bool {
+	// `&&` short-circuits, so the decoder only ever sees correctly
+	// sized input.
+	return len(b) == SCALAR_SIZE && grp.sc_set_bytes(sc, b)
+}
+
+// sc_set_bytes_wide sets sc to the result of deriving a ristretto255
+// scalar, from a wide (512-bit) byte string by interpreting b as a
+// little-endian value, and reducing it mod the group order.
+//
+// Panics if b is not exactly WIDE_SCALAR_SIZE bytes.
+sc_set_bytes_wide :: proc(sc: ^Scalar, b: []byte) {
+	if len(b) != WIDE_SCALAR_SIZE {
+		panic("crypto/ristretto255: invalid wide input size")
+	}
+
+	// Safe to view b as a fixed-size array: the length was checked above.
+	b_ := transmute(^[WIDE_SCALAR_SIZE]byte)(raw_data(b))
+	grp.sc_set_bytes_wide(sc, b_)
+}
+
+// sc_bytes sets dst to the canonical encoding of sc.  Panics if dst is
+// not exactly SCALAR_SIZE bytes.
+sc_bytes :: proc(sc: ^Scalar, dst: []byte) {
+	if len(dst) != SCALAR_SIZE {
+		panic("crypto/ristretto255: invalid destination size")
+	}
+
+	grp.sc_bytes(dst, sc)
+}
+
+// sc_add sets `sc = a + b` (delegates to the edwards25519 scalar code).
+sc_add :: proc "contextless" (sc, a, b: ^Scalar) {
+	grp.sc_add(sc, a, b)
+}
+
+// sc_sub sets `sc = a - b` (delegates to the edwards25519 scalar code).
+sc_sub :: proc "contextless" (sc, a, b: ^Scalar) {
+	grp.sc_sub(sc, a, b)
+}
+
+// sc_negate sets `sc = -a` (delegates to the edwards25519 scalar code).
+sc_negate :: proc "contextless" (sc, a: ^Scalar) {
+	grp.sc_negate(sc, a)
+}
+
+// sc_mul sets `sc = a * b` (delegates to the edwards25519 scalar code).
+sc_mul :: proc "contextless" (sc, a, b: ^Scalar) {
+	grp.sc_mul(sc, a, b)
+}
+
+// sc_square sets `sc = a^2` (delegates to the edwards25519 scalar code).
+sc_square :: proc "contextless" (sc, a: ^Scalar) {
+	grp.sc_square(sc, a)
+}
+
+// sc_cond_assign sets `sc = sc` iff `ctrl == 0` and `sc = a` iff `ctrl == 1`.
+// Behavior for all other values of ctrl is undefined.
+sc_cond_assign :: proc(sc, a: ^Scalar, ctrl: int) {
+	grp.sc_cond_assign(sc, a, ctrl)
+}
+
+// sc_equal returns 1 iff `a == b`, and 0 otherwise (delegates to the
+// edwards25519 scalar code).
+@(require_results)
+sc_equal :: proc(a, b: ^Scalar) -> int {
+	return grp.sc_equal(a, b)
+}

+ 3 - 9
core/crypto/x25519/x25519.odin

@@ -27,7 +27,7 @@ _scalar_bit :: #force_inline proc "contextless" (s: ^[32]byte, i: int) -> u8 {
 }
 }
 
 
 @(private)
 @(private)
-_scalarmult :: proc(out, scalar, point: ^[32]byte) {
+_scalarmult :: proc "contextless" (out, scalar, point: ^[32]byte) {
 	// Montgomery pseduo-multiplication taken from Monocypher.
 	// Montgomery pseduo-multiplication taken from Monocypher.
 
 
 	// computes the scalar product
 	// computes the scalar product
@@ -94,13 +94,8 @@ _scalarmult :: proc(out, scalar, point: ^[32]byte) {
 	field.fe_carry_mul(&x2, field.fe_relax_cast(&x2), field.fe_relax_cast(&z2))
 	field.fe_carry_mul(&x2, field.fe_relax_cast(&x2), field.fe_relax_cast(&z2))
 	field.fe_to_bytes(out, &x2)
 	field.fe_to_bytes(out, &x2)
 
 
-	mem.zero_explicit(&x1, size_of(x1))
-	mem.zero_explicit(&x2, size_of(x2))
-	mem.zero_explicit(&x3, size_of(x3))
-	mem.zero_explicit(&z2, size_of(z2))
-	mem.zero_explicit(&z3, size_of(z3))
-	mem.zero_explicit(&t0, size_of(t0))
-	mem.zero_explicit(&t1, size_of(t1))
+	field.fe_clear_vec([]^field.Tight_Field_Element{&x1, &x2, &x3, &z2, &z3})
+	field.fe_clear_vec([]^field.Loose_Field_Element{&t0, &t1})
 }
 }
 
 
 // scalarmult "multiplies" the provided scalar and point, and writes the
 // scalarmult "multiplies" the provided scalar and point, and writes the
@@ -137,6 +132,5 @@ scalarmult :: proc(dst, scalar, point: []byte) {
 // scalarmult_basepoint "multiplies" the provided scalar with the X25519
 // scalarmult_basepoint "multiplies" the provided scalar with the X25519
 // base point and writes the resulting point to dst.
 // base point and writes the resulting point to dst.
 scalarmult_basepoint :: proc(dst, scalar: []byte) {
 scalarmult_basepoint :: proc(dst, scalar: []byte) {
-	// TODO/perf: Switch to using a precomputed table.
 	scalarmult(dst, scalar, _BASE_POINT[:])
 	scalarmult(dst, scalar, _BASE_POINT[:])
 }
 }

+ 4 - 0
examples/all/all_main.odin

@@ -29,6 +29,7 @@ import blake2s          "core:crypto/blake2s"
 import chacha20         "core:crypto/chacha20"
 import chacha20         "core:crypto/chacha20"
 import chacha20poly1305 "core:crypto/chacha20poly1305"
 import chacha20poly1305 "core:crypto/chacha20poly1305"
 import crypto_hash      "core:crypto/hash"
 import crypto_hash      "core:crypto/hash"
+import ed25519          "core:crypto/ed25519"
 import hkdf             "core:crypto/hkdf"
 import hkdf             "core:crypto/hkdf"
 import hmac             "core:crypto/hmac"
 import hmac             "core:crypto/hmac"
 import kmac             "core:crypto/kmac"
 import kmac             "core:crypto/kmac"
@@ -37,6 +38,7 @@ import md5              "core:crypto/legacy/md5"
 import sha1             "core:crypto/legacy/sha1"
 import sha1             "core:crypto/legacy/sha1"
 import pbkdf2           "core:crypto/pbkdf2"
 import pbkdf2           "core:crypto/pbkdf2"
 import poly1305         "core:crypto/poly1305"
 import poly1305         "core:crypto/poly1305"
+import ristretto255     "core:crypto/ristretto255"
 import sha2             "core:crypto/sha2"
 import sha2             "core:crypto/sha2"
 import sha3             "core:crypto/sha3"
 import sha3             "core:crypto/sha3"
 import shake            "core:crypto/shake"
 import shake            "core:crypto/shake"
@@ -151,6 +153,7 @@ _ :: blake2b
 _ :: blake2s
 _ :: blake2s
 _ :: chacha20
 _ :: chacha20
 _ :: chacha20poly1305
 _ :: chacha20poly1305
+_ :: ed25519
 _ :: hmac
 _ :: hmac
 _ :: hkdf
 _ :: hkdf
 _ :: kmac
 _ :: kmac
@@ -158,6 +161,7 @@ _ :: keccak
 _ :: md5
 _ :: md5
 _ :: pbkdf2
 _ :: pbkdf2
 _ :: poly1305
 _ :: poly1305
+_ :: ristretto255
 _ :: sha1
 _ :: sha1
 _ :: sha2
 _ :: sha2
 _ :: sha3
 _ :: sha3

+ 2 - 72
tests/core/crypto/test_core_crypto.odin

@@ -20,7 +20,6 @@ import "core:testing"
 import "core:crypto"
 import "core:crypto"
 import "core:crypto/chacha20"
 import "core:crypto/chacha20"
 import "core:crypto/chacha20poly1305"
 import "core:crypto/chacha20poly1305"
-import "core:crypto/x25519"
 
 
 import tc "tests:common"
 import tc "tests:common"
 
 
@@ -32,10 +31,10 @@ main :: proc() {
 	test_hash(&t)
 	test_hash(&t)
 	test_mac(&t)
 	test_mac(&t)
 	test_kdf(&t) // After hash/mac tests because those should pass first.
 	test_kdf(&t) // After hash/mac tests because those should pass first.
+	test_ecc25519(&t)
 
 
 	test_chacha20(&t)
 	test_chacha20(&t)
 	test_chacha20poly1305(&t)
 	test_chacha20poly1305(&t)
-	test_x25519(&t)
 	test_sha3_variants(&t)
 	test_sha3_variants(&t)
 
 
 	bench_crypto(&t)
 	bench_crypto(&t)
@@ -274,80 +273,11 @@ test_chacha20poly1305 :: proc(t: ^testing.T) {
 	tc.expect(t, !ok, "Expected false for decrypt(tag, corrupted_aad, ciphertext)")
 	tc.expect(t, !ok, "Expected false for decrypt(tag, corrupted_aad, ciphertext)")
 }
 }
 
 
-@(test)
-test_x25519 :: proc(t: ^testing.T) {
-	tc.log(t, "Testing X25519")
-
-	// Local copy of this so that the base point doesn't need to be exported.
-	_BASE_POINT: [32]byte =  {
-		9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	}
-
-	test_vectors := []struct{
-		scalar:  string,
-		point:   string,
-		product: string,
-	} {
-		// Test vectors from RFC 7748
-		{
-			"a546e36bf0527c9d3b16154b82465edd62144c0ac1fc5a18506a2244ba449ac4",
-			"e6db6867583030db3594c1a424b15f7c726624ec26b3353b10a903a6d0ab1c4c",
-			"c3da55379de9c6908e94ea4df28d084f32eccf03491c71f754b4075577a28552",
-		},
-		{
-			"4b66e9d4d1b4673c5ad22691957d6af5c11b6421e0ea01d42ca4169e7918ba0d",
-			"e5210f12786811d3f4b7959d0538ae2c31dbe7106fc03c3efc4cd549c715a493",
-			"95cbde9476e8907d7aade45cb4b873f88b595a68799fa152e6f8f7647aac7957",
-		},
-	}
-	for v, _ in test_vectors {
-		scalar, _ := hex.decode(transmute([]byte)(v.scalar), context.temp_allocator)
-		point, _ := hex.decode(transmute([]byte)(v.point), context.temp_allocator)
-
-		derived_point: [x25519.POINT_SIZE]byte
-		x25519.scalarmult(derived_point[:], scalar[:], point[:])
-		derived_point_str := string(hex.encode(derived_point[:], context.temp_allocator))
-
-		tc.expect(
-			t,
-			derived_point_str == v.product,
-			fmt.tprintf(
-				"Expected %s for %s * %s, but got %s instead",
-				v.product,
-				v.scalar,
-				v.point,
-				derived_point_str,
-			),
-		)
-
-		// Abuse the test vectors to sanity-check the scalar-basepoint multiply.
-		p1, p2: [x25519.POINT_SIZE]byte
-		x25519.scalarmult_basepoint(p1[:], scalar[:])
-		x25519.scalarmult(p2[:], scalar[:], _BASE_POINT[:])
-		p1_str := string(hex.encode(p1[:], context.temp_allocator))
-		p2_str := string(hex.encode(p2[:], context.temp_allocator))
-		tc.expect(
-			t,
-			p1_str == p2_str,
-			fmt.tprintf(
-				"Expected %s for %s * basepoint, but got %s instead",
-				p2_str,
-				v.scalar,
-				p1_str,
-			),
-		)
-	}
-
-	// TODO/tests: Run the wycheproof test vectors, once I figure out
-	// how to work with JSON.
-}
-
 @(test)
 @(test)
 test_rand_bytes :: proc(t: ^testing.T) {
 test_rand_bytes :: proc(t: ^testing.T) {
 	tc.log(t, "Testing rand_bytes")
 	tc.log(t, "Testing rand_bytes")
 
 
-	if ODIN_OS != .Linux {
+	if !crypto.has_rand_bytes() {
 		tc.log(t, "rand_bytes not supported - skipping")
 		tc.log(t, "rand_bytes not supported - skipping")
 		return
 		return
 	}
 	}

+ 766 - 0
tests/core/crypto/test_core_crypto_ecc25519.odin

@@ -0,0 +1,766 @@
+package test_core_crypto
+
+import "base:runtime"
+import "core:encoding/hex"
+import "core:fmt"
+import "core:testing"
+
+import field "core:crypto/_fiat/field_curve25519"
+import "core:crypto/ed25519"
+import "core:crypto/ristretto255"
+import "core:crypto/x25519"
+
+import tc "tests:common"
+
+// Umbrella test for the curve25519-based primitives. It runs the field
+// helper test, then ristretto255, ed25519, and X25519, in that order.
+@(test)
+test_ecc25519 :: proc(t: ^testing.T) {
+	// NOTE(review): presumably scopes temp-allocator use by the sub-tests to
+	// this procedure - confirm against base:runtime.
+	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
+	tc.log(t, "Testing curve25519 ECC")
+
+	// Lower-level building blocks.
+	test_sqrt_ratio_m1(t)
+	test_ristretto255(t)
+
+	// User-facing signature and key-exchange primitives.
+	test_ed25519(t)
+	test_x25519(t)
+}
+
+// Table-driven check of field.fe_carry_sqrt_ratio_m1. Each vector supplies
+// hex-encoded u and v, the expected result r, and the expected was-square
+// flag; both the flag and the resulting field element must match.
+@(test)
+test_sqrt_ratio_m1 :: proc(t: ^testing.T) {
+	tc.log(t, "Testing sqrt_ratio_m1")
+
+	test_vectors := []struct {
+		u: string,
+		v: string,
+		r: string,
+		was_square: bool,
+	} {
+		{
+			"0000000000000000000000000000000000000000000000000000000000000000",
+			"0000000000000000000000000000000000000000000000000000000000000000",
+			"0000000000000000000000000000000000000000000000000000000000000000",
+			true,
+		},
+		{
+			"0000000000000000000000000000000000000000000000000000000000000000",
+			"0100000000000000000000000000000000000000000000000000000000000000",
+			"0000000000000000000000000000000000000000000000000000000000000000",
+			true,
+		},
+		{
+			"0100000000000000000000000000000000000000000000000000000000000000",
+			"0000000000000000000000000000000000000000000000000000000000000000",
+			"0000000000000000000000000000000000000000000000000000000000000000",
+			false,
+		},
+		{
+			"0200000000000000000000000000000000000000000000000000000000000000",
+			"0100000000000000000000000000000000000000000000000000000000000000",
+			"3c5ff1b5d8e4113b871bd052f9e7bcd0582804c266ffb2d4f4203eb07fdb7c54",
+			false,
+		},
+		{
+			"0400000000000000000000000000000000000000000000000000000000000000",
+			"0100000000000000000000000000000000000000000000000000000000000000",
+			"0200000000000000000000000000000000000000000000000000000000000000",
+			true,
+		},
+		{
+			"0100000000000000000000000000000000000000000000000000000000000000",
+			"0400000000000000000000000000000000000000000000000000000000000000",
+			"f6ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff3f",
+			true,
+		},
+	}
+	for vec in test_vectors {
+		// Decode each operand and view the decoded buffer as a 32-byte array.
+		u_raw, _ := hex.decode(transmute([]byte)(vec.u), context.temp_allocator)
+		v_raw, _ := hex.decode(transmute([]byte)(vec.v), context.temp_allocator)
+		r_raw, _ := hex.decode(transmute([]byte)(vec.r), context.temp_allocator)
+
+		u_arr := transmute(^[32]byte)(raw_data(u_raw))
+		v_arr := transmute(^[32]byte)(raw_data(v_raw))
+		r_arr := transmute(^[32]byte)(raw_data(r_raw))
+
+		fe_u, fe_v, fe_r: field.Tight_Field_Element
+		field.fe_from_bytes(&fe_u, u_arr)
+		field.fe_from_bytes(&fe_v, v_arr)
+		square_flag := field.fe_carry_sqrt_ratio_m1(
+			&fe_r,
+			field.fe_relax_cast(&fe_u),
+			field.fe_relax_cast(&fe_v),
+		)
+
+		// The flag and the computed element both have to agree with the vector.
+		matches := (square_flag == 1) == vec.was_square && field.fe_equal_bytes(&fe_r, r_arr) == 1
+		report := fmt.tprintf(
+			"Expected (%v, %s) for SQRT_RATIO_M1(%s, %s), got %s",
+			vec.was_square,
+			vec.r,
+			vec.u,
+			vec.v,
+			fe_str(&fe_r),
+		)
+		tc.expect(t, matches, report)
+	}
+}
+
+// Exercises the ristretto255 group API in four stages:
+//   1. ge_set_bytes must reject every entry in the invalid-encoding table.
+//   2. The encodings of 0*G .. 15*G must decode, re-encode to the same
+//      bytes, and element 1 must equal the generator.
+//   3. Scalar multiplication (specialized, generic, and vartime) and
+//      repeated addition of the generator must reproduce those multiples.
+//   4. ge_set_wide_bytes must map 64-byte inputs to the expected elements.
+@(test)
+test_ristretto255 :: proc(t: ^testing.T) {
+	tc.log(t, "Testing ristretto255")
+
+	ge_gen: ristretto255.Group_Element
+	ristretto255.ge_generator(&ge_gen)
+
+	// Invalid encodings.
+	bad_encodings := []string {
+		// Non-canonical field encodings.
+		"00ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+		"ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7f",
+		"f3ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7f",
+		"edffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7f",
+
+		// Negative field elements.
+		"0100000000000000000000000000000000000000000000000000000000000000",
+		"01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7f",
+		"ed57ffd8c914fb201471d1c3d245ce3c746fcbe63a3679d51b6a516ebebe0e20",
+		"c34c4e1826e5d403b78e246e88aa051c36ccf0aafebffe137d148a2bf9104562",
+		"c940e5a4404157cfb1628b108db051a8d439e1a421394ec4ebccb9ec92a8ac78",
+		"47cfc5497c53dc8e61c91d17fd626ffb1c49e2bca94eed052281b510b1117a24",
+		"f1c6165d33367351b0da8f6e4511010c68174a03b6581212c71c0e1d026c3c72",
+		"87260f7a2f12495118360f02c26a470f450dadf34a413d21042b43b9d93e1309",
+
+		// Non-square x^2.
+		"26948d35ca62e643e26a83177332e6b6afeb9d08e4268b650f1f5bbd8d81d371",
+		"4eac077a713c57b4f4397629a4145982c661f48044dd3f96427d40b147d9742f",
+		"de6a7b00deadc788eb6b6c8d20c0ae96c2f2019078fa604fee5b87d6e989ad7b",
+		"bcab477be20861e01e4a0e295284146a510150d9817763caf1a6f4b422d67042",
+		"2a292df7e32cababbd9de088d1d1abec9fc0440f637ed2fba145094dc14bea08",
+		"f4a9e534fc0d216c44b218fa0c42d99635a0127ee2e53c712f70609649fdff22",
+		"8268436f8c4126196cf64b3c7ddbda90746a378625f9813dd9b8457077256731",
+		"2810e5cbc2cc4d4eece54f61c6f69758e289aa7ab440b3cbeaa21995c2f4232b",
+
+		// Negative x * y value.
+		"3eb858e78f5a7254d8c9731174a94f76755fd3941c0ac93735c07ba14579630e",
+		"a45fdc55c76448c049a1ab33f17023edfb2be3581e9c7aade8a6125215e04220",
+		"d483fe813c6ba647ebbfd3ec41adca1c6130c2beeee9d9bf065c8d151c5f396e",
+		"8a2e1d30050198c65a54483123960ccc38aef6848e1ec8f5f780e8523769ba32",
+		"32888462f8b486c68ad7dd9610be5192bbeaf3b443951ac1a8118419d9fa097b",
+		"227142501b9d4355ccba290404bde41575b037693cef1f438c47f8fbf35d1165",
+		"5c37cc491da847cfeb9281d407efc41e15144c876e0170b499a96a22ed31e01e",
+		"445425117cb8c90edcbc7c1cc0e74f747f2c1efa5630a967c64f287792a48a4b",
+
+		// s = -1, which causes y = 0.
+		"ecffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7f",
+	}
+	for x, _ in bad_encodings {
+		b, _ := hex.decode(transmute([]byte)(x), context.temp_allocator)
+
+		ge: ristretto255.Group_Element
+		ok := ristretto255.ge_set_bytes(&ge, b)
+		tc.expect(t, !ok, fmt.tprintf("Expected false for %s", x))
+	}
+
+	// Encodings of i * G for i in [0, 16); the index is the multiple.
+	generator_multiples := []string {
+		"0000000000000000000000000000000000000000000000000000000000000000",
+		"e2f2ae0a6abc4e71a884a961c500515f58e30b6aa582dd8db6a65945e08d2d76",
+		"6a493210f7499cd17fecb510ae0cea23a110e8d5b901f8acadd3095c73a3b919",
+		"94741f5d5d52755ece4f23f044ee27d5d1ea1e2bd196b462166b16152a9d0259",
+		"da80862773358b466ffadfe0b3293ab3d9fd53c5ea6c955358f568322daf6a57",
+		"e882b131016b52c1d3337080187cf768423efccbb517bb495ab812c4160ff44e",
+		"f64746d3c92b13050ed8d80236a7f0007c3b3f962f5ba793d19a601ebb1df403",
+		"44f53520926ec81fbd5a387845beb7df85a96a24ece18738bdcfa6a7822a176d",
+		"903293d8f2287ebe10e2374dc1a53e0bc887e592699f02d077d5263cdd55601c",
+		"02622ace8f7303a31cafc63f8fc48fdc16e1c8c8d234b2f0d6685282a9076031",
+		"20706fd788b2720a1ed2a5dad4952b01f413bcf0e7564de8cdc816689e2db95f",
+		"bce83f8ba5dd2fa572864c24ba1810f9522bc6004afe95877ac73241cafdab42",
+		"e4549ee16b9aa03099ca208c67adafcafa4c3f3e4e5303de6026e3ca8ff84460",
+		"aa52e000df2e16f55fb1032fc33bc42742dad6bd5a8fc0be0167436c5948501f",
+		"46376b80f409b29dc2b5f6f0c52591990896e5716f41477cd30085ab7f10301e",
+		"e0c418f7c8d9c4cdd7395b93ea124f3ad99021bb681dfc3302a9d99a2e53e64e",
+	}
+	ges: [16]ristretto255.Group_Element
+	for x, i in generator_multiples {
+		b, _ := hex.decode(transmute([]byte)(x), context.temp_allocator)
+
+		// Decode straight into ges[i]; the decoded elements are reused by the
+		// addition/multiplication checks below.
+		ge := &ges[i]
+		ok := ristretto255.ge_set_bytes(ge, b)
+		tc.expect(t, ok, fmt.tprintf("Expected true for %s", x))
+
+		x_check := ge_str(ge)
+
+		tc.expect(
+			t,
+			x == x_check,
+			fmt.tprintf(
+				"Expected %s (round-trip) but got %s instead",
+				x,
+				x_check,
+			),
+		)
+
+		if i == 1 {
+			tc.expect(
+				t,
+				ristretto255.ge_equal(ge, &ge_gen) == 1,
+				"Expected element 1 to be the generator",
+			)
+		}
+	}
+
+	// Addition/Multiplication.
+	for _, i in ges {
+		sc: ristretto255.Scalar
+		ristretto255.sc_set_u64(&sc, u64(i))
+
+		ge_check: ristretto255.Group_Element
+
+		// i * G via the generator-specific routine.
+		ristretto255.ge_scalarmult_generator(&ge_check, &sc)
+		x_check := ge_str(&ge_check)
+		tc.expect(
+			t,
+			x_check == generator_multiples[i],
+			fmt.tprintf(
+				"Expected %s for G * %d (specialized), got %s",
+				generator_multiples[i],
+				i,
+				x_check,
+			),
+		)
+
+		// i * G via the generic routine, using the decoded generator ges[1].
+		ristretto255.ge_scalarmult(&ge_check, &ges[1], &sc)
+		x_check = ge_str(&ge_check)
+		tc.expect(
+			t,
+			x_check == generator_multiples[i],
+			fmt.tprintf(
+				"Expected %s for G * %d (generic), got %s (slow compare)",
+				generator_multiples[i],
+				i,
+				x_check,
+			),
+		)
+
+		// Same product via the vartime routine.
+		ristretto255.ge_scalarmult_vartime(&ge_check, &ges[1], &sc)
+		x_check = ge_str(&ge_check)
+		tc.expect(
+			t,
+			x_check == generator_multiples[i],
+			fmt.tprintf(
+				"Expected %s for G * %d (generic vartime), got %s (slow compare)",
+				generator_multiples[i],
+				i,
+				x_check,
+			),
+		)
+
+		switch i {
+		case 0:
+			// There is no previous element to add the generator to.
+		case:
+			// ges[i-1] + G must equal ges[i], checked both by comparing the
+			// encodings and by ge_equal.
+			ge_prev := &ges[i-1]
+			ristretto255.ge_add(&ge_check, ge_prev, &ge_gen)
+
+			x_check = ge_str(&ge_check)
+			tc.expect(
+				t,
+				x_check == generator_multiples[i],
+				fmt.tprintf(
+					"Expected %s for ges[%d] + ges[%d], got %s (slow compare)",
+					generator_multiples[i],
+					i-1,
+					1,
+					x_check,
+				),
+			)
+
+			tc.expect(
+				t,
+				ristretto255.ge_equal(&ges[i], &ge_check) == 1,
+				fmt.tprintf(
+					"Expected %s for ges[%d] + ges[%d], got %s (fast compare)",
+					generator_multiples[i],
+					i-1,
+					1,
+					x_check,
+				),
+			)
+		}
+	}
+
+	// 64-byte inputs and the element encodings ge_set_wide_bytes must produce.
+	wide_test_vectors := []struct {
+		input: string,
+		output: string,
+	} {
+		{
+			"5d1be09e3d0c82fc538112490e35701979d99e06ca3e2b5b54bffe8b4dc772c14d98b696a1bbfb5ca32c436cc61c16563790306c79eaca7705668b47dffe5bb6",
+			"3066f82a1a747d45120d1740f14358531a8f04bbffe6a819f86dfe50f44a0a46",
+		},
+		{
+			"f116b34b8f17ceb56e8732a60d913dd10cce47a6d53bee9204be8b44f6678b270102a56902e2488c46120e9276cfe54638286b9e4b3cdb470b542d46c2068d38",
+			"f26e5b6f7d362d2d2a94c5d0e7602cb4773c95a2e5c31a64f133189fa76ed61b",
+		},
+		{
+			"8422e1bbdaab52938b81fd602effb6f89110e1e57208ad12d9ad767e2e25510c27140775f9337088b982d83d7fcf0b2fa1edffe51952cbe7365e95c86eaf325c",
+			"006ccd2a9e6867e6a2c5cea83d3302cc9de128dd2a9a57dd8ee7b9d7ffe02826",
+		},
+		{
+			"ac22415129b61427bf464e17baee8db65940c233b98afce8d17c57beeb7876c2150d15af1cb1fb824bbd14955f2b57d08d388aab431a391cfc33d5bafb5dbbaf",
+			"f8f0c87cf237953c5890aec3998169005dae3eca1fbb04548c635953c817f92a",
+		},
+		{
+			"165d697a1ef3d5cf3c38565beefcf88c0f282b8e7dbd28544c483432f1cec7675debea8ebb4e5fe7d6f6e5db15f15587ac4d4d4a1de7191e0c1ca6664abcc413",
+			"ae81e7dedf20a497e10c304a765c1767a42d6e06029758d2d7e8ef7cc4c41179",
+		},
+		{
+			"a836e6c9a9ca9f1e8d486273ad56a78c70cf18f0ce10abb1c7172ddd605d7fd2979854f47ae1ccf204a33102095b4200e5befc0465accc263175485f0e17ea5c",
+			"e2705652ff9f5e44d3e841bf1c251cf7dddb77d140870d1ab2ed64f1a9ce8628",
+		},
+		{
+			"2cdc11eaeb95daf01189417cdddbf95952993aa9cb9c640eb5058d09702c74622c9965a697a3b345ec24ee56335b556e677b30e6f90ac77d781064f866a3c982",
+			"80bd07262511cdde4863f8a7434cef696750681cb9510eea557088f76d9e5065",
+		},
+		// These all produce the same output.
+		{
+			"edffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff1200000000000000000000000000000000000000000000000000000000000000",
+			"304282791023b73128d277bdcb5c7746ef2eac08dde9f2983379cb8e5ef0517f",
+		},
+		{
+			"edffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+			"304282791023b73128d277bdcb5c7746ef2eac08dde9f2983379cb8e5ef0517f",
+		},
+		{
+			"0000000000000000000000000000000000000000000000000000000000000080ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7f",
+			"304282791023b73128d277bdcb5c7746ef2eac08dde9f2983379cb8e5ef0517f",
+		},
+		{
+			"00000000000000000000000000000000000000000000000000000000000000001200000000000000000000000000000000000000000000000000000000000080",
+			"304282791023b73128d277bdcb5c7746ef2eac08dde9f2983379cb8e5ef0517f",
+		},
+	}
+	for v, _ in wide_test_vectors {
+		in_bytes, _ := hex.decode(transmute([]byte)(v.input), context.temp_allocator)
+
+		ge: ristretto255.Group_Element
+		ristretto255.ge_set_wide_bytes(&ge, in_bytes)
+
+		ge_check := ge_str(&ge)
+		tc.expect(
+			t,
+			ge_check == v.output,
+			// The format string has three verbs: expected output, the input
+			// that was mapped, and the output actually obtained.
+			fmt.tprintf(
+				"Expected %s for %s, got %s",
+				v.output,
+				v.input,
+				ge_check,
+			),
+		)
+	}
+}
+
+// Exercises ed25519 key handling, signing, and verification.
+//
+// The first table (RFC 8032 vectors) checks, per vector: private/public key
+// deserialization, that serializing each key reproduces the vector's bytes,
+// that sign() reproduces the expected signature, that verify() accepts it
+// with both the decoded public key and the one embedded in the private key,
+// and that verify() rejects the signature once the message is corrupted.
+//
+// The second table (ed25519-speccheck vectors) checks public key acceptance
+// and the verify() result, with and without the optional fourth argument.
+@(test)
+test_ed25519 :: proc(t: ^testing.T) {
+	tc.log(t, "Testing ed25519")
+
+	test_vectors_rfc := []struct {
+		priv_key: string,
+		pub_key:  string,
+		msg:      string,
+		sig:      string,
+	} {
+		// Test vectors from RFC 8032
+		{
+			"9d61b19deffd5a60ba844af492ec2cc44449c5697b326919703bac031cae7f60",
+			"d75a980182b10ab7d54bfed3c964073a0ee172f3daa62325af021a68f707511a",
+			"",
+			"e5564300c360ac729086e2cc806e828a84877f1eb8e5d974d873e065224901555fb8821590a33bacc61e39701cf9b46bd25bf5f0595bbe24655141438e7a100b",
+		},
+		{
+			"4ccd089b28ff96da9db6c346ec114e0f5b8a319f35aba624da8cf6ed4fb8a6fb",
+			"3d4017c3e843895a92b70aa74d1b7ebc9c982ccf2ec4968cc0cd55f12af4660c",
+			"72",
+			"92a009a9f0d4cab8720e820b5f642540a2b27b5416503f8fb3762223ebdb69da085ac1e43e15996e458f3613d0f11d8c387b2eaeb4302aeeb00d291612bb0c00",
+		},
+		{
+			"c5aa8df43f9f837bedb7442f31dcb7b166d38535076f094b85ce3a2e0b4458f7",
+			"fc51cd8e6218a1a38da47ed00230f0580816ed13ba3303ac5deb911548908025",
+			"af82",
+			"6291d657deec24024827e69c3abe01a30ce548a284743a445e3680d7db5ac3ac18ff9b538d16f290ae67f760984dc6594a7c15e9716ed28dc027beceea1ec40a",
+		},
+		// TEST 1024 omitted for brevity, because all that does is add more to SHA-512
+		{
+			"833fe62409237b9d62ec77587520911e9a759cec1d19755b7da901b96dca3d42",
+			"ec172b93ad5e563bf4932c70e1245034c35467ef2efd4d64ebf819683467e2bf",
+			"ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f",
+			"dc2a4459e7369633a52b1bf277839a00201009a3efbf3ecb69bea2186c26b58909351fc9ac90b3ecfdfbc7c66431e0303dca179c138ac17ad9bef1177331a704",
+		},
+	}
+	for v, _ in test_vectors_rfc {
+		priv_bytes, _ := hex.decode(transmute([]byte)(v.priv_key), context.temp_allocator)
+		pub_bytes, _ := hex.decode(transmute([]byte)(v.pub_key), context.temp_allocator)
+		msg_bytes, _ := hex.decode(transmute([]byte)(v.msg), context.temp_allocator)
+		sig_bytes, _ := hex.decode(transmute([]byte)(v.sig), context.temp_allocator)
+
+		priv_key: ed25519.Private_Key
+		ok := ed25519.private_key_set_bytes(&priv_key, priv_bytes)
+		tc.expect(
+			t,
+			ok,
+			fmt.tprintf(
+				"Expected %s to be a valid private key",
+				v.priv_key,
+			),
+		)
+
+		// Round-trip: serializing the key must reproduce the vector's bytes.
+		// Compare the serialized output itself rather than the `ok` flag left
+		// over from deserialization.
+		key_bytes: [32]byte
+		ed25519.private_key_bytes(&priv_key, key_bytes[:])
+		priv_key_str := string(hex.encode(key_bytes[:], context.temp_allocator))
+		tc.expect(
+			t,
+			priv_key_str == v.priv_key,
+			fmt.tprintf(
+				"Expected private key %s round-trip, got %s",
+				v.priv_key,
+				priv_key_str,
+			),
+		)
+
+		pub_key: ed25519.Public_Key
+		ok = ed25519.public_key_set_bytes(&pub_key, pub_bytes)
+		tc.expect(
+			t,
+			ok,
+			fmt.tprintf(
+				"Expected %s to be a valid public key (priv->pub: %s)",
+				v.pub_key,
+				string(hex.encode(priv_key._pub_key._b[:], context.temp_allocator)),
+			),
+		)
+
+		// Same round-trip check for the public key.
+		ed25519.public_key_bytes(&pub_key, key_bytes[:])
+		pub_key_str := string(hex.encode(key_bytes[:], context.temp_allocator))
+		tc.expect(
+			t,
+			pub_key_str == v.pub_key,
+			fmt.tprintf(
+				"Expected public key %s round-trip, got %s",
+				v.pub_key,
+				pub_key_str,
+			),
+		)
+
+		sig: [ed25519.SIGNATURE_SIZE]byte
+		ed25519.sign(&priv_key, msg_bytes, sig[:])
+		x := string(hex.encode(sig[:], context.temp_allocator))
+		tc.expect(
+			t,
+			x == v.sig,
+			fmt.tprintf(
+				"Expected %s for sign(%s, %s), got %s",
+				v.sig,
+				v.priv_key,
+				v.msg,
+				x,
+			),
+		)
+
+		// Verify with the public key decoded from the vector...
+		ok = ed25519.verify(&pub_key, msg_bytes, sig_bytes)
+		tc.expect(
+			t,
+			ok,
+			fmt.tprintf(
+				"Expected true for verify(%s, %s, %s)",
+				v.pub_key,
+				v.msg,
+				v.sig,
+			),
+		)
+
+		// ...and with the public key stored inside the private key.
+		ok = ed25519.verify(&priv_key._pub_key, msg_bytes, sig_bytes)
+		tc.expect(
+			t,
+			ok,
+			fmt.tprintf(
+				"Expected true for verify(pub(%s), %s %s)",
+				v.priv_key,
+				v.msg,
+				v.sig,
+			),
+		)
+
+		// Corrupt the message and make sure verification fails.
+		switch len(msg_bytes) {
+		case 0:
+			// An empty message has no byte to flip, so substitute a
+			// one-byte message instead.
+			tmp_msg := []byte{69}
+			msg_bytes = tmp_msg[:]
+		case:
+			msg_bytes[0] = msg_bytes[0] ~ 69
+		}
+		ok = ed25519.verify(&pub_key, msg_bytes, sig_bytes)
+		tc.expect(
+			t,
+			ok == false,
+			fmt.tprintf(
+				"Expected false for verify(%s, %s (corrupted), %s)",
+				v.pub_key,
+				v.msg,
+				v.sig,
+			),
+		)
+	}
+
+	// Test cases from "Taming the many EdDSAs", which aim to exercise
+	// all of the ed25519 edge cases/implementation differences.
+	//
+	// - https://eprint.iacr.org/2020/1244
+	// - https://github.com/novifinancial/ed25519-speccheck
+	test_vectors_speccheck := []struct {
+		pub_key:        string,
+		msg:            string,
+		sig:            string,
+		pub_key_ok:     bool,
+		sig_ok:         bool,
+		sig_ok_relaxed: bool, // Ok if the small-order A check is relaxed.
+	} {
+		// S = 0, small-order A, small-order R
+		{
+			"c7176a703d4dd84fba3c0b760d10670f2a2053fa2c39ccc64ec7fd7792ac03fa",
+			"8c93255d71dcab10e8f379c26200f3c7bd5f09d9bc3068d3ef4edeb4853022b6",
+			"c7176a703d4dd84fba3c0b760d10670f2a2053fa2c39ccc64ec7fd7792ac037a0000000000000000000000000000000000000000000000000000000000000000",
+			true,
+			false,
+			true,
+		},
+		// 0 < S < L, small-order A, mixed-order R
+		{
+			"c7176a703d4dd84fba3c0b760d10670f2a2053fa2c39ccc64ec7fd7792ac03fa",
+			"9bd9f44f4dcc75bd531b56b2cd280b0bb38fc1cd6d1230e14861d861de092e79",
+			"f7badec5b8abeaf699583992219b7b223f1df3fbbea919844e3f7c554a43dd43a5bb704786be79fc476f91d3f3f89b03984d8068dcf1bb7dfc6637b45450ac04",
+			true,
+			false,
+			true,
+		},
+		// 0 < S < L, mixed-order A, small-order R
+		{
+			"f7badec5b8abeaf699583992219b7b223f1df3fbbea919844e3f7c554a43dd43",
+			"aebf3f2601a0c8c5d39cc7d8911642f740b78168218da8471772b35f9d35b9ab",
+			"c7176a703d4dd84fba3c0b760d10670f2a2053fa2c39ccc64ec7fd7792ac03fa8c4bd45aecaca5b24fb97bc10ac27ac8751a7dfe1baff8b953ec9f5833ca260e",
+			true,
+			true,
+			true,
+		},
+		// 0 < S < L, mixed-order A, mixed-order R
+		{
+			"cdb267ce40c5cd45306fa5d2f29731459387dbf9eb933b7bd5aed9a765b88d4d",
+			"9bd9f44f4dcc75bd531b56b2cd280b0bb38fc1cd6d1230e14861d861de092e79",
+			"9046a64750444938de19f227bb80485e92b83fdb4b6506c160484c016cc1852f87909e14428a7a1d62e9f22f3d3ad7802db02eb2e688b6c52fcd6648a98bd009",
+			true,
+			true,
+			true,
+		},
+		// 0 < S < L, mixed-order A, mixed-order R
+		{
+			"cdb267ce40c5cd45306fa5d2f29731459387dbf9eb933b7bd5aed9a765b88d4d",
+			"e47d62c63f830dc7a6851a0b1f33ae4bb2f507fb6cffec4011eaccd55b53f56c",
+			"160a1cb0dc9c0258cd0a7d23e94d8fa878bcb1925f2c64246b2dee1796bed5125ec6bc982a269b723e0668e540911a9a6a58921d6925e434ab10aa7940551a09",
+			true,
+			true, // cofactored-only
+			true,
+		},
+		// 0 < S < L, mixed-order A, L-order R
+		{
+			"cdb267ce40c5cd45306fa5d2f29731459387dbf9eb933b7bd5aed9a765b88d4d",
+			"e47d62c63f830dc7a6851a0b1f33ae4bb2f507fb6cffec4011eaccd55b53f56c",
+			"21122a84e0b5fca4052f5b1235c80a537878b38f3142356b2c2384ebad4668b7e40bc836dac0f71076f9abe3a53f9c03c1ceeeddb658d0030494ace586687405",
+			true,
+			true, // cofactored only, (fail if 8h is pre-reduced)
+			true,
+		},
+		// S > L, L-order A, L-order R
+		{
+			"442aad9f089ad9e14647b1ef9099a1ff4798d78589e66f28eca69c11f582a623",
+			"85e241a07d148b41e47d62c63f830dc7a6851a0b1f33ae4bb2f507fb6cffec40",
+			"e96f66be976d82e60150baecff9906684aebb1ef181f67a7189ac78ea23b6c0e547f7690a0e2ddcd04d87dbc3490dc19b3b3052f7ff0538cb68afb369ba3a514",
+			true,
+			false,
+			false,
+		},
+		// S >> L, L-order A, L-order R
+		{
+			"442aad9f089ad9e14647b1ef9099a1ff4798d78589e66f28eca69c11f582a623",
+			"85e241a07d148b41e47d62c63f830dc7a6851a0b1f33ae4bb2f507fb6cffec40",
+			"8ce5b96c8f26d0ab6c47958c9e68b937104cd36e13c33566acd2fe8d38aa19427e71f98a473474f2f13f06f97c20d58cc3f54b8bd0d272f42b695dd7e89a8c22",
+			true,
+			false,
+			false,
+		},
+		// 0 < S < L, mixed-order A, small-order R (non-canonical R, reduced for hash)
+		{
+			"f7badec5b8abeaf699583992219b7b223f1df3fbbea919844e3f7c554a43dd43",
+			"9bedc267423725d473888631ebf45988bad3db83851ee85c85e241a07d148b41",
+			"ecffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff03be9678ac102edcd92b0210bb34d7428d12ffc5df5f37e359941266a4e35f0f",
+			true,
+			false,
+			false,
+		},
+		// 0 < S < L, mixed-order A, small-order R (non-canonical R, not reduced for hash)
+		{
+			"f7badec5b8abeaf699583992219b7b223f1df3fbbea919844e3f7c554a43dd43",
+			"9bedc267423725d473888631ebf45988bad3db83851ee85c85e241a07d148b41",
+			"ecffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffca8c5b64cd208982aa38d4936621a4775aa233aa0505711d8fdcfdaa943d4908",
+			true,
+			false,
+			false,
+		},
+		// 0 < S < L, small-order A, mixed-order R (non-canonical A, reduced for hash)
+		{
+			"ecffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+			"e96b7021eb39c1a163b6da4e3093dcd3f21387da4cc4572be588fafae23c155b",
+			"a9d55260f765261eb9b84e106f665e00b867287a761990d7135963ee0a7d59dca5bb704786be79fc476f91d3f3f89b03984d8068dcf1bb7dfc6637b45450ac04",
+			false,
+			false,
+			false,
+		},
+		// 0 < S < L, small-order A, mixed-order R (non-canonical A, not reduced for hash)
+		{
+			"ecffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+			"39a591f5321bbe07fd5a23dc2f39d025d74526615746727ceefd6e82ae65c06f",
+			"a9d55260f765261eb9b84e106f665e00b867287a761990d7135963ee0a7d59dca5bb704786be79fc476f91d3f3f89b03984d8068dcf1bb7dfc6637b45450ac04",
+			false,
+			false,
+			false,
+		},
+	}
+	for v, i in test_vectors_speccheck {
+		pub_bytes, _ := hex.decode(transmute([]byte)(v.pub_key), context.temp_allocator)
+		msg_bytes, _ := hex.decode(transmute([]byte)(v.msg), context.temp_allocator)
+		sig_bytes, _ := hex.decode(transmute([]byte)(v.sig), context.temp_allocator)
+
+		pub_key: ed25519.Public_Key
+		ok := ed25519.public_key_set_bytes(&pub_key, pub_bytes)
+		tc.expect(
+			t,
+			ok == v.pub_key_ok,
+			fmt.tprintf(
+				"speccheck/%d: Expected %s to be a (in)valid public key, got %v",
+				i,
+				v.pub_key,
+				ok,
+			),
+		)
+
+		// If A is rejected for being non-canonical, skip signature check.
+		if !v.pub_key_ok {
+			continue
+		}
+
+		ok = ed25519.verify(&pub_key, msg_bytes, sig_bytes)
+		tc.expect(
+			t,
+			ok == v.sig_ok,
+			fmt.tprintf(
+				"speccheck/%d Expected %v for verify(%s, %s, %s)",
+				i,
+				v.sig_ok,
+				v.pub_key,
+				v.msg,
+				v.sig,
+			),
+		)
+
+		// If the signature is accepted, skip the relaxed signature check.
+		if v.sig_ok {
+			continue
+		}
+
+		// Retry with the optional fourth argument set; the expected result is
+		// the vector's sig_ok_relaxed field (small-order A check relaxed).
+		ok = ed25519.verify(&pub_key, msg_bytes, sig_bytes, true)
+		tc.expect(
+			t,
+			ok == v.sig_ok_relaxed,
+			fmt.tprintf(
+				"speccheck/%d Expected %v for verify(%s, %s, %s, true)",
+				i,
+				v.sig_ok_relaxed,
+				v.pub_key,
+				v.msg,
+				v.sig,
+			),
+		)
+	}
+}
+
+// Runs the RFC 7748 vectors through x25519.scalarmult and, for each scalar,
+// cross-checks x25519.scalarmult_basepoint against the generic routine
+// applied to a local copy of the base point.
+@(test)
+test_x25519 :: proc(t: ^testing.T) {
+	tc.log(t, "Testing X25519")
+
+	// Local copy of this so that the base point doesn't need to be exported.
+	base_point: [32]byte = {
+		9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	}
+
+	test_vectors := []struct {
+		scalar:  string,
+		point:   string,
+		product: string,
+	} {
+		// Test vectors from RFC 7748
+		{
+			"a546e36bf0527c9d3b16154b82465edd62144c0ac1fc5a18506a2244ba449ac4",
+			"e6db6867583030db3594c1a424b15f7c726624ec26b3353b10a903a6d0ab1c4c",
+			"c3da55379de9c6908e94ea4df28d084f32eccf03491c71f754b4075577a28552",
+		},
+		{
+			"4b66e9d4d1b4673c5ad22691957d6af5c11b6421e0ea01d42ca4169e7918ba0d",
+			"e5210f12786811d3f4b7959d0538ae2c31dbe7106fc03c3efc4cd549c715a493",
+			"95cbde9476e8907d7aade45cb4b873f88b595a68799fa152e6f8f7647aac7957",
+		},
+	}
+	for vec in test_vectors {
+		scalar_raw, _ := hex.decode(transmute([]byte)(vec.scalar), context.temp_allocator)
+		point_raw, _ := hex.decode(transmute([]byte)(vec.point), context.temp_allocator)
+
+		// scalar * point must match the expected product from the vector.
+		product: [x25519.POINT_SIZE]byte
+		x25519.scalarmult(product[:], scalar_raw, point_raw)
+		product_hex := string(hex.encode(product[:], context.temp_allocator))
+
+		product_msg := fmt.tprintf(
+			"Expected %s for %s * %s, but got %s instead",
+			vec.product,
+			vec.scalar,
+			vec.point,
+			product_hex,
+		)
+		tc.expect(t, product_hex == vec.product, product_msg)
+
+		// Abuse the test vectors to sanity-check the scalar-basepoint multiply.
+		via_special, via_generic: [x25519.POINT_SIZE]byte
+		x25519.scalarmult_basepoint(via_special[:], scalar_raw)
+		x25519.scalarmult(via_generic[:], scalar_raw, base_point[:])
+		special_hex := string(hex.encode(via_special[:], context.temp_allocator))
+		generic_hex := string(hex.encode(via_generic[:], context.temp_allocator))
+
+		basepoint_msg := fmt.tprintf(
+			"Expected %s for %s * basepoint, but got %s instead",
+			generic_hex,
+			vec.scalar,
+			special_hex,
+		)
+		tc.expect(t, special_hex == generic_hex, basepoint_msg)
+	}
+}
+
+// Serializes a ristretto255 group element with ge_bytes and returns its hex
+// encoding; the string is allocated from context.temp_allocator.
+@(private)
+ge_str :: proc(ge: ^ristretto255.Group_Element) -> string {
+	encoded: [ristretto255.ELEMENT_SIZE]byte
+	ristretto255.ge_bytes(ge, encoded[:])
+
+	hex_bytes := hex.encode(encoded[:], context.temp_allocator)
+	return string(hex_bytes)
+}
+
+// Serializes a field element with fe_to_bytes and returns its hex encoding;
+// the string is allocated from context.temp_allocator.
+@(private)
+fe_str :: proc(fe: ^field.Tight_Field_Element) -> string {
+	encoded: [32]byte
+	field.fe_to_bytes(&encoded, fe)
+
+	hex_bytes := hex.encode(encoded[:], context.temp_allocator)
+	return string(hex_bytes)
+}

+ 3 - 0
tests/core/crypto/test_core_crypto_hash.odin

@@ -1,5 +1,6 @@
 package test_core_crypto
 package test_core_crypto
 
 
+import "base:runtime"
 import "core:bytes"
 import "core:bytes"
 import "core:encoding/hex"
 import "core:encoding/hex"
 import "core:fmt"
 import "core:fmt"
@@ -12,6 +13,8 @@ import tc "tests:common"
 
 
 @(test)
 @(test)
 test_hash :: proc(t: ^testing.T) {
 test_hash :: proc(t: ^testing.T) {
+	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
+
 	tc.log(t, "Testing Hashes")
 	tc.log(t, "Testing Hashes")
 
 
 	// TODO:
 	// TODO:

+ 3 - 0
tests/core/crypto/test_core_crypto_kdf.odin

@@ -1,5 +1,6 @@
 package test_core_crypto
 package test_core_crypto
 
 
+import "base:runtime"
 import "core:encoding/hex"
 import "core:encoding/hex"
 import "core:fmt"
 import "core:fmt"
 import "core:testing"
 import "core:testing"
@@ -12,6 +13,8 @@ import tc "tests:common"
 
 
 @(test)
 @(test)
 test_kdf :: proc(t: ^testing.T) {
 test_kdf :: proc(t: ^testing.T) {
+	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
+
 	tc.log(t, "Testing KDFs")
 	tc.log(t, "Testing KDFs")
 
 
 	test_hkdf(t)
 	test_hkdf(t)

+ 3 - 0
tests/core/crypto/test_core_crypto_mac.odin

@@ -1,5 +1,6 @@
 package test_core_crypto
 package test_core_crypto
 
 
+import "base:runtime"
 import "core:encoding/hex"
 import "core:encoding/hex"
 import "core:fmt"
 import "core:fmt"
 import "core:mem"
 import "core:mem"
@@ -14,6 +15,8 @@ import tc "tests:common"
 
 
 @(test)
 @(test)
 test_mac :: proc(t: ^testing.T) {
 test_mac :: proc(t: ^testing.T) {
+	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
+
 	tc.log(t, "Testing MACs")
 	tc.log(t, "Testing MACs")
 
 
 	test_hmac(t)
 	test_hmac(t)

+ 3 - 0
tests/core/crypto/test_core_crypto_sha3_variants.odin

@@ -1,5 +1,6 @@
 package test_core_crypto
 package test_core_crypto
 
 
+import "base:runtime"
 import "core:encoding/hex"
 import "core:encoding/hex"
 import "core:fmt"
 import "core:fmt"
 import "core:testing"
 import "core:testing"
@@ -12,6 +13,8 @@ import tc "tests:common"
 
 
 @(test)
 @(test)
 test_sha3_variants :: proc(t: ^testing.T) {
 test_sha3_variants :: proc(t: ^testing.T) {
+	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
+
 	tc.log(t, "Testing SHA3 derived functions")
 	tc.log(t, "Testing SHA3 derived functions")
 
 
 	test_shake(t)
 	test_shake(t)

+ 63 - 0
tests/core/crypto/test_crypto_benchmark.odin

@@ -1,5 +1,6 @@
 package test_core_crypto
 package test_core_crypto
 
 
+import "base:runtime"
 import "core:encoding/hex"
 import "core:encoding/hex"
 import "core:fmt"
 import "core:fmt"
 import "core:testing"
 import "core:testing"
@@ -7,6 +8,7 @@ import "core:time"
 
 
 import "core:crypto/chacha20"
 import "core:crypto/chacha20"
 import "core:crypto/chacha20poly1305"
 import "core:crypto/chacha20poly1305"
+import "core:crypto/ed25519"
 import "core:crypto/poly1305"
 import "core:crypto/poly1305"
 import "core:crypto/x25519"
 import "core:crypto/x25519"
 
 
@@ -16,11 +18,14 @@ import tc "tests:common"
 
 
 @(test)
 @(test)
 bench_crypto :: proc(t: ^testing.T) {
 bench_crypto :: proc(t: ^testing.T) {
+	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
+
 	fmt.println("Starting benchmarks:")
 	fmt.println("Starting benchmarks:")
 
 
 	bench_chacha20(t)
 	bench_chacha20(t)
 	bench_poly1305(t)
 	bench_poly1305(t)
 	bench_chacha20poly1305(t)
 	bench_chacha20poly1305(t)
+	bench_ed25519(t)
 	bench_x25519(t)
 	bench_x25519(t)
 }
 }
 
 
@@ -216,6 +221,64 @@ bench_chacha20poly1305 :: proc(t: ^testing.T) {
 	benchmark_print(name, options)
 	benchmark_print(name, options)
 }
 }
 
 
+// Micro-benchmark for ed25519. Times private key deserialization, public
+// key deserialization, signing, and verification over `iters` iterations
+// each, and logs the mean cost per operation in microseconds.
+bench_ed25519 :: proc(t: ^testing.T) {
+	iters :: 10000
+
+	// Private key deserialization.
+	seed_hex := "cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe"
+	seed, _ := hex.decode(transmute([]byte)(seed_hex), context.temp_allocator)
+	priv_key: ed25519.Private_Key
+	began := time.now()
+	for _ in 0..<iters {
+		ok := ed25519.private_key_set_bytes(&priv_key, seed)
+		assert(ok, "private key should deserialize")
+	}
+	took := time.since(began)
+	priv_report := fmt.tprintf(
+		"ed25519.private_key_set_bytes: ~%f us/op",
+		time.duration_microseconds(took) / iters,
+	)
+	tc.log(t, priv_report)
+
+	// Public key deserialization, fed from the key derived above.
+	pub_raw := priv_key._pub_key._b[:] // "I know what I am doing"
+	pub_key: ed25519.Public_Key
+	began = time.now()
+	for _ in 0..<iters {
+		ok := ed25519.public_key_set_bytes(&pub_key, pub_raw)
+		assert(ok, "public key should deserialize")
+	}
+	took = time.since(began)
+	pub_report := fmt.tprintf(
+		"ed25519.public_key_set_bytes: ~%f us/op",
+		time.duration_microseconds(took) / iters,
+	)
+	tc.log(t, pub_report)
+
+	// Signing.
+	text := "Got a job for you, 621."
+	signature: [ed25519.SIGNATURE_SIZE]byte
+	payload := transmute([]byte)(text)
+	began = time.now()
+	for _ in 0..<iters {
+		ed25519.sign(&priv_key, payload, signature[:])
+	}
+	took = time.since(began)
+	sign_report := fmt.tprintf("ed25519.sign: ~%f us/op", time.duration_microseconds(took) / iters)
+	tc.log(t, sign_report)
+
+	// Verification of the signature produced by the last sign() call.
+	began = time.now()
+	for _ in 0..<iters {
+		ok := ed25519.verify(&pub_key, payload, signature[:])
+		assert(ok, "signature should validate")
+	}
+	took = time.since(began)
+	verify_report := fmt.tprintf("ed25519.verify: ~%f us/op", time.duration_microseconds(took) / iters)
+	tc.log(t, verify_report)
+}
+
 bench_x25519 :: proc(t: ^testing.T) {
 bench_x25519 :: proc(t: ^testing.T) {
 	point_str := "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef"
 	point_str := "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef"
 	scalar_str := "cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe"
 	scalar_str := "cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe"