Browse Source

Merge pull request #3635 from Yawning/feature/aes

core/crypto: Add AES
Jeroen van Rijn 1 year ago
parent
commit
c07a46abc9

+ 28 - 0
core/crypto/_aes/aes.odin

@@ -0,0 +1,28 @@
+package _aes
+
+// KEY_SIZE_128 is the AES-128 key size in bytes (128 bits).
+KEY_SIZE_128 :: 16
+// KEY_SIZE_192 is the AES-192 key size in bytes (192 bits).
+KEY_SIZE_192 :: 24
+// KEY_SIZE_256 is the AES-256 key size in bytes (256 bits).
+KEY_SIZE_256 :: 32
+
+// BLOCK_SIZE is the AES block size in bytes.  It is the same for every
+// key size.
+BLOCK_SIZE :: 16
+
+// ROUNDS_128 is the number of rounds for AES-128.
+ROUNDS_128 :: 10
+// ROUNDS_192 is the number of rounds for AES-192.
+ROUNDS_192 :: 12
+// ROUNDS_256 is the number of rounds for AES-256.
+ROUNDS_256 :: 14
+
+// GHASH_KEY_SIZE is the GHASH key size in bytes.
+GHASH_KEY_SIZE :: 16
+// GHASH_BLOCK_SIZE is the GHASH block size in bytes.
+GHASH_BLOCK_SIZE :: 16
+// GHASH_TAG_SIZE is the GHASH tag size in bytes.
+GHASH_TAG_SIZE :: 16
+
+// RCON holds the ten AES key schedule round constants, in the order
+// in which the key schedule consumes them.
+RCON := [10]byte{0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36}

+ 96 - 0
core/crypto/_aes/ct64/api.odin

@@ -0,0 +1,96 @@
+package aes_ct64
+
+import "base:intrinsics"
+import "core:mem"
+
+// STRIDE is the maximum number of blocks that one bitsliced state can
+// hold, and therefore the most that `load_blocks` / `store_blocks`
+// accept per call.
+STRIDE :: 4
+
+// Context is a keyed AES (ECB) instance.
+Context :: struct {
+	// Expanded bitsliced round keys, 8 words per round key.  120 words
+	// is enough for the 15 round keys of AES-256.
+	_sk_exp:         [120]u64,
+	// Number of rounds as returned by the key schedule (10, 12 or 14).
+	_num_rounds:     int,
+	// Set by `init`; asserted by every block operation.
+	_is_initialized: bool,
+}
+
+// init initializes a context for AES with the provided key.
+//
+// `key` must be 16, 24 or 32 bytes long; the key schedule panics on any
+// other length.  On return `ctx` holds the expanded round keys and can
+// be used with the encrypt/decrypt procedures.
+init :: proc(ctx: ^Context, key: []byte) {
+	// Compressed key schedule: 2 words per round key, at most 15 round
+	// keys (AES-256).
+	skey: [30]u64 = ---
+
+	ctx._num_rounds = keysched(skey[:], key)
+	skey_expand(ctx._sk_exp[:], skey[:], ctx._num_rounds)
+	ctx._is_initialized = true
+
+	// The compressed schedule is key material; sanitize it like every
+	// other temporary in this package instead of leaving it on the stack.
+	mem.zero_explicit(&skey, size_of(skey))
+}
+
+// encrypt_block encrypts the single 16-byte block `src` and writes the
+// result to `dst`, i.e. `dst = AES-ECB-Encrypt(src)`.  The context must
+// have been initialized.
+encrypt_block :: proc(ctx: ^Context, dst, src: []byte) {
+	assert(ctx._is_initialized)
+
+	// Zero-initialized so that the unused lanes hold defined values.
+	state: [8]u64
+	round_keys := ctx._sk_exp[:]
+
+	load_blockx1(&state, src)
+	_encrypt(&state, round_keys, ctx._num_rounds)
+	store_blockx1(dst, &state)
+}
+
+// decrypt_block sets `dst` to `AES-ECB-Decrypt(src)`.
+//
+// Both `dst` and `src` must be exactly one block (16 bytes) long; the
+// load/store helpers trap otherwise.  The context must have been
+// initialized.
+decrypt_block :: proc(ctx: ^Context, dst, src: []byte) {
+	assert(ctx._is_initialized)
+
+	// Zero-initialized so that the unused lanes hold defined values.
+	q: [8]u64
+	load_blockx1(&q, src)
+	_decrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
+	store_blockx1(dst, &q)
+}
+
+// encrypt_blocks encrypts every block in `src` and writes the results
+// to the corresponding entries of `dst`, i.e.
+// `dst = AES-ECB-Encrypt(src[0], .. src[n])`.  Blocks are processed in
+// groups of up to STRIDE at a time.
+encrypt_blocks :: proc(ctx: ^Context, dst, src: [][]byte) {
+	assert(ctx._is_initialized)
+
+	state: [8]u64 = ---
+	round_keys := ctx._sk_exp[:]
+	src, dst := src, dst
+
+	// Full groups, as long as strictly more than one group is left.
+	for len(src) > STRIDE {
+		load_blocks(&state, src[:STRIDE])
+		_encrypt(&state, round_keys, ctx._num_rounds)
+		store_blocks(dst[:STRIDE], &state)
+
+		src, dst = src[STRIDE:], dst[STRIDE:]
+	}
+
+	// Final group of 1 to STRIDE blocks.
+	if len(src) > 0 {
+		load_blocks(&state, src)
+		_encrypt(&state, round_keys, ctx._num_rounds)
+		store_blocks(dst, &state)
+	}
+}
+
+// decrypt_blocks decrypts every block in `src` and writes the results
+// to the corresponding entries of `dst`, i.e.
+// `dst = AES-ECB-Decrypt(src[0], .. src[n])`.  Blocks are processed in
+// groups of up to STRIDE at a time.
+decrypt_blocks :: proc(ctx: ^Context, dst, src: [][]byte) {
+	assert(ctx._is_initialized)
+
+	state: [8]u64 = ---
+	round_keys := ctx._sk_exp[:]
+	src, dst := src, dst
+
+	// Full groups, as long as strictly more than one group is left.
+	for len(src) > STRIDE {
+		load_blocks(&state, src[:STRIDE])
+		_decrypt(&state, round_keys, ctx._num_rounds)
+		store_blocks(dst[:STRIDE], &state)
+
+		src, dst = src[STRIDE:], dst[STRIDE:]
+	}
+
+	// Final group of 1 to STRIDE blocks.
+	if len(src) > 0 {
+		load_blocks(&state, src)
+		_decrypt(&state, round_keys, ctx._num_rounds)
+		store_blocks(dst, &state)
+	}
+}
+
+// reset sanitizes the Context.  The Context must be re-initialized to
+// be used again.
+reset :: proc(ctx: ^Context) {
+	// `ctx` is a pointer: size_of(ctx) would only cover pointer-size
+	// bytes and leave almost all of the expanded key in memory.  Clear
+	// the whole pointed-to structure, which also resets
+	// `_is_initialized` to false.
+	mem.zero_explicit(ctx, size_of(ctx^))
+}

+ 265 - 0
core/crypto/_aes/ct64/ct64.odin

@@ -0,0 +1,265 @@
+// Copyright (c) 2016 Thomas Pornin <[email protected]>
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//   1. Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package aes_ct64
+
+import "base:intrinsics"
+
+// Bitsliced AES for 64-bit general purpose (integer) registers.  Each
+// invocation will process up to 4 blocks at a time.  This implementation
+// is derived from the BearSSL ct64 code, and distributed under a 1-clause
+// BSD license with permission from the original author.
+//
+// WARNING: "hic sunt dracones"
+//
+// This package also deliberately exposes enough internals to be able to
+// function as a replacement for `AESENC` and `AESDEC` from AES-NI, to
+// allow the implementation of non-AES primitives that use the AES round
+// function such as AEGIS and Deoxys-II.  This should ONLY be done when
+// implementing something other than AES itself.
+
+// sub_bytes applies the AES S-box to the bitsliced state `q`, in place.
+//
+// The computation uses only XOR, AND and NOT on whole 64-bit words, so
+// it contains no data-dependent branches or table lookups.
+sub_bytes :: proc "contextless" (q: ^[8]u64) {
+	// This S-box implementation is a straightforward translation of
+	// the circuit described by Boyar and Peralta in "A new
+	// combinational logic minimization technique with applications
+	// to cryptology" (https://eprint.iacr.org/2009/191.pdf).
+	//
+	// Note that variables x* (input) and s* (output) are numbered
+	// in "reverse" order (x0 is the high bit, x7 is the low bit).
+
+	x0 := q[7]
+	x1 := q[6]
+	x2 := q[5]
+	x3 := q[4]
+	x4 := q[3]
+	x5 := q[2]
+	x6 := q[1]
+	x7 := q[0]
+
+	// Top linear transformation.
+	y14 := x3 ~ x5
+	y13 := x0 ~ x6
+	y9 := x0 ~ x3
+	y8 := x0 ~ x5
+	t0 := x1 ~ x2
+	y1 := t0 ~ x7
+	y4 := y1 ~ x3
+	y12 := y13 ~ y14
+	y2 := y1 ~ x0
+	y5 := y1 ~ x6
+	y3 := y5 ~ y8
+	t1 := x4 ~ y12
+	y15 := t1 ~ x5
+	y20 := t1 ~ x1
+	y6 := y15 ~ x7
+	y10 := y15 ~ t0
+	y11 := y20 ~ y9
+	y7 := x7 ~ y11
+	y17 := y10 ~ y11
+	y19 := y10 ~ y8
+	y16 := t0 ~ y11
+	y21 := y13 ~ y16
+	y18 := x0 ~ y16
+
+	// Non-linear section.
+	t2 := y12 & y15
+	t3 := y3 & y6
+	t4 := t3 ~ t2
+	t5 := y4 & x7
+	t6 := t5 ~ t2
+	t7 := y13 & y16
+	t8 := y5 & y1
+	t9 := t8 ~ t7
+	t10 := y2 & y7
+	t11 := t10 ~ t7
+	t12 := y9 & y11
+	t13 := y14 & y17
+	t14 := t13 ~ t12
+	t15 := y8 & y10
+	t16 := t15 ~ t12
+	t17 := t4 ~ t14
+	t18 := t6 ~ t16
+	t19 := t9 ~ t14
+	t20 := t11 ~ t16
+	t21 := t17 ~ y20
+	t22 := t18 ~ y19
+	t23 := t19 ~ y21
+	t24 := t20 ~ y18
+
+	t25 := t21 ~ t22
+	t26 := t21 & t23
+	t27 := t24 ~ t26
+	t28 := t25 & t27
+	t29 := t28 ~ t22
+	t30 := t23 ~ t24
+	t31 := t22 ~ t26
+	t32 := t31 & t30
+	t33 := t32 ~ t24
+	t34 := t23 ~ t33
+	t35 := t27 ~ t33
+	t36 := t24 & t35
+	t37 := t36 ~ t34
+	t38 := t27 ~ t36
+	t39 := t29 & t38
+	t40 := t25 ~ t39
+
+	t41 := t40 ~ t37
+	t42 := t29 ~ t33
+	t43 := t29 ~ t40
+	t44 := t33 ~ t37
+	t45 := t42 ~ t41
+	z0 := t44 & y15
+	z1 := t37 & y6
+	z2 := t33 & x7
+	z3 := t43 & y16
+	z4 := t40 & y1
+	z5 := t29 & y7
+	z6 := t42 & y11
+	z7 := t45 & y17
+	z8 := t41 & y10
+	z9 := t44 & y12
+	z10 := t37 & y3
+	z11 := t33 & y4
+	z12 := t43 & y13
+	z13 := t40 & y5
+	z14 := t29 & y2
+	z15 := t42 & y9
+	z16 := t45 & y14
+	z17 := t41 & y8
+
+	// Bottom linear transformation.
+	t46 := z15 ~ z16
+	t47 := z10 ~ z11
+	t48 := z5 ~ z13
+	t49 := z9 ~ z10
+	t50 := z2 ~ z12
+	t51 := z2 ~ z5
+	t52 := z7 ~ z8
+	t53 := z0 ~ z3
+	t54 := z6 ~ z7
+	t55 := z16 ~ z17
+	t56 := z12 ~ t48
+	t57 := t50 ~ t53
+	t58 := z4 ~ t46
+	t59 := z3 ~ t54
+	t60 := t46 ~ t57
+	t61 := z14 ~ t57
+	t62 := t52 ~ t58
+	t63 := t49 ~ t58
+	t64 := z4 ~ t59
+	t65 := t61 ~ t62
+	t66 := z1 ~ t63
+	s0 := t59 ~ t63
+	s6 := t56 ~ ~t62
+	s7 := t48 ~ ~t60
+	t67 := t64 ~ t65
+	s3 := t53 ~ t66
+	s4 := t51 ~ t66
+	s5 := t47 ~ t65
+	s1 := t64 ~ ~s3
+	s2 := t55 ~ ~t67
+
+	// Write the outputs back using the same reversed numbering as the
+	// inputs (s0 is the high bit, s7 is the low bit).
+	q[7] = s0
+	q[6] = s1
+	q[5] = s2
+	q[4] = s3
+	q[3] = s4
+	q[2] = s5
+	q[1] = s6
+	q[0] = s7
+}
+
+// orthogonalize converts the state `q` between the interleaved and the
+// bitsliced representation, in place.  The load helpers call it after
+// `interleave_in` and the store helpers call it before `interleave_out`.
+//
+// It runs three rounds of pairwise bit-group swaps between words, with
+// group widths of 1, 2 and 4 bits.
+orthogonalize :: proc "contextless" (q: ^[8]u64) {
+	LO_1 :: 0x5555555555555555
+	HI_1 :: 0xAAAAAAAAAAAAAAAA
+	LO_2 :: 0x3333333333333333
+	HI_2 :: 0xCCCCCCCCCCCCCCCC
+	LO_4 :: 0x0F0F0F0F0F0F0F0F
+	HI_4 :: 0xF0F0F0F0F0F0F0F0
+
+	// Width 1: pairs (0,1), (2,3), (4,5), (6,7).
+	for i := 0; i < 8; i += 2 {
+		a, b := q[i], q[i + 1]
+		q[i] = (a & LO_1) | ((b & LO_1) << 1)
+		q[i + 1] = ((a & HI_1) >> 1) | (b & HI_1)
+	}
+
+	// Width 2: pairs (0,2), (1,3), (4,6), (5,7).
+	for base := 0; base < 8; base += 4 {
+		for i in base ..< base + 2 {
+			a, b := q[i], q[i + 2]
+			q[i] = (a & LO_2) | ((b & LO_2) << 2)
+			q[i + 2] = ((a & HI_2) >> 2) | (b & HI_2)
+		}
+	}
+
+	// Width 4: pairs (0,4), (1,5), (2,6), (3,7).
+	for i in 0 ..< 4 {
+		a, b := q[i], q[i + 4]
+		q[i] = (a & LO_4) | ((b & LO_4) << 4)
+		q[i + 4] = ((a & HI_4) >> 4) | (b & HI_4)
+	}
+}
+
+// interleave_in packs the four 32-bit words `w[0..3]` into two 64-bit
+// words.  Each input word is first spread so that its bytes occupy the
+// even byte positions of a 64-bit word; `q0` then combines w[0] (even
+// bytes) with w[2] (odd bytes), and `q1` combines w[1] with w[3].
+//
+// Traps if `w` holds fewer than 4 words.
+@(require_results)
+interleave_in :: proc "contextless" (w: []u32) -> (q0, q1: u64) #no_bounds_check {
+	if len(w) < 4 {
+		intrinsics.trap()
+	}
+
+	spread: [4]u64 = ---
+	for i in 0 ..< 4 {
+		v := u64(w[i])
+		// 16-bit halves -> bits 0..15 and 32..47.
+		v = (v | (v << 16)) & 0x0000FFFF0000FFFF
+		// Bytes -> even byte positions.
+		v = (v | (v << 8)) & 0x00FF00FF00FF00FF
+		spread[i] = v
+	}
+
+	q0 = spread[0] | (spread[2] << 8)
+	q1 = spread[1] | (spread[3] << 8)
+	return
+}
+
+// interleave_out is the inverse of `interleave_in`: it unpacks the two
+// 64-bit words `q0` and `q1` back into four 32-bit words.  The even
+// bytes of `q0`/`q1` become `w0`/`w1` and the odd bytes become `w2`/`w3`.
+@(require_results)
+interleave_out :: proc "contextless" (q0, q1: u64) -> (w0, w1, w2, w3: u32) {
+	EVEN_BYTES :: 0x00FF00FF00FF00FF
+	LOW_HALVES :: 0x0000FFFF0000FFFF
+
+	lanes := [4]u64{
+		q0 & EVEN_BYTES,
+		q1 & EVEN_BYTES,
+		(q0 >> 8) & EVEN_BYTES,
+		(q1 >> 8) & EVEN_BYTES,
+	}
+	for i in 0 ..< 4 {
+		// Squeeze the spread bytes back into adjacent 16-bit groups.
+		v := lanes[i]
+		lanes[i] = (v | (v >> 8)) & LOW_HALVES
+	}
+
+	// Join the two 16-bit groups of every lane into one 32-bit word.
+	w0 = u32(lanes[0]) | u32(lanes[0] >> 16)
+	w1 = u32(lanes[1]) | u32(lanes[1] >> 16)
+	w2 = u32(lanes[2]) | u32(lanes[2] >> 16)
+	w3 = u32(lanes[3]) | u32(lanes[3] >> 16)
+	return
+}
+
+// rotr32 rotates the 64-bit word `x` by 32 bits, i.e. it swaps the
+// upper and lower halves.
+@(private)
+rotr32 :: #force_inline proc "contextless" (x: u64) -> u64 {
+	upper_to_lower := x >> 32
+	lower_to_upper := x << 32
+	return lower_to_upper | upper_to_lower
+}

+ 135 - 0
core/crypto/_aes/ct64/ct64_dec.odin

@@ -0,0 +1,135 @@
+// Copyright (c) 2016 Thomas Pornin <[email protected]>
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//   1. Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package aes_ct64
+
+import "base:intrinsics"
+
+// inv_sub_bytes applies the inverse AES S-box to the bitsliced state
+// `q`, in place.
+inv_sub_bytes :: proc "contextless" (q: ^[8]u64) {
+	// The AES S-box is:
+	//   S(x) = A(I(x)) ^ 0x63
+	// where I() is inversion in GF(256) and A() is a linear transform
+	// (0 is formally defined to be its own inverse).  Inversion is an
+	// involution, so the inverse S-box can be derived from the forward
+	// one:
+	//   iS(x) = B(S(B(x ^ 0x63)) ^ 0x63)
+	// where B() is the inverse of A().  Indeed, for any y in GF(256):
+	//   iS(S(y)) = B(A(I(B(A(I(y)) ^ 0x63 ^ 0x63))) ^ 0x63 ^ 0x63) = y
+	//
+	// The forward S-box is reused instead of duplicating the circuit,
+	// which keeps the code size down.  Merging B() into the S-box
+	// circuit would make CBC decryption faster, but CBC decryption is
+	// already considerably faster than CBC encryption because four
+	// blocks can be processed in parallel.
+
+	// pre_post_transform is the step applied both before and after the
+	// forward S-box: invert words 0, 1, 5 and 6, then mix the eight
+	// words with XORs.
+	pre_post_transform :: #force_inline proc "contextless" (s: ^[8]u64) {
+		s0 := ~s[0]
+		s1 := ~s[1]
+		s2 := s[2]
+		s3 := s[3]
+		s4 := s[4]
+		s5 := ~s[5]
+		s6 := ~s[6]
+		s7 := s[7]
+		s[7] = s1 ~ s4 ~ s6
+		s[6] = s0 ~ s3 ~ s5
+		s[5] = s7 ~ s2 ~ s4
+		s[4] = s6 ~ s1 ~ s3
+		s[3] = s5 ~ s0 ~ s2
+		s[2] = s4 ~ s7 ~ s1
+		s[1] = s3 ~ s6 ~ s0
+		s[0] = s2 ~ s5 ~ s7
+	}
+
+	pre_post_transform(q)
+	sub_bytes(q)
+	pre_post_transform(q)
+}
+
+// inv_shift_rows applies the inverse of `shift_rows` to every word of
+// the bitsliced state `q`, in place.  Within each word the low 16 bits
+// stay put and each of the three following 16-bit groups is rotated by
+// 4, 8 and 12 bits respectively.
+inv_shift_rows :: proc "contextless" (q: ^[8]u64) {
+	for i in 0 ..< 8 {
+		w := q[i]
+
+		out := w & 0x000000000000FFFF
+		out |= (w & 0x000000000FFF0000) << 4
+		out |= (w & 0x00000000F0000000) >> 12
+		out |= (w & 0x000000FF00000000) << 8
+		out |= (w & 0x0000FF0000000000) >> 8
+		out |= (w & 0x000F000000000000) << 12
+		out |= (w & 0xFFF0000000000000) >> 4
+
+		q[i] = out
+	}
+}
+
+// inv_mix_columns applies the inverse of `mix_columns` to the bitsliced
+// state `q`, in place.
+//
+// Every output word is an XOR of input words, of input words rotated
+// right by 16 bits (r*), and of such sums rotated by 32 bits.
+inv_mix_columns :: proc "contextless" (q: ^[8]u64) {
+	// Snapshot the inputs: each q[i] written below depends on several
+	// of the original words.
+	q0 := q[0]
+	q1 := q[1]
+	q2 := q[2]
+	q3 := q[3]
+	q4 := q[4]
+	q5 := q[5]
+	q6 := q[6]
+	q7 := q[7]
+	// r* are the inputs rotated right by 16 bits.
+	r0 := (q0 >> 16) | (q0 << 48)
+	r1 := (q1 >> 16) | (q1 << 48)
+	r2 := (q2 >> 16) | (q2 << 48)
+	r3 := (q3 >> 16) | (q3 << 48)
+	r4 := (q4 >> 16) | (q4 << 48)
+	r5 := (q5 >> 16) | (q5 << 48)
+	r6 := (q6 >> 16) | (q6 << 48)
+	r7 := (q7 >> 16) | (q7 << 48)
+
+	q[0] = q5 ~ q6 ~ q7 ~ r0 ~ r5 ~ r7 ~ rotr32(q0 ~ q5 ~ q6 ~ r0 ~ r5)
+	q[1] = q0 ~ q5 ~ r0 ~ r1 ~ r5 ~ r6 ~ r7 ~ rotr32(q1 ~ q5 ~ q7 ~ r1 ~ r5 ~ r6)
+	q[2] = q0 ~ q1 ~ q6 ~ r1 ~ r2 ~ r6 ~ r7 ~ rotr32(q0 ~ q2 ~ q6 ~ r2 ~ r6 ~ r7)
+	q[3] = q0 ~ q1 ~ q2 ~ q5 ~ q6 ~ r0 ~ r2 ~ r3 ~ r5 ~ rotr32(q0 ~ q1 ~ q3 ~ q5 ~ q6 ~ q7 ~ r0 ~ r3 ~ r5 ~ r7)
+	q[4] = q1 ~ q2 ~ q3 ~ q5 ~ r1 ~ r3 ~ r4 ~ r5 ~ r6 ~ r7 ~ rotr32(q1 ~ q2 ~ q4 ~ q5 ~ q7 ~ r1 ~ r4 ~ r5 ~ r6)
+	q[5] = q2 ~ q3 ~ q4 ~ q6 ~ r2 ~ r4 ~ r5 ~ r6 ~ r7 ~ rotr32(q2 ~ q3 ~ q5 ~ q6 ~ r2 ~ r5 ~ r6 ~ r7)
+	q[6] = q3 ~ q4 ~ q5 ~ q7 ~ r3 ~ r5 ~ r6 ~ r7 ~ rotr32(q3 ~ q4 ~ q6 ~ q7 ~ r3 ~ r6 ~ r7)
+	q[7] = q4 ~ q5 ~ q6 ~ r4 ~ r6 ~ r7 ~ rotr32(q4 ~ q5 ~ q7 ~ r4 ~ r7)
+}
+
+// _decrypt runs the AES decryption rounds over the bitsliced state `q`
+// using the expanded key `skey` (8 words per round key, round key `u`
+// starting at index `u * 8`).  Round keys are consumed from the last
+// one down to the first.
+@(private)
+_decrypt :: proc "contextless" (q: ^[8]u64, skey: []u64, num_rounds: int) {
+	// Undo the final encryption round key first.
+	add_round_key(q, skey[num_rounds * 8:])
+
+	round := num_rounds - 1
+	for round > 0 {
+		inv_shift_rows(q)
+		inv_sub_bytes(q)
+		add_round_key(q, skey[round * 8:])
+		inv_mix_columns(q)
+		round -= 1
+	}
+
+	// The last round has no inv_mix_columns step.
+	inv_shift_rows(q)
+	inv_sub_bytes(q)
+	add_round_key(q, skey[0:])
+}

+ 95 - 0
core/crypto/_aes/ct64/ct64_enc.odin

@@ -0,0 +1,95 @@
+// Copyright (c) 2016 Thomas Pornin <[email protected]>
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//   1. Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package aes_ct64
+
+import "base:intrinsics"
+
+// add_round_key XORs the first 8 words of `sk` into the bitsliced
+// state `q`, in place.  Traps if `sk` holds fewer than 8 words.
+add_round_key :: proc "contextless" (q: ^[8]u64, sk: []u64) #no_bounds_check {
+	if len(sk) < 8 {
+		intrinsics.trap()
+	}
+
+	for i in 0 ..< 8 {
+		q[i] ~= sk[i]
+	}
+}
+
+// shift_rows applies the AES ShiftRows step to every word of the
+// bitsliced state `q`, in place.  Within each word the low 16 bits stay
+// put and each of the three following 16-bit groups is rotated by 4, 8
+// and 12 bits respectively.
+shift_rows :: proc "contextless" (q: ^[8]u64) {
+	for i in 0 ..< 8 {
+		w := q[i]
+
+		out := w & 0x000000000000FFFF
+		out |= (w & 0x00000000FFF00000) >> 4
+		out |= (w & 0x00000000000F0000) << 12
+		out |= (w & 0x0000FF0000000000) >> 8
+		out |= (w & 0x000000FF00000000) << 8
+		out |= (w & 0xF000000000000000) >> 12
+		out |= (w & 0x0FFF000000000000) << 4
+
+		q[i] = out
+	}
+}
+
+// mix_columns applies the AES MixColumns step to the bitsliced state
+// `q`, in place.
+mix_columns :: proc "contextless" (q: ^[8]u64) {
+	// r[i] is q[i] rotated right by 16 bits; s[i] is q[i] ~ r[i], a
+	// term shared by several of the output words.  Both are computed
+	// from the original state before any word is overwritten.
+	r: [8]u64 = ---
+	s: [8]u64 = ---
+	for i in 0 ..< 8 {
+		w := q[i]
+		r[i] = (w >> 16) | (w << 48)
+		s[i] = w ~ r[i]
+	}
+
+	q[0] = s[7] ~ r[0] ~ rotr32(s[0])
+	q[1] = s[0] ~ s[7] ~ r[1] ~ rotr32(s[1])
+	q[2] = s[1] ~ r[2] ~ rotr32(s[2])
+	q[3] = s[2] ~ s[7] ~ r[3] ~ rotr32(s[3])
+	q[4] = s[3] ~ s[7] ~ r[4] ~ rotr32(s[4])
+	q[5] = s[4] ~ r[5] ~ rotr32(s[5])
+	q[6] = s[5] ~ r[6] ~ rotr32(s[6])
+	q[7] = s[6] ~ r[7] ~ rotr32(s[7])
+}
+
+// _encrypt runs the AES encryption rounds over the bitsliced state `q`
+// using the expanded key `skey` (8 words per round key, round key `u`
+// starting at index `u * 8`).
+@(private)
+_encrypt :: proc "contextless" (q: ^[8]u64, skey: []u64, num_rounds: int) {
+	// Initial round key.
+	add_round_key(q, skey[0:])
+
+	for round := 1; round < num_rounds; round += 1 {
+		sub_bytes(q)
+		shift_rows(q)
+		mix_columns(q)
+		add_round_key(q, skey[round * 8:])
+	}
+
+	// The final round has no mix_columns step.
+	sub_bytes(q)
+	shift_rows(q)
+	add_round_key(q, skey[num_rounds * 8:])
+}

+ 179 - 0
core/crypto/_aes/ct64/ct64_keysched.odin

@@ -0,0 +1,179 @@
+// Copyright (c) 2016 Thomas Pornin <[email protected]>
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//   1. Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package aes_ct64
+
+import "base:intrinsics"
+import "core:crypto/_aes"
+import "core:encoding/endian"
+import "core:mem"
+
+// sub_word applies the AES S-box to each byte of the 32-bit word `x`,
+// by running the bitsliced S-box over a state that contains only `x`.
+@(private, require_results)
+sub_word :: proc "contextless" (x: u32) -> u32 {
+	state: [8]u64
+	state[0] = u64(x)
+
+	orthogonalize(&state)
+	sub_bytes(&state)
+	orthogonalize(&state)
+
+	result := u32(state[0])
+
+	// state[0] is the word derived from `x` (key material); wipe it.
+	mem.zero_explicit(&state[0], size_of(u64))
+
+	return result
+}
+
+// keysched runs the AES key schedule for `key` and stores the result in
+// `comp_skey` in compressed bitsliced form, 2 words per round key.
+//
+// `comp_skey` must hold at least `2 * (num_rounds + 1)` words.  Returns
+// the number of rounds for the key size, and panics if `key` is not
+// 16, 24 or 32 bytes long.
+@(private, require_results)
+keysched :: proc(comp_skey: []u64, key: []byte) -> int {
+	num_rounds, key_len := 0, len(key)
+	switch key_len {
+	case _aes.KEY_SIZE_128:
+		num_rounds = _aes.ROUNDS_128
+	case _aes.KEY_SIZE_192:
+		num_rounds = _aes.ROUNDS_192
+	case _aes.KEY_SIZE_256:
+		num_rounds = _aes.ROUNDS_256
+	case:
+		panic("crypto/aes: invalid AES key size")
+	}
+
+	// nk is the key length in 32-bit words; nkf is the total number of
+	// 32-bit words in the schedule (4 per round key).
+	skey: [60]u32 = ---
+	nk, nkf := key_len >> 2, (num_rounds + 1) << 2
+	for i in 0 ..< nk {
+		skey[i] = endian.unchecked_get_u32le(key[i << 2:])
+	}
+	tmp := skey[(key_len >> 2) - 1]
+	// j counts words within the current group of nk words, k indexes
+	// the round constants.
+	for i, j, k := nk, 0, 0; i < nkf; i += 1 {
+		if j == 0 {
+			// First word of each group: rotate by one byte, substitute,
+			// and add the round constant.
+			tmp = (tmp << 24) | (tmp >> 8)
+			tmp = sub_word(tmp) ~ u32(_aes.RCON[k])
+		} else if nk > 6 && j == 4 {
+			// Extra substitution, only for 256-bit keys (nk == 8).
+			tmp = sub_word(tmp)
+		}
+		tmp ~= skey[i - nk]
+		skey[i] = tmp
+		if j += 1; j == nk {
+			j = 0
+			k += 1
+		}
+	}
+
+	// Convert each round key (4 words) to bitsliced form.  The round key
+	// is replicated into all lanes, so after orthogonalization it is
+	// enough to keep one bit out of every four from each word; that
+	// packs 8 words into 2.  `skey_expand` reverses the packing.
+	q: [8]u64 = ---
+	for i, j := 0, 0; i < nkf; i, j = i + 4, j + 2 {
+		q[0], q[4] = interleave_in(skey[i:])
+		q[1] = q[0]
+		q[2] = q[0]
+		q[3] = q[0]
+		q[5] = q[4]
+		q[6] = q[4]
+		q[7] = q[4]
+		orthogonalize(&q)
+		comp_skey[j + 0] =
+			(q[0] & 0x1111111111111111) |
+			(q[1] & 0x2222222222222222) |
+			(q[2] & 0x4444444444444444) |
+			(q[3] & 0x8888888888888888)
+		comp_skey[j + 1] =
+			(q[4] & 0x1111111111111111) |
+			(q[5] & 0x2222222222222222) |
+			(q[6] & 0x4444444444444444) |
+			(q[7] & 0x8888888888888888)
+	}
+
+	// Sanitize the temporaries that held key material.
+	mem.zero_explicit(&skey, size_of(skey))
+	mem.zero_explicit(&q, size_of(q))
+
+	return num_rounds
+}
+
+// skey_expand expands the compressed key schedule `comp_skey` (2 words
+// per round key, as produced by `keysched`) into `skey` (8 words per
+// round key).  Each compressed word yields 4 expanded words: expanded
+// word `k` takes bit `k` of every 4-bit group and replicates it across
+// the whole group.
+@(private)
+skey_expand :: proc "contextless" (skey, comp_skey: []u64, num_rounds: int) {
+	n := (num_rounds + 1) << 1
+	for u in 0 ..< n {
+		packed := comp_skey[u]
+		for k in 0 ..< 4 {
+			// Move bit k of every nibble to the nibble's lowest bit...
+			lane := (packed >> uint(k)) & 0x1111111111111111
+			// ...and multiply by 15 to fill the nibble with that bit.
+			skey[u * 4 + k] = (lane << 4) - lane
+		}
+	}
+}
+
+// orthogonalize_roundkey converts the single 16-byte round key `key`
+// into expanded bitsliced form and stores the 8 resulting words in
+// `qq[0..7]`.  It applies to one round key the same conversion that
+// `keysched` and `skey_expand` apply to a whole key schedule.
+//
+// Traps if `qq` holds fewer than 8 words or `key` is not 16 bytes long.
+orthogonalize_roundkey :: proc "contextless" (qq: []u64, key: []byte) {
+	if len(qq) < 8 || len(key) != 16 {
+		intrinsics.trap()
+	}
+
+	// Load the key as four little-endian 32-bit words.
+	skey: [4]u32 = ---
+	skey[0] = endian.unchecked_get_u32le(key[0:])
+	skey[1] = endian.unchecked_get_u32le(key[4:])
+	skey[2] = endian.unchecked_get_u32le(key[8:])
+	skey[3] = endian.unchecked_get_u32le(key[12:])
+
+	// Replicate the key into every lane and bitslice it.
+	q: [8]u64 = ---
+	q[0], q[4] = interleave_in(skey[:])
+	q[1] = q[0]
+	q[2] = q[0]
+	q[3] = q[0]
+	q[5] = q[4]
+	q[6] = q[4]
+	q[7] = q[4]
+	orthogonalize(&q)
+
+	// Compress to 2 words by keeping one bit out of every four from
+	// each word.
+	comp_skey: [2]u64 = ---
+	comp_skey[0] =
+		(q[0] & 0x1111111111111111) |
+		(q[1] & 0x2222222222222222) |
+		(q[2] & 0x4444444444444444) |
+		(q[3] & 0x8888888888888888)
+	comp_skey[1] =
+		(q[4] & 0x1111111111111111) |
+		(q[5] & 0x2222222222222222) |
+		(q[6] & 0x4444444444444444) |
+		(q[7] & 0x8888888888888888)
+
+	// Expand again: each selected bit is moved to the lowest bit of its
+	// 4-bit group and then replicated across the group (multiply by 15).
+	for x, u in comp_skey {
+		x0 := x
+		x1, x2, x3 := x0, x0, x0
+		x0 &= 0x1111111111111111
+		x1 &= 0x2222222222222222
+		x2 &= 0x4444444444444444
+		x3 &= 0x8888888888888888
+		x1 >>= 1
+		x2 >>= 2
+		x3 >>= 3
+		qq[u * 4 + 0] = (x0 << 4) - x0
+		qq[u * 4 + 1] = (x1 << 4) - x1
+		qq[u * 4 + 2] = (x2 << 4) - x2
+		qq[u * 4 + 3] = (x3 << 4) - x3
+	}
+
+	// Sanitize the temporaries that held key material.
+	mem.zero_explicit(&skey, size_of(skey))
+	mem.zero_explicit(&q, size_of(q))
+	mem.zero_explicit(&comp_skey, size_of(comp_skey))
+}

+ 136 - 0
core/crypto/_aes/ct64/ghash.odin

@@ -0,0 +1,136 @@
+// Copyright (c) 2016 Thomas Pornin <[email protected]>
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//   1. Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package aes_ct64
+
+import "base:intrinsics"
+import "core:crypto/_aes"
+import "core:encoding/endian"
+
+// bmul64 returns the low 64 bits of the carry-less (XOR instead of
+// add) product of `x` and `y`.
+//
+// Each operand is split into four words that keep only every fourth
+// bit.  Ordinary integer multiplications of such sparse words cannot
+// carry far enough to disturb the bit position that is kept from each
+// partial product, and the masked partial products are then recombined.
+@(private = "file")
+bmul64 :: proc "contextless" (x, y: u64) -> u64 {
+	M0 :: 0x1111111111111111
+	M1 :: 0x2222222222222222
+	M2 :: 0x4444444444444444
+	M3 :: 0x8888888888888888
+
+	a0, a1, a2, a3 := x & M0, x & M1, x & M2, x & M3
+	b0, b1, b2, b3 := y & M0, y & M1, y & M2, y & M3
+
+	product := ((a0 * b0) ~ (a1 * b3) ~ (a2 * b2) ~ (a3 * b1)) & M0
+	product |= ((a0 * b1) ~ (a1 * b0) ~ (a2 * b3) ~ (a3 * b2)) & M1
+	product |= ((a0 * b2) ~ (a1 * b1) ~ (a2 * b0) ~ (a3 * b3)) & M2
+	product |= ((a0 * b3) ~ (a1 * b2) ~ (a2 * b1) ~ (a3 * b0)) & M3
+	return product
+}
+
+// rev64 returns `x` with the order of its 64 bits reversed.
+@(private = "file")
+rev64 :: proc "contextless" (x: u64) -> u64 {
+	v := x
+	// Swap adjacent bits, then pairs, nibbles, bytes, 16-bit groups...
+	v = ((v >> 1) & 0x5555555555555555) | ((v & 0x5555555555555555) << 1)
+	v = ((v >> 2) & 0x3333333333333333) | ((v & 0x3333333333333333) << 2)
+	v = ((v >> 4) & 0x0F0F0F0F0F0F0F0F) | ((v & 0x0F0F0F0F0F0F0F0F) << 4)
+	v = ((v >> 8) & 0x00FF00FF00FF00FF) | ((v & 0x00FF00FF00FF00FF) << 8)
+	v = ((v >> 16) & 0x0000FFFF0000FFFF) | ((v & 0x0000FFFF0000FFFF) << 16)
+	// ...and finally the two 32-bit halves.
+	return (v >> 32) | (v << 32)
+}
+
+// ghash calculates the GHASH of data, with the key `key`, and input `dst`
+// and `data`, and stores the resulting digest in `dst`.
+//
+// `dst` and `key` must both be exactly 16 bytes long, otherwise the
+// procedure traps.  `data` may have any length; a trailing partial
+// block is padded with zero bytes.
+//
+// Note: `dst` is both an input and an output, to support easy implementation
+// of GCM.
+ghash :: proc "contextless" (dst, key, data: []byte) {
+	if len(dst) != _aes.GHASH_BLOCK_SIZE || len(key) != _aes.GHASH_BLOCK_SIZE {
+		intrinsics.trap()
+	}
+
+	buf := data
+	l := len(buf)
+
+	// y is the running digest and h the key, each loaded as two
+	// big-endian 64-bit halves (index 1: bytes 0..7, index 0: bytes 8..15).
+	y1 := endian.unchecked_get_u64be(dst[0:])
+	y0 := endian.unchecked_get_u64be(dst[8:])
+	h1 := endian.unchecked_get_u64be(key[0:])
+	h0 := endian.unchecked_get_u64be(key[8:])
+	// Key-dependent values that do not change per block: bit-reversed
+	// halves and the XOR of the halves.
+	h0r := rev64(h0)
+	h1r := rev64(h1)
+	h2 := h0 ~ h1
+	h2r := h0r ~ h1r
+
+	src: []byte
+	for l > 0 {
+		if l >= _aes.GHASH_BLOCK_SIZE {
+			src = buf
+			buf = buf[_aes.GHASH_BLOCK_SIZE:]
+			l -= _aes.GHASH_BLOCK_SIZE
+		} else {
+			// Final partial block: copy into a zeroed buffer.
+			// NOTE(review): `src` keeps pointing at `tmp` after this
+			// scope has ended - confirm this is acceptable, or hoist
+			// `tmp` out of the loop.
+			tmp: [_aes.GHASH_BLOCK_SIZE]byte
+			copy(tmp[:], buf)
+			src = tmp[:]
+			l = 0
+		}
+		// XOR the block into the digest.
+		y1 ~= endian.unchecked_get_u64be(src)
+		y0 ~= endian.unchecked_get_u64be(src[8:])
+
+		y0r := rev64(y0)
+		y1r := rev64(y1)
+		y2 := y0 ~ y1
+		y2r := y0r ~ y1r
+
+		// Multiply y by h.  Three half-size products (low, high, and
+		// sum-of-halves) are combined into the full product.  `bmul64`
+		// only returns the low 64 bits of a product, so each product is
+		// also computed on bit-reversed operands; reversing the result
+		// and shifting it right by one bit yields the upper half.
+		z0 := bmul64(y0, h0)
+		z1 := bmul64(y1, h1)
+		z2 := bmul64(y2, h2)
+		z0h := bmul64(y0r, h0r)
+		z1h := bmul64(y1r, h1r)
+		z2h := bmul64(y2r, h2r)
+		z2 ~= z0 ~ z1
+		z2h ~= z0h ~ z1h
+		z0h = rev64(z0h) >> 1
+		z1h = rev64(z1h) >> 1
+		z2h = rev64(z2h) >> 1
+
+		// v3:v2:v1:v0 is the 256-bit product, most significant word first.
+		v0 := z0
+		v1 := z0h ~ z2
+		v2 := z1 ~ z2h
+		v3 := z1h
+
+		// Shift the whole 256-bit value left by one bit.
+		v3 = (v3 << 1) | (v2 >> 63)
+		v2 = (v2 << 1) | (v1 >> 63)
+		v1 = (v1 << 1) | (v0 >> 63)
+		v0 = (v0 << 1)
+
+		// Fold the two low words into the two high words; presumably
+		// this is the reduction modulo the GHASH field polynomial
+		// (shifts by 1, 2 and 7) - confirm against the GCM specification.
+		v2 ~= v0 ~ (v0 >> 1) ~ (v0 >> 2) ~ (v0 >> 7)
+		v1 ~= (v0 << 63) ~ (v0 << 62) ~ (v0 << 57)
+		v3 ~= v1 ~ (v1 >> 1) ~ (v1 >> 2) ~ (v1 >> 7)
+		v2 ~= (v1 << 63) ~ (v1 << 62) ~ (v1 << 57)
+
+		y0 = v2
+		y1 = v3
+	}
+
+	endian.unchecked_put_u64be(dst[0:], y1)
+	endian.unchecked_put_u64be(dst[8:], y0)
+}

+ 75 - 0
core/crypto/_aes/ct64/helpers.odin

@@ -0,0 +1,75 @@
+package aes_ct64
+
+import "base:intrinsics"
+import "core:crypto/_aes"
+import "core:encoding/endian"
+
+// load_blockx1 loads the single 16-byte block `src` into lane 0 of the
+// state `q` (words 0 and 4) and converts `q` to bitsliced form.  The
+// other words of `q` are not written before the conversion, so the
+// caller decides what the remaining lanes contain.
+//
+// Traps if `src` is not exactly one block long.
+load_blockx1 :: proc "contextless" (q: ^[8]u64, src: []byte) {
+	if len(src) != _aes.BLOCK_SIZE {
+		intrinsics.trap()
+	}
+
+	// The block is read as four little-endian 32-bit words.
+	words: [4]u32 = ---
+	for i in 0 ..< 4 {
+		words[i] = endian.unchecked_get_u32le(src[i * 4:])
+	}
+
+	q[0], q[4] = interleave_in(words[:])
+	orthogonalize(q)
+}
+
+// store_blockx1 converts the state `q` back from bitsliced form (this
+// modifies `q`) and writes lane 0 to the 16-byte block `dst`.
+//
+// Traps if `dst` is not exactly one block long.
+store_blockx1 :: proc "contextless" (dst: []byte, q: ^[8]u64) {
+	if len(dst) != _aes.BLOCK_SIZE {
+		intrinsics.trap()
+	}
+
+	orthogonalize(q)
+
+	w0, w1, w2, w3 := interleave_out(q[0], q[4])
+	words := [4]u32{w0, w1, w2, w3}
+	// The block is written as four little-endian 32-bit words.
+	for w, i in words {
+		endian.unchecked_put_u32le(dst[i * 4:], w)
+	}
+}
+
+// load_blocks loads 1 to STRIDE 16-byte blocks from `src` into the
+// state `q`, block `i` going to lane `i` (words `i` and `i + 4`), and
+// converts `q` to bitsliced form.  Lanes beyond `len(src)` are not
+// written before the conversion.
+//
+// Traps if `src` is empty, holds more than STRIDE blocks, or if any
+// block is not exactly 16 bytes long.
+load_blocks :: proc "contextless" (q: ^[8]u64, src: [][]byte) {
+	n := len(src)
+	if n == 0 || n > STRIDE {
+		intrinsics.trap()
+	}
+
+	words: [4]u32 = ---
+	for i in 0 ..< n {
+		block := src[i]
+		if len(block) != _aes.BLOCK_SIZE {
+			intrinsics.trap()
+		}
+
+		// Each block is read as four little-endian 32-bit words.
+		for k in 0 ..< 4 {
+			words[k] = endian.unchecked_get_u32le(block[k * 4:])
+		}
+		q[i], q[i + 4] = interleave_in(words[:])
+	}
+	orthogonalize(q)
+}
+
+// store_blocks converts the state `q` back from bitsliced form (this
+// modifies `q`) and writes lane `i` to `dst[i]`.  Writing stops at the
+// first nil entry of `dst`.
+//
+// Traps if `dst` is empty, holds more than STRIDE entries, or if a
+// non-nil entry reached by the loop is not exactly 16 bytes long.
+store_blocks :: proc "contextless" (dst: [][]byte, q: ^[8]u64) {
+	n := len(dst)
+	if n == 0 || n > STRIDE {
+		intrinsics.trap()
+	}
+
+	orthogonalize(q)
+	for i in 0 ..< n {
+		out := dst[i]
+		// A nil entry ends the output early, so fewer than `n` blocks
+		// may be stored.
+		if out == nil {
+			break
+		}
+		if len(out) != _aes.BLOCK_SIZE {
+			intrinsics.trap()
+		}
+
+		// Each block is written as four little-endian 32-bit words.
+		w0, w1, w2, w3 := interleave_out(q[i], q[i + 4])
+		endian.unchecked_put_u32le(out[0:], w0)
+		endian.unchecked_put_u32le(out[4:], w1)
+		endian.unchecked_put_u32le(out[8:], w2)
+		endian.unchecked_put_u32le(out[12:], w3)
+	}
+}

+ 22 - 0
core/crypto/aes/aes.odin

@@ -0,0 +1,22 @@
+/*
+package aes implements the AES block cipher and some common modes.
+
+See:
+- https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.197-upd1.pdf
+- https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38a.pdf
+- https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38d.pdf
+*/
+
+package aes
+
+import "core:crypto/_aes"
+
+// KEY_SIZE_128 is the AES-128 key size in bytes (re-exported from `_aes`).
+KEY_SIZE_128 :: _aes.KEY_SIZE_128
+// KEY_SIZE_192 is the AES-192 key size in bytes (re-exported from `_aes`).
+KEY_SIZE_192 :: _aes.KEY_SIZE_192
+// KEY_SIZE_256 is the AES-256 key size in bytes (re-exported from `_aes`).
+KEY_SIZE_256 :: _aes.KEY_SIZE_256
+
+// BLOCK_SIZE is the AES block size in bytes (re-exported from `_aes`).
+BLOCK_SIZE :: _aes.BLOCK_SIZE

+ 199 - 0
core/crypto/aes/aes_ctr.odin

@@ -0,0 +1,199 @@
+package aes
+
+import "core:crypto/_aes/ct64"
+import "core:encoding/endian"
+import "core:math/bits"
+import "core:mem"
+
+// CTR_IV_SIZE is the size of the CTR mode IV in bytes.
+CTR_IV_SIZE :: 16
+
+// Context_CTR is a keyed AES-CTR instance.
+Context_CTR :: struct {
+	// _impl is the keyed AES implementation set up by init_ctr.
+	_impl:           Context_Impl,
+	// _buffer holds one block of keystream used to serve requests that
+	// are not a multiple of BLOCK_SIZE.
+	_buffer:         [BLOCK_SIZE]byte,
+	// _off is the offset of the first unused byte in _buffer; a value of
+	// BLOCK_SIZE means that no buffered keystream is left.
+	_off:            int,
+	// _ctr_hi and _ctr_lo are the upper and lower 64 bits of the counter
+	// block, loaded from the IV as big-endian integers.
+	_ctr_hi:         u64,
+	_ctr_lo:         u64,
+	// _is_initialized is set by init_ctr and cleared by reset_ctr.
+	_is_initialized: bool,
+}
+
+// init_ctr initializes a Context_CTR with the provided key and IV.
+//
+// The IV must be exactly CTR_IV_SIZE bytes long or the procedure panics.
+// It is used as the initial counter block and is read as two big-endian
+// 64-bit halves.  `key` and `impl` are passed on to init_impl unchanged.
+init_ctr :: proc(ctx: ^Context_CTR, key, iv: []byte, impl := Implementation.Hardware) {
+	if len(iv) != CTR_IV_SIZE {
+		panic("crypto/aes: invalid CTR IV size")
+	}
+
+	init_impl(&ctx._impl, key, impl)
+	// Mark the keystream buffer as empty.
+	ctx._off = BLOCK_SIZE
+	ctx._ctr_hi = endian.unchecked_get_u64be(iv[0:])
+	ctx._ctr_lo = endian.unchecked_get_u64be(iv[8:])
+	ctx._is_initialized = true
+}
+
+// xor_bytes_ctr XORs each byte in src with bytes taken from the AES-CTR
+// keystream, and writes the resulting output to dst.  dst and src MUST
+// alias exactly or not at all.
+//
+// If dst is shorter than src, only the first len(dst) bytes of src are
+// processed.  Keystream left over from a partially used block stays in
+// the context and is consumed by the next call.
+xor_bytes_ctr :: proc(ctx: ^Context_CTR, dst, src: []byte) {
+	assert(ctx._is_initialized)
+
+	// TODO: Enforcing that dst and src alias exactly or not at all
+	// is a good idea, though odd aliasing should be extremely uncommon.
+
+	// Shadow the parameters so that the slices can be advanced.
+	src, dst := src, dst
+	if dst_len := len(dst); dst_len < len(src) {
+		src = src[:dst_len]
+	}
+
+	for remaining := len(src); remaining > 0; {
+		// Process multiple blocks at once
+		// (only possible when no buffered keystream is pending).
+		if ctx._off == BLOCK_SIZE {
+			if nr_blocks := remaining / BLOCK_SIZE; nr_blocks > 0 {
+				direct_bytes := nr_blocks * BLOCK_SIZE
+				ctr_blocks(ctx, dst, src, nr_blocks)
+				remaining -= direct_bytes
+				if remaining == 0 {
+					return
+				}
+				dst = dst[direct_bytes:]
+				src = src[direct_bytes:]
+			}
+
+			// If there is a partial block, generate and buffer 1 block
+			// worth of keystream.
+			ctr_blocks(ctx, ctx._buffer[:], nil, 1)
+			ctx._off = 0
+		}
+
+		// Process partial blocks from the buffered keystream.
+		to_xor := min(BLOCK_SIZE - ctx._off, remaining)
+		buffered_keystream := ctx._buffer[ctx._off:]
+		for i := 0; i < to_xor; i = i + 1 {
+			dst[i] = buffered_keystream[i] ~ src[i]
+		}
+		ctx._off += to_xor
+		dst = dst[to_xor:]
+		src = src[to_xor:]
+		remaining -= to_xor
+	}
+}
+
+// keystream_bytes_ctr fills dst with the raw AES-CTR keystream output.
+//
+// Keystream left over from a partially used block stays in the context
+// and is consumed by the next call.
+keystream_bytes_ctr :: proc(ctx: ^Context_CTR, dst: []byte) {
+	assert(ctx._is_initialized)
+
+	// Shadow the parameter so that the slice can be advanced.
+	dst := dst
+	for remaining := len(dst); remaining > 0; {
+		// Process multiple blocks at once
+		// (only possible when no buffered keystream is pending).
+		if ctx._off == BLOCK_SIZE {
+			if nr_blocks := remaining / BLOCK_SIZE; nr_blocks > 0 {
+				direct_bytes := nr_blocks * BLOCK_SIZE
+				ctr_blocks(ctx, dst, nil, nr_blocks)
+				remaining -= direct_bytes
+				if remaining == 0 {
+					return
+				}
+				dst = dst[direct_bytes:]
+			}
+
+			// If there is a partial block, generate and buffer 1 block
+			// worth of keystream.
+			ctr_blocks(ctx, ctx._buffer[:], nil, 1)
+			ctx._off = 0
+		}
+
+		// Process partial blocks from the buffered keystream.
+		to_copy := min(BLOCK_SIZE - ctx._off, remaining)
+		buffered_keystream := ctx._buffer[ctx._off:]
+		copy(dst[:to_copy], buffered_keystream[:to_copy])
+		ctx._off += to_copy
+		dst = dst[to_copy:]
+		remaining -= to_copy
+	}
+}
+
+// reset_ctr wipes all state held by the Context_CTR: the keyed
+// implementation, the buffered keystream and the counter.  The context
+// has to be initialized again with init_ctr before it can be reused.
+reset_ctr :: proc "contextless" (ctx: ^Context_CTR) {
+	// Invalidate the context, then clear the sensitive material.
+	ctx._is_initialized = false
+	reset_impl(&ctx._impl)
+	mem.zero_explicit(&ctx._buffer, size_of(ctx._buffer))
+
+	ctx._ctr_hi, ctx._ctr_lo = 0, 0
+	ctx._off = 0
+}
+
+// ctr_blocks produces nr_blocks blocks of keystream starting at the
+// counter stored in ctx, XORs them with src when src is non-nil, writes
+// the result to dst, and stores the advanced counter back into ctx.
+//
+// When the context holds the hardware implementation the work is handed
+// to ctr_blocks_hw instead.
+@(private)
+ctr_blocks :: proc(ctx: ^Context_CTR, dst, src: []byte, nr_blocks: int) {
+	// Use the optimized hardware implementation if available.
+	if _, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
+		ctr_blocks_hw(ctx, dst, src, nr_blocks)
+		return
+	}
+
+	// Portable implementation.
+	//
+	// ct64_inc_ctr writes the current 128-bit counter to dst in big-endian
+	// form and returns the counter incremented by one, carrying from the
+	// low half into the high half.
+	ct64_inc_ctr := #force_inline proc "contextless" (dst: []byte, hi, lo: u64) -> (u64, u64) {
+		endian.unchecked_put_u64be(dst[0:], hi)
+		endian.unchecked_put_u64be(dst[8:], lo)
+
+		hi, lo := hi, lo
+		carry: u64
+		lo, carry = bits.add_u64(lo, 1, 0)
+		hi, _ = bits.add_u64(hi, 0, carry)
+		return hi, lo
+	}
+
+	impl := &ctx._impl.(ct64.Context)
+	src, dst := src, dst
+	nr_blocks := nr_blocks
+	ctr_hi, ctr_lo := ctx._ctr_hi, ctx._ctr_lo
+
+	// tmp backs the counter blocks; ctrs holds one slice per block so that
+	// they can be handed to ct64.encrypt_blocks.
+	tmp: [ct64.STRIDE][BLOCK_SIZE]byte = ---
+	ctrs: [ct64.STRIDE][]byte = ---
+	for i in 0 ..< ct64.STRIDE {
+		ctrs[i] = tmp[i][:]
+	}
+	for nr_blocks > 0 {
+		n := min(ct64.STRIDE, nr_blocks)
+		blocks := ctrs[:n]
+
+		// Serialize n consecutive counter values, then encrypt them in
+		// place to obtain the keystream.
+		for i in 0 ..< n {
+			ctr_hi, ctr_lo = ct64_inc_ctr(blocks[i], ctr_hi, ctr_lo)
+		}
+		ct64.encrypt_blocks(impl, blocks, blocks)
+
+		xor_blocks(dst, src, blocks)
+
+		if src != nil {
+			src = src[n * BLOCK_SIZE:]
+		}
+		dst = dst[n * BLOCK_SIZE:]
+		nr_blocks -= n
+	}
+
+	// Write back the counter.
+	ctx._ctr_hi, ctx._ctr_lo = ctr_hi, ctr_lo
+
+	// The scratch space held keystream, wipe it.
+	mem.zero_explicit(&tmp, size_of(tmp))
+}
+
+// xor_blocks combines the keystream in blocks with src and writes the
+// result to dst.
+//
+// When src is non-nil, each keystream block is XORed in place with the
+// matching BLOCK_SIZE bytes of src, so blocks is modified.  Each block is
+// then copied to dst at offset i * BLOCK_SIZE.  The XOR loop runs without
+// bounds checks, so a non-nil src has to hold at least
+// len(blocks) * BLOCK_SIZE bytes.
+@(private)
+xor_blocks :: #force_inline proc "contextless" (dst, src: []byte, blocks: [][]byte) {
+	// Note: This would be faster if `core:simd` was used, however if
+	// performance of this implementation matters to where that
+	// optimization would be worth it, use chacha20poly1305, or a
+	// CPU that isn't e-waste.
+	if src != nil {
+		#no_bounds_check {
+			for i in 0 ..< len(blocks) {
+				off := i * BLOCK_SIZE
+				for j in 0 ..< BLOCK_SIZE {
+					blocks[i][j] ~= src[off + j]
+				}
+			}
+		}
+	}
+	for i in 0 ..< len(blocks) {
+		copy(dst[i * BLOCK_SIZE:], blocks[i])
+	}
+}

+ 57 - 0
core/crypto/aes/aes_ecb.odin

@@ -0,0 +1,57 @@
+package aes
+
+import "core:crypto/_aes/ct64"
+
+// Context_ECB is a keyed AES-ECB instance.
+//
+// WARNING: Using ECB mode is strongly discouraged unless it is being
+// used to implement higher level constructs.
+Context_ECB :: struct {
+	// _impl is the keyed AES implementation set up by init_ecb.
+	_impl:           Context_Impl,
+	// _is_initialized is set by init_ecb and cleared by reset_ecb.
+	_is_initialized: bool,
+}
+
+// init_ecb initializes a Context_ECB with the provided key.
+//
+// `key` and `impl` are passed on to init_impl unchanged.
+init_ecb :: proc(ctx: ^Context_ECB, key: []byte, impl := Implementation.Hardware) {
+	init_impl(&ctx._impl, key, impl)
+	ctx._is_initialized = true
+}
+
+// encrypt_ecb encrypts the single block held in src and stores the
+// resulting ciphertext in dst.  Both buffers have to be exactly BLOCK_SIZE
+// bytes long, otherwise the procedure panics.
+encrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) {
+	assert(ctx._is_initialized)
+
+	sizes_ok := len(dst) == BLOCK_SIZE && len(src) == BLOCK_SIZE
+	if !sizes_ok {
+		panic("crypto/aes: invalid buffer size(s)")
+	}
+
+	// Dispatch on the implementation chosen at initialization time.
+	switch &backend in ctx._impl {
+	case Context_Impl_Hardware:
+		encrypt_block_hw(&backend, dst, src)
+	case ct64.Context:
+		ct64.encrypt_block(&backend, dst, src)
+	}
+}
+
+// decrypt_ecb decrypts the single block held in src and stores the
+// resulting plaintext in dst.  Both buffers have to be exactly BLOCK_SIZE
+// bytes long, otherwise the procedure panics.
+decrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) {
+	assert(ctx._is_initialized)
+
+	sizes_ok := len(dst) == BLOCK_SIZE && len(src) == BLOCK_SIZE
+	if !sizes_ok {
+		panic("crypto/aes: invalid buffer size(s)")
+	}
+
+	// Dispatch on the implementation chosen at initialization time.
+	switch &backend in ctx._impl {
+	case Context_Impl_Hardware:
+		decrypt_block_hw(&backend, dst, src)
+	case ct64.Context:
+		ct64.decrypt_block(&backend, dst, src)
+	}
+}
+
+// reset_ecb sanitizes the Context_ECB.  The Context_ECB must be
+// re-initialized to be used again.
+reset_ecb :: proc "contextless" (ctx: ^Context_ECB) {
+	// Wipes the keyed implementation, including the expanded key.
+	reset_impl(&ctx._impl)
+	ctx._is_initialized = false
+}

+ 253 - 0
core/crypto/aes/aes_gcm.odin

@@ -0,0 +1,253 @@
+package aes
+
+import "core:crypto"
+import "core:crypto/_aes"
+import "core:crypto/_aes/ct64"
+import "core:encoding/endian"
+import "core:mem"
+
+// GCM_NONCE_SIZE is the size of the GCM nonce in bytes.
+GCM_NONCE_SIZE :: 12
+// GCM_TAG_SIZE is the size of a GCM tag in bytes.
+GCM_TAG_SIZE :: _aes.GHASH_TAG_SIZE
+
+@(private)
+GCM_A_MAX :: max(u64) / 8 // 2^64 - 1 bits -> bytes
+@(private)
+GCM_P_MAX :: 0xfffffffe0 // 2^39 - 256 bits -> bytes
+
+// Context_GCM is a keyed AES-GCM instance.
+Context_GCM :: struct {
+	// _impl is the keyed AES implementation set up by init_gcm.
+	_impl:           Context_Impl,
+	// _is_initialized is set by init_gcm and cleared by reset_gcm.
+	_is_initialized: bool,
+}
+
+// init_gcm initializes a Context_GCM with the provided key.
+//
+// `key` and `impl` are passed on to init_impl unchanged.  The nonce is
+// not part of the context; it is supplied to seal_gcm and open_gcm.
+init_gcm :: proc(ctx: ^Context_GCM, key: []byte, impl := Implementation.Hardware) {
+	init_impl(&ctx._impl, key, impl)
+	ctx._is_initialized = true
+}
+
+// seal_gcm encrypts the plaintext and authenticates the aad and ciphertext,
+// with the provided Context_GCM and nonce, stores the output in dst and tag.
+//
+// The procedure panics unless tag is GCM_TAG_SIZE bytes, nonce is
+// GCM_NONCE_SIZE bytes, dst is exactly as long as plaintext, and the aad
+// and plaintext are within the GCM length limits.
+//
+// dst and plaintext MUST alias exactly or not at all.
+seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, nonce, aad, plaintext: []byte) {
+	assert(ctx._is_initialized)
+
+	gcm_validate_common_slice_sizes(tag, nonce, aad, plaintext)
+	if len(dst) != len(plaintext) {
+		panic("crypto/aes: invalid destination ciphertext size")
+	}
+
+	if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
+		gcm_seal_hw(&impl, dst, tag, nonce, aad, plaintext)
+		return
+	}
+
+	// h, j0 and s start out zeroed; init_ghash_ct64 fills in h and j0,
+	// s accumulates the GHASH state.
+	h: [_aes.GHASH_KEY_SIZE]byte
+	j0: [_aes.GHASH_BLOCK_SIZE]byte
+	s: [_aes.GHASH_TAG_SIZE]byte
+	init_ghash_ct64(ctx, &h, &j0, nonce)
+
+	// Note: Our GHASH implementation handles appending padding.
+	ct64.ghash(s[:], h[:], aad)
+	gctr_ct64(ctx, dst, &s, plaintext, &h, nonce, true)
+	final_ghash_ct64(&s, &h, &j0, len(aad), len(plaintext))
+	copy(tag, s[:])
+
+	// s now holds the tag, which is output; only h and j0 are wiped.
+	mem.zero_explicit(&h, len(h))
+	mem.zero_explicit(&j0, len(j0))
+}
+
+// open_gcm authenticates the aad and ciphertext, and decrypts the ciphertext,
+// with the provided Context_GCM, nonce, and tag, and stores the output in dst,
+// returning true iff the authentication was successful.  If authentication
+// fails, the destination buffer will be zeroed.
+//
+// The procedure panics unless tag is GCM_TAG_SIZE bytes, nonce is
+// GCM_NONCE_SIZE bytes, dst is exactly as long as ciphertext, and the aad
+// and ciphertext are within the GCM length limits.
+//
+// dst and ciphertext MUST alias exactly or not at all.
+open_gcm :: proc(ctx: ^Context_GCM, dst, nonce, aad, ciphertext, tag: []byte) -> bool {
+	assert(ctx._is_initialized)
+
+	gcm_validate_common_slice_sizes(tag, nonce, aad, ciphertext)
+	if len(dst) != len(ciphertext) {
+		panic("crypto/aes: invalid destination plaintext size")
+	}
+
+	if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
+		return gcm_open_hw(&impl, dst, nonce, aad, ciphertext, tag)
+	}
+
+	// h, j0 and s start out zeroed; init_ghash_ct64 fills in h and j0,
+	// s accumulates the GHASH state.
+	h: [_aes.GHASH_KEY_SIZE]byte
+	j0: [_aes.GHASH_BLOCK_SIZE]byte
+	s: [_aes.GHASH_TAG_SIZE]byte
+	init_ghash_ct64(ctx, &h, &j0, nonce)
+
+	ct64.ghash(s[:], h[:], aad)
+	gctr_ct64(ctx, dst, &s, ciphertext, &h, nonce, false)
+	final_ghash_ct64(&s, &h, &j0, len(aad), len(ciphertext))
+
+	// dst has already been written at this point, so it is wiped when
+	// the computed tag does not match the supplied one.
+	ok := crypto.compare_constant_time(s[:], tag) == 1
+	if !ok {
+		mem.zero_explicit(raw_data(dst), len(dst))
+	}
+
+	mem.zero_explicit(&h, len(h))
+	mem.zero_explicit(&j0, len(j0))
+	mem.zero_explicit(&s, len(s))
+
+	return ok
+}
+
+// reset_gcm sanitizes the Context_GCM.  The Context_GCM must be
+// re-initialized to be used again.
+reset_gcm :: proc "contextless" (ctx: ^Context_GCM) {
+	// Wipes the keyed implementation, including the expanded key.
+	reset_impl(&ctx._impl)
+	ctx._is_initialized = false
+}
+
+// gcm_validate_common_slice_sizes panics unless the tag and nonce have the
+// sizes supported by this package, and the aad and text lengths are within
+// the GCM limits.  The checks run in the order tag, nonce, aad, text, and
+// the first one that fails determines the panic message.
+@(private)
+gcm_validate_common_slice_sizes :: proc(tag, nonce, aad, text: []byte) {
+	switch {
+	case len(tag) != GCM_TAG_SIZE:
+		panic("crypto/aes: invalid GCM tag size")
+
+	// The specification allows nonces anywhere in the range [1, 2^64)
+	// bits, but NIST SP 800-38D 5.2.1.1 states:
+	//
+	// > For IVs, it is recommended that implementations restrict support
+	// > to the length of 96 bits, to promote interoperability, efficiency,
+	// > and simplicity of design.
+	case len(nonce) != GCM_NONCE_SIZE:
+		panic("crypto/aes: invalid GCM nonce size")
+
+	case u64(len(aad)) > GCM_A_MAX:
+		panic("crypto/aes: oversized GCM aad")
+
+	case u64(len(text)) > GCM_P_MAX:
+		panic("crypto/aes: oversized GCM src data")
+	}
+}
+
+// init_ghash_ct64 derives the per-message values for the portable GCM
+// code: h receives the encryption of its own initial contents, and j0
+// receives the encryption of the nonce followed by a final byte of 1.
+//
+// Both h and j0 are expected to be all-zero on entry: h is encrypted in
+// place to obtain CIPH(k, 0^128), and only the bytes of j0 covered by the
+// nonce and its last byte are written before it is encrypted.
+@(private = "file")
+init_ghash_ct64 :: proc(
+	ctx: ^Context_GCM,
+	h: ^[_aes.GHASH_KEY_SIZE]byte,
+	j0: ^[_aes.GHASH_BLOCK_SIZE]byte,
+	nonce: []byte,
+) {
+	impl := &ctx._impl.(ct64.Context)
+
+	// 1. Let H = CIPH(k, 0^128)
+	ct64.encrypt_block(impl, h[:], h[:])
+
+	// ECB encrypt j0, so that we can just XOR with the tag.  In theory
+	// this could be processed along with the final GCTR block, to
+	// potentially save a call to AES-ECB, but... just use AES-NI.
+	copy(j0[:], nonce)
+	j0[_aes.GHASH_BLOCK_SIZE - 1] = 1
+	ct64.encrypt_block(impl, j0[:], j0[:])
+}
+
+// final_ghash_ct64 finishes the tag computation: it absorbs the block
+// holding the aad and text lengths (in bits, as big-endian 64-bit values)
+// into the GHASH state s, then XORs the encrypted j0 into s.
+@(private = "file")
+final_ghash_ct64 :: proc(
+	s: ^[_aes.GHASH_BLOCK_SIZE]byte,
+	h: ^[_aes.GHASH_KEY_SIZE]byte,
+	j0: ^[_aes.GHASH_BLOCK_SIZE]byte,
+	a_len: int,
+	t_len: int,
+) {
+	// The lengths are supplied in bytes, GHASH wants bits.
+	a_bits := u64(a_len) * 8
+	t_bits := u64(t_len) * 8
+
+	len_block: [_aes.GHASH_BLOCK_SIZE]byte
+	endian.unchecked_put_u64be(len_block[0:], a_bits)
+	endian.unchecked_put_u64be(len_block[8:], t_bits)
+	ct64.ghash(s[:], h[:], len_block[:])
+
+	for idx in 0 ..< _aes.GHASH_BLOCK_SIZE {
+		s[idx] ~= j0[idx]
+	}
+}
+
+// gctr_ct64 runs the portable GCTR keystream over src, writes the result
+// to dst, and folds the ciphertext into the running GHASH state s.
+//
+// When is_seal is true the ciphertext is the output (dst), so GHASH is
+// applied after the XOR; otherwise the ciphertext is the input (src), so
+// GHASH is applied before it.  Full blocks are handled up to ct64.STRIDE
+// at a time, followed by at most one partial block.
+@(private = "file")
+gctr_ct64 :: proc(
+	ctx: ^Context_GCM,
+	dst: []byte,
+	s: ^[_aes.GHASH_BLOCK_SIZE]byte,
+	src: []byte,
+	h: ^[_aes.GHASH_KEY_SIZE]byte,
+	nonce: []byte,
+	is_seal: bool,
+) {
+	// ct64_inc_ctr32 stores ctr as a big-endian 32-bit value in the last
+	// four bytes of dst and returns ctr + 1.
+	ct64_inc_ctr32 := #force_inline proc "contextless" (dst: []byte, ctr: u32) -> u32 {
+		endian.unchecked_put_u32be(dst[12:], ctr)
+		return ctr + 1
+	}
+
+	// 2. Define a block J_0 as follows:
+	//    if len(IV) = 96, then let J0 = IV || 0^31 || 1
+	//
+	// Note: We only support 96 bit IVs.
+	//
+	// The counter for the data starts at 2, as the value 1 is used for
+	// J0 itself (see init_ghash_ct64).
+	tmp, tmp2: [ct64.STRIDE][BLOCK_SIZE]byte = ---, ---
+	ctrs, blks: [ct64.STRIDE][]byte = ---, ---
+	ctr: u32 = 2
+	for i in 0 ..< ct64.STRIDE {
+		// Setup scratch space for the keystream.
+		blks[i] = tmp2[i][:]
+
+		// Pre-copy the IV to all the counter blocks.
+		ctrs[i] = tmp[i][:]
+		copy(ctrs[i], nonce)
+	}
+
+	// We stitch the GCTR and GHASH operations together, so that only
+	// one pass over the ciphertext is required.
+
+	impl := &ctx._impl.(ct64.Context)
+	src, dst := src, dst
+
+	nr_blocks := len(src) / BLOCK_SIZE
+	for nr_blocks > 0 {
+		n := min(ct64.STRIDE, nr_blocks)
+		l := n * BLOCK_SIZE
+
+		if !is_seal {
+			ct64.ghash(s[:], h[:], src[:l])
+		}
+
+		// The keystream is written to a separate buffer, as we will
+		// reuse the first 96-bits of each counter.
+		for i in 0 ..< n {
+			ctr = ct64_inc_ctr32(ctrs[i], ctr)
+		}
+		ct64.encrypt_blocks(impl, blks[:n], ctrs[:n])
+
+		xor_blocks(dst, src, blks[:n])
+
+		if is_seal {
+			ct64.ghash(s[:], h[:], dst[:l])
+		}
+
+		src = src[l:]
+		dst = dst[l:]
+		nr_blocks -= n
+	}
+	// Trailing partial block, if any.
+	if l := len(src); l > 0 {
+		if !is_seal {
+			ct64.ghash(s[:], h[:], src[:l])
+		}
+
+		// This is the last counter value used, so the incremented value
+		// is discarded and the counter block is encrypted in place.
+		ct64_inc_ctr32(ctrs[0], ctr)
+		ct64.encrypt_block(impl, ctrs[0], ctrs[0])
+
+		for i in 0 ..< l {
+			dst[i] = src[i] ~ ctrs[0][i]
+		}
+
+		if is_seal {
+			ct64.ghash(s[:], h[:], dst[:l])
+		}
+	}
+
+	// Both scratch buffers held counter blocks or keystream, wipe them.
+	mem.zero_explicit(&tmp, size_of(tmp))
+	mem.zero_explicit(&tmp2, size_of(tmp2))
+}

+ 41 - 0
core/crypto/aes/aes_impl.odin

@@ -0,0 +1,41 @@
+package aes
+
+import "core:crypto/_aes/ct64"
+import "core:mem"
+import "core:reflect"
+
+// Context_Impl holds the keyed state of whichever AES implementation was
+// selected by init_impl: the portable ct64 code or the hardware one.
+@(private)
+Context_Impl :: union {
+	ct64.Context,
+	Context_Impl_Hardware,
+}
+
+// Implementation is an AES implementation.  Most callers will not need
+// to use this as the package will automatically select the most performant
+// implementation available (See `is_hardware_accelerated()`).
+Implementation :: enum {
+	// Portable selects the ct64 implementation.
+	Portable,
+	// Hardware selects the hardware implementation; Portable is used
+	// instead when `is_hardware_accelerated()` returns false.
+	Hardware,
+}
+
+// init_impl selects the AES implementation and keys it.
+//
+// A request for .Hardware is downgraded to .Portable when
+// is_hardware_accelerated() returns false.  The union variant is set
+// first, so that the variant's init procedure can be run on it in place.
+@(private)
+init_impl :: proc(ctx: ^Context_Impl, key: []byte, impl: Implementation) {
+	impl := impl
+	if !is_hardware_accelerated() {
+		impl = .Portable
+	}
+
+	switch impl {
+	case .Portable:
+		reflect.set_union_variant_typeid(ctx^, typeid_of(ct64.Context))
+		ct64.init(&ctx.(ct64.Context), key)
+	case .Hardware:
+		reflect.set_union_variant_typeid(ctx^, typeid_of(Context_Impl_Hardware))
+		init_impl_hw(&ctx.(Context_Impl_Hardware), key)
+	}
+}
+
+// reset_impl zeroes the entire Context_Impl, including the keyed state of
+// whichever variant it held.
+@(private)
+reset_impl :: proc "contextless" (ctx: ^Context_Impl) {
+	mem.zero_explicit(ctx, size_of(Context_Impl))
+}

+ 43 - 0
core/crypto/aes/aes_impl_hw_gen.odin

@@ -0,0 +1,43 @@
+package aes
+
+// ERR_HW_NOT_SUPPORTED is the panic message used by every stub in this
+// file.
+@(private = "file")
+ERR_HW_NOT_SUPPORTED :: "crypto/aes: hardware implementation unsupported"
+
+// is_hardware_accelerated returns true iff hardware accelerated AES
+// is supported.
+//
+// This file provides no hardware implementation, so it always returns
+// false.
+is_hardware_accelerated :: proc "contextless" () -> bool {
+	return false
+}
+
+// Context_Impl_Hardware is an empty placeholder, as this file provides no
+// hardware implementation.
+@(private)
+Context_Impl_Hardware :: struct {}
+
+// The procedures below are stubs for the hardware implementation; each
+// one panics with ERR_HW_NOT_SUPPORTED when called.
+
+// init_impl_hw is the stub for keying the hardware implementation.
+@(private)
+init_impl_hw :: proc(ctx: ^Context_Impl_Hardware, key: []byte) {
+	panic(ERR_HW_NOT_SUPPORTED)
+}
+
+// encrypt_block_hw is the stub for single block encryption.
+@(private)
+encrypt_block_hw :: proc(ctx: ^Context_Impl_Hardware, dst, src: []byte) {
+	panic(ERR_HW_NOT_SUPPORTED)
+}
+
+// decrypt_block_hw is the stub for single block decryption.
+@(private)
+decrypt_block_hw :: proc(ctx: ^Context_Impl_Hardware, dst, src: []byte) {
+	panic(ERR_HW_NOT_SUPPORTED)
+}
+
+// ctr_blocks_hw is the stub for CTR keystream generation.
+@(private)
+ctr_blocks_hw :: proc(ctx: ^Context_CTR, dst, src: []byte, nr_blocks: int) {
+	panic(ERR_HW_NOT_SUPPORTED)
+}
+
+// gcm_seal_hw is the stub for GCM sealing.
+@(private)
+gcm_seal_hw :: proc(ctx: ^Context_Impl_Hardware, dst, tag, nonce, aad, plaintext: []byte) {
+	panic(ERR_HW_NOT_SUPPORTED)
+}
+
+// gcm_open_hw is the stub for GCM opening.
+@(private)
+gcm_open_hw :: proc(ctx: ^Context_Impl_Hardware, dst, nonce, aad, ciphertext, tag: []byte) -> bool {
+	panic(ERR_HW_NOT_SUPPORTED)
+}

+ 49 - 0
core/simd/x86/aes.odin

@@ -0,0 +1,49 @@
+//+build i386, amd64
+package simd_x86
+
+// The procedures below are thin, force-inlined wrappers around the
+// private `llvm.x86.aesni.*` intrinsics declared in the foreign block of
+// this file.  Each one enables the "aes" target feature and forwards its
+// arguments unchanged.
+
+// _mm_aesdec forwards to the `aesdec` intrinsic.
+@(require_results, enable_target_feature = "aes")
+_mm_aesdec :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
+	return aesdec(a, b)
+}
+
+// _mm_aesdeclast forwards to the `aesdeclast` intrinsic.
+@(require_results, enable_target_feature = "aes")
+_mm_aesdeclast :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
+	return aesdeclast(a, b)
+}
+
+// _mm_aesenc forwards to the `aesenc` intrinsic.
+@(require_results, enable_target_feature = "aes")
+_mm_aesenc :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
+	return aesenc(a, b)
+}
+
+// _mm_aesenclast forwards to the `aesenclast` intrinsic.
+@(require_results, enable_target_feature = "aes")
+_mm_aesenclast :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
+	return aesenclast(a, b)
+}
+
+// _mm_aesimc forwards to the `aesimc` intrinsic.
+@(require_results, enable_target_feature = "aes")
+_mm_aesimc :: #force_inline proc "c" (a: __m128i) -> __m128i {
+	return aesimc(a)
+}
+
+// _mm_aeskeygenassist forwards to the `aeskeygenassist` intrinsic.  The
+// immediate operand is a compile-time parameter.
+@(require_results, enable_target_feature = "aes")
+_mm_aeskeygenassist :: #force_inline proc "c" (a: __m128i, $IMM8: u8) -> __m128i {
+	return aeskeygenassist(a, u8(IMM8))
+}
+
+
+// Private bindings to the LLVM AES-NI intrinsics, selected by link name.
+@(private, default_calling_convention = "none")
+foreign _ {
+	@(link_name = "llvm.x86.aesni.aesdec")
+	aesdec :: proc(a, b: __m128i) -> __m128i ---
+	@(link_name = "llvm.x86.aesni.aesdeclast")
+	aesdeclast :: proc(a, b: __m128i) -> __m128i ---
+	@(link_name = "llvm.x86.aesni.aesenc")
+	aesenc :: proc(a, b: __m128i) -> __m128i ---
+	@(link_name = "llvm.x86.aesni.aesenclast")
+	aesenclast :: proc(a, b: __m128i) -> __m128i ---
+	@(link_name = "llvm.x86.aesni.aesimc")
+	aesimc :: proc(a: __m128i) -> __m128i ---
+	@(link_name = "llvm.x86.aesni.aeskeygenassist")
+	aeskeygenassist :: proc(a: __m128i, imm8: u8) -> __m128i ---
+}

+ 2 - 0
examples/all/all_main.odin

@@ -25,6 +25,7 @@ import rbtree           "core:container/rbtree"
 import topological_sort "core:container/topological_sort"
 
 import crypto           "core:crypto"
+import aes              "core:crypto/aes"
 import blake2b          "core:crypto/blake2b"
 import blake2s          "core:crypto/blake2s"
 import chacha20         "core:crypto/chacha20"
@@ -150,6 +151,7 @@ _ :: rbtree
 _ :: topological_sort
 _ :: crypto
 _ :: crypto_hash
+_ :: aes
 _ :: blake2b
 _ :: blake2s
 _ :: chacha20

+ 1 - 0
tests/core/crypto/test_core_crypto.odin

@@ -33,6 +33,7 @@ main :: proc() {
 	test_kdf(&t) // After hash/mac tests because those should pass first.
 	test_ecc25519(&t)
 
+	test_aes(&t)
 	test_chacha20(&t)
 	test_chacha20poly1305(&t)
 	test_sha3_variants(&t)

+ 462 - 0
tests/core/crypto/test_core_crypto_aes.odin

@@ -0,0 +1,462 @@
+package test_core_crypto
+
+import "base:runtime"
+import "core:encoding/hex"
+import "core:fmt"
+import "core:testing"
+
+import "core:crypto/aes"
+import "core:crypto/sha2"
+
+import tc "tests:common"
+
+// test_aes runs the ECB, CTR and GCM tests against the portable
+// implementation, and against the hardware implementation as well when
+// it is available.
+@(test)
+test_aes :: proc(t: ^testing.T) {
+	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
+
+	tc.log(t, "Testing AES")
+
+	// The list is allocated from context.allocator, which the temp
+	// allocator guard above does not cover, so free it explicitly.
+	impls := make([dynamic]aes.Implementation, 0, 2)
+	defer delete(impls)
+	append(&impls, aes.Implementation.Portable)
+	if aes.is_hardware_accelerated() {
+		append(&impls, aes.Implementation.Hardware)
+	}
+
+	for impl in impls {
+		test_aes_ecb(t, impl)
+		test_aes_ctr(t, impl)
+		test_aes_gcm(t, impl)
+	}
+}
+
+// test_aes_ecb checks single block encryption and decryption against the
+// NIST SP 800-38A ECB test vectors, using the requested implementation.
+//
+// This is a helper driven by test_aes rather than a standalone test: it
+// takes an extra `impl` parameter, so it does not carry `@(test)`.
+test_aes_ecb :: proc(t: ^testing.T, impl: aes.Implementation) {
+	tc.log(t, fmt.tprintf("Testing AES-ECB/%v", impl))
+
+	test_vectors := []struct {
+		key: string,
+		plaintext: string,
+		ciphertext: string,
+	} {
+		// http://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38a.pdf
+		{
+			"2b7e151628aed2a6abf7158809cf4f3c",
+			"6bc1bee22e409f96e93d7e117393172a",
+			"3ad77bb40d7a3660a89ecaf32466ef97",
+		},
+		{
+			"2b7e151628aed2a6abf7158809cf4f3c",
+			"ae2d8a571e03ac9c9eb76fac45af8e51",
+			"f5d3d58503b9699de785895a96fdbaaf",
+		},
+		{
+			"2b7e151628aed2a6abf7158809cf4f3c",
+			"30c81c46a35ce411e5fbc1191a0a52ef",
+			"43b1cd7f598ece23881b00e3ed030688",
+		},
+		{
+			"2b7e151628aed2a6abf7158809cf4f3c",
+			"f69f2445df4f9b17ad2b417be66c3710",
+			"7b0c785e27e8ad3f8223207104725dd4",
+		},
+		{
+			"8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b",
+			"6bc1bee22e409f96e93d7e117393172a",
+			"bd334f1d6e45f25ff712a214571fa5cc",
+		},
+		{
+			"8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b",
+			"ae2d8a571e03ac9c9eb76fac45af8e51",
+			"974104846d0ad3ad7734ecb3ecee4eef",
+		},
+		{
+			"8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b",
+			"30c81c46a35ce411e5fbc1191a0a52ef",
+			"ef7afd2270e2e60adce0ba2face6444e",
+		},
+		{
+			"8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b",
+			"f69f2445df4f9b17ad2b417be66c3710",
+			"9a4b41ba738d6c72fb16691603c18e0e",
+		},
+		{
+			"603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4",
+			"6bc1bee22e409f96e93d7e117393172a",
+			"f3eed1bdb5d2a03c064b5a7e3db181f8",
+		},
+		{
+			"603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4",
+			"ae2d8a571e03ac9c9eb76fac45af8e51",
+			"591ccb10d410ed26dc5ba74a31362870",
+		},
+		{
+			"603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4",
+			"30c81c46a35ce411e5fbc1191a0a52ef",
+			"b6ed21b99ca6f4f9f153e7b1beafed1d",
+		},
+		{
+			"603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4",
+			"f69f2445df4f9b17ad2b417be66c3710",
+			"23304b7a39f9f3ff067d8d8f9e24ecc7",
+		},
+	}
+	for v, _ in test_vectors {
+		key, _ := hex.decode(transmute([]byte)(v.key), context.temp_allocator)
+		plaintext, _ := hex.decode(transmute([]byte)(v.plaintext), context.temp_allocator)
+		ciphertext, _ := hex.decode(transmute([]byte)(v.ciphertext), context.temp_allocator)
+
+		ctx: aes.Context_ECB
+		dst: [aes.BLOCK_SIZE]byte
+		aes.init_ecb(&ctx, key, impl)
+
+		aes.encrypt_ecb(&ctx, dst[:], plaintext)
+		dst_str := string(hex.encode(dst[:], context.temp_allocator))
+		tc.expect(
+			t,
+			dst_str == v.ciphertext,
+			fmt.tprintf(
+				"AES-ECB/%v: Expected: %s for encrypt(%s, %s), but got %s instead",
+				impl,
+				v.ciphertext,
+				v.key,
+				v.plaintext,
+				dst_str,
+			),
+		)
+
+		aes.decrypt_ecb(&ctx, dst[:], ciphertext)
+		dst_str = string(hex.encode(dst[:], context.temp_allocator))
+		tc.expect(
+			t,
+			dst_str == v.plaintext,
+			fmt.tprintf(
+				"AES-ECB/%v: Expected: %s for decrypt(%s, %s), but got %s instead",
+				impl,
+				v.plaintext,
+				v.key,
+				v.ciphertext,
+				dst_str,
+			),
+		)
+	}
+}
+
+// test_aes_ctr checks AES-CTR against the NIST SP 800-38A test vectors,
+// and checks incremental keystream generation against a known digest,
+// using the requested implementation for both parts.
+//
+// This is a helper driven by test_aes rather than a standalone test: it
+// takes an extra `impl` parameter, so it does not carry `@(test)`.
+test_aes_ctr :: proc(t: ^testing.T, impl: aes.Implementation) {
+	tc.log(t, fmt.tprintf("Testing AES-CTR/%v", impl))
+
+	test_vectors := []struct {
+		key: string,
+		iv: string,
+		plaintext: string,
+		ciphertext: string,
+	} {
+		// http://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38a.pdf
+		{
+			"2b7e151628aed2a6abf7158809cf4f3c",
+			"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff",
+			"6bc1bee22e409f96e93d7e117393172aae2d8a571e03ac9c9eb76fac45af8e5130c81c46a35ce411e5fbc1191a0a52eff69f2445df4f9b17ad2b417be66c3710",
+			"874d6191b620e3261bef6864990db6ce9806f66b7970fdff8617187bb9fffdff5ae4df3edbd5d35e5b4f09020db03eab1e031dda2fbe03d1792170a0f3009cee",
+		},
+		{
+			"8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b",
+			"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff",
+			"6bc1bee22e409f96e93d7e117393172aae2d8a571e03ac9c9eb76fac45af8e5130c81c46a35ce411e5fbc1191a0a52eff69f2445df4f9b17ad2b417be66c3710",
+			"1abc932417521ca24f2b0459fe7e6e0b090339ec0aa6faefd5ccc2c6f4ce8e941e36b26bd1ebc670d1bd1d665620abf74f78a7f6d29809585a97daec58c6b050",
+		},
+		{
+			"603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4",
+			"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff",
+			"6bc1bee22e409f96e93d7e117393172aae2d8a571e03ac9c9eb76fac45af8e5130c81c46a35ce411e5fbc1191a0a52eff69f2445df4f9b17ad2b417be66c3710",
+			"601ec313775789a5b7a7f504bbf3d228f443e3ca4d62b59aca84e990cacaf5c52b0930daa23de94ce87017ba2d84988ddfc9c58db67aada613c2dd08457941a6",
+		},
+	}
+	for v, _ in test_vectors {
+		key, _ := hex.decode(transmute([]byte)(v.key), context.temp_allocator)
+		iv, _ := hex.decode(transmute([]byte)(v.iv), context.temp_allocator)
+		plaintext, _ := hex.decode(transmute([]byte)(v.plaintext), context.temp_allocator)
+		ciphertext, _ := hex.decode(transmute([]byte)(v.ciphertext), context.temp_allocator)
+
+		dst := make([]byte, len(ciphertext), context.temp_allocator)
+
+		ctx: aes.Context_CTR
+		aes.init_ctr(&ctx, key, iv, impl)
+
+		aes.xor_bytes_ctr(&ctx, dst, plaintext)
+
+		dst_str := string(hex.encode(dst[:], context.temp_allocator))
+		tc.expect(
+			t,
+			dst_str == v.ciphertext,
+			fmt.tprintf(
+				"AES-CTR/%v: Expected: %s for encrypt(%s, %s, %s), but got %s instead",
+				impl,
+				v.ciphertext,
+				v.key,
+				v.iv,
+				v.plaintext,
+				dst_str,
+			),
+		)
+	}
+
+	// Incrementally read 1, 2, 3, ..., 2047 bytes of keystream, and
+	// compare the SHA-512/256 digest with a known value.  Results
+	// and testcase taken from a known good implementation.
+
+	tmp := make([]byte, 2048, context.temp_allocator)
+
+	// The implementation under test has to be passed explicitly here,
+	// otherwise the default would be exercised on every run.
+	ctx: aes.Context_CTR
+	key: [aes.KEY_SIZE_256]byte
+	nonce: [aes.CTR_IV_SIZE]byte
+	aes.init_ctr(&ctx, key[:], nonce[:], impl)
+
+	h_ctx: sha2.Context_512
+	sha2.init_512_256(&h_ctx)
+
+	for i := 1; i < 2048; i = i + 1 {
+		aes.keystream_bytes_ctr(&ctx, tmp[:i])
+		sha2.update(&h_ctx, tmp[:i])
+	}
+
+	digest: [32]byte
+	sha2.final(&h_ctx, digest[:])
+	digest_str := string(hex.encode(digest[:], context.temp_allocator))
+
+	expected_digest_str := "d4445343afeb9d1237f95b10d00358aed4c1d7d57c9fe480cd0afb5e2ffd448c"
+	tc.expect(
+		t,
+		expected_digest_str == digest_str,
+		fmt.tprintf(
+			"AES-CTR/%v: Expected %s for keystream digest, but got %s instead",
+			impl,
+			expected_digest_str,
+			digest_str,
+		),
+	)
+}
+
+// test_aes_gcm checks seal_gcm and open_gcm against the test vectors from
+// the revised GCM specification, using the requested implementation.
+//
+// This is a helper driven by test_aes rather than a standalone test: it
+// takes an extra `impl` parameter, so it does not carry `@(test)`.
+test_aes_gcm :: proc(t: ^testing.T, impl: aes.Implementation) {
+	tc.log(t, fmt.tprintf("Testing AES-GCM/%v", impl))
+
+	// NIST did a reorg of their site, so the source of the test vectors
+	// is only available from an archive.  The commented out tests are
+	// for non-96-bit IVs which our implementation does not support.
+	//
+	// https://csrc.nist.rip/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-revised-spec.pdf
+	test_vectors := []struct {
+		key: string,
+		iv: string,
+		aad: string,
+		plaintext: string,
+		ciphertext: string,
+		tag: string,
+	} {
+		{
+			"00000000000000000000000000000000",
+			"000000000000000000000000",
+			"",
+			"",
+			"",
+			"58e2fccefa7e3061367f1d57a4e7455a",
+		},
+		{
+			"00000000000000000000000000000000",
+			"000000000000000000000000",
+			"",
+			"00000000000000000000000000000000",
+			"0388dace60b6a392f328c2b971b2fe78",
+			"ab6e47d42cec13bdf53a67b21257bddf",
+		},
+		{
+			"feffe9928665731c6d6a8f9467308308",
+			"cafebabefacedbaddecaf888",
+			"",
+			"d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255",
+			"42831ec2217774244b7221b784d0d49ce3aa212f2c02a4e035c17e2329aca12e21d514b25466931c7d8f6a5aac84aa051ba30b396a0aac973d58e091473f5985",
+			"4d5c2af327cd64a62cf35abd2ba6fab4",
+		},
+		{
+			"feffe9928665731c6d6a8f9467308308",
+			"cafebabefacedbaddecaf888",
+			"feedfacedeadbeeffeedfacedeadbeefabaddad2",
+			"d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39",
+			"42831ec2217774244b7221b784d0d49ce3aa212f2c02a4e035c17e2329aca12e21d514b25466931c7d8f6a5aac84aa051ba30b396a0aac973d58e091",
+			"5bc94fbc3221a5db94fae95ae7121a47",
+		},
+		/*
+			{
+				"feffe9928665731c6d6a8f9467308308",
+				"cafebabefacedbad",
+				"feedfacedeadbeeffeedfacedeadbeefabaddad2",
+				"d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39",
+				"61353b4c2806934a777ff51fa22a4755699b2a714fcdc6f83766e5f97b6c742373806900e49f24b22b097544d4896b424989b5e1ebac0f07c23f4598",
+				"3612d2e79e3b0785561be14aaca2fccb",
+			},
+			{
+				"feffe9928665731c6d6a8f9467308308",
+				"9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b",
+				"feedfacedeadbeeffeedfacedeadbeefabaddad2",
+				"d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39",
+				"8ce24998625615b603a033aca13fb894be9112a5c3a211a8ba262a3cca7e2ca701e4a9a4fba43c90ccdcb281d48c7c6fd62875d2aca417034c34aee5",
+				"619cc5aefffe0bfa462af43c1699d050",
+			},
+		*/
+		{
+			"000000000000000000000000000000000000000000000000",
+			"000000000000000000000000",
+			"",
+			"",
+			"",
+			"cd33b28ac773f74ba00ed1f312572435",
+		},
+		{
+			"000000000000000000000000000000000000000000000000",
+			"000000000000000000000000",
+			"",
+			"00000000000000000000000000000000",
+			"98e7247c07f0fe411c267e4384b0f600",
+			"2ff58d80033927ab8ef4d4587514f0fb",
+		},
+		{
+			"feffe9928665731c6d6a8f9467308308feffe9928665731c",
+			"cafebabefacedbaddecaf888",
+			"",
+			"d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255",
+			"3980ca0b3c00e841eb06fac4872a2757859e1ceaa6efd984628593b40ca1e19c7d773d00c144c525ac619d18c84a3f4718e2448b2fe324d9ccda2710acade256",
+			"9924a7c8587336bfb118024db8674a14",
+		},
+		{
+			"feffe9928665731c6d6a8f9467308308feffe9928665731c",
+			"cafebabefacedbaddecaf888",
+			"feedfacedeadbeeffeedfacedeadbeefabaddad2",
+			"d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39",
+			"3980ca0b3c00e841eb06fac4872a2757859e1ceaa6efd984628593b40ca1e19c7d773d00c144c525ac619d18c84a3f4718e2448b2fe324d9ccda2710",
+			"2519498e80f1478f37ba55bd6d27618c",
+		},
+		/*
+			{
+				"feffe9928665731c6d6a8f9467308308feffe9928665731c",
+				"cafebabefacedbad",
+				"feedfacedeadbeeffeedfacedeadbeefabaddad2",
+				"d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39",
+				"0f10f599ae14a154ed24b36e25324db8c566632ef2bbb34f8347280fc4507057fddc29df9a471f75c66541d4d4dad1c9e93a19a58e8b473fa0f062f7",
+				"65dcc57fcf623a24094fcca40d3533f8",
+			},
+			{
+				"feffe9928665731c6d6a8f9467308308feffe9928665731c",
+				"9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b",
+				"feedfacedeadbeeffeedfacedeadbeefabaddad2",
+				"d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39",
+				"d27e88681ce3243c4830165a8fdcf9ff1de9a1d8e6b447ef6ef7b79828666e4581e79012af34ddd9e2f037589b292db3e67c036745fa22e7e9b7373b",
+				"dcf566ff291c25bbb8568fc3d376a6d9",
+			},
+		*/
+		{
+			"0000000000000000000000000000000000000000000000000000000000000000",
+			"000000000000000000000000",
+			"",
+			"",
+			"",
+			"530f8afbc74536b9a963b4f1c4cb738b",
+		},
+		{
+			"0000000000000000000000000000000000000000000000000000000000000000",
+			"000000000000000000000000",
+			"",
+			"00000000000000000000000000000000",
+			"cea7403d4d606b6e074ec5d3baf39d18",
+			"d0d1c8a799996bf0265b98b5d48ab919",
+		},
+		{
+			"feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308",
+			"cafebabefacedbaddecaf888",
+			"",
+			"d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255",
+			"522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662898015ad",
+			"b094dac5d93471bdec1a502270e3cc6c",
+		},
+		{
+			"feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308",
+			"cafebabefacedbaddecaf888",
+			"feedfacedeadbeeffeedfacedeadbeefabaddad2",
+			"d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39",
+			"522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662",
+			"76fc6ece0f4e1768cddf8853bb2d551b",
+		},
+		/*
+			{
+				"feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308",
+				"cafebabefacedbad",
+				"feedfacedeadbeeffeedfacedeadbeefabaddad2",
+				"d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39",
+				"c3762df1ca787d32ae47c13bf19844cbaf1ae14d0b976afac52ff7d79bba9de0feb582d33934a4f0954cc2363bc73f7862ac430e64abe499f47c9b1f",
+				"3a337dbf46a792c45e454913fe2ea8f2",
+			},
+			{
+				"feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308",
+				"9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b",
+				"feedfacedeadbeeffeedfacedeadbeefabaddad2",
+				"d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39",
+				"5a8def2f0c9e53f1f75d7853659e2a20eeb2b22aafde6419a058ab4f6f746bf40fc0c3b780f244452da3ebf1c5d82cdea2418997200ef82e44ae7e3f",
+				"a44a8266ee1c8eb0c8b5d4cf5ae9f19a",
+			},
+		*/
+	}
+	for v, _ in test_vectors {
+		key, _ := hex.decode(transmute([]byte)(v.key), context.temp_allocator)
+		iv, _ := hex.decode(transmute([]byte)(v.iv), context.temp_allocator)
+		aad, _ := hex.decode(transmute([]byte)(v.aad), context.temp_allocator)
+		plaintext, _ := hex.decode(transmute([]byte)(v.plaintext), context.temp_allocator)
+		ciphertext, _ := hex.decode(transmute([]byte)(v.ciphertext), context.temp_allocator)
+		tag, _ := hex.decode(transmute([]byte)(v.tag), context.temp_allocator)
+
+		tag_ := make([]byte, len(tag), context.temp_allocator)
+		dst := make([]byte, len(ciphertext), context.temp_allocator)
+
+		ctx: aes.Context_GCM
+		aes.init_gcm(&ctx, key, impl)
+
+		aes.seal_gcm(&ctx, dst, tag_, iv, aad, plaintext)
+		dst_str := string(hex.encode(dst[:], context.temp_allocator))
+		tag_str := string(hex.encode(tag_[:], context.temp_allocator))
+
+		tc.expect(
+			t,
+			dst_str == v.ciphertext && tag_str == v.tag,
+			fmt.tprintf(
+				"AES-GCM/%v: Expected: (%s, %s) for seal(%s, %s, %s, %s), but got (%s, %s) instead",
+				impl,
+				v.ciphertext,
+				v.tag,
+				v.key,
+				v.iv,
+				v.aad,
+				v.plaintext,
+				dst_str,
+				tag_str,
+			),
+		)
+
+		ok := aes.open_gcm(&ctx, dst, iv, aad, ciphertext, tag)
+		dst_str = string(hex.encode(dst[:], context.temp_allocator))
+
+		// `ok` is a bool, so it is formatted with %v rather than %s.
+		tc.expect(
+			t,
+			ok && dst_str == v.plaintext,
+			fmt.tprintf(
+				"AES-GCM/%v: Expected: (%s, true) for open(%s, %s, %s, %s, %s), but got (%s, %v) instead",
+				impl,
+				v.plaintext,
+				v.key,
+				v.iv,
+				v.aad,
+				v.ciphertext,
+				v.tag,
+				dst_str,
+				ok,
+			),
+		)
+	}
+}

+ 60 - 0
tests/core/crypto/test_crypto_benchmark.odin

@@ -6,6 +6,7 @@ import "core:fmt"
 import "core:testing"
 import "core:time"
 
+import "core:crypto/aes"
 import "core:crypto/chacha20"
 import "core:crypto/chacha20poly1305"
 import "core:crypto/ed25519"
@@ -25,6 +26,7 @@ bench_crypto :: proc(t: ^testing.T) {
 	bench_chacha20(t)
 	bench_poly1305(t)
 	bench_chacha20poly1305(t)
+	bench_aes256_gcm(t)
 	bench_ed25519(t)
 	bench_x25519(t)
 }
@@ -134,6 +136,26 @@ _benchmark_chacha20poly1305 :: proc(
 	return nil
 }
 
+// _benchmark_aes256_gcm is the `bench` callback used for the AES-GCM
+// benchmarks.  It seals `options.input` in place `options.rounds` times
+// with the `aes.Context_GCM` found in `context.user_ptr`, then records
+// the number of rounds and bytes processed in `options`.
+//
+// The `allocator` parameter is not used by this procedure.  It always
+// returns nil.
+_benchmark_aes256_gcm :: proc(
+	options: ^time.Benchmark_Options,
+	allocator := context.allocator,
+) -> (
+	err: time.Benchmark_Error,
+) {
+	buf := options.input
+	// Zero-initialized nonce that is reused for every round; tolerable
+	// only because this is a throughput measurement.
+	nonce: [aes.GCM_NONCE_SIZE]byte
+	// The tag buffer is only ever written by seal_gcm, so it is left
+	// uninitialized.
+	tag: [aes.GCM_TAG_SIZE]byte = ---
+
+	// The keyed context is handed over via `context.user_ptr`.
+	ctx := transmute(^aes.Context_GCM)context.user_ptr
+
+	// Half-open range: exactly `options.rounds` iterations, so the work
+	// performed matches the `count`/`processed` totals reported below.
+	for _ in 0 ..< options.rounds {
+		aes.seal_gcm(ctx, buf, tag[:], nonce[:], nil, buf)
+	}
+	options.count = options.rounds
+	options.processed = options.rounds * options.bytes
+	return nil
+}
+
 benchmark_print :: proc(name: string, options: ^time.Benchmark_Options) {
 	fmt.printf(
 		"\t[%v] %v rounds, %v bytes processed in %v ns\n\t\t%5.3f rounds/s, %5.3f MiB/s\n",
@@ -221,6 +243,44 @@ bench_chacha20poly1305 :: proc(t: ^testing.T) {
 	benchmark_print(name, options)
 }
 
+// bench_aes256_gcm runs the AES-256-GCM benchmark for 64, 1024 and 65536
+// byte buffers, checking that each run reports no error and printing its
+// result.
+bench_aes256_gcm :: proc(t: ^testing.T) {
+	Size_Case :: struct {
+		label: string,
+		size:  int,
+	}
+	size_cases := [?]Size_Case {
+		{"AES256-GCM 64 bytes", 64},
+		{"AES256-GCM 1024 bytes", 1024},
+		{"AES256-GCM 65536 bytes", 65536},
+	}
+
+	// Fixed 256-bit key: the 0xdeadbeef pattern repeated eight times.
+	key := [aes.KEY_SIZE_256]byte {
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+	}
+	gcm: aes.Context_GCM
+	aes.init_gcm(&gcm, key[:])
+
+	// The bench callback reads the keyed context from `context.user_ptr`.
+	context.user_ptr = &gcm
+
+	// One options value is shared by all runs; only `bytes` changes.
+	options := &time.Benchmark_Options {
+		rounds = 1_000,
+		bytes = size_cases[0].size,
+		setup = _setup_sized_buf,
+		bench = _benchmark_aes256_gcm,
+		teardown = _teardown_sized_buf,
+	}
+
+	for c in size_cases {
+		options.bytes = c.size
+		err := time.benchmark(options, context.allocator)
+		tc.expect(t, err == nil, c.label)
+		benchmark_print(c.label, options)
+	}
+}
+
 bench_ed25519 :: proc(t: ^testing.T) {
 	iters :: 10000