Răsfoiți Sursa

Merge branch 'master' into tiocgwinsz_time

Raph 2 luni în urmă
părinte
comite
a7e89e1324
100 a modificat fișierele cu 6974 adăugiri și 801 ștergeri
  1. 58 56
      .github/workflows/ci.yml
  2. 60 0
      .github/workflows/cover.yml
  3. 9 7
      .github/workflows/nightly.yml
  4. 3 1
      .gitignore
  5. BIN
      LLVM-C.dll
  6. 1 1
      README.md
  7. 231 8
      base/builtin/builtin.odin
  8. 13 2
      base/intrinsics/intrinsics.odin
  9. 4 1
      base/runtime/core.odin
  10. 11 2
      base/runtime/core_builtin.odin
  11. 15 6
      base/runtime/default_temp_allocator_arena.odin
  12. 29 0
      base/runtime/dynamic_map_internal.odin
  13. 10 2
      base/runtime/heap_allocator_windows.odin
  14. 214 109
      base/runtime/internal.odin
  15. 1 1
      base/runtime/os_specific_bsd.odin
  16. 19 6
      base/runtime/procs_darwin.odin
  17. 601 0
      base/sanitizer/address.odin
  18. 38 0
      base/sanitizer/doc.odin
  19. 74 0
      base/sanitizer/memory.odin
  20. BIN
      bin/lld-link.exe
  21. BIN
      bin/llvm/windows/LLVM-C.lib
  22. BIN
      bin/wasm-ld.exe
  23. 22 27
      build.bat
  24. 12 6
      build_odin.sh
  25. 75 0
      check_all.bat
  26. 78 0
      check_all.sh
  27. 2 2
      ci/build_linux_static.sh
  28. 1 0
      codecov.yml
  29. 1 1
      core/bufio/reader.odin
  30. 2 2
      core/bytes/bytes.odin
  31. 0 3
      core/compress/common.odin
  32. 9 9
      core/container/intrusive/list/intrusive_list.odin
  33. 1 1
      core/container/lru/lru_cache.odin
  34. 13 12
      core/container/priority_queue/priority_queue.odin
  35. 280 56
      core/container/queue/queue.odin
  36. 55 0
      core/container/small_array/doc.odin
  37. 607 0
      core/container/small_array/small_array.odin
  38. 1 0
      core/crypto/_aes/aes.odin
  39. 2 7
      core/crypto/_aes/ct64/ct64.odin
  40. 1 5
      core/crypto/_aes/ct64/ct64_enc.odin
  41. 3 58
      core/crypto/_aes/ct64/ct64_keysched.odin
  42. 2 4
      core/crypto/_aes/ct64/ghash.odin
  43. 37 43
      core/crypto/_aes/ct64/helpers.odin
  44. 1 1
      core/crypto/_aes/hw_intel/api.odin
  45. 2 2
      core/crypto/_aes/hw_intel/ghash.odin
  46. 13 19
      core/crypto/_blake2/blake2.odin
  47. 11 15
      core/crypto/_chacha20/chacha20.odin
  48. 33 7
      core/crypto/_chacha20/simd128/chacha20_simd128.odin
  49. 1 1
      core/crypto/_chacha20/simd256/chacha20_simd256.odin
  50. 1 1
      core/crypto/_chacha20/simd256/chacha20_simd256_stub.odin
  51. 7 10
      core/crypto/_edwards25519/edwards25519.odin
  52. 3 8
      core/crypto/_edwards25519/edwards25519_scalar.odin
  53. 3 0
      core/crypto/_fiat/field_curve25519/field51.odin
  54. 235 0
      core/crypto/_fiat/field_curve448/field.odin
  55. 1060 0
      core/crypto/_fiat/field_curve448/field51.odin
  56. 1 4
      core/crypto/_fiat/field_poly1305/field.odin
  57. 5 9
      core/crypto/_fiat/field_scalar25519/field.odin
  58. 18 21
      core/crypto/_sha3/sha3.odin
  59. 10 12
      core/crypto/_sha3/sp800_185.odin
  60. 1 1
      core/crypto/aead/aead.odin
  61. 59 5
      core/crypto/aead/low_level.odin
  62. 213 0
      core/crypto/aegis/aegis.odin
  63. 452 0
      core/crypto/aegis/aegis_impl_ct64.odin
  64. 44 0
      core/crypto/aegis/aegis_impl_hw_gen.odin
  65. 389 0
      core/crypto/aegis/aegis_impl_hw_intel.odin
  66. 4 8
      core/crypto/aes/aes_ctr.odin
  67. 6 10
      core/crypto/aes/aes_ecb.odin
  68. 10 26
      core/crypto/aes/aes_gcm.odin
  69. 1 1
      core/crypto/aes/aes_gcm_hw_intel.odin
  70. 5 3
      core/crypto/blake2b/blake2b.odin
  71. 5 3
      core/crypto/blake2s/blake2s.odin
  72. 5 11
      core/crypto/chacha20/chacha20.odin
  73. 11 19
      core/crypto/chacha20poly1305/chacha20poly1305.odin
  74. 280 0
      core/crypto/deoxysii/deoxysii.odin
  75. 399 0
      core/crypto/deoxysii/deoxysii_impl_ct64.odin
  76. 21 0
      core/crypto/deoxysii/deoxysii_impl_hw_gen.odin
  77. 434 0
      core/crypto/deoxysii/deoxysii_impl_hw_intel.odin
  78. 8 24
      core/crypto/ed25519/ed25519.odin
  79. 5 8
      core/crypto/hmac/hmac.odin
  80. 4 10
      core/crypto/kmac/kmac.odin
  81. 9 9
      core/crypto/legacy/keccak/keccak.odin
  82. 3 6
      core/crypto/legacy/md5/md5.odin
  83. 3 6
      core/crypto/legacy/sha1/sha1.odin
  84. 4 8
      core/crypto/poly1305/poly1305.odin
  85. 21 27
      core/crypto/ristretto255/ristretto255.odin
  86. 2 6
      core/crypto/ristretto255/ristretto255_scalar.odin
  87. 34 25
      core/crypto/sha2/sha2.odin
  88. 15 0
      core/crypto/sha2/sha2_impl_hw_gen.odin
  89. 260 0
      core/crypto/sha2/sha2_impl_hw_intel.odin
  90. 6 11
      core/crypto/siphash/siphash.odin
  91. 5 8
      core/crypto/sm3/sm3.odin
  92. 4 10
      core/crypto/x25519/x25519.odin
  93. 155 0
      core/crypto/x448/x448.odin
  94. 3 1
      core/debug/trace/trace_windows.odin
  95. 23 0
      core/dynlib/lb_haiku.odin
  96. 3 3
      core/encoding/cbor/cbor.odin
  97. 36 0
      core/encoding/cbor/marshal.odin
  98. 32 1
      core/encoding/cbor/unmarshal.odin
  99. 0 2
      core/encoding/csv/doc.odin
  100. 1 1
      core/encoding/csv/reader.odin

+ 58 - 56
.github/workflows/ci.yml

@@ -32,8 +32,8 @@ jobs:
           gmake -C vendor/miniaudio/src
           ./odin check examples/all -vet -strict-style -disallow-do -target:netbsd_amd64
           ./odin check examples/all -vet -strict-style -disallow-do -target:netbsd_arm64
-          ./odin check vendor/sdl3  -vet -strict-style -disallow-do -target:netbsd_amd64 -no-entry-point
-          ./odin check vendor/sdl3  -vet -strict-style -disallow-do -target:netbsd_arm64 -no-entry-point
+          ./odin check examples/all/sdl3 -vet -strict-style -disallow-do -target:netbsd_amd64 -no-entry-point
+          ./odin check examples/all/sdl3 -vet -strict-style -disallow-do -target:netbsd_arm64 -no-entry-point
           ./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
           ./odin test tests/core/speed.odin -file -all-packages -vet -strict-style -disallow-do -o:speed -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
           ./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
@@ -52,7 +52,7 @@ jobs:
         usesh: true
         copyback: false
         prepare: |
-          pkg install -y gmake git bash python3 libxml2 llvm17
+          pkg install -y gmake git bash python3 libxml2 llvm18
         run: |
           # `set -e` is needed for test failures to register. https://github.com/vmactions/freebsd-vm/issues/72
           set -e -x
@@ -64,7 +64,7 @@ jobs:
           gmake -C vendor/cgltf/src
           gmake -C vendor/miniaudio/src
           ./odin check examples/all -vet -strict-style -disallow-do -target:freebsd_amd64
-          ./odin check vendor/sdl3  -vet -strict-style -disallow-do -target:freebsd_amd64 -no-entry-point
+          ./odin check examples/all/sdl3 -vet -strict-style -disallow-do -target:freebsd_amd64 -no-entry-point
           ./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
           ./odin test tests/core/speed.odin -file -all-packages -vet -strict-style -disallow-do -o:speed -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
           ./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
@@ -75,32 +75,35 @@ jobs:
       fail-fast: false
       matrix:
         # MacOS 13 runs on Intel, 14 runs on ARM
-        os: [ubuntu-latest, macos-13, macos-14]
+        os: [macos-14, ubuntu-latest]
     runs-on: ${{ matrix.os }}
-    name: ${{ matrix.os == 'macos-14' && 'MacOS ARM' || (matrix.os == 'macos-13' && 'MacOS Intel' || 'Ubuntu') }} Build, Check, and Test
+    name: ${{ matrix.os == 'macos-14' && 'MacOS ARM' || (matrix.os == 'macos-13' && 'MacOS Intel') || (matrix.os == 'ubuntu-latest' && 'Ubuntu') }} Build, Check, and Test
     timeout-minutes: 15
     steps:
-      - uses: actions/checkout@v4
 
-      - name: Download LLVM (Linux)
-        if: matrix.os == 'ubuntu-latest'
-        run: |
-          wget https://apt.llvm.org/llvm.sh
-          chmod +x llvm.sh
-          sudo ./llvm.sh 17
-          echo "/usr/lib/llvm-17/bin" >> $GITHUB_PATH
+      - uses: actions/checkout@v4
 
       - name: Download LLVM (MacOS Intel)
         if: matrix.os == 'macos-13'
         run: |
-          brew install llvm@17 [email protected]
-          echo "/usr/local/opt/llvm@17/bin" >> $GITHUB_PATH
+          brew update
+          brew install llvm@20 [email protected] lld
+          echo "$(brew --prefix llvm@20)/bin" >> $GITHUB_PATH
 
       - name: Download LLVM (MacOS ARM)
         if: matrix.os == 'macos-14'
         run: |
-          brew install llvm@17 wasmtime [email protected]
-          echo "/opt/homebrew/opt/llvm@17/bin" >> $GITHUB_PATH
+          brew update
+          brew install llvm@20 wasmtime [email protected] lld
+          echo "$(brew --prefix llvm@20)/bin" >> $GITHUB_PATH
+
+      - name: Download LLVM (Ubuntu)
+        if: matrix.os == 'ubuntu-latest'
+        run: |
+          wget https://apt.llvm.org/llvm.sh
+          chmod +x llvm.sh
+          sudo ./llvm.sh 20
+          echo "/usr/lib/llvm-20/bin" >> $GITHUB_PATH
 
       - name: Build Odin
         run: ./build_odin.sh release
@@ -121,55 +124,60 @@ jobs:
         run: ./odin run examples/demo -debug
       - name: Odin check examples/all
         run: ./odin check examples/all -strict-style -vet -disallow-do
-      - name: Odin check vendor/sdl3
-        run: ./odin check vendor/sdl3  -strict-style -vet -disallow-do -no-entry-point
+      - name: Odin check examples/all/sdl3
+        run: ./odin check examples/all/sdl3  -strict-style -vet -disallow-do -no-entry-point
       - name: Normal Core library tests
-        run: ./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+        run: ./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -sanitize:address
       - name: Optimized Core library tests
-        run: ./odin test tests/core/speed.odin -o:speed -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+        run: ./odin test tests/core/speed.odin -o:speed -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -sanitize:address
       - name: Vendor library tests
-        run: ./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+        run: ./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -sanitize:address
       - name: Internals tests
-        run: ./odin test tests/internal -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+        run: ./odin test tests/internal -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -sanitize:address
       - name: GitHub Issue tests
         run: |
           cd tests/issues
           ./run.sh
 
+      - name: Run demo on WASI WASM32
+        run: |
+          ./odin build examples/demo -target:wasi_wasm32 -vet -strict-style -disallow-do -out:demo
+          wasmtime ./demo.wasm
+        if: matrix.os == 'macos-14'
+
       - name: Check benchmarks
         run: ./odin check tests/benchmark -vet -strict-style -no-entry-point
       - name: Odin check examples/all for Linux i386
-        run: ./odin check examples/all -vet -strict-style -disallow-do -target:linux_i386
         if: matrix.os == 'ubuntu-latest'
+        run: ./odin check examples/all -vet -strict-style -disallow-do -target:linux_i386
       - name: Odin check examples/all for Linux arm64
-        run: ./odin check examples/all -vet -strict-style -disallow-do -target:linux_arm64
         if: matrix.os == 'ubuntu-latest'
+        run: ./odin check examples/all -vet -strict-style -disallow-do -target:linux_arm64
       - name: Odin check examples/all for FreeBSD amd64
-        run: ./odin check examples/all -vet -strict-style -disallow-do -target:freebsd_amd64
         if: matrix.os == 'ubuntu-latest'
+        run: ./odin check examples/all -vet -strict-style -disallow-do -target:freebsd_amd64
       - name: Odin check examples/all for OpenBSD amd64
+        if: matrix.os == 'ubuntu-latest'
         run: ./odin check examples/all -vet -strict-style -disallow-do -target:openbsd_amd64
+      - name: Odin check examples/all for js_wasm32
+        if: matrix.os == 'ubuntu-latest'
+        run: ./odin check examples/all -vet -strict-style -disallow-do -no-entry-point -target:js_wasm32
+      - name: Odin check examples/all for js_wasm64p32
         if: matrix.os == 'ubuntu-latest'
+        run: ./odin check examples/all -vet -strict-style -disallow-do -no-entry-point -target:js_wasm64p32
 
-      - name: Odin check vendor/sdl3 for Linux i386
-        run: ./odin check vendor/sdl3 -vet -strict-style -disallow-do -no-entry-point -target:linux_i386
+      - name: Odin check examples/all/sdl3 for Linux i386
         if: matrix.os == 'ubuntu-latest'
-      - name: Odin check vendor/sdl3 for Linux arm64
-        run: ./odin check vendor/sdl3 -vet -strict-style -disallow-do -no-entry-point -target:linux_arm64
+        run: ./odin check examples/all/sdl3 -vet -strict-style -disallow-do -no-entry-point -target:linux_i386
+      - name: Odin check examples/all/sdl3 for Linux arm64
         if: matrix.os == 'ubuntu-latest'
-      - name: Odin check vendor/sdl3 for FreeBSD amd64
-        run: ./odin check vendor/sdl3 -vet -strict-style -disallow-do -no-entry-point -target:freebsd_amd64
+        run: ./odin check examples/all/sdl3 -vet -strict-style -disallow-do -no-entry-point -target:linux_arm64
+      - name: Odin check examples/all/sdl3 for FreeBSD amd64
         if: matrix.os == 'ubuntu-latest'
-      - name: Odin check vendor/sdl3 for OpenBSD amd64
-        run: ./odin check vendor/sdl3 -vet -strict-style -disallow-do -no-entry-point -target:openbsd_amd64
+        run: ./odin check examples/all/sdl3 -vet -strict-style -disallow-do -no-entry-point -target:freebsd_amd64
+      - name: Odin check examples/all/sdl3 for OpenBSD amd64
         if: matrix.os == 'ubuntu-latest'
-
-
-      - name: Run demo on WASI WASM32
-        run: |
-          ./odin build examples/demo -target:wasi_wasm32 -vet -strict-style -disallow-do -out:demo
-          wasmtime ./demo.wasm
-        if: matrix.os == 'macos-14'
+        run: ./odin check examples/all/sdl3 -vet -strict-style -disallow-do -no-entry-point -target:openbsd_amd64
 
   build_windows:
     name: Windows Build, Check, and Test
@@ -206,32 +214,32 @@ jobs:
         run: |
           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
           odin check examples/all -vet -strict-style -disallow-do
-      - name: Odin check vendor/sdl3
+      - name: Odin check examples/all/sdl3
         shell: cmd
         run: |
           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
-          odin check vendor/sdl3 -vet -strict-style -disallow-do -no-entry-point
+          odin check examples/all/sdl3 -vet -strict-style -disallow-do -no-entry-point
       - name: Core library tests
         shell: cmd
         run: |
           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
-          odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+          odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -sanitize:address
       - name: Optimized core library tests
         shell: cmd
         run: |
           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
-          odin test tests/core/speed.odin -o:speed -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+          odin test tests/core/speed.odin -o:speed -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -sanitize:address
       - name: Vendor library tests
         shell: cmd
         run: |
           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
           copy vendor\lua\5.4\windows\*.dll .
-          odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+          odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -sanitize:address
       - name: Odin internals tests
         shell: cmd
         run: |
           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
-          odin test tests/internal -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+          odin test tests/internal -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -sanitize:address
       - name: Check issues
         shell: cmd
         run: |
@@ -249,12 +257,6 @@ jobs:
           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
           cd tests\documentation
           call build.bat
-      - name: core:math/big tests
-        shell: cmd
-        run: |
-          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
-          cd tests\core\math\big
-          call build.bat
       - name: Odin check examples/all for Windows 32bits
         shell: cmd
         run: |
@@ -293,8 +295,8 @@ jobs:
       - name: Odin check examples/all
         run: ./odin check examples/all -target:linux_riscv64 -vet -strict-style -disallow-do
 
-      - name: Odin check vendor/sdl3
-        run: ./odin check vendor/sdl3 -target:linux_riscv64 -vet -strict-style -disallow-do -no-entry-point
+      - name: Odin check examples/all/sdl3
+        run: ./odin check examples/all/sdl3 -target:linux_riscv64 -vet -strict-style -disallow-do -no-entry-point
 
       - name: Install riscv64 toolchain and qemu
         run: sudo apt-get install -y qemu-user qemu-user-static gcc-12-riscv64-linux-gnu libc6-riscv64-cross

+ 60 - 0
.github/workflows/cover.yml

@@ -0,0 +1,60 @@
+name: Test Coverage
+on: [push, pull_request, workflow_dispatch]
+
+jobs:
+  build_linux_amd64:
+    runs-on: ubuntu-latest
+    name: Linux AMD64 Test Coverage
+    timeout-minutes: 60
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Download LLVM (Ubuntu)
+        if: matrix.os == 'ubuntu-latest'
+        run: |
+          wget https://apt.llvm.org/llvm.sh
+          chmod +x llvm.sh
+          sudo ./llvm.sh 20
+          echo "/usr/lib/llvm-20/bin" >> $GITHUB_PATH
+
+      - name: Install kcov
+        run: |
+          sudo apt-get update
+          sudo apt-get install binutils-dev build-essential cmake libssl-dev libcurl4-openssl-dev libelf-dev libstdc++-12-dev zlib1g-dev libdw-dev libiberty-dev
+          git clone https://github.com/SimonKagstrom/kcov.git
+          mkdir kcov/build
+          cd kcov/build
+          cmake ..
+          sudo make
+          sudo make install
+          cd ../..
+          kcov --version
+
+      - name: Build Odin
+        run: ./build_odin.sh release
+
+      - name: Odin report
+        run: ./odin report
+
+      - name: Normal Core library tests
+        run: |
+          ./odin build tests/core/normal.odin -build-mode:test -debug -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_amd64
+          mkdir kcov-out
+          kcov --exclude-path=tests,/usr kcov-out ./normal.bin .
+
+      - name: Optimized Core library tests
+        run: |
+          ./odin build tests/core/speed.odin -build-mode:test -debug -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_amd64
+          kcov --exclude-path=tests,/usr kcov-out ./speed.bin .
+
+      - name: Internals tests
+        run: |
+          ./odin build tests/internal -build-mode:test -debug -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_amd64
+          kcov --exclude-path=tests,/usr kcov-out ./internal .
+
+      - uses: codecov/codecov-action@v5
+        with:
+          name: Ubuntu Coverage # optional
+          token: ${{ secrets.CODECOV_TOKEN }}
+          verbose: true # optional (default = false)
+          directory: kcov-out/kcov-merged

+ 9 - 7
.github/workflows/nightly.yml

@@ -49,12 +49,12 @@ jobs:
       - uses: actions/checkout@v4
       - uses: jirutka/setup-alpine@v1
         with:
-          branch: v3.20
+          branch: edge
       - name: (Linux) Download LLVM
         run: |
           apk add --no-cache \
-          musl-dev llvm18-dev clang18 git mold lz4 \
-          libxml2-static llvm18-static zlib-static zstd-static \
+          musl-dev llvm20-dev clang20 git mold lz4 \
+          libxml2-static llvm20-static zlib-static zstd-static \
           make
         shell: alpine.sh --root {0}
       - name: build odin
@@ -93,8 +93,9 @@ jobs:
       - uses: actions/checkout@v4
       - name: Download LLVM and setup PATH
         run: |
-          brew install llvm@18 dylibbundler
-          echo "/usr/local/opt/llvm@18/bin" >> $GITHUB_PATH
+          brew update
+          brew install llvm@20 dylibbundler lld
+
       - name: build odin
         # These -L makes the linker prioritize system libraries over LLVM libraries, this is mainly to
         # not link with libunwind bundled with LLVM but link with libunwind on the system.
@@ -130,8 +131,9 @@ jobs:
       - uses: actions/checkout@v4
       - name: Download LLVM and setup PATH
         run: |
-          brew install llvm@18 dylibbundler
-          echo "/opt/homebrew/opt/llvm@18/bin" >> $GITHUB_PATH
+          brew update
+          brew install llvm@20 dylibbundler lld
+
       - name: build odin
         # These -L makes the linker prioritize system libraries over LLVM libraries, this is mainly to
         # not link with libunwind bundled with LLVM but link with libunwind on the system.

+ 3 - 1
.gitignore

@@ -277,6 +277,7 @@ odin
 *.bin
 demo.bin
 libLLVM*.so*
+*.a
 
 # shared collection
 shared/
@@ -293,5 +294,6 @@ build.sh
 
 # RAD debugger project file
 *.raddbg
-
+*.rdi
+tests/issues/build/*
 misc/featuregen/featuregen

BIN
LLVM-C.dll


+ 1 - 1
README.md

@@ -15,7 +15,7 @@
         <img src="https://img.shields.io/discord/568138951836172421?logo=discord">
     </a>
     <a href="https://github.com/odin-lang/odin/actions">
-        <img src="https://github.com/odin-lang/odin/workflows/CI/badge.svg?branch=master&event=push">
+        <img src="https://github.com/odin-lang/odin/actions/workflows/ci.yml/badge.svg?branch=master&event=push">
     </a>
 </p>
 

+ 231 - 8
base/builtin/builtin.odin

@@ -7,13 +7,232 @@ nil   :: nil
 false :: 0!=0
 true  :: 0==0
 
-ODIN_OS      :: ODIN_OS
-ODIN_ARCH    :: ODIN_ARCH
-ODIN_ENDIAN  :: ODIN_ENDIAN
-ODIN_VENDOR  :: ODIN_VENDOR
-ODIN_VERSION :: ODIN_VERSION
-ODIN_ROOT    :: ODIN_ROOT
-ODIN_DEBUG   :: ODIN_DEBUG
+// The following constants are added in `checker.cpp`'s `init_universal` procedure.
+
+/*
+	An `enum` value indicating the target's CPU architecture.
+	Possible values are: `.amd64`, `.i386`, `.arm32`, `.arm64`, `.wasm32`, `.wasm64p32`, and `.riscv64`.
+*/
+ODIN_ARCH                       :: ODIN_ARCH
+
+/*
+	A `string` indicating the target's CPU architecture.
+	Possible values are: "amd64", "i386", "arm32", "arm64", "wasm32", "wasm64p32", "riscv64".
+*/
+ODIN_ARCH_STRING                :: ODIN_ARCH_STRING
+
+/*
+	An `enum` value indicating the type of compiled output, chosen using `-build-mode`.
+	Possible values are: `.Executable`, `.Dynamic`, `.Static`, `.Object`, `.Assembly`, and `.LLVM_IR`.
+*/
+ODIN_BUILD_MODE                 :: ODIN_BUILD_MODE
+
+/*
+	A `string` containing the name of the folder that contains the entry point,
+	e.g. for `%ODIN_ROOT%/examples/demo`, this would contain `demo`.
+*/
+ODIN_BUILD_PROJECT_NAME         :: ODIN_BUILD_PROJECT_NAME
+
+/*
+	An `i64` containing the time at which the executable was compiled, in nanoseconds.
+	This is compatible with the `time.Time` type, i.e. `time.Time{_nsec=ODIN_COMPILE_TIMESTAMP}`
+*/
+ODIN_COMPILE_TIMESTAMP          :: ODIN_COMPILE_TIMESTAMP
+
+/*
+	`true` if the `-debug` command line switch is passed, which enables debug info generation.
+*/
+ODIN_DEBUG                      :: ODIN_DEBUG
+
+/*
+	`true` if the `-default-to-nil-allocator` command line switch is passed,
+	which sets the initial `context.allocator` to an allocator that does nothing.
+*/
+ODIN_DEFAULT_TO_NIL_ALLOCATOR   :: ODIN_DEFAULT_TO_NIL_ALLOCATOR
+
+/*
+	`true` if the `-default-to-panic-allocator` command line switch is passed,
+	which sets the initial `context.allocator` to an allocator that panics if allocated from.
+*/
+ODIN_DEFAULT_TO_PANIC_ALLOCATOR :: ODIN_DEFAULT_TO_PANIC_ALLOCATOR
+
+/*
+	`true` if the `-disable-assert` command line switch is passed,
+	which removes all calls to `assert` from the program.
+*/
+ODIN_DISABLE_ASSERT             :: ODIN_DISABLE_ASSERT
+
+/*
+	An `enum` value indicating the endianness of the target.
+	Possible values are: `.Little` and `.Big`.
+*/
+ODIN_ENDIAN                     :: ODIN_ENDIAN
+
+/*
+	A `string` indicating the endianness of the target.
+	Possible values are: "little" and "big".
+*/
+ODIN_ENDIAN_STRING              :: ODIN_ENDIAN_STRING
+
+/*
+	An `enum` value set using the `-error-pos-style` switch, indicating the source location style used for compile errors and warnings.
+	Possible values are: `.Default` (Odin-style) and `.Unix`.
+*/
+ODIN_ERROR_POS_STYLE            :: ODIN_ERROR_POS_STYLE
+
+/*
+	`true` if the `-foreign-error-procedures` command line switch is passed,
+	which inhibits generation of runtime error procedures, so that they can be in a separate compilation unit.
+*/
+ODIN_FOREIGN_ERROR_PROCEDURES   :: ODIN_FOREIGN_ERROR_PROCEDURES
+
+/*
+	A `string` describing the microarchitecture used for code generation.
+	If not set using the `-microarch` command line switch, the compiler will pick a default.
+	Possible values include, but are not limited to: "sandybridge", "x86-64-v2".
+*/
+ODIN_MICROARCH_STRING           :: ODIN_MICROARCH_STRING
+
+/*
+	An `int` value representing the minimum OS version given to the linker, calculated as `major * 10_000 + minor * 100 + revision`.
+	If not set using the `-minimum-os-version` command line switch, it defaults to `0`, except on Darwin, where it's `11_00_00`.
+*/
+ODIN_MINIMUM_OS_VERSION         :: ODIN_MINIMUM_OS_VERSION
+
+/*
+	`true` if the `-no-bounds-check` command line switch is passed, which disables bounds checking at runtime.
+*/
+ODIN_NO_BOUNDS_CHECK            :: ODIN_NO_BOUNDS_CHECK
+
+/*
+	`true` if the `-no-crt` command line switch is passed, which inhibits linking with the C Runtime Library, a.k.a. LibC.
+*/
+ODIN_NO_CRT                     :: ODIN_NO_CRT
+
+/*
+	`true` if the `-no-entry-point` command line switch is passed, which makes the declaration of a `main` procedure optional.
+*/
+ODIN_NO_ENTRY_POINT             :: ODIN_NO_ENTRY_POINT
+
+/*
+	`true` if the `-no-rtti` command line switch is passed, which inhibits generation of full Runtime Type Information.
+*/
+ODIN_NO_RTTI                    :: ODIN_NO_RTTI
+
+/*
+	`true` if the `-no-type-assert` command line switch is passed, which disables type assertion checking program wide.
+*/
+ODIN_NO_TYPE_ASSERT             :: ODIN_NO_TYPE_ASSERT
+
+/*
+	An `enum` value indicating the optimization level selected using the `-o` command line switch.
+	Possible values are: `.None`, `.Minimal`, `.Size`, `.Speed`, and `.Aggressive`.
+
+	If `ODIN_OPTIMIZATION_MODE` is anything other than `.None` or `.Minimal`, the compiler will also perform a unity build,
+	and `ODIN_USE_SEPARATE_MODULES` will be set to `false` as a result.
+*/
+ODIN_OPTIMIZATION_MODE          :: ODIN_OPTIMIZATION_MODE
+
+/*
+	An `enum` value indicating what the target operating system is.
+*/
+ODIN_OS                         :: ODIN_OS
+
+/*
+	A `string` indicating what the target operating system is.
+*/
+ODIN_OS_STRING                  :: ODIN_OS_STRING
+
+/*
+	An `enum` value indicating the platform subtarget, chosen using the `-subtarget` switch.
+	Possible values are: `.Default`, `.iOS`, and `.Android`.
+*/
+ODIN_PLATFORM_SUBTARGET         :: ODIN_PLATFORM_SUBTARGET
+
+/*
+	A `string` representing the path of the folder containing the Odin compiler,
+	relative to which we expect to find the `base` and `core` package collections.
+*/
+ODIN_ROOT                       :: ODIN_ROOT
+
+/*
+	A `bit_set` indicating the sanitizer flags set using the `-sanitize` command line switch.
+	Supported flags are `.Address`, `.Memory`, and `.Thread`.
+*/
+ODIN_SANITIZER_FLAGS            :: ODIN_SANITIZER_FLAGS
+
+/*
+	`true` if the code is being compiled via an invocation of `odin test`.
+*/
+ODIN_TEST                       :: ODIN_TEST
+
+/*
+	`true` if built using the experimental Tilde backend.
+*/
+ODIN_TILDE                      :: ODIN_TILDE
+
+/*
+	`true` by default, meaning each package is built into its own object file, and then linked together.
+	`false` if the `-use-single-module` command line switch to force a unity build is provided.
+
+	If `ODIN_OPTIMIZATION_MODE` is anything other than `.None` or `.Minimal`, the compiler will also perform a unity build,
+	and this constant will also be set to `false`.
+*/
+ODIN_USE_SEPARATE_MODULES       :: ODIN_USE_SEPARATE_MODULES
+
+/*
+	`true` if Valgrind integration is supported on the target.
+*/
+ODIN_VALGRIND_SUPPORT           :: ODIN_VALGRIND_SUPPORT
+
+/*
+	A `string` which identifies the compiler being used. The official compiler sets this to `"odin"`.
+*/
+ODIN_VENDOR                     :: ODIN_VENDOR
+
+/*
+	A `string` containing the version of the Odin compiler, typically in the format `dev-YYYY-MM`.
+*/
+ODIN_VERSION                    :: ODIN_VERSION
+
+/*
+	A `string` containing the Git hash part of the Odin version.
+	Empty if `.git` could not be detected at the time the compiler was built.
+*/
+ODIN_VERSION_HASH               :: ODIN_VERSION_HASH
+
+/*
+	An `enum` set by the `-subsystem` flag, specifying which Windows subsystem the PE file was created for.
+	Possible values are:
+		`.Unknown` - Default and only value on non-Windows platforms
+		`.Console` - Default on Windows
+		`.Windows` - Can be used by graphical applications so Windows doesn't open an empty console
+
+	There are some other possible values for e.g. EFI applications, but only Console and Windows are supported.
+
+	See also: https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_optional_header64
+*/
+ODIN_WINDOWS_SUBSYSTEM          :: ODIN_WINDOWS_SUBSYSTEM
+
+/*
+	A `string` set by the `-subsystem` flag, specifying which Windows subsystem the PE file was created for.
+	Possible values are:
+		"UNKNOWN" - Default and only value on non-Windows platforms
+		"CONSOLE" - Default on Windows
+		"WINDOWS" - Can be used by graphical applications so Windows doesn't open an empty console
+
+	There are some other possible values for e.g. EFI applications, but only Console and Windows are supported.
+
+	See also: https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_optional_header64
+*/
+ODIN_WINDOWS_SUBSYSTEM_STRING   :: ODIN_WINDOWS_SUBSYSTEM_STRING
+
+/*
+	`true` if LLVM supports the f16 type.
+*/
+__ODIN_LLVM_F16_SUPPORTED       :: __ODIN_LLVM_F16_SUPPORTED
+
+
 
 byte :: u8 // alias
 
@@ -119,7 +338,8 @@ jmag       :: proc(value: Quaternion) -> Float ---
 kmag       :: proc(value: Quaternion) -> Float ---
 conj       :: proc(value: Complex_Or_Quaternion) -> Complex_Or_Quaternion ---
 
-expand_values :: proc(value: Struct_Or_Array) -> (A, B, C, ...) ---
+expand_values   :: proc(value: Struct_Or_Array) -> (A, B, C, ...) ---
+compress_values :: proc(values: ...) -> Struct_Or_Array_Like_Type ---
 
 min   :: proc(values: ..T) -> T ---
 max   :: proc(values: ..T) -> T ---
@@ -130,3 +350,6 @@ soa_zip :: proc(slices: ...) -> #soa[]Struct ---
 soa_unzip :: proc(value: $S/#soa[]$E) -> (slices: ...) ---
 
 unreachable :: proc() -> ! ---
+
+// Where T is a string, slice, dynamic array, or pointer to an array type
+raw_data :: proc(t: $T) -> rawptr

+ 13 - 2
base/intrinsics/intrinsics.odin

@@ -169,6 +169,7 @@ type_is_union            :: proc($T: typeid) -> bool ---
 type_is_enum             :: proc($T: typeid) -> bool ---
 type_is_proc             :: proc($T: typeid) -> bool ---
 type_is_bit_set          :: proc($T: typeid) -> bool ---
+type_is_bit_field        :: proc($T: typeid) -> bool ---
 type_is_simd_vector      :: proc($T: typeid) -> bool ---
 type_is_matrix           :: proc($T: typeid) -> bool ---
 
@@ -221,6 +222,9 @@ type_map_cell_info :: proc($T: typeid)           -> ^runtime.Map_Cell_Info ---
 type_convert_variants_to_pointers :: proc($T: typeid) -> typeid where type_is_union(T) ---
 type_merge :: proc($U, $V: typeid) -> typeid where type_is_union(U), type_is_union(V) ---
 
+type_integer_to_unsigned :: proc($T: typeid) -> type where type_is_integer(T), !type_is_unsigned(T) ---
+type_integer_to_signed   :: proc($T: typeid) -> type where type_is_integer(T), type_is_unsigned(T) ---
+
 type_has_shared_fields :: proc($U, $V: typeid) -> bool where type_is_struct(U), type_is_struct(V) ---
 
 constant_utf16_cstring :: proc($literal: string) -> [^]u16 ---
@@ -274,8 +278,12 @@ simd_lanes_ge :: proc(a, b: #simd[N]T) -> #simd[N]Integer ---
 simd_extract :: proc(a: #simd[N]T, idx: uint) -> T ---
 simd_replace :: proc(a: #simd[N]T, idx: uint, elem: T) -> #simd[N]T ---
 
+simd_reduce_add_bisect  :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
+simd_reduce_mul_bisect  :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
 simd_reduce_add_ordered :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
 simd_reduce_mul_ordered :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
+simd_reduce_add_pairs   :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
+simd_reduce_mul_pairs   :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
 simd_reduce_min         :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
 simd_reduce_max         :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
 simd_reduce_and         :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
@@ -298,7 +306,7 @@ simd_masked_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U)
 simd_masked_expand_load    :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) ---
 simd_masked_compress_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U)              where type_is_integer(U) || type_is_boolean(U) ---
 
-
+simd_indices :: proc($T: typeid/#simd[$N]$E) -> T where type_is_numeric(T) ---
 
 simd_shuffle :: proc(a, b: #simd[N]T, indices: ..int) -> #simd[len(indices)]T ---
 simd_select  :: proc(cond: #simd[N]boolean_or_integer, true, false: #simd[N]T) -> #simd[N]T ---
@@ -353,15 +361,18 @@ x86_xgetbv :: proc(cx: u32) -> (eax, edx: u32) ---
 objc_object   :: struct{}
 objc_selector :: struct{}
 objc_class    :: struct{}
+objc_ivar     :: struct{}
+
 objc_id    :: ^objc_object
 objc_SEL   :: ^objc_selector
 objc_Class :: ^objc_class
+objc_Ivar  :: ^objc_ivar
 
 objc_find_selector     :: proc($name: string) -> objc_SEL   ---
 objc_register_selector :: proc($name: string) -> objc_SEL   ---
 objc_find_class        :: proc($name: string) -> objc_Class ---
 objc_register_class    :: proc($name: string) -> objc_Class ---
-
+objc_ivar_get          :: proc(self: ^$T) -> ^$U ---
 
 valgrind_client_request :: proc(default: uintptr, request: uintptr, a0, a1, a2, a3, a4: uintptr) -> uintptr ---
 

+ 4 - 1
base/runtime/core.odin

@@ -110,7 +110,6 @@ Type_Info_Parameters :: struct { // Only used for procedures parameters and resu
 	types:        []^Type_Info,
 	names:        []string,
 }
-Type_Info_Tuple :: Type_Info_Parameters // Will be removed eventually
 
 Type_Info_Struct_Flags :: distinct bit_set[Type_Info_Struct_Flag; u8]
 Type_Info_Struct_Flag :: enum u8 {
@@ -559,10 +558,14 @@ ALL_ODIN_OS_TYPES :: Odin_OS_Types{
 	Odin_Platform_Subtarget_Type :: enum int {
 		Default,
 		iOS,
+		Android,
 	}
 */
 Odin_Platform_Subtarget_Type :: type_of(ODIN_PLATFORM_SUBTARGET)
 
+Odin_Platform_Subtarget_Types :: bit_set[Odin_Platform_Subtarget_Type]
+
+
 /*
 	// Defined internally by the compiler
 	Odin_Sanitizer_Flag :: enum u32 {

+ 11 - 2
base/runtime/core_builtin.odin

@@ -67,7 +67,7 @@ init_global_temporary_allocator :: proc(size: int, backup_allocator := context.a
 // Prefer the procedure group `copy`.
 @builtin
 copy_slice :: proc "contextless" (dst, src: $T/[]$E) -> int {
-	n := max(0, min(len(dst), len(src)))
+	n := min(len(dst), len(src))
 	if n > 0 {
 		intrinsics.mem_copy(raw_data(dst), raw_data(src), n*size_of(E))
 	}
@@ -80,7 +80,7 @@ copy_slice :: proc "contextless" (dst, src: $T/[]$E) -> int {
 // Prefer the procedure group `copy`.
 @builtin
 copy_from_string :: proc "contextless" (dst: $T/[]$E/u8, src: $S/string) -> int {
-	n := max(0, min(len(dst), len(src)))
+	n := min(len(dst), len(src))
 	if n > 0 {
 		intrinsics.mem_copy(raw_data(dst), raw_data(src), n)
 	}
@@ -648,6 +648,9 @@ append_nothing :: proc(array: ^$T/[dynamic]$E, loc := #caller_location) -> (n: i
 
 @builtin
 inject_at_elem :: proc(array: ^$T/[dynamic]$E, #any_int index: int, #no_broadcast arg: E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(index >= 0, "Index must be positive.", loc)
+	}
 	if array == nil {
 		return
 	}
@@ -666,6 +669,9 @@ inject_at_elem :: proc(array: ^$T/[dynamic]$E, #any_int index: int, #no_broadcas
 
 @builtin
 inject_at_elems :: proc(array: ^$T/[dynamic]$E, #any_int index: int, #no_broadcast args: ..E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(index >= 0, "Index must be positive.", loc)
+	}
 	if array == nil {
 		return
 	}
@@ -689,6 +695,9 @@ inject_at_elems :: proc(array: ^$T/[dynamic]$E, #any_int index: int, #no_broadca
 
 @builtin
 inject_at_elem_string :: proc(array: ^$T/[dynamic]$E/u8, #any_int index: int, arg: string, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(index >= 0, "Index must be positive.", loc)
+	}
 	if array == nil {
 		return
 	}

+ 15 - 6
base/runtime/default_temp_allocator_arena.odin

@@ -1,6 +1,7 @@
 package runtime
 
 import "base:intrinsics"
+// import "base:sanitizer"
 
 DEFAULT_ARENA_GROWING_MINIMUM_BLOCK_SIZE :: uint(DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE)
 
@@ -43,6 +44,8 @@ memory_block_alloc :: proc(allocator: Allocator, capacity: uint, alignment: uint
 	block.base = ([^]byte)(uintptr(block) + base_offset)
 	block.capacity = uint(end - uintptr(block.base))
 
+	// sanitizer.address_poison(block.base, block.capacity)
+
 	// Should be zeroed
 	assert(block.used == 0)
 	assert(block.prev == nil)
@@ -52,6 +55,7 @@ memory_block_alloc :: proc(allocator: Allocator, capacity: uint, alignment: uint
 memory_block_dealloc :: proc(block_to_free: ^Memory_Block, loc := #caller_location) {
 	if block_to_free != nil {
 		allocator := block_to_free.allocator
+		// sanitizer.address_unpoison(block_to_free.base, block_to_free.capacity)
 		mem_free(block_to_free, allocator, loc)
 	}
 }
@@ -83,6 +87,7 @@ alloc_from_memory_block :: proc(block: ^Memory_Block, min_size, alignment: uint)
 		return
 	}
 	data = block.base[block.used+alignment_offset:][:min_size]
+	// sanitizer.address_unpoison(block.base[block.used:block.used+size])
 	block.used += size
 	return
 }
@@ -104,13 +109,15 @@ arena_alloc :: proc(arena: ^Arena, size, alignment: uint, loc := #caller_locatio
 	if size == 0 {
 		return
 	}
-	
-	needed := align_forward_uint(size, alignment)
-	if arena.curr_block == nil || (safe_add(arena.curr_block.used, needed) or_else 0) > arena.curr_block.capacity {
+
+	prev_used := 0 if arena.curr_block == nil else arena.curr_block.used
+	data, err = alloc_from_memory_block(arena.curr_block, size, alignment)
+	if err == .Out_Of_Memory {
 		if arena.minimum_block_size == 0 {
 			arena.minimum_block_size = DEFAULT_ARENA_GROWING_MINIMUM_BLOCK_SIZE
 		}
 
+		needed := align_forward_uint(size, alignment)
 		block_size := max(needed, arena.minimum_block_size)
 
 		if arena.backing_allocator.procedure == nil {
@@ -121,10 +128,9 @@ arena_alloc :: proc(arena: ^Arena, size, alignment: uint, loc := #caller_locatio
 		new_block.prev = arena.curr_block
 		arena.curr_block = new_block
 		arena.total_capacity += new_block.capacity
+		prev_used = 0
+		data, err = alloc_from_memory_block(arena.curr_block, size, alignment)
 	}
-
-	prev_used := arena.curr_block.used
-	data, err = alloc_from_memory_block(arena.curr_block, size, alignment)
 	arena.total_used += arena.curr_block.used - prev_used
 	return
 }
@@ -161,6 +167,7 @@ arena_free_all :: proc(arena: ^Arena, loc := #caller_location) {
 	if arena.curr_block != nil {
 		intrinsics.mem_zero(arena.curr_block.base, arena.curr_block.used)
 		arena.curr_block.used = 0
+		// sanitizer.address_poison(arena.curr_block.base, arena.curr_block.capacity)
 	}
 	arena.total_used = 0
 }
@@ -225,6 +232,7 @@ arena_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
 					// grow data in-place, adjusting next allocation
 					block.used = uint(new_end)
 					data = block.base[start:new_end]
+					// sanitizer.address_unpoison(data)
 					return
 				}
 			}
@@ -298,6 +306,7 @@ arena_temp_end :: proc(temp: Arena_Temp, loc := #caller_location) {
 			assert(block.used >= temp.used, "out of order use of arena_temp_end", loc)
 			amount_to_zero := block.used-temp.used
 			intrinsics.mem_zero(block.base[temp.used:], amount_to_zero)
+			// sanitizer.address_poison(block.base[temp.used:block.capacity])
 			block.used = temp.used
 			arena.total_used -= amount_to_zero
 		}

+ 29 - 0
base/runtime/dynamic_map_internal.odin

@@ -1029,3 +1029,32 @@ default_hasher_cstring :: proc "contextless" (data: rawptr, seed: uintptr) -> ui
 	h &= HASH_MASK
 	return uintptr(h) | uintptr(uintptr(h) == 0)
 }
+
+default_hasher_f64 :: proc "contextless" (f: f64, seed: uintptr) -> uintptr {
+	f := f
+	buf: [size_of(f)]u8
+	if f == 0 {
+		return default_hasher(&buf, seed, size_of(buf))
+	}
+	if f != f {
+		// TODO(bill): What should the logic be for NaNs?
+		return default_hasher(&f, seed, size_of(f))
+	}
+	return default_hasher(&f, seed, size_of(f))
+}
+
+default_hasher_complex128 :: proc "contextless" (x, y: f64, seed: uintptr) -> uintptr {
+	seed := seed
+	seed = default_hasher_f64(x, seed)
+	seed = default_hasher_f64(y, seed)
+	return seed
+}
+
+default_hasher_quaternion256 :: proc "contextless" (x, y, z, w: f64, seed: uintptr) -> uintptr {
+	seed := seed
+	seed = default_hasher_f64(x, seed)
+	seed = default_hasher_f64(y, seed)
+	seed = default_hasher_f64(z, seed)
+	seed = default_hasher_f64(w, seed)
+	return seed
+}

+ 10 - 2
base/runtime/heap_allocator_windows.odin

@@ -1,5 +1,7 @@
 package runtime
 
+import "../sanitizer"
+
 foreign import kernel32 "system:Kernel32.lib"
 
 @(private="file")
@@ -16,7 +18,10 @@ foreign kernel32 {
 
 _heap_alloc :: proc "contextless" (size: int, zero_memory := true) -> rawptr {
 	HEAP_ZERO_MEMORY :: 0x00000008
-	return HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY if zero_memory else 0, uint(size))
+	ptr := HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY if zero_memory else 0, uint(size))
+	// NOTE(lucas): asan not guaranteed to unpoison win32 heap out of the box, do it ourselves
+	sanitizer.address_unpoison(ptr, size)
+	return ptr
 }
 _heap_resize :: proc "contextless" (ptr: rawptr, new_size: int) -> rawptr {
 	if new_size == 0 {
@@ -28,7 +33,10 @@ _heap_resize :: proc "contextless" (ptr: rawptr, new_size: int) -> rawptr {
 	}
 
 	HEAP_ZERO_MEMORY :: 0x00000008
-	return HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, ptr, uint(new_size))
+	new_ptr := HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, ptr, uint(new_size))
+	// NOTE(lucas): asan not guaranteed to unpoison win32 heap out of the box, do it ourselves
+	sanitizer.address_unpoison(new_ptr, new_size)
+	return new_ptr
 }
 _heap_free :: proc "contextless" (ptr: rawptr) {
 	if ptr == nil {

+ 214 - 109
base/runtime/internal.odin

@@ -16,6 +16,12 @@ RUNTIME_REQUIRE :: false // !ODIN_TILDE
 @(private)
 __float16 :: f16 when __ODIN_LLVM_F16_SUPPORTED else u16
 
+HAS_HARDWARE_SIMD :: false when (ODIN_ARCH == .amd64 || ODIN_ARCH == .i386) && !intrinsics.has_target_feature("sse2") else
+	false when (ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32) && !intrinsics.has_target_feature("neon") else
+	false when (ODIN_ARCH == .wasm64p32 || ODIN_ARCH == .wasm32) && !intrinsics.has_target_feature("simd128") else
+	false when (ODIN_ARCH == .riscv64) && !intrinsics.has_target_feature("v") else
+	true
+
 
 @(private)
 byte_slice :: #force_inline proc "contextless" (data: rawptr, len: int) -> []byte #no_bounds_check {
@@ -229,151 +235,242 @@ memory_equal :: proc "contextless" (x, y: rawptr, n: int) -> bool {
 	case n == 0: return true
 	case x == y: return true
 	}
-	a, b := ([^]byte)(x), ([^]byte)(y)
-	length := uint(n)
-
-	for i := uint(0); i < length; i += 1 {
-		if a[i] != b[i] {
-			return false
-		}
-	}
-	return true
-	
-/*
-
-	when size_of(uint) == 8 {
-		if word_length := length >> 3; word_length != 0 {
-			for _ in 0..<word_length {
-				if intrinsics.unaligned_load((^u64)(a)) != intrinsics.unaligned_load((^u64)(b)) {
-					return false
+	a, b := cast([^]byte)x, cast([^]byte)y
+
+	n := uint(n)
+	i := uint(0)
+	m := uint(0)
+
+	if n >= 8 {
+		when HAS_HARDWARE_SIMD {
+			// Avoid using 256-bit SIMD on platforms where its emulation is
+			// likely to be less than ideal.
+			when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
+				m = n / 32 * 32
+				for /**/; i < m; i += 32 {
+					load_a := intrinsics.unaligned_load(cast(^#simd[32]u8)&a[i])
+					load_b := intrinsics.unaligned_load(cast(^#simd[32]u8)&b[i])
+					ne := intrinsics.simd_lanes_ne(load_a, load_b)
+					if intrinsics.simd_reduce_or(ne) != 0 {
+						return false
+					}
 				}
-				a = a[size_of(u64):]
-				b = b[size_of(u64):]
 			}
 		}
-		
-		if length & 4 != 0 {
-			if intrinsics.unaligned_load((^u32)(a)) != intrinsics.unaligned_load((^u32)(b)) {
+
+		m = (n-i) / 16 * 16
+		for /**/; i < m; i += 16 {
+			load_a := intrinsics.unaligned_load(cast(^#simd[16]u8)&a[i])
+			load_b := intrinsics.unaligned_load(cast(^#simd[16]u8)&b[i])
+			ne := intrinsics.simd_lanes_ne(load_a, load_b)
+			if intrinsics.simd_reduce_or(ne) != 0 {
 				return false
 			}
-			a = a[size_of(u32):]
-			b = b[size_of(u32):]
 		}
-		
-		if length & 2 != 0 {
-			if intrinsics.unaligned_load((^u16)(a)) != intrinsics.unaligned_load((^u16)(b)) {
+
+		m = (n-i) / 8 * 8
+		for /**/; i < m; i += 8 {
+			if intrinsics.unaligned_load(cast(^uintptr)&a[i]) != intrinsics.unaligned_load(cast(^uintptr)&b[i]) {
 				return false
 			}
-			a = a[size_of(u16):]
-			b = b[size_of(u16):]
 		}
-		
-		if length & 1 != 0 && a[0] != b[0] {
-			return false	
+	}
+
+	for /**/; i < n; i += 1 {
+		if a[i] != b[i] {
+			return false
 		}
-		return true
-	} else {
-		if word_length := length >> 2; word_length != 0 {
-			for _ in 0..<word_length {
-				if intrinsics.unaligned_load((^u32)(a)) != intrinsics.unaligned_load((^u32)(b)) {
-					return false
+	}
+	return true
+}
+
+memory_compare :: proc "contextless" (x, y: rawptr, n: int) -> int #no_bounds_check {
+	switch {
+	case x == y:   return 0
+	case x == nil: return -1
+	case y == nil: return +1
+	}
+	a, b := cast([^]byte)x, cast([^]byte)y
+	
+	n := uint(n)
+	i := uint(0)
+	m := uint(0)
+
+	when HAS_HARDWARE_SIMD {
+		when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
+			m = n / 32 * 32
+			for /**/; i < m; i += 32 {
+				load_a := intrinsics.unaligned_load(cast(^#simd[32]u8)&a[i])
+				load_b := intrinsics.unaligned_load(cast(^#simd[32]u8)&b[i])
+				comparison := intrinsics.simd_lanes_ne(load_a, load_b)
+				if intrinsics.simd_reduce_or(comparison) != 0 {
+					sentinel: #simd[32]u8 = u8(0xFF)
+					indices := intrinsics.simd_indices(#simd[32]u8)
+					index_select := intrinsics.simd_select(comparison, indices, sentinel)
+					index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
+					return -1 if a[i+index_reduce] < b[i+index_reduce] else +1
 				}
-				a = a[size_of(u32):]
-				b = b[size_of(u32):]
 			}
 		}
-		
-		length &= 3
-		
-		if length != 0 {
-			for i in 0..<length {
-				if a[i] != b[i] {
-					return false
-				}
-			}
+	}
+
+	m = (n-i) / 16 * 16
+	for /**/; i < m; i += 16 {
+		load_a := intrinsics.unaligned_load(cast(^#simd[16]u8)&a[i])
+		load_b := intrinsics.unaligned_load(cast(^#simd[16]u8)&b[i])
+		comparison := intrinsics.simd_lanes_ne(load_a, load_b)
+		if intrinsics.simd_reduce_or(comparison) != 0 {
+			sentinel: #simd[16]u8 = u8(0xFF)
+			indices := intrinsics.simd_indices(#simd[16]u8)
+			index_select := intrinsics.simd_select(comparison, indices, sentinel)
+			index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
+			return -1 if a[i+index_reduce] < b[i+index_reduce] else +1
 		}
+	}
 
-		return true
+	// 64-bit SIMD is faster than using a `uintptr` to detect a difference then
+	// re-iterating with the byte-by-byte loop, at least on AMD64.
+	m = (n-i) / 8 * 8
+	for /**/; i < m; i += 8 {
+		load_a := intrinsics.unaligned_load(cast(^#simd[8]u8)&a[i])
+		load_b := intrinsics.unaligned_load(cast(^#simd[8]u8)&b[i])
+		comparison := intrinsics.simd_lanes_ne(load_a, load_b)
+		if intrinsics.simd_reduce_or(comparison) != 0 {
+			sentinel: #simd[8]u8 = u8(0xFF)
+			indices := intrinsics.simd_indices(#simd[8]u8)
+			index_select := intrinsics.simd_select(comparison, indices, sentinel)
+			index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
+			return -1 if a[i+index_reduce] < b[i+index_reduce] else +1
+		}
 	}
-*/
 
+	for /**/; i < n; i += 1 {
+		if a[i] ~ b[i] != 0 {
+			return -1 if int(a[i]) - int(b[i]) < 0 else +1
+		}
+	}
+	return 0
 }
-memory_compare :: proc "contextless" (a, b: rawptr, n: int) -> int #no_bounds_check {
-	switch {
-	case a == b:   return 0
-	case a == nil: return -1
-	case b == nil: return +1
-	}
-
-	x := uintptr(a)
-	y := uintptr(b)
-	n := uintptr(n)
-
-	SU :: size_of(uintptr)
-	fast := n/SU + 1
-	offset := (fast-1)*SU
-	curr_block := uintptr(0)
-	if n < SU {
-		fast = 0
-	}
-
-	for /**/; curr_block < fast; curr_block += 1 {
-		va := (^uintptr)(x + curr_block * size_of(uintptr))^
-		vb := (^uintptr)(y + curr_block * size_of(uintptr))^
-		if va ~ vb != 0 {
-			for pos := curr_block*SU; pos < n; pos += 1 {
-				a := (^byte)(x+pos)^
-				b := (^byte)(y+pos)^
-				if a ~ b != 0 {
-					return -1 if (int(a) - int(b)) < 0 else +1
+
+memory_compare_zero :: proc "contextless" (a: rawptr, n: int) -> int #no_bounds_check {
+	n := uint(n)
+	i := uint(0)
+	m := uint(0)
+
+	// Because we're comparing against zero, we never return -1, as that would
+	// indicate the compared value is less than zero.
+	//
+	// Note that a zero return value here means equality.
+
+	bytes := ([^]u8)(a)
+
+	if n >= 8 {
+		when HAS_HARDWARE_SIMD {
+			when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
+				scanner32: #simd[32]u8
+				m = n / 32 * 32
+				for /**/; i < m; i += 32 {
+					load := intrinsics.unaligned_load(cast(^#simd[32]u8)&bytes[i])
+					ne := intrinsics.simd_lanes_ne(scanner32, load)
+					if intrinsics.simd_reduce_or(ne) > 0 {
+						return 1
+					}
 				}
 			}
 		}
-	}
 
-	for /**/; offset < n; offset += 1 {
-		a := (^byte)(x+offset)^
-		b := (^byte)(y+offset)^
-		if a ~ b != 0 {
-			return -1 if (int(a) - int(b)) < 0 else +1
+		scanner16: #simd[16]u8
+		m = (n-i) / 16 * 16
+		for /**/; i < m; i += 16 {
+			load := intrinsics.unaligned_load(cast(^#simd[16]u8)&bytes[i])
+			ne := intrinsics.simd_lanes_ne(scanner16, load)
+			if intrinsics.simd_reduce_or(ne) != 0 {
+				return 1
+			}
+		}
+
+		m = (n-i) / 8 * 8
+		for /**/; i < m; i += 8 {
+			if intrinsics.unaligned_load(cast(^uintptr)&bytes[i]) != 0 {
+				return 1
+			}
 		}
 	}
 
+	for /**/; i < n; i += 1 {
+		if bytes[i] != 0 {
+			return 1
+		}
+	}
 	return 0
 }
 
-memory_compare_zero :: proc "contextless" (a: rawptr, n: int) -> int #no_bounds_check {
-	x := uintptr(a)
-	n := uintptr(n)
-
-	SU :: size_of(uintptr)
-	fast := n/SU + 1
-	offset := (fast-1)*SU
-	curr_block := uintptr(0)
-	if n < SU {
-		fast = 0
-	}
-
-	for /**/; curr_block < fast; curr_block += 1 {
-		va := (^uintptr)(x + curr_block * size_of(uintptr))^
-		if va ~ 0 != 0 {
-			for pos := curr_block*SU; pos < n; pos += 1 {
-				a := (^byte)(x+pos)^
-				if a ~ 0 != 0 {
-					return -1 if int(a) < 0 else +1
+memory_prefix_length :: proc "contextless" (x, y: rawptr, n: int) -> (idx: int) #no_bounds_check {
+	switch {
+	case x == y:   return n
+	case x == nil: return 0
+	case y == nil: return 0
+	}
+	a, b := cast([^]byte)x, cast([^]byte)y
+
+	n := uint(n)
+	i := uint(0)
+	m := uint(0)
+
+	when HAS_HARDWARE_SIMD {
+		when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
+			m = n / 32 * 32
+			for /**/; i < m; i += 32 {
+				load_a := intrinsics.unaligned_load(cast(^#simd[32]u8)&a[i])
+				load_b := intrinsics.unaligned_load(cast(^#simd[32]u8)&b[i])
+				comparison := intrinsics.simd_lanes_ne(load_a, load_b)
+				if intrinsics.simd_reduce_or(comparison) != 0 {
+					sentinel: #simd[32]u8 = u8(0xFF)
+					indices := intrinsics.simd_indices(#simd[32]u8)
+					index_select := intrinsics.simd_select(comparison, indices, sentinel)
+					index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
+					return int(i + index_reduce)
 				}
 			}
 		}
 	}
 
-	for /**/; offset < n; offset += 1 {
-		a := (^byte)(x+offset)^
-		if a ~ 0 != 0 {
-			return -1 if int(a) < 0 else +1
+	m = (n-i) / 16 * 16
+	for /**/; i < m; i += 16 {
+		load_a := intrinsics.unaligned_load(cast(^#simd[16]u8)&a[i])
+		load_b := intrinsics.unaligned_load(cast(^#simd[16]u8)&b[i])
+		comparison := intrinsics.simd_lanes_ne(load_a, load_b)
+		if intrinsics.simd_reduce_or(comparison) != 0 {
+			sentinel: #simd[16]u8 = u8(0xFF)
+			indices := intrinsics.simd_indices(#simd[16]u8)
+			index_select := intrinsics.simd_select(comparison, indices, sentinel)
+			index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
+			return int(i + index_reduce)
 		}
 	}
 
-	return 0
+	// 64-bit SIMD is faster than using a `uintptr` to detect a difference then
+	// re-iterating with the byte-by-byte loop, at least on AMD64.
+	m = (n-i) / 8 * 8
+	for /**/; i < m; i += 8 {
+		load_a := intrinsics.unaligned_load(cast(^#simd[8]u8)&a[i])
+		load_b := intrinsics.unaligned_load(cast(^#simd[8]u8)&b[i])
+		comparison := intrinsics.simd_lanes_ne(load_a, load_b)
+		if intrinsics.simd_reduce_or(comparison) != 0 {
+			sentinel: #simd[8]u8 = u8(0xFF)
+			indices := intrinsics.simd_indices(#simd[8]u8)
+			index_select := intrinsics.simd_select(comparison, indices, sentinel)
+			index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
+			return int(i + index_reduce)
+		}
+	}
+
+	for /**/; i < n; i += 1 {
+		if a[i] ~ b[i] != 0 {
+			return int(i)
+		}
+	}
+	return int(n)
 }
 
 string_eq :: proc "contextless" (lhs, rhs: string) -> bool {
@@ -1106,3 +1203,11 @@ __read_bits :: proc "contextless" (dst, src: [^]byte, offset: uintptr, size: uin
 		dst[j>>3]  |= the_bit<<(j&7)
 	}
 }
+
+when .Address in ODIN_SANITIZER_FLAGS {
+	foreign {
+		@(require)
+		__asan_unpoison_memory_region :: proc "system" (address: rawptr, size: uint) ---
+	}
+}
+

+ 1 - 1
base/runtime/os_specific_bsd.odin

@@ -9,7 +9,7 @@ foreign libc {
 	@(link_name="write")
 	_unix_write :: proc(fd: i32, buf: rawptr, size: int) -> int ---
 
-	when ODIN_OS == .NetBSD {
+	when ODIN_OS == .NetBSD || ODIN_OS == .OpenBSD {
 		@(link_name="__errno") __error :: proc() -> ^i32 ---
 	} else {
 		__error :: proc() -> ^i32 ---

+ 19 - 6
base/runtime/procs_darwin.odin

@@ -2,21 +2,34 @@
 package runtime
 
 @(priority_index=-1e6)
-foreign import "system:Foundation.framework"
+foreign import ObjC "system:objc"
 
 import "base:intrinsics"
 
-objc_id :: ^intrinsics.objc_object
+objc_id    :: ^intrinsics.objc_object
 objc_Class :: ^intrinsics.objc_class
-objc_SEL :: ^intrinsics.objc_selector
+objc_SEL   :: ^intrinsics.objc_selector
+objc_Ivar  :: ^intrinsics.objc_ivar
+objc_BOOL  :: bool
 
-foreign Foundation {
-	objc_lookUpClass :: proc "c" (name: cstring) -> objc_Class ---
+
+objc_IMP :: proc "c" (object: objc_id, sel: objc_SEL, #c_vararg args: ..any) -> objc_id
+
+foreign ObjC {
 	sel_registerName :: proc "c" (name: cstring) -> objc_SEL ---
-	objc_allocateClassPair :: proc "c" (superclass: objc_Class, name: cstring, extraBytes: uint) -> objc_Class ---
 
 	objc_msgSend        :: proc "c" (self: objc_id, op: objc_SEL, #c_vararg args: ..any) ---
 	objc_msgSend_fpret  :: proc "c" (self: objc_id, op: objc_SEL, #c_vararg args: ..any) -> f64 ---
 	objc_msgSend_fp2ret :: proc "c" (self: objc_id, op: objc_SEL, #c_vararg args: ..any) -> complex128 ---
 	objc_msgSend_stret  :: proc "c" (self: objc_id, op: objc_SEL, #c_vararg args: ..any) ---
+
+	objc_lookUpClass          :: proc "c" (name: cstring) -> objc_Class ---
+	objc_allocateClassPair    :: proc "c" (superclass: objc_Class, name: cstring, extraBytes: uint) -> objc_Class ---
+	objc_registerClassPair    :: proc "c" (cls : objc_Class) ---
+	class_addMethod           :: proc "c" (cls: objc_Class, name: objc_SEL, imp: objc_IMP, types: cstring) -> objc_BOOL ---
+	class_addIvar             :: proc "c" (cls: objc_Class, name: cstring, size: uint, alignment: u8, types: cstring) -> objc_BOOL ---
+	class_getInstanceVariable :: proc "c" (cls : objc_Class, name: cstring) -> objc_Ivar ---
+	class_getInstanceSize     :: proc "c" (cls : objc_Class) -> uint ---
+	ivar_getOffset            :: proc "c" (v: objc_Ivar) -> uintptr ---
 }
+

+ 601 - 0
base/sanitizer/address.odin

@@ -0,0 +1,601 @@
+#+no-instrumentation
+package sanitizer
+
+Address_Death_Callback :: #type proc "c" (pc: rawptr, bp: rawptr, sp: rawptr, addr: rawptr, is_write: i32, access_size: uint)
+
+@(private="file")
+ASAN_ENABLED :: .Address in ODIN_SANITIZER_FLAGS
+
+@(private="file")
+@(default_calling_convention="system")
+foreign {
+	__asan_poison_memory_region      :: proc(address: rawptr, size: uint) ---
+	__asan_unpoison_memory_region    :: proc(address: rawptr, size: uint) ---
+	__sanitizer_set_death_callback   :: proc(callback: Address_Death_Callback) ---
+	__asan_region_is_poisoned        :: proc(begin: rawptr, size: uint) -> rawptr ---
+	__asan_address_is_poisoned       :: proc(addr: rawptr) -> i32 ---
+	__asan_describe_address          :: proc(addr: rawptr) ---
+	__asan_report_present            :: proc() -> i32 ---
+	__asan_get_report_pc             :: proc() -> rawptr ---
+	__asan_get_report_bp             :: proc() -> rawptr ---
+	__asan_get_report_sp             :: proc() -> rawptr ---
+	__asan_get_report_address        :: proc() -> rawptr ---
+	__asan_get_report_access_type    :: proc() -> i32 ---
+	__asan_get_report_access_size    :: proc() -> uint ---
+	__asan_get_report_description    :: proc() -> cstring ---
+	__asan_locate_address            :: proc(addr: rawptr, name: rawptr, name_size: uint, region_address: ^rawptr, region_size: ^uint) -> cstring ---
+	__asan_get_alloc_stack           :: proc(addr: rawptr, trace: rawptr, size: uint, thread_id: ^i32) -> uint ---
+	__asan_get_free_stack            :: proc(addr: rawptr, trace: rawptr, size: uint, thread_id: ^i32) -> uint ---
+	__asan_get_shadow_mapping        :: proc(shadow_scale: ^uint, shadow_offset: ^uint) ---
+	__asan_print_accumulated_stats   :: proc() ---
+	__asan_get_current_fake_stack    :: proc() -> rawptr ---
+	__asan_addr_is_in_fake_stack     :: proc(fake_stack: rawptr, addr: rawptr, beg: ^rawptr, end: ^rawptr) -> rawptr ---
+	__asan_handle_no_return          :: proc() ---
+	__asan_update_allocation_context :: proc(addr: rawptr) -> i32 ---
+}
+
+Address_Access_Type :: enum {
+	none,
+	read,
+	write,
+}
+
+Address_Located_Address :: struct {
+	category: string,
+	name: string,
+	region: []byte,
+}
+
+Address_Shadow_Mapping :: struct {
+	scale: uint,
+	offset: uint,
+}
+
+/*
+Marks a slice as unaddressable
+
+Code instrumented with `-sanitize:address` is forbidden from accessing any address
+within the slice. This procedure is not thread-safe because no two threads can
+poison or unpoison memory in the same memory region simultaneously.
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_poison_slice :: proc "contextless" (region: $T/[]$E) {
+	when ASAN_ENABLED {
+		// `len` yields an `int` and the foreign procedure takes a `uint`;
+		// Odin performs no implicit int -> uint conversion, so cast explicitly.
+		__asan_poison_memory_region(raw_data(region), uint(size_of(E) * len(region)))
+	}
+}
+
+/*
+Marks a slice as addressable
+
+Code instrumented with `-sanitize:address` is allowed to access any address
+within the slice again. This procedure is not thread-safe because no two threads
+can poison or unpoison memory in the same memory region simultaneously.
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_unpoison_slice :: proc "contextless" (region: $T/[]$E) {
+	when ASAN_ENABLED {
+		// `len` yields an `int` and the foreign procedure takes a `uint`;
+		// Odin performs no implicit int -> uint conversion, so cast explicitly.
+		__asan_unpoison_memory_region(raw_data(region), uint(size_of(E) * len(region)))
+	}
+}
+
+/*
+Marks a pointer as unaddressable
+
+Code instrumented with `-sanitize:address` is forbidden from accessing any address
+within the region the pointer points to. This procedure is not thread-safe because no
+two threads can poison or unpoison memory in the same memory region simultaneously.
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_poison_ptr :: proc "contextless" (ptr: ^$T) {
+	when ASAN_ENABLED {
+		__asan_poison_memory_region(ptr, size_of(T))
+	}
+}
+
+/*
+Marks a pointer as addressable
+
+Code instrumented with `-sanitize:address` is allowed to access any address
+within the region the pointer points to again. This procedure is not thread-safe
+because no two threads can poison or unpoison memory in the same memory region
+simultaneously.
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_unpoison_ptr :: proc "contextless" (ptr: ^$T) {
+	when ASAN_ENABLED {
+		__asan_unpoison_memory_region(ptr, size_of(T))
+	}
+}
+
+/*
+Marks the region covering `[ptr, ptr+len)` as unaddressable
+
+Code instrumented with `-sanitize:address` is forbidden from accessing any address
+within the region. This procedure is not thread-safe because no two threads can
+poison or unpoison memory in the same memory region simultaneously.
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_poison_rawptr :: proc "contextless" (ptr: rawptr, len: int) {
+	when ASAN_ENABLED {
+		assert_contextless(len >= 0)
+		__asan_poison_memory_region(ptr, uint(len))
+	}
+}
+
+/*
+Marks the region covering `[ptr, ptr+len)` as unaddressable
+
+Code instrumented with `-sanitize:address` is forbidden from accessing any address
+within the region. This procedure is not thread-safe because no two threads can
+poison or unpoison memory in the same memory region simultaneously.
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_poison_rawptr_uint :: proc "contextless" (ptr: rawptr, len: uint) {
+	when ASAN_ENABLED {
+		__asan_poison_memory_region(ptr, len)
+	}
+}
+
+/*
+Marks the region covering `[ptr, ptr+len)` as addressable
+
+Code instrumented with `-sanitize:address` is allowed to access any address
+within the region again. This procedure is not thread-safe because no two
+threads can poison or unpoison memory in the same memory region simultaneously.
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_unpoison_rawptr :: proc "contextless" (ptr: rawptr, len: int) {
+	when ASAN_ENABLED {
+		assert_contextless(len >= 0)
+		__asan_unpoison_memory_region(ptr, uint(len))
+	}
+}
+
+/*
+Marks the region covering `[ptr, ptr+len)` as addressable
+
+Code instrumented with `-sanitize:address` is allowed to access any address
+within the region again. This procedure is not thread-safe because no two
+threads can poison or unpoison memory in the same memory region simultaneously.
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_unpoison_rawptr_uint :: proc "contextless" (ptr: rawptr, len: uint) {
+	when ASAN_ENABLED {
+		__asan_unpoison_memory_region(ptr, len)
+	}
+}
+
+address_poison :: proc {
+	address_poison_slice,
+	address_poison_ptr,
+	address_poison_rawptr,
+	address_poison_rawptr_uint,
+}
+
+address_unpoison :: proc {
+	address_unpoison_slice,
+	address_unpoison_ptr,
+	address_unpoison_rawptr,
+	address_unpoison_rawptr_uint,
+}
+
+/*
+Registers a callback to be run when asan detects a memory error right before terminating
+the process.
+
+This can be used for logging and/or debugging purposes.
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_set_death_callback :: proc "contextless" (callback: Address_Death_Callback) {
+	when ASAN_ENABLED {
+		__sanitizer_set_death_callback(callback)
+	}
+}
+
+/*
+Checks if the memory region covered by the slice is poisoned.
+
+If it is poisoned this procedure returns the address which would result
+in an asan error.
+
+When asan is not enabled this procedure returns `nil`.
+*/
+@(no_sanitize_address)
+address_region_is_poisoned_slice :: proc "contextless" (region: $T/[]$E) -> rawptr {
+	when ASAN_ENABLED {
+		// Explicit cast: `len` returns `int`, the foreign procedure expects `uint`.
+		return __asan_region_is_poisoned(raw_data(region), uint(size_of(E) * len(region)))
+	} else {
+		return nil
+	}
+}
+
+/*
+Checks if the memory region pointed to by the pointer is poisoned.
+
+If it is poisoned this procedure returns the address which would result
+in an asan error.
+
+When asan is not enabled this procedure returns `nil`.
+*/
+@(no_sanitize_address)
+address_region_is_poisoned_ptr :: proc "contextless" (ptr: ^$T) -> rawptr {
+	when ASAN_ENABLED {
+		return __asan_region_is_poisoned(ptr, size_of(T))
+	} else {
+		return nil
+	}
+}
+
+/*
+Checks if the memory region covered by `[ptr, ptr+len)` is poisoned.
+
+If it is poisoned this procedure returns the address which would result
+in an asan error.
+
+When asan is not enabled this procedure returns `nil`.
+*/
+@(no_sanitize_address)
+address_region_is_poisoned_rawptr :: proc "contextless" (region: rawptr, len: int) -> rawptr {
+	when ASAN_ENABLED {
+		assert_contextless(len >= 0)
+		return __asan_region_is_poisoned(region, uint(len))
+	} else {
+		return nil
+	}
+}
+
+/*
+Checks if the memory region covered by `[ptr, ptr+len)` is poisoned.
+
+If it is poisoned this procedure returns the address which would result
+in an asan error.
+
+When asan is not enabled this procedure returns `nil`.
+*/
+@(no_sanitize_address)
+address_region_is_poisoned_rawptr_uint :: proc "contextless" (region: rawptr, len: uint) -> rawptr {
+	when ASAN_ENABLED {
+		return __asan_region_is_poisoned(region, len)
+	} else {
+		return nil
+	}
+}
+
+
+address_region_is_poisoned :: proc {
+	address_region_is_poisoned_slice,
+	address_region_is_poisoned_ptr,
+	address_region_is_poisoned_rawptr,
+	address_region_is_poisoned_rawptr_uint,
+}
+
+/*
+Checks if the address is poisoned.
+
+If it is poisoned this procedure returns `true`, otherwise it returns
+`false`.
+
+When asan is not enabled this procedure returns `false`.
+*/
+@(no_sanitize_address)
+address_is_poisoned :: proc "contextless" (address: rawptr) -> bool {
+	when ASAN_ENABLED {
+		return __asan_address_is_poisoned(address) != 0
+	} else {
+		return false
+	}
+}
+
+/*
+Describes the sanitizer state for an address.
+
+This procedure prints the description out to `stdout`.
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_describe_address :: proc "contextless" (address: rawptr) {
+	when ASAN_ENABLED {
+		__asan_describe_address(address)
+	}
+}
+
+/*
+Returns `true` if an asan error has occurred, otherwise it returns
+`false`.
+
+When asan is not enabled this procedure returns `false`.
+*/
+@(no_sanitize_address)
+address_report_present :: proc "contextless" () -> bool {
+	when ASAN_ENABLED {
+		return __asan_report_present() != 0
+	} else {
+		return false
+	}
+}
+
+/*
+Returns the program counter register value of an asan error.
+
+If no asan error has occurred `nil` is returned.
+
+When asan is not enabled this procedure returns `nil`.
+*/
+@(no_sanitize_address)
+address_get_report_pc :: proc "contextless" () -> rawptr {
+	when ASAN_ENABLED {
+		return __asan_get_report_pc()
+	} else {
+		return nil
+	}
+}
+
+/*
+Returns the base pointer register value of an asan error.
+
+If no asan error has occurred `nil` is returned.
+
+When asan is not enabled this procedure returns `nil`.
+*/
+@(no_sanitize_address)
+address_get_report_bp :: proc "contextless" () -> rawptr {
+	when ASAN_ENABLED {
+		return __asan_get_report_bp()
+	} else {
+		return nil
+	}
+}
+
+/*
+Returns the stack pointer register value of an asan error.
+
+If no asan error has occurred `nil` is returned.
+
+When asan is not enabled this procedure returns `nil`.
+*/
+@(no_sanitize_address)
+address_get_report_sp :: proc "contextless" () -> rawptr {
+	when ASAN_ENABLED {
+		return __asan_get_report_sp()
+	} else {
+		return nil
+	}
+}
+
+/*
+Returns the report buffer address of an asan error.
+
+If no asan error has occurred `nil` is returned.
+
+When asan is not enabled this procedure returns `nil`.
+*/
+@(no_sanitize_address)
+address_get_report_address :: proc "contextless" () -> rawptr {
+	when ASAN_ENABLED {
+		return __asan_get_report_address()
+	} else {
+		return nil
+	}
+}
+
+/*
+Returns the address access type of an asan error.
+
+If no asan error has occurred `.none` is returned.
+
+When asan is not enabled this procedure returns `.none`.
+*/
+@(no_sanitize_address)
+address_get_report_access_type :: proc "contextless" () -> Address_Access_Type {
+	when ASAN_ENABLED {
+		// With no recorded report there is no access to classify.
+		if address_report_present() {
+			// The asan runtime encodes a read access as 0, a write as non-zero.
+			return .read if __asan_get_report_access_type() == 0 else .write
+		}
+		return .none
+	} else {
+		return .none
+	}
+}
+
+/*
+Returns the access size of an asan error.
+
+If no asan error has occurred `0` is returned.
+
+When asan is not enabled this procedure returns `0`.
+*/
+@(no_sanitize_address)
+address_get_report_access_size :: proc "contextless" () -> uint {
+	when ASAN_ENABLED {
+		return __asan_get_report_access_size()
+	} else {
+		return 0
+	}
+}
+
+/*
+Returns the bug description of an asan error.
+
+If no asan error has occurred an empty string is returned.
+
+When asan is not enabled this procedure returns an empty string.
+*/
+@(no_sanitize_address)
+address_get_report_description :: proc "contextless" () -> string {
+	when ASAN_ENABLED {
+		return string(__asan_get_report_description())
+	} else {
+		return ""
+	}
+}
+
+/*
+Returns asan information about the address provided, writing the name of the
+variable the address belongs to into `data`.
+
+The information provided includes:
+* The category of the address, i.e. stack, global, heap, etc.
+* The name of the variable this address belongs to
+* The memory region of the address
+
+When asan is not enabled this procedure returns zero initialised values.
+*/
+@(no_sanitize_address)
+address_locate_address :: proc "contextless" (addr: rawptr, data: []byte) -> Address_Located_Address {
+	when ASAN_ENABLED {
+		out_addr: rawptr
+		out_size: uint
+		// `len` returns `int`; the foreign procedure expects `uint`.
+		str := __asan_locate_address(addr, raw_data(data), uint(len(data)), &out_addr, &out_size)
+		// The runtime writes the variable name into `data` NUL-terminated; re-read it via `cstring`.
+		// NOTE: the previous revision had a stray trailing comma after this return, a syntax error.
+		return { string(str), string(cstring(raw_data(data))), (cast([^]byte)out_addr)[:out_size] }
+	} else {
+		return { "", "", {} }
+	}
+}
+
+/*
+Returns the allocation stack trace and thread id for a heap address.
+
+The stack trace is filled into the `data` slice; the returned slice is the
+prefix of `data` that was actually written.
+
+When asan is not enabled this procedure returns a zero initialised value.
+*/
+@(no_sanitize_address)
+address_get_alloc_stack_trace :: proc "contextless" (addr: rawptr, data: []rawptr) -> ([]rawptr, int) {
+	when ASAN_ENABLED {
+		out_thread: i32
+		// `__asan_get_alloc_stack` reports how many frames it stored; use that
+		// count rather than returning all of `data`. `len` needs an explicit
+		// cast to the foreign procedure's `uint` parameter.
+		count := __asan_get_alloc_stack(addr, raw_data(data), uint(len(data)), &out_thread)
+		return data[:int(count)], int(out_thread)
+	} else {
+		return {}, 0
+	}
+}
+
+/*
+Returns the free stack trace and thread id for a heap address.
+
+The stack trace is filled into the `data` slice; the returned slice is the
+prefix of `data` that was actually written.
+
+When asan is not enabled this procedure returns zero initialised values.
+*/
+@(no_sanitize_address)
+address_get_free_stack_trace :: proc "contextless" (addr: rawptr, data: []rawptr) -> ([]rawptr, int) {
+	when ASAN_ENABLED {
+		out_thread: i32
+		// `__asan_get_free_stack` reports how many frames it stored; use that
+		// count rather than returning all of `data`. `len` needs an explicit
+		// cast to the foreign procedure's `uint` parameter.
+		count := __asan_get_free_stack(addr, raw_data(data), uint(len(data)), &out_thread)
+		return data[:int(count)], int(out_thread)
+	} else {
+		return {}, 0
+	}
+}
+
+/*
+Returns the current asan shadow memory mapping.
+
+When asan is not enabled this procedure returns a zero initialised value.
+*/
+@(no_sanitize_address)
+address_get_shadow_mapping :: proc "contextless" () -> Address_Shadow_Mapping {
+	when ASAN_ENABLED {
+		result: Address_Shadow_Mapping
+		__asan_get_shadow_mapping(&result.scale, &result.offset)
+		return result
+	} else {
+		return {}
+	}
+}
+
+/*
+Prints asan statistics to `stderr`
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_print_accumulated_stats :: proc "contextless" () {
+	when ASAN_ENABLED {
+		__asan_print_accumulated_stats()
+	}
+}
+
+/*
+Returns the address of the current fake stack used by asan.
+
+This pointer can be then used for `address_is_in_fake_stack`.
+
+When asan is not enabled this procedure returns `nil`.
+*/
+@(no_sanitize_address)
+address_get_current_fake_stack :: proc "contextless" () -> rawptr {
+	when ASAN_ENABLED {
+		return __asan_get_current_fake_stack()
+	} else {
+		return nil
+	}
+}
+
+/*
+Returns if an address belongs to a given fake stack and if so the region of the fake frame.
+
+When asan is not enabled this procedure returns zero initialised values.
+*/
+@(no_sanitize_address)
+address_is_in_fake_stack :: proc "contextless" (fake_stack: rawptr, addr: rawptr) -> ([]byte, bool) {
+	when ASAN_ENABLED {
+		begin: rawptr
+		end: rawptr
+		if __asan_addr_is_in_fake_stack(fake_stack, addr, &begin, &end) == nil {
+			return {}, false
+		}
+		return ((cast([^]byte)begin)[:uintptr(end)-uintptr(begin)]), true
+	} else {
+		return {}, false
+	}
+}
+
+/*
+Performs shadow memory cleanup for the current thread before a procedure with no return is called
+i.e. a procedure such as `panic` and `os.exit`.
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_handle_no_return :: proc "contextless" () {
+	when ASAN_ENABLED {
+		__asan_handle_no_return()
+	}
+}
+
+/*
+Updates the allocation stack trace for the given address.
+
+Returns `true` if successful, otherwise it returns `false`.
+
+When asan is not enabled this procedure returns `false`.
+*/
+@(no_sanitize_address)
+address_update_allocation_context :: proc "contextless" (addr: rawptr) -> bool {
+	when ASAN_ENABLED {
+		return __asan_update_allocation_context(addr) != 0
+	} else {
+		return false
+	}
+}
+

+ 38 - 0
base/sanitizer/doc.odin

@@ -0,0 +1,38 @@
+/*
+The `sanitizer` package implements various procedures for interacting with sanitizers
+from user code.
+
+An odin project can be linked with various sanitizers to help identify various different
+bugs. These sanitizers are:
+
+## Address
+
+Enabled with `-sanitize:address` when building an odin project.
+
+The address sanitizer (asan) is a runtime memory error detector used to help find common memory
+related bugs. Typically asan interacts with libc but Odin code can be marked up to interact
+with the asan runtime to extend the memory error detection outside of libc using this package.
+For more information about asan see: https://clang.llvm.org/docs/AddressSanitizer.html
+
+Procedures can be made exempt from asan when marked up with @(no_sanitize_address)
+
+## Memory
+
+Enabled with `-sanitize:memory` when building an odin project.
+
+The memory sanitizer is another runtime memory error detector with the sole purpose to catch the
+use of uninitialized memory. This is not a very common bug in Odin as by default everything is
+set to zero when initialised (ZII).
+For more information about the memory sanitizer see: https://clang.llvm.org/docs/MemorySanitizer.html
+
+## Thread
+
+Enabled with `-sanitize:thread` when building an odin project.
+
+The thread sanitizer is a runtime data race detector. It can be used to detect if multiple threads
+are concurrently writing and accessing a memory location without proper synchronisation.
+For more information about the thread sanitizer see: https://clang.llvm.org/docs/ThreadSanitizer.html
+
+*/
+package sanitizer
+

+ 74 - 0
base/sanitizer/memory.odin

@@ -0,0 +1,74 @@
+#+no-instrumentation
+package sanitizer
+
+@(private="file")
+MSAN_ENABLED :: .Memory in ODIN_SANITIZER_FLAGS
+
+@(private="file")
+@(default_calling_convention="system")
+foreign {
+	__msan_unpoison :: proc(addr: rawptr, size: uint) ---
+}
+
+/*
+Marks a slice as fully initialized.
+
+Code instrumented with `-sanitize:memory` will be permitted to access any
+address within the slice as if it had already been initialized.
+
+When msan is not enabled this procedure does nothing.
+*/
+memory_unpoison_slice :: proc "contextless" (region: $T/[]$E) {
+	when MSAN_ENABLED {
+		// Explicit cast: `len` returns `int`, `__msan_unpoison` takes `uint`.
+		__msan_unpoison(raw_data(region), uint(size_of(E) * len(region)))
+	}
+}
+
+/*
+Marks a pointer as fully initialized.
+
+Code instrumented with `-sanitize:memory` will be permitted to access memory
+within the region the pointer points to as if it had already been initialized.
+
+When msan is not enabled this procedure does nothing.
+*/
+memory_unpoison_ptr :: proc "contextless" (ptr: ^$T) {
+	when MSAN_ENABLED {
+		__msan_unpoison(ptr, size_of(T))
+	}
+}
+
+/*
+Marks the region covering `[ptr, ptr+len)` as fully initialized.
+
+Code instrumented with `-sanitize:memory` will be permitted to access memory
+within this range as if it had already been initialized.
+
+When msan is not enabled this procedure does nothing.
+*/
+memory_unpoison_rawptr :: proc "contextless" (ptr: rawptr, len: int) {
+	when MSAN_ENABLED {
+		// Guard against a negative length before the int -> uint conversion
+		// wraps it, mirroring `address_poison_rawptr` in this package.
+		assert_contextless(len >= 0)
+		__msan_unpoison(ptr, uint(len))
+	}
+}
+
+/*
+Marks the region covering `[ptr, ptr+len)` as fully initialized.
+
+Code instrumented with `-sanitize:memory` will be permitted to access memory
+within this range as if it had already been initialized.
+
+When msan is not enabled this procedure does nothing.
+*/
+memory_unpoison_rawptr_uint :: proc "contextless" (ptr: rawptr, len: uint) {
+	when MSAN_ENABLED {
+		__msan_unpoison(ptr, len)
+	}
+}
+
+memory_unpoison :: proc {
+	memory_unpoison_slice,
+	memory_unpoison_ptr,
+	memory_unpoison_rawptr,
+	memory_unpoison_rawptr_uint,
+}

BIN
bin/lld-link.exe


BIN
bin/llvm/windows/LLVM-C.lib


BIN
bin/wasm-ld.exe


+ 22 - 27
build.bat

@@ -4,12 +4,12 @@ setlocal EnableDelayedExpansion
 
 where /Q cl.exe || (
 	set __VSCMD_ARG_NO_LOGO=1
-	for /f "tokens=*" %%i in ('"C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -latest -requires Microsoft.VisualStudio.Workload.NativeDesktop -property installationPath') do set VS=%%i
+	for /f "tokens=*" %%i in ('"C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath') do set VS=%%i
 	if "!VS!" equ "" (
-		echo ERROR: Visual Studio installation not found
+		echo ERROR: MSVC installation not found
 		exit /b 1
 	)
-	call "!VS!\VC\Auxiliary\Build\vcvarsall.bat" amd64 || exit /b 1
+	call "!VS!\Common7\Tools\vsdevcmd.bat" -arch=x64 -host_arch=x64 || exit /b 1
 )
 
 if "%VSCMD_ARG_TGT_ARCH%" neq "x64" (
@@ -19,16 +19,27 @@ if "%VSCMD_ARG_TGT_ARCH%" neq "x64" (
 	)
 )
 
+where /Q git.exe || goto skip_git_hash
+if not exist .git\ goto skip_git_hash
+for /f "tokens=1,2" %%i IN ('git show "--pretty=%%cd %%h" "--date=format:%%Y-%%m-%%d" --no-patch --no-notes HEAD') do (
+	set CURR_DATE_TIME=%%i
+	set GIT_SHA=%%j
+)
+if %ERRORLEVEL% equ 0 (
+	goto have_git_hash_and_date
+)
+:skip_git_hash
 pushd misc
 cl /nologo get-date.c
-popd
-
-for /f %%i in ('misc\get-date') do (
+for /f %%i in ('get-date') do (
 	set CURR_DATE_TIME=%%i
+	rem Don't set GIT_SHA
 )
+popd
+:have_git_hash_and_date
 set curr_year=%CURR_DATE_TIME:~0,4%
-set curr_month=%CURR_DATE_TIME:~4,2%
-set curr_day=%CURR_DATE_TIME:~6,2%
+set curr_month=%CURR_DATE_TIME:~5,2%
+set curr_day=%CURR_DATE_TIME:~8,2%
 
 :: Make sure this is a decent name and not generic
 set exe_name=odin.exe
@@ -61,31 +72,14 @@ if %release_mode% equ 0 (
 set V4=0
 set odin_version_full="%V1%.%V2%.%V3%.%V4%"
 set odin_version_raw="dev-%V1%-%V2%"
-
 set compiler_flags= -nologo -Oi -TP -fp:precise -Gm- -MP -FC -EHsc- -GR- -GF
 rem Parse source code as utf-8 even on shift-jis and other codepages
 rem See https://learn.microsoft.com/en-us/cpp/build/reference/utf-8-set-source-and-executable-character-sets-to-utf-8?view=msvc-170
 set compiler_flags= %compiler_flags% /utf-8
-set compiler_defines= -DODIN_VERSION_RAW=\"%odin_version_raw%\"
+set compiler_defines= -DODIN_VERSION_RAW=\"%odin_version_raw%\" -DGIT_SHA=\"%GIT_SHA%\"
 
 rem fileversion is defined as {Major,Minor,Build,Private: u16} so a bit limited
-set rc_flags=-nologo ^
--DV1=%V1% -DV2=%V2% -DV3=%V3% -DV4=%V4% ^
--DVF=%odin_version_full% -DNIGHTLY=%nightly%
-
-where /Q git.exe || goto skip_git_hash
-if not exist .git\ goto skip_git_hash
-for /f "tokens=1,2" %%i IN ('git show "--pretty=%%cd %%h" "--date=format:%%Y-%%m" --no-patch --no-notes HEAD') do (
-	set odin_version_raw=dev-%%i
-	set GIT_SHA=%%j
-)
-if %ERRORLEVEL% equ 0 (
-	set compiler_defines=%compiler_defines% -DGIT_SHA=\"%GIT_SHA%\"
-	set rc_flags=%rc_flags% -DGIT_SHA=%GIT_SHA% -DVP=%odin_version_raw%:%GIT_SHA%
-) else (
-	set rc_flags=%rc_flags% -DVP=%odin_version_raw%
-)
-:skip_git_hash
+set rc_flags="-DGIT_SHA=%GIT_SHA% -DVP=dev-%V1%-%V2%:%GIT_SHA% nologo -DV1=%V1% -DV2=%V2% -DV3=%V3% -DV4=%V4% -DVF=%odin_version_full% -DNIGHTLY=%nightly%"
 
 if %nightly% equ 1 set compiler_defines=%compiler_defines% -DNIGHTLY
 
@@ -138,6 +132,7 @@ del *.ilk > NUL 2> NUL
 
 rc %rc_flags% %odin_rc%
 cl %compiler_settings% "src\main.cpp" "src\libtommath.cpp" /link %linker_settings% -OUT:%exe_name%
+if %errorlevel% neq 0 goto end_of_build
 mt -nologo -inputresource:%exe_name%;#1 -manifest misc\odin.manifest -outputresource:%exe_name%;#1 -validate_manifest -identity:"odin, processorArchitecture=amd64, version=%odin_version_full%, type=win32"
 if %errorlevel% neq 0 goto end_of_build
 

+ 12 - 6
build_odin.sh

@@ -6,7 +6,6 @@ set -eu
 : ${LDFLAGS=}
 : ${LLVM_CONFIG=}
 
-CPPFLAGS="$CPPFLAGS -DODIN_VERSION_RAW=\"dev-$(date +"%Y-%m")\""
 CXXFLAGS="$CXXFLAGS -std=c++14"
 DISABLED_WARNINGS="-Wno-switch -Wno-macro-redefined -Wno-unused-value"
 LDFLAGS="$LDFLAGS -pthread -lm"
@@ -15,8 +14,12 @@ OS_NAME="$(uname -s)"
 
 if [ -d ".git" ] && [ -n "$(command -v git)" ]; then
 	GIT_SHA=$(git show --pretty='%h' --no-patch --no-notes HEAD)
+	GIT_DATE=$(git show "--pretty=%cd" "--date=format:%Y-%m" --no-patch --no-notes HEAD)
 	CPPFLAGS="$CPPFLAGS -DGIT_SHA=\"$GIT_SHA\""
+else
+	GIT_DATE=$(date +"%Y-%m")
 fi
+CPPFLAGS="$CPPFLAGS -DODIN_VERSION_RAW=\"dev-$GIT_DATE\""
 
 error() {
 	printf "ERROR: %s\n" "$1"
@@ -25,7 +28,8 @@ error() {
 
 # Brew advises people not to add llvm to their $PATH, so try and use brew to find it.
 if [ -z "$LLVM_CONFIG" ] &&  [ -n "$(command -v brew)" ]; then
-    if   [ -n "$(command -v $(brew --prefix llvm@19)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@19)/bin/llvm-config"
+    if   [ -n "$(command -v $(brew --prefix llvm@20)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@20)/bin/llvm-config"
+    elif [ -n "$(command -v $(brew --prefix llvm@19)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@19)/bin/llvm-config"
     elif [ -n "$(command -v $(brew --prefix llvm@18)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@18)/bin/llvm-config"
     elif [ -n "$(command -v $(brew --prefix llvm@17)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@17)/bin/llvm-config"
     elif [ -n "$(command -v $(brew --prefix llvm@14)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@14)/bin/llvm-config"
@@ -34,7 +38,8 @@ fi
 
 if [ -z "$LLVM_CONFIG" ]; then
 	# darwin, linux, openbsd
-	if   [ -n "$(command -v llvm-config-19)" ]; then LLVM_CONFIG="llvm-config-19"
+	if   [ -n "$(command -v llvm-config-20)" ]; then LLVM_CONFIG="llvm-config-20"
+	elif [ -n "$(command -v llvm-config-19)" ]; then LLVM_CONFIG="llvm-config-19"
 	elif [ -n "$(command -v llvm-config-18)" ]; then LLVM_CONFIG="llvm-config-18"
 	elif [ -n "$(command -v llvm-config-17)" ]; then LLVM_CONFIG="llvm-config-17"
 	elif [ -n "$(command -v llvm-config-14)" ]; then LLVM_CONFIG="llvm-config-14"
@@ -42,6 +47,7 @@ if [ -z "$LLVM_CONFIG" ]; then
 	elif [ -n "$(command -v llvm-config-12)" ]; then LLVM_CONFIG="llvm-config-12"
 	elif [ -n "$(command -v llvm-config-11)" ]; then LLVM_CONFIG="llvm-config-11"
 	# freebsd
+	elif [ -n "$(command -v llvm-config20)" ]; then  LLVM_CONFIG="llvm-config20"
 	elif [ -n "$(command -v llvm-config19)" ]; then  LLVM_CONFIG="llvm-config19"
 	elif [ -n "$(command -v llvm-config18)" ]; then  LLVM_CONFIG="llvm-config18"
 	elif [ -n "$(command -v llvm-config17)" ]; then  LLVM_CONFIG="llvm-config17"
@@ -69,15 +75,15 @@ LLVM_VERSION_MAJOR="$(echo $LLVM_VERSION | awk -F. '{print $1}')"
 LLVM_VERSION_MINOR="$(echo $LLVM_VERSION | awk -F. '{print $2}')"
 LLVM_VERSION_PATCH="$(echo $LLVM_VERSION | awk -F. '{print $3}')"
 
-if [ $LLVM_VERSION_MAJOR -lt 11 ] || ([ $LLVM_VERSION_MAJOR -gt 14 ] && [ $LLVM_VERSION_MAJOR -lt 17 ]) || [ $LLVM_VERSION_MAJOR -gt 19 ]; then
-	error "Invalid LLVM version $LLVM_VERSION: must be 11, 12, 13, 14, 17, 18 or 19"
+if [ $LLVM_VERSION_MAJOR -lt 11 ] || ([ $LLVM_VERSION_MAJOR -gt 14 ] && [ $LLVM_VERSION_MAJOR -lt 17 ]) || [ $LLVM_VERSION_MAJOR -gt 20 ]; then
+	error "Invalid LLVM version $LLVM_VERSION: must be 11, 12, 13, 14, 17, 18, 19 or 20"
 fi
 
 case "$OS_NAME" in
 Darwin)
 	if [ "$OS_ARCH" = "arm64" ]; then
 		if [ $LLVM_VERSION_MAJOR -lt 13 ]; then
-			error "Invalid LLVM version $LLVM_VERSION: Darwin Arm64 requires LLVM 13, 14, 17, 18 or 19"
+			error "Invalid LLVM version $LLVM_VERSION: Darwin Arm64 requires LLVM 13, 14, 17, 18, 19 or 20"
 		fi
 	fi
 

+ 75 - 0
check_all.bat

@@ -0,0 +1,75 @@
+@echo off
+
+rem Check examples\all against every supported target.
+rem Usage: check_all [freestanding|rare|wasm]  (no argument = common hosted targets)
+
+rem Shared vet/style flags for every invocation, defined once instead of
+rem being repeated on every line.
+set FLAGS=-vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do
+
+if "%1" == "" (
+	echo Checking darwin_amd64 - expect vendor:cgltf panic
+	odin check examples\all %FLAGS% -target:darwin_amd64
+	echo Checking darwin_arm64 - expect vendor:cgltf panic
+	odin check examples\all %FLAGS% -target:darwin_arm64
+	echo Checking linux_i386
+	odin check examples\all %FLAGS% -target:linux_i386
+	echo Checking linux_amd64
+	odin check examples\all %FLAGS% -target:linux_amd64
+	echo Checking linux_arm64
+	odin check examples\all %FLAGS% -target:linux_arm64
+	echo Checking linux_arm32
+	odin check examples\all %FLAGS% -target:linux_arm32
+	echo Checking linux_riscv64
+	odin check examples\all %FLAGS% -target:linux_riscv64
+	echo Checking windows_i386
+	odin check examples\all %FLAGS% -target:windows_i386
+	echo Checking windows_amd64
+	odin check examples\all %FLAGS% -target:windows_amd64
+	echo Checking freebsd_amd64
+	odin check examples\all %FLAGS% -target:freebsd_amd64
+	echo Checking freebsd_arm64
+	odin check examples\all %FLAGS% -target:freebsd_arm64
+	echo Checking netbsd_amd64
+	odin check examples\all %FLAGS% -target:netbsd_amd64
+	echo Checking netbsd_arm64
+	odin check examples\all %FLAGS% -target:netbsd_arm64
+	echo Checking openbsd_amd64
+	odin check examples\all %FLAGS% -target:openbsd_amd64
+)
+
+if "%1" == "freestanding" (
+	echo Checking freestanding_wasm32
+	odin check examples\all %FLAGS% -target:freestanding_wasm32
+	echo Checking freestanding_wasm64p32
+	odin check examples\all %FLAGS% -target:freestanding_wasm64p32
+	echo Checking freestanding_amd64_sysv
+	odin check examples\all %FLAGS% -target:freestanding_amd64_sysv
+	echo Checking freestanding_amd64_win64
+	odin check examples\all %FLAGS% -target:freestanding_amd64_win64
+	echo Checking freestanding_arm64
+	odin check examples\all %FLAGS% -target:freestanding_arm64
+	echo Checking freestanding_arm32
+	odin check examples\all %FLAGS% -target:freestanding_arm32
+	echo Checking freestanding_riscv64
+	odin check examples\all %FLAGS% -target:freestanding_riscv64
+)
+
+if "%1" == "rare" (
+	echo Checking essence_amd64
+	odin check examples\all %FLAGS% -target:essence_amd64
+	echo Checking freebsd_i386
+	odin check examples\all %FLAGS% -target:freebsd_i386
+	echo Checking haiku_amd64
+	odin check examples\all %FLAGS% -target:haiku_amd64
+)
+
+if "%1" == "wasm" (
+	echo Checking freestanding_wasm32
+	odin check examples\all %FLAGS% -target:freestanding_wasm32
+	echo Checking freestanding_wasm64p32
+	odin check examples\all %FLAGS% -target:freestanding_wasm64p32
+	echo Checking wasi_wasm64p32
+	odin check examples\all %FLAGS% -target:wasi_wasm64p32
+	echo Checking wasi_wasm32
+	odin check examples\all %FLAGS% -target:wasi_wasm32
+	echo Checking js_wasm32
+	odin check examples\all %FLAGS% -target:js_wasm32
+	echo Checking orca_wasm32
+	odin check examples\all %FLAGS% -target:orca_wasm32
+	echo Checking js_wasm64p32
+	odin check examples\all %FLAGS% -target:js_wasm64p32
+)

+ 78 - 0
check_all.sh

@@ -0,0 +1,78 @@
+#!/bin/sh
+
+case $1 in
+freestanding)
+	echo Checking freestanding_wasm32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_wasm32
+	echo Checking freestanding_wasm64p32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_wasm64p32
+	echo Checking freestanding_amd64_sysv
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_amd64_sysv
+	echo Checking freestanding_amd64_win64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_amd64_win64
+	echo Checking freestanding_arm64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_arm64
+	echo Checking freestanding_arm32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_arm32
+	echo Checking freestanding_riscv64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_riscv64
+	;;
+
+rare)
+	echo Checking essence_amd64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:essence_amd64
+	echo Checking freebsd_i386
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freebsd_i386
+	echo Checking haiku_amd64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:haiku_amd64
+	;;
+
+wasm)
+	echo Checking freestanding_wasm32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_wasm32
+	echo Checking freestanding_wasm64p32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_wasm64p32
+	echo Checking wasi_wasm64p32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:wasi_wasm64p32
+	echo Checking wasi_wasm32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:wasi_wasm32
+	echo Checking js_wasm32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:js_wasm32
+	echo Checking orca_wasm32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:orca_wasm32
+	echo Checking js_wasm64p32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:js_wasm64p32
+	;;
+
+*)
+	echo Checking darwin_amd64 - expect vendor:cgltf panic
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:darwin_amd64
+	echo Checking darwin_arm64 - expect vendor:cgltf panic
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:darwin_arm64
+	echo Checking linux_i386
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:linux_i386
+	echo Checking linux_amd64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:linux_amd64
+	echo Checking linux_arm64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:linux_arm64
+	echo Checking linux_arm32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:linux_arm32
+	echo Checking linux_riscv64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:linux_riscv64
+	echo Checking windows_i386
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:windows_i386
+	echo Checking windows_amd64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:windows_amd64
+	echo Checking freebsd_amd64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freebsd_amd64
+	echo Checking freebsd_arm64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freebsd_arm64
+	echo Checking netbsd_amd64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:netbsd_amd64
+	echo Checking netbsd_arm64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:netbsd_arm64
+	echo Checking openbsd_amd64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:openbsd_amd64
+	;;
+
+esac

+ 2 - 2
ci/build_linux_static.sh

@@ -1,8 +1,8 @@
 #!/usr/bin/env sh
 # Intended for use in Alpine containers, see the "nightly" Github action for a list of dependencies
 
-CXX="clang++-18"
-LLVM_CONFIG="llvm-config-18"
+CXX="clang++-20"
+LLVM_CONFIG="llvm-config-20"
 
 DISABLED_WARNINGS="-Wno-switch -Wno-macro-redefined -Wno-unused-value"
 

+ 1 - 0
codecov.yml

@@ -0,0 +1 @@
+comment: false

+ 1 - 1
core/bufio/reader.odin

@@ -257,7 +257,7 @@ reader_read_rune :: proc(b: ^Reader) -> (r: rune, size: int, err: io.Error) {
 	for b.r+utf8.UTF_MAX > b.w &&
 	    !utf8.full_rune(b.buf[b.r:b.w]) &&
 	    b.err == nil &&
-	    b.w-b.w < len(b.buf) {
+	    b.w-b.r < len(b.buf) {
 		_reader_read_new_chunk(b) or_return
 	}
 

+ 2 - 2
core/bytes/bytes.odin

@@ -350,7 +350,7 @@ index_byte :: proc "contextless" (s: []byte, c: byte) -> (index: int) #no_bounds
 	}
 
 	c_vec: simd.u8x16 = c
-	when !simd.IS_EMULATED {
+	when simd.HAS_HARDWARE_SIMD {
 		// Note: While this is something that could also logically take
 		// advantage of AVX512, the various downclocking and power
 		// consumption related woes make premature to have a dedicated
@@ -485,7 +485,7 @@ last_index_byte :: proc "contextless" (s: []byte, c: byte) -> int #no_bounds_che
 	}
 
 	c_vec: simd.u8x16 = c
-	when !simd.IS_EMULATED {
+	when simd.HAS_HARDWARE_SIMD {
 		// Note: While this is something that could also logically take
 		// advantage of AVX512, the various downclocking and power
 		// consumption related woes make premature to have a dedicated

+ 0 - 3
core/compress/common.odin

@@ -139,9 +139,6 @@ Context_Memory_Input :: struct #packed {
 }
 when size_of(rawptr) == 8 {
 	#assert(size_of(Context_Memory_Input) == 64)
-} else {
-	// e.g. `-target:windows_i386`
-	#assert(size_of(Context_Memory_Input) == 52)
 }
 
 Context_Stream_Input :: struct #packed {

+ 9 - 9
core/container/intrusive/list/intrusive_list.odin

@@ -278,19 +278,19 @@ Example:
 	iterate_next_example :: proc() {
 		l: list.List
 
-		one := My_Struct{value=1}
-		two := My_Struct{value=2}
+		one := My_Next_Struct{value=1}
+		two := My_Next_Struct{value=2}
 
 		list.push_back(&l, &one.node)
 		list.push_back(&l, &two.node)
 
-		it := list.iterator_head(l, My_Struct, "node")
+		it := list.iterator_head(l, My_Next_Struct, "node")
 		for num in list.iterate_next(&it) {
 			fmt.println(num.value)
 		}
 	}
 
-	My_Struct :: struct {
+	My_Next_Struct :: struct {
 		node : list.Node,
 		value: int,
 	}
@@ -325,22 +325,22 @@ Example:
 	import "core:fmt"
 	import "core:container/intrusive/list"
 
-	iterate_next_example :: proc() {
+	iterate_prev_example :: proc() {
 		l: list.List
 
-		one := My_Struct{value=1}
-		two := My_Struct{value=2}
+		one := My_Prev_Struct{value=1}
+		two := My_Prev_Struct{value=2}
 
 		list.push_back(&l, &one.node)
 		list.push_back(&l, &two.node)
 
-		it := list.iterator_tail(l, My_Struct, "node")
+		it := list.iterator_tail(l, My_Prev_Struct, "node")
 		for num in list.iterate_prev(&it) {
 			fmt.println(num.value)
 		}
 	}
 
-	My_Struct :: struct {
+	My_Prev_Struct :: struct {
 		node : list.Node,
 		value: int,
 	}

+ 1 - 1
core/container/lru/lru_cache.odin

@@ -129,7 +129,7 @@ remove :: proc(c: ^$C/Cache($Key, $Value), key: Key) -> bool {
 		return false
 	}
 	_remove_node(c, e)
-	free(node, c.node_allocator)
+	free(e, c.node_allocator)
 	c.count -= 1
 	return true
 }

+ 13 - 12
core/container/priority_queue/priority_queue.odin

@@ -1,6 +1,7 @@
 package container_priority_queue
 
 import "base:builtin"
+import "base:runtime"
 
 Priority_Queue :: struct($T: typeid) {
 	queue: [dynamic]T,
@@ -17,13 +18,14 @@ default_swap_proc :: proc($T: typeid) -> proc(q: []T, i, j: int) {
 	}
 }
 
-init :: proc(pq: ^$Q/Priority_Queue($T), less: proc(a, b: T) -> bool, swap: proc(q: []T, i, j: int), capacity := DEFAULT_CAPACITY, allocator := context.allocator) {
+init :: proc(pq: ^$Q/Priority_Queue($T), less: proc(a, b: T) -> bool, swap: proc(q: []T, i, j: int), capacity := DEFAULT_CAPACITY, allocator := context.allocator) -> (err: runtime.Allocator_Error) {
 	if pq.queue.allocator.procedure == nil {
 		pq.queue.allocator = allocator
 	}
-	reserve(pq, capacity)
+	reserve(pq, capacity) or_return
 	pq.less = less
 	pq.swap = swap
+	return .None
 }
 
 init_from_dynamic_array :: proc(pq: ^$Q/Priority_Queue($T), queue: [dynamic]T, less: proc(a, b: T) -> bool, swap: proc(q: []T, i, j: int)) {
@@ -41,8 +43,8 @@ destroy :: proc(pq: ^$Q/Priority_Queue($T)) {
 	delete(pq.queue)
 }
 
-reserve :: proc(pq: ^$Q/Priority_Queue($T), capacity: int) {
-	builtin.reserve(&pq.queue, capacity)
+reserve :: proc(pq: ^$Q/Priority_Queue($T), capacity: int) -> (err: runtime.Allocator_Error) {
+	return builtin.reserve(&pq.queue, capacity)
 }
 clear :: proc(pq: ^$Q/Priority_Queue($T)) {
 	builtin.clear(&pq.queue)
@@ -103,9 +105,10 @@ fix :: proc(pq: ^$Q/Priority_Queue($T), i: int) {
 	}
 }
 
-push :: proc(pq: ^$Q/Priority_Queue($T), value: T) {
-	append(&pq.queue, value)
+push :: proc(pq: ^$Q/Priority_Queue($T), value: T) -> (err: runtime.Allocator_Error) {
+	append(&pq.queue, value) or_return
 	_shift_up(pq, builtin.len(pq.queue)-1)
+	return .None
 }
 
 pop :: proc(pq: ^$Q/Priority_Queue($T), loc := #caller_location) -> (value: T) {
@@ -130,12 +133,10 @@ pop_safe :: proc(pq: ^$Q/Priority_Queue($T), loc := #caller_location) -> (value:
 remove :: proc(pq: ^$Q/Priority_Queue($T), i: int) -> (value: T, ok: bool) {
 	n := builtin.len(pq.queue)
 	if 0 <= i && i < n {
-		if n != i {
-			pq.swap(pq.queue[:], i, n)
-			_shift_down(pq, i, n)
-			_shift_up(pq, i)
-		}
-		value, ok = builtin.pop_safe(&pq.queue)
+		pq.swap(pq.queue[:], i, n-1)
+		_shift_down(pq, i, n-1)
+		_shift_up(pq, i)
+		value, ok = builtin.pop(&pq.queue), true
 	}
 	return
 }

+ 280 - 56
core/container/queue/queue.odin

@@ -4,7 +4,13 @@ import "base:builtin"
 import "base:runtime"
 _ :: runtime
 
-// Dynamically resizable double-ended queue/ring-buffer
+/*
+`Queue` is a dynamically resizable double-ended queue/ring-buffer.
+
+Being double-ended means that either end may be pushed onto or popped from
+across the same block of memory, in any order, thus providing both stack and
+queue-like behaviors in the same data structure.
+*/
 Queue :: struct($T: typeid) {
 	data:   [dynamic]T,
 	len:    uint,
@@ -13,18 +19,31 @@ Queue :: struct($T: typeid) {
 
 DEFAULT_CAPACITY :: 16
 
-// Procedure to initialize a queue
+/*
+Initialize a `Queue` with a starting `capacity` and an `allocator`.
+*/
 init :: proc(q: ^$Q/Queue($T), capacity := DEFAULT_CAPACITY, allocator := context.allocator) -> runtime.Allocator_Error {
-	if q.data.allocator.procedure == nil {
-		q.data.allocator = allocator
-	}
 	clear(q)
+	q.data = transmute([dynamic]T)runtime.Raw_Dynamic_Array{
+		data = nil,
+		len = 0,
+		cap = 0,
+		allocator = allocator,
+	}
 	return reserve(q, capacity)
 }
 
-// Procedure to initialize a queue from a fixed backing slice.
-// The contents of the `backing` will be overwritten as items are pushed onto the `Queue`.
-// Any previous contents are not available.
+/*
+Initialize a `Queue` from a fixed `backing` slice into which modifications are
+made directly.
+
+The contents of the `backing` will be overwritten as items are pushed onto the
+`Queue`. Any previous contents will not be available through the API but are
+not explicitly zeroed either.
+
+Note that procedures which need space to work (`push_back`, ...) will fail if
+the backing slice runs out of space.
+*/
 init_from_slice :: proc(q: ^$Q/Queue($T), backing: []T) -> bool {
 	clear(q)
 	q.data = transmute([dynamic]T)runtime.Raw_Dynamic_Array{
@@ -36,8 +55,14 @@ init_from_slice :: proc(q: ^$Q/Queue($T), backing: []T) -> bool {
 	return true
 }
 
-// Procedure to initialize a queue from a fixed backing slice.
-// Existing contents are preserved and available on the queue.
+/*
+Initialize a `Queue` from a fixed `backing` slice into which modifications are
+made directly.
+
+The contents of the queue will start out with all of the elements in `backing`,
+effectively creating a full queue from the slice. As such, no procedures will
+be able to add more elements to the queue until some are taken off.
+*/
 init_with_contents :: proc(q: ^$Q/Queue($T), backing: []T) -> bool {
 	clear(q)
 	q.data = transmute([dynamic]T)runtime.Raw_Dynamic_Array{
@@ -50,84 +75,200 @@ init_with_contents :: proc(q: ^$Q/Queue($T), backing: []T) -> bool {
 	return true
 }
 
-// Procedure to destroy a queue
+/*
+Delete memory that has been dynamically allocated from a `Queue` that was setup with `init`.
+
+Note that this procedure should not be used on queues setup with
+`init_from_slice` or `init_with_contents`, as neither of those procedures keep
+track of the allocator state of the underlying `backing` slice.
+*/
 destroy :: proc(q: ^$Q/Queue($T)) {
 	delete(q.data)
 }
 
-// The length of the queue
+/*
+Return the length of the queue.
+*/
 len :: proc(q: $Q/Queue($T)) -> int {
 	return int(q.len)
 }
 
-// The current capacity of the queue
+/*
+Return the capacity of the queue.
+*/
 cap :: proc(q: $Q/Queue($T)) -> int {
 	return builtin.len(q.data)
 }
 
-// Remaining space in the queue (cap-len)
+/*
+Return the remaining space in the queue.
+
+This will be `cap() - len()`.
+*/
 space :: proc(q: $Q/Queue($T)) -> int {
 	return builtin.len(q.data) - int(q.len)
 }
 
-// Reserve enough space for at least the specified capacity
+/*
+Reserve enough space in the queue for at least the specified capacity.
+
+This may return an error if allocation failed.
+*/
 reserve :: proc(q: ^$Q/Queue($T), capacity: int) -> runtime.Allocator_Error {
 	if capacity > space(q^) {
-		return _grow(q, uint(capacity)) 
+		return _grow(q, uint(capacity))
 	}
 	return nil
 }
 
+/*
+Shrink a queue's dynamically allocated array.
+
+This has no effect if the queue was initialized with a backing slice.
+*/
+shrink :: proc(q: ^$Q/Queue($T), temp_allocator := context.temp_allocator, loc := #caller_location) {
+	if q.data.allocator.procedure == runtime.nil_allocator_proc {
+		return
+	}
+
+	if q.len > 0 && q.offset > 0 {
+		// Make the array contiguous again.
+		buffer := make([]T, q.len, temp_allocator)
+		defer delete(buffer, temp_allocator)
+
+		right := uint(builtin.len(q.data)) - q.offset
+		copy(buffer[:],      q.data[q.offset:])
+		copy(buffer[right:], q.data[:q.offset])
+
+		copy(q.data[:], buffer[:])
+
+		q.offset = 0
+	}
+
+	builtin.shrink(&q.data, q.len, loc)
+}
+
+/*
+Get the element at index `i`.
 
+This will raise a bounds checking error if `i` is an invalid index.
+*/
 get :: proc(q: ^$Q/Queue($T), #any_int i: int, loc := #caller_location) -> T {
-	runtime.bounds_check_error_loc(loc, i, builtin.len(q.data))
+	runtime.bounds_check_error_loc(loc, i, int(q.len))
 
 	idx := (uint(i)+q.offset)%builtin.len(q.data)
 	return q.data[idx]
 }
 
-front :: proc(q: ^$Q/Queue($T)) -> T {
+/*
+Get a pointer to the element at index `i`.
+
+This will raise a bounds checking error if `i` is an invalid index.
+*/
+get_ptr :: proc(q: ^$Q/Queue($T), #any_int i: int, loc := #caller_location) -> ^T {
+	runtime.bounds_check_error_loc(loc, i, int(q.len))
+
+	idx := (uint(i)+q.offset)%builtin.len(q.data)
+	return &q.data[idx]
+}
+
+/*
+Set the element at index `i` to `val`.
+
+This will raise a bounds checking error if `i` is an invalid index.
+*/
+set :: proc(q: ^$Q/Queue($T), #any_int i: int, val: T, loc := #caller_location) {
+	runtime.bounds_check_error_loc(loc, i, int(q.len))
+
+	idx := (uint(i)+q.offset)%builtin.len(q.data)
+	q.data[idx] = val
+}
+
+/*
+Get the element at the front of the queue.
+
+This will raise a bounds checking error if the queue is empty.
+*/
+front :: proc(q: ^$Q/Queue($T), loc := #caller_location) -> T {
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(q.len > 0, "Queue is empty.", loc)
+	}
 	return q.data[q.offset]
 }
-front_ptr :: proc(q: ^$Q/Queue($T)) -> ^T {
+
+/*
+Get a pointer to the element at the front of the queue.
+
+This will raise a bounds checking error if the queue is empty.
+*/
+front_ptr :: proc(q: ^$Q/Queue($T), loc := #caller_location) -> ^T {
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(q.len > 0, "Queue is empty.", loc)
+	}
 	return &q.data[q.offset]
 }
 
-back :: proc(q: ^$Q/Queue($T)) -> T {
+/*
+Get the element at the back of the queue.
+
+This will raise a bounds checking error if the queue is empty.
+*/
+back :: proc(q: ^$Q/Queue($T), loc := #caller_location) -> T {
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(q.len > 0, "Queue is empty.", loc)
+	}
 	idx := (q.offset+uint(q.len - 1))%builtin.len(q.data)
 	return q.data[idx]
 }
-back_ptr :: proc(q: ^$Q/Queue($T)) -> ^T {
+
+/*
+Get a pointer to the element at the back of the queue.
+
+This will raise a bounds checking error if the queue is empty.
+*/
+back_ptr :: proc(q: ^$Q/Queue($T), loc := #caller_location) -> ^T {
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(q.len > 0, "Queue is empty.", loc)
+	}
 	idx := (q.offset+uint(q.len - 1))%builtin.len(q.data)
 	return &q.data[idx]
 }
 
-set :: proc(q: ^$Q/Queue($T), #any_int i: int, val: T, loc := #caller_location) {
-	runtime.bounds_check_error_loc(loc, i, builtin.len(q.data))
-	
-	idx := (uint(i)+q.offset)%builtin.len(q.data)
-	q.data[idx] = val
-}
-get_ptr :: proc(q: ^$Q/Queue($T), #any_int i: int, loc := #caller_location) -> ^T {
-	runtime.bounds_check_error_loc(loc, i, builtin.len(q.data))
-	
-	idx := (uint(i)+q.offset)%builtin.len(q.data)
-	return &q.data[idx]
-}
 
+@(deprecated="Use `front_ptr` instead")
 peek_front :: proc(q: ^$Q/Queue($T), loc := #caller_location) -> ^T {
-	runtime.bounds_check_error_loc(loc, 0, builtin.len(q.data))
-	idx := q.offset%builtin.len(q.data)
-	return &q.data[idx]
+	return front_ptr(q, loc)
 }
 
+@(deprecated="Use `back_ptr` instead")
 peek_back :: proc(q: ^$Q/Queue($T), loc := #caller_location) -> ^T {
-	runtime.bounds_check_error_loc(loc, int(q.len - 1), builtin.len(q.data))
-	idx := (uint(q.len - 1)+q.offset)%builtin.len(q.data)
-	return &q.data[idx]
+	return back_ptr(q, loc)
 }
 
-// Push an element to the back of the queue
+/*
+Push an element to the back of the queue.
+
+If there is no more space left and allocation fails to get more, this will
+return false with an `Allocator_Error`.
+
+Example:
+
+	import "base:runtime"
+	import "core:container/queue"
+
+	// This demonstrates typical queue behavior (First-In First-Out).
+	main :: proc() {
+		q: queue.Queue(int)
+		queue.init(&q)
+		queue.push_back(&q, 1)
+		queue.push_back(&q, 2)
+		queue.push_back(&q, 3)
+		// q.data is now [1, 2, 3, ...]
+		assert(queue.pop_front(&q) == 1)
+		assert(queue.pop_front(&q) == 2)
+		assert(queue.pop_front(&q) == 3)
+	}
+*/
 push_back :: proc(q: ^$Q/Queue($T), elem: T) -> (ok: bool, err: runtime.Allocator_Error) {
 	if space(q^) == 0 {
 		_grow(q) or_return
@@ -138,27 +279,78 @@ push_back :: proc(q: ^$Q/Queue($T), elem: T) -> (ok: bool, err: runtime.Allocato
 	return true, nil
 }
 
-// Push an element to the front of the queue
+/*
+Push an element to the front of the queue.
+
+If there is no more space left and allocation fails to get more, this will
+return false with an `Allocator_Error`.
+
+Example:
+
+	import "base:runtime"
+	import "core:container/queue"
+
+	// This demonstrates stack behavior (First-In Last-Out).
+	main :: proc() {
+		q: queue.Queue(int)
+		queue.init(&q)
+		queue.push_back(&q, 1)
+		queue.push_back(&q, 2)
+		queue.push_back(&q, 3)
+		// q.data is now [1, 2, 3, ...]
+		assert(queue.pop_back(&q) == 3)
+		assert(queue.pop_back(&q) == 2)
+		assert(queue.pop_back(&q) == 1)
+	}
+*/
 push_front :: proc(q: ^$Q/Queue($T), elem: T) -> (ok: bool, err: runtime.Allocator_Error)  {
 	if space(q^) == 0 {
 		_grow(q) or_return
-	}	
+	}
 	q.offset = uint(q.offset - 1 + builtin.len(q.data)) % builtin.len(q.data)
 	q.len += 1
 	q.data[q.offset] = elem
 	return true, nil
 }
 
+/*
+Pop an element from the back of the queue.
 
-// Pop an element from the back of the queue
+This will raise a bounds checking error if the queue is empty.
+
+Example:
+
+	import "base:runtime"
+	import "core:container/queue"
+
+	// This demonstrates stack behavior (First-In Last-Out) at the far end of the data array.
+	main :: proc() {
+		q: queue.Queue(int)
+		queue.init(&q)
+		queue.push_front(&q, 1)
+		queue.push_front(&q, 2)
+		queue.push_front(&q, 3)
+		// q.data is now [..., 3, 2, 1]
+		assert(queue.pop_front(&q) == 3)
+		assert(queue.pop_front(&q) == 2)
+		assert(queue.pop_front(&q) == 1)
+	}
+*/
 pop_back :: proc(q: ^$Q/Queue($T), loc := #caller_location) -> (elem: T) {
-	assert(condition=q.len > 0, loc=loc)
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(q.len > 0, "Queue is empty.", loc)
+	}
 	q.len -= 1
 	idx := (q.offset+uint(q.len))%builtin.len(q.data)
 	elem = q.data[idx]
 	return
 }
-// Safely pop an element from the back of the queue
+
+/*
+Pop an element from the back of the queue if one exists and return true.
+Otherwise, return a nil element and false.
+*/
 pop_back_safe :: proc(q: ^$Q/Queue($T)) -> (elem: T, ok: bool) {
 	if q.len > 0 {
 		q.len -= 1
@@ -169,15 +361,25 @@ pop_back_safe :: proc(q: ^$Q/Queue($T)) -> (elem: T, ok: bool) {
 	return
 }
 
-// Pop an element from the front of the queue
+/*
+Pop an element from the front of the queue
+
+This will raise a bounds checking error if the queue is empty.
+*/
 pop_front :: proc(q: ^$Q/Queue($T), loc := #caller_location) -> (elem: T) {
-	assert(condition=q.len > 0, loc=loc)
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(q.len > 0, "Queue is empty.", loc)
+	}
 	elem = q.data[q.offset]
 	q.offset = (q.offset+1)%builtin.len(q.data)
 	q.len -= 1
 	return
 }
-// Safely pop an element from the front of the queue
+
+/*
+Pop an element from the front of the queue if one exists and return true.
+Otherwise, return a nil element and false.
+*/
 pop_front_safe :: proc(q: ^$Q/Queue($T)) -> (elem: T, ok: bool) {
 	if q.len > 0 {
 		elem = q.data[q.offset]
@@ -188,13 +390,18 @@ pop_front_safe :: proc(q: ^$Q/Queue($T)) -> (elem: T, ok: bool) {
 	return
 }
 
-// Push multiple elements to the back of the queue
+/*
+Push many elements at once to the back of the queue.
+
+If there is not enough space left and allocation fails to get more, this will
+return false with an `Allocator_Error`.
+*/
 push_back_elems :: proc(q: ^$Q/Queue($T), elems: ..T) -> (ok: bool, err: runtime.Allocator_Error)  {
 	n := uint(builtin.len(elems))
 	if space(q^) < int(n) {
 		_grow(q, q.len + n) or_return
 	}
-	
+
 	sz := uint(builtin.len(q.data))
 	insert_from := (q.offset + q.len) % sz
 	insert_to := n
@@ -207,19 +414,31 @@ push_back_elems :: proc(q: ^$Q/Queue($T), elems: ..T) -> (ok: bool, err: runtime
 	return true, nil
 }
 
-// Consume `n` elements from the front of the queue
+/*
+Consume `n` elements from the front of the queue.
+
+This will raise a bounds checking error if the queue does not have enough elements.
+*/
 consume_front :: proc(q: ^$Q/Queue($T), n: int, loc := #caller_location) {
-	assert(condition=int(q.len) >= n, loc=loc)
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(q.len >= uint(n), "Queue does not have enough elements to consume.", loc)
+	}
 	if n > 0 {
 		nu := uint(n)
 		q.offset = (q.offset + nu) % builtin.len(q.data)
-		q.len -= nu	
+		q.len -= nu
 	}
 }
 
-// Consume `n` elements from the back of the queue
+/*
+Consume `n` elements from the back of the queue.
+
+This will raise a bounds checking error if the queue does not have enough elements.
+*/
 consume_back :: proc(q: ^$Q/Queue($T), n: int, loc := #caller_location) {
-	assert(condition=int(q.len) >= n, loc=loc)
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(q.len >= uint(n), "Queue does not have enough elements to consume.", loc)
+	}
 	if n > 0 {
 		q.len -= uint(n)
 	}
@@ -231,9 +450,14 @@ append_elem  :: push_back
 append_elems :: push_back_elems
 push   :: proc{push_back, push_back_elems}
 append :: proc{push_back, push_back_elems}
+enqueue :: push_back
+dequeue :: pop_front
 
 
-// Clear the contents of the queue
+/*
+Reset the queue's length and offset to zero, letting it write new elements over
+old memory, in effect clearing the accessible contents.
+*/
 clear :: proc(q: ^$Q/Queue($T)) {
 	q.len = 0
 	q.offset = 0

+ 55 - 0
core/container/small_array/doc.odin

@@ -0,0 +1,55 @@
+/*
+Package small_array implements a dynamic array like
+interface on a stack-allocated, fixed-size array.
+
+The Small_Array type is optimal for scenarios where you need
+a container for a fixed number of elements of a specific type,
+with the total number known at compile time but the exact
+number to be used determined at runtime.
+
+Example:
+	import "core:fmt"
+	import "core:container/small_array"
+
+	create :: proc() -> (result: small_array.Small_Array(10, rune)) {
+		// appending single elements
+		small_array.push(&result, 'e')
+		// pushing a bunch of elements at once
+		small_array.push(&result, 'l', 'i', 'x', '-', 'e')
+		// pre-pending
+		small_array.push_front(&result, 'H')
+		// removing elements
+		small_array.ordered_remove(&result, 4)
+		// resizing to the desired length (the capacity will stay unchanged)
+		small_array.resize(&result, 7)
+		// inserting elements
+		small_array.inject_at(&result, 'p', 5)
+		// updating elements
+		small_array.set(&result, 3, 'l')
+		// getting pointers to elements
+		o := small_array.get_ptr(&result, 4)
+		o^ = 'o'
+		// and much more ....
+		return
+	}
+
+	// the Small_Array can be an ordinary parameter 'generic' over
+	// the actual length to be usable with different sizes
+	print_elements :: proc(arr: ^small_array.Small_Array($N, rune)) {
+		for r in small_array.slice(arr) {
+			fmt.print(r)
+		}
+	}
+
+	main :: proc() {
+		arr := create()
+		// ...
+		print_elements(&arr)
+	}
+
+Output:
+
+	Hellope
+
+*/
+package container_small_array

+ 607 - 0
core/container/small_array/small_array.odin

@@ -4,36 +4,171 @@ import "base:builtin"
 import "base:runtime"
 _ :: runtime
 
+/*
+A fixed-size stack-allocated array operated on in a dynamic fashion.
+
+Fields:
+- `data`: The underlying array
+- `len`: Amount of items that the `Small_Array` currently holds
+
+Example:
+
+	import "core:container/small_array"
+
+	example :: proc() {
+		a: small_array.Small_Array(100, int)
+		small_array.push_back(&a, 10)
+	}
+*/
 Small_Array :: struct($N: int, $T: typeid) where N >= 0 {
 	data: [N]T,
 	len:  int,
 }
 
+/*
+Returns the amount of items in the small-array.
+
+**Inputs**
+- `a`: The small-array
 
+**Returns**
+- the amount of items in the array
+*/
 len :: proc "contextless" (a: $A/Small_Array) -> int {
 	return a.len
 }
 
+/*
+Returns the capacity of the small-array.
+
+**Inputs**
+- `a`: The small-array
+
+**Returns** the capacity
+*/
 cap :: proc "contextless" (a: $A/Small_Array) -> int {
 	return builtin.len(a.data)
 }
 
+/*
+Returns how many more items the small-array could fit.
+
+**Inputs**
+- `a`: The small-array
+
+**Returns**
+- the number of unused slots
+*/
 space :: proc "contextless" (a: $A/Small_Array) -> int {
 	return builtin.len(a.data) - a.len
 }
 
+/*
+Returns a slice of the data.
+
+**Inputs**
+- `a`: The pointer to the small-array
+
+**Returns**
+- the slice
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	slice_example :: proc() {
+		print :: proc(a: ^small_array.Small_Array($N, int)) {
+			for item in small_array.slice(a) {
+				fmt.println(item)
+			}
+		}
+
+		a: small_array.Small_Array(5, int)
+		small_array.push_back(&a, 1)
+		small_array.push_back(&a, 2)
+		print(&a)
+	}
+
+Output:
+
+	1
+	2
+*/
 slice :: proc "contextless" (a: ^$A/Small_Array($N, $T)) -> []T {
 	return a.data[:a.len]
 }
 
+/*
+Get a copy of the item at the specified position.
+This operation assumes that the small-array is large enough.
+
+This will result in:
+	- the value if 0 <= index < len
+	- the zero value of the type if len <= index < capacity
+	- 'crash' if index >= capacity or index < 0
+
+**Inputs**
+- `a`: The small-array
+- `index`: The position of the item to get
 
+**Returns**
+- the element at the specified position
+*/
 get :: proc "contextless" (a: $A/Small_Array($N, $T), index: int) -> T {
 	return a.data[index]
 }
+
+/*
+Get a pointer to the item at the specified position.
+This operation assumes that the small-array is large enough.
+
+This will result in:
+	- the pointer if 0 <= index < len
+	- the pointer to the zero value if len <= index < capacity
+	- 'crash' if index >= capacity or index < 0
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `index`: The position of the item to get
+
+**Returns**
+- the pointer to the element at the specified position
+*/
 get_ptr :: proc "contextless" (a: ^$A/Small_Array($N, $T), index: int) -> ^T {
 	return &a.data[index]
 }
 
+/*
+Attempt to get a copy of the item at the specified position.
+
+**Inputs**
+- `a`: The small-array
+- `index`: The position of the item to get
+
+**Returns**
+- the element at the specified position
+- true if element exists, false otherwise
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	get_safe_example :: proc() {
+		a: small_array.Small_Array(5, rune)
+		small_array.push_back(&a, 'A')
+		
+		fmt.println(small_array.get_safe(a, 0) or_else 'x')
+		fmt.println(small_array.get_safe(a, 1) or_else 'x')
+	}
+
+Output:
+
+	A
+	x
+
+*/
 get_safe :: proc(a: $A/Small_Array($N, $T), index: int) -> (T, bool) #no_bounds_check {
 	if index < 0 || index >= a.len {
 		return {}, false
@@ -41,6 +176,17 @@ get_safe :: proc(a: $A/Small_Array($N, $T), index: int) -> (T, bool) #no_bounds_
 	return a.data[index], true
 }
 
+/*
+Get a pointer to the item at the specified position.
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `index`: The position of the item to get
+
+**Returns** 
+- the pointer to the element at the specified position
+- true if element exists, false otherwise
+*/
 get_ptr_safe :: proc(a: ^$A/Small_Array($N, $T), index: int) -> (^T, bool) #no_bounds_check {
 	if index < 0 || index >= a.len {
 		return {}, false
@@ -48,15 +194,128 @@ get_ptr_safe :: proc(a: ^$A/Small_Array($N, $T), index: int) -> (^T, bool) #no_b
 	return &a.data[index], true
 }
 
+/*
+Set the element at the specified position to the given value.
+This operation assumes that the small-array is large enough.
+
+This will result in:
+	- the value being set if 0 <= index < capacity
+	- 'crash' otherwise
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `index`: The position of the item to set
+- `value`: The value to set the element to
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	set_example :: proc() {
+		a: small_array.Small_Array(5, rune)
+		small_array.push_back(&a, 'A')
+		small_array.push_back(&a, 'B')
+		fmt.println(small_array.slice(&a))
+
+		// updates index 0
+		small_array.set(&a, 0, 'Z')
+		fmt.println(small_array.slice(&a))
+
+		// updates to a position x, where
+		// len <= x < cap are not visible since
+		// the length of the small-array remains unchanged
+		small_array.set(&a, 2, 'X')
+		small_array.set(&a, 3, 'Y')
+		small_array.set(&a, 4, 'Z')
+		fmt.println(small_array.slice(&a))
+
+		// resizing makes the change visible
+		small_array.resize(&a, 100)
+		fmt.println(small_array.slice(&a))
+	}
+
+Output:
+
+	[A, B]
+	[Z, B]
+	[Z, B]
+	[Z, B, X, Y, Z]
+
+*/
 set :: proc "contextless" (a: ^$A/Small_Array($N, $T), index: int, item: T) {
 	a.data[index] = item
 }
 
+/*
+Tries to resize the small-array to the specified length.
+
+The new length will be:
+	- `length` if `length` <= capacity
+	- capacity if length > capacity
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `length`: The new desired length
+
+Example:
+	
+	import "core:container/small_array"
+	import "core:fmt"
+
+	resize_example :: proc() {
+		a: small_array.Small_Array(5, int)
+
+		small_array.push_back(&a, 1)
+		small_array.push_back(&a, 2)
+		fmt.println(small_array.slice(&a))
+		
+		small_array.resize(&a, 1)
+		fmt.println(small_array.slice(&a))
+
+		small_array.resize(&a, 100)
+		fmt.println(small_array.slice(&a))
+	}
+
+Output:
+	
+	[1, 2]
+	[1]
+	[1, 2, 0, 0, 0]
+*/
 resize :: proc "contextless" (a: ^$A/Small_Array, length: int) {
 	a.len = min(length, builtin.len(a.data))
 }
 
+/*
+Attempts to add the given element to the end.
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `item`: The item to append
+
+**Returns** 
+- true if there was enough space to fit the element, false otherwise
+
+Example:
+	
+	import "core:container/small_array"
+	import "core:fmt"
+
+	push_back_example :: proc() {
+		a: small_array.Small_Array(2, int)
+
+		assert(small_array.push_back(&a, 1), "this should fit")
+		assert(small_array.push_back(&a, 2), "this should fit")
+		assert(!small_array.push_back(&a, 3), "this should not fit")
+
+		fmt.println(small_array.slice(&a))
+	}
+
+Output:
 
+	[1, 2]
+*/
 push_back :: proc "contextless" (a: ^$A/Small_Array($N, $T), item: T) -> bool {
 	if a.len < cap(a^) {
 		a.data[a.len] = item
@@ -66,6 +325,39 @@ push_back :: proc "contextless" (a: ^$A/Small_Array($N, $T), item: T) -> bool {
 	return false
 }
 
+/*
+Attempts to add the given element at the beginning.
+
+Note: Performing this operation will cause pointers obtained
+through get_ptr(_safe) to reference incorrect elements.
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `item`: The item to prepend
+
+**Returns** 
+- true if there was enough space to fit the element, false otherwise
+
+Example:
+	
+	import "core:container/small_array"
+	import "core:fmt"
+
+	push_front_example :: proc() {
+		a: small_array.Small_Array(2, int)
+
+		assert(small_array.push_front(&a, 2), "this should fit")
+		assert(small_array.push_front(&a, 1), "this should fit")
+		assert(!small_array.push_back(&a, 0), "this should not fit")
+
+		fmt.println(small_array.slice(&a))
+	}
+
+Output:
+
+	[1, 2]
+*/
 push_front :: proc "contextless" (a: ^$A/Small_Array($N, $T), item: T) -> bool {
 	if a.len < cap(a^) {
 		a.len += 1
@@ -77,6 +369,35 @@ push_front :: proc "contextless" (a: ^$A/Small_Array($N, $T), item: T) -> bool {
 	return false
 }
 
+/*
+Removes and returns the last element of the small-array.
+This operation assumes that the small-array is not empty.
+
+**Inputs**
+- `a`: A pointer to the small-array
+
+**Returns** 
+- a copy of the element removed from the end of the small-array
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	pop_back_example :: proc() {
+		a: small_array.Small_Array(5, int)
+		small_array.push(&a, 0, 1, 2)
+
+		fmt.println("BEFORE:", small_array.slice(&a))
+		small_array.pop_back(&a)
+		fmt.println("AFTER: ", small_array.slice(&a))
+	}
+
+Output:
+
+	BEFORE: [0, 1, 2]
+	AFTER:  [0, 1]
+*/
 pop_back :: proc "odin" (a: ^$A/Small_Array($N, $T), loc := #caller_location) -> T {
 	assert(condition=(N > 0 && a.len > 0), loc=loc)
 	item := a.data[a.len-1]
@@ -84,6 +405,38 @@ pop_back :: proc "odin" (a: ^$A/Small_Array($N, $T), loc := #caller_location) ->
 	return item
 }
 
+/*
+Removes and returns the first element of the small-array.
+This operation assumes that the small-array is not empty.
+
+Note: Performing this operation will cause pointers obtained
+through get_ptr(_safe) to reference incorrect elements.
+
+**Inputs**
+- `a`: A pointer to the small-array
+
+**Returns** 
+- a copy of the element removed from the beginning of the small-array
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	pop_front_example :: proc() {
+		a: small_array.Small_Array(5, int)
+		small_array.push(&a, 0, 1, 2)
+
+		fmt.println("BEFORE:", small_array.slice(&a))
+		small_array.pop_front(&a)
+		fmt.println("AFTER: ", small_array.slice(&a))
+	}
+
+Output:
+
+	BEFORE: [0, 1, 2]
+	AFTER:  [1, 2]
+*/
 pop_front :: proc "odin" (a: ^$A/Small_Array($N, $T), loc := #caller_location) -> T {
 	assert(condition=(N > 0 && a.len > 0), loc=loc)
 	item := a.data[0]
@@ -93,6 +446,32 @@ pop_front :: proc "odin" (a: ^$A/Small_Array($N, $T), loc := #caller_location) -
 	return item
 }
 
+/*
+Attempts to remove and return the last element of the small array.
+Unlike `pop_back`, it does not assume that the array is non-empty.
+
+**Inputs**
+- `a`: A pointer to the small-array
+
+**Returns** 
+- a copy of the element removed from the end of the small-array
+- true if the small-array was not empty, false otherwise
+
+Example:
+
+	import "core:container/small_array"
+
+	pop_back_safe_example :: proc() {
+		a: small_array.Small_Array(3, int)
+		small_array.push(&a, 1)
+
+		el, ok := small_array.pop_back_safe(&a)
+		assert(ok, "there was an element in the array")
+
+		el, ok = small_array.pop_back_safe(&a)
+		assert(!ok, "there was NO element in the array")
+	}
+*/
 pop_back_safe :: proc "contextless" (a: ^$A/Small_Array($N, $T)) -> (item: T, ok: bool) {
 	if N > 0 && a.len > 0 {
 		item = a.data[a.len-1]
@@ -102,6 +481,35 @@ pop_back_safe :: proc "contextless" (a: ^$A/Small_Array($N, $T)) -> (item: T, ok
 	return
 }
 
+/*
+Attempts to remove and return the first element of the small array.
+Unlike `pop_front`, it does not assume that the array is non-empty.
+
+Note: Performing this operation will cause pointers obtained
+through get_ptr(_safe) to reference incorrect elements.
+
+**Inputs**
+- `a`: A pointer to the small-array
+
+**Returns** 
+- a copy of the element removed from the beginning of the small-array
+- true if the small-array was not empty, false otherwise
+
+Example:
+
+	import "core:container/small_array"
+
+	pop_front_safe_example :: proc() {
+		a: small_array.Small_Array(3, int)
+		small_array.push(&a, 1)
+
+		el, ok := small_array.pop_front_safe(&a)
+		assert(ok, "there was an element in the array")
+
+		el, ok = small_array.pop_front_safe(&a)
+		assert(!ok, "there was NO element in the array")
+	}
+*/
 pop_front_safe :: proc "contextless" (a: ^$A/Small_Array($N, $T)) -> (item: T, ok: bool) {
 	if N > 0 && a.len > 0 {
 		item = a.data[0]
@@ -113,11 +521,70 @@ pop_front_safe :: proc "contextless" (a: ^$A/Small_Array($N, $T)) -> (item: T, o
 	return
 }
 
+/*
+Decreases the length of the small-array by the given amount.
+The elements are therefore not really removed and can be
+recovered by calling `resize`.
+
+Note: This procedure assumes that the array has a sufficient length.
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `count`: The amount the length should be reduced by
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	consume_example :: proc() {
+		a: small_array.Small_Array(3, int)
+		small_array.push(&a, 0, 1, 2)
+
+		fmt.println("BEFORE:", small_array.slice(&a))
+		small_array.consume(&a, 2)
+		fmt.println("AFTER :", small_array.slice(&a))
+	}
+
+Output:
+
+	BEFORE: [0, 1, 2]
+	AFTER : [0]
+*/
 consume :: proc "odin" (a: ^$A/Small_Array($N, $T), count: int, loc := #caller_location) {
 	assert(condition=a.len >= count, loc=loc)
 	a.len -= count
 }
 
+/*
+Removes the element at the specified index while retaining order.
+
+Note: Performing this operation will cause pointers obtained
+through get_ptr(_safe) to reference incorrect elements.
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `index`: The position of the element to remove
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	ordered_remove_example :: proc() {
+		a: small_array.Small_Array(4, int)
+		small_array.push(&a, 0, 1, 2, 3)
+
+		fmt.println("BEFORE:", small_array.slice(&a))
+		small_array.ordered_remove(&a, 1)
+		fmt.println("AFTER :", small_array.slice(&a))
+	}
+
+Output:
+
+	BEFORE: [0, 1, 2, 3]
+	AFTER : [0, 2, 3]
+*/
 ordered_remove :: proc "contextless" (a: ^$A/Small_Array($N, $T), index: int, loc := #caller_location) #no_bounds_check {
 	runtime.bounds_check_error_loc(loc, index, a.len)
 	if index+1 < a.len {
@@ -126,6 +593,32 @@ ordered_remove :: proc "contextless" (a: ^$A/Small_Array($N, $T), index: int, lo
 	a.len -= 1
 }
 
+/*
+Removes the element at the specified index without retaining order.
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `index`: The position of the element to remove
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	unordered_remove_example :: proc() {
+		a: small_array.Small_Array(4, int)
+		small_array.push(&a, 0, 1, 2, 3)
+
+		fmt.println("BEFORE:", small_array.slice(&a))
+		small_array.unordered_remove(&a, 1)
+		fmt.println("AFTER :", small_array.slice(&a))
+	}
+
+Output:
+
+	BEFORE: [0, 1, 2, 3]
+	AFTER : [0, 3, 2]
+*/
 unordered_remove :: proc "contextless" (a: ^$A/Small_Array($N, $T), index: int, loc := #caller_location) #no_bounds_check {
 	runtime.bounds_check_error_loc(loc, index, a.len)
 	n := a.len-1
@@ -135,10 +628,63 @@ unordered_remove :: proc "contextless" (a: ^$A/Small_Array($N, $T), index: int,
 	a.len -= 1
 }
 
+/*
+Sets the length of the small-array to 0.
+
+**Inputs**
+- `a`: A pointer to the small-array
+
+Example:
+	
+	import "core:container/small_array"
+	import "core:fmt"
+
+	clear_example :: proc() {
+		a: small_array.Small_Array(4, int)
+		small_array.push(&a, 0, 1, 2, 3)
+
+		fmt.println("BEFORE:", small_array.slice(&a))
+		small_array.clear(&a)
+		fmt.println("AFTER :", small_array.slice(&a))
+	}
+
+Output:
+
+	BEFORE: [0, 1, 2, 3]
+	AFTER : []
+
+*/
 clear :: proc "contextless" (a: ^$A/Small_Array($N, $T)) {
 	resize(a, 0)
 }
 
+/*
+Attempts to append all elements to the small-array returning
+false if there is not enough space to fit all of them.
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `items`: The items to append
+
+**Returns**
+- true if there was enough space to fit the element, false otherwise
+
+Example:
+	
+	import "core:container/small_array"
+	import "core:fmt"
+
+	push_back_elems_example :: proc() {
+		a: small_array.Small_Array(100, int)
+		small_array.push_back_elems(&a, 0, 1, 2, 3, 4)
+		fmt.println(small_array.slice(&a))
+	}
+
+Output:
+
+	[0, 1, 2, 3, 4]
+*/
 push_back_elems :: proc "contextless" (a: ^$A/Small_Array($N, $T), items: ..T) -> bool {
 	if a.len + builtin.len(items) <= cap(a^) {
 		n := copy(a.data[a.len:], items[:])
@@ -148,6 +694,36 @@ push_back_elems :: proc "contextless" (a: ^$A/Small_Array($N, $T), items: ..T) -
 	return false
 }
 
+/*
+Tries to insert an element at the specified position.
+
+Note: Performing this operation will cause pointers obtained
+through get_ptr(_safe) to reference incorrect elements.
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `item`: The item to insert
+- `index`: The index to insert the item at
+
+**Returns**
+- true if there was enough space to fit the element, false otherwise
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	inject_at_example :: proc() {
+		arr: small_array.Small_Array(100, rune)
+		small_array.push(&arr,  'A', 'C', 'D')
+		small_array.inject_at(&arr, 'B', 1)
+		fmt.println(small_array.slice(&arr))
+	}
+
+Output:
+
+	[A, B, C, D]
+*/
 inject_at :: proc "contextless" (a: ^$A/Small_Array($N, $T), item: T, index: int) -> bool #no_bounds_check {
 	if a.len < cap(a^) && index >= 0 && index <= len(a^) {
 		a.len += 1
@@ -160,7 +736,38 @@ inject_at :: proc "contextless" (a: ^$A/Small_Array($N, $T), item: T, index: int
 	return false
 }
 
+// Alias for `push_back`
 append_elem  :: push_back
+// Alias for `push_back_elems`
 append_elems :: push_back_elems
+
+/*
+Tries to append the element(s) to the small-array.
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `items`: The item(s) to append
+
+**Returns**
+- true if there was enough space to fit the element, false otherwise
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	push_example :: proc() {
+		a: small_array.Small_Array(100, int)
+		small_array.push(&a, 0)
+		small_array.push(&a, 1, 2, 3, 4)
+		fmt.println(small_array.slice(&a))
+	}
+
+Output:
+
+	[0, 1, 2, 3, 4]
+*/
 push   :: proc{push_back, push_back_elems}
+// Alias for `push`
 append :: proc{push_back, push_back_elems}

+ 1 - 0
core/crypto/_aes/aes.odin

@@ -25,4 +25,5 @@ GHASH_BLOCK_SIZE :: 16
 GHASH_TAG_SIZE :: 16
 
 // RCON is the AES keyschedule round constants.
+@(rodata)
 RCON := [10]byte{0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36}

+ 2 - 7
core/crypto/_aes/ct64/ct64.odin

@@ -22,8 +22,6 @@
 
 package aes_ct64
 
-import "base:intrinsics"
-
 // Bitsliced AES for 64-bit general purpose (integer) registers.  Each
 // invocation will process up to 4 blocks at a time.  This implementation
 // is derived from the BearSSL ct64 code, and distributed under a 1-clause
@@ -212,11 +210,8 @@ orthogonalize :: proc "contextless" (q: ^[8]u64) {
 }
 
 @(require_results)
-interleave_in :: proc "contextless" (w: []u32) -> (q0, q1: u64) #no_bounds_check {
-	if len(w) < 4 {
-		intrinsics.trap()
-	}
-	x0, x1, x2, x3 := u64(w[0]), u64(w[1]), u64(w[2]), u64(w[3])
+interleave_in :: proc "contextless" (w0, w1, w2, w3: u32) -> (q0, q1: u64) #no_bounds_check {
+	x0, x1, x2, x3 := u64(w0), u64(w1), u64(w2), u64(w3)
 	x0 |= (x0 << 16)
 	x1 |= (x1 << 16)
 	x2 |= (x2 << 16)

+ 1 - 5
core/crypto/_aes/ct64/ct64_enc.odin

@@ -22,12 +22,8 @@
 
 package aes_ct64
 
-import "base:intrinsics"
-
 add_round_key :: proc "contextless" (q: ^[8]u64, sk: []u64) #no_bounds_check {
-	if len(sk) < 8 {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(sk) >= 8, "aes/ct64: invalid round key size")
 
 	q[0] ~= sk[0]
 	q[1] ~= sk[1]

+ 3 - 58
core/crypto/_aes/ct64/ct64_keysched.odin

@@ -22,7 +22,6 @@
 
 package aes_ct64
 
-import "base:intrinsics"
 import "core:crypto/_aes"
 import "core:encoding/endian"
 import "core:mem"
@@ -42,7 +41,7 @@ sub_word :: proc "contextless" (x: u32) -> u32 {
 }
 
 @(private, require_results)
-keysched :: proc(comp_skey: []u64, key: []byte) -> int {
+keysched :: proc "contextless" (comp_skey: []u64, key: []byte) -> int {
 	num_rounds, key_len := 0, len(key)
 	switch key_len {
 	case _aes.KEY_SIZE_128:
@@ -52,7 +51,7 @@ keysched :: proc(comp_skey: []u64, key: []byte) -> int {
 	case _aes.KEY_SIZE_256:
 		num_rounds = _aes.ROUNDS_256
 	case:
-		panic("crypto/aes: invalid AES key size")
+		panic_contextless("crypto/aes: invalid AES key size")
 	}
 
 	skey: [60]u32 = ---
@@ -78,7 +77,7 @@ keysched :: proc(comp_skey: []u64, key: []byte) -> int {
 
 	q: [8]u64 = ---
 	for i, j := 0, 0; i < nkf; i, j = i + 4, j + 2 {
-		q[0], q[4] = interleave_in(skey[i:])
+		q[0], q[4] = interleave_in(skey[i], skey[i+1], skey[i+2], skey[i+3])
 		q[1] = q[0]
 		q[2] = q[0]
 		q[3] = q[0]
@@ -123,57 +122,3 @@ skey_expand :: proc "contextless" (skey, comp_skey: []u64, num_rounds: int) {
 		skey[v + 3] = (x3 << 4) - x3
 	}
 }
-
-orthogonalize_roundkey :: proc "contextless" (qq: []u64, key: []byte) {
-	if len(qq) < 8 || len(key) != 16 {
-		intrinsics.trap()
-	}
-
-	skey: [4]u32 = ---
-	skey[0] = endian.unchecked_get_u32le(key[0:])
-	skey[1] = endian.unchecked_get_u32le(key[4:])
-	skey[2] = endian.unchecked_get_u32le(key[8:])
-	skey[3] = endian.unchecked_get_u32le(key[12:])
-
-	q: [8]u64 = ---
-	q[0], q[4] = interleave_in(skey[:])
-	q[1] = q[0]
-	q[2] = q[0]
-	q[3] = q[0]
-	q[5] = q[4]
-	q[6] = q[4]
-	q[7] = q[4]
-	orthogonalize(&q)
-
-	comp_skey: [2]u64 = ---
-	comp_skey[0] =
-		(q[0] & 0x1111111111111111) |
-		(q[1] & 0x2222222222222222) |
-		(q[2] & 0x4444444444444444) |
-		(q[3] & 0x8888888888888888)
-	comp_skey[1] =
-		(q[4] & 0x1111111111111111) |
-		(q[5] & 0x2222222222222222) |
-		(q[6] & 0x4444444444444444) |
-		(q[7] & 0x8888888888888888)
-
-	for x, u in comp_skey {
-		x0 := x
-		x1, x2, x3 := x0, x0, x0
-		x0 &= 0x1111111111111111
-		x1 &= 0x2222222222222222
-		x2 &= 0x4444444444444444
-		x3 &= 0x8888888888888888
-		x1 >>= 1
-		x2 >>= 2
-		x3 >>= 3
-		qq[u * 4 + 0] = (x0 << 4) - x0
-		qq[u * 4 + 1] = (x1 << 4) - x1
-		qq[u * 4 + 2] = (x2 << 4) - x2
-		qq[u * 4 + 3] = (x3 << 4) - x3
-	}
-
-	mem.zero_explicit(&skey, size_of(skey))
-	mem.zero_explicit(&q, size_of(q))
-	mem.zero_explicit(&comp_skey, size_of(comp_skey))
-}

+ 2 - 4
core/crypto/_aes/ct64/ghash.odin

@@ -22,7 +22,6 @@
 
 package aes_ct64
 
-import "base:intrinsics"
 import "core:crypto/_aes"
 import "core:encoding/endian"
 
@@ -64,9 +63,8 @@ rev64 :: proc "contextless" (x: u64) -> u64 {
 // Note: `dst` is both an input and an output, to support easy implementation
 // of GCM.
 ghash :: proc "contextless" (dst, key, data: []byte) {
-	if len(dst) != _aes.GHASH_BLOCK_SIZE || len(key) != _aes.GHASH_BLOCK_SIZE {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(dst) == _aes.GHASH_BLOCK_SIZE)
+	ensure_contextless(len(key) == _aes.GHASH_BLOCK_SIZE)
 
 	buf := data
 	l := len(buf)

+ 37 - 43
core/crypto/_aes/ct64/helpers.odin

@@ -1,60 +1,61 @@
 package aes_ct64
 
-import "base:intrinsics"
 import "core:crypto/_aes"
 import "core:encoding/endian"
 
+@(require_results)
+load_interleaved :: proc "contextless" (src: []byte) -> (u64, u64) #no_bounds_check {
+	w0 := endian.unchecked_get_u32le(src[0:])
+	w1 := endian.unchecked_get_u32le(src[4:])
+	w2 := endian.unchecked_get_u32le(src[8:])
+	w3 := endian.unchecked_get_u32le(src[12:])
+	return interleave_in(w0, w1, w2, w3)
+}
+
+store_interleaved :: proc "contextless" (dst: []byte, a0, a1: u64) #no_bounds_check {
+	w0, w1, w2, w3 := interleave_out(a0, a1)
+	endian.unchecked_put_u32le(dst[0:], w0)
+	endian.unchecked_put_u32le(dst[4:], w1)
+	endian.unchecked_put_u32le(dst[8:], w2)
+	endian.unchecked_put_u32le(dst[12:], w3)
+}
+
+@(require_results)
+xor_interleaved :: #force_inline proc "contextless" (a0, a1, b0, b1: u64) -> (u64, u64) {
+	return a0 ~ b0, a1 ~ b1
+}
+
+@(require_results)
+and_interleaved :: #force_inline proc "contextless" (a0, a1, b0, b1: u64) -> (u64, u64) {
+	return a0 & b0, a1 & b1
+}
+
 load_blockx1 :: proc "contextless" (q: ^[8]u64, src: []byte) {
-	if len(src) != _aes.BLOCK_SIZE {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(src) == _aes.BLOCK_SIZE, "aes/ct64: invalid block size")
 
-	w: [4]u32 = ---
-	w[0] = endian.unchecked_get_u32le(src[0:])
-	w[1] = endian.unchecked_get_u32le(src[4:])
-	w[2] = endian.unchecked_get_u32le(src[8:])
-	w[3] = endian.unchecked_get_u32le(src[12:])
-	q[0], q[4] = interleave_in(w[:])
+	q[0], q[4] = #force_inline load_interleaved(src)
 	orthogonalize(q)
 }
 
 store_blockx1 :: proc "contextless" (dst: []byte, q: ^[8]u64) {
-	if len(dst) != _aes.BLOCK_SIZE {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(dst) == _aes.BLOCK_SIZE, "aes/ct64: invalid block size")
 
 	orthogonalize(q)
-	w0, w1, w2, w3 := interleave_out(q[0], q[4])
-	endian.unchecked_put_u32le(dst[0:], w0)
-	endian.unchecked_put_u32le(dst[4:], w1)
-	endian.unchecked_put_u32le(dst[8:], w2)
-	endian.unchecked_put_u32le(dst[12:], w3)
+	#force_inline store_interleaved(dst, q[0], q[4])
 }
 
 load_blocks :: proc "contextless" (q: ^[8]u64, src: [][]byte) {
-	if n := len(src); n > STRIDE || n == 0 {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(src) > 0 && len(src) <= STRIDE, "aes/ct64: invalid block(s) size")
 
-	w: [4]u32 = ---
 	for s, i in src {
-		if len(s) != _aes.BLOCK_SIZE {
-			intrinsics.trap()
-		}
-
-		w[0] = endian.unchecked_get_u32le(s[0:])
-		w[1] = endian.unchecked_get_u32le(s[4:])
-		w[2] = endian.unchecked_get_u32le(s[8:])
-		w[3] = endian.unchecked_get_u32le(s[12:])
-		q[i], q[i + 4] = interleave_in(w[:])
+		ensure_contextless(len(s) == _aes.BLOCK_SIZE, "aes/ct64: invalid block size")
+		q[i], q[i + 4] = #force_inline load_interleaved(s)
 	}
 	orthogonalize(q)
 }
 
 store_blocks :: proc "contextless" (dst: [][]byte, q: ^[8]u64) {
-	if n := len(dst); n > STRIDE || n == 0 {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(dst) > 0 && len(dst) <= STRIDE, "aes/ct64: invalid block(s) size")
 
 	orthogonalize(q)
 	for d, i in dst {
@@ -62,14 +63,7 @@ store_blocks :: proc "contextless" (dst: [][]byte, q: ^[8]u64) {
 		if d == nil {
 			break
 		}
-		if len(d) != _aes.BLOCK_SIZE {
-			intrinsics.trap()
-		}
-
-		w0, w1, w2, w3 := interleave_out(q[i], q[i + 4])
-		endian.unchecked_put_u32le(d[0:], w0)
-		endian.unchecked_put_u32le(d[4:], w1)
-		endian.unchecked_put_u32le(d[8:], w2)
-		endian.unchecked_put_u32le(d[12:], w3)
+		ensure_contextless(len(d) == _aes.BLOCK_SIZE, "aes/ct64: invalid block size")
+		#force_inline store_interleaved(d, q[i], q[i + 4])
 	}
 }

+ 1 - 1
core/crypto/_aes/hw_intel/api.odin

@@ -6,7 +6,7 @@ import "core:sys/info"
 // is_supported returns true iff hardware accelerated AES
 // is supported.
 is_supported :: proc "contextless" () -> bool {
-	features, ok := info.cpu_features.?
+	features, ok := info.cpu.features.?
 	if !ok {
 		return false
 	}

+ 2 - 2
core/crypto/_aes/hw_intel/ghash.odin

@@ -52,7 +52,7 @@ GHASH_STRIDE_BYTES_HW :: GHASH_STRIDE_HW * _aes.GHASH_BLOCK_SIZE
 // that it is right-shifted by 1 bit. The left-shift is relatively
 // inexpensive, and it can be mutualised.
 //
-// Since SSE2 opcodes do not have facilities for shitfting full 128-bit
+// Since SSE2 opcodes do not have facilities for shifting full 128-bit
 // values with bit precision, we have to break down values into 64-bit
 // chunks. We number chunks from 0 to 3 in left to right order.
 
@@ -155,7 +155,7 @@ square_f128 :: #force_inline proc "contextless" (kw: x86.__m128i) -> (x86.__m128
 @(enable_target_feature = "sse2,ssse3,pclmul")
 ghash :: proc "contextless" (dst, key, data: []byte) #no_bounds_check {
 	if len(dst) != _aes.GHASH_BLOCK_SIZE || len(key) != _aes.GHASH_BLOCK_SIZE {
-		intrinsics.trap()
+		panic_contextless("aes/ghash: invalid dst or key size")
 	}
 
 	// Note: BearSSL opts to copy the remainder into a zero-filled

+ 13 - 19
core/crypto/_blake2/blake2.odin

@@ -18,6 +18,8 @@ BLAKE2S_SIZE :: 32
 BLAKE2B_BLOCK_SIZE :: 128
 BLAKE2B_SIZE :: 64
 
+MAX_SIZE :: 255
+
 Blake2s_Context :: struct {
 	h:            [8]u32,
 	t:            [2]u32,
@@ -68,13 +70,13 @@ Blake2_Tree :: struct {
 	is_last_node:    bool,
 }
 
-@(private)
+@(private, rodata)
 BLAKE2S_IV := [8]u32 {
 	0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
 	0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
 }
 
-@(private)
+@(private, rodata)
 BLAKE2B_IV := [8]u64 {
 	0x6a09e667f3bcc908, 0xbb67ae8584caa73b,
 	0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
@@ -82,16 +84,13 @@ BLAKE2B_IV := [8]u64 {
 	0x1f83d9abfb41bd6b, 0x5be0cd19137e2179,
 }
 
-init :: proc(ctx: ^$T, cfg: ^Blake2_Config) {
+init :: proc "contextless" (ctx: ^$T, cfg: ^Blake2_Config) {
 	when T == Blake2s_Context {
 		max_size :: BLAKE2S_SIZE
 	} else when T == Blake2b_Context {
 		max_size :: BLAKE2B_SIZE
 	}
-
-	if cfg.size > max_size {
-		panic("blake2: requested output size exceeeds algorithm max")
-	}
+	ensure_contextless(cfg.size <= max_size, "blake2: requested output size exceeds algorithm max")
 
 	// To save having to allocate a scratch buffer, use the internal
 	// data buffer (`ctx.x`), as it is exactly the correct size.
@@ -167,8 +166,8 @@ init :: proc(ctx: ^$T, cfg: ^Blake2_Config) {
 	ctx.is_initialized = true
 }
 
-update :: proc(ctx: ^$T, p: []byte) {
-	assert(ctx.is_initialized)
+update :: proc "contextless" (ctx: ^$T, p: []byte) {
+	ensure_contextless(ctx.is_initialized)
 
 	p := p
 	when T == Blake2s_Context {
@@ -195,8 +194,8 @@ update :: proc(ctx: ^$T, p: []byte) {
 	ctx.nx += copy(ctx.x[ctx.nx:], p)
 }
 
-final :: proc(ctx: ^$T, hash: []byte, finalize_clone: bool = false) {
-	assert(ctx.is_initialized)
+final :: proc "contextless" (ctx: ^$T, hash: []byte, finalize_clone: bool = false) {
+	ensure_contextless(ctx.is_initialized)
 
 	ctx := ctx
 	if finalize_clone {
@@ -206,24 +205,19 @@ final :: proc(ctx: ^$T, hash: []byte, finalize_clone: bool = false) {
 	}
 	defer(reset(ctx))
 
+	ensure_contextless(len(hash) >= int(ctx.size), "crypto/blake2: invalid destination digest size")
 	when T == Blake2s_Context {
-		if len(hash) < int(ctx.size) {
-			panic("crypto/blake2s: invalid destination digest size")
-		}
 		blake2s_final(ctx, hash)
 	} else when T == Blake2b_Context {
-		if len(hash) < int(ctx.size) {
-			panic("crypto/blake2b: invalid destination digest size")
-		}
 		blake2b_final(ctx, hash)
 	}
 }
 
-clone :: proc(ctx, other: ^$T) {
+clone :: proc "contextless" (ctx, other: ^$T) {
 	ctx^ = other^
 }
 
-reset :: proc(ctx: ^$T) {
+reset :: proc "contextless" (ctx: ^$T) {
 	if !ctx.is_initialized {
 		return
 	}

+ 11 - 15
core/crypto/_chacha20/chacha20.odin

@@ -1,6 +1,5 @@
 package _chacha20
 
-import "base:intrinsics"
 import "core:encoding/endian"
 import "core:math/bits"
 import "core:mem"
@@ -46,9 +45,8 @@ Context :: struct {
 // derivation is expected to be handled by the caller, so that the
 // HChaCha call can be suitably accelerated.
 init :: proc "contextless" (ctx: ^Context, key, iv: []byte, is_xchacha: bool) {
-	if len(key) != KEY_SIZE || len(iv) != IV_SIZE {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(key) == KEY_SIZE, "chacha20: invalid key size")
+	ensure_contextless(len(iv) == IV_SIZE, "chacha20: invalid IV size")
 
 	k, n := key, iv
 
@@ -76,12 +74,10 @@ init :: proc "contextless" (ctx: ^Context, key, iv: []byte, is_xchacha: bool) {
 
 // seek seeks the (X)ChaCha20 stream counter to the specified block.
 seek :: proc(ctx: ^Context, block_nr: u64) {
-	assert(ctx._is_initialized)
+	ensure(ctx._is_initialized)
 
 	if ctx._is_ietf_flavor {
-		if block_nr > MAX_CTR_IETF {
-			panic("crypto/chacha20: attempted to seek past maximum counter")
-		}
+		ensure(block_nr <= MAX_CTR_IETF, "crypto/chacha20: attempted to seek past maximum counter")
 	} else {
 		ctx._s[13] = u32(block_nr >> 32)
 	}
@@ -102,7 +98,7 @@ check_counter_limit :: proc(ctx: ^Context, nr_blocks: int) {
 	// Enforce the maximum consumed keystream per IV.
 	//
 	// While all modern "standard" definitions of ChaCha20 use
-	// the IETF 32-bit counter, for XChaCha20 most common
+	// the IETF 32-bit counter, for XChaCha20 historical
 	// implementations allow for a 64-bit counter.
 	//
 	// Honestly, the answer here is "use a MRAE primitive", but
@@ -110,14 +106,14 @@ check_counter_limit :: proc(ctx: ^Context, nr_blocks: int) {
 
 	ERR_CTR_EXHAUSTED :: "crypto/chacha20: maximum (X)ChaCha20 keystream per IV reached"
 
+	ctr_ok: bool
 	if ctx._is_ietf_flavor {
-		if u64(ctx._s[12]) + u64(nr_blocks) > MAX_CTR_IETF {
-			panic(ERR_CTR_EXHAUSTED)
-		}
+		ctr_ok = u64(ctx._s[12]) + u64(nr_blocks) <= MAX_CTR_IETF
 	} else {
 		ctr := (u64(ctx._s[13]) << 32) | u64(ctx._s[12])
-		if _, carry := bits.add_u64(ctr, u64(nr_blocks), 0); carry != 0 {
-			panic(ERR_CTR_EXHAUSTED)
-		}
+		_, carry := bits.add_u64(ctr, u64(nr_blocks), 0)
+		ctr_ok = carry == 0
 	}
+
+	ensure(ctr_ok, "crypto/chacha20: maximum (X)ChaCha20 keystream per IV reached")
 }

+ 33 - 7
core/crypto/_chacha20/simd128/chacha20_simd128.odin

@@ -29,11 +29,24 @@ when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 {
 	// explicitly using simd.u8x16 shuffles.
 	@(private = "file")
 	TARGET_SIMD_FEATURES :: "sse2,ssse3"
+} else when ODIN_ARCH == .riscv64 {
+	@(private = "file")
+	TARGET_SIMD_FEATURES :: "v"
 } else {
 	@(private = "file")
 	TARGET_SIMD_FEATURES :: ""
 }
 
+// Some targets lack runtime feature detection, and will flat out refuse
+// to load binaries that have unknown instructions.  This is distinct from
+// `simd.HAS_HARDWARE_SIMD` as actually good designs support runtime feature
+// detection and that constant establishes a baseline.
+//
+// See:
+// - https://github.com/WebAssembly/design/issues/1161
+@(private = "file")
+TARGET_IS_DESIGNED_BY_IDIOTS :: (ODIN_ARCH == .wasm64p32 || ODIN_ARCH == .wasm32) && !intrinsics.has_target_feature("simd128")
+
 @(private = "file")
 _ROT_7L: simd.u32x4 : {7, 7, 7, 7}
 @(private = "file")
@@ -205,14 +218,16 @@ _store_simd128 :: #force_inline proc "contextless" (
 // is_performant returns true iff the target and current host both support
 // "enough" 128-bit SIMD to make this implementation performant.
 is_performant :: proc "contextless" () -> bool {
-	when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 || ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 {
+	when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 || ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 || ODIN_ARCH == .riscv64 {
 		when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 {
 			req_features :: info.CPU_Features{.asimd}
 		} else when ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 {
 			req_features :: info.CPU_Features{.sse2, .ssse3}
+		} else when ODIN_ARCH == .riscv64 {
+			req_features :: info.CPU_Features{.V}
 		}
 
-		features, ok := info.cpu_features.?
+		features, ok := info.cpu.features.?
 		if !ok {
 			return false
 		}
@@ -245,8 +260,17 @@ stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int)
 
 	// 8 blocks at a time.
 	//
-	// Note: This is only worth it on Aarch64.
-	when ODIN_ARCH == .arm64 {
+	// Note:
+	// This uses a ton of registers so it is only worth it on targets
+	// that have something like 32 128-bit registers.  This is currently
+	// all ARMv8 targets, and RISC-V Zvl128b (`V` application profile)
+	// targets.
+	//
+	// While our current definition of `.arm32` is 32-bit ARMv8, this
+	// may change in the future (ARMv7 is still relevant), and things
+	// like Cortex-A8/A9 does "pretend" 128-bit SIMD 64-bits at a time
+	// thus needs bemchmarking.
+	when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 {
 		for ; n >= 8; n = n - 8 {
 			v0, v1, v2, v3 := s0, s1, s2, s3
 
@@ -354,9 +378,11 @@ stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int)
 
 	// 4 blocks at a time.
 	//
-	// Note: The i386 target lacks the required number of registers
-	// for this to be performant, so it is skipped.
-	when ODIN_ARCH != .i386 {
+	// Note: This is skipped on several targets for various reasons.
+	// - i386 lacks the required number of registers
+	// - Generating code when runtime "hardware" SIMD support is impossible
+	//   to detect is pointless, since this will be emulated using GP regs.
+	when ODIN_ARCH != .i386 && !TARGET_IS_DESIGNED_BY_IDIOTS {
 		for ; n >= 4; n = n - 4 {
 			v0, v1, v2, v3 := s0, s1, s2, s3
 

+ 1 - 1
core/crypto/_chacha20/simd256/chacha20_simd256.odin

@@ -41,7 +41,7 @@ _VEC_TWO: simd.u64x4 : {2, 0, 2, 0}
 is_performant :: proc "contextless" () -> bool {
 	req_features :: info.CPU_Features{.avx, .avx2}
 
-	features, ok := info.cpu_features.?
+	features, ok := info.cpu.features.?
 	if !ok {
 		return false
 	}

+ 1 - 1
core/crypto/_chacha20/simd256/chacha20_simd256_stub.odin

@@ -13,5 +13,5 @@ stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int)
 }
 
 hchacha20 :: proc "contextless" (dst, key, iv: []byte) {
-	intrinsics.trap()
+	panic_contextless("crypto/chacha20: simd256 implementation unsupported")
 }

+ 7 - 10
core/crypto/_edwards25519/edwards25519.odin

@@ -11,7 +11,6 @@ See:
 - https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html
 */
 
-import "base:intrinsics"
 import "core:crypto"
 import field "core:crypto/_fiat/field_curve25519"
 import "core:mem"
@@ -32,6 +31,7 @@ import "core:mem"
 // - The group element decoding routine takes the opinionated stance of
 //   rejecting non-canonical encodings.
 
+@(rodata)
 FE_D := field.Tight_Field_Element {
 	929955233495203,
 	466365720129213,
@@ -39,7 +39,7 @@ FE_D := field.Tight_Field_Element {
 	2033849074728123,
 	1442794654840575,
 }
-@(private)
+@(private, rodata)
 FE_A := field.Tight_Field_Element {
 	2251799813685228,
 	2251799813685247,
@@ -47,7 +47,7 @@ FE_A := field.Tight_Field_Element {
 	2251799813685247,
 	2251799813685247,
 }
-@(private)
+@(private, rodata)
 FE_D2 := field.Tight_Field_Element {
 	1859910466990425,
 	932731440258426,
@@ -55,7 +55,7 @@ FE_D2 := field.Tight_Field_Element {
 	1815898335770999,
 	633789495995903,
 }
-@(private)
+@(private, rodata)
 GE_BASEPOINT := Group_Element {
 	field.Tight_Field_Element {
 		1738742601995546,
@@ -80,6 +80,7 @@ GE_BASEPOINT := Group_Element {
 		1821297809914039,
 	},
 }
+@(rodata)
 GE_IDENTITY := Group_Element {
 	field.Tight_Field_Element{0, 0, 0, 0, 0},
 	field.Tight_Field_Element{1, 0, 0, 0, 0},
@@ -107,9 +108,7 @@ ge_set :: proc "contextless" (ge, a: ^Group_Element) {
 
 @(require_results)
 ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool {
-	if len(b) != 32 {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(b) == 32, "edwards25519: invalid group element size")
 	b_ := (^[32]byte)(raw_data(b))
 
 	// Do the work in a scratch element, so that ge is unchanged on
@@ -166,9 +165,7 @@ ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool {
 }
 
 ge_bytes :: proc "contextless" (ge: ^Group_Element, dst: []byte) {
-	if len(dst) != 32 {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(dst) == 32, "edwards25519: invalid group element size")
 	dst_ := (^[32]byte)(raw_data(dst))
 
 	// Convert the element to affine (x, y) representation.

+ 3 - 8
core/crypto/_edwards25519/edwards25519_scalar.odin

@@ -1,6 +1,5 @@
 package _edwards25519
 
-import "base:intrinsics"
 import field "core:crypto/_fiat/field_scalar25519"
 import "core:mem"
 
@@ -8,7 +7,7 @@ Scalar :: field.Montgomery_Domain_Field_Element
 
 // WARNING: This is non-canonical and only to be used when checking if
 // a group element is on the prime-order subgroup.
-@(private)
+@(private, rodata)
 SC_ELL := field.Non_Montgomery_Domain_Field_Element {
 	field.ELL[0],
 	field.ELL[1],
@@ -25,17 +24,13 @@ sc_set_u64 :: proc "contextless" (sc: ^Scalar, i: u64) {
 
 @(require_results)
 sc_set_bytes :: proc "contextless" (sc: ^Scalar, b: []byte) -> bool {
-	if len(b) != 32 {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(b) == 32, "edwards25519: invalid scalar size")
 	b_ := (^[32]byte)(raw_data(b))
 	return field.fe_from_bytes(sc, b_)
 }
 
 sc_set_bytes_rfc8032 :: proc "contextless" (sc: ^Scalar, b: []byte) {
-	if len(b) != 32 {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(b) == 32, "edwards25519: invalid scalar size")
 	b_ := (^[32]byte)(raw_data(b))
 	field.fe_from_bytes_rfc8032(sc, b_)
 }

+ 3 - 0
core/crypto/_fiat/field_curve25519/field51.odin

@@ -42,9 +42,12 @@ import "core:math/bits"
 Loose_Field_Element :: distinct [5]u64
 Tight_Field_Element :: distinct [5]u64
 
+@(rodata)
 FE_ZERO := Tight_Field_Element{0, 0, 0, 0, 0}
+@(rodata)
 FE_ONE := Tight_Field_Element{1, 0, 0, 0, 0}
 
+@(rodata)
 FE_SQRT_M1 := Tight_Field_Element {
 	1718705420411056,
 	234908883556509,

+ 235 - 0
core/crypto/_fiat/field_curve448/field.odin

@@ -0,0 +1,235 @@
+package field_curve448
+
+import "core:mem"
+
+// fe_relax_cast reinterprets a pointer to a Tight_Field_Element as a
+// pointer to a Loose_Field_Element (both are distinct [8]u64, so the
+// memory layout is identical; only the bounds interpretation differs).
+fe_relax_cast :: #force_inline proc "contextless" (
+	arg1: ^Tight_Field_Element,
+) -> ^Loose_Field_Element {
+	return (^Loose_Field_Element)(arg1)
+}
+
+// fe_tighten_cast reinterprets a pointer to a Loose_Field_Element as a
+// pointer to a Tight_Field_Element.  No reduction or carry is performed;
+// NOTE(review): the caller must ensure the limbs already satisfy the
+// tight bounds — this is not checked here.
+fe_tighten_cast :: #force_inline proc "contextless" (
+	arg1: ^Loose_Field_Element,
+) -> ^Tight_Field_Element {
+	return (^Tight_Field_Element)(arg1)
+}
+
+// fe_clear zeroes a field element using mem.zero_explicit, which is
+// intended to survive compiler optimization so secret material is
+// actually scrubbed from memory.
+fe_clear :: proc "contextless" (
+	arg1: $T,
+) where T == ^Tight_Field_Element || T == ^Loose_Field_Element {
+	mem.zero_explicit(arg1, size_of(arg1^))
+}
+
+// fe_clear_vec explicitly zeroes every field element in the slice via
+// fe_clear.
+fe_clear_vec :: proc "contextless" (
+	arg1: $T,
+) where T == []^Tight_Field_Element || T == []^Loose_Field_Element {
+	for fe in arg1 {
+		fe_clear(fe)
+	}
+}
+
+// fe_carry_mul_small multiplies arg1 by the scalar arg2 by widening
+// arg2 into a field element (value placed in the first limb, remaining
+// limbs zero) and reusing the generic fe_carry_mul.
+// NOTE(review): assumes arg2 fits within the loose limb bounds of the
+// unsaturated representation — confirm at call sites.
+fe_carry_mul_small :: proc "contextless" (
+	out1: ^Tight_Field_Element,
+	arg1: ^Loose_Field_Element,
+	arg2: u64,
+) {
+	arg2_ := Loose_Field_Element{arg2, 0, 0, 0, 0, 0, 0, 0}
+	fe_carry_mul(out1, arg1, &arg2_)
+}
+
+// fe_carry_pow2k computes out1 = arg1^(2^arg2) by performing arg2
+// successive squarings.
+fe_carry_pow2k :: proc "contextless" (
+	out1: ^Tight_Field_Element,
+	arg1: ^Loose_Field_Element,
+	arg2: uint,
+) {
+	// Special case: `arg1^(2 * 0) = 1`, though this should never happen.
+	//
+	// NOTE(review): mathematically arg1^(2^0) = arg1, not 1, so this
+	// guard is wrong if it were ever reached — per the comment above it
+	// is believed unreachable; confirm no caller passes arg2 == 0.
+	if arg2 == 0 {
+		fe_one(out1)
+		return
+	}
+
+	// First squaring reads the (loose) input, subsequent squarings
+	// square the accumulator in place via a relaxing cast.
+	fe_carry_square(out1, arg1)
+	for _ in 1 ..< arg2 {
+		fe_carry_square(out1, fe_relax_cast(out1))
+	}
+}
+
+// fe_carry_inv computes the multiplicative inverse, out1 = arg1^(p - 2)
+// mod p for p = 2^448 - 2^224 - 1 (inversion by Fermat's little
+// theorem), using the fixed addition chain below.  Scratch elements are
+// wiped via fe_clear_vec before returning.
+fe_carry_inv :: proc "contextless" (
+	out1: ^Tight_Field_Element,
+	arg1: ^Loose_Field_Element,
+) {
+	// Inversion computation is derived from the addition chain:
+	//
+	//	_10     = 2*1
+	//	_11     = 1 + _10
+	//	_110    = 2*_11
+	//	_111    = 1 + _110
+	//	_111000 = _111 << 3
+	//	_111111 = _111 + _111000
+	//	x12     = _111111 << 6 + _111111
+	//	x24     = x12 << 12 + x12
+	//	i34     = x24 << 6
+	//	x30     = _111111 + i34
+	//	x48     = i34 << 18 + x24
+	//	x96     = x48 << 48 + x48
+	//	x192    = x96 << 96 + x96
+	//	x222    = x192 << 30 + x30
+	//	x223    = 2*x222 + 1
+	//	return    (x223 << 223 + x222) << 2 + 1
+	//
+	// Operations: 447 squares 13 multiplies
+	//
+	// Generated by github.com/mmcloughlin/addchain v0.4.0.
+
+	t0, t1, t2: Tight_Field_Element = ---, ---, ---
+
+	// Step 1: t0 = x^0x2
+	fe_carry_square(&t0, arg1)
+
+	// Step 2: t0 = x^0x3
+	fe_carry_mul(&t0, arg1, fe_relax_cast(&t0))
+
+	// Step 3: t0 = x^0x6
+	fe_carry_square(&t0, fe_relax_cast(&t0))
+
+	// Step 4: t0 = x^0x7
+	fe_carry_mul(&t0, arg1, fe_relax_cast(&t0))
+
+	// Step 7: t1 = x^0x38
+	fe_carry_pow2k(&t1, fe_relax_cast(&t0), 3)
+
+	// Step 8: t0 = x^0x3f
+	fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t1))
+
+	// Step 14: t1 = x^0xfc0
+	fe_carry_pow2k(&t1, fe_relax_cast(&t0), 6)
+
+	// Step 15: t1 = x^0xfff
+	fe_carry_mul(&t1, fe_relax_cast(&t0), fe_relax_cast(&t1))
+
+	// Step 27: t2 = x^0xfff000
+	fe_carry_pow2k(&t2, fe_relax_cast(&t1), 12)
+
+	// Step 28: t1 = x^0xffffff
+	fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2))
+
+	// Step 34: t2 = x^0x3fffffc0
+	fe_carry_pow2k(&t2, fe_relax_cast(&t1), 6)
+
+	// Step 35: t0 = x^0x3fffffff
+	fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t2))
+
+	// Step 53: t2 = x^0xffffff000000
+	fe_carry_pow2k(&t2, fe_relax_cast(&t2), 18)
+
+	// Step 54: t1 = x^0xffffffffffff
+	fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2))
+
+	// Step 102: t2 = x^0xffffffffffff000000000000
+	fe_carry_pow2k(&t2, fe_relax_cast(&t1), 48)
+
+	// Step 103: t1 = x^0xffffffffffffffffffffffff
+	fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2))
+
+	// Step 199: t2 = x^0xffffffffffffffffffffffff000000000000000000000000
+	fe_carry_pow2k(&t2, fe_relax_cast(&t1), 96)
+
+	// Step 200: t1 = x^0xffffffffffffffffffffffffffffffffffffffffffffffff
+	fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2))
+
+	// Step 230: t1 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffc0000000
+	fe_carry_pow2k(&t1, fe_relax_cast(&t1), 30)
+
+	// Step 231: t0 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffff
+	fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t1))
+
+	// Step 232: t1 = x^0x7ffffffffffffffffffffffffffffffffffffffffffffffffffffffe
+	fe_carry_square(&t1, fe_relax_cast(&t0))
+
+	// Step 233: t1 = x^0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffff
+	fe_carry_mul(&t1, arg1, fe_relax_cast(&t1))
+
+	// Step 456: t1 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000000000000000000000000000000000000000000000000000
+	fe_carry_pow2k(&t1, fe_relax_cast(&t1), 223)
+
+	// Step 457: t0 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff
+	fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t1))
+
+	// Step 459: t0 = x^0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffc
+	fe_carry_pow2k(&t0, fe_relax_cast(&t0), 2)
+
+	// Step 460: z = x^0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd
+	fe_carry_mul(out1, arg1, fe_relax_cast(&t0))
+
+	// Wipe the scratch elements (may hold secret-derived values).
+	fe_clear_vec([]^Tight_Field_Element{&t0, &t1, &t2})
+}
+
+// fe_zero sets out1 to the field element 0 (all eight limbs zero).
+fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
+	out1[0] = 0
+	out1[1] = 0
+	out1[2] = 0
+	out1[3] = 0
+	out1[4] = 0
+	out1[5] = 0
+	out1[6] = 0
+	out1[7] = 0
+}
+
+// fe_one sets out1 to the field element 1 (first limb 1, remaining
+// limbs zero).
+fe_one :: proc "contextless" (out1: ^Tight_Field_Element) {
+	out1[0] = 1
+	out1[1] = 0
+	out1[2] = 0
+	out1[3] = 0
+	out1[4] = 0
+	out1[5] = 0
+	out1[6] = 0
+	out1[7] = 0
+}
+
+// fe_set copies arg1 into out1 limb by limb.  All limbs are read into
+// locals before any write, matching the fiat-crypto codegen style
+// (safe even if out1 and arg1 alias).
+fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) {
+	x1 := arg1[0]
+	x2 := arg1[1]
+	x3 := arg1[2]
+	x4 := arg1[3]
+	x5 := arg1[4]
+	x6 := arg1[5]
+	x7 := arg1[6]
+	x8 := arg1[7]
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+	out1[5] = x6
+	out1[6] = x7
+	out1[7] = x8
+}
+
+// fe_cond_swap swaps out1 and out2 when arg1 == 1 and leaves both
+// unchanged when arg1 == 0, using an XOR-swap with an all-ones /
+// all-zeroes mask so both cases perform identical memory accesses.
+//
+// NOTE(review): optimizations are disabled and inlining is forbidden,
+// presumably to keep this constant-time — confirm, and confirm arg1 is
+// only ever 0 or 1 at call sites (other values corrupt both elements).
+@(optimization_mode = "none")
+fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) {
+	// mask is 0 when arg1 == 0, and all ones when arg1 == 1.
+	mask := (u64(arg1) * 0xffffffffffffffff)
+	x := (out1[0] ~ out2[0]) & mask
+	x1, y1 := out1[0] ~ x, out2[0] ~ x
+	x = (out1[1] ~ out2[1]) & mask
+	x2, y2 := out1[1] ~ x, out2[1] ~ x
+	x = (out1[2] ~ out2[2]) & mask
+	x3, y3 := out1[2] ~ x, out2[2] ~ x
+	x = (out1[3] ~ out2[3]) & mask
+	x4, y4 := out1[3] ~ x, out2[3] ~ x
+	x = (out1[4] ~ out2[4]) & mask
+	x5, y5 := out1[4] ~ x, out2[4] ~ x
+	x = (out1[5] ~ out2[5]) & mask
+	x6, y6 := out1[5] ~ x, out2[5] ~ x
+	x = (out1[6] ~ out2[6]) & mask
+	x7, y7 := out1[6] ~ x, out2[6] ~ x
+	x = (out1[7] ~ out2[7]) & mask
+	x8, y8 := out1[7] ~ x, out2[7] ~ x
+	out1[0], out2[0] = x1, y1
+	out1[1], out2[1] = x2, y2
+	out1[2], out2[2] = x3, y3
+	out1[3], out2[3] = x4, y4
+	out1[4], out2[4] = x5, y5
+	out1[5], out2[5] = x6, y6
+	out1[6], out2[6] = x7, y7
+	out1[7], out2[7] = x8, y8
+}

+ 1060 - 0
core/crypto/_fiat/field_curve448/field51.odin

@@ -0,0 +1,1060 @@
+// The BSD 1-Clause License (BSD-1-Clause)
+//
+// Copyright (c) 2015-2020 the fiat-crypto authors (see the AUTHORS file)
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     1. Redistributions of source code must retain the above copyright
+//        notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY the fiat-crypto authors "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Berkeley Software Design,
+// Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package field_curve448
+
+// This file provides arithmetic on the field Z/(2^448 - 2^224 - 1) using
+// unsaturated 64-bit integer arithmetic.  It is derived primarily
+// from the machine generated Golang output from the fiat-crypto project.
+//
+// While the base implementation is provably correct, this implementation
+// makes no such claims as the port and optimizations were done by hand.
+//
+// TODO:
+//  * When fiat-crypto supports it, using saturated 64-bit limbs
+//    instead of 56-bit limbs will be faster, though the gains are
+//    minimal unless adcx/adox/mulx are used.
+
+import fiat "core:crypto/_fiat"
+import "core:math/bits"
+
+Loose_Field_Element :: distinct [8]u64
+Tight_Field_Element :: distinct [8]u64
+
+@(rodata)
+FE_ZERO := Tight_Field_Element{0, 0, 0, 0, 0, 0, 0, 0}
+@(rodata)
+FE_ONE := Tight_Field_Element{1, 0, 0, 0, 0, 0, 0, 0}
+
+// _addcarryx_u56 is a 56-bit add-with-carry: it computes
+// arg1 + arg2 + arg3 (arg1 is the carry-in), returning the low 56 bits
+// in out1 and the carry-out bit in out2.
+_addcarryx_u56 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
+	x1 := ((u64(arg1) + arg2) + arg3)
+	x2 := (x1 & 0xffffffffffffff)
+	x3 := fiat.u1((x1 >> 56))
+	out1 = x2
+	out2 = x3
+	return
+}
+
+// _subborrowx_u56 is a 56-bit subtract-with-borrow: it computes
+// arg2 - arg1 - arg3 (arg1 is the borrow-in), returning the low 56
+// bits in out1 and the borrow-out in out2 (derived from the sign
+// bits of the signed intermediate, shifted down by 56).
+_subborrowx_u56 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
+	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3))
+	x2 := fiat.u1((x1 >> 56))
+	x3 := (u64(x1) & 0xffffffffffffff)
+	out1 = x3
+	out2 = (0x0 - fiat.u1(x2))
+	return
+}
+
+fe_carry_mul :: proc "contextless" (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
+	x2, x1 := bits.mul_u64(arg1[7], arg2[7])
+	x4, x3 := bits.mul_u64(arg1[7], arg2[6])
+	x6, x5 := bits.mul_u64(arg1[7], arg2[5])
+	x8, x7 := bits.mul_u64(arg1[6], arg2[7])
+	x10, x9 := bits.mul_u64(arg1[6], arg2[6])
+	x12, x11 := bits.mul_u64(arg1[5], arg2[7])
+	x14, x13 := bits.mul_u64(arg1[7], arg2[7])
+	x16, x15 := bits.mul_u64(arg1[7], arg2[6])
+	x18, x17 := bits.mul_u64(arg1[7], arg2[5])
+	x20, x19 := bits.mul_u64(arg1[6], arg2[7])
+	x22, x21 := bits.mul_u64(arg1[6], arg2[6])
+	x24, x23 := bits.mul_u64(arg1[5], arg2[7])
+	x26, x25 := bits.mul_u64(arg1[7], arg2[7])
+	x28, x27 := bits.mul_u64(arg1[7], arg2[6])
+	x30, x29 := bits.mul_u64(arg1[7], arg2[5])
+	x32, x31 := bits.mul_u64(arg1[7], arg2[4])
+	x34, x33 := bits.mul_u64(arg1[7], arg2[3])
+	x36, x35 := bits.mul_u64(arg1[7], arg2[2])
+	x38, x37 := bits.mul_u64(arg1[7], arg2[1])
+	x40, x39 := bits.mul_u64(arg1[6], arg2[7])
+	x42, x41 := bits.mul_u64(arg1[6], arg2[6])
+	x44, x43 := bits.mul_u64(arg1[6], arg2[5])
+	x46, x45 := bits.mul_u64(arg1[6], arg2[4])
+	x48, x47 := bits.mul_u64(arg1[6], arg2[3])
+	x50, x49 := bits.mul_u64(arg1[6], arg2[2])
+	x52, x51 := bits.mul_u64(arg1[5], arg2[7])
+	x54, x53 := bits.mul_u64(arg1[5], arg2[6])
+	x56, x55 := bits.mul_u64(arg1[5], arg2[5])
+	x58, x57 := bits.mul_u64(arg1[5], arg2[4])
+	x60, x59 := bits.mul_u64(arg1[5], arg2[3])
+	x62, x61 := bits.mul_u64(arg1[4], arg2[7])
+	x64, x63 := bits.mul_u64(arg1[4], arg2[6])
+	x66, x65 := bits.mul_u64(arg1[4], arg2[5])
+	x68, x67 := bits.mul_u64(arg1[4], arg2[4])
+	x70, x69 := bits.mul_u64(arg1[3], arg2[7])
+	x72, x71 := bits.mul_u64(arg1[3], arg2[6])
+	x74, x73 := bits.mul_u64(arg1[3], arg2[5])
+	x76, x75 := bits.mul_u64(arg1[2], arg2[7])
+	x78, x77 := bits.mul_u64(arg1[2], arg2[6])
+	x80, x79 := bits.mul_u64(arg1[1], arg2[7])
+	x82, x81 := bits.mul_u64(arg1[7], arg2[4])
+	x84, x83 := bits.mul_u64(arg1[7], arg2[3])
+	x86, x85 := bits.mul_u64(arg1[7], arg2[2])
+	x88, x87 := bits.mul_u64(arg1[7], arg2[1])
+	x90, x89 := bits.mul_u64(arg1[6], arg2[5])
+	x92, x91 := bits.mul_u64(arg1[6], arg2[4])
+	x94, x93 := bits.mul_u64(arg1[6], arg2[3])
+	x96, x95 := bits.mul_u64(arg1[6], arg2[2])
+	x98, x97 := bits.mul_u64(arg1[5], arg2[6])
+	x100, x99 := bits.mul_u64(arg1[5], arg2[5])
+	x102, x101 := bits.mul_u64(arg1[5], arg2[4])
+	x104, x103 := bits.mul_u64(arg1[5], arg2[3])
+	x106, x105 := bits.mul_u64(arg1[4], arg2[7])
+	x108, x107 := bits.mul_u64(arg1[4], arg2[6])
+	x110, x109 := bits.mul_u64(arg1[4], arg2[5])
+	x112, x111 := bits.mul_u64(arg1[4], arg2[4])
+	x114, x113 := bits.mul_u64(arg1[3], arg2[7])
+	x116, x115 := bits.mul_u64(arg1[3], arg2[6])
+	x118, x117 := bits.mul_u64(arg1[3], arg2[5])
+	x120, x119 := bits.mul_u64(arg1[2], arg2[7])
+	x122, x121 := bits.mul_u64(arg1[2], arg2[6])
+	x124, x123 := bits.mul_u64(arg1[1], arg2[7])
+	x126, x125 := bits.mul_u64(arg1[7], arg2[0])
+	x128, x127 := bits.mul_u64(arg1[6], arg2[1])
+	x130, x129 := bits.mul_u64(arg1[6], arg2[0])
+	x132, x131 := bits.mul_u64(arg1[5], arg2[2])
+	x134, x133 := bits.mul_u64(arg1[5], arg2[1])
+	x136, x135 := bits.mul_u64(arg1[5], arg2[0])
+	x138, x137 := bits.mul_u64(arg1[4], arg2[3])
+	x140, x139 := bits.mul_u64(arg1[4], arg2[2])
+	x142, x141 := bits.mul_u64(arg1[4], arg2[1])
+	x144, x143 := bits.mul_u64(arg1[4], arg2[0])
+	x146, x145 := bits.mul_u64(arg1[3], arg2[4])
+	x148, x147 := bits.mul_u64(arg1[3], arg2[3])
+	x150, x149 := bits.mul_u64(arg1[3], arg2[2])
+	x152, x151 := bits.mul_u64(arg1[3], arg2[1])
+	x154, x153 := bits.mul_u64(arg1[3], arg2[0])
+	x156, x155 := bits.mul_u64(arg1[2], arg2[5])
+	x158, x157 := bits.mul_u64(arg1[2], arg2[4])
+	x160, x159 := bits.mul_u64(arg1[2], arg2[3])
+	x162, x161 := bits.mul_u64(arg1[2], arg2[2])
+	x164, x163 := bits.mul_u64(arg1[2], arg2[1])
+	x166, x165 := bits.mul_u64(arg1[2], arg2[0])
+	x168, x167 := bits.mul_u64(arg1[1], arg2[6])
+	x170, x169 := bits.mul_u64(arg1[1], arg2[5])
+	x172, x171 := bits.mul_u64(arg1[1], arg2[4])
+	x174, x173 := bits.mul_u64(arg1[1], arg2[3])
+	x176, x175 := bits.mul_u64(arg1[1], arg2[2])
+	x178, x177 := bits.mul_u64(arg1[1], arg2[1])
+	x180, x179 := bits.mul_u64(arg1[1], arg2[0])
+	x182, x181 := bits.mul_u64(arg1[0], arg2[7])
+	x184, x183 := bits.mul_u64(arg1[0], arg2[6])
+	x186, x185 := bits.mul_u64(arg1[0], arg2[5])
+	x188, x187 := bits.mul_u64(arg1[0], arg2[4])
+	x190, x189 := bits.mul_u64(arg1[0], arg2[3])
+	x192, x191 := bits.mul_u64(arg1[0], arg2[2])
+	x194, x193 := bits.mul_u64(arg1[0], arg2[1])
+	x196, x195 := bits.mul_u64(arg1[0], arg2[0])
+	x197, x198 := bits.add_u64(x43, x31, u64(0x0))
+	x199, _ := bits.add_u64(x44, x32, u64(fiat.u1(x198)))
+	x201, x202 := bits.add_u64(x53, x197, u64(0x0))
+	x203, _ := bits.add_u64(x54, x199, u64(fiat.u1(x202)))
+	x205, x206 := bits.add_u64(x61, x201, u64(0x0))
+	x207, _ := bits.add_u64(x62, x203, u64(fiat.u1(x206)))
+	x209, x210 := bits.add_u64(x153, x205, u64(0x0))
+	x211, _ := bits.add_u64(x154, x207, u64(fiat.u1(x210)))
+	x213, x214 := bits.add_u64(x163, x209, u64(0x0))
+	x215, _ := bits.add_u64(x164, x211, u64(fiat.u1(x214)))
+	x217, x218 := bits.add_u64(x175, x213, u64(0x0))
+	x219, _ := bits.add_u64(x176, x215, u64(fiat.u1(x218)))
+	x221, x222 := bits.add_u64(x189, x217, u64(0x0))
+	x223, _ := bits.add_u64(x190, x219, u64(fiat.u1(x222)))
+	x225 := ((x221 >> 56) | ((x223 << 8) & 0xffffffffffffffff))
+	x226 := (x221 & 0xffffffffffffff)
+	x227, x228 := bits.add_u64(x89, x81, u64(0x0))
+	x229, _ := bits.add_u64(x90, x82, u64(fiat.u1(x228)))
+	x231, x232 := bits.add_u64(x97, x227, u64(0x0))
+	x233, _ := bits.add_u64(x98, x229, u64(fiat.u1(x232)))
+	x235, x236 := bits.add_u64(x105, x231, u64(0x0))
+	x237, _ := bits.add_u64(x106, x233, u64(fiat.u1(x236)))
+	x239, x240 := bits.add_u64(x125, x235, u64(0x0))
+	x241, _ := bits.add_u64(x126, x237, u64(fiat.u1(x240)))
+	x243, x244 := bits.add_u64(x127, x239, u64(0x0))
+	x245, _ := bits.add_u64(x128, x241, u64(fiat.u1(x244)))
+	x247, x248 := bits.add_u64(x131, x243, u64(0x0))
+	x249, _ := bits.add_u64(x132, x245, u64(fiat.u1(x248)))
+	x251, x252 := bits.add_u64(x137, x247, u64(0x0))
+	x253, _ := bits.add_u64(x138, x249, u64(fiat.u1(x252)))
+	x255, x256 := bits.add_u64(x145, x251, u64(0x0))
+	x257, _ := bits.add_u64(x146, x253, u64(fiat.u1(x256)))
+	x259, x260 := bits.add_u64(x155, x255, u64(0x0))
+	x261, _ := bits.add_u64(x156, x257, u64(fiat.u1(x260)))
+	x263, x264 := bits.add_u64(x167, x259, u64(0x0))
+	x265, _ := bits.add_u64(x168, x261, u64(fiat.u1(x264)))
+	x267, x268 := bits.add_u64(x181, x263, u64(0x0))
+	x269, _ := bits.add_u64(x182, x265, u64(fiat.u1(x268)))
+	x271, x272 := bits.add_u64(x25, x13, u64(0x0))
+	x273, _ := bits.add_u64(x26, x14, u64(fiat.u1(x272)))
+	x275, x276 := bits.add_u64(x83, x271, u64(0x0))
+	x277, _ := bits.add_u64(x84, x273, u64(fiat.u1(x276)))
+	x279, x280 := bits.add_u64(x91, x275, u64(0x0))
+	x281, _ := bits.add_u64(x92, x277, u64(fiat.u1(x280)))
+	x283, x284 := bits.add_u64(x99, x279, u64(0x0))
+	x285, _ := bits.add_u64(x100, x281, u64(fiat.u1(x284)))
+	x287, x288 := bits.add_u64(x107, x283, u64(0x0))
+	x289, _ := bits.add_u64(x108, x285, u64(fiat.u1(x288)))
+	x291, x292 := bits.add_u64(x113, x287, u64(0x0))
+	x293, _ := bits.add_u64(x114, x289, u64(fiat.u1(x292)))
+	x295, x296 := bits.add_u64(x129, x291, u64(0x0))
+	x297, _ := bits.add_u64(x130, x293, u64(fiat.u1(x296)))
+	x299, x300 := bits.add_u64(x133, x295, u64(0x0))
+	x301, _ := bits.add_u64(x134, x297, u64(fiat.u1(x300)))
+	x303, x304 := bits.add_u64(x139, x299, u64(0x0))
+	x305, _ := bits.add_u64(x140, x301, u64(fiat.u1(x304)))
+	x307, x308 := bits.add_u64(x147, x303, u64(0x0))
+	x309, _ := bits.add_u64(x148, x305, u64(fiat.u1(x308)))
+	x311, x312 := bits.add_u64(x157, x307, u64(0x0))
+	x313, _ := bits.add_u64(x158, x309, u64(fiat.u1(x312)))
+	x315, x316 := bits.add_u64(x169, x311, u64(0x0))
+	x317, _ := bits.add_u64(x170, x313, u64(fiat.u1(x316)))
+	x319, x320 := bits.add_u64(x183, x315, u64(0x0))
+	x321, _ := bits.add_u64(x184, x317, u64(fiat.u1(x320)))
+	x323, x324 := bits.add_u64(x19, x15, u64(0x0))
+	x325, _ := bits.add_u64(x20, x16, u64(fiat.u1(x324)))
+	x327, x328 := bits.add_u64(x27, x323, u64(0x0))
+	x329, _ := bits.add_u64(x28, x325, u64(fiat.u1(x328)))
+	x331, x332 := bits.add_u64(x39, x327, u64(0x0))
+	x333, _ := bits.add_u64(x40, x329, u64(fiat.u1(x332)))
+	x335, x336 := bits.add_u64(x85, x331, u64(0x0))
+	x337, _ := bits.add_u64(x86, x333, u64(fiat.u1(x336)))
+	x339, x340 := bits.add_u64(x93, x335, u64(0x0))
+	x341, _ := bits.add_u64(x94, x337, u64(fiat.u1(x340)))
+	x343, x344 := bits.add_u64(x101, x339, u64(0x0))
+	x345, _ := bits.add_u64(x102, x341, u64(fiat.u1(x344)))
+	x347, x348 := bits.add_u64(x109, x343, u64(0x0))
+	x349, _ := bits.add_u64(x110, x345, u64(fiat.u1(x348)))
+	x351, x352 := bits.add_u64(x115, x347, u64(0x0))
+	x353, _ := bits.add_u64(x116, x349, u64(fiat.u1(x352)))
+	x355, x356 := bits.add_u64(x119, x351, u64(0x0))
+	x357, _ := bits.add_u64(x120, x353, u64(fiat.u1(x356)))
+	x359, x360 := bits.add_u64(x135, x355, u64(0x0))
+	x361, _ := bits.add_u64(x136, x357, u64(fiat.u1(x360)))
+	x363, x364 := bits.add_u64(x141, x359, u64(0x0))
+	x365, _ := bits.add_u64(x142, x361, u64(fiat.u1(x364)))
+	x367, x368 := bits.add_u64(x149, x363, u64(0x0))
+	x369, _ := bits.add_u64(x150, x365, u64(fiat.u1(x368)))
+	x371, x372 := bits.add_u64(x159, x367, u64(0x0))
+	x373, _ := bits.add_u64(x160, x369, u64(fiat.u1(x372)))
+	x375, x376 := bits.add_u64(x171, x371, u64(0x0))
+	x377, _ := bits.add_u64(x172, x373, u64(fiat.u1(x376)))
+	x379, x380 := bits.add_u64(x185, x375, u64(0x0))
+	x381, _ := bits.add_u64(x186, x377, u64(fiat.u1(x380)))
+	x383, x384 := bits.add_u64(x21, x17, u64(0x0))
+	x385, _ := bits.add_u64(x22, x18, u64(fiat.u1(x384)))
+	x387, x388 := bits.add_u64(x23, x383, u64(0x0))
+	x389, _ := bits.add_u64(x24, x385, u64(fiat.u1(x388)))
+	x391, x392 := bits.add_u64(x29, x387, u64(0x0))
+	x393, _ := bits.add_u64(x30, x389, u64(fiat.u1(x392)))
+	x395, x396 := bits.add_u64(x41, x391, u64(0x0))
+	x397, _ := bits.add_u64(x42, x393, u64(fiat.u1(x396)))
+	x399, x400 := bits.add_u64(x51, x395, u64(0x0))
+	x401, _ := bits.add_u64(x52, x397, u64(fiat.u1(x400)))
+	x403, x404 := bits.add_u64(x87, x399, u64(0x0))
+	x405, _ := bits.add_u64(x88, x401, u64(fiat.u1(x404)))
+	x407, x408 := bits.add_u64(x95, x403, u64(0x0))
+	x409, _ := bits.add_u64(x96, x405, u64(fiat.u1(x408)))
+	x411, x412 := bits.add_u64(x103, x407, u64(0x0))
+	x413, _ := bits.add_u64(x104, x409, u64(fiat.u1(x412)))
+	x415, x416 := bits.add_u64(x111, x411, u64(0x0))
+	x417, _ := bits.add_u64(x112, x413, u64(fiat.u1(x416)))
+	x419, x420 := bits.add_u64(x117, x415, u64(0x0))
+	x421, _ := bits.add_u64(x118, x417, u64(fiat.u1(x420)))
+	x423, x424 := bits.add_u64(x121, x419, u64(0x0))
+	x425, _ := bits.add_u64(x122, x421, u64(fiat.u1(x424)))
+	x427, x428 := bits.add_u64(x123, x423, u64(0x0))
+	x429, _ := bits.add_u64(x124, x425, u64(fiat.u1(x428)))
+	x431, x432 := bits.add_u64(x143, x427, u64(0x0))
+	x433, _ := bits.add_u64(x144, x429, u64(fiat.u1(x432)))
+	x435, x436 := bits.add_u64(x151, x431, u64(0x0))
+	x437, _ := bits.add_u64(x152, x433, u64(fiat.u1(x436)))
+	x439, x440 := bits.add_u64(x161, x435, u64(0x0))
+	x441, _ := bits.add_u64(x162, x437, u64(fiat.u1(x440)))
+	x443, x444 := bits.add_u64(x173, x439, u64(0x0))
+	x445, _ := bits.add_u64(x174, x441, u64(fiat.u1(x444)))
+	x447, x448 := bits.add_u64(x187, x443, u64(0x0))
+	x449, _ := bits.add_u64(x188, x445, u64(fiat.u1(x448)))
+	x451, x452 := bits.add_u64(x33, x1, u64(0x0))
+	x453, _ := bits.add_u64(x34, x2, u64(fiat.u1(x452)))
+	x455, x456 := bits.add_u64(x45, x451, u64(0x0))
+	x457, _ := bits.add_u64(x46, x453, u64(fiat.u1(x456)))
+	x459, x460 := bits.add_u64(x55, x455, u64(0x0))
+	x461, _ := bits.add_u64(x56, x457, u64(fiat.u1(x460)))
+	x463, x464 := bits.add_u64(x63, x459, u64(0x0))
+	x465, _ := bits.add_u64(x64, x461, u64(fiat.u1(x464)))
+	x467, x468 := bits.add_u64(x69, x463, u64(0x0))
+	x469, _ := bits.add_u64(x70, x465, u64(fiat.u1(x468)))
+	x471, x472 := bits.add_u64(x165, x467, u64(0x0))
+	x473, _ := bits.add_u64(x166, x469, u64(fiat.u1(x472)))
+	x475, x476 := bits.add_u64(x177, x471, u64(0x0))
+	x477, _ := bits.add_u64(x178, x473, u64(fiat.u1(x476)))
+	x479, x480 := bits.add_u64(x191, x475, u64(0x0))
+	x481, _ := bits.add_u64(x192, x477, u64(fiat.u1(x480)))
+	x483, x484 := bits.add_u64(x7, x3, u64(0x0))
+	x485, _ := bits.add_u64(x8, x4, u64(fiat.u1(x484)))
+	x487, x488 := bits.add_u64(x35, x483, u64(0x0))
+	x489, _ := bits.add_u64(x36, x485, u64(fiat.u1(x488)))
+	x491, x492 := bits.add_u64(x47, x487, u64(0x0))
+	x493, _ := bits.add_u64(x48, x489, u64(fiat.u1(x492)))
+	x495, x496 := bits.add_u64(x57, x491, u64(0x0))
+	x497, _ := bits.add_u64(x58, x493, u64(fiat.u1(x496)))
+	x499, x500 := bits.add_u64(x65, x495, u64(0x0))
+	x501, _ := bits.add_u64(x66, x497, u64(fiat.u1(x500)))
+	x503, x504 := bits.add_u64(x71, x499, u64(0x0))
+	x505, _ := bits.add_u64(x72, x501, u64(fiat.u1(x504)))
+	x507, x508 := bits.add_u64(x75, x503, u64(0x0))
+	x509, _ := bits.add_u64(x76, x505, u64(fiat.u1(x508)))
+	x511, x512 := bits.add_u64(x179, x507, u64(0x0))
+	x513, _ := bits.add_u64(x180, x509, u64(fiat.u1(x512)))
+	x515, x516 := bits.add_u64(x193, x511, u64(0x0))
+	x517, _ := bits.add_u64(x194, x513, u64(fiat.u1(x516)))
+	x519, x520 := bits.add_u64(x9, x5, u64(0x0))
+	x521, _ := bits.add_u64(x10, x6, u64(fiat.u1(x520)))
+	x523, x524 := bits.add_u64(x11, x519, u64(0x0))
+	x525, _ := bits.add_u64(x12, x521, u64(fiat.u1(x524)))
+	x527, x528 := bits.add_u64(x37, x523, u64(0x0))
+	x529, _ := bits.add_u64(x38, x525, u64(fiat.u1(x528)))
+	x531, x532 := bits.add_u64(x49, x527, u64(0x0))
+	x533, _ := bits.add_u64(x50, x529, u64(fiat.u1(x532)))
+	x535, x536 := bits.add_u64(x59, x531, u64(0x0))
+	x537, _ := bits.add_u64(x60, x533, u64(fiat.u1(x536)))
+	x539, x540 := bits.add_u64(x67, x535, u64(0x0))
+	x541, _ := bits.add_u64(x68, x537, u64(fiat.u1(x540)))
+	x543, x544 := bits.add_u64(x73, x539, u64(0x0))
+	x545, _ := bits.add_u64(x74, x541, u64(fiat.u1(x544)))
+	x547, x548 := bits.add_u64(x77, x543, u64(0x0))
+	x549, _ := bits.add_u64(x78, x545, u64(fiat.u1(x548)))
+	x551, x552 := bits.add_u64(x79, x547, u64(0x0))
+	x553, _ := bits.add_u64(x80, x549, u64(fiat.u1(x552)))
+	x555, x556 := bits.add_u64(x195, x551, u64(0x0))
+	x557, _ := bits.add_u64(x196, x553, u64(fiat.u1(x556)))
+	x559, x560 := bits.add_u64(x225, x447, u64(0x0))
+	x561 := (u64(fiat.u1(x560)) + x449)
+	x562 := ((x267 >> 56) | ((x269 << 8) & 0xffffffffffffffff))
+	x563 := (x267 & 0xffffffffffffff)
+	x564, x565 := bits.add_u64(x559, x562, u64(0x0))
+	x566 := (u64(fiat.u1(x565)) + x561)
+	x567 := ((x564 >> 56) | ((x566 << 8) & 0xffffffffffffffff))
+	x568 := (x564 & 0xffffffffffffff)
+	x569, x570 := bits.add_u64(x555, x562, u64(0x0))
+	x571 := (u64(fiat.u1(x570)) + x557)
+	x572, x573 := bits.add_u64(x567, x379, u64(0x0))
+	x574 := (u64(fiat.u1(x573)) + x381)
+	x575 := ((x569 >> 56) | ((x571 << 8) & 0xffffffffffffffff))
+	x576 := (x569 & 0xffffffffffffff)
+	x577, x578 := bits.add_u64(x575, x515, u64(0x0))
+	x579 := (u64(fiat.u1(x578)) + x517)
+	x580 := ((x572 >> 56) | ((x574 << 8) & 0xffffffffffffffff))
+	x581 := (x572 & 0xffffffffffffff)
+	x582, x583 := bits.add_u64(x580, x319, u64(0x0))
+	x584 := (u64(fiat.u1(x583)) + x321)
+	x585 := ((x577 >> 56) | ((x579 << 8) & 0xffffffffffffffff))
+	x586 := (x577 & 0xffffffffffffff)
+	x587, x588 := bits.add_u64(x585, x479, u64(0x0))
+	x589 := (u64(fiat.u1(x588)) + x481)
+	x590 := ((x582 >> 56) | ((x584 << 8) & 0xffffffffffffffff))
+	x591 := (x582 & 0xffffffffffffff)
+	x592 := (x590 + x563)
+	x593 := ((x587 >> 56) | ((x589 << 8) & 0xffffffffffffffff))
+	x594 := (x587 & 0xffffffffffffff)
+	x595 := (x593 + x226)
+	x596 := (x592 >> 56)
+	x597 := (x592 & 0xffffffffffffff)
+	x598 := (x595 >> 56)
+	x599 := (x595 & 0xffffffffffffff)
+	x600 := (x568 + x596)
+	x601 := (x576 + x596)
+	x602 := (x598 + x600)
+	x603 := fiat.u1((x602 >> 56))
+	x604 := (x602 & 0xffffffffffffff)
+	x605 := (u64(x603) + x581)
+	x606 := fiat.u1((x601 >> 56))
+	x607 := (x601 & 0xffffffffffffff)
+	x608 := (u64(x606) + x586)
+	out1[0] = x607
+	out1[1] = x608
+	out1[2] = x594
+	out1[3] = x599
+	out1[4] = x604
+	out1[5] = x605
+	out1[6] = x591
+	out1[7] = x597
+}
+
+fe_carry_square :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+	x1 := arg1[7]
+	x2 := arg1[7]
+	x3 := (x1 * 0x2)
+	x4 := (x2 * 0x2)
+	x5 := (arg1[7] * 0x2)
+	x6 := arg1[6]
+	x7 := arg1[6]
+	x8 := (x6 * 0x2)
+	x9 := (x7 * 0x2)
+	x10 := (arg1[6] * 0x2)
+	x11 := arg1[5]
+	x12 := arg1[5]
+	x13 := (x11 * 0x2)
+	x14 := (x12 * 0x2)
+	x15 := (arg1[5] * 0x2)
+	x16 := arg1[4]
+	x17 := arg1[4]
+	x18 := (arg1[4] * 0x2)
+	x19 := (arg1[3] * 0x2)
+	x20 := (arg1[2] * 0x2)
+	x21 := (arg1[1] * 0x2)
+	x23, x22 := bits.mul_u64(arg1[7], x1)
+	x25, x24 := bits.mul_u64(arg1[6], x3)
+	x27, x26 := bits.mul_u64(arg1[6], x6)
+	x29, x28 := bits.mul_u64(arg1[5], x3)
+	x31, x30 := bits.mul_u64(arg1[7], x1)
+	x33, x32 := bits.mul_u64(arg1[6], x3)
+	x35, x34 := bits.mul_u64(arg1[6], x6)
+	x37, x36 := bits.mul_u64(arg1[5], x3)
+	x39, x38 := bits.mul_u64(arg1[7], x2)
+	x41, x40 := bits.mul_u64(arg1[6], x4)
+	x43, x42 := bits.mul_u64(arg1[6], x7)
+	x45, x44 := bits.mul_u64(arg1[5], x4)
+	x47, x46 := bits.mul_u64(arg1[5], x9)
+	x49, x48 := bits.mul_u64(arg1[5], x8)
+	x51, x50 := bits.mul_u64(arg1[5], x12)
+	x53, x52 := bits.mul_u64(arg1[5], x11)
+	x55, x54 := bits.mul_u64(arg1[4], x4)
+	x57, x56 := bits.mul_u64(arg1[4], x3)
+	x59, x58 := bits.mul_u64(arg1[4], x9)
+	x61, x60 := bits.mul_u64(arg1[4], x8)
+	x63, x62 := bits.mul_u64(arg1[4], x14)
+	x65, x64 := bits.mul_u64(arg1[4], x13)
+	x67, x66 := bits.mul_u64(arg1[4], x17)
+	x69, x68 := bits.mul_u64(arg1[4], x16)
+	x71, x70 := bits.mul_u64(arg1[3], x4)
+	x73, x72 := bits.mul_u64(arg1[3], x3)
+	x75, x74 := bits.mul_u64(arg1[3], x9)
+	x77, x76 := bits.mul_u64(arg1[3], x8)
+	x79, x78 := bits.mul_u64(arg1[3], x14)
+	x81, x80 := bits.mul_u64(arg1[3], x13)
+	x83, x82 := bits.mul_u64(arg1[3], x18)
+	x85, x84 := bits.mul_u64(arg1[3], arg1[3])
+	x87, x86 := bits.mul_u64(arg1[2], x4)
+	x89, x88 := bits.mul_u64(arg1[2], x3)
+	x91, x90 := bits.mul_u64(arg1[2], x9)
+	x93, x92 := bits.mul_u64(arg1[2], x8)
+	x95, x94 := bits.mul_u64(arg1[2], x15)
+	x97, x96 := bits.mul_u64(arg1[2], x18)
+	x99, x98 := bits.mul_u64(arg1[2], x19)
+	x101, x100 := bits.mul_u64(arg1[2], arg1[2])
+	x103, x102 := bits.mul_u64(arg1[1], x4)
+	x105, x104 := bits.mul_u64(arg1[1], x3)
+	x107, x106 := bits.mul_u64(arg1[1], x10)
+	x109, x108 := bits.mul_u64(arg1[1], x15)
+	x111, x110 := bits.mul_u64(arg1[1], x18)
+	x113, x112 := bits.mul_u64(arg1[1], x19)
+	x115, x114 := bits.mul_u64(arg1[1], x20)
+	x117, x116 := bits.mul_u64(arg1[1], arg1[1])
+	x119, x118 := bits.mul_u64(arg1[0], x5)
+	x121, x120 := bits.mul_u64(arg1[0], x10)
+	x123, x122 := bits.mul_u64(arg1[0], x15)
+	x125, x124 := bits.mul_u64(arg1[0], x18)
+	x127, x126 := bits.mul_u64(arg1[0], x19)
+	x129, x128 := bits.mul_u64(arg1[0], x20)
+	x131, x130 := bits.mul_u64(arg1[0], x21)
+	x133, x132 := bits.mul_u64(arg1[0], arg1[0])
+	x134, x135 := bits.add_u64(x54, x46, u64(0x0))
+	x136, _ := bits.add_u64(x55, x47, u64(fiat.u1(x135)))
+	x138, x139 := bits.add_u64(x114, x134, u64(0x0))
+	x140, _ := bits.add_u64(x115, x136, u64(fiat.u1(x139)))
+	x142, x143 := bits.add_u64(x126, x138, u64(0x0))
+	x144, _ := bits.add_u64(x127, x140, u64(fiat.u1(x143)))
+	x146 := ((x142 >> 56) | ((x144 << 8) & 0xffffffffffffffff))
+	x147 := (x142 & 0xffffffffffffff)
+	x148, x149 := bits.add_u64(x56, x48, u64(0x0))
+	x150, _ := bits.add_u64(x57, x49, u64(fiat.u1(x149)))
+	x152, x153 := bits.add_u64(x82, x148, u64(0x0))
+	x154, _ := bits.add_u64(x83, x150, u64(fiat.u1(x153)))
+	x156, x157 := bits.add_u64(x94, x152, u64(0x0))
+	x158, _ := bits.add_u64(x95, x154, u64(fiat.u1(x157)))
+	x160, x161 := bits.add_u64(x106, x156, u64(0x0))
+	x162, _ := bits.add_u64(x107, x158, u64(fiat.u1(x161)))
+	x164, x165 := bits.add_u64(x118, x160, u64(0x0))
+	x166, _ := bits.add_u64(x119, x162, u64(fiat.u1(x165)))
+	x168, x169 := bits.add_u64(x38, x30, u64(0x0))
+	x170, _ := bits.add_u64(x39, x31, u64(fiat.u1(x169)))
+	x172, x173 := bits.add_u64(x52, x168, u64(0x0))
+	x174, _ := bits.add_u64(x53, x170, u64(fiat.u1(x173)))
+	x176, x177 := bits.add_u64(x60, x172, u64(0x0))
+	x178, _ := bits.add_u64(x61, x174, u64(fiat.u1(x177)))
+	x180, x181 := bits.add_u64(x72, x176, u64(0x0))
+	x182, _ := bits.add_u64(x73, x178, u64(fiat.u1(x181)))
+	x184, x185 := bits.add_u64(x84, x180, u64(0x0))
+	x186, _ := bits.add_u64(x85, x182, u64(fiat.u1(x185)))
+	x188, x189 := bits.add_u64(x96, x184, u64(0x0))
+	x190, _ := bits.add_u64(x97, x186, u64(fiat.u1(x189)))
+	x192, x193 := bits.add_u64(x108, x188, u64(0x0))
+	x194, _ := bits.add_u64(x109, x190, u64(fiat.u1(x193)))
+	x196, x197 := bits.add_u64(x120, x192, u64(0x0))
+	x198, _ := bits.add_u64(x121, x194, u64(fiat.u1(x197)))
+	x200, x201 := bits.add_u64(x40, x32, u64(0x0))
+	x202, _ := bits.add_u64(x41, x33, u64(fiat.u1(x201)))
+	x204, x205 := bits.add_u64(x64, x200, u64(0x0))
+	x206, _ := bits.add_u64(x65, x202, u64(fiat.u1(x205)))
+	x208, x209 := bits.add_u64(x76, x204, u64(0x0))
+	x210, _ := bits.add_u64(x77, x206, u64(fiat.u1(x209)))
+	x212, x213 := bits.add_u64(x88, x208, u64(0x0))
+	x214, _ := bits.add_u64(x89, x210, u64(fiat.u1(x213)))
+	x216, x217 := bits.add_u64(x98, x212, u64(0x0))
+	x218, _ := bits.add_u64(x99, x214, u64(fiat.u1(x217)))
+	x220, x221 := bits.add_u64(x110, x216, u64(0x0))
+	x222, _ := bits.add_u64(x111, x218, u64(fiat.u1(x221)))
+	x224, x225 := bits.add_u64(x122, x220, u64(0x0))
+	x226, _ := bits.add_u64(x123, x222, u64(fiat.u1(x225)))
+	x228, x229 := bits.add_u64(x36, x34, u64(0x0))
+	x230, _ := bits.add_u64(x37, x35, u64(fiat.u1(x229)))
+	x232, x233 := bits.add_u64(x42, x228, u64(0x0))
+	x234, _ := bits.add_u64(x43, x230, u64(fiat.u1(x233)))
+	x236, x237 := bits.add_u64(x44, x232, u64(0x0))
+	x238, _ := bits.add_u64(x45, x234, u64(fiat.u1(x237)))
+	x240, x241 := bits.add_u64(x68, x236, u64(0x0))
+	x242, _ := bits.add_u64(x69, x238, u64(fiat.u1(x241)))
+	x244, x245 := bits.add_u64(x80, x240, u64(0x0))
+	x246, _ := bits.add_u64(x81, x242, u64(fiat.u1(x245)))
+	x248, x249 := bits.add_u64(x92, x244, u64(0x0))
+	x250, _ := bits.add_u64(x93, x246, u64(fiat.u1(x249)))
+	x252, x253 := bits.add_u64(x100, x248, u64(0x0))
+	x254, _ := bits.add_u64(x101, x250, u64(fiat.u1(x253)))
+	x256, x257 := bits.add_u64(x104, x252, u64(0x0))
+	x258, _ := bits.add_u64(x105, x254, u64(fiat.u1(x257)))
+	x260, x261 := bits.add_u64(x112, x256, u64(0x0))
+	x262, _ := bits.add_u64(x113, x258, u64(fiat.u1(x261)))
+	x264, x265 := bits.add_u64(x124, x260, u64(0x0))
+	x266, _ := bits.add_u64(x125, x262, u64(fiat.u1(x265)))
+	x268, x269 := bits.add_u64(x50, x22, u64(0x0))
+	x270, _ := bits.add_u64(x51, x23, u64(fiat.u1(x269)))
+	x272, x273 := bits.add_u64(x58, x268, u64(0x0))
+	x274, _ := bits.add_u64(x59, x270, u64(fiat.u1(x273)))
+	x276, x277 := bits.add_u64(x70, x272, u64(0x0))
+	x278, _ := bits.add_u64(x71, x274, u64(fiat.u1(x277)))
+	x280, x281 := bits.add_u64(x116, x276, u64(0x0))
+	x282, _ := bits.add_u64(x117, x278, u64(fiat.u1(x281)))
+	x284, x285 := bits.add_u64(x128, x280, u64(0x0))
+	x286, _ := bits.add_u64(x129, x282, u64(fiat.u1(x285)))
+	x288, x289 := bits.add_u64(x62, x24, u64(0x0))
+	x290, _ := bits.add_u64(x63, x25, u64(fiat.u1(x289)))
+	x292, x293 := bits.add_u64(x74, x288, u64(0x0))
+	x294, _ := bits.add_u64(x75, x290, u64(fiat.u1(x293)))
+	x296, x297 := bits.add_u64(x86, x292, u64(0x0))
+	x298, _ := bits.add_u64(x87, x294, u64(fiat.u1(x297)))
+	x300, x301 := bits.add_u64(x130, x296, u64(0x0))
+	x302, _ := bits.add_u64(x131, x298, u64(fiat.u1(x301)))
+	x304, x305 := bits.add_u64(x28, x26, u64(0x0))
+	x306, _ := bits.add_u64(x29, x27, u64(fiat.u1(x305)))
+	x308, x309 := bits.add_u64(x66, x304, u64(0x0))
+	x310, _ := bits.add_u64(x67, x306, u64(fiat.u1(x309)))
+	x312, x313 := bits.add_u64(x78, x308, u64(0x0))
+	x314, _ := bits.add_u64(x79, x310, u64(fiat.u1(x313)))
+	x316, x317 := bits.add_u64(x90, x312, u64(0x0))
+	x318, _ := bits.add_u64(x91, x314, u64(fiat.u1(x317)))
+	x320, x321 := bits.add_u64(x102, x316, u64(0x0))
+	x322, _ := bits.add_u64(x103, x318, u64(fiat.u1(x321)))
+	x324, x325 := bits.add_u64(x132, x320, u64(0x0))
+	x326, _ := bits.add_u64(x133, x322, u64(fiat.u1(x325)))
+	x328, x329 := bits.add_u64(x146, x264, u64(0x0))
+	x330 := (u64(fiat.u1(x329)) + x266)
+	x331 := ((x164 >> 56) | ((x166 << 8) & 0xffffffffffffffff))
+	x332 := (x164 & 0xffffffffffffff)
+	x333, x334 := bits.add_u64(x328, x331, u64(0x0))
+	x335 := (u64(fiat.u1(x334)) + x330)
+	x336 := ((x333 >> 56) | ((x335 << 8) & 0xffffffffffffffff))
+	x337 := (x333 & 0xffffffffffffff)
+	x338, x339 := bits.add_u64(x324, x331, u64(0x0))
+	x340 := (u64(fiat.u1(x339)) + x326)
+	x341, x342 := bits.add_u64(x336, x224, u64(0x0))
+	x343 := (u64(fiat.u1(x342)) + x226)
+	x344 := ((x338 >> 56) | ((x340 << 8) & 0xffffffffffffffff))
+	x345 := (x338 & 0xffffffffffffff)
+	x346, x347 := bits.add_u64(x344, x300, u64(0x0))
+	x348 := (u64(fiat.u1(x347)) + x302)
+	x349 := ((x341 >> 56) | ((x343 << 8) & 0xffffffffffffffff))
+	x350 := (x341 & 0xffffffffffffff)
+	x351, x352 := bits.add_u64(x349, x196, u64(0x0))
+	x353 := (u64(fiat.u1(x352)) + x198)
+	x354 := ((x346 >> 56) | ((x348 << 8) & 0xffffffffffffffff))
+	x355 := (x346 & 0xffffffffffffff)
+	x356, x357 := bits.add_u64(x354, x284, u64(0x0))
+	x358 := (u64(fiat.u1(x357)) + x286)
+	x359 := ((x351 >> 56) | ((x353 << 8) & 0xffffffffffffffff))
+	x360 := (x351 & 0xffffffffffffff)
+	x361 := (x359 + x332)
+	x362 := ((x356 >> 56) | ((x358 << 8) & 0xffffffffffffffff))
+	x363 := (x356 & 0xffffffffffffff)
+	x364 := (x362 + x147)
+	x365 := (x361 >> 56)
+	x366 := (x361 & 0xffffffffffffff)
+	x367 := (x364 >> 56)
+	x368 := (x364 & 0xffffffffffffff)
+	x369 := (x337 + x365)
+	x370 := (x345 + x365)
+	x371 := (x367 + x369)
+	x372 := fiat.u1((x371 >> 56))
+	x373 := (x371 & 0xffffffffffffff)
+	x374 := (u64(x372) + x350)
+	x375 := fiat.u1((x370 >> 56))
+	x376 := (x370 & 0xffffffffffffff)
+	x377 := (u64(x375) + x355)
+	out1[0] = x376
+	out1[1] = x377
+	out1[2] = x363
+	out1[3] = x368
+	out1[4] = x373
+	out1[5] = x374
+	out1[6] = x360
+	out1[7] = x366
+}
+
+fe_carry :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+	x1 := arg1[3]
+	x2 := arg1[7]
+	x3 := (x2 >> 56)
+	x4 := (((x1 >> 56) + arg1[4]) + x3)
+	x5 := (arg1[0] + x3)
+	x6 := ((x4 >> 56) + arg1[5])
+	x7 := ((x5 >> 56) + arg1[1])
+	x8 := ((x6 >> 56) + arg1[6])
+	x9 := ((x7 >> 56) + arg1[2])
+	x10 := ((x8 >> 56) + (x2 & 0xffffffffffffff))
+	x11 := ((x9 >> 56) + (x1 & 0xffffffffffffff))
+	x12 := fiat.u1((x10 >> 56))
+	x13 := ((x5 & 0xffffffffffffff) + u64(x12))
+	x14 := (u64(fiat.u1((x11 >> 56))) + ((x4 & 0xffffffffffffff) + u64(x12)))
+	x15 := (x13 & 0xffffffffffffff)
+	x16 := (u64(fiat.u1((x13 >> 56))) + (x7 & 0xffffffffffffff))
+	x17 := (x9 & 0xffffffffffffff)
+	x18 := (x11 & 0xffffffffffffff)
+	x19 := (x14 & 0xffffffffffffff)
+	x20 := (u64(fiat.u1((x14 >> 56))) + (x6 & 0xffffffffffffff))
+	x21 := (x8 & 0xffffffffffffff)
+	x22 := (x10 & 0xffffffffffffff)
+	out1[0] = x15
+	out1[1] = x16
+	out1[2] = x17
+	out1[3] = x18
+	out1[4] = x19
+	out1[5] = x20
+	out1[6] = x21
+	out1[7] = x22
+}
+
+fe_add :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) {
+	x1 := (arg1[0] + arg2[0])
+	x2 := (arg1[1] + arg2[1])
+	x3 := (arg1[2] + arg2[2])
+	x4 := (arg1[3] + arg2[3])
+	x5 := (arg1[4] + arg2[4])
+	x6 := (arg1[5] + arg2[5])
+	x7 := (arg1[6] + arg2[6])
+	x8 := (arg1[7] + arg2[7])
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+	out1[5] = x6
+	out1[6] = x7
+	out1[7] = x8
+}
+
+fe_sub :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) {
+	x1 := ((0x1fffffffffffffe + arg1[0]) - arg2[0])
+	x2 := ((0x1fffffffffffffe + arg1[1]) - arg2[1])
+	x3 := ((0x1fffffffffffffe + arg1[2]) - arg2[2])
+	x4 := ((0x1fffffffffffffe + arg1[3]) - arg2[3])
+	x5 := ((0x1fffffffffffffc + arg1[4]) - arg2[4])
+	x6 := ((0x1fffffffffffffe + arg1[5]) - arg2[5])
+	x7 := ((0x1fffffffffffffe + arg1[6]) - arg2[6])
+	x8 := ((0x1fffffffffffffe + arg1[7]) - arg2[7])
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+	out1[5] = x6
+	out1[6] = x7
+	out1[7] = x8
+}
+
+fe_opp :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) {
+	x1 := (0x1fffffffffffffe - arg1[0])
+	x2 := (0x1fffffffffffffe - arg1[1])
+	x3 := (0x1fffffffffffffe - arg1[2])
+	x4 := (0x1fffffffffffffe - arg1[3])
+	x5 := (0x1fffffffffffffc - arg1[4])
+	x6 := (0x1fffffffffffffe - arg1[5])
+	x7 := (0x1fffffffffffffe - arg1[6])
+	x8 := (0x1fffffffffffffe - arg1[7])
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+	out1[5] = x6
+	out1[6] = x7
+	out1[7] = x8
+}
+
+@(optimization_mode = "none")
+fe_cond_assign :: #force_no_inline proc "contextless" (
+	out1, arg1: ^Tight_Field_Element,
+	arg2: int,
+) {
+	x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0])
+	x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1])
+	x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2])
+	x4 := fiat.cmovznz_u64(fiat.u1(arg2), out1[3], arg1[3])
+	x5 := fiat.cmovznz_u64(fiat.u1(arg2), out1[4], arg1[4])
+	x6 := fiat.cmovznz_u64(fiat.u1(arg2), out1[5], arg1[5])
+	x7 := fiat.cmovznz_u64(fiat.u1(arg2), out1[6], arg1[6])
+	x8 := fiat.cmovznz_u64(fiat.u1(arg2), out1[7], arg1[7])
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+	out1[5] = x6
+	out1[6] = x7
+	out1[7] = x8
+}
+
+fe_to_bytes :: proc "contextless" (out1: ^[56]byte, arg1: ^Tight_Field_Element) {
+	x1, x2 := _subborrowx_u56(0x0, arg1[0], 0xffffffffffffff)
+	x3, x4 := _subborrowx_u56(x2, arg1[1], 0xffffffffffffff)
+	x5, x6 := _subborrowx_u56(x4, arg1[2], 0xffffffffffffff)
+	x7, x8 := _subborrowx_u56(x6, arg1[3], 0xffffffffffffff)
+	x9, x10 := _subborrowx_u56(x8, arg1[4], 0xfffffffffffffe)
+	x11, x12 := _subborrowx_u56(x10, arg1[5], 0xffffffffffffff)
+	x13, x14 := _subborrowx_u56(x12, arg1[6], 0xffffffffffffff)
+	x15, x16 := _subborrowx_u56(x14, arg1[7], 0xffffffffffffff)
+	x17 := fiat.cmovznz_u64(x16, u64(0x0), 0xffffffffffffffff)
+	x18, x19 := _addcarryx_u56(0x0, x1, (x17 & 0xffffffffffffff))
+	x20, x21 := _addcarryx_u56(x19, x3, (x17 & 0xffffffffffffff))
+	x22, x23 := _addcarryx_u56(x21, x5, (x17 & 0xffffffffffffff))
+	x24, x25 := _addcarryx_u56(x23, x7, (x17 & 0xffffffffffffff))
+	x26, x27 := _addcarryx_u56(x25, x9, (x17 & 0xfffffffffffffe))
+	x28, x29 := _addcarryx_u56(x27, x11, (x17 & 0xffffffffffffff))
+	x30, x31 := _addcarryx_u56(x29, x13, (x17 & 0xffffffffffffff))
+	x32, _ := _addcarryx_u56(x31, x15, (x17 & 0xffffffffffffff))
+	x34 := (u8(x18) & 0xff)
+	x35 := (x18 >> 8)
+	x36 := (u8(x35) & 0xff)
+	x37 := (x35 >> 8)
+	x38 := (u8(x37) & 0xff)
+	x39 := (x37 >> 8)
+	x40 := (u8(x39) & 0xff)
+	x41 := (x39 >> 8)
+	x42 := (u8(x41) & 0xff)
+	x43 := (x41 >> 8)
+	x44 := (u8(x43) & 0xff)
+	x45 := u8((x43 >> 8))
+	x46 := (u8(x20) & 0xff)
+	x47 := (x20 >> 8)
+	x48 := (u8(x47) & 0xff)
+	x49 := (x47 >> 8)
+	x50 := (u8(x49) & 0xff)
+	x51 := (x49 >> 8)
+	x52 := (u8(x51) & 0xff)
+	x53 := (x51 >> 8)
+	x54 := (u8(x53) & 0xff)
+	x55 := (x53 >> 8)
+	x56 := (u8(x55) & 0xff)
+	x57 := u8((x55 >> 8))
+	x58 := (u8(x22) & 0xff)
+	x59 := (x22 >> 8)
+	x60 := (u8(x59) & 0xff)
+	x61 := (x59 >> 8)
+	x62 := (u8(x61) & 0xff)
+	x63 := (x61 >> 8)
+	x64 := (u8(x63) & 0xff)
+	x65 := (x63 >> 8)
+	x66 := (u8(x65) & 0xff)
+	x67 := (x65 >> 8)
+	x68 := (u8(x67) & 0xff)
+	x69 := u8((x67 >> 8))
+	x70 := (u8(x24) & 0xff)
+	x71 := (x24 >> 8)
+	x72 := (u8(x71) & 0xff)
+	x73 := (x71 >> 8)
+	x74 := (u8(x73) & 0xff)
+	x75 := (x73 >> 8)
+	x76 := (u8(x75) & 0xff)
+	x77 := (x75 >> 8)
+	x78 := (u8(x77) & 0xff)
+	x79 := (x77 >> 8)
+	x80 := (u8(x79) & 0xff)
+	x81 := u8((x79 >> 8))
+	x82 := (u8(x26) & 0xff)
+	x83 := (x26 >> 8)
+	x84 := (u8(x83) & 0xff)
+	x85 := (x83 >> 8)
+	x86 := (u8(x85) & 0xff)
+	x87 := (x85 >> 8)
+	x88 := (u8(x87) & 0xff)
+	x89 := (x87 >> 8)
+	x90 := (u8(x89) & 0xff)
+	x91 := (x89 >> 8)
+	x92 := (u8(x91) & 0xff)
+	x93 := u8((x91 >> 8))
+	x94 := (u8(x28) & 0xff)
+	x95 := (x28 >> 8)
+	x96 := (u8(x95) & 0xff)
+	x97 := (x95 >> 8)
+	x98 := (u8(x97) & 0xff)
+	x99 := (x97 >> 8)
+	x100 := (u8(x99) & 0xff)
+	x101 := (x99 >> 8)
+	x102 := (u8(x101) & 0xff)
+	x103 := (x101 >> 8)
+	x104 := (u8(x103) & 0xff)
+	x105 := u8((x103 >> 8))
+	x106 := (u8(x30) & 0xff)
+	x107 := (x30 >> 8)
+	x108 := (u8(x107) & 0xff)
+	x109 := (x107 >> 8)
+	x110 := (u8(x109) & 0xff)
+	x111 := (x109 >> 8)
+	x112 := (u8(x111) & 0xff)
+	x113 := (x111 >> 8)
+	x114 := (u8(x113) & 0xff)
+	x115 := (x113 >> 8)
+	x116 := (u8(x115) & 0xff)
+	x117 := u8((x115 >> 8))
+	x118 := (u8(x32) & 0xff)
+	x119 := (x32 >> 8)
+	x120 := (u8(x119) & 0xff)
+	x121 := (x119 >> 8)
+	x122 := (u8(x121) & 0xff)
+	x123 := (x121 >> 8)
+	x124 := (u8(x123) & 0xff)
+	x125 := (x123 >> 8)
+	x126 := (u8(x125) & 0xff)
+	x127 := (x125 >> 8)
+	x128 := (u8(x127) & 0xff)
+	x129 := u8((x127 >> 8))
+	out1[0] = x34
+	out1[1] = x36
+	out1[2] = x38
+	out1[3] = x40
+	out1[4] = x42
+	out1[5] = x44
+	out1[6] = x45
+	out1[7] = x46
+	out1[8] = x48
+	out1[9] = x50
+	out1[10] = x52
+	out1[11] = x54
+	out1[12] = x56
+	out1[13] = x57
+	out1[14] = x58
+	out1[15] = x60
+	out1[16] = x62
+	out1[17] = x64
+	out1[18] = x66
+	out1[19] = x68
+	out1[20] = x69
+	out1[21] = x70
+	out1[22] = x72
+	out1[23] = x74
+	out1[24] = x76
+	out1[25] = x78
+	out1[26] = x80
+	out1[27] = x81
+	out1[28] = x82
+	out1[29] = x84
+	out1[30] = x86
+	out1[31] = x88
+	out1[32] = x90
+	out1[33] = x92
+	out1[34] = x93
+	out1[35] = x94
+	out1[36] = x96
+	out1[37] = x98
+	out1[38] = x100
+	out1[39] = x102
+	out1[40] = x104
+	out1[41] = x105
+	out1[42] = x106
+	out1[43] = x108
+	out1[44] = x110
+	out1[45] = x112
+	out1[46] = x114
+	out1[47] = x116
+	out1[48] = x117
+	out1[49] = x118
+	out1[50] = x120
+	out1[51] = x122
+	out1[52] = x124
+	out1[53] = x126
+	out1[54] = x128
+	out1[55] = x129
+}
+
+fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[56]byte) {
+	x1 := (u64(arg1[55]) << 48)
+	x2 := (u64(arg1[54]) << 40)
+	x3 := (u64(arg1[53]) << 32)
+	x4 := (u64(arg1[52]) << 24)
+	x5 := (u64(arg1[51]) << 16)
+	x6 := (u64(arg1[50]) << 8)
+	x7 := arg1[49]
+	x8 := (u64(arg1[48]) << 48)
+	x9 := (u64(arg1[47]) << 40)
+	x10 := (u64(arg1[46]) << 32)
+	x11 := (u64(arg1[45]) << 24)
+	x12 := (u64(arg1[44]) << 16)
+	x13 := (u64(arg1[43]) << 8)
+	x14 := arg1[42]
+	x15 := (u64(arg1[41]) << 48)
+	x16 := (u64(arg1[40]) << 40)
+	x17 := (u64(arg1[39]) << 32)
+	x18 := (u64(arg1[38]) << 24)
+	x19 := (u64(arg1[37]) << 16)
+	x20 := (u64(arg1[36]) << 8)
+	x21 := arg1[35]
+	x22 := (u64(arg1[34]) << 48)
+	x23 := (u64(arg1[33]) << 40)
+	x24 := (u64(arg1[32]) << 32)
+	x25 := (u64(arg1[31]) << 24)
+	x26 := (u64(arg1[30]) << 16)
+	x27 := (u64(arg1[29]) << 8)
+	x28 := arg1[28]
+	x29 := (u64(arg1[27]) << 48)
+	x30 := (u64(arg1[26]) << 40)
+	x31 := (u64(arg1[25]) << 32)
+	x32 := (u64(arg1[24]) << 24)
+	x33 := (u64(arg1[23]) << 16)
+	x34 := (u64(arg1[22]) << 8)
+	x35 := arg1[21]
+	x36 := (u64(arg1[20]) << 48)
+	x37 := (u64(arg1[19]) << 40)
+	x38 := (u64(arg1[18]) << 32)
+	x39 := (u64(arg1[17]) << 24)
+	x40 := (u64(arg1[16]) << 16)
+	x41 := (u64(arg1[15]) << 8)
+	x42 := arg1[14]
+	x43 := (u64(arg1[13]) << 48)
+	x44 := (u64(arg1[12]) << 40)
+	x45 := (u64(arg1[11]) << 32)
+	x46 := (u64(arg1[10]) << 24)
+	x47 := (u64(arg1[9]) << 16)
+	x48 := (u64(arg1[8]) << 8)
+	x49 := arg1[7]
+	x50 := (u64(arg1[6]) << 48)
+	x51 := (u64(arg1[5]) << 40)
+	x52 := (u64(arg1[4]) << 32)
+	x53 := (u64(arg1[3]) << 24)
+	x54 := (u64(arg1[2]) << 16)
+	x55 := (u64(arg1[1]) << 8)
+	x56 := arg1[0]
+	x57 := (x55 + u64(x56))
+	x58 := (x54 + x57)
+	x59 := (x53 + x58)
+	x60 := (x52 + x59)
+	x61 := (x51 + x60)
+	x62 := (x50 + x61)
+	x63 := (x48 + u64(x49))
+	x64 := (x47 + x63)
+	x65 := (x46 + x64)
+	x66 := (x45 + x65)
+	x67 := (x44 + x66)
+	x68 := (x43 + x67)
+	x69 := (x41 + u64(x42))
+	x70 := (x40 + x69)
+	x71 := (x39 + x70)
+	x72 := (x38 + x71)
+	x73 := (x37 + x72)
+	x74 := (x36 + x73)
+	x75 := (x34 + u64(x35))
+	x76 := (x33 + x75)
+	x77 := (x32 + x76)
+	x78 := (x31 + x77)
+	x79 := (x30 + x78)
+	x80 := (x29 + x79)
+	x81 := (x27 + u64(x28))
+	x82 := (x26 + x81)
+	x83 := (x25 + x82)
+	x84 := (x24 + x83)
+	x85 := (x23 + x84)
+	x86 := (x22 + x85)
+	x87 := (x20 + u64(x21))
+	x88 := (x19 + x87)
+	x89 := (x18 + x88)
+	x90 := (x17 + x89)
+	x91 := (x16 + x90)
+	x92 := (x15 + x91)
+	x93 := (x13 + u64(x14))
+	x94 := (x12 + x93)
+	x95 := (x11 + x94)
+	x96 := (x10 + x95)
+	x97 := (x9 + x96)
+	x98 := (x8 + x97)
+	x99 := (x6 + u64(x7))
+	x100 := (x5 + x99)
+	x101 := (x4 + x100)
+	x102 := (x3 + x101)
+	x103 := (x2 + x102)
+	x104 := (x1 + x103)
+	out1[0] = x62
+	out1[1] = x68
+	out1[2] = x74
+	out1[3] = x80
+	out1[4] = x86
+	out1[5] = x92
+	out1[6] = x98
+	out1[7] = x104
+}
+
+fe_relax :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) {
+	x1 := arg1[0]
+	x2 := arg1[1]
+	x3 := arg1[2]
+	x4 := arg1[3]
+	x5 := arg1[4]
+	x6 := arg1[5]
+	x7 := arg1[6]
+	x8 := arg1[7]
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+	out1[5] = x6
+	out1[6] = x7
+	out1[7] = x8
+}

+ 1 - 4
core/crypto/_fiat/field_poly1305/field.odin

@@ -1,6 +1,5 @@
 package field_poly1305
 
-import "base:intrinsics"
 import "core:encoding/endian"
 import "core:mem"
 
@@ -29,9 +28,7 @@ fe_from_bytes :: #force_inline proc "contextless" (
 	// makes implementing the actual MAC block processing considerably
 	// neater.
 
-	if len(arg1) != 16 {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(arg1) == 16, "poly1305: invalid field element size")
 
 	// While it may be unwise to do deserialization here on our
 	// own when fiat-crypto provides equivalent functionality,

+ 5 - 9
core/crypto/_fiat/field_scalar25519/field.odin

@@ -1,18 +1,17 @@
 package field_scalar25519
 
-import "base:intrinsics"
 import "core:encoding/endian"
 import "core:math/bits"
 import "core:mem"
 
-@(private)
+@(private, rodata)
 _TWO_168 := Montgomery_Domain_Field_Element {
 	0x5b8ab432eac74798,
 	0x38afddd6de59d5d7,
 	0xa2c131b399411b7c,
 	0x6329a7ed9ce5a30,
 }
-@(private)
+@(private, rodata)
 _TWO_336 := Montgomery_Domain_Field_Element {
 	0xbd3d108e2b35ecc5,
 	0x5c3a3718bdf9c90b,
@@ -95,9 +94,8 @@ fe_from_bytes_wide :: proc "contextless" (
 @(private)
 _fe_from_bytes_short :: proc "contextless" (out1: ^Montgomery_Domain_Field_Element, arg1: []byte) {
 	// INVARIANT: len(arg1) < 32.
-	if len(arg1) >= 32 {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(arg1) < 32, "edwards25519: oversized short scalar")
+
 	tmp: [32]byte
 	copy(tmp[:], arg1)
 
@@ -106,9 +104,7 @@ _fe_from_bytes_short :: proc "contextless" (out1: ^Montgomery_Domain_Field_Eleme
 }
 
 fe_to_bytes :: proc "contextless" (out1: []byte, arg1: ^Montgomery_Domain_Field_Element) {
-	if len(out1) != 32 {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(out1) == 32, "edwards25519: oversized scalar output buffer")
 
 	tmp: Non_Montgomery_Domain_Field_Element
 	fe_from_montgomery(&tmp, arg1)

+ 18 - 21
core/crypto/_sha3/sha3.odin

@@ -44,7 +44,7 @@ Context :: struct {
 	is_finalized:   bool, // For SHAKE (unlimited squeeze is allowed)
 }
 
-@(private)
+@(private, rodata)
 keccakf_rndc := [?]u64 {
 	0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
 	0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
@@ -56,13 +56,13 @@ keccakf_rndc := [?]u64 {
 	0x8000000000008080, 0x0000000080000001, 0x8000000080008008,
 }
 
-@(private)
+@(private, rodata)
 keccakf_rotc := [?]int {
 	1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
 	27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44,
 }
 
-@(private)
+@(private, rodata)
 keccakf_piln := [?]i32 {
 	10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
 	15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1,
@@ -122,7 +122,7 @@ keccakf :: proc "contextless" (st: ^[25]u64) {
 	}
 }
 
-init :: proc(ctx: ^Context) {
+init :: proc "contextless" (ctx: ^Context) {
 	for i := 0; i < 25; i += 1 {
 		ctx.st.q[i] = 0
 	}
@@ -133,9 +133,9 @@ init :: proc(ctx: ^Context) {
 	ctx.is_finalized = false
 }
 
-update :: proc(ctx: ^Context, data: []byte) {
-	assert(ctx.is_initialized)
-	assert(!ctx.is_finalized)
+update :: proc "contextless" (ctx: ^Context, data: []byte) {
+	ensure_contextless(ctx.is_initialized)
+	ensure_contextless(!ctx.is_finalized)
 
 	j := ctx.pt
 	for i := 0; i < len(data); i += 1 {
@@ -149,12 +149,9 @@ update :: proc(ctx: ^Context, data: []byte) {
 	ctx.pt = j
 }
 
-final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
-	assert(ctx.is_initialized)
-
-	if len(hash) < ctx.mdlen {
-		panic("crypto/sha3: invalid destination digest size")
-	}
+final :: proc "contextless" (ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
+	ensure_contextless(ctx.is_initialized)
+	ensure_contextless(len(hash) >= ctx.mdlen, "crypto/sha3: invalid destination digest size")
 
 	ctx := ctx
 	if finalize_clone {
@@ -173,11 +170,11 @@ final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
 	}
 }
 
-clone :: proc(ctx, other: ^Context) {
+clone :: proc "contextless" (ctx, other: ^Context) {
 	ctx^ = other^
 }
 
-reset :: proc(ctx: ^Context) {
+reset :: proc "contextless" (ctx: ^Context) {
 	if !ctx.is_initialized {
 		return
 	}
@@ -185,9 +182,9 @@ reset :: proc(ctx: ^Context) {
 	mem.zero_explicit(ctx, size_of(ctx^))
 }
 
-shake_xof :: proc(ctx: ^Context) {
-	assert(ctx.is_initialized)
-	assert(!ctx.is_finalized)
+shake_xof :: proc "contextless" (ctx: ^Context) {
+	ensure_contextless(ctx.is_initialized)
+	ensure_contextless(!ctx.is_finalized)
 
 	ctx.st.b[ctx.pt] ~= ctx.dsbyte
 	ctx.st.b[ctx.rsiz - 1] ~= 0x80
@@ -197,9 +194,9 @@ shake_xof :: proc(ctx: ^Context) {
 	ctx.is_finalized = true // No more absorb, unlimited squeeze.
 }
 
-shake_out :: proc(ctx: ^Context, hash: []byte) {
-	assert(ctx.is_initialized)
-	assert(ctx.is_finalized)
+shake_out :: proc "contextless" (ctx: ^Context, hash: []byte) {
+	ensure_contextless(ctx.is_initialized)
+	ensure_contextless(ctx.is_finalized)
 
 	j := ctx.pt
 	for i := 0; i < len(hash); i += 1 {

+ 10 - 12
core/crypto/_sha3/sp800_185.odin

@@ -3,7 +3,7 @@ package _sha3
 import "core:encoding/endian"
 import "core:math/bits"
 
-init_cshake :: proc(ctx: ^Context, n, s: []byte, sec_strength: int) {
+init_cshake :: proc "contextless" (ctx: ^Context, n, s: []byte, sec_strength: int) {
 	ctx.mdlen = sec_strength / 8
 
 	// No domain separator is equivalent to vanilla SHAKE.
@@ -18,7 +18,7 @@ init_cshake :: proc(ctx: ^Context, n, s: []byte, sec_strength: int) {
 	bytepad(ctx, [][]byte{n, s}, rate_cshake(sec_strength))
 }
 
-final_cshake :: proc(ctx: ^Context, dst: []byte, finalize_clone: bool = false) {
+final_cshake :: proc "contextless" (ctx: ^Context, dst: []byte, finalize_clone: bool = false) {
 	ctx := ctx
 	if finalize_clone {
 		tmp_ctx: Context
@@ -32,7 +32,7 @@ final_cshake :: proc(ctx: ^Context, dst: []byte, finalize_clone: bool = false) {
 	shake_out(ctx, dst)
 }
 
-rate_cshake :: #force_inline proc(sec_strength: int) -> int {
+rate_cshake :: #force_inline proc "contextless" (sec_strength: int) -> int {
 	switch sec_strength {
 	case 128:
 		return RATE_128
@@ -40,7 +40,7 @@ rate_cshake :: #force_inline proc(sec_strength: int) -> int {
 		return RATE_256
 	}
 
-	panic("crypto/sha3: invalid security strength")
+	panic_contextless("crypto/sha3: invalid security strength")
 }
 
 // right_encode and left_encode are defined to support 0 <= x < 2^2040
@@ -52,10 +52,10 @@ rate_cshake :: #force_inline proc(sec_strength: int) -> int {
 //
 // Thus we support 0 <= x < 2^128.
 
-@(private)
+@(private, rodata)
 _PAD: [RATE_128]byte // Biggest possible value of w per spec.
 
-bytepad :: proc(ctx: ^Context, x_strings: [][]byte, w: int) {
+bytepad :: proc "contextless" (ctx: ^Context, x_strings: [][]byte, w: int) {
 	// 1. z = left_encode(w) || X.
 	z_hi: u64
 	z_lo := left_right_encode(ctx, 0, u64(w), true)
@@ -70,9 +70,7 @@ bytepad :: proc(ctx: ^Context, x_strings: [][]byte, w: int) {
 
 		// This isn't actually possible, at least with the currently
 		// defined SP 800-185 routines.
-		if carry != 0 {
-			panic("crypto/sha3: bytepad input length overflow")
-		}
+		ensure_contextless(carry == 0, "crypto/sha3: bytepad input length overflow")
 	}
 
 	// We skip this step as we are doing a byte-oriented implementation
@@ -95,7 +93,7 @@ bytepad :: proc(ctx: ^Context, x_strings: [][]byte, w: int) {
 	}
 }
 
-encode_string :: #force_inline proc(ctx: ^Context, s: []byte) -> (u64, u64) {
+encode_string :: #force_inline proc "contextless" (ctx: ^Context, s: []byte) -> (u64, u64) {
 	l := encode_byte_len(ctx, len(s), true) // left_encode
 	update(ctx, s)
 
@@ -104,13 +102,13 @@ encode_string :: #force_inline proc(ctx: ^Context, s: []byte) -> (u64, u64) {
 	return hi, lo
 }
 
-encode_byte_len :: #force_inline proc(ctx: ^Context, l: int, is_left: bool) -> u64 {
+encode_byte_len :: #force_inline proc "contextless" (ctx: ^Context, l: int, is_left: bool) -> u64 {
 	hi, lo := bits.mul_u64(u64(l), 8)
 	return left_right_encode(ctx, hi, lo, is_left)
 }
 
 @(private)
-left_right_encode :: proc(ctx: ^Context, hi, lo: u64, is_left: bool) -> u64 {
+left_right_encode :: proc "contextless" (ctx: ^Context, hi, lo: u64, is_left: bool) -> u64 {
 	HI_OFFSET :: 1
 	LO_OFFSET :: HI_OFFSET + 8
 	RIGHT_OFFSET :: LO_OFFSET + 8

+ 1 - 1
core/crypto/aead/aead.odin

@@ -16,7 +16,7 @@ seal_oneshot :: proc(algo: Algorithm, dst, tag, key, iv, aad, plaintext: []byte,
 // returning true iff the authentication was successful.  If authentication
 // fails, the destination buffer will be zeroed.
 //
-// dst and plaintext MUST alias exactly or not at all.
+// dst and ciphertext MUST alias exactly or not at all.
 @(require_results)
 open_oneshot :: proc(algo: Algorithm, dst, key, iv, aad, ciphertext, tag: []byte, impl: Implementation = nil) -> bool {
 	ctx: Context

+ 59 - 5
core/crypto/aead/low_level.odin

@@ -1,8 +1,10 @@
 package aead
 
+import "core:crypto/aegis"
 import "core:crypto/aes"
 import "core:crypto/chacha20"
 import "core:crypto/chacha20poly1305"
+import "core:crypto/deoxysii"
 import "core:reflect"
 
 // Implementation is an AEAD implementation.  Most callers will not need
@@ -15,7 +17,7 @@ Implementation :: union {
 
 // MAX_TAG_SIZE is the maximum size tag that can be returned by any of the
 // Algorithms supported via this package.
-MAX_TAG_SIZE :: 16
+MAX_TAG_SIZE :: 32
 
 // Algorithm is the algorithm identifier associated with a given Context.
 Algorithm :: enum {
@@ -25,9 +27,14 @@ Algorithm :: enum {
 	AES_GCM_256,
 	CHACHA20POLY1305,
 	XCHACHA20POLY1305,
+	AEGIS_128L,
+	AEGIS_128L_256, // AEGIS-128L (256-bit tag)
+	AEGIS_256,
+	AEGIS_256_256, // AEGIS-256 (256-bit tag)
+	DEOXYS_II_256,
 }
 
-// ALGORITM_NAMES is the Agorithm to algorithm name string.
+// ALGORITM_NAMES is the Algorithm to algorithm name string.
 ALGORITHM_NAMES := [Algorithm]string {
 	.Invalid           = "Invalid",
 	.AES_GCM_128       = "AES-GCM-128",
@@ -35,6 +42,11 @@ ALGORITHM_NAMES := [Algorithm]string {
 	.AES_GCM_256       = "AES-GCM-256",
 	.CHACHA20POLY1305  = "chacha20poly1305",
 	.XCHACHA20POLY1305 = "xchacha20poly1305",
+	.AEGIS_128L        = "AEGIS-128L",
+	.AEGIS_128L_256    = "AEGIS-128L-256",
+	.AEGIS_256         = "AEGIS-256",
+	.AEGIS_256_256     = "AEGIS-256-256",
+	.DEOXYS_II_256     = "Deoxys-II-256",
 }
 
 // TAG_SIZES is the Algorithm to tag size in bytes.
@@ -45,6 +57,11 @@ TAG_SIZES := [Algorithm]int {
 	.AES_GCM_256       = aes.GCM_TAG_SIZE,
 	.CHACHA20POLY1305  = chacha20poly1305.TAG_SIZE,
 	.XCHACHA20POLY1305 = chacha20poly1305.TAG_SIZE,
+	.AEGIS_128L        = aegis.TAG_SIZE_128,
+	.AEGIS_128L_256    = aegis.TAG_SIZE_256,
+	.AEGIS_256         = aegis.TAG_SIZE_128,
+	.AEGIS_256_256     = aegis.TAG_SIZE_256,
+	.DEOXYS_II_256     = deoxysii.TAG_SIZE,
 }
 
 // KEY_SIZES is the Algorithm to key size in bytes.
@@ -55,6 +72,11 @@ KEY_SIZES := [Algorithm]int {
 	.AES_GCM_256       = aes.KEY_SIZE_256,
 	.CHACHA20POLY1305  = chacha20poly1305.KEY_SIZE,
 	.XCHACHA20POLY1305 = chacha20poly1305.KEY_SIZE,
+	.AEGIS_128L        = aegis.KEY_SIZE_128L,
+	.AEGIS_128L_256    = aegis.KEY_SIZE_128L,
+	.AEGIS_256         = aegis.KEY_SIZE_256,
+	.AEGIS_256_256     = aegis.KEY_SIZE_256,
+	.DEOXYS_II_256     = deoxysii.KEY_SIZE,
 }
 
 // IV_SIZES is the Algorithm to initialization vector size in bytes.
@@ -67,6 +89,11 @@ IV_SIZES := [Algorithm]int {
 	.AES_GCM_256       = aes.GCM_IV_SIZE,
 	.CHACHA20POLY1305  = chacha20poly1305.IV_SIZE,
 	.XCHACHA20POLY1305 = chacha20poly1305.XIV_SIZE,
+	.AEGIS_128L        = aegis.IV_SIZE_128L,
+	.AEGIS_128L_256    = aegis.IV_SIZE_128L,
+	.AEGIS_256         = aegis.IV_SIZE_256,
+	.AEGIS_256_256     = aegis.IV_SIZE_256,
+	.DEOXYS_II_256     = deoxysii.IV_SIZE,
 }
 
 // Context is a concrete instantiation of a specific AEAD algorithm.
@@ -75,6 +102,8 @@ Context :: struct {
 	_impl: union {
 		aes.Context_GCM,
 		chacha20poly1305.Context,
+		aegis.Context,
+		deoxysii.Context,
 	},
 }
 
@@ -86,6 +115,11 @@ _IMPL_IDS := [Algorithm]typeid {
 	.AES_GCM_256       = typeid_of(aes.Context_GCM),
 	.CHACHA20POLY1305  = typeid_of(chacha20poly1305.Context),
 	.XCHACHA20POLY1305 = typeid_of(chacha20poly1305.Context),
+	.AEGIS_128L        = typeid_of(aegis.Context),
+	.AEGIS_128L_256    = typeid_of(aegis.Context),
+	.AEGIS_256         = typeid_of(aegis.Context),
+	.AEGIS_256_256     = typeid_of(aegis.Context),
+	.DEOXYS_II_256     = typeid_of(deoxysii.Context),
 }
 
 // init initializes a Context with a specific AEAD Algorithm.
@@ -94,9 +128,7 @@ init :: proc(ctx: ^Context, algorithm: Algorithm, key: []byte, impl: Implementat
 		reset(ctx)
 	}
 
-	if len(key) != KEY_SIZES[algorithm] {
-		panic("crypto/aead: invalid key size")
-	}
+	ensure(len(key) == KEY_SIZES[algorithm], "crypto/aead: invalid key size")
 
 	// Directly specialize the union by setting the type ID (save a copy).
 	reflect.set_union_variant_typeid(
@@ -113,6 +145,12 @@ init :: proc(ctx: ^Context, algorithm: Algorithm, key: []byte, impl: Implementat
 	case .XCHACHA20POLY1305:
 		impl_ := impl != nil ? impl.(chacha20.Implementation) : chacha20.DEFAULT_IMPLEMENTATION
 		chacha20poly1305.init_xchacha(&ctx._impl.(chacha20poly1305.Context), key, impl_)
+	case .AEGIS_128L, .AEGIS_128L_256, .AEGIS_256, .AEGIS_256_256:
+		impl_ := impl != nil ? impl.(aes.Implementation) : aes.DEFAULT_IMPLEMENTATION
+		aegis.init(&ctx._impl.(aegis.Context), key, impl_)
+	case .DEOXYS_II_256:
+		impl_ := impl != nil ? impl.(aes.Implementation) : aes.DEFAULT_IMPLEMENTATION
+		deoxysii.init(&ctx._impl.(deoxysii.Context), key, impl_)
 	case .Invalid:
 		panic("crypto/aead: uninitialized algorithm")
 	case:
@@ -127,11 +165,17 @@ init :: proc(ctx: ^Context, algorithm: Algorithm, key: []byte, impl: Implementat
 //
 // dst and plaintext MUST alias exactly or not at all.
 seal_ctx :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
+	ensure(len(tag) == TAG_SIZES[ctx._algo], "crypto/aead: invalid tag size")
+
 	switch &impl in ctx._impl {
 	case aes.Context_GCM:
 		aes.seal_gcm(&impl, dst, tag, iv, aad, plaintext)
 	case chacha20poly1305.Context:
 		chacha20poly1305.seal(&impl, dst, tag, iv, aad, plaintext)
+	case aegis.Context:
+		aegis.seal(&impl, dst, tag, iv, aad, plaintext)
+	case deoxysii.Context:
+		deoxysii.seal(&impl, dst, tag, iv, aad, plaintext)
 	case:
 		panic("crypto/aead: uninitialized algorithm")
 	}
@@ -145,11 +189,17 @@ seal_ctx :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
 // dst and plaintext MUST alias exactly or not at all.
 @(require_results)
 open_ctx :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
+	ensure(len(tag) == TAG_SIZES[ctx._algo], "crypto/aead: invalid tag size")
+
 	switch &impl in ctx._impl {
 	case aes.Context_GCM:
 		return aes.open_gcm(&impl, dst, iv, aad, ciphertext, tag)
 	case chacha20poly1305.Context:
 		return chacha20poly1305.open(&impl, dst, iv, aad, ciphertext, tag)
+	case aegis.Context:
+		return aegis.open(&impl, dst, iv, aad, ciphertext, tag)
+	case deoxysii.Context:
+		return deoxysii.open(&impl, dst, iv, aad, ciphertext, tag)
 	case:
 		panic("crypto/aead: uninitialized algorithm")
 	}
@@ -163,6 +213,10 @@ reset :: proc(ctx: ^Context) {
 		aes.reset_gcm(&impl)
 	case chacha20poly1305.Context:
 		chacha20poly1305.reset(&impl)
+	case aegis.Context:
+		aegis.reset(&impl)
+	case deoxysii.Context:
+		deoxysii.reset(&impl)
 	case:
 		// Calling reset repeatedly is fine.
 	}

+ 213 - 0
core/crypto/aegis/aegis.odin

@@ -0,0 +1,213 @@
+/*
+package aegis implements the AEGIS-128L and AEGIS-256 Authenticated
+Encryption with Additional Data algorithms.
+
+See:
+- [[ https://www.ietf.org/archive/id/draft-irtf-cfrg-aegis-aead-12.txt ]]
+*/
+package aegis
+
+import "core:bytes"
+import "core:crypto"
+import "core:crypto/aes"
+import "core:mem"
+
+// KEY_SIZE_128L is the AEGIS-128L key size in bytes.
+KEY_SIZE_128L :: 16
+// KEY_SIZE_256 is the AEGIS-256 key size in bytes.
+KEY_SIZE_256 :: 32
+// IV_SIZE_128L is the AEGIS-128L IV size in bytes.
+IV_SIZE_128L :: 16
+// IV_SIZE_256 is the AEGIS-256 IV size in bytes.
+IV_SIZE_256 :: 32
+// TAG_SIZE_128 is the AEGIS-128L or AEGIS-256 128-bit tag size in bytes.
+TAG_SIZE_128 :: 16
+// TAG_SIZE_256 is the AEGIS-128L or AEGIS-256 256-bit tag size in bytes.
+TAG_SIZE_256 :: 32
+
+@(private)
+_RATE_128L :: 32
+@(private)
+_RATE_256 :: 16
+@(private)
+_RATE_MAX :: _RATE_128L
+
+@(private, rodata)
+_C0 := [16]byte{
+	0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d,
+	0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62,
+}
+
+@(private, rodata)
+_C1 := [16]byte {
+	0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1,
+	0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd,
+}
+
+// Context is a keyed AEGIS-128L or AEGIS-256 instance.
+Context :: struct {
+	_key:            [KEY_SIZE_256]byte,
+	_key_len:        int,
+	_impl:           aes.Implementation,
+	_is_initialized: bool,
+}
+
+@(private)
+_validate_common_slice_sizes :: proc (ctx: ^Context, tag, iv, aad, text: []byte) {
+	switch len(tag) {
+	case TAG_SIZE_128, TAG_SIZE_256:
+	case:
+		panic("crypto/aegis: invalid tag size")
+	}
+
+	iv_ok: bool
+	switch ctx._key_len {
+	case KEY_SIZE_128L:
+		iv_ok = len(iv) == IV_SIZE_128L
+	case KEY_SIZE_256:
+		iv_ok = len(iv) == IV_SIZE_256
+	}
+	ensure(iv_ok, "crypto/aegis: invalid IV size")
+
+	#assert(size_of(int) == 8 || size_of(int) <= 4)
+	// As A_MAX and P_MAX are both defined to be 2^61 - 1 bytes, and
+	// the maximum length of a slice is bound by `size_of(int)`, where
+	// `int` is register sized, there is no need to check AAD/text
+	// lengths.
+}
+
+// init initializes a Context with the provided key, for AEGIS-128L or AEGIS-256.
+init :: proc(ctx: ^Context, key: []byte, impl := aes.DEFAULT_IMPLEMENTATION) {
+	switch len(key) {
+	case KEY_SIZE_128L, KEY_SIZE_256:
+	case:
+		panic("crypto/aegis: invalid key size")
+	}
+
+	copy(ctx._key[:], key)
+	ctx._key_len = len(key)
+	ctx._impl = impl
+	if ctx._impl == .Hardware && !is_hardware_accelerated() {
+		ctx._impl = .Portable
+	}
+	ctx._is_initialized = true
+}
+
+// seal encrypts the plaintext and authenticates the aad and ciphertext,
+// with the provided Context and iv, stores the output in dst and tag.
+//
+// dst and plaintext MUST alias exactly or not at all.
+seal :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
+	ensure(ctx._is_initialized)
+
+	_validate_common_slice_sizes(ctx, tag, iv, aad, plaintext)
+	ensure(len(dst) == len(plaintext), "crypto/aegis: invalid destination ciphertext size")
+	ensure(!bytes.alias_inexactly(dst, plaintext), "crypto/aegis: dst and plaintext alias inexactly")
+
+	switch ctx._impl {
+	case .Hardware:
+		st: State_HW
+		defer reset_state_hw(&st)
+
+		init_hw(ctx, &st, iv)
+
+		aad_len, pt_len := len(aad), len(plaintext)
+		if aad_len > 0 {
+			absorb_hw(&st, aad)
+		}
+
+		if pt_len > 0 {
+			enc_hw(&st, dst, plaintext)
+		}
+
+		finalize_hw(&st, tag, aad_len, pt_len)
+	case .Portable:
+		st: State_SW
+		defer reset_state_sw(&st)
+
+		init_sw(ctx, &st, iv)
+
+		aad_len, pt_len := len(aad), len(plaintext)
+		if aad_len > 0 {
+			absorb_sw(&st, aad)
+		}
+
+		if pt_len > 0 {
+			enc_sw(&st, dst, plaintext)
+		}
+
+		finalize_sw(&st, tag, aad_len, pt_len)
+	case:
+		panic("core/crypto/aegis: not implemented")
+	}
+}
+
+// open authenticates the aad and ciphertext, and decrypts the ciphertext,
+// with the provided Context, iv, and tag, and stores the output in dst,
+// returning true iff the authentication was successful.  If authentication
+// fails, the destination buffer will be zeroed.
+//
+// dst and plaintext MUST alias exactly or not at all.
+@(require_results)
+open :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
+	ensure(ctx._is_initialized)
+
+	_validate_common_slice_sizes(ctx, tag, iv, aad, ciphertext)
+	ensure(len(dst) == len(ciphertext), "crypto/aegis: invalid destination plaintext size")
+	ensure(!bytes.alias_inexactly(dst, ciphertext), "crypto/aegis: dst and ciphertext alias inexactly")
+
+	tmp: [TAG_SIZE_256]byte
+	derived_tag := tmp[:len(tag)]
+	aad_len, ct_len := len(aad), len(ciphertext)
+
+	switch ctx._impl {
+	case .Hardware:
+		st: State_HW
+		defer reset_state_hw(&st)
+
+		init_hw(ctx, &st, iv)
+
+		if aad_len > 0 {
+			absorb_hw(&st, aad)
+		}
+
+		if ct_len > 0 {
+			dec_hw(&st, dst, ciphertext)
+		}
+
+		finalize_hw(&st, derived_tag, aad_len, ct_len)
+	case .Portable:
+		st: State_SW
+		defer reset_state_sw(&st)
+
+		init_sw(ctx, &st, iv)
+
+		if aad_len > 0 {
+			absorb_sw(&st, aad)
+		}
+
+		if ct_len > 0 {
+			dec_sw(&st, dst, ciphertext)
+		}
+
+		finalize_sw(&st, derived_tag, aad_len, ct_len)
+	case:
+		panic("core/crypto/aegis: not implemented")
+	}
+
+	if crypto.compare_constant_time(tag, derived_tag) != 1 {
+		mem.zero_explicit(raw_data(derived_tag), len(derived_tag))
+		mem.zero_explicit(raw_data(dst), ct_len)
+		return false
+	}
+
+	return true
+}
+
+// reset sanitizes the Context.  The Context must be
+// re-initialized to be used again.
+reset :: proc "contextless" (ctx: ^Context) {
+	mem.zero_explicit(&ctx._key, len(ctx._key))
+	ctx._key_len = 0
+	ctx._is_initialized = false
+}

+ 452 - 0
core/crypto/aegis/aegis_impl_ct64.odin

@@ -0,0 +1,452 @@
+package aegis
+
+import aes "core:crypto/_aes/ct64"
+import "core:encoding/endian"
+import "core:mem"
+
+// This uses the bitsliced 64-bit general purpose register SWAR AES
+// round function.  The intermediate state is stored in interleaved
+// but NOT orthogonalized form, as leaving things in the orthogonalized
+// format would overly complicate the update implementation.
+//
+// Note/perf: Per Frank Denis and a review of the specification, it is
+// possible to gain slightly more performance by leaving the state in
+// orthogonalized form while doing initialization, finalization, and
+// absorbing AAD.  This implementation opts out of those optimizations
+// for the sake of simplicity.
+//
+// The update function leverages the parallelism by processing 4 blocks at once.
+
+@(private)
+State_SW :: struct {
+	s0_0, s0_1: u64,
+	s1_0, s1_1: u64,
+	s2_0, s2_1: u64,
+	s3_0, s3_1: u64,
+	s4_0, s4_1: u64,
+	s5_0, s5_1: u64,
+	s6_0, s6_1: u64,
+	s7_0, s7_1: u64,
+	q_k, q_b:   [8]u64,
+	rate:       int,
+}
+
+@(private)
+init_sw :: proc "contextless" (ctx: ^Context, st: ^State_SW, iv: []byte) {
+	switch ctx._key_len {
+	case KEY_SIZE_128L:
+		key_0, key_1 := aes.load_interleaved(ctx._key[:16])
+		iv_0, iv_1 := aes.load_interleaved(iv)
+
+		st.s0_0, st.s0_1 = aes.xor_interleaved(key_0, key_1, iv_0, iv_1)
+		st.s1_0, st.s1_1 = aes.load_interleaved(_C1[:])
+		st.s2_0, st.s2_1 = aes.load_interleaved(_C0[:])
+		st.s3_0, st.s3_1 = st.s1_0, st.s1_1
+		st.s4_0, st.s4_1 = st.s0_0, st.s0_1
+		st.s5_0, st.s5_1 = aes.xor_interleaved(key_0, key_1, st.s2_0, st.s2_1)
+		st.s6_0, st.s6_1 = aes.xor_interleaved(key_0, key_1, st.s1_0, st.s1_1)
+		st.s7_0, st.s7_1 = st.s5_0, st.s5_1
+		st.rate = _RATE_128L
+
+		for _ in 0 ..< 10 {
+			update_sw_128l(st, iv_0, iv_1, key_0, key_1)
+		}
+	case KEY_SIZE_256:
+		k0_0, k0_1 := aes.load_interleaved(ctx._key[:16])
+		k1_0, k1_1 := aes.load_interleaved(ctx._key[16:])
+		n0_0, n0_1 := aes.load_interleaved(iv[:16])
+		n1_0, n1_1 := aes.load_interleaved(iv[16:])
+
+		st.s0_0, st.s0_1 = aes.xor_interleaved(k0_0, k0_1, n0_0, n0_1)
+		st.s1_0, st.s1_1 = aes.xor_interleaved(k1_0, k1_1, n1_0, n1_1)
+		st.s2_0, st.s2_1 = aes.load_interleaved(_C1[:])
+		st.s3_0, st.s3_1 = aes.load_interleaved(_C0[:])
+		st.s4_0, st.s4_1 = aes.xor_interleaved(k0_0, k0_1, st.s3_0, st.s3_1)
+		st.s5_0, st.s5_1 = aes.xor_interleaved(k1_0, k1_1, st.s2_0, st.s2_1)
+		st.rate = _RATE_256
+
+		u0_0, u0_1, u1_0, u1_1 := st.s0_0, st.s0_1, st.s1_0, st.s1_1
+		for _ in 0 ..< 4 {
+			update_sw_256(st, k0_0, k0_1)
+			update_sw_256(st, k1_0, k1_1)
+			update_sw_256(st, u0_0, u0_1)
+			update_sw_256(st, u1_0, u1_1)
+		}
+	}
+}
+
+@(private = "file")
+update_sw_128l :: proc "contextless" (st: ^State_SW, m0_0, m0_1, m1_0, m1_1: u64) {
+	st.q_k[0], st.q_k[4] = aes.xor_interleaved(st.s0_0, st.s0_1, m0_0, m0_1)
+	st.q_k[1], st.q_k[5] = st.s1_0, st.s1_1
+	st.q_k[2], st.q_k[6] = st.s2_0, st.s2_1
+	st.q_k[3], st.q_k[7] = st.s3_0, st.s3_1
+	aes.orthogonalize(&st.q_k)
+
+	st.q_b[0], st.q_b[4] = st.s7_0, st.s7_1
+	st.q_b[1], st.q_b[5] = st.s0_0, st.s0_1
+	st.q_b[2], st.q_b[6] = st.s1_0, st.s1_1
+	st.q_b[3], st.q_b[7] = st.s2_0, st.s2_1
+	aes.orthogonalize(&st.q_b)
+
+	aes.sub_bytes(&st.q_b)
+	aes.shift_rows(&st.q_b)
+	aes.mix_columns(&st.q_b)
+	aes.add_round_key(&st.q_b, st.q_k[:])
+	aes.orthogonalize(&st.q_b)
+
+	st.s0_0, st.s0_1 = st.q_b[0], st.q_b[4]
+	st.s1_0, st.s1_1 = st.q_b[1], st.q_b[5]
+	st.s2_0, st.s2_1 = st.q_b[2], st.q_b[6]
+	s3_0, s3_1 := st.q_b[3], st.q_b[7]
+
+	st.q_k[0], st.q_k[4] = aes.xor_interleaved(st.s4_0, st.s4_1, m1_0, m1_1)
+	st.q_k[1], st.q_k[5] = st.s5_0, st.s5_1
+	st.q_k[2], st.q_k[6] = st.s6_0, st.s6_1
+	st.q_k[3], st.q_k[7] = st.s7_0, st.s7_1
+	aes.orthogonalize(&st.q_k)
+
+	st.q_b[0], st.q_b[4] = st.s3_0, st.s3_1
+	st.q_b[1], st.q_b[5] = st.s4_0, st.s4_1
+	st.q_b[2], st.q_b[6] = st.s5_0, st.s5_1
+	st.q_b[3], st.q_b[7] = st.s6_0, st.s6_1
+	aes.orthogonalize(&st.q_b)
+
+	aes.sub_bytes(&st.q_b)
+	aes.shift_rows(&st.q_b)
+	aes.mix_columns(&st.q_b)
+	aes.add_round_key(&st.q_b, st.q_k[:])
+	aes.orthogonalize(&st.q_b)
+
+	st.s3_0, st.s3_1 = s3_0, s3_1
+	st.s4_0, st.s4_1 = st.q_b[0], st.q_b[4]
+	st.s5_0, st.s5_1 = st.q_b[1], st.q_b[5]
+	st.s6_0, st.s6_1 = st.q_b[2], st.q_b[6]
+	st.s7_0, st.s7_1 = st.q_b[3], st.q_b[7]
+}
+
+@(private = "file")
+update_sw_256 :: proc "contextless" (st: ^State_SW, m_0, m_1: u64) {
+	st.q_k[0], st.q_k[4] = aes.xor_interleaved(st.s0_0, st.s0_1, m_0, m_1)
+	st.q_k[1], st.q_k[5] = st.s1_0, st.s1_1
+	st.q_k[2], st.q_k[6] = st.s2_0, st.s2_1
+	st.q_k[3], st.q_k[7] = st.s3_0, st.s3_1
+	aes.orthogonalize(&st.q_k)
+
+	st.q_b[0], st.q_b[4] = st.s5_0, st.s5_1
+	st.q_b[1], st.q_b[5] = st.s0_0, st.s0_1
+	st.q_b[2], st.q_b[6] = st.s1_0, st.s1_1
+	st.q_b[3], st.q_b[7] = st.s2_0, st.s2_1
+	aes.orthogonalize(&st.q_b)
+
+	aes.sub_bytes(&st.q_b)
+	aes.shift_rows(&st.q_b)
+	aes.mix_columns(&st.q_b)
+	aes.add_round_key(&st.q_b, st.q_k[:])
+	aes.orthogonalize(&st.q_b)
+
+	st.s0_0, st.s0_1 = st.q_b[0], st.q_b[4]
+	st.s1_0, st.s1_1 = st.q_b[1], st.q_b[5]
+	st.s2_0, st.s2_1 = st.q_b[2], st.q_b[6]
+	s3_0, s3_1 := st.q_b[3], st.q_b[7]
+
+	st.q_k[0], st.q_k[4] = st.s4_0, st.s4_1
+	st.q_k[1], st.q_k[5] = st.s5_0, st.s5_1
+	aes.orthogonalize(&st.q_k)
+
+	st.q_b[0], st.q_b[4] = st.s3_0, st.s3_1
+	st.q_b[1], st.q_b[5] = st.s4_0, st.s4_1
+	aes.orthogonalize(&st.q_b)
+
+	aes.sub_bytes(&st.q_b)
+	aes.shift_rows(&st.q_b)
+	aes.mix_columns(&st.q_b)
+	aes.add_round_key(&st.q_b, st.q_k[:])
+	aes.orthogonalize(&st.q_b)
+
+	st.s3_0, st.s3_1 = s3_0, s3_1
+	st.s4_0, st.s4_1 = st.q_b[0], st.q_b[4]
+	st.s5_0, st.s5_1 = st.q_b[1], st.q_b[5]
+}
+
+@(private = "file")
+absorb_sw_128l :: #force_inline proc "contextless" (st: ^State_SW, ai: []byte) #no_bounds_check {
+	t0_0, t0_1 := aes.load_interleaved(ai[:16])
+	t1_0, t1_1 := aes.load_interleaved(ai[16:])
+	update_sw_128l(st, t0_0, t0_1, t1_0, t1_1)
+}
+
+@(private = "file")
+absorb_sw_256 :: #force_inline proc "contextless" (st: ^State_SW, ai: []byte) {
+	m_0, m_1 := aes.load_interleaved(ai)
+	update_sw_256(st, m_0, m_1)
+}
+
+@(private)
+absorb_sw :: proc "contextless" (st: ^State_SW, aad: []byte) #no_bounds_check {
+	ai, l := aad, len(aad)
+
+	switch st.rate {
+	case _RATE_128L:
+		for l >= _RATE_128L {
+			absorb_sw_128l(st, ai)
+			ai = ai[_RATE_128L:]
+			l -= _RATE_128L
+		}
+	case _RATE_256:
+		for l >= _RATE_256 {
+			absorb_sw_256(st, ai)
+
+			ai = ai[_RATE_256:]
+			l -= _RATE_256
+		}
+	}
+
+	// Pad out the remainder with `0`s till it is rate sized.
+	if l > 0 {
+		tmp: [_RATE_MAX]byte // AAD is not confidential.
+		copy(tmp[:], ai)
+		switch st.rate {
+		case _RATE_128L:
+			absorb_sw_128l(st, tmp[:])
+		case _RATE_256:
+			absorb_sw_256(st, tmp[:])
+		}
+	}
+}
+
+@(private = "file", require_results)
+z_sw_128l :: proc "contextless" (st: ^State_SW) -> (u64, u64, u64, u64) {
+	z0_0, z0_1 := aes.and_interleaved(st.s2_0, st.s2_1, st.s3_0, st.s3_1)
+	z0_0, z0_1 = aes.xor_interleaved(st.s1_0, st.s1_1, z0_0, z0_1)
+	z0_0, z0_1 = aes.xor_interleaved(st.s6_0, st.s6_1, z0_0, z0_1)
+
+	z1_0, z1_1 := aes.and_interleaved(st.s6_0, st.s6_1, st.s7_0, st.s7_1)
+	z1_0, z1_1 = aes.xor_interleaved(st.s5_0, st.s5_1, z1_0, z1_1)
+	z1_0, z1_1 = aes.xor_interleaved(st.s2_0, st.s2_1, z1_0, z1_1)
+
+	return z0_0, z0_1, z1_0, z1_1
+}
+
+@(private = "file", require_results)
+z_sw_256 :: proc "contextless" (st: ^State_SW) -> (u64, u64) {
+	z_0, z_1 := aes.and_interleaved(st.s2_0, st.s2_1, st.s3_0, st.s3_1)
+	z_0, z_1 = aes.xor_interleaved(st.s5_0, st.s5_1, z_0, z_1)
+	z_0, z_1 = aes.xor_interleaved(st.s4_0, st.s4_1, z_0, z_1)
+	return aes.xor_interleaved(st.s1_0, st.s1_1, z_0, z_1)
+}
+
+@(private = "file")
+enc_sw_128l :: #force_inline proc "contextless" (st: ^State_SW, ci, xi: []byte) #no_bounds_check {
+	z0_0, z0_1, z1_0, z1_1 := z_sw_128l(st)
+
+	t0_0, t0_1 := aes.load_interleaved(xi[:16])
+	t1_0, t1_1 := aes.load_interleaved(xi[16:])
+	update_sw_128l(st, t0_0, t0_1, t1_0, t1_1)
+
+	out0_0, out0_1 := aes.xor_interleaved(t0_0, t0_1, z0_0, z0_1)
+	out1_0, out1_1 := aes.xor_interleaved(t1_0, t1_1, z1_0, z1_1)
+	aes.store_interleaved(ci[:16], out0_0, out0_1)
+	aes.store_interleaved(ci[16:], out1_0, out1_1)
+}
+
+@(private = "file")
+enc_sw_256 :: #force_inline proc "contextless" (st: ^State_SW, ci, xi: []byte) #no_bounds_check {
+	z_0, z_1 := z_sw_256(st)
+
+	xi_0, xi_1 := aes.load_interleaved(xi)
+	update_sw_256(st, xi_0, xi_1)
+
+	ci_0, ci_1 := aes.xor_interleaved(xi_0, xi_1, z_0, z_1)
+	aes.store_interleaved(ci, ci_0, ci_1)
+}
+
+@(private)
+enc_sw :: proc "contextless" (st: ^State_SW, dst, src: []byte) #no_bounds_check {
+	ci, xi, l := dst, src, len(src)
+
+	switch st.rate {
+	case _RATE_128L:
+		for l >= _RATE_128L {
+			enc_sw_128l(st, ci, xi)
+			ci = ci[_RATE_128L:]
+			xi = xi[_RATE_128L:]
+			l -= _RATE_128L
+		}
+	case _RATE_256:
+		for l >= _RATE_256 {
+			enc_sw_256(st, ci, xi)
+			ci = ci[_RATE_256:]
+			xi = xi[_RATE_256:]
+			l -= _RATE_256
+		}
+	}
+
+	// Pad out the remainder with `0`s till it is rate sized.
+	if l > 0 {
+		tmp: [_RATE_MAX]byte // Ciphertext is not confidential.
+		copy(tmp[:], xi)
+		switch st.rate {
+		case _RATE_128L:
+			enc_sw_128l(st, tmp[:], tmp[:])
+		case _RATE_256:
+			enc_sw_256(st, tmp[:], tmp[:])
+		}
+		copy(ci, tmp[:l])
+	}
+}
+
+@(private = "file")
+dec_sw_128l :: #force_inline proc "contextless" (st: ^State_SW, xi, ci: []byte) #no_bounds_check {
+	z0_0, z0_1, z1_0, z1_1 := z_sw_128l(st)
+
+	t0_0, t0_1 := aes.load_interleaved(ci[:16])
+	t1_0, t1_1 := aes.load_interleaved(ci[16:])
+	out0_0, out0_1 := aes.xor_interleaved(t0_0, t0_1, z0_0, z0_1)
+	out1_0, out1_1 := aes.xor_interleaved(t1_0, t1_1, z1_0, z1_1)
+
+	update_sw_128l(st, out0_0, out0_1, out1_0, out1_1)
+	aes.store_interleaved(xi[:16], out0_0, out0_1)
+	aes.store_interleaved(xi[16:], out1_0, out1_1)
+}
+
+@(private = "file")
+dec_sw_256 :: #force_inline proc "contextless" (st: ^State_SW, xi, ci: []byte) #no_bounds_check {
+	z_0, z_1 := z_sw_256(st)
+
+	ci_0, ci_1 := aes.load_interleaved(ci)
+	xi_0, xi_1 := aes.xor_interleaved(ci_0, ci_1, z_0, z_1)
+
+	update_sw_256(st, xi_0, xi_1)
+	aes.store_interleaved(xi, xi_0, xi_1)
+}
+
+@(private = "file")
+dec_partial_sw_128l :: proc "contextless" (st: ^State_SW, xn, cn: []byte) #no_bounds_check {
+	tmp: [_RATE_128L]byte
+	defer mem.zero_explicit(&tmp, size_of(tmp))
+
+	z0_0, z0_1, z1_0, z1_1 := z_sw_128l(st)
+	copy(tmp[:], cn)
+
+	t0_0, t0_1 := aes.load_interleaved(tmp[:16])
+	t1_0, t1_1 := aes.load_interleaved(tmp[16:])
+	out0_0, out0_1 := aes.xor_interleaved(t0_0, t0_1, z0_0, z0_1)
+	out1_0, out1_1 := aes.xor_interleaved(t1_0, t1_1, z1_0, z1_1)
+
+	aes.store_interleaved(tmp[:16], out0_0, out0_1)
+	aes.store_interleaved(tmp[16:], out1_0, out1_1)
+	copy(xn, tmp[:])
+
+	for off := len(xn); off < _RATE_128L; off += 1 {
+		tmp[off] = 0
+	}
+	out0_0, out0_1 = aes.load_interleaved(tmp[:16])
+	out1_0, out1_1 = aes.load_interleaved(tmp[16:])
+	update_sw_128l(st, out0_0, out0_1, out1_0, out1_1)
+}
+
+@(private = "file")
+dec_partial_sw_256 :: proc "contextless" (st: ^State_SW, xn, cn: []byte) #no_bounds_check {
+	tmp: [_RATE_256]byte
+	defer mem.zero_explicit(&tmp, size_of(tmp))
+
+	z_0, z_1 := z_sw_256(st)
+	copy(tmp[:], cn)
+
+	cn_0, cn_1 := aes.load_interleaved(tmp[:])
+	xn_0, xn_1 := aes.xor_interleaved(cn_0, cn_1, z_0, z_1)
+
+	aes.store_interleaved(tmp[:], xn_0, xn_1)
+	copy(xn, tmp[:])
+
+	for off := len(xn); off < _RATE_256; off += 1 {
+		tmp[off] = 0
+	}
+	xn_0, xn_1 = aes.load_interleaved(tmp[:])
+	update_sw_256(st, xn_0, xn_1)
+}
+
+@(private)
+dec_sw :: proc "contextless" (st: ^State_SW, dst, src: []byte) #no_bounds_check {
+	xi, ci, l := dst, src, len(src)
+
+	switch st.rate {
+	case _RATE_128L:
+		for l >= _RATE_128L {
+			dec_sw_128l(st, xi, ci)
+			xi = xi[_RATE_128L:]
+			ci = ci[_RATE_128L:]
+			l -= _RATE_128L
+		}
+	case _RATE_256:
+		for l >= _RATE_256 {
+			dec_sw_256(st, xi, ci)
+			xi = xi[_RATE_256:]
+			ci = ci[_RATE_256:]
+			l -= _RATE_256
+		}
+	}
+
+	// Process the remainder.
+	if l > 0 {
+		switch st.rate {
+		case _RATE_128L:
+			dec_partial_sw_128l(st, xi, ci)
+		case _RATE_256:
+			dec_partial_sw_256(st, xi, ci)
+		}
+	}
+}
+
+@(private)
+finalize_sw :: proc "contextless" (st: ^State_SW, tag: []byte, ad_len, msg_len: int) {
+	tmp: [16]byte
+	endian.unchecked_put_u64le(tmp[0:], u64(ad_len) * 8)
+	endian.unchecked_put_u64le(tmp[8:], u64(msg_len) * 8)
+
+	t_0, t_1 := aes.load_interleaved(tmp[:])
+
+	t0_0, t0_1, t1_0, t1_1: u64 = ---, ---, ---, ---
+	switch st.rate {
+	case _RATE_128L:
+		t_0, t_1 = aes.xor_interleaved(st.s2_0, st.s2_1, t_0, t_1)
+		for _ in 0 ..< 7 {
+			update_sw_128l(st, t_0, t_1, t_0, t_1)
+		}
+
+		t0_0, t0_1 = aes.xor_interleaved(st.s0_0, st.s0_1, st.s1_0, st.s1_1)
+		t0_0, t0_1 = aes.xor_interleaved(t0_0, t0_1, st.s2_0, st.s2_1)
+		t0_0, t0_1 = aes.xor_interleaved(t0_0, t0_1, st.s3_0, st.s3_1)
+
+		t1_0, t1_1 = aes.xor_interleaved(st.s4_0, st.s4_1, st.s5_0, st.s5_1)
+		t1_0, t1_1 = aes.xor_interleaved(t1_0, t1_1, st.s6_0, st.s6_1)
+		if len(tag) == TAG_SIZE_256 {
+			t1_0, t1_1 = aes.xor_interleaved(t1_0, t1_1, st.s7_0, st.s7_1)
+		}
+	case _RATE_256:
+		t_0, t_1 = aes.xor_interleaved(st.s3_0, st.s3_1, t_0, t_1)
+		for _ in 0 ..< 7 {
+			update_sw_256(st, t_0, t_1)
+		}
+
+		t0_0, t0_1 = aes.xor_interleaved(st.s0_0, st.s0_1, st.s1_0, st.s1_1)
+		t0_0, t0_1 = aes.xor_interleaved(t0_0, t0_1, st.s2_0, st.s2_1)
+
+		t1_0, t1_1 = aes.xor_interleaved(st.s3_0, st.s3_1, st.s4_0, st.s4_1)
+		t1_0, t1_1 = aes.xor_interleaved(t1_0, t1_1, st.s5_0, st.s5_1)
+	}
+	switch len(tag) {
+	case TAG_SIZE_128:
+		t0_0, t0_1 = aes.xor_interleaved(t0_0, t0_1, t1_0, t1_1)
+		aes.store_interleaved(tag, t0_0, t0_1)
+	case TAG_SIZE_256:
+		aes.store_interleaved(tag[:16], t0_0, t0_1)
+		aes.store_interleaved(tag[16:], t1_0, t1_1)
+	}
+}
+
+@(private)
+reset_state_sw :: proc "contextless" (st: ^State_SW) {
+	mem.zero_explicit(st, size_of(st^))
+}

+ 44 - 0
core/crypto/aegis/aegis_impl_hw_gen.odin

@@ -0,0 +1,44 @@
+#+build !amd64
+package aegis
+
+@(private = "file")
+ERR_HW_NOT_SUPPORTED :: "crypto/aegis: hardware implementation unsupported"
+
+@(private)
+State_HW :: struct {}
+
+// is_hardware_accelerated returns true iff hardware accelerated AEGIS
+// is supported.
+is_hardware_accelerated :: proc "contextless" () -> bool {
+	return false
+}
+
+@(private)
+init_hw :: proc "contextless" (ctx: ^Context, st: ^State_HW, iv: []byte) {
+	panic_contextless(ERR_HW_NOT_SUPPORTED)
+}
+
+@(private)
+absorb_hw :: proc "contextless" (st: ^State_HW, aad: []byte) {
+	panic_contextless(ERR_HW_NOT_SUPPORTED)
+}
+
+@(private)
+enc_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) {
+	panic_contextless(ERR_HW_NOT_SUPPORTED)
+}
+
+@(private)
+dec_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) {
+	panic_contextless(ERR_HW_NOT_SUPPORTED)
+}
+
+@(private)
+finalize_hw :: proc "contextless" (st: ^State_HW, tag: []byte, ad_len, msg_len: int) {
+	panic_contextless(ERR_HW_NOT_SUPPORTED)
+}
+
+@(private)
+reset_state_hw :: proc "contextless" (st: ^State_HW) {
+	panic_contextless(ERR_HW_NOT_SUPPORTED)
+}

+ 389 - 0
core/crypto/aegis/aegis_impl_hw_intel.odin

@@ -0,0 +1,389 @@
+#+build amd64
+package aegis
+
+import "base:intrinsics"
+import "core:crypto/aes"
+import "core:encoding/endian"
+import "core:mem"
+import "core:simd/x86"
+
+@(private)
+State_HW :: struct {
+	s0:   x86.__m128i,
+	s1:   x86.__m128i,
+	s2:   x86.__m128i,
+	s3:   x86.__m128i,
+	s4:   x86.__m128i,
+	s5:   x86.__m128i,
+	s6:   x86.__m128i,
+	s7:   x86.__m128i,
+	rate: int,
+}
+
+// is_hardware_accelerated returns true iff hardware accelerated AEGIS
+// is supported.
+is_hardware_accelerated :: proc "contextless" () -> bool {
+	return aes.is_hardware_accelerated()
+}
+
+@(private, enable_target_feature = "sse2,aes")
+init_hw :: proc "contextless" (ctx: ^Context, st: ^State_HW, iv: []byte) {
+	switch ctx._key_len {
+	case KEY_SIZE_128L:
+		key := intrinsics.unaligned_load((^x86.__m128i)(&ctx._key[0]))
+		iv := intrinsics.unaligned_load((^x86.__m128i)(raw_data(iv)))
+
+		st.s0 = x86._mm_xor_si128(key, iv)
+		st.s1 = intrinsics.unaligned_load((^x86.__m128i)(&_C1[0]))
+		st.s2 = intrinsics.unaligned_load((^x86.__m128i)(&_C0[0]))
+		st.s3 = st.s1
+		st.s4 = st.s0
+		st.s5 = x86._mm_xor_si128(key, st.s2) // key ^ C0
+		st.s6 = x86._mm_xor_si128(key, st.s1) // key ^ C1
+		st.s7 = st.s5
+		st.rate = _RATE_128L
+
+		for _ in 0 ..< 10 {
+			update_hw_128l(st, iv, key)
+		}
+	case KEY_SIZE_256:
+		k0 := intrinsics.unaligned_load((^x86.__m128i)(&ctx._key[0]))
+		k1 := intrinsics.unaligned_load((^x86.__m128i)(&ctx._key[16]))
+		n0 := intrinsics.unaligned_load((^x86.__m128i)(&iv[0]))
+		n1 := intrinsics.unaligned_load((^x86.__m128i)(&iv[16]))
+
+		st.s0 = x86._mm_xor_si128(k0, n0)
+		st.s1 = x86._mm_xor_si128(k1, n1)
+		st.s2 = intrinsics.unaligned_load((^x86.__m128i)(&_C1[0]))
+		st.s3 = intrinsics.unaligned_load((^x86.__m128i)(&_C0[0]))
+		st.s4 = x86._mm_xor_si128(k0, st.s3) // k0 ^ C0
+		st.s5 = x86._mm_xor_si128(k1, st.s2) // k1 ^ C1
+		st.rate = _RATE_256
+
+		u0, u1 := st.s0, st.s1
+		for _ in 0 ..< 4 {
+			update_hw_256(st, k0)
+			update_hw_256(st, k1)
+			update_hw_256(st, u0)
+			update_hw_256(st, u1)
+		}
+	}
+}
+
+@(private = "file", enable_target_feature = "sse2,aes")
+update_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, m0, m1: x86.__m128i) {
+	s0_ := x86._mm_aesenc_si128(st.s7, x86._mm_xor_si128(st.s0, m0))
+	s1_ := x86._mm_aesenc_si128(st.s0, st.s1)
+	s2_ := x86._mm_aesenc_si128(st.s1, st.s2)
+	s3_ := x86._mm_aesenc_si128(st.s2, st.s3)
+	s4_ := x86._mm_aesenc_si128(st.s3, x86._mm_xor_si128(st.s4, m1))
+	s5_ := x86._mm_aesenc_si128(st.s4, st.s5)
+	s6_ := x86._mm_aesenc_si128(st.s5, st.s6)
+	s7_ := x86._mm_aesenc_si128(st.s6, st.s7)
+	st.s0, st.s1, st.s2, st.s3, st.s4, st.s5, st.s6, st.s7 = s0_, s1_, s2_, s3_, s4_, s5_, s6_, s7_
+}
+
+@(private = "file", enable_target_feature = "sse2,aes")
+update_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, m: x86.__m128i) {
+	s0_ := x86._mm_aesenc_si128(st.s5, x86._mm_xor_si128(st.s0, m))
+	s1_ := x86._mm_aesenc_si128(st.s0, st.s1)
+	s2_ := x86._mm_aesenc_si128(st.s1, st.s2)
+	s3_ := x86._mm_aesenc_si128(st.s2, st.s3)
+	s4_ := x86._mm_aesenc_si128(st.s3, st.s4)
+	s5_ := x86._mm_aesenc_si128(st.s4, st.s5)
+	st.s0, st.s1, st.s2, st.s3, st.s4, st.s5 = s0_, s1_, s2_, s3_, s4_, s5_
+}
+
+@(private = "file", enable_target_feature = "sse2,aes")
+absorb_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, ai: []byte) {
+	t0 := intrinsics.unaligned_load((^x86.__m128i)(&ai[0]))
+	t1 := intrinsics.unaligned_load((^x86.__m128i)(&ai[16]))
+	update_hw_128l(st, t0, t1)
+}
+
+@(private = "file", enable_target_feature = "sse2,aes")
+absorb_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, ai: []byte) {
+	m := intrinsics.unaligned_load((^x86.__m128i)(&ai[0]))
+	update_hw_256(st, m)
+}
+
+@(private, enable_target_feature = "sse2,aes")
+absorb_hw :: proc "contextless" (st: ^State_HW, aad: []byte) #no_bounds_check {
+	ai, l := aad, len(aad)
+
+	switch st.rate {
+	case _RATE_128L:
+		for l >= _RATE_128L {
+			absorb_hw_128l(st, ai)
+			ai = ai[_RATE_128L:]
+			l -= _RATE_128L
+		}
+	case _RATE_256:
+		for l >= _RATE_256 {
+			absorb_hw_256(st, ai)
+
+			ai = ai[_RATE_256:]
+			l -= _RATE_256
+		}
+	}
+
+	// Pad out the remainder with `0`s till it is rate sized.
+	if l > 0 {
+		tmp: [_RATE_MAX]byte // AAD is not confidential.
+		copy(tmp[:], ai)
+		switch st.rate {
+		case _RATE_128L:
+			absorb_hw_128l(st, tmp[:])
+		case _RATE_256:
+			absorb_hw_256(st, tmp[:])
+		}
+	}
+}
+
+@(private = "file", enable_target_feature = "sse2", require_results)
+z_hw_128l :: #force_inline proc "contextless" (st: ^State_HW) -> (x86.__m128i, x86.__m128i) {
+	z0 := x86._mm_xor_si128(
+		st.s6,
+		x86._mm_xor_si128(
+			st.s1,
+			x86._mm_and_si128(st.s2, st.s3),
+		),
+	)
+	z1 := x86._mm_xor_si128(
+		st.s2,
+		x86._mm_xor_si128(
+			st.s5,
+			x86._mm_and_si128(st.s6, st.s7),
+		),
+	)
+	return z0, z1
+}
+
+@(private = "file", enable_target_feature = "sse2", require_results)
+z_hw_256 :: #force_inline proc "contextless" (st: ^State_HW) -> x86.__m128i {
+	return x86._mm_xor_si128(
+		st.s1,
+		x86._mm_xor_si128(
+			st.s4,
+			x86._mm_xor_si128(
+				st.s5,
+				x86._mm_and_si128(st.s2, st.s3),
+			),
+		),
+	)
+}
+
+@(private = "file", enable_target_feature = "sse2,aes")
+enc_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, ci, xi: []byte) #no_bounds_check {
+	z0, z1 := z_hw_128l(st)
+
+	t0 := intrinsics.unaligned_load((^x86.__m128i)(&xi[0]))
+	t1 := intrinsics.unaligned_load((^x86.__m128i)(&xi[16]))
+	update_hw_128l(st, t0, t1)
+
+	out0 := x86._mm_xor_si128(t0, z0)
+	out1 := x86._mm_xor_si128(t1, z1)
+	intrinsics.unaligned_store((^x86.__m128i)(&ci[0]), out0)
+	intrinsics.unaligned_store((^x86.__m128i)(&ci[16]), out1)
+}
+
+@(private = "file", enable_target_feature = "sse2,aes")
+enc_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, ci, xi: []byte) #no_bounds_check {
+	z := z_hw_256(st)
+
+	xi_ := intrinsics.unaligned_load((^x86.__m128i)(raw_data(xi)))
+	update_hw_256(st, xi_)
+
+	ci_ := x86._mm_xor_si128(xi_, z)
+	intrinsics.unaligned_store((^x86.__m128i)(raw_data(ci)), ci_)
+}
+
+@(private, enable_target_feature = "sse2,aes")
+enc_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) #no_bounds_check {
+	ci, xi, l := dst, src, len(src)
+
+	switch st.rate {
+	case _RATE_128L:
+		for l >= _RATE_128L {
+			enc_hw_128l(st, ci, xi)
+			ci = ci[_RATE_128L:]
+			xi = xi[_RATE_128L:]
+			l -= _RATE_128L
+		}
+	case _RATE_256:
+		for l >= _RATE_256 {
+			enc_hw_256(st, ci, xi)
+			ci = ci[_RATE_256:]
+			xi = xi[_RATE_256:]
+			l -= _RATE_256
+		}
+	}
+
+	// Pad out the remainder with `0`s till it is rate sized.
+	if l > 0 {
+		tmp: [_RATE_MAX]byte // Ciphertext is not confidential.
+		copy(tmp[:], xi)
+		switch st.rate {
+		case _RATE_128L:
+			enc_hw_128l(st, tmp[:], tmp[:])
+		case _RATE_256:
+			enc_hw_256(st, tmp[:], tmp[:])
+		}
+		copy(ci, tmp[:l])
+	}
+}
+
+@(private = "file", enable_target_feature = "sse2,aes")
+dec_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, xi, ci: []byte) #no_bounds_check {
+	z0, z1 := z_hw_128l(st)
+
+	t0 := intrinsics.unaligned_load((^x86.__m128i)(&ci[0]))
+	t1 := intrinsics.unaligned_load((^x86.__m128i)(&ci[16]))
+	out0 := x86._mm_xor_si128(t0, z0)
+	out1 := x86._mm_xor_si128(t1, z1)
+
+	update_hw_128l(st, out0, out1)
+	intrinsics.unaligned_store((^x86.__m128i)(&xi[0]), out0)
+	intrinsics.unaligned_store((^x86.__m128i)(&xi[16]), out1)
+}
+
+@(private = "file", enable_target_feature = "sse2,aes")
+dec_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, xi, ci: []byte) #no_bounds_check {
+	z := z_hw_256(st)
+
+	ci_ := intrinsics.unaligned_load((^x86.__m128i)(raw_data(ci)))
+	xi_ := x86._mm_xor_si128(ci_, z)
+
+	update_hw_256(st, xi_)
+	intrinsics.unaligned_store((^x86.__m128i)(raw_data(xi)), xi_)
+}
+
+@(private = "file", enable_target_feature = "sse2,aes")
+dec_partial_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, xn, cn: []byte) #no_bounds_check {
+	tmp: [_RATE_128L]byte
+	defer mem.zero_explicit(&tmp, size_of(tmp))
+
+	z0, z1 := z_hw_128l(st)
+	copy(tmp[:], cn)
+
+	t0 := intrinsics.unaligned_load((^x86.__m128i)(&tmp[0]))
+	t1 := intrinsics.unaligned_load((^x86.__m128i)(&tmp[16]))
+	out0 := x86._mm_xor_si128(t0, z0)
+	out1 := x86._mm_xor_si128(t1, z1)
+
+	intrinsics.unaligned_store((^x86.__m128i)(&tmp[0]), out0)
+	intrinsics.unaligned_store((^x86.__m128i)(&tmp[16]), out1)
+	copy(xn, tmp[:])
+
+	for off := len(xn); off < _RATE_128L; off += 1 {
+		tmp[off] = 0
+	}
+	out0 = intrinsics.unaligned_load((^x86.__m128i)(&tmp[0])) // v0
+	out1 = intrinsics.unaligned_load((^x86.__m128i)(&tmp[16])) // v1
+	update_hw_128l(st, out0, out1)
+}
+
+@(private = "file", enable_target_feature = "sse2,aes")
+dec_partial_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, xn, cn: []byte) #no_bounds_check {
+	tmp: [_RATE_256]byte
+	defer mem.zero_explicit(&tmp, size_of(tmp))
+
+	z := z_hw_256(st)
+	copy(tmp[:], cn)
+
+	cn_ := intrinsics.unaligned_load((^x86.__m128i)(&tmp[0]))
+	xn_ := x86._mm_xor_si128(cn_, z)
+
+	intrinsics.unaligned_store((^x86.__m128i)(&tmp[0]), xn_)
+	copy(xn, tmp[:])
+
+	for off := len(xn); off < _RATE_256; off += 1 {
+		tmp[off] = 0
+	}
+	xn_ = intrinsics.unaligned_load((^x86.__m128i)(&tmp[0]))
+	update_hw_256(st, xn_)
+}
+
+@(private, enable_target_feature = "sse2,aes")
+dec_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) #no_bounds_check {
+	xi, ci, l := dst, src, len(src)
+
+	switch st.rate {
+	case _RATE_128L:
+		for l >= _RATE_128L {
+			dec_hw_128l(st, xi, ci)
+			xi = xi[_RATE_128L:]
+			ci = ci[_RATE_128L:]
+			l -= _RATE_128L
+		}
+	case _RATE_256:
+		for l >= _RATE_256 {
+			dec_hw_256(st, xi, ci)
+			xi = xi[_RATE_256:]
+			ci = ci[_RATE_256:]
+			l -= _RATE_256
+		}
+	}
+
+	// Process the remainder.
+	if l > 0 {
+		switch st.rate {
+		case _RATE_128L:
+			dec_partial_hw_128l(st, xi, ci)
+		case _RATE_256:
+			dec_partial_hw_256(st, xi, ci)
+		}
+	}
+}
+
+@(private, enable_target_feature = "sse2,aes")
+finalize_hw :: proc "contextless" (st: ^State_HW, tag: []byte, ad_len, msg_len: int) {
+	tmp: [16]byte
+	endian.unchecked_put_u64le(tmp[0:], u64(ad_len) * 8)
+	endian.unchecked_put_u64le(tmp[8:], u64(msg_len) * 8)
+
+	t := intrinsics.unaligned_load((^x86.__m128i)(&tmp[0]))
+
+	t0, t1: x86.__m128i = ---, ---
+	switch st.rate {
+	case _RATE_128L:
+		t = x86._mm_xor_si128(st.s2, t)
+		for _ in 0 ..< 7 {
+			update_hw_128l(st, t, t)
+		}
+
+		t0 = x86._mm_xor_si128(st.s0, st.s1)
+		t0 = x86._mm_xor_si128(t0, st.s2)
+		t0 = x86._mm_xor_si128(t0, st.s3)
+
+		t1 = x86._mm_xor_si128(st.s4, st.s5)
+		t1 = x86._mm_xor_si128(t1, st.s6)
+		if len(tag) == TAG_SIZE_256 {
+			t1 = x86._mm_xor_si128(t1, st.s7)
+		}
+	case _RATE_256:
+		t = x86._mm_xor_si128(st.s3, t)
+		for _ in 0 ..< 7 {
+			update_hw_256(st, t)
+		}
+
+		t0 = x86._mm_xor_si128(st.s0, st.s1)
+		t0 = x86._mm_xor_si128(t0, st.s2)
+
+		t1 = x86._mm_xor_si128(st.s3, st.s4)
+		t1 = x86._mm_xor_si128(t1, st.s5)
+	}
+	switch len(tag) {
+	case TAG_SIZE_128:
+		t0 = x86._mm_xor_si128(t0, t1)
+		intrinsics.unaligned_store((^x86.__m128i)(&tag[0]), t0)
+	case TAG_SIZE_256:
+		intrinsics.unaligned_store((^x86.__m128i)(&tag[0]), t0)
+		intrinsics.unaligned_store((^x86.__m128i)(&tag[16]), t1)
+	}
+}
+
+@(private)
+reset_state_hw :: proc "contextless" (st: ^State_HW) {
+	mem.zero_explicit(st, size_of(st^))
+}

+ 4 - 8
core/crypto/aes/aes_ctr.odin

@@ -21,9 +21,7 @@ Context_CTR :: struct {
 
 // init_ctr initializes a Context_CTR with the provided key and IV.
 init_ctr :: proc(ctx: ^Context_CTR, key, iv: []byte, impl := DEFAULT_IMPLEMENTATION) {
-	if len(iv) != CTR_IV_SIZE {
-		panic("crypto/aes: invalid CTR IV size")
-	}
+	ensure(len(iv) == CTR_IV_SIZE, "crypto/aes: invalid CTR IV size")
 
 	init_impl(&ctx._impl, key, impl)
 	ctx._off = BLOCK_SIZE
@@ -36,16 +34,14 @@ init_ctr :: proc(ctx: ^Context_CTR, key, iv: []byte, impl := DEFAULT_IMPLEMENTAT
 // keystream, and writes the resulting output to dst.  dst and src MUST
 // alias exactly or not at all.
 xor_bytes_ctr :: proc(ctx: ^Context_CTR, dst, src: []byte) {
-	assert(ctx._is_initialized)
+	ensure(ctx._is_initialized)
 
 	src, dst := src, dst
 	if dst_len := len(dst); dst_len < len(src) {
 		src = src[:dst_len]
 	}
 
-	if bytes.alias_inexactly(dst, src) {
-		panic("crypto/aes: dst and src alias inexactly")
-	}
+	ensure(!bytes.alias_inexactly(dst, src), "crypto/aes: dst and src alias inexactly")
 
 	#no_bounds_check for remaining := len(src); remaining > 0; {
 		// Process multiple blocks at once
@@ -82,7 +78,7 @@ xor_bytes_ctr :: proc(ctx: ^Context_CTR, dst, src: []byte) {
 
 // keystream_bytes_ctr fills dst with the raw AES-CTR keystream output.
 keystream_bytes_ctr :: proc(ctx: ^Context_CTR, dst: []byte) {
-	assert(ctx._is_initialized)
+	ensure(ctx._is_initialized)
 
 	dst := dst
 	#no_bounds_check for remaining := len(dst); remaining > 0; {

+ 6 - 10
core/crypto/aes/aes_ecb.odin

@@ -19,11 +19,9 @@ init_ecb :: proc(ctx: ^Context_ECB, key: []byte, impl := DEFAULT_IMPLEMENTATION)
 
 // encrypt_ecb encrypts the BLOCK_SIZE buffer src, and writes the result to dst.
 encrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) {
-	assert(ctx._is_initialized)
-
-	if len(dst) != BLOCK_SIZE || len(src) != BLOCK_SIZE {
-		panic("crypto/aes: invalid buffer size(s)")
-	}
+	ensure(ctx._is_initialized)
+	ensure(len(dst) == BLOCK_SIZE, "crypto/aes: invalid dst size")
+	ensure(len(src) == BLOCK_SIZE, "crypto/aes: invalid src size")
 
 	switch &impl in ctx._impl {
 	case ct64.Context:
@@ -35,11 +33,9 @@ encrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) {
 
 // decrypt_ecb decrypts the BLOCK_SIZE buffer src, and writes the result to dst.
 decrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) {
-	assert(ctx._is_initialized)
-
-	if len(dst) != BLOCK_SIZE || len(src) != BLOCK_SIZE {
-		panic("crypto/aes: invalid buffer size(s)")
-	}
+	ensure(ctx._is_initialized)
+	ensure(len(dst) == BLOCK_SIZE, "crypto/aes: invalid dst size")
+	ensure(len(src) == BLOCK_SIZE, "crypto/aes: invalid src size")
 
 	switch &impl in ctx._impl {
 	case ct64.Context:

+ 10 - 26
core/crypto/aes/aes_gcm.odin

@@ -36,15 +36,11 @@ init_gcm :: proc(ctx: ^Context_GCM, key: []byte, impl := DEFAULT_IMPLEMENTATION)
 //
 // dst and plaintext MUST alias exactly or not at all.
 seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, iv, aad, plaintext: []byte) {
-	assert(ctx._is_initialized)
+	ensure(ctx._is_initialized)
 
 	gcm_validate_common_slice_sizes(tag, iv, aad, plaintext)
-	if len(dst) != len(plaintext) {
-		panic("crypto/aes: invalid destination ciphertext size")
-	}
-	if bytes.alias_inexactly(dst, plaintext) {
-		panic("crypto/aes: dst and plaintext alias inexactly")
-	}
+	ensure(len(dst) == len(plaintext), "crypto/aes: invalid destination ciphertext size")
+	ensure(!bytes.alias_inexactly(dst, plaintext), "crypto/aes: dst and plaintext alias inexactly")
 
 	if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
 		gcm_seal_hw(&impl, dst, tag, iv, aad, plaintext)
@@ -76,15 +72,11 @@ seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, iv, aad, plaintext: []byte) {
 // dst and plaintext MUST alias exactly or not at all.
 @(require_results)
 open_gcm :: proc(ctx: ^Context_GCM, dst, iv, aad, ciphertext, tag: []byte) -> bool {
-	assert(ctx._is_initialized)
+	ensure(ctx._is_initialized)
 
 	gcm_validate_common_slice_sizes(tag, iv, aad, ciphertext)
-	if len(dst) != len(ciphertext) {
-		panic("crypto/aes: invalid destination plaintext size")
-	}
-	if bytes.alias_inexactly(dst, ciphertext) {
-		panic("crypto/aes: dst and ciphertext alias inexactly")
-	}
+	ensure(len(dst) == len(ciphertext), "crypto/aes: invalid destination plaintext size")
+	ensure(!bytes.alias_inexactly(dst, ciphertext), "crypto/aes: dst and ciphertext alias inexactly")
 
 	if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
 		return gcm_open_hw(&impl, dst, iv, aad, ciphertext, tag)
@@ -122,21 +114,13 @@ reset_gcm :: proc "contextless" (ctx: ^Context_GCM) {
 
 @(private = "file")
 gcm_validate_common_slice_sizes :: proc(tag, iv, aad, text: []byte) {
-	if len(tag) != GCM_TAG_SIZE {
-		panic("crypto/aes: invalid GCM tag size")
-	}
+	ensure(len(tag) == GCM_TAG_SIZE, "crypto/aes: invalid GCM tag size")
 
 	// The specification supports IVs in the range [1, 2^64) bits.
-	if l := len(iv); l == 0 || u64(l) >= GCM_IV_SIZE_MAX {
-		panic("crypto/aes: invalid GCM IV size")
-	}
+	ensure(len(iv) > 0 && u64(len(iv)) < GCM_IV_SIZE_MAX, "crypto/aes: invalid GCM IV size")
 
-	if aad_len := u64(len(aad)); aad_len > GCM_A_MAX {
-		panic("crypto/aes: oversized GCM aad")
-	}
-	if text_len := u64(len(text)); text_len > GCM_P_MAX {
-		panic("crypto/aes: oversized GCM src data")
-	}
+	ensure(u64(len(aad)) <= GCM_A_MAX, "crypto/aes: oversized GCM aad")
+	ensure(u64(len(text)) <= GCM_P_MAX, "crypto/aes: oversized GCM data")
 }
 
 @(private = "file")

+ 1 - 1
core/crypto/aes/aes_gcm_hw_intel.odin

@@ -235,7 +235,7 @@ gctr_hw :: proc(
 // BUG: Sticking this in gctr_hw (like the other implementations) crashes
 // the compiler.
 //
-// src/check_expr.cpp(7892): Assertion Failure: `c->curr_proc_decl->entity`
+// src/check_expr.cpp(8104): Assertion Failure: `c->curr_proc_decl->entity`
 @(private = "file", enable_target_feature = "sse4.1")
 hw_inc_ctr32 :: #force_inline proc "contextless" (src: ^x86.__m128i, ctr: u32) -> (x86.__m128i, u32) {
 	ret := x86._mm_insert_epi32(src^, i32(intrinsics.byte_swap(ctr)), 3)

+ 5 - 3
core/crypto/blake2b/blake2b.odin

@@ -18,7 +18,7 @@ package blake2b
 import "../_blake2"
 
 // DIGEST_SIZE is the BLAKE2b digest size in bytes.
-DIGEST_SIZE :: 64
+DIGEST_SIZE :: _blake2.BLAKE2B_SIZE
 
 // BLOCK_SIZE is the BLAKE2b block size in bytes.
 BLOCK_SIZE :: _blake2.BLAKE2B_BLOCK_SIZE
@@ -27,9 +27,11 @@ BLOCK_SIZE :: _blake2.BLAKE2B_BLOCK_SIZE
 Context :: _blake2.Blake2b_Context
 
 // init initializes a Context with the default BLAKE2b config.
-init :: proc(ctx: ^Context) {
+init :: proc(ctx: ^Context, digest_size := DIGEST_SIZE) {
+	ensure(digest_size <= _blake2.MAX_SIZE, "crypto/blake2b: invalid digest size")
+
 	cfg: _blake2.Blake2_Config
-	cfg.size = _blake2.BLAKE2B_SIZE
+	cfg.size = u8(digest_size)
 	_blake2.init(ctx, &cfg)
 }
 

+ 5 - 3
core/crypto/blake2s/blake2s.odin

@@ -18,7 +18,7 @@ package blake2s
 import "../_blake2"
 
 // DIGEST_SIZE is the BLAKE2s digest size in bytes.
-DIGEST_SIZE :: 32
+DIGEST_SIZE :: _blake2.BLAKE2S_SIZE
 
 // BLOCK_SIZE is the BLAKE2s block size in bytes.
 BLOCK_SIZE :: _blake2.BLAKE2S_BLOCK_SIZE
@@ -27,9 +27,11 @@ BLOCK_SIZE :: _blake2.BLAKE2S_BLOCK_SIZE
 Context :: _blake2.Blake2s_Context
 
 // init initializes a Context with the default BLAKE2s config.
-init :: proc(ctx: ^Context) {
+init :: proc(ctx: ^Context, digest_size := DIGEST_SIZE) {
+	ensure(digest_size <= _blake2.MAX_SIZE, "crypto/blake2s: invalid digest size")
+
 	cfg: _blake2.Blake2_Config
-	cfg.size = _blake2.BLAKE2S_SIZE
+	cfg.size = u8(digest_size)
 	_blake2.init(ctx, &cfg)
 }
 

+ 5 - 11
core/crypto/chacha20/chacha20.odin

@@ -27,12 +27,8 @@ Context :: struct {
 // init initializes a Context for ChaCha20 or XChaCha20 with the provided
 // key and iv.
 init :: proc(ctx: ^Context, key, iv: []byte, impl := DEFAULT_IMPLEMENTATION) {
-	if len(key) != KEY_SIZE {
-		panic("crypto/chacha20: invalid (X)ChaCha20 key size")
-	}
-	if l := len(iv); l != IV_SIZE && l != XIV_SIZE {
-		panic("crypto/chacha20: invalid (X)ChaCha20 IV size")
-	}
+	ensure(len(key) == KEY_SIZE, "crypto/chacha20: invalid (X)ChaCha20 key size")
+	ensure(len(iv) == IV_SIZE || len(iv) == XIV_SIZE, "crypto/chacha20: invalid (X)ChaCha20 IV size")
 
 	k, n := key, iv
 
@@ -67,16 +63,14 @@ seek :: proc(ctx: ^Context, block_nr: u64) {
 // keystream, and writes the resulting output to dst.  Dst and src MUST
 // alias exactly or not at all.
 xor_bytes :: proc(ctx: ^Context, dst, src: []byte) {
-	assert(ctx._state._is_initialized)
+	ensure(ctx._state._is_initialized)
 
 	src, dst := src, dst
 	if dst_len := len(dst); dst_len < len(src) {
 		src = src[:dst_len]
 	}
 
-	if bytes.alias_inexactly(dst, src) {
-		panic("crypto/chacha20: dst and src alias inexactly")
-	}
+	ensure(!bytes.alias_inexactly(dst, src), "crypto/chacha20: dst and src alias inexactly")
 
 	st := &ctx._state
 	#no_bounds_check for remaining := len(src); remaining > 0; {
@@ -114,7 +108,7 @@ xor_bytes :: proc(ctx: ^Context, dst, src: []byte) {
 
 // keystream_bytes fills dst with the raw (X)ChaCha20 keystream output.
 keystream_bytes :: proc(ctx: ^Context, dst: []byte) {
-	assert(ctx._state._is_initialized)
+	ensure(ctx._state._is_initialized)
 
 	dst, st := dst, &ctx._state
 	#no_bounds_check for remaining := len(dst); remaining > 0; {

+ 11 - 19
core/crypto/chacha20poly1305/chacha20poly1305.odin

@@ -29,13 +29,9 @@ _P_MAX :: 64 * 0xffffffff // 64 * (2^32-1)
 
 @(private)
 _validate_common_slice_sizes :: proc (tag, iv, aad, text: []byte, is_xchacha: bool) {
-	if len(tag) != TAG_SIZE {
-		panic("crypto/chacha20poly1305: invalid destination tag size")
-	}
 	expected_iv_len := is_xchacha ? XIV_SIZE : IV_SIZE
-	if len(iv) != expected_iv_len {
-		panic("crypto/chacha20poly1305: invalid IV size")
-	}
+	ensure(len(tag) == TAG_SIZE, "crypto/chacha20poly1305: invalid destination tag size")
+	ensure(len(iv) == expected_iv_len, "crypto/chacha20poly1305: invalid IV size")
 
 	#assert(size_of(int) == 8 || size_of(int) <= 4)
 	when size_of(int) == 8 {
@@ -45,13 +41,11 @@ _validate_common_slice_sizes :: proc (tag, iv, aad, text: []byte, is_xchacha: bo
 		// A_MAX is limited by size_of(int), so there is no need to
 		// enforce it. P_MAX only needs to be checked on 64-bit targets,
 		// for reasons that should be obvious.
-		if text_len := len(text); text_len > _P_MAX {
-			panic("crypto/chacha20poly1305: oversized src data")
-		}
+		ensure(len(text) <= _P_MAX, "crypto/chacha20poly1305: oversized src data")
 	}
 }
 
-@(private)
+@(private, rodata)
 _PAD: [16]byte
 
 @(private)
@@ -71,9 +65,7 @@ Context :: struct {
 
 // init initializes a Context with the provided key, for AEAD_CHACHA20_POLY1305.
 init :: proc(ctx: ^Context, key: []byte, impl := chacha20.DEFAULT_IMPLEMENTATION) {
-	if len(key) != KEY_SIZE {
-		panic("crypto/chacha20poly1305: invalid key size")
-	}
+	ensure(len(key) == KEY_SIZE, "crypto/chacha20poly1305: invalid key size")
 
 	copy(ctx._key[:], key)
 	ctx._impl = impl
@@ -96,11 +88,11 @@ init_xchacha :: proc(ctx: ^Context, key: []byte, impl := chacha20.DEFAULT_IMPLEM
 //
 // dst and plaintext MUST alias exactly or not at all.
 seal :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
+	ensure(ctx._is_initialized)
+
 	ciphertext := dst
 	_validate_common_slice_sizes(tag, iv, aad, plaintext, ctx._is_xchacha)
-	if len(ciphertext) != len(plaintext) {
-		panic("crypto/chacha20poly1305: invalid destination ciphertext size")
-	}
+	ensure(len(ciphertext) == len(plaintext), "crypto/chacha20poly1305: invalid destination ciphertext size")
 
 	stream_ctx: chacha20.Context = ---
 	chacha20.init(&stream_ctx, ctx._key[:],iv, ctx._impl)
@@ -151,11 +143,11 @@ seal :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
 // dst and plaintext MUST alias exactly or not at all.
 @(require_results)
 open :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
+	ensure(ctx._is_initialized)
+
 	plaintext := dst
 	_validate_common_slice_sizes(tag, iv, aad, ciphertext, ctx._is_xchacha)
-	if len(ciphertext) != len(plaintext) {
-		panic("crypto/chacha20poly1305: invalid destination plaintext size")
-	}
+	ensure(len(ciphertext) == len(plaintext), "crypto/chacha20poly1305: invalid destination plaintext size")
 
 	// Note: Unlike encrypt, this can fail early, so use defer for
 	// sanitization rather than assuming control flow reaches certain

+ 280 - 0
core/crypto/deoxysii/deoxysii.odin

@@ -0,0 +1,280 @@
+/*
+package deoxysii implements the Deoxys-II-256 Authenticated Encryption
+with Additional Data algorithm.
+
+- [[ https://sites.google.com/view/deoxyscipher ]]
+- [[ https://thomaspeyrin.github.io/web/assets/docs/papers/Jean-etal-JoC2021.pdf ]]
+*/
+package deoxysii
+
+import "base:intrinsics"
+import "core:bytes"
+import "core:crypto/aes"
+import "core:mem"
+import "core:simd"
+
+// KEY_SIZE is the Deoxys-II-256 key size in bytes.
+KEY_SIZE :: 32
+// IV_SIZE is the Deoxys-II-256 IV size in bytes.
+IV_SIZE :: 15 // 120-bits
+// TAG_SIZE is the Deoxys-II-256 tag size in bytes.
+TAG_SIZE :: 16
+
+@(private)
+PREFIX_AD_BLOCK :: 0b0010
+@(private)
+PREFIX_AD_FINAL :: 0b0110
+@(private)
+PREFIX_MSG_BLOCK :: 0b0000
+@(private)
+PREFIX_MSG_FINAL :: 0b0100
+@(private)
+PREFIX_TAG :: 0b0001
+@(private)
+PREFIX_SHIFT :: 4
+
+@(private)
+BC_ROUNDS :: 16
+@(private)
+BLOCK_SIZE :: aes.BLOCK_SIZE
+
+@(private = "file")
+_LFSR2_MASK :: simd.u8x16{
+	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+}
+@(private = "file")
+_LFSR3_MASK :: simd.u8x16{
+	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+}
+@(private = "file")
+_LFSR_SH1 :: _LFSR2_MASK
+@(private = "file")
+_LFSR_SH5 :: simd.u8x16{
+	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+}
+@(private = "file")
+_LFSR_SH7 :: simd.u8x16{
+	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+}
+@(private = "file", rodata)
+_RCONS := []byte {
+	0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a,
+	0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39,
+	0x72,
+}
+
+// Context is a keyed Deoxys-II-256 instance.
+Context :: struct {
+	_subkeys:        [BC_ROUNDS+1][16]byte,
+	_impl:           aes.Implementation,
+	_is_initialized: bool,
+}
+
+@(private)
+_validate_common_slice_sizes :: proc (ctx: ^Context, tag, iv, aad, text: []byte) {
+	ensure(len(tag) == TAG_SIZE, "crypto/deoxysii: invalid tag size")
+	ensure(len(iv) == IV_SIZE, "crypto/deoxysii: invalid IV size")
+
+	#assert(size_of(int) == 8 || size_of(int) <= 4)
+	// For the nonce-misuse resistant mode, the total size of the
+	// associated data and the total size of the message do not exceed
+	// `16 * 2^max_l * 2^max_m bytes`, thus 2^128 bytes for all variants
+	// of Deoxys-II. Moreover, the maximum number of messages that can
+	// be handled for a same key is 2^max_m, that is 2^64 for all variants
+	// of Deoxys.
+}
+
+// init initializes a Context with the provided key.
+init :: proc(ctx: ^Context, key: []byte, impl := aes.DEFAULT_IMPLEMENTATION) {
+	ensure(len(key) == KEY_SIZE, "crypto/deoxysii: invalid key size")
+
+	ctx._impl = impl
+	if ctx._impl == .Hardware && !is_hardware_accelerated() {
+		ctx._impl = .Portable
+	}
+
+	derive_ks(ctx, key)
+
+	ctx._is_initialized = true
+}
+
+// seal encrypts the plaintext and authenticates the aad and ciphertext,
+// with the provided Context and iv, stores the output in dst and tag.
+//
+// dst and plaintext MUST alias exactly or not at all.
+seal :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
+	ensure(ctx._is_initialized)
+
+	_validate_common_slice_sizes(ctx, tag, iv, aad, plaintext)
+	ensure(len(dst) == len(plaintext), "crypto/deoxysii: invalid destination ciphertext size")
+	ensure(!bytes.alias_inexactly(dst, plaintext), "crypto/deoxysii: dst and plaintext alias inexactly")
+
+	switch ctx._impl {
+	case .Hardware:
+		e_hw(ctx, dst, tag, iv, aad, plaintext)
+	case .Portable:
+		e_ref(ctx, dst, tag, iv, aad, plaintext)
+	}
+}
+
+// open authenticates the aad and ciphertext, and decrypts the ciphertext,
+// with the provided Context, iv, and tag, and stores the output in dst,
+// returning true iff the authentication was successful.  If authentication
+// fails, the destination buffer will be zeroed.
+//
+// dst and plaintext MUST alias exactly or not at all.
+@(require_results)
+open :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
+	ensure(ctx._is_initialized)
+
+	_validate_common_slice_sizes(ctx, tag, iv, aad, ciphertext)
+	ensure(len(dst) == len(ciphertext), "crypto/deoxysii: invalid destination plaintext size")
+	ensure(!bytes.alias_inexactly(dst, ciphertext), "crypto/deoxysii: dst and ciphertext alias inexactly")
+
+	ok: bool
+	switch ctx._impl {
+	case .Hardware:
+		ok = d_hw(ctx, dst, iv, aad, ciphertext, tag)
+	case .Portable:
+		ok = d_ref(ctx, dst, iv, aad, ciphertext, tag)
+	}
+	if !ok {
+		mem.zero_explicit(raw_data(dst), len(ciphertext))
+	}
+
+	return ok
+}
+
+// reset sanitizes the Context.  The Context must be
+// re-initialized to be used again.
+reset :: proc "contextless" (ctx: ^Context) {
+	mem.zero_explicit(&ctx._subkeys, len(ctx._subkeys))
+	ctx._is_initialized = false
+}
+
+@(private = "file")
+derive_ks :: proc "contextless" (ctx: ^Context, key: []byte) {
+	// Derive the constant component of each subtweakkey.
+	//
+	// The key schedule is as thus:
+	//
+	//   STK_i = TK1_i ^ TK2_i ^ TK3_i ^ RC_i
+	//
+	//   TK1_i = h(TK1_(i-1))
+	//   TK2_i = h(LFSR2(TK2_(i-1)))
+	//   TK3_i = h(LFSR3(TK3_(i-1)))
+	//
+	// where:
+	//
+	//   KT = K || T
+	//   W3 = KT[:16]
+	//   W2 = KT[16:32]
+	//   W1 = KT[32:]
+	//
+	//   TK1_0 = W1
+	//   TK2_0 = W2
+	//   TK3_0 = W3
+	//
+	// As `K` is fixed per Context, the XORs of `TK3_0 .. TK3_n`,
+	// `TK2_0 .. TK2_n` and RC_i can be precomputed in advance like
+	// thus:
+	//
+	//   subkey_i = TK3_i ^ TK2_i ^ RC_i
+	//
+	// When it is time to actually call Deoxys-BC-384, it is then
+	// a simple matter of deriving each round subtweakkey via:
+	//
+	//   TK1_0 = T (Tweak)
+	//   STK_0 = subkey_0 ^ TK1_0
+	//   STK_i = subkey_i (precomputed) ^ H(TK1_(i-1))
+	//
+	// We opt to use SIMD here and for the subtweakkey derivation
+	// as `H()` is typically a single vector instruction.
+
+	tk2 := intrinsics.unaligned_load((^simd.u8x16)(raw_data(key[16:])))
+	tk3 := intrinsics.unaligned_load((^simd.u8x16)(raw_data(key)))
+
+	// subkey_0 does not apply LFSR2/3 or H.
+	intrinsics.unaligned_store(
+		(^simd.u8x16)(&ctx._subkeys[0]),
+		simd.bit_xor(
+			tk2,
+			simd.bit_xor(
+				tk3,
+				rcon(0),
+			),
+		),
+	)
+
+	// Precompute k_1 .. k_16.
+	for i in 1 ..< BC_ROUNDS+1 {
+		tk2 = h(lfsr2(tk2))
+		tk3 = h(lfsr3(tk3))
+		intrinsics.unaligned_store(
+			(^simd.u8x16)(&ctx._subkeys[i]),
+			simd.bit_xor(
+				tk2,
+				simd.bit_xor(
+					tk3,
+					rcon(i),
+				),
+			),
+		)
+	}
+}
+
+@(private = "file")
+lfsr2 :: #force_inline proc "contextless" (tk: simd.u8x16) -> simd.u8x16 {
+	// LFSR2 is an application of the following LFSR to each byte of input.
+	// (x7||x6||x5||x4||x3||x2||x1||x0) -> (x6||x5||x4||x3||x2||x1||x0||x7 ^ x5)
+	return simd.bit_or(
+		simd.shl(tk, _LFSR_SH1),
+		simd.bit_and(
+			simd.bit_xor(
+				simd.shr(tk, _LFSR_SH7), // x7
+				simd.shr(tk, _LFSR_SH5), // x5
+			),
+			_LFSR2_MASK,
+		),
+	)
+}
+
+@(private = "file")
+lfsr3 :: #force_inline proc "contextless"  (tk: simd.u8x16) -> simd.u8x16 {
+	// LFSR3 is an application of the following LFSR to each byte of input.
+	// (x7||x6||x5||x4||x3||x2||x1||x0) -> (x0 ^ x6||x7||x6||x5||x4||x3||x2||x1)
+	return simd.bit_or(
+		simd.shr(tk, _LFSR_SH1),
+		simd.bit_and(
+			simd.bit_xor(
+				simd.shl(tk, _LFSR_SH7), // x0
+				simd.shl(tk, _LFSR_SH1), // x6
+			),
+			_LFSR3_MASK,
+		),
+	)
+}
+
+@(private)
+h :: #force_inline proc "contextless" (tk: simd.u8x16) -> simd.u8x16 {
+	return simd.swizzle(
+		tk,
+		0x01, 0x06, 0x0b, 0x0c, 0x05, 0x0a, 0x0f, 0x00,
+		0x09, 0x0e, 0x03, 0x04, 0x0d, 0x02, 0x07, 0x08,
+	)
+}
+
+@(private = "file")
+rcon :: #force_inline proc "contextless" (rd: int) -> simd.u8x16 #no_bounds_check {
+	rc := _RCONS[rd]
+	return simd.u8x16{
+		1, 2, 4, 8,
+		rc, rc, rc, rc,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+	}
+}

+ 399 - 0
core/crypto/deoxysii/deoxysii_impl_ct64.odin

@@ -0,0 +1,399 @@
+package deoxysii
+
+import "base:intrinsics"
+import "core:crypto"
+import aes "core:crypto/_aes/ct64"
+import "core:encoding/endian"
+import "core:mem"
+import "core:simd"
+
+// This uses the bitsliced 64-bit general purpose register SWAR AES
+// round function.  The encryption pass skips orthogonalizing the
+// AES round function input as it is always going to be the leading 0
+// padded IV, and doing a 64-byte copy is faster.
+
+@(private = "file")
+TWEAK_SIZE :: 16
+
+@(private = "file")
+State_SW :: struct {
+	ctx:        ^Context,
+	q_stk, q_b: [8]u64,
+}
+
+@(private = "file")
+auth_tweak :: #force_inline proc "contextless" (
+	dst: ^[TWEAK_SIZE]byte,
+	prefix: byte,
+	block_nr: int,
+) {
+	endian.unchecked_put_u64be(dst[8:], u64(block_nr))
+	endian.unchecked_put_u64le(dst[0:], u64(prefix) << PREFIX_SHIFT) // dst[0] = prefix << PREFIX_SHIFT
+}
+
+@(private = "file")
+enc_tweak :: #force_inline proc "contextless" (
+	dst: ^[TWEAK_SIZE]byte,
+	tag: ^[TAG_SIZE]byte,
+	block_nr: int,
+) {
+	tmp: [8]byte
+	endian.unchecked_put_u64be(tmp[:], u64(block_nr))
+
+	copy(dst[:], tag[:])
+	dst[0] |= 0x80
+	for i in 0 ..< 8 {
+		dst[i+8] ~= tmp[i]
+	}
+}
+
+@(private = "file")
+enc_plaintext :: #force_inline proc "contextless" (
+	dst: ^[8]u64,
+	iv:  []byte,
+) {
+	tmp: [BLOCK_SIZE]byte = ---
+	tmp[0] = 0
+	copy(tmp[1:], iv[:])
+
+	q_0, q_1 := aes.load_interleaved(tmp[:])
+	for i in 0 ..< 4 {
+		dst[i], dst[i+4] = q_0, q_1
+	}
+	aes.orthogonalize(dst)
+}
+
+@(private = "file")
+bc_x4 :: proc "contextless" (
+	ctx:     ^Context,
+	dst:     []byte,
+	tweaks:  ^[4][TWEAK_SIZE]byte,
+	q_stk:   ^[8]u64,
+	q_b:     ^[8]u64, // Orthogonalized
+	n:       int,
+) {
+	tk1s: [4]simd.u8x16
+	for j in 0 ..< n {
+		tk1s[j] = intrinsics.unaligned_load((^simd.u8x16)(&tweaks[j]))
+	}
+
+	// Deoxys-BC-384
+	for i in 0 ..= BC_ROUNDS {
+		// Derive the round's subtweakkey
+		sk := intrinsics.unaligned_load((^simd.u8x16)(&ctx._subkeys[i]))
+		for j in 0 ..< n {
+			if i != 0 {
+				tk1s[j] = h(tk1s[j])
+			}
+			intrinsics.unaligned_store(
+				(^simd.u8x16)(raw_data(dst)),
+				simd.bit_xor(sk, tk1s[j]),
+			)
+			q_stk[j], q_stk[j+4] = aes.load_interleaved(dst[:])
+		}
+		aes.orthogonalize(q_stk)
+
+		if i != 0 {
+			aes.sub_bytes(q_b)
+			aes.shift_rows(q_b)
+			aes.mix_columns(q_b)
+		}
+		aes.add_round_key(q_b, q_stk[:])
+	}
+
+	aes.orthogonalize(q_b)
+	for i in 0 ..< n {
+		aes.store_interleaved(dst[i*BLOCK_SIZE:], q_b[i], q_b[i+4])
+	}
+}
+
+@(private = "file", require_results)
+bc_absorb :: proc "contextless" (
+	st:           ^State_SW,
+	dst:          []byte,
+	src:          []byte,
+	tweak_prefix: byte,
+	stk_block_nr: int,
+) -> int {
+	tweaks: [4][TWEAK_SIZE]byte = ---
+	tmp: [BLOCK_SIZE*4]byte = ---
+
+	src, stk_block_nr := src, stk_block_nr
+	dst_ := intrinsics.unaligned_load((^simd.u8x16)(raw_data(dst)))
+
+	nr_blocks := len(src) / BLOCK_SIZE
+	for nr_blocks > 0 {
+		// Derive the tweak(s), orthogonalize the plaintext
+		n := min(nr_blocks, 4)
+		for i in 0 ..< n {
+			auth_tweak(&tweaks[i], tweak_prefix, stk_block_nr + i)
+			st.q_b[i], st.q_b[i + 4] = aes.load_interleaved(src)
+			src = src[BLOCK_SIZE:]
+		}
+		aes.orthogonalize(&st.q_b)
+
+		// Deoxys-BC-384
+		bc_x4(st.ctx, tmp[:], &tweaks, &st.q_stk, &st.q_b, n)
+
+		// XOR in the existing Auth/tag
+		for i in 0 ..< n {
+			dst_ = simd.bit_xor(
+				dst_,
+				intrinsics.unaligned_load((^simd.u8x16)(raw_data(tmp[i*BLOCK_SIZE:]))),
+			)
+		}
+
+		stk_block_nr += n
+		nr_blocks -= n
+	}
+
+	intrinsics.unaligned_store((^simd.u8x16)(raw_data(dst)), dst_)
+
+	mem.zero_explicit(&tweaks, size_of(tweaks))
+	mem.zero_explicit(&tmp, size_of(tmp))
+
+	return stk_block_nr
+}
+
+@(private = "file")
+bc_final :: proc "contextless" (
+	st:  ^State_SW,
+	dst: []byte,
+	iv:  []byte,
+) {
+	tweaks: [4][TWEAK_SIZE]byte = ---
+
+	tweaks[0][0] = PREFIX_TAG << PREFIX_SHIFT
+	copy(tweaks[0][1:], iv)
+
+	st.q_b[0], st.q_b[4] = aes.load_interleaved(dst)
+	aes.orthogonalize(&st.q_b)
+
+	bc_x4(st.ctx, dst, &tweaks, &st.q_stk, &st.q_b, 1)
+}
+
+@(private = "file", require_results)
+bc_encrypt :: proc "contextless" (
+	st:           ^State_SW,
+	dst:          []byte,
+	src:          []byte,
+	q_n:          ^[8]u64, // Orthogonalized
+	tweak_tag:    ^[TAG_SIZE]byte,
+	stk_block_nr: int,
+) -> int {
+	tweaks: [4][TWEAK_SIZE]byte = ---
+	tmp: [BLOCK_SIZE*4]byte = ---
+
+	dst, src, stk_block_nr := dst, src, stk_block_nr
+
+	nr_blocks := len(src) / BLOCK_SIZE
+	for nr_blocks > 0 {
+		// Derive the tweak(s)
+		n := min(nr_blocks, 4)
+		for i in 0 ..< n {
+			enc_tweak(&tweaks[i], tweak_tag, stk_block_nr + i)
+		}
+		st.q_b = q_n^ // The plaintext is always `0^8 || N`
+
+		// Deoxys-BC-384
+		bc_x4(st.ctx, tmp[:], &tweaks, &st.q_stk, &st.q_b, n)
+
+		// XOR the ciphertext
+		for i in 0 ..< n {
+			intrinsics.unaligned_store(
+				(^simd.u8x16)(raw_data(dst[i*BLOCK_SIZE:])),
+				simd.bit_xor(
+					intrinsics.unaligned_load((^simd.u8x16)(raw_data(src[i*BLOCK_SIZE:]))),
+					intrinsics.unaligned_load((^simd.u8x16)(raw_data(tmp[i*BLOCK_SIZE:]))),
+				),
+			)
+		}
+
+		dst, src = dst[n*BLOCK_SIZE:], src[n*BLOCK_SIZE:]
+		stk_block_nr += n
+		nr_blocks -= n
+	}
+
+	mem.zero_explicit(&tweaks, size_of(tweaks))
+	mem.zero_explicit(&tmp, size_of(tmp))
+
+	return stk_block_nr
+}
+
+@(private)
+e_ref :: proc "contextless" (ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) #no_bounds_check {
+	st: State_SW = ---
+	st.ctx = ctx
+
+	// Algorithm 3
+	//
+	// Associated data
+	// A_1 || ... || A_la || A_∗ <- A where each |A_i| = n and |A_∗| < n
+	// Auth <- 0^n
+	// for i = 0 to la − 1 do
+	//   Auth <- Auth ^ EK(0010 || i, A_i+1)
+	// end
+	// if A_∗ != nil then
+	//   Auth <- Auth ^ EK(0110 || la, pad10∗(A_∗))
+	// end
+	auth: [TAG_SIZE]byte
+	aad := aad
+	n := bc_absorb(&st, auth[:], aad, PREFIX_AD_BLOCK, 0)
+	aad = aad[n*BLOCK_SIZE:]
+	if l := len(aad); l > 0 {
+		a_star: [BLOCK_SIZE]byte
+
+		copy(a_star[:], aad)
+		a_star[l] = 0x80
+
+		_ = bc_absorb(&st, auth[:], a_star[:], PREFIX_AD_FINAL, n)
+	}
+
+	// Message authentication and tag generation
+	// M_1 || ... || M_l || M_∗ <- M where each |M_j| = n and |M_∗| < n
+	// tag <- Auth
+	// for j = 0 to l − 1 do
+	//   tag <- tag ^ EK(0000 || j, M_j+1)
+	// end
+	// if M_∗ != nil then
+	//   tag <- tag ^ EK(0100 || l, pad10∗(M_∗))
+	// end
+	// tag <- EK(0001 || 0^4 || N, tag)
+	m := plaintext
+	n = bc_absorb(&st, auth[:], m, PREFIX_MSG_BLOCK, 0)
+	m = m[n*BLOCK_SIZE:]
+	if l := len(m); l > 0 {
+		m_star: [BLOCK_SIZE]byte
+
+		copy(m_star[:], m)
+		m_star[l] = 0x80
+
+		_ = bc_absorb(&st, auth[:], m_star[:], PREFIX_MSG_FINAL, n)
+	}
+	bc_final(&st, auth[:], iv)
+
+	// Message encryption
+	// for j = 0 to l − 1 do
+	//   C_j <- M_j ^ EK(1 || tag ^ j, 0^8 || N)
+	// end
+	// if M_∗ != nil then
+	//   C_∗ <- M_* ^ EK(1 || tag ^ l, 0^8 || N)
+	// end
+	//
+	// return (C_1 || ... || C_l || C_∗, tag)
+	q_iv: [8]u64 = ---
+	enc_plaintext(&q_iv, iv)
+
+	m = plaintext
+	n = bc_encrypt(&st, dst, m, &q_iv, &auth, 0)
+	m = m[n*BLOCK_SIZE:]
+	if l := len(m); l > 0 {
+		m_star: [BLOCK_SIZE]byte
+
+		copy(m_star[:], m)
+		_ = bc_encrypt(&st, m_star[:], m_star[:], &q_iv, &auth, n)
+
+		copy(dst[n*BLOCK_SIZE:], m_star[:])
+
+		mem.zero_explicit(&m_star, size_of(m_star))
+	}
+
+	copy(tag, auth[:])
+
+	mem.zero_explicit(&st.q_stk, size_of(st.q_stk))
+	mem.zero_explicit(&st.q_b, size_of(st.q_b))
+}
+
+@(private, require_results)
+d_ref :: proc "contextless" (ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
+	st: State_SW = ---
+	st.ctx = ctx
+
+	// Algorithm 4
+	//
+	// Message decryption
+	// C_1 || ... || C_l || C_∗ <- C where each |C_j| = n and |C_∗| < n
+	// for j = 0 to l − 1 do
+	//   M_j <- C_j ^ EK(1 || tag ^ j, 0^8 || N)
+	// end
+	// if C_∗ != nil then
+	//   M_∗ <- C_∗ ^ EK(1 || tag ^ l, 0^8 || N)
+	// end
+	q_iv: [8]u64 = ---
+	enc_plaintext(&q_iv, iv)
+
+	auth: [TAG_SIZE]byte
+	copy(auth[:], tag)
+
+	m := ciphertext
+	n := bc_encrypt(&st, dst, m, &q_iv, &auth, 0)
+	m = m[n*BLOCK_SIZE:]
+	if l := len(m); l > 0 {
+		m_star: [BLOCK_SIZE]byte
+
+		copy(m_star[:], m)
+		_ = bc_encrypt(&st, m_star[:], m_star[:], &q_iv, &auth, n)
+
+		copy(dst[n*BLOCK_SIZE:], m_star[:])
+
+		mem.zero_explicit(&m_star, size_of(m_star))
+	}
+
+	// Associated data
+	// A_1 || ... || A_la || A_∗ <- A where each |A_i| = n and |A_∗| < n
+	// Auth <- 0
+	// for i = 0 to la − 1 do
+	//   Auth <- Auth ^ EK(0010 || i, A_i+1)
+	// end
+	// if A∗ != nil then
+	//   Auth <- Auth ^ EK(0110 || l_a, pad10∗(A_∗))
+	// end
+	auth = 0
+	aad := aad
+	n = bc_absorb(&st, auth[:], aad, PREFIX_AD_BLOCK, 0)
+	aad = aad[n*BLOCK_SIZE:]
+	if l := len(aad); l > 0 {
+		a_star: [BLOCK_SIZE]byte
+
+		copy(a_star[:], aad)
+		a_star[l] = 0x80
+
+		_ = bc_absorb(&st, auth[:], a_star[:], PREFIX_AD_FINAL, n)
+	}
+
+	// Message authentication and tag generation
+	// M_1 || ... || M_l || M_∗ <- M where each |M_j| = n and |M_∗| < n
+	// tag0 <- Auth
+	// for j = 0 to l − 1 do
+	//   tag0 <- tag0 ^ EK(0000 || j, M_j+1)
+	// end
+	// if M_∗ != nil then
+	//   tag0 <- tag0 ^ EK(0100 || l, pad10∗(M_∗))
+	// end
+	// tag0 <- EK(0001 || 0^4 || N, tag0)
+	m = dst[:len(ciphertext)]
+	n = bc_absorb(&st, auth[:], m, PREFIX_MSG_BLOCK, 0)
+	m = m[n*BLOCK_SIZE:]
+	if l := len(m); l > 0 {
+		m_star: [BLOCK_SIZE]byte
+
+		copy(m_star[:], m)
+		m_star[l] = 0x80
+
+		_ = bc_absorb(&st, auth[:], m_star[:], PREFIX_MSG_FINAL, n)
+
+		mem.zero_explicit(&m_star, size_of(m_star))
+	}
+	bc_final(&st, auth[:], iv)
+
+	// Tag verification
+	// if tag0 = tag then return (M_1 || ... || M_l || M_∗)
+	// else return false
+	ok := crypto.compare_constant_time(auth[:], tag) == 1
+
+	mem.zero_explicit(&auth, size_of(auth))
+	mem.zero_explicit(&st.q_stk, size_of(st.q_stk))
+	mem.zero_explicit(&st.q_b, size_of(st.q_b))
+
+	return ok
+}

+ 21 - 0
core/crypto/deoxysii/deoxysii_impl_hw_gen.odin

@@ -0,0 +1,21 @@
+#+build !amd64
+package deoxysii
+
+@(private = "file")
+ERR_HW_NOT_SUPPORTED :: "crypto/deoxysii: hardware implementation unsupported"
+
+// is_hardware_accelerated returns true iff hardware accelerated Deoxys-II
+// is supported.
+is_hardware_accelerated :: proc "contextless" () -> bool {
+	return false
+}
+
+@(private)
+e_hw :: proc "contextless" (ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) #no_bounds_check {
+	panic_contextless(ERR_HW_NOT_SUPPORTED)
+}
+
+@(private, require_results)
+d_hw :: proc "contextless" (ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
+	panic_contextless(ERR_HW_NOT_SUPPORTED)
+}

+ 434 - 0
core/crypto/deoxysii/deoxysii_impl_hw_intel.odin

@@ -0,0 +1,434 @@
+#+build amd64
+package deoxysii
+
+import "base:intrinsics"
+import "core:crypto"
+import "core:crypto/aes"
+import "core:mem"
+import "core:simd"
+import "core:simd/x86"
+
+// This processes a maximum of 4 blocks at a time, as that is suitable
+// for most current hardware that doesn't say "Xeon".
+
+@(private = "file")
+_BIT_ENC :: x86.__m128i{0x80, 0}
+@(private = "file")
+_PREFIX_AD_BLOCK :: x86.__m128i{PREFIX_AD_BLOCK << PREFIX_SHIFT, 0}
+@(private = "file")
+_PREFIX_AD_FINAL :: x86.__m128i{PREFIX_AD_FINAL << PREFIX_SHIFT, 0}
+@(private = "file")
+_PREFIX_MSG_BLOCK :: x86.__m128i{PREFIX_MSG_BLOCK << PREFIX_SHIFT, 0}
+@(private = "file")
+_PREFIX_MSG_FINAL :: x86.__m128i{PREFIX_MSG_FINAL << PREFIX_SHIFT, 0}
+
+// is_hardware_accelerated returns true iff hardware accelerated Deoxys-II
+// is supported.
+is_hardware_accelerated :: proc "contextless" () -> bool {
+	return aes.is_hardware_accelerated()
+}
+
+@(private = "file", enable_target_feature = "sse4.1", require_results)
+auth_tweak :: #force_inline proc "contextless" (
+	prefix:   x86.__m128i,
+	block_nr: int,
+) -> x86.__m128i {
+	return x86._mm_insert_epi64(prefix, i64(intrinsics.byte_swap(u64(block_nr))), 1)
+}
+
+@(private = "file", enable_target_feature = "sse2", require_results)
+enc_tweak :: #force_inline proc "contextless" (
+	tag:      x86.__m128i,
+	block_nr: int,
+) -> x86.__m128i {
+	return x86._mm_xor_si128(
+		x86._mm_or_si128(tag, _BIT_ENC),
+		x86.__m128i{0, i64(intrinsics.byte_swap(u64(block_nr)))},
+	)
+}
+
+@(private = "file", enable_target_feature = "ssse3", require_results)
+h_ :: #force_inline proc "contextless" (tk1: x86.__m128i) -> x86.__m128i {
+	return transmute(x86.__m128i)h(transmute(simd.u8x16)tk1)
+}
+
+@(private = "file", enable_target_feature = "sse2,ssse3,aes", require_results)
+bc_x4 :: #force_inline proc "contextless" (
+	ctx: ^Context,
+	s_0, s_1, s_2, s_3:                 x86.__m128i,
+	tweak_0, tweak_1, tweak_2, tweak_3: x86.__m128i,
+) -> (x86.__m128i, x86.__m128i, x86.__m128i, x86.__m128i) #no_bounds_check {
+	s_0, s_1, s_2, s_3 := s_0, s_1, s_2, s_3
+	tk1_0, tk1_1, tk1_2, tk1_3 := tweak_0, tweak_1, tweak_2, tweak_3
+
+	sk := intrinsics.unaligned_load((^x86.__m128i)(&ctx._subkeys[0]))
+	stk_0 := x86._mm_xor_si128(tk1_0, sk)
+	stk_1 := x86._mm_xor_si128(tk1_1, sk)
+	stk_2 := x86._mm_xor_si128(tk1_2, sk)
+	stk_3 := x86._mm_xor_si128(tk1_3, sk)
+
+	s_0 = x86._mm_xor_si128(s_0, stk_0)
+	s_1 = x86._mm_xor_si128(s_1, stk_1)
+	s_2 = x86._mm_xor_si128(s_2, stk_2)
+	s_3 = x86._mm_xor_si128(s_3, stk_3)
+
+	for i in 1 ..= BC_ROUNDS {
+		sk = intrinsics.unaligned_load((^x86.__m128i)(&ctx._subkeys[i]))
+
+		tk1_0 = h_(tk1_0)
+		tk1_1 = h_(tk1_1)
+		tk1_2 = h_(tk1_2)
+		tk1_3 = h_(tk1_3)
+
+		stk_0 = x86._mm_xor_si128(tk1_0, sk)
+		stk_1 = x86._mm_xor_si128(tk1_1, sk)
+		stk_2 = x86._mm_xor_si128(tk1_2, sk)
+		stk_3 = x86._mm_xor_si128(tk1_3, sk)
+
+		s_0 = x86._mm_aesenc_si128(s_0, stk_0)
+		s_1 = x86._mm_aesenc_si128(s_1, stk_1)
+		s_2 = x86._mm_aesenc_si128(s_2, stk_2)
+		s_3 = x86._mm_aesenc_si128(s_3, stk_3)
+	}
+
+	return s_0, s_1, s_2, s_3
+}
+
+@(private = "file", enable_target_feature = "sse2,ssse3,aes", require_results)
+bc_x1 :: #force_inline proc "contextless" (
+	ctx:   ^Context,
+	s:     x86.__m128i,
+	tweak: x86.__m128i,
+) -> x86.__m128i #no_bounds_check {
+	s, tk1 := s, tweak
+
+	sk := intrinsics.unaligned_load((^x86.__m128i)(&ctx._subkeys[0]))
+	stk := x86._mm_xor_si128(tk1, sk)
+
+	s = x86._mm_xor_si128(s, stk)
+
+	for i in 1 ..= BC_ROUNDS {
+		sk = intrinsics.unaligned_load((^x86.__m128i)(&ctx._subkeys[i]))
+
+		tk1 = h_(tk1)
+
+		stk = x86._mm_xor_si128(tk1, sk)
+
+		s = x86._mm_aesenc_si128(s, stk)
+	}
+
+	return s
+}
+
+@(private = "file", enable_target_feature = "sse2,ssse3,sse4.1,aes", require_results)
+bc_absorb :: proc "contextless" (
+	ctx:          ^Context,
+	tag:          x86.__m128i,
+	src:          []byte,
+	tweak_prefix: x86.__m128i,
+	stk_block_nr: int,
+) -> (x86.__m128i, int) #no_bounds_check {
+	src, stk_block_nr, tag := src, stk_block_nr, tag
+
+	nr_blocks := len(src) / BLOCK_SIZE
+	for nr_blocks >= 4 {
+		d_0, d_1, d_2, d_3 := bc_x4(
+			ctx,
+			intrinsics.unaligned_load((^x86.__m128i)(raw_data(src))),
+			intrinsics.unaligned_load((^x86.__m128i)(raw_data(src[BLOCK_SIZE:]))),
+			intrinsics.unaligned_load((^x86.__m128i)(raw_data(src[2*BLOCK_SIZE:]))),
+			intrinsics.unaligned_load((^x86.__m128i)(raw_data(src[3*BLOCK_SIZE:]))),
+			auth_tweak(tweak_prefix, stk_block_nr),
+			auth_tweak(tweak_prefix, stk_block_nr + 1),
+			auth_tweak(tweak_prefix, stk_block_nr + 2),
+			auth_tweak(tweak_prefix, stk_block_nr + 3),
+		)
+
+		tag = x86._mm_xor_si128(tag, d_0)
+		tag = x86._mm_xor_si128(tag, d_1)
+		tag = x86._mm_xor_si128(tag, d_2)
+		tag = x86._mm_xor_si128(tag, d_3)
+
+		src = src[4*BLOCK_SIZE:]
+		stk_block_nr += 4
+		nr_blocks -= 4
+	}
+
+	for nr_blocks > 0 {
+		d := bc_x1(
+			ctx,
+			intrinsics.unaligned_load((^x86.__m128i)(raw_data(src))),
+			auth_tweak(tweak_prefix, stk_block_nr),
+		)
+
+		tag = x86._mm_xor_si128(tag, d)
+
+		src = src[BLOCK_SIZE:]
+		stk_block_nr += 1
+		nr_blocks -= 1
+	}
+
+	return tag, stk_block_nr
+}
+
+@(private = "file", enable_target_feature = "sse2,ssse3,aes", require_results)
+bc_final :: proc "contextless" (
+	ctx: ^Context,
+	tag: x86.__m128i,
+	iv:  []byte,
+) -> x86.__m128i {
+	tmp: [BLOCK_SIZE]byte
+
+	tmp[0] = PREFIX_TAG << PREFIX_SHIFT
+	copy(tmp[1:], iv)
+
+	tweak := intrinsics.unaligned_load((^x86.__m128i)(&tmp))
+
+	return bc_x1(ctx, tag, tweak)
+}
+
+@(private = "file", enable_target_feature = "sse2,ssse3,aes", require_results)
+bc_encrypt :: proc "contextless" (
+	ctx:          ^Context,
+	dst:          []byte,
+	src:          []byte,
+	iv:           x86.__m128i,
+	tweak_tag:    x86.__m128i,
+	stk_block_nr: int,
+) -> int {
+	dst, src, stk_block_nr := dst, src, stk_block_nr
+
+	nr_blocks := len(src) / BLOCK_SIZE
+	for nr_blocks >= 4 {
+		d_0, d_1, d_2, d_3 := bc_x4(
+			ctx,
+			iv, iv, iv, iv,
+			enc_tweak(tweak_tag, stk_block_nr),
+			enc_tweak(tweak_tag, stk_block_nr + 1),
+			enc_tweak(tweak_tag, stk_block_nr + 2),
+			enc_tweak(tweak_tag, stk_block_nr + 3),
+		)
+
+		intrinsics.unaligned_store(
+			(^x86.__m128i)(raw_data(dst)),
+			x86._mm_xor_si128(
+				d_0,
+				intrinsics.unaligned_load((^x86.__m128i)(raw_data(src))),
+			),
+		)
+		intrinsics.unaligned_store(
+			(^x86.__m128i)(raw_data(dst[BLOCK_SIZE:])),
+			x86._mm_xor_si128(
+				d_1,
+				intrinsics.unaligned_load((^x86.__m128i)(raw_data(src[BLOCK_SIZE:]))),
+			),
+		)
+		intrinsics.unaligned_store(
+			(^x86.__m128i)(raw_data(dst[2*BLOCK_SIZE:])),
+			x86._mm_xor_si128(
+				d_2,
+				intrinsics.unaligned_load((^x86.__m128i)(raw_data(src[2*BLOCK_SIZE:]))),
+			),
+		)
+		intrinsics.unaligned_store(
+			(^x86.__m128i)(raw_data(dst[3*BLOCK_SIZE:])),
+			x86._mm_xor_si128(
+				d_3,
+				intrinsics.unaligned_load((^x86.__m128i)(raw_data(src[3*BLOCK_SIZE:]))),
+			),
+		)
+
+		src, dst = src[4*BLOCK_SIZE:], dst[4*BLOCK_SIZE:]
+		stk_block_nr += 4
+		nr_blocks -= 4
+	}
+
+	for nr_blocks > 0 {
+		d := bc_x1(
+			ctx,
+			iv,
+			enc_tweak(tweak_tag, stk_block_nr),
+		)
+
+		intrinsics.unaligned_store(
+			(^x86.__m128i)(raw_data(dst)),
+			x86._mm_xor_si128(
+				d,
+				intrinsics.unaligned_load((^x86.__m128i)(raw_data(src))),
+			),
+		)
+
+		src, dst = src[BLOCK_SIZE:], dst[BLOCK_SIZE:]
+		stk_block_nr += 1
+		nr_blocks -= 1
+	}
+
+	return stk_block_nr
+}
+
+@(private)
+e_hw :: proc "contextless" (ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) #no_bounds_check {
+	tmp: [BLOCK_SIZE]byte
+	copy(tmp[1:], iv)
+	iv_ := intrinsics.unaligned_load((^x86.__m128i)(raw_data(&tmp)))
+
+	// Algorithm 3
+	//
+	// Associated data
+	// A_1 || ... || A_la || A_∗ <- A where each |A_i| = n and |A_∗| < n
+	// Auth <- 0^n
+	// for i = 0 to la − 1 do
+	//   Auth <- Auth ^ EK(0010 || i, A_i+1)
+	// end
+	// if A_∗ != nil then
+	//   Auth <- Auth ^ EK(0110 || la, pad10∗(A_∗))
+	// end
+	auth: x86.__m128i
+	n: int
+
+	aad := aad
+	auth, n = bc_absorb(ctx, auth, aad, _PREFIX_AD_BLOCK, 0)
+	aad = aad[n*BLOCK_SIZE:]
+	if l := len(aad); l > 0 {
+		a_star: [BLOCK_SIZE]byte
+
+		copy(a_star[:], aad)
+		a_star[l] = 0x80
+
+		auth, _ = bc_absorb(ctx, auth, a_star[:], _PREFIX_AD_FINAL, n)
+	}
+
+	// Message authentication and tag generation
+	// M_1 || ... || M_l || M_∗ <- M where each |M_j| = n and |M_∗| < n
+	// tag <- Auth
+	// for j = 0 to l − 1 do
+	//   tag <- tag ^ EK(0000 || j, M_j+1)
+	// end
+	// if M_∗ != nil then
+	//   tag <- tag ^ EK(0100 || l, pad10∗(M_∗))
+	// end
+	// tag <- EK(0001 || 0^4 ||N, tag)
+	m := plaintext
+	auth, n = bc_absorb(ctx, auth, m, _PREFIX_MSG_BLOCK, 0)
+	m = m[n*BLOCK_SIZE:]
+	if l := len(m); l > 0 {
+		m_star: [BLOCK_SIZE]byte
+
+		copy(m_star[:], m)
+		m_star[l] = 0x80
+
+		auth, _ = bc_absorb(ctx, auth, m_star[:], _PREFIX_MSG_FINAL, n)
+	}
+	auth = bc_final(ctx, auth, iv)
+
+	// Message encryption
+	// for j = 0 to l − 1 do
+	//   C_j <- M_j ^ EK(1 || tag ^ j, 0^8 || N)
+	// end
+	// if M_∗ != nil then
+	//   C_∗ <- M_* ^ EK(1 || tag ^ l, 0^8 || N)
+	// end
+	//
+	// return (C_1 || ... || C_l || C_∗, tag)
+	m = plaintext
+	n = bc_encrypt(ctx, dst, m, iv_, auth, 0)
+	m = m[n*BLOCK_SIZE:]
+	if l := len(m); l > 0 {
+		m_star: [BLOCK_SIZE]byte
+
+		copy(m_star[:], m)
+		_ = bc_encrypt(ctx, m_star[:], m_star[:], iv_, auth, n)
+
+		copy(dst[n*BLOCK_SIZE:], m_star[:])
+	}
+
+	intrinsics.unaligned_store((^x86.__m128i)(raw_data(tag)), auth)
+}
+
+@(private, require_results)
+d_hw :: proc "contextless" (ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
+	tmp: [BLOCK_SIZE]byte
+	copy(tmp[1:], iv)
+	iv_ := intrinsics.unaligned_load((^x86.__m128i)(raw_data(&tmp)))
+
+	// Algorithm 4
+	//
+	// Message decryption
+	// C_1 || ... || C_l || C_∗ <- C where each |C_j| = n and |C_∗| < n
+	// for j = 0 to l − 1 do
+	//   M_j <- C_j ^ EK(1 || tag ^ j, 0^8 || N)
+	// end
+	// if C_∗ != nil then
+	//   M_∗ <- C_∗ ^ EK(1 || tag ^ l, 0^8 || N)
+	// end
+	auth := intrinsics.unaligned_load((^x86.__m128i)(raw_data(tag)))
+
+	m := ciphertext
+	n := bc_encrypt(ctx, dst, m, iv_, auth, 0)
+	m = m[n*BLOCK_SIZE:]
+	if l := len(m); l > 0 {
+		m_star: [BLOCK_SIZE]byte
+
+		copy(m_star[:], m)
+		_ = bc_encrypt(ctx, m_star[:], m_star[:], iv_, auth, n)
+
+		copy(dst[n*BLOCK_SIZE:], m_star[:])
+
+		mem.zero_explicit(&m_star, size_of(m_star))
+	}
+
+	// Associated data
+	// A_1 || ... || Al_a || A_∗ <- A where each |Ai_| = n and |A_∗| < n
+	// Auth <- 0
+	// for i = 0 to la − 1 do
+	//   Auth <- Auth ^ EK(0010 || i, A_i+1)
+	// end
+	// if A∗ != nil then
+	//   Auth <- Auth ^ EK(0110| | l_a, pad10∗(A_∗))
+	// end
+	auth = x86.__m128i{0, 0}
+	aad := aad
+	auth, n = bc_absorb(ctx, auth, aad, _PREFIX_AD_BLOCK, 0)
+	aad = aad[BLOCK_SIZE*n:]
+	if l := len(aad); l > 0 {
+		a_star: [BLOCK_SIZE]byte
+
+		copy(a_star[:], aad)
+		a_star[l] = 0x80
+
+		auth, _ = bc_absorb(ctx, auth, a_star[:], _PREFIX_AD_FINAL, n)
+	}
+
+	// Message authentication and tag generation
+	// M_1 || ... || M_l || M_∗ <- M where each |M_j| = n and |M_∗| < n
+	// tag0 <- Auth
+	// for j = 0 to l − 1 do
+	//   tag0 <- tag0 ^ EK(0000 || j, M_j+1)
+	// end
+	// if M_∗ != nil then
+	//   tag0 <- tag0 ^ EK(0100 || l, pad10∗(M_∗))
+	// end
+	// tag0 <- EK(0001 || 0^4 || N, tag0)
+	m = dst[:len(ciphertext)]
+	auth, n = bc_absorb(ctx, auth, m, _PREFIX_MSG_BLOCK, 0)
+	m = m[n*BLOCK_SIZE:]
+	if l := len(m); l > 0 {
+		m_star: [BLOCK_SIZE]byte
+
+		copy(m_star[:], m)
+		m_star[l] = 0x80
+
+		auth, _ = bc_absorb(ctx, auth, m_star[:], _PREFIX_MSG_FINAL, n)
+	}
+	auth = bc_final(ctx, auth, iv)
+
+	// Tag verification
+	// if tag0 = tag then return (M_1 || ... || M_l || M_∗)
+	// else return false
+	intrinsics.unaligned_store((^x86.__m128i)(raw_data(&tmp)), auth)
+	ok := crypto.compare_constant_time(tmp[:], tag) == 1
+
+	mem.zero_explicit(&tmp, size_of(tmp))
+
+	return ok
+}

+ 8 - 24
core/crypto/ed25519/ed25519.odin

@@ -81,12 +81,8 @@ private_key_set_bytes :: proc(priv_key: ^Private_Key, b: []byte) -> bool {
 
 // private_key_bytes sets dst to byte-encoding of priv_key.
 private_key_bytes :: proc(priv_key: ^Private_Key, dst: []byte) {
-	if !priv_key._is_initialized {
-		panic("crypto/ed25519: uninitialized private key")
-	}
-	if len(dst) != PRIVATE_KEY_SIZE {
-		panic("crypto/ed25519: invalid destination size")
-	}
+	ensure(priv_key._is_initialized, "crypto/ed25519: uninitialized private key")
+	ensure(len(dst) == PRIVATE_KEY_SIZE, "crypto/ed25519: invalid destination size")
 
 	copy(dst, priv_key._b[:])
 }
@@ -98,12 +94,8 @@ private_key_clear :: proc "contextless" (priv_key: ^Private_Key) {
 
 // sign writes the signature by priv_key over msg to sig.
 sign :: proc(priv_key: ^Private_Key, msg, sig: []byte) {
-	if !priv_key._is_initialized {
-		panic("crypto/ed25519: uninitialized private key")
-	}
-	if len(sig) != SIGNATURE_SIZE {
-		panic("crypto/ed25519: invalid destination size")
-	}
+	ensure(priv_key._is_initialized, "crypto/ed25519: uninitialized private key")
+	ensure(len(sig) == SIGNATURE_SIZE, "crypto/ed25519: invalid destination size")
 
 	// 1. Compute the hash of the private key d, H(d) = (h_0, h_1, ..., h_2b-1)
 	// using SHA-512 for Ed25519.  H(d) may be precomputed.
@@ -178,9 +170,7 @@ public_key_set_bytes :: proc "contextless" (pub_key: ^Public_Key, b: []byte) ->
 
 // public_key_set_priv sets pub_key to the public component of priv_key.
 public_key_set_priv :: proc(pub_key: ^Public_Key, priv_key: ^Private_Key) {
-	if !priv_key._is_initialized {
-		panic("crypto/ed25519: uninitialized public key")
-	}
+	ensure(priv_key._is_initialized, "crypto/ed25519: uninitialized public key")
 
 	src := &priv_key._pub_key
 	copy(pub_key._b[:], src._b[:])
@@ -191,21 +181,15 @@ public_key_set_priv :: proc(pub_key: ^Public_Key, priv_key: ^Private_Key) {
 
 // public_key_bytes sets dst to byte-encoding of pub_key.
 public_key_bytes :: proc(pub_key: ^Public_Key, dst: []byte) {
-	if !pub_key._is_initialized {
-		panic("crypto/ed25519: uninitialized public key")
-	}
-	if len(dst) != PUBLIC_KEY_SIZE {
-		panic("crypto/ed25519: invalid destination size")
-	}
+	ensure(pub_key._is_initialized, "crypto/ed25519: uninitialized public key")
+	ensure(len(dst) == PUBLIC_KEY_SIZE, "crypto/ed25519: invalid destination size")
 
 	copy(dst, pub_key._b[:])
 }
 
 // public_key_equal returns true iff pub_key is equal to other.
 public_key_equal :: proc(pub_key, other: ^Public_Key) -> bool {
-	if !pub_key._is_initialized || !other._is_initialized {
-		panic("crypto/ed25519: uninitialized public key")
-	}
+	ensure(pub_key._is_initialized && other._is_initialized, "crypto/ed25519: uninitialized public key")
 
 	return crypto.compare_constant_time(pub_key._b[:], other._b[:]) == 1
 }

+ 5 - 8
core/crypto/hmac/hmac.odin

@@ -56,7 +56,7 @@ init :: proc(ctx: ^Context, algorithm: hash.Algorithm, key: []byte) {
 
 // update adds more data to the Context.
 update :: proc(ctx: ^Context, data: []byte) {
-	assert(ctx._is_initialized)
+	ensure(ctx._is_initialized)
 
 	hash.update(&ctx._i_hash, data)
 }
@@ -64,13 +64,10 @@ update :: proc(ctx: ^Context, data: []byte) {
 // final finalizes the Context, writes the tag to dst, and calls
 // reset on the Context.
 final :: proc(ctx: ^Context, dst: []byte) {
-	assert(ctx._is_initialized)
-
 	defer (reset(ctx))
 
-	if len(dst) != ctx._tag_sz {
-		panic("crypto/hmac: invalid destination tag size")
-	}
+	ensure(ctx._is_initialized)
+	ensure(len(dst) == ctx._tag_sz, "crypto/hmac: invalid destination tag size")
 
 	hash.final(&ctx._i_hash, dst) // H((k ^ ipad) || text)
 
@@ -105,14 +102,14 @@ reset :: proc(ctx: ^Context) {
 
 // algorithm returns the Algorithm used by a Context instance.
 algorithm :: proc(ctx: ^Context) -> hash.Algorithm {
-	assert(ctx._is_initialized)
+	ensure(ctx._is_initialized)
 
 	return hash.algorithm(&ctx._i_hash)
 }
 
 // tag_size returns the tag size of a Context instance in bytes.
 tag_size :: proc(ctx: ^Context) -> int {
-	assert(ctx._is_initialized)
+	ensure(ctx._is_initialized)
 
 	return ctx._tag_sz
 }

+ 4 - 10
core/crypto/kmac/kmac.odin

@@ -36,6 +36,7 @@ sum :: proc(sec_strength: int, dst, msg, key, domain_sep: []byte) {
 // tag is valid.
 verify :: proc(sec_strength: int, tag, msg, key, domain_sep: []byte, allocator := context.temp_allocator) -> bool {
 	derived_tag := make([]byte, len(tag), allocator)
+	defer(delete(derived_tag))
 
 	sum(sec_strength, derived_tag, msg, key, domain_sep)
 
@@ -59,8 +60,6 @@ init_256 :: proc(ctx: ^Context, key, domain_sep: []byte) {
 
 // update adds more data to the Context.
 update :: proc(ctx: ^Context, data: []byte) {
-	assert(ctx.is_initialized)
-
 	shake.write((^shake.Context)(ctx), data)
 }
 
@@ -68,12 +67,9 @@ update :: proc(ctx: ^Context, data: []byte) {
 // on the Context.  This routine will panic if the dst length is less than
 // MIN_TAG_SIZE.
 final :: proc(ctx: ^Context, dst: []byte) {
-	assert(ctx.is_initialized)
 	defer reset(ctx)
 
-	if len(dst) < MIN_TAG_SIZE {
-		panic("crypto/kmac: invalid KMAC tag_size, too short")
-	}
+	ensure(len(dst) >= MIN_TAG_SIZE, "crypto/kmac: invalid KMAC tag_size, too short")
 
 	_sha3.final_cshake((^_sha3.Context)(ctx), dst)
 }
@@ -103,14 +99,12 @@ _init_kmac :: proc(ctx: ^Context, key, s: []byte, sec_strength: int) {
 		reset(ctx)
 	}
 
-	if len(key) < sec_strength / 8 {
-		panic("crypto/kmac: invalid KMAC key, too short")
-	}
+	ensure(len(key) >= sec_strength / 8, "crypto/kmac: invalid KMAC key, too short")
 
 	ctx_ := (^_sha3.Context)(ctx)
 	_sha3.init_cshake(ctx_, N_KMAC, s, sec_strength)
 	_sha3.bytepad(ctx_, [][]byte{key}, _sha3.rate_cshake(sec_strength))
 }
 
-@(private)
+@(private, rodata)
 N_KMAC := []byte{'K', 'M', 'A', 'C'}

+ 9 - 9
core/crypto/legacy/keccak/keccak.odin

@@ -40,37 +40,37 @@ BLOCK_SIZE_512 :: _sha3.RATE_512
 Context :: distinct _sha3.Context
 
 // init_224 initializes a Context for Keccak-224.
-init_224 :: proc(ctx: ^Context) {
+init_224 :: proc "contextless" (ctx: ^Context) {
 	ctx.mdlen = DIGEST_SIZE_224
 	_init(ctx)
 }
 
 // init_256 initializes a Context for Keccak-256.
-init_256 :: proc(ctx: ^Context) {
+init_256 :: proc "contextless" (ctx: ^Context) {
 	ctx.mdlen = DIGEST_SIZE_256
 	_init(ctx)
 }
 
 // init_384 initializes a Context for Keccak-384.
-init_384 :: proc(ctx: ^Context) {
+init_384 :: proc "contextless" (ctx: ^Context) {
 	ctx.mdlen = DIGEST_SIZE_384
 	_init(ctx)
 }
 
 // init_512 initializes a Context for Keccak-512.
-init_512 :: proc(ctx: ^Context) {
+init_512 :: proc "contextless" (ctx: ^Context) {
 	ctx.mdlen = DIGEST_SIZE_512
 	_init(ctx)
 }
 
 @(private)
-_init :: proc(ctx: ^Context) {
+_init :: proc "contextless" (ctx: ^Context) {
 	ctx.dsbyte = _sha3.DS_KECCAK
 	_sha3.init((^_sha3.Context)(ctx))
 }
 
 // update adds more data to the Context.
-update :: proc(ctx: ^Context, data: []byte) {
+update :: proc "contextless" (ctx: ^Context, data: []byte) {
 	_sha3.update((^_sha3.Context)(ctx), data)
 }
 
@@ -79,17 +79,17 @@ update :: proc(ctx: ^Context, data: []byte) {
 //
 // Iff finalize_clone is set, final will work on a copy of the Context,
 // which is useful for for calculating rolling digests.
-final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
+final :: proc "contextless" (ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
 	_sha3.final((^_sha3.Context)(ctx), hash, finalize_clone)
 }
 
 // clone clones the Context other into ctx.
-clone :: proc(ctx, other: ^Context) {
+clone :: proc "contextless" (ctx, other: ^Context) {
 	_sha3.clone((^_sha3.Context)(ctx), (^_sha3.Context)(other))
 }
 
 // reset sanitizes the Context.  The Context must be re-initialized to
 // be used again.
-reset :: proc(ctx: ^Context) {
+reset :: proc "contextless" (ctx: ^Context) {
 	_sha3.reset((^_sha3.Context)(ctx))
 }

+ 3 - 6
core/crypto/legacy/md5/md5.odin

@@ -53,7 +53,7 @@ init :: proc(ctx: ^Context) {
 
 // update adds more data to the Context.
 update :: proc(ctx: ^Context, data: []byte) {
-	assert(ctx.is_initialized)
+	ensure(ctx.is_initialized)
 
 	for i := 0; i < len(data); i += 1 {
 		ctx.data[ctx.datalen] = data[i]
@@ -72,11 +72,8 @@ update :: proc(ctx: ^Context, data: []byte) {
 // Iff finalize_clone is set, final will work on a copy of the Context,
 // which is useful for for calculating rolling digests.
 final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
-	assert(ctx.is_initialized)
-
-	if len(hash) < DIGEST_SIZE {
-		panic("crypto/md5: invalid destination digest size")
-	}
+	ensure(ctx.is_initialized)
+	ensure(len(hash) >= DIGEST_SIZE, "crypto/md5: invalid destination digest size")
 
 	ctx := ctx
 	if finalize_clone {

+ 3 - 6
core/crypto/legacy/sha1/sha1.odin

@@ -60,7 +60,7 @@ init :: proc(ctx: ^Context) {
 
 // update adds more data to the Context.
 update :: proc(ctx: ^Context, data: []byte) {
-	assert(ctx.is_initialized)
+	ensure(ctx.is_initialized)
 
 	for i := 0; i < len(data); i += 1 {
 		ctx.data[ctx.datalen] = data[i]
@@ -79,11 +79,8 @@ update :: proc(ctx: ^Context, data: []byte) {
 // Iff finalize_clone is set, final will work on a copy of the Context,
 // which is useful for for calculating rolling digests.
 final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
-	assert(ctx.is_initialized)
-
-	if len(hash) < DIGEST_SIZE {
-		panic("crypto/sha1: invalid destination digest size")
-	}
+	ensure(ctx.is_initialized)
+	ensure(len(hash) >= DIGEST_SIZE, "crypto/sha1: invalid destination digest size")
 
 	ctx := ctx
 	if finalize_clone {

+ 4 - 8
core/crypto/poly1305/poly1305.odin

@@ -60,9 +60,7 @@ Context :: struct {
 // init initializes a Context with the specified key.  The key SHOULD be
 // unique and MUST be unpredictable for each invocation.
 init :: proc(ctx: ^Context, key: []byte) {
-	if len(key) != KEY_SIZE {
-		panic("crypto/poly1305: invalid key size")
-	}
+	ensure(len(key) == KEY_SIZE, "crypto/poly1305: invalid key size")
 
 	// r = le_bytes_to_num(key[0..15])
 	// r = clamp(r) (r &= 0xffffffc0ffffffc0ffffffc0fffffff)
@@ -85,7 +83,7 @@ init :: proc(ctx: ^Context, key: []byte) {
 
 // update adds more data to the Context.
 update :: proc(ctx: ^Context, data: []byte) {
-	assert(ctx._is_initialized)
+	ensure(ctx._is_initialized)
 
 	msg := data
 	msg_len := len(data)
@@ -124,12 +122,10 @@ update :: proc(ctx: ^Context, data: []byte) {
 // final finalizes the Context, writes the tag to dst, and calls
 // reset on the Context.
 final :: proc(ctx: ^Context, dst: []byte) {
-	assert(ctx._is_initialized)
 	defer reset(ctx)
 
-	if len(dst) != TAG_SIZE {
-		panic("poly1305: invalid destination tag size")
-	}
+	ensure(ctx._is_initialized)
+	ensure(len(dst) == TAG_SIZE, "poly1305: invalid destination tag size")
 
 	// Process remaining block
 	if ctx._leftover > 0 {

+ 21 - 27
core/crypto/ristretto255/ristretto255.odin

@@ -16,7 +16,7 @@ ELEMENT_SIZE :: 32
 // group element.
 WIDE_ELEMENT_SIZE :: 64
 
-@(private)
+@(private, rodata)
 FE_NEG_ONE := field.Tight_Field_Element {
 	2251799813685228,
 	2251799813685247,
@@ -24,7 +24,7 @@ FE_NEG_ONE := field.Tight_Field_Element {
 	2251799813685247,
 	2251799813685247,
 }
-@(private)
+@(private, rodata)
 FE_INVSQRT_A_MINUS_D := field.Tight_Field_Element {
 	278908739862762,
 	821645201101625,
@@ -32,7 +32,7 @@ FE_INVSQRT_A_MINUS_D := field.Tight_Field_Element {
 	1777959178193151,
 	2118520810568447,
 }
-@(private)
+@(private, rodata)
 FE_ONE_MINUS_D_SQ := field.Tight_Field_Element {
 	1136626929484150,
 	1998550399581263,
@@ -40,7 +40,7 @@ FE_ONE_MINUS_D_SQ := field.Tight_Field_Element {
 	118527312129759,
 	45110755273534,
 }
-@(private)
+@(private, rodata)
 FE_D_MINUS_ONE_SQUARED := field.Tight_Field_Element {
 	1507062230895904,
 	1572317787530805,
@@ -48,7 +48,7 @@ FE_D_MINUS_ONE_SQUARED := field.Tight_Field_Element {
 	317374165784489,
 	1572899562415810,
 }
-@(private)
+@(private, rodata)
 FE_SQRT_AD_MINUS_ONE := field.Tight_Field_Element {
 	2241493124984347,
 	425987919032274,
@@ -76,7 +76,7 @@ ge_clear :: proc "contextless" (ge: ^Group_Element) {
 
 // ge_set sets `ge = a`.
 ge_set :: proc(ge, a: ^Group_Element) {
-	_ge_assert_initialized([]^Group_Element{a})
+	_ge_ensure_initialized([]^Group_Element{a})
 
 	grp.ge_set(&ge._p, &a._p)
 	ge._is_initialized = true
@@ -199,9 +199,7 @@ ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool {
 // ge_set_wide_bytes sets ge to the result of deriving a ristretto255
 // group element, from a wide (512-bit) byte string.
 ge_set_wide_bytes :: proc(ge: ^Group_Element, b: []byte) {
-	if len(b) != WIDE_ELEMENT_SIZE {
-		panic("crypto/ristretto255: invalid wide input size")
-	}
+	ensure(len(b) == WIDE_ELEMENT_SIZE, "crypto/ristretto255: invalid wide input size")
 
 	// The element derivation function on an input string b proceeds as
 	// follows:
@@ -222,10 +220,8 @@ ge_set_wide_bytes :: proc(ge: ^Group_Element, b: []byte) {
 
 // ge_bytes sets dst to the canonical encoding of ge.
 ge_bytes :: proc(ge: ^Group_Element, dst: []byte) {
-	_ge_assert_initialized([]^Group_Element{ge})
-	if len(dst) != ELEMENT_SIZE {
-		panic("crypto/ristretto255: invalid destination size")
-	}
+	_ge_ensure_initialized([]^Group_Element{ge})
+	ensure(len(dst) == ELEMENT_SIZE, "crypto/ristretto255: invalid destination size")
 
 	x0, y0, z0, t0 := &ge._p.x, &ge._p.y, &ge._p.z, &ge._p.t
 
@@ -306,7 +302,7 @@ ge_bytes :: proc(ge: ^Group_Element, dst: []byte) {
 
 // ge_add sets `ge = a + b`.
 ge_add :: proc(ge, a, b: ^Group_Element) {
-	_ge_assert_initialized([]^Group_Element{a, b})
+	_ge_ensure_initialized([]^Group_Element{a, b})
 
 	grp.ge_add(&ge._p, &a._p, &b._p)
 	ge._is_initialized = true
@@ -314,7 +310,7 @@ ge_add :: proc(ge, a, b: ^Group_Element) {
 
 // ge_double sets `ge = a + a`.
 ge_double :: proc(ge, a: ^Group_Element) {
-	_ge_assert_initialized([]^Group_Element{a})
+	_ge_ensure_initialized([]^Group_Element{a})
 
 	grp.ge_double(&ge._p, &a._p)
 	ge._is_initialized = true
@@ -322,7 +318,7 @@ ge_double :: proc(ge, a: ^Group_Element) {
 
 // ge_negate sets `ge = -a`.
 ge_negate :: proc(ge, a: ^Group_Element) {
-	_ge_assert_initialized([]^Group_Element{a})
+	_ge_ensure_initialized([]^Group_Element{a})
 
 	grp.ge_negate(&ge._p, &a._p)
 	ge._is_initialized = true
@@ -330,7 +326,7 @@ ge_negate :: proc(ge, a: ^Group_Element) {
 
 // ge_scalarmult sets `ge = A * sc`.
 ge_scalarmult :: proc(ge, A: ^Group_Element, sc: ^Scalar) {
-	_ge_assert_initialized([]^Group_Element{A})
+	_ge_ensure_initialized([]^Group_Element{A})
 
 	grp.ge_scalarmult(&ge._p, &A._p, sc)
 	ge._is_initialized = true
@@ -344,7 +340,7 @@ ge_scalarmult_generator :: proc "contextless" (ge: ^Group_Element, sc: ^Scalar)
 
 // ge_scalarmult_vartime sets `ge = A * sc` in variable time.
 ge_scalarmult_vartime :: proc(ge, A: ^Group_Element, sc: ^Scalar) {
-	_ge_assert_initialized([]^Group_Element{A})
+	_ge_ensure_initialized([]^Group_Element{A})
 
 	grp.ge_scalarmult_vartime(&ge._p, &A._p, sc)
 	ge._is_initialized = true
@@ -358,7 +354,7 @@ ge_double_scalarmult_generator_vartime :: proc(
 	A: ^Group_Element,
 	b: ^Scalar,
 ) {
-	_ge_assert_initialized([]^Group_Element{A})
+	_ge_ensure_initialized([]^Group_Element{A})
 
 	grp.ge_double_scalarmult_basepoint_vartime(&ge._p, a, &A._p, b)
 	ge._is_initialized = true
@@ -367,7 +363,7 @@ ge_double_scalarmult_generator_vartime :: proc(
 // ge_cond_negate sets `ge = a` iff `ctrl == 0` and `ge = -a` iff `ctrl == 1`.
 // Behavior for all other values of ctrl are undefined,
 ge_cond_negate :: proc(ge, a: ^Group_Element, ctrl: int) {
-	_ge_assert_initialized([]^Group_Element{a})
+	_ge_ensure_initialized([]^Group_Element{a})
 
 	grp.ge_cond_negate(&ge._p, &a._p, ctrl)
 	ge._is_initialized = true
@@ -376,7 +372,7 @@ ge_cond_negate :: proc(ge, a: ^Group_Element, ctrl: int) {
 // ge_cond_assign sets `ge = ge` iff `ctrl == 0` and `ge = a` iff `ctrl == 1`.
 // Behavior for all other values of ctrl are undefined,
 ge_cond_assign :: proc(ge, a: ^Group_Element, ctrl: int) {
-	_ge_assert_initialized([]^Group_Element{ge, a})
+	_ge_ensure_initialized([]^Group_Element{ge, a})
 
 	grp.ge_cond_assign(&ge._p, &a._p, ctrl)
 }
@@ -384,7 +380,7 @@ ge_cond_assign :: proc(ge, a: ^Group_Element, ctrl: int) {
 // ge_cond_select sets `ge = a` iff `ctrl == 0` and `ge = b` iff `ctrl == 1`.
 // Behavior for all other values of ctrl are undefined,
 ge_cond_select :: proc(ge, a, b: ^Group_Element, ctrl: int) {
-	_ge_assert_initialized([]^Group_Element{a, b})
+	_ge_ensure_initialized([]^Group_Element{a, b})
 
 	grp.ge_cond_select(&ge._p, &a._p, &b._p, ctrl)
 	ge._is_initialized = true
@@ -393,7 +389,7 @@ ge_cond_select :: proc(ge, a, b: ^Group_Element, ctrl: int) {
 // ge_equal returns 1 iff `a == b`, and 0 otherwise.
 @(require_results)
 ge_equal :: proc(a, b: ^Group_Element) -> int {
-	_ge_assert_initialized([]^Group_Element{a, b})
+	_ge_ensure_initialized([]^Group_Element{a, b})
 
 	// CT_EQ(x1 * y2, y1 * x2) | CT_EQ(y1 * y2, x1 * x2)
 	ax_by, ay_bx, ay_by, ax_bx: field.Tight_Field_Element = ---, ---, ---, ---
@@ -501,10 +497,8 @@ ge_map :: proc "contextless" (ge: ^Group_Element, b: []byte) {
 }
 
 @(private)
-_ge_assert_initialized :: proc(ges: []^Group_Element) {
+_ge_ensure_initialized :: proc(ges: []^Group_Element) {
 	for ge in ges {
-		if !ge._is_initialized {
-			panic("crypto/ristretto255: uninitialized group element")
-		}
+		ensure(ge._is_initialized, "crypto/ristretto255: uninitialized group element")
 	}
 }

+ 2 - 6
core/crypto/ristretto255/ristretto255_scalar.odin

@@ -42,9 +42,7 @@ sc_set_bytes :: proc(sc: ^Scalar, b: []byte) -> bool {
 // scalar, from a wide (512-bit) byte string by interpreting b as a
 // little-endian value, and reducing it mod the group order.
 sc_set_bytes_wide :: proc(sc: ^Scalar, b: []byte) {
-	if len(b) != WIDE_SCALAR_SIZE {
-		panic("crypto/ristretto255: invalid wide input size")
-	}
+	ensure(len(b) == WIDE_SCALAR_SIZE, "crypto/ristretto255: invalid wide input size")
 
 	b_ := (^[WIDE_SCALAR_SIZE]byte)(raw_data(b))
 	grp.sc_set_bytes_wide(sc, b_)
@@ -52,9 +50,7 @@ sc_set_bytes_wide :: proc(sc: ^Scalar, b: []byte) {
 
 // sc_bytes sets dst to the canonical encoding of sc.
 sc_bytes :: proc(sc: ^Scalar, dst: []byte) {
-	if len(dst) != SCALAR_SIZE {
-		panic("crypto/ristretto255: invalid destination size")
-	}
+	ensure(len(dst) == SCALAR_SIZE, "crypto/ristretto255: invalid destination size")
 
 	grp.sc_bytes(dst, sc)
 }

+ 34 - 25
core/crypto/sha2/sha2.odin

@@ -15,9 +15,9 @@ package sha2
         zhibog, dotbmp:  Initial implementation.
 */
 
-import "core:encoding/endian"
+@(require) import "core:encoding/endian"
 import "core:math/bits"
-import "core:mem"
+@(require) import "core:mem"
 
 // DIGEST_SIZE_224 is the SHA-224 digest size in bytes.
 DIGEST_SIZE_224 :: 28
@@ -158,7 +158,7 @@ _init :: proc(ctx: ^$T) {
 
 // update adds more data to the Context.
 update :: proc(ctx: ^$T, data: []byte) {
-	assert(ctx.is_initialized)
+	ensure(ctx.is_initialized)
 
 	when T == Context_256 {
 		CURR_BLOCK_SIZE :: BLOCK_SIZE_256
@@ -194,11 +194,8 @@ update :: proc(ctx: ^$T, data: []byte) {
 // Iff finalize_clone is set, final will work on a copy of the Context,
 // which is useful for for calculating rolling digests.
 final :: proc(ctx: ^$T, hash: []byte, finalize_clone: bool = false) {
-	assert(ctx.is_initialized)
-
-	if len(hash) * 8 < ctx.md_bits {
-		panic("crypto/sha2: invalid destination digest size")
-	}
+	ensure(ctx.is_initialized)
+	ensure(len(hash) * 8 >= ctx.md_bits, "crypto/sha2: invalid destination digest size")
 
 	ctx := ctx
 	if finalize_clone {
@@ -238,7 +235,7 @@ final :: proc(ctx: ^$T, hash: []byte, finalize_clone: bool = false) {
 		endian.unchecked_put_u64be(pad[8:], length_lo)
 		update(ctx, pad[0:16])
 	}
-	assert(ctx.bitlength == 0)
+	assert(ctx.bitlength == 0) // Check for bugs
 
 	when T == Context_256 {
 		for i := 0; i < ctx.md_bits / 32; i += 1 {
@@ -270,8 +267,8 @@ reset :: proc(ctx: ^$T) {
     SHA2 implementation
 */
 
-@(private)
-sha256_k := [64]u32 {
+@(private, rodata)
+SHA256_K := [64]u32 {
 	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
 	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
 	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
@@ -290,8 +287,8 @@ sha256_k := [64]u32 {
 	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
 }
 
-@(private)
-sha512_k := [80]u64 {
+@(private, rodata)
+SHA512_K := [80]u64 {
 	0x428a2f98d728ae22, 0x7137449123ef65cd,
 	0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
 	0x3956c25bf348b538, 0x59f111f1b605d019,
@@ -334,6 +331,11 @@ sha512_k := [80]u64 {
 	0x5fcb6fab3ad6faec, 0x6c44198c4a475817,
 }
 
+@(private)
+SHA256_ROUNDS :: 64
+@(private)
+SHA512_ROUNDS :: 80
+
 @(private)
 SHA256_CH :: #force_inline proc "contextless" (x, y, z: u32) -> u32 {
 	return (x & y) ~ (~x & z)
@@ -395,22 +397,29 @@ SHA512_F4 :: #force_inline proc "contextless" (x: u64) -> u64 {
 }
 
 @(private)
-sha2_transf :: proc "contextless" (ctx: ^$T, data: []byte) {
+sha2_transf :: proc "contextless" (ctx: ^$T, data: []byte) #no_bounds_check {
 	when T == Context_256 {
-		w: [64]u32
+		if is_hardware_accelerated_256() {
+			sha256_transf_hw(ctx, data)
+			return
+		}
+
+		w: [SHA256_ROUNDS]u32
 		wv: [8]u32
 		t1, t2: u32
+
 		CURR_BLOCK_SIZE :: BLOCK_SIZE_256
 	} else when T == Context_512 {
-		w: [80]u64
+		w: [SHA512_ROUNDS]u64
 		wv: [8]u64
 		t1, t2: u64
+
 		CURR_BLOCK_SIZE :: BLOCK_SIZE_512
 	}
 
 	data := data
 	for len(data) >= CURR_BLOCK_SIZE {
-		for i := 0; i < 16; i += 1 {
+		for i in 0 ..< 16 {
 			when T == Context_256 {
 				w[i] = endian.unchecked_get_u32be(data[i * 4:])
 			} else when T == Context_512 {
@@ -419,22 +428,22 @@ sha2_transf :: proc "contextless" (ctx: ^$T, data: []byte) {
 		}
 
 		when T == Context_256 {
-			for i := 16; i < 64; i += 1 {
+			for i in 16 ..< SHA256_ROUNDS {
 				w[i] = SHA256_F4(w[i - 2]) + w[i - 7] + SHA256_F3(w[i - 15]) + w[i - 16]
 			}
 		} else when T == Context_512 {
-			for i := 16; i < 80; i += 1 {
+			for i in 16 ..< SHA512_ROUNDS {
 				w[i] = SHA512_F4(w[i - 2]) + w[i - 7] + SHA512_F3(w[i - 15]) + w[i - 16]
 			}
 		}
 
-		for i := 0; i < 8; i += 1 {
+		for i in 0 ..< 8 {
 			wv[i] = ctx.h[i]
 		}
 
 		when T == Context_256 {
-			for i := 0; i < 64; i += 1 {
-				t1 = wv[7] + SHA256_F2(wv[4]) + SHA256_CH(wv[4], wv[5], wv[6]) + sha256_k[i] + w[i]
+			for i in 0 ..< SHA256_ROUNDS {
+				t1 = wv[7] + SHA256_F2(wv[4]) + SHA256_CH(wv[4], wv[5], wv[6]) + SHA256_K[i] + w[i]
 				t2 = SHA256_F1(wv[0]) + SHA256_MAJ(wv[0], wv[1], wv[2])
 				wv[7] = wv[6]
 				wv[6] = wv[5]
@@ -446,8 +455,8 @@ sha2_transf :: proc "contextless" (ctx: ^$T, data: []byte) {
 				wv[0] = t1 + t2
 			}
 		} else when T == Context_512 {
-			for i := 0; i < 80; i += 1 {
-				t1 = wv[7] + SHA512_F2(wv[4]) + SHA512_CH(wv[4], wv[5], wv[6]) + sha512_k[i] + w[i]
+			for i in 0 ..< SHA512_ROUNDS {
+				t1 = wv[7] + SHA512_F2(wv[4]) + SHA512_CH(wv[4], wv[5], wv[6]) + SHA512_K[i] + w[i]
 				t2 = SHA512_F1(wv[0]) + SHA512_MAJ(wv[0], wv[1], wv[2])
 				wv[7] = wv[6]
 				wv[6] = wv[5]
@@ -460,7 +469,7 @@ sha2_transf :: proc "contextless" (ctx: ^$T, data: []byte) {
 			}
 		}
 
-		for i := 0; i < 8; i += 1 {
+		for i in 0 ..< 8 {
 			ctx.h[i] += wv[i]
 		}
 

+ 15 - 0
core/crypto/sha2/sha2_impl_hw_gen.odin

@@ -0,0 +1,15 @@
+#+build !amd64
+package sha2
+
+@(private = "file")
+ERR_HW_NOT_SUPPORTED :: "crypto/sha2: hardware implementation unsupported"
+
+// is_hardware_accelerated_256 returns true iff hardware accelerated
+// SHA-224/SHA-256 is supported.
+is_hardware_accelerated_256 :: proc "contextless" () -> bool {
+	return false
+}
+
+sha256_transf_hw :: proc "contextless" (ctx: ^Context_256, data: []byte) {
+	panic_contextless(ERR_HW_NOT_SUPPORTED)
+}

+ 260 - 0
core/crypto/sha2/sha2_impl_hw_intel.odin

@@ -0,0 +1,260 @@
+#+build amd64
+package sha2
+
+// Based on the public domain code by Jeffrey Walton, though
+// realistically, there only is one sensible way to write this
+// and Intel's whitepaper covers it.
+//
+// See: https://github.com/noloader/SHA-Intrinsics
+
+import "base:intrinsics"
+import "core:simd"
+import "core:simd/x86"
+import "core:sys/info"
+
+@(private = "file")
+MASK :: x86.__m128i{0x0405060700010203, 0x0c0d0e0f08090a0b}
+
+@(private = "file")
+K_0 :: simd.u64x2{0x71374491428a2f98, 0xe9b5dba5b5c0fbcf}
+@(private = "file")
+K_1 :: simd.u64x2{0x59f111f13956c25b, 0xab1c5ed5923f82a4}
+@(private = "file")
+K_2 :: simd.u64x2{0x12835b01d807aa98, 0x550c7dc3243185be}
+@(private = "file")
+K_3 :: simd.u64x2{0x80deb1fe72be5d74, 0xc19bf1749bdc06a7}
+@(private = "file")
+K_4 :: simd.u64x2{0xefbe4786e49b69c1, 0x240ca1cc0fc19dc6}
+@(private = "file")
+K_5 :: simd.u64x2{0x4a7484aa2de92c6f, 0x76f988da5cb0a9dc}
+@(private = "file")
+K_6 :: simd.u64x2{0xa831c66d983e5152, 0xbf597fc7b00327c8}
+@(private = "file")
+K_7 :: simd.u64x2{0xd5a79147c6e00bf3, 0x1429296706ca6351}
+@(private = "file")
+K_8 :: simd.u64x2{0x2e1b213827b70a85, 0x53380d134d2c6dfc}
+@(private = "file")
+K_9 :: simd.u64x2{0x766a0abb650a7354, 0x92722c8581c2c92e}
+@(private = "file")
+K_10 :: simd.u64x2{0xa81a664ba2bfe8a1, 0xc76c51a3c24b8b70}
+@(private = "file")
+K_11 :: simd.u64x2{0xd6990624d192e819, 0x106aa070f40e3585}
+@(private = "file")
+K_12 :: simd.u64x2{0x1e376c0819a4c116, 0x34b0bcb52748774c}
+@(private = "file")
+K_13 :: simd.u64x2{0x4ed8aa4a391c0cb3, 0x682e6ff35b9cca4f}
+@(private = "file")
+K_14 :: simd.u64x2{0x78a5636f748f82ee, 0x8cc7020884c87814}
+@(private = "file")
+K_15 :: simd.u64x2{0xa4506ceb90befffa, 0xc67178f2bef9a3f7}
+
+
+// is_hardware_accelerated_256 returns true iff hardware accelerated
+// SHA-224/SHA-256 is supported.
+is_hardware_accelerated_256 :: proc "contextless" () -> bool {
+	features, ok := info.cpu.features.?
+	if !ok {
+		return false
+	}
+
+	req_features :: info.CPU_Features{
+		.sse2,
+		.ssse3,
+		.sse41,
+		.sha,
+	}
+	return features >= req_features
+}
+
+@(private, enable_target_feature="sse2,ssse3,sse4.1,sha")
+sha256_transf_hw :: proc "contextless" (ctx: ^Context_256, data: []byte) #no_bounds_check {
+	// Load the state
+	tmp := intrinsics.unaligned_load((^x86.__m128i)(&ctx.h[0]))
+	state_1 := intrinsics.unaligned_load((^x86.__m128i)(&ctx.h[4]))
+
+	tmp = x86._mm_shuffle_epi32(tmp, 0xb1)            // CDAB
+	state_1 = x86._mm_shuffle_epi32(state_1, 0x1b)    // EFGH
+	state_0 := x86._mm_alignr_epi8(tmp, state_1, 8)   // ABEF
+	// state_1 = x86._mm_blend_epi16(state_1, tmp, 0xf0) // CDGH
+	state_1 = kludge_mm_blend_epi16_0xf0(state_1, tmp)
+
+	data := data
+	for len(data) >= BLOCK_SIZE_256 {
+		state_0_save, state_1_save := state_0, state_1
+
+		// Rounds 0-3
+		msg := intrinsics.unaligned_load((^x86.__m128i)(raw_data(data)))
+		msg_0 := x86._mm_shuffle_epi8(msg, MASK)
+		msg = x86._mm_add_epi32(msg_0, x86.__m128i(K_0))
+		state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
+		msg = x86._mm_shuffle_epi32(msg, 0xe)
+		state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
+
+		// Rounds 4-7
+		msg_1 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(data[16:])))
+		msg_1 = x86._mm_shuffle_epi8(msg_1, MASK)
+		msg = x86._mm_add_epi32(msg_1, x86.__m128i(K_1))
+		state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
+		msg = x86._mm_shuffle_epi32(msg, 0xe)
+		state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
+		msg_0 = x86._mm_sha256msg1_epu32(msg_0, msg_1)
+
+		// Rounds 8-11
+		msg_2 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(data[32:])))
+		msg_2 = x86._mm_shuffle_epi8(msg_2, MASK)
+		msg = x86._mm_add_epi32(msg_2, x86.__m128i(K_2))
+		state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
+		msg = x86._mm_shuffle_epi32(msg, 0xe)
+		state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
+		msg_1 = x86._mm_sha256msg1_epu32(msg_1, msg_2)
+
+		// Rounds 12-15
+		msg_3 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(data[48:])))
+		msg_3 = x86._mm_shuffle_epi8(msg_3, MASK)
+		msg = x86._mm_add_epi32(msg_3, x86.__m128i(K_3))
+		state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
+		tmp = x86._mm_alignr_epi8(msg_3, msg_2, 4)
+		msg_0 = x86._mm_add_epi32(msg_0, tmp)
+		msg_0 = x86._mm_sha256msg2_epu32(msg_0, msg_3)
+		msg = x86._mm_shuffle_epi32(msg, 0x0e)
+		state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
+		msg_2 = x86._mm_sha256msg1_epu32(msg_2, msg_3)
+
+		// Rounds 16-19
+		msg = x86._mm_add_epi32(msg_0, x86.__m128i(K_4))
+		state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
+		tmp = x86._mm_alignr_epi8(msg_0, msg_3, 4)
+		msg_1 = x86._mm_add_epi32(msg_1, tmp)
+		msg_1 = x86._mm_sha256msg2_epu32(msg_1, msg_0)
+		msg = x86._mm_shuffle_epi32(msg, 0x0e)
+		state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
+		msg_3 = x86._mm_sha256msg1_epu32(msg_3, msg_0)
+
+		// Rounds 20-23
+		msg = x86._mm_add_epi32(msg_1, x86.__m128i(K_5))
+		state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
+		tmp = x86._mm_alignr_epi8(msg_1, msg_0, 4)
+		msg_2 = x86._mm_add_epi32(msg_2, tmp)
+		msg_2 = x86._mm_sha256msg2_epu32(msg_2, msg_1)
+		msg = x86._mm_shuffle_epi32(msg, 0x0e)
+		state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
+		msg_0 = x86._mm_sha256msg1_epu32(msg_0, msg_1)
+
+		// Rounds 24-27
+		msg = x86._mm_add_epi32(msg_2, x86.__m128i(K_6))
+		state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
+		tmp = x86._mm_alignr_epi8(msg_2, msg_1, 4)
+		msg_3 = x86._mm_add_epi32(msg_3, tmp)
+		msg_3 = x86._mm_sha256msg2_epu32(msg_3, msg_2)
+		msg = x86._mm_shuffle_epi32(msg, 0x0e)
+		state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
+		msg_1 = x86._mm_sha256msg1_epu32(msg_1, msg_2)
+
+		// Rounds 28-31
+		msg = x86._mm_add_epi32(msg_3, x86.__m128i(K_7))
+		state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
+		tmp = x86._mm_alignr_epi8(msg_3, msg_2, 4)
+		msg_0 = x86._mm_add_epi32(msg_0, tmp)
+		msg_0 = x86._mm_sha256msg2_epu32(msg_0, msg_3)
+		msg = x86._mm_shuffle_epi32(msg, 0x0e)
+		state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
+		msg_2 = x86._mm_sha256msg1_epu32(msg_2, msg_3)
+
+		// Rounds 32-35
+		msg = x86._mm_add_epi32(msg_0, x86.__m128i(K_8))
+		state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
+		tmp = x86._mm_alignr_epi8(msg_0, msg_3, 4)
+		msg_1 = x86._mm_add_epi32(msg_1, tmp)
+		msg_1 = x86._mm_sha256msg2_epu32(msg_1, msg_0)
+		msg = x86._mm_shuffle_epi32(msg, 0x0e)
+		state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
+		msg_3 = x86._mm_sha256msg1_epu32(msg_3, msg_0)
+
+		// Rounds 36-39
+		msg = x86._mm_add_epi32(msg_1, x86.__m128i(K_9))
+		state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
+		tmp = x86._mm_alignr_epi8(msg_1, msg_0, 4)
+		msg_2 = x86._mm_add_epi32(msg_2, tmp)
+		msg_2 = x86._mm_sha256msg2_epu32(msg_2, msg_1)
+		msg = x86._mm_shuffle_epi32(msg, 0x0e)
+		state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
+		msg_0 = x86._mm_sha256msg1_epu32(msg_0, msg_1)
+
+		// Rounds 40-43
+		msg = x86._mm_add_epi32(msg_2, x86.__m128i(K_10))
+		state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
+		tmp = x86._mm_alignr_epi8(msg_2, msg_1, 4)
+		msg_3 = x86._mm_add_epi32(msg_3, tmp)
+		msg_3 = x86._mm_sha256msg2_epu32(msg_3, msg_2)
+		msg = x86._mm_shuffle_epi32(msg, 0x0e)
+		state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
+		msg_1 = x86._mm_sha256msg1_epu32(msg_1, msg_2)
+
+		// Rounds 44-47
+		msg = x86._mm_add_epi32(msg_3, x86.__m128i(K_11))
+		state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
+		tmp = x86._mm_alignr_epi8(msg_3, msg_2, 4)
+		msg_0 = x86._mm_add_epi32(msg_0, tmp)
+		msg_0 = x86._mm_sha256msg2_epu32(msg_0, msg_3)
+		msg = x86._mm_shuffle_epi32(msg, 0x0e)
+		state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
+		msg_2 = x86._mm_sha256msg1_epu32(msg_2, msg_3)
+
+		// Rounds 48-51
+		msg = x86._mm_add_epi32(msg_0, x86.__m128i(K_12))
+		state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
+		tmp = x86._mm_alignr_epi8(msg_0, msg_3, 4)
+		msg_1 = x86._mm_add_epi32(msg_1, tmp)
+		msg_1 = x86._mm_sha256msg2_epu32(msg_1, msg_0)
+		msg = x86._mm_shuffle_epi32(msg, 0x0e)
+		state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
+		msg_3 = x86._mm_sha256msg1_epu32(msg_3, msg_0)
+
+		// Rounds 52-55
+		msg = x86._mm_add_epi32(msg_1, x86.__m128i(K_13))
+		state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
+		tmp = x86._mm_alignr_epi8(msg_1, msg_0, 4)
+		msg_2 = x86._mm_add_epi32(msg_2, tmp)
+		msg_2 = x86._mm_sha256msg2_epu32(msg_2, msg_1)
+		msg = x86._mm_shuffle_epi32(msg, 0x0e)
+		state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
+
+		/* Rounds 56-59 */
+		msg = x86._mm_add_epi32(msg_2, x86.__m128i(K_14))
+		state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
+		tmp = x86._mm_alignr_epi8(msg_2, msg_1, 4)
+		msg_3 = x86._mm_add_epi32(msg_3, tmp)
+		msg_3 = x86._mm_sha256msg2_epu32(msg_3, msg_2)
+		msg = x86._mm_shuffle_epi32(msg, 0x0e)
+		state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
+
+		// Rounds 60-63
+		msg = x86._mm_add_epi32(msg_3, x86.__m128i(K_15))
+		state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
+		msg = x86._mm_shuffle_epi32(msg, 0x0e)
+		state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
+
+		state_0 = x86._mm_add_epi32(state_0, state_0_save)
+		state_1 = x86._mm_add_epi32(state_1, state_1_save)
+
+		data = data[BLOCK_SIZE_256:]
+	}
+
+	// Write back the updated state
+	tmp = x86._mm_shuffle_epi32(state_0, 0x1b)        // FEBA
+	state_1 = x86._mm_shuffle_epi32(state_1, 0xb1)    // DCHG
+	// state_0 = x86._mm_blend_epi16(tmp, state_1, 0xf0) // DCBA
+	state_0 = kludge_mm_blend_epi16_0xf0(tmp, state_1)
+	state_1 = x86._mm_alignr_epi8(state_1, tmp, 8)    // ABEF
+
+	intrinsics.unaligned_store((^x86.__m128i)(&ctx.h[0]), state_0)
+	intrinsics.unaligned_store((^x86.__m128i)(&ctx.h[4]), state_1)
+}
+
+@(private = "file")
+kludge_mm_blend_epi16_0xf0 :: #force_inline proc "contextless"(a, b: x86.__m128i) -> x86.__m128i {
+	// HACK HACK HACK: LLVM got rid of `llvm.x86.sse41.pblendw`.
+	a_ := simd.to_array(a)
+	b_ := simd.to_array(b)
+	return x86.__m128i{a_[0], b_[1]}
+}

+ 6 - 11
core/crypto/siphash/siphash.odin

@@ -219,18 +219,14 @@ verify_4_8 :: proc {
 */
 
 init :: proc(ctx: ^Context, key: []byte, c_rounds, d_rounds: int) {
-	if len(key) != KEY_SIZE {
-		panic("crypto/siphash; invalid key size")
-	}
+	ensure(len(key) == KEY_SIZE, "crypto/siphash: invalid key size")
 	ctx.c_rounds = c_rounds
 	ctx.d_rounds = d_rounds
 	is_valid_setting :=
 		(ctx.c_rounds == 1 && ctx.d_rounds == 3) ||
 		(ctx.c_rounds == 2 && ctx.d_rounds == 4) ||
 		(ctx.c_rounds == 4 && ctx.d_rounds == 8)
-	if !is_valid_setting {
-		panic("crypto/siphash: incorrect rounds set up")
-	}
+	ensure(is_valid_setting, "crypto/siphash: incorrect rounds set up")
 	ctx.k0 = endian.unchecked_get_u64le(key[:8])
 	ctx.k1 = endian.unchecked_get_u64le(key[8:])
 	ctx.v0 = 0x736f6d6570736575 ~ ctx.k0
@@ -245,7 +241,7 @@ init :: proc(ctx: ^Context, key: []byte, c_rounds, d_rounds: int) {
 }
 
 update :: proc(ctx: ^Context, data: []byte) {
-	assert(ctx.is_initialized, "crypto/siphash: context is not initialized")
+	ensure(ctx.is_initialized)
 
 	data := data
 	ctx.total_length += len(data)
@@ -269,7 +265,7 @@ update :: proc(ctx: ^Context, data: []byte) {
 }
 
 final :: proc(ctx: ^Context, dst: ^u64) {
-	assert(ctx.is_initialized, "crypto/siphash: context is not initialized")
+	ensure(ctx.is_initialized)
 
 	tmp: [BLOCK_SIZE]byte
 	copy(tmp[:], ctx.buf[:ctx.last_block])
@@ -336,9 +332,8 @@ _get_byte :: #force_inline proc "contextless" (byte_num: byte, into: u64) -> byt
 
 @(private)
 _collect_output :: #force_inline proc(dst: []byte, hash: u64) {
-	if len(dst) < DIGEST_SIZE {
-		panic("crypto/siphash: invalid tag size")
-	}
+	ensure(len(dst) >= DIGEST_SIZE, "crypto/siphash: invalid tag size")
+
 	dst[0] = _get_byte(7, hash)
 	dst[1] = _get_byte(6, hash)
 	dst[2] = _get_byte(5, hash)

+ 5 - 8
core/crypto/sm3/sm3.odin

@@ -53,7 +53,7 @@ init :: proc(ctx: ^Context) {
 
 // update adds more data to the Context.
 update :: proc(ctx: ^Context, data: []byte) {
-	assert(ctx.is_initialized)
+	ensure(ctx.is_initialized)
 
 	data := data
 	ctx.length += u64(len(data))
@@ -83,11 +83,8 @@ update :: proc(ctx: ^Context, data: []byte) {
 // Iff finalize_clone is set, final will work on a copy of the Context,
 // which is useful for calculating rolling digests.
 final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
-	assert(ctx.is_initialized)
-
-	if len(hash) < DIGEST_SIZE {
-		panic("crypto/sm3: invalid destination digest size")
-	}
+	ensure(ctx.is_initialized)
+	ensure(len(hash) >= DIGEST_SIZE, "crypto/sm3: invalid destination digest size")
 
 	ctx := ctx
 	if finalize_clone {
@@ -110,7 +107,7 @@ final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
 	length <<= 3
 	endian.unchecked_put_u64be(pad[:], length)
 	update(ctx, pad[0:8])
-	assert(ctx.bitlength == 0)
+	assert(ctx.bitlength == 0) // Check for bugs
 
 	for i := 0; i < DIGEST_SIZE / 4; i += 1 {
 		endian.unchecked_put_u32be(hash[i * 4:], ctx.state[i])
@@ -136,7 +133,7 @@ reset :: proc(ctx: ^Context) {
     SM3 implementation
 */
 
-@(private)
+@(private, rodata)
 IV := [8]u32 {
 	0x7380166f, 0x4914b2b9, 0x172442d7, 0xda8a0600,
 	0xa96f30bc, 0x163138aa, 0xe38dee4d, 0xb0fb0e4e,

+ 4 - 10
core/crypto/x25519/x25519.odin

@@ -15,7 +15,7 @@ SCALAR_SIZE :: 32
 // POINT_SIZE is the size of a X25519 point (public key/shared secret) in bytes.
 POINT_SIZE :: 32
 
-@(private)
+@(private, rodata)
 _BASE_POINT: [32]byte = {9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
 
 @(private)
@@ -101,15 +101,9 @@ _scalarmult :: proc "contextless" (out, scalar, point: ^[32]byte) {
 // scalarmult "multiplies" the provided scalar and point, and writes the
 // resulting point to dst.
 scalarmult :: proc(dst, scalar, point: []byte) {
-	if len(scalar) != SCALAR_SIZE {
-		panic("crypto/x25519: invalid scalar size")
-	}
-	if len(point) != POINT_SIZE {
-		panic("crypto/x25519: invalid point size")
-	}
-	if len(dst) != POINT_SIZE {
-		panic("crypto/x25519: invalid destination point size")
-	}
+	ensure(len(scalar) == SCALAR_SIZE, "crypto/x25519: invalid scalar size")
+	ensure(len(point) == POINT_SIZE, "crypto/x25519: invalid point size")
+	ensure(len(dst) == POINT_SIZE, "crypto/x25519: invalid destination point size")
 
 	// "clamp" the scalar
 	e: [32]byte = ---

+ 155 - 0
core/crypto/x448/x448.odin

@@ -0,0 +1,155 @@
+/*
+package x448 implements the X448 (aka curve448) Elliptic-Curve
+Diffie-Hellman key exchange protocol.
+
+See:
+- [[ https://www.rfc-editor.org/rfc/rfc7748 ]]
+*/
+package x448
+
+import field "core:crypto/_fiat/field_curve448"
+import "core:mem"
+
+// SCALAR_SIZE is the size of a X448 scalar (private key) in bytes.
+SCALAR_SIZE :: 56
+// POINT_SIZE is the size of a X448 point (public key/shared secret) in bytes.
+POINT_SIZE :: 56
+
+@(private, rodata)
+_BASE_POINT: [56]byte = {
+	5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+}
+
+@(private)
+_scalar_bit :: #force_inline proc "contextless" (s: ^[56]byte, i: int) -> u8 {
+	if i < 0 {
+		return 0
+	}
+	return (s[i >> 3] >> uint(i & 7)) & 1
+}
+
+@(private)
+_scalarmult :: proc "contextless" (out, scalar, point: ^[56]byte) {
+	// Montgomery pseudo-multiplication, using the RFC 7748 formula.
+	t1, t2: field.Loose_Field_Element = ---, ---
+
+	// x_1 = u
+	// x_2 = 1
+	// z_2 = 0
+	// x_3 = u
+	// z_3 = 1
+	x1: field.Tight_Field_Element = ---
+	field.fe_from_bytes(&x1, point)
+
+	x2, x3, z2, z3: field.Tight_Field_Element = ---, ---, ---, ---
+	field.fe_one(&x2)
+	field.fe_zero(&z2)
+	field.fe_set(&x3, &x1)
+	field.fe_one(&z3)
+
+	// swap = 0
+	swap: int
+
+	// For t = bits-1 down to 0:
+	for t := 448 - 1; t >= 0; t -= 1 {
+		// k_t = (k >> t) & 1
+		k_t := int(_scalar_bit(scalar, t))
+		// swap ^= k_t
+		swap ~= k_t
+		// Conditional swap; see text below.
+		// (x_2, x_3) = cswap(swap, x_2, x_3)
+		field.fe_cond_swap(&x2, &x3, swap)
+		// (z_2, z_3) = cswap(swap, z_2, z_3)
+		field.fe_cond_swap(&z2, &z3, swap)
+		// swap = k_t
+		swap = k_t
+
+		// Note: This deliberately omits reductions after add/sub operations
+		// if the result is only ever used as the input to a mul/square since
+		// the implementations of those can deal with non-reduced inputs.
+		//
+		// fe_tighten_cast is only used to store a fully reduced
+		// output in a Loose_Field_Element, or to provide such a
+		// Loose_Field_Element as a Tight_Field_Element argument.
+
+		// A = x_2 + z_2
+		field.fe_add(&t1, &x2, &z2)
+		// B = x_2 - z_2
+		field.fe_sub(&t2, &x2, &z2)
+		// D = x_3 - z_3
+		field.fe_sub(field.fe_relax_cast(&z2), &x3, &z3) // (z2 unreduced)
+		// DA = D * A
+		field.fe_carry_mul(&x2, field.fe_relax_cast(&z2), &t1)
+		// C = x_3 + z_3
+		field.fe_add(field.fe_relax_cast(&z3), &x3, &z3) // (z3 unreduced)
+		// CB = C * B
+		field.fe_carry_mul(&x3, &t2, field.fe_relax_cast(&z3))
+		// z_3 = x_1 * (DA - CB)^2
+		field.fe_sub(field.fe_relax_cast(&z3), &x2, &x3) // (z3 unreduced)
+		field.fe_carry_square(&z3, field.fe_relax_cast(&z3))
+		field.fe_carry_mul(&z3, field.fe_relax_cast(&x1), field.fe_relax_cast(&z3))
+		// x_3 = (DA + CB)^2
+		field.fe_add(field.fe_relax_cast(&z2), &x2, &x3) // (z2 unreduced)
+		field.fe_carry_square(&x3, field.fe_relax_cast(&z2))
+
+		// AA = A^2
+		field.fe_carry_square(&z2, &t1)
+		// BB = B^2
+		field.fe_carry_square(field.fe_tighten_cast(&t1), &t2) // (t1 reduced)
+		// x_2 = AA * BB
+		field.fe_carry_mul(&x2, field.fe_relax_cast(&z2), &t1)
+		// E = AA - BB
+		field.fe_sub(&t2, &z2, field.fe_tighten_cast(&t1)) // (t1 (input) is reduced)
+		// z_2 = E * (AA + a24 * E)
+		field.fe_carry_mul_small(field.fe_tighten_cast(&t1), &t2, 39081) // (t1 reduced)
+		field.fe_add(&t1, &z2, field.fe_tighten_cast(&t1)) // (t1 (input) is reduced)
+		field.fe_carry_mul(&z2, &t2, &t1)
+	}
+
+	// Conditional swap; see text below.
+	// (x_2, x_3) = cswap(swap, x_2, x_3)
+	field.fe_cond_swap(&x2, &x3, swap)
+	// (z_2, z_3) = cswap(swap, z_2, z_3)
+	field.fe_cond_swap(&z2, &z3, swap)
+
+	// Return x_2 * (z_2^(p - 2))
+	field.fe_carry_inv(&z2, field.fe_relax_cast(&z2))
+	field.fe_carry_mul(&x2, field.fe_relax_cast(&x2), field.fe_relax_cast(&z2))
+	field.fe_to_bytes(out, &x2)
+
+	field.fe_clear_vec([]^field.Tight_Field_Element{&x1, &x2, &x3, &z2, &z3})
+	field.fe_clear_vec([]^field.Loose_Field_Element{&t1, &t2})
+}
+
+// scalarmult "multiplies" the provided scalar and point, and writes the
+// resulting point to dst.
+scalarmult :: proc(dst, scalar, point: []byte) {
+	ensure(len(scalar) == SCALAR_SIZE, "crypto/x448: invalid scalar size")
+	ensure(len(point) == POINT_SIZE, "crypto/x448: invalid point size")
+	ensure(len(dst) == POINT_SIZE, "crypto/x448: invalid destination point size")
+
+	// "clamp" the scalar
+	e: [56]byte = ---
+	copy_slice(e[:], scalar)
+	e[0] &= 252
+	e[55] |= 128
+
+	p: [56]byte = ---
+	copy_slice(p[:], point)
+
+	d: [56]byte = ---
+	_scalarmult(&d, &e, &p)
+	copy_slice(dst, d[:])
+
+	mem.zero_explicit(&e, size_of(e))
+	mem.zero_explicit(&d, size_of(d))
+}
+
+// scalarmult_basepoint "multiplies" the provided scalar with the X448
+// base point and writes the resulting point to dst.
+scalarmult_basepoint :: proc(dst, scalar: []byte) {
+	scalarmult(dst, scalar, _BASE_POINT[:])
+}

+ 3 - 1
core/debug/trace/trace_windows.odin

@@ -49,7 +49,9 @@ _resolve :: proc(ctx: ^Context, frame: Frame, allocator: runtime.Allocator) -> (
 
 	data: [size_of(win32.SYMBOL_INFOW) + size_of([256]win32.WCHAR)]byte
 	symbol := (^win32.SYMBOL_INFOW)(&data[0])
-	symbol.SizeOfStruct = size_of(symbol)
+	// The value of SizeOfStruct must be the size of the whole struct,
+	// not just the size of the pointer
+	symbol.SizeOfStruct = size_of(symbol^)
 	symbol.MaxNameLen = 255
 	if win32.SymFromAddrW(ctx.impl.hProcess, win32.DWORD64(frame), &{}, symbol) {
 		fl.procedure, _ = win32.wstring_to_utf8(&symbol.Name[0], -1, allocator)

+ 23 - 0
core/dynlib/lb_haiku.odin

@@ -0,0 +1,23 @@
+#+build haiku
+#+private
+package dynlib
+
+import "base:runtime"
+
+_LIBRARY_FILE_EXTENSION :: ""
+
+_load_library :: proc(path: string, global_symbols: bool, allocator: runtime.Allocator) -> (Library, bool) {
+	return nil, false
+}
+
+_unload_library :: proc(library: Library) -> bool {
+	return false
+}
+
+_symbol_address :: proc(library: Library, symbol: string, allocator: runtime.Allocator) -> (ptr: rawptr, found: bool) {
+	return nil, false
+}
+
+_last_error :: proc() -> string {
+	return ""
+}

+ 3 - 3
core/encoding/cbor/cbor.odin

@@ -385,17 +385,17 @@ to_diagnostic_format_writer :: proc(w: io.Writer, val: Value, padding := 0) -> i
 	// which we want for the diagnostic format.
 	case f16:
 		buf: [64]byte
-		str := strconv.append_float(buf[:], f64(v), 'f', 2*size_of(f16), 8*size_of(f16))
+		str := strconv.write_float(buf[:], f64(v), 'f', 2*size_of(f16), 8*size_of(f16))
 		if str[0] == '+' && str != "+Inf" { str = str[1:] }
 		io.write_string(w, str) or_return
 	case f32:
 		buf: [128]byte
-		str := strconv.append_float(buf[:], f64(v), 'f', 2*size_of(f32), 8*size_of(f32))
+		str := strconv.write_float(buf[:], f64(v), 'f', 2*size_of(f32), 8*size_of(f32))
 		if str[0] == '+' && str != "+Inf" { str = str[1:] }
 		io.write_string(w, str) or_return
 	case f64:
 		buf: [256]byte
-		str := strconv.append_float(buf[:], f64(v), 'f', 2*size_of(f64), 8*size_of(f64))
+		str := strconv.write_float(buf[:], f64(v), 'f', 2*size_of(f64), 8*size_of(f64))
 		if str[0] == '+' && str != "+Inf" { str = str[1:] }
 		io.write_string(w, str) or_return
 

+ 36 - 0
core/encoding/cbor/marshal.odin

@@ -612,6 +612,42 @@ _marshal_into_encoder :: proc(e: Encoder, v: any, ti: ^runtime.Type_Info) -> (er
 		case:
 			panic("unknown bit_size size")
 		}
+	case runtime.Type_Info_Matrix:
+		count := info.column_count * info.elem_stride
+		err_conv(_encode_u64(e, u64(count), .Array)) or_return
+
+		if impl, ok := _tag_implementations_type[info.elem.id]; ok {
+			for i in 0..<count {
+				data := uintptr(v.data) + uintptr(i*info.elem_size)
+				impl->marshal(e, any{rawptr(data), info.elem.id}) or_return
+			}
+			return
+		}
+
+		elem_ti := runtime.type_info_core(type_info_of(info.elem.id))
+		for i in 0..<count {
+			data := uintptr(v.data) + uintptr(i*info.elem_size)
+			_marshal_into_encoder(e, any{rawptr(data), info.elem.id}, elem_ti) or_return
+		}
+		return
+
+	case runtime.Type_Info_Simd_Vector:
+		err_conv(_encode_u64(e, u64(info.count), .Array)) or_return
+
+		if impl, ok := _tag_implementations_type[info.elem.id]; ok {
+			for i in 0..<info.count {
+				data := uintptr(v.data) + uintptr(i*info.elem_size)
+				impl->marshal(e, any{rawptr(data), info.elem.id}) or_return
+			}
+			return
+		}
+
+		elem_ti := runtime.type_info_core(type_info_of(info.elem.id))
+		for i in 0..<info.count {
+			data := uintptr(v.data) + uintptr(i*info.elem_size)
+			_marshal_into_encoder(e, any{rawptr(data), info.elem.id}, elem_ti) or_return
+		}
+		return
 	}
 
 	return _unsupported(v.id, nil)

+ 32 - 1
core/encoding/cbor/unmarshal.odin

@@ -29,6 +29,7 @@ an input.
 unmarshal :: proc {
 	unmarshal_from_reader,
 	unmarshal_from_string,
+	unmarshal_from_bytes,
 }
 
 unmarshal_from_reader :: proc(r: io.Reader, ptr: ^$T, flags := Decoder_Flags{}, allocator := context.allocator, temp_allocator := context.temp_allocator, loc := #caller_location) -> (err: Unmarshal_Error) {
@@ -51,6 +52,11 @@ unmarshal_from_string :: proc(s: string, ptr: ^$T, flags := Decoder_Flags{}, all
 	return
 }
 
+// Unmarshals from a slice of bytes, see docs on the proc group `Unmarshal` for more info.
+unmarshal_from_bytes :: proc(bytes: []byte, ptr: ^$T, flags := Decoder_Flags{}, allocator := context.allocator, temp_allocator := context.temp_allocator, loc := #caller_location) -> (err: Unmarshal_Error) {
+	return unmarshal_from_string(string(bytes), ptr, flags, allocator, temp_allocator, loc)
+}
+
 unmarshal_from_decoder :: proc(d: Decoder, ptr: ^$T, allocator := context.allocator, temp_allocator := context.temp_allocator, loc := #caller_location) -> (err: Unmarshal_Error) {
 	d := d
 
@@ -487,7 +493,7 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header
 		data := mem.alloc_bytes_non_zeroed(t.elem.size * scap, t.elem.align, allocator=allocator, loc=loc) or_return
 		defer if err != nil { mem.free_bytes(data, allocator=allocator, loc=loc) }
 
-		da := mem.Raw_Dynamic_Array{raw_data(data), 0, length, context.allocator }
+		da := mem.Raw_Dynamic_Array{raw_data(data), 0, scap, context.allocator }
 
 		assign_array(d, &da, t.elem, length) or_return
 
@@ -585,6 +591,31 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header
 		if out_of_space { return _unsupported(v, hdr) }
 		return
 
+	case reflect.Type_Info_Matrix:
+		count := t.column_count * t.elem_stride
+		length, _ := err_conv(_decode_len_container(d, add)) or_return
+		if length > count {
+			return _unsupported(v, hdr)
+		}
+
+		da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, length, allocator }
+
+		out_of_space := assign_array(d, &da, t.elem, length, growable=false) or_return
+		if out_of_space { return _unsupported(v, hdr) }
+		return
+
+	case reflect.Type_Info_Simd_Vector:
+		length, _ := err_conv(_decode_len_container(d, add)) or_return
+		if length > t.count {
+			return _unsupported(v, hdr)
+		}
+
+		da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, length, allocator }
+
+		out_of_space := assign_array(d, &da, t.elem, length, growable=false) or_return
+		if out_of_space { return _unsupported(v, hdr) }
+		return
+
 	case: return _unsupported(v, hdr)
 	}
 }

+ 0 - 2
core/encoding/csv/doc.odin

@@ -63,8 +63,6 @@ Example:
 	read_csv_from_string :: proc(filename: string) {
 		r: csv.Reader
 		r.trim_leading_space  = true
-		r.reuse_record        = true // Without it you have to delete(record)
-		r.reuse_record_buffer = true // Without it you have to each of the fields within it
 		defer csv.reader_destroy(&r)
 
 		csv_data, ok := os.read_entire_file(filename)

+ 1 - 1
core/encoding/csv/reader.odin

@@ -130,7 +130,7 @@ reader_destroy :: proc(r: ^Reader) {
 	for record, row_idx in csv.iterator_next(&r) { ... }
 
 	TIP: If you process the results within the loop and don't need to own the results,
-	you can set the Reader's `reuse_record` and `reuse_record_reuse_record_buffer` to true;
+	you can set the Reader's `reuse_record` and `reuse_record_buffer` to true;
 	you won't need to delete the record or its fields.
 */
 iterator_next :: proc(r: ^Reader) -> (record: []string, idx: int, err: Error, more: bool) {

Unele fișiere nu au fost afișate deoarece prea multe fișiere au fost modificate în acest diff