Merge branch 'master' into syscall-fix

gingerBill committed 1 year ago · parent commit 62f455f47b

100 changed files with 5658 additions and 846 deletions
  1. +141 -130  .github/workflows/ci.yml
  2. +20 -35  .github/workflows/nightly.yml
  3. +5 -30  .gitignore
  4. BIN  LLVM-C.dll
  5. +2 -0  base/builtin/builtin.odin
  6. +36 -14  base/intrinsics/intrinsics.odin
  7. +104 -30  base/runtime/core.odin
  8. +162 -114  base/runtime/core_builtin.odin
  9. +72 -58  base/runtime/core_builtin_soa.odin
  10. +3 -0  base/runtime/default_allocators_general.odin
  11. +2 -0  base/runtime/default_temp_allocator_arena.odin
  12. +1 -1  base/runtime/default_temporary_allocator.odin
  13. +1 -1  base/runtime/docs.odin
  14. +6 -6  base/runtime/dynamic_map_internal.odin
  15. +1 -1  base/runtime/entry_unix.odin
  16. +30 -11  base/runtime/entry_wasm.odin
  17. +2 -1  base/runtime/entry_windows.odin
  18. +4 -0  base/runtime/error_checks.odin
  19. +3 -3  base/runtime/heap_allocator.odin
  20. +29 -0  base/runtime/heap_allocator_orca.odin
  21. +7 -4  base/runtime/heap_allocator_other.odin
  22. +4 -4  base/runtime/heap_allocator_unix.odin
  23. +3 -3  base/runtime/heap_allocator_windows.odin
  24. +73 -37  base/runtime/internal.odin
  25. +6 -2  base/runtime/os_specific_bsd.odin
  26. +18 -5  base/runtime/os_specific_darwin.odin
  27. +43 -0  base/runtime/os_specific_orca.odin
  28. +47 -3  base/runtime/os_specific_wasi.odin
  29. +7 -6  base/runtime/print.odin
  30. +16 -10  base/runtime/procs.odin
  31. +127 -0  base/runtime/random_generator.odin
  32. +3 -3  base/runtime/udivmod128.odin
  33. +871 -0  base/runtime/wasm_allocator.odin
  34. BIN  bin/llvm/windows/LLVM-C.lib
  35. BIN  bin/llvm/windows/clang_rt.asan-x86_64.lib
  36. +4 -1  build.bat
  37. +35 -14  build_odin.sh
  38. +0 -51  ci/create_nightly_json.py
  39. +0 -34  ci/delete_old_binaries.py
  40. +140 -0  ci/nightly.py
  41. +0 -25  ci/upload_create_nightly.sh
  42. +6 -6  core/bufio/reader.odin
  43. +4 -0  core/bufio/writer.odin
  44. +36 -36  core/bytes/buffer.odin
  45. +25 -0  core/bytes/bytes.odin
  46. +1 -1  core/c/libc/errno.odin
  47. +1 -14  core/c/libc/signal.odin
  48. +9 -7  core/c/libc/stdio.odin
  49. +1 -1  core/c/libc/time.odin
  50. +1 -1  core/c/libc/wctype.odin
  51. +28 -28  core/compress/common.odin
  52. +4 -7  core/compress/shoco/shoco.odin
  53. +13 -13  core/compress/zlib/zlib.odin
  54. +4 -7  core/container/avl/avl.odin
  55. +6 -3  core/container/bit_array/bit_array.odin
  56. +46 -0  core/container/intrusive/list/doc.odin
  57. +169 -2  core/container/intrusive/list/intrusive_list.odin
  58. +1 -2  core/container/lru/lru_cache.odin
  59. +4 -4  core/container/queue/queue.odin
  60. +568 -0  core/container/rbtree/rbtree.odin
  61. +4 -4  core/container/small_array/small_array.odin
  62. +1 -1  core/container/topological_sort/topological_sort.odin
  63. +28 -0  core/crypto/_aes/aes.odin
  64. +86 -0  core/crypto/_aes/ct64/api.odin
  65. +265 -0  core/crypto/_aes/ct64/ct64.odin
  66. +135 -0  core/crypto/_aes/ct64/ct64_dec.odin
  67. +95 -0  core/crypto/_aes/ct64/ct64_enc.odin
  68. +179 -0  core/crypto/_aes/ct64/ct64_keysched.odin
  69. +136 -0  core/crypto/_aes/ct64/ghash.odin
  70. +75 -0  core/crypto/_aes/ct64/helpers.odin
  71. +43 -0  core/crypto/_aes/hw_intel/api.odin
  72. +281 -0  core/crypto/_aes/hw_intel/ghash.odin
  73. +178 -0  core/crypto/_aes/hw_intel/hw_intel_keysched.odin
  74. +2 -2  core/crypto/_edwards25519/edwards25519.odin
  75. +2 -2  core/crypto/_edwards25519/edwards25519_scalar.odin
  76. +2 -2  core/crypto/_fiat/field_curve25519/field.odin
  77. +2 -2  core/crypto/_fiat/field_poly1305/field.odin
  78. +21 -0  core/crypto/aes/aes.odin
  79. +201 -0  core/crypto/aes/aes_ctr.odin
  80. +151 -0  core/crypto/aes/aes_ctr_hw_intel.odin
  81. +57 -0  core/crypto/aes/aes_ecb.odin
  82. +58 -0  core/crypto/aes/aes_ecb_hw_intel.odin
  83. +269 -0  core/crypto/aes/aes_gcm.odin
  84. +243 -0  core/crypto/aes/aes_gcm_hw_intel.odin
  85. +41 -0  core/crypto/aes/aes_impl.odin
  86. +44 -0  core/crypto/aes/aes_impl_hw_gen.odin
  87. +18 -0  core/crypto/aes/aes_impl_hw_intel.odin
  88. +5 -3  core/crypto/chacha20/chacha20.odin
  89. +27 -4  core/crypto/crypto.odin
  90. +5 -5  core/crypto/kmac/kmac.odin
  91. +5 -5  core/crypto/legacy/keccak/keccak.odin
  92. +4 -5  core/crypto/rand_bsd.odin
  93. +5 -6  core/crypto/rand_darwin.odin
  94. +4 -4  core/crypto/rand_generic.odin
  95. +4 -4  core/crypto/rand_js.odin
  96. +5 -5  core/crypto/rand_linux.odin
  97. +13 -14  core/crypto/rand_windows.odin
  98. +3 -3  core/crypto/ristretto255/ristretto255.odin
  99. +1 -1  core/crypto/ristretto255/ristretto255_scalar.odin
  100. +5 -5  core/crypto/sha3/sha3.odin

+141 -130  .github/workflows/ci.yml

@@ -2,147 +2,163 @@ name: CI
 on: [push, pull_request, workflow_dispatch]
 
 jobs:
-  build_linux:
-    name: Ubuntu Build, Check, and Test
+  build_netbsd:
+    name: NetBSD Build, Check, and Test
     runs-on: ubuntu-latest
+    env:
+      PKGSRC_BRANCH: 2024Q1
     steps:
-      - uses: actions/checkout@v1
-      - name: Download LLVM
+    - uses: actions/checkout@v4
+    - name: Build, Check, and Test
+      timeout-minutes: 15
+      uses: vmactions/netbsd-vm@v1
+      with:
+        release: "10.0"
+        envs: PKGSRC_BRANCH
+        usesh: true
+        copyback: false
+        prepare: |
+          PKG_PATH="https://cdn.NetBSD.org/pub/pkgsrc/packages/NetBSD/$(uname -p)/$(uname -r | cut -d_ -f1)_${PKGSRC_BRANCH}/All" /usr/sbin/pkg_add pkgin
+          pkgin -y in gmake git bash python311
+          pkgin -y in libxml2 perl zstd
+          /usr/sbin/pkg_add https://github.com/andreas-jonsson/llvm17-netbsd-bin/releases/download/pkgsrc-current/llvm-17.0.6.tgz
+          /usr/sbin/pkg_add https://github.com/andreas-jonsson/llvm17-netbsd-bin/releases/download/pkgsrc-current/clang-17.0.6.tgz
+          ln -s /usr/pkg/bin/python3.11 /usr/bin/python3
+        run: |
+          git config --global --add safe.directory $(pwd)
+          gmake release
+          ./odin version
+          ./odin report
+          gmake -C vendor/stb/src
+          gmake -C vendor/cgltf/src
+          gmake -C vendor/miniaudio/src
+          ./odin check examples/all -vet -strict-style -disallow-do -target:netbsd_amd64
+          ./odin check examples/all -vet -strict-style -disallow-do -target:netbsd_arm64
+          ./odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false
+          ./odin test tests/core/speed.odin -file -all-packages -o:speed -define:ODIN_TEST_FANCY=false
+          ./odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false
+          ./odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false
+          (cd tests/issues; ./run.sh)
+  build_freebsd:
+    name: FreeBSD Build, Check, and Test
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+    - name: Build, Check, and Test
+      timeout-minutes: 15
+      uses: vmactions/freebsd-vm@v1
+      with:
+        usesh: true
+        copyback: false
+        prepare: |
+          pkg install -y gmake git bash python3 libxml2 llvm17
+        run: |
+          # `set -e` is needed for test failures to register. https://github.com/vmactions/freebsd-vm/issues/72
+          set -e -x
+          git config --global --add safe.directory $(pwd)
+          gmake release
+          ./odin version
+          ./odin report
+          gmake -C vendor/stb/src
+          gmake -C vendor/cgltf/src
+          gmake -C vendor/miniaudio/src
+          ./odin check examples/all -vet -strict-style -disallow-do -target:freebsd_amd64
+          ./odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false
+          ./odin test tests/core/speed.odin -file -all-packages -o:speed -define:ODIN_TEST_FANCY=false
+          ./odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false
+          ./odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false
+          (cd tests/issues; ./run.sh)
+  ci:
+    strategy:
+      fail-fast: false
+      matrix:
+        # MacOS 13 runs on Intel, 14 runs on ARM
+        os: [ubuntu-latest, macos-13, macos-14]
+    runs-on: ${{ matrix.os }}
+    name: ${{ matrix.os == 'macos-14' && 'MacOS ARM' || (matrix.os == 'macos-13' && 'MacOS Intel' || 'Ubuntu') }} Build, Check, and Test
+    timeout-minutes: 15
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Download LLVM (Linux)
+        if: matrix.os == 'ubuntu-latest'
         run: |
           wget https://apt.llvm.org/llvm.sh
           chmod +x llvm.sh
           sudo ./llvm.sh 17
           echo "/usr/lib/llvm-17/bin" >> $GITHUB_PATH
-      - name: build odin
-        run: ./build_odin.sh release
-      - name: Odin version
-        run: ./odin version
-        timeout-minutes: 1
-      - name: Odin report
-        run: ./odin report
-        timeout-minutes: 1
-      - name: Odin check
-        run: ./odin check examples/demo -vet
-        timeout-minutes: 10
-      - name: Odin run
-        run: ./odin run examples/demo
-        timeout-minutes: 10
-      - name: Odin run -debug
-        run: ./odin run examples/demo -debug
-        timeout-minutes: 10
-      - name: Odin check examples/all
-        run: ./odin check examples/all -strict-style
-        timeout-minutes: 10
-      - name: Core library tests
-        run: |
-          cd tests/core
-          make
-        timeout-minutes: 10
-      - name: Vendor library tests
-        run: |
-          cd tests/vendor
-          make
-        timeout-minutes: 10
-      - name: Odin internals tests
-        run: |
-          cd tests/internal
-          make
-        timeout-minutes: 10
-      - name: Odin check examples/all for Linux i386
-        run: ./odin check examples/all -vet -strict-style -target:linux_i386
-        timeout-minutes: 10
-      - name: Odin check examples/all for Linux arm64
-        run: ./odin check examples/all -vet -strict-style -target:linux_arm64
-        timeout-minutes: 10
-      - name: Odin check examples/all for FreeBSD amd64
-        run: ./odin check examples/all -vet -strict-style -target:freebsd_amd64
-        timeout-minutes: 10
-      - name: Odin check examples/all for OpenBSD amd64
-        run: ./odin check examples/all -vet -strict-style -target:openbsd_amd64
-        timeout-minutes: 10
-  build_macOS:
-    name: MacOS Build, Check, and Test
-    runs-on: macos-latest
-    steps:
-      - uses: actions/checkout@v1
-      - name: Download LLVM, and setup PATH
+
+      - name: Download LLVM (MacOS Intel)
+        if: matrix.os == 'macos-13'
         run: |
           brew install llvm@17
           echo "/usr/local/opt/llvm@17/bin" >> $GITHUB_PATH
-      - name: build odin
-        run: ./build_odin.sh release
-      - name: Odin version
-        run: ./odin version
-        timeout-minutes: 1
-      - name: Odin report
-        run: ./odin report
-        timeout-minutes: 1
-      - name: Odin check
-        run: ./odin check examples/demo -vet
-        timeout-minutes: 10
-      - name: Odin run
-        run: ./odin run examples/demo
-        timeout-minutes: 10
-      - name: Odin run -debug
-        run: ./odin run examples/demo -debug
-        timeout-minutes: 10
-      - name: Odin check examples/all
-        run: ./odin check examples/all -strict-style
-        timeout-minutes: 10
-      - name: Core library tests
-        run: |
-          cd tests/core
-          make
-        timeout-minutes: 10
-      - name: Odin internals tests
-        run: |
-          cd tests/internal
-          make
-        timeout-minutes: 10
-  build_macOS_arm:
-    name: MacOS ARM Build, Check, and Test
-    runs-on: macos-14 # This is an arm/m1 runner.
-    steps:
-      - uses: actions/checkout@v1
-      - name: Download LLVM and setup PATH
+
+      - name: Download LLVM (MacOS ARM)
+        if: matrix.os == 'macos-14'
         run: |
-          brew install llvm@17
+          brew install llvm@17 wasmtime
           echo "/opt/homebrew/opt/llvm@17/bin" >> $GITHUB_PATH
-      - name: build odin
+
+      - name: Build Odin
         run: ./build_odin.sh release
       - name: Odin version
         run: ./odin version
-        timeout-minutes: 1
       - name: Odin report
         run: ./odin report
-        timeout-minutes: 1
+      - name: Compile needed Vendor
+        run: |
+          make -C vendor/stb/src
+          make -C vendor/cgltf/src
+          make -C vendor/miniaudio/src
       - name: Odin check
         run: ./odin check examples/demo -vet
-        timeout-minutes: 10
       - name: Odin run
         run: ./odin run examples/demo
-        timeout-minutes: 10
       - name: Odin run -debug
         run: ./odin run examples/demo -debug
-        timeout-minutes: 10
       - name: Odin check examples/all
         run: ./odin check examples/all -strict-style
-        timeout-minutes: 10
-      - name: Core library tests
-        run: |
-          cd tests/core
-          make
-        timeout-minutes: 10
-      - name: Odin internals tests
+      - name: Normal Core library tests
+        run: ./odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false
+      - name: Optimized Core library tests
+        run: ./odin test tests/core/speed.odin -o:speed -file -all-packages -define:ODIN_TEST_FANCY=false
+      - name: Vendor library tests
+        run: ./odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false
+      - name: Internals tests
+        run: ./odin test tests/internal -all-packages -define:ODIN_TEST_FANCY=false
+      - name: Core library benchmarks
+        run: ./odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false
+      - name: GitHub Issue tests
+        run: |
+          cd tests/issues
+          ./run.sh
+
+      - name: Odin check examples/all for Linux i386
+        run: ./odin check examples/all -vet -strict-style -disallow-do -target:linux_i386
+        if: matrix.os == 'ubuntu-latest'
+      - name: Odin check examples/all for Linux arm64
+        run: ./odin check examples/all -vet -strict-style -disallow-do -target:linux_arm64
+        if: matrix.os == 'ubuntu-latest'
+      - name: Odin check examples/all for FreeBSD amd64
+        run: ./odin check examples/all -vet -strict-style -disallow-do -target:freebsd_amd64
+        if: matrix.os == 'ubuntu-latest'
+      - name: Odin check examples/all for OpenBSD amd64
+        run: ./odin check examples/all -vet -strict-style -disallow-do -target:openbsd_amd64
+        if: matrix.os == 'ubuntu-latest'
+
+      - name: Run demo on WASI WASM32
         run: |
-          cd tests/internal
-          make
-        timeout-minutes: 10
+          ./odin build examples/demo -target:wasi_wasm32 -vet -strict-style -disallow-do -out:demo.wasm
+          wasmtime ./demo.wasm
+        if: matrix.os == 'macos-14'
+
   build_windows:
     name: Windows Build, Check, and Test
     runs-on: windows-2022
+    timeout-minutes: 15
     steps:
-      - uses: actions/checkout@v1
+      - uses: actions/checkout@v4
       - name: build Odin
         shell: cmd
         run: |
@@ -150,72 +166,67 @@ jobs:
           ./build.bat 1
       - name: Odin version
         run: ./odin version
-        timeout-minutes: 1
       - name: Odin report
         run: ./odin report
-        timeout-minutes: 1
       - name: Odin check
         shell: cmd
         run: |
           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
           odin check examples/demo -vet
-        timeout-minutes: 10
       - name: Odin run
         shell: cmd
         run: |
           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
           odin run examples/demo
-        timeout-minutes: 10
       - name: Odin run -debug
         shell: cmd
         run: |
           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
           odin run examples/demo -debug
-        timeout-minutes: 10
       - name: Odin check examples/all
         shell: cmd
         run: |
           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
           odin check examples/all -strict-style
-        timeout-minutes: 10
       - name: Core library tests
         shell: cmd
         run: |
           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
-          cd tests\core
-          call build.bat
-        timeout-minutes: 10
+          odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false
+      - name: Optimized core library tests
+        shell: cmd
+        run: |
+          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
+          odin test tests/core/speed.odin -o:speed -file -all-packages -define:ODIN_TEST_FANCY=false
+      - name: Core library benchmarks
+        shell: cmd
+        run: |
+          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
+          odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false
       - name: Vendor library tests
         shell: cmd
         run: |
           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
-          cd tests\vendor
-          call build.bat
-        timeout-minutes: 10
+          odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false
       - name: Odin internals tests
         shell: cmd
         run: |
           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
-          cd tests\internal
-          call build.bat
-        timeout-minutes: 10
+          odin test tests/internal -all-packages -define:ODIN_TEST_FANCY=false
       - name: Odin documentation tests
         shell: cmd
         run: |
           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
           cd tests\documentation
-          rem call build.bat
-        timeout-minutes: 10
+          call build.bat
       - name: core:math/big tests
         shell: cmd
         run: |
           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
           cd tests\core\math\big
           call build.bat
-        timeout-minutes: 10
       - name: Odin check examples/all for Windows 32bits
         shell: cmd
         run: |
           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
           odin check examples/all -strict-style -target:windows_i386
-        timeout-minutes: 10

+20 -35  .github/workflows/nightly.yml

@@ -11,7 +11,7 @@ jobs:
     if: github.repository == 'odin-lang/Odin'
     runs-on: windows-2022
     steps:
-      - uses: actions/checkout@v1
+      - uses: actions/checkout@v4
       - name: build Odin
         shell: cmd
         run: |
@@ -45,13 +45,13 @@ jobs:
     if: github.repository == 'odin-lang/Odin'
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v1
+      - uses: actions/checkout@v4
       - name: (Linux) Download LLVM
         run: |
           wget https://apt.llvm.org/llvm.sh
           chmod +x llvm.sh
-          sudo ./llvm.sh 17
-          echo "/usr/lib/llvm-17/bin" >> $GITHUB_PATH
+          sudo ./llvm.sh 18
+          echo "/usr/lib/llvm-18/bin" >> $GITHUB_PATH
       - name: build odin
         run: make nightly
       - name: Odin run
@@ -77,13 +77,13 @@ jobs:
   build_macos:
     name: MacOS Build
     if: github.repository == 'odin-lang/Odin'
-    runs-on: macos-latest
+    runs-on: macos-13
     steps:
-      - uses: actions/checkout@v1
+      - uses: actions/checkout@v4
       - name: Download LLVM and setup PATH
         run: |
-          brew install llvm@17 dylibbundler
-          echo "/usr/local/opt/llvm@17/bin" >> $GITHUB_PATH
+          brew install llvm@18 dylibbundler
+          echo "/usr/local/opt/llvm@18/bin" >> $GITHUB_PATH
       - name: build odin
        # These -L flags make the linker prioritize system libraries over LLVM libraries, mainly to
         # not link with libunwind bundled with LLVM but link with libunwind on the system.
@@ -113,11 +113,11 @@ jobs:
     if: github.repository == 'odin-lang/Odin'
     runs-on: macos-14 # ARM machine
     steps:
-      - uses: actions/checkout@v1
+      - uses: actions/checkout@v4
       - name: Download LLVM and setup PATH
         run: |
-          brew install llvm@17 dylibbundler
-          echo "/opt/homebrew/opt/llvm@17/bin" >> $GITHUB_PATH
+          brew install llvm@18 dylibbundler
+          echo "/opt/homebrew/opt/llvm@18/bin" >> $GITHUB_PATH
       - name: build odin
        # These -L flags make the linker prioritize system libraries over LLVM libraries, mainly to
         # not link with libunwind bundled with LLVM but link with libunwind on the system.
@@ -146,16 +146,16 @@ jobs:
     runs-on: [ubuntu-latest]
     needs: [build_windows, build_macos, build_macos_arm, build_ubuntu]
     steps:
-      - uses: actions/checkout@v1
+      - uses: actions/checkout@v4
       - uses: actions/setup-python@v2
         with:
           python-version: '3.8.x'
 
-      - name: Install B2 CLI
+      - name: Install B2 SDK
         shell: bash
         run: |
           python -m pip install --upgrade pip
-          pip install --upgrade b2
+          pip install --upgrade b2sdk
 
       - name: Display Python version
         run: python -c "import sys; print(sys.version)"
@@ -188,24 +188,9 @@ jobs:
           BUCKET: ${{ secrets.B2_BUCKET }}
           DAYS_TO_KEEP: ${{ secrets.B2_DAYS_TO_KEEP }}
         run: |
-          echo Authorizing B2 account
-          b2 authorize-account "$APPID" "$APPKEY"
-
-          echo Uploading artifcates to B2
-          chmod +x ./ci/upload_create_nightly.sh
-          ./ci/upload_create_nightly.sh "$BUCKET" windows-amd64 windows_artifacts/
-          ./ci/upload_create_nightly.sh "$BUCKET" ubuntu-amd64 ubuntu_artifacts/dist.zip
-          ./ci/upload_create_nightly.sh "$BUCKET" macos-amd64 macos_artifacts/dist.zip
-          ./ci/upload_create_nightly.sh "$BUCKET" macos-arm64 macos_arm_artifacts/dist.zip
-
-          echo Deleting old artifacts in B2
-          python3 ci/delete_old_binaries.py "$BUCKET" "$DAYS_TO_KEEP"
-
-          echo Creating nightly.json
-          python3 ci/create_nightly_json.py "$BUCKET" > nightly.json
-
-          echo Uploading nightly.json
-          b2 upload-file "$BUCKET" nightly.json nightly.json
-
-          echo Clear B2 account info
-          b2 clear-account
+          python3 ci/nightly.py artifact windows-amd64 windows_artifacts/
+          python3 ci/nightly.py artifact ubuntu-amd64 ubuntu_artifacts/dist.zip
+          python3 ci/nightly.py artifact macos-amd64 macos_artifacts/dist.zip
+          python3 ci/nightly.py artifact macos-arm64 macos_arm_artifacts/dist.zip
+          python3 ci/nightly.py prune
+          python3 ci/nightly.py json

+5 -30  .gitignore

@@ -24,34 +24,6 @@ bld/
 ![Cc]ore/[Ll]og/
 tests/documentation/verify/
 tests/documentation/all.odin-doc
-tests/internal/test_map
-tests/internal/test_pow
-tests/internal/test_rtti
-tests/core/test_core_compress
-tests/core/test_core_container
-tests/core/test_core_filepath
-tests/core/test_core_fmt
-tests/core/test_core_i18n
-tests/core/test_core_image
-tests/core/test_core_libc
-tests/core/test_core_match
-tests/core/test_core_math
-tests/core/test_core_net
-tests/core/test_core_os_exit
-tests/core/test_core_reflect
-tests/core/test_core_strings
-tests/core/test_crypto
-tests/core/test_hash
-tests/core/test_hxa
-tests/core/test_json
-tests/core/test_linalg_glsl_math
-tests/core/test_noise
-tests/core/test_varint
-tests/core/test_xml
-tests/core/test_core_slice
-tests/core/test_core_thread
-tests/core/test_core_runtime
-tests/vendor/vendor_botan
 # Visual Studio 2015 cache/options directory
 .vs/
 # Visual Studio Code options directory
@@ -59,6 +31,7 @@ tests/vendor/vendor_botan
 # Uncomment if you have tasks that create the project's static files in wwwroot
 #wwwroot/
 demo
+benchmark
 
 # MSTest test Results
 [Tt]est[Rr]esult*/
@@ -299,7 +272,7 @@ bin/
 # - Linux/MacOS
 odin
 !odin/
-odin.dSYM
+**/*.dSYM
 *.bin
 demo.bin
 libLLVM*.so*
@@ -318,4 +291,6 @@ build.sh
 !core/debug/
 
 # RAD debugger project file
-*.raddbg
+*.raddbg
+
+misc/featuregen/featuregen

BIN  LLVM-C.dll


+2 -0  base/builtin/builtin.odin

@@ -126,3 +126,5 @@ clamp :: proc(value, minimum, maximum: T) -> T ---
 
 soa_zip :: proc(slices: ...) -> #soa[]Struct ---
 soa_unzip :: proc(value: $S/#soa[]$E) -> (slices: ...) ---
+
+unreachable :: proc() -> ! ---
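
The new `unreachable` builtin diverges (its return type is `!`), so it can terminate a value-returning procedure after a switch the caller guarantees is exhaustive, without a dummy trailing return. A minimal sketch of that use (the `Direction` type and `direction_from_byte` helper are hypothetical):

    package example

    Direction :: enum {North, South, East, West}

    direction_from_byte :: proc(b: byte) -> Direction {
    	switch b {
    	case 'N': return .North
    	case 'S': return .South
    	case 'E': return .East
    	case 'W': return .West
    	}
    	unreachable() // caller guarantees b is one of 'N','S','E','W'; no trailing return needed
    }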

+36 -14  base/intrinsics/intrinsics.odin

@@ -38,9 +38,12 @@ count_leading_zeros  :: proc(x: $T) -> T where type_is_integer(T) || type_is_sim
 reverse_bits         :: proc(x: $T) -> T where type_is_integer(T) || type_is_simd_vector(T) ---
 byte_swap            :: proc(x: $T) -> T where type_is_integer(T) || type_is_float(T) ---
 
-overflow_add :: proc(lhs, rhs: $T) -> (T, bool) ---
-overflow_sub :: proc(lhs, rhs: $T) -> (T, bool) ---
-overflow_mul :: proc(lhs, rhs: $T) -> (T, bool) ---
+overflow_add :: proc(lhs, rhs: $T) -> (T, bool) where type_is_integer(T) #optional_ok ---
+overflow_sub :: proc(lhs, rhs: $T) -> (T, bool) where type_is_integer(T) #optional_ok ---
+overflow_mul :: proc(lhs, rhs: $T) -> (T, bool) where type_is_integer(T) #optional_ok ---
+
+add_sat :: proc(lhs, rhs: $T) -> T where type_is_integer(T) ---
+sub_sat :: proc(lhs, rhs: $T) -> T where type_is_integer(T) ---
 
 sqrt :: proc(x: $T) -> T where type_is_float(T) || (type_is_simd_vector(T) && type_is_float(type_elem_type(T))) ---
 
@@ -73,6 +76,8 @@ expect :: proc(val, expected_val: T) -> T ---
 
 // Linux and Darwin Only
 syscall :: proc(id: uintptr, args: ..uintptr) -> uintptr ---
+// FreeBSD, NetBSD, et cetera
+syscall_bsd :: proc(id: uintptr, args: ..uintptr) -> (uintptr, bool) ---
 
 
 // Atomics
@@ -167,17 +172,23 @@ type_is_matrix           :: proc($T: typeid) -> bool ---
 
 type_has_nil :: proc($T: typeid) -> bool ---
 
+type_is_matrix_row_major    :: proc($T: typeid) -> bool where type_is_matrix(T) ---
+type_is_matrix_column_major :: proc($T: typeid) -> bool where type_is_matrix(T) ---
+
 type_is_specialization_of :: proc($T, $S: typeid) -> bool ---
 
-type_is_variant_of :: proc($U, $V: typeid) -> bool where type_is_union(U) ---
-type_union_tag_type :: proc($T: typeid) -> typeid where type_is_union(T) ---
-type_union_tag_offset :: proc($T: typeid) -> uintptr where type_is_union(T) ---
-type_union_base_tag_value :: proc($T: typeid) -> int where type_is_union(U) ---
-type_union_variant_count :: proc($T: typeid) -> int where type_is_union(T) ---
-type_variant_type_of :: proc($T: typeid, $index: int) -> typeid where type_is_union(T) ---
-type_variant_index_of :: proc($U, $V: typeid) -> int where type_is_union(U) ---
+type_is_variant_of        :: proc($U, $V: typeid)          -> bool    where type_is_union(U) ---
+type_union_tag_type       :: proc($T: typeid)              -> typeid  where type_is_union(T) ---
+type_union_tag_offset     :: proc($T: typeid)              -> uintptr where type_is_union(T) ---
+type_union_base_tag_value :: proc($T: typeid)              -> int     where type_is_union(U) ---
+type_union_variant_count  :: proc($T: typeid)              -> int     where type_is_union(T) ---
+type_variant_type_of      :: proc($T: typeid, $index: int) -> typeid  where type_is_union(T) ---
+type_variant_index_of     :: proc($U, $V: typeid)          -> int     where type_is_union(U) ---
+
+type_bit_set_elem_type       :: proc($T: typeid) -> typeid where type_is_bit_set(T) ---
+type_bit_set_underlying_type :: proc($T: typeid) -> typeid where type_is_bit_set(T) ---
 
-type_has_field :: proc($T: typeid, $name: string) -> bool ---
+type_has_field  :: proc($T: typeid, $name: string) -> bool ---
 type_field_type :: proc($T: typeid, $name: string) -> typeid ---
 
 type_proc_parameter_count :: proc($T: typeid) -> int where type_is_proc(T) ---
@@ -186,7 +197,8 @@ type_proc_return_count    :: proc($T: typeid) -> int where type_is_proc(T) ---
 type_proc_parameter_type  :: proc($T: typeid, index: int) -> typeid where type_is_proc(T) ---
 type_proc_return_type     :: proc($T: typeid, index: int) -> typeid where type_is_proc(T) ---
 
-type_struct_field_count :: proc($T: typeid) -> int where type_is_struct(T) ---
+type_struct_field_count          :: proc($T: typeid) -> int  where type_is_struct(T) ---
+type_struct_has_implicit_padding :: proc($T: typeid) -> bool where type_is_struct(T) ---
 
 type_polymorphic_record_parameter_count :: proc($T: typeid) -> typeid ---
 type_polymorphic_record_parameter_value :: proc($T: typeid, index: int) -> $V ---
@@ -282,6 +294,16 @@ simd_reverse :: proc(a: #simd[N]T) -> #simd[N]T ---
 simd_rotate_left  :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T ---
 simd_rotate_right :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T ---
 
+// Checks if the current target supports the given target features.
+//
+// Takes a constant comma-seperated string (eg: "sha512,sse4.1"), or a procedure type which has either
+// `@(require_target_feature)` or `@(enable_target_feature)` as its input and returns a boolean indicating
+// if all listed features are supported.
+has_target_feature :: proc($test: $T) -> bool where type_is_string(T) || type_is_proc(T) ---
+
+
+// Returns the procedure called by `x`, where `x` must be a call expression
+procedure_of :: proc(x: $T) -> T where type_is_proc(T) ---
 
 // WASM targets only
 wasm_memory_grow :: proc(index, delta: uintptr) -> int ---
@@ -293,9 +315,9 @@ wasm_memory_size :: proc(index: uintptr)        -> int ---
 // 0 - indicates that the thread blocked and then was woken up
 // 1 - the loaded value from `ptr` did not match `expected`, the thread did not block
// 2 - the thread blocked, but the timeout expired before it was woken up
-@(enable_target_feature="atomics")
+@(require_target_feature="atomics")
 wasm_memory_atomic_wait32   :: proc(ptr: ^u32, expected: u32, timeout_ns: i64) -> u32 ---
-@(enable_target_feature="atomics")
+@(require_target_feature="atomics")
 wasm_memory_atomic_notify32 :: proc(ptr: ^u32, waiters: u32) -> (waiters_woken_up: u32) ---
 
 // x86 Targets (i386, amd64)
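
Given the branch being merged is `syscall-fix`, the new `syscall_bsd` intrinsic is the notable addition here: unlike `syscall`, it returns a `(value, ok)` pair, since BSD kernels report syscall failure out-of-band (via the carry flag) rather than through the return value alone. A minimal sketch of a wrapper, assuming `ok` reports success and using FreeBSD's `getpid` syscall number purely for illustration:

    package example

    import "base:intrinsics"

    when ODIN_OS == .FreeBSD {
    	SYS_GETPID :: uintptr(20) // assumed syscall number, for illustration only

    	getpid :: proc "contextless" () -> (pid: int, ok: bool) {
    		ret: uintptr
    		ret, ok = intrinsics.syscall_bsd(SYS_GETPID)
    		return int(ret), ok
    	}
    }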

+104 -30  base/runtime/core.odin

@@ -66,7 +66,7 @@ Type_Info_Named :: struct {
 	name: string,
 	base: ^Type_Info,
 	pkg:  string,
-	loc:  Source_Code_Location,
+	loc:  ^Source_Code_Location,
 }
 Type_Info_Integer    :: struct {signed: bool, endianness: Platform_Endianness}
 Type_Info_Rune       :: struct {}
@@ -112,23 +112,32 @@ Type_Info_Parameters :: struct { // Only used for procedures parameters and resu
 }
 Type_Info_Tuple :: Type_Info_Parameters // Will be removed eventually
 
+Type_Info_Struct_Flags :: distinct bit_set[Type_Info_Struct_Flag; u8]
+Type_Info_Struct_Flag :: enum u8 {
+	packed    = 0,
+	raw_union = 1,
+	no_copy   = 2,
+	align     = 3,
+}
+
 Type_Info_Struct :: struct {
-	types:        []^Type_Info,
-	names:        []string,
-	offsets:      []uintptr,
-	usings:       []bool,
-	tags:         []string,
-	is_packed:    bool,
-	is_raw_union: bool,
-	is_no_copy:   bool,
-	custom_align: bool,
+	// Slice these with `field_count`
+	types:   [^]^Type_Info `fmt:"v,field_count"`,
+	names:   [^]string     `fmt:"v,field_count"`,
+	offsets: [^]uintptr    `fmt:"v,field_count"`,
+	usings:  [^]bool       `fmt:"v,field_count"`,
+	tags:    [^]string     `fmt:"v,field_count"`,
 
-	equal: Equal_Proc, // set only when the struct has .Comparable set but does not have .Simple_Compare set
+	field_count: i32,
+
+	flags: Type_Info_Struct_Flags,
 
 	// These are only set iff this structure is an SOA structure
 	soa_kind:      Type_Info_Struct_Soa_Kind,
+	soa_len:       i32,
 	soa_base_type: ^Type_Info,
-	soa_len:       int,
+
+	equal: Equal_Proc, // set only when the struct has .Comparable set but does not have .Simple_Compare set
 }
 Type_Info_Union :: struct {
 	variants:     []^Type_Info,
@@ -142,9 +151,9 @@ Type_Info_Union :: struct {
 	shared_nil:   bool,
 }
 Type_Info_Enum :: struct {
-	base:      ^Type_Info,
-	names:     []string,
-	values:    []Type_Info_Enum_Value,
+	base:   ^Type_Info,
+	names:  []string,
+	values: []Type_Info_Enum_Value,
 }
 Type_Info_Map :: struct {
 	key:      ^Type_Info,
@@ -187,11 +196,12 @@ Type_Info_Soa_Pointer :: struct {
 }
 Type_Info_Bit_Field :: struct {
 	backing_type: ^Type_Info,
-	names:        []string,
-	types:        []^Type_Info,
-	bit_sizes:    []uintptr,
-	bit_offsets:  []uintptr,
-	tags:         []string,
+	names:        [^]string     `fmt:"v,field_count"`,
+	types:        [^]^Type_Info `fmt:"v,field_count"`,
+	bit_sizes:    [^]uintptr    `fmt:"v,field_count"`,
+	bit_offsets:  [^]uintptr    `fmt:"v,field_count"`,
+	tags:         [^]string     `fmt:"v,field_count"`,
+	field_count:  int,
 }
 
 Type_Info_Flag :: enum u8 {
@@ -273,14 +283,14 @@ Typeid_Kind :: enum u8 {
 }
 #assert(len(Typeid_Kind) < 32)
 
-// Typeid_Bit_Field :: bit_field #align(align_of(uintptr)) {
-// 	index:    8*size_of(uintptr) - 8,
-// 	kind:     5, // Typeid_Kind
-// 	named:    1,
-// 	special:  1, // signed, cstring, etc
-// 	reserved: 1,
-// }
-// #assert(size_of(Typeid_Bit_Field) == size_of(uintptr));
+Typeid_Bit_Field :: bit_field uintptr {
+	index:    uintptr     | 8*size_of(uintptr) - 8,
+	kind:     Typeid_Kind | 5, // Typeid_Kind
+	named:    bool        | 1,
+	special:  bool        | 1, // signed, cstring, etc
+	reserved: bool        | 1,
+}
+#assert(size_of(Typeid_Bit_Field) == size_of(uintptr))
 
 // NOTE(bill): only the ones that are needed (not all types)
 // This will be set by the compiler
@@ -299,6 +309,8 @@ when ODIN_OS == .Windows {
 		Thread_Detach  = 3,
 	}
 	dll_forward_reason: DLL_Forward_Reason
+
+	dll_instance: rawptr
 }
 
 // IMPORTANT NOTE(bill): Must be in this order (as the compiler relies upon it)
@@ -397,11 +409,34 @@ Logger :: struct {
 	options:      Logger_Options,
 }
 
+
+Random_Generator_Mode :: enum {
+	Read,
+	Reset,
+	Query_Info,
+}
+
+Random_Generator_Query_Info_Flag :: enum u32 {
+	Cryptographic,
+	Uniform,
+	External_Entropy,
+	Resettable,
+}
+Random_Generator_Query_Info :: distinct bit_set[Random_Generator_Query_Info_Flag; u32]
+
+Random_Generator_Proc :: #type proc(data: rawptr, mode: Random_Generator_Mode, p: []byte)
+
+Random_Generator :: struct {
+	procedure: Random_Generator_Proc,
+	data:      rawptr,
+}
+
 Context :: struct {
 	allocator:              Allocator,
 	temp_allocator:         Allocator,
 	assertion_failure_proc: Assertion_Failure_Proc,
 	logger:                 Logger,
+	random_generator:       Random_Generator,
 
 	user_ptr:   rawptr,
 	user_index: int,
@@ -470,6 +505,15 @@ Raw_Soa_Pointer :: struct {
 	index: int,
 }
 
+Raw_Complex32     :: struct {real, imag: f16}
+Raw_Complex64     :: struct {real, imag: f32}
+Raw_Complex128    :: struct {real, imag: f64}
+Raw_Quaternion64  :: struct {imag, jmag, kmag: f16, real: f16}
+Raw_Quaternion128 :: struct {imag, jmag, kmag: f32, real: f32}
+Raw_Quaternion256 :: struct {imag, jmag, kmag: f64, real: f64}
+Raw_Quaternion64_Vector_Scalar  :: struct {vector: [3]f16, scalar: f16}
+Raw_Quaternion128_Vector_Scalar :: struct {vector: [3]f32, scalar: f32}
+Raw_Quaternion256_Vector_Scalar :: struct {vector: [3]f64, scalar: f64}
 
 
 /*
@@ -481,7 +525,9 @@ Raw_Soa_Pointer :: struct {
 		Linux,
 		Essence,
 		FreeBSD,
+		Haiku,
 		OpenBSD,
+		NetBSD,
 		WASI,
 		JS,
 		Freestanding,
@@ -508,6 +554,7 @@ Odin_Arch_Type :: type_of(ODIN_ARCH)
 	Odin_Build_Mode_Type :: enum int {
 		Executable,
 		Dynamic,
+		Static,
 		Object,
 		Assembly,
 		LLVM_IR,
@@ -548,6 +595,19 @@ Odin_Platform_Subtarget_Type :: type_of(ODIN_PLATFORM_SUBTARGET)
 */
 Odin_Sanitizer_Flags :: type_of(ODIN_SANITIZER_FLAGS)
 
+/*
+	// Defined internally by the compiler
+	Odin_Optimization_Mode :: enum int {
+		None       = -1,
+		Minimal    =  0,
+		Size       =  1,
+		Speed      =  2,
+		Aggressive =  3,
+	}
+
+	ODIN_OPTIMIZATION_MODE // is a constant
+*/
+Odin_Optimization_Mode :: type_of(ODIN_OPTIMIZATION_MODE)
 
 /////////////////////////////
 // Init Startup Procedures //
@@ -683,13 +743,16 @@ __init_context :: proc "contextless" (c: ^Context) {
 
 	c.logger.procedure = default_logger_proc
 	c.logger.data = nil
+
+	c.random_generator.procedure = default_random_generator_proc
+	c.random_generator.data = nil
 }
 
 default_assertion_failure_proc :: proc(prefix, message: string, loc: Source_Code_Location) -> ! {
 	when ODIN_OS == .Freestanding {
 		// Do nothing
 	} else {
-		when !ODIN_DISABLE_ASSERT {
+		when ODIN_OS != .Orca && !ODIN_DISABLE_ASSERT {
 			print_caller_location(loc)
 			print_string(" ")
 		}
@@ -698,7 +761,18 @@ default_assertion_failure_proc :: proc(prefix, message: string, loc: Source_Code
 			print_string(": ")
 			print_string(message)
 		}
-		print_byte('\n')
+
+		when ODIN_OS == .Orca {
+			assert_fail(
+				cstring(raw_data(loc.file_path)),
+				cstring(raw_data(loc.procedure)),
+				loc.line,
+				"",
+				cstring(raw_data(orca_stderr_buffer[:orca_stderr_buffer_idx])),
+			)
+		} else {
+			print_byte('\n')
+		}
 	}
 	trap()
 }
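
`Context` now carries a pluggable `random_generator`, driven by a single procedure that multiplexes on `Random_Generator_Mode`. A minimal sketch of a custom generator under the new interface (the xorshift64 step and the reseed constant are illustrative assumptions, not the runtime's default implementation):

    package example

    import "base:runtime"

    xorshift_proc :: proc(data: rawptr, mode: runtime.Random_Generator_Mode, p: []byte) {
    	state := (^u64)(data)
    	switch mode {
    	case .Read:
    		for i in 0..<len(p) {
    			s := state^
    			s ~= s << 13
    			s ~= s >> 7
    			s ~= s << 17
    			state^ = s
    			p[i] = byte(s)
    		}
    	case .Reset:
    		state^ = 0x9E3779B97F4A7C15 // assumed reseed value, for illustration
    	case .Query_Info:
    		// A generator may report flags such as .Uniform or .Resettable through p.
    	}
    }

    main :: proc() {
    	state: u64 = 1
    	context.random_generator = runtime.Random_Generator{
    		procedure = xorshift_proc,
    		data      = &state,
    	}
    	// Code that draws randomness via the context now uses xorshift_proc.
    }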

+162 -114  base/runtime/core_builtin.odin

@@ -65,7 +65,7 @@ copy :: proc{copy_slice, copy_from_string}
 // with the old value, and reducing the length of the dynamic array by 1.
 //
 // Note: This is an O(1) operation.
-// Note: If you the elements to remain in their order, use `ordered_remove`.
+// Note: If you want the elements to remain in their order, use `ordered_remove`.
 // Note: If the index is out of bounds, this procedure will panic.
 @builtin
 unordered_remove :: proc(array: ^$D/[dynamic]$T, index: int, loc := #caller_location) #no_bounds_check {
@@ -79,7 +79,7 @@ unordered_remove :: proc(array: ^$D/[dynamic]$T, index: int, loc := #caller_loca
 // `ordered_remove` removes the element at the specified `index` whilst keeping the order of the other elements.
 //
 // Note: This is an O(N) operation.
-// Note: If you the elements do not have to remain in their order, prefer `unordered_remove`.
+// Note: If the elements do not have to remain in their order, prefer `unordered_remove`.
 // Note: If the index is out of bounds, this procedure will panic.
 @builtin
 ordered_remove :: proc(array: ^$D/[dynamic]$T, index: int, loc := #caller_location) #no_bounds_check {
@@ -163,21 +163,43 @@ pop_front_safe :: proc "contextless" (array: ^$T/[dynamic]$E) -> (res: E, ok: bo
 
 // `clear` will set the length of a passed dynamic array or map to `0`
 @builtin
-clear :: proc{clear_dynamic_array, clear_map}
+clear :: proc{
+	clear_dynamic_array,
+	clear_map,
+
+	clear_soa_dynamic_array,
+}
 
 // `reserve` will try to reserve memory of a passed dynamic array or map to the requested element count (setting the `cap`).
 @builtin
-reserve :: proc{reserve_dynamic_array, reserve_map}
+reserve :: proc{
+	reserve_dynamic_array,
+	reserve_map,
+
+	reserve_soa,
+}
 
 @builtin
-non_zero_reserve :: proc{non_zero_reserve_dynamic_array}
+non_zero_reserve :: proc{
+	non_zero_reserve_dynamic_array,
+
+	non_zero_reserve_soa,
+}
 
 // `resize` will try to resize memory of a passed dynamic array to the requested element count (setting the `len`, and possibly `cap`).
 @builtin
-resize :: proc{resize_dynamic_array}
+resize :: proc{
+	resize_dynamic_array,
+
+	resize_soa,
+}
 
 @builtin
-non_zero_resize :: proc{non_zero_resize_dynamic_array}
+non_zero_resize :: proc{
+	non_zero_resize_dynamic_array,
+
+	non_zero_resize_soa,
+}
 
 // Shrinks the capacity of a dynamic array or map down to the current length, or the given capacity.
 @builtin
@@ -268,7 +290,7 @@ new_clone :: proc(data: $T, allocator := context.allocator, loc := #caller_locat
 	return
 }
 
-DEFAULT_RESERVE_CAPACITY :: 16
+DEFAULT_DYNAMIC_ARRAY_CAPACITY :: 8
 
 @(require_results)
 make_aligned :: proc($T: typeid/[]$E, #any_int len: int, alignment: int, allocator := context.allocator, loc := #caller_location) -> (T, Allocator_Error) #optional_allocator_error {
@@ -295,7 +317,7 @@ make_slice :: proc($T: typeid/[]$E, #any_int len: int, allocator := context.allo
 // Note: Prefer using the procedure group `make`.
 @(builtin, require_results)
 make_dynamic_array :: proc($T: typeid/[dynamic]$E, allocator := context.allocator, loc := #caller_location) -> (T, Allocator_Error) #optional_allocator_error {
-	return make_dynamic_array_len_cap(T, 0, DEFAULT_RESERVE_CAPACITY, allocator, loc)
+	return make_dynamic_array_len_cap(T, 0, 0, allocator, loc)
 }
 // `make_dynamic_array_len` allocates and initializes a dynamic array. Like `new`, the first argument is a type, not a value.
 // Unlike `new`, `make`'s return value is the same as the type of its argument, not a pointer to it.
@@ -311,16 +333,23 @@ make_dynamic_array_len :: proc($T: typeid/[dynamic]$E, #any_int len: int, alloca
 // Note: Prefer using the procedure group `make`.
 @(builtin, require_results)
 make_dynamic_array_len_cap :: proc($T: typeid/[dynamic]$E, #any_int len: int, #any_int cap: int, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
+	err = _make_dynamic_array_len_cap((^Raw_Dynamic_Array)(&array), size_of(E), align_of(E), len, cap, allocator, loc)
+	return
+}
+
+@(require_results)
+_make_dynamic_array_len_cap :: proc(array: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, #any_int len: int, #any_int cap: int, allocator := context.allocator, loc := #caller_location) -> (err: Allocator_Error) {
 	make_dynamic_array_error_loc(loc, len, cap)
 	array.allocator = allocator // initialize allocator before just in case it fails to allocate any memory
-	data := mem_alloc_bytes(size_of(E)*cap, align_of(E), allocator, loc) or_return
-	s := Raw_Dynamic_Array{raw_data(data), len, cap, allocator}
-	if data == nil && size_of(E) != 0 {
-		s.len, s.cap = 0, 0
-	}
-	array = transmute(T)s
+	data := mem_alloc_bytes(size_of_elem*cap, align_of_elem, allocator, loc) or_return
+	use_zero := data == nil && size_of_elem != 0
+	array.data = raw_data(data)
+	array.len = 0 if use_zero else len
+	array.cap = 0 if use_zero else cap
+	array.allocator = allocator
 	return
 }
+
 // `make_map` allocates and initializes a dynamic array. Like `new`, the first argument is a type, not a value.
 // Unlike `new`, `make`'s return value is the same as the type of its argument, not a pointer to it.
 //
@@ -364,6 +393,11 @@ make :: proc{
 	make_dynamic_array_len_cap,
 	make_map,
 	make_multi_pointer,
+
+	make_soa_slice,
+	make_soa_dynamic_array,
+	make_soa_dynamic_array_len,
+	make_soa_dynamic_array_len_cap,
 }
 
 
@@ -383,7 +417,7 @@ clear_map :: proc "contextless" (m: ^$T/map[$K]$V) {
 //
 // Note: Prefer the procedure group `reserve`
 @builtin
-reserve_map :: proc(m: ^$T/map[$K]$V, capacity: int, loc := #caller_location) -> Allocator_Error {
+reserve_map :: proc(m: ^$T/map[$K]$V, #any_int capacity: int, loc := #caller_location) -> Allocator_Error {
 	return __dynamic_map_reserve((^Raw_Map)(m), map_info(T), uint(capacity), loc) if m != nil else nil
 }
 
@@ -413,106 +447,103 @@ delete_key :: proc(m: ^$T/map[$K]$V, key: K) -> (deleted_key: K, deleted_value:
 	return
 }
 
-_append_elem :: #force_inline proc(array: ^$T/[dynamic]$E, arg: E, should_zero: bool, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+_append_elem :: #force_inline proc(array: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, arg_ptr: rawptr, should_zero: bool, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
 	if array == nil {
-		return 0, nil
+		return
 	}
-	when size_of(E) == 0 {
-		array := (^Raw_Dynamic_Array)(array)
+
+	if array.cap < array.len+1 {
+		// Same behavior as _append_elems but there's only one arg, so we always just add DEFAULT_DYNAMIC_ARRAY_CAPACITY.
+		cap := 2 * array.cap + DEFAULT_DYNAMIC_ARRAY_CAPACITY
+
+		// do not 'or_return' here as it could be a partial success
+		err = _reserve_dynamic_array(array, size_of_elem, align_of_elem, cap, should_zero, loc)
+	}
+	if array.cap-array.len > 0 {
+		data := ([^]byte)(array.data)
+		assert(data != nil, loc=loc)
+		data = data[array.len*size_of_elem:]
+		intrinsics.mem_copy_non_overlapping(data, arg_ptr, size_of_elem)
 		array.len += 1
-		return 1, nil
-	} else {
-		if cap(array) < len(array)+1 {
-			cap := 2 * cap(array) + max(8, 1)
-
-			// do not 'or_return' here as it could be a partial success
-			if should_zero {
-				err = reserve(array, cap, loc)
-			} else {
-				err = non_zero_reserve(array, cap, loc) 
-			}
-		}
-		if cap(array)-len(array) > 0 {
-			a := (^Raw_Dynamic_Array)(array)
-			when size_of(E) != 0 {
-				data := ([^]E)(a.data)
-				assert(data != nil, loc=loc)
-				data[a.len] = arg
-			}
-			a.len += 1
-			return 1, err
-		}
-		return 0, err
+		n = 1
 	}
+	return
 }
 
 @builtin
 append_elem :: proc(array: ^$T/[dynamic]$E, #no_broadcast arg: E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
-	return _append_elem(array, arg, true, loc=loc)
+	when size_of(E) == 0 {
+		(^Raw_Dynamic_Array)(array).len += 1
+		return 1, nil
+	} else {
+		arg := arg
+		return _append_elem((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), &arg, true, loc=loc)
+	}
 }
 
 @builtin
 non_zero_append_elem :: proc(array: ^$T/[dynamic]$E, #no_broadcast arg: E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
-	return _append_elem(array, arg, false, loc=loc)
+	when size_of(E) == 0 {
+		(^Raw_Dynamic_Array)(array).len += 1
+		return 1, nil
+	} else {
+		arg := arg
+		return _append_elem((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), &arg, false, loc=loc)
+	}
 }
 
-_append_elems :: #force_inline proc(array: ^$T/[dynamic]$E, should_zero: bool, loc := #caller_location, args: ..E) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+_append_elems :: #force_inline proc(array: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, should_zero: bool, loc := #caller_location, args: rawptr, arg_len: int) -> (n: int, err: Allocator_Error) #optional_allocator_error {
 	if array == nil {
 		return 0, nil
 	}
 
-	arg_len := len(args)
 	if arg_len <= 0 {
 		return 0, nil
 	}
 
-	when size_of(E) == 0 {
-		array := (^Raw_Dynamic_Array)(array)
+	if array.cap < array.len+arg_len {
+		cap := 2 * array.cap + max(DEFAULT_DYNAMIC_ARRAY_CAPACITY, arg_len)
+
+		// do not 'or_return' here as it could be a partial success
+		err = _reserve_dynamic_array(array, size_of_elem, align_of_elem, cap, should_zero, loc)
+	}
+	arg_len := arg_len
+	arg_len = min(array.cap-array.len, arg_len)
+	if arg_len > 0 {
+		data := ([^]byte)(array.data)
+		assert(data != nil, loc=loc)
+		data = data[array.len*size_of_elem:]
+		intrinsics.mem_copy(data, args, size_of_elem * arg_len) // must be mem_copy (overlapping)
 		array.len += arg_len
-		return arg_len, nil
-	} else {
-		if cap(array) < len(array)+arg_len {
-			cap := 2 * cap(array) + max(8, arg_len)
-
-			// do not 'or_return' here as it could be a partial success
-			if should_zero {
-				err = reserve(array, cap, loc)
-			} else {
-				err = non_zero_reserve(array, cap, loc)
-			}
-		}
-		arg_len = min(cap(array)-len(array), arg_len)
-		if arg_len > 0 {
-			a := (^Raw_Dynamic_Array)(array)
-			when size_of(E) != 0 {
-				data := ([^]E)(a.data)
-				assert(data != nil, loc=loc)
-				intrinsics.mem_copy(&data[a.len], raw_data(args), size_of(E) * arg_len)
-			}
-			a.len += arg_len
-		}
-		return arg_len, err
 	}
+	return arg_len, err
 }
 
 @builtin
 append_elems :: proc(array: ^$T/[dynamic]$E, #no_broadcast args: ..E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
-	return _append_elems(array, true, loc, ..args)
+	when size_of(E) == 0 {
+		a := (^Raw_Dynamic_Array)(array)
+		a.len += len(args)
+		return len(args), nil
+	} else {
+		return _append_elems((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), true, loc, raw_data(args), len(args))
+	}
 }
 
 @builtin
 non_zero_append_elems :: proc(array: ^$T/[dynamic]$E, #no_broadcast args: ..E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
-	return _append_elems(array, false, loc, ..args)
+	when size_of(E) == 0 {
+		a := (^Raw_Dynamic_Array)(array)
+		a.len += len(args)
+		return len(args), nil
+	} else {
+		return _append_elems((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), false, loc, raw_data(args), len(args))
+	}
 }
 
 // The append_string built-in procedure appends a string to the end of a [dynamic]u8 like type
 _append_elem_string :: proc(array: ^$T/[dynamic]$E/u8, arg: $A/string, should_zero: bool, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
-	args := transmute([]E)arg
-	if should_zero { 
-		return append_elems(array, ..args, loc=loc)
-	} else {
-		return non_zero_append_elems(array, ..args, loc=loc)
-	}
+	return _append_elems((^Raw_Dynamic_Array)(array), 1, 1, should_zero, loc, raw_data(arg), len(arg))
 }
 
 @builtin
@@ -540,8 +571,23 @@ append_string :: proc(array: ^$T/[dynamic]$E/u8, args: ..string, loc := #caller_
 }
 
 // The append built-in procedure appends elements to the end of a dynamic array
-@builtin append :: proc{append_elem, append_elems, append_elem_string}
-@builtin non_zero_append :: proc{non_zero_append_elem, non_zero_append_elems, non_zero_append_elem_string}
+@builtin append :: proc{
+	append_elem,
+	append_elems,
+	append_elem_string,
+
+	append_soa_elem,
+	append_soa_elems,
+}
+
+@builtin non_zero_append :: proc{
+	non_zero_append_elem,
+	non_zero_append_elems,
+	non_zero_append_elem_string,
+
+	non_zero_append_soa_elem,
+	non_zero_append_soa_elems,
+}
 
 
 @builtin
@@ -636,7 +682,7 @@ assign_at_elem :: proc(array: ^$T/[dynamic]$E, index: int, arg: E, loc := #calle
 
 
 @builtin
-assign_at_elems :: proc(array: ^$T/[dynamic]$E, index: int, args: ..E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
+assign_at_elems :: proc(array: ^$T/[dynamic]$E, index: int, #no_broadcast args: ..E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
 	new_size := index + len(args)
 	if len(args) == 0 {
 		ok = true
@@ -686,11 +732,10 @@ clear_dynamic_array :: proc "contextless" (array: ^$T/[dynamic]$E) {
 // `reserve_dynamic_array` will try to reserve memory of a passed dynamic array or map to the requested element count (setting the `cap`).
 //
 // Note: Prefer the procedure group `reserve`.
-_reserve_dynamic_array :: #force_inline proc(array: ^$T/[dynamic]$E, capacity: int, should_zero: bool, loc := #caller_location) -> Allocator_Error {
-	if array == nil {
+_reserve_dynamic_array :: #force_inline proc(a: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, capacity: int, should_zero: bool, loc := #caller_location) -> Allocator_Error {
+	if a == nil {
 		return nil
 	}
-	a := (^Raw_Dynamic_Array)(array)
 
 	if capacity <= a.cap {
 		return nil
@@ -701,15 +746,15 @@ _reserve_dynamic_array :: #force_inline proc(array: ^$T/[dynamic]$E, capacity: i
 	}
 	assert(a.allocator.procedure != nil)
 
-	old_size  := a.cap * size_of(E)
-	new_size  := capacity * size_of(E)
+	old_size  := a.cap * size_of_elem
+	new_size  := capacity * size_of_elem
 	allocator := a.allocator
 
 	new_data: []byte
 	if should_zero {
-		new_data = mem_resize(a.data, old_size, new_size, align_of(E), allocator, loc) or_return
+		new_data = mem_resize(a.data, old_size, new_size, align_of_elem, allocator, loc) or_return
 	} else {
-		new_data = non_zero_mem_resize(a.data, old_size, new_size, align_of(E), allocator, loc) or_return
+		new_data = non_zero_mem_resize(a.data, old_size, new_size, align_of_elem, allocator, loc) or_return
 	}
 	if new_data == nil && new_size > 0 {
 		return .Out_Of_Memory
@@ -721,27 +766,24 @@ _reserve_dynamic_array :: #force_inline proc(array: ^$T/[dynamic]$E, capacity: i
 }
 
 @builtin
-reserve_dynamic_array :: proc(array: ^$T/[dynamic]$E, capacity: int, loc := #caller_location) -> Allocator_Error {
-	return _reserve_dynamic_array(array, capacity, true, loc)
+reserve_dynamic_array :: proc(array: ^$T/[dynamic]$E, #any_int capacity: int, loc := #caller_location) -> Allocator_Error {
+	return _reserve_dynamic_array((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), capacity, true, loc)
 }
 
 @builtin
-non_zero_reserve_dynamic_array :: proc(array: ^$T/[dynamic]$E, capacity: int, loc := #caller_location) -> Allocator_Error {
-	return _reserve_dynamic_array(array, capacity, false, loc)
+non_zero_reserve_dynamic_array :: proc(array: ^$T/[dynamic]$E, #any_int capacity: int, loc := #caller_location) -> Allocator_Error {
+	return _reserve_dynamic_array((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), capacity, false, loc)
 }
 
-// `resize_dynamic_array` will try to resize memory of a passed dynamic array or map to the requested element count (setting the `len`, and possibly `cap`).
-//
-// Note: Prefer the procedure group `resize`
-_resize_dynamic_array :: #force_inline proc(array: ^$T/[dynamic]$E, length: int, should_zero: bool, loc := #caller_location) -> Allocator_Error {
-	if array == nil {
+
+_resize_dynamic_array :: #force_inline proc(a: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, length: int, should_zero: bool, loc := #caller_location) -> Allocator_Error {
+	if a == nil {
 		return nil
 	}
-	a := (^Raw_Dynamic_Array)(array)
 
 	if length <= a.cap {
 		if should_zero && a.len < length {
-			intrinsics.mem_zero(([^]E)(a.data)[a.len:], (length-a.len)*size_of(E))
+			intrinsics.mem_zero(([^]byte)(a.data)[a.len*size_of_elem:], (length-a.len)*size_of_elem)
 		}
 		a.len = max(length, 0)
 		return nil
@@ -752,15 +794,15 @@ _resize_dynamic_array :: #force_inline proc(array: ^$T/[dynamic]$E, length: int,
 	}
 	assert(a.allocator.procedure != nil)
 
-	old_size  := a.cap * size_of(E)
-	new_size  := length * size_of(E)
+	old_size  := a.cap  * size_of_elem
+	new_size  := length * size_of_elem
 	allocator := a.allocator
 
 	new_data : []byte
 	if should_zero {
-		new_data = mem_resize(a.data, old_size, new_size, align_of(E), allocator, loc) or_return
+		new_data = mem_resize(a.data, old_size, new_size, align_of_elem, allocator, loc) or_return
 	} else {
-		new_data = non_zero_mem_resize(a.data, old_size, new_size, align_of(E), allocator, loc) or_return
+		new_data = non_zero_mem_resize(a.data, old_size, new_size, align_of_elem, allocator, loc) or_return
 	}
 	if new_data == nil && new_size > 0 {
 		return .Out_Of_Memory
@@ -772,14 +814,17 @@ _resize_dynamic_array :: #force_inline proc(array: ^$T/[dynamic]$E, length: int,
 	return nil
 }
 
+// `resize_dynamic_array` will try to resize memory of a passed dynamic array or map to the requested element count (setting the `len`, and possibly `cap`).
+//
+// Note: Prefer the procedure group `resize`
 @builtin
-resize_dynamic_array :: proc(array: ^$T/[dynamic]$E, length: int, loc := #caller_location) -> Allocator_Error {
-	return _resize_dynamic_array(array, length, true, loc=loc)
+resize_dynamic_array :: proc(array: ^$T/[dynamic]$E, #any_int length: int, loc := #caller_location) -> Allocator_Error {
+	return _resize_dynamic_array((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), length, true, loc=loc)
 }
 
 @builtin
-non_zero_resize_dynamic_array :: proc(array: ^$T/[dynamic]$E, length: int, loc := #caller_location) -> Allocator_Error {
-	return _resize_dynamic_array(array, length, false, loc=loc)
+non_zero_resize_dynamic_array :: proc(array: ^$T/[dynamic]$E, #any_int length: int, loc := #caller_location) -> Allocator_Error {
+	return _resize_dynamic_array((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), length, false, loc=loc)
 }
 
 /*
@@ -794,10 +839,13 @@ non_zero_resize_dynamic_array :: proc(array: ^$T/[dynamic]$E, length: int, loc :
 	Note: Prefer the procedure group `shrink`
 */
 shrink_dynamic_array :: proc(array: ^$T/[dynamic]$E, new_cap := -1, loc := #caller_location) -> (did_shrink: bool, err: Allocator_Error) {
-	if array == nil {
+	return _shrink_dynamic_array((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), new_cap, loc)
+}
+
+_shrink_dynamic_array :: proc(a: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, new_cap := -1, loc := #caller_location) -> (did_shrink: bool, err: Allocator_Error) {
+	if a == nil {
 		return
 	}
-	a := (^Raw_Dynamic_Array)(array)
 
 	new_cap := new_cap if new_cap >= 0 else a.len
 
@@ -810,10 +858,10 @@ shrink_dynamic_array :: proc(array: ^$T/[dynamic]$E, new_cap := -1, loc := #call
 	}
 	assert(a.allocator.procedure != nil)
 
-	old_size := a.cap * size_of(E)
-	new_size := new_cap * size_of(E)
+	old_size := a.cap * size_of_elem
+	new_size := new_cap * size_of_elem
 
-	new_data := mem_resize(a.data, old_size, new_size, align_of(E), a.allocator, loc) or_return
+	new_data := mem_resize(a.data, old_size, new_size, align_of_elem, a.allocator, loc) or_return
 
 	a.data = raw_data(new_data)
 	a.len = min(new_cap, a.len)

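As a usage sketch, the public procedure groups behave the same after this refactor (the element type here is arbitrary):

	nums: [dynamic]int
	_ = resize(&nums, 4)          // len == 4, new slots zeroed
	_ = non_zero_resize(&nums, 8) // len == 8, new slots left uninitialized
	_, _ = shrink(&nums)          // cap shrunk down to len
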
+ 72 - 58
base/runtime/core_builtin_soa.odin

@@ -55,7 +55,7 @@ raw_soa_footer_slice :: proc(array: ^$T/#soa[]$E) -> (footer: ^Raw_SOA_Footer_Sl
 	if array == nil {
 		return nil
 	}
-	field_count := uintptr(intrinsics.type_struct_field_count(E))
+	field_count := uintptr(len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E))
 	footer = (^Raw_SOA_Footer_Slice)(uintptr(array) + field_count*size_of(rawptr))
 	return
 }
@@ -64,12 +64,7 @@ raw_soa_footer_dynamic_array :: proc(array: ^$T/#soa[dynamic]$E) -> (footer: ^Ra
 	if array == nil {
 		return nil
 	}
-	field_count: uintptr
-	when intrinsics.type_is_array(E) {
-		field_count = len(E)
-	} else {
-		field_count = uintptr(intrinsics.type_struct_field_count(E))
-	}
+	field_count := uintptr(len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E))
 	footer = (^Raw_SOA_Footer_Dynamic_Array)(uintptr(array) + field_count*size_of(rawptr))
 	return
 }
@@ -98,11 +93,11 @@ make_soa_aligned :: proc($T: typeid/#soa[]$E, length: int, alignment: int, alloc
 	ti = type_info_base(ti)
 	si := &ti.variant.(Type_Info_Struct)
 
-	field_count := uintptr(intrinsics.type_struct_field_count(E))
+	field_count := uintptr(len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E))
 
 	total_size := 0
 	for i in 0..<field_count {
-		type := si.types[i].variant.(Type_Info_Pointer).elem
+		type := si.types[i].variant.(Type_Info_Multi_Pointer).elem
 		total_size += type.size * length
 		total_size = align_forward_int(total_size, max_align)
 	}
@@ -126,7 +121,7 @@ make_soa_aligned :: proc($T: typeid/#soa[]$E, length: int, alignment: int, alloc
 	data := uintptr(&array)
 	offset := 0
 	for i in 0..<field_count {
-		type := si.types[i].variant.(Type_Info_Pointer).elem
+		type := si.types[i].variant.(Type_Info_Multi_Pointer).elem
 
 		offset = align_forward_int(offset, max_align)
 
@@ -147,7 +142,7 @@ make_soa_slice :: proc($T: typeid/#soa[]$E, length: int, allocator := context.al
 @(builtin, require_results)
 make_soa_dynamic_array :: proc($T: typeid/#soa[dynamic]$E, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
 	context.allocator = allocator
-	reserve_soa(&array, DEFAULT_RESERVE_CAPACITY, loc) or_return
+	reserve_soa(&array, 0, loc) or_return
 	return array, nil
 }
 
@@ -187,8 +182,28 @@ resize_soa :: proc(array: ^$T/#soa[dynamic]$E, length: int, loc := #caller_locat
 	return nil
 }
 
+@builtin
+non_zero_resize_soa :: proc(array: ^$T/#soa[dynamic]$E, length: int, loc := #caller_location) -> Allocator_Error {
+	if array == nil {
+		return nil
+	}
+	non_zero_reserve_soa(array, length, loc) or_return
+	footer := raw_soa_footer(array)
+	footer.len = length
+	return nil
+}
+
 @builtin
 reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, loc := #caller_location) -> Allocator_Error {
+	return _reserve_soa(array, capacity, true, loc)
+}
+
+@builtin
+non_zero_reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, loc := #caller_location) -> Allocator_Error {
+	return _reserve_soa(array, capacity, false, loc)
+}
+
+_reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, zero_memory: bool, loc := #caller_location) -> Allocator_Error {
 	if array == nil {
 		return nil
 	}
@@ -213,12 +228,7 @@ reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, loc := #caller_lo
 	ti = type_info_base(ti)
 	si := &ti.variant.(Type_Info_Struct)
 
-	field_count: uintptr
-	when intrinsics.type_is_array(E) {
-		field_count = len(E)
-	} else {
-		field_count = uintptr(intrinsics.type_struct_field_count(E))
-	}
+	field_count := uintptr(len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E))
 	assert(footer.cap == old_cap)
 
 	old_size := 0
@@ -226,7 +236,7 @@ reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, loc := #caller_lo
 
 	max_align :: align_of(E)
 	for i in 0..<field_count {
-		type := si.types[i].variant.(Type_Info_Pointer).elem
+		type := si.types[i].variant.(Type_Info_Multi_Pointer).elem
 
 		old_size += type.size * old_cap
 		new_size += type.size * capacity
@@ -238,7 +248,7 @@ reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, loc := #caller_lo
 	old_data := (^rawptr)(array)^
 
 	new_bytes := array.allocator.procedure(
-		array.allocator.data, .Alloc, new_size, max_align,
+		array.allocator.data, .Alloc if zero_memory else .Alloc_Non_Zeroed, new_size, max_align,
 		nil, old_size, loc,
 	) or_return
 	new_data := raw_data(new_bytes)
@@ -249,7 +259,7 @@ reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, loc := #caller_lo
 	old_offset := 0
 	new_offset := 0
 	for i in 0..<field_count {
-		type := si.types[i].variant.(Type_Info_Pointer).elem
+		type := si.types[i].variant.(Type_Info_Multi_Pointer).elem
 
 		old_offset = align_forward_int(old_offset, max_align)
 		new_offset = align_forward_int(new_offset, max_align)
@@ -273,15 +283,26 @@ reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, loc := #caller_lo
 	return nil
 }
 
+
+@builtin
+append_soa_elem :: proc(array: ^$T/#soa[dynamic]$E, #no_broadcast arg: E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+	return _append_soa_elem(array, true, arg, loc)
+}
+
 @builtin
-append_soa_elem :: proc(array: ^$T/#soa[dynamic]$E, arg: E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+non_zero_append_soa_elem :: proc(array: ^$T/#soa[dynamic]$E, #no_broadcast arg: E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+	return _append_soa_elem(array, false, arg, loc)
+}
+
+_append_soa_elem :: proc(array: ^$T/#soa[dynamic]$E, zero_memory: bool, #no_broadcast arg: E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
 	if array == nil {
 		return 0, nil
 	}
 
 	if cap(array) <= len(array) + 1 {
-		cap := 2 * cap(array) + 8
-		err = reserve_soa(array, cap, loc) // do not 'or_return' here as it could be a partial success
+		// Same behavior as append_soa_elems but there's only one arg, so we always just add DEFAULT_DYNAMIC_ARRAY_CAPACITY.
+		cap := 2 * cap(array) + DEFAULT_DYNAMIC_ARRAY_CAPACITY
+		err = _reserve_soa(array, cap, zero_memory, loc) // do not 'or_return' here as it could be a partial success
 	}
 
 	footer := raw_soa_footer(array)
@@ -290,12 +311,7 @@ append_soa_elem :: proc(array: ^$T/#soa[dynamic]$E, arg: E, loc := #caller_locat
 		ti := type_info_of(T)
 		ti = type_info_base(ti)
 		si := &ti.variant.(Type_Info_Struct)
-		field_count: uintptr
-		when intrinsics.type_is_array(E) {
-			field_count = len(E)
-		} else {
-			field_count = uintptr(intrinsics.type_struct_field_count(E))
-		}
+		field_count := uintptr(len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E))
 
 		data := (^rawptr)(array)^
 
@@ -307,7 +323,7 @@ append_soa_elem :: proc(array: ^$T/#soa[dynamic]$E, arg: E, loc := #caller_locat
 
 		max_align :: align_of(E)
 		for i in 0..<field_count {
-			type := si.types[i].variant.(Type_Info_Pointer).elem
+			type := si.types[i].variant.(Type_Info_Multi_Pointer).elem
 
 			soa_offset  = align_forward_int(soa_offset, max_align)
 			item_offset = align_forward_int(item_offset, type.align)
@@ -326,7 +342,17 @@ append_soa_elem :: proc(array: ^$T/#soa[dynamic]$E, arg: E, loc := #caller_locat
 }
 
 @builtin
-append_soa_elems :: proc(array: ^$T/#soa[dynamic]$E, args: ..E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+append_soa_elems :: proc(array: ^$T/#soa[dynamic]$E, #no_broadcast args: ..E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+	return _append_soa_elems(array, true, args=args, loc=loc)
+}
+
+@builtin
+non_zero_append_soa_elems :: proc(array: ^$T/#soa[dynamic]$E, #no_broadcast args: ..E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+	return _append_soa_elems(array, false, args=args, loc=loc)
+}
+
+
+_append_soa_elems :: proc(array: ^$T/#soa[dynamic]$E, zero_memory: bool, #no_broadcast args: []E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
 	if array == nil {
 		return
 	}
@@ -337,8 +363,8 @@ append_soa_elems :: proc(array: ^$T/#soa[dynamic]$E, args: ..E, loc := #caller_l
 	}
 
 	if cap(array) <= len(array)+arg_len {
-		cap := 2 * cap(array) + max(8, arg_len)
-		err = reserve_soa(array, cap, loc) // do not 'or_return' here as it could be a partial success
+		cap := 2 * cap(array) + max(DEFAULT_DYNAMIC_ARRAY_CAPACITY, arg_len)
+		err = _reserve_soa(array, cap, zero_memory, loc) // do not 'or_return' here as it could be a partial success
 	}
 	arg_len = min(cap(array)-len(array), arg_len)
 
@@ -347,7 +373,7 @@ append_soa_elems :: proc(array: ^$T/#soa[dynamic]$E, args: ..E, loc := #caller_l
 		ti := type_info_of(typeid_of(T))
 		ti = type_info_base(ti)
 		si := &ti.variant.(Type_Info_Struct)
-		field_count := uintptr(intrinsics.type_struct_field_count(E))
+		field_count := uintptr(len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E))
 
 		data := (^rawptr)(array)^
 
@@ -358,7 +384,7 @@ append_soa_elems :: proc(array: ^$T/#soa[dynamic]$E, args: ..E, loc := #caller_l
 
 		max_align :: align_of(E)
 		for i in 0..<field_count {
-			type := si.types[i].variant.(Type_Info_Pointer).elem
+			type := si.types[i].variant.(Type_Info_Multi_Pointer).elem
 
 			soa_offset  = align_forward_int(soa_offset, max_align)
 			item_offset = align_forward_int(item_offset, type.align)
@@ -389,7 +415,8 @@ append_soa :: proc{
 
 
 delete_soa_slice :: proc(array: $T/#soa[]$E, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
-	when intrinsics.type_struct_field_count(E) != 0 {
+	field_count :: len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E)
+	when field_count != 0 {
 		array := array
 		ptr := (^rawptr)(&array)^
 		free(ptr, allocator, loc) or_return
@@ -398,7 +425,8 @@ delete_soa_slice :: proc(array: $T/#soa[]$E, allocator := context.allocator, loc
 }
 
 delete_soa_dynamic_array :: proc(array: $T/#soa[dynamic]$E, loc := #caller_location) -> Allocator_Error {
-	when intrinsics.type_struct_field_count(E) != 0 {
+	field_count :: len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E)
+	when field_count != 0 {
 		array := array
 		ptr := (^rawptr)(&array)^
 		footer := raw_soa_footer(&array)
@@ -416,7 +444,8 @@ delete_soa :: proc{
 
 
 clear_soa_dynamic_array :: proc(array: ^$T/#soa[dynamic]$E) {
-	when intrinsics.type_struct_field_count(E) != 0 {
+	field_count :: len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E)
+	when field_count != 0 {
 		footer := raw_soa_footer(array)
 		footer.len = 0
 	}
@@ -438,12 +467,7 @@ into_dynamic_soa :: proc(array: $T/#soa[]$E) -> #soa[dynamic]E {
 		allocator = nil_allocator(),
 	}
 
-	field_count: uintptr
-	when intrinsics.type_is_array(E) {
-		field_count = len(E)
-	} else {
-		field_count = uintptr(intrinsics.type_struct_field_count(E))
-	}
+	field_count := uintptr(len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E))
 
 	array := array
 	dynamic_data := ([^]rawptr)(&d)[:field_count]
@@ -467,16 +491,11 @@ unordered_remove_soa :: proc(array: ^$T/#soa[dynamic]$E, index: int, loc := #cal
 		ti = type_info_base(ti)
 		si := &ti.variant.(Type_Info_Struct)
 
-		field_count: uintptr
-		when intrinsics.type_is_array(E) {
-			field_count = len(E)
-		} else {
-			field_count = uintptr(intrinsics.type_struct_field_count(E))
-		}
+		field_count := uintptr(len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E))
 
 		data := uintptr(array)
 		for i in 0..<field_count {
-			type := si.types[i].variant.(Type_Info_Pointer).elem
+			type := si.types[i].variant.(Type_Info_Multi_Pointer).elem
 
 			offset := rawptr((^uintptr)(data)^ + uintptr(index*type.size))
 			final := rawptr((^uintptr)(data)^ + uintptr((len(array)-1)*type.size))
@@ -500,16 +519,11 @@ ordered_remove_soa :: proc(array: ^$T/#soa[dynamic]$E, index: int, loc := #calle
 		ti = type_info_base(ti)
 		si := &ti.variant.(Type_Info_Struct)
 
-		field_count: uintptr
-		when intrinsics.type_is_array(E) {
-			field_count = len(E)
-		} else {
-			field_count = uintptr(intrinsics.type_struct_field_count(E))
-		}
+		field_count := uintptr(len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E))
 
 		data := uintptr(array)
 		for i in 0..<field_count {
-			type := si.types[i].variant.(Type_Info_Pointer).elem
+			type := si.types[i].variant.(Type_Info_Multi_Pointer).elem
 
 			offset := (^uintptr)(data)^ + uintptr(index*type.size)
 			length := type.size*(len(array) - index - 1)

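A short usage sketch of the new non-zeroing SOA variants (the element type is hypothetical):

	Particle :: struct { pos, vel: [3]f32 }
	particles: #soa[dynamic]Particle

	non_zero_append_soa_elem(&particles, Particle{})
	non_zero_append_soa_elems(&particles, Particle{}, Particle{})
	non_zero_resize_soa(&particles, 128) // grown slots are left uninitialized
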
+ 3 - 0
base/runtime/default_allocators_general.odin

@@ -6,6 +6,9 @@ when ODIN_DEFAULT_TO_NIL_ALLOCATOR {
 } else when ODIN_DEFAULT_TO_PANIC_ALLOCATOR {
 	default_allocator_proc :: panic_allocator_proc
 	default_allocator :: panic_allocator
+} else when ODIN_OS != .Orca && (ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32) {
+	default_allocator :: default_wasm_allocator
+	default_allocator_proc :: wasm_allocator_proc
 } else {
 	default_allocator :: heap_allocator
 	default_allocator_proc :: heap_allocator_proc

+ 2 - 0
base/runtime/default_allocators_arena.odin → base/runtime/default_temp_allocator_arena.odin

@@ -12,6 +12,8 @@ Memory_Block :: struct {
 	capacity:  uint,
 }
 
+// NOTE: This is a growing arena that is only used for the default temp allocator.
+// For your own growing arena needs, prefer `Arena` from `core:mem/virtual`.
 Arena :: struct {
 	backing_allocator:  Allocator,
 	curr_block:         ^Memory_Block,

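A minimal sketch of the suggested alternative, assuming the usual `core:mem/virtual` API (`arena_init_growing`, `arena_allocator`, `arena_destroy`):

	import "core:mem/virtual"

	arena: virtual.Arena
	_ = virtual.arena_init_growing(&arena)
	defer virtual.arena_destroy(&arena)
	context.allocator = virtual.arena_allocator(&arena)
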
+ 1 - 1
base/runtime/default_temporary_allocator.odin

@@ -1,7 +1,7 @@
 package runtime
 
 DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE: int : #config(DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE, 4 * Megabyte)
-NO_DEFAULT_TEMP_ALLOCATOR: bool : ODIN_OS == .Freestanding || ODIN_OS == .JS || ODIN_DEFAULT_TO_NIL_ALLOCATOR
+NO_DEFAULT_TEMP_ALLOCATOR: bool : ODIN_OS == .Freestanding || ODIN_DEFAULT_TO_NIL_ALLOCATOR
 
 when NO_DEFAULT_TEMP_ALLOCATOR {
 	Default_Temp_Allocator :: struct {}

+ 1 - 1
base/runtime/docs.odin

@@ -157,7 +157,7 @@ __dynamic_map_get // dynamic map calls
 __dynamic_map_set // dynamic map calls
 
 
-## Dynamic literals ([dymamic]T and map[K]V) (can be disabled with -no-dynamic-literals)
+## Dynamic literals ([dynamic]T and map[K]V) (can be disabled with -no-dynamic-literals)
 
 __dynamic_array_reserve
 __dynamic_array_append

+ 6 - 6
base/runtime/dynamic_map_internal.odin

@@ -577,7 +577,7 @@ map_grow_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Inf
 
 
 @(require_results)
-map_reserve_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, new_capacity: uintptr, loc := #caller_location) -> Allocator_Error {
+map_reserve_dynamic :: #force_no_inline proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, new_capacity: uintptr, loc := #caller_location) -> Allocator_Error {
 	@(require_results)
 	ceil_log2 :: #force_inline proc "contextless" (x: uintptr) -> uintptr {
 		z := intrinsics.count_leading_zeros(x)
@@ -641,7 +641,7 @@ map_reserve_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_
 
 
 @(require_results)
-map_shrink_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, loc := #caller_location) -> (did_shrink: bool, err: Allocator_Error) {
+map_shrink_dynamic :: #force_no_inline proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, loc := #caller_location) -> (did_shrink: bool, err: Allocator_Error) {
 	if m.allocator.procedure == nil {
 		m.allocator = context.allocator
 	}
@@ -688,7 +688,7 @@ map_shrink_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_I
 }
 
 @(require_results)
-map_free_dynamic :: proc "odin" (m: Raw_Map, info: ^Map_Info, loc := #caller_location) -> Allocator_Error {
+map_free_dynamic :: #force_no_inline proc "odin" (m: Raw_Map, info: ^Map_Info, loc := #caller_location) -> Allocator_Error {
 	ptr := rawptr(map_data(m))
 	size := int(map_total_allocation_size(uintptr(map_cap(m)), info))
 	err := mem_free_with_size(ptr, size, m.allocator, loc)
@@ -700,7 +700,7 @@ map_free_dynamic :: proc "odin" (m: Raw_Map, info: ^Map_Info, loc := #caller_loc
 }
 
 @(require_results)
-map_lookup_dynamic :: proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (index: uintptr, ok: bool) {
+map_lookup_dynamic :: #force_no_inline proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (index: uintptr, ok: bool) {
 	if map_len(m) == 0 {
 		return 0, false
 	}
@@ -723,7 +723,7 @@ map_lookup_dynamic :: proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info,
 	}
 }
 @(require_results)
-map_exists_dynamic :: proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (ok: bool) {
+map_exists_dynamic :: #force_no_inline proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (ok: bool) {
 	if map_len(m) == 0 {
 		return false
 	}
@@ -749,7 +749,7 @@ map_exists_dynamic :: proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info,
 
 
 @(require_results)
-map_erase_dynamic :: #force_inline proc "contextless" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (old_k, old_v: uintptr, ok: bool) {
+map_erase_dynamic :: #force_no_inline proc "contextless" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (old_k, old_v: uintptr, ok: bool) {
 	index := map_lookup_dynamic(m^, info, k) or_return
 	ks, vs, hs, _, _ := map_kvh_data_dynamic(m^, info)
 	hs[index] |= TOMBSTONE_MASK

+ 1 - 1
base/runtime/entry_unix.odin

@@ -1,5 +1,5 @@
 //+private
-//+build linux, darwin, freebsd, openbsd, haiku
+//+build linux, darwin, freebsd, openbsd, netbsd, haiku
 //+no-instrumentation
 package runtime
 

+ 30 - 11
base/runtime/entry_wasm.odin

@@ -6,15 +6,34 @@ package runtime
 import "base:intrinsics"
 
 when !ODIN_TEST && !ODIN_NO_ENTRY_POINT {
-	@(link_name="_start", linkage="strong", require, export)
-	_start :: proc "c" () {
-		context = default_context()
-		#force_no_inline _startup_runtime()
-		intrinsics.__entry_point()
-	}
-	@(link_name="_end", linkage="strong", require, export)
-	_end :: proc "c" () {
-		context = default_context()
-		#force_no_inline _cleanup_runtime()
+	when ODIN_OS == .Orca {
+		@(linkage="strong", require, export)
+		oc_on_init :: proc "c" () {
+			context = default_context()
+			#force_no_inline _startup_runtime()
+			intrinsics.__entry_point()
+		}
+		@(linkage="strong", require, export)
+		oc_on_terminate :: proc "c" () {
+			context = default_context()
+			#force_no_inline _cleanup_runtime()
+		}
+	} else {
+		@(link_name="_start", linkage="strong", require, export)
+		_start :: proc "c" () {
+			context = default_context()
+
+			when ODIN_OS == .WASI {
+				_wasi_setup_args()
+			}
+
+			#force_no_inline _startup_runtime()
+			intrinsics.__entry_point()
+		}
+		@(link_name="_end", linkage="strong", require, export)
+		_end :: proc "c" () {
+			context = default_context()
+			#force_no_inline _cleanup_runtime()
+		}
 	}
-}
+}

+ 2 - 1
base/runtime/entry_windows.odin

@@ -10,8 +10,9 @@ when ODIN_BUILD_MODE == .Dynamic {
 	DllMain :: proc "system" (hinstDLL: rawptr, fdwReason: u32, lpReserved: rawptr) -> b32 {
 		context = default_context()
 
-		// Populate Windows DLL-specific global
+		// Populate Windows DLL-specific globals
 		dll_forward_reason = DLL_Forward_Reason(fdwReason)
+		dll_instance       = hinstDLL
 
 		switch dll_forward_reason {
 		case .Process_Attach:

+ 4 - 0
base/runtime/error_checks.odin

@@ -4,6 +4,8 @@ package runtime
 bounds_trap :: proc "contextless" () -> ! {
 	when ODIN_OS == .Windows {
 		windows_trap_array_bounds()
+	} else when ODIN_OS == .Orca {
+		abort_ext("", "", 0, "bounds trap")
 	} else {
 		trap()
 	}
@@ -13,6 +15,8 @@ bounds_trap :: proc "contextless" () -> ! {
 type_assertion_trap :: proc "contextless" () -> ! {
 	when ODIN_OS == .Windows {
 		windows_trap_type_assertion()
+	} else when ODIN_OS == .Orca {
+		abort_ext("", "", 0, "type assertion trap")
 	} else {
 		trap()
 	}

+ 3 - 3
base/runtime/heap_allocator.odin

@@ -97,14 +97,14 @@ heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
 }
 
 
-heap_alloc :: proc(size: int, zero_memory := true) -> rawptr {
+heap_alloc :: proc "contextless" (size: int, zero_memory := true) -> rawptr {
 	return _heap_alloc(size, zero_memory)
 }
 
-heap_resize :: proc(ptr: rawptr, new_size: int) -> rawptr {
+heap_resize :: proc "contextless" (ptr: rawptr, new_size: int) -> rawptr {
 	return _heap_resize(ptr, new_size)
 }
 
-heap_free :: proc(ptr: rawptr) {
+heap_free :: proc "contextless" (ptr: rawptr) {
 	_heap_free(ptr)
 }

+ 29 - 0
base/runtime/heap_allocator_orca.odin

@@ -0,0 +1,29 @@
+//+build orca
+//+private
+package runtime
+
+foreign {
+	@(link_name="malloc")   _orca_malloc   :: proc "c" (size: int) -> rawptr ---
+	@(link_name="calloc")   _orca_calloc   :: proc "c" (num, size: int) -> rawptr ---
+	@(link_name="free")     _orca_free     :: proc "c" (ptr: rawptr) ---
+	@(link_name="realloc")  _orca_realloc  :: proc "c" (ptr: rawptr, size: int) -> rawptr ---
+}
+
+_heap_alloc :: proc "contextless" (size: int, zero_memory := true) -> rawptr {
+	if size <= 0 {
+		return nil
+	}
+	if zero_memory {
+		return _orca_calloc(1, size)
+	} else {
+		return _orca_malloc(size)
+	}
+}
+
+_heap_resize :: proc "contextless" (ptr: rawptr, new_size: int) -> rawptr {
+	return _orca_realloc(ptr, new_size)
+}
+
+_heap_free :: proc "contextless" (ptr: rawptr) {
+	_orca_free(ptr)
+}

+ 7 - 4
base/runtime/heap_allocator_other.odin

@@ -2,14 +2,17 @@
 //+private
 package runtime
 
-_heap_alloc :: proc(size: int, zero_memory := true) -> rawptr {
+_heap_alloc :: proc "contextless" (size: int, zero_memory := true) -> rawptr {
+	context = default_context()
 	unimplemented("base:runtime 'heap_alloc' procedure is not supported on this platform")
 }
 
-_heap_resize :: proc(ptr: rawptr, new_size: int) -> rawptr {
+_heap_resize :: proc "contextless" (ptr: rawptr, new_size: int) -> rawptr {
+	context = default_context()
 	unimplemented("base:runtime 'heap_resize' procedure is not supported on this platform")
 }
 
-_heap_free :: proc(ptr: rawptr) {
+_heap_free :: proc "contextless" (ptr: rawptr) {
+	context = default_context()
 	unimplemented("base:runtime 'heap_free' procedure is not supported on this platform")
-}
+}

+ 4 - 4
base/runtime/heap_allocator_unix.odin

@@ -1,4 +1,4 @@
-//+build linux, darwin, freebsd, openbsd, haiku
+//+build linux, darwin, freebsd, openbsd, netbsd, haiku
 //+private
 package runtime
 
@@ -16,7 +16,7 @@ foreign libc {
 	@(link_name="realloc")  _unix_realloc  :: proc(ptr: rawptr, size: int) -> rawptr ---
 }
 
-_heap_alloc :: proc(size: int, zero_memory := true) -> rawptr {
+_heap_alloc :: proc "contextless" (size: int, zero_memory := true) -> rawptr {
 	if size <= 0 {
 		return nil
 	}
@@ -27,12 +27,12 @@ _heap_alloc :: proc(size: int, zero_memory := true) -> rawptr {
 	}
 }
 
-_heap_resize :: proc(ptr: rawptr, new_size: int) -> rawptr {
+_heap_resize :: proc "contextless" (ptr: rawptr, new_size: int) -> rawptr {
 	// NOTE: _unix_realloc doesn't guarantee new memory will be zeroed on
 	// POSIX platforms. Ensure your caller takes this into account.
 	return _unix_realloc(ptr, new_size)
 }
 
-_heap_free :: proc(ptr: rawptr) {
+_heap_free :: proc "contextless" (ptr: rawptr) {
 	_unix_free(ptr)
 }

+ 3 - 3
base/runtime/heap_allocator_windows.odin

@@ -14,11 +14,11 @@ foreign kernel32 {
 	HeapFree       :: proc(hHeap: rawptr, dwFlags: u32, lpMem: rawptr) -> b32 ---
 }
 
-_heap_alloc :: proc(size: int, zero_memory := true) -> rawptr {
+_heap_alloc :: proc "contextless" (size: int, zero_memory := true) -> rawptr {
 	HEAP_ZERO_MEMORY :: 0x00000008
 	return HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY if zero_memory else 0, uint(size))
 }
-_heap_resize :: proc(ptr: rawptr, new_size: int) -> rawptr {
+_heap_resize :: proc "contextless" (ptr: rawptr, new_size: int) -> rawptr {
 	if new_size == 0 {
 		_heap_free(ptr)
 		return nil
@@ -30,7 +30,7 @@ _heap_resize :: proc(ptr: rawptr, new_size: int) -> rawptr {
 	HEAP_ZERO_MEMORY :: 0x00000008
 	return HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, ptr, uint(new_size))
 }
-_heap_free :: proc(ptr: rawptr) {
+_heap_free :: proc "contextless" (ptr: rawptr) {
 	if ptr == nil {
 		return
 	}

+ 73 - 37
base/runtime/internal.odin

@@ -1,3 +1,4 @@
+//+vet !cast
 package runtime
 
 import "base:intrinsics"
@@ -29,7 +30,7 @@ is_power_of_two_int :: #force_inline proc "contextless" (x: int) -> bool {
 	return (x & (x-1)) == 0
 }
 
-align_forward_int :: #force_inline proc(ptr, align: int) -> int {
+align_forward_int :: #force_inline proc "odin" (ptr, align: int) -> int {
 	assert(is_power_of_two_int(align))
 
 	p := ptr
@@ -40,6 +41,24 @@ align_forward_int :: #force_inline proc(ptr, align: int) -> int {
 	return p
 }
 
+is_power_of_two_uint :: #force_inline proc "contextless" (x: uint) -> bool {
+	if x <= 0 {
+		return false
+	}
+	return (x & (x-1)) == 0
+}
+
+align_forward_uint :: #force_inline proc "odin" (ptr, align: uint) -> uint {
+	assert(is_power_of_two_uint(align))
+
+	p := ptr
+	modulo := p & (align-1)
+	if modulo != 0 {
+		p += align - modulo
+	}
+	return p
+}
+
 is_power_of_two_uintptr :: #force_inline proc "contextless" (x: uintptr) -> bool {
 	if x <= 0 {
 		return false
@@ -47,7 +66,7 @@ is_power_of_two_uintptr :: #force_inline proc "contextless" (x: uintptr) -> bool
 	return (x & (x-1)) == 0
 }
 
-align_forward_uintptr :: #force_inline proc(ptr, align: uintptr) -> uintptr {
+align_forward_uintptr :: #force_inline proc "odin" (ptr, align: uintptr) -> uintptr {
 	assert(is_power_of_two_uintptr(align))
 
 	p := ptr
@@ -58,6 +77,18 @@ align_forward_uintptr :: #force_inline proc(ptr, align: uintptr) -> uintptr {
 	return p
 }
 
+is_power_of_two :: proc {
+	is_power_of_two_int,
+	is_power_of_two_uint,
+	is_power_of_two_uintptr,
+}
+
+align_forward :: proc {
+	align_forward_int,
+	align_forward_uint,
+	align_forward_uintptr,
+}
+
 mem_zero :: proc "contextless" (data: rawptr, len: int) -> rawptr {
 	if data == nil {
 		return nil
@@ -453,7 +484,7 @@ quaternion256_ne :: #force_inline proc "contextless" (a, b: quaternion256) -> bo
 string_decode_rune :: #force_inline proc "contextless" (s: string) -> (rune, int) {
 	// NOTE(bill): Duplicated here to remove dependency on package unicode/utf8
 
-	@static accept_sizes := [256]u8{
+	@(static, rodata) accept_sizes := [256]u8{
 		0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x00-0x0f
 		0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x10-0x1f
 		0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x20-0x2f
@@ -474,7 +505,7 @@ string_decode_rune :: #force_inline proc "contextless" (s: string) -> (rune, int
 	}
 	Accept_Range :: struct {lo, hi: u8}
 
-	@static accept_ranges := [5]Accept_Range{
+	@(static, rodata) accept_ranges := [5]Accept_Range{
 		{0x80, 0xbf},
 		{0xa0, 0xbf},
 		{0x80, 0x9f},
@@ -612,21 +643,24 @@ abs_quaternion256 :: #force_inline proc "contextless" (x: quaternion256) -> f64
 
 
 quo_complex32 :: proc "contextless" (n, m: complex32) -> complex32 {
-	e, f: f16
+	nr, ni := f32(real(n)), f32(imag(n))
+	mr, mi := f32(real(m)), f32(imag(m))
 
-	if abs(real(m)) >= abs(imag(m)) {
-		ratio := imag(m) / real(m)
-		denom := real(m) + ratio*imag(m)
-		e = (real(n) + imag(n)*ratio) / denom
-		f = (imag(n) - real(n)*ratio) / denom
+	e, f: f32
+
+	if abs(mr) >= abs(mi) {
+		ratio := mi / mr
+		denom := mr + ratio*mi
+		e = (nr + ni*ratio) / denom
+		f = (ni - nr*ratio) / denom
 	} else {
-		ratio := real(m) / imag(m)
-		denom := imag(m) + ratio*real(m)
-		e = (real(n)*ratio + imag(n)) / denom
-		f = (imag(n)*ratio - real(n)) / denom
+		ratio := mr / mi
+		denom := mi + ratio*mr
+		e = (nr*ratio + ni) / denom
+		f = (ni*ratio - nr) / denom
 	}
 
-	return complex(e, f)
+	return complex(f16(e), f16(f))
 }
 
 
@@ -667,15 +701,15 @@ quo_complex128 :: proc "contextless" (n, m: complex128) -> complex128 {
 }
 
 mul_quaternion64 :: proc "contextless" (q, r: quaternion64) -> quaternion64 {
-	q0, q1, q2, q3 := real(q), imag(q), jmag(q), kmag(q)
-	r0, r1, r2, r3 := real(r), imag(r), jmag(r), kmag(r)
+	q0, q1, q2, q3 := f32(real(q)), f32(imag(q)), f32(jmag(q)), f32(kmag(q))
+	r0, r1, r2, r3 := f32(real(r)), f32(imag(r)), f32(jmag(r)), f32(kmag(r))
 
 	t0 := r0*q0 - r1*q1 - r2*q2 - r3*q3
 	t1 := r0*q1 + r1*q0 - r2*q3 + r3*q2
 	t2 := r0*q2 + r1*q3 + r2*q0 - r3*q1
 	t3 := r0*q3 - r1*q2 + r2*q1 + r3*q0
 
-	return quaternion(w=t0, x=t1, y=t2, z=t3)
+	return quaternion(w=f16(t0), x=f16(t1), y=f16(t2), z=f16(t3))
 }
 
 mul_quaternion128 :: proc "contextless" (q, r: quaternion128) -> quaternion128 {
@@ -703,8 +737,8 @@ mul_quaternion256 :: proc "contextless" (q, r: quaternion256) -> quaternion256 {
 }
 
 quo_quaternion64 :: proc "contextless" (q, r: quaternion64) -> quaternion64 {
-	q0, q1, q2, q3 := real(q), imag(q), jmag(q), kmag(q)
-	r0, r1, r2, r3 := real(r), imag(r), jmag(r), kmag(r)
+	q0, q1, q2, q3 := f32(real(q)), f32(imag(q)), f32(jmag(q)), f32(kmag(q))
+	r0, r1, r2, r3 := f32(real(r)), f32(imag(r)), f32(jmag(r)), f32(kmag(r))
 
 	invmag2 := 1.0 / (r0*r0 + r1*r1 + r2*r2 + r3*r3)
 
@@ -713,7 +747,7 @@ quo_quaternion64 :: proc "contextless" (q, r: quaternion64) -> quaternion64 {
 	t2 := (r0*q2 - r1*q3 - r2*q0 + r3*q1) * invmag2
 	t3 := (r0*q3 + r1*q2 + r2*q1 - r3*q0) * invmag2
 
-	return quaternion(w=t0, x=t1, y=t2, z=t3)
+	return quaternion(w=f16(t0), x=f16(t1), y=f16(t2), z=f16(t3))
 }
 
 quo_quaternion128 :: proc "contextless" (q, r: quaternion128) -> quaternion128 {
@@ -801,6 +835,10 @@ truncsfhf2 :: proc "c" (value: f32) -> __float16 {
 	}
 }
 
+@(link_name="__aeabi_d2h", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
+aeabi_d2h :: proc "c" (value: f64) -> __float16 {
+	return truncsfhf2(f32(value))
+}
 
 @(link_name="__truncdfhf2", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
 truncdfhf2 :: proc "c" (value: f64) -> __float16 {
@@ -978,26 +1016,26 @@ modti3 :: proc "c" (a, b: i128) -> i128 {
 	bn := (b ~ s_b) - s_b
 
 	r: u128 = ---
-	_ = udivmod128(transmute(u128)an, transmute(u128)bn, &r)
-	return (transmute(i128)r ~ s_a) - s_a
+	_ = udivmod128(u128(an), u128(bn), &r)
+	return (i128(r) ~ s_a) - s_a
 }
 
 
 @(link_name="__divmodti4", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
 divmodti4 :: proc "c" (a, b: i128, rem: ^i128) -> i128 {
-	u := udivmod128(transmute(u128)a, transmute(u128)b, cast(^u128)rem)
-	return transmute(i128)u
+	u := udivmod128(u128(a), u128(b), (^u128)(rem))
+	return i128(u)
 }
 
 @(link_name="__divti3", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
 divti3 :: proc "c" (a, b: i128) -> i128 {
-	u := udivmodti4(transmute(u128)a, transmute(u128)b, nil)
-	return transmute(i128)u
+	u := udivmodti4(u128(a), u128(b), nil)
+	return i128(u)
 }
 
 
 @(link_name="__fixdfti", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
-fixdfti :: proc(a: u64) -> i128 {
+fixdfti :: proc "c" (a: u64) -> i128 {
 	significandBits :: 52
 	typeWidth       :: (size_of(u64)*8)
 	exponentBits    :: (typeWidth - significandBits - 1)
@@ -1042,19 +1080,17 @@ fixdfti :: proc(a: u64) -> i128 {
 __write_bits :: proc "contextless" (dst, src: [^]byte, offset: uintptr, size: uintptr) {
 	for i in 0..<size {
 		j := offset+i
-		the_bit := byte((src[i/8]) & (1<<(i&7)) != 0)
-		b := the_bit<<(j&7)
-		dst[j/8] &~= b
-		dst[j/8] |=  b
+		the_bit := byte((src[i>>3]) & (1<<(i&7)) != 0)
+		dst[j>>3] &~=       1<<(j&7)
+		dst[j>>3]  |= the_bit<<(j&7)
 	}
 }
 
 __read_bits :: proc "contextless" (dst, src: [^]byte, offset: uintptr, size: uintptr) {
 	for j in 0..<size {
 		i := offset+j
-		the_bit := byte((src[i/8]) & (1<<(i&7)) != 0)
-		b := the_bit<<(j&7)
-		dst[j/8] &~= b
-		dst[j/8] |=  b
+		the_bit := byte((src[i>>3]) & (1<<(i&7)) != 0)
+		dst[j>>3] &~=       1<<(j&7)
+		dst[j>>3]  |= the_bit<<(j&7)
 	}
-}
+}

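The new `align_forward` overload group rounds an integer up to the next multiple of a power-of-two alignment, e.g.:

	align_forward_int(13, 8)              // -> 16
	align_forward_uint(32, 16)            // -> 32 (already aligned)
	align_forward(uintptr(7), uintptr(4)) // resolves to align_forward_uintptr -> 8
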
+ 6 - 2
base/runtime/os_specific_bsd.odin

@@ -1,4 +1,4 @@
-//+build freebsd, openbsd
+//+build freebsd, openbsd, netbsd
 //+private
 package runtime
 
@@ -9,7 +9,11 @@ foreign libc {
 	@(link_name="write")
 	_unix_write :: proc(fd: i32, buf: rawptr, size: int) -> int ---
 
-	__error :: proc() -> ^i32 ---
+	when ODIN_OS == .NetBSD {
+		@(link_name="__errno") __error :: proc() -> ^i32 ---
+	} else {
+		__error :: proc() -> ^i32 ---
+	}
 }
 
 _stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {

+ 18 - 5
base/runtime/os_specific_darwin.odin

@@ -5,11 +5,24 @@ package runtime
 import "base:intrinsics"
 
 _stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
-	WRITE  :: 0x2000004
 	STDERR :: 2
-	ret := intrinsics.syscall(WRITE, STDERR, uintptr(raw_data(data)), uintptr(len(data)))
-	if ret < 0 {
-		return 0, _OS_Errno(-ret)
+	when ODIN_NO_CRT {
+		WRITE  :: 0x2000004
+		ret := intrinsics.syscall(WRITE, STDERR, uintptr(raw_data(data)), uintptr(len(data)))
+		if ret < 0 {
+			return 0, _OS_Errno(-ret)
+		}
+		return int(ret), 0
+	} else {
+		foreign {
+			write   :: proc(handle: i32, buffer: [^]byte, count: uint) -> int ---
+			__error :: proc() -> ^i32 ---
+		}
+
+		if ret := write(STDERR, raw_data(data), uint(len(data))); ret >= 0 {
+			return int(ret), 0
+		}
+
+		return 0, _OS_Errno(__error()^)
 	}
-	return int(ret), 0
 }

+ 43 - 0
base/runtime/os_specific_orca.odin

@@ -0,0 +1,43 @@
+//+build orca
+//+private
+package runtime
+
+import "base:intrinsics"
+
+// Constants allowing to specify the level of logging verbosity.
+log_level :: enum u32 {
+	// Only errors are logged.
+	ERROR = 0,
+	// Only warnings and errors are logged.
+	WARNING = 1,
+	// All messages are logged.
+	INFO = 2,
+	COUNT = 3,
+}
+
+@(default_calling_convention="c", link_prefix="oc_")
+foreign {
+	abort_ext   :: proc(file: cstring, function: cstring, line: i32, fmt: cstring, #c_vararg args: ..any) -> ! ---
+	assert_fail :: proc(file: cstring, function: cstring, line: i32, src: cstring, fmt: cstring, #c_vararg args: ..any) -> ! ---
+	log_ext     :: proc(level: log_level, function: cstring, file: cstring, line: i32, fmt: cstring, #c_vararg args: ..any) ---
+}
+
+// NOTE: This is all pretty gross, don't look.
+
+// WASM is single threaded so this should be fine.
+orca_stderr_buffer:     [4096]byte
+orca_stderr_buffer_idx: int
+
+_stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
+	for b in data {
+		orca_stderr_buffer[orca_stderr_buffer_idx] = b
+		orca_stderr_buffer_idx += 1
+
+		if b == '\n' || orca_stderr_buffer_idx == len(orca_stderr_buffer)-1 {
+			log_ext(.ERROR, "", "", 0, cstring(raw_data(orca_stderr_buffer[:orca_stderr_buffer_idx])))
+			orca_stderr_buffer_idx = 0
+		}
+	}
+
+	return len(data), 0
+}

+ 47 - 3
base/runtime/os_specific_wasi.odin

@@ -2,10 +2,54 @@
 //+private
 package runtime
 
-import "core:sys/wasm/wasi"
+foreign import wasi "wasi_snapshot_preview1"
+
+@(default_calling_convention="contextless")
+foreign wasi {
+	fd_write :: proc(
+		fd: i32,
+		iovs: [][]byte,
+		n: ^uint,
+	) -> u16 ---
+
+	@(private="file")
+	args_sizes_get :: proc(
+		num_of_args:  ^uint,
+		size_of_args: ^uint,
+	) -> u16 ---
+
+	@(private="file")
+	args_get :: proc(
+		argv:     [^]cstring,
+		argv_buf: [^]byte,
+	) -> u16 ---
+}
 
 _stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
-	data := (wasi.ciovec_t)(data)
-	n, err := wasi.fd_write(1, {data})
+	n: uint
+	err := fd_write(1, {data}, &n)
 	return int(n), _OS_Errno(err)
 }
+
+_wasi_setup_args :: proc() {
+	num_of_args, size_of_args: uint
+	if errno := args_sizes_get(&num_of_args, &size_of_args); errno != 0 {
+		return
+	}
+
+	err: Allocator_Error
+	if args__, err = make([]cstring, num_of_args); err != nil {
+		return
+	}
+
+	args_buf: []byte
+	if args_buf, err = make([]byte, size_of_args); err != nil {
+		delete(args__)
+		return
+	}
+
+	if errno := args_get(raw_data(args__), raw_data(args_buf)); errno != 0 {
+		delete(args__)
+		delete(args_buf)
+	}
+}

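The two calls above follow the standard WASI convention: `args_sizes_get` first reports the argument count and the total byte size of all argument strings, then `args_get` fills a caller-provided pointer array and byte buffer. Once `_wasi_setup_args` has run, the populated `args__` can be walked as ordinary cstrings, e.g.:

	for arg in args__ {
		print_string(string(arg)) // each entry is NUL-terminated, backed by args_buf
		print_byte('\n')
	}
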
+ 7 - 6
base/runtime/print.odin

@@ -262,7 +262,7 @@ print_typeid :: #force_no_inline proc "contextless" (id: typeid) {
 	}
 }
 
-@(optimization_mode="size")
+@(optimization_mode="favor_size")
 print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) {
 	if ti == nil {
 		print_string("nil")
@@ -401,15 +401,16 @@ print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) {
 		}
 
 		print_string("struct ")
-		if info.is_packed    { print_string("#packed ") }
-		if info.is_raw_union { print_string("#raw_union ") }
-		if info.custom_align {
+		if .packed    in info.flags { print_string("#packed ") }
+		if .raw_union in info.flags { print_string("#raw_union ") }
+		if .no_copy   in info.flags { print_string("#no_copy ") }
+		if .align in info.flags {
 			print_string("#align(")
 			print_u64(u64(ti.align))
 			print_string(") ")
 		}
 		print_byte('{')
-		for name, i in info.names {
+		for name, i in info.names[:info.field_count] {
 			if i > 0 { print_string(", ") }
 			print_string(name)
 			print_string(": ")
@@ -469,7 +470,7 @@ print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) {
 		print_string("bit_field ")
 		print_type(info.backing_type)
 		print_string(" {")
-		for name, i in info.names {
+		for name, i in info.names[:info.field_count] {
 			if i > 0 { print_string(", ") }
 			print_string(name)
 			print_string(": ")

+ 16 - 10
base/runtime/procs.odin

@@ -25,13 +25,19 @@ when ODIN_NO_CRT && ODIN_OS == .Windows {
 		RtlMoveMemory(dst, src, len)
 		return dst
 	}
-} else when ODIN_NO_CRT || (ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32) {
+} else when ODIN_NO_CRT || (ODIN_OS != .Orca && (ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32)) {
+	// NOTE: on wasm, calls to these procs are generated (by LLVM) with type `i32` instead of `int`.
+	//
+	// NOTE: `#any_int` is also needed, because calls that we generate (and package code)
+	//       will be using `int` and need to be converted.
+	int_t :: i32 when ODIN_ARCH == .wasm64p32 else int
+
 	@(link_name="memset", linkage="strong", require)
-	memset :: proc "c" (ptr: rawptr, val: i32, len: int) -> rawptr {
+	memset :: proc "c" (ptr: rawptr, val: i32, #any_int len: int_t) -> rawptr {
 		if ptr != nil && len != 0 {
 			b := byte(val)
 			p := ([^]byte)(ptr)
-			for i := 0; i < len; i += 1 {
+			for i := int_t(0); i < len; i += 1 {
 				p[i] = b
 			}
 		}
@@ -39,10 +45,10 @@ when ODIN_NO_CRT && ODIN_OS == .Windows {
 	}
 
 	@(link_name="bzero", linkage="strong", require)
-	bzero :: proc "c" (ptr: rawptr, len: int) -> rawptr {
+	bzero :: proc "c" (ptr: rawptr, #any_int len: int_t) -> rawptr {
 		if ptr != nil && len != 0 {
 			p := ([^]byte)(ptr)
-			for i := 0; i < len; i += 1 {
+			for i := int_t(0); i < len; i += 1 {
 				p[i] = 0
 			}
 		}
@@ -50,7 +56,7 @@ when ODIN_NO_CRT && ODIN_OS == .Windows {
 	}
 
 	@(link_name="memmove", linkage="strong", require)
-	memmove :: proc "c" (dst, src: rawptr, len: int) -> rawptr {
+	memmove :: proc "c" (dst, src: rawptr, #any_int len: int_t) -> rawptr {
 		d, s := ([^]byte)(dst), ([^]byte)(src)
 		if d == s || len == 0 {
 			return dst
@@ -63,7 +69,7 @@ when ODIN_NO_CRT && ODIN_OS == .Windows {
 		}
 
 		if s > d && uintptr(s)-uintptr(d) < uintptr(len) {
-			for i := 0; i < len; i += 1 {
+			for i := int_t(0); i < len; i += 1 {
 				d[i] = s[i]
 			}
 			return dst
@@ -71,10 +77,10 @@ when ODIN_NO_CRT && ODIN_OS == .Windows {
 		return memcpy(dst, src, len)
 	}
 	@(link_name="memcpy", linkage="strong", require)
-	memcpy :: proc "c" (dst, src: rawptr, len: int) -> rawptr {
+	memcpy :: proc "c" (dst, src: rawptr, #any_int len: int_t) -> rawptr {
 		d, s := ([^]byte)(dst), ([^]byte)(src)
 		if d != s {
-			for i := 0; i < len; i += 1 {
+			for i := int_t(0); i < len; i += 1 {
 				d[i] = s[i]
 			}
 		}
@@ -92,4 +98,4 @@ when ODIN_NO_CRT && ODIN_OS == .Windows {
 		}
 		return ptr
 	}
-}
+}

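To illustrate the `#any_int` mechanics relied on above: the attribute makes the compiler implicitly convert whatever integer type the call site passes into the parameter's declared type. A hypothetical sketch:

	// `fill` is a made-up example, not part of the runtime.
	fill :: proc "c" (ptr: rawptr, val: i32, #any_int len: i32) { /* ... */ }

	n: int = 64
	fill(nil, 0, n) // n is implicitly converted to i32 because of #any_int
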
+ 127 - 0
base/runtime/random_generator.odin

@@ -0,0 +1,127 @@
+package runtime
+
+import "base:intrinsics"
+
+@(require_results)
+random_generator_read_bytes :: proc(rg: Random_Generator, p: []byte) -> bool {
+	if rg.procedure != nil {
+		rg.procedure(rg.data, .Read, p)
+		return true
+	}
+	return false
+}
+
+@(require_results)
+random_generator_read_ptr :: proc(rg: Random_Generator, p: rawptr, len: uint) -> bool {
+	if rg.procedure != nil {
+		rg.procedure(rg.data, .Read, ([^]byte)(p)[:len])
+		return true
+	}
+	return false
+}
+
+@(require_results)
+random_generator_query_info :: proc(rg: Random_Generator) -> (info: Random_Generator_Query_Info) {
+	if rg.procedure != nil {
+		rg.procedure(rg.data, .Query_Info, ([^]byte)(&info)[:size_of(info)])
+	}
+	return
+}
+
+
+random_generator_reset_bytes :: proc(rg: Random_Generator, p: []byte) {
+	if rg.procedure != nil {
+		rg.procedure(rg.data, .Reset, p)
+	}
+}
+
+random_generator_reset_u64 :: proc(rg: Random_Generator, p: u64) {
+	if rg.procedure != nil {
+		p := p
+		rg.procedure(rg.data, .Reset, ([^]byte)(&p)[:size_of(p)])
+	}
+}
+
+
+Default_Random_State :: struct {
+	state: u64,
+	inc:   u64,
+}
+
+default_random_generator_proc :: proc(data: rawptr, mode: Random_Generator_Mode, p: []byte) {
+	@(require_results)
+	read_u64 :: proc "contextless" (r: ^Default_Random_State) -> u64 {
+		old_state := r.state
+		r.state = old_state * 6364136223846793005 + (r.inc|1)
+		xor_shifted := (((old_state >> 59) + 5) ~ old_state) * 12605985483714917081
+		rot := (old_state >> 59)
+		return (xor_shifted >> rot) | (xor_shifted << ((-rot) & 63))
+	}
+
+	@(thread_local)
+	global_rand_seed: Default_Random_State
+
+	init :: proc "contextless" (r: ^Default_Random_State, seed: u64) {
+		seed := seed
+		if seed == 0 {
+			seed = u64(intrinsics.read_cycle_counter())
+		}
+		r.state = 0
+		r.inc = (seed << 1) | 1
+		_ = read_u64(r)
+		r.state += seed
+		_ = read_u64(r)
+	}
+
+	r: ^Default_Random_State = ---
+	if data == nil {
+		r = &global_rand_seed
+	} else {
+		r = cast(^Default_Random_State)data
+	}
+
+	switch mode {
+	case .Read:
+		if r.state == 0 && r.inc == 0 {
+			init(r, 0)
+		}
+
+		switch len(p) {
+		case size_of(u64):
+			// Fast path for a 64-bit destination.
+			intrinsics.unaligned_store((^u64)(raw_data(p)), read_u64(r))
+		case:
+			// All other cases.
+			pos := i8(0)
+			val := u64(0)
+			for &v in p {
+				if pos == 0 {
+					val = read_u64(r)
+					pos = 7
+				}
+				v = byte(val)
+				val >>= 8
+				pos -= 1
+			}
+		}
+
+	case .Reset:
+		seed: u64
+		mem_copy_non_overlapping(&seed, raw_data(p), min(size_of(seed), len(p)))
+		init(r, seed)
+
+	case .Query_Info:
+		if len(p) != size_of(Random_Generator_Query_Info) {
+			return
+		}
+		info := (^Random_Generator_Query_Info)(raw_data(p))
+		info^ += {.Uniform, .Resettable}
+	}
+}
+
+default_random_generator :: proc "contextless" (state: ^Default_Random_State = nil) -> Random_Generator {
+	return {
+		procedure = default_random_generator_proc,
+		data = state,
+	}
+}

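A minimal usage sketch of the new generator (the seed value is arbitrary):

	state: Default_Random_State
	gen := default_random_generator(&state)
	random_generator_reset_u64(gen, 42) // deterministic seed

	buf: [16]byte
	_ = random_generator_read_bytes(gen, buf[:]) // fills buf with the generator's output
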
+ 3 - 3
base/runtime/udivmod128.odin

@@ -58,7 +58,7 @@ udivmod128 :: proc "c" (a, b: u128, rem: ^u128) -> u128 {
 			return u128(n[high] >> _ctz(d[high]))
 		}
 
-		sr = transmute(u32)(i32(_clz(d[high])) - i32(_clz(n[high])))
+		sr = u32(i32(_clz(d[high])) - i32(_clz(n[high])))
 		if sr > U64_BITS - 2 {
 			if rem != nil {
 				rem^ = a
@@ -107,7 +107,7 @@ udivmod128 :: proc "c" (a, b: u128, rem: ^u128) -> u128 {
 				r[low]  = n[high] >> (sr - U64_BITS)
 			}
 		} else {
-			sr = transmute(u32)(i32(_clz(d[high])) - i32(_clz(n[high])))
+			sr = u32(i32(_clz(d[high])) - i32(_clz(n[high])))
 
 			if sr > U64_BITS - 1 {
 				if rem != nil {
@@ -143,7 +143,7 @@ udivmod128 :: proc "c" (a, b: u128, rem: ^u128) -> u128 {
 		r_all = transmute(u128)r
 		s := i128(b - r_all - 1) >> (U128_BITS - 1)
 		carry = u32(s & 1)
-		r_all -= b & transmute(u128)s
+		r_all -= b & u128(s)
 		r = transmute([2]u64)r_all
 	}
 

+ 871 - 0
base/runtime/wasm_allocator.odin

@@ -0,0 +1,871 @@
+//+build wasm32, wasm64p32
+package runtime
+
+import "base:intrinsics"
+
+/*
+Port of emmalloc, modified for use in Odin.
+
+Invariants:
+	- Per-allocation header overhead is 8 bytes, the smallest allocated payload
+	  is 8 bytes, and payload sizes are always a multiple of 4 bytes.
+	- Acquired memory blocks are subdivided into disjoint regions that lie
+	  next to each other.
+	- A region is either in use or free.
+	  Used regions may be adjacent, and a used and unused region
+	  may be adjacent, but not two unused ones - they would be
+	  merged.
+	- Memory allocation takes constant time, unless the alloc needs to wasm_memory_grow()
+	  or memory is very close to being exhausted.
+	- Free and used regions are managed inside "root regions", which are slabs
+	  of memory acquired via wasm_memory_grow().
+	- Memory retrieved using wasm_memory_grow() cannot be given back to the OS.
+	  Therefore, frees are internal to the allocator.
+
+Copyright (c) 2010-2014 Emscripten authors, see AUTHORS file.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+WASM_Allocator :: struct #no_copy {
+	// The minimum alignment of allocations.
+	alignment: uint,
+	// A region that contains as payload a single forward linked list of pointers to
+	// the root regions of each disjoint region block.
+	list_of_all_regions: ^Root_Region,
+	// For each of the buckets, maintain a linked list head node. The head node for each
+	// free region is a sentinel node that does not actually represent any free space, but
+	// the sentinel is used to avoid awkward testing against (if node == freeRegionHeadNode)
+	// when adding and removing elements from the linked list, i.e. we are guaranteed that
+	// the sentinel node is always present, and the actual free region list elements
+	// start at free_region_buckets[i].next.
+	free_region_buckets: [NUM_FREE_BUCKETS]Region,
+	// A bitmask that tracks the population status for each of the 64 distinct free region buckets:
+	// a zero at bit position i means that the free list bucket i is empty. This bitmask is
+	// used to avoid redundant scanning of the 64 different free region buckets: instead by
+	// looking at the bitmask we can find in constant time an index to a free region bucket
+	// that contains free memory of desired size.
+	free_region_buckets_used: BUCKET_BITMASK_T,
+	// Because wasm memory can only be allocated in pages of 64k at a time, we keep any
+	// spilled/unused bytes that are left over from the allocated pages here, and use them
+	// first when more bytes are needed.
+	spill: []byte,
+	// Mutex for thread safety, only used if the target feature "atomics" is enabled.
+	mu: Mutex_State,
+}
+
+// Not required to be called, called on first allocation otherwise.
+wasm_allocator_init :: proc(a: ^WASM_Allocator, alignment: uint = 8) {
+	assert(is_power_of_two(alignment), "alignment must be a power of two")
+	assert(alignment > 4, "alignment must be more than 4")
+
+	a.alignment = alignment
+
+	for i in 0..<NUM_FREE_BUCKETS {
+		a.free_region_buckets[i].next = &a.free_region_buckets[i]
+		a.free_region_buckets[i].prev = a.free_region_buckets[i].next
+	}
+
+	if !claim_more_memory(a, 3*size_of(Region)) {
+		panic("wasm_allocator: initial memory could not be allocated")
+	}
+}
+
+global_default_wasm_allocator_data: WASM_Allocator
+
+default_wasm_allocator :: proc() -> Allocator {
+	return wasm_allocator(&global_default_wasm_allocator_data)
+}
+
+wasm_allocator :: proc(a: ^WASM_Allocator) -> Allocator {
+	return {
+		data      = a,
+		procedure = wasm_allocator_proc,
+	}
+}
+
+wasm_allocator_proc :: proc(a: rawptr, mode: Allocator_Mode, size, alignment: int, old_memory: rawptr, old_size: int, loc := #caller_location) -> ([]byte, Allocator_Error) {
+	a := (^WASM_Allocator)(a)
+	if a == nil {
+		a = &global_default_wasm_allocator_data
+	}
+
+	if a.alignment == 0 {
+		wasm_allocator_init(a)
+	}
+
+	switch mode {
+	case .Alloc:
+		ptr := aligned_alloc(a, uint(alignment), uint(size), loc)
+		if ptr == nil {
+			return nil, .Out_Of_Memory
+		}
+		intrinsics.mem_zero(ptr, size)
+		return ([^]byte)(ptr)[:size], nil
+
+	case .Alloc_Non_Zeroed:
+		ptr := aligned_alloc(a, uint(alignment), uint(size), loc)
+		if ptr == nil {
+			return nil, .Out_Of_Memory
+		}
+		return ([^]byte)(ptr)[:size], nil
+
+	case .Resize:
+		ptr := aligned_realloc(a, old_memory, uint(alignment), uint(size), loc)
+		if ptr == nil {
+			return nil, .Out_Of_Memory
+		}
+
+		bytes := ([^]byte)(ptr)[:size]
+
+		if size > old_size {
+			new_region := raw_data(bytes[old_size:])
+			intrinsics.mem_zero(new_region, size - old_size)
+		}
+
+		return bytes, nil
+
+	case .Resize_Non_Zeroed:
+		ptr := aligned_realloc(a, old_memory, uint(alignment), uint(size), loc)
+		if ptr == nil {
+			return nil, .Out_Of_Memory
+		}
+		return ([^]byte)(ptr)[:size], nil
+
+	case .Free:
+		free(a, old_memory, loc)
+		return nil, nil
+
+	case .Free_All, .Query_Info:
+		return nil, .Mode_Not_Implemented
+
+	case .Query_Features:
+		set := (^Allocator_Mode_Set)(old_memory)
+		if set != nil {
+			set^ = {.Alloc, .Alloc_Non_Zeroed, .Free, .Resize, .Resize_Non_Zeroed, .Query_Features }
+		}
+		return nil, nil
+	}
+
+	unreachable()
+}
+
+// Returns the allocated size of the allocator (both free and used).
+// If `nil` is given, the global allocator is used.
+wasm_allocator_size :: proc(a: ^WASM_Allocator = nil) -> (size: uint) {
+	a := a
+	if a == nil {
+		a = &global_default_wasm_allocator_data
+	}
+
+	lock(a)
+	defer unlock(a)
+
+	root := a.list_of_all_regions
+	for root != nil {
+		size += uint(uintptr(root.end_ptr) - uintptr(root))
+		root = root.next
+	}
+
+	size += uint(len(a.spill))
+
+	return
+}
+
+// Returns the amount of free memory on the allocator.
+// If `nil` is given, the global allocator is used.
+wasm_allocator_free_space :: proc(a: ^WASM_Allocator = nil) -> (free: uint) {
+	a := a
+	if a == nil {
+		a = &global_default_wasm_allocator_data
+	}
+
+	lock(a)
+	defer unlock(a)
+
+	bucket_index: u64 = 0
+	bucket_mask := a.free_region_buckets_used
+
+	for bucket_mask != 0 {
+		index_add := intrinsics.count_trailing_zeros(bucket_mask)
+		bucket_index += index_add
+		bucket_mask >>= index_add
+		for free_region := a.free_region_buckets[bucket_index].next; free_region != &a.free_region_buckets[bucket_index]; free_region = free_region.next {
+			free += free_region.size - REGION_HEADER_SIZE
+		}
+		bucket_index += 1
+		bucket_mask >>= 1
+	}
+
+	free += uint(len(a.spill))
+
+	return
+}
+
+@(private="file")
+NUM_FREE_BUCKETS :: 64
+@(private="file")
+BUCKET_BITMASK_T :: u64
+
+// Dynamic memory is subdivided into regions, in the format
+
+// <size:u32> ..... <size:u32> | <size:u32> ..... <size:u32> | <size:u32> ..... <size:u32> | .....
+
+// That is, at the bottom and top end of each memory region, the size of that region is stored. That allows traversing the
+// memory regions backwards and forwards. Because each allocation must be at least a multiple of 4 bytes, the lowest two bits of
+// each size field are unused. Free regions are distinguished from used regions by having the FREE_REGION_FLAG bit present
+// in the size field. I.e. for free regions, the size field is odd, and for used regions, the size field reads even.
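+//
+// For example, a used 24-byte region stores <24> at both ends, while a free 24-byte region
+// stores <24> at the bottom and <25> (24 | FREE_REGION_FLAG) at the top; this is exactly what
+// create_used_region and create_free_region below write out.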
+@(private="file")
+FREE_REGION_FLAG :: 0x1
+
+// Attempts to alloc more than this many bytes would cause an overflow when calculating the size of a region,
+// therefore allocations larger than this are short-circuited immediately on entry.
+@(private="file")
+MAX_ALLOC_SIZE :: 0xFFFFFFC7
+
+// A free region has the following structure:
+// <size:uint> <prevptr> <nextptr> ... <size:uint>
+
+@(private="file")
+Region :: struct {
+	size: uint,
+	prev, next: ^Region,
+	_at_the_end_of_this_struct_size: uint,
+}
+
+// Each memory block starts with a Root_Region.
+// The Root_Region specifies the size of the region block, and forms a linked
+// list of all Root_Regions in the program, starting with `list_of_all_regions`
+// below.
+@(private="file")
+Root_Region :: struct {
+	size:    u32,
+	next:    ^Root_Region,
+	end_ptr: ^byte,
+}
+
+@(private="file")
+Mutex_State :: enum u32 {
+	Unlocked = 0,
+	Locked   = 1,
+	Waiting  = 2,
+}
+
+@(private="file")
+lock :: proc(a: ^WASM_Allocator) {
+	when intrinsics.has_target_feature("atomics") {
+		@(cold)
+		lock_slow :: proc(a: ^WASM_Allocator, curr_state: Mutex_State) {
+			new_state := curr_state // Make a copy of it
+
+			spin_lock: for spin in 0..<i32(100) {
+				state, ok := intrinsics.atomic_compare_exchange_weak_explicit(&a.mu, .Unlocked, new_state, .Acquire, .Consume)
+				if ok {
+					return
+				}
+
+				if state == .Waiting {
+					break spin_lock
+				}
+
+				for i := min(spin+1, 32); i > 0; i -= 1 {
+					intrinsics.cpu_relax()
+				}
+			}
+
+			// Set just in case 100 iterations did not do it
+			new_state = .Waiting
+
+			for {
+				if intrinsics.atomic_exchange_explicit(&a.mu, .Waiting, .Acquire) == .Unlocked {
+					return
+				}
+
+				ret := intrinsics.wasm_memory_atomic_wait32((^u32)(&a.mu), u32(new_state), -1)
+				assert(ret != 0)
+				intrinsics.cpu_relax()
+			}
+		}
+
+
+		if v := intrinsics.atomic_exchange_explicit(&a.mu, .Locked, .Acquire); v != .Unlocked {
+			lock_slow(a, v)
+		}
+	}
+}
+
+@(private="file")
+unlock :: proc(a: ^WASM_Allocator) {
+	when intrinsics.has_target_feature("atomics") {
+		@(cold)
+		unlock_slow :: proc(a: ^WASM_Allocator) {
+			for {
+				s := intrinsics.wasm_memory_atomic_notify32((^u32)(&a.mu), 1)
+				if s >= 1 {
+					return
+				}
+			}
+		}
+
+		switch intrinsics.atomic_exchange_explicit(&a.mu, .Unlocked, .Release) {
+		case .Unlocked:
+			unreachable()
+		case .Locked:
+		// Okay
+		case .Waiting:
+			unlock_slow(a)
+		}
+	}
+}
+
+@(private="file")
+assert_locked :: proc(a: ^WASM_Allocator) {
+	when intrinsics.has_target_feature("atomics") {
+		assert(intrinsics.atomic_load(&a.mu) != .Unlocked)
+	}
+}
+
+@(private="file")
+has_alignment_uintptr :: proc(ptr: uintptr, #any_int alignment: uintptr) -> bool {
+	return ptr & (alignment-1) == 0
+}
+
+@(private="file")
+has_alignment_uint :: proc(ptr: uint, alignment: uint) -> bool {
+	return ptr & (alignment-1) == 0
+}
+
+@(private="file")
+has_alignment :: proc {
+	has_alignment_uintptr,
+	has_alignment_uint,
+}
+
+@(private="file")
+REGION_HEADER_SIZE :: 2*size_of(uint)
+
+@(private="file")
+SMALLEST_ALLOCATION_SIZE :: 2*size_of(rawptr)
+
+// Subdivide regions of free space into distinct circular doubly linked lists, where each linked list
+// represents a range of free space blocks. The following function compute_free_list_bucket() converts
+// an allocation size to the bucket index that should be looked at.
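+// For example, sizes below 128 bytes map linearly: a 24-byte payload lands in bucket
+// (24 >> 3) - 1 = 2, while larger sizes fall into roughly logarithmically spaced buckets
+// derived from the leading-zero count of the size.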
+#assert(NUM_FREE_BUCKETS == 64, "Following function is tailored specifically for the NUM_FREE_BUCKETS == 64 case")
+@(private="file")
+compute_free_list_bucket :: proc(size: uint) -> uint {
+	if size < 128 { return (size >> 3) - 1 }
+
+	clz := intrinsics.count_leading_zeros(i32(size))
+	bucket_index: i32 = ((clz > 19) \
+		?     110 - (clz<<2) + ((i32)(size >> (u32)(29-clz)) ~ 4) \
+		: min( 71 - (clz<<1) + ((i32)(size >> (u32)(30-clz)) ~ 2), NUM_FREE_BUCKETS-1))
+
+	assert(bucket_index >= 0)
+	assert(bucket_index < NUM_FREE_BUCKETS)
+	return uint(bucket_index)
+}
+
+@(private="file")
+prev_region :: proc(region: ^Region) -> ^Region {
+	prev_region_size := ([^]uint)(region)[-1]
+	prev_region_size  = prev_region_size & ~uint(FREE_REGION_FLAG)
+	return (^Region)(uintptr(region)-uintptr(prev_region_size))
+}
+
+@(private="file")
+next_region :: proc(region: ^Region) -> ^Region {
+	return (^Region)(uintptr(region)+uintptr(region.size))
+}
+
+@(private="file")
+region_ceiling_size :: proc(region: ^Region) -> uint {
+	return ([^]uint)(uintptr(region)+uintptr(region.size))[-1]
+}
+
+@(private="file")
+region_is_free :: proc(r: ^Region) -> bool {
+	return region_ceiling_size(r) & FREE_REGION_FLAG >= 1
+}
+
+@(private="file")
+region_is_in_use :: proc(r: ^Region) -> bool {
+	return r.size == region_ceiling_size(r)
+}
+
+@(private="file")
+region_payload_start_ptr :: proc(r: ^Region) -> [^]byte {
+	return ([^]byte)(r)[size_of(uint):]
+}
+
+@(private="file")
+region_payload_end_ptr :: proc(r: ^Region) -> [^]byte {
+	return ([^]byte)(r)[r.size-size_of(uint):]
+}
+
+@(private="file")
+create_used_region :: proc(ptr: rawptr, size: uint) {
+	assert(has_alignment(uintptr(ptr), size_of(uint)))
+	assert(has_alignment(size, size_of(uint)))
+	assert(size >= size_of(Region))
+
+	uptr := ([^]uint)(ptr)
+	uptr[0] = size
+	uptr[size/size_of(uint)-1] = size
+}
+
+@(private="file")
+create_free_region :: proc(ptr: rawptr, size: uint) {
+	assert(has_alignment(uintptr(ptr), size_of(uint)))
+	assert(has_alignment(size, size_of(uint)))
+	assert(size >= size_of(Region))
+
+	free_region := (^Region)(ptr)
+	free_region.size = size
+	([^]uint)(ptr)[size/size_of(uint)-1] = size | FREE_REGION_FLAG
+}
+
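+// Links `region` immediately before `prepend_to`. When `prepend_to` is a
+// bucket's sentinel head, this places the region at the tail of that circular
+// list.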
+@(private="file")
+prepend_to_free_list :: proc(region: ^Region, prepend_to: ^Region) {
+	assert(region_is_free(region))
+	region.next = prepend_to
+	region.prev = prepend_to.prev
+	prepend_to.prev = region
+	region.prev.next = region
+}
+
+@(private="file")
+unlink_from_free_list :: proc(region: ^Region) {
+	assert(region_is_free(region))
+	region.prev.next = region.next
+	region.next.prev = region.prev
+}
+
+@(private="file")
+link_to_free_list :: proc(a: ^WASM_Allocator, free_region: ^Region) {
+	assert(free_region.size >= size_of(Region))
+	bucket_index := compute_free_list_bucket(free_region.size-REGION_HEADER_SIZE)
+	free_list_head := &a.free_region_buckets[bucket_index]
+	free_region.prev = free_list_head
+	free_region.next = free_list_head.next
+	free_list_head.next = free_region
+	free_region.next.prev = free_region
+	a.free_region_buckets_used |= BUCKET_BITMASK_T(1) << bucket_index
+}
+
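+// Grows the linear memory via wasm_memory_grow and links the new space in,
+// either by extending the previous root region (when the addresses happen to
+// be contiguous) or by starting a fresh Root_Region. Each root region is
+// bracketed by sentinel used regions so that coalescing in free() can never
+// walk past the ends of a block.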
+@(private="file")
+claim_more_memory :: proc(a: ^WASM_Allocator, num_bytes: uint) -> bool {
+
+	PAGE_SIZE :: 64 * 1024
+
+	page_alloc :: proc(page_count: int) -> []byte {
+		prev_page_count := intrinsics.wasm_memory_grow(0, uintptr(page_count))
+		if prev_page_count < 0 { return nil }
+
+		ptr := ([^]byte)(uintptr(prev_page_count) * PAGE_SIZE)
+		return ptr[:page_count * PAGE_SIZE]
+	}
+
+	alloc :: proc(a: ^WASM_Allocator, num_bytes: uint) -> (bytes: [^]byte) #no_bounds_check {
+		if uint(len(a.spill)) >= num_bytes {
+			bytes = raw_data(a.spill[:num_bytes])
+			a.spill = a.spill[num_bytes:]
+			return
+		}
+
+		pages := int((num_bytes / PAGE_SIZE) + 1)
+		allocated := page_alloc(pages)
+		if allocated == nil { return nil }
+
+		// If the allocated memory is a direct continuation of the spill from before,
+		// we can just extend the spill.
+		spill_end := uintptr(raw_data(a.spill)) + uintptr(len(a.spill))
+		if spill_end == uintptr(raw_data(allocated)) {
+			raw_spill := (^Raw_Slice)(&a.spill)
+			raw_spill.len += len(allocated)
+		} else {
+			// Otherwise, we have to "waste" the previous spill.
+			// This is probably uncommon, and will only happen if another code path
+			// is also requesting pages.
+			a.spill = allocated
+		}
+
+		bytes = raw_data(a.spill)
+		a.spill = a.spill[num_bytes:]
+		return
+	}
+
+	num_bytes := num_bytes
+	num_bytes  = align_forward(num_bytes, a.alignment)
+
+	start_ptr := alloc(a, uint(num_bytes))
+	if start_ptr == nil { return false }
+
+	assert(has_alignment(uintptr(start_ptr), align_of(uint)))
+	end_ptr := start_ptr[num_bytes:]
+
+	end_sentinel_region := (^Region)(end_ptr[-size_of(Region):])
+	create_used_region(end_sentinel_region, size_of(Region))
+
+	// If we are the sole user of wasm_memory_grow(), it will feed us contiguous/consecutive memory addresses - take advantage
+	// of that if so: instead of creating two disjoint memory region blocks, expand the previous one to a larger size.
+	prev_alloc_end_address := a.list_of_all_regions != nil ? a.list_of_all_regions.end_ptr : nil
+	if start_ptr == prev_alloc_end_address {
+		prev_end_sentinel := prev_region((^Region)(start_ptr))
+		assert(region_is_in_use(prev_end_sentinel))
+		prev_region := prev_region(prev_end_sentinel)
+
+		a.list_of_all_regions.end_ptr = end_ptr
+
+		// Two scenarios: either the last region of the previous block was in use, in which case we need to create
+		// a new free region in the newly allocated space; or it was free, in which case we can extend that region
+		// to cover a larger size.
+		if region_is_free(prev_region) {
+			new_free_region_size := uint(uintptr(end_sentinel_region) - uintptr(prev_region))
+			unlink_from_free_list(prev_region)
+			create_free_region(prev_region, new_free_region_size)
+			link_to_free_list(a, prev_region)
+			return true
+		}
+
+		start_ptr = start_ptr[-size_of(Region):]
+	} else {
+		create_used_region(start_ptr, size_of(Region))
+
+		new_region_block := (^Root_Region)(start_ptr)
+		new_region_block.next = a.list_of_all_regions
+		new_region_block.end_ptr = end_ptr
+		a.list_of_all_regions = new_region_block
+		start_ptr = start_ptr[size_of(Region):]
+	}
+
+	create_free_region(start_ptr, uint(uintptr(end_sentinel_region)-uintptr(start_ptr)))
+	link_to_free_list(a, (^Region)(start_ptr))
+	return true
+}
+
+@(private="file")
+validate_alloc_size :: proc(size: uint) -> uint {
+	#assert(size_of(uint) >= size_of(uintptr))
+	#assert(size_of(uint)  % size_of(uintptr) == 0)
+
+	// NOTE: emmalloc aligns this forward on pointer size, but I think that is a mistake and will
+	// misbehave on wasm64p32.
+
+	validated_size := size > SMALLEST_ALLOCATION_SIZE ? align_forward(size, size_of(uint)) : SMALLEST_ALLOCATION_SIZE
+	assert(validated_size >= size) // Assert we haven't wrapped.
+
+	return validated_size
+}
+
+@(private="file")
+allocate_memory :: proc(a: ^WASM_Allocator, alignment: uint, size: uint, loc := #caller_location) -> rawptr {
+
+	attempt_allocate :: proc(a: ^WASM_Allocator, free_region: ^Region, alignment, size: uint) -> rawptr {
+		assert_locked(a)
+		free_region := free_region
+
+		payload_start_ptr := uintptr(region_payload_start_ptr(free_region))
+		payload_start_ptr_aligned := align_forward(payload_start_ptr, uintptr(alignment))
+		payload_end_ptr := uintptr(region_payload_end_ptr(free_region))
+
+		if payload_start_ptr_aligned + uintptr(size) > payload_end_ptr {
+			return nil
+		}
+
+		// We have enough free space, so the memory allocation will be made into this region. Remove this free region
+		// from the list of free regions: whatever slop remains will be later added back to the free region pool.
+		unlink_from_free_list(free_region)
+
+		// Before we proceed further, fix up the boundary between this and the preceding region,
+		// so that the boundary between the two regions lands at the right spot for the payload to be aligned.
+		if payload_start_ptr != payload_start_ptr_aligned {
+			prev := prev_region(free_region)
+			assert(region_is_in_use(prev))
+			region_boundary_bump_amount := payload_start_ptr_aligned - payload_start_ptr
+			new_this_region_size := free_region.size - uint(region_boundary_bump_amount)
+			create_used_region(prev, prev.size + uint(region_boundary_bump_amount))
+			free_region = (^Region)(uintptr(free_region) + region_boundary_bump_amount)
+			free_region.size = new_this_region_size
+		}
+
+		// Next, we need to decide whether this region is so large that it should be split into two regions,
+		// one representing the newly used memory area, and at the high end a remaining leftover free area.
+		// This splitting to two is done always if there is enough space for the high end to fit a region.
+		// Carve 'size' bytes of payload off this region. So,
+		// [sz prev next sz]
+		// becomes
+		// [sz payload sz] [sz prev next sz]
+		if size_of(Region) + REGION_HEADER_SIZE + size <= free_region.size {
+			new_free_region := (^Region)(uintptr(free_region) + REGION_HEADER_SIZE + uintptr(size))
+			create_free_region(new_free_region, free_region.size - size - REGION_HEADER_SIZE)
+			link_to_free_list(a, new_free_region)
+			create_used_region(free_region, size + REGION_HEADER_SIZE)
+		} else {
+			// There is not enough space to split the free memory region into used+free parts, so consume the whole
+			// region as used memory, not leaving a free memory region behind.
+			// Initialize the free region as used by resetting the ceiling size to the same value as the size at bottom.
+			([^]uint)(uintptr(free_region) + uintptr(free_region.size))[-1] = free_region.size
+		}
+
+		return rawptr(uintptr(free_region) + size_of(uint))
+	}
+
+	assert_locked(a)
+	assert(is_power_of_two(alignment))
+	assert(size <= MAX_ALLOC_SIZE, "allocation too big", loc=loc)
+
+	alignment := alignment
+	alignment  = max(alignment, a.alignment)
+
+	size := size
+	size  = validate_alloc_size(size)
+
+	// Attempt to allocate memory starting from the smallest bucket that can contain the required amount of memory.
+	// Under normal alignment conditions this should always be the first or second bucket we look at, but if
+	// performing an allocation with a complex alignment, we may need to look at multiple buckets.
+	bucket_index := compute_free_list_bucket(size)
+	bucket_mask := a.free_region_buckets_used >> bucket_index
+
+	// Loop through each bucket that has free regions in it, based on bits set in free_region_buckets_used bitmap.
+	for bucket_mask != 0 {
+		index_add := intrinsics.count_trailing_zeros(bucket_mask)
+		bucket_index += uint(index_add)
+		bucket_mask >>= index_add
+		assert(bucket_index <= NUM_FREE_BUCKETS-1)
+		assert(a.free_region_buckets_used & (BUCKET_BITMASK_T(1) << bucket_index) > 0)
+
+		free_region := a.free_region_buckets[bucket_index].next
+		assert(free_region != nil)
+		if free_region != &a.free_region_buckets[bucket_index] {
+			ptr := attempt_allocate(a, free_region, alignment, size)
+			if ptr != nil {
+				return ptr
+			}
+
+			// We were not able to allocate from the first region found in this bucket, so penalize
+			// the region by cycling it to the end of the doubly circular linked list (constant time).
+			// This provides a randomized guarantee that when performing allocations of size k to a
+			// bucket covering the range [k-something, k+something], we will not always attempt to satisfy
+			// the allocation from the same available region at the front of the list, but will try each
+			// region in turn.
+			unlink_from_free_list(free_region)
+			prepend_to_free_list(free_region, &a.free_region_buckets[bucket_index])
+			// But do not stick around to look at the other regions in this bucket - move on
+			// to search the next populated bucket index if this one did not fit. This gives a practical
+			// "allocation in constant time" guarantee, since the next higher bucket will only have
+			// regions that are all of strictly larger size than the requested allocation. Only if
+			// there is a difficult alignment requirement may we fail to perform the allocation from
+			// a region in the next bucket, and if so, we keep trying higher buckets until one of them
+			// works.
+			bucket_index += 1
+			bucket_mask >>= 1
+		} else {
+			// This bucket turned out not to contain any regions after all - we just had a
+			// stale bit set marking it as populated. Reset the bit so that we do not
+			// redundantly look at this bucket again.
+			a.free_region_buckets_used &~= BUCKET_BITMASK_T(1) << bucket_index
+			bucket_mask ~= 1
+		}
+
+		assert((bucket_index == NUM_FREE_BUCKETS && bucket_mask == 0) || (bucket_mask == a.free_region_buckets_used >> bucket_index))
+	}
+
+	// None of the buckets were able to accommodate the allocation. If this happens we are almost out of memory.
+	// The largest bucket might contain some suitable regions, but we only looked at one region in that bucket, so
+	// as a last resort, loop through more free regions in the bucket that represents the largest allocations available.
+	// But only do so if that bucket is not one of the first thirty buckets, as those represent allocatable
+	// areas of less than 2048 bytes - which could be a lot of scrap.
+	// In that case, prefer to claim more memory right away.
+	largest_bucket_index := NUM_FREE_BUCKETS - 1 - intrinsics.count_leading_zeros(a.free_region_buckets_used)
+	// free_region will be nil if there is absolutely no memory left (all buckets are 100% used).
+	free_region := a.free_region_buckets_used > 0 ? a.free_region_buckets[largest_bucket_index].next : nil
+	// The first 30 free region buckets cover memory blocks < 2048 bytes, so skip looking at those here (too small)
+	if a.free_region_buckets_used >> 30 > 0 {
+		// Look only at a constant number of regions in this bucket max, to avoid bad worst case behavior.
+		// If this many regions cannot find free space, we give up and prefer to claim more memory instead.
+		max_regions_to_try_before_giving_up :: 99
+		num_tries_left := max_regions_to_try_before_giving_up
+		for ; free_region != &a.free_region_buckets[largest_bucket_index] && num_tries_left > 0; num_tries_left -= 1 {
+			ptr := attempt_allocate(a, free_region, alignment, size)
+			if ptr != nil {
+				return ptr
+			}
+			free_region = free_region.next
+		}
+	}
+
+	// We were unable to find a free memory region. Must claim more memory!
+	num_bytes_to_claim := size+size_of(Region)*3
+	if alignment > a.alignment {
+		num_bytes_to_claim += alignment
+	}
+	success := claim_more_memory(a, num_bytes_to_claim)
+	if success {
+		// Try to allocate again with the newly available memory.
+		return allocate_memory(a, alignment, size, loc)
+	}
+
+	// claim_more_memory also failed, so we are severely memory-constrained :( As a last resort, go back to
+	// looking at the bucket we already examined above, continuing where the search left off - perhaps there
+	// are regions we overlooked the first time that might be able to satisfy the allocation.
+	if free_region != nil {
+		for free_region != &a.free_region_buckets[largest_bucket_index] {
+			ptr := attempt_allocate(a, free_region, alignment, size)
+			if ptr != nil {
+				return ptr
+			}
+			free_region = free_region.next
+		}
+	}
+
+	// Fully out of memory.
+	return nil
+}
+
+@(private="file")
+aligned_alloc :: proc(a: ^WASM_Allocator, alignment, size: uint, loc := #caller_location) -> rawptr {
+	lock(a)
+	defer unlock(a)
+
+	return allocate_memory(a, alignment, size, loc)
+}
+
+@(private="file")
+free :: proc(a: ^WASM_Allocator, ptr: rawptr, loc := #caller_location) {
+	if ptr == nil {
+		return
+	}
+
+	region_start_ptr := uintptr(ptr) - size_of(uint)
+	region := (^Region)(region_start_ptr)
+	assert(has_alignment(region_start_ptr, size_of(uint)))
+
+	lock(a)
+	defer unlock(a)
+
+	size := region.size
+	assert(region_is_in_use(region), "double free or corrupt region", loc=loc)
+
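+	// Coalesce with the neighbouring regions where possible: FREE_REGION_FLAG
+	// is only ever set in a region's ceiling size word, so whenever the two
+	// size copies of a neighbour disagree, that neighbour is free and can be
+	// merged in O(1).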
+	prev_region_size_field := ([^]uint)(region)[-1]
+	prev_region_size := prev_region_size_field & ~uint(FREE_REGION_FLAG)
+	if prev_region_size_field != prev_region_size {
+		prev_region := (^Region)(uintptr(region) - uintptr(prev_region_size))
+		unlink_from_free_list(prev_region)
+		region_start_ptr = uintptr(prev_region)
+		size += prev_region_size
+	}
+
+	next_reg := next_region(region)
+	size_at_end := (^uint)(region_payload_end_ptr(next_reg))^
+	if next_reg.size != size_at_end {
+		unlink_from_free_list(next_reg)
+		size += next_reg.size
+	}
+
+	create_free_region(rawptr(region_start_ptr), size)
+	link_to_free_list(a, (^Region)(region_start_ptr))
+}
+
+@(private="file")
+aligned_realloc :: proc(a: ^WASM_Allocator, ptr: rawptr, alignment, size: uint, loc := #caller_location) -> rawptr {
+
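+	// Tries to satisfy the resize without moving the payload: grow into a free
+	// successor region, or shrink in place by splitting off a new free region.
+	// Returns false when the caller must allocate-and-copy instead.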
+	attempt_region_resize :: proc(a: ^WASM_Allocator, region: ^Region, size: uint) -> bool {
+		lock(a)
+		defer unlock(a)
+
+		// First attempt to resize this region, if the next region that follows this one
+		// is a free region.
+		next_reg := next_region(region)
+		next_region_end_ptr := uintptr(next_reg) + uintptr(next_reg.size)
+		size_at_ceiling := ([^]uint)(next_region_end_ptr)[-1]
+		if next_reg.size != size_at_ceiling { // Next region is free?
+			assert(region_is_free(next_reg))
+			new_next_region_start_ptr := uintptr(region) + uintptr(size)
+			assert(has_alignment(new_next_region_start_ptr, size_of(uint)))
+			// Would the next region remain large enough after shrinking?
+			if new_next_region_start_ptr + size_of(Region) <= next_region_end_ptr {
+				unlink_from_free_list(next_reg)
+				create_free_region(rawptr(new_next_region_start_ptr), uint(next_region_end_ptr - new_next_region_start_ptr))
+				link_to_free_list(a, (^Region)(new_next_region_start_ptr))
+				create_used_region(region, uint(new_next_region_start_ptr - uintptr(region)))
+				return true
+			}
+			// Otherwise, is the allocation satisfied if we consume the next region altogether?
+			if new_next_region_start_ptr <= next_region_end_ptr {
+				unlink_from_free_list(next_reg)
+				create_used_region(region, region.size + next_reg.size)
+				return true
+			}
+		} else {
+			// Next region is a used region - we cannot change its starting address. However, if we are shrinking
+			// the size of this region, we can create a new free region between this and the next used region.
+			if size + size_of(Region) <= region.size {
+				free_region_size := region.size - size
+				create_used_region(region, size)
+				free_region := (^Region)(uintptr(region) + uintptr(size))
+				create_free_region(free_region, free_region_size)
+				link_to_free_list(a, free_region)
+				return true
+			} else if size <= region.size {
+				// Caller was asking to shrink the size, but since we cannot fit a full Region in the freed-up
+				// area, we cannot actually do anything. This occurs if the shrink amount is really small. In
+				// that case, just call it a success without doing any work.
+			}
+		}
+
+		return false
+	}
+
+	if ptr == nil {
+		return aligned_alloc(a, alignment, size, loc)
+	}
+
+	if size == 0 {
+		free(a, ptr, loc)
+		return nil
+	}
+
+	if size > MAX_ALLOC_SIZE {
+		return nil
+	}
+
+	assert(is_power_of_two(alignment))
+	assert(has_alignment(uintptr(ptr), alignment), "realloc on different alignment than original allocation", loc=loc)
+
+	size := size
+	size  = validate_alloc_size(size)
+
+	region := (^Region)(uintptr(ptr) - size_of(uint))
+
+	// Attempt an in-place resize.
+	if attempt_region_resize(a, region, size + REGION_HEADER_SIZE) {
+		return ptr
+	}
+
+	// Can't do it in-place, so allocate a new region and copy the payload over.
+	newptr := aligned_alloc(a, alignment, size, loc)
+	if newptr != nil {
+		intrinsics.mem_copy(newptr, ptr, min(size, region.size - REGION_HEADER_SIZE))
+		free(a, ptr, loc=loc)
+	}
+
+	return newptr
+}

BIN
bin/llvm/windows/LLVM-C.lib


BIN
bin/llvm/windows/clang_rt.asan-x86_64.lib


+ 4 - 1
build.bat

@@ -48,6 +48,9 @@ if "%2" == "1" (
 set odin_version_raw="dev-%curr_year%-%curr_month%"
 
 set compiler_flags= -nologo -Oi -TP -fp:precise -Gm- -MP -FC -EHsc- -GR- -GF
+rem Parse source code as utf-8 even on shift-jis and other codepages
+rem See https://learn.microsoft.com/en-us/cpp/build/reference/utf-8-set-source-and-executable-character-sets-to-utf-8?view=msvc-170
+set compiler_flags= %compiler_flags% /utf-8
 set compiler_defines= -DODIN_VERSION_RAW=\"%odin_version_raw%\"
 
 if not exist .git\ goto skip_git_hash
@@ -111,7 +114,7 @@ call build_vendor.bat
 if %errorlevel% neq 0 goto end_of_build
 
 rem If the demo doesn't run for you and your CPU is more than a decade old, try -microarch:native
-if %release_mode% EQU 0 odin run examples/demo -- Hellope World
+if %release_mode% EQU 0 odin run examples/demo -vet -strict-style -- Hellope World
 
 del *.obj > NUL 2> NUL
 

+ 35 - 14
build_odin.sh

@@ -2,7 +2,6 @@
 set -eu
 
 : ${CPPFLAGS=}
-: ${CXX=clang++}
 : ${CXXFLAGS=}
 : ${LDFLAGS=}
 : ${LLVM_CONFIG=}
@@ -26,17 +25,19 @@ error() {
 
 if [ -z "$LLVM_CONFIG" ]; then
 	# darwin, linux, openbsd
-	if   [ -n "$(command -v llvm-config-17)" ]; then LLVM_CONFIG="llvm-config-17"
+	if   [ -n "$(command -v llvm-config-18)" ]; then LLVM_CONFIG="llvm-config-18"
+	elif [ -n "$(command -v llvm-config-17)" ]; then LLVM_CONFIG="llvm-config-17"
 	elif [ -n "$(command -v llvm-config-14)" ]; then LLVM_CONFIG="llvm-config-14"
 	elif [ -n "$(command -v llvm-config-13)" ]; then LLVM_CONFIG="llvm-config-13"
 	elif [ -n "$(command -v llvm-config-12)" ]; then LLVM_CONFIG="llvm-config-12"
 	elif [ -n "$(command -v llvm-config-11)" ]; then LLVM_CONFIG="llvm-config-11"
 	# freebsd
-	elif [ -n "$(command -v llvm-config17)" ]; then  LLVM_CONFIG="llvm-config-17"
-	elif [ -n "$(command -v llvm-config14)" ]; then  LLVM_CONFIG="llvm-config-14"
-	elif [ -n "$(command -v llvm-config13)" ]; then  LLVM_CONFIG="llvm-config-13"
-	elif [ -n "$(command -v llvm-config12)" ]; then  LLVM_CONFIG="llvm-config-12"
-	elif [ -n "$(command -v llvm-config11)" ]; then  LLVM_CONFIG="llvm-config-11"
+	elif [ -n "$(command -v llvm-config18)" ]; then  LLVM_CONFIG="llvm-config18"
+	elif [ -n "$(command -v llvm-config17)" ]; then  LLVM_CONFIG="llvm-config17"
+	elif [ -n "$(command -v llvm-config14)" ]; then  LLVM_CONFIG="llvm-config14"
+	elif [ -n "$(command -v llvm-config13)" ]; then  LLVM_CONFIG="llvm-config13"
+	elif [ -n "$(command -v llvm-config12)" ]; then  LLVM_CONFIG="llvm-config12"
+	elif [ -n "$(command -v llvm-config11)" ]; then  LLVM_CONFIG="llvm-config11"
 	# fallback
 	elif [ -n "$(command -v llvm-config)" ]; then LLVM_CONFIG="llvm-config"
 	else
@@ -44,31 +45,51 @@ if [ -z "$LLVM_CONFIG" ]; then
 	fi
 fi
 
+if [ -x "$(which clang++)" ]; then
+	: ${CXX="clang++"}
+elif [ -x "$($LLVM_CONFIG --bindir)/clang++" ]; then
+	: ${CXX=$($LLVM_CONFIG --bindir)/clang++}
+else
+	error "No clang++ command found. Set CXX to proceed."
+fi
+
 LLVM_VERSION="$($LLVM_CONFIG --version)"
 LLVM_VERSION_MAJOR="$(echo $LLVM_VERSION | awk -F. '{print $1}')"
 LLVM_VERSION_MINOR="$(echo $LLVM_VERSION | awk -F. '{print $2}')"
 LLVM_VERSION_PATCH="$(echo $LLVM_VERSION | awk -F. '{print $3}')"
 
-if [ $LLVM_VERSION_MAJOR -lt 11 ] ||
-	([ $LLVM_VERSION_MAJOR -gt 14 ] && [ $LLVM_VERSION_MAJOR -lt 17 ]); then
-	error "Invalid LLVM version $LLVM_VERSION: must be 11, 12, 13, 14 or 17"
+if [ $LLVM_VERSION_MAJOR -lt 11 ] || ([ $LLVM_VERSION_MAJOR -gt 14 ] && [ $LLVM_VERSION_MAJOR -lt 17 ]) || [ $LLVM_VERSION_MAJOR -gt 18 ]; then
+	error "Invalid LLVM version $LLVM_VERSION: must be 11, 12, 13, 14, 17 or 18"
 fi
 
 case "$OS_NAME" in
 Darwin)
 	if [ "$OS_ARCH" = "arm64" ]; then
-		if [ $LLVM_VERSION_MAJOR -lt 13 ] || [ $LLVM_VERSION_MAJOR -gt 17 ]; then
-			error "Darwin Arm64 requires LLVM 13, 14 or 17"
+		if [ $LLVM_VERSION_MAJOR -lt 13 ]; then
+			error "Invalid LLVM version $LLVM_VERSION: Darwin Arm64 requires LLVM 13, 14, 17 or 18"
 		fi
 	fi
 
-	CXXFLAGS="$CXXFLAGS $($LLVM_CONFIG --cxxflags --ldflags)"
+	darwin_sysroot=
+	if [ $(which xcrun) ]; then
+		darwin_sysroot="--sysroot $(xcrun --sdk macosx --show-sdk-path)"
+	elif [[ -e "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk" ]]; then
+		darwin_sysroot="--sysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk"
+	else
+		echo "Warning: MacOSX.sdk not found."
+	fi
+
+	CXXFLAGS="$CXXFLAGS $($LLVM_CONFIG --cxxflags --ldflags) ${darwin_sysroot}"
 	LDFLAGS="$LDFLAGS -liconv -ldl -framework System -lLLVM"
 	;;
 FreeBSD)
 	CXXFLAGS="$CXXFLAGS $($LLVM_CONFIG --cxxflags --ldflags)"
 	LDFLAGS="$LDFLAGS $($LLVM_CONFIG --libs core native --system-libs)"
 	;;
+NetBSD)
+	CXXFLAGS="$CXXFLAGS $($LLVM_CONFIG --cxxflags --ldflags)"
+	LDFLAGS="$LDFLAGS $($LLVM_CONFIG --libs core native --system-libs)"
+	;;
 Linux)
 	CXXFLAGS="$CXXFLAGS $($LLVM_CONFIG --cxxflags --ldflags)"
 	LDFLAGS="$LDFLAGS -ldl $($LLVM_CONFIG --libs core native --system-libs --libfiles)"
@@ -123,7 +144,7 @@ build_odin() {
 }
 
 run_demo() {
-	./odin run examples/demo/demo.odin -file -- Hellope World
+	./odin run examples/demo -vet -strict-style -- Hellope World
 }
 
 if [ $# -eq 0 ]; then

+ 0 - 51
ci/create_nightly_json.py

@@ -1,51 +0,0 @@
-import subprocess
-import sys
-import json
-import datetime
-import urllib.parse
-import sys
-
-def main():
-    files_by_date = {}
-    bucket = sys.argv[1]
-
-    files_lines = execute_cli(f"b2 ls --long {bucket} nightly").split("\n")
-    for x in files_lines:
-        parts = x.split(" ", 1)
-        if parts[0]:
-            json_str = execute_cli(f"b2 get-file-info {parts[0]}")
-            data = json.loads(json_str)
-            name = remove_prefix(data['fileName'], "nightly/")
-            url = f"https://f001.backblazeb2.com/file/{bucket}/nightly/{urllib.parse.quote_plus(name)}"
-            sha1 = data['contentSha1']
-            size = int(data['size'])
-            ts = int(data['fileInfo']['src_last_modified_millis'])
-            date = datetime.datetime.fromtimestamp(ts/1000).strftime('%Y-%m-%d')
-            
-            if date not in files_by_date.keys():
-                files_by_date[date] = []
-
-            files_by_date[date].append({
-                                            'name': name,
-                                            'url': url,
-                                            'sha1': sha1,
-                                            'sizeInBytes': size,
-                                         })
-
-    now = datetime.datetime.utcnow().isoformat()
-
-    print(json.dumps({
-                        'last_updated' : now,
-                        'files': files_by_date
-                     }, sort_keys=True, indent=4))
-
-def remove_prefix(text, prefix):
-    return text[text.startswith(prefix) and len(prefix):]
-
-def execute_cli(command):
-    sb = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
-    return sb.stdout.read().decode("utf-8");
-
-if __name__ == '__main__':
-    sys.exit(main())
-

+ 0 - 34
ci/delete_old_binaries.py

@@ -1,34 +0,0 @@
-import subprocess
-import sys
-import json
-import datetime
-import urllib.parse
-import sys
-
-def main():
-    files_by_date = {}
-    bucket = sys.argv[1]
-    days_to_keep = int(sys.argv[2])
-    print(f"Looking for binaries to delete older than {days_to_keep} days")
-
-    files_lines = execute_cli(f"b2 ls --long --versions {bucket} nightly").split("\n")
-    for x in files_lines:
-        parts = [y for y in x.split(' ') if y]
-
-        if parts and parts[0]:
-            date = datetime.datetime.strptime(parts[2], '%Y-%m-%d').replace(hour=0, minute=0, second=0, microsecond=0)
-            now = datetime.datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
-            delta = now - date
-
-            if delta.days > days_to_keep:
-                print(f'Deleting {parts[5]}')
-                execute_cli(f'b2 delete-file-version {parts[0]}')
-
-
-def execute_cli(command):
-    sb = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
-    return sb.stdout.read().decode("utf-8");
-
-if __name__ == '__main__':
-    sys.exit(main())
-

+ 140 - 0
ci/nightly.py

@@ -0,0 +1,140 @@
+import os
+import sys
+from zipfile  import ZipFile, ZIP_DEFLATED
+from b2sdk.v2 import InMemoryAccountInfo, B2Api
+from datetime import datetime
+import json
+
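+# CI helper that folds the previous create_nightly_json.py, delete_old_binaries.py
+# and upload_create_nightly.sh scripts into a single B2-backed tool. Credentials
+# and retention are taken from the APPID, APPKEY, BUCKET and DAYS_TO_KEEP
+# environment variables.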
+UPLOAD_FOLDER = "nightly/"
+
+info   = InMemoryAccountInfo()
+b2_api = B2Api(info)
+application_key_id = os.environ['APPID']
+application_key    = os.environ['APPKEY']
+bucket_name        = os.environ['BUCKET']
+days_to_keep       = os.environ['DAYS_TO_KEEP']
+
+def auth() -> bool:
+	try:
+		b2_api.account_info.get_realm()
+		return True # Already authenticated
+	except Exception:
+		pass        # Not yet authenticated
+
+	err = b2_api.authorize_account("production", application_key_id, application_key)
+	return err is None
+
+def get_bucket():
+	if not auth(): sys.exit(1)
+	return b2_api.get_bucket_by_name(bucket_name)
+
+def remove_prefix(text: str, prefix: str) -> str:
+	return text[text.startswith(prefix) and len(prefix):]
+
+def create_and_upload_artifact_zip(platform: str, artifact: str) -> int:
+	now = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
+	destination_zip_name = "odin-{}-nightly+{}.zip".format(platform, now.strftime("%Y-%m-%d"))
+
+	source_zip_name = artifact
+	if not artifact.endswith(".zip"):
+		print(f"Creating archive {destination_zip_name} from {artifact} and uploading to {bucket_name}")
+
+		source_zip_name = destination_zip_name
+		with ZipFile(source_zip_name, mode='w', compression=ZIP_DEFLATED, compresslevel=9) as z:
+			for root, directory, filenames in os.walk(artifact):
+				for file in filenames:
+					file_path = os.path.join(root, file)
+					zip_path  = os.path.join("dist", os.path.relpath(file_path, artifact))
+					z.write(file_path, zip_path)
+
+		if not os.path.exists(source_zip_name):
+			print(f"Error: Newly created ZIP archive {source_zip_name} not found.")
+			return 1
+
+	print("Uploading {} to {}".format(source_zip_name, UPLOAD_FOLDER + destination_zip_name))
+	bucket = get_bucket()
+	res = bucket.upload_local_file(
+		source_zip_name,                   # Local file to upload
+		"nightly/" + destination_zip_name, # B2 destination path
+	)
+	return 0
+
+def prune_artifacts():
+	print(f"Looking for binaries to delete older than {days_to_keep} days")
+
+	bucket = get_bucket()
+	for file, _ in bucket.ls(UPLOAD_FOLDER, latest_only=False):
+		# Timestamp is in milliseconds
+		date  = datetime.fromtimestamp(file.upload_timestamp / 1_000.0).replace(hour=0, minute=0, second=0, microsecond=0)
+		now   = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
+		delta = now - date
+
+		if delta.days > int(days_to_keep):
+			print("Deleting {}".format(file.file_name))
+			file.delete()
+
+	return 0
+
+def update_nightly_json():
+	print(f"Updating nightly.json with files {days_to_keep} days or newer")
+
+	files_by_date = {}
+
+	bucket = get_bucket()
+
+	for file, _ in bucket.ls(UPLOAD_FOLDER, latest_only=True):
+		# Timestamp is in milliseconds
+		date = datetime.fromtimestamp(file.upload_timestamp / 1_000.0).replace(hour=0, minute=0, second=0, microsecond=0).strftime('%Y-%m-%d')
+		name = remove_prefix(file.file_name, UPLOAD_FOLDER)
+		sha1 = file.content_sha1
+		size = file.size
+		url  = bucket.get_download_url(file.file_name)
+
+		if date not in files_by_date.keys():
+			files_by_date[date] = []
+
+		files_by_date[date].append({
+			'name':        name,
+			'url':         url,
+			'sha1':        sha1,
+			'sizeInBytes': size,
+		})
+
+	now = datetime.utcnow().isoformat()
+
+	nightly = json.dumps({
+		'last_updated' : now,
+		'files': files_by_date
+	}, sort_keys=True, indent=4, ensure_ascii=False).encode('utf-8')
+
+	res = bucket.upload_bytes(
+		nightly,        # JSON bytes
+		"nightly.json", # B2 destination path
+	)
+	return 0
+
+if __name__ == "__main__":
+	if len(sys.argv) == 1:
+		print("Usage: {} <verb> [arguments]".format(sys.argv[0]))
+		print("\tartifact <platform prefix> <artifact path>\n\t\tCreates and uploads a platform artifact zip.")
+		print("\tprune\n\t\tDeletes old artifacts from bucket")
+		print("\tjson\n\t\tUpdate and upload nightly.json")
+		sys.exit(1)
+	else:
+		command = sys.argv[1].lower()
+		if command == "artifact":
+			if len(sys.argv) != 4:
+				print("Usage: {} artifact <platform prefix> <artifact path>".format(sys.argv[0]))
+				print("Error: Expected artifact command to be given platform prefix and artifact path.\n")
+				sys.exit(1)
+
+			res = create_and_upload_artifact_zip(sys.argv[2], sys.argv[3])
+			sys.exit(res)
+
+		elif command == "prune":
+			res = prune_artifacts()
+			sys.exit(res)
+
+		elif command == "json":
+			res = update_nightly_json()
+			sys.exit(res)

+ 0 - 25
ci/upload_create_nightly.sh

@@ -1,25 +0,0 @@
-#!/bin/bash
-
-set -e
-
-bucket=$1
-platform=$2
-artifact=$3
-
-now=$(date +'%Y-%m-%d')
-filename="odin-$platform-nightly+$now.zip"
-
-echo "Creating archive $filename from $artifact and uploading to $bucket"
-
-# If this is already zipped up (done before artifact upload to keep permissions in tact), just move it.
-if [ "${artifact: -4}" == ".zip" ]
-then
-	echo "Artifact already a zip"
-	mkdir -p "output"
-	mv "$artifact" "output/$filename"
-else
-	echo "Artifact needs to be zipped"
-	7z a -bd "output/$filename" -r "$artifact"
-fi
-
-b2 upload-file --noProgress "$bucket" "output/$filename" "nightly/$filename"

+ 6 - 6
core/bufio/reader.odin

@@ -29,12 +29,12 @@ MIN_READ_BUFFER_SIZE :: 16
 @(private)
 DEFAULT_MAX_CONSECUTIVE_EMPTY_READS :: 128
 
-reader_init :: proc(b: ^Reader, rd: io.Reader, size: int = DEFAULT_BUF_SIZE, allocator := context.allocator) {
+reader_init :: proc(b: ^Reader, rd: io.Reader, size: int = DEFAULT_BUF_SIZE, allocator := context.allocator, loc := #caller_location) {
 	size := size
 	size = max(size, MIN_READ_BUFFER_SIZE)
 	reader_reset(b, rd)
 	b.buf_allocator = allocator
-	b.buf = make([]byte, size, allocator)
+	b.buf = make([]byte, size, allocator, loc)
 }
 
 reader_init_with_buf :: proc(b: ^Reader, rd: io.Reader, buf: []byte) {
@@ -81,7 +81,7 @@ _reader_read_new_chunk :: proc(b: ^Reader) -> io.Error {
 	for i := b.max_consecutive_empty_reads; i > 0; i -= 1 {
 		n, err := io.read(b.rd, b.buf[b.w:])
 		if n < 0 {
-			return .Negative_Read
+			return err if err != nil else .Negative_Read
 		}
 		b.w += n
 		if err != nil {
@@ -189,7 +189,7 @@ reader_read :: proc(b: ^Reader, p: []byte) -> (n: int, err: io.Error) {
 		if len(p) >= len(b.buf) {
 			n, b.err = io.read(b.rd, p)
 			if n < 0 {
-				return 0, .Negative_Read
+				return 0, b.err if b.err != nil else .Negative_Read
 			}
 
 			if n > 0 {
@@ -202,7 +202,7 @@ reader_read :: proc(b: ^Reader, p: []byte) -> (n: int, err: io.Error) {
 		b.r, b.w = 0, 0
 		n, b.err = io.read(b.rd, b.buf)
 		if n < 0 {
-			return 0, .Negative_Read
+			return 0, b.err if b.err != nil else .Negative_Read
 		}
 		if n == 0 {
 			return 0, _reader_consume_err(b)
@@ -290,7 +290,7 @@ reader_write_to :: proc(b: ^Reader, w: io.Writer) -> (n: i64, err: io.Error) {
 	write_buf :: proc(b: ^Reader, w: io.Writer) -> (i64, io.Error) {
 		n, err := io.write(w, b.buf[b.r:b.w])
 		if n < 0 {
-			return 0, .Negative_Write
+			return 0, err if err != nil else .Negative_Write
 		}
 		b.r += n
 		return i64(n), err

+ 4 - 0
core/bufio/writer.odin

@@ -95,6 +95,10 @@ writer_write :: proc(b: ^Writer, p: []byte) -> (n: int, err: io.Error) {
 		m: int
 		if writer_buffered(b) == 0 {
 			m, b.err = io.write(b.wr, p)
+			if m < 0 && b.err == nil {
+				b.err = .Negative_Write
+				break
+			}
 		} else {
 			m = copy(b.buf[b.n:], p)
 			b.n += m

+ 36 - 36
core/bytes/buffer.odin

@@ -27,19 +27,19 @@ Read_Op :: enum i8 {
 }
 
 
-buffer_init :: proc(b: ^Buffer, buf: []byte) {
-	resize(&b.buf, len(buf))
+buffer_init :: proc(b: ^Buffer, buf: []byte, loc := #caller_location) {
+	resize(&b.buf, len(buf), loc=loc)
 	copy(b.buf[:], buf)
 }
 
-buffer_init_string :: proc(b: ^Buffer, s: string) {
-	resize(&b.buf, len(s))
+buffer_init_string :: proc(b: ^Buffer, s: string, loc := #caller_location) {
+	resize(&b.buf, len(s), loc=loc)
 	copy(b.buf[:], s)
 }
 
-buffer_init_allocator :: proc(b: ^Buffer, len, cap: int, allocator := context.allocator) {
+buffer_init_allocator :: proc(b: ^Buffer, len, cap: int, allocator := context.allocator, loc := #caller_location) {
 	if b.buf == nil {
-		b.buf = make([dynamic]byte, len, cap, allocator)
+		b.buf = make([dynamic]byte, len, cap, allocator, loc)
 		return
 	}
 
@@ -96,28 +96,28 @@ buffer_truncate :: proc(b: ^Buffer, n: int) {
 }
 
 @(private)
-_buffer_try_grow :: proc(b: ^Buffer, n: int) -> (int, bool) {
+_buffer_try_grow :: proc(b: ^Buffer, n: int, loc := #caller_location) -> (int, bool) {
 	if l := len(b.buf); n <= cap(b.buf)-l {
-		resize(&b.buf, l+n)
+		resize(&b.buf, l+n, loc=loc)
 		return l, true
 	}
 	return 0, false
 }
 
 @(private)
-_buffer_grow :: proc(b: ^Buffer, n: int) -> int {
+_buffer_grow :: proc(b: ^Buffer, n: int, loc := #caller_location) -> int {
 	m := buffer_length(b)
 	if m == 0 && b.off != 0 {
 		buffer_reset(b)
 	}
-	if i, ok := _buffer_try_grow(b, n); ok {
+	if i, ok := _buffer_try_grow(b, n, loc=loc); ok {
 		return i
 	}
 
 	if b.buf == nil && n <= SMALL_BUFFER_SIZE {
 		// Fixes #2756 by preserving allocator if already set on Buffer via init_buffer_allocator
-		reserve(&b.buf, SMALL_BUFFER_SIZE)
-		resize(&b.buf, n)
+		reserve(&b.buf, SMALL_BUFFER_SIZE, loc=loc)
+		resize(&b.buf, n, loc=loc)
 		return 0
 	}
 
@@ -127,31 +127,31 @@ _buffer_grow :: proc(b: ^Buffer, n: int) -> int {
 	} else if c > max(int) - c - n {
 		panic("bytes.Buffer: too large")
 	} else {
-		resize(&b.buf, 2*c + n)
+		resize(&b.buf, 2*c + n, loc=loc)
 		copy(b.buf[:], b.buf[b.off:])
 	}
 	b.off = 0
-	resize(&b.buf, m+n)
+	resize(&b.buf, m+n, loc=loc)
 	return m
 }
 
-buffer_grow :: proc(b: ^Buffer, n: int) {
+buffer_grow :: proc(b: ^Buffer, n: int, loc := #caller_location) {
 	if n < 0 {
 		panic("bytes.buffer_grow: negative count")
 	}
-	m := _buffer_grow(b, n)
-	resize(&b.buf, m)
+	m := _buffer_grow(b, n, loc=loc)
+	resize(&b.buf, m, loc=loc)
 }
 
-buffer_write_at :: proc(b: ^Buffer, p: []byte, offset: int) -> (n: int, err: io.Error) {
+buffer_write_at :: proc(b: ^Buffer, p: []byte, offset: int, loc := #caller_location) -> (n: int, err: io.Error) {
 	b.last_read = .Invalid
 	if offset < 0 {
 		err = .Invalid_Offset
 		return
 	}
-	_, ok := _buffer_try_grow(b, offset+len(p))
+	_, ok := _buffer_try_grow(b, offset+len(p), loc=loc)
 	if !ok {
-		_ = _buffer_grow(b, offset+len(p))
+		_ = _buffer_grow(b, offset+len(p), loc=loc)
 	}
 	if len(b.buf) <= offset {
 		return 0, .Short_Write
@@ -160,47 +160,47 @@ buffer_write_at :: proc(b: ^Buffer, p: []byte, offset: int) -> (n: int, err: io.
 }
 
 
-buffer_write :: proc(b: ^Buffer, p: []byte) -> (n: int, err: io.Error) {
+buffer_write :: proc(b: ^Buffer, p: []byte, loc := #caller_location) -> (n: int, err: io.Error) {
 	b.last_read = .Invalid
-	m, ok := _buffer_try_grow(b, len(p))
+	m, ok := _buffer_try_grow(b, len(p), loc=loc)
 	if !ok {
-		m = _buffer_grow(b, len(p))
+		m = _buffer_grow(b, len(p), loc=loc)
 	}
 	return copy(b.buf[m:], p), nil
 }
 
-buffer_write_ptr :: proc(b: ^Buffer, ptr: rawptr, size: int) -> (n: int, err: io.Error) {
-	return buffer_write(b, ([^]byte)(ptr)[:size])
+buffer_write_ptr :: proc(b: ^Buffer, ptr: rawptr, size: int, loc := #caller_location) -> (n: int, err: io.Error) {
+	return buffer_write(b, ([^]byte)(ptr)[:size], loc=loc)
 }
 
-buffer_write_string :: proc(b: ^Buffer, s: string) -> (n: int, err: io.Error) {
+buffer_write_string :: proc(b: ^Buffer, s: string, loc := #caller_location) -> (n: int, err: io.Error) {
 	b.last_read = .Invalid
-	m, ok := _buffer_try_grow(b, len(s))
+	m, ok := _buffer_try_grow(b, len(s), loc=loc)
 	if !ok {
-		m = _buffer_grow(b, len(s))
+		m = _buffer_grow(b, len(s), loc=loc)
 	}
 	return copy(b.buf[m:], s), nil
 }
 
-buffer_write_byte :: proc(b: ^Buffer, c: byte) -> io.Error {
+buffer_write_byte :: proc(b: ^Buffer, c: byte, loc := #caller_location) -> io.Error {
 	b.last_read = .Invalid
-	m, ok := _buffer_try_grow(b, 1)
+	m, ok := _buffer_try_grow(b, 1, loc=loc)
 	if !ok {
-		m = _buffer_grow(b, 1)
+		m = _buffer_grow(b, 1, loc=loc)
 	}
 	b.buf[m] = c
 	return nil
 }
 
-buffer_write_rune :: proc(b: ^Buffer, r: rune) -> (n: int, err: io.Error) {
+buffer_write_rune :: proc(b: ^Buffer, r: rune, loc := #caller_location) -> (n: int, err: io.Error) {
 	if r < utf8.RUNE_SELF {
-		buffer_write_byte(b, byte(r))
+		buffer_write_byte(b, byte(r), loc=loc)
 		return 1, nil
 	}
 	b.last_read = .Invalid
-	m, ok := _buffer_try_grow(b, utf8.UTF_MAX)
+	m, ok := _buffer_try_grow(b, utf8.UTF_MAX, loc=loc)
 	if !ok {
-		m = _buffer_grow(b, utf8.UTF_MAX)
+		m = _buffer_grow(b, utf8.UTF_MAX, loc=loc)
 	}
 	res: [4]byte
 	res, n = utf8.encode_rune(r)
@@ -359,7 +359,7 @@ buffer_read_from :: proc(b: ^Buffer, r: io.Reader) -> (n: i64, err: io.Error) #n
 		resize(&b.buf, i)
 		m, e := io.read(r, b.buf[i:cap(b.buf)])
 		if m < 0 {
-			err = .Negative_Read
+			err = e if e != nil else .Negative_Read
 			return
 		}
 

+ 25 - 0
core/bytes/bytes.odin

@@ -1167,3 +1167,28 @@ fields_proc :: proc(s: []byte, f: proc(rune) -> bool, allocator := context.alloc
 
 	return subslices[:]
 }
+
+// alias returns true iff a and b have a non-zero length, and any part of
+// a overlaps with b.
+alias :: proc "contextless" (a, b: []byte) -> bool {
+	a_len, b_len := len(a), len(b)
+	if a_len == 0 || b_len == 0 {
+		return false
+	}
+
+	a_start, b_start := uintptr(raw_data(a)), uintptr(raw_data(b))
+	a_end, b_end := a_start + uintptr(a_len-1), b_start + uintptr(b_len-1)
+
+	return a_start <= b_end && b_start <= a_end
+}
+
+// alias_inexactly returns true iff a and b have a non-zero length,
+// the base pointers of a and b are NOT equal, and any part of a overlaps
+// with b (i.e. `alias(a, b)`, except that it returns false for
+// `a == b`, `b = a[:len(a)-69]` and similar conditions).
+alias_inexactly :: proc "contextless" (a, b: []byte) -> bool {
+	if raw_data(a) == raw_data(b) {
+		return false
+	}
+	return alias(a, b)
+}
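+
+// For illustration (hypothetical slices): given `b := a[2:]`, `alias(a, b)` is
+// true because the slices overlap, while `alias_inexactly(a, a)` is false
+// because both share the same base pointer.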

+ 1 - 1
core/c/libc/errno.odin

@@ -40,7 +40,7 @@ when ODIN_OS == .FreeBSD {
 	ERANGE :: 34
 }
 
-when ODIN_OS == .OpenBSD {
+when ODIN_OS == .OpenBSD || ODIN_OS == .NetBSD {
 	@(private="file")
 	@(default_calling_convention="c")
 	foreign libc {

+ 1 - 14
core/c/libc/signal.odin

@@ -34,20 +34,7 @@ when ODIN_OS == .Windows {
 	SIGTERM :: 15
 }
 
-when ODIN_OS == .Linux || ODIN_OS == .FreeBSD {
-	SIG_ERR  :: rawptr(~uintptr(0))
-	SIG_DFL  :: rawptr(uintptr(0))
-	SIG_IGN  :: rawptr(uintptr(1)) 
-
-	SIGABRT  :: 6
-	SIGFPE   :: 8
-	SIGILL   :: 4
-	SIGINT   :: 2
-	SIGSEGV  :: 11
-	SIGTERM  :: 15
-}
-
-when ODIN_OS == .Darwin {
+when ODIN_OS == .Linux || ODIN_OS == .FreeBSD || ODIN_OS == .Haiku || ODIN_OS == .OpenBSD || ODIN_OS == .NetBSD || ODIN_OS == .Darwin {
 	SIG_ERR  :: rawptr(~uintptr(0))
 	SIG_DFL  :: rawptr(uintptr(0))
 	SIG_IGN  :: rawptr(uintptr(1)) 

+ 9 - 7
core/c/libc/stdio.odin

@@ -83,7 +83,7 @@ when ODIN_OS == .Linux {
 	}
 }
 
-when ODIN_OS == .OpenBSD {
+when ODIN_OS == .OpenBSD || ODIN_OS == .NetBSD {
 	fpos_t :: distinct i64
 
 	_IOFBF :: 0
@@ -102,10 +102,12 @@ when ODIN_OS == .OpenBSD {
 	SEEK_END :: 2
 
 	foreign libc {
-		stderr: ^FILE
-		stdin:  ^FILE
-		stdout: ^FILE
+		__sF: [3]FILE
 	}
+
+	stdin:  ^FILE = &__sF[0]
+	stdout: ^FILE = &__sF[1]
+	stderr: ^FILE = &__sF[2]
 }
 
 when ODIN_OS == .FreeBSD {
@@ -127,9 +129,9 @@ when ODIN_OS == .FreeBSD {
 	SEEK_END :: 2
 
 	foreign libc {
-		stderr: ^FILE
-		stdin:  ^FILE
-		stdout: ^FILE
+		@(link_name="__stderrp") stderr: ^FILE
+		@(link_name="__stdinp")  stdin:  ^FILE
+		@(link_name="__stdoutp") stdout: ^FILE
 	}
 }
 

+ 1 - 1
core/c/libc/time.odin

@@ -45,7 +45,7 @@ when ODIN_OS == .Windows {
 	}
 }
 
-when ODIN_OS == .Linux || ODIN_OS == .FreeBSD || ODIN_OS == .Darwin || ODIN_OS == .OpenBSD || ODIN_OS == .Haiku {
+when ODIN_OS == .Linux || ODIN_OS == .FreeBSD || ODIN_OS == .Darwin || ODIN_OS == .OpenBSD || ODIN_OS == .NetBSD || ODIN_OS == .Haiku {
 	@(default_calling_convention="c")
 	foreign libc {
 		// 7.27.2 Time manipulation functions

+ 1 - 1
core/c/libc/wctype.odin

@@ -22,7 +22,7 @@ when ODIN_OS == .Windows {
 	wctrans_t :: distinct int
 	wctype_t  :: distinct u32
 
-} else when ODIN_OS == .OpenBSD {
+} else when ODIN_OS == .OpenBSD || ODIN_OS == .NetBSD {
 	wctrans_t :: distinct rawptr
 	wctype_t  :: distinct rawptr
 

+ 28 - 28
core/compress/common.odin

@@ -34,13 +34,13 @@ COMPRESS_OUTPUT_ALLOCATE_MIN :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MIN, 1 << 2
 */
 when size_of(uintptr) == 8 {
 
-        // For 64-bit platforms, we set the default max buffer size to 4 GiB,
-        // which is GZIP and PKZIP's max payload size.
+	// For 64-bit platforms, we set the default max buffer size to 4 GiB,
+	// which is GZIP and PKZIP's max payload size.
 	COMPRESS_OUTPUT_ALLOCATE_MAX :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MAX, 1 << 32))
 } else {
 	
 	// For 32-bit platforms, we set the default max buffer size to 512 MiB.
-        COMPRESS_OUTPUT_ALLOCATE_MAX :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MAX, 1 << 29))
+	COMPRESS_OUTPUT_ALLOCATE_MAX :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MAX, 1 << 29))
 }
 
 
@@ -186,7 +186,7 @@ input_size_from_stream :: proc(z: ^Context_Stream_Input) -> (res: i64, err: Erro
 
 input_size :: proc{input_size_from_memory, input_size_from_stream}
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 read_slice_from_memory :: #force_inline proc(z: ^Context_Memory_Input, size: int) -> (res: []u8, err: io.Error) {
 	#no_bounds_check {
 		if len(z.input_data) >= size {
@@ -203,7 +203,7 @@ read_slice_from_memory :: #force_inline proc(z: ^Context_Memory_Input, size: int
 	}
 }
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 read_slice_from_stream :: #force_inline proc(z: ^Context_Stream_Input, size: int) -> (res: []u8, err: io.Error) {
 	// TODO: REMOVE ALL USE OF context.temp_allocator here
 	// there is literally no need for it
@@ -214,13 +214,13 @@ read_slice_from_stream :: #force_inline proc(z: ^Context_Stream_Input, size: int
 
 read_slice :: proc{read_slice_from_memory, read_slice_from_stream}
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 read_data :: #force_inline proc(z: ^$C, $T: typeid) -> (res: T, err: io.Error) {
 	b := read_slice(z, size_of(T)) or_return
 	return (^T)(&b[0])^, nil
 }
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 read_u8_from_memory :: #force_inline proc(z: ^Context_Memory_Input) -> (res: u8, err: io.Error) {
 	#no_bounds_check {
 		if len(z.input_data) >= 1 {
@@ -232,7 +232,7 @@ read_u8_from_memory :: #force_inline proc(z: ^Context_Memory_Input) -> (res: u8,
 	return 0, .EOF
 }
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 read_u8_from_stream :: #force_inline proc(z: ^Context_Stream_Input) -> (res: u8, err: io.Error) {
 	b := read_slice_from_stream(z, 1) or_return
 	return b[0], nil
@@ -242,7 +242,7 @@ read_u8 :: proc{read_u8_from_memory, read_u8_from_stream}
 
 // You would typically only use this at the end of Inflate, to drain bits from the code buffer
 // preferentially.
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 read_u8_prefer_code_buffer_lsb :: #force_inline proc(z: ^$C) -> (res: u8, err: io.Error) {
 	if z.num_bits >= 8 {
 		res = u8(read_bits_no_refill_lsb(z, 8))
@@ -257,7 +257,7 @@ read_u8_prefer_code_buffer_lsb :: #force_inline proc(z: ^$C) -> (res: u8, err: i
 	return
 }
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 peek_data_from_memory :: #force_inline proc(z: ^Context_Memory_Input, $T: typeid) -> (res: T, err: io.Error) {
 	size :: size_of(T)
 
@@ -275,7 +275,7 @@ peek_data_from_memory :: #force_inline proc(z: ^Context_Memory_Input, $T: typeid
 	}
 }
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 peek_data_at_offset_from_memory :: #force_inline proc(z: ^Context_Memory_Input, $T: typeid, #any_int offset: int) -> (res: T, err: io.Error) {
 	size :: size_of(T)
 
@@ -293,7 +293,7 @@ peek_data_at_offset_from_memory :: #force_inline proc(z: ^Context_Memory_Input,
 	}
 }
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 peek_data_from_stream :: #force_inline proc(z: ^Context_Stream_Input, $T: typeid) -> (res: T, err: io.Error) {
 	size :: size_of(T)
 
@@ -317,7 +317,7 @@ peek_data_from_stream :: #force_inline proc(z: ^Context_Stream_Input, $T: typeid
 	return res, .None
 }
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 peek_data_at_offset_from_stream :: #force_inline proc(z: ^Context_Stream_Input, $T: typeid, #any_int offset: int) -> (res: T, err: io.Error) {
 	size :: size_of(T)
 
@@ -352,14 +352,14 @@ peek_data :: proc{peek_data_from_memory, peek_data_from_stream, peek_data_at_off
 
 
 // Sliding window read back
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 peek_back_byte :: #force_inline proc(z: ^$C, offset: i64) -> (res: u8, err: io.Error) {
 	// Look back into the sliding window.
 	return z.output.buf[z.bytes_written - offset], .None
 }
 
 // Generalized bit reader LSB
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width := i8(48)) {
 	refill := u64(width)
 	b      := u64(0)
@@ -385,7 +385,7 @@ refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width :=
 }
 
 // Generalized bit reader LSB
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 refill_lsb_from_stream :: proc(z: ^Context_Stream_Input, width := i8(24)) {
 	refill := u64(width)
 
@@ -414,13 +414,13 @@ refill_lsb_from_stream :: proc(z: ^Context_Stream_Input, width := i8(24)) {
 refill_lsb :: proc{refill_lsb_from_memory, refill_lsb_from_stream}
 
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 consume_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) {
 	z.code_buffer >>= width
 	z.num_bits -= u64(width)
 }
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 consume_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) {
 	z.code_buffer >>= width
 	z.num_bits -= u64(width)
@@ -428,7 +428,7 @@ consume_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, wid
 
 consume_bits_lsb :: proc{consume_bits_lsb_from_memory, consume_bits_lsb_from_stream}
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 peek_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
 	if z.num_bits < u64(width) {
 		refill_lsb(z)
@@ -436,7 +436,7 @@ peek_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width:
 	return u32(z.code_buffer &~ (~u64(0) << width))
 }
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 peek_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
 	if z.num_bits < u64(width) {
 		refill_lsb(z)
@@ -446,13 +446,13 @@ peek_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width:
 
 peek_bits_lsb :: proc{peek_bits_lsb_from_memory, peek_bits_lsb_from_stream}
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 peek_bits_no_refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
 	assert(z.num_bits >= u64(width))
 	return u32(z.code_buffer &~ (~u64(0) << width))
 }
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 peek_bits_no_refill_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
 	assert(z.num_bits >= u64(width))
 	return u32(z.code_buffer &~ (~u64(0) << width))
@@ -460,14 +460,14 @@ peek_bits_no_refill_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Inp
 
 peek_bits_no_refill_lsb :: proc{peek_bits_no_refill_lsb_from_memory, peek_bits_no_refill_lsb_from_stream}
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 read_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
 	k := #force_inline peek_bits_lsb(z, width)
 	#force_inline consume_bits_lsb(z, width)
 	return k
 }
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 read_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
 	k := peek_bits_lsb(z, width)
 	consume_bits_lsb(z, width)
@@ -476,14 +476,14 @@ read_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width:
 
 read_bits_lsb :: proc{read_bits_lsb_from_memory, read_bits_lsb_from_stream}
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 read_bits_no_refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
 	k := #force_inline peek_bits_no_refill_lsb(z, width)
 	#force_inline consume_bits_lsb(z, width)
 	return k
 }
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 read_bits_no_refill_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
 	k := peek_bits_no_refill_lsb(z, width)
 	consume_bits_lsb(z, width)
@@ -493,14 +493,14 @@ read_bits_no_refill_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Inp
 read_bits_no_refill_lsb :: proc{read_bits_no_refill_lsb_from_memory, read_bits_no_refill_lsb_from_stream}
 
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 discard_to_next_byte_lsb_from_memory :: proc(z: ^Context_Memory_Input) {
 	discard := u8(z.num_bits & 7)
 	#force_inline consume_bits_lsb(z, discard)
 }
 
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 discard_to_next_byte_lsb_from_stream :: proc(z: ^Context_Stream_Input) {
 	discard := u8(z.num_bits & 7)
 	consume_bits_lsb(z, discard)

+ 4 - 7
core/compress/shoco/shoco.odin

@@ -98,7 +98,7 @@ decompress_slice_to_output_buffer :: proc(input: []u8, output: []u8, model := DE
 	validate_model(model) or_return
 
 	for inp < inp_end {
-		val  := transmute(i8)input[inp]
+		val  := i8(input[inp])
 		mark := int(-1)
 
 		for val < 0 {
@@ -274,12 +274,9 @@ compress_string_to_buffer :: proc(input: string, output: []u8, model := DEFAULT_
 				out_ptr := raw_data(output[out:])
 
 				switch pack.bytes_packed {
-				case 4:
-					intrinsics.unaligned_store(transmute(^u32)out_ptr, code)
-				case 2:
-					intrinsics.unaligned_store(transmute(^u16)out_ptr, u16(code))
-				case 1:
-					intrinsics.unaligned_store(transmute(^u8)out_ptr,  u8(code))
+				case 4: intrinsics.unaligned_store((^u32)(out_ptr), code)
+				case 2: intrinsics.unaligned_store((^u16)(out_ptr), u16(code))
+				case 1: intrinsics.unaligned_store( (^u8)(out_ptr),  u8(code))
 				case:
 					return out, .Unknown_Compression_Method
 				}

+ 13 - 13
core/compress/zlib/zlib.odin

@@ -120,7 +120,7 @@ Huffman_Table :: struct {
 }
 
 // Implementation starts here
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 z_bit_reverse :: #force_inline proc(n: u16, bits: u8) -> (r: u16) {
 	assert(bits <= 16)
 	// NOTE: Can optimize with llvm.bitreverse.i64 or some bit twiddling
@@ -136,7 +136,7 @@ z_bit_reverse :: #force_inline proc(n: u16, bits: u8) -> (r: u16) {
 }
 
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 grow_buffer :: proc(buf: ^[dynamic]u8) -> (err: compress.Error) {
 	/*
 		That we get here at all means that we didn't pass an expected output size,
@@ -154,7 +154,7 @@ grow_buffer :: proc(buf: ^[dynamic]u8) -> (err: compress.Error) {
 	TODO: Make these return compress.Error.
 */
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 write_byte :: #force_inline proc(z: ^$C, c: u8) -> (err: io.Error) #no_bounds_check {
 	/*
 		Resize if needed.
@@ -173,7 +173,7 @@ write_byte :: #force_inline proc(z: ^$C, c: u8) -> (err: io.Error) #no_bounds_ch
 	return .None
 }
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 repl_byte :: proc(z: ^$C, count: u16, c: u8) -> (err: io.Error) #no_bounds_check {
 	/*
 		TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
@@ -201,7 +201,7 @@ repl_byte :: proc(z: ^$C, count: u16, c: u8) -> (err: io.Error) #no_bounds_check
 	return .None
 }
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 repl_bytes :: proc(z: ^$C, count: u16, distance: u16) -> (err: io.Error) {
 	/*
 		TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
@@ -234,8 +234,8 @@ allocate_huffman_table :: proc(allocator := context.allocator) -> (z: ^Huffman_T
 	return new(Huffman_Table, allocator), nil
 }
 
-@(optimization_mode="speed")
-build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
+@(optimization_mode="favor_size")
+build_huffman :: #force_no_inline proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
 	sizes:     [HUFFMAN_MAX_BITS+1]int
 	next_code: [HUFFMAN_MAX_BITS+1]int
 
@@ -293,7 +293,7 @@ build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
 	return nil
 }
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 decode_huffman_slowpath :: proc(z: ^$C, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
 	code := u16(compress.peek_bits_lsb(z,16))
 
@@ -324,7 +324,7 @@ decode_huffman_slowpath :: proc(z: ^$C, t: ^Huffman_Table) -> (r: u16, err: Erro
 	return r, nil
 }
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 decode_huffman :: proc(z: ^$C, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
 	if z.num_bits < 16 {
 		if z.num_bits > 63 {
@@ -344,7 +344,7 @@ decode_huffman :: proc(z: ^$C, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bo
 	return decode_huffman_slowpath(z, t)
 }
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 parse_huffman_block :: proc(z: ^$C, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
 	#no_bounds_check for {
 		value, e := decode_huffman(z, z_repeat)
@@ -413,7 +413,7 @@ parse_huffman_block :: proc(z: ^$C, z_repeat, z_offset: ^Huffman_Table) -> (err:
 	}
 }
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 inflate_from_context :: proc(using ctx: ^compress.Context_Memory_Input, raw := false, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
 	/*
 		ctx.output must be a bytes.Buffer for now. We'll add a separate implementation that writes to a stream.
@@ -486,7 +486,7 @@ inflate_from_context :: proc(using ctx: ^compress.Context_Memory_Input, raw := f
 
 // TODO: Check alignment of reserve/resize.
 
-@(optimization_mode="speed")
+@(optimization_mode="favor_size")
 inflate_raw :: proc(z: ^$C, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
 	context.allocator = allocator
 	expected_output_size := expected_output_size
@@ -670,4 +670,4 @@ inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, raw := fals
 	return inflate_raw(&ctx, expected_output_size=expected_output_size)
 }
 
-inflate :: proc{inflate_from_context, inflate_from_byte_array}
+inflate :: proc{inflate_from_context, inflate_from_byte_array}

+ 4 - 7
core/container/avl/avl.odin

@@ -5,13 +5,10 @@ The implementation is non-intrusive, and non-recursive.
 */
 package container_avl
 
-import "base:intrinsics"
-import "base:runtime"
+@(require) import "base:intrinsics"
+@(require) import "base:runtime"
 import "core:slice"
 
-_ :: intrinsics
-_ :: runtime
-
 // Originally based on the CC0 implementation by Eric Biggers
 // See: https://github.com/ebiggers/avl_tree/
 
@@ -90,7 +87,7 @@ init_cmp :: proc(
 init_ordered :: proc(
 	t: ^$T/Tree($Value),
 	node_allocator := context.allocator,
-) where intrinsics.type_is_ordered_numeric(Value) {
+) where intrinsics.type_is_ordered(Value) {
 	init_cmp(t, slice.cmp_proc(Value), node_allocator)
 }
 
@@ -675,4 +672,4 @@ iterator_first :: proc "contextless" (it: ^Iterator($Value)) {
 	if it._cur != nil {
 		it._next = node_next_or_prev_in_order(it._cur, it._direction)
 	}
-}
+}

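The relaxed constraint above (`type_is_ordered` rather than `type_is_ordered_numeric`) means `init_ordered` now accepts any ordered key type. A hedged sketch of what this newly permits, assuming the rest of the `core:container/avl` API (`find_or_insert`, `iterator`, `iterator_next`) mirrors its `rbtree` counterpart added later in this commit:

	package avl_string_demo

	import "core:container/avl"
	import "core:fmt"

	main :: proc() {
		t: avl.Tree(string)
		avl.init(&t) // strings are ordered, though not numeric
		defer avl.destroy(&t)

		avl.find_or_insert(&t, "banana")
		avl.find_or_insert(&t, "apple")

		it := avl.iterator(&t, .Forward)
		for n in avl.iterator_next(&it) {
			fmt.println(n.value) // apple, then banana
		}
	}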
+ 6 - 3
core/container/bit_array/bit_array.odin

@@ -210,8 +210,11 @@ set :: proc(ba: ^Bit_Array, #any_int index: uint, set_to: bool = true, allocator
 
 	ba.max_index = max(idx, ba.max_index)
 
-	if set_to{ ba.bits[leg_index] |= 1 << uint(bit_index) }
-	else { ba.bits[leg_index] &= ~(1 << uint(bit_index)) }
+	if set_to {
+		ba.bits[leg_index] |=  1 << uint(bit_index)
+	} else {
+		ba.bits[leg_index] &~= 1 << uint(bit_index)
+	}
 
 	return true
 }
@@ -253,7 +256,7 @@ Inputs:
 - index: Which bit in the array
 */
 unsafe_unset :: proc(b: ^Bit_Array, bit: int) #no_bounds_check {
-	b.bits[bit >> INDEX_SHIFT] &= ~(1 << uint(bit & INDEX_MASK))
+	b.bits[bit >> INDEX_SHIFT] &~= 1 << uint(bit & INDEX_MASK)
 }
 /*
 A helper function to create a Bit Array with optional bias, in case your smallest index is non-zero (including negative).

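`&~=` is Odin's AND-NOT assignment, so both rewritten lines clear the bit exactly as the old complement-and-mask form did. A minimal sketch of the equivalence (hypothetical values):

	package bit_clear_demo

	import "core:fmt"

	main :: proc() {
		leg: u64 = 0b1111

		leg &= ~u64(1 << 1) // old spelling: complement, then AND -> 0b1101
		leg &~= 1 << 2      // new spelling: AND-NOT assignment   -> 0b1001

		fmt.printf("%b\n", leg) // 1001
	}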
+ 46 - 0
core/container/intrusive/list/doc.odin

@@ -0,0 +1,46 @@
+/*
+Package list implements an intrusive doubly-linked list.
+
+An intrusive container requires a `Node` to be embedded in your own structure, like this:
+
+	My_String :: struct {
+		node:  list.Node,
+		value: string,
+	}
+
+Embedding the members of a `list.Node` in your structure with the `using` keyword is also allowed:
+
+	My_String :: struct {
+		using node: list.Node,
+		value: string,
+	}
+
+Here is a full example:
+
+	package test
+	
+	import "core:fmt"
+	import "core:container/intrusive/list"
+	
+	main :: proc() {
+	    l: list.List
+	
+	    one := My_String{value="Hello"}
+	    two := My_String{value="World"}
+	
+	    list.push_back(&l, &one.node)
+	    list.push_back(&l, &two.node)
+	
+	    iter := list.iterator_head(l, My_String, "node")
+	    for s in list.iterate_next(&iter) {
+	        fmt.println(s.value)
+	    }
+	}
+	
+	My_String :: struct {
+	    node:  list.Node,
+	    value: string,
+	}
+
+*/
+package container_intrusive_list

+ 169 - 2
core/container/intrusive/list/intrusive_list.odin

@@ -18,11 +18,18 @@ List :: struct {
 	tail: ^Node,
 }
 
-
+// The list link you must include in your own structure.
 Node :: struct {
 	prev, next: ^Node,
 }
 
+/*
+Inserts a new element at the front of the list with O(1) time complexity.
+
+**Inputs**
+- list: The container list
+- node: The node member of the user-defined element structure
+*/
 push_front :: proc "contextless" (list: ^List, node: ^Node) {
 	if list.head != nil {
 		list.head.prev = node
@@ -33,7 +40,13 @@ push_front :: proc "contextless" (list: ^List, node: ^Node) {
 		node.prev, node.next = nil, nil
 	}
 }
+/*
+Inserts a new element at the back of the list with O(1) time complexity.
 
+**Inputs**
+- list: The container list
+- node: The node member of the user-defined element structure
+*/
 push_back :: proc "contextless" (list: ^List, node: ^Node) {
 	if list.tail != nil {
 		list.tail.next = node
@@ -45,6 +58,13 @@ push_back :: proc "contextless" (list: ^List, node: ^Node) {
 	}
 }
 
+/*
+Removes an element from a list with O(1) time complexity.
+
+**Inputs**
+- list: The container list
+- node: The node member of the user-defined element structure to be removed
+*/
 remove :: proc "contextless" (list: ^List, node: ^Node) {
 	if node != nil {
 		if node.next != nil {
@@ -61,7 +81,13 @@ remove :: proc "contextless" (list: ^List, node: ^Node) {
 		}
 	}
 }
+/*
+Removes from the given list all elements that satisfy a condition with O(N) time complexity.
 
+**Inputs**
+- list: The container list
+- to_erase: The condition procedure. It should return `true` if a node should be removed, `false` otherwise
+*/
 remove_by_proc :: proc(list: ^List, to_erase: proc(^Node) -> bool) {
 	for node := list.head; node != nil; {
 		next := node.next
@@ -82,7 +108,13 @@ remove_by_proc :: proc(list: ^List, to_erase: proc(^Node) -> bool) {
 		node = next
 	}
 }
+/*
+Removes from the given list all elements that satisfy a condition with O(N) time complexity.
 
+**Inputs**
+- list: The container list
+- to_erase: The _contextless_ condition procedure. It should return `true` if a node should be removed, `false` otherwise
+*/
 remove_by_proc_contextless :: proc(list: ^List, to_erase: proc "contextless" (^Node) -> bool) {
 	for node := list.head; node != nil; {
 		next := node.next
@@ -104,12 +136,26 @@ remove_by_proc_contextless :: proc(list: ^List, to_erase: proc "contextless" (^N
 	}
 }
 
+/*
+Checks whether the given list does not contain any element.
 
+**Inputs**
+- list: The container list
 
+**Returns** `true` if `list` is empty, `false` otherwise
+*/
 is_empty :: proc "contextless" (list: ^List) -> bool {
 	return list.head == nil
 }
 
+/*
+Removes and returns the element at the front of the list with O(1) time complexity.
+
+**Inputs**
+- list: The container list
+
+**Returns** The node member of the user-defined element structure, or `nil` if the list is empty
+*/
 pop_front :: proc "contextless" (list: ^List) -> ^Node {
 	link := list.head
 	if link == nil {
@@ -130,6 +176,14 @@ pop_front :: proc "contextless" (list: ^List) -> ^Node {
 	return link
 
 }
+/*
+Removes and returns the element at the back of the list with O(1) time complexity.
+
+**Inputs**
+- list: The container list
+
+**Returns** The node member of the user-defined element structure, or `nil` if the list is empty
+*/
 pop_back :: proc "contextless" (list: ^List) -> ^Node {
 	link := list.tail
 	if link == nil {
@@ -151,29 +205,102 @@ pop_back :: proc "contextless" (list: ^List) -> ^Node {
 }
 
 
+
 Iterator :: struct($T: typeid) {
 	curr:   ^Node,
 	offset: uintptr,
 }
 
+/*
+Creates an iterator pointing at the head of the given list. For an example, see `iterate_next`.
+
+**Inputs**
+- list: The container list
+- T: The type of the list's elements
+- field_name: The name of the node field in the `T` structure
+
+**Returns** An iterator pointing at the head of `list`
+
+*/
 iterator_head :: proc "contextless" (list: List, $T: typeid, $field_name: string) -> Iterator(T)
 	where intrinsics.type_has_field(T, field_name),
 	      intrinsics.type_field_type(T, field_name) == Node {
 	return {list.head, offset_of_by_string(T, field_name)}
 }
+/*
+Creates an iterator pointing at the tail of the given list. For an example, see `iterate_prev`.
+
+**Inputs**
+- list: The container list
+- T: The type of the list's elements
+- field_name: The name of the node field in the `T` structure
 
+**Returns** An iterator pointing at the tail of `list`
+
+*/
 iterator_tail :: proc "contextless" (list: List, $T: typeid, $field_name: string) -> Iterator(T)
 	where intrinsics.type_has_field(T, field_name),
 	      intrinsics.type_field_type(T, field_name) == Node {
 	return {list.tail, offset_of_by_string(T, field_name)}
 }
+/*
+Creates an iterator pointing at the specified node of a list.
+
+**Inputs**
+- node: a list node
+- T: The type of the list's elements
+- field_name: The name of the node field in the `T` structure
+
+**Returns** An iterator pointing at `node`
 
+*/
 iterator_from_node :: proc "contextless" (node: ^Node, $T: typeid, $field_name: string) -> Iterator(T)
 	where intrinsics.type_has_field(T, field_name),
 	      intrinsics.type_field_type(T, field_name) == Node {
 	return {node, offset_of_by_string(T, field_name)}
 }
 
+/*
+Retrieves the next element in a list and advances the iterator.
+
+**Inputs**  
+- it: The iterator
+
+**Returns**
+- ptr: The next list element
+- ok: `true` if the element is valid (the iterator could advance), `false` otherwise
+
+Example:
+
+	import "core:fmt"
+	import "core:container/intrusive/list"
+
+	iterate_next_example :: proc() {
+		l: list.List
+
+		one := My_Struct{value=1}
+		two := My_Struct{value=2}
+
+		list.push_back(&l, &one.node)
+		list.push_back(&l, &two.node)
+
+		it := list.iterator_head(l, My_Struct, "node")
+		for num in list.iterate_next(&it) {
+			fmt.println(num.value)
+		}
+	}
+
+	My_Struct :: struct {
+		node : list.Node,
+		value: int,
+	}
+
+Output:
+
+	1
+	2
+
+*/
 iterate_next :: proc "contextless" (it: ^Iterator($T)) -> (ptr: ^T, ok: bool) {
 	node := it.curr
 	if node == nil {
@@ -183,7 +310,47 @@ iterate_next :: proc "contextless" (it: ^Iterator($T)) -> (ptr: ^T, ok: bool) {
 
 	return (^T)(uintptr(node) - it.offset), true
 }
+/*
+Retrieves the previous element in a list and recedes the iterator.
+
+**Inputs**  
+- it: The iterator
+
+**Returns**
+- ptr: The previous list element
+- ok: `true` if the element is valid (the iterator could recede), `false` otherwise
+
+Example:
+
+	import "core:fmt"
+	import "core:container/intrusive/list"
 
+	iterate_prev_example :: proc() {
+		l: list.List
+
+		one := My_Struct{value=1}
+		two := My_Struct{value=2}
+
+		list.push_back(&l, &one.node)
+		list.push_back(&l, &two.node)
+
+		it := list.iterator_tail(l, My_Struct, "node")
+		for num in list.iterate_prev(&it) {
+			fmt.println(num.value)
+		}
+	}
+
+	My_Struct :: struct {
+		node : list.Node,
+		value: int,
+	}
+
+Output:
+
+	2
+	1
+
+*/
 iterate_prev :: proc "contextless" (it: ^Iterator($T)) -> (ptr: ^T, ok: bool) {
 	node := it.curr
 	if node == nil {
@@ -192,4 +359,4 @@ iterate_prev :: proc "contextless" (it: ^Iterator($T)) -> (ptr: ^T, ok: bool) {
 	it.curr = node.prev
 
 	return (^T)(uintptr(node) - it.offset), true
-}
+}

+ 1 - 2
core/container/lru/lru_cache.odin

@@ -70,8 +70,7 @@ set :: proc(c: ^$C/Cache($Key, $Value), key: Key, value: Value) -> runtime.Alloc
 	if c.count == c.capacity {
 		e = c.tail
 		_remove_node(c, e)
-	}
-	else {
+	} else {
 		c.count += 1
 		e = new(Node(Key, Value), c.node_allocator) or_return
 	}

+ 4 - 4
core/container/queue/queue.odin

@@ -95,11 +95,11 @@ front_ptr :: proc(q: ^$Q/Queue($T)) -> ^T {
 }
 
 back :: proc(q: ^$Q/Queue($T)) -> T {
-	idx := (q.offset+uint(q.len))%builtin.len(q.data)
+	idx := (q.offset+uint(q.len - 1))%builtin.len(q.data)
 	return q.data[idx]
 }
 back_ptr :: proc(q: ^$Q/Queue($T)) -> ^T {
-	idx := (q.offset+uint(q.len))%builtin.len(q.data)
+	idx := (q.offset+uint(q.len - 1))%builtin.len(q.data)
 	return &q.data[idx]
 }
 
@@ -189,7 +189,7 @@ pop_front_safe :: proc(q: ^$Q/Queue($T)) -> (elem: T, ok: bool) {
 	return
 }
 
-// Push multiple elements to the front of the queue
+// Push multiple elements to the back of the queue
 push_back_elems :: proc(q: ^$Q/Queue($T), elems: ..T) -> (ok: bool, err: runtime.Allocator_Error)  {
 	n := uint(builtin.len(elems))
 	if space(q^) < int(n) {
@@ -241,7 +241,7 @@ clear :: proc(q: ^$Q/Queue($T)) {
 }
 
 
-// Internal growinh procedure
+// Internal growing procedure
 _grow :: proc(q: ^$Q/Queue($T), min_capacity: uint = 0) -> runtime.Allocator_Error {
 	new_capacity := max(min_capacity, uint(8), uint(builtin.len(q.data))*2)
 	n := uint(builtin.len(q.data))

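The `q.len - 1` fix above is easiest to verify with concrete numbers: for a backing array of length 8 holding 3 elements starting at offset 6, the elements sit at indices 6, 7, 0, so the back element is at `(6 + 3 - 1) % 8 = 0`; the old expression `(6 + 3) % 8 = 1` pointed one slot past it. A tiny sketch of the arithmetic:

	package queue_back_index_demo

	import "core:fmt"

	main :: proc() {
		data_len, offset, q_len: uint = 8, 6, 3

		old_idx := (offset + q_len) % data_len     // 1: one past the back
		new_idx := (offset + q_len - 1) % data_len // 0: the actual back element

		fmt.println(old_idx, new_idx) // 1 0
	}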
+ 568 - 0
core/container/rbtree/rbtree.odin

@@ -0,0 +1,568 @@
+// This package implements a red-black tree
+package container_rbtree
+
+@(require) import "base:intrinsics"
+@(require) import "base:runtime"
+import "core:slice"
+
+// Originally based on the CC0 implementation from literateprograms.org
+// But with API design mimicking `core:container/avl` for ease of use.
+
+// Direction specifies the traversal direction for a tree iterator.
+Direction :: enum i8 {
+	// Backward is the in-order backwards direction.
+	Backward = -1,
+	// Forward is the in-order forwards direction.
+	Forward  = 1,
+}
+
+Ordering :: slice.Ordering
+
+// Tree is a red-black tree
+Tree :: struct($Key: typeid, $Value: typeid) {
+	// user_data is a parameter that will be passed to the on_remove
+	// callback.
+	user_data: rawptr,
+	// on_remove is an optional callback that can be called immediately
+	// after a node is removed from the tree.
+	on_remove: proc(key: Key, value: Value, user_data: rawptr),
+
+	_root:           ^Node(Key, Value),
+	_node_allocator: runtime.Allocator,
+	_cmp_fn:         proc(Key, Key) -> Ordering,
+	_size:           int,
+}
+
+// Node is a red-black tree node.
+//
+// WARNING: It is unsafe to mutate value if the node is part of a tree
+// if doing so will alter the Node's sort position relative to other
+// elements in the tree.
+Node :: struct($Key: typeid, $Value: typeid) {
+	key:    Key,
+	value:  Value,
+
+	_parent: ^Node(Key, Value),
+	_left:   ^Node(Key, Value),
+	_right:  ^Node(Key, Value),
+	_color:  Color,
+}
+
+// Might store this in the node pointer in the future, but that'll require a decent amount of rework to pass ^^N instead of ^N
+Color :: enum uintptr {Black = 0, Red = 1}
+
+// Iterator is a tree iterator.
+//
+// WARNING: It is unsafe to modify the tree while iterating, except via
+// the iterator_remove method.
+Iterator :: struct($Key: typeid, $Value: typeid) {
+	_tree:        ^Tree(Key, Value),
+	_cur:         ^Node(Key, Value),
+	_next:        ^Node(Key, Value),
+	_direction:   Direction,
+	_called_next: bool,
+}
+
+// init initializes a tree.
+init :: proc {
+	init_ordered,
+	init_cmp,
+}
+
+// init_cmp initializes a tree.
+init_cmp :: proc(t: ^$T/Tree($Key, $Value), cmp_fn: proc(a, b: Key) -> Ordering, node_allocator := context.allocator) {
+	t._root   = nil
+	t._node_allocator = node_allocator
+	t._cmp_fn = cmp_fn
+	t._size = 0
+}
+
+// init_ordered initializes a tree containing ordered keys, with
+// a comparison function that results in an ascending order sort.
+init_ordered :: proc(t: ^$T/Tree($Key, $Value), node_allocator := context.allocator) where intrinsics.type_is_ordered(Key) {
+	init_cmp(t, slice.cmp_proc(Key), node_allocator)
+}
+
+// destroy de-initializes a tree.
+destroy :: proc(t: ^$T/Tree($Key, $Value), call_on_remove: bool = true) {
+	iter := iterator(t, .Forward)
+	for _ in iterator_next(&iter) {
+		iterator_remove(&iter, call_on_remove)
+	}
+}
+
+len :: proc "contextless" (t: ^$T/Tree($Key, $Value)) -> (node_count: int) {
+	return t._size
+}
+
+// first returns the first node in the tree (in-order) or nil iff
+// the tree is empty.
+first :: proc "contextless" (t: ^$T/Tree($Key, $Value)) -> ^Node(Key, Value) {
+	return tree_first_or_last_in_order(t, Direction.Backward)
+}
+
+// last returns the last element in the tree (in-order) or nil iff
+// the tree is empty.
+last :: proc "contextless" (t: ^$T/Tree($Key, $Value)) -> ^Node(Key, Value) {
+	return tree_first_or_last_in_order(t, Direction.Forward)
+}
+
+// find finds the key in the tree, and returns the corresponding node, or nil iff the key is not present.
+find :: proc(t: ^$T/Tree($Key, $Value), key: Key) -> (node: ^Node(Key, Value)) {
+	node = t._root
+	for node != nil {
+		switch t._cmp_fn(key, node.key) {
+		case .Equal:   return node
+		case .Less:    node = node._left
+		case .Greater: node = node._right
+		}
+	}
+	return node
+}
+
+// find_value finds the key in the tree, and returns the corresponding value, with `ok = false` iff the key is not present.
+find_value :: proc(t: ^$T/Tree($Key, $Value), key: Key) -> (value: Value, ok: bool) #optional_ok {
+	if n := find(t, key); n != nil {
+		return n.value, true
+	}
+	return
+}
+
+// find_or_insert attempts to insert the value into the tree, and returns
+// the node, a boolean indicating if the value was inserted, and the
+// node allocator error if relevant.  If the key is already present, the existing node is returned unaltered.
+find_or_insert :: proc(t: ^$T/Tree($Key, $Value), key: Key, value: Value) -> (n: ^Node(Key, Value), inserted: bool, err: runtime.Allocator_Error) {
+	n_ptr := &t._root
+	for n_ptr^ != nil {
+		n = n_ptr^
+		switch t._cmp_fn(key, n.key) {
+		case .Less:
+			n_ptr = &n._left
+		case .Greater:
+			n_ptr = &n._right
+		case .Equal:
+			return
+		}
+	}
+	_parent := n
+
+	n = new_clone(Node(Key, Value){key=key, value=value, _parent=_parent, _color=.Red}, t._node_allocator) or_return
+	n_ptr^ = n
+	insert_case1(t, n)
+	t._size += 1
+	return n, true, nil
+}
+
+// remove removes a node or value from the tree, and returns true iff the
+// removal was successful.  While the node's value will be left intact,
+// the node itself will be freed via the tree's node allocator.
+remove :: proc {
+	remove_key,
+	remove_node,
+}
+
+// remove_key removes a key from the tree, and returns true iff the
+// removal was successful.  While the node's key + value will be left intact,
+// the node itself will be freed via the tree's node allocator.
+remove_key :: proc(t: ^$T/Tree($Key, $Value), key: Key, call_on_remove := true) -> bool {
+	n := find(t, key)
+	if n == nil {
+		return false // Key not found, nothing to do
+	}
+	return remove_node(t, n, call_on_remove)
+}
+
+// remove_node removes a node from the tree, and returns true iff the
+// removal was successful.  While the node's key + value will be left intact,
+// the node itself will be freed via the tree's node allocator.
+remove_node :: proc(t: ^$T/Tree($Key, $Value), node: ^$N/Node(Key, Value), call_on_remove := true) -> (found: bool) {
+	if node._parent == node || (node._parent == nil && t._root != node) {
+		return false // Don't touch self-parented or dangling nodes.
+	}
+	node := node
+	if node._left != nil && node._right != nil {
+		// Copy key + value from predecessor and delete it instead
+		predecessor := maximum_node(node._left)
+		node.key   = predecessor.key
+		node.value = predecessor.value
+		node = predecessor
+	}
+
+	child := node._right == nil ? node._left : node._right
+	if node_color(node) == .Black {
+		node._color = node_color(child)
+		remove_case1(t, node)
+	}
+	replace_node(t, node, child)
+	if node._parent == nil && child != nil {
+		child._color = .Black // root should be black
+	}
+
+	if call_on_remove && t.on_remove != nil {
+		t.on_remove(node.key, node.value, t.user_data)
+	}
+	free(node, t._node_allocator)
+	t._size -= 1
+	return true
+}
+
+// iterator returns a tree iterator in the specified direction.
+iterator :: proc "contextless" (t: ^$T/Tree($Key, $Value), direction: Direction) -> Iterator(Key, Value) {
+	it: Iterator(Key, Value)
+	it._tree      = cast(^Tree(Key, Value))t
+	it._direction = direction
+
+	iterator_first(&it)
+
+	return it
+}
+
+// iterator_from_pos returns a tree iterator in the specified direction,
+// spanning the range [pos, last] (inclusive).
+iterator_from_pos :: proc "contextless" (t: ^$T/Tree($Key, $Value), pos: ^Node(Key, Value), direction: Direction) -> Iterator(Key, Value) {
+	it: Iterator(Key, Value)
+	it._tree        = transmute(^Tree(Key, Value))t
+	it._direction   = direction
+	it._next        = nil
+	it._called_next = false
+
+	if it._cur = pos; pos != nil {
+		it._next = node_next_or_prev_in_order(it._cur, it._direction)
+	}
+
+	return it
+}
+
+// iterator_get returns the node currently pointed to by the iterator,
+// or nil iff the node has been removed, the tree is empty, or the end
+// of the tree has been reached.
+iterator_get :: proc "contextless" (it: ^$I/Iterator($Key, $Value)) -> ^Node(Key, Value) {
+	return it._cur
+}
+
+// iterator_remove removes the node currently pointed to by the iterator,
+// and returns true iff the removal was successful.  Semantics are the
+// same as the Tree remove.
+iterator_remove :: proc(it: ^$I/Iterator($Key, $Value), call_on_remove: bool = true) -> bool {
+	if it._cur == nil {
+		return false
+	}
+
+	ok := remove_node(it._tree, it._cur, call_on_remove)
+	if ok {
+		it._cur = nil
+	}
+
+	return ok
+}
+
+// iterator_next advances the iterator and returns (node, true), or
+// (nil, false) iff the end of the tree has been reached.
+//
+// Note: The first call to iterator_next will return the first node instead
+// of advancing the iterator.
+iterator_next :: proc "contextless" (it: ^$I/Iterator($Key, $Value)) -> (^Node(Key, Value), bool) {
+	// This check is needed so that the first element gets returned from
+	// a brand-new iterator, and so that the somewhat contrived case where
+	// iterator_remove is called before the first call to iterator_next
+	// returns the correct value.
+	if !it._called_next {
+		it._called_next = true
+
+		// There can be the contrived case where iterator_remove is
+		// called before ever calling iterator_next, which needs to be
+		// handled as an actual call to next.
+		//
+		// If this happens it._cur will be nil, so only return the
+		// first value, if it._cur is valid.
+		if it._cur != nil {
+			return it._cur, true
+		}
+	}
+
+	if it._next == nil {
+		return nil, false
+	}
+
+	it._cur = it._next
+	it._next = node_next_or_prev_in_order(it._cur, it._direction)
+
+	return it._cur, true
+}
+
+@(private)
+tree_first_or_last_in_order :: proc "contextless" (t: ^$T/Tree($Key, $Value), direction: Direction) -> ^Node(Key, Value) {
+	first, sign := t._root, i8(direction)
+	if first != nil {
+		for {
+			tmp := node_get_child(first, sign)
+			if tmp == nil {
+				break
+			}
+			first = tmp
+		}
+	}
+	return first
+}
+
+@(private)
+node_get_child :: #force_inline proc "contextless" (n: ^Node($Key, $Value), sign: i8) -> ^Node(Key, Value) {
+	if sign < 0 {
+		return n._left
+	}
+	return n._right
+}
+
+@(private)
+node_next_or_prev_in_order :: proc "contextless" (n: ^Node($Key, $Value), direction: Direction) -> ^Node(Key, Value) {
+	next, tmp: ^Node(Key, Value)
+	sign := i8(direction)
+
+	if next = node_get_child(n, +sign); next != nil {
+		for {
+			tmp = node_get_child(next, -sign)
+			if tmp == nil {
+				break
+			}
+			next = tmp
+		}
+	} else {
+		tmp, next = n, n._parent
+		for next != nil && tmp == node_get_child(next, +sign) {
+			tmp, next = next, next._parent
+		}
+	}
+	return next
+}
+
+@(private)
+iterator_first :: proc "contextless" (it: ^Iterator($Key, $Value)) {
+	// This is private because behavior when the user manually calls
+	// iterator_first followed by iterator_next is unintuitive, since
+	// the first call to iterator_next MUST return the first node
+	// instead of advancing so that `for node in iterator_next(&it)`
+	// works as expected.
+
+	switch it._direction {
+	case .Forward:
+		it._cur = tree_first_or_last_in_order(it._tree, .Backward)
+	case .Backward:
+		it._cur = tree_first_or_last_in_order(it._tree, .Forward)
+	}
+
+	it._next = nil
+	it._called_next = false
+
+	if it._cur != nil {
+		it._next = node_next_or_prev_in_order(it._cur, it._direction)
+	}
+}
+
+@(private)
+grand_parent :: proc(n: ^$N/Node($Key, $Value)) -> (g: ^N) {
+	return n._parent._parent
+}
+
+@(private)
+sibling :: proc(n: ^$N/Node($Key, $Value)) -> (s: ^N) {
+	if n == n._parent._left {
+		return n._parent._right
+	} else {
+		return n._parent._left
+	}
+}
+
+@(private)
+uncle :: proc(n: ^$N/Node($Key, $Value)) -> (u: ^N) {
+	return sibling(n._parent)
+}
+
+@(private)
+rotate__left :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
+	r := n._right
+	replace_node(t, n, r)
+	n._right = r._left
+	if r._left != nil {
+		r._left._parent = n
+	}
+	r._left   = n
+	n._parent = r
+}
+
+@(private)
+rotate__right :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
+	l := n._left
+	replace_node(t, n, l)
+	n._left = l._right
+	if l._right != nil {
+		l._right._parent = n
+	}
+	l._right  = n
+	n._parent = l
+}
+
+@(private)
+replace_node :: proc(t: ^$T/Tree($Key, $Value), old_n: ^$N/Node(Key, Value), new_n: ^N) {
+	if old_n._parent == nil {
+		t._root = new_n
+	} else {
+		if (old_n == old_n._parent._left) {
+			old_n._parent._left  = new_n
+		} else {
+			old_n._parent._right = new_n
+		}
+	}
+	if new_n != nil {
+		new_n._parent = old_n._parent
+	}
+}
+
+@(private)
+insert_case1 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
+	if n._parent == nil {
+		n._color = .Black
+	} else {
+		insert_case2(t, n)
+	}
+}
+
+@(private)
+insert_case2 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
+	if node_color(n._parent) == .Black {
+		return // Tree is still valid
+	} else {
+		insert_case3(t, n)
+	}
+}
+
+@(private)
+insert_case3 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
+	if node_color(uncle(n)) == .Red {
+		n._parent._color       = .Black
+		uncle(n)._color        = .Black
+		grand_parent(n)._color = .Red
+		insert_case1(t, grand_parent(n))
+	} else {
+		insert_case4(t, n)
+	}
+}
+
+@(private)
+insert_case4 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
+	n := n
+	if n == n._parent._right && n._parent == grand_parent(n)._left {
+		rotate__left(t, n._parent)
+		n = n._left
+	} else if n == n._parent._left && n._parent == grand_parent(n)._right {
+		rotate__right(t, n._parent)
+		n = n._right
+	}
+	insert_case5(t, n)
+}
+
+@(private)
+insert_case5 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
+	n._parent._color = .Black
+	grand_parent(n)._color = .Red
+	if n == n._parent._left && n._parent == grand_parent(n)._left {
+		rotate__right(t, grand_parent(n))
+	} else {
+		rotate__left(t, grand_parent(n))
+	}
+}
+
+// The maximum_node helper walks _right until it reaches the last non-leaf node.
+@(private)
+maximum_node :: proc(n: ^$N/Node($Key, $Value)) -> (max_node: ^N) {
+	n := n
+	for n._right != nil {
+		n = n._right
+	}
+	return n
+}
+
+@(private)
+remove_case1 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
+	if n._parent == nil {
+		return
+	} else {
+		remove_case2(t, n)
+	}
+}
+
+@(private)
+remove_case2 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
+	if node_color(sibling(n)) == .Red {
+		n._parent._color = .Red
+		sibling(n)._color = .Black
+		if n == n._parent._left {
+			rotate__left(t, n._parent)
+		} else {
+			rotate__right(t, n._parent)
+		}
+	}
+	remove_case3(t, n)
+}
+
+@(private)
+remove_case3 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
+	if node_color(n._parent) == .Black &&
+		node_color(sibling(n)) == .Black &&
+		node_color(sibling(n)._left) == .Black &&
+		node_color(sibling(n)._right) == .Black {
+			sibling(n)._color = .Red
+			remove_case1(t, n._parent)
+	} else {
+		remove_case4(t, n)
+	}
+}
+
+@(private)
+remove_case4 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
+	if node_color(n._parent) == .Red &&
+		node_color(sibling(n)) == .Black &&
+		node_color(sibling(n)._left) == .Black &&
+		node_color(sibling(n)._right) == .Black {
+			sibling(n)._color = .Red
+			n._parent._color = .Black
+	} else {
+		remove_case5(t, n)
+	}
+}
+
+@(private)
+remove_case5 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
+	if n == n._parent._left &&
+		node_color(sibling(n)) == .Black &&
+		node_color(sibling(n)._left) == .Red &&
+		node_color(sibling(n)._right) == .Black {
+			sibling(n)._color = .Red
+			sibling(n)._left._color = .Black
+			rotate__right(t, sibling(n))
+	} else if n == n._parent._right &&
+		node_color(sibling(n)) == .Black &&
+		node_color(sibling(n)._right) == .Red &&
+		node_color(sibling(n)._left) == .Black {
+			sibling(n)._color = .Red
+			sibling(n)._right._color = .Black
+			rotate__left(t, sibling(n))
+	}
+	remove_case6(t, n)
+}
+
+@(private)
+remove_case6 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
+	sibling(n)._color = node_color(n._parent)
+	n._parent._color = .Black
+	if n == n._parent._left {
+		sibling(n)._right._color = .Black
+		rotate__left(t, n._parent)
+	} else {
+		sibling(n)._left._color = .Black
+		rotate__right(t, n._parent)
+	}
+}
+
+node_color :: proc(n: ^$N/Node($Key, $Value)) -> (c: Color) {
+	return n == nil ? .Black : n._color
+}

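The new package has no doc example, so here is a minimal usage sketch built only from the procedures added above (`init`, `find_or_insert`, `iterator`, `iterator_next`, `destroy`); the import alias is an assumption:

	package rbtree_demo

	import "core:fmt"
	import rb "core:container/rbtree"

	main :: proc() {
		t: rb.Tree(int, string)
		rb.init(&t) // resolves to init_ordered: int is an ordered key type
		defer rb.destroy(&t)

		rb.find_or_insert(&t, 2, "two")
		rb.find_or_insert(&t, 1, "one")
		rb.find_or_insert(&t, 3, "three")

		it := rb.iterator(&t, .Forward)
		for n in rb.iterator_next(&it) {
			fmt.println(n.key, n.value) // 1 one, 2 two, 3 three, in order
		}
	}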
+ 4 - 4
core/container/small_array/small_array.odin

@@ -119,20 +119,20 @@ consume :: proc "odin" (a: ^$A/Small_Array($N, $T), count: int, loc := #caller_l
 }
 
 ordered_remove :: proc "contextless" (a: ^$A/Small_Array($N, $T), index: int, loc := #caller_location) #no_bounds_check {
-    runtime.bounds_check_error_loc(loc, index, a.len)
-    if index+1 < a.len {
+	runtime.bounds_check_error_loc(loc, index, a.len)
+	if index+1 < a.len {
 		copy(a.data[index:], a.data[index+1:])
 	}
 	a.len -= 1
 }
 
 unordered_remove :: proc "contextless" (a: ^$A/Small_Array($N, $T), index: int, loc := #caller_location) #no_bounds_check {
-    runtime.bounds_check_error_loc(loc, index, a.len)
+	runtime.bounds_check_error_loc(loc, index, a.len)
 	n := a.len-1
 	if index != n {
 		a.data[index] = a.data[n]
 	}
-    a.len -= 1
+	a.len -= 1
 }
 
 clear :: proc "contextless" (a: ^$A/Small_Array($N, $T)) {

+ 1 - 1
core/container/topological_sort/topological_sort.odin

@@ -61,7 +61,7 @@ add_dependency :: proc(sorter: ^$S/Sorter($K), key, dependency: K) -> bool {
 	}
 	find.dependents[key] = true
 
- 	find = &sorter.relations[key]
+	find = &sorter.relations[key]
 	if find == nil {
 		find = map_insert(&sorter.relations, key, make_relations(sorter))
 	}

+ 28 - 0
core/crypto/_aes/aes.odin

@@ -0,0 +1,28 @@
+package _aes
+
+// KEY_SIZE_128 is the AES-128 key size in bytes.
+KEY_SIZE_128 :: 16
+// KEY_SIZE_192 is the AES-192 key size in bytes.
+KEY_SIZE_192 :: 24
+// KEY_SIZE_256 is the AES-256 key size in bytes.
+KEY_SIZE_256 :: 32
+
+// BLOCK_SIZE is the AES block size in bytes.
+BLOCK_SIZE :: 16
+
+// ROUNDS_128 is the number of rounds for AES-128.
+ROUNDS_128 :: 10
+// ROUNDS_192 is the number of rounds for AES-192.
+ROUNDS_192 :: 12
+// ROUNDS_256 is the number of rounds for AES-256.
+ROUNDS_256 :: 14
+
+// GHASH_KEY_SIZE is the GHASH key size in bytes.
+GHASH_KEY_SIZE :: 16
+// GHASH_BLOCK_SIZE is the GHASH block size in bytes.
+GHASH_BLOCK_SIZE :: 16
+// GHASH_TAG_SIZE is the GHASH tag size in bytes.
+GHASH_TAG_SIZE :: 16
+
+// RCON is the AES keyschedule round constants.
+RCON := [10]byte{0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36}

+ 86 - 0
core/crypto/_aes/ct64/api.odin

@@ -0,0 +1,86 @@
+package aes_ct64
+
+import "base:intrinsics"
+import "core:mem"
+
+STRIDE :: 4
+
+// Context is a keyed AES (ECB) instance.
+Context :: struct {
+	_sk_exp:     [120]u64,
+	_num_rounds: int,
+}
+
+// init initializes a context for AES with the provided key.
+init :: proc(ctx: ^Context, key: []byte) {
+	skey: [30]u64 = ---
+
+	ctx._num_rounds = keysched(skey[:], key)
+	skey_expand(ctx._sk_exp[:], skey[:], ctx._num_rounds)
+}
+
+// encrypt_block sets `dst` to `AES-ECB-Encrypt(src)`.
+encrypt_block :: proc(ctx: ^Context, dst, src: []byte) {
+	q: [8]u64
+	load_blockx1(&q, src)
+	_encrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
+	store_blockx1(dst, &q)
+}
+
+// decrypt_block sets `dst` to `AES-ECB-Decrypt(src)`.
+decrypt_block :: proc(ctx: ^Context, dst, src: []byte) {
+	q: [8]u64
+	load_blockx1(&q, src)
+	_decrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
+	store_blockx1(dst, &q)
+}
+
+// encrypt_blocks sets `dst` to `AES-ECB-Encrypt(src[0], .. src[n])`.
+encrypt_blocks :: proc(ctx: ^Context, dst, src: [][]byte) {
+	q: [8]u64 = ---
+	src, dst := src, dst
+
+	n := len(src)
+	for n > 4 {
+		load_blocks(&q, src[0:4])
+		_encrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
+		store_blocks(dst[0:4], &q)
+
+		src = src[4:]
+		dst = dst[4:]
+		n -= 4
+	}
+	if n > 0 {
+		load_blocks(&q, src)
+		_encrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
+		store_blocks(dst, &q)
+	}
+}
+
+// decrypt_blocks sets `dst` to `AES-ECB-Decrypt(src[0], .. src[n])`.
+decrypt_blocks :: proc(ctx: ^Context, dst, src: [][]byte) {
+	q: [8]u64 = ---
+	src, dst := src, dst
+
+	n := len(src)
+	for n > 4 {
+		load_blocks(&q, src[0:4])
+		_decrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
+		store_blocks(dst[0:4], &q)
+
+		src = src[4:]
+		dst = dst[4:]
+		n -= 4
+	}
+	if n > 0 {
+		load_blocks(&q, src)
+		_decrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
+		store_blocks(dst, &q)
+	}
+}
+
+// reset sanitizes the Context.  The Context must be re-initialized to
+// be used again.
+reset :: proc(ctx: ^Context) {
+	mem.zero_explicit(ctx, size_of(ctx^)) // zero the whole Context, not just the pointer
+}

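A hedged sketch of driving this API; note that `_aes`-prefixed packages are internal to `core:crypto`, so importing one directly (done here purely for illustration) is an assumption rather than a supported use:

	package ct64_demo

	import ct64 "core:crypto/_aes/ct64"

	main :: proc() {
		key: [16]byte // AES-128 all-zero key, placeholder only
		ctx: ct64.Context
		ct64.init(&ctx, key[:])
		defer ct64.reset(&ctx)

		src, dst: [16]byte
		ct64.encrypt_block(&ctx, dst[:], src[:])
		ct64.decrypt_block(&ctx, src[:], dst[:]) // round-trips back to the plaintext
	}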
+ 265 - 0
core/crypto/_aes/ct64/ct64.odin

@@ -0,0 +1,265 @@
+// Copyright (c) 2016 Thomas Pornin <[email protected]>
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//   1. Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package aes_ct64
+
+import "base:intrinsics"
+
+// Bitsliced AES for 64-bit general purpose (integer) registers.  Each
+// invocation will process up to 4 blocks at a time.  This implementation
+// is derived from the BearSSL ct64 code, and distributed under a 1-clause
+// BSD license with permission from the original author.
+//
+// WARNING: "hic sunt dracones"
+//
+// This package also deliberately exposes enough internals to be able to
+// function as a replacement for `AESENC` and `AESDEC` from AES-NI, to
+// allow the implementation of non-AES primitives that use the AES round
+// function such as AEGIS and Deoxys-II.  This should ONLY be done when
+// implementing something other than AES itself.
+
+sub_bytes :: proc "contextless" (q: ^[8]u64) {
+	// This S-box implementation is a straightforward translation of
+	// the circuit described by Boyar and Peralta in "A new
+	// combinational logic minimization technique with applications
+	// to cryptology" (https://eprint.iacr.org/2009/191.pdf).
+	//
+	// Note that variables x* (input) and s* (output) are numbered
+	// in "reverse" order (x0 is the high bit, x7 is the low bit).
+
+	x0 := q[7]
+	x1 := q[6]
+	x2 := q[5]
+	x3 := q[4]
+	x4 := q[3]
+	x5 := q[2]
+	x6 := q[1]
+	x7 := q[0]
+
+	// Top linear transformation.
+	y14 := x3 ~ x5
+	y13 := x0 ~ x6
+	y9 := x0 ~ x3
+	y8 := x0 ~ x5
+	t0 := x1 ~ x2
+	y1 := t0 ~ x7
+	y4 := y1 ~ x3
+	y12 := y13 ~ y14
+	y2 := y1 ~ x0
+	y5 := y1 ~ x6
+	y3 := y5 ~ y8
+	t1 := x4 ~ y12
+	y15 := t1 ~ x5
+	y20 := t1 ~ x1
+	y6 := y15 ~ x7
+	y10 := y15 ~ t0
+	y11 := y20 ~ y9
+	y7 := x7 ~ y11
+	y17 := y10 ~ y11
+	y19 := y10 ~ y8
+	y16 := t0 ~ y11
+	y21 := y13 ~ y16
+	y18 := x0 ~ y16
+
+	// Non-linear section.
+	t2 := y12 & y15
+	t3 := y3 & y6
+	t4 := t3 ~ t2
+	t5 := y4 & x7
+	t6 := t5 ~ t2
+	t7 := y13 & y16
+	t8 := y5 & y1
+	t9 := t8 ~ t7
+	t10 := y2 & y7
+	t11 := t10 ~ t7
+	t12 := y9 & y11
+	t13 := y14 & y17
+	t14 := t13 ~ t12
+	t15 := y8 & y10
+	t16 := t15 ~ t12
+	t17 := t4 ~ t14
+	t18 := t6 ~ t16
+	t19 := t9 ~ t14
+	t20 := t11 ~ t16
+	t21 := t17 ~ y20
+	t22 := t18 ~ y19
+	t23 := t19 ~ y21
+	t24 := t20 ~ y18
+
+	t25 := t21 ~ t22
+	t26 := t21 & t23
+	t27 := t24 ~ t26
+	t28 := t25 & t27
+	t29 := t28 ~ t22
+	t30 := t23 ~ t24
+	t31 := t22 ~ t26
+	t32 := t31 & t30
+	t33 := t32 ~ t24
+	t34 := t23 ~ t33
+	t35 := t27 ~ t33
+	t36 := t24 & t35
+	t37 := t36 ~ t34
+	t38 := t27 ~ t36
+	t39 := t29 & t38
+	t40 := t25 ~ t39
+
+	t41 := t40 ~ t37
+	t42 := t29 ~ t33
+	t43 := t29 ~ t40
+	t44 := t33 ~ t37
+	t45 := t42 ~ t41
+	z0 := t44 & y15
+	z1 := t37 & y6
+	z2 := t33 & x7
+	z3 := t43 & y16
+	z4 := t40 & y1
+	z5 := t29 & y7
+	z6 := t42 & y11
+	z7 := t45 & y17
+	z8 := t41 & y10
+	z9 := t44 & y12
+	z10 := t37 & y3
+	z11 := t33 & y4
+	z12 := t43 & y13
+	z13 := t40 & y5
+	z14 := t29 & y2
+	z15 := t42 & y9
+	z16 := t45 & y14
+	z17 := t41 & y8
+
+	// Bottom linear transformation.
+	t46 := z15 ~ z16
+	t47 := z10 ~ z11
+	t48 := z5 ~ z13
+	t49 := z9 ~ z10
+	t50 := z2 ~ z12
+	t51 := z2 ~ z5
+	t52 := z7 ~ z8
+	t53 := z0 ~ z3
+	t54 := z6 ~ z7
+	t55 := z16 ~ z17
+	t56 := z12 ~ t48
+	t57 := t50 ~ t53
+	t58 := z4 ~ t46
+	t59 := z3 ~ t54
+	t60 := t46 ~ t57
+	t61 := z14 ~ t57
+	t62 := t52 ~ t58
+	t63 := t49 ~ t58
+	t64 := z4 ~ t59
+	t65 := t61 ~ t62
+	t66 := z1 ~ t63
+	s0 := t59 ~ t63
+	s6 := t56 ~ ~t62
+	s7 := t48 ~ ~t60
+	t67 := t64 ~ t65
+	s3 := t53 ~ t66
+	s4 := t51 ~ t66
+	s5 := t47 ~ t65
+	s1 := t64 ~ ~s3
+	s2 := t55 ~ ~t67
+
+	q[7] = s0
+	q[6] = s1
+	q[5] = s2
+	q[4] = s3
+	q[3] = s4
+	q[2] = s5
+	q[1] = s6
+	q[0] = s7
+}
+
+orthogonalize :: proc "contextless" (q: ^[8]u64) {
+	CL2 :: 0x5555555555555555
+	CH2 :: 0xAAAAAAAAAAAAAAAA
+	q[0], q[1] = (q[0] & CL2) | ((q[1] & CL2) << 1), ((q[0] & CH2) >> 1) | (q[1] & CH2)
+	q[2], q[3] = (q[2] & CL2) | ((q[3] & CL2) << 1), ((q[2] & CH2) >> 1) | (q[3] & CH2)
+	q[4], q[5] = (q[4] & CL2) | ((q[5] & CL2) << 1), ((q[4] & CH2) >> 1) | (q[5] & CH2)
+	q[6], q[7] = (q[6] & CL2) | ((q[7] & CL2) << 1), ((q[6] & CH2) >> 1) | (q[7] & CH2)
+
+	CL4 :: 0x3333333333333333
+	CH4 :: 0xCCCCCCCCCCCCCCCC
+	q[0], q[2] = (q[0] & CL4) | ((q[2] & CL4) << 2), ((q[0] & CH4) >> 2) | (q[2] & CH4)
+	q[1], q[3] = (q[1] & CL4) | ((q[3] & CL4) << 2), ((q[1] & CH4) >> 2) | (q[3] & CH4)
+	q[4], q[6] = (q[4] & CL4) | ((q[6] & CL4) << 2), ((q[4] & CH4) >> 2) | (q[6] & CH4)
+	q[5], q[7] = (q[5] & CL4) | ((q[7] & CL4) << 2), ((q[5] & CH4) >> 2) | (q[7] & CH4)
+
+	CL8 :: 0x0F0F0F0F0F0F0F0F
+	CH8 :: 0xF0F0F0F0F0F0F0F0
+	q[0], q[4] = (q[0] & CL8) | ((q[4] & CL8) << 4), ((q[0] & CH8) >> 4) | (q[4] & CH8)
+	q[1], q[5] = (q[1] & CL8) | ((q[5] & CL8) << 4), ((q[1] & CH8) >> 4) | (q[5] & CH8)
+	q[2], q[6] = (q[2] & CL8) | ((q[6] & CL8) << 4), ((q[2] & CH8) >> 4) | (q[6] & CH8)
+	q[3], q[7] = (q[3] & CL8) | ((q[7] & CL8) << 4), ((q[3] & CH8) >> 4) | (q[7] & CH8)
+}
+
+@(require_results)
+interleave_in :: proc "contextless" (w: []u32) -> (q0, q1: u64) #no_bounds_check {
+	if len(w) < 4 {
+		intrinsics.trap()
+	}
+	x0, x1, x2, x3 := u64(w[0]), u64(w[1]), u64(w[2]), u64(w[3])
+	x0 |= (x0 << 16)
+	x1 |= (x1 << 16)
+	x2 |= (x2 << 16)
+	x3 |= (x3 << 16)
+	x0 &= 0x0000FFFF0000FFFF
+	x1 &= 0x0000FFFF0000FFFF
+	x2 &= 0x0000FFFF0000FFFF
+	x3 &= 0x0000FFFF0000FFFF
+	x0 |= (x0 << 8)
+	x1 |= (x1 << 8)
+	x2 |= (x2 << 8)
+	x3 |= (x3 << 8)
+	x0 &= 0x00FF00FF00FF00FF
+	x1 &= 0x00FF00FF00FF00FF
+	x2 &= 0x00FF00FF00FF00FF
+	x3 &= 0x00FF00FF00FF00FF
+	q0 = x0 | (x2 << 8)
+	q1 = x1 | (x3 << 8)
+	return
+}
+
+@(require_results)
+interleave_out :: proc "contextless" (q0, q1: u64) -> (w0, w1, w2, w3: u32) {
+	x0 := q0 & 0x00FF00FF00FF00FF
+	x1 := q1 & 0x00FF00FF00FF00FF
+	x2 := (q0 >> 8) & 0x00FF00FF00FF00FF
+	x3 := (q1 >> 8) & 0x00FF00FF00FF00FF
+	x0 |= (x0 >> 8)
+	x1 |= (x1 >> 8)
+	x2 |= (x2 >> 8)
+	x3 |= (x3 >> 8)
+	x0 &= 0x0000FFFF0000FFFF
+	x1 &= 0x0000FFFF0000FFFF
+	x2 &= 0x0000FFFF0000FFFF
+	x3 &= 0x0000FFFF0000FFFF
+	w0 = u32(x0) | u32(x0 >> 16)
+	w1 = u32(x1) | u32(x1 >> 16)
+	w2 = u32(x2) | u32(x2 >> 16)
+	w3 = u32(x3) | u32(x3 >> 16)
+	return
+}
+
+@(private)
+rotr32 :: #force_inline proc "contextless" (x: u64) -> u64 {
+	return (x << 32) | (x >> 32)
+}

+ 135 - 0
core/crypto/_aes/ct64/ct64_dec.odin

@@ -0,0 +1,135 @@
+// Copyright (c) 2016 Thomas Pornin <[email protected]>
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//   1. Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package aes_ct64
+
+import "base:intrinsics"
+
+inv_sub_bytes :: proc "contextless" (q: ^[8]u64) {
+	// AES S-box is:
+	//   S(x) = A(I(x)) ^ 0x63
+	// where I() is inversion in GF(256), and A() is a linear
+	// transform (0 is formally defined to be its own inverse).
+	// Since inversion is an involution, the inverse S-box can be
+	// computed from the S-box as:
+	//   iS(x) = B(S(B(x ^ 0x63)) ^ 0x63)
+	// where B() is the inverse of A(). Indeed, for any y in GF(256):
+	//   iS(S(y)) = B(A(I(B(A(I(y)) ^ 0x63 ^ 0x63))) ^ 0x63 ^ 0x63) = y
+	//
+	// Note: we reuse the implementation of the forward S-box,
+	// instead of duplicating it here, so that total code size is
+	// lower. By merging the B() transforms into the S-box circuit
+	// we could make faster CBC decryption, but CBC decryption is
+	// already quite faster than CBC encryption because we can
+	// process four blocks in parallel.
+
+	q0 := ~q[0]
+	q1 := ~q[1]
+	q2 := q[2]
+	q3 := q[3]
+	q4 := q[4]
+	q5 := ~q[5]
+	q6 := ~q[6]
+	q7 := q[7]
+	q[7] = q1 ~ q4 ~ q6
+	q[6] = q0 ~ q3 ~ q5
+	q[5] = q7 ~ q2 ~ q4
+	q[4] = q6 ~ q1 ~ q3
+	q[3] = q5 ~ q0 ~ q2
+	q[2] = q4 ~ q7 ~ q1
+	q[1] = q3 ~ q6 ~ q0
+	q[0] = q2 ~ q5 ~ q7
+
+	sub_bytes(q)
+
+	q0 = ~q[0]
+	q1 = ~q[1]
+	q2 = q[2]
+	q3 = q[3]
+	q4 = q[4]
+	q5 = ~q[5]
+	q6 = ~q[6]
+	q7 = q[7]
+	q[7] = q1 ~ q4 ~ q6
+	q[6] = q0 ~ q3 ~ q5
+	q[5] = q7 ~ q2 ~ q4
+	q[4] = q6 ~ q1 ~ q3
+	q[3] = q5 ~ q0 ~ q2
+	q[2] = q4 ~ q7 ~ q1
+	q[1] = q3 ~ q6 ~ q0
+	q[0] = q2 ~ q5 ~ q7
+}
+
+inv_shift_rows :: proc "contextless" (q: ^[8]u64) {
+	for x, i in q {
+		q[i] =
+			(x & 0x000000000000FFFF) |
+			((x & 0x000000000FFF0000) << 4) |
+			((x & 0x00000000F0000000) >> 12) |
+			((x & 0x000000FF00000000) << 8) |
+			((x & 0x0000FF0000000000) >> 8) |
+			((x & 0x000F000000000000) << 12) |
+			((x & 0xFFF0000000000000) >> 4)
+	}
+}
+
+inv_mix_columns :: proc "contextless" (q: ^[8]u64) {
+	q0 := q[0]
+	q1 := q[1]
+	q2 := q[2]
+	q3 := q[3]
+	q4 := q[4]
+	q5 := q[5]
+	q6 := q[6]
+	q7 := q[7]
+	r0 := (q0 >> 16) | (q0 << 48)
+	r1 := (q1 >> 16) | (q1 << 48)
+	r2 := (q2 >> 16) | (q2 << 48)
+	r3 := (q3 >> 16) | (q3 << 48)
+	r4 := (q4 >> 16) | (q4 << 48)
+	r5 := (q5 >> 16) | (q5 << 48)
+	r6 := (q6 >> 16) | (q6 << 48)
+	r7 := (q7 >> 16) | (q7 << 48)
+
+	q[0] = q5 ~ q6 ~ q7 ~ r0 ~ r5 ~ r7 ~ rotr32(q0 ~ q5 ~ q6 ~ r0 ~ r5)
+	q[1] = q0 ~ q5 ~ r0 ~ r1 ~ r5 ~ r6 ~ r7 ~ rotr32(q1 ~ q5 ~ q7 ~ r1 ~ r5 ~ r6)
+	q[2] = q0 ~ q1 ~ q6 ~ r1 ~ r2 ~ r6 ~ r7 ~ rotr32(q0 ~ q2 ~ q6 ~ r2 ~ r6 ~ r7)
+	q[3] = q0 ~ q1 ~ q2 ~ q5 ~ q6 ~ r0 ~ r2 ~ r3 ~ r5 ~ rotr32(q0 ~ q1 ~ q3 ~ q5 ~ q6 ~ q7 ~ r0 ~ r3 ~ r5 ~ r7)
+	q[4] = q1 ~ q2 ~ q3 ~ q5 ~ r1 ~ r3 ~ r4 ~ r5 ~ r6 ~ r7 ~ rotr32(q1 ~ q2 ~ q4 ~ q5 ~ q7 ~ r1 ~ r4 ~ r5 ~ r6)
+	q[5] = q2 ~ q3 ~ q4 ~ q6 ~ r2 ~ r4 ~ r5 ~ r6 ~ r7 ~ rotr32(q2 ~ q3 ~ q5 ~ q6 ~ r2 ~ r5 ~ r6 ~ r7)
+	q[6] = q3 ~ q4 ~ q5 ~ q7 ~ r3 ~ r5 ~ r6 ~ r7 ~ rotr32(q3 ~ q4 ~ q6 ~ q7 ~ r3 ~ r6 ~ r7)
+	q[7] = q4 ~ q5 ~ q6 ~ r4 ~ r6 ~ r7 ~ rotr32(q4 ~ q5 ~ q7 ~ r4 ~ r7)
+}
+
+@(private)
+_decrypt :: proc "contextless" (q: ^[8]u64, skey: []u64, num_rounds: int) {
+	add_round_key(q, skey[num_rounds << 3:])
+	for u := num_rounds - 1; u > 0; u -= 1 {
+		inv_shift_rows(q)
+		inv_sub_bytes(q)
+		add_round_key(q, skey[u << 3:])
+		inv_mix_columns(q)
+	}
+	inv_shift_rows(q)
+	inv_sub_bytes(q)
+	add_round_key(q, skey)
+}

+ 95 - 0
core/crypto/_aes/ct64/ct64_enc.odin

@@ -0,0 +1,95 @@
+// Copyright (c) 2016 Thomas Pornin <[email protected]>
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//   1. Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package aes_ct64
+
+import "base:intrinsics"
+
+add_round_key :: proc "contextless" (q: ^[8]u64, sk: []u64) #no_bounds_check {
+	if len(sk) < 8 {
+		intrinsics.trap()
+	}
+
+	q[0] ~= sk[0]
+	q[1] ~= sk[1]
+	q[2] ~= sk[2]
+	q[3] ~= sk[3]
+	q[4] ~= sk[4]
+	q[5] ~= sk[5]
+	q[6] ~= sk[6]
+	q[7] ~= sk[7]
+}
+
+shift_rows :: proc "contextless" (q: ^[8]u64) {
+	for x, i in q {
+		q[i] =
+			(x & 0x000000000000FFFF) |
+			((x & 0x00000000FFF00000) >> 4) |
+			((x & 0x00000000000F0000) << 12) |
+			((x & 0x0000FF0000000000) >> 8) |
+			((x & 0x000000FF00000000) << 8) |
+			((x & 0xF000000000000000) >> 12) |
+			((x & 0x0FFF000000000000) << 4)
+	}
+}
+
+mix_columns :: proc "contextless" (q: ^[8]u64) {
+	q0 := q[0]
+	q1 := q[1]
+	q2 := q[2]
+	q3 := q[3]
+	q4 := q[4]
+	q5 := q[5]
+	q6 := q[6]
+	q7 := q[7]
+	r0 := (q0 >> 16) | (q0 << 48)
+	r1 := (q1 >> 16) | (q1 << 48)
+	r2 := (q2 >> 16) | (q2 << 48)
+	r3 := (q3 >> 16) | (q3 << 48)
+	r4 := (q4 >> 16) | (q4 << 48)
+	r5 := (q5 >> 16) | (q5 << 48)
+	r6 := (q6 >> 16) | (q6 << 48)
+	r7 := (q7 >> 16) | (q7 << 48)
+
+	q[0] = q7 ~ r7 ~ r0 ~ rotr32(q0 ~ r0)
+	q[1] = q0 ~ r0 ~ q7 ~ r7 ~ r1 ~ rotr32(q1 ~ r1)
+	q[2] = q1 ~ r1 ~ r2 ~ rotr32(q2 ~ r2)
+	q[3] = q2 ~ r2 ~ q7 ~ r7 ~ r3 ~ rotr32(q3 ~ r3)
+	q[4] = q3 ~ r3 ~ q7 ~ r7 ~ r4 ~ rotr32(q4 ~ r4)
+	q[5] = q4 ~ r4 ~ r5 ~ rotr32(q5 ~ r5)
+	q[6] = q5 ~ r5 ~ r6 ~ rotr32(q6 ~ r6)
+	q[7] = q6 ~ r6 ~ r7 ~ rotr32(q7 ~ r7)
+}
+
+@(private)
+_encrypt :: proc "contextless" (q: ^[8]u64, skey: []u64, num_rounds: int) {
+	add_round_key(q, skey)
+	for u in 1 ..< num_rounds {
+		sub_bytes(q)
+		shift_rows(q)
+		mix_columns(q)
+		add_round_key(q, skey[u << 3:])
+	}
+	sub_bytes(q)
+	shift_rows(q)
+	add_round_key(q, skey[num_rounds << 3:])
+}

+ 179 - 0
core/crypto/_aes/ct64/ct64_keysched.odin

@@ -0,0 +1,179 @@
+// Copyright (c) 2016 Thomas Pornin <[email protected]>
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//   1. Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package aes_ct64
+
+import "base:intrinsics"
+import "core:crypto/_aes"
+import "core:encoding/endian"
+import "core:mem"
+
+@(private, require_results)
+sub_word :: proc "contextless" (x: u32) -> u32 {
+	q := [8]u64{u64(x), 0, 0, 0, 0, 0, 0, 0}
+
+	orthogonalize(&q)
+	sub_bytes(&q)
+	orthogonalize(&q)
+	ret := u32(q[0])
+
+	mem.zero_explicit(&q[0], size_of(u64))
+
+	return ret
+}
+
+@(private, require_results)
+keysched :: proc(comp_skey: []u64, key: []byte) -> int {
+	num_rounds, key_len := 0, len(key)
+	switch key_len {
+	case _aes.KEY_SIZE_128:
+		num_rounds = _aes.ROUNDS_128
+	case _aes.KEY_SIZE_192:
+		num_rounds = _aes.ROUNDS_192
+	case _aes.KEY_SIZE_256:
+		num_rounds = _aes.ROUNDS_256
+	case:
+		panic("crypto/aes: invalid AES key size")
+	}
+
+	skey: [60]u32 = ---
+	nk, nkf := key_len >> 2, (num_rounds + 1) << 2
+	for i in 0 ..< nk {
+		skey[i] = endian.unchecked_get_u32le(key[i << 2:])
+	}
+	tmp := skey[(key_len >> 2) - 1]
+	for i, j, k := nk, 0, 0; i < nkf; i += 1 {
+		if j == 0 {
+			tmp = (tmp << 24) | (tmp >> 8)
+			tmp = sub_word(tmp) ~ u32(_aes.RCON[k])
+		} else if nk > 6 && j == 4 {
+			tmp = sub_word(tmp)
+		}
+		tmp ~= skey[i - nk]
+		skey[i] = tmp
+		if j += 1; j == nk {
+			j = 0
+			k += 1
+		}
+	}
+
+	q: [8]u64 = ---
+	for i, j := 0, 0; i < nkf; i, j = i + 4, j + 2 {
+		q[0], q[4] = interleave_in(skey[i:])
+		q[1] = q[0]
+		q[2] = q[0]
+		q[3] = q[0]
+		q[5] = q[4]
+		q[6] = q[4]
+		q[7] = q[4]
+		orthogonalize(&q)
+		comp_skey[j + 0] =
+			(q[0] & 0x1111111111111111) |
+			(q[1] & 0x2222222222222222) |
+			(q[2] & 0x4444444444444444) |
+			(q[3] & 0x8888888888888888)
+		comp_skey[j + 1] =
+			(q[4] & 0x1111111111111111) |
+			(q[5] & 0x2222222222222222) |
+			(q[6] & 0x4444444444444444) |
+			(q[7] & 0x8888888888888888)
+	}
+
+	mem.zero_explicit(&skey, size_of(skey))
+	mem.zero_explicit(&q, size_of(q))
+
+	return num_rounds
+}
+
+@(private)
+skey_expand :: proc "contextless" (skey, comp_skey: []u64, num_rounds: int) {
+	n := (num_rounds + 1) << 1
+	for u, v := 0, 0; u < n; u, v = u + 1, v + 4 {
+		x0 := comp_skey[u]
+		x1, x2, x3 := x0, x0, x0
+		x0 &= 0x1111111111111111
+		x1 &= 0x2222222222222222
+		x2 &= 0x4444444444444444
+		x3 &= 0x8888888888888888
+		x1 >>= 1
+		x2 >>= 2
+		x3 >>= 3
+		skey[v + 0] = (x0 << 4) - x0
+		skey[v + 1] = (x1 << 4) - x1
+		skey[v + 2] = (x2 << 4) - x2
+		skey[v + 3] = (x3 << 4) - x3
+	}
+}
+
+orthogonalize_roundkey :: proc "contextless" (qq: []u64, key: []byte) {
+	if len(qq) < 8 || len(key) != 16 {
+		intrinsics.trap()
+	}
+
+	skey: [4]u32 = ---
+	skey[0] = endian.unchecked_get_u32le(key[0:])
+	skey[1] = endian.unchecked_get_u32le(key[4:])
+	skey[2] = endian.unchecked_get_u32le(key[8:])
+	skey[3] = endian.unchecked_get_u32le(key[12:])
+
+	q: [8]u64 = ---
+	q[0], q[4] = interleave_in(skey[:])
+	q[1] = q[0]
+	q[2] = q[0]
+	q[3] = q[0]
+	q[5] = q[4]
+	q[6] = q[4]
+	q[7] = q[4]
+	orthogonalize(&q)
+
+	comp_skey: [2]u64 = ---
+	comp_skey[0] =
+		(q[0] & 0x1111111111111111) |
+		(q[1] & 0x2222222222222222) |
+		(q[2] & 0x4444444444444444) |
+		(q[3] & 0x8888888888888888)
+	comp_skey[1] =
+		(q[4] & 0x1111111111111111) |
+		(q[5] & 0x2222222222222222) |
+		(q[6] & 0x4444444444444444) |
+		(q[7] & 0x8888888888888888)
+
+	for x, u in comp_skey {
+		x0 := x
+		x1, x2, x3 := x0, x0, x0
+		x0 &= 0x1111111111111111
+		x1 &= 0x2222222222222222
+		x2 &= 0x4444444444444444
+		x3 &= 0x8888888888888888
+		x1 >>= 1
+		x2 >>= 2
+		x3 >>= 3
+		qq[u * 4 + 0] = (x0 << 4) - x0
+		qq[u * 4 + 1] = (x1 << 4) - x1
+		qq[u * 4 + 2] = (x2 << 4) - x2
+		qq[u * 4 + 3] = (x3 << 4) - x3
+	}
+
+	mem.zero_explicit(&skey, size_of(skey))
+	mem.zero_explicit(&q, size_of(q))
+	mem.zero_explicit(&comp_skey, size_of(comp_skey))
+}

+ 136 - 0
core/crypto/_aes/ct64/ghash.odin

@@ -0,0 +1,136 @@
+// Copyright (c) 2016 Thomas Pornin <[email protected]>
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//   1. Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package aes_ct64
+
+import "base:intrinsics"
+import "core:crypto/_aes"
+import "core:encoding/endian"
+
+@(private = "file")
+bmul64 :: proc "contextless" (x, y: u64) -> u64 {
+	x0 := x & 0x1111111111111111
+	x1 := x & 0x2222222222222222
+	x2 := x & 0x4444444444444444
+	x3 := x & 0x8888888888888888
+	y0 := y & 0x1111111111111111
+	y1 := y & 0x2222222222222222
+	y2 := y & 0x4444444444444444
+	y3 := y & 0x8888888888888888
+	z0 := (x0 * y0) ~ (x1 * y3) ~ (x2 * y2) ~ (x3 * y1)
+	z1 := (x0 * y1) ~ (x1 * y0) ~ (x2 * y3) ~ (x3 * y2)
+	z2 := (x0 * y2) ~ (x1 * y1) ~ (x2 * y0) ~ (x3 * y3)
+	z3 := (x0 * y3) ~ (x1 * y2) ~ (x2 * y1) ~ (x3 * y0)
+	z0 &= 0x1111111111111111
+	z1 &= 0x2222222222222222
+	z2 &= 0x4444444444444444
+	z3 &= 0x8888888888888888
+	return z0 | z1 | z2 | z3
+}
+
+@(private = "file")
+rev64 :: proc "contextless" (x: u64) -> u64 {
+	x := x
+	x = ((x & 0x5555555555555555) << 1) | ((x >> 1) & 0x5555555555555555)
+	x = ((x & 0x3333333333333333) << 2) | ((x >> 2) & 0x3333333333333333)
+	x = ((x & 0x0F0F0F0F0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F0F0F0F0F)
+	x = ((x & 0x00FF00FF00FF00FF) << 8) | ((x >> 8) & 0x00FF00FF00FF00FF)
+	x = ((x & 0x0000FFFF0000FFFF) << 16) | ((x >> 16) & 0x0000FFFF0000FFFF)
+	return (x << 32) | (x >> 32)
+}
+
+// ghash calculates the GHASH with the key `key` over the inputs `dst` and
+// `data`, and stores the resulting digest in `dst`.
+//
+// Note: `dst` is both an input and an output, to support easy implementation
+// of GCM.
+ghash :: proc "contextless" (dst, key, data: []byte) {
+	if len(dst) != _aes.GHASH_BLOCK_SIZE || len(key) != _aes.GHASH_BLOCK_SIZE {
+		intrinsics.trap()
+	}
+
+	buf := data
+	l := len(buf)
+
+	y1 := endian.unchecked_get_u64be(dst[0:])
+	y0 := endian.unchecked_get_u64be(dst[8:])
+	h1 := endian.unchecked_get_u64be(key[0:])
+	h0 := endian.unchecked_get_u64be(key[8:])
+	h0r := rev64(h0)
+	h1r := rev64(h1)
+	h2 := h0 ~ h1
+	h2r := h0r ~ h1r
+
+	src: []byte
+	for l > 0 {
+		if l >= _aes.GHASH_BLOCK_SIZE {
+			src = buf
+			buf = buf[_aes.GHASH_BLOCK_SIZE:]
+			l -= _aes.GHASH_BLOCK_SIZE
+		} else {
+			tmp: [_aes.GHASH_BLOCK_SIZE]byte
+			copy(tmp[:], buf)
+			src = tmp[:]
+			l = 0
+		}
+		y1 ~= endian.unchecked_get_u64be(src)
+		y0 ~= endian.unchecked_get_u64be(src[8:])
+
+		y0r := rev64(y0)
+		y1r := rev64(y1)
+		y2 := y0 ~ y1
+		y2r := y0r ~ y1r
+
+		z0 := bmul64(y0, h0)
+		z1 := bmul64(y1, h1)
+		z2 := bmul64(y2, h2)
+		z0h := bmul64(y0r, h0r)
+		z1h := bmul64(y1r, h1r)
+		z2h := bmul64(y2r, h2r)
+		z2 ~= z0 ~ z1
+		z2h ~= z0h ~ z1h
+		z0h = rev64(z0h) >> 1
+		z1h = rev64(z1h) >> 1
+		z2h = rev64(z2h) >> 1
+
+		v0 := z0
+		v1 := z0h ~ z2
+		v2 := z1 ~ z2h
+		v3 := z1h
+
+		v3 = (v3 << 1) | (v2 >> 63)
+		v2 = (v2 << 1) | (v1 >> 63)
+		v1 = (v1 << 1) | (v0 >> 63)
+		v0 = (v0 << 1)
+
+		v2 ~= v0 ~ (v0 >> 1) ~ (v0 >> 2) ~ (v0 >> 7)
+		v1 ~= (v0 << 63) ~ (v0 << 62) ~ (v0 << 57)
+		v3 ~= v1 ~ (v1 >> 1) ~ (v1 >> 2) ~ (v1 >> 7)
+		v2 ~= (v1 << 63) ~ (v1 << 62) ~ (v1 << 57)
+
+		y0 = v2
+		y1 = v3
+	}
+
+	endian.unchecked_put_u64be(dst[0:], y1)
+	endian.unchecked_put_u64be(dst[8:], y0)
+}
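
A minimal usage sketch for the procedure above (note `core:crypto/_aes/ct64` is an internal package; normal callers go through `core:crypto/aes` instead):

import "core:crypto/_aes/ct64"

ghash_example :: proc() {
	key: [16]byte // the GHASH key H; in GCM this is CIPH(k, 0^128)
	dst: [16]byte // running digest; zero-initialized for a fresh GHASH
	msg := []byte{0xde, 0xad, 0xbe, 0xef} // partial blocks are zero-padded internally
	ct64.ghash(dst[:], key[:], msg)
	// dst now holds the digest, folded into the prior contents of dst.
}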

+ 75 - 0
core/crypto/_aes/ct64/helpers.odin

@@ -0,0 +1,75 @@
+package aes_ct64
+
+import "base:intrinsics"
+import "core:crypto/_aes"
+import "core:encoding/endian"
+
+load_blockx1 :: proc "contextless" (q: ^[8]u64, src: []byte) {
+	if len(src) != _aes.BLOCK_SIZE {
+		intrinsics.trap()
+	}
+
+	w: [4]u32 = ---
+	w[0] = endian.unchecked_get_u32le(src[0:])
+	w[1] = endian.unchecked_get_u32le(src[4:])
+	w[2] = endian.unchecked_get_u32le(src[8:])
+	w[3] = endian.unchecked_get_u32le(src[12:])
+	q[0], q[4] = interleave_in(w[:])
+	orthogonalize(q)
+}
+
+store_blockx1 :: proc "contextless" (dst: []byte, q: ^[8]u64) {
+	if len(dst) != _aes.BLOCK_SIZE {
+		intrinsics.trap()
+	}
+
+	orthogonalize(q)
+	w0, w1, w2, w3 := interleave_out(q[0], q[4])
+	endian.unchecked_put_u32le(dst[0:], w0)
+	endian.unchecked_put_u32le(dst[4:], w1)
+	endian.unchecked_put_u32le(dst[8:], w2)
+	endian.unchecked_put_u32le(dst[12:], w3)
+}
+
+load_blocks :: proc "contextless" (q: ^[8]u64, src: [][]byte) {
+	if n := len(src); n > STRIDE || n == 0 {
+		intrinsics.trap()
+	}
+
+	w: [4]u32 = ---
+	for s, i in src {
+		if len(s) != _aes.BLOCK_SIZE {
+			intrinsics.trap()
+		}
+
+		w[0] = endian.unchecked_get_u32le(s[0:])
+		w[1] = endian.unchecked_get_u32le(s[4:])
+		w[2] = endian.unchecked_get_u32le(s[8:])
+		w[3] = endian.unchecked_get_u32le(s[12:])
+		q[i], q[i + 4] = interleave_in(w[:])
+	}
+	orthogonalize(q)
+}
+
+store_blocks :: proc "contextless" (dst: [][]byte, q: ^[8]u64) {
+	if n := len(dst); n > STRIDE || n == 0 {
+		intrinsics.trap()
+	}
+
+	orthogonalize(q)
+	for d, i in dst {
+		// Allow storing [0,4] blocks.
+		if d == nil {
+			break
+		}
+		if len(d) != _aes.BLOCK_SIZE {
+			intrinsics.trap()
+		}
+
+		w0, w1, w2, w3 := interleave_out(q[i], q[i + 4])
+		endian.unchecked_put_u32le(d[0:], w0)
+		endian.unchecked_put_u32le(d[4:], w1)
+		endian.unchecked_put_u32le(d[8:], w2)
+		endian.unchecked_put_u32le(d[12:], w3)
+	}
+}
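
A sketch of how these helpers compose (assuming `STRIDE` is 4, as the "[0,4] blocks" comment above suggests): `orthogonalize` is an involution, so a load immediately followed by a store round-trips the block.

import "core:crypto/_aes/ct64"

roundtrip_example :: proc() {
	q: [8]u64
	blk: [16]byte
	ct64.load_blockx1(&q, blk[:])  // interleave words, then orthogonalize into bitsliced form
	// ... bitsliced AES rounds would operate on q here ...
	ct64.store_blockx1(blk[:], &q) // orthogonalize back, then de-interleave
}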

+ 43 - 0
core/crypto/_aes/hw_intel/api.odin

@@ -0,0 +1,43 @@
+//+build amd64
+package aes_hw_intel
+
+import "core:sys/info"
+
+// is_supported returns true iff hardware accelerated AES
+// is supported.
+is_supported :: proc "contextless" () -> bool {
+	features, ok := info.cpu_features.?
+	if !ok {
+		return false
+	}
+
+	// Note: Everything with AES-NI and PCLMULQDQ has support for
+// the required SSE extensions.
+	req_features :: info.CPU_Features{
+		.sse2,
+		.ssse3,
+		.sse41,
+		.aes,
+		.pclmulqdq,
+	}
+	return features >= req_features
+}
+
+// Context is a keyed AES (ECB) instance.
+Context :: struct {
+	// Note: The ideal thing to do is for the expanded round keys to be
+	// arrays of `__m128i`, however that implies alignment (or using AVX).
+	//
+	// All the people using e-waste processors that don't support an
+	// instruction set that has been around for over 10 years are why
+	// we can't have nice things.
+	_sk_exp_enc: [15][16]byte,
+	_sk_exp_dec: [15][16]byte,
+	_num_rounds: int,
+}
+
+// init initializes a context for AES with the provided key.
+init :: proc(ctx: ^Context, key: []byte) {
+	keysched(ctx, key)
+}
+

+ 281 - 0
core/crypto/_aes/hw_intel/ghash.odin

@@ -0,0 +1,281 @@
+// Copyright (c) 2017 Thomas Pornin <[email protected]>
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//   1. Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHORS "AS IS" AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+//+build amd64
+package aes_hw_intel
+
+import "base:intrinsics"
+import "core:crypto/_aes"
+import "core:simd"
+import "core:simd/x86"
+
+@(private = "file")
+GHASH_STRIDE_HW :: 4
+@(private = "file")
+GHASH_STRIDE_BYTES_HW :: GHASH_STRIDE_HW * _aes.GHASH_BLOCK_SIZE
+
+// GHASH is defined over elements of GF(2^128) with "full little-endian"
+// representation: leftmost byte is least significant, and, within each
+// byte, leftmost _bit_ is least significant. The natural ordering in
+// x86 is "mixed little-endian": bytes are ordered from least to most
+// significant, but bits within a byte are in most-to-least significant
+// order. Going to full little-endian representation would require
+// reversing bits within each byte, which is doable but expensive.
+//
+// Instead, we go to full big-endian representation, by swapping bytes
+// around, which is done with a single _mm_shuffle_epi8() opcode (it
+// comes with SSSE3; all CPUs that offer pclmulqdq also have SSSE3). We
+// can use a full big-endian representation because in a carryless
+// multiplication, we have a nice bit reversal property:
+//
+// rev_128(x) * rev_128(y) = rev_255(x * y)
+//
+// So by using full big-endian, we still get the right result, except
+// that it is right-shifted by 1 bit. The left-shift is relatively
+// inexpensive, and it can be mutualised.
+//
+// Since SSE2 opcodes do not have facilities for shifting full 128-bit
+// values with bit precision, we have to break down values into 64-bit
+// chunks. We number chunks from 0 to 3 in left to right order.
+
+@(private = "file")
+byteswap_index := transmute(x86.__m128i)simd.i8x16{
+	// Note: simd.i8x16 is reverse order from x86._mm_set_epi8.
+	15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+}
+
+@(private = "file", require_results, enable_target_feature = "sse2,ssse3")
+byteswap :: #force_inline proc "contextless" (x: x86.__m128i) -> x86.__m128i {
+	return x86._mm_shuffle_epi8(x, byteswap_index)
+}
+
+// From a 128-bit value kw, compute kx as the XOR of the two 64-bit
+// halves of kw (into the right half of kx; left half is unspecified),
+// and return kx.
+@(private = "file", require_results, enable_target_feature = "sse2")
+bk :: #force_inline proc "contextless" (kw: x86.__m128i) -> x86.__m128i {
+	return x86._mm_xor_si128(kw, x86._mm_shuffle_epi32(kw, 0x0e))
+}
+
+// Combine two 64-bit values (k0:k1) into a 128-bit (kw) value and
+// the XOR of the two values (kx), and return (kw, kx).
+@(private = "file", enable_target_feature = "sse2")
+pbk :: #force_inline proc "contextless" (k0, k1: x86.__m128i) -> (x86.__m128i, x86.__m128i) {
+	kw := x86._mm_unpacklo_epi64(k1, k0)
+	kx := x86._mm_xor_si128(k0, k1)
+	return kw, kx
+}
+
+// Left-shift by 1 bit a 256-bit value (in four 64-bit words).
+@(private = "file", require_results, enable_target_feature = "sse2")
+sl_256 :: #force_inline proc "contextless" (x0, x1, x2, x3: x86.__m128i) -> (x86.__m128i, x86.__m128i, x86.__m128i, x86.__m128i) {
+	x0, x1, x2, x3 := x0, x1, x2, x3
+
+	x0 = x86._mm_or_si128(x86._mm_slli_epi64(x0, 1), x86._mm_srli_epi64(x1, 63))
+	x1 = x86._mm_or_si128(x86._mm_slli_epi64(x1, 1), x86._mm_srli_epi64(x2, 63))
+	x2 = x86._mm_or_si128(x86._mm_slli_epi64(x2, 1), x86._mm_srli_epi64(x3, 63))
+	x3 = x86._mm_slli_epi64(x3, 1)
+
+	return x0, x1, x2, x3
+}
+
+// Perform reduction in GF(2^128).
+@(private = "file", require_results, enable_target_feature = "sse2")
+reduce_f128 :: #force_inline proc "contextless" (x0, x1, x2, x3: x86.__m128i) -> (x86.__m128i, x86.__m128i) {
+	x0, x1, x2 := x0, x1, x2
+
+	x1 = x86._mm_xor_si128(
+		x1,
+		x86._mm_xor_si128(
+			x86._mm_xor_si128(
+				x3,
+				x86._mm_srli_epi64(x3, 1)),
+			x86._mm_xor_si128(
+				x86._mm_srli_epi64(x3, 2),
+				x86._mm_srli_epi64(x3, 7))))
+	x2 = x86._mm_xor_si128(
+		x86._mm_xor_si128(
+			x2,
+			x86._mm_slli_epi64(x3, 63)),
+		x86._mm_xor_si128(
+			x86._mm_slli_epi64(x3, 62),
+			x86._mm_slli_epi64(x3, 57)))
+	x0 = x86._mm_xor_si128(
+		x0,
+		x86._mm_xor_si128(
+			x86._mm_xor_si128(
+				x2,
+				x86._mm_srli_epi64(x2, 1)),
+			x86._mm_xor_si128(
+				x86._mm_srli_epi64(x2, 2),
+				x86._mm_srli_epi64(x2, 7))))
+	x1 = x86._mm_xor_si128(
+		x86._mm_xor_si128(
+			x1,
+			x86._mm_slli_epi64(x2, 63)),
+		x86._mm_xor_si128(
+			x86._mm_slli_epi64(x2, 62),
+			x86._mm_slli_epi64(x2, 57)))
+
+	return x0, x1
+}
+
+// Square value kw in GF(2^128) into (dw,dx).
+@(private = "file", require_results, enable_target_feature = "sse2,pclmul")
+square_f128 :: #force_inline proc "contextless" (kw: x86.__m128i) -> (x86.__m128i, x86.__m128i) {
+	z1 := x86._mm_clmulepi64_si128(kw, kw, 0x11)
+	z3 := x86._mm_clmulepi64_si128(kw, kw, 0x00)
+	z0 := x86._mm_shuffle_epi32(z1, 0x0E)
+	z2 := x86._mm_shuffle_epi32(z3, 0x0E)
+	z0, z1, z2, z3 = sl_256(z0, z1, z2, z3)
+	z0, z1 = reduce_f128(z0, z1, z2, z3)
+	return pbk(z0, z1)
+}
+
+// ghash calculates the GHASH of `data`, with the key `key` and the
+// current digest `dst`, and stores the updated digest in `dst`.
+//
+// Note: `dst` is both an input and an output, to support easy implementation
+// of GCM.
+@(enable_target_feature = "sse2,ssse3,pclmul")
+ghash :: proc "contextless" (dst, key, data: []byte) #no_bounds_check {
+	if len(dst) != _aes.GHASH_BLOCK_SIZE || len(key) != _aes.GHASH_BLOCK_SIZE {
+		intrinsics.trap()
+	}
+
+	// Note: BearSSL opts to copy the remainder into a zero-filled
+	// 64-byte buffer.  We do something slightly simpler.
+
+	// Load key and dst (h and y).
+	yw := intrinsics.unaligned_load((^x86.__m128i)(raw_data(dst)))
+	h1w := intrinsics.unaligned_load((^x86.__m128i)(raw_data(key)))
+	yw = byteswap(yw)
+	h1w = byteswap(h1w)
+	h1x := bk(h1w)
+
+	// Process 4 blocks at a time
+	buf := data
+	l := len(buf)
+	if l >= GHASH_STRIDE_BYTES_HW {
+		// Compute h2 = h^2
+		h2w, h2x := square_f128(h1w)
+
+		// Compute h3 = h^3 = h*(h^2)
+		t1 := x86._mm_clmulepi64_si128(h1w, h2w, 0x11)
+		t3 := x86._mm_clmulepi64_si128(h1w, h2w, 0x00)
+		t2 := x86._mm_xor_si128(
+			x86._mm_clmulepi64_si128(h1x, h2x, 0x00),
+			x86._mm_xor_si128(t1, t3))
+		t0 := x86._mm_shuffle_epi32(t1, 0x0E)
+		t1 = x86._mm_xor_si128(t1, x86._mm_shuffle_epi32(t2, 0x0E))
+		t2 = x86._mm_xor_si128(t2, x86._mm_shuffle_epi32(t3, 0x0E))
+		t0, t1, t2, t3 = sl_256(t0, t1, t2, t3)
+		t0, t1 = reduce_f128(t0, t1, t2, t3)
+		h3w, h3x := pbk(t0, t1)
+
+		// Compute h4 = h^4 = (h^2)^2
+		h4w, h4x := square_f128(h2w)
+
+		for l >= GHASH_STRIDE_BYTES_HW {
+			aw0 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(buf)))
+			aw1 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(buf[16:])))
+			aw2 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(buf[32:])))
+			aw3 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(buf[48:])))
+			aw0 = byteswap(aw0)
+			aw1 = byteswap(aw1)
+			aw2 = byteswap(aw2)
+			aw3 = byteswap(aw3)
+			buf, l = buf[GHASH_STRIDE_BYTES_HW:], l - GHASH_STRIDE_BYTES_HW
+
+			aw0 = x86._mm_xor_si128(aw0, yw)
+			ax1 := bk(aw1)
+			ax2 := bk(aw2)
+			ax3 := bk(aw3)
+			ax0 := bk(aw0)
+
+			t1 = x86._mm_xor_si128(
+				x86._mm_xor_si128(
+					x86._mm_clmulepi64_si128(aw0, h4w, 0x11),
+					x86._mm_clmulepi64_si128(aw1, h3w, 0x11)),
+				x86._mm_xor_si128(
+					x86._mm_clmulepi64_si128(aw2, h2w, 0x11),
+					x86._mm_clmulepi64_si128(aw3, h1w, 0x11)))
+			t3 = x86._mm_xor_si128(
+				x86._mm_xor_si128(
+					x86._mm_clmulepi64_si128(aw0, h4w, 0x00),
+					x86._mm_clmulepi64_si128(aw1, h3w, 0x00)),
+				x86._mm_xor_si128(
+					x86._mm_clmulepi64_si128(aw2, h2w, 0x00),
+					x86._mm_clmulepi64_si128(aw3, h1w, 0x00)))
+			t2 = x86._mm_xor_si128(
+				x86._mm_xor_si128(
+					x86._mm_clmulepi64_si128(ax0, h4x, 0x00),
+					x86._mm_clmulepi64_si128(ax1, h3x, 0x00)),
+				x86._mm_xor_si128(
+					x86._mm_clmulepi64_si128(ax2, h2x, 0x00),
+					x86._mm_clmulepi64_si128(ax3, h1x, 0x00)))
+			t2 = x86._mm_xor_si128(t2, x86._mm_xor_si128(t1, t3))
+			t0 = x86._mm_shuffle_epi32(t1, 0x0E)
+			t1 = x86._mm_xor_si128(t1, x86._mm_shuffle_epi32(t2, 0x0E))
+			t2 = x86._mm_xor_si128(t2, x86._mm_shuffle_epi32(t3, 0x0E))
+			t0, t1, t2, t3 = sl_256(t0, t1, t2, t3)
+			t0, t1 = reduce_f128(t0, t1, t2, t3)
+			yw = x86._mm_unpacklo_epi64(t1, t0)
+		}
+	}
+
+	// Process 1 block at a time
+	src: []byte
+	for l > 0 {
+		if l >= _aes.GHASH_BLOCK_SIZE {
+			src = buf
+			buf = buf[_aes.GHASH_BLOCK_SIZE:]
+			l -= _aes.GHASH_BLOCK_SIZE
+		} else {
+			tmp: [_aes.GHASH_BLOCK_SIZE]byte
+			copy(tmp[:], buf)
+			src = tmp[:]
+			l = 0
+		}
+
+		aw := intrinsics.unaligned_load((^x86.__m128i)(raw_data(src)))
+		aw = byteswap(aw)
+
+		aw = x86._mm_xor_si128(aw, yw)
+		ax := bk(aw)
+
+		t1 := x86._mm_clmulepi64_si128(aw, h1w, 0x11)
+		t3 := x86._mm_clmulepi64_si128(aw, h1w, 0x00)
+		t2 := x86._mm_clmulepi64_si128(ax, h1x, 0x00)
+		t2 = x86._mm_xor_si128(t2, x86._mm_xor_si128(t1, t3))
+		t0 := x86._mm_shuffle_epi32(t1, 0x0E)
+		t1 = x86._mm_xor_si128(t1, x86._mm_shuffle_epi32(t2, 0x0E))
+		t2 = x86._mm_xor_si128(t2, x86._mm_shuffle_epi32(t3, 0x0E))
+		t0, t1, t2, t3 = sl_256(t0, t1, t2, t3)
+		t0, t1 = reduce_f128(t0, t1, t2, t3)
+		yw = x86._mm_unpacklo_epi64(t1, t0)
+	}
+
+	// Write back the hash (dst, aka y)
+	yw = byteswap(yw)
+	intrinsics.unaligned_store((^x86.__m128i)(raw_data(dst)), yw)
+}
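
Since this file and the ct64 implementation must agree bit-for-bit, a hypothetical cross-check between the two is straightforward:

import "core:crypto/_aes/ct64"
import hw_intel "core:crypto/_aes/hw_intel"

check_ghash :: proc(key, data: []byte) {
	if !hw_intel.is_supported() {
		return
	}
	// key must be 16 bytes; both procedures trap otherwise.
	dst_hw, dst_ct: [16]byte
	hw_intel.ghash(dst_hw[:], key, data)
	ct64.ghash(dst_ct[:], key, data)
	assert(dst_hw == dst_ct) // both implementations must compute the same digest
}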

+ 178 - 0
core/crypto/_aes/hw_intel/hw_intel_keysched.odin

@@ -0,0 +1,178 @@
+// Copyright (c) 2017 Thomas Pornin <[email protected]>
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//   1. Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+//+build amd64
+package aes_hw_intel
+
+import "base:intrinsics"
+import "core:crypto/_aes"
+import "core:mem"
+import "core:simd/x86"
+
+// Intel AES-NI based implementation.  Inspiration taken from BearSSL.
+//
+// Note: This assumes that the SROA optimization pass is enabled to be
+// anything resembling performant; otherwise, LLVM will not elide the
+// massive number of redundant loads/stores it generates for every
+// intrinsic call.
+
+@(private = "file", require_results, enable_target_feature = "sse2")
+expand_step128 :: #force_inline proc(k1, k2: x86.__m128i) -> x86.__m128i {
+	k1, k2 := k1, k2
+
+	k2 = x86._mm_shuffle_epi32(k2, 0xff)
+	k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
+	k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
+	k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
+	return x86._mm_xor_si128(k1, k2)
+}
+
+@(private = "file", require_results, enable_target_feature = "sse,sse2")
+expand_step192a :: #force_inline proc (k1_, k2_: ^x86.__m128i, k3: x86.__m128i) -> (x86.__m128i, x86.__m128i) {
+	k1, k2, k3 := k1_^, k2_^, k3
+
+	k3 = x86._mm_shuffle_epi32(k3, 0x55)
+	k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
+	k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
+	k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
+	k1 = x86._mm_xor_si128(k1, k3)
+
+	tmp := k2
+	k2 = x86._mm_xor_si128(k2, x86._mm_slli_si128(k2, 0x04))
+	k2 = x86._mm_xor_si128(k2, x86._mm_shuffle_epi32(k1, 0xff))
+
+	k1_, k2_ := k1_, k2_
+	k1_^, k2_^ = k1, k2
+
+	r1 := transmute(x86.__m128i)(x86._mm_shuffle_ps(transmute(x86.__m128)(tmp), transmute(x86.__m128)(k1), 0x44))
+	r2 := transmute(x86.__m128i)(x86._mm_shuffle_ps(transmute(x86.__m128)(k1), transmute(x86.__m128)(k2), 0x4e))
+
+	return r1, r2
+}
+
+@(private = "file", require_results, enable_target_feature = "sse2")
+expand_step192b :: #force_inline proc (k1_, k2_: ^x86.__m128i, k3: x86.__m128i) -> x86.__m128i {
+	k1, k2, k3 := k1_^, k2_^, k3
+
+	k3 = x86._mm_shuffle_epi32(k3, 0x55)
+	k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
+	k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
+	k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
+	k1 = x86._mm_xor_si128(k1, k3)
+
+	k2 = x86._mm_xor_si128(k2, x86._mm_slli_si128(k2, 0x04))
+	k2 = x86._mm_xor_si128(k2, x86._mm_shuffle_epi32(k1, 0xff))
+
+	k1_, k2_ := k1_, k2_
+	k1_^, k2_^ = k1, k2
+
+	return k1
+}
+
+@(private = "file", require_results, enable_target_feature = "sse2")
+expand_step256b :: #force_inline proc(k1, k2: x86.__m128i) -> x86.__m128i {
+	k1, k2 := k1, k2
+
+	k2 = x86._mm_shuffle_epi32(k2, 0xaa)
+	k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
+	k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
+	k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
+	return x86._mm_xor_si128(k1, k2)
+}
+
+@(private = "file", enable_target_feature = "aes")
+derive_dec_keys :: proc(ctx: ^Context, sks: ^[15]x86.__m128i, num_rounds: int) {
+	intrinsics.unaligned_store((^x86.__m128i)(&ctx._sk_exp_dec[0]), sks[num_rounds])
+	for i in 1 ..< num_rounds {
+		tmp := x86._mm_aesimc_si128(sks[i])
+		intrinsics.unaligned_store((^x86.__m128i)(&ctx._sk_exp_dec[num_rounds - i]), tmp)
+	}
+	intrinsics.unaligned_store((^x86.__m128i)(&ctx._sk_exp_dec[num_rounds]), sks[0])
+}
+
+@(private, enable_target_feature = "sse,sse2,aes")
+keysched :: proc(ctx: ^Context, key: []byte) {
+	sks: [15]x86.__m128i = ---
+
+	// Compute the encryption keys.
+	num_rounds, key_len := 0, len(key)
+	switch key_len {
+	case _aes.KEY_SIZE_128:
+		sks[0] = intrinsics.unaligned_load((^x86.__m128i)(raw_data(key)))
+		sks[1] = expand_step128(sks[0], x86._mm_aeskeygenassist_si128(sks[0], 0x01))
+		sks[2] = expand_step128(sks[1], x86._mm_aeskeygenassist_si128(sks[1], 0x02))
+		sks[3] = expand_step128(sks[2], x86._mm_aeskeygenassist_si128(sks[2], 0x04))
+		sks[4] = expand_step128(sks[3], x86._mm_aeskeygenassist_si128(sks[3], 0x08))
+		sks[5] = expand_step128(sks[4], x86._mm_aeskeygenassist_si128(sks[4], 0x10))
+		sks[6] = expand_step128(sks[5], x86._mm_aeskeygenassist_si128(sks[5], 0x20))
+		sks[7] = expand_step128(sks[6], x86._mm_aeskeygenassist_si128(sks[6], 0x40))
+		sks[8] = expand_step128(sks[7], x86._mm_aeskeygenassist_si128(sks[7], 0x80))
+		sks[9] = expand_step128(sks[8], x86._mm_aeskeygenassist_si128(sks[8], 0x1b))
+		sks[10] = expand_step128(sks[9], x86._mm_aeskeygenassist_si128(sks[9], 0x36))
+		num_rounds = _aes.ROUNDS_128
+	case _aes.KEY_SIZE_192:
+		k0 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(key)))
+		k1 := x86.__m128i{
+			intrinsics.unaligned_load((^i64)(raw_data(key[16:]))),
+			0,
+		}
+		sks[0] = k0
+		sks[1], sks[2] = expand_step192a(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x01))
+		sks[3] = expand_step192b(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x02))
+		sks[4], sks[5] = expand_step192a(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x04))
+		sks[6] = expand_step192b(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x08))
+		sks[7], sks[8] = expand_step192a(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x10))
+		sks[9] = expand_step192b(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x20))
+		sks[10], sks[11] = expand_step192a(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x40))
+		sks[12] = expand_step192b(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x80))
+		num_rounds = _aes.ROUNDS_192
+	case _aes.KEY_SIZE_256:
+		sks[0] = intrinsics.unaligned_load((^x86.__m128i)(raw_data(key)))
+		sks[1] = intrinsics.unaligned_load((^x86.__m128i)(raw_data(key[16:])))
+		sks[2] = expand_step128(sks[0], x86._mm_aeskeygenassist_si128(sks[1], 0x01))
+		sks[3] = expand_step256b(sks[1], x86._mm_aeskeygenassist_si128(sks[2], 0x01))
+		sks[4] = expand_step128(sks[2], x86._mm_aeskeygenassist_si128(sks[3], 0x02))
+		sks[5] = expand_step256b(sks[3], x86._mm_aeskeygenassist_si128(sks[4], 0x02))
+		sks[6] = expand_step128(sks[4], x86._mm_aeskeygenassist_si128(sks[5], 0x04))
+		sks[7] = expand_step256b(sks[5], x86._mm_aeskeygenassist_si128(sks[6], 0x04))
+		sks[8] = expand_step128(sks[6], x86._mm_aeskeygenassist_si128(sks[7], 0x08))
+		sks[9] = expand_step256b(sks[7], x86._mm_aeskeygenassist_si128(sks[8], 0x08))
+		sks[10] = expand_step128(sks[8], x86._mm_aeskeygenassist_si128(sks[9], 0x10))
+		sks[11] = expand_step256b(sks[9], x86._mm_aeskeygenassist_si128(sks[10], 0x10))
+		sks[12] = expand_step128(sks[10], x86._mm_aeskeygenassist_si128(sks[11], 0x20))
+		sks[13] = expand_step256b(sks[11], x86._mm_aeskeygenassist_si128(sks[12], 0x20))
+		sks[14] = expand_step128(sks[12], x86._mm_aeskeygenassist_si128(sks[13], 0x40))
+		num_rounds = _aes.ROUNDS_256
+	case:
+		panic("crypto/aes: invalid AES key size")
+	}
+	for i in 0 ..= num_rounds {
+		intrinsics.unaligned_store((^x86.__m128i)(&ctx._sk_exp_enc[i]), sks[i])
+	}
+
+	// Compute the decryption keys.  GCM and CTR do not need this, however
+	// ECB, CBC, OCB3, etc do.
+	derive_dec_keys(ctx, &sks, num_rounds)
+
+	ctx._num_rounds = num_rounds
+
+	mem.zero_explicit(&sks, size_of(sks))
+}
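
A usage sketch for the schedule above, via this package's `init` (the key length selects AES-128/192/256; any other length panics):

import hw_intel "core:crypto/_aes/hw_intel"

keysched_example :: proc() {
	ctx: hw_intel.Context
	key: [16]byte // 16, 24, or 32 bytes
	if hw_intel.is_supported() {
		hw_intel.init(&ctx, key[:]) // expands both the encryption and decryption round keys
	}
}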

+ 2 - 2
core/crypto/_edwards25519/edwards25519.odin

@@ -110,7 +110,7 @@ ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool {
 	if len(b) != 32 {
 		intrinsics.trap()
 	}
-	b_ := transmute(^[32]byte)(raw_data(b))
+	b_ := (^[32]byte)(raw_data(b))
 
 	// Do the work in a scratch element, so that ge is unchanged on
 	// failure.
@@ -169,7 +169,7 @@ ge_bytes :: proc "contextless" (ge: ^Group_Element, dst: []byte) {
 	if len(dst) != 32 {
 		intrinsics.trap()
 	}
-	dst_ := transmute(^[32]byte)(raw_data(dst))
+	dst_ := (^[32]byte)(raw_data(dst))
 
 	// Convert the element to affine (x, y) representation.
 	x, y, z_inv: field.Tight_Field_Element = ---, ---, ---

+ 2 - 2
core/crypto/_edwards25519/edwards25519_scalar.odin

@@ -28,7 +28,7 @@ sc_set_bytes :: proc "contextless" (sc: ^Scalar, b: []byte) -> bool {
 	if len(b) != 32 {
 		intrinsics.trap()
 	}
-	b_ := transmute(^[32]byte)(raw_data(b))
+	b_ := (^[32]byte)(raw_data(b))
 	return field.fe_from_bytes(sc, b_)
 }
 
@@ -36,7 +36,7 @@ sc_set_bytes_rfc8032 :: proc "contextless" (sc: ^Scalar, b: []byte) {
 	if len(b) != 32 {
 		intrinsics.trap()
 	}
-	b_ := transmute(^[32]byte)(raw_data(b))
+	b_ := (^[32]byte)(raw_data(b))
 	field.fe_from_bytes_rfc8032(sc, b_)
 }
 

+ 2 - 2
core/crypto/_fiat/field_curve25519/field.odin

@@ -6,13 +6,13 @@ import "core:mem"
 fe_relax_cast :: #force_inline proc "contextless" (
 	arg1: ^Tight_Field_Element,
 ) -> ^Loose_Field_Element {
-	return transmute(^Loose_Field_Element)(arg1)
+	return (^Loose_Field_Element)(arg1)
 }
 
 fe_tighten_cast :: #force_inline proc "contextless" (
 	arg1: ^Loose_Field_Element,
 ) -> ^Tight_Field_Element {
-	return transmute(^Tight_Field_Element)(arg1)
+	return (^Tight_Field_Element)(arg1)
 }
 
 fe_clear :: proc "contextless" (

+ 2 - 2
core/crypto/_fiat/field_poly1305/field.odin

@@ -7,13 +7,13 @@ import "core:mem"
 fe_relax_cast :: #force_inline proc "contextless" (
 	arg1: ^Tight_Field_Element,
 ) -> ^Loose_Field_Element {
-	return transmute(^Loose_Field_Element)(arg1)
+	return (^Loose_Field_Element)(arg1)
 }
 
 fe_tighten_cast :: #force_inline proc "contextless" (
 	arg1: ^Loose_Field_Element,
 ) -> ^Tight_Field_Element {
-	return transmute(^Tight_Field_Element)(arg1)
+	return (^Tight_Field_Element)(arg1)
 }
 
 fe_from_bytes :: #force_inline proc "contextless" (

+ 21 - 0
core/crypto/aes/aes.odin

@@ -0,0 +1,21 @@
+/*
+package aes implements the AES block cipher and some common modes.
+
+See:
+- https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.197-upd1.pdf
+- https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38a.pdf
+- https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38d.pdf
+*/
+package aes
+
+import "core:crypto/_aes"
+
+// KEY_SIZE_128 is the AES-128 key size in bytes.
+KEY_SIZE_128 :: _aes.KEY_SIZE_128
+// KEY_SIZE_192 is the AES-192 key size in bytes.
+KEY_SIZE_192 :: _aes.KEY_SIZE_192
+// KEY_SIZE_256 is the AES-256 key size in bytes.
+KEY_SIZE_256 :: _aes.KEY_SIZE_256
+
+// BLOCK_SIZE is the AES block size in bytes.
+BLOCK_SIZE :: _aes.BLOCK_SIZE

+ 201 - 0
core/crypto/aes/aes_ctr.odin

@@ -0,0 +1,201 @@
+package aes
+
+import "core:bytes"
+import "core:crypto/_aes/ct64"
+import "core:encoding/endian"
+import "core:math/bits"
+import "core:mem"
+
+// CTR_IV_SIZE is the size of the CTR mode IV in bytes.
+CTR_IV_SIZE :: 16
+
+// Context_CTR is a keyed AES-CTR instance.
+Context_CTR :: struct {
+	_impl:           Context_Impl,
+	_buffer:         [BLOCK_SIZE]byte,
+	_off:            int,
+	_ctr_hi:         u64,
+	_ctr_lo:         u64,
+	_is_initialized: bool,
+}
+
+// init_ctr initializes a Context_CTR with the provided key and IV.
+init_ctr :: proc(ctx: ^Context_CTR, key, iv: []byte, impl := Implementation.Hardware) {
+	if len(iv) != CTR_IV_SIZE {
+		panic("crypto/aes: invalid CTR IV size")
+	}
+
+	init_impl(&ctx._impl, key, impl)
+	ctx._off = BLOCK_SIZE
+	ctx._ctr_hi = endian.unchecked_get_u64be(iv[0:])
+	ctx._ctr_lo = endian.unchecked_get_u64be(iv[8:])
+	ctx._is_initialized = true
+}
+
+// xor_bytes_ctr XORs each byte in src with bytes taken from the AES-CTR
+// keystream, and writes the resulting output to dst.  dst and src MUST
+// alias exactly or not at all.
+xor_bytes_ctr :: proc(ctx: ^Context_CTR, dst, src: []byte) {
+	assert(ctx._is_initialized)
+
+	src, dst := src, dst
+	if dst_len := len(dst); dst_len < len(src) {
+		src = src[:dst_len]
+	}
+
+	if bytes.alias_inexactly(dst, src) {
+		panic("crypto/aes: dst and src alias inexactly")
+	}
+
+	for remaining := len(src); remaining > 0; {
+		// Process multiple blocks at once
+		if ctx._off == BLOCK_SIZE {
+			if nr_blocks := remaining / BLOCK_SIZE; nr_blocks > 0 {
+				direct_bytes := nr_blocks * BLOCK_SIZE
+				ctr_blocks(ctx, dst, src, nr_blocks)
+				remaining -= direct_bytes
+				if remaining == 0 {
+					return
+				}
+				dst = dst[direct_bytes:]
+				src = src[direct_bytes:]
+			}
+
+			// If there is a partial block, generate and buffer 1 block
+			// worth of keystream.
+			ctr_blocks(ctx, ctx._buffer[:], nil, 1)
+			ctx._off = 0
+		}
+
+		// Process partial blocks from the buffered keystream.
+		to_xor := min(BLOCK_SIZE - ctx._off, remaining)
+		buffered_keystream := ctx._buffer[ctx._off:]
+		for i := 0; i < to_xor; i = i + 1 {
+			dst[i] = buffered_keystream[i] ~ src[i]
+		}
+		ctx._off += to_xor
+		dst = dst[to_xor:]
+		src = src[to_xor:]
+		remaining -= to_xor
+	}
+}
+
+// keystream_bytes_ctr fills dst with the raw AES-CTR keystream output.
+keystream_bytes_ctr :: proc(ctx: ^Context_CTR, dst: []byte) {
+	assert(ctx._is_initialized)
+
+	dst := dst
+	for remaining := len(dst); remaining > 0; {
+		// Process multiple blocks at once
+		if ctx._off == BLOCK_SIZE {
+			if nr_blocks := remaining / BLOCK_SIZE; nr_blocks > 0 {
+				direct_bytes := nr_blocks * BLOCK_SIZE
+				ctr_blocks(ctx, dst, nil, nr_blocks)
+				remaining -= direct_bytes
+				if remaining == 0 {
+					return
+				}
+				dst = dst[direct_bytes:]
+			}
+
+			// If there is a partial block, generate and buffer 1 block
+			// worth of keystream.
+			ctr_blocks(ctx, ctx._buffer[:], nil, 1)
+			ctx._off = 0
+		}
+
+		// Process partial blocks from the buffered keystream.
+		to_copy := min(BLOCK_SIZE - ctx._off, remaining)
+		buffered_keystream := ctx._buffer[ctx._off:]
+		copy(dst[:to_copy], buffered_keystream[:to_copy])
+		ctx._off += to_copy
+		dst = dst[to_copy:]
+		remaining -= to_copy
+	}
+}
+
+// reset_ctr sanitizes the Context_CTR.  The Context_CTR must be
+// re-initialized to be used again.
+reset_ctr :: proc "contextless" (ctx: ^Context_CTR) {
+	reset_impl(&ctx._impl)
+	ctx._off = 0
+	ctx._ctr_hi = 0
+	ctx._ctr_lo = 0
+	mem.zero_explicit(&ctx._buffer, size_of(ctx._buffer))
+	ctx._is_initialized = false
+}
+
+@(private = "file")
+ctr_blocks :: proc(ctx: ^Context_CTR, dst, src: []byte, nr_blocks: int) #no_bounds_check {
+	// Use the optimized hardware implementation if available.
+	if _, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
+		ctr_blocks_hw(ctx, dst, src, nr_blocks)
+		return
+	}
+
+	// Portable implementation.
+	ct64_inc_ctr := #force_inline proc "contextless" (dst: []byte, hi, lo: u64) -> (u64, u64) {
+		endian.unchecked_put_u64be(dst[0:], hi)
+		endian.unchecked_put_u64be(dst[8:], lo)
+
+		hi, lo := hi, lo
+		carry: u64
+		lo, carry = bits.add_u64(lo, 1, 0)
+		hi, _ = bits.add_u64(hi, 0, carry)
+		return hi, lo
+	}
+
+	impl := &ctx._impl.(ct64.Context)
+	src, dst := src, dst
+	nr_blocks := nr_blocks
+	ctr_hi, ctr_lo := ctx._ctr_hi, ctx._ctr_lo
+
+	tmp: [ct64.STRIDE][BLOCK_SIZE]byte = ---
+	ctrs: [ct64.STRIDE][]byte = ---
+	for i in 0 ..< ct64.STRIDE {
+		ctrs[i] = tmp[i][:]
+	}
+	for nr_blocks > 0 {
+		n := min(ct64.STRIDE, nr_blocks)
+		blocks := ctrs[:n]
+
+		for i in 0 ..< n {
+			ctr_hi, ctr_lo = ct64_inc_ctr(blocks[i], ctr_hi, ctr_lo)
+		}
+		ct64.encrypt_blocks(impl, blocks, blocks)
+
+		xor_blocks(dst, src, blocks)
+
+		if src != nil {
+			src = src[n * BLOCK_SIZE:]
+		}
+		dst = dst[n * BLOCK_SIZE:]
+		nr_blocks -= n
+	}
+
+	// Write back the counter.
+	ctx._ctr_hi, ctx._ctr_lo = ctr_hi, ctr_lo
+
+	mem.zero_explicit(&tmp, size_of(tmp))
+}
+
+@(private)
+xor_blocks :: #force_inline proc "contextless" (dst, src: []byte, blocks: [][]byte) {
+	// Note: This would be faster if `core:simd` were used, however if
+	// performance of this implementation matters to where that
+	// optimization would be worth it, use chacha20poly1305, or a
+	// CPU that isn't e-waste.
+	#no_bounds_check {
+		if src != nil {
+			for i in 0 ..< len(blocks) {
+				off := i * BLOCK_SIZE
+				for j in 0 ..< BLOCK_SIZE {
+					blocks[i][j] ~= src[off + j]
+				}
+			}
+		}
+		for i in 0 ..< len(blocks) {
+			copy(dst[i * BLOCK_SIZE:], blocks[i])
+		}
+	}
+}
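
A minimal end-to-end sketch of the CTR API above (key and IV shown zeroed purely for illustration; a real caller derives the key and guarantees IV uniqueness):

import "core:crypto/aes"

ctr_example :: proc() {
	key: [aes.KEY_SIZE_256]byte // from a KDF or CSPRNG in practice
	iv: [aes.CTR_IV_SIZE]byte   // MUST be unique for a given key
	ctx: aes.Context_CTR
	aes.init_ctr(&ctx, key[:], iv[:])
	defer aes.reset_ctr(&ctx)

	pt := []byte("attack at dawn")
	ct := make([]byte, len(pt))
	defer delete(ct)
	aes.xor_bytes_ctr(&ctx, ct, pt) // decryption is the same call with ct/pt swapped
}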

+ 151 - 0
core/crypto/aes/aes_ctr_hw_intel.odin

@@ -0,0 +1,151 @@
+//+build amd64
+package aes
+
+import "base:intrinsics"
+import "core:crypto/_aes"
+import "core:math/bits"
+import "core:mem"
+import "core:simd/x86"
+
+@(private)
+CTR_STRIDE_HW :: 4
+@(private)
+CTR_STRIDE_BYTES_HW :: CTR_STRIDE_HW * BLOCK_SIZE
+
+@(private, enable_target_feature = "sse2,aes")
+ctr_blocks_hw :: proc(ctx: ^Context_CTR, dst, src: []byte, nr_blocks: int) #no_bounds_check {
+	hw_ctx := ctx._impl.(Context_Impl_Hardware)
+
+	sks: [15]x86.__m128i = ---
+	for i in 0 ..= hw_ctx._num_rounds {
+		sks[i] = intrinsics.unaligned_load((^x86.__m128i)(&hw_ctx._sk_exp_enc[i]))
+	}
+
+	hw_inc_ctr := #force_inline proc "contextless" (hi, lo: u64) -> (x86.__m128i, u64, u64) {
+		ret := x86.__m128i{
+			i64(intrinsics.byte_swap(hi)),
+			i64(intrinsics.byte_swap(lo)),
+		}
+
+		hi, lo := hi, lo
+		carry: u64
+
+		lo, carry = bits.add_u64(lo, 1, 0)
+		hi, _ = bits.add_u64(hi, 0, carry)
+		return ret, hi, lo
+	}
+
+	// The latency of AESENC depends on mfg and microarchitecture:
+	// - 7 -> up to Broadwell
+	// - 4 -> AMD and Skylake - Cascade Lake
+	// - 3 -> Ice Lake and newer
+	//
+	// This implementation does 4 blocks at once, since performance
+	// should be "adequate" across most CPUs.
+
+	src, dst := src, dst
+	nr_blocks := nr_blocks
+	ctr_hi, ctr_lo := ctx._ctr_hi, ctx._ctr_lo
+
+	blks: [CTR_STRIDE_HW]x86.__m128i = ---
+	for nr_blocks >= CTR_STRIDE_HW {
+		#unroll for i in 0 ..< CTR_STRIDE_HW {
+			blks[i], ctr_hi, ctr_lo = hw_inc_ctr(ctr_hi, ctr_lo)
+		}
+
+		#unroll for i in 0 ..< CTR_STRIDE_HW {
+			blks[i] = x86._mm_xor_si128(blks[i], sks[0])
+		}
+		#unroll for i in 1 ..= 9 {
+			#unroll for j in 0 ..< CTR_STRIDE_HW {
+				blks[j] = x86._mm_aesenc_si128(blks[j], sks[i])
+			}
+		}
+		switch hw_ctx._num_rounds {
+		case _aes.ROUNDS_128:
+			#unroll for i in 0 ..< CTR_STRIDE_HW {
+				blks[i] = x86._mm_aesenclast_si128(blks[i], sks[10])
+			}
+		case _aes.ROUNDS_192:
+			#unroll for i in 10 ..= 11 {
+				#unroll for j in 0 ..< CTR_STRIDE_HW {
+					blks[j] = x86._mm_aesenc_si128(blks[j], sks[i])
+				}
+			}
+			#unroll for i in 0 ..< CTR_STRIDE_HW {
+				blks[i] = x86._mm_aesenclast_si128(blks[i], sks[12])
+			}
+		case _aes.ROUNDS_256:
+			#unroll for i in 10 ..= 13 {
+				#unroll for j in 0 ..< CTR_STRIDE_HW {
+					blks[j] = x86._mm_aesenc_si128(blks[j], sks[i])
+				}
+			}
+			#unroll for i in 0 ..< CTR_STRIDE_HW {
+				blks[i] = x86._mm_aesenclast_si128(blks[i], sks[14])
+			}
+		}
+
+		xor_blocks_hw(dst, src, blks[:])
+
+		if src != nil {
+			src = src[CTR_STRIDE_BYTES_HW:]
+		}
+		dst = dst[CTR_STRIDE_BYTES_HW:]
+		nr_blocks -= CTR_STRIDE_HW
+	}
+
+	// Handle the remainder.
+	for nr_blocks > 0 {
+		blks[0], ctr_hi, ctr_lo = hw_inc_ctr(ctr_hi, ctr_lo)
+
+		blks[0] = x86._mm_xor_si128(blks[0], sks[0])
+		#unroll for i in 1 ..= 9 {
+			blks[0] = x86._mm_aesenc_si128(blks[0], sks[i])
+		}
+		switch hw_ctx._num_rounds {
+		case _aes.ROUNDS_128:
+			blks[0] = x86._mm_aesenclast_si128(blks[0], sks[10])
+		case _aes.ROUNDS_192:
+			#unroll for i in 10 ..= 11 {
+				blks[0] = x86._mm_aesenc_si128(blks[0], sks[i])
+			}
+			blks[0] = x86._mm_aesenclast_si128(blks[0], sks[12])
+		case _aes.ROUNDS_256:
+			#unroll for i in 10 ..= 13 {
+				blks[0] = x86._mm_aesenc_si128(blks[0], sks[i])
+			}
+			blks[0] = x86._mm_aesenclast_si128(blks[0], sks[14])
+		}
+
+		xor_blocks_hw(dst, src, blks[:1])
+
+		if src != nil {
+			src = src[BLOCK_SIZE:]
+		}
+		dst = dst[BLOCK_SIZE:]
+		nr_blocks -= 1
+	}
+
+	// Write back the counter.
+	ctx._ctr_hi, ctx._ctr_lo = ctr_hi, ctr_lo
+
+	mem.zero_explicit(&blks, size_of(blks))
+	mem.zero_explicit(&sks, size_of(sks))
+}
+
+@(private, enable_target_feature = "sse2")
+xor_blocks_hw :: proc(dst, src: []byte, blocks: []x86.__m128i) {
+	#no_bounds_check {
+		if src != nil {
+			for i in 0 ..< len(blocks) {
+				off := i * BLOCK_SIZE
+				tmp := intrinsics.unaligned_load((^x86.__m128i)(raw_data(src[off:])))
+				blocks[i] = x86._mm_xor_si128(blocks[i], tmp)
+			}
+		}
+		for i in 0 ..< len(blocks) {
+			intrinsics.unaligned_store((^x86.__m128i)(raw_data(dst[i * BLOCK_SIZE:])), blocks[i])
+		}
+	}
+}

+ 57 - 0
core/crypto/aes/aes_ecb.odin

@@ -0,0 +1,57 @@
+package aes
+
+import "core:crypto/_aes/ct64"
+
+// Context_ECB is a keyed AES-ECB instance.
+//
+// WARNING: Using ECB mode is strongly discouraged unless it is being
+// used to implement higher level constructs.
+Context_ECB :: struct {
+	_impl:           Context_Impl,
+	_is_initialized: bool,
+}
+
+// init_ecb initializes a Context_ECB with the provided key.
+init_ecb :: proc(ctx: ^Context_ECB, key: []byte, impl := Implementation.Hardware) {
+	init_impl(&ctx._impl, key, impl)
+	ctx._is_initialized = true
+}
+
+// encrypt_ecb encrypts the BLOCK_SIZE buffer src, and writes the result to dst.
+encrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) {
+	assert(ctx._is_initialized)
+
+	if len(dst) != BLOCK_SIZE || len(src) != BLOCK_SIZE {
+		panic("crypto/aes: invalid buffer size(s)")
+	}
+
+	switch &impl in ctx._impl {
+	case ct64.Context:
+		ct64.encrypt_block(&impl, dst, src)
+	case Context_Impl_Hardware:
+		encrypt_block_hw(&impl, dst, src)
+	}
+}
+
+// decrypt_ecb decrypts the BLOCK_SIZE buffer src, and writes the result to dst.
+decrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) {
+	assert(ctx._is_initialized)
+
+	if len(dst) != BLOCK_SIZE || len(src) != BLOCK_SIZE {
+		panic("crypto/aes: invalid buffer size(s)")
+	}
+
+	switch &impl in ctx._impl {
+	case ct64.Context:
+		ct64.decrypt_block(&impl, dst, src)
+	case Context_Impl_Hardware:
+		decrypt_block_hw(&impl, dst, src)
+	}
+}
+
+// reset_ecb sanitizes the Context_ECB.  The Context_ECB must be
+// re-initialized to be used again.
+reset_ecb :: proc "contextless" (ctx: ^Context_ECB) {
+	reset_impl(&ctx._impl)
+	ctx._is_initialized = false
+}
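
An illustrative round-trip through the single-block API above:

import "core:crypto/aes"

ecb_example :: proc() {
	key: [aes.KEY_SIZE_128]byte
	ctx: aes.Context_ECB
	aes.init_ecb(&ctx, key[:])
	defer aes.reset_ecb(&ctx)

	src, dst: [aes.BLOCK_SIZE]byte
	aes.encrypt_ecb(&ctx, dst[:], src[:])
	aes.decrypt_ecb(&ctx, src[:], dst[:]) // recovers the original block
}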

+ 58 - 0
core/crypto/aes/aes_ecb_hw_intel.odin

@@ -0,0 +1,58 @@
+//+build amd64
+package aes
+
+import "base:intrinsics"
+import "core:crypto/_aes"
+import "core:simd/x86"
+
+@(private, enable_target_feature = "sse2,aes")
+encrypt_block_hw :: proc(ctx: ^Context_Impl_Hardware, dst, src: []byte) {
+	blk := intrinsics.unaligned_load((^x86.__m128i)(raw_data(src)))
+
+	blk = x86._mm_xor_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[0])))
+	#unroll for i in 1 ..= 9 {
+		blk = x86._mm_aesenc_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[i])))
+	}
+	switch ctx._num_rounds {
+	case _aes.ROUNDS_128:
+		blk = x86._mm_aesenclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[10])))
+	case _aes.ROUNDS_192:
+		#unroll for i in 10 ..= 11 {
+			blk = x86._mm_aesenc_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[i])))
+		}
+		blk = x86._mm_aesenclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[12])))
+	case _aes.ROUNDS_256:
+		#unroll for i in 10 ..= 13 {
+			blk = x86._mm_aesenc_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[i])))
+		}
+		blk = x86._mm_aesenclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[14])))
+	}
+
+	intrinsics.unaligned_store((^x86.__m128i)(raw_data(dst)), blk)
+}
+
+@(private, enable_target_feature = "sse2,aes")
+decrypt_block_hw :: proc(ctx: ^Context_Impl_Hardware, dst, src: []byte) {
+	blk := intrinsics.unaligned_load((^x86.__m128i)(raw_data(src)))
+
+	blk = x86._mm_xor_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[0])))
+	#unroll for i in 1 ..= 9 {
+		blk = x86._mm_aesdec_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[i])))
+	}
+	switch ctx._num_rounds {
+	case _aes.ROUNDS_128:
+		blk = x86._mm_aesdeclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[10])))
+	case _aes.ROUNDS_192:
+		#unroll for i in 10 ..= 11 {
+			blk = x86._mm_aesdec_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[i])))
+		}
+		blk = x86._mm_aesdeclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[12])))
+	case _aes.ROUNDS_256:
+		#unroll for i in 10 ..= 13 {
+			blk = x86._mm_aesdec_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[i])))
+		}
+		blk = x86._mm_aesdeclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[14])))
+	}
+
+	intrinsics.unaligned_store((^x86.__m128i)(raw_data(dst)), blk)
+}

+ 269 - 0
core/crypto/aes/aes_gcm.odin

@@ -0,0 +1,269 @@
+package aes
+
+import "core:bytes"
+import "core:crypto"
+import "core:crypto/_aes"
+import "core:crypto/_aes/ct64"
+import "core:encoding/endian"
+import "core:mem"
+
+// GCM_NONCE_SIZE is the default size of the GCM nonce in bytes.
+GCM_NONCE_SIZE :: 12
+// GCM_NONCE_SIZE_MAX is the exclusive upper bound of the GCM nonce size
+// in bytes.
+GCM_NONCE_SIZE_MAX :: 0x2000000000000000 // 2^64 bits -> bytes
+// GCM_TAG_SIZE is the size of a GCM tag in bytes.
+GCM_TAG_SIZE :: _aes.GHASH_TAG_SIZE
+
+@(private)
+GCM_A_MAX :: max(u64) / 8 // 2^64 - 1 bits -> bytes
+@(private)
+GCM_P_MAX :: 0xfffffffe0 // 2^39 - 256 bits -> bytes
+
+// Context_GCM is a keyed AES-GCM instance.
+Context_GCM :: struct {
+	_impl:           Context_Impl,
+	_is_initialized: bool,
+}
+
+// init_gcm initializes a Context_GCM with the provided key.
+init_gcm :: proc(ctx: ^Context_GCM, key: []byte, impl := Implementation.Hardware) {
+	init_impl(&ctx._impl, key, impl)
+	ctx._is_initialized = true
+}
+
+// seal_gcm encrypts the plaintext and authenticates the aad and ciphertext,
+// with the provided Context_GCM and nonce, stores the output in dst and tag.
+//
+// dst and plaintext MUST alias exactly or not at all.
+seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, nonce, aad, plaintext: []byte) {
+	assert(ctx._is_initialized)
+
+	gcm_validate_common_slice_sizes(tag, nonce, aad, plaintext)
+	if len(dst) != len(plaintext) {
+		panic("crypto/aes: invalid destination ciphertext size")
+	}
+	if bytes.alias_inexactly(dst, plaintext) {
+		panic("crypto/aes: dst and plaintext alias inexactly")
+	}
+
+	if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
+		gcm_seal_hw(&impl, dst, tag, nonce, aad, plaintext)
+		return
+	}
+
+	h: [_aes.GHASH_KEY_SIZE]byte
+	j0: [_aes.GHASH_BLOCK_SIZE]byte
+	j0_enc: [_aes.GHASH_BLOCK_SIZE]byte
+	s: [_aes.GHASH_TAG_SIZE]byte
+	init_ghash_ct64(ctx, &h, &j0, &j0_enc, nonce)
+
+	// Note: Our GHASH implementation handles appending padding.
+	ct64.ghash(s[:], h[:], aad)
+	gctr_ct64(ctx, dst, &s, plaintext, &h, &j0, true)
+	final_ghash_ct64(&s, &h, &j0_enc, len(aad), len(plaintext))
+	copy(tag, s[:])
+
+	mem.zero_explicit(&h, len(h))
+	mem.zero_explicit(&j0, len(j0))
+	mem.zero_explicit(&j0_enc, len(j0_enc))
+}
+
+// open_gcm authenticates the aad and ciphertext, and decrypts the ciphertext,
+// with the provided Context_GCM, nonce, and tag, and stores the output in dst,
+// returning true iff the authentication was successful.  If authentication
+// fails, the destination buffer will be zeroed.
+//
+// dst and plaintext MUST alias exactly or not at all.
+open_gcm :: proc(ctx: ^Context_GCM, dst, nonce, aad, ciphertext, tag: []byte) -> bool {
+	assert(ctx._is_initialized)
+
+	gcm_validate_common_slice_sizes(tag, nonce, aad, ciphertext)
+	if len(dst) != len(ciphertext) {
+		panic("crypto/aes: invalid destination plaintext size")
+	}
+	if bytes.alias_inexactly(dst, ciphertext) {
+		panic("crypto/aes: dst and ciphertext alias inexactly")
+	}
+
+	if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
+		return gcm_open_hw(&impl, dst, nonce, aad, ciphertext, tag)
+	}
+
+	h: [_aes.GHASH_KEY_SIZE]byte
+	j0: [_aes.GHASH_BLOCK_SIZE]byte
+	j0_enc: [_aes.GHASH_BLOCK_SIZE]byte
+	s: [_aes.GHASH_TAG_SIZE]byte
+	init_ghash_ct64(ctx, &h, &j0, &j0_enc, nonce)
+
+	ct64.ghash(s[:], h[:], aad)
+	gctr_ct64(ctx, dst, &s, ciphertext, &h, &j0, false)
+	final_ghash_ct64(&s, &h, &j0_enc, len(aad), len(ciphertext))
+
+	ok := crypto.compare_constant_time(s[:], tag) == 1
+	if !ok {
+		mem.zero_explicit(raw_data(dst), len(dst))
+	}
+
+	mem.zero_explicit(&h, len(h))
+	mem.zero_explicit(&j0, len(j0))
+	mem.zero_explicit(&j0_enc, len(j0_enc))
+	mem.zero_explicit(&s, len(s))
+
+	return ok
+}
+
+// reset_gcm sanitizes the Context_GCM.  The Context_GCM must be
+// re-initialized to be used again.
+reset_gcm :: proc "contextless" (ctx: ^Context_GCM) {
+	reset_impl(&ctx._impl)
+	ctx._is_initialized = false
+}
+
+@(private = "file")
+gcm_validate_common_slice_sizes :: proc(tag, nonce, aad, text: []byte) {
+	if len(tag) != GCM_TAG_SIZE {
+		panic("crypto/aes: invalid GCM tag size")
+	}
+
+	// The specification supports nonces in the range [1, 2^64) bits.
+	if l := len(nonce); l == 0 || u64(l) >= GCM_NONCE_SIZE_MAX {
+		panic("crypto/aes: invalid GCM nonce size")
+	}
+
+	if aad_len := u64(len(aad)); aad_len > GCM_A_MAX {
+		panic("crypto/aes: oversized GCM aad")
+	}
+	if text_len := u64(len(text)); text_len > GCM_P_MAX {
+		panic("crypto/aes: oversized GCM src data")
+	}
+}
+
+@(private = "file")
+init_ghash_ct64 :: proc(
+	ctx: ^Context_GCM,
+	h: ^[_aes.GHASH_KEY_SIZE]byte,
+	j0: ^[_aes.GHASH_BLOCK_SIZE]byte,
+	j0_enc: ^[_aes.GHASH_BLOCK_SIZE]byte,
+	nonce: []byte,
+) {
+	impl := &ctx._impl.(ct64.Context)
+
+	// 1. Let H = CIPH(k, 0^128)
+	ct64.encrypt_block(impl, h[:], h[:])
+
+	// Define a block, J0, as follows:
+	if l := len(nonce); l == GCM_NONCE_SIZE {
+		// if len(IV) = 96, then let J0 = IV || 0^31 || 1
+		copy(j0[:], nonce)
+		j0[_aes.GHASH_BLOCK_SIZE - 1] = 1
+	} else {
+		// If len(IV) != 96, then let s = 128 * ceil(len(IV)/128) - len(IV),
+		// and let J0 = GHASH_H(IV || 0^(s+64) || [len(IV)]_64).
+		ct64.ghash(j0[:], h[:], nonce)
+
+		tmp: [_aes.GHASH_BLOCK_SIZE]byte
+		endian.unchecked_put_u64be(tmp[8:], u64(l) * 8)
+		ct64.ghash(j0[:], h[:], tmp[:])
+	}
+
+	// ECB encrypt j0, so that we can just XOR with the tag.  In theory
+	// this could be processed along with the final GCTR block, to
+	// potentially save a call to AES-ECB, but... just use AES-NI.
+	ct64.encrypt_block(impl, j0_enc[:], j0[:])
+}
+
+@(private = "file")
+final_ghash_ct64 :: proc(
+	s: ^[_aes.GHASH_BLOCK_SIZE]byte,
+	h: ^[_aes.GHASH_KEY_SIZE]byte,
+	j0: ^[_aes.GHASH_BLOCK_SIZE]byte,
+	a_len: int,
+	t_len: int,
+) {
+	blk: [_aes.GHASH_BLOCK_SIZE]byte
+	endian.unchecked_put_u64be(blk[0:], u64(a_len) * 8)
+	endian.unchecked_put_u64be(blk[8:], u64(t_len) * 8)
+
+	ct64.ghash(s[:], h[:], blk[:])
+	for i in 0 ..< len(s) {
+		s[i] ~= j0[i]
+	}
+}
+
+@(private = "file")
+gctr_ct64 :: proc(
+	ctx: ^Context_GCM,
+	dst: []byte,
+	s: ^[_aes.GHASH_BLOCK_SIZE]byte,
+	src: []byte,
+	h: ^[_aes.GHASH_KEY_SIZE]byte,
+	nonce: ^[_aes.GHASH_BLOCK_SIZE]byte,
+	is_seal: bool,
+) #no_bounds_check {
+	ct64_inc_ctr32 := #force_inline proc "contextless" (dst: []byte, ctr: u32) -> u32 {
+		endian.unchecked_put_u32be(dst[12:], ctr)
+		return ctr + 1
+	}
+
+	// Setup the counter blocks.
+	tmp, tmp2: [ct64.STRIDE][BLOCK_SIZE]byte = ---, ---
+	ctrs, blks: [ct64.STRIDE][]byte = ---, ---
+	ctr := endian.unchecked_get_u32be(nonce[GCM_NONCE_SIZE:]) + 1
+	for i in 0 ..< ct64.STRIDE {
+		// Setup scratch space for the keystream.
+		blks[i] = tmp2[i][:]
+
+		// Pre-copy the IV to all the counter blocks.
+		ctrs[i] = tmp[i][:]
+		copy(ctrs[i], nonce[:GCM_NONCE_SIZE])
+	}
+
+	impl := &ctx._impl.(ct64.Context)
+	src, dst := src, dst
+
+	nr_blocks := len(src) / BLOCK_SIZE
+	for nr_blocks > 0 {
+		n := min(ct64.STRIDE, nr_blocks)
+		l := n * BLOCK_SIZE
+
+		if !is_seal {
+			ct64.ghash(s[:], h[:], src[:l])
+		}
+
+		// The keystream is written to a separate buffer, as we will
+		// reuse the first 96 bits of each counter.
+		for i in 0 ..< n {
+			ctr = ct64_inc_ctr32(ctrs[i], ctr)
+		}
+		ct64.encrypt_blocks(impl, blks[:n], ctrs[:n])
+
+		xor_blocks(dst, src, blks[:n])
+
+		if is_seal {
+			ct64.ghash(s[:], h[:], dst[:l])
+		}
+
+		src = src[l:]
+		dst = dst[l:]
+		nr_blocks -= n
+	}
+	if l := len(src); l > 0 {
+		if !is_seal {
+			ct64.ghash(s[:], h[:], src[:l])
+		}
+
+		ct64_inc_ctr32(ctrs[0], ctr)
+		ct64.encrypt_block(impl, ctrs[0], ctrs[0])
+
+		for i in 0 ..< l {
+			dst[i] = src[i] ~ ctrs[0][i]
+		}
+
+		if is_seal {
+			ct64.ghash(s[:], h[:], dst[:l])
+		}
+	}
+
+	mem.zero_explicit(&tmp, size_of(tmp))
+	mem.zero_explicit(&tmp2, size_of(tmp2))
+}
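
A seal/open round-trip sketch for the AEAD API above (the nonce is generated randomly here; with 96-bit random nonces the usual birthday-bound limits on messages per key apply):

import "core:crypto"
import "core:crypto/aes"

gcm_example :: proc() {
	key: [aes.KEY_SIZE_256]byte // from a KDF or CSPRNG in practice
	ctx: aes.Context_GCM
	aes.init_gcm(&ctx, key[:])
	defer aes.reset_gcm(&ctx)

	nonce: [aes.GCM_NONCE_SIZE]byte
	crypto.rand_bytes(nonce[:]) // never reuse a (key, nonce) pair

	aad := []byte("header")
	pt := []byte("secret")
	ct := make([]byte, len(pt))
	defer delete(ct)
	tag: [aes.GCM_TAG_SIZE]byte
	aes.seal_gcm(&ctx, ct, tag[:], nonce[:], aad, pt)

	recovered := make([]byte, len(ct))
	defer delete(recovered)
	ok := aes.open_gcm(&ctx, recovered, nonce[:], aad, ct, tag[:])
	assert(ok) // on failure, `recovered` is zeroed
}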

+ 243 - 0
core/crypto/aes/aes_gcm_hw_intel.odin

@@ -0,0 +1,243 @@
+//+build amd64
+package aes
+
+import "base:intrinsics"
+import "core:crypto"
+import "core:crypto/_aes"
+import "core:crypto/_aes/hw_intel"
+import "core:encoding/endian"
+import "core:mem"
+import "core:simd/x86"
+
+@(private)
+gcm_seal_hw :: proc(ctx: ^Context_Impl_Hardware, dst, tag, nonce, aad, plaintext: []byte) {
+	h: [_aes.GHASH_KEY_SIZE]byte
+	j0: [_aes.GHASH_BLOCK_SIZE]byte
+	j0_enc: [_aes.GHASH_BLOCK_SIZE]byte
+	s: [_aes.GHASH_TAG_SIZE]byte
+	init_ghash_hw(ctx, &h, &j0, &j0_enc, nonce)
+
+	// Note: Our GHASH implementation handles appending padding.
+	hw_intel.ghash(s[:], h[:], aad)
+	gctr_hw(ctx, dst, &s, plaintext, &h, &j0, true)
+	final_ghash_hw(&s, &h, &j0_enc, len(aad), len(plaintext))
+	copy(tag, s[:])
+
+	mem.zero_explicit(&h, len(h))
+	mem.zero_explicit(&j0, len(j0))
+	mem.zero_explicit(&j0_enc, len(j0_enc))
+}
+
+@(private)
+gcm_open_hw :: proc(ctx: ^Context_Impl_Hardware, dst, nonce, aad, ciphertext, tag: []byte) -> bool {
+	h: [_aes.GHASH_KEY_SIZE]byte
+	j0: [_aes.GHASH_BLOCK_SIZE]byte
+	j0_enc: [_aes.GHASH_BLOCK_SIZE]byte
+	s: [_aes.GHASH_TAG_SIZE]byte
+	init_ghash_hw(ctx, &h, &j0, &j0_enc, nonce)
+
+	hw_intel.ghash(s[:], h[:], aad)
+	gctr_hw(ctx, dst, &s, ciphertext, &h, &j0, false)
+	final_ghash_hw(&s, &h, &j0_enc, len(aad), len(ciphertext))
+
+	ok := crypto.compare_constant_time(s[:], tag) == 1
+	if !ok {
+		mem.zero_explicit(raw_data(dst), len(dst))
+	}
+
+	mem.zero_explicit(&h, len(h))
+	mem.zero_explicit(&j0, len(j0))
+	mem.zero_explicit(&j0_enc, len(j0_enc))
+	mem.zero_explicit(&s, len(s))
+
+	return ok
+}
+
+@(private = "file")
+init_ghash_hw :: proc(
+	ctx: ^Context_Impl_Hardware,
+	h: ^[_aes.GHASH_KEY_SIZE]byte,
+	j0: ^[_aes.GHASH_BLOCK_SIZE]byte,
+	j0_enc: ^[_aes.GHASH_BLOCK_SIZE]byte,
+	nonce: []byte,
+) {
+	// 1. Let H = CIPH(k, 0^128)
+	encrypt_block_hw(ctx, h[:], h[:])
+
+	// Define a block, J0, as follows:
+	if l := len(nonce); l == GCM_NONCE_SIZE {
+		// if len(IV) = 96, then let J0 = IV || 0^31 || 1
+		copy(j0[:], nonce)
+		j0[_aes.GHASH_BLOCK_SIZE - 1] = 1
+	} else {
+		// If len(IV) != 96, then let s = 128 * ceil(len(IV)/128) - len(IV),
+		// and let J0 = GHASH_H(IV || 0^(s+64) || [len(IV)]_64).
+		hw_intel.ghash(j0[:], h[:], nonce)
+
+		tmp: [_aes.GHASH_BLOCK_SIZE]byte
+		endian.unchecked_put_u64be(tmp[8:], u64(l) * 8)
+		hw_intel.ghash(j0[:], h[:], tmp[:])
+	}
+
+	// ECB encrypt j0, so that we can just XOR with the tag.
+	encrypt_block_hw(ctx, j0_enc[:], j0[:])
+}
+
+@(private = "file", enable_target_feature = "sse2")
+final_ghash_hw :: proc(
+	s: ^[_aes.GHASH_BLOCK_SIZE]byte,
+	h: ^[_aes.GHASH_KEY_SIZE]byte,
+	j0: ^[_aes.GHASH_BLOCK_SIZE]byte,
+	a_len: int,
+	t_len: int,
+) {
+	blk: [_aes.GHASH_BLOCK_SIZE]byte
+	endian.unchecked_put_u64be(blk[0:], u64(a_len) * 8)
+	endian.unchecked_put_u64be(blk[8:], u64(t_len) * 8)
+
+	hw_intel.ghash(s[:], h[:], blk[:])
+	j0_vec := intrinsics.unaligned_load((^x86.__m128i)(j0))
+	s_vec := intrinsics.unaligned_load((^x86.__m128i)(s))
+	s_vec = x86._mm_xor_si128(s_vec, j0_vec)
+	intrinsics.unaligned_store((^x86.__m128i)(s), s_vec)
+}
+
+@(private = "file", enable_target_feature = "sse2,sse4.1,aes")
+gctr_hw :: proc(
+	ctx: ^Context_Impl_Hardware,
+	dst: []byte,
+	s: ^[_aes.GHASH_BLOCK_SIZE]byte,
+	src: []byte,
+	h: ^[_aes.GHASH_KEY_SIZE]byte,
+	nonce: ^[_aes.GHASH_BLOCK_SIZE]byte,
+	is_seal: bool,
+) #no_bounds_check {
+	sks: [15]x86.__m128i = ---
+	for i in 0 ..= ctx._num_rounds {
+		sks[i] = intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[i]))
+	}
+
+	// Setup the counter block
+	ctr_blk := intrinsics.unaligned_load((^x86.__m128i)(nonce))
+	ctr := endian.unchecked_get_u32be(nonce[GCM_NONCE_SIZE:]) + 1
+
+	src, dst := src, dst
+
+	// Note: Instead of doing GHASH and CTR separately, it is more
+	// performant to interleave (stitch) the two operations together.
+	// This results in an unreadable mess, so we opt for simplicity
+	// as performance is adequate.
+
+	blks: [CTR_STRIDE_HW]x86.__m128i = ---
+	nr_blocks := len(src) / BLOCK_SIZE
+	for nr_blocks >= CTR_STRIDE_HW {
+		if !is_seal {
+			hw_intel.ghash(s[:], h[:], src[:CTR_STRIDE_BYTES_HW])
+		}
+
+		#unroll for i in 0 ..< CTR_STRIDE_HW {
+			blks[i], ctr = hw_inc_ctr32(&ctr_blk, ctr)
+		}
+
+		#unroll for i in 0 ..< CTR_STRIDE_HW {
+			blks[i] = x86._mm_xor_si128(blks[i], sks[0])
+		}
+		#unroll for i in 1 ..= 9 {
+			#unroll for j in 0 ..< CTR_STRIDE_HW {
+				blks[j] = x86._mm_aesenc_si128(blks[j], sks[i])
+			}
+		}
+		switch ctx._num_rounds {
+		case _aes.ROUNDS_128:
+			#unroll for i in 0 ..< CTR_STRIDE_HW {
+				blks[i] = x86._mm_aesenclast_si128(blks[i], sks[10])
+			}
+		case _aes.ROUNDS_192:
+			#unroll for i in 10 ..= 11 {
+				#unroll for j in 0 ..< CTR_STRIDE_HW {
+					blks[j] = x86._mm_aesenc_si128(blks[j], sks[i])
+				}
+			}
+			#unroll for i in 0 ..< CTR_STRIDE_HW {
+				blks[i] = x86._mm_aesenclast_si128(blks[i], sks[12])
+			}
+		case _aes.ROUNDS_256:
+			#unroll for i in 10 ..= 13 {
+				#unroll for j in 0 ..< CTR_STRIDE_HW {
+					blks[j] = x86._mm_aesenc_si128(blks[j], sks[i])
+				}
+			}
+			#unroll for i in 0 ..< CTR_STRIDE_HW {
+				blks[i] = x86._mm_aesenclast_si128(blks[i], sks[14])
+			}
+		}
+
+		xor_blocks_hw(dst, src, blks[:])
+
+		if is_seal {
+			hw_intel.ghash(s[:], h[:], dst[:CTR_STRIDE_BYTES_HW])
+		}
+
+		src = src[CTR_STRIDE_BYTES_HW:]
+		dst = dst[CTR_STRIDE_BYTES_HW:]
+		nr_blocks -= CTR_STRIDE_HW
+	}
+
+	// Handle the remainder.
+	for n := len(src); n > 0; {
+		l := min(n, BLOCK_SIZE)
+		if !is_seal {
+			hw_intel.ghash(s[:], h[:], src[:l])
+		}
+
+		blks[0], ctr = hw_inc_ctr32(&ctr_blk, ctr)
+
+		blks[0] = x86._mm_xor_si128(blks[0], sks[0])
+		#unroll for i in 1 ..= 9 {
+			blks[0] = x86._mm_aesenc_si128(blks[0], sks[i])
+		}
+		switch ctx._num_rounds {
+		case _aes.ROUNDS_128:
+			blks[0] = x86._mm_aesenclast_si128(blks[0], sks[10])
+		case _aes.ROUNDS_192:
+			#unroll for i in 10 ..= 11 {
+				blks[0] = x86._mm_aesenc_si128(blks[0], sks[i])
+			}
+			blks[0] = x86._mm_aesenclast_si128(blks[0], sks[12])
+		case _aes.ROUNDS_256:
+			#unroll for i in 10 ..= 13 {
+				blks[0] = x86._mm_aesenc_si128(blks[0], sks[i])
+			}
+			blks[0] = x86._mm_aesenclast_si128(blks[0], sks[14])
+		}
+
+		if l == BLOCK_SIZE {
+			xor_blocks_hw(dst, src, blks[:1])
+		} else {
+			blk: [BLOCK_SIZE]byte
+			copy(blk[:], src)
+			xor_blocks_hw(blk[:], blk[:], blks[:1])
+			copy(dst, blk[:l])
+		}
+		if is_seal {
+			hw_intel.ghash(s[:], h[:], dst[:l])
+		}
+
+		dst = dst[l:]
+		src = src[l:]
+		n -= l
+	}
+
+	mem.zero_explicit(&blks, size_of(blks))
+	mem.zero_explicit(&sks, size_of(sks))
+}
+
+// BUG: Sticking this in gctr_hw (like the other implementations) crashes
+// the compiler.
+//
+// src/check_expr.cpp(7892): Assertion Failure: `c->curr_proc_decl->entity`
+@(private = "file", enable_target_feature = "sse4.1")
+hw_inc_ctr32 :: #force_inline proc "contextless" (src: ^x86.__m128i, ctr: u32) -> (x86.__m128i, u32) {
+	ret := x86._mm_insert_epi32(src^, i32(intrinsics.byte_swap(ctr)), 3)
+	return ret, ctr + 1
+}
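
For reference, lane 3 of a little-endian `__m128i` covers bytes 12..15 of the block, so the insert above is the SIMD equivalent of the byte-level update used by the portable path; a hypothetical helper spelling that out:

import "core:encoding/endian"

// Byte-level equivalent of hw_inc_ctr32 (cf. ct64_inc_ctr32 in aes_gcm.odin).
inc_ctr32_bytes :: proc "contextless" (blk: []byte, ctr: u32) -> u32 {
	endian.unchecked_put_u32be(blk[12:], ctr) // J0 layout: nonce[0:12] || BE32(ctr)
	return ctr + 1
}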

+ 41 - 0
core/crypto/aes/aes_impl.odin

@@ -0,0 +1,41 @@
+package aes
+
+import "core:crypto/_aes/ct64"
+import "core:mem"
+import "core:reflect"
+
+@(private)
+Context_Impl :: union {
+	ct64.Context,
+	Context_Impl_Hardware,
+}
+
+// Implementation is an AES implementation.  Most callers will not need
+// to specify this, as the package will automatically select the most
+// performant implementation available (see `is_hardware_accelerated()`).
+Implementation :: enum {
+	Portable,
+	Hardware,
+}
+
+@(private)
+init_impl :: proc(ctx: ^Context_Impl, key: []byte, impl: Implementation) {
+	impl := impl
+	if !is_hardware_accelerated() {
+		impl = .Portable
+	}
+
+	switch impl {
+	case .Portable:
+		reflect.set_union_variant_typeid(ctx^, typeid_of(ct64.Context))
+		ct64.init(&ctx.(ct64.Context), key)
+	case .Hardware:
+		reflect.set_union_variant_typeid(ctx^, typeid_of(Context_Impl_Hardware))
+		init_impl_hw(&ctx.(Context_Impl_Hardware), key)
+	}
+}
+
+@(private)
+reset_impl :: proc "contextless" (ctx: ^Context_Impl) {
+	mem.zero_explicit(ctx, size_of(Context_Impl))
+}
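
A sketch of overriding the automatic selection (the default `.Hardware` request silently degrades to `.Portable` when AES-NI is unavailable):

import "core:crypto/aes"

portable_example :: proc() {
	ctx: aes.Context_ECB
	key: [aes.KEY_SIZE_128]byte

	// Force the bitsliced constant-time implementation, e.g. for testing.
	aes.init_ecb(&ctx, key[:], .Portable)
	defer aes.reset_ecb(&ctx)
}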

+ 44 - 0
core/crypto/aes/aes_impl_hw_gen.odin

@@ -0,0 +1,44 @@
+//+build !amd64
+package aes
+
+@(private = "file")
+ERR_HW_NOT_SUPPORTED :: "crypto/aes: hardware implementation unsupported"
+
+// is_hardware_accelerated returns true iff hardware accelerated AES
+// is supported.
+is_hardware_accelerated :: proc "contextless" () -> bool {
+	return false
+}
+
+@(private)
+Context_Impl_Hardware :: struct {}
+
+@(private)
+init_impl_hw :: proc(ctx: ^Context_Impl_Hardware, key: []byte) {
+	panic(ERR_HW_NOT_SUPPORTED)
+}
+
+@(private)
+encrypt_block_hw :: proc(ctx: ^Context_Impl_Hardware, dst, src: []byte) {
+	panic(ERR_HW_NOT_SUPPORTED)
+}
+
+@(private)
+decrypt_block_hw :: proc(ctx: ^Context_Impl_Hardware, dst, src: []byte) {
+	panic(ERR_HW_NOT_SUPPORTED)
+}
+
+@(private)
+ctr_blocks_hw :: proc(ctx: ^Context_CTR, dst, src: []byte, nr_blocks: int) {
+	panic(ERR_HW_NOT_SUPPORTED)
+}
+
+@(private)
+gcm_seal_hw :: proc(ctx: ^Context_Impl_Hardware, dst, tag, nonce, aad, plaintext: []byte) {
+	panic(ERR_HW_NOT_SUPPORTED)
+}
+
+@(private)
+gcm_open_hw :: proc(ctx: ^Context_Impl_Hardware, dst, nonce, aad, ciphertext, tag: []byte) -> bool {
+	panic(ERR_HW_NOT_SUPPORTED)
+}

+ 18 - 0
core/crypto/aes/aes_impl_hw_intel.odin

@@ -0,0 +1,18 @@
+//+build amd64
+package aes
+
+import "core:crypto/_aes/hw_intel"
+
+// is_hardware_accelerated returns true iff hardware accelerated AES
+// is supported.
+is_hardware_accelerated :: proc "contextless" () -> bool {
+	return hw_intel.is_supported()
+}
+
+@(private)
+Context_Impl_Hardware :: hw_intel.Context
+
+@(private, enable_target_feature = "sse2,aes")
+init_impl_hw :: proc(ctx: ^Context_Impl_Hardware, key: []byte) {
+	hw_intel.init(ctx, key)
+}

+ 5 - 3
core/crypto/chacha20/chacha20.odin

@@ -7,6 +7,7 @@ See:
 */
 package chacha20
 
+import "core:bytes"
 import "core:encoding/endian"
 import "core:math/bits"
 import "core:mem"
@@ -121,14 +122,15 @@ seek :: proc(ctx: ^Context, block_nr: u64) {
 xor_bytes :: proc(ctx: ^Context, dst, src: []byte) {
 	assert(ctx._is_initialized)
 
-	// TODO: Enforcing that dst and src alias exactly or not at all
-	// is a good idea, though odd aliasing should be extremely uncommon.
-
 	src, dst := src, dst
 	if dst_len := len(dst); dst_len < len(src) {
 		src = src[:dst_len]
 	}
 
+	if bytes.alias_inexactly(dst, src) {
+		panic("crypto/chacha20: dst and src alias inexactly")
+	}
+
 	for remaining := len(src); remaining > 0; {
 		// Process multiple blocks at once
 		if ctx._off == _BLOCK_SIZE {
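
The new check resolves the old TODO: exact aliasing (in-place encryption) remains permitted, while partially overlapping buffers now panic instead of silently corrupting the keystream application. Illustrative sketch (not part of this patch):

demo_alias :: proc(ctx: ^Context) {
	buf: [64]byte
	xor_bytes(ctx, buf[:], buf[:]) // ok: dst and src alias exactly (in-place)
	// xor_bytes(ctx, buf[1:], buf[:63]) // would panic: inexact overlap
}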

+ 27 - 4
core/crypto/crypto.odin

@@ -4,6 +4,7 @@ helper routines.
 */
 package crypto
 
+import "base:runtime"
 import "core:mem"
 
 // compare_constant_time returns 1 iff a and b are equal, 0 otherwise.
@@ -49,6 +50,9 @@ compare_byte_ptrs_constant_time :: proc "contextless" (a, b: ^byte, n: int) -> i
 // the system entropy source.  This routine will block if the system entropy
 // source is not ready yet.  All system entropy source failures are treated
 // as catastrophic, resulting in a panic.
+//
+// Support for the system entropy source can be checked with the
+// `HAS_RAND_BYTES` boolean constant.
 rand_bytes :: proc (dst: []byte) {
 	// zero-fill the buffer first
 	mem.zero_explicit(raw_data(dst), len(dst))
@@ -56,8 +60,27 @@ rand_bytes :: proc (dst: []byte) {
 	_rand_bytes(dst)
 }
 
-// has_rand_bytes returns true iff the target has support for accessing the
-// system entropty source.
-has_rand_bytes :: proc () -> bool {
-	return _has_rand_bytes()
+// random_generator returns a `runtime.Random_Generator` backed by the
+// system entropy source.
+//
+// Support for the system entropy source can be checked with the
+// `HAS_RAND_BYTES` boolean constant.
+random_generator :: proc() -> runtime.Random_Generator {
+	return {
+		procedure = proc(data: rawptr, mode: runtime.Random_Generator_Mode, p: []byte) {
+			switch mode {
+			case .Read:
+				rand_bytes(p)
+			case .Reset:
+				// do nothing
+			case .Query_Info:
+				if len(p) != size_of(runtime.Random_Generator_Query_Info) {
+					return
+				}
+				info := (^runtime.Random_Generator_Query_Info)(raw_data(p))
+				info^ += {.Uniform, .Cryptographic, .External_Entropy}
+			}
+		},
+		data = nil,
+	}
 }
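
With this change the system entropy source plugs directly into anything that consumes a `runtime.Random_Generator`. A hypothetical usage sketch, assuming the `context.random_generator` field added alongside `base/runtime/random_generator.odin` in this commit:

import "core:crypto"
import "core:fmt"
import "core:math/rand"

main :: proc() {
	context.random_generator = crypto.random_generator()
	fmt.println(rand.uint64()) // now drawn from the system entropy source
}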

+ 5 - 5
core/crypto/kmac/kmac.odin

@@ -61,7 +61,7 @@ init_256 :: proc(ctx: ^Context, key, domain_sep: []byte) {
 update :: proc(ctx: ^Context, data: []byte) {
 	assert(ctx.is_initialized)
 
-	shake.write(transmute(^shake.Context)(ctx), data)
+	shake.write((^shake.Context)(ctx), data)
 }
 
 // final finalizes the Context, writes the tag to dst, and calls reset
@@ -75,7 +75,7 @@ final :: proc(ctx: ^Context, dst: []byte) {
 		panic("crypto/kmac: invalid KMAC tag_size, too short")
 	}
 
-	_sha3.final_cshake(transmute(^_sha3.Context)(ctx), dst)
+	_sha3.final_cshake((^_sha3.Context)(ctx), dst)
 }
 
 // clone clones the Context other into ctx.
@@ -84,7 +84,7 @@ clone :: proc(ctx, other: ^Context) {
 		return
 	}
 
-	shake.clone(transmute(^shake.Context)(ctx), transmute(^shake.Context)(other))
+	shake.clone((^shake.Context)(ctx), (^shake.Context)(other))
 }
 
 // reset sanitizes the Context.  The Context must be re-initialized to
@@ -94,7 +94,7 @@ reset :: proc(ctx: ^Context) {
 		return
 	}
 
-	shake.reset(transmute(^shake.Context)(ctx))
+	shake.reset((^shake.Context)(ctx))
 }
 
 @(private)
@@ -107,7 +107,7 @@ _init_kmac :: proc(ctx: ^Context, key, s: []byte, sec_strength: int) {
 		panic("crypto/kmac: invalid KMAC key, too short")
 	}
 
-	ctx_ := transmute(^_sha3.Context)(ctx)
+	ctx_ := (^_sha3.Context)(ctx)
 	_sha3.init_cshake(ctx_, N_KMAC, s, sec_strength)
 	_sha3.bytepad(ctx_, [][]byte{key}, _sha3.rate_cshake(sec_strength))
 }
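
The transmute-to-cast changes here (and in the keccak, sha3, and ristretto255 hunks below) are behavior-preserving: for pointer-to-pointer reinterpretation, Odin's explicit conversion `(^T)(p)` yields the same pointer as `transmute(^T)(p)` and is the idiomatic spelling. Illustrative sketch (assumes the package's existing `shake` import; not part of this patch):

demo_cast :: proc(ctx: ^Context) {
	a := transmute(^shake.Context)(ctx) // bit-cast spelling (old)
	b := (^shake.Context)(ctx)          // explicit pointer conversion (new)
	assert(a == b)
}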

+ 5 - 5
core/crypto/legacy/keccak/keccak.odin

@@ -66,12 +66,12 @@ init_512 :: proc(ctx: ^Context) {
 @(private)
 _init :: proc(ctx: ^Context) {
 	ctx.dsbyte = _sha3.DS_KECCAK
-	_sha3.init(transmute(^_sha3.Context)(ctx))
+	_sha3.init((^_sha3.Context)(ctx))
 }
 
 // update adds more data to the Context.
 update :: proc(ctx: ^Context, data: []byte) {
-	_sha3.update(transmute(^_sha3.Context)(ctx), data)
+	_sha3.update((^_sha3.Context)(ctx), data)
 }
 
 // final finalizes the Context, writes the digest to hash, and calls
@@ -80,16 +80,16 @@ update :: proc(ctx: ^Context, data: []byte) {
 // Iff finalize_clone is set, final will work on a copy of the Context,
 // which is useful for calculating rolling digests.
 final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
-	_sha3.final(transmute(^_sha3.Context)(ctx), hash, finalize_clone)
+	_sha3.final((^_sha3.Context)(ctx), hash, finalize_clone)
 }
 
 // clone clones the Context other into ctx.
 clone :: proc(ctx, other: ^Context) {
-	_sha3.clone(transmute(^_sha3.Context)(ctx), transmute(^_sha3.Context)(other))
+	_sha3.clone((^_sha3.Context)(ctx), (^_sha3.Context)(other))
 }
 
 // reset sanitizes the Context.  The Context must be re-initialized to
 // be used again.
 reset :: proc(ctx: ^Context) {
-	_sha3.reset(transmute(^_sha3.Context)(ctx))
+	_sha3.reset((^_sha3.Context)(ctx))
 }

+ 4 - 5
core/crypto/rand_bsd.odin

@@ -1,16 +1,15 @@
-//+build freebsd, openbsd
+//+build freebsd, openbsd, netbsd
 package crypto
 
 foreign import libc "system:c"
 
+HAS_RAND_BYTES :: true
+
 foreign libc {
 	arc4random_buf :: proc(buf: [^]byte, nbytes: uint) ---
 }
 
+@(private)
 _rand_bytes :: proc(dst: []byte) {
 	arc4random_buf(raw_data(dst), len(dst))
 }
-
-_has_rand_bytes :: proc() -> bool {
-	return true
-}

+ 5 - 6
core/crypto/rand_darwin.odin

@@ -5,14 +5,13 @@ import "core:fmt"
 import CF "core:sys/darwin/CoreFoundation"
 import Sec "core:sys/darwin/Security"
 
+HAS_RAND_BYTES :: true
+
+@(private)
 _rand_bytes :: proc(dst: []byte) {
 	err := Sec.RandomCopyBytes(count=len(dst), bytes=raw_data(dst))
 	if err != .Success {
-        msg := CF.StringCopyToOdinString(Sec.CopyErrorMessageString(err))
-        panic(fmt.tprintf("crypto/rand_bytes: SecRandomCopyBytes returned non-zero result: %v %s", err, msg))
+		msg := CF.StringCopyToOdinString(Sec.CopyErrorMessageString(err))
+		fmt.panicf("crypto/rand_bytes: SecRandomCopyBytes returned non-zero result: %v %s", err, msg)
 	}
 }
-
-_has_rand_bytes :: proc() -> bool {
-	return true
-}

+ 4 - 4
core/crypto/rand_generic.odin

@@ -2,14 +2,14 @@
 //+build !windows
 //+build !openbsd
 //+build !freebsd
+//+build !netbsd
 //+build !darwin
 //+build !js
 package crypto
 
+HAS_RAND_BYTES :: false
+
+@(private)
 _rand_bytes :: proc(dst: []byte) {
 	unimplemented("crypto: rand_bytes not supported on this OS")
 }
-
-_has_rand_bytes :: proc() -> bool {
-	return false
-}
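
Replacing the `_has_rand_bytes` procedure with a per-target `HAS_RAND_BYTES` constant lets callers gate on entropy support at compile time rather than at run time. A minimal sketch (illustrative only; the helper name is hypothetical):

import "core:crypto"

when crypto.HAS_RAND_BYTES {
	seed_from_system :: proc(dst: []byte) {
		crypto.rand_bytes(dst)
	}
}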

+ 4 - 4
core/crypto/rand_js.odin

@@ -6,8 +6,12 @@ foreign odin_env {
 	env_rand_bytes :: proc "contextless" (buf: []byte) ---
 }
 
+HAS_RAND_BYTES :: true
+
+@(private)
 _MAX_PER_CALL_BYTES :: 65536 // 64kiB
 
+@(private)
 _rand_bytes :: proc(dst: []byte) {
 	dst := dst
 
@@ -18,7 +22,3 @@ _rand_bytes :: proc(dst: []byte) {
 		dst = dst[to_read:]
 	}
 }
-
-_has_rand_bytes :: proc() -> bool {
-	return true
-}

+ 5 - 5
core/crypto/rand_linux.odin

@@ -4,8 +4,12 @@ import "core:fmt"
 
 import "core:sys/linux"
 
+HAS_RAND_BYTES :: true
+
+@(private)
 _MAX_PER_CALL_BYTES :: 33554431 // 2^25 - 1
 
+@(private)
 _rand_bytes :: proc (dst: []byte) {
 	dst := dst
 	l := len(dst)
@@ -28,13 +32,9 @@ _rand_bytes :: proc (dst: []byte) {
 			// All other failures are things that should NEVER happen
 			// unless the kernel interface changes (ie: the Linux
 			// developers break userland).
-			panic(fmt.tprintf("crypto: getrandom failed: %v", errno))
+			fmt.panicf("crypto: getrandom failed: %v", errno)
 		}
 		l -= n_read
 		dst = dst[n_read:]
 	}
 }
-
-_has_rand_bytes :: proc() -> bool {
-	return true
-}

+ 13 - 14
core/crypto/rand_windows.odin

@@ -4,24 +4,23 @@ import win32 "core:sys/windows"
 import "core:os"
 import "core:fmt"
 
+HAS_RAND_BYTES :: true
+
+@(private)
 _rand_bytes :: proc(dst: []byte) {
 	ret := (os.Errno)(win32.BCryptGenRandom(nil, raw_data(dst), u32(len(dst)), win32.BCRYPT_USE_SYSTEM_PREFERRED_RNG))
 	if ret != os.ERROR_NONE {
 		switch ret {
-			case os.ERROR_INVALID_HANDLE:
-				// The handle to the first parameter is invalid.
-				// This should not happen here, since we explicitly pass nil to it
-				panic("crypto: BCryptGenRandom Invalid handle for hAlgorithm")
-			case os.ERROR_INVALID_PARAMETER:
-				// One of the parameters was invalid
-				panic("crypto: BCryptGenRandom Invalid parameter")
-			case:
-				// Unknown error
-				panic(fmt.tprintf("crypto: BCryptGenRandom failed: %d\n", ret))
+		case os.ERROR_INVALID_HANDLE:
+			// The handle to the first parameter is invalid.
+			// This should not happen here, since we explicitly pass nil to it
+			panic("crypto: BCryptGenRandom Invalid handle for hAlgorithm")
+		case os.ERROR_INVALID_PARAMETER:
+			// One of the parameters was invalid
+			panic("crypto: BCryptGenRandom Invalid parameter")
+		case:
+			// Unknown error
+			fmt.panicf("crypto: BCryptGenRandom failed: %d\n", ret)
 		}
 	}
 }
-
-_has_rand_bytes :: proc() -> bool {
-	return true
-}

+ 3 - 3
core/crypto/ristretto255/ristretto255.odin

@@ -112,7 +112,7 @@ ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool {
 		return false
 	}
 
-	b_ := transmute(^[32]byte)(raw_data(b))
+	b_ := (^[32]byte)(raw_data(b))
 
 	s: field.Tight_Field_Element = ---
 	defer field.fe_clear(&s)
@@ -297,7 +297,7 @@ ge_bytes :: proc(ge: ^Group_Element, dst: []byte) {
 	// 2.  Return the 32-byte little-endian encoding of s.  More
 	// specifically, this is the encoding of the canonical
 	// representation of s as an integer between 0 and p-1, inclusive.
-	dst_ := transmute(^[32]byte)(raw_data(dst))
+	dst_ := (^[32]byte)(raw_data(dst))
 	field.fe_to_bytes(dst_, &tmp)
 
 	field.fe_clear_vec([]^field.Tight_Field_Element{&u1, &u2, &tmp, &z_inv, &ix0, &iy0, &x, &y})
@@ -417,7 +417,7 @@ ge_is_identity :: proc(ge: ^Group_Element) -> int {
 
 @(private)
 ge_map :: proc "contextless" (ge: ^Group_Element, b: []byte) {
-	b_ := transmute(^[32]byte)(raw_data(b))
+	b_ := (^[32]byte)(raw_data(b))
 
 	// The MAP function is defined on 32-byte strings as:
 	//

+ 1 - 1
core/crypto/ristretto255/ristretto255_scalar.odin

@@ -46,7 +46,7 @@ sc_set_bytes_wide :: proc(sc: ^Scalar, b: []byte) {
 		panic("crypto/ristretto255: invalid wide input size")
 	}
 
-	b_ := transmute(^[WIDE_SCALAR_SIZE]byte)(raw_data(b))
+	b_ := (^[WIDE_SCALAR_SIZE]byte)(raw_data(b))
 	grp.sc_set_bytes_wide(sc, b_)
 }
 

+ 5 - 5
core/crypto/sha3/sha3.odin

@@ -68,12 +68,12 @@ init_512 :: proc(ctx: ^Context) {
 @(private)
 _init :: proc(ctx: ^Context) {
 	ctx.dsbyte = _sha3.DS_SHA3
-	_sha3.init(transmute(^_sha3.Context)(ctx))
+	_sha3.init((^_sha3.Context)(ctx))
 }
 
 // update adds more data to the Context.
 update :: proc(ctx: ^Context, data: []byte) {
-	_sha3.update(transmute(^_sha3.Context)(ctx), data)
+	_sha3.update((^_sha3.Context)(ctx), data)
 }
 
 // final finalizes the Context, writes the digest to hash, and calls
@@ -82,16 +82,16 @@ update :: proc(ctx: ^Context, data: []byte) {
 // Iff finalize_clone is set, final will work on a copy of the Context,
 // which is useful for calculating rolling digests.
 final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
-	_sha3.final(transmute(^_sha3.Context)(ctx), hash, finalize_clone)
+	_sha3.final((^_sha3.Context)(ctx), hash, finalize_clone)
 }
 
 // clone clones the Context other into ctx.
 clone :: proc(ctx, other: ^Context) {
-	_sha3.clone(transmute(^_sha3.Context)(ctx), transmute(^_sha3.Context)(other))
+	_sha3.clone((^_sha3.Context)(ctx), (^_sha3.Context)(other))
 }
 
 // reset sanitizes the Context.  The Context must be re-initialized to
 // be used again.
 reset :: proc(ctx: ^Context) {
-	_sha3.reset(transmute(^_sha3.Context)(ctx))
+	_sha3.reset((^_sha3.Context)(ctx))
 }

Some files were not shown because too many files changed in this diff