Browse Source

Merge branch 'master' into parsing-package-fixes

RilleP 1 year ago
parent
commit
95a38d5a96
100 changed files with 5303 additions and 1316 deletions
  1. 52 11
      .github/workflows/ci.yml
  2. 70 15
      .github/workflows/nightly.yml
  3. 3 1
      .gitignore
  4. 1 1
      LICENSE
  5. 0 0
      base/builtin/builtin.odin
  6. 8 0
      base/intrinsics/intrinsics.odin
  7. 28 5
      base/runtime/core.odin
  8. 28 41
      base/runtime/core_builtin.odin
  9. 95 2
      base/runtime/core_builtin_soa.odin
  10. 1 1
      base/runtime/default_allocators_arena.odin
  11. 12 0
      base/runtime/default_allocators_general.odin
  12. 0 8
      base/runtime/default_allocators_nil.odin
  13. 0 0
      base/runtime/default_temporary_allocator.odin
  14. 3 2
      base/runtime/docs.odin
  15. 0 0
      base/runtime/dynamic_array_internal.odin
  16. 71 4
      base/runtime/dynamic_map_internal.odin
  17. 2 2
      base/runtime/entry_unix.odin
  18. 0 0
      base/runtime/entry_unix_no_crt_amd64.asm
  19. 0 0
      base/runtime/entry_unix_no_crt_darwin_arm64.asm
  20. 0 0
      base/runtime/entry_unix_no_crt_i386.asm
  21. 1 1
      base/runtime/entry_wasm.odin
  22. 1 1
      base/runtime/entry_windows.odin
  23. 0 0
      base/runtime/error_checks.odin
  24. 110 0
      base/runtime/heap_allocator.odin
  25. 15 0
      base/runtime/heap_allocator_other.odin
  26. 38 0
      base/runtime/heap_allocator_unix.odin
  27. 39 0
      base/runtime/heap_allocator_windows.odin
  28. 31 81
      base/runtime/internal.odin
  29. 7 0
      base/runtime/os_specific.odin
  30. 22 0
      base/runtime/os_specific_bsd.odin
  31. 15 0
      base/runtime/os_specific_darwin.odin
  32. 2 1
      base/runtime/os_specific_freestanding.odin
  33. 21 0
      base/runtime/os_specific_haiku.odin
  34. 2 1
      base/runtime/os_specific_js.odin
  35. 24 0
      base/runtime/os_specific_linux.odin
  36. 2 1
      base/runtime/os_specific_wasi.odin
  37. 51 0
      base/runtime/os_specific_windows.odin
  38. 20 6
      base/runtime/print.odin
  39. 0 0
      base/runtime/procs.odin
  40. 1 1
      base/runtime/procs_darwin.odin
  41. 0 0
      base/runtime/procs_js.odin
  42. 22 8
      base/runtime/procs_wasm.odin
  43. 0 0
      base/runtime/procs_windows_amd64.asm
  44. 0 0
      base/runtime/procs_windows_amd64.odin
  45. 0 0
      base/runtime/procs_windows_i386.odin
  46. 1 1
      base/runtime/udivmod128.odin
  47. 8 4
      build_odin.sh
  48. 14 2
      ci/upload_create_nightly.sh
  49. 1 1
      core/bufio/scanner.odin
  50. 0 1
      core/bufio/writer.odin
  51. 11 1
      core/c/c.odin
  52. 1 1
      core/c/libc/complex.odin
  53. 18 0
      core/c/libc/errno.odin
  54. 1 1
      core/c/libc/math.odin
  55. 4 10
      core/c/libc/stdarg.odin
  56. 1 1
      core/c/libc/stdatomic.odin
  57. 30 0
      core/c/libc/stdio.odin
  58. 1 1
      core/c/libc/string.odin
  59. 1 1
      core/c/libc/time.odin
  60. 5 1
      core/c/libc/wctype.odin
  61. 1 1
      core/compress/common.odin
  62. 1 1
      core/compress/shoco/shoco.odin
  63. 678 0
      core/container/avl/avl.odin
  64. 1 1
      core/container/bit_array/bit_array.odin
  65. 1 1
      core/container/intrusive/list/intrusive_list.odin
  66. 2 2
      core/container/lru/lru_cache.odin
  67. 1 1
      core/container/priority_queue/priority_queue.odin
  68. 2 2
      core/container/queue/queue.odin
  69. 2 2
      core/container/small_array/small_array.odin
  70. 2 2
      core/container/topological_sort/topological_sort.odin
  71. 16 70
      core/crypto/README.md
  72. 58 39
      core/crypto/_blake2/blake2.odin
  73. 428 0
      core/crypto/_edwards25519/edwards25519.odin
  74. 61 0
      core/crypto/_edwards25519/edwards25519_scalar.odin
  75. 288 0
      core/crypto/_edwards25519/edwards25519_scalar_mul.odin
  76. 2 2
      core/crypto/_fiat/fiat.odin
  77. 175 46
      core/crypto/_fiat/field_curve25519/field.odin
  78. 29 61
      core/crypto/_fiat/field_curve25519/field51.odin
  79. 47 4
      core/crypto/_fiat/field_poly1305/field.odin
  80. 35 39
      core/crypto/_fiat/field_poly1305/field4344.odin
  81. 153 0
      core/crypto/_fiat/field_scalar25519/field.odin
  82. 535 0
      core/crypto/_fiat/field_scalar25519/field64.odin
  83. 104 73
      core/crypto/_sha3/sha3.odin
  84. 145 0
      core/crypto/_sha3/sp800_185.odin
  85. 32 100
      core/crypto/blake2b/blake2b.odin
  86. 32 100
      core/crypto/blake2s/blake2s.odin
  87. 33 15
      core/crypto/chacha20/chacha20.odin
  88. 17 0
      core/crypto/chacha20poly1305/chacha20poly1305.odin
  89. 10 0
      core/crypto/crypto.odin
  90. 314 0
      core/crypto/ed25519/ed25519.odin
  91. 62 0
      core/crypto/hash/doc.odin
  92. 116 0
      core/crypto/hash/hash.odin
  93. 353 0
      core/crypto/hash/low_level.odin
  94. 103 0
      core/crypto/hkdf/hkdf.odin
  95. 174 0
      core/crypto/hmac/hmac.odin
  96. 116 0
      core/crypto/kmac/kmac.odin
  97. 55 337
      core/crypto/legacy/keccak/keccak.odin
  98. 50 98
      core/crypto/legacy/md5/md5.odin
  99. 52 99
      core/crypto/legacy/sha1/sha1.odin
  100. 122 0
      core/crypto/pbkdf2/pbkdf2.odin

+ 52 - 11
.github/workflows/ci.yml

@@ -3,11 +3,16 @@ on: [push, pull_request, workflow_dispatch]
 
 
 jobs:
 jobs:
   build_linux:
   build_linux:
+    name: Ubuntu Build, Check, and Test
     runs-on: ubuntu-latest
     runs-on: ubuntu-latest
     steps:
     steps:
       - uses: actions/checkout@v1
       - uses: actions/checkout@v1
-      - name: Download LLVM, botan
-        run: sudo apt-get install llvm-11 clang-11 libbotan-2-dev botan
+      - name: Download LLVM
+        run: |
+          wget https://apt.llvm.org/llvm.sh
+          chmod +x llvm.sh
+          sudo ./llvm.sh 17
+          echo "/usr/lib/llvm-17/bin" >> $GITHUB_PATH
       - name: build odin
       - name: build odin
         run: ./build_odin.sh release
         run: ./build_odin.sh release
       - name: Odin version
       - name: Odin version
@@ -46,6 +51,9 @@ jobs:
       - name: Odin check examples/all for Linux i386
       - name: Odin check examples/all for Linux i386
         run: ./odin check examples/all -vet -strict-style -target:linux_i386
         run: ./odin check examples/all -vet -strict-style -target:linux_i386
         timeout-minutes: 10
         timeout-minutes: 10
+      - name: Odin check examples/all for Linux arm64
+        run: ./odin check examples/all -vet -strict-style -target:linux_arm64
+        timeout-minutes: 10
       - name: Odin check examples/all for FreeBSD amd64
       - name: Odin check examples/all for FreeBSD amd64
         run: ./odin check examples/all -vet -strict-style -target:freebsd_amd64
         run: ./odin check examples/all -vet -strict-style -target:freebsd_amd64
         timeout-minutes: 10
         timeout-minutes: 10
@@ -53,15 +61,14 @@ jobs:
         run: ./odin check examples/all -vet -strict-style -target:openbsd_amd64
         run: ./odin check examples/all -vet -strict-style -target:openbsd_amd64
         timeout-minutes: 10
         timeout-minutes: 10
   build_macOS:
   build_macOS:
+    name: MacOS Build, Check, and Test
     runs-on: macos-latest
     runs-on: macos-latest
     steps:
     steps:
       - uses: actions/checkout@v1
       - uses: actions/checkout@v1
-      - name: Download LLVM, botan and setup PATH
+      - name: Download LLVM, and setup PATH
         run: |
         run: |
-          brew install llvm@13 botan
-          echo "/usr/local/opt/llvm@13/bin" >> $GITHUB_PATH
-          TMP_PATH=$(xcrun --show-sdk-path)/user/include
-          echo "CPATH=$TMP_PATH" >> $GITHUB_ENV
+          brew install llvm@17
+          echo "/usr/local/opt/llvm@17/bin" >> $GITHUB_PATH
       - name: build odin
       - name: build odin
         run: ./build_odin.sh release
         run: ./build_odin.sh release
       - name: Odin version
       - name: Odin version
@@ -92,13 +99,47 @@ jobs:
           cd tests/internal
           cd tests/internal
           make
           make
         timeout-minutes: 10
         timeout-minutes: 10
-      - name: Odin check examples/all for Darwin arm64
-        run: ./odin check examples/all -vet -strict-style -target:darwin_arm64
+  build_macOS_arm:
+    name: MacOS ARM Build, Check, and Test
+    runs-on: macos-14 # This is an arm/m1 runner.
+    steps:
+      - uses: actions/checkout@v1
+      - name: Download LLVM and setup PATH
+        run: |
+          brew install llvm@17
+          echo "/opt/homebrew/opt/llvm@17/bin" >> $GITHUB_PATH
+      - name: build odin
+        run: ./build_odin.sh release
+      - name: Odin version
+        run: ./odin version
+        timeout-minutes: 1
+      - name: Odin report
+        run: ./odin report
+        timeout-minutes: 1
+      - name: Odin check
+        run: ./odin check examples/demo -vet
         timeout-minutes: 10
         timeout-minutes: 10
-      - name: Odin check examples/all for Linux arm64
-        run: ./odin check examples/all -vet -strict-style -target:linux_arm64
+      - name: Odin run
+        run: ./odin run examples/demo
+        timeout-minutes: 10
+      - name: Odin run -debug
+        run: ./odin run examples/demo -debug
+        timeout-minutes: 10
+      - name: Odin check examples/all
+        run: ./odin check examples/all -strict-style
+        timeout-minutes: 10
+      - name: Core library tests
+        run: |
+          cd tests/core
+          make
+        timeout-minutes: 10
+      - name: Odin internals tests
+        run: |
+          cd tests/internal
+          make
         timeout-minutes: 10
         timeout-minutes: 10
   build_windows:
   build_windows:
+    name: Windows Build, Check, and Test
     runs-on: windows-2022
     runs-on: windows-2022
     steps:
     steps:
       - uses: actions/checkout@v1
       - uses: actions/checkout@v1

+ 70 - 15
.github/workflows/nightly.yml

@@ -7,6 +7,7 @@ on:
 
 
 jobs:
 jobs:
   build_windows:
   build_windows:
+    name: Windows Build
     if: github.repository == 'odin-lang/Odin'
     if: github.repository == 'odin-lang/Odin'
     runs-on: windows-2022
     runs-on: windows-2022
     steps:
     steps:
@@ -29,6 +30,7 @@ jobs:
           cp LICENSE dist
           cp LICENSE dist
           cp LLVM-C.dll dist
           cp LLVM-C.dll dist
           cp -r shared dist
           cp -r shared dist
+          cp -r base dist
           cp -r core dist
           cp -r core dist
           cp -r vendor dist
           cp -r vendor dist
           cp -r bin dist
           cp -r bin dist
@@ -39,12 +41,17 @@ jobs:
           name: windows_artifacts
           name: windows_artifacts
           path: dist
           path: dist
   build_ubuntu:
   build_ubuntu:
+    name: Ubuntu Build
     if: github.repository == 'odin-lang/Odin'
     if: github.repository == 'odin-lang/Odin'
     runs-on: ubuntu-latest
     runs-on: ubuntu-latest
     steps:
     steps:
       - uses: actions/checkout@v1
       - uses: actions/checkout@v1
       - name: (Linux) Download LLVM
       - name: (Linux) Download LLVM
-        run: sudo apt-get install llvm-11 clang-11
+        run: |
+          wget https://apt.llvm.org/llvm.sh
+          chmod +x llvm.sh
+          sudo ./llvm.sh 17
+          echo "/usr/lib/llvm-17/bin" >> $GITHUB_PATH
       - name: build odin
       - name: build odin
         run: make nightly
         run: make nightly
       - name: Odin run
       - name: Odin run
@@ -56,46 +63,88 @@ jobs:
           cp LICENSE dist
           cp LICENSE dist
           cp libLLVM* dist
           cp libLLVM* dist
           cp -r shared dist
           cp -r shared dist
+          cp -r base dist
           cp -r core dist
           cp -r core dist
           cp -r vendor dist
           cp -r vendor dist
           cp -r examples dist
           cp -r examples dist
+          # Zipping so executable permissions are retained, see https://github.com/actions/upload-artifact/issues/38
+          zip -r dist.zip dist
       - name: Upload artifact
       - name: Upload artifact
         uses: actions/upload-artifact@v1
         uses: actions/upload-artifact@v1
         with:
         with:
           name: ubuntu_artifacts
           name: ubuntu_artifacts
-          path: dist
+          path: dist.zip
   build_macos:
   build_macos:
+    name: MacOS Build
     if: github.repository == 'odin-lang/Odin'
     if: github.repository == 'odin-lang/Odin'
-    runs-on: macOS-latest
+    runs-on: macos-latest
     steps:
     steps:
       - uses: actions/checkout@v1
       - uses: actions/checkout@v1
       - name: Download LLVM and setup PATH
       - name: Download LLVM and setup PATH
         run: |
         run: |
-          brew install llvm@13
-          echo "/usr/local/opt/llvm@13/bin" >> $GITHUB_PATH
-          TMP_PATH=$(xcrun --show-sdk-path)/user/include
-          echo "CPATH=$TMP_PATH" >> $GITHUB_ENV
+          brew install llvm@17 dylibbundler
+          echo "/usr/local/opt/llvm@17/bin" >> $GITHUB_PATH
       - name: build odin
       - name: build odin
-        run: make nightly
-      - name: Odin run
-        run: ./odin run examples/demo
-      - name: Copy artifacts
+        # These -L flags make the linker prioritize system libraries over LLVM libraries; this is mainly to
+        # avoid linking with the libunwind bundled with LLVM and to link with the system's libunwind instead.
+        run: CXXFLAGS="-L/usr/lib/system -L/usr/lib" make nightly
+      - name: Bundle
         run: |
         run: |
           mkdir dist
           mkdir dist
           cp odin dist
           cp odin dist
           cp LICENSE dist
           cp LICENSE dist
           cp -r shared dist
           cp -r shared dist
+          cp -r base dist
           cp -r core dist
           cp -r core dist
           cp -r vendor dist
           cp -r vendor dist
           cp -r examples dist
           cp -r examples dist
+          dylibbundler -b -x dist/odin -d dist/libs -od -p @executable_path/libs
+          # Zipping so executable permissions are retained, see https://github.com/actions/upload-artifact/issues/38
+          zip -r dist.zip dist
+      - name: Odin run
+        run: ./dist/odin run examples/demo
       - name: Upload artifact
       - name: Upload artifact
         uses: actions/upload-artifact@v1
         uses: actions/upload-artifact@v1
         with:
         with:
           name: macos_artifacts
           name: macos_artifacts
-          path: dist
+          path: dist.zip
+  build_macos_arm:
+    name: MacOS ARM Build
+    if: github.repository == 'odin-lang/Odin'
+    runs-on: macos-14 # ARM machine
+    steps:
+      - uses: actions/checkout@v1
+      - name: Download LLVM and setup PATH
+        run: |
+          brew install llvm@17 dylibbundler
+          echo "/opt/homebrew/opt/llvm@17/bin" >> $GITHUB_PATH
+      - name: build odin
+        # These -L flags make the linker prioritize system libraries over LLVM libraries; this is mainly to
+        # avoid linking with the libunwind bundled with LLVM and to link with the system's libunwind instead.
+        run: CXXFLAGS="-L/usr/lib/system -L/usr/lib" make nightly
+      - name: Bundle
+        run: |
+          mkdir dist
+          cp odin dist
+          cp LICENSE dist
+          cp -r shared dist
+          cp -r base dist
+          cp -r core dist
+          cp -r vendor dist
+          cp -r examples dist
+          dylibbundler -b -x dist/odin -d dist/libs -od -p @executable_path/libs
+          # Zipping so executable permissions are retained, see https://github.com/actions/upload-artifact/issues/38
+          zip -r dist.zip dist
+      - name: Odin run
+        run: ./dist/odin run examples/demo
+      - name: Upload artifact
+        uses: actions/upload-artifact@v1
+        with:
+          name: macos_arm_artifacts
+          path: dist.zip
   upload_b2:
   upload_b2:
     runs-on: [ubuntu-latest]
     runs-on: [ubuntu-latest]
-    needs: [build_windows, build_macos, build_ubuntu]
+    needs: [build_windows, build_macos, build_macos_arm, build_ubuntu]
     steps:
     steps:
       - uses: actions/checkout@v1
       - uses: actions/checkout@v1
       - uses: actions/setup-python@v2
       - uses: actions/setup-python@v2
@@ -126,6 +175,11 @@ jobs:
         with:
         with:
           name: macos_artifacts
           name: macos_artifacts
 
 
+      - name: Download macOS arm artifacts
+        uses: actions/download-artifact@v1
+        with:
+          name: macos_arm_artifacts
+
       - name: Create archives and upload
       - name: Create archives and upload
         shell: bash
         shell: bash
         env:
         env:
@@ -140,8 +194,9 @@ jobs:
           echo Uploading artifcates to B2
           echo Uploading artifcates to B2
           chmod +x ./ci/upload_create_nightly.sh
           chmod +x ./ci/upload_create_nightly.sh
           ./ci/upload_create_nightly.sh "$BUCKET" windows-amd64 windows_artifacts/
           ./ci/upload_create_nightly.sh "$BUCKET" windows-amd64 windows_artifacts/
-          ./ci/upload_create_nightly.sh "$BUCKET" ubuntu-amd64 ubuntu_artifacts/
-          ./ci/upload_create_nightly.sh "$BUCKET" macos-amd64 macos_artifacts/
+          ./ci/upload_create_nightly.sh "$BUCKET" ubuntu-amd64 ubuntu_artifacts/dist.zip
+          ./ci/upload_create_nightly.sh "$BUCKET" macos-amd64 macos_artifacts/dist.zip
+          ./ci/upload_create_nightly.sh "$BUCKET" macos-arm64 macos_arm_artifacts/dist.zip
 
 
           echo Deleting old artifacts in B2
           echo Deleting old artifacts in B2
           python3 ci/delete_old_binaries.py "$BUCKET" "$DAYS_TO_KEEP"
           python3 ci/delete_old_binaries.py "$BUCKET" "$DAYS_TO_KEEP"

+ 3 - 1
.gitignore

@@ -28,6 +28,7 @@ tests/internal/test_map
 tests/internal/test_pow
 tests/internal/test_pow
 tests/internal/test_rtti
 tests/internal/test_rtti
 tests/core/test_core_compress
 tests/core/test_core_compress
+tests/core/test_core_container
 tests/core/test_core_filepath
 tests/core/test_core_filepath
 tests/core/test_core_fmt
 tests/core/test_core_fmt
 tests/core/test_core_i18n
 tests/core/test_core_i18n
@@ -39,7 +40,7 @@ tests/core/test_core_net
 tests/core/test_core_os_exit
 tests/core/test_core_os_exit
 tests/core/test_core_reflect
 tests/core/test_core_reflect
 tests/core/test_core_strings
 tests/core/test_core_strings
-tests/core/test_crypto_hash
+tests/core/test_crypto
 tests/core/test_hash
 tests/core/test_hash
 tests/core/test_hxa
 tests/core/test_hxa
 tests/core/test_json
 tests/core/test_json
@@ -49,6 +50,7 @@ tests/core/test_varint
 tests/core/test_xml
 tests/core/test_xml
 tests/core/test_core_slice
 tests/core/test_core_slice
 tests/core/test_core_thread
 tests/core/test_core_thread
+tests/core/test_core_runtime
 tests/vendor/vendor_botan
 tests/vendor/vendor_botan
 # Visual Studio 2015 cache/options directory
 # Visual Studio 2015 cache/options directory
 .vs/
 .vs/

+ 1 - 1
LICENSE

@@ -1,4 +1,4 @@
-Copyright (c) 2016-2022 Ginger Bill. All rights reserved.
+Copyright (c) 2016-2024 Ginger Bill. All rights reserved.
 
 
 Redistribution and use in source and binary forms, with or without
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 modification, are permitted provided that the following conditions are met:

+ 0 - 0
core/builtin/builtin.odin → base/builtin/builtin.odin


+ 8 - 0
core/intrinsics/intrinsics.odin → base/intrinsics/intrinsics.odin

@@ -5,6 +5,12 @@ package intrinsics
 // Package-Related
 // Package-Related
 is_package_imported :: proc(package_name: string) -> bool ---
 is_package_imported :: proc(package_name: string) -> bool ---
 
 
+// Matrix Related Procedures
+transpose        :: proc(m: $T/matrix[$R, $C]$E)    -> matrix[C, R]E ---
+outer_product    :: proc(a: $A/[$X]$E, b: $B/[$Y]E) -> matrix[X, Y]E ---
+hadamard_product :: proc(a, b: $T/matrix[$R, $C]$E) -> T ---
+matrix_flatten   :: proc(m: $T/matrix[$R, $C]$E)    -> [R*C]E ---
+
 // Types
 // Types
 soa_struct :: proc($N: int, $T: typeid) -> type/#soa[N]T
 soa_struct :: proc($N: int, $T: typeid) -> type/#soa[N]T
 
 
@@ -287,7 +293,9 @@ wasm_memory_size :: proc(index: uintptr)        -> int ---
 // 0 - indicates that the thread blocked and then was woken up
 // 0 - indicates that the thread blocked and then was woken up
 // 1 - the loaded value from `ptr` did not match `expected`, the thread did not block
 // 1 - the loaded value from `ptr` did not match `expected`, the thread did not block
 // 2 - the thread blocked, but the timeout expired
// 2 - the thread blocked, but the timeout expired
+@(enable_target_feature="atomics")
 wasm_memory_atomic_wait32   :: proc(ptr: ^u32, expected: u32, timeout_ns: i64) -> u32 ---
 wasm_memory_atomic_wait32   :: proc(ptr: ^u32, expected: u32, timeout_ns: i64) -> u32 ---
+@(enable_target_feature="atomics")
 wasm_memory_atomic_notify32 :: proc(ptr: ^u32, waiters: u32) -> (waiters_woken_up: u32) ---
 wasm_memory_atomic_notify32 :: proc(ptr: ^u32, waiters: u32) -> (waiters_woken_up: u32) ---
 
 
 // x86 Targets (i386, amd64)
 // x86 Targets (i386, amd64)

+ 28 - 5
core/runtime/core.odin → base/runtime/core.odin

@@ -21,7 +21,7 @@
 //+no-instrumentation
 //+no-instrumentation
 package runtime
 package runtime
 
 
-import "core:intrinsics"
+import "base:intrinsics"
 
 
 // NOTE(bill): This must match the compiler's
 // NOTE(bill): This must match the compiler's
 Calling_Convention :: enum u8 {
 Calling_Convention :: enum u8 {
@@ -177,10 +177,22 @@ Type_Info_Matrix :: struct {
 	row_count:    int,
 	row_count:    int,
 	column_count: int,
 	column_count: int,
 	// Total element count = column_count * elem_stride
 	// Total element count = column_count * elem_stride
+	layout: enum u8 {
+		Column_Major, // array of column vectors
+		Row_Major,    // array of row vectors
+	},
 }
 }
 Type_Info_Soa_Pointer :: struct {
 Type_Info_Soa_Pointer :: struct {
 	elem: ^Type_Info,
 	elem: ^Type_Info,
 }
 }
+Type_Info_Bit_Field :: struct {
+	backing_type: ^Type_Info,
+	names:        []string,
+	types:        []^Type_Info,
+	bit_sizes:    []uintptr,
+	bit_offsets:  []uintptr,
+	tags:         []string,
+}
 
 
 Type_Info_Flag :: enum u8 {
 Type_Info_Flag :: enum u8 {
 	Comparable     = 0,
 	Comparable     = 0,
@@ -223,6 +235,7 @@ Type_Info :: struct {
 		Type_Info_Relative_Multi_Pointer,
 		Type_Info_Relative_Multi_Pointer,
 		Type_Info_Matrix,
 		Type_Info_Matrix,
 		Type_Info_Soa_Pointer,
 		Type_Info_Soa_Pointer,
+		Type_Info_Bit_Field,
 	},
 	},
 }
 }
 
 
@@ -256,6 +269,7 @@ Typeid_Kind :: enum u8 {
 	Relative_Multi_Pointer,
 	Relative_Multi_Pointer,
 	Matrix,
 	Matrix,
 	Soa_Pointer,
 	Soa_Pointer,
+	Bit_Field,
 }
 }
 #assert(len(Typeid_Kind) < 32)
 #assert(len(Typeid_Kind) < 32)
 
 
@@ -270,7 +284,7 @@ Typeid_Kind :: enum u8 {
 
 
 // NOTE(bill): only the ones that are needed (not all types)
 // NOTE(bill): only the ones that are needed (not all types)
 // This will be set by the compiler
 // This will be set by the compiler
-type_table: []Type_Info
+type_table: []^Type_Info
 
 
 args__: []cstring
 args__: []cstring
 
 
@@ -296,6 +310,14 @@ Source_Code_Location :: struct {
 	procedure:    string,
 	procedure:    string,
 }
 }
 
 
+/*
+	Used by the built-in directive `#load_directory(path: string) -> []Load_Directory_File`
+*/
+Load_Directory_File :: struct {
+	name: string,
+	data: []byte, // immutable data
+}
+
 Assertion_Failure_Proc :: #type proc(prefix, message: string, loc: Source_Code_Location) -> !
 Assertion_Failure_Proc :: #type proc(prefix, message: string, loc: Source_Code_Location) -> !
 
 
 // Allocation Stuff
 // Allocation Stuff
@@ -575,8 +597,9 @@ type_info_core :: proc "contextless" (info: ^Type_Info) -> ^Type_Info {
 	base := info
 	base := info
 	loop: for {
 	loop: for {
 		#partial switch i in base.variant {
 		#partial switch i in base.variant {
-		case Type_Info_Named:  base = i.base
-		case Type_Info_Enum:   base = i.base
+		case Type_Info_Named:     base = i.base
+		case Type_Info_Enum:      base = i.base
+		case Type_Info_Bit_Field: base = i.backing_type
 		case: break loop
 		case: break loop
 		}
 		}
 	}
 	}
@@ -591,7 +614,7 @@ __type_info_of :: proc "contextless" (id: typeid) -> ^Type_Info #no_bounds_check
 	if n < 0 || n >= len(type_table) {
 	if n < 0 || n >= len(type_table) {
 		n = 0
 		n = 0
 	}
 	}
-	return &type_table[n]
+	return type_table[n]
 }
 }
 
 
 when !ODIN_NO_RTTI {
 when !ODIN_NO_RTTI {

+ 28 - 41
core/runtime/core_builtin.odin → base/runtime/core_builtin.odin

@@ -1,6 +1,6 @@
 package runtime
 package runtime
 
 
-import "core:intrinsics"
+import "base:intrinsics"
 
 
 @builtin
 @builtin
 Maybe :: union($T: typeid) {T}
 Maybe :: union($T: typeid) {T}
@@ -122,7 +122,7 @@ pop :: proc(array: ^$T/[dynamic]$E, loc := #caller_location) -> (res: E) #no_bou
 // `pop_safe` tries to remove and return the end value of dynamic array `array` and reduces the length of `array` by 1.
// `pop_safe` tries to remove and return the end value of dynamic array `array` and reduces the length of `array` by 1.
 // If the operation is not possible, it will return false.
 // If the operation is not possible, it will return false.
 @builtin
 @builtin
-pop_safe :: proc(array: ^$T/[dynamic]$E) -> (res: E, ok: bool) #no_bounds_check {
+pop_safe :: proc "contextless" (array: ^$T/[dynamic]$E) -> (res: E, ok: bool) #no_bounds_check {
 	if len(array) == 0 {
 	if len(array) == 0 {
 		return
 		return
 	}
 	}
@@ -148,7 +148,7 @@ pop_front :: proc(array: ^$T/[dynamic]$E, loc := #caller_location) -> (res: E) #
 // `pop_front_safe` tries to return and remove the first value of dynamic array `array` and reduces the length of `array` by 1.
// `pop_front_safe` tries to return and remove the first value of dynamic array `array` and reduces the length of `array` by 1.
 // If the operation is not possible, it will return false.
 // If the operation is not possible, it will return false.
 @builtin
 @builtin
-pop_front_safe :: proc(array: ^$T/[dynamic]$E) -> (res: E, ok: bool) #no_bounds_check {
+pop_front_safe :: proc "contextless" (array: ^$T/[dynamic]$E) -> (res: E, ok: bool) #no_bounds_check {
 	if len(array) == 0 {
 	if len(array) == 0 {
 		return
 		return
 	}
 	}
@@ -172,7 +172,7 @@ reserve :: proc{reserve_dynamic_array, reserve_map}
 @builtin
 @builtin
 non_zero_reserve :: proc{non_zero_reserve_dynamic_array}
 non_zero_reserve :: proc{non_zero_reserve_dynamic_array}
 
 
-// `resize` will try to resize memory of a passed dynamic array or map to the requested element count (setting the `len`, and possibly `cap`).
+// `resize` will try to resize memory of a passed dynamic array to the requested element count (setting the `len`, and possibly `cap`).
 @builtin
 @builtin
 resize :: proc{resize_dynamic_array}
 resize :: proc{resize_dynamic_array}
 
 
@@ -312,6 +312,7 @@ make_dynamic_array_len :: proc($T: typeid/[dynamic]$E, #any_int len: int, alloca
 @(builtin, require_results)
 @(builtin, require_results)
 make_dynamic_array_len_cap :: proc($T: typeid/[dynamic]$E, #any_int len: int, #any_int cap: int, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
 make_dynamic_array_len_cap :: proc($T: typeid/[dynamic]$E, #any_int len: int, #any_int cap: int, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
 	make_dynamic_array_error_loc(loc, len, cap)
 	make_dynamic_array_error_loc(loc, len, cap)
+	array.allocator = allocator // initialize allocator before just in case it fails to allocate any memory
 	data := mem_alloc_bytes(size_of(E)*cap, align_of(E), allocator, loc) or_return
 	data := mem_alloc_bytes(size_of(E)*cap, align_of(E), allocator, loc) or_return
 	s := Raw_Dynamic_Array{raw_data(data), len, cap, allocator}
 	s := Raw_Dynamic_Array{raw_data(data), len, cap, allocator}
 	if data == nil && size_of(E) != 0 {
 	if data == nil && size_of(E) != 0 {
@@ -446,12 +447,12 @@ _append_elem :: #force_inline proc(array: ^$T/[dynamic]$E, arg: E, should_zero:
 }
 }
 
 
 @builtin
 @builtin
-append_elem :: proc(array: ^$T/[dynamic]$E, arg: E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+append_elem :: proc(array: ^$T/[dynamic]$E, #no_broadcast arg: E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
 	return _append_elem(array, arg, true, loc=loc)
 	return _append_elem(array, arg, true, loc=loc)
 }
 }
 
 
 @builtin
 @builtin
-non_zero_append_elem :: proc(array: ^$T/[dynamic]$E, arg: E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+non_zero_append_elem :: proc(array: ^$T/[dynamic]$E, #no_broadcast arg: E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
 	return _append_elem(array, arg, false, loc=loc)
 	return _append_elem(array, arg, false, loc=loc)
 }
 }
 
 
@@ -495,12 +496,12 @@ _append_elems :: #force_inline proc(array: ^$T/[dynamic]$E, should_zero: bool, l
 }
 }
 
 
 @builtin
 @builtin
-append_elems :: proc(array: ^$T/[dynamic]$E, args: ..E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+append_elems :: proc(array: ^$T/[dynamic]$E, #no_broadcast args: ..E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
 	return _append_elems(array, true, loc, ..args)
 	return _append_elems(array, true, loc, ..args)
 }
 }
 
 
 @builtin
 @builtin
-non_zero_append_elems :: proc(array: ^$T/[dynamic]$E, args: ..E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+non_zero_append_elems :: proc(array: ^$T/[dynamic]$E, #no_broadcast args: ..E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
 	return _append_elems(array, false, loc, ..args)
 	return _append_elems(array, false, loc, ..args)
 }
 }
 
 
@@ -555,7 +556,7 @@ append_nothing :: proc(array: ^$T/[dynamic]$E, loc := #caller_location) -> (n: i
 
 
 
 
 @builtin
 @builtin
-inject_at_elem :: proc(array: ^$T/[dynamic]$E, index: int, arg: E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
+inject_at_elem :: proc(array: ^$T/[dynamic]$E, index: int, #no_broadcast arg: E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
 	if array == nil {
 	if array == nil {
 		return
 		return
 	}
 	}
@@ -573,7 +574,7 @@ inject_at_elem :: proc(array: ^$T/[dynamic]$E, index: int, arg: E, loc := #calle
 }
 }
 
 
 @builtin
 @builtin
-inject_at_elems :: proc(array: ^$T/[dynamic]$E, index: int, args: ..E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
+inject_at_elems :: proc(array: ^$T/[dynamic]$E, index: int, #no_broadcast args: ..E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
 	if array == nil {
 	if array == nil {
 		return
 		return
 	}
 	}
@@ -739,6 +740,9 @@ _resize_dynamic_array :: #force_inline proc(array: ^$T/[dynamic]$E, length: int,
 	a := (^Raw_Dynamic_Array)(array)
 	a := (^Raw_Dynamic_Array)(array)
 
 
 	if length <= a.cap {
 	if length <= a.cap {
+		if should_zero && a.len < length {
+			intrinsics.mem_zero(([^]E)(a.data)[a.len:], (length-a.len)*size_of(E))
+		}
 		a.len = max(length, 0)
 		a.len = max(length, 0)
 		return nil
 		return nil
 	}
 	}
@@ -823,42 +827,25 @@ map_insert :: proc(m: ^$T/map[$K]$V, key: K, value: V, loc := #caller_location)
 	return (^V)(__dynamic_map_set_without_hash((^Raw_Map)(m), map_info(T), rawptr(&key), rawptr(&value), loc))
 	return (^V)(__dynamic_map_set_without_hash((^Raw_Map)(m), map_info(T), rawptr(&key), rawptr(&value), loc))
 }
 }
 
 
-
-@builtin
-incl_elem :: proc(s: ^$S/bit_set[$E; $U], elem: E) {
-	s^ |= {elem}
-}
-@builtin
-incl_elems :: proc(s: ^$S/bit_set[$E; $U], elems: ..E) {
-	for elem in elems {
-		s^ |= {elem}
-	}
-}
-@builtin
-incl_bit_set :: proc(s: ^$S/bit_set[$E; $U], other: S) {
-	s^ |= other
-}
-@builtin
-excl_elem :: proc(s: ^$S/bit_set[$E; $U], elem: E) {
-	s^ &~= {elem}
-}
-@builtin
-excl_elems :: proc(s: ^$S/bit_set[$E; $U], elems: ..E) {
-	for elem in elems {
-		s^ &~= {elem}
+// Explicitly inserts a key and value into a map `m`, the same as `map_insert`, but the return values differ.
+// - `prev_key` will return the previously stored key if one existed; check `found_previous` to see whether it was found
+// - `value_ptr` will return the pointer of the memory where the insertion happens, and `nil` if the map failed to resize
+// - `found_previous` will be true if a previous key was found
+@(builtin, require_results)
+map_upsert :: proc(m: ^$T/map[$K]$V, key: K, value: V, loc := #caller_location) -> (prev_key: K, value_ptr: ^V, found_previous: bool) {
+	key, value := key, value
+	kp, vp := __dynamic_map_set_extra_without_hash((^Raw_Map)(m), map_info(T), rawptr(&key), rawptr(&value), loc)
+	if kp != nil {
+		prev_key = (^K)(kp)^
+		found_previous = true
 	}
 	}
+	value_ptr = (^V)(vp)
+	return
 }
 }
-@builtin
-excl_bit_set :: proc(s: ^$S/bit_set[$E; $U], other: S) {
-	s^ &~= other
-}
-
-@builtin incl :: proc{incl_elem, incl_elems, incl_bit_set}
-@builtin excl :: proc{excl_elem, excl_elems, excl_bit_set}
 
 
 
 
 @builtin
 @builtin
-card :: proc(s: $S/bit_set[$E; $U]) -> int {
+card :: proc "contextless" (s: $S/bit_set[$E; $U]) -> int {
 	when size_of(S) == 1 {
 	when size_of(S) == 1 {
 		return int(intrinsics.count_ones(transmute(u8)s))
 		return int(intrinsics.count_ones(transmute(u8)s))
 	} else when size_of(S) == 2 {
 	} else when size_of(S) == 2 {

+ 95 - 2
core/runtime/core_builtin_soa.odin → base/runtime/core_builtin_soa.odin

@@ -1,6 +1,6 @@
 package runtime
 package runtime
 
 
-import "core:intrinsics"
+import "base:intrinsics"
 _ :: intrinsics
 _ :: intrinsics
 
 
 /*
 /*
@@ -425,4 +425,97 @@ clear_soa_dynamic_array :: proc(array: ^$T/#soa[dynamic]$E) {
 @builtin
 @builtin
 clear_soa :: proc{
 clear_soa :: proc{
 	clear_soa_dynamic_array,
 	clear_soa_dynamic_array,
-}
+}
+
+// Converts soa slice into a soa dynamic array without cloning or allocating memory
+@(require_results)
+into_dynamic_soa :: proc(array: $T/#soa[]$E) -> #soa[dynamic]E {
+	d: #soa[dynamic]E
+	// The result starts empty (`len = 0`), uses the slice's length as its capacity,
+	// and is given the nil allocator.
+	footer := raw_soa_footer_dynamic_array(&d)
+	footer^ = {
+		cap = len(array),
+		len = 0,
+		allocator = nil_allocator(),
+	}
+
+	// Number of per-field data pointers: the array length for array element types,
+	// otherwise the number of struct fields.
+	field_count: uintptr
+	when intrinsics.type_is_array(E) {
+		field_count = len(E)
+	} else {
+		field_count = uintptr(intrinsics.type_struct_field_count(E))
+	}
+
+	// Shadow the parameter so its address can be taken, then copy the leading
+	// `field_count` pointers of the slice into the dynamic array.
+	array := array
+	dynamic_data := ([^]rawptr)(&d)[:field_count]
+	slice_data   := ([^]rawptr)(&array)[:field_count]
+	copy(dynamic_data, slice_data)
+
+	return d
+}
+
+// `unordered_remove_soa` removes the element at the specified `index`. It does so by replacing the removed value
+// with the current end value, and reducing the length of the dynamic array by 1.
+//
+// Note: This is an O(1) operation.
+// Note: If you want the elements to remain in their order, use `ordered_remove_soa`.
+// Note: If the index is out of bounds, this procedure will panic.
+@builtin
+unordered_remove_soa :: proc(array: ^$T/#soa[dynamic]$E, index: int, loc := #caller_location) #no_bounds_check {
+	// Bounds are checked manually (the procedure is `#no_bounds_check`) and reported at the caller's location.
+	bounds_check_error_loc(loc, index, len(array))
+	// Removing the last element needs no data movement, only the length change below.
+	if index+1 < len(array) {
+		ti := type_info_of(typeid_of(T))
+		ti = type_info_base(ti)
+		si := &ti.variant.(Type_Info_Struct)
+
+		field_count: uintptr
+		when intrinsics.type_is_array(E) {
+			field_count = len(E)
+		} else {
+			field_count = uintptr(intrinsics.type_struct_field_count(E))
+		}
+
+		// `data` steps over the per-field data pointers at the start of the #soa array, one `rawptr` at a time.
+		data := uintptr(array)
+		for i in 0..<field_count {
+			type := si.types[i].variant.(Type_Info_Pointer).elem
+
+			// Copy this field's last element over the slot being removed.
+			offset := rawptr((^uintptr)(data)^ + uintptr(index*type.size))
+			final := rawptr((^uintptr)(data)^ + uintptr((len(array)-1)*type.size))
+			mem_copy(offset, final, type.size)
+			data += size_of(rawptr)
+		}
+	}
+	raw_soa_footer_dynamic_array(array).len -= 1
+}
+
+// `ordered_remove_soa` removes the element at the specified `index` whilst keeping the order of the other elements.
+//
+// Note: This is an O(N) operation.
+// Note: If the elements do not have to remain in their order, prefer `unordered_remove_soa`.
+// Note: If the index is out of bounds, this procedure will panic.
+@builtin
+ordered_remove_soa :: proc(array: ^$T/#soa[dynamic]$E, index: int, loc := #caller_location) #no_bounds_check {
+	// Bounds are checked manually (the procedure is `#no_bounds_check`) and reported at the caller's location.
+	bounds_check_error_loc(loc, index, len(array))
+	// Removing the last element needs no data movement, only the length change below.
+	if index+1 < len(array) {
+		ti := type_info_of(typeid_of(T))
+		ti = type_info_base(ti)
+		si := &ti.variant.(Type_Info_Struct)
+
+		field_count: uintptr
+		when intrinsics.type_is_array(E) {
+			field_count = len(E)
+		} else {
+			field_count = uintptr(intrinsics.type_struct_field_count(E))
+		}
+
+		// `data` steps over the per-field data pointers at the start of the #soa array, one `rawptr` at a time.
+		data := uintptr(array)
+		for i in 0..<field_count {
+			type := si.types[i].variant.(Type_Info_Pointer).elem
+
+			// Move every element after `index` down by one slot within this field.
+			// NOTE(review): source and destination overlap, so this relies on `mem_copy` handling overlapping ranges - confirm.
+			offset := (^uintptr)(data)^ + uintptr(index*type.size)
+			length := type.size*(len(array) - index - 1)
+			mem_copy(rawptr(offset), rawptr(offset + uintptr(type.size)), length)
+			data += size_of(rawptr)
+		}
+	}
+	raw_soa_footer_dynamic_array(array).len -= 1
+}

+ 1 - 1
core/runtime/default_allocators_arena.odin → base/runtime/default_allocators_arena.odin

@@ -1,6 +1,6 @@
 package runtime
 package runtime
 
 
-import "core:intrinsics"
+import "base:intrinsics"
 
 
 DEFAULT_ARENA_GROWING_MINIMUM_BLOCK_SIZE :: uint(DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE)
 DEFAULT_ARENA_GROWING_MINIMUM_BLOCK_SIZE :: uint(DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE)
 
 

+ 12 - 0
base/runtime/default_allocators_general.odin

@@ -0,0 +1,12 @@
+package runtime
+
+// Selects what `default_allocator` and `default_allocator_proc` alias at compile time:
+// the nil allocator, the panic allocator, or (when neither constant is set) the heap allocator.
+when ODIN_DEFAULT_TO_NIL_ALLOCATOR {
+	default_allocator_proc :: nil_allocator_proc
+	default_allocator :: nil_allocator
+} else when ODIN_DEFAULT_TO_PANIC_ALLOCATOR {
+	default_allocator_proc :: panic_allocator_proc
+	default_allocator :: panic_allocator
+} else {
+	default_allocator :: heap_allocator
+	default_allocator_proc :: heap_allocator_proc
+}

+ 0 - 8
core/runtime/default_allocators_nil.odin → base/runtime/default_allocators_nil.odin

@@ -31,14 +31,6 @@ nil_allocator :: proc() -> Allocator {
 }
 }
 
 
 
 
-
-when ODIN_OS == .Freestanding {
-	default_allocator_proc :: nil_allocator_proc
-	default_allocator :: nil_allocator
-}
-
-
-
 panic_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
 panic_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
                              size, alignment: int,
                              size, alignment: int,
                              old_memory: rawptr, old_size: int, loc := #caller_location) -> ([]byte, Allocator_Error) {
                              old_memory: rawptr, old_size: int, loc := #caller_location) -> ([]byte, Allocator_Error) {

+ 0 - 0
core/runtime/default_temporary_allocator.odin → base/runtime/default_temporary_allocator.odin


+ 3 - 2
core/runtime/docs.odin → base/runtime/docs.odin

@@ -44,7 +44,7 @@ memcpy
 memove
 memove
 
 
 
 
-## Procedures required by the LLVM backend
+## Procedures required by the LLVM backend if u128/i128 is used
 umodti3
 umodti3
 udivti3
 udivti3
 modti3
 modti3
@@ -59,11 +59,12 @@ truncdfhf2
 gnu_h2f_ieee
 gnu_h2f_ieee
 gnu_f2h_ieee
 gnu_f2h_ieee
 extendhfsf2
 extendhfsf2
+
+## Procedures required by the LLVM backend if f16 is used
 __ashlti3 // wasm specific
 __ashlti3 // wasm specific
 __multi3  // wasm specific
 __multi3  // wasm specific
 
 
 
 
-
 ## Required an entry point is defined (i.e. 'main')
 ## Required an entry point is defined (i.e. 'main')
 
 
 args__
 args__

+ 0 - 0
core/runtime/dynamic_array_internal.odin → base/runtime/dynamic_array_internal.odin


+ 71 - 4
core/runtime/dynamic_map_internal.odin → base/runtime/dynamic_map_internal.odin

@@ -1,6 +1,6 @@
 package runtime
 package runtime
 
 
-import "core:intrinsics"
+import "base:intrinsics"
 _ :: intrinsics
 _ :: intrinsics
 
 
 // High performance, cache-friendly, open-addressed Robin Hood hashing hash map
 // High performance, cache-friendly, open-addressed Robin Hood hashing hash map
@@ -333,7 +333,7 @@ map_kvh_data_values_dynamic :: proc "contextless" (m: Raw_Map, #no_alias info: ^
 }
 }
 
 
 
 
-@(private, require_results)
+@(require_results)
 map_total_allocation_size :: #force_inline proc "contextless" (capacity: uintptr, info: ^Map_Info) -> uintptr {
 map_total_allocation_size :: #force_inline proc "contextless" (capacity: uintptr, info: ^Map_Info) -> uintptr {
 	round :: #force_inline proc "contextless" (value: uintptr) -> uintptr {
 	round :: #force_inline proc "contextless" (value: uintptr) -> uintptr {
 		CACHE_MASK :: MAP_CACHE_LINE_SIZE - 1
 		CACHE_MASK :: MAP_CACHE_LINE_SIZE - 1
@@ -350,6 +350,12 @@ map_total_allocation_size :: #force_inline proc "contextless" (capacity: uintptr
 	return size
 	return size
 }
 }
 
 
+// Convenience wrapper around `map_total_allocation_size` that takes the capacity and
+// `Map_Info` from a typed map value instead of requiring them to be passed explicitly.
+@(require_results)
+map_total_allocation_size_from_value :: #force_inline proc "contextless" (m: $M/map[$K]$V) -> uintptr {
+	return map_total_allocation_size(uintptr(cap(m)), map_info(M))
+}
+
+
 // The only procedure which needs access to the context is the one which allocates the map.
 // The only procedure which needs access to the context is the one which allocates the map.
 @(require_results)
 @(require_results)
 map_alloc_dynamic :: proc "odin" (info: ^Map_Info, log2_capacity: uintptr, allocator := context.allocator, loc := #caller_location) -> (result: Raw_Map, err: Allocator_Error) {
 map_alloc_dynamic :: proc "odin" (info: ^Map_Info, log2_capacity: uintptr, allocator := context.allocator, loc := #caller_location) -> (result: Raw_Map, err: Allocator_Error) {
@@ -391,7 +397,8 @@ map_alloc_dynamic :: proc "odin" (info: ^Map_Info, log2_capacity: uintptr, alloc
 // arrays to reduce variance. This swapping can only be done with memcpy since
 // arrays to reduce variance. This swapping can only be done with memcpy since
 // there is no type information.
 // there is no type information.
 //
 //
-// This procedure returns the address of the just inserted value.
+// This procedure returns the address of the just inserted value, and will
+// return 'nil' if there was no room to insert the entry
 @(require_results)
 @(require_results)
 map_insert_hash_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, h: Map_Hash, ik: uintptr, iv: uintptr) -> (result: uintptr) {
 map_insert_hash_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, h: Map_Hash, ik: uintptr, iv: uintptr) -> (result: uintptr) {
 	h        := h
 	h        := h
@@ -415,6 +422,11 @@ map_insert_hash_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^
 	tv := map_cell_index_dynamic(sv, info.vs, 1)
 	tv := map_cell_index_dynamic(sv, info.vs, 1)
 
 
 	swap_loop: for {
 	swap_loop: for {
+		if distance > mask {
+			// Failed to find an empty slot and prevent infinite loop
+			panic("unable to insert into a map")
+		}
+
 		element_hash := hs[pos]
 		element_hash := hs[pos]
 
 
 		if map_hash_is_empty(element_hash) {
 		if map_hash_is_empty(element_hash) {
@@ -841,6 +853,33 @@ __dynamic_map_get :: proc "contextless" (#no_alias m: ^Raw_Map, #no_alias info:
 	}
 	}
 }
 }
 
 
+// Looks up `key` (whose hash `h` has already been computed) in the map `m`.
+//
+// Returns pointers to the stored key and its value, or `nil, nil` when the map is empty
+// or the key is not present.
+__dynamic_map_get_key_and_value :: proc "contextless" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, h: Map_Hash, key: rawptr) -> (key_ptr, value_ptr: rawptr) {
+	if m.len == 0 {
+		return nil, nil
+	}
+	pos := map_desired_position(m^, h)
+	distance := uintptr(0)
+	mask := (uintptr(1) << map_log2_cap(m^)) - 1
+	ks, vs, hs, _, _ := map_kvh_data_dynamic(m^, info)
+	for {
+		element_hash := hs[pos]
+		if map_hash_is_empty(element_hash) {
+			// An empty slot ends the probe sequence: the key is absent.
+			return nil, nil
+		} else if distance > map_probe_distance(m^, element_hash, pos) {
+			// We have probed further than the resident entry did, so the search stops here
+			// (presumably the Robin Hood ordering invariant of this map).
+			return nil, nil
+		} else if element_hash == h {
+			// Hashes match; confirm with a full key comparison.
+			other_key := rawptr(map_cell_index_dynamic(ks, info.ks, pos))
+			if info.key_equal(key, other_key) {
+				key_ptr   = other_key
+				value_ptr = rawptr(map_cell_index_dynamic(vs, info.vs, pos))
+				return
+			}
+		}
+		// Advance to the next slot, wrapping around at the capacity.
+		pos = (pos + 1) & mask
+		distance += 1
+	}
+}
+
 // IMPORTANT: USED WITHIN THE COMPILER
 // IMPORTANT: USED WITHIN THE COMPILER
 __dynamic_map_check_grow :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, loc := #caller_location) -> (err: Allocator_Error, has_grown: bool) {
 __dynamic_map_check_grow :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, loc := #caller_location) -> (err: Allocator_Error, has_grown: bool) {
 	if m.len >= map_resize_threshold(m^) {
 	if m.len >= map_resize_threshold(m^) {
@@ -871,9 +910,37 @@ __dynamic_map_set :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_In
 	}
 	}
 
 
 	result := map_insert_hash_dynamic(m, info, hash, uintptr(key), uintptr(value))
 	result := map_insert_hash_dynamic(m, info, hash, uintptr(key), uintptr(value))
-	m.len += 1
+	if result != 0 {
+		m.len += 1
+	}
 	return rawptr(result)
 	return rawptr(result)
 }
 }
+// Hashes `key` with the map's current seed and forwards to `__dynamic_map_set_extra`.
+__dynamic_map_set_extra_without_hash :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, key, value: rawptr, loc := #caller_location) -> (prev_key_ptr, value_ptr: rawptr) {
+	return __dynamic_map_set_extra(m, info, info.key_hasher(key, map_seed(m^)), key, value, loc)
+}
+
+// Inserts or overwrites the entry for `key`.
+//
+// - If the key already exists, its value is overwritten in place and pointers to the
+//   existing key and value are returned.
+// - Otherwise the map is grown if required and the entry inserted; `prev_key_ptr` is `nil`
+//   and `value_ptr` is the address of the inserted value.
+// - `nil, nil` is returned if growing the map failed.
+__dynamic_map_set_extra :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, hash: Map_Hash, key, value: rawptr, loc := #caller_location) -> (prev_key_ptr, value_ptr: rawptr) {
+	if prev_key_ptr, value_ptr = __dynamic_map_get_key_and_value(m, info, hash, key); value_ptr != nil {
+		intrinsics.mem_copy_non_overlapping(value_ptr, value, info.vs.size_of_type)
+		return
+	}
+
+	hash := hash
+	err, has_grown := __dynamic_map_check_grow(m, info, loc)
+	if err != nil {
+		return nil, nil
+	}
+	if has_grown {
+		// The hash is recomputed against the map's seed after growth
+		// (presumably because the seed changes when the map is reallocated).
+		hash = info.key_hasher(key, map_seed(m^))
+	}
+
+	result := map_insert_hash_dynamic(m, info, hash, uintptr(key), uintptr(value))
+	// Only count the entry when the insertion actually produced a slot.
+	if result != 0 {
+		m.len += 1
+	}
+	return nil, rawptr(result)
+}
+
 
 
 // IMPORTANT: USED WITHIN THE COMPILER
 // IMPORTANT: USED WITHIN THE COMPILER
 @(private)
 @(private)

+ 2 - 2
core/runtime/entry_unix.odin → base/runtime/entry_unix.odin

@@ -1,9 +1,9 @@
 //+private
 //+private
-//+build linux, darwin, freebsd, openbsd
+//+build linux, darwin, freebsd, openbsd, haiku
 //+no-instrumentation
 //+no-instrumentation
 package runtime
 package runtime
 
 
-import "core:intrinsics"
+import "base:intrinsics"
 
 
 when ODIN_BUILD_MODE == .Dynamic {
 when ODIN_BUILD_MODE == .Dynamic {
 	@(link_name="_odin_entry_point", linkage="strong", require/*, link_section=".init"*/)
 	@(link_name="_odin_entry_point", linkage="strong", require/*, link_section=".init"*/)

+ 0 - 0
core/runtime/entry_unix_no_crt_amd64.asm → base/runtime/entry_unix_no_crt_amd64.asm


+ 0 - 0
core/runtime/entry_unix_no_crt_darwin_arm64.asm → base/runtime/entry_unix_no_crt_darwin_arm64.asm


+ 0 - 0
core/runtime/entry_unix_no_crt_i386.asm → base/runtime/entry_unix_no_crt_i386.asm


+ 1 - 1
core/runtime/entry_wasm.odin → base/runtime/entry_wasm.odin

@@ -3,7 +3,7 @@
 //+no-instrumentation
 //+no-instrumentation
 package runtime
 package runtime
 
 
-import "core:intrinsics"
+import "base:intrinsics"
 
 
 when !ODIN_TEST && !ODIN_NO_ENTRY_POINT {
 when !ODIN_TEST && !ODIN_NO_ENTRY_POINT {
 	@(link_name="_start", linkage="strong", require, export)
 	@(link_name="_start", linkage="strong", require, export)

+ 1 - 1
core/runtime/entry_windows.odin → base/runtime/entry_windows.odin

@@ -3,7 +3,7 @@
 //+no-instrumentation
 //+no-instrumentation
 package runtime
 package runtime
 
 
-import "core:intrinsics"
+import "base:intrinsics"
 
 
 when ODIN_BUILD_MODE == .Dynamic {
 when ODIN_BUILD_MODE == .Dynamic {
 	@(link_name="DllMain", linkage="strong", require)
 	@(link_name="DllMain", linkage="strong", require)

+ 0 - 0
core/runtime/error_checks.odin → base/runtime/error_checks.odin


+ 110 - 0
base/runtime/heap_allocator.odin

@@ -0,0 +1,110 @@
+package runtime
+
+import "base:intrinsics"
+
+// Returns an `Allocator` backed by `heap_allocator_proc`. It carries no allocator data.
+heap_allocator :: proc() -> Allocator {
+	return Allocator{
+		procedure = heap_allocator_proc,
+		data = nil,
+	}
+}
+
+// Allocator procedure for the heap allocator, built on `heap_alloc`, `heap_resize` and `heap_free`.
+//
+// Supported modes: `.Alloc`, `.Alloc_Non_Zeroed`, `.Free`, `.Resize`, `.Resize_Non_Zeroed` and `.Query_Features`.
+// `.Free_All` and `.Query_Info` return `.Mode_Not_Implemented`.
+heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
+                            size, alignment: int,
+                            old_memory: rawptr, old_size: int, loc := #caller_location) -> ([]byte, Allocator_Error) {
+	//
+	// NOTE(tetra, 2020-01-14): The heap doesn't respect alignment.
+	// Instead, we overallocate by `alignment + size_of(rawptr) - 1`, and insert
+	// padding. We also store the original pointer returned by heap_alloc right before
+	// the pointer we return to the user.
+	//
+
+	// Allocates (or, when `old_ptr` is non-nil, resizes) a block and returns an aligned slice of `size` bytes within it.
+	aligned_alloc :: proc(size, alignment: int, old_ptr: rawptr = nil, zero_memory := true) -> ([]byte, Allocator_Error) {
+		a := max(alignment, align_of(rawptr))
+		space := size + a - 1
+
+		allocated_mem: rawptr
+		if old_ptr != nil {
+			// The raw heap pointer is stored in the slot just before the user pointer.
+			// NOTE(review): if `heap_resize` moves the block to an address with a different alignment remainder,
+			// the old contents may no longer line up with the new aligned pointer - confirm.
+			original_old_ptr := ([^]rawptr)(old_ptr)[-1]
+			allocated_mem = heap_resize(original_old_ptr, space+size_of(rawptr))
+		} else {
+			allocated_mem = heap_alloc(space+size_of(rawptr), zero_memory)
+		}
+		// Skip the slot reserved for the raw pointer, then round up to the requested alignment.
+		aligned_mem := rawptr(([^]u8)(allocated_mem)[size_of(rawptr):])
+
+		ptr := uintptr(aligned_mem)
+		aligned_ptr := (ptr - 1 + uintptr(a)) & -uintptr(a)
+		diff := int(aligned_ptr - ptr)
+		if (size + diff) > space || allocated_mem == nil {
+			return nil, .Out_Of_Memory
+		}
+
+		// Record the raw pointer so `aligned_free` and later resizes can recover it.
+		aligned_mem = rawptr(aligned_ptr)
+		([^]rawptr)(aligned_mem)[-1] = allocated_mem
+
+		return byte_slice(aligned_mem, size), nil
+	}
+
+	// Frees a pointer previously returned by `aligned_alloc`; `nil` is ignored.
+	aligned_free :: proc(p: rawptr) {
+		if p != nil {
+			heap_free(([^]rawptr)(p)[-1])
+		}
+	}
+
+	// Resizes an aligned allocation, zeroing the newly added tail when `zero_memory` is set.
+	// A nil `p` yields `nil, nil`.
+	aligned_resize :: proc(p: rawptr, old_size: int, new_size: int, new_alignment: int, zero_memory := true) -> (new_memory: []byte, err: Allocator_Error) {
+		if p == nil {
+			return nil, nil
+		}
+
+		new_memory = aligned_alloc(new_size, new_alignment, p, zero_memory) or_return
+
+		// NOTE: heap_resize does not zero the new memory, so we do it
+		if zero_memory && new_size > old_size {
+			new_region := raw_data(new_memory[old_size:])
+			intrinsics.mem_zero(new_region, new_size - old_size)
+		}
+		return
+	}
+
+	switch mode {
+	case .Alloc, .Alloc_Non_Zeroed:
+		return aligned_alloc(size, alignment, nil, mode == .Alloc)
+
+	case .Free:
+		aligned_free(old_memory)
+
+	case .Free_All:
+		return nil, .Mode_Not_Implemented
+
+	case .Resize, .Resize_Non_Zeroed:
+		// Resizing a nil pointer behaves like a fresh allocation.
+		if old_memory == nil {
+			return aligned_alloc(size, alignment, nil, mode == .Resize)
+		}
+		return aligned_resize(old_memory, old_size, size, alignment, mode == .Resize)
+
+	case .Query_Features:
+		// For this mode `old_memory` is treated as a pointer to the feature set to fill in.
+		set := (^Allocator_Mode_Set)(old_memory)
+		if set != nil {
+			set^ = {.Alloc, .Alloc_Non_Zeroed, .Free, .Resize, .Resize_Non_Zeroed, .Query_Features}
+		}
+		return nil, nil
+
+	case .Query_Info:
+		return nil, .Mode_Not_Implemented
+	}
+
+	return nil, nil
+}
+
+
+// Allocates `size` bytes through the platform-specific `_heap_alloc`, passing `zero_memory` through.
+heap_alloc :: proc(size: int, zero_memory := true) -> rawptr {
+	return _heap_alloc(size, zero_memory)
+}
+
+// Resizes the allocation at `ptr` to `new_size` bytes through the platform-specific `_heap_resize`.
+heap_resize :: proc(ptr: rawptr, new_size: int) -> rawptr {
+	return _heap_resize(ptr, new_size)
+}
+
+// Releases the allocation at `ptr` through the platform-specific `_heap_free`.
+heap_free :: proc(ptr: rawptr) {
+	_heap_free(ptr)
+}

+ 15 - 0
base/runtime/heap_allocator_other.odin

@@ -0,0 +1,15 @@
+//+build js, wasi, freestanding, essence
+//+private
+package runtime
+
+// Placeholder for targets covered by this file's build tag: always reports `unimplemented`.
+_heap_alloc :: proc(size: int, zero_memory := true) -> rawptr {
+	unimplemented("base:runtime 'heap_alloc' procedure is not supported on this platform")
+}
+
+// Placeholder for targets covered by this file's build tag: always reports `unimplemented`.
+_heap_resize :: proc(ptr: rawptr, new_size: int) -> rawptr {
+	unimplemented("base:runtime 'heap_resize' procedure is not supported on this platform")
+}
+
+// Placeholder for targets covered by this file's build tag: always reports `unimplemented`.
+_heap_free :: proc(ptr: rawptr) {
+	unimplemented("base:runtime 'heap_free' procedure is not supported on this platform")
+}

+ 38 - 0
base/runtime/heap_allocator_unix.odin

@@ -0,0 +1,38 @@
+//+build linux, darwin, freebsd, openbsd, haiku
+//+private
+package runtime
+
+when ODIN_OS == .Darwin {
+	foreign import libc "system:System.framework"
+} else {
+	foreign import libc "system:c"
+}
+
+@(default_calling_convention="c")
+foreign libc {
+	@(link_name="malloc")   _unix_malloc   :: proc(size: int) -> rawptr ---
+	@(link_name="calloc")   _unix_calloc   :: proc(num, size: int) -> rawptr ---
+	@(link_name="free")     _unix_free     :: proc(ptr: rawptr) ---
+	@(link_name="realloc")  _unix_realloc  :: proc(ptr: rawptr, size: int) -> rawptr ---
+}
+
+// Allocates `size` bytes from the C heap: `calloc` when zeroed memory is requested, `malloc` otherwise.
+// Non-positive sizes return `nil` without calling into libc.
+_heap_alloc :: proc(size: int, zero_memory := true) -> rawptr {
+	if size <= 0 {
+		return nil
+	}
+	if zero_memory {
+		return _unix_calloc(1, size)
+	} else {
+		return _unix_malloc(size)
+	}
+}
+
+// Resizes the allocation at `ptr` to `new_size` bytes with the C library's `realloc`.
+_heap_resize :: proc(ptr: rawptr, new_size: int) -> rawptr {
+	// NOTE: _unix_realloc doesn't guarantee new memory will be zeroed on
+	// POSIX platforms. Ensure your caller takes this into account.
+	return _unix_realloc(ptr, new_size)
+}
+
+// Releases `ptr` with the C library's `free`.
+_heap_free :: proc(ptr: rawptr) {
+	_unix_free(ptr)
+}

+ 39 - 0
base/runtime/heap_allocator_windows.odin

@@ -0,0 +1,39 @@
+package runtime
+
+foreign import kernel32 "system:Kernel32.lib"
+
+@(private="file")
+@(default_calling_convention="system")
+foreign kernel32 {
+	// NOTE(bill): The types are not using the standard names (e.g. DWORD and LPVOID), just to minimize dependencies
+
+	// default_allocator
+	GetProcessHeap :: proc() -> rawptr ---
+	HeapAlloc      :: proc(hHeap: rawptr, dwFlags: u32, dwBytes: uint) -> rawptr ---
+	HeapReAlloc    :: proc(hHeap: rawptr, dwFlags: u32, lpMem: rawptr, dwBytes: uint) -> rawptr ---
+	HeapFree       :: proc(hHeap: rawptr, dwFlags: u32, lpMem: rawptr) -> b32 ---
+}
+
+// Allocates `size` bytes from the process heap, requesting zeroed memory when `zero_memory` is set.
+_heap_alloc :: proc(size: int, zero_memory := true) -> rawptr {
+	HEAP_ZERO_MEMORY :: 0x00000008
+	return HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY if zero_memory else 0, uint(size))
+}
+// Resizes the allocation at `ptr` to `new_size` bytes on the process heap.
+//
+// A `new_size` of 0 frees `ptr` and returns `nil`; a nil `ptr` performs a fresh (zeroed) allocation.
+// The reallocation itself always passes `HEAP_ZERO_MEMORY`.
+_heap_resize :: proc(ptr: rawptr, new_size: int) -> rawptr {
+	if new_size == 0 {
+		_heap_free(ptr)
+		return nil
+	}
+	if ptr == nil {
+		return _heap_alloc(new_size)
+	}
+
+	HEAP_ZERO_MEMORY :: 0x00000008
+	return HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, ptr, uint(new_size))
+}
+// Releases `ptr` back to the process heap; a nil `ptr` is ignored.
+_heap_free :: proc(ptr: rawptr) {
+	if ptr == nil {
+		return
+	}
+	HeapFree(GetProcessHeap(), 0, ptr)
+}
+

+ 31 - 81
core/runtime/internal.odin → base/runtime/internal.odin

@@ -1,6 +1,6 @@
 package runtime
 package runtime
 
 
-import "core:intrinsics"
+import "base:intrinsics"
 
 
 @(private="file")
 @(private="file")
 IS_WASM :: ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
 IS_WASM :: ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
@@ -11,7 +11,7 @@ RUNTIME_LINKAGE :: "strong" when (
 	ODIN_BUILD_MODE == .Dynamic ||
 	ODIN_BUILD_MODE == .Dynamic ||
 	!ODIN_NO_CRT) &&
 	!ODIN_NO_CRT) &&
 	!IS_WASM) else "internal"
 	!IS_WASM) else "internal"
-RUNTIME_REQUIRE :: !ODIN_TILDE
+RUNTIME_REQUIRE :: false // !ODIN_TILDE
 
 
 @(private)
 @(private)
 __float16 :: f16 when __ODIN_LLVM_F16_SUPPORTED else u16
 __float16 :: f16 when __ODIN_LLVM_F16_SUPPORTED else u16
@@ -22,51 +22,7 @@ byte_slice :: #force_inline proc "contextless" (data: rawptr, len: int) -> []byt
 	return ([^]byte)(data)[:max(len, 0)]
 	return ([^]byte)(data)[:max(len, 0)]
 }
 }
 
 
-bswap_16 :: proc "contextless" (x: u16) -> u16 {
-	return x>>8 | x<<8
-}
-
-bswap_32 :: proc "contextless" (x: u32) -> u32 {
-	return x>>24 | (x>>8)&0xff00 | (x<<8)&0xff0000 | x<<24
-}
-
-bswap_64 :: proc "contextless" (x: u64) -> u64 {
-	z := x
-	z = (z & 0x00000000ffffffff) << 32 | (z & 0xffffffff00000000) >> 32
-	z = (z & 0x0000ffff0000ffff) << 16 | (z & 0xffff0000ffff0000) >> 16
-	z = (z & 0x00ff00ff00ff00ff) << 8  | (z & 0xff00ff00ff00ff00) >> 8
-	return z
-}
-
-bswap_128 :: proc "contextless" (x: u128) -> u128 {
-	z := transmute([4]u32)x
-	z[0], z[3] = bswap_32(z[3]), bswap_32(z[0])
-	z[1], z[2] = bswap_32(z[2]), bswap_32(z[1])
-	return transmute(u128)z
-}
-
-bswap_f16 :: proc "contextless" (f: f16) -> f16 {
-	x := transmute(u16)f
-	z := bswap_16(x)
-	return transmute(f16)z
-
-}
-
-bswap_f32 :: proc "contextless" (f: f32) -> f32 {
-	x := transmute(u32)f
-	z := bswap_32(x)
-	return transmute(f32)z
-
-}
-
-bswap_f64 :: proc "contextless" (f: f64) -> f64 {
-	x := transmute(u64)f
-	z := bswap_64(x)
-	return transmute(f64)z
-}
-
-
-is_power_of_two_int :: #force_inline proc(x: int) -> bool {
+is_power_of_two_int :: #force_inline proc "contextless" (x: int) -> bool {
 	if x <= 0 {
 	if x <= 0 {
 		return false
 		return false
 	}
 	}
@@ -84,7 +40,7 @@ align_forward_int :: #force_inline proc(ptr, align: int) -> int {
 	return p
 	return p
 }
 }
 
 
-is_power_of_two_uintptr :: #force_inline proc(x: uintptr) -> bool {
+is_power_of_two_uintptr :: #force_inline proc "contextless" (x: uintptr) -> bool {
 	if x <= 0 {
 	if x <= 0 {
 		return false
 		return false
 	}
 	}
@@ -608,36 +564,6 @@ string_decode_last_rune :: proc "contextless" (s: string) -> (rune, int) {
 	return r, size
 	return r, size
 }
 }
 
 
-
-abs_f16 :: #force_inline proc "contextless" (x: f16) -> f16 {
-	return -x if x < 0 else x
-}
-abs_f32 :: #force_inline proc "contextless" (x: f32) -> f32 {
-	return -x if x < 0 else x
-}
-abs_f64 :: #force_inline proc "contextless" (x: f64) -> f64 {
-	return -x if x < 0 else x
-}
-
-min_f16 :: #force_inline proc "contextless" (a, b: f16) -> f16 {
-	return a if a < b else b
-}
-min_f32 :: #force_inline proc "contextless" (a, b: f32) -> f32 {
-	return a if a < b else b
-}
-min_f64 :: #force_inline proc "contextless" (a, b: f64) -> f64 {
-	return a if a < b else b
-}
-max_f16 :: #force_inline proc "contextless" (a, b: f16) -> f16 {
-	return a if a > b else b
-}
-max_f32 :: #force_inline proc "contextless" (a, b: f32) -> f32 {
-	return a if a > b else b
-}
-max_f64 :: #force_inline proc "contextless" (a, b: f64) -> f64 {
-	return a if a > b else b
-}
-
 abs_complex32 :: #force_inline proc "contextless" (x: complex32) -> f16 {
 abs_complex32 :: #force_inline proc "contextless" (x: complex32) -> f16 {
 	p, q := abs(real(x)), abs(imag(x))
 	p, q := abs(real(x)), abs(imag(x))
 	if p < q {
 	if p < q {
@@ -1036,9 +962,11 @@ udivmodti4 :: proc "c" (a, b: u128, rem: ^u128) -> u128 {
 	return udivmod128(a, b, rem)
 	return udivmod128(a, b, rem)
 }
 }
 
 
-@(link_name="__udivti3", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
-udivti3 :: proc "c" (a, b: u128) -> u128 {
-	return udivmodti4(a, b, nil)
+when !IS_WASM {
+	@(link_name="__udivti3", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
+	udivti3 :: proc "c" (a, b: u128) -> u128 {
+		return udivmodti4(a, b, nil)
+	}
 }
 }
 
 
 
 
@@ -1108,3 +1036,25 @@ fixdfti :: proc(a: u64) -> i128 {
 	}
 	}
 
 
 }
 }
+
+
+
+// Copies `size` bits from `src` (starting at its bit 0) into `dst`, starting at bit `offset` of `dst`.
+// Bits are numbered least-significant first within each byte.
+__write_bits :: proc "contextless" (dst, src: [^]byte, offset: uintptr, size: uintptr) {
+	for i in 0..<size {
+		j := offset+i
+		the_bit := byte((src[i/8]) & (1<<(i&7)) != 0)
+		// Clear the destination bit with a fixed mask first, so that a 0 bit in `src`
+		// really overwrites a 1 bit in `dst`; then set it to the source bit.
+		dst[j/8] &~= byte(1)<<(j&7)
+		dst[j/8] |=  the_bit<<(j&7)
+	}
+}
+
+// Copies `size` bits from `src`, starting at bit `offset` of `src`, into `dst` starting at its bit 0.
+// Bits are numbered least-significant first within each byte.
+__read_bits :: proc "contextless" (dst, src: [^]byte, offset: uintptr, size: uintptr) {
+	for j in 0..<size {
+		i := offset+j
+		the_bit := byte((src[i/8]) & (1<<(i&7)) != 0)
+		// Clear the destination bit with a fixed mask first, so that a 0 bit in `src`
+		// really overwrites a 1 bit in `dst`; then set it to the source bit.
+		dst[j/8] &~= byte(1)<<(j&7)
+		dst[j/8] |=  the_bit<<(j&7)
+	}
+}

+ 7 - 0
base/runtime/os_specific.odin

@@ -0,0 +1,7 @@
+package runtime
+
+_OS_Errno :: distinct int
+
+// Writes `data` through the platform-specific `_stderr_write` and returns its results unchanged:
+// a byte count and an OS error number.
+stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
+	return _stderr_write(data)
+}

+ 22 - 0
base/runtime/os_specific_bsd.odin

@@ -0,0 +1,22 @@
+//+build freebsd, openbsd
+//+private
+package runtime
+
+foreign import libc "system:c"
+
+@(default_calling_convention="c")
+foreign libc {
+	@(link_name="write")
+	_unix_write :: proc(fd: i32, buf: rawptr, size: int) -> int ---
+
+	__error :: proc() -> ^i32 ---
+}
+
+// Writes `data` to file descriptor 2 with libc `write`.
+//
+// When fewer bytes than requested were written (including a failed write), the value behind
+// `__error()` is returned as the error alongside the raw return value of `write`.
+_stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
+	ret := _unix_write(2, raw_data(data), len(data))
+	if ret < len(data) {
+		err := __error()
+		return int(ret), _OS_Errno(err^ if err != nil else 0)
+	}
+	return int(ret), 0
+}

+ 15 - 0
base/runtime/os_specific_darwin.odin

@@ -0,0 +1,15 @@
+//+build darwin
+//+private
+package runtime
+
+import "base:intrinsics"
+
+// Writes `data` to file descriptor 2 using a direct `write` system call.
+//
+// A negative result is reported as an error (negated) with 0 bytes written.
+_stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
+	WRITE  :: 0x2000004
+	STDERR :: 2
+	ret := intrinsics.syscall(WRITE, STDERR, uintptr(raw_data(data)), uintptr(len(data)))
+	// NOTE(review): if `intrinsics.syscall` returns an unsigned `uintptr`, this comparison can never
+	// be true and errors would be reported as a large byte count - confirm the return type.
+	if ret < 0 {
+		return 0, _OS_Errno(-ret)
+	}
+	return int(ret), 0
+}

+ 2 - 1
core/runtime/os_specific_freestanding.odin → base/runtime/os_specific_freestanding.odin

@@ -1,7 +1,8 @@
 //+build freestanding
 //+build freestanding
+//+private
 package runtime
 package runtime
 
 
 // TODO(bill): reimplement `os.write`
 // TODO(bill): reimplement `os.write`
-_os_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
+_stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
 	return 0, -1
 	return 0, -1
 }
 }

+ 21 - 0
base/runtime/os_specific_haiku.odin

@@ -0,0 +1,21 @@
+//+build haiku
+//+private
+package runtime
+
+foreign import libc "system:c"
+
+foreign libc {
+	@(link_name="write")
+	_unix_write :: proc(fd: i32, buf: rawptr, size: int) -> int ---
+
+	_errnop :: proc() -> ^i32 ---
+}
+
+// Writes `data` to file descriptor 2 with libc `write`.
+//
+// When fewer bytes than requested were written (including a failed write), the value behind
+// `_errnop()` is returned as the error alongside the raw return value of `write`.
+_stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
+	ret := _unix_write(2, raw_data(data), len(data))
+	if ret < len(data) {
+		err := _errnop()
+		return int(ret), _OS_Errno(err^ if err != nil else 0)
+	}
+	return int(ret), 0
+}

+ 2 - 1
core/runtime/os_specific_js.odin → base/runtime/os_specific_js.odin

@@ -1,9 +1,10 @@
 //+build js
 //+build js
+//+private
 package runtime
 package runtime
 
 
 foreign import "odin_env"
 foreign import "odin_env"
 
 
-_os_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
+_stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
 	foreign odin_env {
 	foreign odin_env {
 		write :: proc "contextless" (fd: u32, p: []byte) ---
 		write :: proc "contextless" (fd: u32, p: []byte) ---
 	}
 	}

+ 24 - 0
base/runtime/os_specific_linux.odin

@@ -0,0 +1,24 @@
+//+private
+package runtime
+
+import "base:intrinsics"
+
+// Writes `data` to file descriptor 2 using a direct `write` system call.
+//
+// Returns the number of bytes written, or 0 and the negated result as the error when the
+// system call returns a value in the range (-4096, 0).
+_stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
+	// The `write` system call number differs per architecture.
+	when ODIN_ARCH == .amd64 {
+		SYS_write :: uintptr(1)
+	} else when ODIN_ARCH == .arm64 {
+		SYS_write :: uintptr(64)
+	} else when ODIN_ARCH == .i386 {
+		SYS_write :: uintptr(4)
+	} else when ODIN_ARCH == .arm32 {
+		SYS_write :: uintptr(4)
+	}
+
+	stderr :: 2
+
+	ret := int(intrinsics.syscall(SYS_write, uintptr(stderr), uintptr(raw_data(data)), uintptr(len(data))))
+	if ret < 0 && ret > -4096 {
+		return 0, _OS_Errno(-ret)
+	}
+	return ret, 0
+}

+ 2 - 1
core/runtime/os_specific_wasi.odin → base/runtime/os_specific_wasi.odin

@@ -1,9 +1,10 @@
 //+build wasi
 //+build wasi
+//+private
 package runtime
 package runtime
 
 
 import "core:sys/wasm/wasi"
 import "core:sys/wasm/wasi"
 
 
-_os_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
+_stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
 	data := (wasi.ciovec_t)(data)
 	data := (wasi.ciovec_t)(data)
 	n, err := wasi.fd_write(1, {data})
 	n, err := wasi.fd_write(1, {data})
 	return int(n), _OS_Errno(err)
 	return int(n), _OS_Errno(err)

+ 51 - 0
base/runtime/os_specific_windows.odin

@@ -0,0 +1,51 @@
+//+build windows
+//+private
+package runtime
+
+foreign import kernel32 "system:Kernel32.lib"
+
+@(private="file")
+@(default_calling_convention="system")
+foreign kernel32 {
+	// NOTE(bill): The types do not use the standard names (e.g. DWORD and LPVOID), just to minimize dependencies
+
+	// stderr_write
+	GetStdHandle         :: proc(which: u32) -> rawptr ---
+	SetHandleInformation :: proc(hObject: rawptr, dwMask: u32, dwFlags: u32) -> b32 ---
+	WriteFile            :: proc(hFile: rawptr, lpBuffer: rawptr, nNumberOfBytesToWrite: u32, lpNumberOfBytesWritten: ^u32, lpOverlapped: rawptr) -> b32 ---
+	GetLastError         :: proc() -> u32 ---
+}
+
+// _stderr_write writes all of `data` to the handle returned by
+// GetStdHandle(STD_ERROR_HANDLE), issuing as many WriteFile calls as needed.
+//
+// Each WriteFile call is capped at MAX_RW bytes. The procedure returns the
+// total number of bytes written and 0 on success. If a call fails or writes
+// nothing, it returns the bytes written so far together with the value of
+// GetLastError(). An empty `data` returns (0, 0) without touching the handle.
+_stderr_write :: proc "contextless" (data: []byte) -> (n: int, err: _OS_Errno) #no_bounds_check {
+	if len(data) == 0 {
+		return 0, 0
+	}
+
+	STD_ERROR_HANDLE :: ~u32(0) -12 + 1
+	HANDLE_FLAG_INHERIT :: 0x00000001
+	MAX_RW :: 1<<30
+
+	h := GetStdHandle(STD_ERROR_HANDLE)
+	when size_of(uintptr) == 8 {
+		// Clear the inherit flag on the handle (64-bit targets only).
+		SetHandleInformation(h, HANDLE_FLAG_INHERIT, 0)
+	}
+
+	single_write_length: u32
+	total_write: i64
+	length := i64(len(data))
+
+	for total_write < length {
+		remaining := length - total_write
+		// Clamp while still in 64 bits. Narrowing `remaining` to i32 first
+		// would truncate large values: a remainder whose low 32 bits are
+		// zero would become a 0-byte write and be reported as a failure,
+		// and values >= 2^31 would slip past the MAX_RW cap.
+		to_write := u32(min(remaining, i64(MAX_RW)))
+
+		e := WriteFile(h, &data[total_write], to_write, &single_write_length, nil)
+		if single_write_length <= 0 || !e {
+			err = _OS_Errno(GetLastError())
+			n = int(total_write)
+			return
+		}
+		total_write += i64(single_write_length)
+	}
+	n = int(total_write)
+	return
+}

+ 20 - 6
core/runtime/print.odin → base/runtime/print.odin

@@ -123,13 +123,13 @@ encode_rune :: proc "contextless" (c: rune) -> ([4]u8, int) {
 }
 }
 
 
 print_string :: proc "contextless" (str: string) -> (n: int) {
 print_string :: proc "contextless" (str: string) -> (n: int) {
-	n, _ = os_write(transmute([]byte)str)
+	n, _ = stderr_write(transmute([]byte)str)
 	return
 	return
 }
 }
 
 
 print_strings :: proc "contextless" (args: ..string) -> (n: int) {
 print_strings :: proc "contextless" (args: ..string) -> (n: int) {
 	for str in args {
 	for str in args {
-		m, err := os_write(transmute([]byte)str)
+		m, err := stderr_write(transmute([]byte)str)
 		n += m
 		n += m
 		if err != 0 {
 		if err != 0 {
 			break
 			break
@@ -139,7 +139,7 @@ print_strings :: proc "contextless" (args: ..string) -> (n: int) {
 }
 }
 
 
 print_byte :: proc "contextless" (b: byte) -> (n: int) {
 print_byte :: proc "contextless" (b: byte) -> (n: int) {
-	n, _ = os_write([]byte{b})
+	n, _ = stderr_write([]byte{b})
 	return
 	return
 }
 }
 
 
@@ -178,7 +178,7 @@ print_rune :: proc "contextless" (r: rune) -> int #no_bounds_check {
 	}
 	}
 
 
 	b, n := encode_rune(r)
 	b, n := encode_rune(r)
-	m, _ := os_write(b[:n])
+	m, _ := stderr_write(b[:n])
 	return m
 	return m
 }
 }
 
 
@@ -194,7 +194,7 @@ print_u64 :: proc "contextless" (x: u64) #no_bounds_check {
 	}
 	}
 	i -= 1; a[i] = _INTEGER_DIGITS_VAR[u % b]
 	i -= 1; a[i] = _INTEGER_DIGITS_VAR[u % b]
 
 
-	os_write(a[i:])
+	stderr_write(a[i:])
 }
 }
 
 
 
 
@@ -216,7 +216,7 @@ print_i64 :: proc "contextless" (x: i64) #no_bounds_check {
 		i -= 1; a[i] = '-'
 		i -= 1; a[i] = '-'
 	}
 	}
 
 
-	os_write(a[i:])
+	stderr_write(a[i:])
 }
 }
 
 
 print_uint    :: proc "contextless" (x: uint)    { print_u64(u64(x)) }
 print_uint    :: proc "contextless" (x: uint)    { print_u64(u64(x)) }
@@ -459,6 +459,20 @@ print_type :: proc "contextless" (ti: ^Type_Info) {
 		}
 		}
 		print_byte(']')
 		print_byte(']')
 
 
+	case Type_Info_Bit_Field:
+		print_string("bit_field ")
+		print_type(info.backing_type)
+		print_string(" {")
+		for name, i in info.names {
+			if i > 0 { print_string(", ") }
+			print_string(name)
+			print_string(": ")
+			print_type(info.types[i])
+			print_string(" | ")
+			print_u64(u64(info.bit_sizes[i]))
+		}
+		print_byte('}')
+
 
 
 	case Type_Info_Simd_Vector:
 	case Type_Info_Simd_Vector:
 		print_string("#simd[")
 		print_string("#simd[")

+ 0 - 0
core/runtime/procs.odin → base/runtime/procs.odin


+ 1 - 1
core/runtime/procs_darwin.odin → base/runtime/procs_darwin.odin

@@ -3,7 +3,7 @@ package runtime
 
 
 foreign import "system:Foundation.framework"
 foreign import "system:Foundation.framework"
 
 
-import "core:intrinsics"
+import "base:intrinsics"
 
 
 objc_id :: ^intrinsics.objc_object
 objc_id :: ^intrinsics.objc_object
 objc_Class :: ^intrinsics.objc_class
 objc_Class :: ^intrinsics.objc_class

+ 0 - 0
core/runtime/procs_js.odin → base/runtime/procs_js.odin


+ 22 - 8
core/runtime/procs_wasm.odin → base/runtime/procs_wasm.odin

@@ -7,19 +7,25 @@ ti_int :: struct #raw_union {
 	all: i128,
 	all: i128,
 }
 }
 
 
+// ti_uint overlays a u128 (`all`) with two u64 fields (`lo`, `hi`) in the
+// same storage, so a 128-bit value can be assembled from, or split into,
+// two 64-bit pieces.
+@(private="file")
+ti_uint :: struct #raw_union {
+	using s: struct { lo, hi: u64 },
+	all: u128,
+}
+
 @(link_name="__ashlti3", linkage="strong")
 @(link_name="__ashlti3", linkage="strong")
-__ashlti3 :: proc "contextless" (a: i128, b_: u32) -> i128 {
+__ashlti3 :: proc "contextless" (la, ha: u64, b_: u32) -> i128 {
 	bits_in_dword :: size_of(u32)*8
 	bits_in_dword :: size_of(u32)*8
 	b := u32(b_)
 	b := u32(b_)
 	
 	
 	input, result: ti_int
 	input, result: ti_int
-	input.all = a
+	input.lo, input.hi = la, ha
 	if b & bits_in_dword != 0 {
 	if b & bits_in_dword != 0 {
 		result.lo = 0
 		result.lo = 0
 		result.hi = input.lo << (b-bits_in_dword)
 		result.hi = input.lo << (b-bits_in_dword)
 	} else {
 	} else {
 		if b == 0 {
 		if b == 0 {
-			return a
+			return input.all
 		}
 		}
 		result.lo = input.lo<<b
 		result.lo = input.lo<<b
 		result.hi = (input.hi<<b) | (input.lo>>(bits_in_dword-b))
 		result.hi = (input.hi<<b) | (input.lo>>(bits_in_dword-b))
@@ -29,12 +35,20 @@ __ashlti3 :: proc "contextless" (a: i128, b_: u32) -> i128 {
 
 
 
 
 @(link_name="__multi3", linkage="strong")
 @(link_name="__multi3", linkage="strong")
-__multi3 :: proc "contextless" (a, b: i128) -> i128 {
+__multi3 :: proc "contextless" (la, ha, lb, hb: u64) -> i128 {
 	x, y, r: ti_int
 	x, y, r: ti_int
-	
-	x.all = a
-	y.all = b
+
+	x.lo, x.hi = la, ha
+	y.lo, y.hi = lb, hb
 	r.all = i128(x.lo * y.lo) // TODO this is incorrect
 	r.all = i128(x.lo * y.lo) // TODO this is incorrect
 	r.hi += x.hi*y.lo + x.lo*y.hi
 	r.hi += x.hi*y.lo + x.lo*y.hi
 	return r.all
 	return r.all
-}
+}
+
+// udivti3 is exported under the link name `__udivti3`. Each 128-bit operand
+// arrives as two u64 pieces (`la`/`ha` and `lb`/`hb`); the pieces are
+// recombined through `ti_uint` and handed to `udivmodti4` with a nil
+// remainder pointer.
+@(link_name="__udivti3", linkage="strong")
+udivti3 :: proc "c" (la, ha, lb, hb: u64) -> u128 {
+	lhs: ti_uint
+	lhs.lo = la
+	lhs.hi = ha
+
+	rhs: ti_uint
+	rhs.lo = lb
+	rhs.hi = hb
+
+	return udivmodti4(lhs.all, rhs.all, nil)
+}

+ 0 - 0
core/runtime/procs_windows_amd64.asm → base/runtime/procs_windows_amd64.asm


+ 0 - 0
core/runtime/procs_windows_amd64.odin → base/runtime/procs_windows_amd64.odin


+ 0 - 0
core/runtime/procs_windows_i386.odin → base/runtime/procs_windows_i386.odin


+ 1 - 1
core/runtime/udivmod128.odin → base/runtime/udivmod128.odin

@@ -1,6 +1,6 @@
 package runtime
 package runtime
 
 
-import "core:intrinsics"
+import "base:intrinsics"
 
 
 udivmod128 :: proc "c" (a, b: u128, rem: ^u128) -> u128 {
 udivmod128 :: proc "c" (a, b: u128, rem: ^u128) -> u128 {
 	_ctz :: intrinsics.count_trailing_zeros
 	_ctz :: intrinsics.count_trailing_zeros

+ 8 - 4
build_odin.sh

@@ -56,15 +56,14 @@ fi
 
 
 case "$OS_NAME" in
 case "$OS_NAME" in
 Darwin)
 Darwin)
-	if [ "$OS_ARCH" == "arm64" ]; then
+	if [ "$OS_ARCH" = "arm64" ]; then
 		if [ $LLVM_VERSION_MAJOR -lt 13 ] || [ $LLVM_VERSION_MAJOR -gt 17 ]; then
 		if [ $LLVM_VERSION_MAJOR -lt 13 ] || [ $LLVM_VERSION_MAJOR -gt 17 ]; then
 			error "Darwin Arm64 requires LLVM 13, 14 or 17"
 			error "Darwin Arm64 requires LLVM 13, 14 or 17"
 		fi
 		fi
 	fi
 	fi
 
 
 	CXXFLAGS="$CXXFLAGS $($LLVM_CONFIG --cxxflags --ldflags)"
 	CXXFLAGS="$CXXFLAGS $($LLVM_CONFIG --cxxflags --ldflags)"
-	LDFLAGS="$LDFLAGS -liconv -ldl -framework System"
-	LDFLAGS="$LDFLAGS -lLLVM-C"
+	LDFLAGS="$LDFLAGS -liconv -ldl -framework System -lLLVM"
 	;;
 	;;
 FreeBSD)
 FreeBSD)
 	CXXFLAGS="$CXXFLAGS $($LLVM_CONFIG --cxxflags --ldflags)"
 	CXXFLAGS="$CXXFLAGS $($LLVM_CONFIG --cxxflags --ldflags)"
@@ -83,6 +82,11 @@ OpenBSD)
 	LDFLAGS="$LDFLAGS -liconv"
 	LDFLAGS="$LDFLAGS -liconv"
 	LDFLAGS="$LDFLAGS $($LLVM_CONFIG --libs core native --system-libs)"
 	LDFLAGS="$LDFLAGS $($LLVM_CONFIG --libs core native --system-libs)"
 	;;
 	;;
+Haiku)
+	CXXFLAGS="$CXXFLAGS $($LLVM_CONFIG --cxxflags --ldflags) -I/system/develop/headers/private/shared -I/system/develop/headers/private/kernel"
+	LDFLAGS="$LDFLAGS -liconv"
+	LDFLAGS="$LDFLAGS $($LLVM_CONFIG --libs core native --system-libs)"
+	;;
 *)
 *)
 	error "Platform \"$OS_NAME\" unsupported"
 	error "Platform \"$OS_NAME\" unsupported"
 	;;
 	;;
@@ -97,7 +101,7 @@ build_odin() {
 		EXTRAFLAGS="-O3"
 		EXTRAFLAGS="-O3"
 		;;
 		;;
 	release-native)
 	release-native)
-		if [ "$OS_ARCH" == "arm64" ]; then
+		if [ "$OS_ARCH" = "arm64" ]; then
 			# Use preferred flag for Arm (ie arm64 / aarch64 / etc)
 			# Use preferred flag for Arm (ie arm64 / aarch64 / etc)
 			EXTRAFLAGS="-O3 -mcpu=native"
 			EXTRAFLAGS="-O3 -mcpu=native"
 		else
 		else

+ 14 - 2
ci/upload_create_nightly.sh

@@ -1,5 +1,7 @@
 #!/bin/bash
 #!/bin/bash
 
 
+set -e
+
 bucket=$1
 bucket=$1
 platform=$2
 platform=$2
 artifact=$3
 artifact=$3
@@ -9,5 +11,15 @@ filename="odin-$platform-nightly+$now.zip"
 
 
 echo "Creating archive $filename from $artifact and uploading to $bucket"
 echo "Creating archive $filename from $artifact and uploading to $bucket"
 
 
-7z a -bd "output/$filename" -r "$artifact"
-b2 upload-file --noProgress "$bucket" "output/$filename" "nightly/$filename"
+# If this is already zipped up (done before artifact upload to keep permissions intact), just move it.
+if [ "${artifact: -4}" == ".zip" ]
+then
+	echo "Artifact already a zip"
+	mkdir -p "output"
+	mv "$artifact" "output/$filename"
+else
+	echo "Artifact needs to be zipped"
+	7z a -bd "output/$filename" -r "$artifact"
+fi
+
+b2 upload-file --noProgress "$bucket" "output/$filename" "nightly/$filename"

+ 1 - 1
core/bufio/scanner.odin

@@ -4,7 +4,7 @@ import "core:bytes"
 import "core:io"
 import "core:io"
 import "core:mem"
 import "core:mem"
 import "core:unicode/utf8"
 import "core:unicode/utf8"
-import "core:intrinsics"
+import "base:intrinsics"
 
 
 // Extra errors returns by scanning procedures
 // Extra errors returns by scanning procedures
 Scanner_Extra_Error :: enum i32 {
 Scanner_Extra_Error :: enum i32 {

+ 0 - 1
core/bufio/writer.odin

@@ -226,7 +226,6 @@ writer_to_writer :: proc(b: ^Writer) -> (s: io.Writer) {
 
 
 
 
 
 
-@(private)
 _writer_proc :: proc(stream_data: rawptr, mode: io.Stream_Mode, p: []byte, offset: i64, whence: io.Seek_From) -> (n: i64, err: io.Error) {
 _writer_proc :: proc(stream_data: rawptr, mode: io.Stream_Mode, p: []byte, offset: i64, whence: io.Seek_From) -> (n: i64, err: io.Error) {
 	b := (^Writer)(stream_data)
 	b := (^Writer)(stream_data)
 	#partial switch mode {
 	#partial switch mode {

+ 11 - 1
core/c/c.odin

@@ -1,6 +1,6 @@
 package c
 package c
 
 
-import builtin "core:builtin"
+import builtin "base:builtin"
 
 
 char           :: builtin.u8  // assuming -funsigned-char
 char           :: builtin.u8  // assuming -funsigned-char
 
 
@@ -104,3 +104,13 @@ NULL           :: rawptr(uintptr(0))
 NDEBUG         :: !ODIN_DEBUG
 NDEBUG         :: !ODIN_DEBUG
 
 
 CHAR_BIT :: 8
 CHAR_BIT :: 8
+
+// va_list is an opaque 4096-byte buffer aligned to 16 bytes, used as
+// storage for a C `va_list`.
+//
+// Since there are no types in C with an alignment larger than that of
+// max_align_t, which cannot be larger than sizeof(long double) as any other
+// exposed type wouldn't be valid C, the maximum alignment possible in a
+// strictly conformant C implementation is 16 on the platforms we care about.
+// The choice of 4096 bytes for storage of this type is more than enough on all
+// relevant platforms.
+va_list :: struct #align(16) {
+	_: [4096]u8,
+}

+ 1 - 1
core/c/libc/complex.odin

@@ -67,7 +67,7 @@ foreign libc {
 	crealf  :: proc(z: complex_float) -> float ---
 	crealf  :: proc(z: complex_float) -> float ---
 }
 }
 
 
-import builtin "core:builtin"
+import builtin "base:builtin"
 
 
 complex_float  :: distinct builtin.complex64
 complex_float  :: distinct builtin.complex64
 complex_double :: distinct builtin.complex128
 complex_double :: distinct builtin.complex128

+ 18 - 0
core/c/libc/errno.odin

@@ -80,6 +80,24 @@ when ODIN_OS == .Darwin {
 	ERANGE :: 34
 	ERANGE :: 34
 }
 }
 
 
+// Haiku: errno accessor and the error constants defined for this platform.
+when ODIN_OS == .Haiku {
+	@(private="file")
+	@(default_calling_convention="c")
+	foreign libc {
+		// Bound to the libc symbol `_errnop`, which returns a pointer.
+		@(link_name="_errnop")
+		_get_errno :: proc() -> ^int ---
+	}
+
+	// The error codes below are offsets from these bases, which start at
+	// the most negative i32, so the resulting values are negative.
+	@(private="file")
+	B_GENERAL_ERROR_BASE :: min(i32)
+	@(private="file")
+	B_POSIX_ERROR_BASE   :: B_GENERAL_ERROR_BASE + 0x7000
+
+	EDOM   :: B_POSIX_ERROR_BASE + 16
+	EILSEQ :: B_POSIX_ERROR_BASE + 38
+	ERANGE :: B_POSIX_ERROR_BASE + 17
+}
+
 // Odin has no way to make an identifier "errno" behave as a function call to
 // Odin has no way to make an identifier "errno" behave as a function call to
 // read the value, or to produce an lvalue such that you can assign a different
 // read the value, or to produce an lvalue such that you can assign a different
 // error value to errno. To work around this, just expose it as a function like
 // error value to errno. To work around this, just expose it as a function like

+ 1 - 1
core/c/libc/math.odin

@@ -2,7 +2,7 @@ package libc
 
 
 // 7.12 Mathematics
 // 7.12 Mathematics
 
 
-import "core:intrinsics"
+import "base:intrinsics"
 
 
 when ODIN_OS == .Windows {
 when ODIN_OS == .Windows {
 	foreign import libc "system:libucrt.lib"
 	foreign import libc "system:libucrt.lib"

+ 4 - 10
core/c/libc/stdarg.odin

@@ -2,7 +2,9 @@ package libc
 
 
 // 7.16 Variable arguments
 // 7.16 Variable arguments
 
 
-import "core:intrinsics"
+import "base:intrinsics"
+
+import "core:c"
 
 
 @(private="file")
 @(private="file")
 @(default_calling_convention="none")
 @(default_calling_convention="none")
@@ -12,15 +14,7 @@ foreign _ {
 	@(link_name="llvm.va_copy")  _va_copy  :: proc(dst, src: ^i8) ---
 	@(link_name="llvm.va_copy")  _va_copy  :: proc(dst, src: ^i8) ---
 }
 }
 
 
-// Since there are no types in C with an alignment larger than that of
-// max_align_t, which cannot be larger than sizeof(long double) as any other
-// exposed type wouldn't be valid C, the maximum alignment possible in a
-// strictly conformant C implementation is 16 on the platforms we care about.
-// The choice of 4096 bytes for storage of this type is more than enough on all
-// relevant platforms.
-va_list :: struct #align(16) {
-	_: [4096]u8,
-}
+va_list :: c.va_list
 
 
 va_start :: #force_inline proc(ap: ^va_list, _: any) {
 va_start :: #force_inline proc(ap: ^va_list, _: any) {
 	_va_start(cast(^i8)ap)
 	_va_start(cast(^i8)ap)

+ 1 - 1
core/c/libc/stdatomic.odin

@@ -2,7 +2,7 @@ package libc
 
 
 // 7.17 Atomics
 // 7.17 Atomics
 
 
-import "core:intrinsics"
+import "base:intrinsics"
 
 
 ATOMIC_BOOL_LOCK_FREE     :: true
 ATOMIC_BOOL_LOCK_FREE     :: true
 ATOMIC_CHAR_LOCK_FREE     :: true
 ATOMIC_CHAR_LOCK_FREE     :: true

+ 30 - 0
core/c/libc/stdio.odin

@@ -163,6 +163,36 @@ when ODIN_OS == .Darwin {
 	}
 	}
 }
 }
 
 
+// Haiku: stdio types, constants and standard stream variables.
+when ODIN_OS == .Haiku {
+	fpos_t :: distinct i64
+	
+	// Buffering mode constants.
+	_IOFBF        :: 0
+	_IOLBF        :: 1
+	_IONBF        :: 2
+
+	BUFSIZ        :: 8192
+
+	EOF           :: int(-1)
+
+	FOPEN_MAX     :: 128
+
+	FILENAME_MAX  :: 256
+
+	L_tmpnam      :: 512
+
+	// Seek origin constants.
+	SEEK_SET      :: 0
+	SEEK_CUR      :: 1
+	SEEK_END      :: 2
+
+	TMP_MAX       :: 32768
+
+	// Standard streams, imported as foreign variables from libc.
+	foreign libc {
+		stderr: ^FILE
+		stdin:  ^FILE
+		stdout: ^FILE
+	}
+}
+
 @(default_calling_convention="c")
 @(default_calling_convention="c")
 foreign libc {
 foreign libc {
 	// 7.21.4 Operations on files
 	// 7.21.4 Operations on files

+ 1 - 1
core/c/libc/string.odin

@@ -1,6 +1,6 @@
 package libc
 package libc
 
 
-import "core:runtime"
+import "base:runtime"
 
 
 // 7.24 String handling
 // 7.24 String handling
 
 

+ 1 - 1
core/c/libc/time.odin

@@ -45,7 +45,7 @@ when ODIN_OS == .Windows {
 	}
 	}
 }
 }
 
 
-when ODIN_OS == .Linux || ODIN_OS == .FreeBSD || ODIN_OS == .Darwin || ODIN_OS == .OpenBSD {
+when ODIN_OS == .Linux || ODIN_OS == .FreeBSD || ODIN_OS == .Darwin || ODIN_OS == .OpenBSD || ODIN_OS == .Haiku {
 	@(default_calling_convention="c")
 	@(default_calling_convention="c")
 	foreign libc {
 	foreign libc {
 		// 7.27.2 Time manipulation functions
 		// 7.27.2 Time manipulation functions

+ 5 - 1
core/c/libc/wctype.odin

@@ -29,7 +29,11 @@ when ODIN_OS == .Windows {
 } else when ODIN_OS == .FreeBSD {
 } else when ODIN_OS == .FreeBSD {
 	wctrans_t :: distinct int
 	wctrans_t :: distinct int
 	wctype_t  :: distinct ulong
 	wctype_t  :: distinct ulong
-	
+
+} else when ODIN_OS == .Haiku {
+	wctrans_t :: distinct i32
+	wctype_t  :: distinct i32
+
 }
 }
 
 
 @(default_calling_convention="c")
 @(default_calling_convention="c")

+ 1 - 1
core/compress/common.odin

@@ -12,7 +12,7 @@ package compress
 
 
 import "core:io"
 import "core:io"
 import "core:bytes"
 import "core:bytes"
-import "core:runtime"
+import "base:runtime"
 
 
 /*
 /*
 	These settings bound how much compression algorithms will allocate for their output buffer.
 	These settings bound how much compression algorithms will allocate for their output buffer.

+ 1 - 1
core/compress/shoco/shoco.odin

@@ -11,7 +11,7 @@
 // package shoco is an implementation of the shoco short string compressor
 // package shoco is an implementation of the shoco short string compressor
 package shoco
 package shoco
 
 
-import "core:intrinsics"
+import "base:intrinsics"
 import "core:compress"
 import "core:compress"
 
 
 Shoco_Pack :: struct {
 Shoco_Pack :: struct {

+ 678 - 0
core/container/avl/avl.odin

@@ -0,0 +1,678 @@
+/*
+package avl implements an AVL tree.
+
+The implementation is non-intrusive, and non-recursive.
+*/
+package container_avl
+
+import "base:intrinsics"
+import "base:runtime"
+import "core:slice"
+
+_ :: intrinsics
+_ :: runtime
+
+// Originally based on the CC0 implementation by Eric Biggers
+// See: https://github.com/ebiggers/avl_tree/
+
+// Direction specifies the traversal direction for a tree iterator.
+//
+// The numeric values matter: the traversal helpers cast a Direction to i8
+// and use it as the `sign` that selects a child (negative selects the left
+// child, otherwise the right child).
+Direction :: enum i8 {
+	// Backward is the in-order backwards direction.
+	Backward = -1,
+	// Forward is the in-order forwards direction.
+	Forward  = 1,
+}
+
+// Ordering specifies order when inserting/finding values into the tree.
+// It is an alias of slice.Ordering; the tree's comparison procedure returns
+// one of .Less, .Equal or .Greater.
+Ordering :: slice.Ordering
+
+// Tree is an AVL tree.
+//
+// Fields prefixed with an underscore are internal bookkeeping that is set
+// up by init_cmp and maintained by the insert/remove procedures.
+Tree :: struct($Value: typeid) {
+	// user_data is a parameter that will be passed to the on_remove
+	// callback.
+	user_data: rawptr,
+	// on_remove is an optional callback that can be called immediately
+	// after a node is removed from the tree.
+	on_remove: proc(value: Value, user_data: rawptr),
+
+	// _root is the root node, or nil when the tree is empty.
+	_root:           ^Node(Value),
+	// _node_allocator allocates nodes on insert and frees them on removal.
+	_node_allocator: runtime.Allocator,
+	// _cmp_fn orders values; it is called as _cmp_fn(search_value, node.value).
+	_cmp_fn:         proc(a, b: Value) -> Ordering,
+	// _size is the number of nodes currently in the tree (see len).
+	_size:           int,
+}
+
+// Node is an AVL tree node.
+//
+// WARNING: It is unsafe to mutate value if the node is part of a tree
+// if doing so will alter the Node's sort position relative to other
+// elements in the tree.
+Node :: struct($Value: typeid) {
+	value: Value,
+
+	// _parent is nil for the root. node_reset points it at the node itself
+	// to mark a node that has been detached from its tree.
+	_parent:  ^Node(Value),
+	_left:    ^Node(Value),
+	_right:   ^Node(Value),
+	// _balance is the AVL balance factor. An insertion on the left adjusts
+	// the parent by -1 and on the right by +1, so this appears to be
+	// (right subtree height - left subtree height).
+	_balance: i8,
+}
+
+// Iterator is a tree iterator.
+//
+// WARNING: It is unsafe to modify the tree while iterating, except via
+// the iterator_remove method.
+Iterator :: struct($Value: typeid) {
+	// _tree is the tree being iterated.
+	_tree:        ^Tree(Value),
+	// _cur is the current node; iterator_remove sets it to nil after a
+	// successful removal.
+	_cur:         ^Node(Value),
+	// _next is computed ahead of time, so the iterator can keep advancing
+	// after _cur has been removed.
+	_next:        ^Node(Value),
+	_direction:   Direction,
+	// _called_next records whether iterator_next has been called yet; the
+	// first call returns _cur instead of advancing.
+	_called_next: bool,
+}
+
+// init initializes a tree.
+//
+// This is a procedure group over init_ordered (default ascending
+// comparison) and init_cmp (caller-supplied comparison procedure).
+init :: proc {
+	init_ordered,
+	init_cmp,
+}
+
+// init_cmp prepares `t` as an empty tree that orders values with `cmp_fn`
+// and allocates its nodes from `node_allocator`.
+//
+// Only the internal fields are written; `user_data` and `on_remove` are left
+// as they were.
+init_cmp :: proc(
+	t: ^$T/Tree($Value),
+	cmp_fn: proc(a, b: Value) -> Ordering,
+	node_allocator := context.allocator,
+) {
+	// Configuration.
+	t._cmp_fn = cmp_fn
+	t._node_allocator = node_allocator
+
+	// Empty contents.
+	t._size = 0
+	t._root = nil
+}
+
+// init_ordered prepares `t` as an empty tree of ordered numeric values,
+// using the comparison procedure from slice.cmp_proc so that values sort in
+// ascending order.
+init_ordered :: proc(
+	t: ^$T/Tree($Value),
+	node_allocator := context.allocator,
+) where intrinsics.type_is_ordered_numeric(Value) {
+	ascending := slice.cmp_proc(Value)
+	init_cmp(t, ascending, node_allocator)
+}
+
+// destroy removes every node from the tree, walking it in forward order and
+// removing each node through the iterator. `call_on_remove` is forwarded to
+// each removal.
+destroy :: proc(t: ^$T/Tree($Value), call_on_remove: bool = true) {
+	it := iterator(t, Direction.Forward)
+	for {
+		_, ok := iterator_next(&it)
+		if !ok {
+			break
+		}
+		iterator_remove(&it, call_on_remove)
+	}
+}
+
+// len reports how many elements the tree currently holds.
+len :: proc "contextless" (t: ^$T/Tree($Value)) -> int {
+	count := t._size
+	return count
+}
+
+// first returns the in-order first node of the tree (reached by following
+// left children from the root), or nil iff the tree is empty.
+first :: proc "contextless" (t: ^$T/Tree($Value)) -> ^Node(Value) {
+	leftmost := tree_first_or_last_in_order(t, Direction.Backward)
+	return leftmost
+}
+
+// last returns the in-order last node of the tree (reached by following
+// right children from the root), or nil iff the tree is empty.
+last :: proc "contextless" (t: ^$T/Tree($Value)) -> ^Node(Value) {
+	rightmost := tree_first_or_last_in_order(t, Direction.Forward)
+	return rightmost
+}
+
+// find looks `value` up in the tree and returns the node holding it, or nil
+// iff no node compares equal.
+//
+// The search descends from the root, going left when the comparison
+// procedure reports .Less and right when it reports .Greater.
+find :: proc(t: ^$T/Tree($Value), value: Value) -> ^Node(Value) {
+	for node := t._root; node != nil; {
+		switch t._cmp_fn(value, node.value) {
+		case .Equal:
+			return node
+		case .Less:
+			node = node._left
+		case .Greater:
+			node = node._right
+		}
+	}
+
+	return nil
+}
+
+// find_or_insert attempts to insert the value into the tree, and returns
+// the node, a boolean indicating if the value was inserted, and the
+// node allocator error if relevant.  If the value is already
+// present, the existing node is returned un-altered.
+//
+// If allocating the new node fails, the returned node is nil, `inserted`
+// is false, `err` holds the allocator error, and the tree is unchanged.
+find_or_insert :: proc(
+	t: ^$T/Tree($Value),
+	value: Value,
+) -> (
+	n: ^Node(Value),
+	inserted: bool,
+	err: runtime.Allocator_Error,
+) {
+	// The descent is tracked in a local rather than in the named result
+	// `n`. Otherwise an allocation failure below would return the last
+	// visited (unrelated) node, which looks like "value already present".
+	parent: ^Node(Value)
+	n_ptr := &t._root
+	for n_ptr^ != nil {
+		parent = n_ptr^
+		switch t._cmp_fn(value, parent.value) {
+		case .Less:
+			n_ptr = &parent._left
+		case .Greater:
+			n_ptr = &parent._right
+		case .Equal:
+			n = parent
+			return
+		}
+	}
+
+	// n_ptr now addresses the nil link where the new node belongs.
+	n = new(Node(Value), t._node_allocator) or_return
+	n.value = value
+	n._parent = parent
+	n_ptr^ = n
+	tree_rebalance_after_insert(t, n)
+
+	t._size += 1
+	inserted = true
+
+	return
+}
+
+// remove removes a node or value from the tree, and returns true iff the
+// removal was successful.  While the node's value will be left intact,
+// the node itself will be freed via the tree's node allocator.
+//
+// This is a procedure group over remove_value and remove_node.
+remove :: proc {
+	remove_value,
+	remove_node,
+}
+
+// remove_value looks `value` up and, if a matching node exists, removes it
+// via remove_node. Returns true iff a node was found and removed. The
+// node's value is left intact, while the node itself is freed via the
+// tree's node allocator.
+remove_value :: proc(t: ^$T/Tree($Value), value: Value, call_on_remove: bool = true) -> bool {
+	if node := find(t, value); node != nil {
+		return remove_node(t, node, call_on_remove)
+	}
+	return false
+}
+
+// remove_node removes a node from the tree, and returns true iff the
+// removal was successful.  While the node's value will be left intact,
+// the node itself will be freed via the tree's node allocator.
+//
+// `node` must not be nil. When `call_on_remove` is true and the tree has an
+// on_remove callback, it is invoked with the node's value just before the
+// node is freed.
+remove_node :: proc(t: ^$T/Tree($Value), node: ^Node(Value), call_on_remove: bool = true) -> bool {
+	// Reject nodes that are not linked into this tree: node_reset marks a
+	// detached node by pointing _parent at itself, and a parentless node
+	// must be the root.
+	if node._parent == node || (node._parent == nil && t._root != node) {
+		return false
+	}
+	// Registered after the early return above, so it only runs on the
+	// successful paths, once the node has been unlinked.
+	defer {
+		if call_on_remove && t.on_remove != nil {
+			t.on_remove(node.value, t.user_data)
+		}
+		free(node, t._node_allocator)
+	}
+
+	// parent is where rebalancing starts; left_deleted records which of its
+	// subtrees became shorter.
+	parent: ^Node(Value)
+	left_deleted: bool
+
+	t._size -= 1
+	if node._left != nil && node._right != nil {
+		// Two children: the in-order successor takes the node's place.
+		parent, left_deleted = tree_swap_with_successor(t, node)
+	} else {
+		// At most one child: splice that child (possibly nil) into the
+		// node's position.
+		child := node._left
+		if child == nil {
+			child = node._right
+		}
+		parent = node._parent
+		if parent != nil {
+			if node == parent._left {
+				parent._left = child
+				left_deleted = true
+			} else {
+				parent._right = child
+				left_deleted = false
+			}
+			if child != nil {
+				child._parent = parent
+			}
+		} else {
+			// The node was the root; the child (if any) becomes the new
+			// root and there is nothing above it to rebalance.
+			if child != nil {
+				child._parent = parent
+			}
+			t._root = child
+			node_reset(node)
+			return true
+		}
+	}
+
+	// Walk upwards, fixing balance factors, until the shrink handler
+	// returns nil. The sign is +1 when a left subtree shrank and -1 when a
+	// right subtree shrank.
+	for {
+		if left_deleted {
+			parent = tree_handle_subtree_shrink(t, parent, +1, &left_deleted)
+		} else {
+			parent = tree_handle_subtree_shrink(t, parent, -1, &left_deleted)
+		}
+		if parent == nil {
+			break
+		}
+	}
+	node_reset(node)
+
+	return true
+}
+
+// iterator creates an iterator over `t` that travels in `direction`,
+// positioned (via iterator_first) at the node where that traversal starts.
+iterator :: proc "contextless" (t: ^$T/Tree($Value), direction: Direction) -> Iterator(Value) {
+	result := Iterator(Value){
+		_tree      = transmute(^Tree(Value))t,
+		_direction = direction,
+	}
+	iterator_first(&result)
+	return result
+}
+
+// iterator_from_pos creates an iterator over `t` that starts at `pos` and
+// travels in `direction`, spanning the range [pos, last] (inclusive).
+//
+// When `pos` is nil the iterator is empty: both its current and next nodes
+// are nil.
+iterator_from_pos :: proc "contextless" (
+	t: ^$T/Tree($Value),
+	pos: ^Node(Value),
+	direction: Direction,
+) -> Iterator(Value) {
+	// A zero-initialized iterator already has _next == nil and
+	// _called_next == false.
+	result: Iterator(Value)
+	result._tree = transmute(^Tree(Value))t
+	result._direction = direction
+	result._cur = pos
+
+	if pos != nil {
+		result._next = node_next_or_prev_in_order(pos, direction)
+	}
+
+	return result
+}
+
+// iterator_get returns the node the iterator currently points at. The
+// result is nil iff that node has been removed, the tree is empty, or the
+// end of the tree has been reached.
+iterator_get :: proc "contextless" (it: ^$I/Iterator($Value)) -> ^Node(Value) {
+	current := it._cur
+	return current
+}
+
+// iterator_remove removes the iterator's current node from its tree and
+// returns true iff the removal was successful. Semantics are the same as
+// the Tree remove. After a successful removal the iterator has no current
+// node until iterator_next is called.
+iterator_remove :: proc(it: ^$I/Iterator($Value), call_on_remove: bool = true) -> bool {
+	current := it._cur
+	if current == nil {
+		return false
+	}
+
+	if !remove_node(it._tree, current, call_on_remove) {
+		return false
+	}
+
+	it._cur = nil
+	return true
+}
+
+// iterator_next advances the iterator and returns (node, true), or
+// (nil, false) iff the end of the tree has been reached.
+//
+// Note: The first call to iterator_next will return the first node instead
+// of advancing the iterator.
+iterator_next :: proc "contextless" (it: ^$I/Iterator($Value)) -> (^Node(Value), bool) {
+	// On the very first call the iterator already points at the first
+	// node, so hand that out without advancing. If iterator_remove was
+	// called before any iterator_next, _cur is nil and this call has to
+	// behave like a normal advance instead.
+	first_call := !it._called_next
+	it._called_next = true
+	if first_call && it._cur != nil {
+		return it._cur, true
+	}
+
+	upcoming := it._next
+	if upcoming == nil {
+		return nil, false
+	}
+
+	// Step forward and pre-compute the node after this one.
+	it._cur = upcoming
+	it._next = node_next_or_prev_in_order(upcoming, it._direction)
+
+	return upcoming, true
+}
+
+// tree_first_or_last_in_order returns the extreme node of the tree on the
+// side selected by `direction` (.Backward follows left children, .Forward
+// follows right children), or nil when the tree is empty.
+@(private)
+tree_first_or_last_in_order :: proc "contextless" (
+	t: ^$T/Tree($Value),
+	direction: Direction,
+) -> ^Node(Value) {
+	node := t._root
+	if node == nil {
+		return nil
+	}
+
+	sign := i8(direction)
+	for child := node_get_child(node, sign); child != nil; child = node_get_child(node, sign) {
+		node = child
+	}
+
+	return node
+}
+
+// tree_replace_child makes `new_child` occupy the link that currently
+// leads to `old_child`: the tree's root when `parent` is nil, otherwise
+// parent's left link if it holds `old_child`, else parent's right link.
+// Only the downward link is updated; the child's _parent is not touched.
+@(private)
+tree_replace_child :: proc "contextless" (
+	t: ^$T/Tree($Value),
+	parent, old_child, new_child: ^Node(Value),
+) {
+	switch {
+	case parent == nil:
+		t._root = new_child
+	case parent._left == old_child:
+		parent._left = new_child
+	case:
+		parent._right = new_child
+	}
+}
+
+// tree_rotate performs a single rotation around `a` towards the `sign`
+// side: a's child on the opposite side (`b`) moves up into a's position and
+// `a` becomes b's child on the `sign` side.
+//
+// Only links are rewired here; balance factors are left for the caller to
+// adjust.
+@(private)
+tree_rotate :: proc "contextless" (t: ^$T/Tree($Value), a: ^Node(Value), sign: i8) {
+	b := node_get_child(a, -sign) // node that moves up
+	e := node_get_child(b, +sign) // b's inner subtree, handed over to a
+	p := a._parent
+
+	node_set_child(a, -sign, e)
+	a._parent = b
+
+	node_set_child(b, +sign, a)
+	b._parent = p
+
+	if e != nil {
+		e._parent = a
+	}
+
+	// Point a's former parent (or the tree root) at b.
+	tree_replace_child(t, p, a, b)
+}
+
+// tree_double_rotate performs a double rotation: `e`, the child of `b` on
+// the `sign` side, is lifted into a's position, with `a` as its child on
+// the `sign` side and `b` as its child on the opposite side. e's former
+// subtrees `f` and `g` are handed to `b` and `a` respectively.
+//
+// Unlike tree_rotate, this also sets the balance factors of `a`, `b` and
+// `e`. Returns `e`, the new root of the rotated subtree.
+@(private)
+tree_double_rotate :: proc "contextless" (
+	t: ^$T/Tree($Value),
+	b, a: ^Node(Value),
+	sign: i8,
+) -> ^Node(Value) {
+	e := node_get_child(b, +sign)
+	f := node_get_child(e, -sign)
+	g := node_get_child(e, +sign)
+	p := a._parent
+	e_bal := e._balance
+
+	// a adopts g; its new balance is -e_bal unless sign*e_bal >= 0, in
+	// which case it is 0.
+	node_set_child(a, -sign, g)
+	a_bal := -e_bal
+	if sign * e_bal >= 0 {
+		a_bal = 0
+	}
+	node_set_parent_balance(a, e, a_bal)
+
+	// b adopts f; its new balance is -e_bal unless sign*e_bal <= 0, in
+	// which case it is 0.
+	node_set_child(b, +sign, f)
+	b_bal := -e_bal
+	if sign * e_bal <= 0 {
+		b_bal = 0
+	}
+	node_set_parent_balance(b, e, b_bal)
+
+	// e takes a's old parent and ends up with balance 0.
+	node_set_child(e, +sign, a)
+	node_set_child(e, -sign, b)
+	node_set_parent_balance(e, p, 0)
+
+	if g != nil {
+		g._parent = a
+	}
+
+	if f != nil {
+		f._parent = b
+	}
+
+	tree_replace_child(t, p, a, e)
+
+	return e
+}
+
+// tree_handle_subtree_growth updates `parent` after the subtree rooted at
+// its child `node` has grown in height. `sign` is -1 when `node` is the
+// left child and +1 when it is the right child.
+//
+// Returns true when rebalancing is finished, and false when the growth has
+// to be propagated further up (the caller uses the result as its `done`
+// flag).
+@(private)
+tree_handle_subtree_growth :: proc "contextless" (
+	t: ^$T/Tree($Value),
+	node, parent: ^Node(Value),
+	sign: i8,
+) -> bool {
+	old_balance_factor := parent._balance
+	if old_balance_factor == 0 {
+		// parent was balanced: it now leans towards `sign` and has grown
+		// too, so the caller must continue upwards.
+		node_adjust_balance_factor(parent, sign)
+		return false
+	}
+
+	new_balance_factor := old_balance_factor + sign
+	if new_balance_factor == 0 {
+		// parent leaned the other way and is now balanced.
+		node_adjust_balance_factor(parent, sign)
+		return true
+	}
+
+	// parent would become doubly unbalanced towards `sign`: rotate.
+	if sign * node._balance > 0 {
+		// node leans the same way as the growth: a single rotation
+		// suffices, after which both balance factors are adjusted.
+		tree_rotate(t, parent, -sign)
+		node_adjust_balance_factor(parent, -sign)
+		node_adjust_balance_factor(node, -sign)
+	} else {
+		tree_double_rotate(t, node, parent, -sign)
+	}
+
+	return true
+}
+
+// tree_rebalance_after_insert restores the balance factors (rotating where
+// needed) after `inserted` has been linked into the tree as a new leaf.
+@(private)
+tree_rebalance_after_insert :: proc "contextless" (t: ^$T/Tree($Value), inserted: ^Node(Value)) {
+	node, parent := inserted, inserted._parent
+	switch {
+	case parent == nil:
+		// The new node is the root; nothing to rebalance.
+		return
+	case node == parent._left:
+		node_adjust_balance_factor(parent, -1)
+	case:
+		node_adjust_balance_factor(parent, +1)
+	}
+
+	// If the parent ended up balanced, nothing propagates further up.
+	if parent._balance == 0 {
+		return
+	}
+
+	// Otherwise walk up one level at a time until the growth handler
+	// reports completion or the root is passed.
+	for done := false; !done; {
+		node = parent
+		if parent = node._parent; parent == nil {
+			return
+		}
+
+		if node == parent._left {
+			done = tree_handle_subtree_growth(t, node, parent, -1)
+		} else {
+			done = tree_handle_subtree_growth(t, node, parent, +1)
+		}
+	}
+}
+
+// tree_swap_with_successor unlinks `x`, which must have both children, by
+// putting its in-order successor `y` (the leftmost node of x's right
+// subtree) in x's place. `y` inherits x's parent, left child and balance
+// factor.
+//
+// Returns the node at which rebalancing must start and whether it was that
+// node's left subtree that shrank.
+@(private)
+tree_swap_with_successor :: proc "contextless" (
+	t: ^$T/Tree($Value),
+	x: ^Node(Value),
+) -> (
+	^Node(Value),
+	bool,
+) {
+	ret: ^Node(Value)
+	left_deleted: bool
+
+	y := x._right
+	if y._left == nil {
+		// The successor is x's right child: it keeps its own right
+		// subtree, and rebalancing starts at y itself with left_deleted
+		// left as false.
+		ret = y
+	} else {
+		q: ^Node(Value)
+
+		// Descend to the leftmost node y, keeping q as its parent.
+		for {
+			q = y
+			if y = y._left; y._left == nil {
+				break
+			}
+		}
+
+		// y's right subtree takes y's old place under q, and y adopts
+		// x's right subtree.
+		if q._left = y._right; q._left != nil {
+			q._left._parent = q
+		}
+		y._right = x._right
+		x._right._parent = y
+		ret = q
+		left_deleted = true
+	}
+
+	y._left = x._left
+	x._left._parent = y
+
+	y._parent = x._parent
+	y._balance = x._balance
+
+	tree_replace_child(t, x._parent, x, y)
+
+	return ret, left_deleted
+}
+
+// tree_handle_subtree_shrink updates `parent` after one of its subtrees has
+// become shorter. `sign` is +1 when the left subtree shrank and -1 when the
+// right one did.
+//
+// Returns nil when rebalancing is complete. Otherwise it returns the next
+// ancestor to process; when that ancestor is non-nil, `left_deleted^` is set
+// to whether the shrunken subtree is its left one.
+@(private)
+tree_handle_subtree_shrink :: proc "contextless" (
+	t: ^$T/Tree($Value),
+	parent: ^Node(Value),
+	sign: i8,
+	left_deleted: ^bool,
+) -> ^Node(Value) {
+	old_balance_factor := parent._balance
+	if old_balance_factor == 0 {
+		// parent was balanced: it now leans towards `sign`, and the
+		// procedure reports completion without going further up.
+		node_adjust_balance_factor(parent, sign)
+		return nil
+	}
+
+	node: ^Node(Value)
+	new_balance_factor := old_balance_factor + sign
+	if new_balance_factor == 0 {
+		// parent becomes balanced; processing continues with its parent.
+		node_adjust_balance_factor(parent, sign)
+		node = parent
+	} else {
+		// parent would become doubly unbalanced towards `sign`: rotate
+		// using its child on that side.
+		node = node_get_child(parent, sign)
+		if sign * node._balance >= 0 {
+			tree_rotate(t, parent, -sign)
+			if node._balance == 0 {
+				// Only node's balance factor is adjusted in this case,
+				// and rebalancing stops here.
+				node_adjust_balance_factor(node, -sign)
+				return nil
+			}
+			node_adjust_balance_factor(parent, -sign)
+			node_adjust_balance_factor(node, -sign)
+		} else {
+			node = tree_double_rotate(t, node, parent, -sign)
+		}
+	}
+
+	// Shadow the parameter with a local so it can be reassigned to the
+	// next ancestor.
+	parent := parent
+	if parent = node._parent; parent != nil {
+		left_deleted^ = node == parent._left
+	}
+	return parent
+}
+
+// node_reset clears a node's links and balance factor and points its
+// _parent at the node itself, which marks it as not belonging to a tree.
+@(private)
+node_reset :: proc "contextless" (n: ^Node($Value)) {
+	// Largely redundant because the caller frees n right afterwards, but
+	// the self-parent marker lets a later removal attempt on this node be
+	// detected.
+	n._balance = 0
+	n._right = nil
+	n._left = nil
+	n._parent = n
+}
+
+// node_set_parent_balance stores both the parent pointer and the balance
+// factor of `n`.
+@(private)
+node_set_parent_balance :: #force_inline proc "contextless" (
+	n, parent: ^Node($Value),
+	balance: i8,
+) {
+	n._balance = balance
+	n._parent = parent
+}
+
+// node_get_child returns the left child of `n` when `sign` is negative and
+// the right child otherwise.
+@(private)
+node_get_child :: #force_inline proc "contextless" (n: ^Node($Value), sign: i8) -> ^Node(Value) {
+	return n._left if sign < 0 else n._right
+}
+
+// node_next_or_prev_in_order returns the in-order neighbour of `n` in the
+// given direction, or nil when `n` is the last node in that direction.
+@(private)
+node_next_or_prev_in_order :: proc "contextless" (
+	n: ^Node($Value),
+	direction: Direction,
+) -> ^Node(Value) {
+	sign := i8(direction)
+
+	// If there is a subtree in the travel direction, the neighbour is that
+	// subtree's extreme node on the opposite side.
+	if candidate := node_get_child(n, sign); candidate != nil {
+		for {
+			deeper := node_get_child(candidate, -sign)
+			if deeper == nil {
+				return candidate
+			}
+			candidate = deeper
+		}
+	}
+
+	// Otherwise climb until we arrive at an ancestor from its opposite
+	// side; running off the root yields nil.
+	came_from, ancestor := n, n._parent
+	for ancestor != nil && came_from == node_get_child(ancestor, sign) {
+		came_from = ancestor
+		ancestor = ancestor._parent
+	}
+	return ancestor
+}
+
+// node_set_child stores `child` as the left child of `n` when `sign` is
+// negative and as the right child otherwise. The child's _parent is not
+// modified.
+@(private)
+node_set_child :: #force_inline proc "contextless" (
+	n: ^Node($Value),
+	sign: i8,
+	child: ^Node(Value),
+) {
+	link := &n._right
+	if sign < 0 {
+		link = &n._left
+	}
+	link^ = child
+}
+
+// node_adjust_balance_factor adds `amount` to the balance factor of `n`.
+@(private)
+node_adjust_balance_factor :: #force_inline proc "contextless" (n: ^Node($Value), amount: i8) {
+	n._balance = n._balance + amount
+}
+
+// iterator_first rewinds the iterator to the starting node for its
+// direction and pre-computes the node that follows it.
+@(private)
+iterator_first :: proc "contextless" (it: ^Iterator($Value)) {
+	// Kept private: calling this by hand and then calling iterator_next
+	// is unintuitive, because the first iterator_next call MUST return
+	// the first node rather than advance, so that
+	// `for node in iterator_next(&it)` works as expected.
+
+	// A forward walk starts at the in-order first node, a backward walk
+	// at the in-order last node.
+	switch it._direction {
+	case .Backward:
+		it._cur = tree_first_or_last_in_order(it._tree, .Forward)
+	case .Forward:
+		it._cur = tree_first_or_last_in_order(it._tree, .Backward)
+	}
+
+	it._called_next = false
+	it._next = nil
+
+	if start := it._cur; start != nil {
+		it._next = node_next_or_prev_in_order(start, it._direction)
+	}
+}

+ 1 - 1
core/container/bit_array/bit_array.odin

@@ -1,6 +1,6 @@
 package dynamic_bit_array
 package dynamic_bit_array
 
 
-import "core:intrinsics"
+import "base:intrinsics"
 import "core:mem"
 import "core:mem"
 
 
 /*
 /*

+ 1 - 1
core/container/intrusive/list/intrusive_list.odin

@@ -1,6 +1,6 @@
 package container_intrusive_list
 package container_intrusive_list
 
 
-import "core:intrinsics"
+import "base:intrinsics"
 
 
 // An intrusive doubly-linked list
 // An intrusive doubly-linked list
 //
 //

+ 2 - 2
core/container/lru/lru_cache.odin

@@ -1,7 +1,7 @@
 package container_lru
 package container_lru
 
 
-import "core:runtime"
-import "core:intrinsics"
+import "base:runtime"
+import "base:intrinsics"
 _ :: runtime
 _ :: runtime
 _ :: intrinsics
 _ :: intrinsics
 
 

+ 1 - 1
core/container/priority_queue/priority_queue.odin

@@ -1,6 +1,6 @@
 package container_priority_queue
 package container_priority_queue
 
 
-import "core:builtin"
+import "base:builtin"
 
 
 Priority_Queue :: struct($T: typeid) {
 Priority_Queue :: struct($T: typeid) {
 	queue: [dynamic]T,
 	queue: [dynamic]T,

+ 2 - 2
core/container/queue/queue.odin

@@ -1,7 +1,7 @@
 package container_queue
 package container_queue
 
 
-import "core:builtin"
-import "core:runtime"
+import "base:builtin"
+import "base:runtime"
 _ :: runtime
 _ :: runtime
 
 
 // Dynamically resizable double-ended queue/ring-buffer
 // Dynamically resizable double-ended queue/ring-buffer

+ 2 - 2
core/container/small_array/small_array.odin

@@ -1,7 +1,7 @@
 package container_small_array
 package container_small_array
 
 
-import "core:builtin"
-import "core:runtime"
+import "base:builtin"
+import "base:runtime"
 _ :: runtime
 _ :: runtime
 
 
 Small_Array :: struct($N: int, $T: typeid) where N >= 0 {
 Small_Array :: struct($N: int, $T: typeid) where N >= 0 {

+ 2 - 2
core/container/topological_sort/topological_sort.odin

@@ -3,8 +3,8 @@
 // map type is being used to accelerate lookups.
 // map type is being used to accelerate lookups.
 package container_topological_sort
 package container_topological_sort
 
 
-import "core:intrinsics"
-import "core:runtime"
+import "base:intrinsics"
+import "base:runtime"
 _ :: intrinsics
 _ :: intrinsics
 _ :: runtime
 _ :: runtime
 
 

+ 16 - 70
core/crypto/README.md

@@ -1,84 +1,30 @@
 # crypto
 # crypto
 
 
-A cryptography library for the Odin language
+A cryptography library for the Odin language.
 
 
 ## Supported
 ## Supported
 
 
-This library offers various algorithms implemented in Odin.
-Please see the chart below for some of the options.
-
-## Hashing algorithms
-
-| Algorithm                                                                                                    |                  |
-|:-------------------------------------------------------------------------------------------------------------|:-----------------|
-| [BLAKE2B](https://datatracker.ietf.org/doc/html/rfc7693)                                                     | &#10004;&#65039; |
-| [BLAKE2S](https://datatracker.ietf.org/doc/html/rfc7693)                                                     | &#10004;&#65039; |
-| [SHA-2](https://csrc.nist.gov/csrc/media/publications/fips/180/2/archive/2002-08-01/documents/fips180-2.pdf) | &#10004;&#65039; |
-| [SHA-3](https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf)                                            | &#10004;&#65039; |
-| [SHAKE](https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf)                                            | &#10004;&#65039; |
-| [SM3](https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02)                                           | &#10004;&#65039; |
-| legacy/[Keccak](https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf)                                    | &#10004;&#65039; |
-| legacy/[MD5](https://datatracker.ietf.org/doc/html/rfc1321)                                                  | &#10004;&#65039; |
-| legacy/[SHA-1](https://datatracker.ietf.org/doc/html/rfc3174)                                                | &#10004;&#65039; |
-
-#### High level API
-
-Each hash algorithm contains a procedure group named `hash`, or if the algorithm provides more than one digest size `hash_<size>`\*.
-Included in these groups are six procedures.
-- `hash_string` - Hash a given string and return the computed hash. Just calls `hash_bytes` internally
-- `hash_bytes` - Hash a given byte slice and return the computed hash
-- `hash_string_to_buffer` - Hash a given string and put the computed hash in the second proc parameter. Just calls `hash_bytes_to_buffer` internally
-- `hash_bytes_to_buffer` - Hash a given string and put the computed hash in the second proc parameter. The destination buffer has to be at least as big as the digest size of the hash
-- `hash_stream` - Takes a stream from io.Stream and returns the computed hash from it
-- `hash_file` - Takes a file handle and returns the computed hash from it. A second optional boolean parameter controls if the file is streamed (this is the default) or read at once (set to true)
-
-\* On some algorithms there is another part to the name, since they might offer control about additional parameters.
-For instance, `SHA-2` offers different sizes.
-Computing a 512-bit hash is therefore achieved by calling `sha2.hash_512(...)`.
-
-#### Low level API
-
-The above mentioned procedures internally call three procedures: `init`, `update` and `final`.
-You may also directly call them, if you wish.
-
-#### Example
-
-```odin
-package crypto_example
-
-// Import the desired package
-import "core:crypto/blake2b"
-
-main :: proc() {
-    input := "foo"
-
-    // Compute the hash, using the high level API
-    computed_hash := blake2b.hash(input)
-
-    // Variant that takes a destination buffer, instead of returning the computed hash
-    hash := make([]byte, sha2.DIGEST_SIZE) // @note: Destination buffer has to be at least as big as the digest size of the hash
-    blake2b.hash(input, hash[:])
-
-    // Compute the hash, using the low level API
-    ctx: blake2b.Context
-    computed_hash_low: [blake2b.DIGEST_SIZE]byte
-    blake2b.init(&ctx)
-    blake2b.update(&ctx, transmute([]byte)input)
-    blake2b.final(&ctx, computed_hash_low[:])
-}
-```
-For example uses of all available algorithms, please see the tests within `tests/core/crypto`.
+This package offers various algorithms implemented in Odin, along with
+useful helpers such as access to the system entropy source, and a
+constant-time byte comparison.
 
 
 ## Implementation considerations
 ## Implementation considerations
 
 
 - The crypto packages are not thread-safe.
 - The crypto packages are not thread-safe.
 - Best-effort is made to mitigate timing side-channels on reasonable
 - Best-effort is made to mitigate timing side-channels on reasonable
-  architectures. Architectures that are known to be unreasonable include
+  architectures.  Architectures that are known to be unreasonable include
   but are not limited to i386, i486, and WebAssembly.
   but are not limited to i386, i486, and WebAssembly.
-- Some but not all of the packages attempt to santize sensitive data,
-  however this is not done consistently through the library at the moment.
-  As Thomas Pornin puts it "In general, such memory cleansing is a fool's
-  quest."
+- Implementations assume a 64-bit architecture (64-bit integer arithmetic
+  is fast, and includes add-with-carry, sub-with-borrow, and full-result
+  multiply).
+- Hardware side-channels are explicitly out of scope for this package.
+  Notable examples include but are not limited to:
+  - Power/RF side-channels etc.
+  - Fault injection attacks etc.
+  - Hardware vulnerabilities ("apply mitigations or buy a new CPU").
+- The packages attempt to sanitize sensitive data, however this is, and
+  will remain a "best-effort" implementation decision.  As Thomas Pornin
+  puts it "In general, such memory cleansing is a fool's quest."
 - None of these packages have received independent third-party review.
 - None of these packages have received independent third-party review.
 
 
 ## License
 ## License

+ 58 - 39
core/crypto/_blake2/blake2.odin

@@ -11,6 +11,7 @@ package _blake2
 */
 */
 
 
 import "core:encoding/endian"
 import "core:encoding/endian"
+import "core:mem"
 
 
 BLAKE2S_BLOCK_SIZE :: 64
 BLAKE2S_BLOCK_SIZE :: 64
 BLAKE2S_SIZE :: 32
 BLAKE2S_SIZE :: 32
@@ -28,7 +29,6 @@ Blake2s_Context :: struct {
 	is_keyed:     bool,
 	is_keyed:     bool,
 	size:         byte,
 	size:         byte,
 	is_last_node: bool,
 	is_last_node: bool,
-	cfg:          Blake2_Config,
 
 
 	is_initialized: bool,
 	is_initialized: bool,
 }
 }
@@ -44,7 +44,6 @@ Blake2b_Context :: struct {
 	is_keyed:     bool,
 	is_keyed:     bool,
 	size:         byte,
 	size:         byte,
 	is_last_node: bool,
 	is_last_node: bool,
-	cfg:          Blake2_Config,
 
 
 	is_initialized: bool,
 	is_initialized: bool,
 }
 }
@@ -83,62 +82,61 @@ BLAKE2B_IV := [8]u64 {
 	0x1f83d9abfb41bd6b, 0x5be0cd19137e2179,
 	0x1f83d9abfb41bd6b, 0x5be0cd19137e2179,
 }
 }
 
 
-init :: proc(ctx: ^$T) {
+init :: proc(ctx: ^$T, cfg: ^Blake2_Config) {
 	when T == Blake2s_Context {
 	when T == Blake2s_Context {
-		block_size :: BLAKE2S_BLOCK_SIZE
 		max_size :: BLAKE2S_SIZE
 		max_size :: BLAKE2S_SIZE
 	} else when T == Blake2b_Context {
 	} else when T == Blake2b_Context {
-		block_size :: BLAKE2B_BLOCK_SIZE
 		max_size :: BLAKE2B_SIZE
 		max_size :: BLAKE2B_SIZE
 	}
 	}
 
 
-	if ctx.cfg.size > max_size {
+	if cfg.size > max_size {
 		panic("blake2: requested output size exceeeds algorithm max")
 		panic("blake2: requested output size exceeeds algorithm max")
 	}
 	}
 
 
-	p := make([]byte, block_size)
-	defer delete(p)
+	// To save having to allocate a scratch buffer, use the internal
+	// data buffer (`ctx.x`), as it is exactly the correct size.
+	p := ctx.x[:]
 
 
-	p[0] = ctx.cfg.size
-	p[1] = byte(len(ctx.cfg.key))
+	p[0] = cfg.size
+	p[1] = byte(len(cfg.key))
 
 
-	if ctx.cfg.salt != nil {
+	if cfg.salt != nil {
 		when T == Blake2s_Context {
 		when T == Blake2s_Context {
-			copy(p[16:], ctx.cfg.salt)
+			copy(p[16:], cfg.salt)
 		} else when T == Blake2b_Context {
 		} else when T == Blake2b_Context {
-			copy(p[32:], ctx.cfg.salt)
+			copy(p[32:], cfg.salt)
 		}
 		}
 	}
 	}
-	if ctx.cfg.person != nil {
+	if cfg.person != nil {
 		when T == Blake2s_Context {
 		when T == Blake2s_Context {
-			copy(p[24:], ctx.cfg.person)
+			copy(p[24:], cfg.person)
 		} else when T == Blake2b_Context {
 		} else when T == Blake2b_Context {
-			copy(p[48:], ctx.cfg.person)
+			copy(p[48:], cfg.person)
 		}
 		}
 	}
 	}
 
 
-	if ctx.cfg.tree != nil {
-		p[2] = ctx.cfg.tree.(Blake2_Tree).fanout
-		p[3] = ctx.cfg.tree.(Blake2_Tree).max_depth
-		endian.unchecked_put_u32le(p[4:], ctx.cfg.tree.(Blake2_Tree).leaf_size)
+	if cfg.tree != nil {
+		p[2] = cfg.tree.(Blake2_Tree).fanout
+		p[3] = cfg.tree.(Blake2_Tree).max_depth
+		endian.unchecked_put_u32le(p[4:], cfg.tree.(Blake2_Tree).leaf_size)
 		when T == Blake2s_Context {
 		when T == Blake2s_Context {
-			p[8] = byte(ctx.cfg.tree.(Blake2_Tree).node_offset)
-			p[9] = byte(ctx.cfg.tree.(Blake2_Tree).node_offset >> 8)
-			p[10] = byte(ctx.cfg.tree.(Blake2_Tree).node_offset >> 16)
-			p[11] = byte(ctx.cfg.tree.(Blake2_Tree).node_offset >> 24)
-			p[12] = byte(ctx.cfg.tree.(Blake2_Tree).node_offset >> 32)
-			p[13] = byte(ctx.cfg.tree.(Blake2_Tree).node_offset >> 40)
-			p[14] = ctx.cfg.tree.(Blake2_Tree).node_depth
-			p[15] = ctx.cfg.tree.(Blake2_Tree).inner_hash_size
+			p[8] = byte(cfg.tree.(Blake2_Tree).node_offset)
+			p[9] = byte(cfg.tree.(Blake2_Tree).node_offset >> 8)
+			p[10] = byte(cfg.tree.(Blake2_Tree).node_offset >> 16)
+			p[11] = byte(cfg.tree.(Blake2_Tree).node_offset >> 24)
+			p[12] = byte(cfg.tree.(Blake2_Tree).node_offset >> 32)
+			p[13] = byte(cfg.tree.(Blake2_Tree).node_offset >> 40)
+			p[14] = cfg.tree.(Blake2_Tree).node_depth
+			p[15] = cfg.tree.(Blake2_Tree).inner_hash_size
 		} else when T == Blake2b_Context {
 		} else when T == Blake2b_Context {
-			endian.unchecked_put_u64le(p[8:], ctx.cfg.tree.(Blake2_Tree).node_offset)
-			p[16] = ctx.cfg.tree.(Blake2_Tree).node_depth
-			p[17] = ctx.cfg.tree.(Blake2_Tree).inner_hash_size
+			endian.unchecked_put_u64le(p[8:], cfg.tree.(Blake2_Tree).node_offset)
+			p[16] = cfg.tree.(Blake2_Tree).node_depth
+			p[17] = cfg.tree.(Blake2_Tree).inner_hash_size
 		}
 		}
 	} else {
 	} else {
 		p[2], p[3] = 1, 1
 		p[2], p[3] = 1, 1
 	}
 	}
-	ctx.size = ctx.cfg.size
+	ctx.size = cfg.size
 	for i := 0; i < 8; i += 1 {
 	for i := 0; i < 8; i += 1 {
 		when T == Blake2s_Context {
 		when T == Blake2s_Context {
 			ctx.h[i] = BLAKE2S_IV[i] ~ endian.unchecked_get_u32le(p[i * 4:])
 			ctx.h[i] = BLAKE2S_IV[i] ~ endian.unchecked_get_u32le(p[i * 4:])
@@ -147,11 +145,14 @@ init :: proc(ctx: ^$T) {
 			ctx.h[i] = BLAKE2B_IV[i] ~ endian.unchecked_get_u64le(p[i * 8:])
 			ctx.h[i] = BLAKE2B_IV[i] ~ endian.unchecked_get_u64le(p[i * 8:])
 		}
 		}
 	}
 	}
-	if ctx.cfg.tree != nil && ctx.cfg.tree.(Blake2_Tree).is_last_node {
+
+	mem.zero(&ctx.x, size_of(ctx.x)) // Done with the scratch space, no barrier.
+
+	if cfg.tree != nil && cfg.tree.(Blake2_Tree).is_last_node {
 		ctx.is_last_node = true
 		ctx.is_last_node = true
 	}
 	}
-	if len(ctx.cfg.key) > 0 {
-		copy(ctx.padded_key[:], ctx.cfg.key)
+	if len(cfg.key) > 0 {
+		copy(ctx.padded_key[:], cfg.key)
 		update(ctx, ctx.padded_key[:])
 		update(ctx, ctx.padded_key[:])
 		ctx.is_keyed = true
 		ctx.is_keyed = true
 	}
 	}
@@ -194,22 +195,40 @@ update :: proc(ctx: ^$T, p: []byte) {
 	ctx.nx += copy(ctx.x[ctx.nx:], p)
 	ctx.nx += copy(ctx.x[ctx.nx:], p)
 }
 }
 
 
-final :: proc(ctx: ^$T, hash: []byte) {
+final :: proc(ctx: ^$T, hash: []byte, finalize_clone: bool = false) {
 	assert(ctx.is_initialized)
 	assert(ctx.is_initialized)
 
 
+	ctx := ctx
+	if finalize_clone {
+		tmp_ctx: T
+		clone(&tmp_ctx, ctx)
+		ctx = &tmp_ctx
+	}
+	defer(reset(ctx))
+
 	when T == Blake2s_Context {
 	when T == Blake2s_Context {
-		if len(hash) < int(ctx.cfg.size) {
+		if len(hash) < int(ctx.size) {
 			panic("crypto/blake2s: invalid destination digest size")
 			panic("crypto/blake2s: invalid destination digest size")
 		}
 		}
 		blake2s_final(ctx, hash)
 		blake2s_final(ctx, hash)
 	} else when T == Blake2b_Context {
 	} else when T == Blake2b_Context {
-		if len(hash) < int(ctx.cfg.size) {
+		if len(hash) < int(ctx.size) {
 			panic("crypto/blake2b: invalid destination digest size")
 			panic("crypto/blake2b: invalid destination digest size")
 		}
 		}
 		blake2b_final(ctx, hash)
 		blake2b_final(ctx, hash)
 	}
 	}
+}
+
+clone :: proc(ctx, other: ^$T) {
+	ctx^ = other^
+}
+
+reset :: proc(ctx: ^$T) {
+	if !ctx.is_initialized {
+		return
+	}
 
 
-	ctx.is_initialized = false
+	mem.zero_explicit(ctx, size_of(ctx^))
 }
 }
 
 
 @(private)
 @(private)

+ 428 - 0
core/crypto/_edwards25519/edwards25519.odin

@@ -0,0 +1,428 @@
+package _edwards25519
+
+/*
+This implements the edwards25519 composite-order group, primarily for
+the purpose of implementing X25519, Ed25519, and ristretto255.  Use of
+this package for other purposes is NOT RECOMMENDED.
+
+See:
+- https://eprint.iacr.org/2011/368.pdf
+- https://datatracker.ietf.org/doc/html/rfc8032
+- https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html
+*/
+
+import "base:intrinsics"
+import "core:crypto"
+import field "core:crypto/_fiat/field_curve25519"
+import "core:mem"
+
+// Group_Element is an edwards25519 group element, as extended homogeneous
+// coordinates, which represents the affine point `(x, y)` as `(X, Y, Z, T)`,
+// with the relations `x = X/Z`, `y = Y/Z`, and `x * y = T/Z`.
+//
+// d = -121665/121666 = 37095705934669439343138083508754565189542113879843219016388785533085940283555
+// a = -1
+//
+// Notes:
+// - There is considerable scope for optimization, however that
+//   will not change the external API, and this is simple and reasonably
+//   performant.
+// - The API deliberately makes it hard to create arbitrary group
+//   elements that are not on the curve.
+// - The group element decoding routine takes the opinionated stance of
+//   rejecting non-canonical encodings.
+
+// FE_D is the curve constant `d` (-121665/121666), used by ge_set_bytes
+// when recovering the x-coordinate from the curve equation.
+FE_D := field.Tight_Field_Element {
+	929955233495203,
+	466365720129213,
+	1662059464998953,
+	2033849074728123,
+	1442794654840575,
+}
+// FE_A is the curve constant `a` (-1), used by ge_double for `D = a*A`.
+@(private)
+FE_A := field.Tight_Field_Element {
+	2251799813685228,
+	2251799813685247,
+	2251799813685247,
+	2251799813685247,
+	2251799813685247,
+}
+// FE_D2 is `2 * d`, the `k` multiplier that ge_addend_set applies to T
+// when precomputing an addend.
+@(private)
+FE_D2 := field.Tight_Field_Element {
+	1859910466990425,
+	932731440258426,
+	1072319116312658,
+	1815898335770999,
+	633789495995903,
+}
+// GE_BASEPOINT is the group element that ge_generator copies out, stored
+// as (X, Y, Z, T) with Z = 1.
+@(private)
+GE_BASEPOINT := Group_Element {
+	field.Tight_Field_Element {
+		1738742601995546,
+		1146398526822698,
+		2070867633025821,
+		562264141797630,
+		587772402128613,
+	},
+	field.Tight_Field_Element {
+		1801439850948184,
+		1351079888211148,
+		450359962737049,
+		900719925474099,
+		1801439850948198,
+	},
+	field.Tight_Field_Element{1, 0, 0, 0, 0},
+	field.Tight_Field_Element {
+		1841354044333475,
+		16398895984059,
+		755974180946558,
+		900171276175154,
+		1821297809914039,
+	},
+}
+// GE_IDENTITY is the identity element, (X, Y, Z, T) = (0, 1, 1, 0).  It
+// is the value that ge_is_small_order and
+// ge_in_prime_order_subgroup_vartime compare against.
+GE_IDENTITY := Group_Element {
+	field.Tight_Field_Element{0, 0, 0, 0, 0},
+	field.Tight_Field_Element{1, 0, 0, 0, 0},
+	field.Tight_Field_Element{1, 0, 0, 0, 0},
+	field.Tight_Field_Element{0, 0, 0, 0, 0},
+}
+
+// Group_Element holds the extended coordinates `(X, Y, Z, T)` of a point
+// whose affine coordinates are `x = X/Z` and `y = Y/Z`.
+Group_Element :: struct {
+	x: field.Tight_Field_Element, // X
+	y: field.Tight_Field_Element, // Y
+	z: field.Tight_Field_Element, // Z
+	t: field.Tight_Field_Element, // T, satisfying x * y = T/Z
+}
+
+// ge_clear overwrites the whole of `ge` with zero bytes using
+// mem.zero_explicit.
+ge_clear :: proc "contextless" (ge: ^Group_Element) {
+	mem.zero_explicit(ge, size_of(ge^))
+}
+
+// ge_set copies the group element `a` into `ge`, one coordinate at a
+// time.
+ge_set :: proc "contextless" (ge, a: ^Group_Element) {
+	// Each call touches only its own coordinate, so the order is
+	// immaterial.
+	field.fe_set(&ge.t, &a.t)
+	field.fe_set(&ge.z, &a.z)
+	field.fe_set(&ge.y, &a.y)
+	field.fe_set(&ge.x, &a.x)
+}
+
+// ge_set_bytes decodes the 32-byte encoding `b` into `ge` and returns
+// true iff decoding succeeded.  Traps if `b` is not exactly 32 bytes.
+//
+// Decoding fails when the square root needed to recover the
+// x-coordinate does not exist, or when re-encoding the recovered point
+// does not reproduce `b` byte-for-byte (a non-canonical encoding).  The
+// work is done in a scratch element so that `ge` is only written via the
+// final conditional assignment.
+@(require_results)
+ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool {
+	if len(b) != 32 {
+		intrinsics.trap()
+	}
+	b_ := transmute(^[32]byte)(raw_data(b))
+
+	// Do the work in a scratch element, so that ge is unchanged on
+	// failure.
+	tmp: Group_Element = ---
+	defer ge_clear(&tmp)
+	field.fe_one(&tmp.z) // Z = 1
+
+	// The encoding is the y-coordinate, with the x-coordinate polarity
+	// (odd/even) encoded in the MSB.
+	field.fe_from_bytes(&tmp.y, b_) // ignores high bit
+
+	// Recover the candidate x-coordinate via the curve equation:
+	// x^2 = (y^2 - 1) / (d * y^2 + 1) (mod p)
+
+	fe_tmp := &tmp.t // Use this to store intermediaries.
+	fe_one := &tmp.z // Z was set to 1 above, so it doubles as the constant 1.
+
+	// x = num = y^2 - 1
+	field.fe_carry_square(fe_tmp, field.fe_relax_cast(&tmp.y)) // fe_tmp = y^2
+	field.fe_carry_sub(&tmp.x, fe_tmp, fe_one)
+
+	// den = d * y^2 + 1
+	field.fe_carry_mul(fe_tmp, field.fe_relax_cast(fe_tmp), field.fe_relax_cast(&FE_D))
+	field.fe_carry_add(fe_tmp, fe_tmp, fe_one)
+
+	// x = invsqrt(den/num)
+	is_square := field.fe_carry_sqrt_ratio_m1(
+		&tmp.x,
+		field.fe_relax_cast(&tmp.x),
+		field.fe_relax_cast(fe_tmp),
+	)
+	if is_square == 0 {
+		return false
+	}
+
+	// Pick the right x-coordinate.
+	field.fe_cond_negate(&tmp.x, &tmp.x, int(b[31] >> 7))
+
+	// t = x * y
+	field.fe_carry_mul(&tmp.t, field.fe_relax_cast(&tmp.x), field.fe_relax_cast(&tmp.y))
+
+	// Reject non-canonical encodings of ge.
+	buf: [32]byte = ---
+	field.fe_to_bytes(&buf, &tmp.y)
+	buf[31] |= byte(field.fe_is_negative(&tmp.x)) << 7
+	is_canonical := crypto.compare_constant_time(b, buf[:])
+
+	// Commit the decoded point to `ge` only when the encoding round-trips.
+	ge_cond_assign(ge, &tmp, is_canonical)
+
+	mem.zero_explicit(&buf, size_of(buf))
+
+	return is_canonical == 1
+}
+
+// ge_bytes writes the 32-byte encoding of `ge` to `dst`: the affine
+// y-coordinate, with the sign of the affine x-coordinate stored in the
+// most significant bit of the final byte.  Traps if `dst` is not exactly
+// 32 bytes long.
+ge_bytes :: proc "contextless" (ge: ^Group_Element, dst: []byte) {
+	if len(dst) != 32 {
+		intrinsics.trap()
+	}
+	dst_ := transmute(^[32]byte)(raw_data(dst))
+
+	// Convert the element to affine (x, y) representation.
+	x, y, z_inv: field.Tight_Field_Element = ---, ---, ---
+	field.fe_carry_inv(&z_inv, field.fe_relax_cast(&ge.z))
+	field.fe_carry_mul(&x, field.fe_relax_cast(&ge.x), field.fe_relax_cast(&z_inv))
+	field.fe_carry_mul(&y, field.fe_relax_cast(&ge.y), field.fe_relax_cast(&z_inv))
+
+	// Encode the y-coordinate.
+	field.fe_to_bytes(dst_, &y)
+
+	// Store the sign of the x-coordinate in the most significant bit of
+	// the encoded y-coordinate.  The sign is taken via fe_is_negative,
+	// exactly as ge_set_bytes does when it re-encodes a point, rather
+	// than by reading bit 0 of the raw low limb, which only gives the
+	// right answer when the limbs happen to hold the fully reduced value.
+	dst_[31] |= byte(field.fe_is_negative(&x)) << 7
+
+	// Wipe the affine coordinates and the inverse before returning.
+	field.fe_clear_vec([]^field.Tight_Field_Element{&x, &y, &z_inv})
+}
+
+// ge_identity sets `ge` to the identity element, (X, Y, Z, T) = (0, 1, 1, 0).
+ge_identity :: proc "contextless" (ge: ^Group_Element) {
+	// The zero coordinates first, then the ones.
+	field.fe_zero(&ge.x)
+	field.fe_zero(&ge.t)
+	field.fe_one(&ge.y)
+	field.fe_one(&ge.z)
+}
+
+// ge_generator sets `ge` to a copy of GE_BASEPOINT.
+ge_generator :: proc "contextless" (ge: ^Group_Element) {
+	ge_set(ge, &GE_BASEPOINT)
+}
+
+// Addend_Group_Element is a group element in the precomputed form taken
+// by ge_add_addend as its second operand.  The trailing `tN` labels are
+// the names of the corresponding intermediaries in the addition formula
+// documented in ge_add_addend.
+@(private)
+Addend_Group_Element :: struct {
+	y2_minus_x2:  field.Loose_Field_Element, // t1
+	y2_plus_x2:   field.Loose_Field_Element, // t3
+	k_times_t2:   field.Tight_Field_Element, // t4
+	two_times_z2: field.Loose_Field_Element, // t5
+}
+
+// ge_addend_set fills `ge_a` with the precomputed addend form of `ge`:
+// Y-X, Y+X, FE_D2*T and Z+Z.
+@(private)
+ge_addend_set :: proc "contextless" (ge_a: ^Addend_Group_Element, ge: ^Group_Element) {
+	// The cheap additions/subtractions first ...
+	field.fe_add(&ge_a.y2_plus_x2, &ge.y, &ge.x)
+	field.fe_sub(&ge_a.y2_minus_x2, &ge.y, &ge.x)
+	field.fe_add(&ge_a.two_times_z2, &ge.z, &ge.z)
+
+	// ... then the single multiplication, by k = 2*d.
+	field.fe_carry_mul(&ge_a.k_times_t2, field.fe_relax_cast(&FE_D2), field.fe_relax_cast(&ge.t))
+}
+
+// ge_addend_conditional_assign runs fe_cond_select over every member of
+// the addend, choosing between the current value of `ge_a` and `a`
+// according to `ctrl`, and stores the outcome back into `ge_a`.
+@(private)
+ge_addend_conditional_assign :: proc "contextless" (ge_a, a: ^Addend_Group_Element, ctrl: int) {
+	// Each select reads and writes a single member, so the order of the
+	// four calls does not matter.
+	field.fe_cond_select(&ge_a.two_times_z2, &ge_a.two_times_z2, &a.two_times_z2, ctrl)
+	field.fe_cond_select(&ge_a.k_times_t2, &ge_a.k_times_t2, &a.k_times_t2, ctrl)
+	field.fe_cond_select(&ge_a.y2_plus_x2, &ge_a.y2_plus_x2, &a.y2_plus_x2, ctrl)
+	field.fe_cond_select(&ge_a.y2_minus_x2, &ge_a.y2_minus_x2, &a.y2_minus_x2, ctrl)
+}
+
+// Add_Scratch is the working storage for ge_add_addend; the members are
+// named after the intermediaries of the addition formula documented
+// there.
+@(private)
+Add_Scratch :: struct {
+	A, B, C, D: field.Tight_Field_Element,
+	E, F, G, H: field.Loose_Field_Element,
+	t0, t2:     field.Loose_Field_Element,
+}
+
+// ge_add sets `ge = a + b`.  `b` is first converted to its precomputed
+// addend form; both that and the scratch area are wiped before
+// returning.
+ge_add :: proc "contextless" (ge, a, b: ^Group_Element) {
+	addend: Addend_Group_Element = ---
+	defer mem.zero_explicit(&addend, size_of(addend))
+
+	scratch: Add_Scratch = ---
+	defer mem.zero_explicit(&scratch, size_of(scratch))
+
+	ge_addend_set(&addend, b)
+	ge_add_addend(ge, a, &addend, &scratch)
+}
+
+// ge_add_addend sets `ge = a + b`, where `b` is supplied in precomputed
+// addend form and `scratch` provides storage for the intermediaries.
+// `scratch` is not cleared by this procedure; ge_add wipes it after the
+// call.
+@(private)
+ge_add_addend :: proc "contextless" (
+	ge, a: ^Group_Element,
+	b: ^Addend_Group_Element,
+	scratch: ^Add_Scratch,
+) {
+	// https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#addition-add-2008-hwcd-3
+	// Assumptions: k=2*d.
+	//
+	// t0 = Y1-X1
+	// t1 = Y2-X2
+	// A = t0*t1
+	// t2 = Y1+X1
+	// t3 = Y2+X2
+	// B = t2*t3
+	// t4 = k*T2
+	// C = T1*t4
+	// t5 = 2*Z2
+	// D = Z1*t5
+	// E = B-A
+	// F = D-C
+	// G = D+C
+	// H = B+A
+	// X3 = E*F
+	// Y3 = G*H
+	// T3 = E*H
+	// Z3 = F*G
+	//
+	// In order to make the scalar multiply faster, the addend is provided
+	// as a `Addend_Group_Element` with t1, t3, t4, and t5 precomputed, as
+	// it is trivially obvious that those are the only values used by the
+	// formula that are directly dependent on `b`, and are only dependent
+	// on `b` and constants.  This saves 1 sub, 2 adds, and 1 multiply,
+	// each time the intermediate representation can be reused.
+
+	A, B, C, D := &scratch.A, &scratch.B, &scratch.C, &scratch.D
+	E, F, G, H := &scratch.E, &scratch.F, &scratch.G, &scratch.H
+	t0, t2 := &scratch.t0, &scratch.t2
+
+	// t1, t3, t4 and t5 are read straight out of the precomputed addend.
+	field.fe_sub(t0, &a.y, &a.x)
+	t1 := &b.y2_minus_x2
+	field.fe_carry_mul(A, t0, t1)
+	field.fe_add(t2, &a.y, &a.x)
+	t3 := &b.y2_plus_x2
+	field.fe_carry_mul(B, t2, t3)
+	t4 := &b.k_times_t2
+	field.fe_carry_mul(C, field.fe_relax_cast(&a.t), field.fe_relax_cast(t4))
+	t5 := &b.two_times_z2
+	field.fe_carry_mul(D, field.fe_relax_cast(&a.z), t5)
+	field.fe_sub(E, B, A)
+	field.fe_sub(F, D, C)
+	field.fe_add(G, D, C)
+	field.fe_add(H, B, A)
+	field.fe_carry_mul(&ge.x, E, F)
+	field.fe_carry_mul(&ge.y, G, H)
+	field.fe_carry_mul(&ge.t, E, H)
+	field.fe_carry_mul(&ge.z, F, G)
+}
+
+// Double_Scratch is the working storage for ge_double; the members are
+// named after the intermediaries of the doubling formula documented
+// there.
+@(private)
+Double_Scratch :: struct {
+	A, B, C, D, G: field.Tight_Field_Element,
+	t0, t2, t3:    field.Tight_Field_Element,
+	E, F, H:       field.Loose_Field_Element,
+	t1:            field.Loose_Field_Element,
+}
+
+// ge_double sets `ge = 2 * a`.
+//
+// If `scratch` is nil, a local scratch area is used and wiped before
+// returning.  A caller-supplied `scratch` is used as-is and is NOT
+// wiped, leaving that to the caller.
+ge_double :: proc "contextless" (ge, a: ^Group_Element, scratch: ^Double_Scratch = nil) {
+	// https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#doubling-dbl-2008-hwcd
+	//
+	// A = X1^2
+	// B = Y1^2
+	// t0 = Z1^2
+	// C = 2*t0
+	// D = a*A
+	// t1 = X1+Y1
+	// t2 = t1^2
+	// t3 = t2-A
+	// E = t3-B
+	// G = D+B
+	// F = G-C
+	// H = D-B
+	// X3 = E*F
+	// Y3 = G*H
+	// T3 = E*H
+	// Z3 = F*G
+
+	// Only sanitize scratch space that this procedure provided itself.
+	sanitize, scratch := scratch == nil, scratch
+	if sanitize {
+		tmp: Double_Scratch = ---
+		scratch = &tmp
+	}
+
+	A, B, C, D, G := &scratch.A, &scratch.B, &scratch.C, &scratch.D, &scratch.G
+	t0, t2, t3 := &scratch.t0, &scratch.t2, &scratch.t3
+	E, F, H := &scratch.E, &scratch.F, &scratch.H
+	t1 := &scratch.t1
+
+	field.fe_carry_square(A, field.fe_relax_cast(&a.x))
+	field.fe_carry_square(B, field.fe_relax_cast(&a.y))
+	field.fe_carry_square(t0, field.fe_relax_cast(&a.z))
+	field.fe_carry_add(C, t0, t0)
+	field.fe_carry_mul(D, field.fe_relax_cast(&FE_A), field.fe_relax_cast(A))
+	field.fe_add(t1, &a.x, &a.y)
+	field.fe_carry_square(t2, t1)
+	field.fe_carry_sub(t3, t2, A)
+	field.fe_sub(E, t3, B)
+	field.fe_carry_add(G, D, B)
+	field.fe_sub(F, G, C)
+	field.fe_sub(H, D, B)
+	G_ := field.fe_relax_cast(G) // G is stored tight; the multiplies below take it relaxed.
+	field.fe_carry_mul(&ge.x, E, F)
+	field.fe_carry_mul(&ge.y, G_, H)
+	field.fe_carry_mul(&ge.t, E, H)
+	field.fe_carry_mul(&ge.z, F, G_)
+
+	if sanitize {
+		mem.zero_explicit(scratch, size_of(Double_Scratch))
+	}
+}
+
+// ge_negate sets `ge = -a`: Y and Z are copied unchanged, while
+// fe_carry_opp is applied to X and T.
+ge_negate :: proc "contextless" (ge, a: ^Group_Element) {
+	// Coordinates that are carried over as-is.
+	field.fe_set(&ge.y, &a.y)
+	field.fe_set(&ge.z, &a.z)
+
+	// Coordinates that change sign.
+	field.fe_carry_opp(&ge.x, &a.x)
+	field.fe_carry_opp(&ge.t, &a.t)
+}
+
+// ge_cond_negate computes `-a` into a temporary and conditionally
+// assigns it to `ge` under control of `ctrl` (see ge_cond_assign).  The
+// temporary is wiped before returning.
+ge_cond_negate :: proc "contextless" (ge, a: ^Group_Element, ctrl: int) {
+	negated: Group_Element = ---
+	defer ge_clear(&negated)
+
+	ge_negate(&negated, a)
+	ge_cond_assign(ge, &negated, ctrl)
+}
+
+// ge_cond_assign applies field.fe_cond_assign to each coordinate of `ge`
+// with the matching coordinate of `a` as the source, under control of
+// `ctrl`.
+// NOTE(review): callers in this file pass 0 or 1 for `ctrl`; presumably
+// 1 performs the assignment and 0 leaves `ge` untouched - confirm
+// against field.fe_cond_assign.
+ge_cond_assign :: proc "contextless" (ge, a: ^Group_Element, ctrl: int) {
+	// One call per coordinate; the calls are independent of each other.
+	field.fe_cond_assign(&ge.t, &a.t, ctrl)
+	field.fe_cond_assign(&ge.z, &a.z, ctrl)
+	field.fe_cond_assign(&ge.y, &a.y, ctrl)
+	field.fe_cond_assign(&ge.x, &a.x, ctrl)
+}
+
+// ge_cond_select applies field.fe_cond_select coordinate by coordinate,
+// writing to `ge` the choice between `a` and `b` that `ctrl` dictates
+// under fe_cond_select's convention.
+ge_cond_select :: proc "contextless" (ge, a, b: ^Group_Element, ctrl: int) {
+	// One call per coordinate; the calls are independent of each other.
+	field.fe_cond_select(&ge.t, &a.t, &b.t, ctrl)
+	field.fe_cond_select(&ge.z, &a.z, &b.z, ctrl)
+	field.fe_cond_select(&ge.y, &a.y, &b.y, ctrl)
+	field.fe_cond_select(&ge.x, &a.x, &b.x, ctrl)
+}
+
+// ge_equal returns the bitwise AND of the two field.fe_equal results
+// obtained by comparing `a` and `b` as affine points.
+//
+// The comparison avoids inversions by cross-multiplying:
+//   X/Z == X'/Z'  <=>  X*Z' == X'*Z
+//   Y/Z == Y'/Z'  <=>  Y*Z' == Y'*Z
+@(require_results)
+ge_equal :: proc "contextless" (a, b: ^Group_Element) -> int {
+	ax_bz, bx_az, ay_bz, by_az: field.Tight_Field_Element = ---, ---, ---, ---
+	defer field.fe_clear_vec([]^field.Tight_Field_Element{&ax_bz, &ay_bz, &bx_az, &by_az})
+
+	// x-coordinates: X*Z' against X'*Z.
+	field.fe_carry_mul(&ax_bz, field.fe_relax_cast(&a.x), field.fe_relax_cast(&b.z))
+	field.fe_carry_mul(&bx_az, field.fe_relax_cast(&b.x), field.fe_relax_cast(&a.z))
+	x_matches := field.fe_equal(&ax_bz, &bx_az)
+
+	// y-coordinates: Y*Z' against Y'*Z.
+	field.fe_carry_mul(&ay_bz, field.fe_relax_cast(&a.y), field.fe_relax_cast(&b.z))
+	field.fe_carry_mul(&by_az, field.fe_relax_cast(&b.y), field.fe_relax_cast(&a.z))
+	y_matches := field.fe_equal(&ay_bz, &by_az)
+
+	// Both comparisons are always evaluated; the result is captured in
+	// a local before the deferred wipe of the products runs.
+	both_match := x_matches & y_matches
+	return both_match
+}
+
+// ge_is_small_order reports whether doubling `ge` three times (i.e.
+// computing 8 * ge) yields the identity element.
+@(require_results)
+ge_is_small_order :: proc "contextless" (ge: ^Group_Element) -> bool {
+	acc: Group_Element = ---
+
+	// First doubling reads the input, the remaining two double in place.
+	ge_double(&acc, ge)
+	for _ in 0 ..< 2 {
+		ge_double(&acc, &acc)
+	}
+
+	is_identity := ge_equal(&acc, &GE_IDENTITY)
+	return is_identity == 1
+}
+
+// ge_in_prime_order_subgroup_vartime reports whether multiplying `ge` by
+// SC_ELL yields the identity element.  As the name says, this is a
+// variable-time check.
+@(require_results)
+ge_in_prime_order_subgroup_vartime :: proc "contextless" (ge: ^Group_Element) -> bool {
+	// This is currently *very* expensive.  The faster method would be
+	// something like (https://eprint.iacr.org/2022/1164.pdf), however
+	// that is a ~50% speedup, and a lot of added complexity for something
+	// that is better solved by "just use ristretto255".
+	tmp: Group_Element = ---
+	_ge_scalarmult(&tmp, ge, &SC_ELL, true)
+	return ge_equal(&tmp, &GE_IDENTITY) == 1
+}

+ 61 - 0
core/crypto/_edwards25519/edwards25519_scalar.odin

@@ -0,0 +1,61 @@
+package _edwards25519
+
+import "base:intrinsics"
+import field "core:crypto/_fiat/field_scalar25519"
+import "core:mem"
+
+// Scalar is a scalar, represented as a field element in the Montgomery
+// domain.
+Scalar :: field.Montgomery_Domain_Field_Element
+
+// SC_ELL holds the limbs of `field.ELL` as a non-Montgomery-domain
+// element.
+//
+// WARNING: This is non-canonical and only to be used when checking if
+// a group element is on the prime-order subgroup.
+@(private)
+SC_ELL := field.Non_Montgomery_Domain_Field_Element {
+	field.ELL[0],
+	field.ELL[1],
+	field.ELL[2],
+	field.ELL[3],
+}
+
+// sc_set_u64 sets `sc` to the integer `i`, by placing it in the first
+// limb of a non-Montgomery element and converting that to the Montgomery
+// domain.  The intermediate value is wiped before returning.
+sc_set_u64 :: proc "contextless" (sc: ^Scalar, i: u64) {
+	plain := field.Non_Montgomery_Domain_Field_Element{i, 0, 0, 0}
+	defer mem.zero_explicit(&plain, size_of(plain))
+
+	field.fe_to_montgomery(sc, &plain)
+}
+
+// sc_set_bytes sets `sc` from the 32-byte slice `b` and passes on the
+// boolean reported by field.fe_from_bytes.  Traps if `b` is not exactly
+// 32 bytes long.
+@(require_results)
+sc_set_bytes :: proc "contextless" (sc: ^Scalar, b: []byte) -> bool {
+	if len(b) != 32 {
+		intrinsics.trap()
+	}
+
+	// The length was checked above, so the slice can be viewed as a
+	// fixed-size array.
+	return field.fe_from_bytes(sc, transmute(^[32]byte)(raw_data(b)))
+}
+
+// sc_set_bytes_rfc8032 sets `sc` from the 32-byte slice `b` using
+// field.fe_from_bytes_rfc8032.  Traps if `b` is not exactly 32 bytes
+// long.
+sc_set_bytes_rfc8032 :: proc "contextless" (sc: ^Scalar, b: []byte) {
+	if len(b) != 32 {
+		intrinsics.trap()
+	}
+
+	// The length was checked above, so the slice can be viewed as a
+	// fixed-size array.
+	field.fe_from_bytes_rfc8032(sc, transmute(^[32]byte)(raw_data(b)))
+}
+
+// sc_clear overwrites the whole of `sc` with zero bytes using
+// mem.zero_explicit.
+sc_clear :: proc "contextless" (sc: ^Scalar) {
+	mem.zero_explicit(sc, size_of(sc^))
+}
+
+// The remaining scalar operations are direct aliases of the procedures
+// provided by the scalar field package.
+
+// Assignment, decoding and encoding.
+sc_set :: field.fe_set
+sc_set_bytes_wide :: field.fe_from_bytes_wide
+sc_bytes :: field.fe_to_bytes
+
+// Constants.
+sc_zero :: field.fe_zero
+sc_one :: field.fe_one
+
+// Arithmetic.
+sc_add :: field.fe_add
+sc_sub :: field.fe_sub
+sc_negate :: field.fe_opp
+sc_mul :: field.fe_mul
+sc_square :: field.fe_square
+
+// Conditional assignment and comparison.
+sc_cond_assign :: field.fe_cond_assign
+sc_equal :: field.fe_equal

+ 288 - 0
core/crypto/_edwards25519/edwards25519_scalar_mul.odin

@@ -0,0 +1,288 @@
+package _edwards25519
+
+import field "core:crypto/_fiat/field_scalar25519"
+import "core:math/bits"
+import "core:mem"
+
+// GE_BASEPOINT_TABLE is 1 * G, ... 15 * G, in precomputed format.
+//
+// Note: When generating, the values were reduced to Tight_Field_Element
+// ranges, even though that is not required.
+@(private)
+GE_BASEPOINT_TABLE := Multiply_Table {
+	{
+		{62697248952638, 204681361388450, 631292143396476, 338455783676468, 1213667448819585},
+		{1288382639258501, 245678601348599, 269427782077623, 1462984067271730, 137412439391563},
+		{301289933810280, 1259582250014073, 1422107436869536, 796239922652654, 1953934009299142},
+		{2, 0, 0, 0, 0},
+	},
+	{
+		{1519297034332653, 1098796920435767, 1823476547744119, 808144629470969, 2110930855619772},
+		{338005982828284, 1667856962156925, 100399270107451, 1604566703601691, 1950338038771369},
+		{1920505767731247, 1443759578976892, 1659852098357048, 1484431291070208, 275018744912646},
+		{763163817085987, 2195095074806923, 2167883174351839, 1868059999999762, 911071066608705},
+	},
+	{
+		{960627541894068, 1314966688943942, 1126875971034044, 2059608312958945, 605975666152586},
+		{1714478358025626, 2209607666607510, 1600912834284834, 496072478982142, 481970031861896},
+		{851735079403194, 1088965826757164, 141569479297499, 602804610059257, 2004026468601520},
+		{197585529552380, 324719066578543, 564481854250498, 1173818332764578, 35452976395676},
+	},
+	{
+		{1152980410747203, 2196804280851952, 25745194962557, 1915167295473129, 1266299690309224},
+		{809905889679060, 979732230071345, 1509972345538142, 188492426534402, 818965583123815},
+		{997685409185036, 1451818320876327, 2126681166774509, 2000509606057528, 235432372486854},
+		{887734189279642, 1460338685162044, 877378220074262, 102436391401299, 153369156847490},
+	},
+	{
+		{2056621900836770, 1821657694132497, 1627986892909426, 1163363868678833, 1108873376459226},
+		{1187697490593623, 1066539945237335, 885654531892000, 1357534489491782, 359370291392448},
+		{1509033452137525, 1305318174298508, 613642471748944, 1987256352550234, 1044283663101541},
+		{220105720697037, 387661783287620, 328296827867762, 360035589590664, 795213236824054},
+	},
+	{
+		{1820794733038396, 1612235121681074, 757405923441402, 1094031020892801, 231025333128907},
+		{1639067873254194, 1484176557946322, 300800382144789, 1329915446659183, 1211704578730455},
+		{641900794791527, 1711751746971612, 179044712319955, 576455585963824, 1852617592509865},
+		{743549047192397, 685091042550147, 1952415336873496, 1965124675654685, 513364998442917},
+	},
+	{
+		{1004557076870448, 1762911374844520, 1330807633622723, 384072910939787, 953849032243810},
+		{2178275058221458, 257933183722891, 376684351537894, 2010189102001786, 1981824297484148},
+		{1332915663881114, 1286540505502549, 1741691283561518, 977214932156314, 1764059494778091},
+		{429702949064027, 1368332611650677, 2019867176450999, 2212258376161746, 526160996742554},
+	},
+	{
+		{2098932988258576, 2203688382075948, 2120400160059479, 1748488020948146, 1203264167282624},
+		{677131386735829, 1850249298025188, 672782146532031, 2144145693078904, 2088656272813787},
+		{1065622343976192, 1573853211848116, 223560413590068, 333846833073379, 27832122205830},
+		{1781008836504573, 917619542051793, 544322748939913, 882577394308384, 1720521246471195},
+	},
+	{
+		{660120928379860, 2081944024858618, 1878411111349191, 424587356517195, 2111317439894005},
+		{1834193977811532, 1864164086863319, 797334633289424, 150410812403062, 2085177078466389},
+		{1438117271371866, 783915531014482, 388731514584658, 292113935417795, 1945855002546714},
+		{1678140823166658, 679103239148744, 614102761596238, 1052962498997885, 1863983323810390},
+	},
+	{
+		{1690309392496233, 1116333140326275, 1377242323631039, 717196888780674, 82724646713353},
+		{1722370213432106, 74265192976253, 264239578448472, 1714909985012994, 2216984958602173},
+		{2010482366920922, 1294036471886319, 566466395005815, 1631955803657320, 1751698647538458},
+		{1073230604155753, 1159087041338551, 1664057985455483, 127472702826203, 1339591128522371},
+	},
+	{
+		{478053307175577, 2179515791720985, 21146535423512, 1831683844029536, 462805561553981},
+		{1945267486565588, 1298536818409655, 2214511796262989, 1904981051429012, 252904800782086},
+		{268945954671210, 222740425595395, 1208025911856230, 1080418823003555, 75929831922483},
+		{1884784014268948, 643868448202966, 978736549726821, 46385971089796, 1296884812292320},
+	},
+	{
+		{1861159462859103, 7077532564710, 963010365896826, 1938780006785270, 766241051941647},
+		{1778966986051906, 1713995999765361, 1394565822271816, 1366699246468722, 1213407027149475},
+		{1978989286560907, 2135084162045594, 1951565508865477, 671788336314416, 293123929458176},
+		{902608944504080, 2167765718046481, 1285718473078022, 1222562171329269, 492109027844479},
+	},
+	{
+		{1820807832746213, 1029220580458586, 1101997555432203, 1039081975563572, 202477981158221},
+		{1866134980680205, 2222325502763386, 1830284629571201, 1046966214478970, 418381946936795},
+		{1783460633291322, 1719505443254998, 1810489639976220, 877049370713018, 2187801198742619},
+		{197118243000763, 305493867565736, 518814410156522, 1656246186645170, 901894734874934},
+	},
+	{
+		{225454942125915, 478410476654509, 600524586037746, 643450007230715, 1018615928259319},
+		{1733330584845708, 881092297970296, 507039890129464, 496397090721598, 2230888519577628},
+		{690155664737246, 1010454785646677, 753170144375012, 1651277613844874, 1622648796364156},
+		{1321310321891618, 1089655277873603, 235891750867089, 815878279563688, 1709264240047556},
+	},
+	{
+		{805027036551342, 1387174275567452, 1156538511461704, 1465897486692171, 1208567094120903},
+		{2228417017817483, 202885584970535, 2182114782271881, 2077405042592934, 1029684358182774},
+		{460447547653983, 627817697755692, 524899434670834, 1228019344939427, 740684787777653},
+		{849757462467675, 447476306919899, 422618957298818, 302134659227815, 675831828440895},
+	},
+}
+
+// ge_scalarmult sets ge to the scalar multiple sc * p.
+//
+// sc is first converted out of the Montgomery domain, as _ge_scalarmult
+// reads the scalar's limbs directly.  _ge_scalarmult is called with its
+// default unsafe_is_vartime = false, so the constant-time table lookup
+// is used, and the converted copy of the scalar is wiped before
+// returning.
+ge_scalarmult :: proc "contextless" (ge, p: ^Group_Element, sc: ^Scalar) {
+	tmp: field.Non_Montgomery_Domain_Field_Element
+	field.fe_from_montgomery(&tmp, sc)
+
+	_ge_scalarmult(ge, p, &tmp)
+
+	mem.zero_explicit(&tmp, size_of(tmp))
+}
+
+// ge_scalarmult_basepoint sets ge to sc * GE_BASEPOINT.
+//
+// This is a plain call to ge_scalarmult, so the multiplication table for
+// the basepoint is rebuilt on each call rather than taken from
+// GE_BASEPOINT_TABLE.
+ge_scalarmult_basepoint :: proc "contextless" (ge: ^Group_Element, sc: ^Scalar) {
+	// Something like the comb method from "Fast and compact elliptic-curve
+	// cryptography" Section 3.3, would be more performant, but more
+	// complex.
+	//
+	// - https://eprint.iacr.org/2012/309
+	ge_scalarmult(ge, &GE_BASEPOINT, sc)
+}
+
+// ge_scalarmult_vartime sets ge to sc * p using the variable-time path
+// of _ge_scalarmult (unsafe_is_vartime = true).
+//
+// Table lookups index directly by scalar nibble and additions are
+// skipped for zero nibbles, so the running time depends on sc.  The
+// temporaries, including the converted scalar, are not wiped.  Use only
+// when sc is not secret.
+ge_scalarmult_vartime :: proc "contextless" (ge, p: ^Group_Element, sc: ^Scalar) {
+	tmp: field.Non_Montgomery_Domain_Field_Element
+	field.fe_from_montgomery(&tmp, sc)
+
+	_ge_scalarmult(ge, p, &tmp, true)
+}
+
+// ge_double_scalarmult_basepoint_vartime sets ge to a * A + b * G, where
+// G is the basepoint whose multiples are stored in GE_BASEPOINT_TABLE.
+//
+// Both scalars are processed together, one nibble at a time, sharing the
+// doublings.  All table lookups use the variable-time path and no
+// temporaries are wiped, so the inputs must not be secret.
+ge_double_scalarmult_basepoint_vartime :: proc "contextless" (
+	ge: ^Group_Element,
+	a: ^Scalar,
+	A: ^Group_Element,
+	b: ^Scalar,
+) {
+	// Strauss-Shamir, commonly referred to as the "Shamir trick",
+	// saves half the doublings, relative to doing this the naive way.
+	//
+	// ABGLSV-Pornin (https://eprint.iacr.org/2020/454) is faster,
+	// but significantly more complex, and has incompatibilities with
+	// mixed-order group elements.
+
+	tmp_add: Add_Scratch = ---
+	tmp_addend: Addend_Group_Element = ---
+	tmp_dbl: Double_Scratch = ---
+	tmp: Group_Element = ---
+
+	// Multiples 1 * A .. 15 * A; the basepoint multiples are precomputed.
+	A_tbl: Multiply_Table = ---
+	mul_tbl_set(&A_tbl, A, &tmp_add)
+
+	// The limbs are read directly below, so leave the Montgomery domain.
+	sc_a, sc_b: field.Non_Montgomery_Domain_Field_Element
+	field.fe_from_montgomery(&sc_a, a)
+	field.fe_from_montgomery(&sc_b, b)
+
+	ge_identity(&tmp)
+	// Walk the 32 scalar bytes from index 31 down to 0, high nibble first.
+	for i := 31; i >= 0; i = i - 1 {
+		// Byte i lives in 64-bit limb i / 8, at bit offset (i % 8) * 8.
+		limb := i / 8
+		shift := uint(i & 7) * 8
+
+		limb_byte_a := sc_a[limb] >> shift
+		limb_byte_b := sc_b[limb] >> shift
+
+		hi_a, lo_a := (limb_byte_a >> 4) & 0x0f, limb_byte_a & 0x0f
+		hi_b, lo_b := (limb_byte_b >> 4) & 0x0f, limb_byte_b & 0x0f
+
+		// The accumulator is still the identity on the first iteration,
+		// so the four doublings (multiply by 16) are skipped there.
+		if i != 31 {
+			ge_double(&tmp, &tmp, &tmp_dbl)
+			ge_double(&tmp, &tmp, &tmp_dbl)
+			ge_double(&tmp, &tmp, &tmp_dbl)
+			ge_double(&tmp, &tmp, &tmp_dbl)
+		}
+		mul_tbl_add(&tmp, &A_tbl, hi_a, &tmp_add, &tmp_addend, true)
+		mul_tbl_add(&tmp, &GE_BASEPOINT_TABLE, hi_b, &tmp_add, &tmp_addend, true)
+
+		// Shift the accumulator by one nibble, then add the low nibbles.
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		mul_tbl_add(&tmp, &A_tbl, lo_a, &tmp_add, &tmp_addend, true)
+		mul_tbl_add(&tmp, &GE_BASEPOINT_TABLE, lo_b, &tmp_add, &tmp_addend, true)
+	}
+
+	ge_set(ge, &tmp)
+}
+
+// _ge_scalarmult sets ge to sc * p, where sc is given in non-Montgomery
+// form so that its limbs can be read directly.
+//
+// When unsafe_is_vartime is false (the default), table lookups are
+// constant time and the scratch values are wiped before returning.
+// When it is true, lookups index by scalar nibble and nothing is wiped.
+@(private)
+_ge_scalarmult :: proc "contextless" (
+	ge, p: ^Group_Element,
+	sc: ^field.Non_Montgomery_Domain_Field_Element,
+	unsafe_is_vartime := false,
+) {
+	// Do the simplest possible thing that works and provides adequate
+	// performance, which is a fixed 4-bit window: four doublings followed
+	// by one table addition per nibble of the scalar.
+
+	tmp_add: Add_Scratch = ---
+	tmp_addend: Addend_Group_Element = ---
+	tmp_dbl: Double_Scratch = ---
+	tmp: Group_Element = ---
+
+	// Multiples 1 * p .. 15 * p.
+	p_tbl: Multiply_Table = ---
+	mul_tbl_set(&p_tbl, p, &tmp_add)
+
+	ge_identity(&tmp)
+	// Walk the 32 scalar bytes from index 31 down to 0, high nibble first.
+	for i := 31; i >= 0; i = i - 1 {
+		// Byte i lives in 64-bit limb i / 8, at bit offset (i % 8) * 8.
+		limb := i / 8
+		shift := uint(i & 7) * 8
+		limb_byte := sc[limb] >> shift
+
+		hi, lo := (limb_byte >> 4) & 0x0f, limb_byte & 0x0f
+
+		// The accumulator is still the identity on the first iteration,
+		// so the four doublings (multiply by 16) are skipped there.
+		if i != 31 {
+			ge_double(&tmp, &tmp, &tmp_dbl)
+			ge_double(&tmp, &tmp, &tmp_dbl)
+			ge_double(&tmp, &tmp, &tmp_dbl)
+			ge_double(&tmp, &tmp, &tmp_dbl)
+		}
+		mul_tbl_add(&tmp, &p_tbl, hi, &tmp_add, &tmp_addend, unsafe_is_vartime)
+
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		mul_tbl_add(&tmp, &p_tbl, lo, &tmp_add, &tmp_addend, unsafe_is_vartime)
+	}
+
+	ge_set(ge, &tmp)
+
+	// Only the constant-time path wipes its scratch values.
+	// Note that p_tbl is not wiped on either path.
+	if !unsafe_is_vartime {
+		ge_clear(&tmp)
+		mem.zero_explicit(&tmp_add, size_of(Add_Scratch))
+		mem.zero_explicit(&tmp_addend, size_of(Addend_Group_Element))
+		mem.zero_explicit(&tmp_dbl, size_of(Double_Scratch))
+	}
+}
+
+// Multiply_Table holds the multiples 1 * P .. 15 * P of a point P in
+// Addend_Group_Element form; entry i stores (i + 1) * P.
+@(private)
+Multiply_Table :: [15]Addend_Group_Element // 0 = inf, which is implicit.
+
+// mul_tbl_set fills tbl with ge, 2 * ge, ..., 15 * ge, each converted to
+// addend form.  tmp_add is caller-provided scratch space for the
+// additions.  The local running sum is cleared before returning.
+@(private)
+mul_tbl_set :: proc "contextless" (
+	tbl: ^Multiply_Table,
+	ge: ^Group_Element,
+	tmp_add: ^Add_Scratch,
+) {
+	tmp: Group_Element = ---
+	ge_set(&tmp, ge)
+
+	ge_addend_set(&tbl[0], ge)
+	// Each step adds 1 * ge (tbl[0]) to the running sum, so tbl[i] ends up
+	// holding (i + 1) * ge.
+	for i := 1; i < 15; i = i + 1 {
+		ge_add_addend(&tmp, &tmp, &tbl[0], tmp_add)
+		ge_addend_set(&tbl[i], &tmp)
+	}
+
+	ge_clear(&tmp)
+}
+
+// mul_tbl_add adds the table entry selected by idx to ge in place, i.e.
+// ge = ge + idx * P for the point P that tbl was built from.  idx is
+// expected to be in 0 ..= 15, with 0 selecting the point at infinity.
+//
+// tmp_add and tmp_addend are caller-provided scratch space; tmp_addend
+// is only written on the constant-time path.
+@(private)
+mul_tbl_add :: proc "contextless" (
+	ge: ^Group_Element,
+	tbl: ^Multiply_Table,
+	idx: u64,
+	tmp_add: ^Add_Scratch,
+	tmp_addend: ^Addend_Group_Element,
+	unsafe_is_vartime: bool,
+) {
+	// Variable time lookup, with the addition omitted entirely if idx == 0.
+	if unsafe_is_vartime {
+		// Skip adding the point at infinity.
+		if idx != 0 {
+			ge_add_addend(ge, ge, &tbl[idx - 1], tmp_add)
+		}
+		return
+	}
+
+	// Constant time lookup.
+	tmp_addend^ = {
+		// Point at infinity (0, 1, 1, 0) in precomputed form
+		{1, 0, 0, 0, 0}, // y - x
+		{1, 0, 0, 0, 0}, // y + x
+		{0, 0, 0, 0, 0}, // t * 2d
+		{2, 0, 0, 0, 0}, // z * 2
+	}
+	// Visit every table entry regardless of idx.  The subtraction
+	// 0 - (i ~ idx) borrows exactly when i != idx, so (~ctrl) & 1 is 1
+	// only for the matching entry, which then replaces the default
+	// point at infinity.
+	for i := u64(1); i < 16; i = i + 1 {
+		_, ctrl := bits.sub_u64(0, (i ~ idx), 0)
+		ge_addend_conditional_assign(tmp_addend, &tbl[i - 1], int(~ctrl) & 1)
+	}
+	ge_add_addend(ge, ge, tmp_addend, tmp_add)
+}

+ 2 - 2
core/crypto/_fiat/fiat.odin

@@ -9,7 +9,7 @@ package fiat
 u1 :: distinct u8
 u1 :: distinct u8
 i1 :: distinct i8
 i1 :: distinct i8
 
 
-@(optimization_mode="none")
+@(optimization_mode = "none")
 cmovznz_u64 :: proc "contextless" (arg1: u1, arg2, arg3: u64) -> (out1: u64) {
 cmovznz_u64 :: proc "contextless" (arg1: u1, arg2, arg3: u64) -> (out1: u64) {
 	x1 := (u64(arg1) * 0xffffffffffffffff)
 	x1 := (u64(arg1) * 0xffffffffffffffff)
 	x2 := ((x1 & arg3) | ((~x1) & arg2))
 	x2 := ((x1 & arg3) | ((~x1) & arg2))
@@ -17,7 +17,7 @@ cmovznz_u64 :: proc "contextless" (arg1: u1, arg2, arg3: u64) -> (out1: u64) {
 	return
 	return
 }
 }
 
 
-@(optimization_mode="none")
+@(optimization_mode = "none")
 cmovznz_u32 :: proc "contextless" (arg1: u1, arg2, arg3: u32) -> (out1: u32) {
 cmovznz_u32 :: proc "contextless" (arg1: u1, arg2, arg3: u32) -> (out1: u32) {
 	x1 := (u32(arg1) * 0xffffffff)
 	x1 := (u32(arg1) * 0xffffffff)
 	x2 := ((x1 & arg3) | ((~x1) & arg2))
 	x2 := ((x1 & arg3) | ((~x1) & arg2))

+ 175 - 46
core/crypto/_fiat/field_curve25519/field.odin

@@ -3,14 +3,32 @@ package field_curve25519
 import "core:crypto"
 import "core:crypto"
 import "core:mem"
 import "core:mem"
 
 
-fe_relax_cast :: #force_inline proc "contextless" (arg1: ^Tight_Field_Element) -> ^Loose_Field_Element {
+fe_relax_cast :: #force_inline proc "contextless" (
+	arg1: ^Tight_Field_Element,
+) -> ^Loose_Field_Element {
 	return transmute(^Loose_Field_Element)(arg1)
 	return transmute(^Loose_Field_Element)(arg1)
 }
 }
 
 
-fe_tighten_cast :: #force_inline proc "contextless" (arg1: ^Loose_Field_Element) -> ^Tight_Field_Element {
+fe_tighten_cast :: #force_inline proc "contextless" (
+	arg1: ^Loose_Field_Element,
+) -> ^Tight_Field_Element {
 	return transmute(^Tight_Field_Element)(arg1)
 	return transmute(^Tight_Field_Element)(arg1)
 }
 }
 
 
+// fe_clear zeroes the field element that arg1 points to using
+// mem.zero_explicit.  arg1 may be a pointer to either a Tight or a
+// Loose field element.
+fe_clear :: proc "contextless" (
+	arg1: $T,
+) where T == ^Tight_Field_Element || T == ^Loose_Field_Element {
+	mem.zero_explicit(arg1, size_of(arg1^))
+}
+
+// fe_clear_vec calls fe_clear on every pointer in arg1, which is a slice
+// of pointers to either Tight or Loose field elements.
+fe_clear_vec :: proc "contextless" (
+	arg1: $T,
+) where T == []^Tight_Field_Element || T == []^Loose_Field_Element {
+	for fe in arg1 {
+		fe_clear(fe)
+	}
+}
+
 fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte) {
 fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte) {
 	// Ignore the unused bit by copying the input and masking the bit off
 	// Ignore the unused bit by copying the input and masking the bit off
 	// prior to deserialization.
 	// prior to deserialization.
@@ -23,12 +41,25 @@ fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte
 	mem.zero_explicit(&tmp1, size_of(tmp1))
 	mem.zero_explicit(&tmp1, size_of(tmp1))
 }
 }
 
 
+// fe_is_negative returns bit 0 of the first byte of arg1's serialized
+// form (as produced by fe_to_bytes), i.e. 1 when that bit is set and 0
+// otherwise.  The serialization buffer is wiped before returning.
+fe_is_negative :: proc "contextless" (arg1: ^Tight_Field_Element) -> int {
+	tmp1: [32]byte = ---
+
+	fe_to_bytes(&tmp1, arg1)
+	ret := tmp1[0] & 1
+
+	mem.zero_explicit(&tmp1, size_of(tmp1))
+
+	return int(ret)
+}
+
 fe_equal :: proc "contextless" (arg1, arg2: ^Tight_Field_Element) -> int {
 fe_equal :: proc "contextless" (arg1, arg2: ^Tight_Field_Element) -> int {
-	tmp2: [32]byte = ---
+	tmp1, tmp2: [32]byte = ---, ---
 
 
+	fe_to_bytes(&tmp1, arg1)
 	fe_to_bytes(&tmp2, arg2)
 	fe_to_bytes(&tmp2, arg2)
-	ret := fe_equal_bytes(arg1, &tmp2)
+	ret := crypto.compare_constant_time(tmp1[:], tmp2[:])
 
 
+	mem.zero_explicit(&tmp1, size_of(tmp1))
 	mem.zero_explicit(&tmp2, size_of(tmp2))
 	mem.zero_explicit(&tmp2, size_of(tmp2))
 
 
 	return ret
 	return ret
@@ -46,7 +77,11 @@ fe_equal_bytes :: proc "contextless" (arg1: ^Tight_Field_Element, arg2: ^[32]byt
 	return ret
 	return ret
 }
 }
 
 
-fe_carry_pow2k :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element, arg2: uint) {
+fe_carry_pow2k :: proc "contextless" (
+	out1: ^Tight_Field_Element,
+	arg1: ^Loose_Field_Element,
+	arg2: uint,
+) {
 	// Special case: `arg1^(2 * 0) = 1`, though this should never happen.
 	// Special case: `arg1^(2 * 0) = 1`, though this should never happen.
 	if arg2 == 0 {
 	if arg2 == 0 {
 		fe_one(out1)
 		fe_one(out1)
@@ -54,27 +89,46 @@ fe_carry_pow2k :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element,
 	}
 	}
 
 
 	fe_carry_square(out1, arg1)
 	fe_carry_square(out1, arg1)
-	for _ in 1..<arg2 {
+	for _ in 1 ..< arg2 {
 		fe_carry_square(out1, fe_relax_cast(out1))
 		fe_carry_square(out1, fe_relax_cast(out1))
 	}
 	}
 }
 }
 
 
+// fe_carry_add sets out1 to arg1 + arg2 and carries the result so that
+// out1 is a Tight field element.  The loose intermediate sum is stored
+// in out1's own storage.
+fe_carry_add :: #force_inline proc "contextless" (out1, arg1, arg2: ^Tight_Field_Element) {
+	fe_add(fe_relax_cast(out1), arg1, arg2)
+	fe_carry(out1, fe_relax_cast(out1))
+}
+
+// fe_carry_sub sets out1 to arg1 - arg2 and carries the result so that
+// out1 is a Tight field element.  The loose intermediate difference is
+// stored in out1's own storage.
+fe_carry_sub :: #force_inline proc "contextless" (out1, arg1, arg2: ^Tight_Field_Element) {
+	fe_sub(fe_relax_cast(out1), arg1, arg2)
+	fe_carry(out1, fe_relax_cast(out1))
+}
+
 fe_carry_opp :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
 fe_carry_opp :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
 	fe_opp(fe_relax_cast(out1), arg1)
 	fe_opp(fe_relax_cast(out1), arg1)
 	fe_carry(out1, fe_relax_cast(out1))
 	fe_carry(out1, fe_relax_cast(out1))
 }
 }
 
 
-fe_carry_invsqrt :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) -> int {
-	// Inverse square root taken from Monocypher.
+// fe_carry_abs sets out1 to arg1, negated when fe_is_negative(arg1)
+// reports 1 and unchanged when it reports 0.
+fe_carry_abs :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
+	fe_cond_negate(out1, arg1, fe_is_negative(arg1))
+}
 
 
-	tmp1, tmp2, tmp3: Tight_Field_Element = ---, ---, ---
+fe_carry_sqrt_ratio_m1 :: proc "contextless" (
+	out1: ^Tight_Field_Element,
+	arg1: ^Loose_Field_Element, // u
+	arg2: ^Loose_Field_Element, // v
+) -> int {
+	// SQRT_RATIO_M1(u, v) from RFC 9496 - 4.2, based on the inverse
+	// square root from Monocypher.
+
+	w: Tight_Field_Element = ---
+	fe_carry_mul(&w, arg1, arg2) // u * v
 
 
-	// t0 = x^((p-5)/8)
-	// Can be achieved with a simple double & add ladder,
-	// but it would be slower.
-	fe_carry_pow2k(&tmp1, arg1, 1)
+	// r = tmp1 = u * w^((p-5)/8)
+	tmp1, tmp2, tmp3: Tight_Field_Element = ---, ---, ---
+	fe_carry_pow2k(&tmp1, fe_relax_cast(&w), 1)
 	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 2)
 	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 2)
-	fe_carry_mul(&tmp2, arg1, fe_relax_cast(&tmp2))
+	fe_carry_mul(&tmp2, fe_relax_cast(&w), fe_relax_cast(&tmp2))
 	fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), fe_relax_cast(&tmp2))
 	fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), fe_relax_cast(&tmp2))
 	fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 1)
 	fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 1)
 	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
 	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
@@ -93,46 +147,121 @@ fe_carry_invsqrt :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element
 	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp2), 50)
 	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp2), 50)
 	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
 	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
 	fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 2)
 	fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 2)
-	fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), arg1)
-
-	// quartic = x^((p-1)/4)
-	quartic := &tmp2
-	fe_carry_square(quartic, fe_relax_cast(&tmp1))
-	fe_carry_mul(quartic, fe_relax_cast(quartic), arg1)
-
-	// Serialize quartic once to save on repeated serialization/sanitization.
-	quartic_buf: [32]byte = ---
-	fe_to_bytes(&quartic_buf, quartic)
-	check := &tmp3
-
-	fe_one(check)
-	p1 := fe_equal_bytes(check, &quartic_buf)
-	fe_carry_opp(check, check)
-	m1 := fe_equal_bytes(check, &quartic_buf)
-	fe_carry_opp(check, &SQRT_M1)
-	ms := fe_equal_bytes(check, &quartic_buf)
-
-	// if quartic == -1 or sqrt(-1)
-	// then  isr = x^((p-1)/4) * sqrt(-1)
-	// else  isr = x^((p-1)/4)
-	fe_carry_mul(out1, fe_relax_cast(&tmp1), fe_relax_cast(&SQRT_M1))
-	fe_cond_assign(out1, &tmp1, (m1|ms) ~ 1)
+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), fe_relax_cast(&w)) // w^((p-5)/8)
 
 
-	mem.zero_explicit(&tmp1, size_of(tmp1))
-	mem.zero_explicit(&tmp2, size_of(tmp2))
-	mem.zero_explicit(&tmp3, size_of(tmp3))
-	mem.zero_explicit(&quartic_buf, size_of(quartic_buf))
+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), arg1) // u * w^((p-5)/8)
+
+	// Serialize `check` once to save on repeated serialization.
+	r, check := &tmp1, &tmp2
+	b: [32]byte = ---
+	fe_carry_square(check, fe_relax_cast(r))
+	fe_carry_mul(check, fe_relax_cast(check), arg2) // check * v
+	fe_to_bytes(&b, check)
+
+	u, neg_u, neg_u_i := &tmp3, &w, check
+	fe_carry(u, arg1)
+	fe_carry_opp(neg_u, u)
+	fe_carry_mul(neg_u_i, fe_relax_cast(neg_u), fe_relax_cast(&FE_SQRT_M1))
+
+	correct_sign_sqrt := fe_equal_bytes(u, &b)
+	flipped_sign_sqrt := fe_equal_bytes(neg_u, &b)
+	flipped_sign_sqrt_i := fe_equal_bytes(neg_u_i, &b)
 
 
-	return p1 | m1
+	r_prime := check
+	fe_carry_mul(r_prime, fe_relax_cast(r), fe_relax_cast(&FE_SQRT_M1))
+	fe_cond_assign(r, r_prime, flipped_sign_sqrt | flipped_sign_sqrt_i)
+
+	// Pick the non-negative square root.
+	fe_carry_abs(out1, r)
+
+	fe_clear_vec([]^Tight_Field_Element{&w, &tmp1, &tmp2, &tmp3})
+	mem.zero_explicit(&b, size_of(b))
+
+	return correct_sign_sqrt | flipped_sign_sqrt
 }
 }
 
 
-fe_carry_inv :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+fe_carry_inv :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
 	tmp1: Tight_Field_Element
 	tmp1: Tight_Field_Element
 
 
 	fe_carry_square(&tmp1, arg1)
 	fe_carry_square(&tmp1, arg1)
-	_ = fe_carry_invsqrt(&tmp1, fe_relax_cast(&tmp1))
+	_ = fe_carry_sqrt_ratio_m1(&tmp1, fe_relax_cast(&FE_ONE), fe_relax_cast(&tmp1))
 	fe_carry_square(&tmp1, fe_relax_cast(&tmp1))
 	fe_carry_square(&tmp1, fe_relax_cast(&tmp1))
 	fe_carry_mul(out1, fe_relax_cast(&tmp1), arg1)
 	fe_carry_mul(out1, fe_relax_cast(&tmp1), arg1)
 
 
-	mem.zero_explicit(&tmp1, size_of(tmp1))
+	fe_clear(&tmp1)
+}
+
+// fe_zero sets all five limbs of out1 to 0.
+fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
+	out1[0] = 0
+	out1[1] = 0
+	out1[2] = 0
+	out1[3] = 0
+	out1[4] = 0
+}
+
+// fe_one sets out1 to 1: limb 0 is 1 and the other four limbs are 0.
+fe_one :: proc "contextless" (out1: ^Tight_Field_Element) {
+	out1[0] = 1
+	out1[1] = 0
+	out1[2] = 0
+	out1[3] = 0
+	out1[4] = 0
+}
+
+// fe_set copies the five limbs of arg1 into out1.  All limbs are read
+// before any is written.
+fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) {
+	x1 := arg1[0]
+	x2 := arg1[1]
+	x3 := arg1[2]
+	x4 := arg1[3]
+	x5 := arg1[4]
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+}
+
+// fe_cond_swap exchanges the contents of out1 and out2 when arg1 is 1,
+// and leaves both unchanged when arg1 is 0, without branching on arg1.
+//
+// arg1 must be 0 or 1; any other value yields a mask that is neither
+// all-zeros nor all-ones.
+@(optimization_mode = "none")
+fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) {
+	// All-ones when arg1 == 1, zero when arg1 == 0.
+	mask := (u64(arg1) * 0xffffffffffffffff)
+	// For each limb, x is the XOR difference of the pair when swapping
+	// and 0 otherwise; XORing it into both limbs swaps them or is a no-op.
+	x := (out1[0] ~ out2[0]) & mask
+	x1, y1 := out1[0] ~ x, out2[0] ~ x
+	x = (out1[1] ~ out2[1]) & mask
+	x2, y2 := out1[1] ~ x, out2[1] ~ x
+	x = (out1[2] ~ out2[2]) & mask
+	x3, y3 := out1[2] ~ x, out2[2] ~ x
+	x = (out1[3] ~ out2[3]) & mask
+	x4, y4 := out1[3] ~ x, out2[3] ~ x
+	x = (out1[4] ~ out2[4]) & mask
+	x5, y5 := out1[4] ~ x, out2[4] ~ x
+	out1[0], out2[0] = x1, y1
+	out1[1], out2[1] = x2, y2
+	out1[2], out2[2] = x3, y3
+	out1[3], out2[3] = x4, y4
+	out1[4], out2[4] = x5, y5
+}
+
+// fe_cond_select sets out1 to arg2 when arg3 is 1, and to arg1 when arg3
+// is 0, without branching on arg3.  All three pointers must be of the
+// same kind, either Tight or Loose.
+//
+// arg3 must be 0 or 1; any other value yields a mask that is neither
+// all-zeros nor all-ones.
+@(optimization_mode = "none")
+fe_cond_select :: #force_no_inline proc "contextless" (
+	out1, arg1, arg2: $T,
+	arg3: int,
+) where T == ^Tight_Field_Element || T == ^Loose_Field_Element {
+	// All-ones when arg3 == 1, zero when arg3 == 0.
+	mask := (u64(arg3) * 0xffffffffffffffff)
+	x1 := ((mask & arg2[0]) | ((~mask) & arg1[0]))
+	x2 := ((mask & arg2[1]) | ((~mask) & arg1[1]))
+	x3 := ((mask & arg2[2]) | ((~mask) & arg1[2]))
+	x4 := ((mask & arg2[3]) | ((~mask) & arg1[3]))
+	x5 := ((mask & arg2[4]) | ((~mask) & arg1[4]))
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+}
+
+fe_cond_negate :: proc "contextless" (out1, arg1: ^Tight_Field_Element, ctrl: int) {
+	tmp1: Tight_Field_Element = ---
+	fe_carry_opp(&tmp1, arg1)
+	fe_cond_select(out1, arg1, &tmp1, ctrl)
+
+	fe_clear(&tmp1)
 }
 }

+ 29 - 61
core/crypto/_fiat/field_curve25519/field51.odin

@@ -30,8 +30,6 @@ package field_curve25519
 //
 //
 // While the base implementation is provably correct, this implementation
 // While the base implementation is provably correct, this implementation
 // makes no such claims as the port and optimizations were done by hand.
 // makes no such claims as the port and optimizations were done by hand.
-// At some point, it may be worth adding support to fiat-crypto for
-// generating Odin output.
 //
 //
 // TODO:
 // TODO:
 //  * When fiat-crypto supports it, using a saturated 64-bit limbs
 //  * When fiat-crypto supports it, using a saturated 64-bit limbs
@@ -44,7 +42,10 @@ import "core:math/bits"
 Loose_Field_Element :: distinct [5]u64
 Loose_Field_Element :: distinct [5]u64
 Tight_Field_Element :: distinct [5]u64
 Tight_Field_Element :: distinct [5]u64
 
 
-SQRT_M1 := Tight_Field_Element{
+FE_ZERO := Tight_Field_Element{0, 0, 0, 0, 0}
+FE_ONE := Tight_Field_Element{1, 0, 0, 0, 0}
+
+FE_SQRT_M1 := Tight_Field_Element {
 	1718705420411056,
 	1718705420411056,
 	234908883556509,
 	234908883556509,
 	2233514472574048,
 	2233514472574048,
@@ -52,7 +53,13 @@ SQRT_M1 := Tight_Field_Element{
 	765476049583133,
 	765476049583133,
 }
 }
 
 
-_addcarryx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+_addcarryx_u51 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
 	x1 := ((u64(arg1) + arg2) + arg3)
 	x1 := ((u64(arg1) + arg2) + arg3)
 	x2 := (x1 & 0x7ffffffffffff)
 	x2 := (x1 & 0x7ffffffffffff)
 	x3 := fiat.u1((x1 >> 51))
 	x3 := fiat.u1((x1 >> 51))
@@ -61,7 +68,13 @@ _addcarryx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u
 	return
 	return
 }
 }
 
 
-_subborrowx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+_subborrowx_u51 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
 	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3))
 	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3))
 	x2 := fiat.i1((x1 >> 51))
 	x2 := fiat.i1((x1 >> 51))
 	x3 := (u64(x1) & 0x7ffffffffffff)
 	x3 := (u64(x1) & 0x7ffffffffffff)
@@ -70,7 +83,7 @@ _subborrowx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3:
 	return
 	return
 }
 }
 
 
-fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
+fe_carry_mul :: proc "contextless" (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
 	x2, x1 := bits.mul_u64(arg1[4], (arg2[4] * 0x13))
 	x2, x1 := bits.mul_u64(arg1[4], (arg2[4] * 0x13))
 	x4, x3 := bits.mul_u64(arg1[4], (arg2[3] * 0x13))
 	x4, x3 := bits.mul_u64(arg1[4], (arg2[3] * 0x13))
 	x6, x5 := bits.mul_u64(arg1[4], (arg2[2] * 0x13))
 	x6, x5 := bits.mul_u64(arg1[4], (arg2[2] * 0x13))
@@ -169,7 +182,7 @@ fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Eleme
 	out1[4] = x152
 	out1[4] = x152
 }
 }
 
 
-fe_carry_square :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+fe_carry_square :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
 	x1 := (arg1[4] * 0x13)
 	x1 := (arg1[4] * 0x13)
 	x2 := (x1 * 0x2)
 	x2 := (x1 * 0x2)
 	x3 := (arg1[4] * 0x2)
 	x3 := (arg1[4] * 0x2)
@@ -305,8 +318,11 @@ fe_opp :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Ele
 	out1[4] = x5
 	out1[4] = x5
 }
 }
 
 
-@(optimization_mode="none")
-fe_cond_assign :: #force_no_inline proc "contextless" (out1, arg1: ^Tight_Field_Element, arg2: int) {
+@(optimization_mode = "none")
+fe_cond_assign :: #force_no_inline proc "contextless" (
+	out1, arg1: ^Tight_Field_Element,
+	arg2: int,
+) {
 	x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0])
 	x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0])
 	x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1])
 	x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1])
 	x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2])
 	x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2])
@@ -527,7 +543,10 @@ fe_relax :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_E
 	out1[4] = x5
 	out1[4] = x5
 }
 }
 
 
-fe_carry_scmul_121666 :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+fe_carry_scmul_121666 :: proc "contextless" (
+	out1: ^Tight_Field_Element,
+	arg1: ^Loose_Field_Element,
+) {
 	x2, x1 := bits.mul_u64(0x1db42, arg1[4])
 	x2, x1 := bits.mul_u64(0x1db42, arg1[4])
 	x4, x3 := bits.mul_u64(0x1db42, arg1[3])
 	x4, x3 := bits.mul_u64(0x1db42, arg1[3])
 	x6, x5 := bits.mul_u64(0x1db42, arg1[2])
 	x6, x5 := bits.mul_u64(0x1db42, arg1[2])
@@ -565,54 +584,3 @@ fe_carry_scmul_121666 :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_El
 	out1[3] = x27
 	out1[3] = x27
 	out1[4] = x32
 	out1[4] = x32
 }
 }
-
-// The following routines were added by hand, and do not come from fiat-crypto.
-
-fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
-	out1[0] = 0
-	out1[1] = 0
-	out1[2] = 0
-	out1[3] = 0
-	out1[4] = 0
-}
-
-fe_one :: proc "contextless" (out1: ^Tight_Field_Element) {
-	out1[0] = 1
-	out1[1] = 0
-	out1[2] = 0
-	out1[3] = 0
-	out1[4] = 0
-}
-
-fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) {
-	x1 := arg1[0]
-	x2 := arg1[1]
-	x3 := arg1[2]
-	x4 := arg1[3]
-	x5 := arg1[4]
-	out1[0] = x1
-	out1[1] = x2
-	out1[2] = x3
-	out1[3] = x4
-	out1[4] = x5
-}
-
-@(optimization_mode="none")
-fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) {
-	mask := -u64(arg1)
-	x := (out1[0] ~ out2[0]) & mask
-	x1, y1 := out1[0] ~ x, out2[0] ~ x
-	x = (out1[1] ~ out2[1]) & mask
-	x2, y2 := out1[1] ~ x, out2[1] ~ x
-	x = (out1[2] ~ out2[2]) & mask
-	x3, y3 := out1[2] ~ x, out2[2] ~ x
-	x = (out1[3] ~ out2[3]) & mask
-	x4, y4 := out1[3] ~ x, out2[3] ~ x
-	x = (out1[4] ~ out2[4]) & mask
-	x5, y5 := out1[4] ~ x, out2[4] ~ x
-	out1[0], out2[0] = x1, y1
-	out1[1], out2[1] = x2, y2
-	out1[2], out2[2] = x3, y3
-	out1[3], out2[3] = x4, y4
-	out1[4], out2[4] = x5, y5
-}

+ 47 - 4
core/crypto/_fiat/field_poly1305/field.odin

@@ -1,17 +1,26 @@
 package field_poly1305
 package field_poly1305
 
 
+import "base:intrinsics"
 import "core:encoding/endian"
 import "core:encoding/endian"
 import "core:mem"
 import "core:mem"
 
 
-fe_relax_cast :: #force_inline proc "contextless" (arg1: ^Tight_Field_Element) -> ^Loose_Field_Element {
+fe_relax_cast :: #force_inline proc "contextless" (
+	arg1: ^Tight_Field_Element,
+) -> ^Loose_Field_Element {
 	return transmute(^Loose_Field_Element)(arg1)
 	return transmute(^Loose_Field_Element)(arg1)
 }
 }
 
 
-fe_tighten_cast :: #force_inline proc "contextless" (arg1: ^Loose_Field_Element) -> ^Tight_Field_Element {
+fe_tighten_cast :: #force_inline proc "contextless" (
+	arg1: ^Loose_Field_Element,
+) -> ^Tight_Field_Element {
 	return transmute(^Tight_Field_Element)(arg1)
 	return transmute(^Tight_Field_Element)(arg1)
 }
 }
 
 
-fe_from_bytes :: #force_inline proc (out1: ^Tight_Field_Element, arg1: []byte, arg2: byte) {
+fe_from_bytes :: #force_inline proc "contextless" (
+	out1: ^Tight_Field_Element,
+	arg1: []byte,
+	arg2: byte,
+) {
 	// fiat-crypto's deserialization routine effectively processes a
 	// fiat-crypto's deserialization routine effectively processes a
 	// single byte at a time, and wants 256-bits of input for a value
 	// single byte at a time, and wants 256-bits of input for a value
 	// that will be 128-bits or 129-bits.
 	// that will be 128-bits or 129-bits.
@@ -20,7 +29,9 @@ fe_from_bytes :: #force_inline proc (out1: ^Tight_Field_Element, arg1: []byte, a
 	// makes implementing the actual MAC block processing considerably
 	// makes implementing the actual MAC block processing considerably
 	// neater.
 	// neater.
 
 
-	assert(len(arg1) == 16)
+	if len(arg1) != 16 {
+		intrinsics.trap()
+	}
 
 
 	// While it may be unwise to do deserialization here on our
 	// While it may be unwise to do deserialization here on our
 	// own when fiat-crypto provides equivalent functionality,
 	// own when fiat-crypto provides equivalent functionality,
@@ -51,3 +62,35 @@ fe_from_u64s :: proc "contextless" (out1: ^Tight_Field_Element, lo, hi: u64) {
 	// This routine is only used to deserialize `r` which is confidential.
 	// This routine is only used to deserialize `r` which is confidential.
 	mem.zero_explicit(&tmp, size_of(tmp))
 	mem.zero_explicit(&tmp, size_of(tmp))
 }
 }
+
+// fe_zero sets all three limbs of out1 to 0.
+fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
+	out1[0] = 0
+	out1[1] = 0
+	out1[2] = 0
+}
+
+// fe_set copies the three limbs of arg1 into out1.  All limbs are read
+// before any is written.
+fe_set :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
+	x1 := arg1[0]
+	x2 := arg1[1]
+	x3 := arg1[2]
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+}
+
+// fe_cond_swap exchanges the contents of out1 and out2 when arg1 is
+// true, and leaves both unchanged when it is false, without branching
+// on arg1.
+@(optimization_mode = "none")
+fe_cond_swap :: #force_no_inline proc "contextless" (
+	out1, out2: ^Tight_Field_Element,
+	arg1: bool,
+) {
+	// All-ones when arg1 is true, zero when it is false.
+	mask := (u64(arg1) * 0xffffffffffffffff)
+	// For each limb, x is the XOR difference of the pair when swapping
+	// and 0 otherwise; XORing it into both limbs swaps them or is a no-op.
+	x := (out1[0] ~ out2[0]) & mask
+	x1, y1 := out1[0] ~ x, out2[0] ~ x
+	x = (out1[1] ~ out2[1]) & mask
+	x2, y2 := out1[1] ~ x, out2[1] ~ x
+	x = (out1[2] ~ out2[2]) & mask
+	x3, y3 := out1[2] ~ x, out2[2] ~ x
+	out1[0], out2[0] = x1, y1
+	out1[1], out2[1] = x2, y2
+	out1[2], out2[2] = x3, y3
+}

+ 35 - 39
core/crypto/_fiat/field_poly1305/field4344.odin

@@ -39,7 +39,13 @@ import "core:math/bits"
 Loose_Field_Element :: distinct [3]u64
 Loose_Field_Element :: distinct [3]u64
 Tight_Field_Element :: distinct [3]u64
 Tight_Field_Element :: distinct [3]u64
 
 
-_addcarryx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+_addcarryx_u44 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
 	x1 := ((u64(arg1) + arg2) + arg3)
 	x1 := ((u64(arg1) + arg2) + arg3)
 	x2 := (x1 & 0xfffffffffff)
 	x2 := (x1 & 0xfffffffffff)
 	x3 := fiat.u1((x1 >> 44))
 	x3 := fiat.u1((x1 >> 44))
@@ -48,7 +54,13 @@ _addcarryx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u
 	return
 	return
 }
 }
 
 
-_subborrowx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+_subborrowx_u44 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
 	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3))
 	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3))
 	x2 := fiat.i1((x1 >> 44))
 	x2 := fiat.i1((x1 >> 44))
 	x3 := (u64(x1) & 0xfffffffffff)
 	x3 := (u64(x1) & 0xfffffffffff)
@@ -57,7 +69,13 @@ _subborrowx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3:
 	return
 	return
 }
 }
 
 
-_addcarryx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+_addcarryx_u43 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
 	x1 := ((u64(arg1) + arg2) + arg3)
 	x1 := ((u64(arg1) + arg2) + arg3)
 	x2 := (x1 & 0x7ffffffffff)
 	x2 := (x1 & 0x7ffffffffff)
 	x3 := fiat.u1((x1 >> 43))
 	x3 := fiat.u1((x1 >> 43))
@@ -66,7 +84,13 @@ _addcarryx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u
 	return
 	return
 }
 }
 
 
-_subborrowx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+_subborrowx_u43 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
 	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3))
 	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3))
 	x2 := fiat.i1((x1 >> 43))
 	x2 := fiat.i1((x1 >> 43))
 	x3 := (u64(x1) & 0x7ffffffffff)
 	x3 := (u64(x1) & 0x7ffffffffff)
@@ -75,7 +99,7 @@ _subborrowx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3:
 	return
 	return
 }
 }
 
 
-fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
+fe_carry_mul :: proc "contextless" (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
 	x2, x1 := bits.mul_u64(arg1[2], (arg2[2] * 0x5))
 	x2, x1 := bits.mul_u64(arg1[2], (arg2[2] * 0x5))
 	x4, x3 := bits.mul_u64(arg1[2], (arg2[1] * 0xa))
 	x4, x3 := bits.mul_u64(arg1[2], (arg2[1] * 0xa))
 	x6, x5 := bits.mul_u64(arg1[1], (arg2[2] * 0xa))
 	x6, x5 := bits.mul_u64(arg1[1], (arg2[2] * 0xa))
@@ -120,7 +144,7 @@ fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Eleme
 	out1[2] = x62
 	out1[2] = x62
 }
 }
 
 
-fe_carry_square :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+fe_carry_square :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
 	x1 := (arg1[2] * 0x5)
 	x1 := (arg1[2] * 0x5)
 	x2 := (x1 * 0x2)
 	x2 := (x1 * 0x2)
 	x3 := (arg1[2] * 0x2)
 	x3 := (arg1[2] * 0x2)
@@ -201,8 +225,11 @@ fe_opp :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Ele
 	out1[2] = x3
 	out1[2] = x3
 }
 }
 
 
-@(optimization_mode="none")
-fe_cond_assign :: #force_no_inline proc "contextless" (out1, arg1: ^Tight_Field_Element, arg2: bool) {
+@(optimization_mode = "none")
+fe_cond_assign :: #force_no_inline proc "contextless" (
+	out1, arg1: ^Tight_Field_Element,
+	arg2: bool,
+) {
 	x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0])
 	x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0])
 	x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1])
 	x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1])
 	x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2])
 	x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2])
@@ -325,34 +352,3 @@ fe_relax :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_E
 	out1[1] = x2
 	out1[1] = x2
 	out1[2] = x3
 	out1[2] = x3
 }
 }
-
-// The following routines were added by hand, and do not come from fiat-crypto.
-
-fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
-	out1[0] = 0
-	out1[1] = 0
-	out1[2] = 0
-}
-
-fe_set :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
-	x1 := arg1[0]
-	x2 := arg1[1]
-	x3 := arg1[2]
-	out1[0] = x1
-	out1[1] = x2
-	out1[2] = x3
-}
-
-@(optimization_mode="none")
-fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: bool) {
-	mask := -u64(arg1)
-	x := (out1[0] ~ out2[0]) & mask
-	x1, y1 := out1[0] ~ x, out2[0] ~ x
-	x = (out1[1] ~ out2[1]) & mask
-	x2, y2 := out1[1] ~ x, out2[1] ~ x
-	x = (out1[2] ~ out2[2]) & mask
-	x3, y3 := out1[2] ~ x, out2[2] ~ x
-	out1[0], out2[0] = x1, y1
-	out1[1], out2[1] = x2, y2
-	out1[2], out2[2] = x3, y3
-}

+ 153 - 0
core/crypto/_fiat/field_scalar25519/field.odin

@@ -0,0 +1,153 @@
+package field_scalar25519
+
+import "base:intrinsics"
+import "core:encoding/endian"
+import "core:math/bits"
+import "core:mem"
+
+// _TWO_168 is the multiplier fe_from_bytes_wide applies to the middle
+// chunk of a 64-byte input ("b * 2^168").
+// NOTE(review): presumably 2^168 mod ELL in Montgomery form - confirm the
+// limb values against an independent computation.
+@(private)
+_TWO_168 := Montgomery_Domain_Field_Element {
+	0x5b8ab432eac74798,
+	0x38afddd6de59d5d7,
+	0xa2c131b399411b7c,
+	0x6329a7ed9ce5a30,
+}
+// _TWO_336 is the multiplier fe_from_bytes_wide applies to the top chunk
+// of a 64-byte input ("c * 2^336").
+// NOTE(review): presumably 2^336 mod ELL in Montgomery form - confirm the
+// limb values against an independent computation.
+@(private)
+_TWO_336 := Montgomery_Domain_Field_Element {
+	0xbd3d108e2b35ecc5,
+	0x5c3a3718bdf9c90b,
+	0x63aa97a331b4f2ee,
+	0x3d217f5be65cb5c,
+}
+
+// fe_clear wipes the field element at arg1 with mem.zero_explicit.
+fe_clear :: proc "contextless" (arg1: ^Montgomery_Domain_Field_Element) {
+	mem.zero_explicit(arg1, size_of(arg1^))
+}
+
+// fe_from_bytes decodes the 32-byte little-endian value at arg1 and
+// stores it, converted to Montgomery form, in out1.
+//
+// Unless unsafe_assume_canonical is set, the decoded value must lie in
+// the range [0, ELL); if it does not, false is returned and out1 is not
+// written.  The intermediate limbs are wiped before returning.
+fe_from_bytes :: proc "contextless" (
+	out1: ^Montgomery_Domain_Field_Element,
+	arg1: ^[32]byte,
+	unsafe_assume_canonical := false,
+) -> bool {
+	limbs: Non_Montgomery_Domain_Field_Element
+	defer mem.zero_explicit(&limbs, size_of(limbs))
+
+	limbs[0] = endian.unchecked_get_u64le(arg1[0:])
+	limbs[1] = endian.unchecked_get_u64le(arg1[8:])
+	limbs[2] = endian.unchecked_get_u64le(arg1[16:])
+	limbs[3] = endian.unchecked_get_u64le(arg1[24:])
+
+	if !unsafe_assume_canonical {
+		// Compute (ELL - 1) - limbs, keeping only the borrow chain.  A
+		// borrow out of the top limb means limbs >= ELL, i.e. the value
+		// is outside the range [0, ELL).
+		borrow: u64
+		_, borrow = bits.sub_u64(ELL[0] - 1, limbs[0], borrow)
+		_, borrow = bits.sub_u64(ELL[1], limbs[1], borrow)
+		_, borrow = bits.sub_u64(ELL[2], limbs[2], borrow)
+		_, borrow = bits.sub_u64(ELL[3], limbs[3], borrow)
+		if borrow != 0 {
+			return false
+		}
+	}
+
+	fe_to_montgomery(out1, &limbs)
+	return true
+}
+
+// fe_from_bytes_rfc8032 applies the RFC 8032 "clamping" to the 32 bytes
+// at arg1 and stores the resulting scalar in out1.
+fe_from_bytes_rfc8032 :: proc "contextless" (
+	out1: ^Montgomery_Domain_Field_Element,
+	arg1: ^[32]byte,
+) {
+	// The clamped value is zero-extended to 64 bytes so that it can go
+	// through the wide reduction; the scratch buffer is wiped on exit.
+	wide: [64]byte
+	defer mem.zero_explicit(&wide, size_of(wide))
+	copy(wide[:], arg1[:])
+
+	// Clamp: clear the low three bits, clear the top bit, and set the
+	// 254th bit.  Setting that bit makes the encoding non-canonical,
+	// hence the wide reduction below.
+	wide[0] &= 0xf8
+	wide[31] = (wide[31] & 0x7f) | 0x40
+
+	fe_from_bytes_wide(out1, &wide)
+}
+
+// fe_from_bytes_wide reduces the 64-byte little-endian value at arg1 and
+// stores the result in out1.
+fe_from_bytes_wide :: proc "contextless" (
+	out1: ^Montgomery_Domain_Field_Element,
+	arg1: ^[64]byte,
+) {
+	// Frank Denis' trick, as documented by Filippo Valsorda at
+	// https://words.filippo.io/dispatches/wide-reduction/
+	//
+	// The input is split into chunks a (21 bytes), b (21 bytes) and
+	// c (22 bytes), each short enough to be decoded directly, and then
+	// recombined as
+	//
+	//   x = a + b * 2^168 + c * 2^336  mod l
+	term: Montgomery_Domain_Field_Element
+	defer fe_clear(&term)
+
+	_fe_from_bytes_short(out1, arg1[:21]) // a
+
+	_fe_from_bytes_short(&term, arg1[21:42]) // b
+	fe_mul(&term, &term, &_TWO_168) // b * 2^168
+	fe_add(out1, out1, &term) // a + b * 2^168
+
+	_fe_from_bytes_short(&term, arg1[42:]) // c
+	fe_mul(&term, &term, &_TWO_336) // c * 2^336
+	fe_add(out1, out1, &term) // a + b * 2^168 + c * 2^336
+}
+
+// _fe_from_bytes_short decodes a little-endian value of fewer than 32
+// bytes into out1.  The input is zero-padded to 32 bytes and decoded with
+// the canonical-range check skipped.  Traps if len(arg1) >= 32.
+@(private)
+_fe_from_bytes_short :: proc "contextless" (out1: ^Montgomery_Domain_Field_Element, arg1: []byte) {
+	// INVARIANT: len(arg1) < 32.
+	if len(arg1) >= 32 {
+		intrinsics.trap()
+	}
+
+	padded: [32]byte
+	defer mem.zero_explicit(&padded, size_of(padded))
+	copy(padded[:], arg1)
+
+	_ = fe_from_bytes(out1, &padded, true)
+}
+
+// fe_to_bytes converts arg1 out of Montgomery form and writes it to out1
+// as four little-endian u64 limbs, least-significant limb first.  Traps
+// unless len(out1) == 32.
+fe_to_bytes :: proc "contextless" (out1: []byte, arg1: ^Montgomery_Domain_Field_Element) {
+	if len(out1) != 32 {
+		intrinsics.trap()
+	}
+
+	plain: Non_Montgomery_Domain_Field_Element
+	defer mem.zero_explicit(&plain, size_of(plain))
+	fe_from_montgomery(&plain, arg1)
+
+	for limb, i in plain {
+		endian.unchecked_put_u64le(out1[i * 8:], limb)
+	}
+}
+
+// fe_equal returns 1 if arg1 and arg2 hold the same value and 0
+// otherwise.  The result is derived from a borrow bit rather than from a
+// branch on the comparison.
+fe_equal :: proc "contextless" (arg1, arg2: ^Montgomery_Domain_Field_Element) -> int {
+	diff: Montgomery_Domain_Field_Element
+	defer fe_clear(&diff)
+	fe_sub(&diff, arg1, arg2)
+
+	// fe_non_zero(&diff) is 0 exactly when arg1 == arg2, and that is the
+	// only case in which subtracting 1 underflows.  The borrow, which is
+	// then 1, is the result.
+	_, is_equal := bits.sub_u64(fe_non_zero(&diff), 1, 0)
+	return int(is_equal)
+}
+
+// fe_zero sets all four limbs of out1 to zero.
+fe_zero :: proc "contextless" (out1: ^Montgomery_Domain_Field_Element) {
+	out1[0], out1[1], out1[2], out1[3] = 0, 0, 0, 0
+}
+
+// fe_set copies the four limbs of arg1 into out1.  Every limb is read
+// before any limb is written, so out1 and arg1 may refer to the same
+// element.
+fe_set :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element) {
+	l0, l1, l2, l3 := arg1[0], arg1[1], arg1[2], arg1[3]
+	out1[0], out1[1], out1[2], out1[3] = l0, l1, l2, l3
+}

+ 535 - 0
core/crypto/_fiat/field_scalar25519/field64.odin

@@ -0,0 +1,535 @@
+// The BSD 1-Clause License (BSD-1-Clause)
+//
+// Copyright (c) 2015-2020 the fiat-crypto authors (see the AUTHORS file)
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     1. Redistributions of source code must retain the above copyright
+//        notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY the fiat-crypto authors "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Berkeley Software Design,
+// Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package field_scalar25519
+
+// The file provides arithmetic on the field Z/(2^252+27742317777372353535851937790883648493)
+// using a 64-bit Montgomery form internal representation.  It is derived
+// primarily from the machine generated Golang output from the fiat-crypto
+// project.
+//
+// While the base implementation is provably correct, this implementation
+// makes no such claims as the port and optimizations were done by hand.
+
+import fiat "core:crypto/_fiat"
+import "core:math/bits"
+
+// ELL is the saturated representation of the field order, least-significant
+// limb first.
+ELL :: [4]u64{0x5812631a5cf5d3ed, 0x14def9dea2f79cd6, 0x0, 0x1000000000000000}
+
+// Montgomery_Domain_Field_Element is a field element held as four u64
+// limbs in Montgomery form; it is the type the arithmetic routines in
+// this package operate on.
+Montgomery_Domain_Field_Element :: distinct [4]u64
+// Non_Montgomery_Domain_Field_Element is a field element held as four
+// plain u64 limbs; it is the input of fe_to_montgomery and the output of
+// fe_from_montgomery.
+Non_Montgomery_Domain_Field_Element :: distinct [4]u64
+
+// fe_mul sets out1 to the product of arg1 and arg2, with all three values
+// in Montgomery form.
+//
+// The limbs of arg1 are consumed one per round, in the order arg1[0],
+// arg1[1], arg1[2], arg1[3].  Each round multiplies that limb by all of
+// arg2, adds the partial product to the running accumulator, and then
+// adds a multiple of ELL (limbs 0x5812631a5cf5d3ed, 0x14def9dea2f79cd6,
+// 0x0, 0x1000000000000000) selected through the constant
+// 0xd2b51da312547e1b.  A final trial subtraction of ELL is resolved with
+// fiat.cmovznz_u64 on the last borrow.
+//
+// Both inputs are fully read before out1 is written, so out1 may alias
+// arg1 and/or arg2.
+//
+// NOTE(review): 0xd2b51da312547e1b is presumably -ELL^-1 mod 2^64 -
+// confirm against the fiat-crypto output.
+fe_mul :: proc "contextless" (out1, arg1, arg2: ^Montgomery_Domain_Field_Element) {
+	x1 := arg1[1]
+	x2 := arg1[2]
+	x3 := arg1[3]
+	x4 := arg1[0]
+	// Round 0: arg1[0] * arg2.
+	x6, x5 := bits.mul_u64(x4, arg2[3])
+	x8, x7 := bits.mul_u64(x4, arg2[2])
+	x10, x9 := bits.mul_u64(x4, arg2[1])
+	x12, x11 := bits.mul_u64(x4, arg2[0])
+	x13, x14 := bits.add_u64(x12, x9, u64(0x0))
+	x15, x16 := bits.add_u64(x10, x7, u64(fiat.u1(x14)))
+	x17, x18 := bits.add_u64(x8, x5, u64(fiat.u1(x16)))
+	x19 := (u64(fiat.u1(x18)) + x6)
+	// Round 0 reduction step.
+	_, x20 := bits.mul_u64(x11, 0xd2b51da312547e1b)
+	x23, x22 := bits.mul_u64(x20, 0x1000000000000000)
+	x25, x24 := bits.mul_u64(x20, 0x14def9dea2f79cd6)
+	x27, x26 := bits.mul_u64(x20, 0x5812631a5cf5d3ed)
+	x28, x29 := bits.add_u64(x27, x24, u64(0x0))
+	x30 := (u64(fiat.u1(x29)) + x25)
+	_, x32 := bits.add_u64(x11, x26, u64(0x0))
+	x33, x34 := bits.add_u64(x13, x28, u64(fiat.u1(x32)))
+	x35, x36 := bits.add_u64(x15, x30, u64(fiat.u1(x34)))
+	x37, x38 := bits.add_u64(x17, x22, u64(fiat.u1(x36)))
+	x39, x40 := bits.add_u64(x19, x23, u64(fiat.u1(x38)))
+	// Round 1: arg1[1] * arg2, added to the accumulator.
+	x42, x41 := bits.mul_u64(x1, arg2[3])
+	x44, x43 := bits.mul_u64(x1, arg2[2])
+	x46, x45 := bits.mul_u64(x1, arg2[1])
+	x48, x47 := bits.mul_u64(x1, arg2[0])
+	x49, x50 := bits.add_u64(x48, x45, u64(0x0))
+	x51, x52 := bits.add_u64(x46, x43, u64(fiat.u1(x50)))
+	x53, x54 := bits.add_u64(x44, x41, u64(fiat.u1(x52)))
+	x55 := (u64(fiat.u1(x54)) + x42)
+	x56, x57 := bits.add_u64(x33, x47, u64(0x0))
+	x58, x59 := bits.add_u64(x35, x49, u64(fiat.u1(x57)))
+	x60, x61 := bits.add_u64(x37, x51, u64(fiat.u1(x59)))
+	x62, x63 := bits.add_u64(x39, x53, u64(fiat.u1(x61)))
+	x64, x65 := bits.add_u64(u64(fiat.u1(x40)), x55, u64(fiat.u1(x63)))
+	// Round 1 reduction step.
+	_, x66 := bits.mul_u64(x56, 0xd2b51da312547e1b)
+	x69, x68 := bits.mul_u64(x66, 0x1000000000000000)
+	x71, x70 := bits.mul_u64(x66, 0x14def9dea2f79cd6)
+	x73, x72 := bits.mul_u64(x66, 0x5812631a5cf5d3ed)
+	x74, x75 := bits.add_u64(x73, x70, u64(0x0))
+	x76 := (u64(fiat.u1(x75)) + x71)
+	_, x78 := bits.add_u64(x56, x72, u64(0x0))
+	x79, x80 := bits.add_u64(x58, x74, u64(fiat.u1(x78)))
+	x81, x82 := bits.add_u64(x60, x76, u64(fiat.u1(x80)))
+	x83, x84 := bits.add_u64(x62, x68, u64(fiat.u1(x82)))
+	x85, x86 := bits.add_u64(x64, x69, u64(fiat.u1(x84)))
+	x87 := (u64(fiat.u1(x86)) + u64(fiat.u1(x65)))
+	// Round 2: arg1[2] * arg2, added to the accumulator.
+	x89, x88 := bits.mul_u64(x2, arg2[3])
+	x91, x90 := bits.mul_u64(x2, arg2[2])
+	x93, x92 := bits.mul_u64(x2, arg2[1])
+	x95, x94 := bits.mul_u64(x2, arg2[0])
+	x96, x97 := bits.add_u64(x95, x92, u64(0x0))
+	x98, x99 := bits.add_u64(x93, x90, u64(fiat.u1(x97)))
+	x100, x101 := bits.add_u64(x91, x88, u64(fiat.u1(x99)))
+	x102 := (u64(fiat.u1(x101)) + x89)
+	x103, x104 := bits.add_u64(x79, x94, u64(0x0))
+	x105, x106 := bits.add_u64(x81, x96, u64(fiat.u1(x104)))
+	x107, x108 := bits.add_u64(x83, x98, u64(fiat.u1(x106)))
+	x109, x110 := bits.add_u64(x85, x100, u64(fiat.u1(x108)))
+	x111, x112 := bits.add_u64(x87, x102, u64(fiat.u1(x110)))
+	// Round 2 reduction step.
+	_, x113 := bits.mul_u64(x103, 0xd2b51da312547e1b)
+	x116, x115 := bits.mul_u64(x113, 0x1000000000000000)
+	x118, x117 := bits.mul_u64(x113, 0x14def9dea2f79cd6)
+	x120, x119 := bits.mul_u64(x113, 0x5812631a5cf5d3ed)
+	x121, x122 := bits.add_u64(x120, x117, u64(0x0))
+	x123 := (u64(fiat.u1(x122)) + x118)
+	_, x125 := bits.add_u64(x103, x119, u64(0x0))
+	x126, x127 := bits.add_u64(x105, x121, u64(fiat.u1(x125)))
+	x128, x129 := bits.add_u64(x107, x123, u64(fiat.u1(x127)))
+	x130, x131 := bits.add_u64(x109, x115, u64(fiat.u1(x129)))
+	x132, x133 := bits.add_u64(x111, x116, u64(fiat.u1(x131)))
+	x134 := (u64(fiat.u1(x133)) + u64(fiat.u1(x112)))
+	// Round 3: arg1[3] * arg2, added to the accumulator.
+	x136, x135 := bits.mul_u64(x3, arg2[3])
+	x138, x137 := bits.mul_u64(x3, arg2[2])
+	x140, x139 := bits.mul_u64(x3, arg2[1])
+	x142, x141 := bits.mul_u64(x3, arg2[0])
+	x143, x144 := bits.add_u64(x142, x139, u64(0x0))
+	x145, x146 := bits.add_u64(x140, x137, u64(fiat.u1(x144)))
+	x147, x148 := bits.add_u64(x138, x135, u64(fiat.u1(x146)))
+	x149 := (u64(fiat.u1(x148)) + x136)
+	x150, x151 := bits.add_u64(x126, x141, u64(0x0))
+	x152, x153 := bits.add_u64(x128, x143, u64(fiat.u1(x151)))
+	x154, x155 := bits.add_u64(x130, x145, u64(fiat.u1(x153)))
+	x156, x157 := bits.add_u64(x132, x147, u64(fiat.u1(x155)))
+	x158, x159 := bits.add_u64(x134, x149, u64(fiat.u1(x157)))
+	// Round 3 reduction step.
+	_, x160 := bits.mul_u64(x150, 0xd2b51da312547e1b)
+	x163, x162 := bits.mul_u64(x160, 0x1000000000000000)
+	x165, x164 := bits.mul_u64(x160, 0x14def9dea2f79cd6)
+	x167, x166 := bits.mul_u64(x160, 0x5812631a5cf5d3ed)
+	x168, x169 := bits.add_u64(x167, x164, u64(0x0))
+	x170 := (u64(fiat.u1(x169)) + x165)
+	_, x172 := bits.add_u64(x150, x166, u64(0x0))
+	x173, x174 := bits.add_u64(x152, x168, u64(fiat.u1(x172)))
+	x175, x176 := bits.add_u64(x154, x170, u64(fiat.u1(x174)))
+	x177, x178 := bits.add_u64(x156, x162, u64(fiat.u1(x176)))
+	x179, x180 := bits.add_u64(x158, x163, u64(fiat.u1(x178)))
+	x181 := (u64(fiat.u1(x180)) + u64(fiat.u1(x159)))
+	// Trial subtraction of ELL; the final borrow (x191) picks between the
+	// subtracted and unsubtracted limbs.
+	x182, x183 := bits.sub_u64(x173, 0x5812631a5cf5d3ed, u64(0x0))
+	x184, x185 := bits.sub_u64(x175, 0x14def9dea2f79cd6, u64(fiat.u1(x183)))
+	x186, x187 := bits.sub_u64(x177, u64(0x0), u64(fiat.u1(x185)))
+	x188, x189 := bits.sub_u64(x179, 0x1000000000000000, u64(fiat.u1(x187)))
+	_, x191 := bits.sub_u64(x181, u64(0x0), u64(fiat.u1(x189)))
+	x192 := fiat.cmovznz_u64(fiat.u1(x191), x182, x173)
+	x193 := fiat.cmovznz_u64(fiat.u1(x191), x184, x175)
+	x194 := fiat.cmovznz_u64(fiat.u1(x191), x186, x177)
+	x195 := fiat.cmovznz_u64(fiat.u1(x191), x188, x179)
+	out1[0] = x192
+	out1[1] = x193
+	out1[2] = x194
+	out1[3] = x195
+}
+
+// fe_square sets out1 to arg1 * arg1, with both values in Montgomery
+// form.
+//
+// The previous body was a line-for-line copy of fe_mul with arg2 replaced
+// by arg1, so it is expressed as that call instead of maintaining two
+// identical 110-line carry chains.  fe_mul reads both of its inputs in
+// full before it writes out1, so passing arg1 twice is safe, and out1 may
+// still alias arg1.
+fe_square :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element) {
+	fe_mul(out1, arg1, arg1)
+}
+
+// fe_add sets out1 to arg1 + arg2, followed by a trial subtraction of
+// ELL whose final borrow selects the stored limbs.  All inputs are read
+// before out1 is written, so out1 may alias either argument.
+fe_add :: proc "contextless" (out1, arg1, arg2: ^Montgomery_Domain_Field_Element) {
+	// Limb-wise sum with carry propagation.
+	s0, c0 := bits.add_u64(arg1[0], arg2[0], u64(0x0))
+	s1, c1 := bits.add_u64(arg1[1], arg2[1], u64(fiat.u1(c0)))
+	s2, c2 := bits.add_u64(arg1[2], arg2[2], u64(fiat.u1(c1)))
+	s3, c3 := bits.add_u64(arg1[3], arg2[3], u64(fiat.u1(c2)))
+
+	// Trial subtraction of ELL from the sum, carry-out included.
+	d0, b0 := bits.sub_u64(s0, 0x5812631a5cf5d3ed, u64(0x0))
+	d1, b1 := bits.sub_u64(s1, 0x14def9dea2f79cd6, u64(fiat.u1(b0)))
+	d2, b2 := bits.sub_u64(s2, u64(0x0), u64(fiat.u1(b1)))
+	d3, b3 := bits.sub_u64(s3, 0x1000000000000000, u64(fiat.u1(b2)))
+	_, underflow := bits.sub_u64(u64(fiat.u1(c3)), u64(0x0), u64(fiat.u1(b3)))
+
+	// Pick the subtracted or the unsubtracted limbs based on the borrow.
+	pick := fiat.u1(underflow)
+	out1[0] = fiat.cmovznz_u64(pick, d0, s0)
+	out1[1] = fiat.cmovznz_u64(pick, d1, s1)
+	out1[2] = fiat.cmovznz_u64(pick, d2, s2)
+	out1[3] = fiat.cmovznz_u64(pick, d3, s3)
+}
+
+// fe_sub sets out1 to arg1 - arg2.  The limb-wise difference is computed
+// first; ELL, masked by a value derived from the final borrow, is then
+// added back.  All inputs are read before out1 is written, so out1 may
+// alias either argument.
+fe_sub :: proc "contextless" (out1, arg1, arg2: ^Montgomery_Domain_Field_Element) {
+	// Limb-wise difference with borrow propagation.
+	d0, b0 := bits.sub_u64(arg1[0], arg2[0], u64(0x0))
+	d1, b1 := bits.sub_u64(arg1[1], arg2[1], u64(fiat.u1(b0)))
+	d2, b2 := bits.sub_u64(arg1[2], arg2[2], u64(fiat.u1(b1)))
+	d3, b3 := bits.sub_u64(arg1[3], arg2[3], u64(fiat.u1(b2)))
+
+	// Mask chosen between 0 and all-ones from the final borrow.
+	mask := fiat.cmovznz_u64(fiat.u1(b3), u64(0x0), 0xffffffffffffffff)
+
+	// Add (mask & ELL) back, limb by limb.
+	r0, c0 := bits.add_u64(d0, (mask & 0x5812631a5cf5d3ed), u64(0x0))
+	r1, c1 := bits.add_u64(d1, (mask & 0x14def9dea2f79cd6), u64(fiat.u1(c0)))
+	r2, c2 := bits.add_u64(d2, u64(0x0), u64(fiat.u1(c1)))
+	r3, _ := bits.add_u64(d3, (mask & 0x1000000000000000), u64(fiat.u1(c2)))
+
+	out1[0], out1[1], out1[2], out1[3] = r0, r1, r2, r3
+}
+
+// fe_opp sets out1 to the negation of arg1, computed as 0 - arg1.  ELL,
+// masked by a value derived from the final borrow, is then added back.
+// arg1 is fully read before out1 is written, so the two may alias.
+fe_opp :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element) {
+	// Limb-wise 0 - arg1 with borrow propagation.
+	d0, b0 := bits.sub_u64(u64(0x0), arg1[0], u64(0x0))
+	d1, b1 := bits.sub_u64(u64(0x0), arg1[1], u64(fiat.u1(b0)))
+	d2, b2 := bits.sub_u64(u64(0x0), arg1[2], u64(fiat.u1(b1)))
+	d3, b3 := bits.sub_u64(u64(0x0), arg1[3], u64(fiat.u1(b2)))
+
+	// Mask chosen between 0 and all-ones from the final borrow.
+	mask := fiat.cmovznz_u64(fiat.u1(b3), u64(0x0), 0xffffffffffffffff)
+
+	// Add (mask & ELL) back, limb by limb.
+	r0, c0 := bits.add_u64(d0, (mask & 0x5812631a5cf5d3ed), u64(0x0))
+	r1, c1 := bits.add_u64(d1, (mask & 0x14def9dea2f79cd6), u64(fiat.u1(c0)))
+	r2, c2 := bits.add_u64(d2, u64(0x0), u64(fiat.u1(c1)))
+	r3, _ := bits.add_u64(d3, (mask & 0x1000000000000000), u64(fiat.u1(c2)))
+
+	out1[0], out1[1], out1[2], out1[3] = r0, r1, r2, r3
+}
+
+// fe_one sets out1 to the field element one.  The limbs below are the
+// Montgomery-form encoding used by this package, least-significant limb
+// first.
+fe_one :: proc "contextless" (out1: ^Montgomery_Domain_Field_Element) {
+	out1^ = Montgomery_Domain_Field_Element {
+		0xd6ec31748d98951d,
+		0xc6ef5bf4737dcf70,
+		0xfffffffffffffffe,
+		0xfffffffffffffff,
+	}
+}
+
+// fe_non_zero returns the bitwise OR of all four limbs of arg1: zero if
+// every limb is zero, and some non-zero value otherwise.
+fe_non_zero :: proc "contextless" (arg1: ^Montgomery_Domain_Field_Element) -> u64 {
+	acc := arg1[0]
+	acc |= arg1[1]
+	acc |= arg1[2]
+	acc |= arg1[3]
+	return acc
+}
+
+// fe_cond_assign conditionally overwrites out1 with arg1, limb by limb,
+// using fiat.cmovznz_u64 with arg2 as the selector instead of a branch.
+// NOTE(review): arg2 is converted to fiat.u1, so callers are presumably
+// expected to pass only 0 (keep out1) or 1 (take arg1) - confirm against
+// the definition of fiat.cmovznz_u64.
+@(optimization_mode = "none")
+fe_cond_assign :: #force_no_inline proc "contextless" (
+	out1, arg1: ^Montgomery_Domain_Field_Element,
+	arg2: int,
+) {
+	x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0])
+	x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1])
+	x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2])
+	x4 := fiat.cmovznz_u64(fiat.u1(arg2), out1[3], arg1[3])
+	// All four selections are made before any limb of out1 is written.
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+}
+
+// fe_from_montgomery converts arg1 out of Montgomery form and stores the
+// plain limbs in out1.
+//
+// The limbs of arg1 are folded in one per round (arg1[0] through
+// arg1[3]).  Each round adds a multiple of ELL (limbs 0x5812631a5cf5d3ed,
+// 0x14def9dea2f79cd6, 0x0, 0x1000000000000000) selected through the
+// constant 0xd2b51da312547e1b.  A final trial subtraction of ELL is
+// resolved with fiat.cmovznz_u64 on the last borrow.
+//
+// NOTE(review): 0xd2b51da312547e1b is presumably -ELL^-1 mod 2^64 -
+// confirm against the fiat-crypto output.
+fe_from_montgomery :: proc "contextless" (
+	out1: ^Non_Montgomery_Domain_Field_Element,
+	arg1: ^Montgomery_Domain_Field_Element,
+) {
+	// Round 0: arg1[0].
+	x1 := arg1[0]
+	_, x2 := bits.mul_u64(x1, 0xd2b51da312547e1b)
+	x5, x4 := bits.mul_u64(x2, 0x1000000000000000)
+	x7, x6 := bits.mul_u64(x2, 0x14def9dea2f79cd6)
+	x9, x8 := bits.mul_u64(x2, 0x5812631a5cf5d3ed)
+	x10, x11 := bits.add_u64(x9, x6, u64(0x0))
+	_, x13 := bits.add_u64(x1, x8, u64(0x0))
+	x14, x15 := bits.add_u64(u64(0x0), x10, u64(fiat.u1(x13)))
+	// Round 1: fold in arg1[1].
+	x16, x17 := bits.add_u64(x14, arg1[1], u64(0x0))
+	_, x18 := bits.mul_u64(x16, 0xd2b51da312547e1b)
+	x21, x20 := bits.mul_u64(x18, 0x1000000000000000)
+	x23, x22 := bits.mul_u64(x18, 0x14def9dea2f79cd6)
+	x25, x24 := bits.mul_u64(x18, 0x5812631a5cf5d3ed)
+	x26, x27 := bits.add_u64(x25, x22, u64(0x0))
+	_, x29 := bits.add_u64(x16, x24, u64(0x0))
+	x30, x31 := bits.add_u64(
+		(u64(fiat.u1(x17)) + (u64(fiat.u1(x15)) + (u64(fiat.u1(x11)) + x7))),
+		x26,
+		u64(fiat.u1(x29)),
+	)
+	x32, x33 := bits.add_u64(x4, (u64(fiat.u1(x27)) + x23), u64(fiat.u1(x31)))
+	x34, x35 := bits.add_u64(x5, x20, u64(fiat.u1(x33)))
+	// Round 2: fold in arg1[2].
+	x36, x37 := bits.add_u64(x30, arg1[2], u64(0x0))
+	x38, x39 := bits.add_u64(x32, u64(0x0), u64(fiat.u1(x37)))
+	x40, x41 := bits.add_u64(x34, u64(0x0), u64(fiat.u1(x39)))
+	_, x42 := bits.mul_u64(x36, 0xd2b51da312547e1b)
+	x45, x44 := bits.mul_u64(x42, 0x1000000000000000)
+	x47, x46 := bits.mul_u64(x42, 0x14def9dea2f79cd6)
+	x49, x48 := bits.mul_u64(x42, 0x5812631a5cf5d3ed)
+	x50, x51 := bits.add_u64(x49, x46, u64(0x0))
+	_, x53 := bits.add_u64(x36, x48, u64(0x0))
+	x54, x55 := bits.add_u64(x38, x50, u64(fiat.u1(x53)))
+	x56, x57 := bits.add_u64(x40, (u64(fiat.u1(x51)) + x47), u64(fiat.u1(x55)))
+	x58, x59 := bits.add_u64(
+		(u64(fiat.u1(x41)) + (u64(fiat.u1(x35)) + x21)),
+		x44,
+		u64(fiat.u1(x57)),
+	)
+	// Round 3: fold in arg1[3].
+	x60, x61 := bits.add_u64(x54, arg1[3], u64(0x0))
+	x62, x63 := bits.add_u64(x56, u64(0x0), u64(fiat.u1(x61)))
+	x64, x65 := bits.add_u64(x58, u64(0x0), u64(fiat.u1(x63)))
+	_, x66 := bits.mul_u64(x60, 0xd2b51da312547e1b)
+	x69, x68 := bits.mul_u64(x66, 0x1000000000000000)
+	x71, x70 := bits.mul_u64(x66, 0x14def9dea2f79cd6)
+	x73, x72 := bits.mul_u64(x66, 0x5812631a5cf5d3ed)
+	x74, x75 := bits.add_u64(x73, x70, u64(0x0))
+	_, x77 := bits.add_u64(x60, x72, u64(0x0))
+	x78, x79 := bits.add_u64(x62, x74, u64(fiat.u1(x77)))
+	x80, x81 := bits.add_u64(x64, (u64(fiat.u1(x75)) + x71), u64(fiat.u1(x79)))
+	x82, x83 := bits.add_u64(
+		(u64(fiat.u1(x65)) + (u64(fiat.u1(x59)) + x45)),
+		x68,
+		u64(fiat.u1(x81)),
+	)
+	x84 := (u64(fiat.u1(x83)) + x69)
+	// Trial subtraction of ELL; the final borrow (x94) picks between the
+	// subtracted and unsubtracted limbs.
+	x85, x86 := bits.sub_u64(x78, 0x5812631a5cf5d3ed, u64(0x0))
+	x87, x88 := bits.sub_u64(x80, 0x14def9dea2f79cd6, u64(fiat.u1(x86)))
+	x89, x90 := bits.sub_u64(x82, u64(0x0), u64(fiat.u1(x88)))
+	x91, x92 := bits.sub_u64(x84, 0x1000000000000000, u64(fiat.u1(x90)))
+	_, x94 := bits.sub_u64(u64(0x0), u64(0x0), u64(fiat.u1(x92)))
+	x95 := fiat.cmovznz_u64(fiat.u1(x94), x85, x78)
+	x96 := fiat.cmovznz_u64(fiat.u1(x94), x87, x80)
+	x97 := fiat.cmovznz_u64(fiat.u1(x94), x89, x82)
+	x98 := fiat.cmovznz_u64(fiat.u1(x94), x91, x84)
+	out1[0] = x95
+	out1[1] = x96
+	out1[2] = x97
+	out1[3] = x98
+}
+
+// fe_to_montgomery converts the plain limbs in arg1 into Montgomery form
+// and stores the result in out1.
+//
+// The limbs of arg1 are consumed one per round, in the order arg1[0],
+// arg1[1], arg1[2], arg1[3].  Each round multiplies that limb by the
+// four-limb constant 0xa40611e3449c0f01, 0xd00e1ba768859347,
+// 0xceec73d217f5be65, 0x399411b7c309a3d (least-significant limb first),
+// adds the partial product to the running accumulator, and then adds a
+// multiple of ELL selected through the constant 0xd2b51da312547e1b.  A
+// final trial subtraction of ELL is resolved with fiat.cmovznz_u64 on the
+// last borrow.
+//
+// NOTE(review): the four-limb multiplier is presumably R^2 mod ELL with
+// R = 2^256, and 0xd2b51da312547e1b presumably -ELL^-1 mod 2^64 - confirm
+// both against the fiat-crypto output.
+fe_to_montgomery :: proc "contextless" (
+	out1: ^Montgomery_Domain_Field_Element,
+	arg1: ^Non_Montgomery_Domain_Field_Element,
+) {
+	x1 := arg1[1]
+	x2 := arg1[2]
+	x3 := arg1[3]
+	x4 := arg1[0]
+	// Round 0: arg1[0] times the multiplier, then a reduction step.
+	x6, x5 := bits.mul_u64(x4, 0x399411b7c309a3d)
+	x8, x7 := bits.mul_u64(x4, 0xceec73d217f5be65)
+	x10, x9 := bits.mul_u64(x4, 0xd00e1ba768859347)
+	x12, x11 := bits.mul_u64(x4, 0xa40611e3449c0f01)
+	x13, x14 := bits.add_u64(x12, x9, u64(0x0))
+	x15, x16 := bits.add_u64(x10, x7, u64(fiat.u1(x14)))
+	x17, x18 := bits.add_u64(x8, x5, u64(fiat.u1(x16)))
+	_, x19 := bits.mul_u64(x11, 0xd2b51da312547e1b)
+	x22, x21 := bits.mul_u64(x19, 0x1000000000000000)
+	x24, x23 := bits.mul_u64(x19, 0x14def9dea2f79cd6)
+	x26, x25 := bits.mul_u64(x19, 0x5812631a5cf5d3ed)
+	x27, x28 := bits.add_u64(x26, x23, u64(0x0))
+	_, x30 := bits.add_u64(x11, x25, u64(0x0))
+	x31, x32 := bits.add_u64(x13, x27, u64(fiat.u1(x30)))
+	x33, x34 := bits.add_u64(x15, (u64(fiat.u1(x28)) + x24), u64(fiat.u1(x32)))
+	x35, x36 := bits.add_u64(x17, x21, u64(fiat.u1(x34)))
+	// Round 1: arg1[1] times the multiplier, then a reduction step.
+	x38, x37 := bits.mul_u64(x1, 0x399411b7c309a3d)
+	x40, x39 := bits.mul_u64(x1, 0xceec73d217f5be65)
+	x42, x41 := bits.mul_u64(x1, 0xd00e1ba768859347)
+	x44, x43 := bits.mul_u64(x1, 0xa40611e3449c0f01)
+	x45, x46 := bits.add_u64(x44, x41, u64(0x0))
+	x47, x48 := bits.add_u64(x42, x39, u64(fiat.u1(x46)))
+	x49, x50 := bits.add_u64(x40, x37, u64(fiat.u1(x48)))
+	x51, x52 := bits.add_u64(x31, x43, u64(0x0))
+	x53, x54 := bits.add_u64(x33, x45, u64(fiat.u1(x52)))
+	x55, x56 := bits.add_u64(x35, x47, u64(fiat.u1(x54)))
+	x57, x58 := bits.add_u64(
+		((u64(fiat.u1(x36)) + (u64(fiat.u1(x18)) + x6)) + x22),
+		x49,
+		u64(fiat.u1(x56)),
+	)
+	_, x59 := bits.mul_u64(x51, 0xd2b51da312547e1b)
+	x62, x61 := bits.mul_u64(x59, 0x1000000000000000)
+	x64, x63 := bits.mul_u64(x59, 0x14def9dea2f79cd6)
+	x66, x65 := bits.mul_u64(x59, 0x5812631a5cf5d3ed)
+	x67, x68 := bits.add_u64(x66, x63, u64(0x0))
+	_, x70 := bits.add_u64(x51, x65, u64(0x0))
+	x71, x72 := bits.add_u64(x53, x67, u64(fiat.u1(x70)))
+	x73, x74 := bits.add_u64(x55, (u64(fiat.u1(x68)) + x64), u64(fiat.u1(x72)))
+	x75, x76 := bits.add_u64(x57, x61, u64(fiat.u1(x74)))
+	// Round 2: arg1[2] times the multiplier, then a reduction step.
+	x78, x77 := bits.mul_u64(x2, 0x399411b7c309a3d)
+	x80, x79 := bits.mul_u64(x2, 0xceec73d217f5be65)
+	x82, x81 := bits.mul_u64(x2, 0xd00e1ba768859347)
+	x84, x83 := bits.mul_u64(x2, 0xa40611e3449c0f01)
+	x85, x86 := bits.add_u64(x84, x81, u64(0x0))
+	x87, x88 := bits.add_u64(x82, x79, u64(fiat.u1(x86)))
+	x89, x90 := bits.add_u64(x80, x77, u64(fiat.u1(x88)))
+	x91, x92 := bits.add_u64(x71, x83, u64(0x0))
+	x93, x94 := bits.add_u64(x73, x85, u64(fiat.u1(x92)))
+	x95, x96 := bits.add_u64(x75, x87, u64(fiat.u1(x94)))
+	x97, x98 := bits.add_u64(
+		((u64(fiat.u1(x76)) + (u64(fiat.u1(x58)) + (u64(fiat.u1(x50)) + x38))) + x62),
+		x89,
+		u64(fiat.u1(x96)),
+	)
+	_, x99 := bits.mul_u64(x91, 0xd2b51da312547e1b)
+	x102, x101 := bits.mul_u64(x99, 0x1000000000000000)
+	x104, x103 := bits.mul_u64(x99, 0x14def9dea2f79cd6)
+	x106, x105 := bits.mul_u64(x99, 0x5812631a5cf5d3ed)
+	x107, x108 := bits.add_u64(x106, x103, u64(0x0))
+	_, x110 := bits.add_u64(x91, x105, u64(0x0))
+	x111, x112 := bits.add_u64(x93, x107, u64(fiat.u1(x110)))
+	x113, x114 := bits.add_u64(x95, (u64(fiat.u1(x108)) + x104), u64(fiat.u1(x112)))
+	x115, x116 := bits.add_u64(x97, x101, u64(fiat.u1(x114)))
+	// Round 3: arg1[3] times the multiplier, then a reduction step.
+	x118, x117 := bits.mul_u64(x3, 0x399411b7c309a3d)
+	x120, x119 := bits.mul_u64(x3, 0xceec73d217f5be65)
+	x122, x121 := bits.mul_u64(x3, 0xd00e1ba768859347)
+	x124, x123 := bits.mul_u64(x3, 0xa40611e3449c0f01)
+	x125, x126 := bits.add_u64(x124, x121, u64(0x0))
+	x127, x128 := bits.add_u64(x122, x119, u64(fiat.u1(x126)))
+	x129, x130 := bits.add_u64(x120, x117, u64(fiat.u1(x128)))
+	x131, x132 := bits.add_u64(x111, x123, u64(0x0))
+	x133, x134 := bits.add_u64(x113, x125, u64(fiat.u1(x132)))
+	x135, x136 := bits.add_u64(x115, x127, u64(fiat.u1(x134)))
+	x137, x138 := bits.add_u64(
+		((u64(fiat.u1(x116)) + (u64(fiat.u1(x98)) + (u64(fiat.u1(x90)) + x78))) + x102),
+		x129,
+		u64(fiat.u1(x136)),
+	)
+	_, x139 := bits.mul_u64(x131, 0xd2b51da312547e1b)
+	x142, x141 := bits.mul_u64(x139, 0x1000000000000000)
+	x144, x143 := bits.mul_u64(x139, 0x14def9dea2f79cd6)
+	x146, x145 := bits.mul_u64(x139, 0x5812631a5cf5d3ed)
+	x147, x148 := bits.add_u64(x146, x143, u64(0x0))
+	_, x150 := bits.add_u64(x131, x145, u64(0x0))
+	x151, x152 := bits.add_u64(x133, x147, u64(fiat.u1(x150)))
+	x153, x154 := bits.add_u64(x135, (u64(fiat.u1(x148)) + x144), u64(fiat.u1(x152)))
+	x155, x156 := bits.add_u64(x137, x141, u64(fiat.u1(x154)))
+	x157 := ((u64(fiat.u1(x156)) + (u64(fiat.u1(x138)) + (u64(fiat.u1(x130)) + x118))) + x142)
+	// Trial subtraction of ELL; the final borrow (x167) picks between the
+	// subtracted and unsubtracted limbs.
+	x158, x159 := bits.sub_u64(x151, 0x5812631a5cf5d3ed, u64(0x0))
+	x160, x161 := bits.sub_u64(x153, 0x14def9dea2f79cd6, u64(fiat.u1(x159)))
+	x162, x163 := bits.sub_u64(x155, u64(0x0), u64(fiat.u1(x161)))
+	x164, x165 := bits.sub_u64(x157, 0x1000000000000000, u64(fiat.u1(x163)))
+	_, x167 := bits.sub_u64(u64(0x0), u64(0x0), u64(fiat.u1(x165)))
+	x168 := fiat.cmovznz_u64(fiat.u1(x167), x158, x151)
+	x169 := fiat.cmovznz_u64(fiat.u1(x167), x160, x153)
+	x170 := fiat.cmovznz_u64(fiat.u1(x167), x162, x155)
+	x171 := fiat.cmovznz_u64(fiat.u1(x167), x164, x157)
+	out1[0] = x168
+	out1[1] = x169
+	out1[2] = x170
+	out1[3] = x171
+}

+ 104 - 73
core/crypto/_sha3/sha3.odin

@@ -7,50 +7,69 @@ package _sha3
     List of contributors:
     List of contributors:
         zhibog, dotbmp:  Initial implementation.
         zhibog, dotbmp:  Initial implementation.
 
 
-    Implementation of the Keccak hashing algorithm, standardized as SHA3 in <https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf>
-    To use the original Keccak padding, set the is_keccak bool to true, otherwise it will use SHA3 padding.
+    Implementation of the Keccak hashing algorithm, standardized as SHA3
+    in <https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf>.
+
+    As the only difference between the legacy Keccak and SHA3 is the domain
+    separation byte, set dsbyte to the appropriate value to pick the desired
+    algorithm.
 */
 */
 
 
 import "core:math/bits"
 import "core:math/bits"
+import "core:mem"
 
 
 ROUNDS :: 24
 ROUNDS :: 24
 
 
-Sha3_Context :: struct {
-	st:        struct #raw_union {
+RATE_128 :: 1344 / 8 // ONLY for SHAKE128.
+RATE_224 :: 1152 / 8
+RATE_256 :: 1088 / 8
+RATE_384 :: 832 / 8
+RATE_512 :: 576 / 8
+
+DS_KECCAK :: 0x01
+DS_SHA3 :: 0x06
+DS_SHAKE :: 0x1f
+DS_CSHAKE :: 0x04
+
+Context :: struct {
+	st:             struct #raw_union {
 		b: [200]u8,
 		b: [200]u8,
 		q: [25]u64,
 		q: [25]u64,
 	},
 	},
-	pt:        int,
-	rsiz:      int,
-	mdlen:     int,
-	is_keccak: bool,
-
+	pt:             int,
+	rsiz:           int,
+	mdlen:          int,
+	dsbyte:         byte,
 	is_initialized: bool,
 	is_initialized: bool,
 	is_finalized:   bool, // For SHAKE (unlimited squeeze is allowed)
 	is_finalized:   bool, // For SHAKE (unlimited squeeze is allowed)
 }
 }
 
 
-keccakf :: proc "contextless" (st: ^[25]u64) {
-	keccakf_rndc := [?]u64 {
-		0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
-		0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
-		0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
-		0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
-		0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
-		0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
-		0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
-		0x8000000000008080, 0x0000000080000001, 0x8000000080008008,
-	}
+@(private)
+keccakf_rndc := [?]u64 {
+	0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
+	0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
+	0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
+	0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
+	0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
+	0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
+	0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
+	0x8000000000008080, 0x0000000080000001, 0x8000000080008008,
+}
 
 
-	keccakf_rotc := [?]int {
-		1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
-		27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44,
-	}
+@(private)
+keccakf_rotc := [?]int {
+	1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
+	27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44,
+}
 
 
-	keccakf_piln := [?]i32 {
-		10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
-		15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1,
-	}
+@(private)
+keccakf_piln := [?]i32 {
+	10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
+	15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1,
+}
 
 
+@(private)
+keccakf :: proc "contextless" (st: ^[25]u64) {
 	i, j, r: i32 = ---, ---, ---
 	i, j, r: i32 = ---, ---, ---
 	t: u64 = ---
 	t: u64 = ---
 	bc: [5]u64 = ---
 	bc: [5]u64 = ---
@@ -103,81 +122,93 @@ keccakf :: proc "contextless" (st: ^[25]u64) {
 	}
 	}
 }
 }
 
 
-init :: proc(c: ^Sha3_Context) {
+init :: proc(ctx: ^Context) {
 	for i := 0; i < 25; i += 1 {
 	for i := 0; i < 25; i += 1 {
-		c.st.q[i] = 0
+		ctx.st.q[i] = 0
 	}
 	}
-	c.rsiz = 200 - 2 * c.mdlen
-	c.pt = 0
+	ctx.rsiz = 200 - 2 * ctx.mdlen
+	ctx.pt = 0
 
 
-	c.is_initialized = true
-	c.is_finalized = false
+	ctx.is_initialized = true
+	ctx.is_finalized = false
 }
 }
 
 
-update :: proc(c: ^Sha3_Context, data: []byte) {
-	assert(c.is_initialized)
-	assert(!c.is_finalized)
+update :: proc(ctx: ^Context, data: []byte) {
+	assert(ctx.is_initialized)
+	assert(!ctx.is_finalized)
 
 
-	j := c.pt
+	j := ctx.pt
 	for i := 0; i < len(data); i += 1 {
 	for i := 0; i < len(data); i += 1 {
-		c.st.b[j] ~= data[i]
+		ctx.st.b[j] ~= data[i]
 		j += 1
 		j += 1
-		if j >= c.rsiz {
-			keccakf(&c.st.q)
+		if j >= ctx.rsiz {
+			keccakf(&ctx.st.q)
 			j = 0
 			j = 0
 		}
 		}
 	}
 	}
-	c.pt = j
+	ctx.pt = j
 }
 }
 
 
-final :: proc(c: ^Sha3_Context, hash: []byte) {
-	assert(c.is_initialized)
+final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
+	assert(ctx.is_initialized)
 
 
-	if len(hash) < c.mdlen {
-		if c.is_keccak {
-			panic("crypto/keccac: invalid destination digest size")
-		}
+	if len(hash) < ctx.mdlen {
 		panic("crypto/sha3: invalid destination digest size")
 		panic("crypto/sha3: invalid destination digest size")
 	}
 	}
-	if c.is_keccak {
-		c.st.b[c.pt] ~= 0x01
-	} else {
-		c.st.b[c.pt] ~= 0x06
+
+	ctx := ctx
+	if finalize_clone {
+		tmp_ctx: Context
+		clone(&tmp_ctx, ctx)
+		ctx = &tmp_ctx
+	}
+	defer (reset(ctx))
+
+	ctx.st.b[ctx.pt] ~= ctx.dsbyte
+
+	ctx.st.b[ctx.rsiz - 1] ~= 0x80
+	keccakf(&ctx.st.q)
+	for i := 0; i < ctx.mdlen; i += 1 {
+		hash[i] = ctx.st.b[i]
 	}
 	}
+}
+
+clone :: proc(ctx, other: ^Context) {
+	ctx^ = other^
+}
 
 
-	c.st.b[c.rsiz - 1] ~= 0x80
-	keccakf(&c.st.q)
-	for i := 0; i < c.mdlen; i += 1 {
-		hash[i] = c.st.b[i]
+reset :: proc(ctx: ^Context) {
+	if !ctx.is_initialized {
+		return
 	}
 	}
 
 
-	c.is_initialized = false // No more absorb, no more squeeze.
+	mem.zero_explicit(ctx, size_of(ctx^))
 }
 }
 
 
-shake_xof :: proc(c: ^Sha3_Context) {
-	assert(c.is_initialized)
-	assert(!c.is_finalized)
+shake_xof :: proc(ctx: ^Context) {
+	assert(ctx.is_initialized)
+	assert(!ctx.is_finalized)
 
 
-	c.st.b[c.pt] ~= 0x1F
-	c.st.b[c.rsiz - 1] ~= 0x80
-	keccakf(&c.st.q)
-	c.pt = 0
+	ctx.st.b[ctx.pt] ~= ctx.dsbyte
+	ctx.st.b[ctx.rsiz - 1] ~= 0x80
+	keccakf(&ctx.st.q)
+	ctx.pt = 0
 
 
-	c.is_finalized = true // No more absorb, unlimited squeeze.
+	ctx.is_finalized = true // No more absorb, unlimited squeeze.
 }
 }
 
 
-shake_out :: proc(c: ^Sha3_Context, hash: []byte) {
-	assert(c.is_initialized)
-	assert(c.is_finalized)
+shake_out :: proc(ctx: ^Context, hash: []byte) {
+	assert(ctx.is_initialized)
+	assert(ctx.is_finalized)
 
 
-	j := c.pt
+	j := ctx.pt
 	for i := 0; i < len(hash); i += 1 {
 	for i := 0; i < len(hash); i += 1 {
-		if j >= c.rsiz {
-			keccakf(&c.st.q)
+		if j >= ctx.rsiz {
+			keccakf(&ctx.st.q)
 			j = 0
 			j = 0
 		}
 		}
-		hash[i] = c.st.b[j]
+		hash[i] = ctx.st.b[j]
 		j += 1
 		j += 1
 	}
 	}
-	c.pt = j
+	ctx.pt = j
 }
 }

+ 145 - 0
core/crypto/_sha3/sp800_185.odin

@@ -0,0 +1,145 @@
+package _sha3
+
+import "core:encoding/endian"
+import "core:math/bits"
+
+// init_cshake prepares ctx for cSHAKE at the given security strength
+// (in bits), absorbing the two domain-separation strings n and s.
+//
+// When both n and s are empty the context is initialized with the
+// DS_SHAKE domain separation byte and nothing is absorbed.  Otherwise
+// DS_CSHAKE is used and bytepad absorbs the encoded n and s, padded to
+// rate_cshake(sec_strength) bytes.
+init_cshake :: proc(ctx: ^Context, n, s: []byte, sec_strength: int) {
+	// mdlen has to be set before init, which derives rsiz from it.
+	ctx.mdlen = sec_strength / 8
+
+	// No domain separator is equivalent to vanilla SHAKE.
+	if len(n) == 0 && len(s) == 0 {
+		ctx.dsbyte = DS_SHAKE
+		init(ctx)
+		return
+	}
+
+	ctx.dsbyte = DS_CSHAKE
+	init(ctx)
+	bytepad(ctx, [][]byte{n, s}, rate_cshake(sec_strength))
+}
+
+// final_cshake absorbs right_encode(len(dst) * 8), finalizes the sponge
+// with shake_xof, squeezes len(dst) bytes into dst, and resets the
+// context that was finalized.
+//
+// If finalize_clone is set, all of the above is done on a temporary copy
+// of ctx, and it is the copy that gets reset.
+final_cshake :: proc(ctx: ^Context, dst: []byte, finalize_clone: bool = false) {
+	ctx := ctx
+	if finalize_clone {
+		tmp_ctx: Context
+		clone(&tmp_ctx, ctx)
+		ctx = &tmp_ctx
+	}
+	defer reset(ctx)
+
+	encode_byte_len(ctx, len(dst), false) // right_encode
+	shake_xof(ctx)
+	shake_out(ctx, dst)
+}
+
+// rate_cshake maps a security strength in bits (128 or 256) to the
+// matching sponge rate constant, and panics for any other value.
+rate_cshake :: #force_inline proc(sec_strength: int) -> int {
+	if sec_strength == 128 {
+		return RATE_128
+	}
+	if sec_strength == 256 {
+		return RATE_256
+	}
+
+	panic("crypto/sha3: invalid security strength")
+}
+
+// right_encode and left_encode are defined to support 0 <= x < 2^2040
+// however, the largest value we will ever need to encode is `max(int) * 8`.
+//
+// This is unfortunate as the extreme upper edge is larger than
+// `max(u64)`.  While such values are impractical at present,
+// they are possible (ie: https://arxiv.org/pdf/quant-ph/9908043.pdf).
+//
+// Thus we support 0 <= x < 2^128.
+
+@(private)
+_PAD: [RATE_128]byte // Biggest possible value of w per spec.
+
+// bytepad absorbs bytepad(X, w) as defined in SP 800-185 into ctx, where
+// X is the concatenation of encode_string(x) for every x in x_strings
+// and w is the padding width in bytes.
+//
+// Nothing is returned; the encoded and padded data is fed straight into
+// the sponge via update.  w must be positive and at most len(_PAD).
+bytepad :: proc(ctx: ^Context, x_strings: [][]byte, w: int) {
+	// 1. z = left_encode(w) || X.
+	z_hi: u64
+	z_lo := left_right_encode(ctx, 0, u64(w), true)
+	for x in x_strings {
+		// All uses of bytepad in SP 800-185 use the output from
+		// one or more encode_string values for `X`.
+		hi, lo := encode_string(ctx, x)
+
+		// Accumulate the 128-bit running length of z.
+		carry: u64
+		z_lo, carry = bits.add_u64(z_lo, lo, 0)
+		z_hi, carry = bits.add_u64(z_hi, hi, carry)
+
+		// This isn't actually possible, at least with the currently
+		// defined SP 800-185 routines.
+		if carry != 0 {
+			panic("crypto/sha3: bytepad input length overflow")
+		}
+	}
+
+	// We skip this step as we are doing a byte-oriented implementation
+	// rather than a bit oriented one.
+	//
+	// 2. while len(z) mod 8 ≠ 0:
+	//    z = z || 0
+
+	// 3. while (len(z)/8) mod w ≠ 0:
+	//    z = z || 00000000
+	z_len := u128(z_hi) << 64 | u128(z_lo)
+	z_rem := int(z_len % u128(w))
+	if z_rem == 0 {
+		// z is already a multiple of w bytes, so the loop in step 3
+		// adds nothing.  Absorbing `w` zero bytes here would push an
+		// extra all-zero block through the sponge and change the output.
+		return
+	}
+
+	// We just add the padding to the state, instead of returning z.
+	//
+	// 4. return z.
+	update(ctx, _PAD[:w - z_rem])
+}
+
+// encode_string absorbs left_encode(len(s) * 8) followed by s itself
+// into ctx, and returns the number of bytes absorbed as a (hi, lo) pair
+// of 64-bit words.
+encode_string :: #force_inline proc(ctx: ^Context, s: []byte) -> (u64, u64) {
+	prefix_len := encode_byte_len(ctx, len(s), true) // left_encode
+	update(ctx, s)
+
+	// The carry out of the 64-bit addition is the high word.
+	total, carry := bits.add_u64(prefix_len, u64(len(s)), 0)
+
+	return carry, total
+}
+
+// encode_byte_len absorbs the left_encode (is_left) or right_encode
+// (!is_left) of the byte length l expressed in bits, and returns the
+// number of bytes absorbed.
+encode_byte_len :: #force_inline proc(ctx: ^Context, l: int, is_left: bool) -> u64 {
+	// l * 8 as a 128-bit (hi, lo) product, so the bit count can not wrap.
+	hi, lo := bits.mul_u64(u64(l), 8)
+	return left_right_encode(ctx, hi, lo, is_left)
+}
+
+// left_right_encode absorbs left_encode(x) (is_left) or right_encode(x)
+// (!is_left) into ctx, where x is the 128-bit value hi:lo, and returns
+// the number of bytes absorbed.
+@(private)
+left_right_encode :: proc(ctx: ^Context, hi, lo: u64, is_left: bool) -> u64 {
+	// Buffer layout: [1 spare byte][hi, 8 bytes BE][lo, 8 bytes BE][1 spare byte].
+	HI_OFFSET :: 1
+	LO_OFFSET :: HI_OFFSET + 8
+	RIGHT_OFFSET :: LO_OFFSET + 8
+	BUF_LEN :: RIGHT_OFFSET + 1
+
+	buf: [BUF_LEN]byte // prefix + largest uint + postfix
+
+	endian.unchecked_put_u64be(buf[HI_OFFSET:], hi)
+	endian.unchecked_put_u64be(buf[LO_OFFSET:], lo)
+
+	// 2. Strip leading `0x00` bytes.
+	off: int
+	for off = HI_OFFSET; off < RIGHT_OFFSET - 1; off = off + 1 {// Note: Minimum size is 1, not 0.
+		if buf[off] != 0 {
+			break
+		}
+	}
+	// n is the number of value bytes that remain after stripping.
+	n := byte(RIGHT_OFFSET - off)
+
+	// 3. Prefix (left_encode) or postfix (right_encode) the length in bytes.
+	b: []byte
+	switch is_left {
+	case true:
+		buf[off - 1] = n // n | x
+		b = buf[off - 1:RIGHT_OFFSET]
+	case false:
+		buf[RIGHT_OFFSET] = n // x | n
+		b = buf[off:]
+	}
+
+	update(ctx, b)
+
+	return u64(len(b))
+}

+ 32 - 100
core/crypto/blake2b/blake2b.odin

@@ -1,3 +1,10 @@
+/*
+package blake2b implements the BLAKE2b hash algorithm.
+
+See:
+- https://datatracker.ietf.org/doc/html/rfc7693
+- https://www.blake2.net
+*/
 package blake2b
 package blake2b
 
 
 /*
 /*
@@ -6,122 +13,47 @@ package blake2b
 
 
     List of contributors:
     List of contributors:
         zhibog, dotbmp:  Initial implementation.
         zhibog, dotbmp:  Initial implementation.
-
-    Interface for the BLAKE2b hashing algorithm.
-    BLAKE2b and BLAKE2s share the implementation in the _blake2 package.
 */
 */
 
 
-import "core:io"
-import "core:os"
-
 import "../_blake2"
 import "../_blake2"
 
 
-/*
-    High level API
-*/
-
+// DIGEST_SIZE is the BLAKE2b digest size in bytes.
 DIGEST_SIZE :: 64
 DIGEST_SIZE :: 64
 
 
-// hash_string will hash the given input and return the
-// computed hash
-hash_string :: proc(data: string) -> [DIGEST_SIZE]byte {
-	return hash_bytes(transmute([]byte)(data))
-}
+// BLOCK_SIZE is the BLAKE2b block size in bytes.
+BLOCK_SIZE :: _blake2.BLAKE2B_BLOCK_SIZE
 
 
-// hash_bytes will hash the given input and return the
-// computed hash
-hash_bytes :: proc(data: []byte) -> [DIGEST_SIZE]byte {
-	hash: [DIGEST_SIZE]byte
-	ctx: Context
-	cfg: _blake2.Blake2_Config
-	cfg.size = _blake2.BLAKE2B_SIZE
-	ctx.cfg = cfg
-	init(&ctx)
-	update(&ctx, data)
-	final(&ctx, hash[:])
-	return hash
-}
-
-// hash_string_to_buffer will hash the given input and assign the
-// computed hash to the second parameter.
-// It requires that the destination buffer is at least as big as the digest size
-hash_string_to_buffer :: proc(data: string, hash: []byte) {
-	hash_bytes_to_buffer(transmute([]byte)(data), hash)
-}
-
-// hash_bytes_to_buffer will hash the given input and write the
-// computed hash into the second parameter.
-// It requires that the destination buffer is at least as big as the digest size
-hash_bytes_to_buffer :: proc(data, hash: []byte) {
-	ctx: Context
-	cfg: _blake2.Blake2_Config
-	cfg.size = _blake2.BLAKE2B_SIZE
-	ctx.cfg = cfg
-	init(&ctx)
-	update(&ctx, data)
-	final(&ctx, hash)
-}
+// Context is a BLAKE2b instance.
+Context :: _blake2.Blake2b_Context
 
 
-// hash_stream will read the stream in chunks and compute a
-// hash from its contents
-hash_stream :: proc(s: io.Stream) -> ([DIGEST_SIZE]byte, bool) {
-	hash: [DIGEST_SIZE]byte
-	ctx: Context
+// init initializes a Context with the default BLAKE2b config.
+init :: proc(ctx: ^Context) {
 	cfg: _blake2.Blake2_Config
 	cfg: _blake2.Blake2_Config
 	cfg.size = _blake2.BLAKE2B_SIZE
 	cfg.size = _blake2.BLAKE2B_SIZE
-	ctx.cfg = cfg
-	init(&ctx)
-
-	buf := make([]byte, 512)
-	defer delete(buf)
-
-	read := 1
-	for read > 0 {
-		read, _ = io.read(s, buf)
-		if read > 0 {
-			update(&ctx, buf[:read])
-		}
-	}
-	final(&ctx, hash[:])
-	return hash, true
+	_blake2.init(ctx, &cfg)
 }
 }
 
 
-// hash_file will read the file provided by the given handle
-// and compute a hash
-hash_file :: proc(hd: os.Handle, load_at_once := false) -> ([DIGEST_SIZE]byte, bool) {
-	if !load_at_once {
-		return hash_stream(os.stream_from_handle(hd))
-	} else {
-		if buf, ok := os.read_entire_file(hd); ok {
-			return hash_bytes(buf[:]), ok
-		}
-	}
-	return [DIGEST_SIZE]byte{}, false
+// update adds more data to the Context.
+update :: proc(ctx: ^Context, data: []byte) {
+	_blake2.update(ctx, data)
 }
 }
 
 
-hash :: proc {
-	hash_stream,
-	hash_file,
-	hash_bytes,
-	hash_string,
-	hash_bytes_to_buffer,
-	hash_string_to_buffer,
+// final finalizes the Context, writes the digest to hash, and calls
+// reset on the Context.
+//
+// Iff finalize_clone is set, final will work on a copy of the Context,
+// which is useful for calculating rolling digests.
+final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
+	_blake2.final(ctx, hash, finalize_clone)
 }
 }
 
 
-/*
-    Low level API
-*/
-
-Context :: _blake2.Blake2b_Context
-
-init :: proc(ctx: ^Context) {
-	_blake2.init(ctx)
-}
-
-update :: proc(ctx: ^Context, data: []byte) {
-	_blake2.update(ctx, data)
+// clone clones the Context other into ctx.
+clone :: proc(ctx, other: ^Context) {
+	_blake2.clone(ctx, other)
 }
 }
 
 
-final :: proc(ctx: ^Context, hash: []byte) {
-	_blake2.final(ctx, hash)
+// reset sanitizes the Context.  The Context must be re-initialized to
+// be used again.
+reset :: proc(ctx: ^Context) {
+	_blake2.reset(ctx)
 }
 }

+ 32 - 100
core/crypto/blake2s/blake2s.odin

@@ -1,3 +1,10 @@
+/*
+package blake2s implements the BLAKE2s hash algorithm.
+
+See:
+- https://datatracker.ietf.org/doc/html/rfc7693
+- https://www.blake2.net/
+*/
 package blake2s
 package blake2s
 
 
 /*
 /*
@@ -6,122 +13,47 @@ package blake2s
 
 
     List of contributors:
     List of contributors:
         zhibog, dotbmp:  Initial implementation.
         zhibog, dotbmp:  Initial implementation.
-
-    Interface for the BLAKE2s hashing algorithm.
-    BLAKE2s and BLAKE2b share the implementation in the _blake2 package.
 */
 */
 
 
-import "core:io"
-import "core:os"
-
 import "../_blake2"
 import "../_blake2"
 
 
-/*
-    High level API
-*/
-
+// DIGEST_SIZE is the BLAKE2s digest size in bytes.
 DIGEST_SIZE :: 32
 DIGEST_SIZE :: 32
 
 
-// hash_string will hash the given input and return the
-// computed hash
-hash_string :: proc(data: string) -> [DIGEST_SIZE]byte {
-	return hash_bytes(transmute([]byte)(data))
-}
+// BLOCK_SIZE is the BLAKE2s block size in bytes.
+BLOCK_SIZE :: _blake2.BLAKE2S_BLOCK_SIZE
 
 
-// hash_bytes will hash the given input and return the
-// computed hash
-hash_bytes :: proc(data: []byte) -> [DIGEST_SIZE]byte {
-	hash: [DIGEST_SIZE]byte
-	ctx: Context
-	cfg: _blake2.Blake2_Config
-	cfg.size = _blake2.BLAKE2S_SIZE
-	ctx.cfg = cfg
-	init(&ctx)
-	update(&ctx, data)
-	final(&ctx, hash[:])
-	return hash
-}
-
-// hash_string_to_buffer will hash the given input and assign the
-// computed hash to the second parameter.
-// It requires that the destination buffer is at least as big as the digest size
-hash_string_to_buffer :: proc(data: string, hash: []byte) {
-	hash_bytes_to_buffer(transmute([]byte)(data), hash)
-}
-
-// hash_bytes_to_buffer will hash the given input and write the
-// computed hash into the second parameter.
-// It requires that the destination buffer is at least as big as the digest size
-hash_bytes_to_buffer :: proc(data, hash: []byte) {
-	ctx: Context
-	cfg: _blake2.Blake2_Config
-	cfg.size = _blake2.BLAKE2S_SIZE
-	ctx.cfg = cfg
-	init(&ctx)
-	update(&ctx, data)
-	final(&ctx, hash)
-}
+// Context is a BLAKE2s instance.
+Context :: _blake2.Blake2s_Context
 
 
-// hash_stream will read the stream in chunks and compute a
-// hash from its contents
-hash_stream :: proc(s: io.Stream) -> ([DIGEST_SIZE]byte, bool) {
-	hash: [DIGEST_SIZE]byte
-	ctx: Context
+// init initializes a Context with the default BLAKE2s config.
+init :: proc(ctx: ^Context) {
 	cfg: _blake2.Blake2_Config
 	cfg: _blake2.Blake2_Config
 	cfg.size = _blake2.BLAKE2S_SIZE
 	cfg.size = _blake2.BLAKE2S_SIZE
-	ctx.cfg = cfg
-	init(&ctx)
-
-	buf := make([]byte, 512)
-	defer delete(buf)
-
-	read := 1
-	for read > 0 {
-		read, _ = io.read(s, buf)
-		if read > 0 {
-			update(&ctx, buf[:read])
-		}
-	}
-	final(&ctx, hash[:])
-	return hash, true
+	_blake2.init(ctx, &cfg)
 }
 }
 
 
-// hash_file will read the file provided by the given handle
-// and compute a hash
-hash_file :: proc(hd: os.Handle, load_at_once := false) -> ([DIGEST_SIZE]byte, bool) {
-	if !load_at_once {
-		return hash_stream(os.stream_from_handle(hd))
-	} else {
-		if buf, ok := os.read_entire_file(hd); ok {
-			return hash_bytes(buf[:]), ok
-		}
-	}
-	return [DIGEST_SIZE]byte{}, false
+// update adds more data to the Context.
+update :: proc(ctx: ^Context, data: []byte) {
+	_blake2.update(ctx, data)
 }
 }
 
 
-hash :: proc {
-	hash_stream,
-	hash_file,
-	hash_bytes,
-	hash_string,
-	hash_bytes_to_buffer,
-	hash_string_to_buffer,
+// final finalizes the Context, writes the digest to hash, and calls
+// reset on the Context.
+//
+// Iff finalize_clone is set, final will work on a copy of the Context,
+// which is useful for calculating rolling digests.
+final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
+	_blake2.final(ctx, hash, finalize_clone)
 }
 }
 
 
-/*
-    Low level API
-*/
-
-Context :: _blake2.Blake2s_Context
-
-init :: proc(ctx: ^Context) {
-	_blake2.init(ctx)
-}
-
-update :: proc(ctx: ^Context, data: []byte) {
-	_blake2.update(ctx, data)
+// clone clones the Context other into ctx.
+clone :: proc(ctx, other: ^Context) {
+	_blake2.clone(ctx, other)
 }
 }
 
 
-final :: proc(ctx: ^Context, hash: []byte) {
-	_blake2.final(ctx, hash)
+// reset sanitizes the Context.  The Context must be re-initialized to
+// be used again.
+reset :: proc(ctx: ^Context) {
+	_blake2.reset(ctx)
 }
 }

+ 33 - 15
core/crypto/chacha20/chacha20.odin

@@ -1,11 +1,21 @@
+/*
+package chacha20 implements the ChaCha20 and XChaCha20 stream ciphers.
+
+See:
+- https://datatracker.ietf.org/doc/html/rfc8439
+- https://datatracker.ietf.org/doc/draft-irtf-cfrg-xchacha/03/
+*/
 package chacha20
 package chacha20
 
 
 import "core:encoding/endian"
 import "core:encoding/endian"
 import "core:math/bits"
 import "core:math/bits"
 import "core:mem"
 import "core:mem"
 
 
+// KEY_SIZE is the (X)ChaCha20 key size in bytes.
 KEY_SIZE :: 32
 KEY_SIZE :: 32
+// NONCE_SIZE is the ChaCha20 nonce size in bytes.
 NONCE_SIZE :: 12
 NONCE_SIZE :: 12
+// XNONCE_SIZE is the XChaCha20 nonce size in bytes.
 XNONCE_SIZE :: 24
 XNONCE_SIZE :: 24
 
 
 @(private)
 @(private)
@@ -19,25 +29,26 @@ _STATE_SIZE_U32 :: 16
 _ROUNDS :: 20
 _ROUNDS :: 20
 
 
 @(private)
 @(private)
-_SIGMA_0 : u32 : 0x61707865
+_SIGMA_0: u32 : 0x61707865
 @(private)
 @(private)
-_SIGMA_1 : u32 : 0x3320646e
+_SIGMA_1: u32 : 0x3320646e
 @(private)
 @(private)
-_SIGMA_2 : u32 : 0x79622d32
+_SIGMA_2: u32 : 0x79622d32
 @(private)
 @(private)
-_SIGMA_3 : u32 : 0x6b206574
+_SIGMA_3: u32 : 0x6b206574
 
 
+// Context is a ChaCha20 or XChaCha20 instance.
 Context :: struct {
 Context :: struct {
-	_s: [_STATE_SIZE_U32]u32,
-
-	_buffer: [_BLOCK_SIZE]byte,
-	_off: int,
-
+	_s:              [_STATE_SIZE_U32]u32,
+	_buffer:         [_BLOCK_SIZE]byte,
+	_off:            int,
 	_is_ietf_flavor: bool,
 	_is_ietf_flavor: bool,
 	_is_initialized: bool,
 	_is_initialized: bool,
 }
 }
 
 
-init :: proc (ctx: ^Context, key, nonce: []byte) {
+// init initializes a Context for ChaCha20 or XChaCha20 with the provided
+// key and nonce.
+init :: proc(ctx: ^Context, key, nonce: []byte) {
 	if len(key) != KEY_SIZE {
 	if len(key) != KEY_SIZE {
 		panic("crypto/chacha20: invalid ChaCha20 key size")
 		panic("crypto/chacha20: invalid ChaCha20 key size")
 	}
 	}
@@ -89,7 +100,8 @@ init :: proc (ctx: ^Context, key, nonce: []byte) {
 	ctx._is_initialized = true
 	ctx._is_initialized = true
 }
 }
 
 
-seek :: proc (ctx: ^Context, block_nr: u64) {
+// seek seeks the (X)ChaCha20 stream counter to the specified block.
+seek :: proc(ctx: ^Context, block_nr: u64) {
 	assert(ctx._is_initialized)
 	assert(ctx._is_initialized)
 
 
 	if ctx._is_ietf_flavor {
 	if ctx._is_ietf_flavor {
@@ -103,7 +115,10 @@ seek :: proc (ctx: ^Context, block_nr: u64) {
 	ctx._off = _BLOCK_SIZE
 	ctx._off = _BLOCK_SIZE
 }
 }
 
 
-xor_bytes :: proc (ctx: ^Context, dst, src: []byte) {
+// xor_bytes XORs each byte in src with bytes taken from the (X)ChaCha20
+// keystream, and writes the resulting output to dst.  Dst and src MUST
+// alias exactly or not at all.
+xor_bytes :: proc(ctx: ^Context, dst, src: []byte) {
 	assert(ctx._is_initialized)
 	assert(ctx._is_initialized)
 
 
 	// TODO: Enforcing that dst and src alias exactly or not at all
 	// TODO: Enforcing that dst and src alias exactly or not at all
@@ -147,7 +162,8 @@ xor_bytes :: proc (ctx: ^Context, dst, src: []byte) {
 	}
 	}
 }
 }
 
 
-keystream_bytes :: proc (ctx: ^Context, dst: []byte) {
+// keystream_bytes fills dst with the raw (X)ChaCha20 keystream output.
+keystream_bytes :: proc(ctx: ^Context, dst: []byte) {
 	assert(ctx._is_initialized)
 	assert(ctx._is_initialized)
 
 
 	dst := dst
 	dst := dst
@@ -180,7 +196,9 @@ keystream_bytes :: proc (ctx: ^Context, dst: []byte) {
 	}
 	}
 }
 }
 
 
-reset :: proc (ctx: ^Context) {
+// reset sanitizes the Context.  The Context must be re-initialized to
+// be used again.
+reset :: proc(ctx: ^Context) {
 	mem.zero_explicit(&ctx._s, size_of(ctx._s))
 	mem.zero_explicit(&ctx._s, size_of(ctx._s))
 	mem.zero_explicit(&ctx._buffer, size_of(ctx._buffer))
 	mem.zero_explicit(&ctx._buffer, size_of(ctx._buffer))
 
 
@@ -188,7 +206,7 @@ reset :: proc (ctx: ^Context) {
 }
 }
 
 
 @(private)
 @(private)
-_do_blocks :: proc (ctx: ^Context, dst, src: []byte, nr_blocks: int) {
+_do_blocks :: proc(ctx: ^Context, dst, src: []byte, nr_blocks: int) {
 	// Enforce the maximum consumed keystream per nonce.
 	// Enforce the maximum consumed keystream per nonce.
 	//
 	//
 	// While all modern "standard" definitions of ChaCha20 use
 	// While all modern "standard" definitions of ChaCha20 use

+ 17 - 0
core/crypto/chacha20poly1305/chacha20poly1305.odin

@@ -1,3 +1,10 @@
+/*
+package chacha20poly1305 implements the AEAD_CHACHA20_POLY1305 Authenticated
+Encryption with Additional Data algorithm.
+
+See:
+- https://www.rfc-editor.org/rfc/rfc8439
+*/
 package chacha20poly1305
 package chacha20poly1305
 
 
 import "core:crypto"
 import "core:crypto"
@@ -6,8 +13,11 @@ import "core:crypto/poly1305"
 import "core:encoding/endian"
 import "core:encoding/endian"
 import "core:mem"
 import "core:mem"
 
 
+// KEY_SIZE is the chacha20poly1305 key size in bytes.
 KEY_SIZE :: chacha20.KEY_SIZE
 KEY_SIZE :: chacha20.KEY_SIZE
+// NONCE_SIZE is the chacha20poly1305 nonce size in bytes.
 NONCE_SIZE :: chacha20.NONCE_SIZE
 NONCE_SIZE :: chacha20.NONCE_SIZE
+// TAG_SIZE is the chacha20poly1305 tag size in bytes.
 TAG_SIZE :: poly1305.TAG_SIZE
 TAG_SIZE :: poly1305.TAG_SIZE
 
 
 @(private)
 @(private)
@@ -49,6 +59,8 @@ _update_mac_pad16 :: #force_inline proc (ctx: ^poly1305.Context, x_len: int) {
 	}
 	}
 }
 }
 
 
+// encrypt encrypts the plaintext and authenticates the aad and ciphertext,
+// with the provided key and nonce, stores the output in ciphertext and tag.
 encrypt :: proc (ciphertext, tag, key, nonce, aad, plaintext: []byte) {
 encrypt :: proc (ciphertext, tag, key, nonce, aad, plaintext: []byte) {
 	_validate_common_slice_sizes(tag, key, nonce, aad, plaintext)
 	_validate_common_slice_sizes(tag, key, nonce, aad, plaintext)
 	if len(ciphertext) != len(plaintext) {
 	if len(ciphertext) != len(plaintext) {
@@ -95,6 +107,11 @@ encrypt :: proc (ciphertext, tag, key, nonce, aad, plaintext: []byte) {
 	poly1305.final(&mac_ctx, tag) // Implicitly sanitizes context.
 	poly1305.final(&mac_ctx, tag) // Implicitly sanitizes context.
 }
 }
 
 
+// decrypt authenticates the aad and ciphertext, and decrypts the ciphertext,
+// with the provided key, nonce, and tag, and stores the output in plaintext,
+// returning true iff the authentication was successful.
+//
+// If authentication fails, the destination plaintext buffer will be zeroed.
 decrypt :: proc (plaintext, tag, key, nonce, aad, ciphertext: []byte) -> bool {
 decrypt :: proc (plaintext, tag, key, nonce, aad, ciphertext: []byte) -> bool {
 	_validate_common_slice_sizes(tag, key, nonce, aad, ciphertext)
 	_validate_common_slice_sizes(tag, key, nonce, aad, ciphertext)
 	if len(ciphertext) != len(plaintext) {
 	if len(ciphertext) != len(plaintext) {

+ 10 - 0
core/crypto/crypto.odin

@@ -1,3 +1,7 @@
+/*
+package crypto implements a selection of cryptography algorithms and useful
+helper routines.
+*/
 package crypto
 package crypto
 
 
 import "core:mem"
 import "core:mem"
@@ -51,3 +55,9 @@ rand_bytes :: proc (dst: []byte) {
 
 
 	_rand_bytes(dst)
 	_rand_bytes(dst)
 }
 }
+
+// has_rand_bytes returns true iff the target has support for accessing the
+// system entropy source.
+has_rand_bytes :: proc () -> bool {
+	return _has_rand_bytes()
+}

+ 314 - 0
core/crypto/ed25519/ed25519.odin

@@ -0,0 +1,314 @@
+/*
+package ed25519 implements the Ed25519 EdDSA signature algorithm.
+
+See:
+- https://datatracker.ietf.org/doc/html/rfc8032
+- https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.186-5.pdf
+- https://eprint.iacr.org/2020/1244.pdf
+*/
+package ed25519
+
+import "core:crypto"
+import grp "core:crypto/_edwards25519"
+import "core:crypto/sha2"
+import "core:mem"
+
+// PRIVATE_KEY_SIZE is the byte-encoded private key size.
+PRIVATE_KEY_SIZE :: 32
+// PUBLIC_KEY_SIZE is the byte-encoded public key size.
+PUBLIC_KEY_SIZE :: 32
+// SIGNATURE_SIZE is the byte-encoded signature size.
+SIGNATURE_SIZE :: 64
+
+@(private)
+NONCE_SIZE :: 32
+
+// Private_Key is an Ed25519 private key.
+Private_Key :: struct {
+	// WARNING: All of the members are to be treated as internal (ie:
+	// the Private_Key structure is intended to be opaque).  There are
+	// subtle vulnerabilities that can be introduced if the internal
+	// values are allowed to be altered.
+	//
+	// See: https://github.com/MystenLabs/ed25519-unsafe-libs
+	_b:              [PRIVATE_KEY_SIZE]byte, // Byte-encoded private key, as passed to private_key_set_bytes.
+	_s:              grp.Scalar, // Secret scalar, derived from the first half of SHA-512(_b).
+	_nonce:          [NONCE_SIZE]byte, // Second half of SHA-512(_b), hashed with the message when signing.
+	_pub_key:        Public_Key, // Public key corresponding to _s.
+	_is_initialized: bool, // Set by private_key_set_bytes on success.
+}
+
+// Public_Key is an Ed25519 public key.
+Public_Key :: struct {
+	// WARNING: All of the members are to be treated as internal (ie:
+	// the Public_Key structure is intended to be opaque).
+	_b:              [PUBLIC_KEY_SIZE]byte, // Byte-encoded public key.
+	_neg_A:          grp.Group_Element, // Negation of the decoded public key point A.
+	_is_valid:       bool, // False iff A is a small-order point.
+	_is_initialized: bool, // Set once the key has been populated.
+}
+
+// private_key_set_bytes decodes a byte-encoded private key, and returns
+// true iff the operation was successful.
+//
+// The only failure is len(b) != PRIVATE_KEY_SIZE, in which case priv_key
+// is left unmodified.  On success the secret scalar, the signing nonce
+// seed, and the corresponding public key are all derived and cached in
+// priv_key.
+private_key_set_bytes :: proc(priv_key: ^Private_Key, b: []byte) -> bool {
+	if len(b) != PRIVATE_KEY_SIZE {
+		return false
+	}
+
+	// Derive the private key.
+	ctx: sha2.Context_512 = ---
+	h_bytes: [sha2.DIGEST_SIZE_512]byte = ---
+	// h_bytes holds the secret scalar and nonce seed, so make sure it
+	// does not linger on the stack once they have been copied out.
+	defer mem.zero_explicit(&h_bytes, size_of(h_bytes))
+	sha2.init_512(&ctx)
+	sha2.update(&ctx, b)
+	sha2.final(&ctx, h_bytes[:])
+
+	copy(priv_key._b[:], b)
+	copy(priv_key._nonce[:], h_bytes[32:])
+	grp.sc_set_bytes_rfc8032(&priv_key._s, h_bytes[:32])
+
+	// Derive the corresponding public key.
+	A: grp.Group_Element = ---
+	grp.ge_scalarmult_basepoint(&A, &priv_key._s)
+	grp.ge_bytes(&A, priv_key._pub_key._b[:])
+	// The negated point is what gets cached, matching public_key_set_bytes.
+	grp.ge_negate(&priv_key._pub_key._neg_A, &A)
+	priv_key._pub_key._is_valid = !grp.ge_is_small_order(&A)
+	priv_key._pub_key._is_initialized = true
+
+	priv_key._is_initialized = true
+
+	return true
+}
+
+// private_key_bytes sets dst to byte-encoding of priv_key.
+//
+// Panics if priv_key is uninitialized, or if len(dst) is not exactly
+// PRIVATE_KEY_SIZE.
+private_key_bytes :: proc(priv_key: ^Private_Key, dst: []byte) {
+	if !priv_key._is_initialized {
+		panic("crypto/ed25519: uninitialized private key")
+	}
+	if len(dst) != PRIVATE_KEY_SIZE {
+		panic("crypto/ed25519: invalid destination size")
+	}
+
+	copy(dst, priv_key._b[:])
+}
+
+// private_key_clear wipes every byte of priv_key, which returns it to
+// the uninitialized state.
+private_key_clear :: proc "contextless" (priv_key: ^Private_Key) {
+	mem.zero_explicit(priv_key, size_of(priv_key^))
+}
+
+// sign writes the signature by priv_key over msg to sig.
+//
+// Panics if priv_key is uninitialized, or if len(sig) is not exactly
+// SIGNATURE_SIZE.  On return, sig[:32] holds the encoded point R and
+// sig[32:] holds the encoded scalar S.
+sign :: proc(priv_key: ^Private_Key, msg, sig: []byte) {
+	if !priv_key._is_initialized {
+		panic("crypto/ed25519: uninitialized private key")
+	}
+	if len(sig) != SIGNATURE_SIZE {
+		panic("crypto/ed25519: invalid destination size")
+	}
+
+	// 1. Compute the hash of the private key d, H(d) = (h_0, h_1, ..., h_2b-1)
+	// using SHA-512 for Ed25519.  H(d) may be precomputed.
+	//
+	// 2. Using the second half of the digest hdigest2 = hb || ... || h2b-1,
+	// define:
+	//
+	// 2.1 For Ed25519, r = SHA-512(hdigest2 || M); Interpret r as a
+	// 64-octet little-endian integer.
+	ctx: sha2.Context_512 = ---
+	digest_bytes: [sha2.DIGEST_SIZE_512]byte = ---
+	sha2.init_512(&ctx)
+	sha2.update(&ctx, priv_key._nonce[:])
+	sha2.update(&ctx, msg)
+	sha2.final(&ctx, digest_bytes[:])
+
+	r: grp.Scalar = ---
+	grp.sc_set_bytes_wide(&r, &digest_bytes)
+
+	// 3. Compute the point [r]G. The octet string R is the encoding of
+	// the point [r]G.
+	R: grp.Group_Element = ---
+	R_bytes := sig[:32]
+	grp.ge_scalarmult_basepoint(&R, &r)
+	grp.ge_bytes(&R, R_bytes)
+
+	// 4. Derive s from H(d) as in the key pair generation algorithm.
+	// Use octet strings R, Q, and M to define:
+	//
+	// 4.1 For Ed25519, digest = SHA-512(R || Q || M).
+	// Interpret digest as a little-endian integer.
+	sha2.init_512(&ctx)
+	sha2.update(&ctx, R_bytes)
+	sha2.update(&ctx, priv_key._pub_key._b[:]) // Q in NIST terminology.
+	sha2.update(&ctx, msg)
+	sha2.final(&ctx, digest_bytes[:])
+
+	sc: grp.Scalar = --- // `digest` in NIST terminology.
+	grp.sc_set_bytes_wide(&sc, &digest_bytes)
+
+	// 5. Compute S = (r + digest × s) mod n. The octet string S is the
+	// encoding of the resultant integer.
+	grp.sc_mul(&sc, &sc, &priv_key._s)
+	grp.sc_add(&sc, &sc, &r)
+
+	// 6. Form the signature as the concatenation of the octet strings
+	// R and S.
+	grp.sc_bytes(sig[32:], &sc)
+
+	// r is derived from the private nonce seed, so wipe it before returning.
+	grp.sc_clear(&r)
+}
+
+// public_key_set_bytes decodes a byte-encoded public key, and returns
+// true iff the operation was successful.
+//
+// Returns false, leaving pub_key unmodified, if len(b) is not
+// PUBLIC_KEY_SIZE or if grp.ge_set_bytes rejects the encoding.  A
+// small-order point still decodes successfully, but is recorded as not
+// valid in pub_key._is_valid.
+public_key_set_bytes :: proc "contextless" (pub_key: ^Public_Key, b: []byte) -> bool {
+	if len(b) != PUBLIC_KEY_SIZE {
+		return false
+	}
+
+	A: grp.Group_Element = ---
+	if !grp.ge_set_bytes(&A, b) {
+		return false
+	}
+
+	copy(pub_key._b[:], b)
+	// Only the negation of A is cached.
+	grp.ge_negate(&pub_key._neg_A, &A)
+	pub_key._is_valid = !grp.ge_is_small_order(&A)
+	pub_key._is_initialized = true
+
+	return true
+}
+
+// public_key_set_priv sets pub_key to the public component of priv_key.
+//
+// Panics if priv_key is uninitialized.  Every field of the public key
+// cached inside priv_key is copied into pub_key.
+public_key_set_priv :: proc(pub_key: ^Public_Key, priv_key: ^Private_Key) {
+	if !priv_key._is_initialized {
+		// It is the private key that is being checked here, so say so.
+		panic("crypto/ed25519: uninitialized private key")
+	}
+
+	src := &priv_key._pub_key
+	copy(pub_key._b[:], src._b[:])
+	grp.ge_set(&pub_key._neg_A, &src._neg_A)
+	pub_key._is_valid = src._is_valid
+	pub_key._is_initialized = src._is_initialized
+}
+
+// public_key_bytes writes the byte-encoding of pub_key to dst.
+//
+// This routine will panic if pub_key is uninitialized, or if dst is not
+// exactly PUBLIC_KEY_SIZE bytes long.
+public_key_bytes :: proc(pub_key: ^Public_Key, dst: []byte) {
+	switch {
+	case !pub_key._is_initialized:
+		panic("crypto/ed25519: uninitialized public key")
+	case len(dst) != PUBLIC_KEY_SIZE:
+		panic("crypto/ed25519: invalid destination size")
+	}
+
+	copy(dst, pub_key._b[:])
+}
+
+// public_key_equal returns true iff the byte-encodings of pub_key and
+// other are identical, as determined by crypto.compare_constant_time.
+//
+// This routine will panic if either key is uninitialized.
+public_key_equal :: proc(pub_key, other: ^Public_Key) -> bool {
+	if !(pub_key._is_initialized && other._is_initialized) {
+		panic("crypto/ed25519: uninitialized public key")
+	}
+
+	cmp := crypto.compare_constant_time(pub_key._b[:], other._b[:])
+	return cmp == 1
+}
+
+// verify returns true iff sig is a valid signature by pub_key over msg.
+// It returns false (rather than panicking) if pub_key is uninitialized
+// or if sig is not SIGNATURE_SIZE bytes long.
+//
+// The optional `allow_small_order_A` parameter will make this
+// implementation strictly compatible with FIPS 186-5, at the expense of
+// SBS-security.  Doing so is NOT recommended, and the disallowed
+// public keys all have a known discrete-log.
+verify :: proc(pub_key: ^Public_Key, msg, sig: []byte, allow_small_order_A := false) -> bool {
+	switch {
+	case !pub_key._is_initialized:
+		return false
+	case len(sig) != SIGNATURE_SIZE:
+		return false
+	}
+
+	// TLDR: Just use ristretto255.
+	//
+	// While there are two "standards" for EdDSA, existing implementations
+	// diverge (sometimes dramatically).  This implementation opts for
+	// "Algorithm 2" from "Taming the Many EdDSAs", which provides the
+	// strongest notion of security (SUF-CMA + SBS).
+	//
+	// The relevant properties are:
+	// - Reject non-canonical S.
+	// - Reject non-canonical A/R.
+	// - Reject small-order A (Extra non-standard check).
+	// - Cofactored verification equation.
+	//
+	// There are 19 possible non-canonical group element encodings of
+	// which:
+	// - 2 are small order
+	// - 10 are mixed order
+	// - 7 are not on the curve
+	//
+	// While historical implementations have been lax about enforcing
+	// that A/R are canonically encoded, that behavior is mandated by
+	// both the RFC and FIPS specification.  No valid key generation
+	// or sign implementation will ever produce non-canonically encoded
+	// public keys or signatures.
+	//
+	// There are 8 small-order group elements, 1 which is in the
+	// prime-order sub-group, and thus the probability that a properly
+	// generated A is small-order is cryptographically insignificant.
+	//
+	// Both the RFC and the FIPS standard allow for either the
+	// cofactored or non-cofactored equation, and it is possible to
+	// artificially produce signatures that are valid for the former
+	// but not the latter.  This will NEVER occur with a valid sign
+	// implementation.  The choice of the former (cofactored) is to be
+	// compatible with ABGLSV-Pornin, batch verification, and FROST
+	// (among other things).
+
+	// A signature is R || S, each 32 bytes.
+	s_bytes, r_bytes := sig[32:], sig[:32]
+
+	// 1. Reject the signature if S is not in the range [0, L).
+	s: grp.Scalar = ---
+	if !grp.sc_set_bytes(&s, s_bytes) {
+		return false
+	}
+
+	// 2. Reject the signature if the public key A is one of 8 small
+	// order points.
+	//
+	// As this check is optional and not part of the standard, we allow
+	// the caller to bypass it if desired.  Disabling the check makes
+	// the scheme NOT SBS-secure.
+	if !pub_key._is_valid && !allow_small_order_A {
+		return false
+	}
+
+	// 3. Reject the signature if A or R are non-canonical.
+	//
+	// Note: All initialized public keys are guaranteed to be canonical.
+	neg_R: grp.Group_Element = ---
+	if !grp.ge_set_bytes(&neg_R, r_bytes) {
+		return false
+	}
+	// Negate in place, so that R can be subtracted with ge_add below.
+	grp.ge_negate(&neg_R, &neg_R)
+
+	// 4. Compute the hash SHA512(R||A||M) and reduce it mod L to get a
+	// scalar h.
+	ctx: sha2.Context_512 = ---
+	h_bytes: [sha2.DIGEST_SIZE_512]byte = ---
+	sha2.init_512(&ctx)
+	sha2.update(&ctx, r_bytes)
+	sha2.update(&ctx, pub_key._b[:])
+	sha2.update(&ctx, msg)
+	sha2.final(&ctx, h_bytes[:])
+
+	h: grp.Scalar = ---
+	grp.sc_set_bytes_wide(&h, &h_bytes)
+
+	// 5. Accept if 8(s * G) - 8R - 8(h * A) = 0
+	//
+	// > first compute V = SB − R − hA and then accept if V is one of
+	// > 8 small order points (or alternatively compute 8V with 3
+	// > doublings and check against the neutral element)
+	//
+	// NOTE(review): the double scalar multiply is presumably
+	// V = [h](-A) + [s]G, going by its name and arguments; confirm
+	// against the grp package.  -R is then added to it.
+	V: grp.Group_Element = ---
+	grp.ge_double_scalarmult_basepoint_vartime(&V, &h, &pub_key._neg_A, &s)
+	grp.ge_add(&V, &V, &neg_R)
+
+	return grp.ge_is_small_order(&V)
+}

+ 62 - 0
core/crypto/hash/doc.odin

@@ -0,0 +1,62 @@
+/*
+package hash provides a generic interface to the supported hash algorithms.
+
+A high-level convenience procedure group `hash` is provided to easily
+accomplish common tasks.
+- `hash_string` - Hash a given string and return the digest.
+- `hash_bytes` - Hash a given byte slice and return the digest.
+- `hash_string_to_buffer` - Hash a given string and put the digest in
+  the third parameter.  It requires that the destination buffer
+  is at least as big as the digest size.
+- `hash_bytes_to_buffer` - Hash a given byte slice and put the computed
+  digest in the third parameter.  It requires that the destination
+  buffer is at least as big as the digest size.
+- `hash_stream` - Incrementally fully consume a `io.Stream`, and return
+  the computed digest.
+- `hash_file` - Takes a file handle and returns the computed digest.
+  A third optional boolean parameter controls if the file is streamed
+  (default), or read at once.
+
+```odin
+package hash_example
+
+import "core:crypto/hash"
+
+main :: proc() {
+	input := "Feed the fire."
+
+	// Compute the digest, using the high level API.
+	returned_digest := hash.hash(hash.Algorithm.SHA512_256, input)
+	defer delete(returned_digest)
+
+	// Variant that takes a destination buffer, instead of returning
+	// the digest.
+	digest := make([]byte, hash.DIGEST_SIZES[hash.Algorithm.BLAKE2B]) // @note: Destination buffer has to be at least as big as the digest size of the hash.
+	defer delete(digest)
+	hash.hash(hash.Algorithm.BLAKE2B, input, digest)
+}
+```
+
+A generic low level API is provided supporting the init/update/final interface
+that is typical with cryptographic hash function implementations.
+
+```odin
+package hash_example
+
+import "core:crypto/hash"
+
+main :: proc() {
+    input := "Let the cinders burn."
+
+    // Compute the digest, using the low level API.
+    ctx: hash.Context
+    digest := make([]byte, hash.DIGEST_SIZES[hash.Algorithm.SHA3_512])
+    defer delete(digest)
+
+    hash.init(&ctx, hash.Algorithm.SHA3_512)
+    hash.update(&ctx, transmute([]byte)input)
+    hash.final(&ctx, digest)
+}
+```
+*/
+package crypto_hash

+ 116 - 0
core/crypto/hash/hash.odin

@@ -0,0 +1,116 @@
+package crypto_hash
+
+/*
+    Copyright 2021 zhibog
+    Made available under the BSD-3 license.
+
+    List of contributors:
+        zhibog, dotbmp:  Initial implementation.
+*/
+
+import "core:io"
+import "core:mem"
+import "core:os"
+
+// hash_string will hash the given string and return the computed digest
+// in a slice newly allocated with the provided allocator.
+hash_string :: proc(algorithm: Algorithm, data: string, allocator := context.allocator) -> []byte {
+	raw := transmute([]byte)(data)
+	return hash_bytes(algorithm, raw, allocator)
+}
+
+// hash_bytes will hash the given byte slice and return the computed
+// digest in a slice newly allocated with the provided allocator.  The
+// returned slice is DIGEST_SIZES[algorithm] bytes long.
+hash_bytes :: proc(algorithm: Algorithm, data: []byte, allocator := context.allocator) -> []byte {
+	digest := make([]byte, DIGEST_SIZES[algorithm], allocator)
+	hash_bytes_to_buffer(algorithm, data, digest)
+	return digest
+}
+
+// hash_string_to_buffer will hash the given string and write the
+// computed digest into the third parameter.  It requires that the
+// destination buffer is at least as big as the digest size.
+hash_string_to_buffer :: proc(algorithm: Algorithm, data: string, hash: []byte) {
+	raw := transmute([]byte)(data)
+	hash_bytes_to_buffer(algorithm, raw, hash)
+}
+
+// hash_bytes_to_buffer will hash the given byte slice in one shot
+// (init, update, final) and write the computed digest into the third
+// parameter.  It requires that the destination buffer is at least as
+// big as the digest size.
+hash_bytes_to_buffer :: proc(algorithm: Algorithm, data, hash: []byte) {
+	state: Context
+
+	init(&state, algorithm)
+	update(&state, data)
+	final(&state, hash)
+}
+
+// hash_stream will incrementally read the stream until EOF, and return
+// the computed digest in a newly allocated slice.  Any read error other
+// than EOF is returned as-is, along with a nil digest.
+hash_stream :: proc(
+	algorithm: Algorithm,
+	s: io.Stream,
+	allocator := context.allocator,
+) -> (
+	[]byte,
+	io.Error,
+) {
+	state: Context
+
+	// The read buffer holds the input, wipe it on the way out.
+	chunk: [MAX_BLOCK_SIZE * 4]byte
+	defer mem.zero_explicit(&chunk, size_of(chunk))
+
+	init(&state, algorithm)
+
+	for {
+		n_read, read_err := io.read(s, chunk[:])
+
+		// Consume the data before looking at the error, so that a read
+		// that returns both n > 0 and EOF is handled correctly.
+		if n_read > 0 {
+			update(&state, chunk[:n_read])
+		}
+		if read_err == .EOF {
+			break
+		}
+		if read_err != .None {
+			return nil, read_err
+		}
+	}
+
+	digest := make([]byte, DIGEST_SIZES[algorithm], allocator)
+	final(&state, digest)
+
+	return digest, io.Error.None
+}
+
+// hash_file will read the file provided by the given handle and return the
+// computed digest in a newly allocated slice.
+//
+// By default the file is streamed via hash_stream.  If load_at_once is
+// set, the entire file is read into a temporary buffer (allocated and
+// freed with allocator) and hashed in one shot; a failed read is
+// reported as io.Error.Unknown.
+hash_file :: proc(
+	algorithm: Algorithm,
+	hd: os.Handle,
+	load_at_once := false,
+	allocator := context.allocator,
+) -> (
+	[]byte,
+	io.Error,
+) {
+	if load_at_once {
+		contents, read_ok := os.read_entire_file(hd, allocator)
+		if !read_ok {
+			return nil, io.Error.Unknown
+		}
+		defer delete(contents, allocator)
+
+		return hash_bytes(algorithm, contents, allocator), io.Error.None
+	}
+
+	return hash_stream(algorithm, os.stream_from_handle(hd), allocator)
+}
+
+// hash is the high-level convenience procedure group, that dispatches
+// to the stream, file, byte slice, and string variants (including the
+// `_to_buffer` variants that write into a caller provided buffer) based
+// on the arguments.
+hash :: proc {
+	hash_stream,
+	hash_file,
+	hash_bytes,
+	hash_string,
+	hash_bytes_to_buffer,
+	hash_string_to_buffer,
+}

+ 353 - 0
core/crypto/hash/low_level.odin

@@ -0,0 +1,353 @@
+package crypto_hash
+
+import "core:crypto/blake2b"
+import "core:crypto/blake2s"
+import "core:crypto/sha2"
+import "core:crypto/sha3"
+import "core:crypto/sm3"
+import "core:crypto/legacy/keccak"
+import "core:crypto/legacy/md5"
+import "core:crypto/legacy/sha1"
+
+import "core:reflect"
+
+// MAX_DIGEST_SIZE is the maximum size digest that can be returned by any
+// of the Algorithms supported via this package, in bytes.
+MAX_DIGEST_SIZE :: 64
+// MAX_BLOCK_SIZE is the maximum block size used by any of the Algorithms
+// supported by this package, in bytes.
+MAX_BLOCK_SIZE :: sha3.BLOCK_SIZE_224
+
+// Algorithm is the algorithm identifier associated with a given Context.
+//
+// `Invalid` is the first member and thus the zero value; it is what a
+// Context reports after reset.  The `Legacy_` and `Insecure_` members
+// are backed by the implementations under `core:crypto/legacy`.
+Algorithm :: enum {
+	Invalid,
+	BLAKE2B,
+	BLAKE2S,
+	SHA224,
+	SHA256,
+	SHA384,
+	SHA512,
+	SHA512_256,
+	SHA3_224,
+	SHA3_256,
+	SHA3_384,
+	SHA3_512,
+	SM3,
+	Legacy_KECCAK_224,
+	Legacy_KECCAK_256,
+	Legacy_KECCAK_384,
+	Legacy_KECCAK_512,
+	Insecure_MD5,
+	Insecure_SHA1,
+}
+
+// ALGORITHM_NAMES maps each Algorithm to its human-readable name.
+ALGORITHM_NAMES := [Algorithm]string {
+	.Invalid           = "Invalid",
+	.BLAKE2B           = "BLAKE2b",
+	.BLAKE2S           = "BLAKE2s",
+	.SHA224            = "SHA-224",
+	.SHA256            = "SHA-256",
+	.SHA384            = "SHA-384",
+	.SHA512            = "SHA-512",
+	.SHA512_256        = "SHA-512/256",
+	.SHA3_224          = "SHA3-224",
+	.SHA3_256          = "SHA3-256",
+	.SHA3_384          = "SHA3-384",
+	.SHA3_512          = "SHA3-512",
+	.SM3               = "SM3",
+	.Legacy_KECCAK_224 = "Keccak-224",
+	.Legacy_KECCAK_256 = "Keccak-256",
+	.Legacy_KECCAK_384 = "Keccak-384",
+	.Legacy_KECCAK_512 = "Keccak-512",
+	.Insecure_MD5      = "MD5",
+	.Insecure_SHA1     = "SHA-1",
+}
+
+// DIGEST_SIZES maps each Algorithm to its digest size in bytes.  The
+// entry for `.Invalid` is 0.
+DIGEST_SIZES := [Algorithm]int {
+	.Invalid           = 0,
+	.BLAKE2B           = blake2b.DIGEST_SIZE,
+	.BLAKE2S           = blake2s.DIGEST_SIZE,
+	.SHA224            = sha2.DIGEST_SIZE_224,
+	.SHA256            = sha2.DIGEST_SIZE_256,
+	.SHA384            = sha2.DIGEST_SIZE_384,
+	.SHA512            = sha2.DIGEST_SIZE_512,
+	.SHA512_256        = sha2.DIGEST_SIZE_512_256,
+	.SHA3_224          = sha3.DIGEST_SIZE_224,
+	.SHA3_256          = sha3.DIGEST_SIZE_256,
+	.SHA3_384          = sha3.DIGEST_SIZE_384,
+	.SHA3_512          = sha3.DIGEST_SIZE_512,
+	.SM3               = sm3.DIGEST_SIZE,
+	.Legacy_KECCAK_224 = keccak.DIGEST_SIZE_224,
+	.Legacy_KECCAK_256 = keccak.DIGEST_SIZE_256,
+	.Legacy_KECCAK_384 = keccak.DIGEST_SIZE_384,
+	.Legacy_KECCAK_512 = keccak.DIGEST_SIZE_512,
+	.Insecure_MD5      = md5.DIGEST_SIZE,
+	.Insecure_SHA1     = sha1.DIGEST_SIZE,
+}
+
+// BLOCK_SIZES maps each Algorithm to its block size in bytes.  The
+// entry for `.Invalid` is 0.
+BLOCK_SIZES := [Algorithm]int {
+	.Invalid           = 0,
+	.BLAKE2B           = blake2b.BLOCK_SIZE,
+	.BLAKE2S           = blake2s.BLOCK_SIZE,
+	.SHA224            = sha2.BLOCK_SIZE_256,
+	.SHA256            = sha2.BLOCK_SIZE_256,
+	.SHA384            = sha2.BLOCK_SIZE_512,
+	.SHA512            = sha2.BLOCK_SIZE_512,
+	.SHA512_256        = sha2.BLOCK_SIZE_512,
+	.SHA3_224          = sha3.BLOCK_SIZE_224,
+	.SHA3_256          = sha3.BLOCK_SIZE_256,
+	.SHA3_384          = sha3.BLOCK_SIZE_384,
+	.SHA3_512          = sha3.BLOCK_SIZE_512,
+	.SM3               = sm3.BLOCK_SIZE,
+	.Legacy_KECCAK_224 = keccak.BLOCK_SIZE_224,
+	.Legacy_KECCAK_256 = keccak.BLOCK_SIZE_256,
+	.Legacy_KECCAK_384 = keccak.BLOCK_SIZE_384,
+	.Legacy_KECCAK_512 = keccak.BLOCK_SIZE_512,
+	.Insecure_MD5      = md5.BLOCK_SIZE,
+	.Insecure_SHA1     = sha1.BLOCK_SIZE,
+}
+
+// Context is a concrete instantiation of a specific hash algorithm.
+//
+// `_algo` is set by init and clone, and is `.Invalid` after reset.
+// `_impl` holds the algorithm specific state, and is nil while the
+// Context is uninitialized.
+Context :: struct {
+	_algo: Algorithm,
+	_impl: union {
+		blake2b.Context,
+		blake2s.Context,
+		sha2.Context_256,
+		sha2.Context_512,
+		sha3.Context,
+		sm3.Context,
+		keccak.Context,
+		md5.Context,
+		sha1.Context,
+	},
+}
+
+// _IMPL_IDS maps each Algorithm to the typeid of the Context._impl
+// union variant that backs it.  It is used by init to select the
+// variant in place.
+@(private)
+_IMPL_IDS := [Algorithm]typeid {
+	.Invalid           = nil,
+	.BLAKE2B           = typeid_of(blake2b.Context),
+	.BLAKE2S           = typeid_of(blake2s.Context),
+	.SHA224            = typeid_of(sha2.Context_256),
+	.SHA256            = typeid_of(sha2.Context_256),
+	.SHA384            = typeid_of(sha2.Context_512),
+	.SHA512            = typeid_of(sha2.Context_512),
+	.SHA512_256        = typeid_of(sha2.Context_512),
+	.SHA3_224          = typeid_of(sha3.Context),
+	.SHA3_256          = typeid_of(sha3.Context),
+	.SHA3_384          = typeid_of(sha3.Context),
+	.SHA3_512          = typeid_of(sha3.Context),
+	.SM3               = typeid_of(sm3.Context),
+	.Legacy_KECCAK_224 = typeid_of(keccak.Context),
+	.Legacy_KECCAK_256 = typeid_of(keccak.Context),
+	.Legacy_KECCAK_384 = typeid_of(keccak.Context),
+	.Legacy_KECCAK_512 = typeid_of(keccak.Context),
+	.Insecure_MD5      = typeid_of(md5.Context),
+	.Insecure_SHA1     = typeid_of(sha1.Context),
+}
+
+// init initializes a Context with a specific hash Algorithm.  A Context
+// that is already initialized is reset first.
+//
+// This routine will panic if algorithm is `.Invalid` or is not a
+// supported Algorithm.
+init :: proc(ctx: ^Context, algorithm: Algorithm) {
+	if ctx._impl != nil {
+		reset(ctx)
+	}
+
+	// Directly specialize the union by setting the type ID (save a copy).
+	reflect.set_union_variant_typeid(
+		ctx._impl,
+		_IMPL_IDS[algorithm],
+	)
+	// The union now holds the right variant, initialize it in place.
+	switch algorithm {
+	case .BLAKE2B:
+		blake2b.init(&ctx._impl.(blake2b.Context))
+	case .BLAKE2S:
+		blake2s.init(&ctx._impl.(blake2s.Context))
+	case .SHA224:
+		sha2.init_224(&ctx._impl.(sha2.Context_256))
+	case .SHA256:
+		sha2.init_256(&ctx._impl.(sha2.Context_256))
+	case .SHA384:
+		sha2.init_384(&ctx._impl.(sha2.Context_512))
+	case .SHA512:
+		sha2.init_512(&ctx._impl.(sha2.Context_512))
+	case .SHA512_256:
+		sha2.init_512_256(&ctx._impl.(sha2.Context_512))
+	case .SHA3_224:
+		sha3.init_224(&ctx._impl.(sha3.Context))
+	case .SHA3_256:
+		sha3.init_256(&ctx._impl.(sha3.Context))
+	case .SHA3_384:
+		sha3.init_384(&ctx._impl.(sha3.Context))
+	case .SHA3_512:
+		sha3.init_512(&ctx._impl.(sha3.Context))
+	case .SM3:
+		sm3.init(&ctx._impl.(sm3.Context))
+	case .Legacy_KECCAK_224:
+		keccak.init_224(&ctx._impl.(keccak.Context))
+	case .Legacy_KECCAK_256:
+		keccak.init_256(&ctx._impl.(keccak.Context))
+	case .Legacy_KECCAK_384:
+		keccak.init_384(&ctx._impl.(keccak.Context))
+	case .Legacy_KECCAK_512:
+		keccak.init_512(&ctx._impl.(keccak.Context))
+	case .Insecure_MD5:
+		md5.init(&ctx._impl.(md5.Context))
+	case .Insecure_SHA1:
+		sha1.init(&ctx._impl.(sha1.Context))
+	case .Invalid:
+		panic("crypto/hash: uninitialized algorithm")
+	case:
+		panic("crypto/hash: invalid algorithm")
+	}
+
+	ctx._algo = algorithm
+}
+
+// update adds more data to the Context, by dispatching to the update
+// routine of the underlying implementation.
+//
+// This routine will panic if the Context is uninitialized.
+update :: proc(ctx: ^Context, data: []byte) {
+	switch &impl in ctx._impl {
+	case blake2b.Context:
+		blake2b.update(&impl, data)
+	case blake2s.Context:
+		blake2s.update(&impl, data)
+	case sha2.Context_256:
+		sha2.update(&impl, data)
+	case sha2.Context_512:
+		sha2.update(&impl, data)
+	case sha3.Context:
+		sha3.update(&impl, data)
+	case sm3.Context:
+		sm3.update(&impl, data)
+	case keccak.Context:
+		keccak.update(&impl, data)
+	case md5.Context:
+		md5.update(&impl, data)
+	case sha1.Context:
+		sha1.update(&impl, data)
+	case:
+		panic("crypto/hash: uninitialized algorithm")
+	}
+}
+
+// final finalizes the Context, writes the digest to hash, and calls
+// reset on the Context.
+//
+// Iff finalize_clone is set, final will work on a copy of the Context,
+// which is useful for calculating rolling digests.  In that case the
+// Context is NOT reset.
+//
+// This routine will panic if the Context is uninitialized.
+final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
+	switch &impl in ctx._impl {
+	case blake2b.Context:
+		blake2b.final(&impl, hash, finalize_clone)
+	case blake2s.Context:
+		blake2s.final(&impl, hash, finalize_clone)
+	case sha2.Context_256:
+		sha2.final(&impl, hash, finalize_clone)
+	case sha2.Context_512:
+		sha2.final(&impl, hash, finalize_clone)
+	case sha3.Context:
+		sha3.final(&impl, hash, finalize_clone)
+	case sm3.Context:
+		sm3.final(&impl, hash, finalize_clone)
+	case keccak.Context:
+		keccak.final(&impl, hash, finalize_clone)
+	case md5.Context:
+		md5.final(&impl, hash, finalize_clone)
+	case sha1.Context:
+		sha1.final(&impl, hash, finalize_clone)
+	case:
+		panic("crypto/hash: uninitialized algorithm")
+	}
+
+	// Keep the running state around when only a clone was finalized.
+	if !finalize_clone {
+		reset(ctx)
+	}
+}
+
+// clone clones the Context other into ctx.  Cloning a Context into
+// itself is a no-op, and a ctx that is already initialized is reset
+// before being overwritten.
+//
+// This routine will panic if other is uninitialized.
+clone :: proc(ctx, other: ^Context) {
+	// XXX/yawning: Maybe these cases should panic, because both cases,
+	// are probably bugs.
+	if ctx == other {
+		return
+	}
+	if ctx._impl != nil {
+		reset(ctx)
+	}
+
+	ctx._algo = other._algo
+
+	// Select the same union variant as other, then clone the
+	// implementation state into it in place.
+	reflect.set_union_variant_typeid(
+		ctx._impl,
+		reflect.union_variant_typeid(other._impl),
+	)
+	switch &src_impl in other._impl {
+	case blake2b.Context:
+		blake2b.clone(&ctx._impl.(blake2b.Context), &src_impl)
+	case blake2s.Context:
+		blake2s.clone(&ctx._impl.(blake2s.Context), &src_impl)
+	case sha2.Context_256:
+		sha2.clone(&ctx._impl.(sha2.Context_256), &src_impl)
+	case sha2.Context_512:
+		sha2.clone(&ctx._impl.(sha2.Context_512), &src_impl)
+	case sha3.Context:
+		sha3.clone(&ctx._impl.(sha3.Context), &src_impl)
+	case sm3.Context:
+		sm3.clone(&ctx._impl.(sm3.Context), &src_impl)
+	case keccak.Context:
+		keccak.clone(&ctx._impl.(keccak.Context), &src_impl)
+	case md5.Context:
+		md5.clone(&ctx._impl.(md5.Context), &src_impl)
+	case sha1.Context:
+		sha1.clone(&ctx._impl.(sha1.Context), &src_impl)
+	case:
+		panic("crypto/hash: uninitialized algorithm")
+	}
+}
+
+// reset sanitizes the Context.  The Context must be re-initialized to
+// be used again.  Calling reset on an uninitialized Context is allowed.
+reset :: proc(ctx: ^Context) {
+	// Let the underlying implementation sanitize its own state first.
+	switch &impl in ctx._impl {
+	case blake2b.Context:
+		blake2b.reset(&impl)
+	case blake2s.Context:
+		blake2s.reset(&impl)
+	case sha2.Context_256:
+		sha2.reset(&impl)
+	case sha2.Context_512:
+		sha2.reset(&impl)
+	case sha3.Context:
+		sha3.reset(&impl)
+	case sm3.Context:
+		sm3.reset(&impl)
+	case keccak.Context:
+		keccak.reset(&impl)
+	case md5.Context:
+		md5.reset(&impl)
+	case sha1.Context:
+		sha1.reset(&impl)
+	case:
+	// Unlike clone, calling reset repeatedly is fine.
+	}
+
+	// Return the Context to the uninitialized state.
+	ctx._algo = .Invalid
+	ctx._impl = nil
+}
+
+// algorithm returns the Algorithm used by a Context instance.  An
+// uninitialized (or reset) Context reports `.Invalid`.
+algorithm :: proc(ctx: ^Context) -> Algorithm {
+	return ctx._algo
+}
+
+// digest_size returns the digest size of a Context instance in bytes,
+// as looked up in DIGEST_SIZES (0 for a Context that was reset).
+digest_size :: proc(ctx: ^Context) -> int {
+	return DIGEST_SIZES[ctx._algo]
+}
+
+// block_size returns the block size of a Context instance in bytes,
+// as looked up in BLOCK_SIZES (0 for a Context that was reset).
+block_size :: proc(ctx: ^Context) -> int {
+	return BLOCK_SIZES[ctx._algo]
+}

+ 103 - 0
core/crypto/hkdf/hkdf.odin

@@ -0,0 +1,103 @@
+/*
+package hkdf implements the HKDF HMAC-based Extract-and-Expand Key
+Derivation Function.
+
+See: https://www.rfc-editor.org/rfc/rfc5869
+*/
+package hkdf
+
+import "core:crypto/hash"
+import "core:crypto/hmac"
+import "core:mem"
+
+// extract_and_expand derives output keying material (OKM) via the
+// HKDF-Extract and HKDF-Expand algorithms, with the specified hash
+// function, salt, input keying material (IKM), and optional info.
+// The dst buffer must be less-than-or-equal to 255 HMAC tags.
+extract_and_expand :: proc(algorithm: hash.Algorithm, salt, ikm, info, dst: []byte) {
+	prk_len := hash.DIGEST_SIZES[algorithm]
+
+	// The intermediate PRK is key material, so it lives on the stack
+	// and is wiped on the way out.
+	prk_buf: [hash.MAX_DIGEST_SIZE]byte
+	prk := prk_buf[:prk_len]
+	defer mem.zero_explicit(raw_data(prk), prk_len)
+
+	extract(algorithm, salt, ikm, prk)
+	expand(algorithm, prk, info, dst)
+}
+
+// extract derives a pseudorandom key (PRK) via the HKDF-Extract algorithm,
+// with the specified hash function, salt, and input keying material (IKM).
+// It requires that the dst buffer be the HMAC tag size for the specified
+// hash function.
+extract :: proc(algorithm: hash.Algorithm, salt, ikm, dst: []byte) {
+	// PRK = HMAC-Hash(salt, IKM)
+	//
+	// Note: hmac.sum takes (algorithm, dst, msg, key), so the salt is
+	// the HMAC key, and the IKM is the message.
+	hmac.sum(algorithm, dst, ikm, salt)
+}
+
+// expand derives output keying material (OKM) via the HKDF-Expand algorithm,
+// with the specified hash function, pseudorandom key (PRK), and optional
+// info.  The dst buffer must be less-than-or-equal to 255 HMAC tags.
+//
+// An empty dst is a no-op, and this routine will panic if dst is longer
+// than 255 HMAC tags.
+expand :: proc(algorithm: hash.Algorithm, prk, info, dst: []byte) {
+	h_len := hash.DIGEST_SIZES[algorithm]
+
+	// (<= 255*HashLen)
+	dk_len := len(dst)
+	switch {
+	case dk_len == 0:
+		return
+	case dk_len > h_len * 255:
+		panic("crypto/hkdf: derived key too long")
+	case:
+	}
+
+	// The output OKM is calculated as follows:
+	//
+	// N = ceil(L/HashLen)
+	// T = T(1) | T(2) | T(3) | ... | T(N)
+	// OKM = first L octets of T
+	//
+	// where:
+	// T(0) = empty string (zero length)
+	// T(1) = HMAC-Hash(PRK, T(0) | info | 0x01)
+	// T(2) = HMAC-Hash(PRK, T(1) | info | 0x02)
+	// T(3) = HMAC-Hash(PRK, T(2) | info | 0x03)
+	// ...
+
+	// n full blocks are written directly into dst, followed by one
+	// truncated block if r > 0.
+	n := dk_len / h_len
+	r := dk_len % h_len
+
+	// Key the HMAC once, each block works on a clone of base.
+	base: hmac.Context
+	defer hmac.reset(&base)
+
+	hmac.init(&base, algorithm, prk)
+
+	dst_blk := dst
+	prev: []byte // T(i-1), empty for the first block.
+
+	for i in 1 ..= n {
+		_F(&base, prev, info, i, dst_blk[:h_len])
+
+		// prev aliases the block that was just written to dst.
+		prev = dst_blk[:h_len]
+		dst_blk = dst_blk[h_len:]
+	}
+
+	if r > 0 {
+		// The final block is only partially used, so derive it into a
+		// scratch buffer (wiped on scope exit), and copy the first r
+		// bytes, as dst_blk has exactly r bytes left.
+		tmp: [hash.MAX_DIGEST_SIZE]byte
+		blk := tmp[:h_len]
+		defer mem.zero_explicit(raw_data(blk), h_len)
+
+		_F(&base, prev, info, n + 1, blk)
+		copy(dst_blk, blk)
+	}
+}
+
+// _F computes one HKDF-Expand block, T(i) = HMAC-Hash(PRK, prev | info | i),
+// into dst_blk, using a clone of the pre-keyed base Context so that base
+// can be reused for the next block.  Only the low 8 bits of i are used.
+@(private)
+_F :: proc(base: ^hmac.Context, prev, info: []byte, i: int, dst_blk: []byte) {
+	prf: hmac.Context
+
+	hmac.clone(&prf, base)
+	hmac.update(&prf, prev)
+	hmac.update(&prf, info)
+	hmac.update(&prf, []byte{u8(i)})
+	hmac.final(&prf, dst_blk)
+}

+ 174 - 0
core/crypto/hmac/hmac.odin

@@ -0,0 +1,174 @@
+/*
+package hmac implements the HMAC MAC algorithm.
+
+See:
+- https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.198-1.pdf
+*/
+package hmac
+
+import "core:crypto"
+import "core:crypto/hash"
+import "core:mem"
+
+// sum will compute the HMAC with the specified algorithm and key
+// over msg in one shot (init, update, final), and write the computed
+// tag to dst.  It requires that the dst buffer is the tag size.
+sum :: proc(algorithm: hash.Algorithm, dst, msg, key: []byte) {
+	mac: Context
+
+	init(&mac, algorithm, key)
+	update(&mac, msg)
+	final(&mac, dst)
+}
+
+// verify will recompute the HMAC tag with the specified algorithm and
+// key over msg, and return true iff it matches tag.  The comparison is
+// done via crypto.compare_constant_time.  It requires that the tag is
+// correctly sized.
+verify :: proc(algorithm: hash.Algorithm, tag, msg, key: []byte) -> bool {
+	scratch: [hash.MAX_DIGEST_SIZE]byte
+	tag_len := hash.DIGEST_SIZES[algorithm]
+	expected := scratch[:tag_len]
+
+	sum(algorithm, expected, msg, key)
+
+	is_equal := crypto.compare_constant_time(expected, tag) == 1
+	return is_equal
+}
+
+// Context is a concrete instantiation of HMAC with a specific hash
+// algorithm.
+Context :: struct {
+	_o_hash:         hash.Context, // H(k ^ opad) (not finalized)
+	_i_hash:         hash.Context, // H(k ^ ipad) (not finalized)
+	_tag_sz:         int,
+	_is_initialized: bool,
+}
+
+// init initializes a Context with a specific hash Algorithm and key.
+// A Context that is already initialized is reset first.
+init :: proc(ctx: ^Context, algorithm: hash.Algorithm, key: []byte) {
+	if ctx._is_initialized {
+		reset(ctx)
+	}
+
+	// Key the inner and outer hashes.
+	_init_hashes(ctx, algorithm, key)
+
+	// The tag size is the digest size of the underlying hash.
+	ctx._tag_sz = hash.DIGEST_SIZES[algorithm]
+	ctx._is_initialized = true
+}
+
+// update adds more data to the Context.  The Context must be
+// initialized.
+update :: proc(ctx: ^Context, data: []byte) {
+	assert(ctx._is_initialized)
+
+	// The message is only ever fed to the inner hash.
+	hash.update(&ctx._i_hash, data)
+}
+
+// final finalizes the Context, writes the tag to dst, and calls
+// reset on the Context.  The Context must be initialized, and this
+// routine will panic if dst is not exactly the tag size.
+final :: proc(ctx: ^Context, dst: []byte) {
+	assert(ctx._is_initialized)
+
+	defer (reset(ctx))
+
+	if len(dst) != ctx._tag_sz {
+		panic("crypto/hmac: invalid destination tag size")
+	}
+
+	// dst doubles as the scratch buffer for the inner digest.
+	hash.final(&ctx._i_hash, dst) // H((k ^ ipad) || text)
+
+	hash.update(&ctx._o_hash, dst) // H((k ^ opad) || H((k ^ ipad) || text))
+	hash.final(&ctx._o_hash, dst)
+}
+
+// clone clones the Context other into ctx.  Cloning a Context into
+// itself is a no-op.
+clone :: proc(ctx, other: ^Context) {
+	if ctx == other {
+		return
+	}
+
+	// hash.clone resets the destination hash if it was in use.
+	hash.clone(&ctx._o_hash, &other._o_hash)
+	hash.clone(&ctx._i_hash, &other._i_hash)
+	ctx._tag_sz = other._tag_sz
+	ctx._is_initialized = other._is_initialized
+}
+
+// reset sanitizes the Context.  The Context must be re-initialized to
+// be used again.  Calling reset on an uninitialized Context is a no-op.
+reset :: proc(ctx: ^Context) {
+	if !ctx._is_initialized {
+		return
+	}
+
+	// Both hashes hold state derived from the key.
+	hash.reset(&ctx._o_hash)
+	hash.reset(&ctx._i_hash)
+	ctx._tag_sz = 0
+	ctx._is_initialized = false
+}
+
+// algorithm returns the Algorithm used by a Context instance, as
+// reported by the inner hash.  The Context must be initialized.
+algorithm :: proc(ctx: ^Context) -> hash.Algorithm {
+	assert(ctx._is_initialized)
+
+	return hash.algorithm(&ctx._i_hash)
+}
+
+// tag_size returns the tag size of a Context instance in bytes.  The
+// Context must be initialized.
+tag_size :: proc(ctx: ^Context) -> int {
+	assert(ctx._is_initialized)
+
+	return ctx._tag_sz
+}
+
+// _I_PAD is the byte that each byte of K0 is XORed with to derive the
+// inner padded key (K0 ^ ipad).
+@(private)
+_I_PAD :: 0x36
+// _O_PAD is the byte that each byte of K0 is XORed with to derive the
+// outer padded key (K0 ^ opad).
+//
+// Note: An attribute only applies to the declaration that immediately
+// follows it, so each constant needs its own `@(private)`.
+@(private)
+_O_PAD :: 0x5c
+
+// _init_hashes derives the B-byte key K0 from key, and keys the inner
+// and outer hashes of ctx with (K0 ^ ipad) and (K0 ^ opad) respectively.
+// All key-derived scratch buffers are wiped on return.
+@(private)
+_init_hashes :: proc(ctx: ^Context, algorithm: hash.Algorithm, key: []byte) {
+	K0_buf: [hash.MAX_BLOCK_SIZE]byte
+	kPad_buf: [hash.MAX_BLOCK_SIZE]byte
+
+	B := hash.BLOCK_SIZES[algorithm]
+	K0 := K0_buf[:B]
+	defer mem.zero_explicit(raw_data(K0), B)
+
+	if len(key) <= B {
+		// The key fits in a block: K0 is the key, padded with zeros up
+		// to B bytes.  K0 is zero-initialized, so copying the key is
+		// sufficient for both the equal and the shorter case.
+		copy(K0, key)
+	} else {
+		// The key is longer than a block: K0 is H(key), padded with
+		// zeros up to B bytes (K0 = H(K) || 00...00).
+		scratch := &ctx._o_hash // Saves allocating a hash.Context.
+		hash.init(scratch, algorithm)
+		hash.update(scratch, key)
+		hash.final(scratch, K0)
+	}
+
+	// Initialize the hashes, and write the padded keys:
+	// - ctx._i_hash -> H(K0 ^ ipad)
+	// - ctx._o_hash -> H(K0 ^ opad)
+
+	hash.init(&ctx._o_hash, algorithm)
+	hash.init(&ctx._i_hash, algorithm)
+
+	kPad := kPad_buf[:B]
+	defer mem.zero_explicit(raw_data(kPad), B)
+
+	for i in 0 ..< B {
+		kPad[i] = K0[i] ~ _I_PAD
+	}
+	hash.update(&ctx._i_hash, kPad)
+
+	for i in 0 ..< B {
+		kPad[i] = K0[i] ~ _O_PAD
+	}
+	hash.update(&ctx._o_hash, kPad)
+}

+ 116 - 0
core/crypto/kmac/kmac.odin

@@ -0,0 +1,116 @@
+/*
+package kmac implements the KMAC MAC algorithm.
+
+See:
+- https://nvlpubs.nist.gov/nistpubs/specialpublications/nist.sp.800-185.pdf
+*/
+package kmac
+
+import "../_sha3"
+import "core:crypto"
+import "core:crypto/shake"
+
+// MIN_KEY_SIZE_128 is the minimum key size for KMAC128 in bytes.
+MIN_KEY_SIZE_128 :: 128 / 8
+// MIN_KEY_SIZE_256 is the minimum key size for KMAC256 in bytes.
+MIN_KEY_SIZE_256 :: 256 / 8
+
+// MIN_TAG_SIZE is the absolute minimum tag size for KMAC in bytes
+// (SP 800-185, Section 8.4.2).  Most callers SHOULD use at least
+// 128-bits if not 256-bits for the tag size.
+MIN_TAG_SIZE :: 32 / 8
+
+// sum will compute the KMAC with the specified security strength,
+// key, and domain separator over msg in one shot, and write the
+// computed tag to dst.
+//
+// This routine will panic if the key is shorter than sec_strength / 8
+// bytes, or if dst is shorter than MIN_TAG_SIZE.
+sum :: proc(sec_strength: int, dst, msg, key, domain_sep: []byte) {
+	mac: Context
+
+	_init_kmac(&mac, key, domain_sep, sec_strength)
+	update(&mac, msg)
+	final(&mac, dst)
+}
+
+// verify will verify the KMAC tag computed with the specified security
+// strength, key and domain separator over msg and return true iff the
+// tag is valid.
+//
+// A scratch buffer of len(tag) bytes is allocated with allocator to
+// hold the recomputed tag, and is released before returning.  The
+// comparison is done via crypto.compare_constant_time.
+verify :: proc(sec_strength: int, tag, msg, key, domain_sep: []byte, allocator := context.temp_allocator) -> bool {
+	derived_tag := make([]byte, len(tag), allocator)
+	// The scratch tag never leaves this routine, so release it rather
+	// than leaking it when a non-temporary allocator is supplied.
+	defer delete(derived_tag, allocator)
+
+	sum(sec_strength, derived_tag, msg, key, domain_sep)
+
+	return crypto.compare_constant_time(derived_tag, tag) == 1
+}
+
+// Context is a KMAC instance.  It is a distinct shake.Context, and the
+// routines in this package transmute it back to the underlying type.
+Context :: distinct shake.Context
+
+// init_128 initializes a Context for KMAC128.  This routine will panic if
+// the key length is less than MIN_KEY_SIZE_128.
+init_128 :: proc(ctx: ^Context, key, domain_sep: []byte) {
+	_init_kmac(ctx, key, domain_sep, 128)
+}
+
+// init_256 initializes a Context for KMAC256, with the given key and
+// domain separator.  This routine will panic if the key length is less
+// than MIN_KEY_SIZE_256.
+init_256 :: proc(ctx: ^Context, key, domain_sep: []byte) {
+	_init_kmac(ctx, key, domain_sep, 256)
+}
+
+// update adds more data to the Context.  The Context must be
+// initialized.
+update :: proc(ctx: ^Context, data: []byte) {
+	assert(ctx.is_initialized)
+
+	// Context is a distinct shake.Context, so this is a plain write.
+	shake.write(transmute(^shake.Context)(ctx), data)
+}
+
+// final finalizes the Context, writes the tag to dst, and calls reset
+// on the Context.  This routine will panic if the dst length is less than
+// MIN_TAG_SIZE.  The Context must be initialized.
+final :: proc(ctx: ^Context, dst: []byte) {
+	assert(ctx.is_initialized)
+	// The reset is deferred, so that it runs once the tag is written.
+	defer reset(ctx)
+
+	if len(dst) < MIN_TAG_SIZE {
+		panic("crypto/kmac: invalid KMAC tag_size, too short")
+	}
+
+	_sha3.final_cshake(transmute(^_sha3.Context)(ctx), dst)
+}
+
+// clone clones the Context other into ctx.  Cloning a Context into
+// itself is a no-op.
+clone :: proc(ctx, other: ^Context) {
+	if ctx == other {
+		return
+	}
+
+	shake.clone(transmute(^shake.Context)(ctx), transmute(^shake.Context)(other))
+}
+
+// reset sanitizes the Context.  The Context must be re-initialized to
+// be used again.  Calling reset on an uninitialized Context is a no-op.
+reset :: proc(ctx: ^Context) {
+	if !ctx.is_initialized {
+		return
+	}
+
+	shake.reset(transmute(^shake.Context)(ctx))
+}
+
+// _init_kmac initializes ctx for KMAC with the given key, customization
+// string s, and security strength in bits.  A Context that is already
+// initialized is reset first.
+//
+// This routine will panic if the key is shorter than sec_strength / 8
+// bytes.
+@(private)
+_init_kmac :: proc(ctx: ^Context, key, s: []byte, sec_strength: int) {
+	if ctx.is_initialized {
+		reset(ctx)
+	}
+
+	if len(key) < sec_strength / 8 {
+		panic("crypto/kmac: invalid KMAC key, too short")
+	}
+
+	// Set up cSHAKE with the function name "KMAC" and the customization
+	// string s, then absorb the key via bytepad.
+	ctx_ := transmute(^_sha3.Context)(ctx)
+	_sha3.init_cshake(ctx_, N_KMAC, s, sec_strength)
+	_sha3.bytepad(ctx_, [][]byte{key}, _sha3.rate_cshake(sec_strength))
+}
+
+// N_KMAC is the function name ("KMAC") passed to _sha3.init_cshake by
+// _init_kmac.
+@(private)
+N_KMAC := []byte{'K', 'M', 'A', 'C'}

+ 55 - 337
core/crypto/legacy/keccak/keccak.odin

@@ -1,3 +1,11 @@
+/*
+package keccak implements the Keccak hash algorithm family.
+
+During the SHA-3 standardization process, the padding scheme was changed,
+thus Keccak and SHA-3 produce different outputs.  Most users should use
+SHA-3 and/or SHAKE instead, however the legacy algorithm is provided for
+backward compatibility purposes.
+*/
 package keccak
 package keccak
 
 
 /*
 /*
@@ -6,372 +14,82 @@ package keccak
 
 
     List of contributors:
     List of contributors:
         zhibog, dotbmp:  Initial implementation.
         zhibog, dotbmp:  Initial implementation.
-
-    Interface for the Keccak hashing algorithm.
-    This is done because the padding in the SHA3 standard was changed by the NIST, resulting in a different output.
 */
 */
 
 
-import "core:io"
-import "core:os"
-
 import "../../_sha3"
 import "../../_sha3"
 
 
-/*
-    High level API
-*/
-
+// DIGEST_SIZE_224 is the Keccak-224 digest size.
 DIGEST_SIZE_224 :: 28
 DIGEST_SIZE_224 :: 28
+// DIGEST_SIZE_256 is the Keccak-256 digest size.
 DIGEST_SIZE_256 :: 32
 DIGEST_SIZE_256 :: 32
+// DIGEST_SIZE_384 is the Keccak-384 digest size.
 DIGEST_SIZE_384 :: 48
 DIGEST_SIZE_384 :: 48
+// DIGEST_SIZE_512 is the Keccak-512 digest size.
 DIGEST_SIZE_512 :: 64
 DIGEST_SIZE_512 :: 64
 
 
-// hash_string_224 will hash the given input and return the
-// computed hash
-hash_string_224 :: proc(data: string) -> [DIGEST_SIZE_224]byte {
-	return hash_bytes_224(transmute([]byte)(data))
-}
-
-// hash_bytes_224 will hash the given input and return the
-// computed hash
-hash_bytes_224 :: proc(data: []byte) -> [DIGEST_SIZE_224]byte {
-	hash: [DIGEST_SIZE_224]byte
-	ctx: Context
-	ctx.mdlen = DIGEST_SIZE_224
-	ctx.is_keccak = true
-	init(&ctx)
-	update(&ctx, data)
-	final(&ctx, hash[:])
-	return hash
-}
+// BLOCK_SIZE_224 is the Keccak-224 block size in bytes.
+BLOCK_SIZE_224 :: _sha3.RATE_224
+// BLOCK_SIZE_256 is the Keccak-256 block size in bytes.
+BLOCK_SIZE_256 :: _sha3.RATE_256
+// BLOCK_SIZE_384 is the Keccak-384 block size in bytes.
+BLOCK_SIZE_384 :: _sha3.RATE_384
+// BLOCK_SIZE_512 is the Keccak-512 block size in bytes.
+BLOCK_SIZE_512 :: _sha3.RATE_512
 
 
-// hash_string_to_buffer_224 will hash the given input and assign the
-// computed hash to the second parameter.
-// It requires that the destination buffer is at least as big as the digest size
-hash_string_to_buffer_224 :: proc(data: string, hash: []byte) {
-	hash_bytes_to_buffer_224(transmute([]byte)(data), hash)
-}
+// Context is a Keccak instance.
+Context :: distinct _sha3.Context
 
 
-// hash_bytes_to_buffer_224 will hash the given input and write the
-// computed hash into the second parameter.
-// It requires that the destination buffer is at least as big as the digest size
-hash_bytes_to_buffer_224 :: proc(data, hash: []byte) {
-	ctx: Context
+// init_224 initializes a Context for Keccak-224.
+init_224 :: proc(ctx: ^Context) {
 	ctx.mdlen = DIGEST_SIZE_224
 	ctx.mdlen = DIGEST_SIZE_224
-	ctx.is_keccak = true
-	init(&ctx)
-	update(&ctx, data)
-	final(&ctx, hash)
-}
-
-// hash_stream_224 will read the stream in chunks and compute a
-// hash from its contents
-hash_stream_224 :: proc(s: io.Stream) -> ([DIGEST_SIZE_224]byte, bool) {
-	hash: [DIGEST_SIZE_224]byte
-	ctx: Context
-	ctx.mdlen = DIGEST_SIZE_224
-	ctx.is_keccak = true
-	init(&ctx)
-
-	buf := make([]byte, 512)
-	defer delete(buf)
-
-	read := 1
-	for read > 0 {
-		read, _ = io.read(s, buf)
-		if read > 0 {
-			update(&ctx, buf[:read])
-		}
-	}
-	final(&ctx, hash[:])
-	return hash, true
+	_init(ctx)
 }
 }
 
 
-// hash_file_224 will read the file provided by the given handle
-// and compute a hash
-hash_file_224 :: proc(hd: os.Handle, load_at_once := false) -> ([DIGEST_SIZE_224]byte, bool) {
-	if !load_at_once {
-		return hash_stream_224(os.stream_from_handle(hd))
-	} else {
-		if buf, ok := os.read_entire_file(hd); ok {
-			return hash_bytes_224(buf[:]), ok
-		}
-	}
-	return [DIGEST_SIZE_224]byte{}, false
-}
-
-hash_224 :: proc {
-	hash_stream_224,
-	hash_file_224,
-	hash_bytes_224,
-	hash_string_224,
-	hash_bytes_to_buffer_224,
-	hash_string_to_buffer_224,
-}
-
-// hash_string_256 will hash the given input and return the
-// computed hash
-hash_string_256 :: proc(data: string) -> [DIGEST_SIZE_256]byte {
-	return hash_bytes_256(transmute([]byte)(data))
-}
-
-// hash_bytes_256 will hash the given input and return the
-// computed hash
-hash_bytes_256 :: proc(data: []byte) -> [DIGEST_SIZE_256]byte {
-	hash: [DIGEST_SIZE_256]byte
-	ctx: Context
-	ctx.mdlen = DIGEST_SIZE_256
-	ctx.is_keccak = true
-	init(&ctx)
-	update(&ctx, data)
-	final(&ctx, hash[:])
-	return hash
-}
-
-// hash_string_to_buffer_256 will hash the given input and assign the
-// computed hash to the second parameter.
-// It requires that the destination buffer is at least as big as the digest size
-hash_string_to_buffer_256 :: proc(data: string, hash: []byte) {
-	hash_bytes_to_buffer_256(transmute([]byte)(data), hash)
-}
-
-// hash_bytes_to_buffer_256 will hash the given input and write the
-// computed hash into the second parameter.
-// It requires that the destination buffer is at least as big as the digest size
-hash_bytes_to_buffer_256 :: proc(data, hash: []byte) {
-	ctx: Context
-	ctx.mdlen = DIGEST_SIZE_256
-	ctx.is_keccak = true
-	init(&ctx)
-	update(&ctx, data)
-	final(&ctx, hash)
-}
-
-// hash_stream_256 will read the stream in chunks and compute a
-// hash from its contents
-hash_stream_256 :: proc(s: io.Stream) -> ([DIGEST_SIZE_256]byte, bool) {
-	hash: [DIGEST_SIZE_256]byte
-	ctx: Context
+// init_256 initializes a Context for Keccak-256.
+init_256 :: proc(ctx: ^Context) {
 	ctx.mdlen = DIGEST_SIZE_256
 	ctx.mdlen = DIGEST_SIZE_256
-	ctx.is_keccak = true
-	init(&ctx)
-
-	buf := make([]byte, 512)
-	defer delete(buf)
-
-	read := 1
-	for read > 0 {
-		read, _ = io.read(s, buf)
-		if read > 0 {
-			update(&ctx, buf[:read])
-		}
-	}
-	final(&ctx, hash[:])
-	return hash, true
-}
-
-// hash_file_256 will read the file provided by the given handle
-// and compute a hash
-hash_file_256 :: proc(hd: os.Handle, load_at_once := false) -> ([DIGEST_SIZE_256]byte, bool) {
-	if !load_at_once {
-		return hash_stream_256(os.stream_from_handle(hd))
-	} else {
-		if buf, ok := os.read_entire_file(hd); ok {
-			return hash_bytes_256(buf[:]), ok
-		}
-	}
-	return [DIGEST_SIZE_256]byte{}, false
+	_init(ctx)
 }
 }
 
 
-hash_256 :: proc {
-	hash_stream_256,
-	hash_file_256,
-	hash_bytes_256,
-	hash_string_256,
-	hash_bytes_to_buffer_256,
-	hash_string_to_buffer_256,
-}
-
-// hash_string_384 will hash the given input and return the
-// computed hash
-hash_string_384 :: proc(data: string) -> [DIGEST_SIZE_384]byte {
-	return hash_bytes_384(transmute([]byte)(data))
-}
-
-// hash_bytes_384 will hash the given input and return the
-// computed hash
-hash_bytes_384 :: proc(data: []byte) -> [DIGEST_SIZE_384]byte {
-	hash: [DIGEST_SIZE_384]byte
-	ctx: Context
-	ctx.mdlen = DIGEST_SIZE_384
-	ctx.is_keccak = true
-	init(&ctx)
-	update(&ctx, data)
-	final(&ctx, hash[:])
-	return hash
-}
-
-// hash_string_to_buffer_384 will hash the given input and assign the
-// computed hash to the second parameter.
-// It requires that the destination buffer is at least as big as the digest size
-hash_string_to_buffer_384 :: proc(data: string, hash: []byte) {
-	hash_bytes_to_buffer_384(transmute([]byte)(data), hash)
-}
-
-// hash_bytes_to_buffer_384 will hash the given input and write the
-// computed hash into the second parameter.
-// It requires that the destination buffer is at least as big as the digest size
-hash_bytes_to_buffer_384 :: proc(data, hash: []byte) {
-	ctx: Context
+// init_384 initializes a Context for Keccak-384.
+init_384 :: proc(ctx: ^Context) {
 	ctx.mdlen = DIGEST_SIZE_384
 	ctx.mdlen = DIGEST_SIZE_384
-	ctx.is_keccak = true
-	init(&ctx)
-	update(&ctx, data)
-	final(&ctx, hash)
-}
-
-// hash_stream_384 will read the stream in chunks and compute a
-// hash from its contents
-hash_stream_384 :: proc(s: io.Stream) -> ([DIGEST_SIZE_384]byte, bool) {
-	hash: [DIGEST_SIZE_384]byte
-	ctx: Context
-	ctx.mdlen = DIGEST_SIZE_384
-	ctx.is_keccak = true
-	init(&ctx)
-
-	buf := make([]byte, 512)
-	defer delete(buf)
-
-	read := 1
-	for read > 0 {
-		read, _ = io.read(s, buf)
-		if read > 0 {
-			update(&ctx, buf[:read])
-		}
-	}
-	final(&ctx, hash[:])
-	return hash, true
-}
-
-// hash_file_384 will read the file provided by the given handle
-// and compute a hash
-hash_file_384 :: proc(hd: os.Handle, load_at_once := false) -> ([DIGEST_SIZE_384]byte, bool) {
-	if !load_at_once {
-		return hash_stream_384(os.stream_from_handle(hd))
-	} else {
-		if buf, ok := os.read_entire_file(hd); ok {
-			return hash_bytes_384(buf[:]), ok
-		}
-	}
-	return [DIGEST_SIZE_384]byte{}, false
-}
-
-hash_384 :: proc {
-	hash_stream_384,
-	hash_file_384,
-	hash_bytes_384,
-	hash_string_384,
-	hash_bytes_to_buffer_384,
-	hash_string_to_buffer_384,
+	_init(ctx)
 }
 }
 
 
-// hash_string_512 will hash the given input and return the
-// computed hash
-hash_string_512 :: proc(data: string) -> [DIGEST_SIZE_512]byte {
-	return hash_bytes_512(transmute([]byte)(data))
-}
-
-// hash_bytes_512 will hash the given input and return the
-// computed hash
-hash_bytes_512 :: proc(data: []byte) -> [DIGEST_SIZE_512]byte {
-	hash: [DIGEST_SIZE_512]byte
-	ctx: Context
-	ctx.mdlen = DIGEST_SIZE_512
-	ctx.is_keccak = true
-	init(&ctx)
-	update(&ctx, data)
-	final(&ctx, hash[:])
-	return hash
-}
-
-// hash_string_to_buffer_512 will hash the given input and assign the
-// computed hash to the second parameter.
-// It requires that the destination buffer is at least as big as the digest size
-hash_string_to_buffer_512 :: proc(data: string, hash: []byte) {
-	hash_bytes_to_buffer_512(transmute([]byte)(data), hash)
-}
-
-// hash_bytes_to_buffer_512 will hash the given input and write the
-// computed hash into the second parameter.
-// It requires that the destination buffer is at least as big as the digest size
-hash_bytes_to_buffer_512 :: proc(data, hash: []byte) {
-	ctx: Context
-	ctx.mdlen = DIGEST_SIZE_512
-	ctx.is_keccak = true
-	init(&ctx)
-	update(&ctx, data)
-	final(&ctx, hash)
-}
-
-// hash_stream_512 will read the stream in chunks and compute a
-// hash from its contents
-hash_stream_512 :: proc(s: io.Stream) -> ([DIGEST_SIZE_512]byte, bool) {
-	hash: [DIGEST_SIZE_512]byte
-	ctx: Context
+// init_512 initializes a Context for Keccak-512.
+init_512 :: proc(ctx: ^Context) {
 	ctx.mdlen = DIGEST_SIZE_512
 	ctx.mdlen = DIGEST_SIZE_512
-	ctx.is_keccak = true
-	init(&ctx)
-
-	buf := make([]byte, 512)
-	defer delete(buf)
-
-	read := 1
-	for read > 0 {
-		read, _ = io.read(s, buf)
-		if read > 0 {
-			update(&ctx, buf[:read])
-		}
-	}
-	final(&ctx, hash[:])
-	return hash, true
+	_init(ctx)
 }
 }
 
 
-// hash_file_512 will read the file provided by the given handle
-// and compute a hash
-hash_file_512 :: proc(hd: os.Handle, load_at_once := false) -> ([DIGEST_SIZE_512]byte, bool) {
-	if !load_at_once {
-		return hash_stream_512(os.stream_from_handle(hd))
-	} else {
-		if buf, ok := os.read_entire_file(hd); ok {
-			return hash_bytes_512(buf[:]), ok
-		}
-	}
-	return [DIGEST_SIZE_512]byte{}, false
+@(private)
+_init :: proc(ctx: ^Context) {
+	ctx.dsbyte = _sha3.DS_KECCAK
+	_sha3.init(transmute(^_sha3.Context)(ctx))
 }
 }
 
 
-hash_512 :: proc {
-	hash_stream_512,
-	hash_file_512,
-	hash_bytes_512,
-	hash_string_512,
-	hash_bytes_to_buffer_512,
-	hash_string_to_buffer_512,
+// update adds more data to the Context.
+update :: proc(ctx: ^Context, data: []byte) {
+	_sha3.update(transmute(^_sha3.Context)(ctx), data)
 }
 }
 
 
-/*
-    Low level API
-*/
-
-Context :: _sha3.Sha3_Context
-
-init :: proc(ctx: ^Context) {
-	ctx.is_keccak = true
-	_sha3.init(ctx)
+// final finalizes the Context, writes the digest to hash, and calls
+// reset on the Context.
+//
+// Iff finalize_clone is set, final will work on a copy of the Context,
+// which is useful for calculating rolling digests.
+final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
+	_sha3.final(transmute(^_sha3.Context)(ctx), hash, finalize_clone)
 }
 }
 
 
-update :: proc(ctx: ^Context, data: []byte) {
-	_sha3.update(ctx, data)
+// clone clones the Context other into ctx.
+clone :: proc(ctx, other: ^Context) {
+	_sha3.clone(transmute(^_sha3.Context)(ctx), transmute(^_sha3.Context)(other))
 }
 }
 
 
-final :: proc(ctx: ^Context, hash: []byte) {
-	_sha3.final(ctx, hash)
+// reset sanitizes the Context.  The Context must be re-initialized to
+// be used again.
+reset :: proc(ctx: ^Context) {
+	_sha3.reset(transmute(^_sha3.Context)(ctx))
 }
 }

+ 50 - 98
core/crypto/legacy/md5/md5.odin

@@ -1,3 +1,13 @@
+/*
+package md5 implements the MD5 hash algorithm.
+
+WARNING: The MD5 algorithm is known to be insecure and should only be
+used for interoperating with legacy applications.
+
+See:
+- https://eprint.iacr.org/2005/075
+- https://datatracker.ietf.org/doc/html/rfc1321
+*/
 package md5
 package md5
 
 
 /*
 /*
@@ -6,103 +16,29 @@ package md5
 
 
     List of contributors:
     List of contributors:
         zhibog, dotbmp:  Initial implementation.
         zhibog, dotbmp:  Initial implementation.
-
-    Implementation of the MD5 hashing algorithm, as defined in RFC 1321 <https://datatracker.ietf.org/doc/html/rfc1321>
 */
 */
 
 
 import "core:encoding/endian"
 import "core:encoding/endian"
-import "core:io"
 import "core:math/bits"
 import "core:math/bits"
 import "core:mem"
 import "core:mem"
-import "core:os"
-
-/*
-    High level API
-*/
 
 
+// DIGEST_SIZE is the MD5 digest size in bytes.
 DIGEST_SIZE :: 16
 DIGEST_SIZE :: 16
 
 
-// hash_string will hash the given input and return the
-// computed hash
-hash_string :: proc(data: string) -> [DIGEST_SIZE]byte {
-	return hash_bytes(transmute([]byte)(data))
-}
-
-// hash_bytes will hash the given input and return the
-// computed hash
-hash_bytes :: proc(data: []byte) -> [DIGEST_SIZE]byte {
-	hash: [DIGEST_SIZE]byte
-	ctx: Context
-	init(&ctx)
-	update(&ctx, data)
-	final(&ctx, hash[:])
-	return hash
-}
-
-// hash_string_to_buffer will hash the given input and assign the
-// computed hash to the second parameter.
-// It requires that the destination buffer is at least as big as the digest size
-hash_string_to_buffer :: proc(data: string, hash: []byte) {
-	hash_bytes_to_buffer(transmute([]byte)(data), hash)
-}
-
-// hash_bytes_to_buffer will hash the given input and write the
-// computed hash into the second parameter.
-// It requires that the destination buffer is at least as big as the digest size
-hash_bytes_to_buffer :: proc(data, hash: []byte) {
-	ctx: Context
-	init(&ctx)
-	update(&ctx, data)
-	final(&ctx, hash)
-}
-
-// hash_stream will read the stream in chunks and compute a
-// hash from its contents
-hash_stream :: proc(s: io.Stream) -> ([DIGEST_SIZE]byte, bool) {
-	hash: [DIGEST_SIZE]byte
-	ctx: Context
-	init(&ctx)
-
-	buf := make([]byte, 512)
-	defer delete(buf)
-
-	read := 1
-	for read > 0 {
-		read, _ = io.read(s, buf)
-		if read > 0 {
-			update(&ctx, buf[:read])
-		}
-	}
-	final(&ctx, hash[:])
-	return hash, true
-}
+// BLOCK_SIZE is the MD5 block size in bytes.
+BLOCK_SIZE :: 64
 
 
-// hash_file will read the file provided by the given handle
-// and compute a hash
-hash_file :: proc(hd: os.Handle, load_at_once := false) -> ([DIGEST_SIZE]byte, bool) {
-	if !load_at_once {
-		return hash_stream(os.stream_from_handle(hd))
-	} else {
-		if buf, ok := os.read_entire_file(hd); ok {
-			return hash_bytes(buf[:]), ok
-		}
-	}
-	return [DIGEST_SIZE]byte{}, false
-}
+// Context is an MD5 instance.
+Context :: struct {
+	data:    [BLOCK_SIZE]byte,
+	state:   [4]u32,
+	bitlen:  u64,
+	datalen: u32,
 
 
-hash :: proc {
-	hash_stream,
-	hash_file,
-	hash_bytes,
-	hash_string,
-	hash_bytes_to_buffer,
-	hash_string_to_buffer,
+	is_initialized: bool,
 }
 }
 
 
-/*
-    Low level API
-*/
-
+// init initializes a Context.
 init :: proc(ctx: ^Context) {
 init :: proc(ctx: ^Context) {
 	ctx.state[0] = 0x67452301
 	ctx.state[0] = 0x67452301
 	ctx.state[1] = 0xefcdab89
 	ctx.state[1] = 0xefcdab89
@@ -115,6 +51,7 @@ init :: proc(ctx: ^Context) {
 	ctx.is_initialized = true
 	ctx.is_initialized = true
 }
 }
 
 
+// update adds more data to the Context.
 update :: proc(ctx: ^Context, data: []byte) {
 update :: proc(ctx: ^Context, data: []byte) {
 	assert(ctx.is_initialized)
 	assert(ctx.is_initialized)
 
 
@@ -129,13 +66,26 @@ update :: proc(ctx: ^Context, data: []byte) {
 	}
 	}
 }
 }
 
 
-final :: proc(ctx: ^Context, hash: []byte) {
+// final finalizes the Context, writes the digest to hash, and calls
+// reset on the Context.
+//
+// Iff finalize_clone is set, final will work on a copy of the Context,
+// which is useful for calculating rolling digests.
+final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
 	assert(ctx.is_initialized)
 	assert(ctx.is_initialized)
 
 
 	if len(hash) < DIGEST_SIZE {
 	if len(hash) < DIGEST_SIZE {
 		panic("crypto/md5: invalid destination digest size")
 		panic("crypto/md5: invalid destination digest size")
 	}
 	}
 
 
+	ctx := ctx
+	if finalize_clone {
+		tmp_ctx: Context
+		clone(&tmp_ctx, ctx)
+		ctx = &tmp_ctx
+	}
+	defer(reset(ctx))
+
 	i := ctx.datalen
 	i := ctx.datalen
 
 
 	if ctx.datalen < 56 {
 	if ctx.datalen < 56 {
@@ -163,25 +113,27 @@ final :: proc(ctx: ^Context, hash: []byte) {
 	for i = 0; i < DIGEST_SIZE / 4; i += 1 {
 	for i = 0; i < DIGEST_SIZE / 4; i += 1 {
 		endian.unchecked_put_u32le(hash[i * 4:], ctx.state[i])
 		endian.unchecked_put_u32le(hash[i * 4:], ctx.state[i])
 	}
 	}
+}
 
 
-	ctx.is_initialized = false
+// clone clones the Context other into ctx.
+clone :: proc(ctx, other: ^$T) {
+	ctx^ = other^
+}
+
+// reset sanitizes the Context.  The Context must be re-initialized to
+// be used again.
+reset :: proc(ctx: ^$T) {
+	if !ctx.is_initialized {
+		return
+	}
+
+	mem.zero_explicit(ctx, size_of(ctx^))
 }
 }
 
 
 /*
 /*
     MD5 implementation
     MD5 implementation
 */
 */
 
 
-BLOCK_SIZE :: 64
-
-Context :: struct {
-	data:    [BLOCK_SIZE]byte,
-	state:   [4]u32,
-	bitlen:  u64,
-	datalen: u32,
-
-	is_initialized: bool,
-}
-
 /*
 /*
     @note(zh): F, G, H and I, as mentioned in the RFC, have been inlined into FF, GG, HH
     @note(zh): F, G, H and I, as mentioned in the RFC, have been inlined into FF, GG, HH
     and II respectively, instead of declaring them separately.
     and II respectively, instead of declaring them separately.

+ 52 - 99
core/crypto/legacy/sha1/sha1.odin

@@ -1,3 +1,14 @@
+/*
+package sha1 implements the SHA1 hash algorithm.
+
+WARNING: The SHA1 algorithm is known to be insecure and should only be
+used for interoperating with legacy applications.
+
+See:
+- https://eprint.iacr.org/2017/190
+- https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.180-4.pdf
+- https://datatracker.ietf.org/doc/html/rfc3174
+*/
 package sha1
 package sha1
 
 
 /*
 /*
@@ -6,103 +17,30 @@ package sha1
 
 
     List of contributors:
     List of contributors:
         zhibog, dotbmp:  Initial implementation.
         zhibog, dotbmp:  Initial implementation.
-
-    Implementation of the SHA1 hashing algorithm, as defined in RFC 3174 <https://datatracker.ietf.org/doc/html/rfc3174>
 */
 */
 
 
 import "core:encoding/endian"
 import "core:encoding/endian"
-import "core:io"
 import "core:math/bits"
 import "core:math/bits"
 import "core:mem"
 import "core:mem"
-import "core:os"
-
-/*
-    High level API
-*/
 
 
+// DIGEST_SIZE is the SHA1 digest size in bytes.
 DIGEST_SIZE :: 20
 DIGEST_SIZE :: 20
 
 
-// hash_string will hash the given input and return the
-// computed hash
-hash_string :: proc(data: string) -> [DIGEST_SIZE]byte {
-	return hash_bytes(transmute([]byte)(data))
-}
-
-// hash_bytes will hash the given input and return the
-// computed hash
-hash_bytes :: proc(data: []byte) -> [DIGEST_SIZE]byte {
-	hash: [DIGEST_SIZE]byte
-	ctx: Context
-	init(&ctx)
-	update(&ctx, data)
-	final(&ctx, hash[:])
-	return hash
-}
-
-// hash_string_to_buffer will hash the given input and assign the
-// computed hash to the second parameter.
-// It requires that the destination buffer is at least as big as the digest size
-hash_string_to_buffer :: proc(data: string, hash: []byte) {
-	hash_bytes_to_buffer(transmute([]byte)(data), hash)
-}
-
-// hash_bytes_to_buffer will hash the given input and write the
-// computed hash into the second parameter.
-// It requires that the destination buffer is at least as big as the digest size
-hash_bytes_to_buffer :: proc(data, hash: []byte) {
-	ctx: Context
-	init(&ctx)
-	update(&ctx, data)
-	final(&ctx, hash)
-}
-
-// hash_stream will read the stream in chunks and compute a
-// hash from its contents
-hash_stream :: proc(s: io.Stream) -> ([DIGEST_SIZE]byte, bool) {
-	hash: [DIGEST_SIZE]byte
-	ctx: Context
-	init(&ctx)
-
-	buf := make([]byte, 512)
-	defer delete(buf)
-
-	read := 1
-	for read > 0 {
-		read, _ = io.read(s, buf)
-		if read > 0 {
-			update(&ctx, buf[:read])
-		}
-	}
-	final(&ctx, hash[:])
-	return hash, true
-}
+// BLOCK_SIZE is the SHA1 block size in bytes.
+BLOCK_SIZE :: 64
 
 
-// hash_file will read the file provided by the given handle
-// and compute a hash
-hash_file :: proc(hd: os.Handle, load_at_once := false) -> ([DIGEST_SIZE]byte, bool) {
-	if !load_at_once {
-		return hash_stream(os.stream_from_handle(hd))
-	} else {
-		if buf, ok := os.read_entire_file(hd); ok {
-			return hash_bytes(buf[:]), ok
-		}
-	}
-	return [DIGEST_SIZE]byte{}, false
-}
+// Context is a SHA1 instance.
+Context :: struct {
+	data:    [BLOCK_SIZE]byte,
+	state:   [5]u32,
+	k:       [4]u32,
+	bitlen:  u64,
+	datalen: u32,
 
 
-hash :: proc {
-	hash_stream,
-	hash_file,
-	hash_bytes,
-	hash_string,
-	hash_bytes_to_buffer,
-	hash_string_to_buffer,
+	is_initialized: bool,
 }
 }
 
 
-/*
-    Low level API
-*/
-
+// init initializes a Context.
 init :: proc(ctx: ^Context) {
 init :: proc(ctx: ^Context) {
 	ctx.state[0] = 0x67452301
 	ctx.state[0] = 0x67452301
 	ctx.state[1] = 0xefcdab89
 	ctx.state[1] = 0xefcdab89
@@ -120,6 +58,7 @@ init :: proc(ctx: ^Context) {
 	ctx.is_initialized = true
 	ctx.is_initialized = true
 }
 }
 
 
+// update adds more data to the Context.
 update :: proc(ctx: ^Context, data: []byte) {
 update :: proc(ctx: ^Context, data: []byte) {
 	assert(ctx.is_initialized)
 	assert(ctx.is_initialized)
 
 
@@ -134,13 +73,26 @@ update :: proc(ctx: ^Context, data: []byte) {
 	}
 	}
 }
 }
 
 
-final :: proc(ctx: ^Context, hash: []byte) {
+// final finalizes the Context, writes the digest to hash, and calls
+// reset on the Context.
+//
+// Iff finalize_clone is set, final will work on a copy of the Context,
+// which is useful for calculating rolling digests.
+final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
 	assert(ctx.is_initialized)
 	assert(ctx.is_initialized)
 
 
 	if len(hash) < DIGEST_SIZE {
 	if len(hash) < DIGEST_SIZE {
 		panic("crypto/sha1: invalid destination digest size")
 		panic("crypto/sha1: invalid destination digest size")
 	}
 	}
 
 
+	ctx := ctx
+	if finalize_clone {
+		tmp_ctx: Context
+		clone(&tmp_ctx, ctx)
+		ctx = &tmp_ctx
+	}
+	defer(reset(ctx))
+
 	i := ctx.datalen
 	i := ctx.datalen
 
 
 	if ctx.datalen < 56 {
 	if ctx.datalen < 56 {
@@ -168,26 +120,27 @@ final :: proc(ctx: ^Context, hash: []byte) {
 	for i = 0; i < DIGEST_SIZE / 4; i += 1 {
 	for i = 0; i < DIGEST_SIZE / 4; i += 1 {
 		endian.unchecked_put_u32be(hash[i * 4:], ctx.state[i])
 		endian.unchecked_put_u32be(hash[i * 4:], ctx.state[i])
 	}
 	}
+}
+
+// clone clones the Context other into ctx.
+clone :: proc(ctx, other: ^$T) {
+	ctx^ = other^
+}
+
+// reset sanitizes the Context.  The Context must be re-initialized to
+// be used again.
+reset :: proc(ctx: ^$T) {
+	if !ctx.is_initialized {
+		return
+	}
 
 
-	ctx.is_initialized = false
+	mem.zero_explicit(ctx, size_of(ctx^))
 }
 }
 
 
 /*
 /*
     SHA1 implementation
     SHA1 implementation
 */
 */
 
 
-BLOCK_SIZE :: 64
-
-Context :: struct {
-	data:    [BLOCK_SIZE]byte,
-	datalen: u32,
-	bitlen:  u64,
-	state:   [5]u32,
-	k:       [4]u32,
-
-	is_initialized: bool,
-}
-
 @(private)
 @(private)
 transform :: proc "contextless" (ctx: ^Context, data: []byte) {
 transform :: proc "contextless" (ctx: ^Context, data: []byte) {
 	a, b, c, d, e, i, t: u32
 	a, b, c, d, e, i, t: u32

+ 122 - 0
core/crypto/pbkdf2/pbkdf2.odin

@@ -0,0 +1,122 @@
+/*
+package pbkdf2 implements the PBKDF2 password-based key derivation function.
+
+See: https://www.rfc-editor.org/rfc/rfc2898
+*/
+package pbkdf2
+
+import "core:crypto/hash"
+import "core:crypto/hmac"
+import "core:encoding/endian"
+import "core:mem"
+
+// derive invokes PBKDF2-HMAC with the specified hash algorithm, password,
+// salt, iteration count, and outputs the derived key to dst.
+//
+// The derived key length is len(dst); an empty dst returns immediately.
+// This routine will panic if len(dst) exceeds (2^32 - 1) times the digest
+// size of hmac_hash.
+derive :: proc(
+	hmac_hash: hash.Algorithm,
+	password: []byte,
+	salt: []byte,
+	iterations: u32,
+	dst: []byte,
+) {
+	h_len := hash.DIGEST_SIZES[hmac_hash]
+
+	// 1. If dkLen > (2^32 - 1) * hLen, output "derived key too long"
+	// and stop.
+
+	dk_len := len(dst)
+	switch {
+	case dk_len == 0:
+		return
+	case u64(dk_len) > u64(max(u32)) * u64(h_len):
+		// This is so beyond anything that is practical or reasonable,
+		// so just panic instead of returning an error.
+		panic("crypto/pbkdf2: derived key too long")
+	case:
+	}
+
+	// 2. Let l be the number of hLen-octet blocks in the derived key,
+	// rounding up, and let r be the number of octets in the last block.
+
+	l := dk_len / h_len // Don't need to round up.
+	r := dk_len % h_len
+
+	// 3. For each block of the derived key apply the function F defined
+	// below to the password P, the salt S, the iteration count c, and
+	// the block index to compute the block.
+	//
+	// 4. Concatenate the blocks and extract the first dkLen octets to
+	// produce a derived key DK.
+	//
+	// 5. Output the derived key DK.
+
+	// Each iteration of F is always `PRF (P, ...)`, so instantiate the
+	// PRF, and clone since memcpy is faster than having to re-initialize
+	// HMAC repeatedly.
+
+	base: hmac.Context
+	defer hmac.reset(&base)
+
+	hmac.init(&base, hmac_hash, password)
+
+	// Process all of the blocks that will be written directly to dst.
+	dst_blk := dst
+	for i in 1 ..= l { 	// F expects i starting at 1.
+		_F(&base, salt, iterations, u32(i), dst_blk[:h_len])
+		dst_blk = dst_blk[h_len:]
+	}
+
+	// Instead of rounding l up, just process the one extra block iff
+	// r != 0.
+	if r > 0 {
+		// The final block is computed into scratch space because only
+		// its first r bytes fit in what remains of dst; the scratch
+		// is zeroed on exit from this scope.
+		tmp: [hash.MAX_DIGEST_SIZE]byte
+		blk := tmp[:h_len]
+		defer mem.zero_explicit(raw_data(blk), h_len)
+
+		_F(&base, salt, iterations, u32(l + 1), blk)
+		copy(dst_blk, blk)
+	}
+}
+
+// _F computes one block of PBKDF2 output (the function F described in
+// the comment below) for block index i and iteration count c, and writes
+// it to dst_blk.
+//
+// base is the keyed HMAC Context prepared by derive; it is only cloned
+// from here.  len(dst_blk) is used as the PRF output length, and derive
+// always passes a slice of exactly the digest size.
+@(private)
+_F :: proc(base: ^hmac.Context, salt: []byte, c: u32, i: u32, dst_blk: []byte) {
+	h_len := len(dst_blk)
+
+	// u holds the most recent U_n; it is zeroed when _F returns.
+	tmp: [hash.MAX_DIGEST_SIZE]byte
+	u := tmp[:h_len]
+	defer mem.zero_explicit(raw_data(u), h_len)
+
+	// F (P, S, c, i) = U_1 \xor U_2 \xor ... \xor U_c
+	//
+	// where
+	//
+	// U_1 = PRF (P, S || INT (i)) ,
+	// U_2 = PRF (P, U_1) ,
+	// ...
+	// U_c = PRF (P, U_{c-1}) .
+	//
+	// Here, INT (i) is a four-octet encoding of the integer i, most
+	// significant octet first.
+
+	prf: hmac.Context
+
+	// U_1: PRF (P, S || INT (i))
+	hmac.clone(&prf, base)
+	hmac.update(&prf, salt)
+	endian.unchecked_put_u32be(u, i) // Use u as scratch space.
+	hmac.update(&prf, u[:4])
+	hmac.final(&prf, u)
+	copy(dst_blk, u)
+
+	// U_2 ... U_c: U_n = PRF (P, U_(n-1))
+	for _ in 1 ..< c {
+		hmac.clone(&prf, base)
+		hmac.update(&prf, u)
+		hmac.final(&prf, u)
+
+		// XOR dst_blk and u.
+		// NOTE: this i is the byte index and shadows the block-index
+		// parameter, which is not read again after U_1.
+		for v, i in u {
+			dst_blk[i] ~= v
+		}
+	}
+}

Some files were not shown because too many files changed in this diff