// aes_ecb_hw_intel.odin
#+build amd64
package aes

import "base:intrinsics"
import "core:crypto/_aes"
import "core:simd/x86"
  6. @(private, enable_target_feature = "sse2,aes")
  7. encrypt_block_hw :: proc(ctx: ^Context_Impl_Hardware, dst, src: []byte) {
  8. blk := intrinsics.unaligned_load((^x86.__m128i)(raw_data(src)))
  9. blk = x86._mm_xor_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[0])))
  10. #unroll for i in 1 ..= 9 {
  11. blk = x86._mm_aesenc_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[i])))
  12. }
  13. switch ctx._num_rounds {
  14. case _aes.ROUNDS_128:
  15. blk = x86._mm_aesenclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[10])))
  16. case _aes.ROUNDS_192:
  17. #unroll for i in 10 ..= 11 {
  18. blk = x86._mm_aesenc_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[i])))
  19. }
  20. blk = x86._mm_aesenclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[12])))
  21. case _aes.ROUNDS_256:
  22. #unroll for i in 10 ..= 13 {
  23. blk = x86._mm_aesenc_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[i])))
  24. }
  25. blk = x86._mm_aesenclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[14])))
  26. }
  27. intrinsics.unaligned_store((^x86.__m128i)(raw_data(dst)), blk)
  28. }
  29. @(private, enable_target_feature = "sse2,aes")
  30. decrypt_block_hw :: proc(ctx: ^Context_Impl_Hardware, dst, src: []byte) {
  31. blk := intrinsics.unaligned_load((^x86.__m128i)(raw_data(src)))
  32. blk = x86._mm_xor_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[0])))
  33. #unroll for i in 1 ..= 9 {
  34. blk = x86._mm_aesdec_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[i])))
  35. }
  36. switch ctx._num_rounds {
  37. case _aes.ROUNDS_128:
  38. blk = x86._mm_aesdeclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[10])))
  39. case _aes.ROUNDS_192:
  40. #unroll for i in 10 ..= 11 {
  41. blk = x86._mm_aesdec_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[i])))
  42. }
  43. blk = x86._mm_aesdeclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[12])))
  44. case _aes.ROUNDS_256:
  45. #unroll for i in 10 ..= 13 {
  46. blk = x86._mm_aesdec_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[i])))
  47. }
  48. blk = x86._mm_aesdeclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[14])))
  49. }
  50. intrinsics.unaligned_store((^x86.__m128i)(raw_data(dst)), blk)
  51. }