Browse Source

Small AES tweak

Adam Ierymenko 5 years ago
parent
commit
56bf504ec2
1 changed file with 36 additions and 33 deletions
  1. 36 33
      node/AES.cpp

+ 36 - 33
node/AES.cpp

@@ -478,7 +478,8 @@ void AES::CTR::crypt(const void *const input,unsigned int len) noexcept
 		uint64_t c1 = Utils::ntoh(_ctr[1]);
 
 		// There are 16 XMM registers. We can reserve six of them for the
-		// first six parts of the expanded AES key.
+		// first six parts of the expanded AES key. The rest are used for
+		// other key material, counter, or data depending on the chunk size.
 		const __m128i k0 = _aes._k.ni.k[0];
 		const __m128i k1 = _aes._k.ni.k[1];
 		const __m128i k2 = _aes._k.ni.k[2];
@@ -642,41 +643,43 @@ void AES::CTR::crypt(const void *const input,unsigned int len) noexcept
 			out += 64;
 		}
 
-		while (len >= 16) {
-			__m128i d0 = _mm_set_epi64x((long long)Utils::hton(c1),(long long)c0);
-			d0 = _mm_xor_si128(d0,k0);
-			d0 = _mm_aesenc_si128(d0,k1);
+		{
 			__m128i ka = _aes._k.ni.k[6];
-			d0 = _mm_aesenc_si128(d0,k2);
 			__m128i kb = _aes._k.ni.k[7];
-			d0 = _mm_aesenc_si128(d0,k3);
-			__m128i kc = _aes._k.ni.k[8];
-			d0 = _mm_aesenc_si128(d0,k4);
-			__m128i kd = _aes._k.ni.k[9];
-			d0 = _mm_aesenc_si128(d0,k5);
-			__m128i ke = _aes._k.ni.k[10];
-			d0 = _mm_aesenc_si128(d0,ka);
-			__m128i kf = _aes._k.ni.k[11];
-			d0 = _mm_aesenc_si128(d0,kb);
-			__m128i kg = _aes._k.ni.k[12];
-			d0 = _mm_aesenc_si128(d0,kc);
-			__m128i p0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(in));
-			d0 = _mm_aesenc_si128(d0,kd);
-			__m128i kh = _aes._k.ni.k[13];
-			d0 = _mm_aesenc_si128(d0,ke);
-			ka = _aes._k.ni.k[14];
-			d0 = _mm_aesenc_si128(d0,kf);
-			d0 = _mm_aesenc_si128(d0,kg);
-			d0 = _mm_aesenc_si128(d0,kh);
-			d0 = _mm_aesenclast_si128(d0,ka);
-			p0 = _mm_xor_si128(d0,p0);
-			_mm_storeu_si128(reinterpret_cast<__m128i *>(out),p0);
+			const __m128i kc = _aes._k.ni.k[8];
+			const __m128i kd = _aes._k.ni.k[9];
+			const __m128i ke = _aes._k.ni.k[10];
+			const __m128i kf = _aes._k.ni.k[11];
+			const __m128i kg = _aes._k.ni.k[12];
+			const __m128i kh = _aes._k.ni.k[13];
+			while (len >= 16) {
+				__m128i d0 = _mm_set_epi64x((long long)Utils::hton(c1),(long long)c0);
+				d0 = _mm_xor_si128(d0,k0);
+				d0 = _mm_aesenc_si128(d0,k1);
+				d0 = _mm_aesenc_si128(d0,k2);
+				d0 = _mm_aesenc_si128(d0,k3);
+				d0 = _mm_aesenc_si128(d0,k4);
+				d0 = _mm_aesenc_si128(d0,k5);
+				d0 = _mm_aesenc_si128(d0,ka);
+				d0 = _mm_aesenc_si128(d0,kb);
+				d0 = _mm_aesenc_si128(d0,kc);
+				d0 = _mm_aesenc_si128(d0,kd);
+				ka = _aes._k.ni.k[14];
+				d0 = _mm_aesenc_si128(d0,ke);
+				d0 = _mm_aesenc_si128(d0,kf);
+				d0 = _mm_aesenc_si128(d0,kg);
+				d0 = _mm_aesenc_si128(d0,kh);
+				kb = _mm_loadu_si128(reinterpret_cast<const __m128i *>(in));
+				d0 = _mm_aesenclast_si128(d0,ka);
+				kb = _mm_xor_si128(d0,kb);
+				_mm_storeu_si128(reinterpret_cast<__m128i *>(out),kb);
+
+				in += 16;
+				len -= 16;
+				out += 16;
 
-			in += 16;
-			len -= 16;
-			out += 16;
-
-			if (unlikely(++c1 == 0ULL)) c0 = Utils::hton(Utils::ntoh(c0) + 1ULL);
+				if (unlikely(++c1 == 0ULL)) c0 = Utils::hton(Utils::ntoh(c0) + 1ULL);
+			}
 		}
 
 		// Any remaining input is placed in _out. This will be picked up and crypted