소스 검색

Likely fix for some alignment issues on ARM.

Adam Ierymenko 4 년 전
부모
커밋
9d66d876f4
3개의 변경된 파일20개의 추가작업 그리고 26개의 파일을 삭제
  1. 6 16
      node/AES.cpp
  2. 8 9
      node/AES_armcrypto.cpp
  3. 6 1
      node/Constants.hpp

+ 6 - 16
node/AES.cpp

@@ -149,22 +149,12 @@ void AES::GMAC::update(const void *const data, unsigned int len) noexcept
 		}
 	}
 
-	if (likely(((uintptr_t)in & 7U) == 0U)) {
-		while (len >= 16) {
-			y0 ^= *reinterpret_cast<const uint64_t *>(in);
-			y1 ^= *reinterpret_cast<const uint64_t *>(in + 8);
-			in += 16;
-			s_gfmul(h0, h1, y0, y1);
-			len -= 16;
-		}
-	} else {
-		while (len >= 16) {
-			y0 ^= Utils::loadMachineEndian< uint64_t >(in);
-			y1 ^= Utils::loadMachineEndian< uint64_t >(in + 8);
-			in += 16;
-			s_gfmul(h0, h1, y0, y1);
-			len -= 16;
-		}
+	while (len >= 16) {
+		y0 ^= Utils::loadMachineEndian< uint64_t >(in);
+		y1 ^= Utils::loadMachineEndian< uint64_t >(in + 8);
+		in += 16;
+		s_gfmul(h0, h1, y0, y1);
+		len -= 16;
 	}
 
 	_y[0] = y0;

+ 8 - 9
node/AES_armcrypto.cpp

@@ -131,7 +131,7 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe
 	uint8x16_t k14 = _aes.p_k.neon.ek[14];
 
 	unsigned int totalLen = _len;
-	if ((totalLen & 15U)) {
+	if ((totalLen & 15U) != 0) {
 		for (;;) {
 			if (unlikely(!len)) {
 				vst1q_u8(reinterpret_cast<uint8_t *>(_ctr), vrev32q_u8(dd));
@@ -140,7 +140,7 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe
 			}
 			--len;
 			out[totalLen++] = *(in++);
-			if (!(totalLen & 15U)) {
+			if ((totalLen & 15U) == 0) {
 				uint8_t *const otmp = out + (totalLen - 16);
 				uint8x16_t d0 = vrev32q_u8(dd);
 				uint8x16_t pt = vld1q_u8(otmp);
@@ -180,7 +180,10 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe
 			uint8x16_t d2 = vrev32q_u8(dd2);
 			uint8x16_t d3 = vrev32q_u8(dd3);
 			uint8x16_t pt0 = vld1q_u8(in);
-			in += 16;
+			uint8x16_t pt1 = vld1q_u8(in + 16);
+			uint8x16_t pt2 = vld1q_u8(in + 16);
+			uint8x16_t pt3 = vld1q_u8(in + 16);
+
 			d0 = vaesmcq_u8(vaeseq_u8(d0, k0));
 			d1 = vaesmcq_u8(vaeseq_u8(d1, k0));
 			d2 = vaesmcq_u8(vaeseq_u8(d2, k0));
@@ -193,8 +196,6 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe
 			d1 = vaesmcq_u8(vaeseq_u8(d1, k2));
 			d2 = vaesmcq_u8(vaeseq_u8(d2, k2));
 			d3 = vaesmcq_u8(vaeseq_u8(d3, k2));
-			uint8x16_t pt1 = vld1q_u8(in);
-			in += 16;
 			d0 = vaesmcq_u8(vaeseq_u8(d0, k3));
 			d1 = vaesmcq_u8(vaeseq_u8(d1, k3));
 			d2 = vaesmcq_u8(vaeseq_u8(d2, k3));
@@ -207,8 +208,6 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe
 			d1 = vaesmcq_u8(vaeseq_u8(d1, k5));
 			d2 = vaesmcq_u8(vaeseq_u8(d2, k5));
 			d3 = vaesmcq_u8(vaeseq_u8(d3, k5));
-			uint8x16_t pt2 = vld1q_u8(in);
-			in += 16;
 			d0 = vaesmcq_u8(vaeseq_u8(d0, k6));
 			d1 = vaesmcq_u8(vaeseq_u8(d1, k6));
 			d2 = vaesmcq_u8(vaeseq_u8(d2, k6));
@@ -221,8 +220,6 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe
 			d1 = vaesmcq_u8(vaeseq_u8(d1, k8));
 			d2 = vaesmcq_u8(vaeseq_u8(d2, k8));
 			d3 = vaesmcq_u8(vaeseq_u8(d3, k8));
-			uint8x16_t pt3 = vld1q_u8(in);
-			in += 16;
 			d0 = vaesmcq_u8(vaeseq_u8(d0, k9));
 			d1 = vaesmcq_u8(vaeseq_u8(d1, k9));
 			d2 = vaesmcq_u8(vaeseq_u8(d2, k9));
@@ -253,7 +250,9 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe
 			vst1q_u8(out + 16, d1);
 			vst1q_u8(out + 32, d2);
 			vst1q_u8(out + 48, d3);
+
 			out += 64;
+			in += 64;
 
 			dd = (uint8x16_t)vaddq_u32((uint32x4_t)dd, four);
 			if (unlikely(len < 64))

+ 6 - 1
node/Constants.hpp

@@ -126,7 +126,12 @@
 // Define ZT_NO_TYPE_PUNNING to disable reckless casts on anything other than x86/x64.
 #if (!(defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || defined(i386) || defined(__i386) || defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || defined(_M_IX86) || defined(__X86__) || defined(_X86_) || defined(__I86__) || defined(__INTEL__) || defined(__386)))
 #ifndef ZT_NO_TYPE_PUNNING
-#define ZT_NO_TYPE_PUNNING
+#define ZT_NO_TYPE_PUNNING 1
+#endif
+#endif
+#ifdef ZT_NO_TYPE_PUNNING
+#ifndef ZT_NO_UNALIGNED_ACCESS
+#define ZT_NO_UNALIGNED_ACCESS 1
 #endif
 #endif