2
0
Эх сурвалжийг харах

Do not enable SSE4 unconditionally on x64, because old Atom chips do not support it. Instead, enable it only for the AES-NI code, which runs only when AES-NI is present — and AES-NI is not present on these old chips.

Adam Ierymenko 4 жил өмнө
parent
commit
2c75be0d64
2 өөрчлөгдсөн 9 нэмэгдсэн, 4 устгасан
  1. 4 4
      make-linux.mk
  2. 5 0
      node/AES.cpp

+ 4 - 4
make-linux.mk

@@ -119,15 +119,15 @@ ifeq ($(CC_MACH),x86_64)
 	ZT_ARCHITECTURE=2
 	ZT_USE_X64_ASM_SALSA=1
 	ZT_USE_X64_ASM_ED25519=1
-	override CFLAGS+=-msse -msse2 -mssse3 -msse4 -msse4.1 -msse4.2 -maes -mpclmul
-	override CXXFLAGS+=-msse -msse2 -mssse3 -msse4 -msse4.1 -msse4.2 -maes -mpclmul
+	override CFLAGS+=-msse -msse2 -maes -mpclmul
+	override CXXFLAGS+=-msse -msse2 -maes -mpclmul
 endif
 ifeq ($(CC_MACH),amd64)
 	ZT_ARCHITECTURE=2
 	ZT_USE_X64_ASM_SALSA=1
 	ZT_USE_X64_ASM_ED25519=1
-	override CFLAGS+=-msse -msse2 -mssse3 -msse4 -msse4.1 -msse4.2 -maes -mpclmul
-	override CXXFLAGS+=-msse -msse2 -mssse3 -msse4 -msse4.1 -msse4.2 -maes -mpclmul
+	override CFLAGS+=-msse -msse2 -maes -mpclmul
+	override CXXFLAGS+=-msse -msse2 -maes -mpclmul
 endif
 ifeq ($(CC_MACH),powerpc64le)
 	ZT_ARCHITECTURE=8

+ 5 - 0
node/AES.cpp

@@ -146,6 +146,7 @@ void s_gfmul(const uint64_t hh, const uint64_t hl, uint64_t &y0, uint64_t &y1) n
 // SSE shuffle parameter to reverse bytes in a 128-bit vector.
 static const __m128i s_sseSwapBytes = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
 
+__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2")))
 static __m128i p_gmacPCLMUL128(const __m128i h, __m128i y) noexcept
 {
 	y = _mm_shuffle_epi8(y, s_sseSwapBytes);
@@ -169,6 +170,7 @@ static __m128i p_gmacPCLMUL128(const __m128i h, __m128i y) noexcept
 
 #endif
 
+__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2")))
 void AES::GMAC::update(const void *const data, unsigned int len) noexcept
 {
 	const uint8_t *in = reinterpret_cast<const uint8_t *>(data);
@@ -322,6 +324,7 @@ void AES::GMAC::update(const void *const data, unsigned int len) noexcept
 	_rp = len; // len is always less than 16 here
 }
 
+__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2")))
 void AES::GMAC::finish(uint8_t tag[16]) noexcept
 {
 #ifdef ZT_AES_AESNI
@@ -593,6 +596,7 @@ void p_aesCtrInnerVAES256(unsigned int &len, const uint64_t c0, uint64_t &c1, co
 
 #endif // ZT_AES_AESNI
 
+__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2")))
 void AES::CTR::crypt(const void *const input, unsigned int len) noexcept
 {
 	const uint8_t *in = reinterpret_cast<const uint8_t *>(input);
@@ -1473,6 +1477,7 @@ static __m128i _init256_2_aesni(__m128i a, __m128i b) noexcept
 	return x;
 }
 
+__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2")))
 void AES::_init_aesni(const uint8_t key[32]) noexcept
 {
 	__m128i t1, t2, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11, k12, k13;