浏览代码

ARM NEON Salsa20/12 in build and selftest. Almost 2X speedup on a Raspberry Pi.

Adam Ierymenko 8 年之前
父节点
当前提交
a376bcc654
共有 3 个文件被更改,包括 42 次插入和 2 次删除
  1. 8 0
      ext/arm32-neon-salsa2012-asm/salsa2012.h
  2. 12 1
      make-linux.mk
  3. 22 1
      selftest.cpp

+ 8 - 0
ext/arm32-neon-salsa2012-asm/salsa2012.h

@@ -1,6 +1,14 @@
 #ifndef ZT_SALSA2012_ARM32NEON_ASM
 #define ZT_SALSA2012_ARM32NEON_ASM
 
+#if defined(__linux__) || defined(linux) || defined(__LINUX__) || defined(__linux)
+#include <sys/auxv.h>
+#include <asm/hwcap.h>
+#define zt_arm_has_neon() (getauxval(AT_HWCAP) & HWCAP_NEON)
+#else
+#define zt_arm_has_neon() (true)
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif

+ 12 - 1
make-linux.mk

@@ -98,30 +98,37 @@ endif
 ifeq ($(CC_MACH),arm)
         ZT_ARCHITECTURE=3
 	override DEFS+=-DZT_NO_TYPE_PUNNING
+	ZT_USE_ARM32_NEON_ASM_SALSA2012=1
 endif
 ifeq ($(CC_MACH),armel)
         ZT_ARCHITECTURE=3
 	override DEFS+=-DZT_NO_TYPE_PUNNING
+	ZT_USE_ARM32_NEON_ASM_SALSA2012=1
 endif
 ifeq ($(CC_MACH),armhf)
         ZT_ARCHITECTURE=3
 	override DEFS+=-DZT_NO_TYPE_PUNNING
+	ZT_USE_ARM32_NEON_ASM_SALSA2012=1
 endif
 ifeq ($(CC_MACH),armv6)
         ZT_ARCHITECTURE=3
 	override DEFS+=-DZT_NO_TYPE_PUNNING
+	ZT_USE_ARM32_NEON_ASM_SALSA2012=1
 endif
 ifeq ($(CC_MACH),armv6zk)
         ZT_ARCHITECTURE=3
 	override DEFS+=-DZT_NO_TYPE_PUNNING
+	ZT_USE_ARM32_NEON_ASM_SALSA2012=1
 endif
 ifeq ($(CC_MACH),armv6kz)
         ZT_ARCHITECTURE=3
 	override DEFS+=-DZT_NO_TYPE_PUNNING
+	ZT_USE_ARM32_NEON_ASM_SALSA2012=1
 endif
 ifeq ($(CC_MACH),armv7)
         ZT_ARCHITECTURE=3
 	override DEFS+=-DZT_NO_TYPE_PUNNING
+	ZT_USE_ARM32_NEON_ASM_SALSA2012=1
 endif
 ifeq ($(CC_MACH),arm64)
         ZT_ARCHITECTURE=4
@@ -158,11 +165,15 @@ endif
 # Disable software updates by default on Linux since that is normally done with package management
 override DEFS+=-DZT_BUILD_PLATFORM=1 -DZT_BUILD_ARCHITECTURE=$(ZT_ARCHITECTURE) -DZT_SOFTWARE_UPDATE_DEFAULT="\"disable\""
 
-# Use X64 ASM Salsa20/12 on X86_64 target
+# Build faster crypto on some targets
 ifeq ($(ZT_USE_X64_ASM_SALSA2012),1)
 	override DEFS+=-DZT_USE_X64_ASM_SALSA2012
 	override OBJS+=ext/x64-salsa2012-asm/salsa2012.o
 endif
+ifeq ($(ZT_USE_ARM32_NEON_ASM_SALSA2012),1)
+	override DEFS+=-DZT_USE_ARM32_NEON_ASM_SALSA2012
+	override OBJS+=ext/arm32-neon-salsa2012-asm/salsa2012.o
+endif
 
 # Static builds, which are currently done for a number of Linux targets
 ifeq ($(ZT_STATIC),1)

+ 22 - 1
selftest.cpp

@@ -57,6 +57,9 @@
 #ifdef ZT_USE_X64_ASM_SALSA2012
 #include "ext/x64-salsa2012-asm/salsa2012.h"
 #endif
+#ifdef ZT_USE_ARM32_NEON_ASM_SALSA2012
+#include "ext/arm32-neon-salsa2012-asm/salsa2012.h"
+#endif
 
 #ifdef __WINDOWS__
 #include <tchar.h>
@@ -215,7 +218,7 @@ static int testCrypto()
 		double bytes = 0.0;
 		uint64_t start = OSUtils::now();
 		for(unsigned int i=0;i<200;++i) {
-			zt_salsa2012_amd64_xmm6(bb, 1234567, s20TV0Iv, s20TV0Key);
+			zt_salsa2012_amd64_xmm6(bb,1234567,s20TV0Iv,s20TV0Key);
 			bytes += 1234567.0;
 		}
 		uint64_t end = OSUtils::now();
@@ -224,6 +227,24 @@ static int testCrypto()
 	}
 #endif
 
+#ifdef ZT_USE_ARM32_NEON_ASM_SALSA2012
+	if (zt_arm_has_neon()) {
+		std::cout << "[crypto] Benchmarking Salsa20/12 fast arm32/neon ASM... "; std::cout.flush();
+		{
+			unsigned char *bb = (unsigned char *)::malloc(1234567);
+			double bytes = 0.0;
+			uint64_t start = OSUtils::now();
+			for(unsigned int i=0;i<200;++i) {
+				zt_salsa2012_armneon3_xor(bb,(const unsigned char *)0,1234567,s20TV0Iv,s20TV0Key);
+				bytes += 1234567.0;
+			}
+			uint64_t end = OSUtils::now();
+			std::cout << ((bytes / 1048576.0) / ((double)(end - start) / 1024.0)) << " MiB/second" << std::endl;
+			::free((void *)bb);
+		}
+	}
+#endif
+
 	std::cout << "[crypto] Benchmarking Salsa20/20... "; std::cout.flush();
 	{
 		unsigned char *bb = (unsigned char *)::malloc(1234567);