Browse Source

cleanup, Locator fix and self-test

Adam Ierymenko 6 years ago
parent
commit
7650786fb5
5 changed files with 282 additions and 236 deletions
  1. 0 26
      node/AES.cpp
  2. 57 18
      node/AES.hpp
  3. 14 10
      node/Locator.hpp
  4. 1 0
      node/Str.hpp
  5. 210 182
      selftest.cpp

File diff suppressed because it is too large
+ 0 - 26
node/AES.cpp


+ 57 - 18
node/AES.hpp

@@ -37,23 +37,14 @@
 #define ZT_AES_AESNI 1
 #define ZT_AES_AESNI 1
 #endif
 #endif
 
 
-#if defined(__arm__) || defined(__aarch32__) || defined(__arm64__) || defined(__aarch64__) || defined(_M_ARM)
-#if defined(_M_ARM64)
+#if defined(_M_ARM64) || defined(__aarch64__) || defined(__aarch64) || defined(__AARCH64__)
 #include <arm64intr.h>
 #include <arm64intr.h>
 #include <arm64_neon.h>
 #include <arm64_neon.h>
 #ifndef ZT_AES_ARMNEON
 #ifndef ZT_AES_ARMNEON
 #define ZT_AES_ARMNEON 1
 #define ZT_AES_ARMNEON 1
 #endif
 #endif
-#endif
-#if defined(__ARM_NEON) || defined(__MSC_VER) || defined(_M_ARM)
-#include <armintr.h>
-#include <arm_neon.h>
-#ifndef ZT_AES_ARMNEON
-#define ZT_AES_ARMNEON 1
-#endif
-#endif
-#if defined(ZT_AES_ARMNEON) && defined(__GNUC__) && (defined(__ARM_ACL) || defined(__ARM_FEATURE_CRYPTO))
-#include <arm_acl.h>
+#if defined(__GNUC__) && !defined(__apple_build_version__) && (defined(__ARM_ACLE) || defined(__ARM_FEATURE_CRYPTO))
+#include <arm_acle.h>
 #endif
 #endif
 #endif
 #endif
 
 
@@ -165,6 +156,18 @@ public:
 	}
 	}
 
 
 private:
 private:
+	static const uint32_t Te0[256];
+	static const uint32_t Te1[256];
+	static const uint32_t Te2[256];
+	static const uint32_t Te3[256];
+	static const uint32_t Te4[256];
+	static const uint32_t Td0[256];
+	static const uint32_t Td1[256];
+	static const uint32_t Td2[256];
+	static const uint32_t Td3[256];
+	static const uint8_t Td4[256];
+	static const uint32_t rcon[10];
+
 	void _initSW(const uint8_t key[32]);
 	void _initSW(const uint8_t key[32]);
 	void _encryptSW(const uint8_t in[16],uint8_t out[16]) const;
 	void _encryptSW(const uint8_t in[16],uint8_t out[16]) const;
 	void _decryptSW(const uint8_t in[16],uint8_t out[16]) const;
 	void _decryptSW(const uint8_t in[16],uint8_t out[16]) const;
@@ -370,7 +373,7 @@ private:
 		a = _mm_xor_si128(a, b);
 		a = _mm_xor_si128(a, b);
 		return a;
 		return a;
 	}
 	}
-	static inline void _expand128_aesni(__m128i schedule[10],const void *const key)
+	/*static inline void _expand128_aesni(__m128i schedule[10],const void *const key)
 	{
 	{
 		__m128i t;
 		__m128i t;
 		schedule[0] = t = _mm_loadu_si128((const __m128i *)key);
 		schedule[0] = t = _mm_loadu_si128((const __m128i *)key);
@@ -384,7 +387,7 @@ private:
 		schedule[8] = t = _assist128_aesni(t, _mm_aeskeygenassist_si128(t, 0x80));
 		schedule[8] = t = _assist128_aesni(t, _mm_aeskeygenassist_si128(t, 0x80));
 		schedule[9] = t = _assist128_aesni(t, _mm_aeskeygenassist_si128(t, 0x1b));
 		schedule[9] = t = _assist128_aesni(t, _mm_aeskeygenassist_si128(t, 0x1b));
 		schedule[10] = _assist128_aesni(t, _mm_aeskeygenassist_si128(t, 0x36));
 		schedule[10] = _assist128_aesni(t, _mm_aeskeygenassist_si128(t, 0x36));
-	}
+	}*/
 	static inline void _scramble_aesni(const uint8_t key[16],const uint8_t *in,uint8_t *out,unsigned int len)
 	static inline void _scramble_aesni(const uint8_t key[16],const uint8_t *in,uint8_t *out,unsigned int len)
 	{
 	{
 		__m128i t = _mm_loadu_si128((const __m128i *)key);
 		__m128i t = _mm_loadu_si128((const __m128i *)key);
@@ -395,29 +398,47 @@ private:
 		__m128i k4 = t = _assist128_aesni(t, _mm_aeskeygenassist_si128(t, 0x08));
 		__m128i k4 = t = _assist128_aesni(t, _mm_aeskeygenassist_si128(t, 0x08));
 		__m128i k5 = t = _assist128_aesni(t, _mm_aeskeygenassist_si128(t, 0x10));
 		__m128i k5 = t = _assist128_aesni(t, _mm_aeskeygenassist_si128(t, 0x10));
 
 
-		while (len >= 32) {
-			len -= 32;
+		while (len >= 64) {
+			len -= 64;
 
 
 			__m128i d0 = _mm_loadu_si128((const __m128i *)in);
 			__m128i d0 = _mm_loadu_si128((const __m128i *)in);
 			in += 16;
 			in += 16;
 			__m128i d1 = _mm_loadu_si128((const __m128i *)in);
 			__m128i d1 = _mm_loadu_si128((const __m128i *)in);
 			in += 16;
 			in += 16;
+			__m128i d2 = _mm_loadu_si128((const __m128i *)in);
+			in += 16;
+			__m128i d3 = _mm_loadu_si128((const __m128i *)in);
+			in += 16;
 
 
 			d0 = _mm_xor_si128(d0,k0);
 			d0 = _mm_xor_si128(d0,k0);
 			d1 = _mm_xor_si128(d1,k0);
 			d1 = _mm_xor_si128(d1,k0);
+			d2 = _mm_xor_si128(d2,k0);
+			d3 = _mm_xor_si128(d3,k0);
 			d0 = _mm_aesenc_si128(d0,k1);
 			d0 = _mm_aesenc_si128(d0,k1);
 			d1 = _mm_aesenc_si128(d1,k1);
 			d1 = _mm_aesenc_si128(d1,k1);
+			d2 = _mm_aesenc_si128(d2,k1);
+			d3 = _mm_aesenc_si128(d3,k1);
 			d0 = _mm_aesenc_si128(d0,k2);
 			d0 = _mm_aesenc_si128(d0,k2);
 			d1 = _mm_aesenc_si128(d1,k2);
 			d1 = _mm_aesenc_si128(d1,k2);
+			d2 = _mm_aesenc_si128(d2,k2);
+			d3 = _mm_aesenc_si128(d3,k2);
 			d0 = _mm_aesenc_si128(d0,k3);
 			d0 = _mm_aesenc_si128(d0,k3);
 			d1 = _mm_aesenc_si128(d1,k3);
 			d1 = _mm_aesenc_si128(d1,k3);
+			d2 = _mm_aesenc_si128(d2,k3);
+			d3 = _mm_aesenc_si128(d3,k3);
 			d0 = _mm_aesenc_si128(d0,k4);
 			d0 = _mm_aesenc_si128(d0,k4);
 			d1 = _mm_aesenc_si128(d1,k4);
 			d1 = _mm_aesenc_si128(d1,k4);
+			d2 = _mm_aesenc_si128(d2,k4);
+			d3 = _mm_aesenc_si128(d3,k4);
 
 
 			_mm_storeu_si128((__m128i *)out,_mm_aesenclast_si128(d0,k5));
 			_mm_storeu_si128((__m128i *)out,_mm_aesenclast_si128(d0,k5));
 			out += 16;
 			out += 16;
 			_mm_storeu_si128((__m128i *)out,_mm_aesenclast_si128(d1,k5));
 			_mm_storeu_si128((__m128i *)out,_mm_aesenclast_si128(d1,k5));
 			out += 16;
 			out += 16;
+			_mm_storeu_si128((__m128i *)out,_mm_aesenclast_si128(d2,k5));
+			out += 16;
+			_mm_storeu_si128((__m128i *)out,_mm_aesenclast_si128(d3,k5));
+			out += 16;
 		}
 		}
 
 
 		while (len >= 16) {
 		while (len >= 16) {
@@ -464,29 +485,47 @@ private:
 		__m128i dk3 = _mm_aesimc_si128(k2);
 		__m128i dk3 = _mm_aesimc_si128(k2);
 		__m128i dk4 = _mm_aesimc_si128(k1);
 		__m128i dk4 = _mm_aesimc_si128(k1);
 
 
-		while (len >= 32) {
-			len -= 32;
+		while (len >= 64) {
+			len -= 64;
 
 
 			__m128i d0 = _mm_loadu_si128((const __m128i *)in);
 			__m128i d0 = _mm_loadu_si128((const __m128i *)in);
 			in += 16;
 			in += 16;
 			__m128i d1 = _mm_loadu_si128((const __m128i *)in);
 			__m128i d1 = _mm_loadu_si128((const __m128i *)in);
 			in += 16;
 			in += 16;
+			__m128i d2 = _mm_loadu_si128((const __m128i *)in);
+			in += 16;
+			__m128i d3 = _mm_loadu_si128((const __m128i *)in);
+			in += 16;
 
 
 			d0 = _mm_xor_si128(d0,dk0);
 			d0 = _mm_xor_si128(d0,dk0);
 			d1 = _mm_xor_si128(d1,dk0);
 			d1 = _mm_xor_si128(d1,dk0);
+			d2 = _mm_xor_si128(d2,dk0);
+			d3 = _mm_xor_si128(d3,dk0);
 			d0 = _mm_aesdec_si128(d0,dk1);
 			d0 = _mm_aesdec_si128(d0,dk1);
 			d1 = _mm_aesdec_si128(d1,dk1);
 			d1 = _mm_aesdec_si128(d1,dk1);
+			d2 = _mm_aesdec_si128(d2,dk1);
+			d3 = _mm_aesdec_si128(d3,dk1);
 			d0 = _mm_aesdec_si128(d0,dk2);
 			d0 = _mm_aesdec_si128(d0,dk2);
 			d1 = _mm_aesdec_si128(d1,dk2);
 			d1 = _mm_aesdec_si128(d1,dk2);
+			d2 = _mm_aesdec_si128(d2,dk2);
+			d3 = _mm_aesdec_si128(d3,dk2);
 			d0 = _mm_aesdec_si128(d0,dk3);
 			d0 = _mm_aesdec_si128(d0,dk3);
 			d1 = _mm_aesdec_si128(d1,dk3);
 			d1 = _mm_aesdec_si128(d1,dk3);
+			d2 = _mm_aesdec_si128(d2,dk3);
+			d3 = _mm_aesdec_si128(d3,dk3);
 			d0 = _mm_aesdec_si128(d0,dk4);
 			d0 = _mm_aesdec_si128(d0,dk4);
 			d1 = _mm_aesdec_si128(d1,dk4);
 			d1 = _mm_aesdec_si128(d1,dk4);
+			d2 = _mm_aesdec_si128(d2,dk4);
+			d3 = _mm_aesdec_si128(d3,dk4);
 
 
 			_mm_storeu_si128((__m128i *)out,_mm_aesdeclast_si128(d0,dk5));
 			_mm_storeu_si128((__m128i *)out,_mm_aesdeclast_si128(d0,dk5));
 			out += 16;
 			out += 16;
 			_mm_storeu_si128((__m128i *)out,_mm_aesdeclast_si128(d1,dk5));
 			_mm_storeu_si128((__m128i *)out,_mm_aesdeclast_si128(d1,dk5));
 			out += 16;
 			out += 16;
+			_mm_storeu_si128((__m128i *)out,_mm_aesdeclast_si128(d2,dk5));
+			out += 16;
+			_mm_storeu_si128((__m128i *)out,_mm_aesdeclast_si128(d3,dk5));
+			out += 16;
 		}
 		}
 
 
 		while (len >= 16) {
 		while (len >= 16) {

+ 14 - 10
node/Locator.hpp

@@ -161,7 +161,7 @@ public:
 	inline std::vector<Str> makeTxtRecords(const uint8_t p384SigningKeyPublic[ZT_ECC384_PUBLIC_KEY_SIZE],const uint8_t p384SigningKeyPrivate[ZT_ECC384_PUBLIC_KEY_SIZE])
 	inline std::vector<Str> makeTxtRecords(const uint8_t p384SigningKeyPublic[ZT_ECC384_PUBLIC_KEY_SIZE],const uint8_t p384SigningKeyPrivate[ZT_ECC384_PUBLIC_KEY_SIZE])
 	{
 	{
 		uint8_t s384[48],dnsSig[ZT_ECC384_SIGNATURE_SIZE];
 		uint8_t s384[48],dnsSig[ZT_ECC384_SIGNATURE_SIZE];
-		char enc[256];
+		char enc[512];
 
 
 		Buffer<65536> *const tmp = new Buffer<65536>();
 		Buffer<65536> *const tmp = new Buffer<65536>();
 		serialize(*tmp,false);
 		serialize(*tmp,false);
@@ -172,12 +172,17 @@ public:
 		// Blob must be broken into multiple TXT records that must remain sortable so they are prefixed by a hex value.
 		// Blob must be broken into multiple TXT records that must remain sortable so they are prefixed by a hex value.
 		// 186-byte chunks yield 248-byte base64 chunks which leaves some margin below the limit of 255.
 		// 186-byte chunks yield 248-byte base64 chunks which leaves some margin below the limit of 255.
 		std::vector<Str> txtRecords;
 		std::vector<Str> txtRecords;
-		for(unsigned int p=0;p<tmp->size();p+=186) {
-			unsigned int rem = tmp->size() - p;
-			if (rem > 186) rem = 186;
-			Utils::b64e(((const uint8_t *)tmp->data()) + p,rem,enc,sizeof(enc));
+		unsigned int txtRecNo = 0;
+		for(unsigned int p=0;p<tmp->size();) {
+			unsigned int chunkSize = tmp->size() - p;
+			if (chunkSize > 186) chunkSize = 186;
+
+			Utils::b64e(((const uint8_t *)tmp->data()) + p,chunkSize,enc,sizeof(enc));
+			p += chunkSize;
+
 			txtRecords.push_back(Str());
 			txtRecords.push_back(Str());
-			txtRecords.back() << Utils::HEXCHARS[(p >> 4) & 0xf] << Utils::HEXCHARS[p & 0xf] << enc;
+			txtRecords.back() << Utils::HEXCHARS[(txtRecNo >> 4) & 0xf] << Utils::HEXCHARS[txtRecNo & 0xf] << enc;
+			++txtRecNo;
 		}
 		}
 
 
 		delete tmp;
 		delete tmp;
@@ -199,15 +204,14 @@ public:
 	template<typename I>
 	template<typename I>
 	inline bool decodeTxtRecords(I start,I end,const uint8_t p384SigningKeyPublic[ZT_ECC384_PUBLIC_KEY_SIZE])
 	inline bool decodeTxtRecords(I start,I end,const uint8_t p384SigningKeyPublic[ZT_ECC384_PUBLIC_KEY_SIZE])
 	{
 	{
-		uint8_t dec[256],s384[48];
+		uint8_t dec[512],s384[48];
 		Buffer<65536> *tmp = nullptr;
 		Buffer<65536> *tmp = nullptr;
 		try {
 		try {
 			std::vector<Str> txtRecords;
 			std::vector<Str> txtRecords;
 			while (start != end) {
 			while (start != end) {
 				try {
 				try {
-					Str ts(start);
-					if (ts.length() > 2)
-						txtRecords.push_back(ts);
+					if (start->length() > 2)
+						txtRecords.push_back(*start);
 				} catch ( ... ) {} // skip any records that trigger out of bounds exceptions
 				} catch ( ... ) {} // skip any records that trigger out of bounds exceptions
 				++start;
 				++start;
 			}
 			}

+ 1 - 0
node/Str.hpp

@@ -118,6 +118,7 @@ public:
 					throw ZT_EXCEPTION_OUT_OF_BOUNDS;
 					throw ZT_EXCEPTION_OUT_OF_BOUNDS;
 				}
 				}
 				_s[l++] = *s;
 				_s[l++] = *s;
+				++s;
 			}
 			}
 			_s[l] = 0;
 			_s[l] = 0;
 			_l = (uint8_t)l;
 			_l = (uint8_t)l;

File diff suppressed because it is too large
+ 210 - 182
selftest.cpp


Some files were not shown because too many files changed in this diff