@@ -1,168 +0,0 @@
-/*
- * Copyright (c)2019 ZeroTier, Inc.
- *
- * Use of this software is governed by the Business Source License included
- * in the LICENSE.TXT file in the project's root directory.
- *
- * Change Date: 2023-01-01
- *
- * On the date above, in accordance with the Business Source License, use
- * of this software will be governed by version 2.0 of the Apache License.
- */
-/****/
-
-/* This is done in plain C because the compiler (at least GCC and CLANG) seem
- * to do a *slightly* better job optimizing this intrinsic code when compiling
- * plain C. C also gives us the register hint keyword, which seems to actually
- * make a small difference. */
-
-#if (defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(__AMD64) || defined(__AMD64__) || defined(_M_X64))
-
-#include <stdint.h>
-#include <wmmintrin.h>
-#include <emmintrin.h>
-#include <smmintrin.h>
-
-#define ZT_AES_CTR_AESNI_ROUND(kk) c0 = _mm_aesenc_si128(c0,kk); c1 = _mm_aesenc_si128(c1,kk); c2 = _mm_aesenc_si128(c2,kk); c3 = _mm_aesenc_si128(c3,kk);
-
-void zt_crypt_ctr_aesni(const __m128i key[14],const uint8_t iv[16],const uint8_t *in,unsigned int len,uint8_t *out)
-{
-  /* Because our CTR supports full 128-bit nonces, we must do a full 128-bit (big-endian)
-   * increment to be compatible with canonical NIST-certified CTR implementations. That's
-   * because it's possible to have a lot of bit saturation in the least significant 64
-   * bits, which could on rare occasions actually cause a 64-bit wrap. If this happened
-   * without carry it would result in incompatibility and quietly dropped packets. The
-   * probability is low, so this would be a one in billions packet loss bug that would
-   * probably never be found.
-   *
-   * This crazy code does a branch-free 128-bit increment by adding a one or a zero to
-   * the most significant 64 bits of the 128-bit vector based on whether the add we want
-   * to do to the least significant 64 bits would overflow. This can be computed by
-   * NOTing those bits and comparing with what we want to add, since NOT is the same
-   * as subtracting from uint64_max. This generates branch-free ASM on x64 with most
-   * good compilers. */
-  register __m128i swap128 = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15);
-  register __m128i ctr0 = _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)iv),swap128);
-  register uint64_t notctr0msq = ~((uint64_t)_mm_extract_epi64(ctr0,0));
-  register __m128i ctr1 = _mm_shuffle_epi8(_mm_add_epi64(ctr0,_mm_set_epi64x((long long)(notctr0msq < 1ULL),1LL)),swap128);
-  register __m128i ctr2 = _mm_shuffle_epi8(_mm_add_epi64(ctr0,_mm_set_epi64x((long long)(notctr0msq < 2ULL),2LL)),swap128);
-  register __m128i ctr3 = _mm_shuffle_epi8(_mm_add_epi64(ctr0,_mm_set_epi64x((long long)(notctr0msq < 3ULL),3LL)),swap128);
-  ctr0 = _mm_shuffle_epi8(ctr0,swap128);
-
-  register __m128i k0 = key[0];
-  register __m128i k1 = key[1];
-
-  while (len >= 64) {
-    register __m128i ka = key[2];
-    register __m128i c0 = _mm_xor_si128(ctr0,k0);
-    register __m128i c1 = _mm_xor_si128(ctr1,k0);
-    register __m128i c2 = _mm_xor_si128(ctr2,k0);
-    register __m128i c3 = _mm_xor_si128(ctr3,k0);
-    ctr0 = _mm_shuffle_epi8(ctr0,swap128);
-    notctr0msq = ~((uint64_t)_mm_extract_epi64(ctr0,0));
-    ctr1 = _mm_shuffle_epi8(_mm_add_epi64(ctr0,_mm_set_epi64x((long long)(notctr0msq < 5ULL),5LL)),swap128);
-    ctr2 = _mm_shuffle_epi8(_mm_add_epi64(ctr0,_mm_set_epi64x((long long)(notctr0msq < 6ULL),6LL)),swap128);
-    ctr3 = _mm_shuffle_epi8(_mm_add_epi64(ctr0,_mm_set_epi64x((long long)(notctr0msq < 7ULL),7LL)),swap128);
-    ctr0 = _mm_shuffle_epi8(_mm_add_epi64(ctr0,_mm_set_epi64x((long long)(notctr0msq < 4ULL),4LL)),swap128);
-    register __m128i kb = key[3];
-    ZT_AES_CTR_AESNI_ROUND(k1);
-    register __m128i kc = key[4];
-    ZT_AES_CTR_AESNI_ROUND(ka);
-    register __m128i kd = key[5];
-    ZT_AES_CTR_AESNI_ROUND(kb);
-    ka = key[6];
-    ZT_AES_CTR_AESNI_ROUND(kc);
-    kb = key[7];
-    ZT_AES_CTR_AESNI_ROUND(kd);
-    kc = key[8];
-    ZT_AES_CTR_AESNI_ROUND(ka);
-    kd = key[9];
-    ZT_AES_CTR_AESNI_ROUND(kb);
-    ka = key[10];
-    ZT_AES_CTR_AESNI_ROUND(kc);
-    kb = key[11];
-    ZT_AES_CTR_AESNI_ROUND(kd);
-    kc = key[12];
-    ZT_AES_CTR_AESNI_ROUND(ka);
-    kd = key[13];
-    ZT_AES_CTR_AESNI_ROUND(kb);
-    ka = key[14];
-    ZT_AES_CTR_AESNI_ROUND(kc);
-    ZT_AES_CTR_AESNI_ROUND(kd);
-    _mm_storeu_si128((__m128i *)out,_mm_xor_si128(_mm_loadu_si128((const __m128i *)in),_mm_aesenclast_si128(c0,ka)));
-    _mm_storeu_si128((__m128i *)(out + 16),_mm_xor_si128(_mm_loadu_si128((const __m128i *)(in + 16)),_mm_aesenclast_si128(c1,ka)));
-    _mm_storeu_si128((__m128i *)(out + 32),_mm_xor_si128(_mm_loadu_si128((const __m128i *)(in + 32)),_mm_aesenclast_si128(c2,ka)));
-    _mm_storeu_si128((__m128i *)(out + 48),_mm_xor_si128(_mm_loadu_si128((const __m128i *)(in + 48)),_mm_aesenclast_si128(c3,ka)));
-    in += 64;
-    out += 64;
-    len -= 64;
-  }
-
-  register __m128i k2 = key[2];
-  register __m128i k3 = key[3];
-  register __m128i k4 = key[4];
-  register __m128i k5 = key[5];
-  register __m128i k6 = key[6];
-  register __m128i k7 = key[7];
-
-  while (len >= 16) {
-    register __m128i c0 = _mm_xor_si128(ctr0,k0);
-    ctr0 = _mm_shuffle_epi8(ctr0,swap128);
-    ctr0 = _mm_shuffle_epi8(_mm_add_epi64(ctr0,_mm_set_epi64x((long long)((~((uint64_t)_mm_extract_epi64(ctr0,0))) < 1ULL),1LL)),swap128);
-    c0 = _mm_aesenc_si128(c0,k1);
-    c0 = _mm_aesenc_si128(c0,k2);
-    c0 = _mm_aesenc_si128(c0,k3);
-    c0 = _mm_aesenc_si128(c0,k4);
-    c0 = _mm_aesenc_si128(c0,k5);
-    c0 = _mm_aesenc_si128(c0,k6);
-    register __m128i ka = key[8];
-    c0 = _mm_aesenc_si128(c0,k7);
-    register __m128i kb = key[9];
-    c0 = _mm_aesenc_si128(c0,ka);
-    ka = key[10];
-    c0 = _mm_aesenc_si128(c0,kb);
-    kb = key[11];
-    c0 = _mm_aesenc_si128(c0,ka);
-    ka = key[12];
-    c0 = _mm_aesenc_si128(c0,kb);
-    kb = key[13];
-    c0 = _mm_aesenc_si128(c0,ka);
-    ka = key[14];
-    c0 = _mm_aesenc_si128(c0,kb);
-    _mm_storeu_si128((__m128i *)out,_mm_xor_si128(_mm_loadu_si128((const __m128i *)in),_mm_aesenclast_si128(c0,ka)));
-    in += 16;
-    out += 16;
-    len -= 16;
-  }
-
-  if (len) {
-    register __m128i c0 = _mm_xor_si128(ctr0,k0);
-    k0 = key[8];
-    c0 = _mm_aesenc_si128(c0,k1);
-    c0 = _mm_aesenc_si128(c0,k2);
-    k1 = key[9];
-    c0 = _mm_aesenc_si128(c0,k3);
-    c0 = _mm_aesenc_si128(c0,k4);
-    k2 = key[10];
-    c0 = _mm_aesenc_si128(c0,k5);
-    c0 = _mm_aesenc_si128(c0,k6);
-    k3 = key[11];
-    c0 = _mm_aesenc_si128(c0,k7);
-    c0 = _mm_aesenc_si128(c0,k0);
-    k0 = key[12];
-    c0 = _mm_aesenc_si128(c0,k1);
-    c0 = _mm_aesenc_si128(c0,k2);
-    k1 = key[13];
-    c0 = _mm_aesenc_si128(c0,k3);
-    c0 = _mm_aesenc_si128(c0,k0);
-    k2 = key[14];
-    c0 = _mm_aesenc_si128(c0,k1);
-    c0 = _mm_aesenclast_si128(c0,k2);
-    uint8_t tmp[16];
-    _mm_storeu_si128((__m128i *)tmp,c0);
-    for(unsigned int i=0;i<len;++i)
-      out[i] = in[i] ^ tmp[i];
-  }
-}
-
-#endif
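
Note on the counter arithmetic in the deleted function: the long comment describes a branch-free carry into the upper 64 bits, based on the identity ~lo == UINT64_MAX - lo, so the low half overflows when adding n exactly when ~lo < n. A minimal scalar sketch of the same trick, for reference only (ctr128_add is an illustrative name, not part of the original file):

#include <stdint.h>

/* Add n to a 128-bit counter held as two native 64-bit halves (hi:lo).
 * The carry is computed without a branch: lo + n wraps exactly when
 * n > UINT64_MAX - lo, and UINT64_MAX - lo == ~lo, so the test is ~lo < n.
 * This is the same comparison the intrinsic code performs as
 * "notctr0msq < 1ULL" ... "notctr0msq < 7ULL". */
static void ctr128_add(uint64_t *hi, uint64_t *lo, uint64_t n)
{
  *hi += (uint64_t)(~*lo < n); /* 1 if the low half will wrap, else 0 */
  *lo += n;                    /* wrapping add of the low half */
}

In the SIMD code the counter block is first byte-swapped from big-endian with _mm_shuffle_epi8 so that _mm_add_epi64 can perform this add on native 64-bit lanes, then swapped back before being encrypted.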
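For context on the removed interface: the parameter is declared key[14], but the rounds read key[0] through key[14], i.e. the fifteen round keys of an expanded AES-256 key schedule, and iv is the initial 16-byte big-endian counter block. A hypothetical call site might look like the sketch below; expand_aes256_key is only a placeholder for whatever key-expansion routine the rest of the project supplied, and is not part of this diff:

#include <stdint.h>
#include <wmmintrin.h>

/* Prototype of the removed routine, as it appeared in the deleted file. */
void zt_crypt_ctr_aesni(const __m128i key[14],const uint8_t iv[16],const uint8_t *in,unsigned int len,uint8_t *out);

/* Placeholder: fills ks[0..14] with the AES-256 round keys. The real key
 * expansion lived elsewhere and is not shown in this diff. */
void expand_aes256_key(const uint8_t raw[32],__m128i ks[15]);

void ctr_example(const uint8_t raw[32],const uint8_t ctr_block[16],const uint8_t *msg,unsigned int len,uint8_t *out)
{
  __m128i ks[15];
  expand_aes256_key(raw,ks);
  /* CTR just XORs a keystream, so the same call encrypts and decrypts. */
  zt_crypt_ctr_aesni(ks,ctr_block,msg,len,out);
}

Because the final if (len) path XORs only the remaining bytes of keystream, any length is accepted, not just multiples of 16.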