
New Buffer implementation (not used yet) and cleanup

Adam Ierymenko, 5 years ago, commit 83eacdfada
9 changed files with 649 additions and 109 deletions

  1. node/AES.cpp (+0 -26)
  2. node/AES.hpp (+22 -61)
  3. node/Address.hpp (+2 -6)
  4. node/AtomicCounter.hpp (+10 -5)
  5. node/Buf.cpp (+116 -0)
  6. node/Buf.hpp (+467 -0)
  7. node/CMakeLists.txt (+2 -0)
  8. node/Utils.cpp (+13 -11)
  9. node/Utils.hpp (+17 -0)

+ 0 - 26
node/AES.cpp

File diff suppressed because it is too large


+ 22 - 61
node/AES.hpp

@@ -21,45 +21,36 @@
 #include <cstdint>
 
 #if (defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(__AMD64) || defined(__AMD64__) || defined(_M_X64))
-
 #include <xmmintrin.h>
 #include <wmmintrin.h>
 #include <emmintrin.h>
 #include <smmintrin.h>
-
 #define ZT_AES_AESNI 1
-
-#endif // x64
+#endif
 
 namespace ZeroTier {
 
 /**
- * AES-256 and pals
+ * AES-256 and pals including GMAC, CTR, etc.
  */
 class AES
 {
 public:
-	/**
-	 * This will be true if your platform's type of AES acceleration is supported on this machine
-	 */
-	static const bool HW_ACCEL;
-
-	inline AES() {}
-	inline AES(const uint8_t key[32]) { this->init(key); }
-	inline ~AES() { Utils::burn(&_k,sizeof(_k)); }
+	ZT_ALWAYS_INLINE AES() {}
+	ZT_ALWAYS_INLINE AES(const uint8_t key[32]) { this->init(key); }
+	ZT_ALWAYS_INLINE ~AES() { Utils::burn(&_k,sizeof(_k)); }
 
 	/**
 	 * Set (or re-set) this AES256 cipher's key
 	 */
-	inline void init(const uint8_t key[32])
+	ZT_ALWAYS_INLINE void init(const uint8_t key[32])
 	{
 #ifdef ZT_AES_AESNI
-		if (likely(HW_ACCEL)) {
+		if (likely(Utils::CPUID.aes)) {
 			_init_aesni(key);
 			return;
 		}
 #endif
-
 		_initSW(key);
 	}
 
@@ -69,15 +60,14 @@ public:
 	 * @param in Input block
 	 * @param out Output block (can be same as input)
 	 */
-	inline void encrypt(const uint8_t in[16],uint8_t out[16]) const
+	ZT_ALWAYS_INLINE void encrypt(const uint8_t in[16],uint8_t out[16]) const
 	{
 #ifdef ZT_AES_AESNI
-		if (likely(HW_ACCEL)) {
+		if (likely(Utils::CPUID.aes)) {
 			_encrypt_aesni(in,out);
 			return;
 		}
 #endif
-
 		_encryptSW(in,out);
 	}
 
@@ -89,15 +79,14 @@
 	 * @param len Length of input
 	 * @param out 128-bit authentication tag from GMAC
 	 */
-	inline void gmac(const uint8_t iv[12],const void *in,const unsigned int len,uint8_t out[16]) const
+	ZT_ALWAYS_INLINE void gmac(const uint8_t iv[12],const void *in,const unsigned int len,uint8_t out[16]) const
 	{
 #ifdef ZT_AES_AESNI
-		if (likely(HW_ACCEL)) {
+		if (likely(Utils::CPUID.aes)) {
 			_gmac_aesni(iv,(const uint8_t *)in,len,out);
 			return;
 		}
 #endif
-
 		_gmacSW(iv,(const uint8_t *)in,len,out);
 	}
 
@@ -113,44 +102,15 @@
 	 * @param len Length of input
 	 * @param out Output plaintext or ciphertext
 	 */
-	inline void ctr(const uint8_t iv[16],const void *in,unsigned int len,void *out) const
+	ZT_ALWAYS_INLINE void ctr(const uint8_t iv[16],const void *in,unsigned int len,void *out) const
 	{
 #ifdef ZT_AES_AESNI
-		if (likely(HW_ACCEL)) {
+		if (likely(Utils::CPUID.aes)) {
 			_ctr_aesni(_k.ni.k,iv,(const uint8_t *)in,len,(uint8_t *)out);
 			return;
 		}
 #endif
-
-		uint64_t ctr[2],cenc[2];
-		memcpy(ctr,iv,16);
-		uint64_t bctr = Utils::ntoh(ctr[1]);
-
-		const uint8_t *i = (const uint8_t *)in;
-		uint8_t *o = (uint8_t *)out;
-
-		while (len >= 16) {
-			_encryptSW((const uint8_t *)ctr,(uint8_t *)cenc);
-			ctr[1] = Utils::hton(++bctr);
-#ifdef ZT_NO_TYPE_PUNNING
-			for(unsigned int k=0;k<16;++k)
-				*(o++) = *(i++) ^ ((uint8_t *)cenc)[k];
-#else
-			*((uint64_t *)o) = *((const uint64_t *)i) ^ cenc[0];
-			o += 8;
-			i += 8;
-			*((uint64_t *)o) = *((const uint64_t *)i) ^ cenc[1];
-			o += 8;
-			i += 8;
-#endif
-			len -= 16;
-		}
-
-		if (len) {
-			_encryptSW((const uint8_t *)ctr,(uint8_t *)cenc);
-			for(unsigned int k=0;k<len;++k)
-				*(o++) = *(i++) ^ ((uint8_t *)cenc)[k];
-		}
+		_ctrSW(iv,in,len,out);
 	}
 
 	/**
@@ -326,6 +286,7 @@ private:
 
 	void _initSW(const uint8_t key[32]);
 	void _encryptSW(const uint8_t in[16],uint8_t out[16]) const;
+	void _ctrSW(const uint8_t iv[16],const void *in,unsigned int len,void *out) const;
 	void _gmacSW(const uint8_t iv[12],const uint8_t *in,unsigned int len,uint8_t out[16]) const;
 
 	/**************************************************************************/
@@ -432,7 +393,7 @@ private:
 #endif /*********************************************************************/
 
 #ifdef ZT_AES_AESNI /********************************************************/
-	static inline __m128i _init256_1_aesni(__m128i a,__m128i b)
+	static ZT_ALWAYS_INLINE __m128i _init256_1_aesni(__m128i a,__m128i b)
 	{
 		__m128i x,y;
 		b = _mm_shuffle_epi32(b,0xff);
@@ -445,7 +406,7 @@ private:
 		x = _mm_xor_si128(x,b);
 		return x;
 	}
-	static inline __m128i _init256_2_aesni(__m128i a,__m128i b)
+	static ZT_ALWAYS_INLINE __m128i _init256_2_aesni(__m128i a,__m128i b)
 	{
 		__m128i x,y,z;
 		y = _mm_aeskeygenassist_si128(a,0x00);
@@ -459,7 +420,7 @@ private:
 		x = _mm_xor_si128(x,z);
 		return x;
 	}
-	inline void _init_aesni(const uint8_t key[32])
+	ZT_ALWAYS_INLINE void _init_aesni(const uint8_t key[32])
	{
 		__m128i t1,t2;
 		_k.ni.k[0] = t1 = _mm_loadu_si128((const __m128i *)key);
@@ -505,7 +466,7 @@ private:
 		_k.ni.hhhh = _mm_shuffle_epi8(hhhh,shuf);
 	}
 
-	inline void _encrypt_aesni(const void *in,void *out) const
+	ZT_ALWAYS_INLINE void _encrypt_aesni(const void *in,void *out) const
 	{
 		__m128i tmp;
 		tmp = _mm_loadu_si128((const __m128i *)in);
@@ -526,7 +487,7 @@ private:
 		_mm_storeu_si128((__m128i *)out,_mm_aesenclast_si128(tmp,_k.ni.k[14]));
 	}
 
-	static inline __m128i _mult_block_aesni(__m128i shuf,__m128i h,__m128i y)
+	static ZT_ALWAYS_INLINE __m128i _mult_block_aesni(__m128i shuf,__m128i h,__m128i y)
 	{
 		y = _mm_shuffle_epi8(y,shuf);
 		__m128i t1 = _mm_clmulepi64_si128(h,y,0x00);
@@ -568,7 +529,7 @@ private:
 	}
 	static inline __m128i _ghash_aesni(__m128i shuf,__m128i h,__m128i y,__m128i x) { return _mult_block_aesni(shuf,h,_mm_xor_si128(y,x)); }
 
-	inline void _gmac_aesni(const uint8_t iv[12],const uint8_t *in,const unsigned int len,uint8_t out[16]) const
+	ZT_ALWAYS_INLINE void _gmac_aesni(const uint8_t iv[12],const uint8_t *in,const unsigned int len,uint8_t out[16]) const
 	{
 		const __m128i *const ab = (const __m128i *)in;
 		const unsigned int blocks = len / 16;
@@ -687,7 +648,7 @@ private:
 
 #define ZT_AES_CTR_AESNI_ROUND(kk) c0 = _mm_aesenc_si128(c0,kk); c1 = _mm_aesenc_si128(c1,kk); c2 = _mm_aesenc_si128(c2,kk); c3 = _mm_aesenc_si128(c3,kk);
 
-	static inline void _ctr_aesni(const __m128i key[14],const uint8_t iv[16],const uint8_t *in,unsigned int len,uint8_t *out)
+	static ZT_ALWAYS_INLINE void _ctr_aesni(const __m128i key[14],const uint8_t iv[16],const uint8_t *in,unsigned int len,uint8_t *out)
 	{
 		/* Because our CTR supports full 128-bit nonces, we must do a full 128-bit (big-endian)
 		 * increment to be compatible with canonical NIST-certified CTR implementations. That's
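
Taken together, the public surface of AES after this change is init(), encrypt(), gmac(), and ctr(), each dispatching to the AES-NI path when Utils::CPUID.aes is set and falling back to the software implementations otherwise. A minimal usage sketch (key, IVs, and buffer contents are illustrative placeholders, not values from this commit):

	#include "AES.hpp"

	using namespace ZeroTier;

	void aesExample()
	{
		uint8_t key[32] = {0};       // 256-bit key (placeholder; use real key material)
		uint8_t ctrIv[16] = {0};     // full 128-bit CTR nonce/counter block
		uint8_t gmacIv[12] = {0};    // 96-bit GMAC initialization vector
		uint8_t msg[64] = {0},ct[64],tag[16];

		AES aes(key);                        // init() picks _init_aesni() or _initSW()
		aes.ctr(ctrIv,msg,sizeof(msg),ct);   // CTR keystream XOR; the same call decrypts
		aes.gmac(gmacIv,ct,sizeof(ct),tag);  // 128-bit authentication tag over ct
	}                                        // ~AES() burns the expanded key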

+ 2 - 6
node/Address.hpp

@@ -113,11 +113,6 @@ public:
 	 */
 	ZT_ALWAYS_INLINE char *toString(char buf[11]) const { return Utils::hex10(_a,buf); }
 
-	/**
-	 * @return True if this address is not zero
-	 */
-	ZT_ALWAYS_INLINE operator bool() const { return (_a != 0); }
-
 	/**
 	 * Check if this address is reserved
 	 *
@@ -127,7 +122,7 @@ public:
 	 *
 	 * @return True if address is reserved and may not be used
 	 */
-	ZT_ALWAYS_INLINE bool isReserved() const { return ((!_a)||((_a >> 32) == ZT_ADDRESS_RESERVED_PREFIX)); }
+	ZT_ALWAYS_INLINE bool isReserved() const { return ((!_a)||((_a >> 32U) == ZT_ADDRESS_RESERVED_PREFIX)); }
 
 	/**
 	 * @param i Value from 0 to 4 (inclusive)
@@ -135,6 +130,7 @@ public:
 	 */
 	ZT_ALWAYS_INLINE uint8_t operator[](unsigned int i) const { return (uint8_t)(_a >> (32 - (i * 8))); }
 
+	explicit ZT_ALWAYS_INLINE operator bool() const { return (_a != 0); }
 	explicit ZT_ALWAYS_INLINE operator unsigned int() const { return (unsigned int)_a; }
 	explicit ZT_ALWAYS_INLINE operator unsigned long() const { return (unsigned long)_a; }
 	explicit ZT_ALWAYS_INLINE operator unsigned long long() const { return (unsigned long long)_a; }
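
The boolean conversion is not just moved next to the other conversion operators; it also gains explicit, which rules out accidental implicit conversions while keeping the usual test idiom working. A sketch of the effect on callers (assumes Address default-constructs to zero, as elsewhere in this codebase):

	#include "Address.hpp"

	using namespace ZeroTier;

	void addressBoolExample()
	{
		Address a;
		if (a) {}          // still OK: explicit operator bool allows contextual conversion
		bool c = (bool)a;  // still OK: explicit cast
		// bool b = a;     // no longer compiles: copy-initialization needs an implicit
		//                 // conversion, which 'explicit' now forbids
	}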

+ 10 - 5
node/AtomicCounter.hpp

@@ -24,25 +24,30 @@ namespace ZeroTier {
 
 /**
  * Simple atomic counter supporting increment and decrement
+ *
+ * This is used as the reference counter in reference counted objects that
+ * work with SharedPtr<>.
  */
 class AtomicCounter
 {
 public:
-	ZT_ALWAYS_INLINE AtomicCounter() { _v = 0; }
+	ZT_ALWAYS_INLINE AtomicCounter() : _v(0) {}
 
 	ZT_ALWAYS_INLINE int load() const
 	{
 #ifdef __GNUC__
-		return __sync_or_and_fetch(const_cast<int *>(&_v),0);
+		return _v;
 #else
 		return _v.load();
 #endif
 	}
 
+	ZT_ALWAYS_INLINE void zero() { _v = 0; }
+
 	ZT_ALWAYS_INLINE int operator++()
 	{
 #ifdef __GNUC__
-		return __sync_add_and_fetch(&_v,1);
+		return __sync_add_and_fetch((int *)&_v,1);
 #else
 		return ++_v;
 #endif
@@ -51,7 +56,7 @@ public:
 	ZT_ALWAYS_INLINE int operator--()
 	{
 #ifdef __GNUC__
-		return __sync_sub_and_fetch(&_v,1);
+		return __sync_sub_and_fetch((int *)&_v,1);
 #else
 		return --_v;
 #endif
@@ -62,7 +67,7 @@ private:
 	ZT_ALWAYS_INLINE const AtomicCounter &operator=(const AtomicCounter &) { return *this; }
 
 #ifdef __GNUC__
-	int _v;
+	volatile int _v;
 #else
 	std::atomic_int _v;
 #endif
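
A minimal sketch of the intrusive reference-counting pattern the new class comment describes (Thing is a hypothetical class; the __refCount field name follows the convention this codebase's SharedPtr<> expects, as the new Buf class below illustrates):

	#include "AtomicCounter.hpp"
	#include "SharedPtr.hpp"

	using namespace ZeroTier;

	class Thing
	{
		friend class SharedPtr<Thing>;  // SharedPtr increments/decrements __refCount
	public:
		int value;
	private:
		AtomicCounter __refCount;       // object is deleted when this returns to zero
	};

	// SharedPtr<Thing> t(new Thing()); // lifetime now managed by the counter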

+ 116 - 0
node/Buf.cpp

@@ -0,0 +1,116 @@
+/*
+ * Copyright (c)2019 ZeroTier, Inc.
+ *
+ * Use of this software is governed by the Business Source License included
+ * in the LICENSE.TXT file in the project's root directory.
+ *
+ * Change Date: 2023-01-01
+ *
+ * On the date above, in accordance with the Business Source License, use
+ * of this software will be governed by version 2.0 of the Apache License.
+ */
+/****/
+
+#include "Buf.hpp"
+
+#ifndef __GNUC__
+#include <atomic>
+#endif
+
+namespace ZeroTier {
+
+#ifdef __GNUC__
+static uintptr_t s_pool = 0;
+#else
+static std::atomic<uintptr_t> s_pool(0);
+#endif
+
+void Buf::operator delete(void *ptr,std::size_t sz)
+{
+	if (ptr) {
+		uintptr_t bb;
+		const uintptr_t locked = ~((uintptr_t)0);
+		for (;;) {
+#ifdef __GNUC__
+			bb = __sync_fetch_and_or(&s_pool,locked); // get value of s_pool and "lock" by filling with all 1's
+#else
+			bb = s_pool.fetch_or(locked);
+#endif
+			if (bb != locked)
+				break;
+		}
+
+		((Buf *)ptr)->__nextInPool = bb;
+#ifdef __GNUC__
+		__sync_fetch_and_and(&s_pool,(uintptr_t)ptr);
+#else
+		s_pool.store((uintptr_t)ptr);
+#endif
+	}
+}
+
+SharedPtr<Buf> Buf::get()
+{
+	uintptr_t bb;
+	const uintptr_t locked = ~((uintptr_t)0);
+	for (;;) {
+#ifdef __GNUC__
+		bb = __sync_fetch_and_or(&s_pool,locked); // get value of s_pool and "lock" by filling with all 1's
+#else
+		bb = s_pool.fetch_or(locked);
+#endif
+		if (bb != locked)
+			break;
+	}
+
+	Buf *b;
+	if (bb == 0) {
+#ifdef __GNUC__
+		__sync_fetch_and_and(&s_pool,bb);
+#else
+		s_pool.store(bb);
+#endif
+		b = (Buf *)malloc(sizeof(Buf));
+		if (!b)
+			return SharedPtr<Buf>();
+	} else {
+		b = (Buf *)bb;
+#ifdef __GNUC__
+		__sync_fetch_and_and(&s_pool,b->__nextInPool);
+#else
+		s_pool.store(b->__nextInPool);
+#endif
+	}
+
+	b->__refCount.zero();
+	return SharedPtr<Buf>(b);
+}
+
+void Buf::freePool()
+{
+	uintptr_t bb;
+	const uintptr_t locked = ~((uintptr_t)0);
+	for (;;) {
+#ifdef __GNUC__
+		bb = __sync_fetch_and_or(&s_pool,locked); // get value of s_pool and "lock" by filling with all 1's
+#else
+		bb = s_pool.fetch_or(locked);
+#endif
+		if (bb != locked)
+			break;
+	}
+
+#ifdef __GNUC__
+	__sync_fetch_and_and(&s_pool,(uintptr_t)0);
+#else
+	s_pool.store((uintptr_t)0);
+#endif
+
+	while (bb != 0) {
+		uintptr_t next = ((Buf *)bb)->__nextInPool;
+		free((void *)bb);
+		bb = next;
+	}
+}
+
+} // namespace ZeroTier
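
All three functions above share one trick: the pool is a lock-free stack whose head pointer doubles as a spinlock, with the all-ones value acting as the "locked" sentinel. The public surface is small; a sketch of the intended lifecycle (Buf is not yet wired into the rest of the code, per the commit message):

	#include "Buf.hpp"

	using namespace ZeroTier;

	void bufPoolExample()
	{
		SharedPtr<Buf> b(Buf::get());  // pops a Buf off the pool, or mallocs one if empty
		if (!b)
			return;                    // only fails if malloc fails on an empty pool
		b->data[0] = 0x01;             // ... fill and use the buffer ...
	}                                  // last reference released: operator delete pushes
	                                   // the Buf back onto the pool instead of freeing it

	// Buf::freePool();                // at shutdown, actually free() pooled buffers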

+ 467 - 0
node/Buf.hpp

@@ -0,0 +1,467 @@
+/*
+ * Copyright (c)2019 ZeroTier, Inc.
+ *
+ * Use of this software is governed by the Business Source License included
+ * in the LICENSE.TXT file in the project's root directory.
+ *
+ * Change Date: 2023-01-01
+ *
+ * On the date above, in accordance with the Business Source License, use
+ * of this software will be governed by version 2.0 of the Apache License.
+ */
+/****/
+
+#ifndef ZT_BUF_HPP
+#define ZT_BUF_HPP
+
+#include "Constants.hpp"
+#include "AtomicCounter.hpp"
+#include "Utils.hpp"
+#include "SharedPtr.hpp"
+#include "Mutex.hpp"
+
+#include <cstdint>
+#include <cstring>
+#include <cstdlib>
+
+// Buffers are 16384 bytes in size because this is the smallest size that can hold any packet
+// and is a power of two. It needs to be a power of two because masking is significantly faster
+// than integer division modulus.
+#define ZT_BUF_MEM_SIZE 0x00004000
+#define ZT_BUF_MEM_MASK 0x00003fffU
+
+namespace ZeroTier {
+
+/**
+ * Buffer and methods for branch-free bounds-checked data assembly and parsing
+ *
+ * This implements an extremely fast buffer for packet assembly and parsing that avoids
+ * branching whenever possible. To be safe it must be used correctly!
+ *
+ * Read methods are prefixed with 'r' and write methods with 'w'. All methods take
+ * an iterator, which is just an int that should be initialized to 0 (or whatever starting
+ * position is desired). All read methods will advance the iterator regardless of outcome.
+ *
+ * Read and write methods fail silently in the event of overflow. They do not corrupt or
+ * access memory outside the bounds of Buf, but will otherwise produce undefined results.
+ *
+ * IT IS THE RESPONSIBILITY OF THE USER of this class to use the readOverflow() and
+ * writeOverflow() static methods to check the iterator for overflow after each series
+ * of reads and writes and BEFORE ANY PARSING or other decisions are made on the basis
+ * of the data obtained from a buffer. Failure to do so can result in bugs due
+ * to parsing and branching on undefined or corrupt data.
+ *
+ * ^^ THIS IS VERY IMPORTANT ^^
+ *
+ * A typical packet assembly consists of repeated calls to the write methods followed by
+ * a check to writeOverflow() before final packet armoring and transport. A typical packet
+ * disassembly and parsing consists of a series of read calls to obtain the packet's
+ * fields followed by a call to readOverflow() to check that these fields are valid. The
+ * packet is discarded if readOverflow() returns true. Some packet parsers may make
+ * additional reads and in this case readOverflow() must be checked after each set of
+ * reads to ensure that overflow did not occur.
+ *
+ * Buf uses a lock-free pool for extremely fast allocation and deallocation.
+ */
+class Buf
+{
+	friend class SharedPtr<Buf>;
+
+private:
+	// Direct construction isn't allowed; use get().
+	ZT_ALWAYS_INLINE Buf() {}
+	ZT_ALWAYS_INLINE Buf(const Buf &b) {}
+
+public:
+	static void operator delete(void *ptr,std::size_t sz);
+
+	/**
+	 * Get obtains a buffer from the pool or allocates a new buffer if the pool is empty
+	 *
+	 * @return Buffer
+	 */
+	static SharedPtr<Buf> get();
+
+	/**
+	 * Free buffers in the pool
+	 *
+	 * This only frees buffers currently held in reserve. The pool is repopulated
+	 * if get() is called again, and buffers outstanding at the time of this call
+	 * are still returned to the pool when released.
+	 */
+	static void freePool();
+
+	/**
+	 * Check for overflow beyond the size of the buffer
+	 *
+	 * This is used to check for overflow when writing. It returns true if the iterator
+	 * has passed beyond the capacity of the buffer.
+	 *
+	 * @param ii Iterator to check
+	 * @return True if iterator has written past the capacity of the buffer
+	 */
+	static ZT_ALWAYS_INLINE bool writeOverflow(const int &ii) { return ((ii - ZT_BUF_MEM_SIZE) > 0); }
+
+	/**
+	 * Check for overflow beyond the size of the data that should be in the buffer
+	 *
+	 * This is used to check for overflow when reading, with the second argument being the
+	 * size of the meaningful data actually present in the buffer.
+	 *
+	 * @param ii Iterator to check
+	 * @param size Size of data that should be in buffer
+	 * @return True if iterator has read past the size of the data
+	 */
+	static ZT_ALWAYS_INLINE bool readOverflow(const int &ii,const unsigned int size) { return ((ii - (int)size) > 0); }
+
+	////////////////////////////////////////////////////////////////////////////
+	// Read methods
+	////////////////////////////////////////////////////////////////////////////
+
+	/**
+	 * Read a byte
+	 *
+	 * @param ii Iterator
+	 * @return Byte (undefined on overflow)
+	 */
+	ZT_ALWAYS_INLINE uint8_t rI8(int &ii) const
+	{
+		const unsigned int s = (unsigned int)ii++;
+		return data[s & ZT_BUF_MEM_MASK];
+	}
+
+	/**
+	 * Read a 16-bit integer
+	 *
+	 * @param ii Iterator
+	 * @return Integer (undefined on overflow)
+	 */
+	ZT_ALWAYS_INLINE uint16_t rI16(int &ii) const
+	{
+		const unsigned int s = (unsigned int)ii & ZT_BUF_MEM_MASK;
+		ii += 2;
+#ifdef ZT_NO_TYPE_PUNNING
+		return (
+			((uint16_t)data[s] << 8U) |
+			(uint16_t)data[s + 1]);
+#else
+		return Utils::ntoh(*reinterpret_cast<const uint16_t *>(data + s));
+#endif
+	}
+
+	/**
+	 * Read a 32-bit integer
+	 *
+	 * @param ii Iterator
+	 * @return Integer (undefined on overflow)
+	 */
+	ZT_ALWAYS_INLINE uint32_t rI32(int &ii) const
+	{
+		const unsigned int s = (unsigned int)ii & ZT_BUF_MEM_MASK;
+		ii += 4;
+#ifdef ZT_NO_TYPE_PUNNING
+		return (
+			((uint32_t)data[s] << 24U) |
+			((uint32_t)data[s + 1] << 16U) |
+			((uint32_t)data[s + 2] << 8U) |
+			(uint32_t)data[s + 3]);
+#else
+		return Utils::ntoh(*reinterpret_cast<const uint32_t *>(data + s));
+#endif
+	}
+
+	/**
+	 * Read a 64-bit integer
+	 *
+	 * @param ii Iterator
+	 * @return Integer (undefined on overflow)
+	 */
+	ZT_ALWAYS_INLINE uint64_t rI64(int &ii) const
+	{
+		const unsigned int s = (unsigned int)ii & ZT_BUF_MEM_MASK;
+		ii += 8;
+#ifdef ZT_NO_TYPE_PUNNING
+		return (
+			((uint64_t)data[s] << 56U) |
+			((uint64_t)data[s + 1] << 48U) |
+			((uint64_t)data[s + 2] << 40U) |
+			((uint64_t)data[s + 3] << 32U) |
+			((uint64_t)data[s + 4] << 24U) |
+			((uint64_t)data[s + 5] << 16U) |
+			((uint64_t)data[s + 6] << 8U) |
+			(uint64_t)data[s + 7]);
+#else
+		return Utils::ntoh(*reinterpret_cast<const uint64_t *>(data + s));
+#endif
+	}
+
+	/**
+	 * Read an object supporting the marshal/unmarshal interface
+	 *
+	 * If the return value is negative the object's state is undefined. A return value of
+	 * zero typically also indicates a problem, though this may depend on the object type.
+	 *
+	 * Since objects may be invalid even if there is no overflow, it's important to check
+	 * the return value of this function in all cases and discard invalid packets as it
+	 * indicates.
+	 *
+	 * @tparam T Object type
+	 * @param ii Iterator
+	 * @param obj Object to read
+	 * @return Bytes read or a negative value on unmarshal error (passed from object) or overflow
+	 */
+	template<typename T>
+	ZT_ALWAYS_INLINE int rO(int &ii,T &obj) const
+	{
+		if (ii < ZT_BUF_MEM_SIZE) {
+			int ms = obj.unmarshal(data + ii, ZT_BUF_MEM_SIZE - ii);
+			if (ms > 0)
+				ii += ms;
+			return ms;
+		}
+		return -1;
+	}
+
+	/**
+	 * Read a C-style string from the buffer, making a copy and advancing the iterator
+	 *
+	 * Use this if the buffer's memory may get changed between reading and processing
+	 * what is read.
+	 *
+	 * @param ii Iterator
+	 * @param buf Buffer to receive string
+	 * @param bufSize Capacity of buffer in bytes
+	 * @return Pointer to buf or NULL on overflow or error
+	 */
+	ZT_ALWAYS_INLINE char *rS(int &ii,char *const buf,const unsigned int bufSize) const
+	{
+		const char *const s = (const char *)(data + ii);
+		const int sii = ii;
+		while (ii < ZT_BUF_MEM_SIZE) {
+			if (data[ii++] == 0) {
+				if ((unsigned int)(ii - sii) > bufSize) // don't overrun the caller's buffer
+					return nullptr;
+				memcpy(buf,s,ii - sii);
+				return buf;
+			}
+		}
+		return nullptr;
+	}
+
+	/**
+	 * Obtain a pointer to a C-style string in the buffer without copying and advance the iterator
+	 *
+	 * The iterator is advanced even if this fails and returns NULL so that readOverflow()
+	 * will indicate that an overflow occurred. As with other reads the string's contents are
+	 * undefined if readOverflow() returns true.
+	 *
+	 * This version avoids a copy and so is faster if the buffer won't be modified between
+	 * reading and processing.
+	 *
+	 * @param ii Iterator
+	 * @return Pointer to null-terminated C-style string or NULL on overflow or error
+	 */
+	ZT_ALWAYS_INLINE const char *rSnc(int &ii) const
+	{
+		const char *const s = (const char *)(data + ii);
+		while (ii < ZT_BUF_MEM_SIZE) {
+			if (data[ii++] == 0)
+				return s;
+		}
+		return nullptr;
+	}
+
+	/**
+	 * Read a byte array from the buffer, making a copy and advancing the iterator
+	 *
+	 * Use this if the buffer's memory may get changed between reading and processing
+	 * what is read.
+	 *
+	 * @param ii Iterator
+	 * @param bytes Buffer to contain data to read
+	 * @param len Length of buffer
+	 * @return Pointer to data or NULL on overflow or error
+	 */
+	ZT_ALWAYS_INLINE void *rB(int &ii,void *bytes,unsigned int len) const
+	{
+		const void *const b = (const void *)(data + ii);
+		if ((ii += (int)len) <= ZT_BUF_MEM_SIZE) {
+			memcpy(bytes, b, len);
+			return bytes;
+		}
+		return nullptr;
+	}
+
+	/**
+	 * Obtain a pointer to a field in the buffer without copying and advance the iterator
+	 *
+	 * The iterator is advanced even if this fails and returns NULL so that readOverflow()
+	 * will indicate that an overflow occurred.
+	 *
+	 * This version avoids a copy and so is faster if the buffer won't be modified between
+	 * reading and processing.
+	 *
+	 * @param ii Iterator
+	 * @param len Length of data field to obtain a pointer to
+	 * @return Pointer to field or NULL on overflow
+	 */
+	ZT_ALWAYS_INLINE const void *rBnc(int &ii,unsigned int len) const
+	{
+		const void *const b = (const void *)(data + ii);
+		return ((ii += (int)len) <= ZT_BUF_MEM_SIZE) ? b : nullptr;
+	}
+
+	////////////////////////////////////////////////////////////////////////////
+	// Write methods
+	////////////////////////////////////////////////////////////////////////////
+
+	/**
+	 * Write a byte
+	 *
+	 * @param ii Iterator
+	 * @param n Byte
+	 */
+	ZT_ALWAYS_INLINE void wI(int &ii,uint8_t n)
+	{
+		const unsigned int s = (unsigned int)ii++;
+		data[s & ZT_BUF_MEM_MASK] = n;
+	}
+
+	/**
+	 * Write a 16-bit integer in big-endian byte order
+	 *
+	 * @param ii Iterator
+	 * @param n Integer
+	 */
+	ZT_ALWAYS_INLINE void wI(int &ii,uint16_t n)
+	{
+		const unsigned int s = ((unsigned int)ii) & ZT_BUF_MEM_MASK;
+		ii += 2;
+#ifdef ZT_NO_TYPE_PUNNING
+		data[s] = (uint8_t)(n >> 8U);
+		data[s + 1] = (uint8_t)n;
+#else
+		*reinterpret_cast<uint16_t *>(data + s) = Utils::hton(n);
+#endif
+	}
+
+	/**
+	 * Write a 32-bit integer in big-endian byte order
+	 *
+	 * @param ii Iterator
+	 * @param n Integer
+	 */
+	ZT_ALWAYS_INLINE void wI(int &ii,uint32_t n)
+	{
+		const unsigned int s = ((unsigned int)ii) & ZT_BUF_MEM_MASK;
+		ii += 4;
+#ifdef ZT_NO_TYPE_PUNNING
+		data[s] = (uint8_t)(n >> 24U);
+		data[s + 1] = (uint8_t)(n >> 16U);
+		data[s + 2] = (uint8_t)(n >> 8U);
+		data[s + 3] = (uint8_t)n;
+#else
+		*reinterpret_cast<uint32_t *>(data + s) = Utils::hton(n);
+#endif
+	}
+
+	/**
+	 * Write a 64-bit integer in big-endian byte order
+	 *
+	 * @param ii Iterator
+	 * @param n Integer
+	 */
+	ZT_ALWAYS_INLINE void wI(int &ii,uint64_t n)
+	{
+		const unsigned int s = ((unsigned int)ii) & ZT_BUF_MEM_MASK;
+		ii += 8;
+#ifdef ZT_NO_TYPE_PUNNING
+		data[s] = (uint8_t)(n >> 56U);
+		data[s + 1] = (uint8_t)(n >> 48U);
+		data[s + 2] = (uint8_t)(n >> 40U);
+		data[s + 3] = (uint8_t)(n >> 32U);
+		data[s + 4] = (uint8_t)(n >> 24U);
+		data[s + 5] = (uint8_t)(n >> 16U);
+		data[s + 6] = (uint8_t)(n >> 8U);
+		data[s + 7] = (uint8_t)n;
+#else
+		*reinterpret_cast<uint64_t *>(data + s) = Utils::hton(n);
+#endif
+	}
+
+	/**
+	 * Write an object implementing the marshal interface
+	 *
+	 * @tparam T Object type
+	 * @param ii Iterator
+	 * @param t Object to write
+	 */
+	template<typename T>
+	ZT_ALWAYS_INLINE void wO(int &ii,T &t)
+	{
+		const unsigned int s = (unsigned int)ii;
+		if ((s + T::marshalSizeMax()) <= ZT_BUF_MEM_SIZE) {
+			int ms = t.marshal(data + s);
+			if (ms > 0)
+				ii += ms;
+		} else {
+			ii += T::marshalSizeMax(); // mark as overflowed even if we didn't do anything
+		}
+	}
+
+	/**
+	 * Write a C-style null-terminated string (including the trailing zero)
+	 *
+	 * @param ii Iterator
+	 * @param s String to write (writes an empty string if this is NULL)
+	 */
+	ZT_ALWAYS_INLINE void wS(int &ii,const char *s)
+	{
+		if (s) {
+			char c;
+			do {
+				c = *(s++);
+				wI(ii,(uint8_t)c);
+			} while (c);
+		} else {
+			wI(ii,(uint8_t)0);
+		}
+	}
+
+	/**
+	 * Write a byte array
+	 *
+	 * @param ii Iterator
+	 * @param bytes Bytes to write
+	 * @param len Size of data in bytes
+	 */
+	ZT_ALWAYS_INLINE void wB(int &ii,const void *const bytes,const unsigned int len)
+	{
+		unsigned int s = (unsigned int)ii;
+		if ((ii += (int)len) <= ZT_BUF_MEM_SIZE)
+			memcpy(data + s, bytes, len);
+	}
+
+	////////////////////////////////////////////////////////////////////////////
+
+	ZT_ALWAYS_INLINE Buf &operator=(const Buf &b)
+	{
+		if (&b != this)
+			memcpy(data,b.data,ZT_BUF_MEM_SIZE);
+		return *this;
+	}
+
+	/**
+	 * Raw buffer
+	 *
+	 * The extra eight bytes permit silent overflow of integer types without reading or writing
+	 * beyond Buf's memory and without branching or extra masks. They can be ignored otherwise.
+	 */
+	uint8_t data[ZT_BUF_MEM_SIZE + 8];
+
+private:
+	volatile uintptr_t __nextInPool;
+	AtomicCounter __refCount;
+};
+
+} // namespace ZeroTier
+
+#endif
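
The class comment prescribes a specific discipline: perform a whole series of reads or writes branch-free (out-of-range offsets simply wrap via & ZT_BUF_MEM_MASK), then check the overflow predicate once before acting on anything. A sketch of both directions (field layout is illustrative; b would come from Buf::get()):

	#include "Buf.hpp"

	using namespace ZeroTier;

	// Assembly: write fields blindly, then one writeOverflow() check for the series.
	int assemble(Buf &b)
	{
		int ii = 0;
		b.wI(ii,(uint8_t)1);            // version
		b.wI(ii,(uint16_t)0x1234);      // flags
		b.wI(ii,(uint64_t)42);          // timestamp
		b.wS(ii,"hello");               // null-terminated string
		return Buf::writeOverflow(ii) ? -1 : ii;
	}

	// Parsing: read fields blindly, then check readOverflow() BEFORE branching on them.
	bool parse(const Buf &b,const unsigned int size)
	{
		int ii = 0;
		const uint8_t ver = b.rI8(ii);
		const uint16_t flags = b.rI16(ii);
		const uint64_t ts = b.rI64(ii);
		if (Buf::readOverflow(ii,size)) // discard the packet; fields are undefined
			return false;
		return (ver == 1)&&(flags == 0x1234)&&(ts == 42);
	}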

+ 2 - 0
node/CMakeLists.txt

@@ -8,6 +8,7 @@ endif(WIN32)
 set(core_headers
 	Address.hpp
 	AtomicCounter.hpp
+	Buf.hpp
 	Buffer.hpp
 	C25519.hpp
 	Capability.hpp
@@ -50,6 +51,7 @@ set(core_headers
 
 set(core_src
 	AES.cpp
+	Buf.cpp
 	C25519.cpp
 	Credential.cpp
 	ECC384.cpp

+ 13 - 11
node/Utils.cpp

@@ -32,31 +32,33 @@
 #include "AES.hpp"
 #include "SHA512.hpp"
 
+namespace ZeroTier {
+
+namespace Utils {
+
 #if (defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(__AMD64) || defined(__AMD64__) || defined(_M_X64))
-#include <immintrin.h>
-static bool _zt_rdrand_supported()
+CPUIDRegisters::CPUIDRegisters()
 {
 #ifdef __WINDOWS__
 	int regs[4];
 	__cpuid(regs,1);
-	return (((regs[2] >> 30) & 1) != 0);
+	eax = (uint32_t)regs[0];
+	ebx = (uint32_t)regs[1];
+	ecx = (uint32_t)regs[2];
+	edx = (uint32_t)regs[3];
 #else
-	uint32_t eax,ebx,ecx,edx;
 	__asm__ __volatile__ (
 		"cpuid"
 		: "=a"(eax),"=b"(ebx),"=c"(ecx),"=d"(edx)
 		: "a"(1),"c"(0)
 	);
-	return ((ecx & (1 << 30)) != 0);
 #endif
+	rdrand = ((ecx & (1U << 30U)) != 0);
+	aes = ( ((ecx & (1U << 25U)) != 0) && ((ecx & (1U << 19U)) != 0) && ((ecx & (1U << 1U)) != 0) ); // AES, PCLMUL, SSE4.1
 }
-static const bool _rdrandSupported = _zt_rdrand_supported();
+CPUIDRegisters CPUID;
 #endif
 
-namespace ZeroTier {
-
-namespace Utils {
-
 const char HEXCHARS[16] = { '0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f' };
 
 bool secureEq(const void *a,const void *b,unsigned int len)
@@ -213,7 +215,7 @@ void getSecureRandom(void *buf,unsigned int bytes)
 				randomState[0] ^= (uint64_t)time(nullptr);
 				randomState[1] ^= (uint64_t)((uintptr_t)buf);
 #if (defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(__AMD64) || defined(__AMD64__) || defined(_M_X64))
-				if (_rdrandSupported) {
+				if (CPUID.rdrand) {
 					uint64_t tmp = 0;
 					_rdrand64_step((unsigned long long *)&tmp);
 					randomState[2] ^= tmp;
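
For reference, the ECX bits of CPUID leaf 1 tested above are: bit 25 AES-NI, bit 1 PCLMULQDQ, bit 19 SSE4.1, and bit 30 RDRAND; the aes flag requires all of the first three because the AES-NI code paths also use carry-less multiply and SSE4.1 instructions. A standalone check of the same bits (GCC/Clang on x86-64; not part of the commit):

	#include <cstdint>
	#include <cstdio>

	int main()
	{
		uint32_t eax,ebx,ecx,edx;
		// CPUID leaf 1: feature flags are returned in ECX/EDX
		__asm__ __volatile__ ("cpuid" : "=a"(eax),"=b"(ebx),"=c"(ecx),"=d"(edx) : "a"(1),"c"(0));
		std::printf("aes-ni=%u pclmulqdq=%u sse4.1=%u rdrand=%u\n",
			(ecx >> 25U) & 1U,(ecx >> 1U) & 1U,(ecx >> 19U) & 1U,(ecx >> 30U) & 1U);
		return 0;
	}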

+ 17 - 0
node/Utils.hpp

@@ -20,6 +20,12 @@
 #include <cstring>
 #include <ctime>
 
+#if (defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(__AMD64) || defined(__AMD64__) || defined(_M_X64))
+#include <emmintrin.h>
+#include <xmmintrin.h>
+#include <immintrin.h>
+#endif
+
 #include <string>
 #include <stdexcept>
 #include <vector>
@@ -31,6 +37,17 @@ namespace ZeroTier {
 
 namespace Utils {
 
+#if (defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(__AMD64) || defined(__AMD64__) || defined(_M_X64))
+struct CPUIDRegisters
+{
+	uint32_t eax,ebx,ecx,edx;
+	bool rdrand;
+	bool aes;
+	CPUIDRegisters();
+};
+extern CPUIDRegisters CPUID;
+#endif
+
 /**
  * Hexadecimal characters 0-f
  */

Some files were not shown because too many files changed in this diff