Browse Source

Some code cleanup and make sure any type punning is guarded with ZT_NO_TYPE_PUNNING.

Adam Ierymenko 8 years ago
parent
commit
a8ced184dc
4 changed files with 76 additions and 63 deletions
  1. 21 39
      node/Packet.cpp
  2. 18 22
      node/Salsa20.cpp
  3. 37 0
      node/Salsa20.hpp
  4. 0 2
      selftest.cpp

+ 21 - 39
node/Packet.cpp

@@ -1109,38 +1109,26 @@ void Packet::armor(const void *key,bool encryptPayload,unsigned int counter)
 	setCipher(encryptPayload ? ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012 : ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_NONE);
 
 	_salsa20MangleKey((const unsigned char *)key,mangledKey);
-
 	if (ZT_HAS_FAST_CRYPTO()) {
-		const unsigned int payloadLen = (encryptPayload) ? (size() - ZT_PACKET_IDX_VERB) : 0;
+		const unsigned int encryptLen = (encryptPayload) ? (size() - ZT_PACKET_IDX_VERB) : 0;
 		uint64_t keyStream[(ZT_PROTO_MAX_PACKET_LENGTH + 64 + 8) / 8];
-		ZT_FAST_SINGLE_PASS_SALSA2012(keyStream,payloadLen + 64,(data + ZT_PACKET_IDX_IV),mangledKey);
-
-		uint64_t *ksptr = keyStream + 8; // encryption starts after first Salsa20 block
-		uint8_t *dptr = data + ZT_PACKET_IDX_VERB;
-		unsigned int ksrem = payloadLen;
-		while (ksrem >= 8) {
-			ksrem -= 8;
-			*(reinterpret_cast<uint64_t *>(dptr)) ^= *(ksptr++);
-			dptr += 8;
-		}
-		for(unsigned int i=0;i<ksrem;++i) {
-			dptr[i] ^= reinterpret_cast<const uint8_t *>(ksptr)[i];
-		}
-
+		ZT_FAST_SINGLE_PASS_SALSA2012(keyStream,encryptLen + 64,(data + ZT_PACKET_IDX_IV),mangledKey);
+		Salsa20::memxor(data + ZT_PACKET_IDX_VERB,reinterpret_cast<const uint8_t *>(keyStream + 8),encryptLen);
 		uint64_t mac[2];
 		Poly1305::compute(mac,data + ZT_PACKET_IDX_VERB,size() - ZT_PACKET_IDX_VERB,keyStream);
+#ifdef ZT_NO_TYPE_PUNNING
 		memcpy(data + ZT_PACKET_IDX_MAC,mac,8);
+#else
+		(*reinterpret_cast<uint64_t *>(data + ZT_PACKET_IDX_MAC)) = mac[0];
+#endif
 	} else {
 		Salsa20 s20(mangledKey,data + ZT_PACKET_IDX_IV);
-
 		uint64_t macKey[4];
 		s20.crypt12(ZERO_KEY,macKey,sizeof(macKey));
-
 		uint8_t *const payload = data + ZT_PACKET_IDX_VERB;
 		const unsigned int payloadLen = size() - ZT_PACKET_IDX_VERB;
 		if (encryptPayload)
 			s20.crypt12(payload,payload,payloadLen);
-
 		uint64_t mac[2];
 		Poly1305::compute(mac,payload,payloadLen,macKey);
 		memcpy(data + ZT_PACKET_IDX_MAC,mac,8);
@@ -1157,39 +1145,33 @@ bool Packet::dearmor(const void *key)
 
 	if ((cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_NONE)||(cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012)) {
 		_salsa20MangleKey((const unsigned char *)key,mangledKey);
-
 		if (ZT_HAS_FAST_CRYPTO()) {
 			uint64_t keyStream[(ZT_PROTO_MAX_PACKET_LENGTH + 64 + 8) / 8];
 			ZT_FAST_SINGLE_PASS_SALSA2012(keyStream,((cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012) ? (payloadLen + 64) : 64),(data + ZT_PACKET_IDX_IV),mangledKey);
-
 			uint64_t mac[2];
 			Poly1305::compute(mac,payload,payloadLen,keyStream);
+#ifdef ZT_NO_TYPE_PUNNING
 			if (!Utils::secureEq(mac,data + ZT_PACKET_IDX_MAC,8))
-				return false; // MAC failed, packet is corrupt, modified, or is not from the sender
-
-			if (cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012) {
-				uint64_t *ksptr = keyStream + 8; // encryption starts after first Salsa20 block
-				uint8_t *dptr = data + ZT_PACKET_IDX_VERB;
-				unsigned int ksrem = payloadLen;
-				while (ksrem >= 8) {
-					ksrem -= 8;
-					*(reinterpret_cast<uint64_t *>(dptr)) ^= *(ksptr++);
-					dptr += 8;
-				}
-				for(unsigned int i=0;i<ksrem;++i) {
-					dptr[i] ^= reinterpret_cast<const uint8_t *>(ksptr)[i];
-				}
-			}
+				return false;
+#else
+			if ((*reinterpret_cast<const uint64_t *>(data + ZT_PACKET_IDX_MAC)) != mac[0]) // also secure, constant time
+				return false;
+#endif
+			if (cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012)
+				Salsa20::memxor(data + ZT_PACKET_IDX_VERB,reinterpret_cast<const uint8_t *>(keyStream + 8),payloadLen);
 		} else {
 			Salsa20 s20(mangledKey,data + ZT_PACKET_IDX_IV);
-
 			uint64_t macKey[4];
 			s20.crypt12(ZERO_KEY,macKey,sizeof(macKey));
 			uint64_t mac[2];
 			Poly1305::compute(mac,payload,payloadLen,macKey);
+#ifdef ZT_NO_TYPE_PUNNING
 			if (!Utils::secureEq(mac,data + ZT_PACKET_IDX_MAC,8))
-				return false; // MAC failed, packet is corrupt, modified, or is not from the sender
-
+				return false;
+#else
+			if ((*reinterpret_cast<const uint64_t *>(data + ZT_PACKET_IDX_MAC)) != mac[0]) // also secure, constant time
+				return false;
+#endif
 			if (cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012)
 				s20.crypt12(payload,payload,payloadLen);
 		}

+ 18 - 22
node/Salsa20.cpp

@@ -69,46 +69,42 @@ namespace ZeroTier {
 void Salsa20::init(const void *key,const void *iv)
 {
 #ifdef ZT_SALSA20_SSE
-	const uint32_t *k = (const uint32_t *)key;
-
+	const uint32_t *const k = (const uint32_t *)key;
 	_state.i[0] = 0x61707865;
-	_state.i[3] = 0x6b206574;
-	_state.i[13] = k[0];
-	_state.i[10] = k[1];
-	_state.i[7] = k[2];
-	_state.i[4] = k[3];
-	k += 4;
 	_state.i[1] = 0x3320646e;
 	_state.i[2] = 0x79622d32;
-	_state.i[15] = k[0];
-	_state.i[12] = k[1];
-	_state.i[9] = k[2];
-	_state.i[6] = k[3];
-	_state.i[14] = ((const uint32_t *)iv)[0];
-	_state.i[11] = ((const uint32_t *)iv)[1];
+	_state.i[3] = 0x6b206574;
+	_state.i[4] = k[3];
 	_state.i[5] = 0;
+	_state.i[6] = k[7];
+	_state.i[7] = k[2];
 	_state.i[8] = 0;
+	_state.i[9] = k[6];
+	_state.i[10] = k[1];
+	_state.i[11] = ((const uint32_t *)iv)[1];
+	_state.i[12] = k[5];
+	_state.i[13] = k[0];
+	_state.i[14] = ((const uint32_t *)iv)[0];
+	_state.i[15] = k[4];
 #else
 	const char *const constants = "expand 32-byte k";
-	const uint8_t *k = (const uint8_t *)key;
-
+	const uint8_t *const k = (const uint8_t *)key;
+	_state.i[0] = U8TO32_LITTLE(constants + 0);
 	_state.i[1] = U8TO32_LITTLE(k + 0);
 	_state.i[2] = U8TO32_LITTLE(k + 4);
 	_state.i[3] = U8TO32_LITTLE(k + 8);
 	_state.i[4] = U8TO32_LITTLE(k + 12);
-	k += 16;
 	_state.i[5] = U8TO32_LITTLE(constants + 4);
 	_state.i[6] = U8TO32_LITTLE(((const uint8_t *)iv) + 0);
 	_state.i[7] = U8TO32_LITTLE(((const uint8_t *)iv) + 4);
 	_state.i[8] = 0;
 	_state.i[9] = 0;
 	_state.i[10] = U8TO32_LITTLE(constants + 8);
-	_state.i[11] = U8TO32_LITTLE(k + 0);
-	_state.i[12] = U8TO32_LITTLE(k + 4);
-	_state.i[13] = U8TO32_LITTLE(k + 8);
-	_state.i[14] = U8TO32_LITTLE(k + 12);
+	_state.i[11] = U8TO32_LITTLE(k + 16);
+	_state.i[12] = U8TO32_LITTLE(k + 20);
+	_state.i[13] = U8TO32_LITTLE(k + 24);
+	_state.i[14] = U8TO32_LITTLE(k + 28);
 	_state.i[15] = U8TO32_LITTLE(constants + 12);
-	_state.i[0] = U8TO32_LITTLE(constants + 0);
 #endif
 }
 

+ 37 - 0
node/Salsa20.hpp

@@ -34,6 +34,43 @@ public:
 	Salsa20() {}
 	~Salsa20() { Utils::burn(&_state,sizeof(_state)); }
 
+	/**
+	 * XOR d with s
+	 *
+	 * This is done efficiently using e.g. SSE if available. It's used when
+	 * alternative Salsa20 implementations are used in Packet and is here
+	 * since this is where all the SSE stuff is already included.
+	 *
+	 * @param d Destination to XOR
+	 * @param s Source bytes to XOR with destination
+	 * @param len Length of s and d
+	 */
+	static inline void memxor(uint8_t *d,const uint8_t *s,unsigned int len)
+	{
+#ifdef ZT_SALSA20_SSE
+		while (len >= 16) {
+			_mm_storeu_si128(reinterpret_cast<__m128i *>(d),_mm_xor_si128(_mm_loadu_si128(reinterpret_cast<__m128i *>(d)),_mm_loadu_si128(reinterpret_cast<const __m128i *>(s))));
+			s += 16;
+			d += 16;
+			len -= 16;
+		}
+#else
+#ifndef ZT_NO_TYPE_PUNNING
+		while (len >= 16) {
+			(*reinterpret_cast<uint64_t *>(d)) ^= (*reinterpret_cast<const uint64_t *>(s));
+			s += 8;
+			d += 8;
+			(*reinterpret_cast<uint64_t *>(d)) ^= (*reinterpret_cast<const uint64_t *>(s));
+			s += 8;
+			d += 8;
+			len -= 16;
+		}
+#endif
+#endif
+		while (len--)
+			*(d++) ^= *(s++);
+	}
+
 	/**
 	 * @param key 256-bit (32 byte) key
 	 * @param iv 64-bit initialization vector

+ 0 - 2
selftest.cpp

@@ -141,8 +141,6 @@ static const C25519TestVector C25519_TEST_VECTORS[ZT_NUM_C25519_TEST_VECTORS] =
 
 //////////////////////////////////////////////////////////////////////////////
 
-static unsigned char fuzzbuf[1048576];
-
 static int testCrypto()
 {
 	unsigned char buf1[16384];