@@ -11,17 +11,40 @@
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) ((uint32_t)((v) + (w)))
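/* Editor's sketch (not part of the patch): XOR and PLUS are the word-level
 * primitives of the Salsa20 core; together with a 32-bit ROTATE macro
 * (assumed defined earlier in this file) they form its add-rotate-xor step,
 * e.g. one operation of a quarter-round:
 *
 *   x[ 4] = XOR(x[ 4], ROTATE(PLUS(x[ 0], x[12]), 7));
 *
 * The cast in PLUS keeps the addition wrapping modulo 2^32 regardless of
 * integer promotion. */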
+#ifndef ZT_SALSA20_SSE
+
#if __BYTE_ORDER == __LITTLE_ENDIAN
+
+/* We have a slower version of these macros for CPU/compiler combos that
+ * do not allow unaligned access to a uint32_t. Another solution would be
+ * to methodically require alignment across the code, but this is quicker
+ * for now. The culprit appears to be some Android-based ARM devices. */
+#if 1 // always take the safe bytewise path for now; set to 0 to restore the type-punned fast path below
+#define U8TO32_LITTLE(p) ( ((uint32_t)(p)[0]) | ((uint32_t)(p)[1] << 8) | ((uint32_t)(p)[2] << 16) | ((uint32_t)(p)[3] << 24) )
+static inline void U32TO8_LITTLE(uint8_t *const c,const uint32_t v)
+{
+ c[0] = (uint8_t)v;
+ c[1] = (uint8_t)(v >> 8);
+ c[2] = (uint8_t)(v >> 16);
+ c[3] = (uint8_t)(v >> 24);
+}
+#else // fast path: direct 32-bit access via type punning
#define U8TO32_LITTLE(p) (*((const uint32_t *)((const void *)(p))))
#define U32TO8_LITTLE(c,v) *((uint32_t *)((void *)(c))) = (v)
-#else
+#endif // bytewise vs. type-punned access
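/* Usage sketch (editor's illustration): either variant above round-trips a
 * word through Salsa20's little-endian byte layout, but only the bytewise
 * one is safe at arbitrary addresses:
 *
 *   uint8_t buf[8];
 *   U32TO8_LITTLE(buf + 1, 0x61707865);   // misaligned store, byte by byte
 *   uint32_t w = U8TO32_LITTLE(buf + 1);  // w == 0x61707865 on any CPU
 *
 * The type-punned variant compiles to a single 32-bit access and can fault
 * (or invoke undefined behavior) when buf + 1 is not 4-byte aligned. */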
+
+#else // big endian
+
#ifdef __GNUC__
#define U8TO32_LITTLE(p) __builtin_bswap32(*((const uint32_t *)((const void *)(p))))
#define U32TO8_LITTLE(c,v) *((uint32_t *)((void *)(c))) = __builtin_bswap32((v))
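/* Editor's note: on big-endian targets __builtin_bswap32 reverses byte
 * order so values still hit memory in Salsa20's little-endian layout; for
 * example the "expa" constant:
 *
 *   uint32_t v = 0x61707865;
 *   uint32_t swapped = __builtin_bswap32(v); // 0x65787061
 *
 * Note this path still type-puns the pointer, so it assumes unaligned
 * 32-bit access is permitted (or that callers pass aligned buffers). */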
-#else
+#else // no __builtin_bswap32 -- a manual byte swap would be needed here
#error "big-endian target without __builtin_bswap32: define a manual byte swap for U8TO32_LITTLE/U32TO8_LITTLE"
-#endif
-#endif
+#endif // __GNUC__ or not
+
+#endif // little/big endian
+
+#endif // !ZT_SALSA20_SSE
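/* Editor's sketch (assumes x86/SSE2, hence little-endian): with
 * ZT_SALSA20_SSE defined none of the conversions above are compiled in,
 * since unaligned little-endian words can be moved directly with SSE2
 * intrinsics, e.g.:
 *
 *   #include <emmintrin.h>
 *   __m128i row = _mm_loadu_si128((const __m128i *)in);  // unaligned load
 *   _mm_storeu_si128((__m128i *)out, row);               // unaligned store
 */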
#ifdef ZT_SALSA20_SSE
class _s20sseconsts