Poly1305.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625
  1. /*
  2. 20080912
  3. D. J. Bernstein
  4. Public domain.
  5. */
  6. #include "Constants.hpp"
  7. #include "Poly1305.hpp"
  8. #include <stdio.h>
  9. #include <stdint.h>
  10. #include <stdlib.h>
  11. #include <string.h>
  12. #ifdef __WINDOWS__
  13. #pragma warning(disable: 4146)
  14. #endif
  15. namespace ZeroTier {
  16. #if 0
  /*
   * h += c, in place. Both operands are 17 limbs of 8 bits each, least
   * significant limb first. The carry is propagated upward from limb 0 and
   * anything carried out of limb 16 is discarded.
   */
  static inline void add(unsigned int h[17],const unsigned int c[17])
  {
    unsigned int carry = 0;
    for (unsigned int limb = 0; limb < 17; ++limb) {
      carry += h[limb] + c[limb];
      h[limb] = carry & 255;
      carry >>= 8;
    }
  }
  /*
   * Carry-normalises h in place. Limbs 0..15 are brought back to 8 bits,
   * limb 16 keeps only its low two bits, and everything above those two bits
   * is multiplied by 5 and added back in at limb 0, followed by a second
   * carry pass. Limb 16 receives whatever that second pass carries out.
   */
  static inline void squeeze(unsigned int h[17])
  {
    unsigned int carry = 0;
    unsigned int limb;

    /* first pass: normalise the low 16 limbs */
    for (limb = 0; limb < 16; ++limb) {
      carry += h[limb];
      h[limb] = carry & 255;
      carry >>= 8;
    }
    carry += h[16];
    h[16] = carry & 3;

    /* fold the part above bit 130 back in, times 5 */
    carry = 5 * (carry >> 2);
    for (limb = 0; limb < 16; ++limb) {
      carry += h[limb];
      h[limb] = carry & 255;
      carry >>= 8;
    }
    carry += h[16];
    h[16] = carry;
  }
  /*
   * Constant that freeze() adds to h: 5 in limb 0 and 252 in limb 16, i.e.
   * 2^136 - (2^130 - 5) written as 17 little-endian 8-bit limbs.
   */
  static const unsigned int minusp[17] = {
    5,
    0, 0, 0, 0, 0,
    0, 0, 0, 0, 0,
    0, 0, 0, 0, 0,
    252
  };
  38. static inline void freeze(unsigned int h[17])
  39. {
  40. unsigned int horig[17];
  41. unsigned int j;
  42. unsigned int negative;
  43. for (j = 0;j < 17;++j) horig[j] = h[j];
  44. add(h,minusp);
  45. negative = -(h[16] >> 7);
  46. for (j = 0;j < 17;++j) h[j] ^= negative & (horig[j] ^ h[j]);
  47. }
  48. static inline void mulmod(unsigned int h[17],const unsigned int r[17])
  49. {
  50. unsigned int hr[17];
  51. unsigned int i;
  52. unsigned int j;
  53. unsigned int u;
  54. for (i = 0;i < 17;++i) {
  55. u = 0;
  56. for (j = 0;j <= i;++j) u += h[j] * r[i - j];
  57. for (j = i + 1;j < 17;++j) u += 320 * h[j] * r[i + 17 - j];
  58. hr[i] = u;
  59. }
  60. for (i = 0;i < 17;++i) h[i] = hr[i];
  61. squeeze(h);
  62. }
  63. static inline int crypto_onetimeauth(unsigned char *out,const unsigned char *in,unsigned long long inlen,const unsigned char *k)
  64. {
  65. unsigned int j;
  66. unsigned int r[17];
  67. unsigned int h[17];
  68. unsigned int c[17];
  69. r[0] = k[0];
  70. r[1] = k[1];
  71. r[2] = k[2];
  72. r[3] = k[3] & 15;
  73. r[4] = k[4] & 252;
  74. r[5] = k[5];
  75. r[6] = k[6];
  76. r[7] = k[7] & 15;
  77. r[8] = k[8] & 252;
  78. r[9] = k[9];
  79. r[10] = k[10];
  80. r[11] = k[11] & 15;
  81. r[12] = k[12] & 252;
  82. r[13] = k[13];
  83. r[14] = k[14];
  84. r[15] = k[15] & 15;
  85. r[16] = 0;
  86. for (j = 0;j < 17;++j) h[j] = 0;
  87. while (inlen > 0) {
  88. for (j = 0;j < 17;++j) c[j] = 0;
  89. for (j = 0;(j < 16) && (j < inlen);++j) c[j] = in[j];
  90. c[j] = 1;
  91. in += j; inlen -= j;
  92. add(h,c);
  93. mulmod(h,r);
  94. }
  95. freeze(h);
  96. for (j = 0;j < 16;++j) c[j] = k[j + 16];
  97. c[16] = 0;
  98. add(h,c);
  99. for (j = 0;j < 16;++j) out[j] = h[j];
  100. return 0;
  101. }
  102. void Poly1305::compute(void *auth,const void *data,unsigned int len,const void *key)
  103. throw()
  104. {
  105. crypto_onetimeauth((unsigned char *)auth,(const unsigned char *)data,len,(const unsigned char *)key);
  106. }
  107. #endif
  108. namespace {
  /*
   * Caller-visible storage for one MAC computation. poly1305_init(),
   * poly1305_update() and poly1305_finish() cast a pointer to this struct to
   * poly1305_state_internal_t; the leading size_t member gives the storage at
   * least size_t alignment.
   */
  struct poly1305_context {
    size_t aligner;
    unsigned char opaque[136];
  };
  113. #if defined(_MSC_VER) || defined(__GNUC__)
  114. //////////////////////////////////////////////////////////////////////////////
  115. // 128-bit implementation for MSC and GCC
  116. #if defined(_MSC_VER)
  117. #include <intrin.h>
  118. typedef struct uint128_t {
  119. unsigned long long lo;
  120. unsigned long long hi;
  121. } uint128_t;
  122. #define MUL(out, x, y) out.lo = _umul128((x), (y), &out.hi)
  123. #define ADD(out, in) { unsigned long long t = out.lo; out.lo += in.lo; out.hi += (out.lo < t) + in.hi; }
  124. #define ADDLO(out, in) { unsigned long long t = out.lo; out.lo += in; out.hi += (out.lo < t); }
  125. #define SHR(in, shift) (__shiftright128(in.lo, in.hi, (shift)))
  126. #define LO(in) (in.lo)
  127. // #define POLY1305_NOINLINE __declspec(noinline)
  128. #elif defined(__GNUC__)
  129. #if defined(__SIZEOF_INT128__)
  130. typedef unsigned __int128 uint128_t;
  131. #else
  132. typedef unsigned uint128_t __attribute__((mode(TI)));
  133. #endif
  134. #define MUL(out, x, y) out = ((uint128_t)x * y)
  135. #define ADD(out, in) out += in
  136. #define ADDLO(out, in) out += in
  137. #define SHR(in, shift) (unsigned long long)(in >> (shift))
  138. #define LO(in) (unsigned long long)(in)
  139. // #define POLY1305_NOINLINE __attribute__((noinline))
  140. #endif
  #define poly1305_block_size 16
  /* 17 + sizeof(size_t) + 8*sizeof(unsigned long long) */
  /*
   * Working state for the 128-bit-multiply implementation, overlaid on
   * poly1305_context by the init/update/finish functions.
   */
  struct poly1305_state_internal_t {
    unsigned long long r[3];                   /* masked key half, limbs of 44/44/42 bits */
    unsigned long long h[3];                   /* accumulator, same limb layout */
    unsigned long long pad[2];                 /* key bytes 16..31, added to h in finish */
    size_t leftover;                           /* number of bytes currently held in buffer */
    unsigned char buffer[poly1305_block_size]; /* partial block awaiting more input */
    unsigned char final;                       /* set by finish before the last, padded block */
  };
  /* read p[0..7] as one 64 bit unsigned integer, p[0] being the least significant byte */
  static inline unsigned long long
  U8TO64(const unsigned char *p) {
    unsigned long long value = 0;
    /* walk from the most significant byte down so each step is a shift-and-or */
    for (int idx = 7; idx >= 0; --idx)
      value = (value << 8) | (unsigned long long)(p[idx] & 0xff);
    return value;
  }
  /* write v to p[0..7], least significant byte first */
  static inline void
  U64TO8(unsigned char *p, unsigned long long v) {
    for (unsigned int idx = 0; idx < 8; ++idx) {
      p[idx] = v & 0xff;
      v >>= 8;
    }
  }
  176. static inline void
  177. poly1305_init(poly1305_context *ctx, const unsigned char key[32]) {
  178. poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
  179. unsigned long long t0,t1;
  180. /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
  181. t0 = U8TO64(&key[0]);
  182. t1 = U8TO64(&key[8]);
  183. st->r[0] = ( t0 ) & 0xffc0fffffff;
  184. st->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff;
  185. st->r[2] = ((t1 >> 24) ) & 0x00ffffffc0f;
  186. /* h = 0 */
  187. st->h[0] = 0;
  188. st->h[1] = 0;
  189. st->h[2] = 0;
  190. /* save pad for later */
  191. st->pad[0] = U8TO64(&key[16]);
  192. st->pad[1] = U8TO64(&key[24]);
  193. st->leftover = 0;
  194. st->final = 0;
  195. }
  196. static inline void
  197. poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m, size_t bytes) {
  198. const unsigned long long hibit = (st->final) ? 0 : ((unsigned long long)1 << 40); /* 1 << 128 */
  199. unsigned long long r0,r1,r2;
  200. unsigned long long s1,s2;
  201. unsigned long long h0,h1,h2;
  202. unsigned long long c;
  203. uint128_t d0,d1,d2,d;
  204. r0 = st->r[0];
  205. r1 = st->r[1];
  206. r2 = st->r[2];
  207. h0 = st->h[0];
  208. h1 = st->h[1];
  209. h2 = st->h[2];
  210. s1 = r1 * (5 << 2);
  211. s2 = r2 * (5 << 2);
  212. while (bytes >= poly1305_block_size) {
  213. unsigned long long t0,t1;
  214. /* h += m[i] */
  215. t0 = U8TO64(&m[0]);
  216. t1 = U8TO64(&m[8]);
  217. h0 += (( t0 ) & 0xfffffffffff);
  218. h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff);
  219. h2 += (((t1 >> 24) ) & 0x3ffffffffff) | hibit;
  220. /* h *= r */
  221. MUL(d0, h0, r0); MUL(d, h1, s2); ADD(d0, d); MUL(d, h2, s1); ADD(d0, d);
  222. MUL(d1, h0, r1); MUL(d, h1, r0); ADD(d1, d); MUL(d, h2, s2); ADD(d1, d);
  223. MUL(d2, h0, r2); MUL(d, h1, r1); ADD(d2, d); MUL(d, h2, r0); ADD(d2, d);
  224. /* (partial) h %= p */
  225. c = SHR(d0, 44); h0 = LO(d0) & 0xfffffffffff;
  226. ADDLO(d1, c); c = SHR(d1, 44); h1 = LO(d1) & 0xfffffffffff;
  227. ADDLO(d2, c); c = SHR(d2, 42); h2 = LO(d2) & 0x3ffffffffff;
  228. h0 += c * 5; c = (h0 >> 44); h0 = h0 & 0xfffffffffff;
  229. h1 += c;
  230. m += poly1305_block_size;
  231. bytes -= poly1305_block_size;
  232. }
  233. st->h[0] = h0;
  234. st->h[1] = h1;
  235. st->h[2] = h2;
  236. }
  237. static inline void
  238. poly1305_finish(poly1305_context *ctx, unsigned char mac[16]) {
  239. poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
  240. unsigned long long h0,h1,h2,c;
  241. unsigned long long g0,g1,g2;
  242. unsigned long long t0,t1;
  243. /* process the remaining block */
  244. if (st->leftover) {
  245. size_t i = st->leftover;
  246. st->buffer[i] = 1;
  247. for (i = i + 1; i < poly1305_block_size; i++)
  248. st->buffer[i] = 0;
  249. st->final = 1;
  250. poly1305_blocks(st, st->buffer, poly1305_block_size);
  251. }
  252. /* fully carry h */
  253. h0 = st->h[0];
  254. h1 = st->h[1];
  255. h2 = st->h[2];
  256. c = (h1 >> 44); h1 &= 0xfffffffffff;
  257. h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff;
  258. h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
  259. h1 += c; c = (h1 >> 44); h1 &= 0xfffffffffff;
  260. h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff;
  261. h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
  262. h1 += c;
  263. /* compute h + -p */
  264. g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff;
  265. g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff;
  266. g2 = h2 + c - ((unsigned long long)1 << 42);
  267. /* select h if h < p, or h + -p if h >= p */
  268. c = (g2 >> ((sizeof(unsigned long long) * 8) - 1)) - 1;
  269. g0 &= c;
  270. g1 &= c;
  271. g2 &= c;
  272. c = ~c;
  273. h0 = (h0 & c) | g0;
  274. h1 = (h1 & c) | g1;
  275. h2 = (h2 & c) | g2;
  276. /* h = (h + pad) */
  277. t0 = st->pad[0];
  278. t1 = st->pad[1];
  279. h0 += (( t0 ) & 0xfffffffffff) ; c = (h0 >> 44); h0 &= 0xfffffffffff;
  280. h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c; c = (h1 >> 44); h1 &= 0xfffffffffff;
  281. h2 += (((t1 >> 24) ) & 0x3ffffffffff) + c; h2 &= 0x3ffffffffff;
  282. /* mac = h % (2^128) */
  283. h0 = ((h0 ) | (h1 << 44));
  284. h1 = ((h1 >> 20) | (h2 << 24));
  285. U64TO8(&mac[0], h0);
  286. U64TO8(&mac[8], h1);
  287. /* zero out the state */
  288. st->h[0] = 0;
  289. st->h[1] = 0;
  290. st->h[2] = 0;
  291. st->r[0] = 0;
  292. st->r[1] = 0;
  293. st->r[2] = 0;
  294. st->pad[0] = 0;
  295. st->pad[1] = 0;
  296. }
  297. //////////////////////////////////////////////////////////////////////////////
  298. #else
  299. //////////////////////////////////////////////////////////////////////////////
  300. // More portable 64-bit implementation
  #define poly1305_block_size 16
  /* 17 + sizeof(size_t) + 14*sizeof(unsigned long) */
  /*
   * Working state for the portable implementation, overlaid on
   * poly1305_context by the init/update/finish functions.
   */
  struct poly1305_state_internal_t {
    unsigned long r[5];                        /* masked key half, five 26-bit limbs */
    unsigned long h[5];                        /* accumulator, same limb layout */
    unsigned long pad[4];                      /* key bytes 16..31 as four 32-bit words */
    size_t leftover;                           /* number of bytes currently held in buffer */
    unsigned char buffer[poly1305_block_size]; /* partial block awaiting more input */
    unsigned char final;                       /* set by finish before the last, padded block */
  };
  /* read p[0..3] as one 32 bit unsigned integer, p[0] being the least significant byte */
  static unsigned long
  U8TO32(const unsigned char *p) {
    unsigned long value = 0;
    /* walk from the most significant byte down so each step is a shift-and-or */
    for (int idx = 3; idx >= 0; --idx)
      value = (value << 8) | (unsigned long)(p[idx] & 0xff);
    return value;
  }
  /* write the low 32 bits of v to p[0..3], least significant byte first */
  static void
  U32TO8(unsigned char *p, unsigned long v) {
    for (unsigned int idx = 0; idx < 4; ++idx) {
      p[idx] = v & 0xff;
      v >>= 8;
    }
  }
  328. static inline void
  329. poly1305_init(poly1305_context *ctx, const unsigned char key[32]) {
  330. poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
  331. /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
  332. st->r[0] = (U8TO32(&key[ 0]) ) & 0x3ffffff;
  333. st->r[1] = (U8TO32(&key[ 3]) >> 2) & 0x3ffff03;
  334. st->r[2] = (U8TO32(&key[ 6]) >> 4) & 0x3ffc0ff;
  335. st->r[3] = (U8TO32(&key[ 9]) >> 6) & 0x3f03fff;
  336. st->r[4] = (U8TO32(&key[12]) >> 8) & 0x00fffff;
  337. /* h = 0 */
  338. st->h[0] = 0;
  339. st->h[1] = 0;
  340. st->h[2] = 0;
  341. st->h[3] = 0;
  342. st->h[4] = 0;
  343. /* save pad for later */
  344. st->pad[0] = U8TO32(&key[16]);
  345. st->pad[1] = U8TO32(&key[20]);
  346. st->pad[2] = U8TO32(&key[24]);
  347. st->pad[3] = U8TO32(&key[28]);
  348. st->leftover = 0;
  349. st->final = 0;
  350. }
  351. static inline void
  352. poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m, size_t bytes) {
  353. const unsigned long hibit = (st->final) ? 0 : (1 << 24); /* 1 << 128 */
  354. unsigned long r0,r1,r2,r3,r4;
  355. unsigned long s1,s2,s3,s4;
  356. unsigned long h0,h1,h2,h3,h4;
  357. unsigned long long d0,d1,d2,d3,d4;
  358. unsigned long c;
  359. r0 = st->r[0];
  360. r1 = st->r[1];
  361. r2 = st->r[2];
  362. r3 = st->r[3];
  363. r4 = st->r[4];
  364. s1 = r1 * 5;
  365. s2 = r2 * 5;
  366. s3 = r3 * 5;
  367. s4 = r4 * 5;
  368. h0 = st->h[0];
  369. h1 = st->h[1];
  370. h2 = st->h[2];
  371. h3 = st->h[3];
  372. h4 = st->h[4];
  373. while (bytes >= poly1305_block_size) {
  374. /* h += m[i] */
  375. h0 += (U8TO32(m+ 0) ) & 0x3ffffff;
  376. h1 += (U8TO32(m+ 3) >> 2) & 0x3ffffff;
  377. h2 += (U8TO32(m+ 6) >> 4) & 0x3ffffff;
  378. h3 += (U8TO32(m+ 9) >> 6) & 0x3ffffff;
  379. h4 += (U8TO32(m+12) >> 8) | hibit;
  380. /* h *= r */
  381. d0 = ((unsigned long long)h0 * r0) + ((unsigned long long)h1 * s4) + ((unsigned long long)h2 * s3) + ((unsigned long long)h3 * s2) + ((unsigned long long)h4 * s1);
  382. d1 = ((unsigned long long)h0 * r1) + ((unsigned long long)h1 * r0) + ((unsigned long long)h2 * s4) + ((unsigned long long)h3 * s3) + ((unsigned long long)h4 * s2);
  383. d2 = ((unsigned long long)h0 * r2) + ((unsigned long long)h1 * r1) + ((unsigned long long)h2 * r0) + ((unsigned long long)h3 * s4) + ((unsigned long long)h4 * s3);
  384. d3 = ((unsigned long long)h0 * r3) + ((unsigned long long)h1 * r2) + ((unsigned long long)h2 * r1) + ((unsigned long long)h3 * r0) + ((unsigned long long)h4 * s4);
  385. d4 = ((unsigned long long)h0 * r4) + ((unsigned long long)h1 * r3) + ((unsigned long long)h2 * r2) + ((unsigned long long)h3 * r1) + ((unsigned long long)h4 * r0);
  386. /* (partial) h %= p */
  387. c = (unsigned long)(d0 >> 26); h0 = (unsigned long)d0 & 0x3ffffff;
  388. d1 += c; c = (unsigned long)(d1 >> 26); h1 = (unsigned long)d1 & 0x3ffffff;
  389. d2 += c; c = (unsigned long)(d2 >> 26); h2 = (unsigned long)d2 & 0x3ffffff;
  390. d3 += c; c = (unsigned long)(d3 >> 26); h3 = (unsigned long)d3 & 0x3ffffff;
  391. d4 += c; c = (unsigned long)(d4 >> 26); h4 = (unsigned long)d4 & 0x3ffffff;
  392. h0 += c * 5; c = (h0 >> 26); h0 = h0 & 0x3ffffff;
  393. h1 += c;
  394. m += poly1305_block_size;
  395. bytes -= poly1305_block_size;
  396. }
  397. st->h[0] = h0;
  398. st->h[1] = h1;
  399. st->h[2] = h2;
  400. st->h[3] = h3;
  401. st->h[4] = h4;
  402. }
  403. static inline void
  404. poly1305_finish(poly1305_context *ctx, unsigned char mac[16]) {
  405. poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
  406. unsigned long h0,h1,h2,h3,h4,c;
  407. unsigned long g0,g1,g2,g3,g4;
  408. unsigned long long f;
  409. unsigned long mask;
  410. /* process the remaining block */
  411. if (st->leftover) {
  412. size_t i = st->leftover;
  413. st->buffer[i++] = 1;
  414. for (; i < poly1305_block_size; i++)
  415. st->buffer[i] = 0;
  416. st->final = 1;
  417. poly1305_blocks(st, st->buffer, poly1305_block_size);
  418. }
  419. /* fully carry h */
  420. h0 = st->h[0];
  421. h1 = st->h[1];
  422. h2 = st->h[2];
  423. h3 = st->h[3];
  424. h4 = st->h[4];
  425. c = h1 >> 26; h1 = h1 & 0x3ffffff;
  426. h2 += c; c = h2 >> 26; h2 = h2 & 0x3ffffff;
  427. h3 += c; c = h3 >> 26; h3 = h3 & 0x3ffffff;
  428. h4 += c; c = h4 >> 26; h4 = h4 & 0x3ffffff;
  429. h0 += c * 5; c = h0 >> 26; h0 = h0 & 0x3ffffff;
  430. h1 += c;
  431. /* compute h + -p */
  432. g0 = h0 + 5; c = g0 >> 26; g0 &= 0x3ffffff;
  433. g1 = h1 + c; c = g1 >> 26; g1 &= 0x3ffffff;
  434. g2 = h2 + c; c = g2 >> 26; g2 &= 0x3ffffff;
  435. g3 = h3 + c; c = g3 >> 26; g3 &= 0x3ffffff;
  436. g4 = h4 + c - (1 << 26);
  437. /* select h if h < p, or h + -p if h >= p */
  438. mask = (g4 >> ((sizeof(unsigned long) * 8) - 1)) - 1;
  439. g0 &= mask;
  440. g1 &= mask;
  441. g2 &= mask;
  442. g3 &= mask;
  443. g4 &= mask;
  444. mask = ~mask;
  445. h0 = (h0 & mask) | g0;
  446. h1 = (h1 & mask) | g1;
  447. h2 = (h2 & mask) | g2;
  448. h3 = (h3 & mask) | g3;
  449. h4 = (h4 & mask) | g4;
  450. /* h = h % (2^128) */
  451. h0 = ((h0 ) | (h1 << 26)) & 0xffffffff;
  452. h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
  453. h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
  454. h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
  455. /* mac = (h + pad) % (2^128) */
  456. f = (unsigned long long)h0 + st->pad[0] ; h0 = (unsigned long)f;
  457. f = (unsigned long long)h1 + st->pad[1] + (f >> 32); h1 = (unsigned long)f;
  458. f = (unsigned long long)h2 + st->pad[2] + (f >> 32); h2 = (unsigned long)f;
  459. f = (unsigned long long)h3 + st->pad[3] + (f >> 32); h3 = (unsigned long)f;
  460. U32TO8(mac + 0, h0);
  461. U32TO8(mac + 4, h1);
  462. U32TO8(mac + 8, h2);
  463. U32TO8(mac + 12, h3);
  464. /* zero out the state */
  465. st->h[0] = 0;
  466. st->h[1] = 0;
  467. st->h[2] = 0;
  468. st->h[3] = 0;
  469. st->h[4] = 0;
  470. st->r[0] = 0;
  471. st->r[1] = 0;
  472. st->r[2] = 0;
  473. st->r[3] = 0;
  474. st->r[4] = 0;
  475. st->pad[0] = 0;
  476. st->pad[1] = 0;
  477. st->pad[2] = 0;
  478. st->pad[3] = 0;
  479. }
  480. //////////////////////////////////////////////////////////////////////////////
  481. #endif // MSC/GCC or not
  482. static inline void
  483. poly1305_update(poly1305_context *ctx, const unsigned char *m, size_t bytes) {
  484. poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
  485. size_t i;
  486. /* handle leftover */
  487. if (st->leftover) {
  488. size_t want = (poly1305_block_size - st->leftover);
  489. if (want > bytes)
  490. want = bytes;
  491. for (i = 0; i < want; i++)
  492. st->buffer[st->leftover + i] = m[i];
  493. bytes -= want;
  494. m += want;
  495. st->leftover += want;
  496. if (st->leftover < poly1305_block_size)
  497. return;
  498. poly1305_blocks(st, st->buffer, poly1305_block_size);
  499. st->leftover = 0;
  500. }
  501. /* process full blocks */
  502. if (bytes >= poly1305_block_size) {
  503. size_t want = (bytes & ~(poly1305_block_size - 1));
  504. poly1305_blocks(st, m, want);
  505. m += want;
  506. bytes -= want;
  507. }
  508. /* store leftover */
  509. if (bytes) {
  510. for (i = 0; i < bytes; i++)
  511. st->buffer[st->leftover + i] = m[i];
  512. st->leftover += bytes;
  513. }
  514. }
  515. } // anonymous namespace
  516. void Poly1305::compute(void *auth,const void *data,unsigned int len,const void *key)
  517. throw()
  518. {
  519. poly1305_context ctx;
  520. poly1305_init(&ctx,reinterpret_cast<const unsigned char *>(key));
  521. poly1305_update(&ctx,reinterpret_cast<const unsigned char *>(data),(size_t)len);
  522. poly1305_finish(&ctx,reinterpret_cast<unsigned char *>(auth));
  523. }
  524. } // namespace ZeroTier