bn_exp.c 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120
  1. /* crypto/bn/bn_exp.c */
  2. /* Copyright (C) 1995-1998 Eric Young ([email protected])
  3. * All rights reserved.
  4. *
  5. * This package is an SSL implementation written
  6. * by Eric Young ([email protected]).
  7. * The implementation was written so as to conform with Netscapes SSL.
  8. *
  9. * This library is free for commercial and non-commercial use as long as
  10. * the following conditions are aheared to. The following conditions
  11. * apply to all code found in this distribution, be it the RC4, RSA,
  12. * lhash, DES, etc., code; not just the SSL code. The SSL documentation
  13. * included with this distribution is covered by the same copyright terms
  14. * except that the holder is Tim Hudson ([email protected]).
  15. *
  16. * Copyright remains Eric Young's, and as such any Copyright notices in
  17. * the code are not to be removed.
  18. * If this package is used in a product, Eric Young should be given attribution
  19. * as the author of the parts of the library used.
  20. * This can be in the form of a textual message at program startup or
  21. * in documentation (online or textual) provided with the package.
  22. *
  23. * Redistribution and use in source and binary forms, with or without
  24. * modification, are permitted provided that the following conditions
  25. * are met:
  26. * 1. Redistributions of source code must retain the copyright
  27. * notice, this list of conditions and the following disclaimer.
  28. * 2. Redistributions in binary form must reproduce the above copyright
  29. * notice, this list of conditions and the following disclaimer in the
  30. * documentation and/or other materials provided with the distribution.
  31. * 3. All advertising materials mentioning features or use of this software
  32. * must display the following acknowledgement:
  33. * "This product includes cryptographic software written by
  34. * Eric Young ([email protected])"
  35. * The word 'cryptographic' can be left out if the rouines from the library
  36. * being used are not cryptographic related :-).
  37. * 4. If you include any Windows specific code (or a derivative thereof) from
  38. * the apps directory (application code) you must include an acknowledgement:
  39. * "This product includes software written by Tim Hudson ([email protected])"
  40. *
  41. * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
  42. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  43. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  44. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  45. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  46. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  47. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  48. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  49. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  50. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  51. * SUCH DAMAGE.
  52. *
  53. * The licence and distribution terms for any publically available version or
  54. * derivative of this code cannot be changed. i.e. this code cannot simply be
  55. * copied and put under another distribution licence
  56. * [including the GNU Public Licence.]
  57. */
  58. /* ====================================================================
  59. * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
  60. *
  61. * Redistribution and use in source and binary forms, with or without
  62. * modification, are permitted provided that the following conditions
  63. * are met:
  64. *
  65. * 1. Redistributions of source code must retain the above copyright
  66. * notice, this list of conditions and the following disclaimer.
  67. *
  68. * 2. Redistributions in binary form must reproduce the above copyright
  69. * notice, this list of conditions and the following disclaimer in
  70. * the documentation and/or other materials provided with the
  71. * distribution.
  72. *
  73. * 3. All advertising materials mentioning features or use of this
  74. * software must display the following acknowledgment:
  75. * "This product includes software developed by the OpenSSL Project
  76. * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
  77. *
  78. * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
  79. * endorse or promote products derived from this software without
  80. * prior written permission. For written permission, please contact
  81. * [email protected].
  82. *
  83. * 5. Products derived from this software may not be called "OpenSSL"
  84. * nor may "OpenSSL" appear in their names without prior written
  85. * permission of the OpenSSL Project.
  86. *
  87. * 6. Redistributions of any form whatsoever must retain the following
  88. * acknowledgment:
  89. * "This product includes software developed by the OpenSSL Project
  90. * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
  91. *
  92. * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
  93. * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  94. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  95. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
  96. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  97. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  98. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  99. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  100. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  101. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  102. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  103. * OF THE POSSIBILITY OF SUCH DAMAGE.
  104. * ====================================================================
  105. *
  106. * This product includes cryptographic software written by Eric Young
  107. * ([email protected]). This product includes software written by Tim
  108. * Hudson ([email protected]).
  109. *
  110. */
  111. #include "cryptlib.h"
  112. #include "bn_lcl.h"
  113. #include <stdlib.h>
  114. #ifdef _WIN32
  115. # include <malloc.h>
  116. # ifndef alloca
  117. # define alloca _alloca
  118. # endif
  119. #elif defined(__GNUC__)
  120. # ifndef alloca
  121. # define alloca(s) __builtin_alloca((s))
  122. # endif
  123. #endif
  124. /* maximum precomputation table size for *variable* sliding windows */
  125. #define TABLE_SIZE 32
  126. /* this one works - simple but works */
  127. int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
  128. {
  129. int i, bits, ret = 0;
  130. BIGNUM *v, *rr;
  131. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  132. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  133. BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  134. return -1;
  135. }
  136. BN_CTX_start(ctx);
  137. if ((r == a) || (r == p))
  138. rr = BN_CTX_get(ctx);
  139. else
  140. rr = r;
  141. v = BN_CTX_get(ctx);
  142. if (rr == NULL || v == NULL)
  143. goto err;
  144. if (BN_copy(v, a) == NULL)
  145. goto err;
  146. bits = BN_num_bits(p);
  147. if (BN_is_odd(p)) {
  148. if (BN_copy(rr, a) == NULL)
  149. goto err;
  150. } else {
  151. if (!BN_one(rr))
  152. goto err;
  153. }
  154. for (i = 1; i < bits; i++) {
  155. if (!BN_sqr(v, v, ctx))
  156. goto err;
  157. if (BN_is_bit_set(p, i)) {
  158. if (!BN_mul(rr, rr, v, ctx))
  159. goto err;
  160. }
  161. }
  162. if (r != rr)
  163. BN_copy(r, rr);
  164. ret = 1;
  165. err:
  166. BN_CTX_end(ctx);
  167. bn_check_top(r);
  168. return (ret);
  169. }
  170. int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
  171. BN_CTX *ctx)
  172. {
  173. int ret;
  174. bn_check_top(a);
  175. bn_check_top(p);
  176. bn_check_top(m);
  177. /*-
  178. * For even modulus m = 2^k*m_odd, it might make sense to compute
  179. * a^p mod m_odd and a^p mod 2^k separately (with Montgomery
  180. * exponentiation for the odd part), using appropriate exponent
  181. * reductions, and combine the results using the CRT.
  182. *
  183. * For now, we use Montgomery only if the modulus is odd; otherwise,
  184. * exponentiation using the reciprocal-based quick remaindering
  185. * algorithm is used.
  186. *
  187. * (Timing obtained with expspeed.c [computations a^p mod m
  188. * where a, p, m are of the same length: 256, 512, 1024, 2048,
  189. * 4096, 8192 bits], compared to the running time of the
  190. * standard algorithm:
  191. *
  192. * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
  193. * 55 .. 77 % [UltraSparc processor, but
  194. * debug-solaris-sparcv8-gcc conf.]
  195. *
  196. * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
  197. * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
  198. *
  199. * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
  200. * at 2048 and more bits, but at 512 and 1024 bits, it was
  201. * slower even than the standard algorithm!
  202. *
  203. * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
  204. * should be obtained when the new Montgomery reduction code
  205. * has been integrated into OpenSSL.)
  206. */
  207. #define MONT_MUL_MOD
  208. #define MONT_EXP_WORD
  209. #define RECP_MUL_MOD
  210. #ifdef MONT_MUL_MOD
  211. /*
  212. * I have finally been able to take out this pre-condition of the top bit
  213. * being set. It was caused by an error in BN_div with negatives. There
  214. * was also another problem when for a^b%m a >= m. eay 07-May-97
  215. */
  216. /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
  217. if (BN_is_odd(m)) {
  218. # ifdef MONT_EXP_WORD
  219. if (a->top == 1 && !a->neg
  220. && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) {
  221. BN_ULONG A = a->d[0];
  222. ret = BN_mod_exp_mont_word(r, A, p, m, ctx, NULL);
  223. } else
  224. # endif
  225. ret = BN_mod_exp_mont(r, a, p, m, ctx, NULL);
  226. } else
  227. #endif
  228. #ifdef RECP_MUL_MOD
  229. {
  230. ret = BN_mod_exp_recp(r, a, p, m, ctx);
  231. }
  232. #else
  233. {
  234. ret = BN_mod_exp_simple(r, a, p, m, ctx);
  235. }
  236. #endif
  237. bn_check_top(r);
  238. return (ret);
  239. }
  240. int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
  241. const BIGNUM *m, BN_CTX *ctx)
  242. {
  243. int i, j, bits, ret = 0, wstart, wend, window, wvalue;
  244. int start = 1;
  245. BIGNUM *aa;
  246. /* Table of variables obtained from 'ctx' */
  247. BIGNUM *val[TABLE_SIZE];
  248. BN_RECP_CTX recp;
  249. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  250. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  251. BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  252. return -1;
  253. }
  254. bits = BN_num_bits(p);
  255. if (bits == 0) {
  256. ret = BN_one(r);
  257. return ret;
  258. }
  259. BN_CTX_start(ctx);
  260. aa = BN_CTX_get(ctx);
  261. val[0] = BN_CTX_get(ctx);
  262. if (!aa || !val[0])
  263. goto err;
  264. BN_RECP_CTX_init(&recp);
  265. if (m->neg) {
  266. /* ignore sign of 'm' */
  267. if (!BN_copy(aa, m))
  268. goto err;
  269. aa->neg = 0;
  270. if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0)
  271. goto err;
  272. } else {
  273. if (BN_RECP_CTX_set(&recp, m, ctx) <= 0)
  274. goto err;
  275. }
  276. if (!BN_nnmod(val[0], a, m, ctx))
  277. goto err; /* 1 */
  278. if (BN_is_zero(val[0])) {
  279. BN_zero(r);
  280. ret = 1;
  281. goto err;
  282. }
  283. window = BN_window_bits_for_exponent_size(bits);
  284. if (window > 1) {
  285. if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx))
  286. goto err; /* 2 */
  287. j = 1 << (window - 1);
  288. for (i = 1; i < j; i++) {
  289. if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
  290. !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx))
  291. goto err;
  292. }
  293. }
  294. start = 1; /* This is used to avoid multiplication etc
  295. * when there is only the value '1' in the
  296. * buffer. */
  297. wvalue = 0; /* The 'value' of the window */
  298. wstart = bits - 1; /* The top bit of the window */
  299. wend = 0; /* The bottom bit of the window */
  300. if (!BN_one(r))
  301. goto err;
  302. for (;;) {
  303. if (BN_is_bit_set(p, wstart) == 0) {
  304. if (!start)
  305. if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
  306. goto err;
  307. if (wstart == 0)
  308. break;
  309. wstart--;
  310. continue;
  311. }
  312. /*
  313. * We now have wstart on a 'set' bit, we now need to work out how bit
  314. * a window to do. To do this we need to scan forward until the last
  315. * set bit before the end of the window
  316. */
  317. j = wstart;
  318. wvalue = 1;
  319. wend = 0;
  320. for (i = 1; i < window; i++) {
  321. if (wstart - i < 0)
  322. break;
  323. if (BN_is_bit_set(p, wstart - i)) {
  324. wvalue <<= (i - wend);
  325. wvalue |= 1;
  326. wend = i;
  327. }
  328. }
  329. /* wend is the size of the current window */
  330. j = wend + 1;
  331. /* add the 'bytes above' */
  332. if (!start)
  333. for (i = 0; i < j; i++) {
  334. if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
  335. goto err;
  336. }
  337. /* wvalue will be an odd number < 2^window */
  338. if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx))
  339. goto err;
  340. /* move the 'window' down further */
  341. wstart -= wend + 1;
  342. wvalue = 0;
  343. start = 0;
  344. if (wstart < 0)
  345. break;
  346. }
  347. ret = 1;
  348. err:
  349. BN_CTX_end(ctx);
  350. BN_RECP_CTX_free(&recp);
  351. bn_check_top(r);
  352. return (ret);
  353. }
  354. int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  355. const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
  356. {
  357. int i, j, bits, ret = 0, wstart, wend, window, wvalue;
  358. int start = 1;
  359. BIGNUM *d, *r;
  360. const BIGNUM *aa;
  361. /* Table of variables obtained from 'ctx' */
  362. BIGNUM *val[TABLE_SIZE];
  363. BN_MONT_CTX *mont = NULL;
  364. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  365. return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
  366. }
  367. bn_check_top(a);
  368. bn_check_top(p);
  369. bn_check_top(m);
  370. if (!BN_is_odd(m)) {
  371. BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS);
  372. return (0);
  373. }
  374. bits = BN_num_bits(p);
  375. if (bits == 0) {
  376. ret = BN_one(rr);
  377. return ret;
  378. }
  379. BN_CTX_start(ctx);
  380. d = BN_CTX_get(ctx);
  381. r = BN_CTX_get(ctx);
  382. val[0] = BN_CTX_get(ctx);
  383. if (!d || !r || !val[0])
  384. goto err;
  385. /*
  386. * If this is not done, things will break in the montgomery part
  387. */
  388. if (in_mont != NULL)
  389. mont = in_mont;
  390. else {
  391. if ((mont = BN_MONT_CTX_new()) == NULL)
  392. goto err;
  393. if (!BN_MONT_CTX_set(mont, m, ctx))
  394. goto err;
  395. }
  396. if (a->neg || BN_ucmp(a, m) >= 0) {
  397. if (!BN_nnmod(val[0], a, m, ctx))
  398. goto err;
  399. aa = val[0];
  400. } else
  401. aa = a;
  402. if (BN_is_zero(aa)) {
  403. BN_zero(rr);
  404. ret = 1;
  405. goto err;
  406. }
  407. if (!BN_to_montgomery(val[0], aa, mont, ctx))
  408. goto err; /* 1 */
  409. window = BN_window_bits_for_exponent_size(bits);
  410. if (window > 1) {
  411. if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx))
  412. goto err; /* 2 */
  413. j = 1 << (window - 1);
  414. for (i = 1; i < j; i++) {
  415. if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
  416. !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx))
  417. goto err;
  418. }
  419. }
  420. start = 1; /* This is used to avoid multiplication etc
  421. * when there is only the value '1' in the
  422. * buffer. */
  423. wvalue = 0; /* The 'value' of the window */
  424. wstart = bits - 1; /* The top bit of the window */
  425. wend = 0; /* The bottom bit of the window */
  426. if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
  427. goto err;
  428. for (;;) {
  429. if (BN_is_bit_set(p, wstart) == 0) {
  430. if (!start) {
  431. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
  432. goto err;
  433. }
  434. if (wstart == 0)
  435. break;
  436. wstart--;
  437. continue;
  438. }
  439. /*
  440. * We now have wstart on a 'set' bit, we now need to work out how bit
  441. * a window to do. To do this we need to scan forward until the last
  442. * set bit before the end of the window
  443. */
  444. j = wstart;
  445. wvalue = 1;
  446. wend = 0;
  447. for (i = 1; i < window; i++) {
  448. if (wstart - i < 0)
  449. break;
  450. if (BN_is_bit_set(p, wstart - i)) {
  451. wvalue <<= (i - wend);
  452. wvalue |= 1;
  453. wend = i;
  454. }
  455. }
  456. /* wend is the size of the current window */
  457. j = wend + 1;
  458. /* add the 'bytes above' */
  459. if (!start)
  460. for (i = 0; i < j; i++) {
  461. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
  462. goto err;
  463. }
  464. /* wvalue will be an odd number < 2^window */
  465. if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx))
  466. goto err;
  467. /* move the 'window' down further */
  468. wstart -= wend + 1;
  469. wvalue = 0;
  470. start = 0;
  471. if (wstart < 0)
  472. break;
  473. }
  474. if (!BN_from_montgomery(rr, r, mont, ctx))
  475. goto err;
  476. ret = 1;
  477. err:
  478. if ((in_mont == NULL) && (mont != NULL))
  479. BN_MONT_CTX_free(mont);
  480. BN_CTX_end(ctx);
  481. bn_check_top(rr);
  482. return (ret);
  483. }
  484. /*
  485. * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
  486. * layout so that accessing any of these table values shows the same access
  487. * pattern as far as cache lines are concerned. The following functions are
  488. * used to transfer a BIGNUM from/to that table.
  489. */
  490. static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top,
  491. unsigned char *buf, int idx,
  492. int width)
  493. {
  494. size_t i, j;
  495. if (top > b->top)
  496. top = b->top; /* this works because 'buf' is explicitly
  497. * zeroed */
  498. for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
  499. buf[j] = ((unsigned char *)b->d)[i];
  500. }
  501. return 1;
  502. }
  503. static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top,
  504. unsigned char *buf, int idx,
  505. int width)
  506. {
  507. size_t i, j;
  508. if (bn_wexpand(b, top) == NULL)
  509. return 0;
  510. for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
  511. ((unsigned char *)b->d)[i] = buf[j];
  512. }
  513. b->top = top;
  514. bn_correct_top(b);
  515. return 1;
  516. }
  517. /*
  518. * Given a pointer value, compute the next address that is a cache line
  519. * multiple.
  520. */
  521. #define MOD_EXP_CTIME_ALIGN(x_) \
  522. ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
  523. /*
  524. * This variant of BN_mod_exp_mont() uses fixed windows and the special
  525. * precomputation memory layout to limit data-dependency to a minimum to
  526. * protect secret exponents (cf. the hyper-threading timing attacks pointed
  527. * out by Colin Percival,
  528. * http://www.daemonology.net/hyperthreading-considered-harmful/)
  529. */
  530. int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  531. const BIGNUM *m, BN_CTX *ctx,
  532. BN_MONT_CTX *in_mont)
  533. {
  534. int i, bits, ret = 0, window, wvalue;
  535. int top;
  536. BN_MONT_CTX *mont = NULL;
  537. int numPowers;
  538. unsigned char *powerbufFree = NULL;
  539. int powerbufLen = 0;
  540. unsigned char *powerbuf = NULL;
  541. BIGNUM tmp, am;
  542. bn_check_top(a);
  543. bn_check_top(p);
  544. bn_check_top(m);
  545. top = m->top;
  546. if (!(m->d[0] & 1)) {
  547. BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS);
  548. return (0);
  549. }
  550. bits = BN_num_bits(p);
  551. if (bits == 0) {
  552. ret = BN_one(rr);
  553. return ret;
  554. }
  555. BN_CTX_start(ctx);
  556. /*
  557. * Allocate a montgomery context if it was not supplied by the caller. If
  558. * this is not done, things will break in the montgomery part.
  559. */
  560. if (in_mont != NULL)
  561. mont = in_mont;
  562. else {
  563. if ((mont = BN_MONT_CTX_new()) == NULL)
  564. goto err;
  565. if (!BN_MONT_CTX_set(mont, m, ctx))
  566. goto err;
  567. }
  568. /* Get the window size to use with size of p. */
  569. window = BN_window_bits_for_ctime_exponent_size(bits);
  570. #if defined(OPENSSL_BN_ASM_MONT5)
  571. if (window == 6 && bits <= 1024)
  572. window = 5; /* ~5% improvement of 2048-bit RSA sign */
  573. #endif
  574. /*
  575. * Allocate a buffer large enough to hold all of the pre-computed powers
  576. * of am, am itself and tmp.
  577. */
  578. numPowers = 1 << window;
  579. powerbufLen = sizeof(m->d[0]) * (top * numPowers +
  580. ((2 * top) >
  581. numPowers ? (2 * top) : numPowers));
  582. #ifdef alloca
  583. if (powerbufLen < 3072)
  584. powerbufFree =
  585. alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
  586. else
  587. #endif
  588. if ((powerbufFree =
  589. (unsigned char *)OPENSSL_malloc(powerbufLen +
  590. MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH))
  591. == NULL)
  592. goto err;
  593. powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
  594. memset(powerbuf, 0, powerbufLen);
  595. #ifdef alloca
  596. if (powerbufLen < 3072)
  597. powerbufFree = NULL;
  598. #endif
  599. /* lay down tmp and am right after powers table */
  600. tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
  601. am.d = tmp.d + top;
  602. tmp.top = am.top = 0;
  603. tmp.dmax = am.dmax = top;
  604. tmp.neg = am.neg = 0;
  605. tmp.flags = am.flags = BN_FLG_STATIC_DATA;
  606. /* prepare a^0 in Montgomery domain */
  607. #if 1
  608. if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
  609. goto err;
  610. #else
  611. tmp.d[0] = (0 - m->d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */
  612. for (i = 1; i < top; i++)
  613. tmp.d[i] = (~m->d[i]) & BN_MASK2;
  614. tmp.top = top;
  615. #endif
  616. /* prepare a^1 in Montgomery domain */
  617. if (a->neg || BN_ucmp(a, m) >= 0) {
  618. if (!BN_mod(&am, a, m, ctx))
  619. goto err;
  620. if (!BN_to_montgomery(&am, &am, mont, ctx))
  621. goto err;
  622. } else if (!BN_to_montgomery(&am, a, mont, ctx))
  623. goto err;
  624. #if defined(OPENSSL_BN_ASM_MONT5)
  625. if (window == 5 && top > 1) {
  626. /*
  627. * This optimization uses ideas from http://eprint.iacr.org/2011/239,
  628. * specifically optimization of cache-timing attack countermeasures
  629. * and pre-computation optimization.
  630. */
  631. /*
  632. * Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
  633. * 512-bit RSA is hardly relevant, we omit it to spare size...
  634. */
  635. void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
  636. const void *table, const BN_ULONG *np,
  637. const BN_ULONG *n0, int num, int power);
  638. void bn_scatter5(const BN_ULONG *inp, size_t num,
  639. void *table, size_t power);
  640. void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power);
  641. BN_ULONG *np = mont->N.d, *n0 = mont->n0;
  642. /*
  643. * BN_to_montgomery can contaminate words above .top [in
  644. * BN_DEBUG[_DEBUG] build]...
  645. */
  646. for (i = am.top; i < top; i++)
  647. am.d[i] = 0;
  648. for (i = tmp.top; i < top; i++)
  649. tmp.d[i] = 0;
  650. bn_scatter5(tmp.d, top, powerbuf, 0);
  651. bn_scatter5(am.d, am.top, powerbuf, 1);
  652. bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
  653. bn_scatter5(tmp.d, top, powerbuf, 2);
  654. # if 0
  655. for (i = 3; i < 32; i++) {
  656. /* Calculate a^i = a^(i-1) * a */
  657. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
  658. bn_scatter5(tmp.d, top, powerbuf, i);
  659. }
  660. # else
  661. /* same as above, but uses squaring for 1/2 of operations */
  662. for (i = 4; i < 32; i *= 2) {
  663. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  664. bn_scatter5(tmp.d, top, powerbuf, i);
  665. }
  666. for (i = 3; i < 8; i += 2) {
  667. int j;
  668. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
  669. bn_scatter5(tmp.d, top, powerbuf, i);
  670. for (j = 2 * i; j < 32; j *= 2) {
  671. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  672. bn_scatter5(tmp.d, top, powerbuf, j);
  673. }
  674. }
  675. for (; i < 16; i += 2) {
  676. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
  677. bn_scatter5(tmp.d, top, powerbuf, i);
  678. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  679. bn_scatter5(tmp.d, top, powerbuf, 2 * i);
  680. }
  681. for (; i < 32; i += 2) {
  682. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
  683. bn_scatter5(tmp.d, top, powerbuf, i);
  684. }
  685. # endif
  686. bits--;
  687. for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
  688. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  689. bn_gather5(tmp.d, top, powerbuf, wvalue);
  690. /*
  691. * Scan the exponent one window at a time starting from the most
  692. * significant bits.
  693. */
  694. while (bits >= 0) {
  695. for (wvalue = 0, i = 0; i < 5; i++, bits--)
  696. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  697. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  698. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  699. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  700. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  701. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  702. bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
  703. }
  704. tmp.top = top;
  705. bn_correct_top(&tmp);
  706. } else
  707. #endif
  708. {
  709. if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers))
  710. goto err;
  711. if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, numPowers))
  712. goto err;
  713. /*
  714. * If the window size is greater than 1, then calculate
  715. * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) (even
  716. * powers could instead be computed as (a^(i/2))^2 to use the slight
  717. * performance advantage of sqr over mul).
  718. */
  719. if (window > 1) {
  720. if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx))
  721. goto err;
  722. if (!MOD_EXP_CTIME_COPY_TO_PREBUF
  723. (&tmp, top, powerbuf, 2, numPowers))
  724. goto err;
  725. for (i = 3; i < numPowers; i++) {
  726. /* Calculate a^i = a^(i-1) * a */
  727. if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx))
  728. goto err;
  729. if (!MOD_EXP_CTIME_COPY_TO_PREBUF
  730. (&tmp, top, powerbuf, i, numPowers))
  731. goto err;
  732. }
  733. }
  734. bits--;
  735. for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
  736. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  737. if (!MOD_EXP_CTIME_COPY_FROM_PREBUF
  738. (&tmp, top, powerbuf, wvalue, numPowers))
  739. goto err;
  740. /*
  741. * Scan the exponent one window at a time starting from the most
  742. * significant bits.
  743. */
  744. while (bits >= 0) {
  745. wvalue = 0; /* The 'value' of the window */
  746. /* Scan the window, squaring the result as we go */
  747. for (i = 0; i < window; i++, bits--) {
  748. if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx))
  749. goto err;
  750. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  751. }
  752. /*
  753. * Fetch the appropriate pre-computed value from the pre-buf
  754. */
  755. if (!MOD_EXP_CTIME_COPY_FROM_PREBUF
  756. (&am, top, powerbuf, wvalue, numPowers))
  757. goto err;
  758. /* Multiply the result into the intermediate result */
  759. if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx))
  760. goto err;
  761. }
  762. }
  763. /* Convert the final result from montgomery to standard format */
  764. if (!BN_from_montgomery(rr, &tmp, mont, ctx))
  765. goto err;
  766. ret = 1;
  767. err:
  768. if ((in_mont == NULL) && (mont != NULL))
  769. BN_MONT_CTX_free(mont);
  770. if (powerbuf != NULL) {
  771. OPENSSL_cleanse(powerbuf, powerbufLen);
  772. if (powerbufFree)
  773. OPENSSL_free(powerbufFree);
  774. }
  775. BN_CTX_end(ctx);
  776. return (ret);
  777. }
  778. int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
  779. const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
  780. {
  781. BN_MONT_CTX *mont = NULL;
  782. int b, bits, ret = 0;
  783. int r_is_one;
  784. BN_ULONG w, next_w;
  785. BIGNUM *d, *r, *t;
  786. BIGNUM *swap_tmp;
  787. #define BN_MOD_MUL_WORD(r, w, m) \
  788. (BN_mul_word(r, (w)) && \
  789. (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \
  790. (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
  791. /*
  792. * BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is
  793. * probably more overhead than always using BN_mod (which uses BN_copy if
  794. * a similar test returns true).
  795. */
  796. /*
  797. * We can use BN_mod and do not need BN_nnmod because our accumulator is
  798. * never negative (the result of BN_mod does not depend on the sign of
  799. * the modulus).
  800. */
  801. #define BN_TO_MONTGOMERY_WORD(r, w, mont) \
  802. (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
  803. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  804. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  805. BNerr(BN_F_BN_MOD_EXP_MONT_WORD, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  806. return -1;
  807. }
  808. bn_check_top(p);
  809. bn_check_top(m);
  810. if (!BN_is_odd(m)) {
  811. BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS);
  812. return (0);
  813. }
  814. if (m->top == 1)
  815. a %= m->d[0]; /* make sure that 'a' is reduced */
  816. bits = BN_num_bits(p);
  817. if (bits == 0) {
  818. /* x**0 mod 1 is still zero. */
  819. if (BN_is_one(m)) {
  820. ret = 1;
  821. BN_zero(rr);
  822. } else
  823. ret = BN_one(rr);
  824. return ret;
  825. }
  826. if (a == 0) {
  827. BN_zero(rr);
  828. ret = 1;
  829. return ret;
  830. }
  831. BN_CTX_start(ctx);
  832. d = BN_CTX_get(ctx);
  833. r = BN_CTX_get(ctx);
  834. t = BN_CTX_get(ctx);
  835. if (d == NULL || r == NULL || t == NULL)
  836. goto err;
  837. if (in_mont != NULL)
  838. mont = in_mont;
  839. else {
  840. if ((mont = BN_MONT_CTX_new()) == NULL)
  841. goto err;
  842. if (!BN_MONT_CTX_set(mont, m, ctx))
  843. goto err;
  844. }
  845. r_is_one = 1; /* except for Montgomery factor */
  846. /* bits-1 >= 0 */
  847. /* The result is accumulated in the product r*w. */
  848. w = a; /* bit 'bits-1' of 'p' is always set */
  849. for (b = bits - 2; b >= 0; b--) {
  850. /* First, square r*w. */
  851. next_w = w * w;
  852. if ((next_w / w) != w) { /* overflow */
  853. if (r_is_one) {
  854. if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
  855. goto err;
  856. r_is_one = 0;
  857. } else {
  858. if (!BN_MOD_MUL_WORD(r, w, m))
  859. goto err;
  860. }
  861. next_w = 1;
  862. }
  863. w = next_w;
  864. if (!r_is_one) {
  865. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
  866. goto err;
  867. }
  868. /* Second, multiply r*w by 'a' if exponent bit is set. */
  869. if (BN_is_bit_set(p, b)) {
  870. next_w = w * a;
  871. if ((next_w / a) != w) { /* overflow */
  872. if (r_is_one) {
  873. if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
  874. goto err;
  875. r_is_one = 0;
  876. } else {
  877. if (!BN_MOD_MUL_WORD(r, w, m))
  878. goto err;
  879. }
  880. next_w = a;
  881. }
  882. w = next_w;
  883. }
  884. }
  885. /* Finally, set r:=r*w. */
  886. if (w != 1) {
  887. if (r_is_one) {
  888. if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
  889. goto err;
  890. r_is_one = 0;
  891. } else {
  892. if (!BN_MOD_MUL_WORD(r, w, m))
  893. goto err;
  894. }
  895. }
  896. if (r_is_one) { /* can happen only if a == 1 */
  897. if (!BN_one(rr))
  898. goto err;
  899. } else {
  900. if (!BN_from_montgomery(rr, r, mont, ctx))
  901. goto err;
  902. }
  903. ret = 1;
  904. err:
  905. if ((in_mont == NULL) && (mont != NULL))
  906. BN_MONT_CTX_free(mont);
  907. BN_CTX_end(ctx);
  908. bn_check_top(rr);
  909. return (ret);
  910. }
  911. /* The old fallback, simple version :-) */
  912. int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
  913. const BIGNUM *m, BN_CTX *ctx)
  914. {
  915. int i, j, bits, ret = 0, wstart, wend, window, wvalue;
  916. int start = 1;
  917. BIGNUM *d;
  918. /* Table of variables obtained from 'ctx' */
  919. BIGNUM *val[TABLE_SIZE];
  920. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  921. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  922. BNerr(BN_F_BN_MOD_EXP_SIMPLE, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  923. return -1;
  924. }
  925. bits = BN_num_bits(p);
  926. if (bits == 0) {
  927. ret = BN_one(r);
  928. return ret;
  929. }
  930. BN_CTX_start(ctx);
  931. d = BN_CTX_get(ctx);
  932. val[0] = BN_CTX_get(ctx);
  933. if (!d || !val[0])
  934. goto err;
  935. if (!BN_nnmod(val[0], a, m, ctx))
  936. goto err; /* 1 */
  937. if (BN_is_zero(val[0])) {
  938. BN_zero(r);
  939. ret = 1;
  940. goto err;
  941. }
  942. window = BN_window_bits_for_exponent_size(bits);
  943. if (window > 1) {
  944. if (!BN_mod_mul(d, val[0], val[0], m, ctx))
  945. goto err; /* 2 */
  946. j = 1 << (window - 1);
  947. for (i = 1; i < j; i++) {
  948. if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
  949. !BN_mod_mul(val[i], val[i - 1], d, m, ctx))
  950. goto err;
  951. }
  952. }
  953. start = 1; /* This is used to avoid multiplication etc
  954. * when there is only the value '1' in the
  955. * buffer. */
  956. wvalue = 0; /* The 'value' of the window */
  957. wstart = bits - 1; /* The top bit of the window */
  958. wend = 0; /* The bottom bit of the window */
  959. if (!BN_one(r))
  960. goto err;
  961. for (;;) {
  962. if (BN_is_bit_set(p, wstart) == 0) {
  963. if (!start)
  964. if (!BN_mod_mul(r, r, r, m, ctx))
  965. goto err;
  966. if (wstart == 0)
  967. break;
  968. wstart--;
  969. continue;
  970. }
  971. /*
  972. * We now have wstart on a 'set' bit, we now need to work out how bit
  973. * a window to do. To do this we need to scan forward until the last
  974. * set bit before the end of the window
  975. */
  976. j = wstart;
  977. wvalue = 1;
  978. wend = 0;
  979. for (i = 1; i < window; i++) {
  980. if (wstart - i < 0)
  981. break;
  982. if (BN_is_bit_set(p, wstart - i)) {
  983. wvalue <<= (i - wend);
  984. wvalue |= 1;
  985. wend = i;
  986. }
  987. }
  988. /* wend is the size of the current window */
  989. j = wend + 1;
  990. /* add the 'bytes above' */
  991. if (!start)
  992. for (i = 0; i < j; i++) {
  993. if (!BN_mod_mul(r, r, r, m, ctx))
  994. goto err;
  995. }
  996. /* wvalue will be an odd number < 2^window */
  997. if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx))
  998. goto err;
  999. /* move the 'window' down further */
  1000. wstart -= wend + 1;
  1001. wvalue = 0;
  1002. start = 0;
  1003. if (wstart < 0)
  1004. break;
  1005. }
  1006. ret = 1;
  1007. err:
  1008. BN_CTX_end(ctx);
  1009. bn_check_top(r);
  1010. return (ret);
  1011. }