gfp_p384.c 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. /* Copyright 2016 Brian Smith.
  2. *
  3. * Permission to use, copy, modify, and/or distribute this software for any
  4. * purpose with or without fee is hereby granted, provided that the above
  5. * copyright notice and this permission notice appear in all copies.
  6. *
  7. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
  8. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  9. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
  10. * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  11. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
  12. * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  13. * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
  14. #include "../../limbs/limbs.h"
  15. #include "ecp_nistz384.h"
  16. #include "../bn/internal.h"
  17. #include "../../internal.h"
  18. #include "../../limbs/limbs.inl"
  19. /* XXX: Here we assume that the conversion from |Carry| to |Limb| is
  20. * constant-time, but we haven't verified that assumption. TODO: Fix it so
  21. * we don't need to make that assumption. */
  22. typedef Limb Elem[P384_LIMBS];
  23. typedef Limb ScalarMont[P384_LIMBS];
  24. typedef Limb Scalar[P384_LIMBS];
  25. static const BN_ULONG Q[P384_LIMBS] = {
  26. TOBN(0x00000000, 0xffffffff),
  27. TOBN(0xffffffff, 0x00000000),
  28. TOBN(0xffffffff, 0xfffffffe),
  29. TOBN(0xffffffff, 0xffffffff),
  30. TOBN(0xffffffff, 0xffffffff),
  31. TOBN(0xffffffff, 0xffffffff),
  32. };
  33. static const BN_ULONG N[P384_LIMBS] = {
  34. TOBN(0xecec196a, 0xccc52973),
  35. TOBN(0x581a0db2, 0x48b0a77a),
  36. TOBN(0xc7634d81, 0xf4372ddf),
  37. TOBN(0xffffffff, 0xffffffff),
  38. TOBN(0xffffffff, 0xffffffff),
  39. TOBN(0xffffffff, 0xffffffff),
  40. };
  41. static const BN_ULONG ONE[P384_LIMBS] = {
  42. TOBN(0xffffffff, 1), TOBN(0, 0xffffffff), TOBN(0, 1), TOBN(0, 0), TOBN(0, 0),
  43. TOBN(0, 0),
  44. };
  45. /* XXX: MSVC for x86 warns when it fails to inline these functions it should
  46. * probably inline. */
  47. #if defined(_MSC_VER) && !defined(__clang__) && defined(OPENSSL_X86)
  48. #define INLINE_IF_POSSIBLE __forceinline
  49. #else
  50. #define INLINE_IF_POSSIBLE inline
  51. #endif
  52. static inline Limb is_equal(const Elem a, const Elem b) {
  53. return LIMBS_equal(a, b, P384_LIMBS);
  54. }
  55. static inline Limb is_zero(const BN_ULONG a[P384_LIMBS]) {
  56. return LIMBS_are_zero(a, P384_LIMBS);
  57. }
  58. static inline void copy_conditional(Elem r, const Elem a,
  59. const Limb condition) {
  60. for (size_t i = 0; i < P384_LIMBS; ++i) {
  61. r[i] = constant_time_select_w(condition, a[i], r[i]);
  62. }
  63. }
  64. static inline void elem_add(Elem r, const Elem a, const Elem b) {
  65. LIMBS_add_mod(r, a, b, Q, P384_LIMBS);
  66. }
  67. static inline void elem_sub(Elem r, const Elem a, const Elem b) {
  68. LIMBS_sub_mod(r, a, b, Q, P384_LIMBS);
  69. }
  70. static void elem_div_by_2(Elem r, const Elem a) {
  71. /* Consider the case where `a` is even. Then we can shift `a` right one bit
  72. * and the result will still be valid because we didn't lose any bits and so
  73. * `(a >> 1) * 2 == a (mod q)`, which is the invariant we must satisfy.
  74. *
  75. * The remainder of this comment is considering the case where `a` is odd.
  76. *
  77. * Since `a` is odd, it isn't the case that `(a >> 1) * 2 == a (mod q)`
  78. * because the lowest bit is lost during the shift. For example, consider:
  79. *
  80. * ```python
  81. * q = 2**384 - 2**128 - 2**96 + 2**32 - 1
  82. * a = 2**383
  83. * two_a = a * 2 % q
  84. * assert two_a == 0x100000000ffffffffffffffff00000001
  85. * ```
  86. *
  87. * Notice there how `(2 * a) % q` wrapped around to a smaller odd value. When
  88. * we divide `two_a` by two (mod q), we need to get the value `2**383`, which
  89. * we obviously can't get with just a right shift.
  90. *
  91. * `q` is odd, and `a` is odd, so `a + q` is even. We could calculate
  92. * `(a + q) >> 1` and then reduce it mod `q`. However, then we would have to
  93. * keep track of an extra most significant bit. We can avoid that by instead
  94. * calculating `(a >> 1) + ((q + 1) >> 1)`. The `1` in `q + 1` is the least
  95. * significant bit of `a`. `q + 1` is even, which means it can be shifted
  96. * without losing any bits. Since `q` is odd, `q - 1` is even, so the largest
  97. * odd field element is `q - 2`. Thus we know that `a <= q - 2`. We know
  98. * `(q + 1) >> 1` is `(q + 1) / 2` since (`q + 1`) is even. The value of
  99. * `a >> 1` is `(a - 1)/2` since the shift will drop the least significant
  100. * bit of `a`, which is 1. Thus:
  101. *
  102. * sum = ((q + 1) >> 1) + (a >> 1)
  103. * sum = (q + 1)/2 + (a >> 1) (substituting (q + 1)/2)
  104. * <= (q + 1)/2 + (q - 2 - 1)/2 (substituting a <= q - 2)
  105. * <= (q + 1)/2 + (q - 3)/2 (simplifying)
  106. * <= (q + 1 + q - 3)/2 (factoring out the common divisor)
  107. * <= (2q - 2)/2 (simplifying)
  108. * <= q - 1 (simplifying)
  109. *
  110. * Thus, no reduction of the sum mod `q` is necessary. */
  111. Limb is_odd = constant_time_is_nonzero_w(a[0] & 1);
  112. /* r = a >> 1. */
  113. Limb carry = a[P384_LIMBS - 1] & 1;
  114. r[P384_LIMBS - 1] = a[P384_LIMBS - 1] >> 1;
  115. for (size_t i = 1; i < P384_LIMBS; ++i) {
  116. Limb new_carry = a[P384_LIMBS - i - 1];
  117. r[P384_LIMBS - i - 1] =
  118. (a[P384_LIMBS - i - 1] >> 1) | (carry << (LIMB_BITS - 1));
  119. carry = new_carry;
  120. }
  121. static const Elem Q_PLUS_1_SHR_1 = {
  122. TOBN(0x00000000, 0x80000000), TOBN(0x7fffffff, 0x80000000),
  123. TOBN(0xffffffff, 0xffffffff), TOBN(0xffffffff, 0xffffffff),
  124. TOBN(0xffffffff, 0xffffffff), TOBN(0x7fffffff, 0xffffffff),
  125. };
  126. Elem adjusted;
  127. BN_ULONG carry2 = limbs_add(adjusted, r, Q_PLUS_1_SHR_1, P384_LIMBS);
  128. dev_assert_secret(carry2 == 0);
  129. (void)carry2;
  130. copy_conditional(r, adjusted, is_odd);
  131. }
  132. static inline void elem_mul_mont(Elem r, const Elem a, const Elem b) {
  133. static const BN_ULONG Q_N0[] = {
  134. BN_MONT_CTX_N0(0x1, 0x1)
  135. };
  136. /* XXX: Not (clearly) constant-time; inefficient.*/
  137. GFp_bn_mul_mont(r, a, b, Q, Q_N0, P384_LIMBS);
  138. }
  139. static inline void elem_mul_by_2(Elem r, const Elem a) {
  140. LIMBS_shl_mod(r, a, Q, P384_LIMBS);
  141. }
  142. static INLINE_IF_POSSIBLE void elem_mul_by_3(Elem r, const Elem a) {
  143. /* XXX: inefficient. TODO: Replace with an integrated shift + add. */
  144. Elem doubled;
  145. elem_add(doubled, a, a);
  146. elem_add(r, doubled, a);
  147. }
  148. static inline void elem_sqr_mont(Elem r, const Elem a) {
  149. /* XXX: Inefficient. TODO: Add a dedicated squaring routine. */
  150. elem_mul_mont(r, a, a);
  151. }
  152. void GFp_p384_elem_add(Elem r, const Elem a, const Elem b) {
  153. elem_add(r, a, b);
  154. }
  155. void GFp_p384_elem_sub(Elem r, const Elem a, const Elem b) {
  156. elem_sub(r, a, b);
  157. }
  158. void GFp_p384_elem_div_by_2(Elem r, const Elem a) {
  159. elem_div_by_2(r, a);
  160. }
  161. void GFp_p384_elem_mul_mont(Elem r, const Elem a, const Elem b) {
  162. elem_mul_mont(r, a, b);
  163. }
  164. void GFp_p384_elem_neg(Elem r, const Elem a) {
  165. Limb is_zero = LIMBS_are_zero(a, P384_LIMBS);
  166. Carry borrow = limbs_sub(r, Q, a, P384_LIMBS);
  167. dev_assert_secret(borrow == 0);
  168. (void)borrow;
  169. for (size_t i = 0; i < P384_LIMBS; ++i) {
  170. r[i] = constant_time_select_w(is_zero, 0, r[i]);
  171. }
  172. }
  173. void GFp_p384_scalar_mul_mont(ScalarMont r, const ScalarMont a,
  174. const ScalarMont b) {
  175. static const BN_ULONG N_N0[] = {
  176. BN_MONT_CTX_N0(0x6ed46089, 0xe88fdc45)
  177. };
  178. /* XXX: Inefficient. TODO: Add dedicated multiplication routine. */
  179. GFp_bn_mul_mont(r, a, b, N, N_N0, P384_LIMBS);
  180. }
  181. /* TODO(perf): Optimize this. */
  182. static void gfp_p384_point_select_w5(P384_POINT *out,
  183. const P384_POINT table[16], size_t index) {
  184. Elem x; limbs_zero(x, P384_LIMBS);
  185. Elem y; limbs_zero(y, P384_LIMBS);
  186. Elem z; limbs_zero(z, P384_LIMBS);
  187. // TODO: Rewrite in terms of |limbs_select|.
  188. for (size_t i = 0; i < 16; ++i) {
  189. crypto_word equal = constant_time_eq_w(index, (crypto_word)i + 1);
  190. for (size_t j = 0; j < P384_LIMBS; ++j) {
  191. x[j] = constant_time_select_w(equal, table[i].X[j], x[j]);
  192. y[j] = constant_time_select_w(equal, table[i].Y[j], y[j]);
  193. z[j] = constant_time_select_w(equal, table[i].Z[j], z[j]);
  194. }
  195. }
  196. limbs_copy(out->X, x, P384_LIMBS);
  197. limbs_copy(out->Y, y, P384_LIMBS);
  198. limbs_copy(out->Z, z, P384_LIMBS);
  199. }
  200. #include "ecp_nistz384.inl"