crt.c 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. /*
  2. * Copyright (c) 2008-2010 Stefan Krah. All rights reserved.
  3. *
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions
  6. * are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
  16. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  21. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  22. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  23. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  24. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  25. * SUCH DAMAGE.
  26. */
  27. #include "mpdecimal.h"
  28. #include <stdio.h>
  29. #include <assert.h>
  30. #include "numbertheory.h"
  31. #include "umodarith.h"
  32. #include "crt.h"
  33. /*
  34. * Functions for arithmetic on triple-word mpd_uint_t numbers.
  35. */
  36. /* Multiply P1P2 by v, store result in w. */
  37. static inline void
  38. _crt_mulP1P2_3(mpd_uint_t w[3], mpd_uint_t v)
  39. {
  40. mpd_uint_t hi1, hi2, lo;
  41. _mpd_mul_words(&hi1, &lo, LH_P1P2, v);
  42. w[0] = lo;
  43. _mpd_mul_words(&hi2, &lo, UH_P1P2, v);
  44. lo = hi1 + lo;
  45. if (lo < hi1) hi2++;
  46. w[1] = lo;
  47. w[2] = hi2;
  48. }
  49. /* Add 3 words from v to w. The result is known to fit in w. */
  50. static inline void
  51. _crt_add3(mpd_uint_t w[3], mpd_uint_t v[3])
  52. {
  53. mpd_uint_t carry;
  54. w[0] = w[0] + v[0];
  55. carry = (w[0] < v[0]);
  56. w[1] = w[1] + v[1];
  57. if (w[1] < v[1]) w[2]++;
  58. w[1] = w[1] + carry;
  59. if (w[1] < carry) w[2]++;
  60. w[2] += v[2];
  61. }
  62. /* Divide 3 words in u by v, store result in w, return remainder. */
  63. static inline mpd_uint_t
  64. _crt_div3(mpd_uint_t *w, const mpd_uint_t *u, mpd_uint_t v)
  65. {
  66. mpd_uint_t r1 = u[2];
  67. mpd_uint_t r2;
  68. if (r1 < v) {
  69. w[2] = 0;
  70. }
  71. else {
  72. _mpd_div_word(&w[2], &r1, u[2], v); /* GCOV_NOT_REACHED */
  73. }
  74. _mpd_div_words(&w[1], &r2, r1, u[1], v);
  75. _mpd_div_words(&w[0], &r1, r2, u[0], v);
  76. return r1;
  77. }
  78. /*
  79. * Chinese Remainder Theorem:
  80. * Algorithm from Joerg Arndt, "Matters Computational",
  81. * Chapter 37.4.1 [http://www.jjj.de/fxt/]
  82. */
  83. /*
  84. * CRT with carry: x1, x2, x3 contain numbers modulo p1, p2, p3. For each
  85. * triple of members of the arrays, find the unique z modulo p1*p2*p3.
  86. *
  87. * Overflow analysis for 32 bit:
  88. *
  89. * carry[3] can hold cmax = 2**96-1. Let c_i denote the carry at the
  90. * beginning of the ith iteration. Let zmax be the maximum z.
  91. *
  92. * cmax = 2**96-1 = 79228162514264337593543950335
  93. * zmax = (p1*p2*p3)-1 = 7711435583600944683209981953
  94. *
  95. * c_0 = 0
  96. * c_1 = (c_0 + zmax) / 10**9 = 7711435583600944683
  97. * c_2 = (c_1 + zmax) / 10**9 = 7711435591312380266
  98. * c_3 = (c_2 + zmax) / 10**9 = 7711435591312380274
  99. * c_4 = (c_3 + zmax) / 10**9 = 7711435591312380274
  100. * (...)
  101. *
  102. * The carries do not increase, (c_i + zmax) cannot overflow.
  103. *
  104. *
  105. * Overflow analysis for 64 bit:
  106. *
  107. * cmax = 2**192-1 = 6277101735386680763835789423207666416102355444464034512895
  108. * zmax = (p1*p2*p3)-1 = 6277101353934753858413533876806988331203900781075588186113
  109. *
  110. * c_0 = 0
  111. * c_1 = (c_0 + zmax) / 10**19 = 627710135393475385841353387680698833120
  112. * c_2 = (c_1 + zmax) / 10**19 = 627710135393475385904124401220046371704
  113. * c_3 = (c_2 + zmax) / 10**19 = 627710135393475385904124401220046371710
  114. * c_4 = (c_3 + zmax) / 10**19 = 627710135393475385904124401220046371710
  115. * (...)
  116. *
  117. * The carries do not increase. (c_i + zmax) cannot overflow.
  118. */
  119. void
  120. crt3(mpd_uint_t *x1, mpd_uint_t *x2, mpd_uint_t *x3, mpd_size_t rsize)
  121. {
  122. mpd_uint_t p1 = mpd_moduli[P1];
  123. mpd_uint_t umod;
  124. #ifdef PPRO
  125. double dmod;
  126. uint32_t dinvmod[3];
  127. #endif
  128. mpd_uint_t a1, a2, a3;
  129. mpd_uint_t s;
  130. mpd_uint_t z[3], t[3];
  131. mpd_uint_t carry[3] = {0,0,0};
  132. mpd_uint_t hi, lo;
  133. mpd_size_t i;
  134. for (i = 0; i < rsize; i++) {
  135. a1 = x1[i];
  136. a2 = x2[i];
  137. a3 = x3[i];
  138. SETMODULUS(P2);
  139. s = ext_submod(a2, a1, umod);
  140. s = MULMOD(s, INV_P1_MOD_P2);
  141. _mpd_mul_words(&hi, &lo, s, p1);
  142. lo = lo + a1;
  143. if (lo < a1) hi++;
  144. SETMODULUS(P3);
  145. s = dw_submod(a3, hi, lo, umod);
  146. s = MULMOD(s, INV_P1P2_MOD_P3);
  147. z[0] = lo;
  148. z[1] = hi;
  149. z[2] = 0;
  150. _crt_mulP1P2_3(t, s);
  151. _crt_add3(z, t);
  152. _crt_add3(carry, z);
  153. x1[i] = _crt_div3(carry, carry, MPD_RADIX);
  154. }
  155. assert(carry[0] == 0 && carry[1] == 0 && carry[2] == 0);
  156. }