convolute.c 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. /*
  2. * Copyright (c) 2008-2016 Stefan Krah. All rights reserved.
  3. *
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions
  6. * are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
  16. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  21. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  22. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  23. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  24. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  25. * SUCH DAMAGE.
  26. */
  27. #include "mpdecimal.h"
  28. #include <stdio.h>
  29. #include "bits.h"
  30. #include "constants.h"
  31. #include "fnt.h"
  32. #include "fourstep.h"
  33. #include "numbertheory.h"
  34. #include "sixstep.h"
  35. #include "umodarith.h"
  36. #include "convolute.h"
  37. /* Bignum: Fast convolution using the Number Theoretic Transform. Used for
  38. the multiplication of very large coefficients. */
  39. /* Convolute the data in c1 and c2. Result is in c1. */
  40. int
  41. fnt_convolute(mpd_uint_t *c1, mpd_uint_t *c2, mpd_size_t n, int modnum)
  42. {
  43. int (*fnt)(mpd_uint_t *, mpd_size_t, int);
  44. int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int);
  45. #ifdef PPRO
  46. double dmod;
  47. uint32_t dinvmod[3];
  48. #endif
  49. mpd_uint_t n_inv, umod;
  50. mpd_size_t i;
  51. SETMODULUS(modnum);
  52. n_inv = POWMOD(n, (umod-2));
  53. if (ispower2(n)) {
  54. if (n > SIX_STEP_THRESHOLD) {
  55. fnt = six_step_fnt;
  56. inv_fnt = inv_six_step_fnt;
  57. }
  58. else {
  59. fnt = std_fnt;
  60. inv_fnt = std_inv_fnt;
  61. }
  62. }
  63. else {
  64. fnt = four_step_fnt;
  65. inv_fnt = inv_four_step_fnt;
  66. }
  67. if (!fnt(c1, n, modnum)) {
  68. return 0;
  69. }
  70. if (!fnt(c2, n, modnum)) {
  71. return 0;
  72. }
  73. for (i = 0; i < n-1; i += 2) {
  74. mpd_uint_t x0 = c1[i];
  75. mpd_uint_t y0 = c2[i];
  76. mpd_uint_t x1 = c1[i+1];
  77. mpd_uint_t y1 = c2[i+1];
  78. MULMOD2(&x0, y0, &x1, y1);
  79. c1[i] = x0;
  80. c1[i+1] = x1;
  81. }
  82. if (!inv_fnt(c1, n, modnum)) {
  83. return 0;
  84. }
  85. for (i = 0; i < n-3; i += 4) {
  86. mpd_uint_t x0 = c1[i];
  87. mpd_uint_t x1 = c1[i+1];
  88. mpd_uint_t x2 = c1[i+2];
  89. mpd_uint_t x3 = c1[i+3];
  90. MULMOD2C(&x0, &x1, n_inv);
  91. MULMOD2C(&x2, &x3, n_inv);
  92. c1[i] = x0;
  93. c1[i+1] = x1;
  94. c1[i+2] = x2;
  95. c1[i+3] = x3;
  96. }
  97. return 1;
  98. }
  99. /* Autoconvolute the data in c1. Result is in c1. */
  100. int
  101. fnt_autoconvolute(mpd_uint_t *c1, mpd_size_t n, int modnum)
  102. {
  103. int (*fnt)(mpd_uint_t *, mpd_size_t, int);
  104. int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int);
  105. #ifdef PPRO
  106. double dmod;
  107. uint32_t dinvmod[3];
  108. #endif
  109. mpd_uint_t n_inv, umod;
  110. mpd_size_t i;
  111. SETMODULUS(modnum);
  112. n_inv = POWMOD(n, (umod-2));
  113. if (ispower2(n)) {
  114. if (n > SIX_STEP_THRESHOLD) {
  115. fnt = six_step_fnt;
  116. inv_fnt = inv_six_step_fnt;
  117. }
  118. else {
  119. fnt = std_fnt;
  120. inv_fnt = std_inv_fnt;
  121. }
  122. }
  123. else {
  124. fnt = four_step_fnt;
  125. inv_fnt = inv_four_step_fnt;
  126. }
  127. if (!fnt(c1, n, modnum)) {
  128. return 0;
  129. }
  130. for (i = 0; i < n-1; i += 2) {
  131. mpd_uint_t x0 = c1[i];
  132. mpd_uint_t x1 = c1[i+1];
  133. MULMOD2(&x0, x0, &x1, x1);
  134. c1[i] = x0;
  135. c1[i+1] = x1;
  136. }
  137. if (!inv_fnt(c1, n, modnum)) {
  138. return 0;
  139. }
  140. for (i = 0; i < n-3; i += 4) {
  141. mpd_uint_t x0 = c1[i];
  142. mpd_uint_t x1 = c1[i+1];
  143. mpd_uint_t x2 = c1[i+2];
  144. mpd_uint_t x3 = c1[i+3];
  145. MULMOD2C(&x0, &x1, n_inv);
  146. MULMOD2C(&x2, &x3, n_inv);
  147. c1[i] = x0;
  148. c1[i+1] = x1;
  149. c1[i+2] = x2;
  150. c1[i+3] = x3;
  151. }
  152. return 1;
  153. }