x86-mont-win32n.asm 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. ; This file is generated from a similarly-named Perl script in the BoringSSL
  2. ; source tree. Do not edit by hand.
  ; Optional symbol prefixing: only pulled in when the build defines
  ; BORINGSSL_PREFIX.
  3. %ifdef BORINGSSL_PREFIX
  4. %include "boringssl_prefix_symbols_nasm.inc"
  5. %endif
  ; Select the code section according to the NASM output format in use.
  6. %ifidn __OUTPUT_FORMAT__,obj
  7. section code use32 class=code align=64
  8. %elifidn __OUTPUT_FORMAT__,win32
  ; Absolute symbol @feat.00 with bit 0 set tells the Microsoft linker that
  ; this object is compatible with /SAFESEH. The leading '$' makes NASM
  ; treat the text as an identifier.
  9. $@feat.00 equ 1
  10. section .text code align=64
  11. %else
  12. section .text code
  13. %endif
  ; The extern declaration is intentionally left commented out; the symbol
  ; is declared with `common` in the .bss segment at the end of this file.
  14. ;extern _GFp_ia32cap_P
  15. global _GFp_bn_mul_mont
  ; ---------------------------------------------------------------------
  ; _GFp_bn_mul_mont: 32-bit x86 routine taking six dword stack arguments
  ; (found at [20+esp]..[40+esp] once the four register pushes are done).
  ; How this code uses them:
  ;   arg0  output word array, written by the L$006sub / L$007copy loops
  ;   arg1  word array; each word is multiplied by one word of arg2
  ;   arg2  word array indexed by the outer-loop counter (edx)
  ;   arg3  word array multiplied by the per-pass factor held in mm5 and
  ;         subtracted from the accumulator in L$006sub
  ;   arg4  pointer; only the first dword it points to is read
  ;   arg5  word count
  ; NOTE(review): presumably (rp, ap, bp, np, n0, num) of the upstream
  ; bn_mul_mont prototype - confirm against the C declaration.
  ; Returns eax = 1 unconditionally. ebp/ebx/esi/edi and esp are restored
  ; before returning; mm0-mm7 are used and emms is executed afterwards.
  ; NOTE(review): loop counters are derived from arg5-1 and arg5-2 with no
  ; range check in this routine - confirm the minimum count callers pass.
  ; ---------------------------------------------------------------------
  16. align 16
  17. _GFp_bn_mul_mont:
  18. L$_GFp_bn_mul_mont_begin:
  19. push ebp
  20. push ebx
  21. push esi
  22. push edi
  23. xor eax,eax
  24. mov edi,DWORD [40+esp] ; edi = arg5 (word count)
  25. lea esi,[20+esp] ; esi -> arg0 slot; the arguments are copied from here later
  26. lea edx,[24+esp] ; edx = address of the arg1 slot, reference for the placement below
  27. add edi,2
  28. neg edi
  29. lea ebp,[edi*4+esp-32] ; ebp = esp - 32 - 4*(count+2): tentative frame base
  30. neg edi ; edi = count+2 again
  ; Lower ebp so that (ebp - edx) becomes a multiple of 2048.
  31. mov eax,ebp
  32. sub eax,edx
  33. and eax,2047
  34. sub ebp,eax
  ; If bit 11 of ebp and edx agree, lower ebp by a further 2048 so that the
  ; two addresses differ in that bit.
  35. xor edx,ebp
  36. and edx,2048
  37. xor edx,2048
  38. sub ebp,edx
  39. and ebp,-64 ; round the frame base down to a 64-byte boundary
  ; Move esp to ebp plus a whole number of 4096-byte steps, then walk down
  ; one step at a time reading a dword at each stop until esp reaches ebp.
  40. mov eax,esp
  41. sub eax,ebp
  42. and eax,-4096
  43. mov edx,esp ; edx = esp as it was after the pushes (saved into the frame below)
  44. lea esp,[eax*1+ebp]
  45. mov eax,DWORD [esp] ; touch the current stop
  46. cmp esp,ebp
  47. ja NEAR L$000page_walk
  48. jmp NEAR L$001page_walk_done
  49. align 16
  50. L$000page_walk:
  51. lea esp,[esp-4096]
  52. mov eax,DWORD [esp] ; touch the next lower stop
  53. cmp esp,ebp
  54. ja NEAR L$000page_walk
  55. L$001page_walk_done:
  ; Copy the arguments into the new frame. Layout relative to esp:
  ;   [4]=arg0 [8]=arg1 [12]=arg2 [16]=arg3 [20]=dword read through arg4
  ;   [24]=saved esp, [32...]=accumulator words
  56. mov eax,DWORD [esi]
  57. mov ebx,DWORD [4+esi]
  58. mov ecx,DWORD [8+esi]
  59. mov ebp,DWORD [12+esi]
  60. mov esi,DWORD [16+esi]
  61. mov esi,DWORD [esi] ; dereference arg4 and keep only its first dword
  62. mov DWORD [4+esp],eax
  63. mov DWORD [8+esp],ebx
  64. mov DWORD [12+esp],ecx
  65. mov DWORD [16+esp],ebp
  66. mov DWORD [20+esp],esi
  67. lea ebx,[edi-3] ; ebx = (count+2)-3 = count-1
  68. mov DWORD [24+esp],edx ; remember the pre-frame esp for the epilogue
  ; NOTE(review): the bt below sets CF from bit 26 of _GFp_ia32cap_P, but no
  ; branch consumes it before `xor edx,edx` rewrites the flags. Looks like a
  ; leftover feature test - confirm with the generator script.
  69. lea eax,[_GFp_ia32cap_P]
  70. bt DWORD [eax],26
  71. mov eax,-1
  72. movd mm7,eax ; mm7 = 0x00000000FFFFFFFF, mask for the low dword
  73. mov esi,DWORD [8+esp] ; esi = arg1
  74. mov edi,DWORD [12+esp] ; edi = arg2
  75. mov ebp,DWORD [16+esp] ; ebp = arg3
  76. xor edx,edx ; edx = outer index (word of arg2)
  77. xor ecx,ecx ; ecx = inner index
  ; ---- First pass (arg2[0]): the accumulator is written, not yet read ----
  78. movd mm4,DWORD [edi] ; mm4 = arg2[0]
  79. movd mm5,DWORD [esi] ; mm5 = arg1[0]
  80. movd mm3,DWORD [ebp] ; mm3 = arg3[0]
  81. pmuludq mm5,mm4 ; mm5 = arg1[0]*arg2[0] (64-bit product)
  82. movq mm2,mm5
  83. movq mm0,mm5
  84. pand mm0,mm7 ; mm0 = low dword of the product
  85. pmuludq mm5,[20+esp] ; mm5 = low dword of product * dword stored at [20+esp]
  86. pmuludq mm3,mm5 ; mm3 = arg3[0] * low dword of mm5 (the per-pass factor)
  87. paddq mm3,mm0
  88. movd mm1,DWORD [4+ebp] ; preload arg3[1]
  89. movd mm0,DWORD [4+esi] ; preload arg1[1]
  90. psrlq mm2,32 ; mm2 = carry of the arg1*arg2 chain
  91. psrlq mm3,32 ; mm3 = carry of the arg3*factor chain
  92. inc ecx
  93. align 16
  94. L$0021st:
  95. pmuludq mm0,mm4 ; arg1[j]*arg2[0]
  96. pmuludq mm1,mm5 ; arg3[j]*factor
  97. paddq mm2,mm0
  98. paddq mm3,mm1
  99. movq mm0,mm2
  100. pand mm0,mm7
  101. movd mm1,DWORD [4+ecx*4+ebp] ; preload arg3[j+1]
  102. paddq mm3,mm0
  103. movd mm0,DWORD [4+ecx*4+esi] ; preload arg1[j+1]
  104. psrlq mm2,32
  105. movd DWORD [28+ecx*4+esp],mm3 ; store accumulator word j-1 (accumulator starts at [32+esp])
  106. psrlq mm3,32
  107. lea ecx,[1+ecx]
  108. cmp ecx,ebx
  109. jl NEAR L$0021st ; repeat while j < count-1 (signed compare)
  ; Last word of the first pass, then fold both carries into the top words.
  110. pmuludq mm0,mm4
  111. pmuludq mm1,mm5
  112. paddq mm2,mm0
  113. paddq mm3,mm1
  114. movq mm0,mm2
  115. pand mm0,mm7
  116. paddq mm3,mm0
  117. movd DWORD [28+ecx*4+esp],mm3
  118. psrlq mm2,32
  119. psrlq mm3,32
  120. paddq mm3,mm2
  121. movq [32+ebx*4+esp],mm3 ; 64-bit store: accumulator words count-1 and count
  122. inc edx ; outer index = 1
  ; ---- Remaining passes: same as above, plus the previous accumulator ----
  123. L$003outer:
  124. xor ecx,ecx
  125. movd mm4,DWORD [edx*4+edi] ; mm4 = arg2[i]
  126. movd mm5,DWORD [esi] ; mm5 = arg1[0]
  127. movd mm6,DWORD [32+esp] ; mm6 = accumulator word 0
  128. movd mm3,DWORD [ebp] ; mm3 = arg3[0]
  129. pmuludq mm5,mm4
  130. paddq mm5,mm6 ; arg1[0]*arg2[i] + accumulator word 0
  131. movq mm0,mm5
  132. movq mm2,mm5
  133. pand mm0,mm7
  134. pmuludq mm5,[20+esp] ; new per-pass factor from the low dword
  135. pmuludq mm3,mm5
  136. paddq mm3,mm0
  137. movd mm6,DWORD [36+esp] ; mm6 = accumulator word 1
  138. movd mm1,DWORD [4+ebp]
  139. movd mm0,DWORD [4+esi]
  140. psrlq mm2,32
  141. psrlq mm3,32
  142. paddq mm2,mm6
  143. inc ecx
  144. dec ebx ; ebx = count-2, countdown for the inner loop
  145. L$004inner:
  146. pmuludq mm0,mm4
  147. pmuludq mm1,mm5
  148. paddq mm2,mm0
  149. paddq mm3,mm1
  150. movq mm0,mm2
  151. movd mm6,DWORD [36+ecx*4+esp] ; accumulator word j+1
  152. pand mm0,mm7
  153. movd mm1,DWORD [4+ecx*4+ebp]
  154. paddq mm3,mm0
  155. movd mm0,DWORD [4+ecx*4+esi]
  156. psrlq mm2,32
  157. movd DWORD [28+ecx*4+esp],mm3 ; store accumulator word j-1
  158. psrlq mm3,32
  159. paddq mm2,mm6
  160. dec ebx
  161. lea ecx,[1+ecx] ; lea leaves the flags from dec intact for the jnz
  162. jnz NEAR L$004inner
  163. mov ebx,ecx ; ebx is count-1 again once the countdown has run out
  ; Last word of this pass; the old top accumulator word is added as well.
  164. pmuludq mm0,mm4
  165. pmuludq mm1,mm5
  166. paddq mm2,mm0
  167. paddq mm3,mm1
  168. movq mm0,mm2
  169. pand mm0,mm7
  170. paddq mm3,mm0
  171. movd DWORD [28+ecx*4+esp],mm3
  172. psrlq mm2,32
  173. psrlq mm3,32
  174. movd mm6,DWORD [36+ebx*4+esp] ; previous top accumulator word (index count)
  175. paddq mm3,mm2
  176. paddq mm3,mm6
  177. movq [32+ebx*4+esp],mm3 ; 64-bit store: accumulator words count-1 and count
  178. lea edx,[1+edx]
  179. cmp edx,ebx
  180. jle NEAR L$003outer ; repeat while i <= count-1 (signed compare)
  181. emms ; leave MMX state before returning to the caller
  182. align 16
  183. L$005common_tail:
  ; ---- Tail: arg0 = accumulator - arg3, then a branch-free select ----
  184. mov ebp,DWORD [16+esp] ; ebp = arg3
  185. mov edi,DWORD [4+esp] ; edi = arg0 (output)
  186. lea esi,[32+esp] ; esi -> accumulator word 0
  187. mov eax,DWORD [esi]
  188. mov ecx,ebx ; ecx = count-1, loop runs count times
  189. xor edx,edx ; index 0; also clears CF for the first sbb
  190. align 16
  191. L$006sub:
  192. sbb eax,DWORD [edx*4+ebp] ; subtract with the borrow carried in CF
  193. mov DWORD [edx*4+edi],eax
  194. dec ecx ; dec does not modify CF, so the borrow survives
  195. mov eax,DWORD [4+edx*4+esi] ; next accumulator word (the extra top word on the last round)
  196. lea edx,[1+edx]
  197. jge NEAR L$006sub ; flags still come from dec ecx
  198. sbb eax,0 ; top word minus final borrow; expected to be 0 or all-ones - TODO confirm
  199. mov edx,-1
  200. xor edx,eax ; edx = ~eax, the complementary select mask
  201. jmp NEAR L$007copy
  202. align 16
  203. L$007copy:
  ; For each word from index count-1 down to 0:
  ;   arg0[k] = (accumulator[k] AND eax) OR (arg0[k] AND edx)
  ; and the accumulator word is overwritten with ecx (left at -1 by the
  ; countdown in L$006sub).
  204. mov esi,DWORD [32+ebx*4+esp]
  205. mov ebp,DWORD [ebx*4+edi]
  206. mov DWORD [32+ebx*4+esp],ecx
  207. and esi,eax
  208. and ebp,edx
  209. or ebp,esi
  210. mov DWORD [ebx*4+edi],ebp
  211. dec ebx
  212. jge NEAR L$007copy
  213. mov esp,DWORD [24+esp] ; back to the esp saved before the frame was carved
  214. mov eax,1 ; return value is always 1
  215. pop edi
  216. pop esi
  217. pop ebx
  218. pop ebp
  219. ret
  ; NUL-terminated ASCII identification string placed after the code:
  ; "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
  220. db 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
  221. db 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
  222. db 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
  223. db 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
  224. db 111,114,103,62,0
  225. segment .bss
  ; 16-byte common symbol; _GFp_bn_mul_mont takes its address and tests
  ; bit 26 of the first dword.
  226. common _GFp_ia32cap_P 16