sha256-x86_64-nasm.asm 60 KB


  1. ; This file is generated from a similarly-named Perl script in the BoringSSL
  2. ; source tree. Do not edit by hand.
  ; NASM preamble. Addressing is RIP-relative by default. The size keywords
  ; XMMWORD/YMMWORD/ZMMWORD are defined empty, so an operand written as
  ; XMMWORD[...] assembles as a plain [...] memory reference.
  3. default rel
  4. %define XMMWORD
  5. %define YMMWORD
  6. %define ZMMWORD
  ; Code and the constant table that follows share one 64-byte aligned section.
  7. section .text code align=64
  ; Capability words defined in another object; read by the dispatch code
  ; at the top of GFp_sha256_block_data_order.
  8. EXTERN GFp_ia32cap_P
  9. global GFp_sha256_block_data_order
  ;-----------------------------------------------------------------------
  ; GFp_sha256_block_data_order -- SHA-256 block function, Win64 entry.
  ; In (Win64 argument registers, copied to rdi/rsi/rdx right after entry):
  ;   rcx -> eight 32-bit state words (loaded here, rewritten per block)
  ;   rdx -> input data, consumed 64 bytes per block
  ;   r8  =  number of 64-byte blocks
  ; The caller's rdi/rsi are parked at [rsp+8]/[rsp+16] on entry and
  ; reloaded at $L$epilogue.
  ; Frame once the prologue is done (rsp rounded down to a multiple of 64):
  ;   [rsp+0 .. rsp+63]  16-dword message schedule window
  ;   [rsp+64]           state pointer
  ;   [rsp+72]           input pointer as passed in
  ;   [rsp+80]           end-of-input pointer (input + 64*blocks)
  ;   [rsp+88]           rsp as it was before the six pushes
  ; NOTE(review): the block loop is tested at the bottom, so one block is
  ; processed even if the count is 0 -- confirm callers never pass 0.
  ;-----------------------------------------------------------------------
  10. ALIGN 16
  11. GFp_sha256_block_data_order:
  12. mov QWORD[8+rsp],rdi ;WIN64 prologue
  13. mov QWORD[16+rsp],rsi
  14. mov rax,rsp
  15. $L$SEH_begin_GFp_sha256_block_data_order:
  ; Remap the three Win64 argument registers onto rdi/rsi/rdx, which the
  ; rest of the routine uses.
  16. mov rdi,rcx
  17. mov rsi,rdx
  18. mov rdx,r8
  ; Dispatch on the capability words. r9d keeps bit 30 of word 0
  ; (1073741824 = 0x40000000); r10d keeps bits 28 and 9 of word 1
  ; (268435968 = 0x10000200). If all three are set (0x50000200) control goes
  ; to $L$avx_shortcut; otherwise if bit 9 is set, to $L$ssse3_shortcut.
  ; NOTE(review): presumably bit 30 = "Intel CPU", bit 28 = AVX and
  ; bit 9 = SSSE3 per the OPENSSL_ia32cap layout -- confirm.
  19. lea r11,[GFp_ia32cap_P]
  20. mov r9d,DWORD[r11]
  21. mov r10d,DWORD[4+r11]
  ; Word 2 is loaded but not consulted in this scalar path; r11d is
  ; overwritten with a state word below.
  22. mov r11d,DWORD[8+r11]
  23. and r9d,1073741824
  24. and r10d,268435968
  25. or r10d,r9d
  26. cmp r10d,1342177792
  27. je NEAR $L$avx_shortcut
  28. test r10d,512
  29. jnz NEAR $L$ssse3_shortcut
  ; Scalar path: remember the incoming rsp, save the six callee-saved
  ; general-purpose registers used below, then carve out the aligned frame.
  30. mov rax,rsp
  31. push rbx
  32. push rbp
  33. push r12
  34. push r13
  35. push r14
  36. push r15
  ; shl by 4 followed by the *4 scale in the lea gives rdx = rsi + 64*blocks.
  37. shl rdx,4
  ; 16*4 bytes of schedule window + 4*8 bytes for the saved qwords.
  38. sub rsp,16*4+4*8
  39. lea rdx,[rdx*4+rsi]
  40. and rsp,-64
  41. mov QWORD[((64+0))+rsp],rdi
  42. mov QWORD[((64+8))+rsp],rsi
  43. mov QWORD[((64+16))+rsp],rdx
  44. mov QWORD[88+rsp],rax
  45. $L$prologue:
  ; Load the eight state words into eax,ebx,ecx,edx,r8d..r11d. rdi is free
  ; afterwards and is reused as scratch (edi) inside the rounds.
  46. mov eax,DWORD[rdi]
  47. mov ebx,DWORD[4+rdi]
  48. mov ecx,DWORD[8+rdi]
  49. mov edx,DWORD[12+rdi]
  50. mov r8d,DWORD[16+rdi]
  51. mov r9d,DWORD[20+rdi]
  52. mov r10d,DWORD[24+rdi]
  53. mov r11d,DWORD[28+rdi]
  54. jmp NEAR $L$loop
  ;-----------------------------------------------------------------------
  ; $L$loop -- head of the per-block loop; rounds 0..15 of one 64-byte block.
  ; On entry eax,ebx,ecx,edx,r8d,r9d,r10d,r11d hold state words 0..7
  ; (a,b,c,d,e,f,g,h in SHA-256 terms) and rsi points at the current block.
  ; Each round i:
  ;   r12d = bswap(dword at [rsi+4*i]), also stored to the schedule [rsp+4*i]
  ;   r13d = ror(e,6)^ror(e,11)^ror(e,25), built by the ror 14 / 5 / 6 chain
  ;   r14d = ror(a,2)^ror(a,13)^ror(a,22), built by the ror 9 / 11 / 2 chain
  ;   Ch   = ((f^g)&e)^g
  ;   Maj  = ((a^b)&(b^c))^b, where b^c is the previous round's a^b, kept
  ;          alternately in edi and r15d
  ;   T1   = h + Ch + r13d + r12d + DWORD[rbp];  d += T1;  h = T1 + Maj
  ; The "+ r14d" that completes the new a is issued at the start of the
  ; following round, and registers rotate roles instead of values moving.
  ; rbp walks K256: +4 per round, +20 after every fourth round because each
  ; 16-byte row of the table is stored twice.
  ;-----------------------------------------------------------------------
  55. ALIGN 16
  56. $L$loop:
  ; -- round 0 (edi = b^c is seeded explicitly here) --
  57. mov edi,ebx
  58. lea rbp,[K256]
  59. xor edi,ecx
  60. mov r12d,DWORD[rsi]
  61. mov r13d,r8d
  62. mov r14d,eax
  63. bswap r12d
  64. ror r13d,14
  65. mov r15d,r9d
  66. xor r13d,r8d
  67. ror r14d,9
  68. xor r15d,r10d
  69. mov DWORD[rsp],r12d
  70. xor r14d,eax
  71. and r15d,r8d
  72. ror r13d,5
  73. add r12d,r11d
  74. xor r15d,r10d
  75. ror r14d,11
  76. xor r13d,r8d
  77. add r12d,r15d
  78. mov r15d,eax
  79. add r12d,DWORD[rbp]
  80. xor r14d,eax
  81. xor r15d,ebx
  82. ror r13d,6
  83. mov r11d,ebx
  84. and edi,r15d
  85. ror r14d,2
  86. add r12d,r13d
  87. xor r11d,edi
  88. add edx,r12d
  89. add r11d,r12d
  90. lea rbp,[4+rbp]
  91. add r11d,r14d
  ; -- round 1 --
  92. mov r12d,DWORD[4+rsi]
  93. mov r13d,edx
  94. mov r14d,r11d
  95. bswap r12d
  96. ror r13d,14
  97. mov edi,r8d
  98. xor r13d,edx
  99. ror r14d,9
  100. xor edi,r9d
  101. mov DWORD[4+rsp],r12d
  102. xor r14d,r11d
  103. and edi,edx
  104. ror r13d,5
  105. add r12d,r10d
  106. xor edi,r9d
  107. ror r14d,11
  108. xor r13d,edx
  109. add r12d,edi
  110. mov edi,r11d
  111. add r12d,DWORD[rbp]
  112. xor r14d,r11d
  113. xor edi,eax
  114. ror r13d,6
  115. mov r10d,eax
  116. and r15d,edi
  117. ror r14d,2
  118. add r12d,r13d
  119. xor r10d,r15d
  120. add ecx,r12d
  121. add r10d,r12d
  122. lea rbp,[4+rbp]
  123. add r10d,r14d
  ; -- round 2 --
  124. mov r12d,DWORD[8+rsi]
  125. mov r13d,ecx
  126. mov r14d,r10d
  127. bswap r12d
  128. ror r13d,14
  129. mov r15d,edx
  130. xor r13d,ecx
  131. ror r14d,9
  132. xor r15d,r8d
  133. mov DWORD[8+rsp],r12d
  134. xor r14d,r10d
  135. and r15d,ecx
  136. ror r13d,5
  137. add r12d,r9d
  138. xor r15d,r8d
  139. ror r14d,11
  140. xor r13d,ecx
  141. add r12d,r15d
  142. mov r15d,r10d
  143. add r12d,DWORD[rbp]
  144. xor r14d,r10d
  145. xor r15d,r11d
  146. ror r13d,6
  147. mov r9d,r11d
  148. and edi,r15d
  149. ror r14d,2
  150. add r12d,r13d
  151. xor r9d,edi
  152. add ebx,r12d
  153. add r9d,r12d
  154. lea rbp,[4+rbp]
  155. add r9d,r14d
  ; -- round 3 --
  156. mov r12d,DWORD[12+rsi]
  157. mov r13d,ebx
  158. mov r14d,r9d
  159. bswap r12d
  160. ror r13d,14
  161. mov edi,ecx
  162. xor r13d,ebx
  163. ror r14d,9
  164. xor edi,edx
  165. mov DWORD[12+rsp],r12d
  166. xor r14d,r9d
  167. and edi,ebx
  168. ror r13d,5
  169. add r12d,r8d
  170. xor edi,edx
  171. ror r14d,11
  172. xor r13d,ebx
  173. add r12d,edi
  174. mov edi,r9d
  175. add r12d,DWORD[rbp]
  176. xor r14d,r9d
  177. xor edi,r10d
  178. ror r13d,6
  179. mov r8d,r10d
  180. and r15d,edi
  181. ror r14d,2
  182. add r12d,r13d
  183. xor r8d,r15d
  184. add eax,r12d
  185. add r8d,r12d
  186. lea rbp,[20+rbp] ; next K256 row (steps over the duplicate copy)
  187. add r8d,r14d
  ; -- round 4 --
  188. mov r12d,DWORD[16+rsi]
  189. mov r13d,eax
  190. mov r14d,r8d
  191. bswap r12d
  192. ror r13d,14
  193. mov r15d,ebx
  194. xor r13d,eax
  195. ror r14d,9
  196. xor r15d,ecx
  197. mov DWORD[16+rsp],r12d
  198. xor r14d,r8d
  199. and r15d,eax
  200. ror r13d,5
  201. add r12d,edx
  202. xor r15d,ecx
  203. ror r14d,11
  204. xor r13d,eax
  205. add r12d,r15d
  206. mov r15d,r8d
  207. add r12d,DWORD[rbp]
  208. xor r14d,r8d
  209. xor r15d,r9d
  210. ror r13d,6
  211. mov edx,r9d
  212. and edi,r15d
  213. ror r14d,2
  214. add r12d,r13d
  215. xor edx,edi
  216. add r11d,r12d
  217. add edx,r12d
  218. lea rbp,[4+rbp]
  219. add edx,r14d
  ; -- round 5 --
  220. mov r12d,DWORD[20+rsi]
  221. mov r13d,r11d
  222. mov r14d,edx
  223. bswap r12d
  224. ror r13d,14
  225. mov edi,eax
  226. xor r13d,r11d
  227. ror r14d,9
  228. xor edi,ebx
  229. mov DWORD[20+rsp],r12d
  230. xor r14d,edx
  231. and edi,r11d
  232. ror r13d,5
  233. add r12d,ecx
  234. xor edi,ebx
  235. ror r14d,11
  236. xor r13d,r11d
  237. add r12d,edi
  238. mov edi,edx
  239. add r12d,DWORD[rbp]
  240. xor r14d,edx
  241. xor edi,r8d
  242. ror r13d,6
  243. mov ecx,r8d
  244. and r15d,edi
  245. ror r14d,2
  246. add r12d,r13d
  247. xor ecx,r15d
  248. add r10d,r12d
  249. add ecx,r12d
  250. lea rbp,[4+rbp]
  251. add ecx,r14d
  ; -- round 6 --
  252. mov r12d,DWORD[24+rsi]
  253. mov r13d,r10d
  254. mov r14d,ecx
  255. bswap r12d
  256. ror r13d,14
  257. mov r15d,r11d
  258. xor r13d,r10d
  259. ror r14d,9
  260. xor r15d,eax
  261. mov DWORD[24+rsp],r12d
  262. xor r14d,ecx
  263. and r15d,r10d
  264. ror r13d,5
  265. add r12d,ebx
  266. xor r15d,eax
  267. ror r14d,11
  268. xor r13d,r10d
  269. add r12d,r15d
  270. mov r15d,ecx
  271. add r12d,DWORD[rbp]
  272. xor r14d,ecx
  273. xor r15d,edx
  274. ror r13d,6
  275. mov ebx,edx
  276. and edi,r15d
  277. ror r14d,2
  278. add r12d,r13d
  279. xor ebx,edi
  280. add r9d,r12d
  281. add ebx,r12d
  282. lea rbp,[4+rbp]
  283. add ebx,r14d
  ; -- round 7 --
  284. mov r12d,DWORD[28+rsi]
  285. mov r13d,r9d
  286. mov r14d,ebx
  287. bswap r12d
  288. ror r13d,14
  289. mov edi,r10d
  290. xor r13d,r9d
  291. ror r14d,9
  292. xor edi,r11d
  293. mov DWORD[28+rsp],r12d
  294. xor r14d,ebx
  295. and edi,r9d
  296. ror r13d,5
  297. add r12d,eax
  298. xor edi,r11d
  299. ror r14d,11
  300. xor r13d,r9d
  301. add r12d,edi
  302. mov edi,ebx
  303. add r12d,DWORD[rbp]
  304. xor r14d,ebx
  305. xor edi,ecx
  306. ror r13d,6
  307. mov eax,ecx
  308. and r15d,edi
  309. ror r14d,2
  310. add r12d,r13d
  311. xor eax,r15d
  312. add r8d,r12d
  313. add eax,r12d
  314. lea rbp,[20+rbp] ; next K256 row (steps over the duplicate copy)
  315. add eax,r14d
  ; -- round 8 (register roles are back where they were in round 0) --
  316. mov r12d,DWORD[32+rsi]
  317. mov r13d,r8d
  318. mov r14d,eax
  319. bswap r12d
  320. ror r13d,14
  321. mov r15d,r9d
  322. xor r13d,r8d
  323. ror r14d,9
  324. xor r15d,r10d
  325. mov DWORD[32+rsp],r12d
  326. xor r14d,eax
  327. and r15d,r8d
  328. ror r13d,5
  329. add r12d,r11d
  330. xor r15d,r10d
  331. ror r14d,11
  332. xor r13d,r8d
  333. add r12d,r15d
  334. mov r15d,eax
  335. add r12d,DWORD[rbp]
  336. xor r14d,eax
  337. xor r15d,ebx
  338. ror r13d,6
  339. mov r11d,ebx
  340. and edi,r15d
  341. ror r14d,2
  342. add r12d,r13d
  343. xor r11d,edi
  344. add edx,r12d
  345. add r11d,r12d
  346. lea rbp,[4+rbp]
  347. add r11d,r14d
  ; -- round 9 --
  348. mov r12d,DWORD[36+rsi]
  349. mov r13d,edx
  350. mov r14d,r11d
  351. bswap r12d
  352. ror r13d,14
  353. mov edi,r8d
  354. xor r13d,edx
  355. ror r14d,9
  356. xor edi,r9d
  357. mov DWORD[36+rsp],r12d
  358. xor r14d,r11d
  359. and edi,edx
  360. ror r13d,5
  361. add r12d,r10d
  362. xor edi,r9d
  363. ror r14d,11
  364. xor r13d,edx
  365. add r12d,edi
  366. mov edi,r11d
  367. add r12d,DWORD[rbp]
  368. xor r14d,r11d
  369. xor edi,eax
  370. ror r13d,6
  371. mov r10d,eax
  372. and r15d,edi
  373. ror r14d,2
  374. add r12d,r13d
  375. xor r10d,r15d
  376. add ecx,r12d
  377. add r10d,r12d
  378. lea rbp,[4+rbp]
  379. add r10d,r14d
  ; -- round 10 --
  380. mov r12d,DWORD[40+rsi]
  381. mov r13d,ecx
  382. mov r14d,r10d
  383. bswap r12d
  384. ror r13d,14
  385. mov r15d,edx
  386. xor r13d,ecx
  387. ror r14d,9
  388. xor r15d,r8d
  389. mov DWORD[40+rsp],r12d
  390. xor r14d,r10d
  391. and r15d,ecx
  392. ror r13d,5
  393. add r12d,r9d
  394. xor r15d,r8d
  395. ror r14d,11
  396. xor r13d,ecx
  397. add r12d,r15d
  398. mov r15d,r10d
  399. add r12d,DWORD[rbp]
  400. xor r14d,r10d
  401. xor r15d,r11d
  402. ror r13d,6
  403. mov r9d,r11d
  404. and edi,r15d
  405. ror r14d,2
  406. add r12d,r13d
  407. xor r9d,edi
  408. add ebx,r12d
  409. add r9d,r12d
  410. lea rbp,[4+rbp]
  411. add r9d,r14d
  ; -- round 11 --
  412. mov r12d,DWORD[44+rsi]
  413. mov r13d,ebx
  414. mov r14d,r9d
  415. bswap r12d
  416. ror r13d,14
  417. mov edi,ecx
  418. xor r13d,ebx
  419. ror r14d,9
  420. xor edi,edx
  421. mov DWORD[44+rsp],r12d
  422. xor r14d,r9d
  423. and edi,ebx
  424. ror r13d,5
  425. add r12d,r8d
  426. xor edi,edx
  427. ror r14d,11
  428. xor r13d,ebx
  429. add r12d,edi
  430. mov edi,r9d
  431. add r12d,DWORD[rbp]
  432. xor r14d,r9d
  433. xor edi,r10d
  434. ror r13d,6
  435. mov r8d,r10d
  436. and r15d,edi
  437. ror r14d,2
  438. add r12d,r13d
  439. xor r8d,r15d
  440. add eax,r12d
  441. add r8d,r12d
  442. lea rbp,[20+rbp] ; next K256 row (steps over the duplicate copy)
  443. add r8d,r14d
  ; -- round 12 --
  444. mov r12d,DWORD[48+rsi]
  445. mov r13d,eax
  446. mov r14d,r8d
  447. bswap r12d
  448. ror r13d,14
  449. mov r15d,ebx
  450. xor r13d,eax
  451. ror r14d,9
  452. xor r15d,ecx
  453. mov DWORD[48+rsp],r12d
  454. xor r14d,r8d
  455. and r15d,eax
  456. ror r13d,5
  457. add r12d,edx
  458. xor r15d,ecx
  459. ror r14d,11
  460. xor r13d,eax
  461. add r12d,r15d
  462. mov r15d,r8d
  463. add r12d,DWORD[rbp]
  464. xor r14d,r8d
  465. xor r15d,r9d
  466. ror r13d,6
  467. mov edx,r9d
  468. and edi,r15d
  469. ror r14d,2
  470. add r12d,r13d
  471. xor edx,edi
  472. add r11d,r12d
  473. add edx,r12d
  474. lea rbp,[4+rbp]
  475. add edx,r14d
  ; -- round 13 --
  476. mov r12d,DWORD[52+rsi]
  477. mov r13d,r11d
  478. mov r14d,edx
  479. bswap r12d
  480. ror r13d,14
  481. mov edi,eax
  482. xor r13d,r11d
  483. ror r14d,9
  484. xor edi,ebx
  485. mov DWORD[52+rsp],r12d
  486. xor r14d,edx
  487. and edi,r11d
  488. ror r13d,5
  489. add r12d,ecx
  490. xor edi,ebx
  491. ror r14d,11
  492. xor r13d,r11d
  493. add r12d,edi
  494. mov edi,edx
  495. add r12d,DWORD[rbp]
  496. xor r14d,edx
  497. xor edi,r8d
  498. ror r13d,6
  499. mov ecx,r8d
  500. and r15d,edi
  501. ror r14d,2
  502. add r12d,r13d
  503. xor ecx,r15d
  504. add r10d,r12d
  505. add ecx,r12d
  506. lea rbp,[4+rbp]
  507. add ecx,r14d
  ; -- round 14 --
  508. mov r12d,DWORD[56+rsi]
  509. mov r13d,r10d
  510. mov r14d,ecx
  511. bswap r12d
  512. ror r13d,14
  513. mov r15d,r11d
  514. xor r13d,r10d
  515. ror r14d,9
  516. xor r15d,eax
  517. mov DWORD[56+rsp],r12d
  518. xor r14d,ecx
  519. and r15d,r10d
  520. ror r13d,5
  521. add r12d,ebx
  522. xor r15d,eax
  523. ror r14d,11
  524. xor r13d,r10d
  525. add r12d,r15d
  526. mov r15d,ecx
  527. add r12d,DWORD[rbp]
  528. xor r14d,ecx
  529. xor r15d,edx
  530. ror r13d,6
  531. mov ebx,edx
  532. and edi,r15d
  533. ror r14d,2
  534. add r12d,r13d
  535. xor ebx,edi
  536. add r9d,r12d
  537. add ebx,r12d
  538. lea rbp,[4+rbp]
  539. add ebx,r14d
  ; -- round 15 --
  540. mov r12d,DWORD[60+rsi]
  541. mov r13d,r9d
  542. mov r14d,ebx
  543. bswap r12d
  544. ror r13d,14
  545. mov edi,r10d
  546. xor r13d,r9d
  547. ror r14d,9
  548. xor edi,r11d
  549. mov DWORD[60+rsp],r12d
  550. xor r14d,ebx
  551. and edi,r9d
  552. ror r13d,5
  553. add r12d,eax
  554. xor edi,r11d
  555. ror r14d,11
  556. xor r13d,r9d
  557. add r12d,edi
  558. mov edi,ebx
  559. add r12d,DWORD[rbp]
  560. xor r14d,ebx
  561. xor edi,ecx
  562. ror r13d,6
  563. mov eax,ecx
  564. and r15d,edi
  565. ror r14d,2
  566. add r12d,r13d
  567. xor eax,r15d
  568. add r8d,r12d
  569. add eax,r12d
  570. lea rbp,[20+rbp] ; next K256 row (steps over the duplicate copy)
  ; The pending "add eax,r14d" for round 15 is issued inside $L$rounds_16_xx.
  571. jmp NEAR $L$rounds_16_xx
  ;-----------------------------------------------------------------------
  ; $L$rounds_16_xx -- rounds 16..63, sixteen rounds per pass.
  ; Before each round the schedule slot j (= round mod 16) is refreshed in
  ; place on the stack:
  ;   W[j] += s0(W[j+1]) + s1(W[j+14]) + W[j+9]        (indices mod 16)
  ;   s0(x) = ror(x,7)^ror(x,18)^(x>>3),   via ror 11 / xor / shr 3 / ror 7
  ;   s1(x) = ror(x,17)^ror(x,19)^(x>>10), via ror 2 / xor / shr 10 / ror 17
  ; and then the same round body as in rounds 0..15 consumes the new W[j].
  ; The "+ r14d" left pending by the previous round is folded in between
  ; the schedule instructions.
  ; rbp enters at K256+128 and gains 128 per pass. The exit test reads
  ; byte 3 of the dword at rbp, which is zero only for the first entry
  ; after the round constants (0x00010203 at K256+512), i.e. after 3 passes.
  ;-----------------------------------------------------------------------
  572. ALIGN 16
  573. $L$rounds_16_xx:
  ; -- schedule slot 0 + round --
  574. mov r13d,DWORD[4+rsp]
  575. mov r15d,DWORD[56+rsp]
  576. mov r12d,r13d
  577. ror r13d,11
  578. add eax,r14d
  579. mov r14d,r15d
  580. ror r15d,2
  581. xor r13d,r12d
  582. shr r12d,3
  583. ror r13d,7
  584. xor r15d,r14d
  585. shr r14d,10
  586. ror r15d,17
  587. xor r12d,r13d
  588. xor r15d,r14d
  589. add r12d,DWORD[36+rsp]
  590. add r12d,DWORD[rsp]
  591. mov r13d,r8d
  592. add r12d,r15d
  593. mov r14d,eax
  594. ror r13d,14
  595. mov r15d,r9d
  596. xor r13d,r8d
  597. ror r14d,9
  598. xor r15d,r10d
  599. mov DWORD[rsp],r12d
  600. xor r14d,eax
  601. and r15d,r8d
  602. ror r13d,5
  603. add r12d,r11d
  604. xor r15d,r10d
  605. ror r14d,11
  606. xor r13d,r8d
  607. add r12d,r15d
  608. mov r15d,eax
  609. add r12d,DWORD[rbp]
  610. xor r14d,eax
  611. xor r15d,ebx
  612. ror r13d,6
  613. mov r11d,ebx
  614. and edi,r15d
  615. ror r14d,2
  616. add r12d,r13d
  617. xor r11d,edi
  618. add edx,r12d
  619. add r11d,r12d
  620. lea rbp,[4+rbp]
  ; -- schedule slot 1 + round --
  621. mov r13d,DWORD[8+rsp]
  622. mov edi,DWORD[60+rsp]
  623. mov r12d,r13d
  624. ror r13d,11
  625. add r11d,r14d
  626. mov r14d,edi
  627. ror edi,2
  628. xor r13d,r12d
  629. shr r12d,3
  630. ror r13d,7
  631. xor edi,r14d
  632. shr r14d,10
  633. ror edi,17
  634. xor r12d,r13d
  635. xor edi,r14d
  636. add r12d,DWORD[40+rsp]
  637. add r12d,DWORD[4+rsp]
  638. mov r13d,edx
  639. add r12d,edi
  640. mov r14d,r11d
  641. ror r13d,14
  642. mov edi,r8d
  643. xor r13d,edx
  644. ror r14d,9
  645. xor edi,r9d
  646. mov DWORD[4+rsp],r12d
  647. xor r14d,r11d
  648. and edi,edx
  649. ror r13d,5
  650. add r12d,r10d
  651. xor edi,r9d
  652. ror r14d,11
  653. xor r13d,edx
  654. add r12d,edi
  655. mov edi,r11d
  656. add r12d,DWORD[rbp]
  657. xor r14d,r11d
  658. xor edi,eax
  659. ror r13d,6
  660. mov r10d,eax
  661. and r15d,edi
  662. ror r14d,2
  663. add r12d,r13d
  664. xor r10d,r15d
  665. add ecx,r12d
  666. add r10d,r12d
  667. lea rbp,[4+rbp]
  ; -- schedule slot 2 + round --
  668. mov r13d,DWORD[12+rsp]
  669. mov r15d,DWORD[rsp]
  670. mov r12d,r13d
  671. ror r13d,11
  672. add r10d,r14d
  673. mov r14d,r15d
  674. ror r15d,2
  675. xor r13d,r12d
  676. shr r12d,3
  677. ror r13d,7
  678. xor r15d,r14d
  679. shr r14d,10
  680. ror r15d,17
  681. xor r12d,r13d
  682. xor r15d,r14d
  683. add r12d,DWORD[44+rsp]
  684. add r12d,DWORD[8+rsp]
  685. mov r13d,ecx
  686. add r12d,r15d
  687. mov r14d,r10d
  688. ror r13d,14
  689. mov r15d,edx
  690. xor r13d,ecx
  691. ror r14d,9
  692. xor r15d,r8d
  693. mov DWORD[8+rsp],r12d
  694. xor r14d,r10d
  695. and r15d,ecx
  696. ror r13d,5
  697. add r12d,r9d
  698. xor r15d,r8d
  699. ror r14d,11
  700. xor r13d,ecx
  701. add r12d,r15d
  702. mov r15d,r10d
  703. add r12d,DWORD[rbp]
  704. xor r14d,r10d
  705. xor r15d,r11d
  706. ror r13d,6
  707. mov r9d,r11d
  708. and edi,r15d
  709. ror r14d,2
  710. add r12d,r13d
  711. xor r9d,edi
  712. add ebx,r12d
  713. add r9d,r12d
  714. lea rbp,[4+rbp]
  ; -- schedule slot 3 + round --
  715. mov r13d,DWORD[16+rsp]
  716. mov edi,DWORD[4+rsp]
  717. mov r12d,r13d
  718. ror r13d,11
  719. add r9d,r14d
  720. mov r14d,edi
  721. ror edi,2
  722. xor r13d,r12d
  723. shr r12d,3
  724. ror r13d,7
  725. xor edi,r14d
  726. shr r14d,10
  727. ror edi,17
  728. xor r12d,r13d
  729. xor edi,r14d
  730. add r12d,DWORD[48+rsp]
  731. add r12d,DWORD[12+rsp]
  732. mov r13d,ebx
  733. add r12d,edi
  734. mov r14d,r9d
  735. ror r13d,14
  736. mov edi,ecx
  737. xor r13d,ebx
  738. ror r14d,9
  739. xor edi,edx
  740. mov DWORD[12+rsp],r12d
  741. xor r14d,r9d
  742. and edi,ebx
  743. ror r13d,5
  744. add r12d,r8d
  745. xor edi,edx
  746. ror r14d,11
  747. xor r13d,ebx
  748. add r12d,edi
  749. mov edi,r9d
  750. add r12d,DWORD[rbp]
  751. xor r14d,r9d
  752. xor edi,r10d
  753. ror r13d,6
  754. mov r8d,r10d
  755. and r15d,edi
  756. ror r14d,2
  757. add r12d,r13d
  758. xor r8d,r15d
  759. add eax,r12d
  760. add r8d,r12d
  761. lea rbp,[20+rbp] ; next K256 row (steps over the duplicate copy)
  ; -- schedule slot 4 + round --
  762. mov r13d,DWORD[20+rsp]
  763. mov r15d,DWORD[8+rsp]
  764. mov r12d,r13d
  765. ror r13d,11
  766. add r8d,r14d
  767. mov r14d,r15d
  768. ror r15d,2
  769. xor r13d,r12d
  770. shr r12d,3
  771. ror r13d,7
  772. xor r15d,r14d
  773. shr r14d,10
  774. ror r15d,17
  775. xor r12d,r13d
  776. xor r15d,r14d
  777. add r12d,DWORD[52+rsp]
  778. add r12d,DWORD[16+rsp]
  779. mov r13d,eax
  780. add r12d,r15d
  781. mov r14d,r8d
  782. ror r13d,14
  783. mov r15d,ebx
  784. xor r13d,eax
  785. ror r14d,9
  786. xor r15d,ecx
  787. mov DWORD[16+rsp],r12d
  788. xor r14d,r8d
  789. and r15d,eax
  790. ror r13d,5
  791. add r12d,edx
  792. xor r15d,ecx
  793. ror r14d,11
  794. xor r13d,eax
  795. add r12d,r15d
  796. mov r15d,r8d
  797. add r12d,DWORD[rbp]
  798. xor r14d,r8d
  799. xor r15d,r9d
  800. ror r13d,6
  801. mov edx,r9d
  802. and edi,r15d
  803. ror r14d,2
  804. add r12d,r13d
  805. xor edx,edi
  806. add r11d,r12d
  807. add edx,r12d
  808. lea rbp,[4+rbp]
  ; -- schedule slot 5 + round --
  809. mov r13d,DWORD[24+rsp]
  810. mov edi,DWORD[12+rsp]
  811. mov r12d,r13d
  812. ror r13d,11
  813. add edx,r14d
  814. mov r14d,edi
  815. ror edi,2
  816. xor r13d,r12d
  817. shr r12d,3
  818. ror r13d,7
  819. xor edi,r14d
  820. shr r14d,10
  821. ror edi,17
  822. xor r12d,r13d
  823. xor edi,r14d
  824. add r12d,DWORD[56+rsp]
  825. add r12d,DWORD[20+rsp]
  826. mov r13d,r11d
  827. add r12d,edi
  828. mov r14d,edx
  829. ror r13d,14
  830. mov edi,eax
  831. xor r13d,r11d
  832. ror r14d,9
  833. xor edi,ebx
  834. mov DWORD[20+rsp],r12d
  835. xor r14d,edx
  836. and edi,r11d
  837. ror r13d,5
  838. add r12d,ecx
  839. xor edi,ebx
  840. ror r14d,11
  841. xor r13d,r11d
  842. add r12d,edi
  843. mov edi,edx
  844. add r12d,DWORD[rbp]
  845. xor r14d,edx
  846. xor edi,r8d
  847. ror r13d,6
  848. mov ecx,r8d
  849. and r15d,edi
  850. ror r14d,2
  851. add r12d,r13d
  852. xor ecx,r15d
  853. add r10d,r12d
  854. add ecx,r12d
  855. lea rbp,[4+rbp]
  ; -- schedule slot 6 + round --
  856. mov r13d,DWORD[28+rsp]
  857. mov r15d,DWORD[16+rsp]
  858. mov r12d,r13d
  859. ror r13d,11
  860. add ecx,r14d
  861. mov r14d,r15d
  862. ror r15d,2
  863. xor r13d,r12d
  864. shr r12d,3
  865. ror r13d,7
  866. xor r15d,r14d
  867. shr r14d,10
  868. ror r15d,17
  869. xor r12d,r13d
  870. xor r15d,r14d
  871. add r12d,DWORD[60+rsp]
  872. add r12d,DWORD[24+rsp]
  873. mov r13d,r10d
  874. add r12d,r15d
  875. mov r14d,ecx
  876. ror r13d,14
  877. mov r15d,r11d
  878. xor r13d,r10d
  879. ror r14d,9
  880. xor r15d,eax
  881. mov DWORD[24+rsp],r12d
  882. xor r14d,ecx
  883. and r15d,r10d
  884. ror r13d,5
  885. add r12d,ebx
  886. xor r15d,eax
  887. ror r14d,11
  888. xor r13d,r10d
  889. add r12d,r15d
  890. mov r15d,ecx
  891. add r12d,DWORD[rbp]
  892. xor r14d,ecx
  893. xor r15d,edx
  894. ror r13d,6
  895. mov ebx,edx
  896. and edi,r15d
  897. ror r14d,2
  898. add r12d,r13d
  899. xor ebx,edi
  900. add r9d,r12d
  901. add ebx,r12d
  902. lea rbp,[4+rbp]
  ; -- schedule slot 7 + round --
  903. mov r13d,DWORD[32+rsp]
  904. mov edi,DWORD[20+rsp]
  905. mov r12d,r13d
  906. ror r13d,11
  907. add ebx,r14d
  908. mov r14d,edi
  909. ror edi,2
  910. xor r13d,r12d
  911. shr r12d,3
  912. ror r13d,7
  913. xor edi,r14d
  914. shr r14d,10
  915. ror edi,17
  916. xor r12d,r13d
  917. xor edi,r14d
  918. add r12d,DWORD[rsp]
  919. add r12d,DWORD[28+rsp]
  920. mov r13d,r9d
  921. add r12d,edi
  922. mov r14d,ebx
  923. ror r13d,14
  924. mov edi,r10d
  925. xor r13d,r9d
  926. ror r14d,9
  927. xor edi,r11d
  928. mov DWORD[28+rsp],r12d
  929. xor r14d,ebx
  930. and edi,r9d
  931. ror r13d,5
  932. add r12d,eax
  933. xor edi,r11d
  934. ror r14d,11
  935. xor r13d,r9d
  936. add r12d,edi
  937. mov edi,ebx
  938. add r12d,DWORD[rbp]
  939. xor r14d,ebx
  940. xor edi,ecx
  941. ror r13d,6
  942. mov eax,ecx
  943. and r15d,edi
  944. ror r14d,2
  945. add r12d,r13d
  946. xor eax,r15d
  947. add r8d,r12d
  948. add eax,r12d
  949. lea rbp,[20+rbp] ; next K256 row (steps over the duplicate copy)
  ; -- schedule slot 8 + round --
  950. mov r13d,DWORD[36+rsp]
  951. mov r15d,DWORD[24+rsp]
  952. mov r12d,r13d
  953. ror r13d,11
  954. add eax,r14d
  955. mov r14d,r15d
  956. ror r15d,2
  957. xor r13d,r12d
  958. shr r12d,3
  959. ror r13d,7
  960. xor r15d,r14d
  961. shr r14d,10
  962. ror r15d,17
  963. xor r12d,r13d
  964. xor r15d,r14d
  965. add r12d,DWORD[4+rsp]
  966. add r12d,DWORD[32+rsp]
  967. mov r13d,r8d
  968. add r12d,r15d
  969. mov r14d,eax
  970. ror r13d,14
  971. mov r15d,r9d
  972. xor r13d,r8d
  973. ror r14d,9
  974. xor r15d,r10d
  975. mov DWORD[32+rsp],r12d
  976. xor r14d,eax
  977. and r15d,r8d
  978. ror r13d,5
  979. add r12d,r11d
  980. xor r15d,r10d
  981. ror r14d,11
  982. xor r13d,r8d
  983. add r12d,r15d
  984. mov r15d,eax
  985. add r12d,DWORD[rbp]
  986. xor r14d,eax
  987. xor r15d,ebx
  988. ror r13d,6
  989. mov r11d,ebx
  990. and edi,r15d
  991. ror r14d,2
  992. add r12d,r13d
  993. xor r11d,edi
  994. add edx,r12d
  995. add r11d,r12d
  996. lea rbp,[4+rbp]
  ; -- schedule slot 9 + round --
  997. mov r13d,DWORD[40+rsp]
  998. mov edi,DWORD[28+rsp]
  999. mov r12d,r13d
  1000. ror r13d,11
  1001. add r11d,r14d
  1002. mov r14d,edi
  1003. ror edi,2
  1004. xor r13d,r12d
  1005. shr r12d,3
  1006. ror r13d,7
  1007. xor edi,r14d
  1008. shr r14d,10
  1009. ror edi,17
  1010. xor r12d,r13d
  1011. xor edi,r14d
  1012. add r12d,DWORD[8+rsp]
  1013. add r12d,DWORD[36+rsp]
  1014. mov r13d,edx
  1015. add r12d,edi
  1016. mov r14d,r11d
  1017. ror r13d,14
  1018. mov edi,r8d
  1019. xor r13d,edx
  1020. ror r14d,9
  1021. xor edi,r9d
  1022. mov DWORD[36+rsp],r12d
  1023. xor r14d,r11d
  1024. and edi,edx
  1025. ror r13d,5
  1026. add r12d,r10d
  1027. xor edi,r9d
  1028. ror r14d,11
  1029. xor r13d,edx
  1030. add r12d,edi
  1031. mov edi,r11d
  1032. add r12d,DWORD[rbp]
  1033. xor r14d,r11d
  1034. xor edi,eax
  1035. ror r13d,6
  1036. mov r10d,eax
  1037. and r15d,edi
  1038. ror r14d,2
  1039. add r12d,r13d
  1040. xor r10d,r15d
  1041. add ecx,r12d
  1042. add r10d,r12d
  1043. lea rbp,[4+rbp]
  ; -- schedule slot 10 + round --
  1044. mov r13d,DWORD[44+rsp]
  1045. mov r15d,DWORD[32+rsp]
  1046. mov r12d,r13d
  1047. ror r13d,11
  1048. add r10d,r14d
  1049. mov r14d,r15d
  1050. ror r15d,2
  1051. xor r13d,r12d
  1052. shr r12d,3
  1053. ror r13d,7
  1054. xor r15d,r14d
  1055. shr r14d,10
  1056. ror r15d,17
  1057. xor r12d,r13d
  1058. xor r15d,r14d
  1059. add r12d,DWORD[12+rsp]
  1060. add r12d,DWORD[40+rsp]
  1061. mov r13d,ecx
  1062. add r12d,r15d
  1063. mov r14d,r10d
  1064. ror r13d,14
  1065. mov r15d,edx
  1066. xor r13d,ecx
  1067. ror r14d,9
  1068. xor r15d,r8d
  1069. mov DWORD[40+rsp],r12d
  1070. xor r14d,r10d
  1071. and r15d,ecx
  1072. ror r13d,5
  1073. add r12d,r9d
  1074. xor r15d,r8d
  1075. ror r14d,11
  1076. xor r13d,ecx
  1077. add r12d,r15d
  1078. mov r15d,r10d
  1079. add r12d,DWORD[rbp]
  1080. xor r14d,r10d
  1081. xor r15d,r11d
  1082. ror r13d,6
  1083. mov r9d,r11d
  1084. and edi,r15d
  1085. ror r14d,2
  1086. add r12d,r13d
  1087. xor r9d,edi
  1088. add ebx,r12d
  1089. add r9d,r12d
  1090. lea rbp,[4+rbp]
  ; -- schedule slot 11 + round --
  1091. mov r13d,DWORD[48+rsp]
  1092. mov edi,DWORD[36+rsp]
  1093. mov r12d,r13d
  1094. ror r13d,11
  1095. add r9d,r14d
  1096. mov r14d,edi
  1097. ror edi,2
  1098. xor r13d,r12d
  1099. shr r12d,3
  1100. ror r13d,7
  1101. xor edi,r14d
  1102. shr r14d,10
  1103. ror edi,17
  1104. xor r12d,r13d
  1105. xor edi,r14d
  1106. add r12d,DWORD[16+rsp]
  1107. add r12d,DWORD[44+rsp]
  1108. mov r13d,ebx
  1109. add r12d,edi
  1110. mov r14d,r9d
  1111. ror r13d,14
  1112. mov edi,ecx
  1113. xor r13d,ebx
  1114. ror r14d,9
  1115. xor edi,edx
  1116. mov DWORD[44+rsp],r12d
  1117. xor r14d,r9d
  1118. and edi,ebx
  1119. ror r13d,5
  1120. add r12d,r8d
  1121. xor edi,edx
  1122. ror r14d,11
  1123. xor r13d,ebx
  1124. add r12d,edi
  1125. mov edi,r9d
  1126. add r12d,DWORD[rbp]
  1127. xor r14d,r9d
  1128. xor edi,r10d
  1129. ror r13d,6
  1130. mov r8d,r10d
  1131. and r15d,edi
  1132. ror r14d,2
  1133. add r12d,r13d
  1134. xor r8d,r15d
  1135. add eax,r12d
  1136. add r8d,r12d
  1137. lea rbp,[20+rbp] ; next K256 row (steps over the duplicate copy)
  ; -- schedule slot 12 + round --
  1138. mov r13d,DWORD[52+rsp]
  1139. mov r15d,DWORD[40+rsp]
  1140. mov r12d,r13d
  1141. ror r13d,11
  1142. add r8d,r14d
  1143. mov r14d,r15d
  1144. ror r15d,2
  1145. xor r13d,r12d
  1146. shr r12d,3
  1147. ror r13d,7
  1148. xor r15d,r14d
  1149. shr r14d,10
  1150. ror r15d,17
  1151. xor r12d,r13d
  1152. xor r15d,r14d
  1153. add r12d,DWORD[20+rsp]
  1154. add r12d,DWORD[48+rsp]
  1155. mov r13d,eax
  1156. add r12d,r15d
  1157. mov r14d,r8d
  1158. ror r13d,14
  1159. mov r15d,ebx
  1160. xor r13d,eax
  1161. ror r14d,9
  1162. xor r15d,ecx
  1163. mov DWORD[48+rsp],r12d
  1164. xor r14d,r8d
  1165. and r15d,eax
  1166. ror r13d,5
  1167. add r12d,edx
  1168. xor r15d,ecx
  1169. ror r14d,11
  1170. xor r13d,eax
  1171. add r12d,r15d
  1172. mov r15d,r8d
  1173. add r12d,DWORD[rbp]
  1174. xor r14d,r8d
  1175. xor r15d,r9d
  1176. ror r13d,6
  1177. mov edx,r9d
  1178. and edi,r15d
  1179. ror r14d,2
  1180. add r12d,r13d
  1181. xor edx,edi
  1182. add r11d,r12d
  1183. add edx,r12d
  1184. lea rbp,[4+rbp]
  ; -- schedule slot 13 + round --
  1185. mov r13d,DWORD[56+rsp]
  1186. mov edi,DWORD[44+rsp]
  1187. mov r12d,r13d
  1188. ror r13d,11
  1189. add edx,r14d
  1190. mov r14d,edi
  1191. ror edi,2
  1192. xor r13d,r12d
  1193. shr r12d,3
  1194. ror r13d,7
  1195. xor edi,r14d
  1196. shr r14d,10
  1197. ror edi,17
  1198. xor r12d,r13d
  1199. xor edi,r14d
  1200. add r12d,DWORD[24+rsp]
  1201. add r12d,DWORD[52+rsp]
  1202. mov r13d,r11d
  1203. add r12d,edi
  1204. mov r14d,edx
  1205. ror r13d,14
  1206. mov edi,eax
  1207. xor r13d,r11d
  1208. ror r14d,9
  1209. xor edi,ebx
  1210. mov DWORD[52+rsp],r12d
  1211. xor r14d,edx
  1212. and edi,r11d
  1213. ror r13d,5
  1214. add r12d,ecx
  1215. xor edi,ebx
  1216. ror r14d,11
  1217. xor r13d,r11d
  1218. add r12d,edi
  1219. mov edi,edx
  1220. add r12d,DWORD[rbp]
  1221. xor r14d,edx
  1222. xor edi,r8d
  1223. ror r13d,6
  1224. mov ecx,r8d
  1225. and r15d,edi
  1226. ror r14d,2
  1227. add r12d,r13d
  1228. xor ecx,r15d
  1229. add r10d,r12d
  1230. add ecx,r12d
  1231. lea rbp,[4+rbp]
  ; -- schedule slot 14 + round --
  1232. mov r13d,DWORD[60+rsp]
  1233. mov r15d,DWORD[48+rsp]
  1234. mov r12d,r13d
  1235. ror r13d,11
  1236. add ecx,r14d
  1237. mov r14d,r15d
  1238. ror r15d,2
  1239. xor r13d,r12d
  1240. shr r12d,3
  1241. ror r13d,7
  1242. xor r15d,r14d
  1243. shr r14d,10
  1244. ror r15d,17
  1245. xor r12d,r13d
  1246. xor r15d,r14d
  1247. add r12d,DWORD[28+rsp]
  1248. add r12d,DWORD[56+rsp]
  1249. mov r13d,r10d
  1250. add r12d,r15d
  1251. mov r14d,ecx
  1252. ror r13d,14
  1253. mov r15d,r11d
  1254. xor r13d,r10d
  1255. ror r14d,9
  1256. xor r15d,eax
  1257. mov DWORD[56+rsp],r12d
  1258. xor r14d,ecx
  1259. and r15d,r10d
  1260. ror r13d,5
  1261. add r12d,ebx
  1262. xor r15d,eax
  1263. ror r14d,11
  1264. xor r13d,r10d
  1265. add r12d,r15d
  1266. mov r15d,ecx
  1267. add r12d,DWORD[rbp]
  1268. xor r14d,ecx
  1269. xor r15d,edx
  1270. ror r13d,6
  1271. mov ebx,edx
  1272. and edi,r15d
  1273. ror r14d,2
  1274. add r12d,r13d
  1275. xor ebx,edi
  1276. add r9d,r12d
  1277. add ebx,r12d
  1278. lea rbp,[4+rbp]
  ; -- schedule slot 15 + round --
  1279. mov r13d,DWORD[rsp]
  1280. mov edi,DWORD[52+rsp]
  1281. mov r12d,r13d
  1282. ror r13d,11
  1283. add ebx,r14d
  1284. mov r14d,edi
  1285. ror edi,2
  1286. xor r13d,r12d
  1287. shr r12d,3
  1288. ror r13d,7
  1289. xor edi,r14d
  1290. shr r14d,10
  1291. ror edi,17
  1292. xor r12d,r13d
  1293. xor edi,r14d
  1294. add r12d,DWORD[32+rsp]
  1295. add r12d,DWORD[60+rsp]
  1296. mov r13d,r9d
  1297. add r12d,edi
  1298. mov r14d,ebx
  1299. ror r13d,14
  1300. mov edi,r10d
  1301. xor r13d,r9d
  1302. ror r14d,9
  1303. xor edi,r11d
  1304. mov DWORD[60+rsp],r12d
  1305. xor r14d,ebx
  1306. and edi,r9d
  1307. ror r13d,5
  1308. add r12d,eax
  1309. xor edi,r11d
  1310. ror r14d,11
  1311. xor r13d,r9d
  1312. add r12d,edi
  1313. mov edi,ebx
  1314. add r12d,DWORD[rbp]
  1315. xor r14d,ebx
  1316. xor edi,ecx
  1317. ror r13d,6
  1318. mov eax,ecx
  1319. and r15d,edi
  1320. ror r14d,2
  1321. add r12d,r13d
  1322. xor eax,r15d
  1323. add r8d,r12d
  1324. add eax,r12d
  1325. lea rbp,[20+rbp] ; next K256 row (steps over the duplicate copy)
  ; Another pass unless rbp now points at the first entry past the round
  ; constants, whose byte 3 is zero.
  1326. cmp BYTE[3+rbp],0
  1327. jnz NEAR $L$rounds_16_xx
  ; End of one block: add the working registers back into the stored state,
  ; advance the input pointer and loop while it is below the end pointer.
  ; Reload the state pointer saved by the prologue (edi was round scratch).
  1328. mov rdi,QWORD[((64+0))+rsp]
  ; Fold in the "+ r14d" left pending by the last round.
  1329. add eax,r14d
  1330. lea rsi,[64+rsi]
  1331. add eax,DWORD[rdi]
  1332. add ebx,DWORD[4+rdi]
  1333. add ecx,DWORD[8+rdi]
  1334. add edx,DWORD[12+rdi]
  1335. add r8d,DWORD[16+rdi]
  1336. add r9d,DWORD[20+rdi]
  1337. add r10d,DWORD[24+rdi]
  1338. add r11d,DWORD[28+rdi]
  ; Compare against the end-of-input pointer now; the mov stores that follow
  ; leave the flags untouched, so the jb below still tests this cmp.
  1339. cmp rsi,QWORD[((64+16))+rsp]
  1340. mov DWORD[rdi],eax
  1341. mov DWORD[4+rdi],ebx
  1342. mov DWORD[8+rdi],ecx
  1343. mov DWORD[12+rdi],edx
  1344. mov DWORD[16+rdi],r8d
  1345. mov DWORD[20+rdi],r9d
  1346. mov DWORD[24+rdi],r10d
  1347. mov DWORD[28+rdi],r11d
  1348. jb NEAR $L$loop
  ; All blocks done. rsi = rsp as it was before the six pushes; the saved
  ; registers sit just below it, in reverse push order.
  1349. mov rsi,QWORD[88+rsp]
  1350. mov r15,QWORD[((-48))+rsi]
  1351. mov r14,QWORD[((-40))+rsi]
  1352. mov r13,QWORD[((-32))+rsi]
  1353. mov r12,QWORD[((-24))+rsi]
  1354. mov rbp,QWORD[((-16))+rsi]
  1355. mov rbx,QWORD[((-8))+rsi]
  1356. lea rsp,[rsi]
  1357. $L$epilogue:
  ; Restore the caller's rdi/rsi parked at [rsp+8]/[rsp+16] on entry.
  1358. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  1359. mov rsi,QWORD[16+rsp]
  1360. DB 0F3h,0C3h ;repret
  1361. $L$SEH_end_GFp_sha256_block_data_order:
  ;-----------------------------------------------------------------------
  ; K256 -- SHA-256 round constants followed by shuffle masks and an ident
  ; string.
  ; Each 16-byte row of four round constants is emitted twice, so the 64
  ; constants occupy 512 bytes. The scalar rounds read only the first copy
  ; of each row and step over the second.
  ; NOTE(review): the duplicate copy presumably serves a wider SIMD path
  ; outside this excerpt -- confirm.
  ;-----------------------------------------------------------------------
  1362. ALIGN 64
  1363. K256:
  1364. DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
  1365. DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
  1366. DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
  1367. DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
  1368. DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
  1369. DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
  1370. DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
  1371. DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
  1372. DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
  1373. DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
  1374. DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
  1375. DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
  1376. DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
  1377. DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
  1378. DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
  1379. DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
  1380. DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
  1381. DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
  1382. DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
  1383. DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
  1384. DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
  1385. DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
  1386. DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
  1387. DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
  1388. DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
  1389. DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
  1390. DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
  1391. DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
  1392. DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
  1393. DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
  1394. DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
  1395. DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
  ; K256+512: mask that reverses the bytes within each dword; the SSSE3 path
  ; loads it into xmm7 as its shuffle control. Its first dword has a zero
  ; top byte, which is what ends the scalar $L$rounds_16_xx loop.
  1396. DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
  1397. DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
  ; K256+544 / K256+576: two further 16-byte patterns, each stored twice.
  ; NOTE(review): presumably shuffle masks for the SIMD message schedule;
  ; they are not referenced in the visible part of the file -- confirm.
  1398. DD 0x03020100,0x0b0a0908,0xffffffff,0xffffffff
  1399. DD 0x03020100,0x0b0a0908,0xffffffff,0xffffffff
  1400. DD 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
  1401. DD 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
  ; NUL-terminated ASCII string:
  ; "SHA256 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>"
  1402. DB 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97
  1403. DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54
  1404. DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
  1405. DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
  1406. DB 111,114,103,62,0
  1407. ALIGN 64
  1408. GFp_sha256_block_data_order_ssse3:
  1409. mov QWORD[8+rsp],rdi ;WIN64 prologue
  1410. mov QWORD[16+rsp],rsi
  1411. mov rax,rsp
  1412. $L$SEH_begin_GFp_sha256_block_data_order_ssse3:
  1413. mov rdi,rcx
  1414. mov rsi,rdx
  1415. mov rdx,r8
  1416. $L$ssse3_shortcut:
  1417. mov rax,rsp
  1418. push rbx
  1419. push rbp
  1420. push r12
  1421. push r13
  1422. push r14
  1423. push r15
  1424. shl rdx,4
  1425. sub rsp,160
  1426. lea rdx,[rdx*4+rsi]
  1427. and rsp,-64
  1428. mov QWORD[((64+0))+rsp],rdi
  1429. mov QWORD[((64+8))+rsp],rsi
  1430. mov QWORD[((64+16))+rsp],rdx
  1431. mov QWORD[88+rsp],rax
  1432. movaps XMMWORD[(64+32)+rsp],xmm6
  1433. movaps XMMWORD[(64+48)+rsp],xmm7
  1434. movaps XMMWORD[(64+64)+rsp],xmm8
  1435. movaps XMMWORD[(64+80)+rsp],xmm9
  1436. $L$prologue_ssse3:
  1437. mov eax,DWORD[rdi]
  1438. mov ebx,DWORD[4+rdi]
  1439. mov ecx,DWORD[8+rdi]
  1440. mov edx,DWORD[12+rdi]
  1441. mov r8d,DWORD[16+rdi]
  1442. mov r9d,DWORD[20+rdi]
  1443. mov r10d,DWORD[24+rdi]
  1444. mov r11d,DWORD[28+rdi]
  1445. jmp NEAR $L$loop_ssse3
  ; Per-block setup: load one 64-byte input block into xmm0..xmm3, shuffle
  ; its bytes with the mask at K256+512, add the first four constant rows
  ; and park the 16 sums at [rsp..63+rsp] for the scalar rounds.
  ; NOTE(review): constant rows are read at a 32-byte stride; the visible
  ; part of the table stores every 16-byte row twice, which matches.
  1446. ALIGN 16
  1447. $L$loop_ssse3:
  1448. movdqa xmm7,XMMWORD[((K256+512))] ; shuffle mask used by the four pshufb below
  1449. movdqu xmm0,XMMWORD[rsi] ; input may be unaligned, hence movdqu
  1450. movdqu xmm1,XMMWORD[16+rsi]
  1451. movdqu xmm2,XMMWORD[32+rsi]
  1452. DB 102,15,56,0,199 ; pshufb xmm0,xmm7
  1453. movdqu xmm3,XMMWORD[48+rsi]
  1454. lea rbp,[K256] ; rbp = cursor into the constant table
  1455. DB 102,15,56,0,207 ; pshufb xmm1,xmm7
  1456. movdqa xmm4,XMMWORD[rbp]
  1457. movdqa xmm5,XMMWORD[32+rbp]
  1458. DB 102,15,56,0,215 ; pshufb xmm2,xmm7
  1459. paddd xmm4,xmm0 ; xmm4 = words 0..3 + constants
  1460. movdqa xmm6,XMMWORD[64+rbp]
  1461. DB 102,15,56,0,223 ; pshufb xmm3,xmm7
  1462. movdqa xmm7,XMMWORD[96+rbp] ; the mask is no longer needed; xmm7 is reused for constants
  1463. paddd xmm5,xmm1
  1464. paddd xmm6,xmm2
  1465. paddd xmm7,xmm3
  1466. movdqa XMMWORD[rsp],xmm4
  1467. mov r14d,eax ; r14d carries the pending value of the first working variable between rounds
  1468. movdqa XMMWORD[16+rsp],xmm5
  1469. mov edi,ebx
  1470. movdqa XMMWORD[32+rsp],xmm6
  1471. xor edi,ecx ; edi = ebx^ecx, seed for the first round's Maj computation
  1472. movdqa XMMWORD[48+rsp],xmm7
  1473. mov r13d,r8d ; r13d = copy of r8d, rotated into Sigma1 by the first round
  1474. jmp NEAR $L$ssse3_00_47
  ; Loop body, part 1 of 4: four scalar rounds (stack dwords 0..3)
  ; interleaved with the SIMD update of xmm0, the oldest four schedule
  ; words.  Per round: r13d accumulates Sigma1 (rotates 14,5,6 with xors
  ; give ROTR 6^11^25), r14d accumulates Sigma0 (rotates 9,11,2 give
  ; ROTR 2^13^22), r12d = Ch, and edi/r15d alternate as the Maj temporary.
  1475. ALIGN 16
  1476. $L$ssse3_00_47:
  1477. sub rbp,-128 ; advance the constant cursor by 128 bytes (four 32-byte rows)
  1478. ror r13d,14
  1479. movdqa xmm4,xmm1
  1480. mov eax,r14d ; commit a' carried over from the previous round
  1481. mov r12d,r9d
  1482. movdqa xmm7,xmm3
  1483. ror r14d,9
  1484. xor r13d,r8d
  1485. xor r12d,r10d
  1486. ror r13d,5
  1487. xor r14d,eax
  1488. DB 102,15,58,15,224,4 ; palignr xmm4,xmm0,4 : xmm4 = dwords 1..4 of xmm1:xmm0
  1489. and r12d,r8d
  1490. xor r13d,r8d
  1491. DB 102,15,58,15,250,4 ; palignr xmm7,xmm2,4 : xmm7 = dwords 1..4 of xmm3:xmm2
  1492. add r11d,DWORD[rsp] ; round +0: h += stack dword 0 (schedule word + constant)
  1493. mov r15d,eax
  1494. xor r12d,r10d ; r12d = Ch(e,f,g) = ((f^g)&e)^g
  1495. ror r14d,11
  1496. movdqa xmm5,xmm4
  1497. xor r15d,ebx
  1498. add r11d,r12d
  1499. movdqa xmm6,xmm4
  1500. ror r13d,6
  1501. and edi,r15d
  1502. psrld xmm4,3 ; sigma0 piece: x>>3
  1503. xor r14d,eax
  1504. add r11d,r13d ; r11d = T1
  1505. xor edi,ebx ; edi = Maj(a,b,c) = ((a^b)&(b^c))^b
  1506. paddd xmm0,xmm7 ; xmm0 += the words nine positions ahead
  1507. ror r14d,2
  1508. add edx,r11d ; e' = d + T1
  1509. psrld xmm6,7 ; sigma0 piece: x>>7
  1510. add r11d,edi
  1511. mov r13d,edx
  1512. pshufd xmm7,xmm3,250 ; xmm7 = {x2,x2,x3,x3} of xmm3 (newest two words, duplicated)
  1513. add r14d,r11d ; r14d = a' = Sigma0 + T1 + Maj
  1514. ror r13d,14
  1515. pslld xmm5,14 ; sigma0 piece: x<<14
  1516. mov r11d,r14d
  1517. mov r12d,r8d
  1518. pxor xmm4,xmm6
  1519. ror r14d,9
  1520. xor r13d,edx
  1521. xor r12d,r9d
  1522. ror r13d,5
  1523. psrld xmm6,11 ; now x>>18
  1524. xor r14d,r11d
  1525. pxor xmm4,xmm5
  1526. and r12d,edx
  1527. xor r13d,edx
  1528. pslld xmm5,11 ; now x<<25
  1529. add r10d,DWORD[4+rsp] ; round +1: h += stack dword 1
  1530. mov edi,r11d
  1531. pxor xmm4,xmm6
  1532. xor r12d,r9d
  1533. ror r14d,11
  1534. movdqa xmm6,xmm7
  1535. xor edi,eax
  1536. add r10d,r12d
  1537. pxor xmm4,xmm5 ; xmm4 = (x>>3)^ROTR7(x)^ROTR18(x) = sigma0
  1538. ror r13d,6
  1539. and r15d,edi
  1540. xor r14d,r11d
  1541. psrld xmm7,10 ; sigma1 piece: x>>10
  1542. add r10d,r13d
  1543. xor r15d,eax
  1544. paddd xmm0,xmm4 ; xmm0 += sigma0
  1545. ror r14d,2
  1546. add ecx,r10d ; e' = d + T1
  1547. psrlq xmm6,17 ; 64-bit shift of a duplicated dword: low half = ROTR17
  1548. add r10d,r15d
  1549. mov r13d,ecx
  1550. add r14d,r10d ; r14d = a'
  1551. pxor xmm7,xmm6
  1552. ror r13d,14
  1553. mov r10d,r14d
  1554. mov r12d,edx
  1555. ror r14d,9
  1556. psrlq xmm6,2 ; total shift 19: low half = ROTR19
  1557. xor r13d,ecx
  1558. xor r12d,r8d
  1559. pxor xmm7,xmm6 ; even dwords of xmm7 = sigma1
  1560. ror r13d,5
  1561. xor r14d,r10d
  1562. and r12d,ecx
  1563. pshufd xmm7,xmm7,128 ; with the psrldq below: pack dwords 0,2 into the low qword, zero the high qword
  1564. xor r13d,ecx
  1565. add r9d,DWORD[8+rsp] ; round +2: h += stack dword 2
  1566. mov r15d,r10d
  1567. psrldq xmm7,8
  1568. xor r12d,r8d
  1569. ror r14d,11
  1570. xor r15d,r11d
  1571. add r9d,r12d
  1572. ror r13d,6
  1573. paddd xmm0,xmm7 ; low two words of xmm0 are now complete
  1574. and edi,r15d
  1575. xor r14d,r10d
  1576. add r9d,r13d
  1577. pshufd xmm7,xmm0,80 ; xmm7 = {x0,x0,x1,x1} of xmm0 (the two words just finished)
  1578. xor edi,r11d
  1579. ror r14d,2
  1580. add ebx,r9d ; e' = d + T1
  1581. movdqa xmm6,xmm7
  1582. add r9d,edi
  1583. mov r13d,ebx
  1584. psrld xmm7,10
  1585. add r14d,r9d ; r14d = a'
  1586. ror r13d,14
  1587. psrlq xmm6,17
  1588. mov r9d,r14d
  1589. mov r12d,ecx
  1590. pxor xmm7,xmm6
  1591. ror r14d,9
  1592. xor r13d,ebx
  1593. xor r12d,edx
  1594. ror r13d,5
  1595. xor r14d,r9d
  1596. psrlq xmm6,2
  1597. and r12d,ebx
  1598. xor r13d,ebx
  1599. add r8d,DWORD[12+rsp] ; round +3: h += stack dword 3
  1600. pxor xmm7,xmm6
  1601. mov edi,r9d
  1602. xor r12d,edx
  1603. ror r14d,11
  1604. pshufd xmm7,xmm7,8 ; with the pslldq below: place dwords 0,2 in the high qword, zero the low qword
  1605. xor edi,r10d
  1606. add r8d,r12d
  1607. movdqa xmm6,XMMWORD[rbp] ; constants for the words just scheduled
  1608. ror r13d,6
  1609. and r15d,edi
  1610. pslldq xmm7,8
  1611. xor r14d,r9d
  1612. add r8d,r13d
  1613. xor r15d,r10d
  1614. paddd xmm0,xmm7 ; high two words of xmm0 are now complete
  1615. ror r14d,2
  1616. add eax,r8d ; e' = d + T1
  1617. add r8d,r15d
  1618. paddd xmm6,xmm0
  1619. mov r13d,eax
  1620. add r14d,r8d ; r14d = a'
  1621. movdqa XMMWORD[rsp],xmm6 ; refill stack dwords 0..3 (already consumed above) for 16 rounds ahead
  ; Loop body, part 2 of 4: four scalar rounds (stack dwords 4..7)
  ; interleaved with the SIMD update of xmm1.  r13d accumulates Sigma1,
  ; r14d accumulates Sigma0 and carries a' into the next round, r12d = Ch,
  ; edi/r15d alternate as the Maj temporary.
  1622. ror r13d,14
  1623. movdqa xmm4,xmm2
  1624. mov r8d,r14d ; commit a' carried over from the previous round
  1625. mov r12d,ebx
  1626. movdqa xmm7,xmm0
  1627. ror r14d,9
  1628. xor r13d,eax
  1629. xor r12d,ecx
  1630. ror r13d,5
  1631. xor r14d,r8d
  1632. DB 102,15,58,15,225,4 ; palignr xmm4,xmm1,4 : xmm4 = dwords 1..4 of xmm2:xmm1
  1633. and r12d,eax
  1634. xor r13d,eax
  1635. DB 102,15,58,15,251,4 ; palignr xmm7,xmm3,4 : xmm7 = dwords 1..4 of xmm0:xmm3
  1636. add edx,DWORD[16+rsp] ; round +4: h += stack dword 4 (schedule word + constant)
  1637. mov r15d,r8d
  1638. xor r12d,ecx ; r12d = Ch
  1639. ror r14d,11
  1640. movdqa xmm5,xmm4
  1641. xor r15d,r9d
  1642. add edx,r12d
  1643. movdqa xmm6,xmm4
  1644. ror r13d,6
  1645. and edi,r15d
  1646. psrld xmm4,3 ; sigma0 piece: x>>3
  1647. xor r14d,r8d
  1648. add edx,r13d ; edx = T1
  1649. xor edi,r9d ; edi = Maj
  1650. paddd xmm1,xmm7 ; xmm1 += the words nine positions ahead
  1651. ror r14d,2
  1652. add r11d,edx ; e' = d + T1
  1653. psrld xmm6,7 ; sigma0 piece: x>>7
  1654. add edx,edi
  1655. mov r13d,r11d
  1656. pshufd xmm7,xmm0,250 ; xmm7 = {x2,x2,x3,x3} of xmm0 (newest two words, duplicated)
  1657. add r14d,edx ; r14d = a' = Sigma0 + T1 + Maj
  1658. ror r13d,14
  1659. pslld xmm5,14 ; sigma0 piece: x<<14
  1660. mov edx,r14d
  1661. mov r12d,eax
  1662. pxor xmm4,xmm6
  1663. ror r14d,9
  1664. xor r13d,r11d
  1665. xor r12d,ebx
  1666. ror r13d,5
  1667. psrld xmm6,11 ; now x>>18
  1668. xor r14d,edx
  1669. pxor xmm4,xmm5
  1670. and r12d,r11d
  1671. xor r13d,r11d
  1672. pslld xmm5,11 ; now x<<25
  1673. add ecx,DWORD[20+rsp] ; round +5: h += stack dword 5
  1674. mov edi,edx
  1675. pxor xmm4,xmm6
  1676. xor r12d,ebx
  1677. ror r14d,11
  1678. movdqa xmm6,xmm7
  1679. xor edi,r8d
  1680. add ecx,r12d
  1681. pxor xmm4,xmm5 ; xmm4 = sigma0
  1682. ror r13d,6
  1683. and r15d,edi
  1684. xor r14d,edx
  1685. psrld xmm7,10 ; sigma1 piece: x>>10
  1686. add ecx,r13d
  1687. xor r15d,r8d
  1688. paddd xmm1,xmm4 ; xmm1 += sigma0
  1689. ror r14d,2
  1690. add r10d,ecx ; e' = d + T1
  1691. psrlq xmm6,17 ; low half of each qword = ROTR17
  1692. add ecx,r15d
  1693. mov r13d,r10d
  1694. add r14d,ecx ; r14d = a'
  1695. pxor xmm7,xmm6
  1696. ror r13d,14
  1697. mov ecx,r14d
  1698. mov r12d,r11d
  1699. ror r14d,9
  1700. psrlq xmm6,2 ; total shift 19: low half = ROTR19
  1701. xor r13d,r10d
  1702. xor r12d,eax
  1703. pxor xmm7,xmm6 ; even dwords of xmm7 = sigma1
  1704. ror r13d,5
  1705. xor r14d,ecx
  1706. and r12d,r10d
  1707. pshufd xmm7,xmm7,128 ; with the psrldq below: pack dwords 0,2 low, zero the high qword
  1708. xor r13d,r10d
  1709. add ebx,DWORD[24+rsp] ; round +6: h += stack dword 6
  1710. mov r15d,ecx
  1711. psrldq xmm7,8
  1712. xor r12d,eax
  1713. ror r14d,11
  1714. xor r15d,edx
  1715. add ebx,r12d
  1716. ror r13d,6
  1717. paddd xmm1,xmm7 ; low two words of xmm1 are now complete
  1718. and edi,r15d
  1719. xor r14d,ecx
  1720. add ebx,r13d
  1721. pshufd xmm7,xmm1,80 ; xmm7 = {x0,x0,x1,x1} of xmm1
  1722. xor edi,edx
  1723. ror r14d,2
  1724. add r9d,ebx ; e' = d + T1
  1725. movdqa xmm6,xmm7
  1726. add ebx,edi
  1727. mov r13d,r9d
  1728. psrld xmm7,10
  1729. add r14d,ebx ; r14d = a'
  1730. ror r13d,14
  1731. psrlq xmm6,17
  1732. mov ebx,r14d
  1733. mov r12d,r10d
  1734. pxor xmm7,xmm6
  1735. ror r14d,9
  1736. xor r13d,r9d
  1737. xor r12d,r11d
  1738. ror r13d,5
  1739. xor r14d,ebx
  1740. psrlq xmm6,2
  1741. and r12d,r9d
  1742. xor r13d,r9d
  1743. add eax,DWORD[28+rsp] ; round +7: h += stack dword 7
  1744. pxor xmm7,xmm6
  1745. mov edi,ebx
  1746. xor r12d,r11d
  1747. ror r14d,11
  1748. pshufd xmm7,xmm7,8 ; with the pslldq below: place dwords 0,2 high, zero the low qword
  1749. xor edi,ecx
  1750. add eax,r12d
  1751. movdqa xmm6,XMMWORD[32+rbp] ; constants for the words just scheduled
  1752. ror r13d,6
  1753. and r15d,edi
  1754. pslldq xmm7,8
  1755. xor r14d,ebx
  1756. add eax,r13d
  1757. xor r15d,ecx
  1758. paddd xmm1,xmm7 ; high two words of xmm1 are now complete
  1759. ror r14d,2
  1760. add r8d,eax ; e' = d + T1
  1761. add eax,r15d
  1762. paddd xmm6,xmm1
  1763. mov r13d,r8d
  1764. add r14d,eax ; r14d = a'
  1765. movdqa XMMWORD[16+rsp],xmm6 ; refill stack dwords 4..7 for 16 rounds ahead
  ; Loop body, part 3 of 4: four scalar rounds (stack dwords 8..11)
  ; interleaved with the SIMD update of xmm2.  r13d accumulates Sigma1,
  ; r14d accumulates Sigma0 and carries a' into the next round, r12d = Ch,
  ; edi/r15d alternate as the Maj temporary.
  1766. ror r13d,14
  1767. movdqa xmm4,xmm3
  1768. mov eax,r14d ; commit a' carried over from the previous round
  1769. mov r12d,r9d
  1770. movdqa xmm7,xmm1
  1771. ror r14d,9
  1772. xor r13d,r8d
  1773. xor r12d,r10d
  1774. ror r13d,5
  1775. xor r14d,eax
  1776. DB 102,15,58,15,226,4 ; palignr xmm4,xmm2,4 : xmm4 = dwords 1..4 of xmm3:xmm2
  1777. and r12d,r8d
  1778. xor r13d,r8d
  1779. DB 102,15,58,15,248,4 ; palignr xmm7,xmm0,4 : xmm7 = dwords 1..4 of xmm1:xmm0
  1780. add r11d,DWORD[32+rsp] ; round +8: h += stack dword 8 (schedule word + constant)
  1781. mov r15d,eax
  1782. xor r12d,r10d ; r12d = Ch
  1783. ror r14d,11
  1784. movdqa xmm5,xmm4
  1785. xor r15d,ebx
  1786. add r11d,r12d
  1787. movdqa xmm6,xmm4
  1788. ror r13d,6
  1789. and edi,r15d
  1790. psrld xmm4,3 ; sigma0 piece: x>>3
  1791. xor r14d,eax
  1792. add r11d,r13d ; r11d = T1
  1793. xor edi,ebx ; edi = Maj
  1794. paddd xmm2,xmm7 ; xmm2 += the words nine positions ahead
  1795. ror r14d,2
  1796. add edx,r11d ; e' = d + T1
  1797. psrld xmm6,7 ; sigma0 piece: x>>7
  1798. add r11d,edi
  1799. mov r13d,edx
  1800. pshufd xmm7,xmm1,250 ; xmm7 = {x2,x2,x3,x3} of xmm1 (newest two words, duplicated)
  1801. add r14d,r11d ; r14d = a' = Sigma0 + T1 + Maj
  1802. ror r13d,14
  1803. pslld xmm5,14 ; sigma0 piece: x<<14
  1804. mov r11d,r14d
  1805. mov r12d,r8d
  1806. pxor xmm4,xmm6
  1807. ror r14d,9
  1808. xor r13d,edx
  1809. xor r12d,r9d
  1810. ror r13d,5
  1811. psrld xmm6,11 ; now x>>18
  1812. xor r14d,r11d
  1813. pxor xmm4,xmm5
  1814. and r12d,edx
  1815. xor r13d,edx
  1816. pslld xmm5,11 ; now x<<25
  1817. add r10d,DWORD[36+rsp] ; round +9: h += stack dword 9
  1818. mov edi,r11d
  1819. pxor xmm4,xmm6
  1820. xor r12d,r9d
  1821. ror r14d,11
  1822. movdqa xmm6,xmm7
  1823. xor edi,eax
  1824. add r10d,r12d
  1825. pxor xmm4,xmm5 ; xmm4 = sigma0
  1826. ror r13d,6
  1827. and r15d,edi
  1828. xor r14d,r11d
  1829. psrld xmm7,10 ; sigma1 piece: x>>10
  1830. add r10d,r13d
  1831. xor r15d,eax
  1832. paddd xmm2,xmm4 ; xmm2 += sigma0
  1833. ror r14d,2
  1834. add ecx,r10d ; e' = d + T1
  1835. psrlq xmm6,17 ; low half of each qword = ROTR17
  1836. add r10d,r15d
  1837. mov r13d,ecx
  1838. add r14d,r10d ; r14d = a'
  1839. pxor xmm7,xmm6
  1840. ror r13d,14
  1841. mov r10d,r14d
  1842. mov r12d,edx
  1843. ror r14d,9
  1844. psrlq xmm6,2 ; total shift 19: low half = ROTR19
  1845. xor r13d,ecx
  1846. xor r12d,r8d
  1847. pxor xmm7,xmm6 ; even dwords of xmm7 = sigma1
  1848. ror r13d,5
  1849. xor r14d,r10d
  1850. and r12d,ecx
  1851. pshufd xmm7,xmm7,128 ; with the psrldq below: pack dwords 0,2 low, zero the high qword
  1852. xor r13d,ecx
  1853. add r9d,DWORD[40+rsp] ; round +10: h += stack dword 10
  1854. mov r15d,r10d
  1855. psrldq xmm7,8
  1856. xor r12d,r8d
  1857. ror r14d,11
  1858. xor r15d,r11d
  1859. add r9d,r12d
  1860. ror r13d,6
  1861. paddd xmm2,xmm7 ; low two words of xmm2 are now complete
  1862. and edi,r15d
  1863. xor r14d,r10d
  1864. add r9d,r13d
  1865. pshufd xmm7,xmm2,80 ; xmm7 = {x0,x0,x1,x1} of xmm2
  1866. xor edi,r11d
  1867. ror r14d,2
  1868. add ebx,r9d ; e' = d + T1
  1869. movdqa xmm6,xmm7
  1870. add r9d,edi
  1871. mov r13d,ebx
  1872. psrld xmm7,10
  1873. add r14d,r9d ; r14d = a'
  1874. ror r13d,14
  1875. psrlq xmm6,17
  1876. mov r9d,r14d
  1877. mov r12d,ecx
  1878. pxor xmm7,xmm6
  1879. ror r14d,9
  1880. xor r13d,ebx
  1881. xor r12d,edx
  1882. ror r13d,5
  1883. xor r14d,r9d
  1884. psrlq xmm6,2
  1885. and r12d,ebx
  1886. xor r13d,ebx
  1887. add r8d,DWORD[44+rsp] ; round +11: h += stack dword 11
  1888. pxor xmm7,xmm6
  1889. mov edi,r9d
  1890. xor r12d,edx
  1891. ror r14d,11
  1892. pshufd xmm7,xmm7,8 ; with the pslldq below: place dwords 0,2 high, zero the low qword
  1893. xor edi,r10d
  1894. add r8d,r12d
  1895. movdqa xmm6,XMMWORD[64+rbp] ; constants for the words just scheduled
  1896. ror r13d,6
  1897. and r15d,edi
  1898. pslldq xmm7,8
  1899. xor r14d,r9d
  1900. add r8d,r13d
  1901. xor r15d,r10d
  1902. paddd xmm2,xmm7 ; high two words of xmm2 are now complete
  1903. ror r14d,2
  1904. add eax,r8d ; e' = d + T1
  1905. add r8d,r15d
  1906. paddd xmm6,xmm2
  1907. mov r13d,eax
  1908. add r14d,r8d ; r14d = a'
  1909. movdqa XMMWORD[32+rsp],xmm6 ; refill stack dwords 8..11 for 16 rounds ahead
  ; Loop body, part 4 of 4: four scalar rounds (stack dwords 12..15)
  ; interleaved with the SIMD update of xmm3, then the loop-exit test.
  ; r13d accumulates Sigma1, r14d accumulates Sigma0 and carries a' into
  ; the next round, r12d = Ch, edi/r15d alternate as the Maj temporary.
  1910. ror r13d,14
  1911. movdqa xmm4,xmm0
  1912. mov r8d,r14d ; commit a' carried over from the previous round
  1913. mov r12d,ebx
  1914. movdqa xmm7,xmm2
  1915. ror r14d,9
  1916. xor r13d,eax
  1917. xor r12d,ecx
  1918. ror r13d,5
  1919. xor r14d,r8d
  1920. DB 102,15,58,15,227,4 ; palignr xmm4,xmm3,4 : xmm4 = dwords 1..4 of xmm0:xmm3
  1921. and r12d,eax
  1922. xor r13d,eax
  1923. DB 102,15,58,15,249,4 ; palignr xmm7,xmm1,4 : xmm7 = dwords 1..4 of xmm2:xmm1
  1924. add edx,DWORD[48+rsp] ; round +12: h += stack dword 12 (schedule word + constant)
  1925. mov r15d,r8d
  1926. xor r12d,ecx ; r12d = Ch
  1927. ror r14d,11
  1928. movdqa xmm5,xmm4
  1929. xor r15d,r9d
  1930. add edx,r12d
  1931. movdqa xmm6,xmm4
  1932. ror r13d,6
  1933. and edi,r15d
  1934. psrld xmm4,3 ; sigma0 piece: x>>3
  1935. xor r14d,r8d
  1936. add edx,r13d ; edx = T1
  1937. xor edi,r9d ; edi = Maj
  1938. paddd xmm3,xmm7 ; xmm3 += the words nine positions ahead
  1939. ror r14d,2
  1940. add r11d,edx ; e' = d + T1
  1941. psrld xmm6,7 ; sigma0 piece: x>>7
  1942. add edx,edi
  1943. mov r13d,r11d
  1944. pshufd xmm7,xmm2,250 ; xmm7 = {x2,x2,x3,x3} of xmm2 (newest two words, duplicated)
  1945. add r14d,edx ; r14d = a' = Sigma0 + T1 + Maj
  1946. ror r13d,14
  1947. pslld xmm5,14 ; sigma0 piece: x<<14
  1948. mov edx,r14d
  1949. mov r12d,eax
  1950. pxor xmm4,xmm6
  1951. ror r14d,9
  1952. xor r13d,r11d
  1953. xor r12d,ebx
  1954. ror r13d,5
  1955. psrld xmm6,11 ; now x>>18
  1956. xor r14d,edx
  1957. pxor xmm4,xmm5
  1958. and r12d,r11d
  1959. xor r13d,r11d
  1960. pslld xmm5,11 ; now x<<25
  1961. add ecx,DWORD[52+rsp] ; round +13: h += stack dword 13
  1962. mov edi,edx
  1963. pxor xmm4,xmm6
  1964. xor r12d,ebx
  1965. ror r14d,11
  1966. movdqa xmm6,xmm7
  1967. xor edi,r8d
  1968. add ecx,r12d
  1969. pxor xmm4,xmm5 ; xmm4 = sigma0
  1970. ror r13d,6
  1971. and r15d,edi
  1972. xor r14d,edx
  1973. psrld xmm7,10 ; sigma1 piece: x>>10
  1974. add ecx,r13d
  1975. xor r15d,r8d
  1976. paddd xmm3,xmm4 ; xmm3 += sigma0
  1977. ror r14d,2
  1978. add r10d,ecx ; e' = d + T1
  1979. psrlq xmm6,17 ; low half of each qword = ROTR17
  1980. add ecx,r15d
  1981. mov r13d,r10d
  1982. add r14d,ecx ; r14d = a'
  1983. pxor xmm7,xmm6
  1984. ror r13d,14
  1985. mov ecx,r14d
  1986. mov r12d,r11d
  1987. ror r14d,9
  1988. psrlq xmm6,2 ; total shift 19: low half = ROTR19
  1989. xor r13d,r10d
  1990. xor r12d,eax
  1991. pxor xmm7,xmm6 ; even dwords of xmm7 = sigma1
  1992. ror r13d,5
  1993. xor r14d,ecx
  1994. and r12d,r10d
  1995. pshufd xmm7,xmm7,128 ; with the psrldq below: pack dwords 0,2 low, zero the high qword
  1996. xor r13d,r10d
  1997. add ebx,DWORD[56+rsp] ; round +14: h += stack dword 14
  1998. mov r15d,ecx
  1999. psrldq xmm7,8
  2000. xor r12d,eax
  2001. ror r14d,11
  2002. xor r15d,edx
  2003. add ebx,r12d
  2004. ror r13d,6
  2005. paddd xmm3,xmm7 ; low two words of xmm3 are now complete
  2006. and edi,r15d
  2007. xor r14d,ecx
  2008. add ebx,r13d
  2009. pshufd xmm7,xmm3,80 ; xmm7 = {x0,x0,x1,x1} of xmm3
  2010. xor edi,edx
  2011. ror r14d,2
  2012. add r9d,ebx ; e' = d + T1
  2013. movdqa xmm6,xmm7
  2014. add ebx,edi
  2015. mov r13d,r9d
  2016. psrld xmm7,10
  2017. add r14d,ebx ; r14d = a'
  2018. ror r13d,14
  2019. psrlq xmm6,17
  2020. mov ebx,r14d
  2021. mov r12d,r10d
  2022. pxor xmm7,xmm6
  2023. ror r14d,9
  2024. xor r13d,r9d
  2025. xor r12d,r11d
  2026. ror r13d,5
  2027. xor r14d,ebx
  2028. psrlq xmm6,2
  2029. and r12d,r9d
  2030. xor r13d,r9d
  2031. add eax,DWORD[60+rsp] ; round +15: h += stack dword 15
  2032. pxor xmm7,xmm6
  2033. mov edi,ebx
  2034. xor r12d,r11d
  2035. ror r14d,11
  2036. pshufd xmm7,xmm7,8 ; with the pslldq below: place dwords 0,2 high, zero the low qword
  2037. xor edi,ecx
  2038. add eax,r12d
  2039. movdqa xmm6,XMMWORD[96+rbp] ; constants for the words just scheduled
  2040. ror r13d,6
  2041. and r15d,edi
  2042. pslldq xmm7,8
  2043. xor r14d,ebx
  2044. add eax,r13d
  2045. xor r15d,ecx
  2046. paddd xmm3,xmm7 ; high two words of xmm3 are now complete
  2047. ror r14d,2
  2048. add r8d,eax ; e' = d + T1
  2049. add eax,r15d
  2050. paddd xmm6,xmm3
  2051. mov r13d,r8d
  2052. add r14d,eax ; r14d = a'
  2053. movdqa XMMWORD[48+rsp],xmm6 ; refill stack dwords 12..15 for 16 rounds ahead
  2054. cmp BYTE[131+rbp],0 ; top byte of the first dword 128 bytes past rbp; NOTE(review): appears to be zero only once that dword is the 0x00010203 mask word after the constants -- confirm against the K256 layout
  2055. jne NEAR $L$ssse3_00_47 ; otherwise run another 16 rounds with scheduling
  ; Final 16 rounds, group 1 of 4: four scalar rounds reading stack dwords
  ; 0..3, with no further message scheduling.  r13d accumulates Sigma1
  ; (ROTR 6^11^25), r14d accumulates Sigma0 (ROTR 2^13^22) and carries a'
  ; into the next round, r12d = Ch, edi/r15d alternate as the Maj temporary.
  2056. ror r13d,14
  2057. mov eax,r14d ; commit a' carried over from the previous round
  2058. mov r12d,r9d
  2059. ror r14d,9
  2060. xor r13d,r8d
  2061. xor r12d,r10d
  2062. ror r13d,5
  2063. xor r14d,eax
  2064. and r12d,r8d
  2065. xor r13d,r8d
  2066. add r11d,DWORD[rsp] ; h += stack dword 0 (schedule word + constant)
  2067. mov r15d,eax
  2068. xor r12d,r10d ; r12d = Ch(e,f,g) = ((f^g)&e)^g
  2069. ror r14d,11
  2070. xor r15d,ebx
  2071. add r11d,r12d
  2072. ror r13d,6
  2073. and edi,r15d
  2074. xor r14d,eax
  2075. add r11d,r13d ; r11d = T1
  2076. xor edi,ebx ; edi = Maj(a,b,c) = ((a^b)&(b^c))^b
  2077. ror r14d,2
  2078. add edx,r11d ; e' = d + T1
  2079. add r11d,edi
  2080. mov r13d,edx
  2081. add r14d,r11d ; r14d = a' = Sigma0 + T1 + Maj
  2082. ror r13d,14
  2083. mov r11d,r14d
  2084. mov r12d,r8d
  2085. ror r14d,9
  2086. xor r13d,edx
  2087. xor r12d,r9d
  2088. ror r13d,5
  2089. xor r14d,r11d
  2090. and r12d,edx
  2091. xor r13d,edx
  2092. add r10d,DWORD[4+rsp] ; h += stack dword 1
  2093. mov edi,r11d
  2094. xor r12d,r9d
  2095. ror r14d,11
  2096. xor edi,eax
  2097. add r10d,r12d
  2098. ror r13d,6
  2099. and r15d,edi
  2100. xor r14d,r11d
  2101. add r10d,r13d
  2102. xor r15d,eax
  2103. ror r14d,2
  2104. add ecx,r10d ; e' = d + T1
  2105. add r10d,r15d
  2106. mov r13d,ecx
  2107. add r14d,r10d ; r14d = a'
  2108. ror r13d,14
  2109. mov r10d,r14d
  2110. mov r12d,edx
  2111. ror r14d,9
  2112. xor r13d,ecx
  2113. xor r12d,r8d
  2114. ror r13d,5
  2115. xor r14d,r10d
  2116. and r12d,ecx
  2117. xor r13d,ecx
  2118. add r9d,DWORD[8+rsp] ; h += stack dword 2
  2119. mov r15d,r10d
  2120. xor r12d,r8d
  2121. ror r14d,11
  2122. xor r15d,r11d
  2123. add r9d,r12d
  2124. ror r13d,6
  2125. and edi,r15d
  2126. xor r14d,r10d
  2127. add r9d,r13d
  2128. xor edi,r11d
  2129. ror r14d,2
  2130. add ebx,r9d ; e' = d + T1
  2131. add r9d,edi
  2132. mov r13d,ebx
  2133. add r14d,r9d ; r14d = a'
  2134. ror r13d,14
  2135. mov r9d,r14d
  2136. mov r12d,ecx
  2137. ror r14d,9
  2138. xor r13d,ebx
  2139. xor r12d,edx
  2140. ror r13d,5
  2141. xor r14d,r9d
  2142. and r12d,ebx
  2143. xor r13d,ebx
  2144. add r8d,DWORD[12+rsp] ; h += stack dword 3
  2145. mov edi,r9d
  2146. xor r12d,edx
  2147. ror r14d,11
  2148. xor edi,r10d
  2149. add r8d,r12d
  2150. ror r13d,6
  2151. and r15d,edi
  2152. xor r14d,r9d
  2153. add r8d,r13d
  2154. xor r15d,r10d
  2155. ror r14d,2
  2156. add eax,r8d ; e' = d + T1
  2157. add r8d,r15d
  2158. mov r13d,eax
  2159. add r14d,r8d ; r14d = a'
  ; Final 16 rounds, group 2 of 4: four scalar rounds reading stack dwords
  ; 4..7, with no message scheduling.  r13d accumulates Sigma1, r14d
  ; accumulates Sigma0 and carries a' into the next round, r12d = Ch,
  ; edi/r15d alternate as the Maj temporary.
  2160. ror r13d,14
  2161. mov r8d,r14d ; commit a' carried over from the previous round
  2162. mov r12d,ebx
  2163. ror r14d,9
  2164. xor r13d,eax
  2165. xor r12d,ecx
  2166. ror r13d,5
  2167. xor r14d,r8d
  2168. and r12d,eax
  2169. xor r13d,eax
  2170. add edx,DWORD[16+rsp] ; h += stack dword 4 (schedule word + constant)
  2171. mov r15d,r8d
  2172. xor r12d,ecx ; r12d = Ch
  2173. ror r14d,11
  2174. xor r15d,r9d
  2175. add edx,r12d
  2176. ror r13d,6
  2177. and edi,r15d
  2178. xor r14d,r8d
  2179. add edx,r13d ; edx = T1
  2180. xor edi,r9d ; edi = Maj
  2181. ror r14d,2
  2182. add r11d,edx ; e' = d + T1
  2183. add edx,edi
  2184. mov r13d,r11d
  2185. add r14d,edx ; r14d = a' = Sigma0 + T1 + Maj
  2186. ror r13d,14
  2187. mov edx,r14d
  2188. mov r12d,eax
  2189. ror r14d,9
  2190. xor r13d,r11d
  2191. xor r12d,ebx
  2192. ror r13d,5
  2193. xor r14d,edx
  2194. and r12d,r11d
  2195. xor r13d,r11d
  2196. add ecx,DWORD[20+rsp] ; h += stack dword 5
  2197. mov edi,edx
  2198. xor r12d,ebx
  2199. ror r14d,11
  2200. xor edi,r8d
  2201. add ecx,r12d
  2202. ror r13d,6
  2203. and r15d,edi
  2204. xor r14d,edx
  2205. add ecx,r13d
  2206. xor r15d,r8d
  2207. ror r14d,2
  2208. add r10d,ecx ; e' = d + T1
  2209. add ecx,r15d
  2210. mov r13d,r10d
  2211. add r14d,ecx ; r14d = a'
  2212. ror r13d,14
  2213. mov ecx,r14d
  2214. mov r12d,r11d
  2215. ror r14d,9
  2216. xor r13d,r10d
  2217. xor r12d,eax
  2218. ror r13d,5
  2219. xor r14d,ecx
  2220. and r12d,r10d
  2221. xor r13d,r10d
  2222. add ebx,DWORD[24+rsp] ; h += stack dword 6
  2223. mov r15d,ecx
  2224. xor r12d,eax
  2225. ror r14d,11
  2226. xor r15d,edx
  2227. add ebx,r12d
  2228. ror r13d,6
  2229. and edi,r15d
  2230. xor r14d,ecx
  2231. add ebx,r13d
  2232. xor edi,edx
  2233. ror r14d,2
  2234. add r9d,ebx ; e' = d + T1
  2235. add ebx,edi
  2236. mov r13d,r9d
  2237. add r14d,ebx ; r14d = a'
  2238. ror r13d,14
  2239. mov ebx,r14d
  2240. mov r12d,r10d
  2241. ror r14d,9
  2242. xor r13d,r9d
  2243. xor r12d,r11d
  2244. ror r13d,5
  2245. xor r14d,ebx
  2246. and r12d,r9d
  2247. xor r13d,r9d
  2248. add eax,DWORD[28+rsp] ; h += stack dword 7
  2249. mov edi,ebx
  2250. xor r12d,r11d
  2251. ror r14d,11
  2252. xor edi,ecx
  2253. add eax,r12d
  2254. ror r13d,6
  2255. and r15d,edi
  2256. xor r14d,ebx
  2257. add eax,r13d
  2258. xor r15d,ecx
  2259. ror r14d,2
  2260. add r8d,eax ; e' = d + T1
  2261. add eax,r15d
  2262. mov r13d,r8d
  2263. add r14d,eax ; r14d = a'
  ; Final 16 rounds, group 3 of 4: four scalar rounds reading stack dwords
  ; 8..11, with no message scheduling.  r13d accumulates Sigma1, r14d
  ; accumulates Sigma0 and carries a' into the next round, r12d = Ch,
  ; edi/r15d alternate as the Maj temporary.
  2264. ror r13d,14
  2265. mov eax,r14d ; commit a' carried over from the previous round
  2266. mov r12d,r9d
  2267. ror r14d,9
  2268. xor r13d,r8d
  2269. xor r12d,r10d
  2270. ror r13d,5
  2271. xor r14d,eax
  2272. and r12d,r8d
  2273. xor r13d,r8d
  2274. add r11d,DWORD[32+rsp] ; h += stack dword 8 (schedule word + constant)
  2275. mov r15d,eax
  2276. xor r12d,r10d ; r12d = Ch
  2277. ror r14d,11
  2278. xor r15d,ebx
  2279. add r11d,r12d
  2280. ror r13d,6
  2281. and edi,r15d
  2282. xor r14d,eax
  2283. add r11d,r13d ; r11d = T1
  2284. xor edi,ebx ; edi = Maj
  2285. ror r14d,2
  2286. add edx,r11d ; e' = d + T1
  2287. add r11d,edi
  2288. mov r13d,edx
  2289. add r14d,r11d ; r14d = a' = Sigma0 + T1 + Maj
  2290. ror r13d,14
  2291. mov r11d,r14d
  2292. mov r12d,r8d
  2293. ror r14d,9
  2294. xor r13d,edx
  2295. xor r12d,r9d
  2296. ror r13d,5
  2297. xor r14d,r11d
  2298. and r12d,edx
  2299. xor r13d,edx
  2300. add r10d,DWORD[36+rsp] ; h += stack dword 9
  2301. mov edi,r11d
  2302. xor r12d,r9d
  2303. ror r14d,11
  2304. xor edi,eax
  2305. add r10d,r12d
  2306. ror r13d,6
  2307. and r15d,edi
  2308. xor r14d,r11d
  2309. add r10d,r13d
  2310. xor r15d,eax
  2311. ror r14d,2
  2312. add ecx,r10d ; e' = d + T1
  2313. add r10d,r15d
  2314. mov r13d,ecx
  2315. add r14d,r10d ; r14d = a'
  2316. ror r13d,14
  2317. mov r10d,r14d
  2318. mov r12d,edx
  2319. ror r14d,9
  2320. xor r13d,ecx
  2321. xor r12d,r8d
  2322. ror r13d,5
  2323. xor r14d,r10d
  2324. and r12d,ecx
  2325. xor r13d,ecx
  2326. add r9d,DWORD[40+rsp] ; h += stack dword 10
  2327. mov r15d,r10d
  2328. xor r12d,r8d
  2329. ror r14d,11
  2330. xor r15d,r11d
  2331. add r9d,r12d
  2332. ror r13d,6
  2333. and edi,r15d
  2334. xor r14d,r10d
  2335. add r9d,r13d
  2336. xor edi,r11d
  2337. ror r14d,2
  2338. add ebx,r9d ; e' = d + T1
  2339. add r9d,edi
  2340. mov r13d,ebx
  2341. add r14d,r9d ; r14d = a'
  2342. ror r13d,14
  2343. mov r9d,r14d
  2344. mov r12d,ecx
  2345. ror r14d,9
  2346. xor r13d,ebx
  2347. xor r12d,edx
  2348. ror r13d,5
  2349. xor r14d,r9d
  2350. and r12d,ebx
  2351. xor r13d,ebx
  2352. add r8d,DWORD[44+rsp] ; h += stack dword 11
  2353. mov edi,r9d
  2354. xor r12d,edx
  2355. ror r14d,11
  2356. xor edi,r10d
  2357. add r8d,r12d
  2358. ror r13d,6
  2359. and r15d,edi
  2360. xor r14d,r9d
  2361. add r8d,r13d
  2362. xor r15d,r10d
  2363. ror r14d,2
  2364. add eax,r8d ; e' = d + T1
  2365. add r8d,r15d
  2366. mov r13d,eax
  2367. add r14d,r8d ; r14d = a'
  ; Final 16 rounds, group 4 of 4: four scalar rounds reading stack dwords
  ; 12..15, with no message scheduling.  r13d accumulates Sigma1, r14d
  ; accumulates Sigma0, r12d = Ch, edi/r15d alternate as the Maj temporary.
  ; On exit the last a' is still pending in r14d.
  2368. ror r13d,14
  2369. mov r8d,r14d ; commit a' carried over from the previous round
  2370. mov r12d,ebx
  2371. ror r14d,9
  2372. xor r13d,eax
  2373. xor r12d,ecx
  2374. ror r13d,5
  2375. xor r14d,r8d
  2376. and r12d,eax
  2377. xor r13d,eax
  2378. add edx,DWORD[48+rsp] ; h += stack dword 12 (schedule word + constant)
  2379. mov r15d,r8d
  2380. xor r12d,ecx ; r12d = Ch
  2381. ror r14d,11
  2382. xor r15d,r9d
  2383. add edx,r12d
  2384. ror r13d,6
  2385. and edi,r15d
  2386. xor r14d,r8d
  2387. add edx,r13d ; edx = T1
  2388. xor edi,r9d ; edi = Maj
  2389. ror r14d,2
  2390. add r11d,edx ; e' = d + T1
  2391. add edx,edi
  2392. mov r13d,r11d
  2393. add r14d,edx ; r14d = a' = Sigma0 + T1 + Maj
  2394. ror r13d,14
  2395. mov edx,r14d
  2396. mov r12d,eax
  2397. ror r14d,9
  2398. xor r13d,r11d
  2399. xor r12d,ebx
  2400. ror r13d,5
  2401. xor r14d,edx
  2402. and r12d,r11d
  2403. xor r13d,r11d
  2404. add ecx,DWORD[52+rsp] ; h += stack dword 13
  2405. mov edi,edx
  2406. xor r12d,ebx
  2407. ror r14d,11
  2408. xor edi,r8d
  2409. add ecx,r12d
  2410. ror r13d,6
  2411. and r15d,edi
  2412. xor r14d,edx
  2413. add ecx,r13d
  2414. xor r15d,r8d
  2415. ror r14d,2
  2416. add r10d,ecx ; e' = d + T1
  2417. add ecx,r15d
  2418. mov r13d,r10d
  2419. add r14d,ecx ; r14d = a'
  2420. ror r13d,14
  2421. mov ecx,r14d
  2422. mov r12d,r11d
  2423. ror r14d,9
  2424. xor r13d,r10d
  2425. xor r12d,eax
  2426. ror r13d,5
  2427. xor r14d,ecx
  2428. and r12d,r10d
  2429. xor r13d,r10d
  2430. add ebx,DWORD[56+rsp] ; h += stack dword 14
  2431. mov r15d,ecx
  2432. xor r12d,eax
  2433. ror r14d,11
  2434. xor r15d,edx
  2435. add ebx,r12d
  2436. ror r13d,6
  2437. and edi,r15d
  2438. xor r14d,ecx
  2439. add ebx,r13d
  2440. xor edi,edx
  2441. ror r14d,2
  2442. add r9d,ebx ; e' = d + T1
  2443. add ebx,edi
  2444. mov r13d,r9d
  2445. add r14d,ebx ; r14d = a'
  2446. ror r13d,14
  2447. mov ebx,r14d
  2448. mov r12d,r10d
  2449. ror r14d,9
  2450. xor r13d,r9d
  2451. xor r12d,r11d
  2452. ror r13d,5
  2453. xor r14d,ebx
  2454. and r12d,r9d
  2455. xor r13d,r9d
  2456. add eax,DWORD[60+rsp] ; h += stack dword 15
  2457. mov edi,ebx
  2458. xor r12d,r11d
  2459. ror r14d,11
  2460. xor edi,ecx
  2461. add eax,r12d
  2462. ror r13d,6
  2463. and r15d,edi
  2464. xor r14d,ebx
  2465. add eax,r13d
  2466. xor r15d,ecx
  2467. ror r14d,2
  2468. add r8d,eax ; e' = d + T1
  2469. add eax,r15d
  2470. mov r13d,r8d
  2471. add r14d,eax ; r14d = a' of the last round (committed after the rounds)
  ; End of one block: add the eight working variables into the state at
  ; [rdi], advance the input pointer by 64 and loop while it is below the
  ; end pointer kept at [80+rsp].  Then restore xmm6..xmm9, the pushed
  ; general-purpose registers, rsp, and the rdi/rsi values parked by the
  ; WIN64 prologue.
  2472. mov rdi,QWORD[((64+0))+rsp] ; reload the state pointer (edi served as scratch in the rounds)
  2473. mov eax,r14d ; commit the a' left pending by the last round
  2474. add eax,DWORD[rdi]
  2475. lea rsi,[64+rsi] ; next 64-byte input block
  2476. add ebx,DWORD[4+rdi]
  2477. add ecx,DWORD[8+rdi]
  2478. add edx,DWORD[12+rdi]
  2479. add r8d,DWORD[16+rdi]
  2480. add r9d,DWORD[20+rdi]
  2481. add r10d,DWORD[24+rdi]
  2482. add r11d,DWORD[28+rdi]
  2483. cmp rsi,QWORD[((64+16))+rsp] ; flags survive the mov stores below and feed the jb
  2484. mov DWORD[rdi],eax
  2485. mov DWORD[4+rdi],ebx
  2486. mov DWORD[8+rdi],ecx
  2487. mov DWORD[12+rdi],edx
  2488. mov DWORD[16+rdi],r8d
  2489. mov DWORD[20+rdi],r9d
  2490. mov DWORD[24+rdi],r10d
  2491. mov DWORD[28+rdi],r11d
  2492. jb NEAR $L$loop_ssse3 ; unsigned compare: more input remains
  2493. mov rsi,QWORD[88+rsp] ; rsi = rsp value recorded before the pushes
  2494. movaps xmm6,XMMWORD[((64+32))+rsp]
  2495. movaps xmm7,XMMWORD[((64+48))+rsp]
  2496. movaps xmm8,XMMWORD[((64+64))+rsp]
  2497. movaps xmm9,XMMWORD[((64+80))+rsp]
  2498. mov r15,QWORD[((-48))+rsi] ; pushed registers are reloaded by offset rather than popped, because rsp was realigned
  2499. mov r14,QWORD[((-40))+rsi]
  2500. mov r13,QWORD[((-32))+rsi]
  2501. mov r12,QWORD[((-24))+rsi]
  2502. mov rbp,QWORD[((-16))+rsi]
  2503. mov rbx,QWORD[((-8))+rsi]
  2504. lea rsp,[rsi] ; rsp back to its value at entry
  2505. $L$epilogue_ssse3:
  2506. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  2507. mov rsi,QWORD[16+rsp]
  2508. DB 0F3h,0C3h ;repret
  2509. $L$SEH_end_GFp_sha256_block_data_order_ssse3:
  2510. ALIGN 64
  2511. GFp_sha256_block_data_order_avx:
  2512. mov QWORD[8+rsp],rdi ;WIN64 prologue
  2513. mov QWORD[16+rsp],rsi
  2514. mov rax,rsp
  2515. $L$SEH_begin_GFp_sha256_block_data_order_avx:
  2516. mov rdi,rcx
  2517. mov rsi,rdx
  2518. mov rdx,r8
  2519. $L$avx_shortcut:
  2520. mov rax,rsp
  2521. push rbx
  2522. push rbp
  2523. push r12
  2524. push r13
  2525. push r14
  2526. push r15
  2527. shl rdx,4
  2528. sub rsp,160
  2529. lea rdx,[rdx*4+rsi]
  2530. and rsp,-64
  2531. mov QWORD[((64+0))+rsp],rdi
  2532. mov QWORD[((64+8))+rsp],rsi
  2533. mov QWORD[((64+16))+rsp],rdx
  2534. mov QWORD[88+rsp],rax
  2535. movaps XMMWORD[(64+32)+rsp],xmm6
  2536. movaps XMMWORD[(64+48)+rsp],xmm7
  2537. movaps XMMWORD[(64+64)+rsp],xmm8
  2538. movaps XMMWORD[(64+80)+rsp],xmm9
  2539. $L$prologue_avx:
  2540. vzeroupper
  2541. mov eax,DWORD[rdi]
  2542. mov ebx,DWORD[4+rdi]
  2543. mov ecx,DWORD[8+rdi]
  2544. mov edx,DWORD[12+rdi]
  2545. mov r8d,DWORD[16+rdi]
  2546. mov r9d,DWORD[20+rdi]
  2547. mov r10d,DWORD[24+rdi]
  2548. mov r11d,DWORD[28+rdi]
  2549. vmovdqa xmm8,XMMWORD[((K256+512+32))]
  2550. vmovdqa xmm9,XMMWORD[((K256+512+64))]
  2551. jmp NEAR $L$loop_avx
  2552. ALIGN 16
  2553. $L$loop_avx:
  2554. vmovdqa xmm7,XMMWORD[((K256+512))]
  2555. vmovdqu xmm0,XMMWORD[rsi]
  2556. vmovdqu xmm1,XMMWORD[16+rsi]
  2557. vmovdqu xmm2,XMMWORD[32+rsi]
  2558. vmovdqu xmm3,XMMWORD[48+rsi]
  2559. vpshufb xmm0,xmm0,xmm7
  2560. lea rbp,[K256]
  2561. vpshufb xmm1,xmm1,xmm7
  2562. vpshufb xmm2,xmm2,xmm7
  2563. vpaddd xmm4,xmm0,XMMWORD[rbp]
  2564. vpshufb xmm3,xmm3,xmm7
  2565. vpaddd xmm5,xmm1,XMMWORD[32+rbp]
  2566. vpaddd xmm6,xmm2,XMMWORD[64+rbp]
  2567. vpaddd xmm7,xmm3,XMMWORD[96+rbp]
  2568. vmovdqa XMMWORD[rsp],xmm4
  2569. mov r14d,eax
  2570. vmovdqa XMMWORD[16+rsp],xmm5
  2571. mov edi,ebx
  2572. vmovdqa XMMWORD[32+rsp],xmm6
  2573. xor edi,ecx
  2574. vmovdqa XMMWORD[48+rsp],xmm7
  2575. mov r13d,r8d
  2576. jmp NEAR $L$avx_00_47
  2577. ALIGN 16
  2578. $L$avx_00_47:
  2579. sub rbp,-128
  2580. vpalignr xmm4,xmm1,xmm0,4
  2581. shrd r13d,r13d,14
  2582. mov eax,r14d
  2583. mov r12d,r9d
  2584. vpalignr xmm7,xmm3,xmm2,4
  2585. shrd r14d,r14d,9
  2586. xor r13d,r8d
  2587. xor r12d,r10d
  2588. vpsrld xmm6,xmm4,7
  2589. shrd r13d,r13d,5
  2590. xor r14d,eax
  2591. and r12d,r8d
  2592. vpaddd xmm0,xmm0,xmm7
  2593. xor r13d,r8d
  2594. add r11d,DWORD[rsp]
  2595. mov r15d,eax
  2596. vpsrld xmm7,xmm4,3
  2597. xor r12d,r10d
  2598. shrd r14d,r14d,11
  2599. xor r15d,ebx
  2600. vpslld xmm5,xmm4,14
  2601. add r11d,r12d
  2602. shrd r13d,r13d,6
  2603. and edi,r15d
  2604. vpxor xmm4,xmm7,xmm6
  2605. xor r14d,eax
  2606. add r11d,r13d
  2607. xor edi,ebx
  2608. vpshufd xmm7,xmm3,250
  2609. shrd r14d,r14d,2
  2610. add edx,r11d
  2611. add r11d,edi
  2612. vpsrld xmm6,xmm6,11
  2613. mov r13d,edx
  2614. add r14d,r11d
  2615. shrd r13d,r13d,14
  2616. vpxor xmm4,xmm4,xmm5
  2617. mov r11d,r14d
  2618. mov r12d,r8d
  2619. shrd r14d,r14d,9
  2620. vpslld xmm5,xmm5,11
  2621. xor r13d,edx
  2622. xor r12d,r9d
  2623. shrd r13d,r13d,5
  2624. vpxor xmm4,xmm4,xmm6
  2625. xor r14d,r11d
  2626. and r12d,edx
  2627. xor r13d,edx
  2628. vpsrld xmm6,xmm7,10
  2629. add r10d,DWORD[4+rsp]
  2630. mov edi,r11d
  2631. xor r12d,r9d
  2632. vpxor xmm4,xmm4,xmm5
  2633. shrd r14d,r14d,11
  2634. xor edi,eax
  2635. add r10d,r12d
  2636. vpsrlq xmm7,xmm7,17
  2637. shrd r13d,r13d,6
  2638. and r15d,edi
  2639. xor r14d,r11d
  2640. vpaddd xmm0,xmm0,xmm4
  2641. add r10d,r13d
  2642. xor r15d,eax
  2643. shrd r14d,r14d,2
  2644. vpxor xmm6,xmm6,xmm7
  2645. add ecx,r10d
  2646. add r10d,r15d
  2647. mov r13d,ecx
  2648. vpsrlq xmm7,xmm7,2
  2649. add r14d,r10d
  2650. shrd r13d,r13d,14
  2651. mov r10d,r14d
  2652. vpxor xmm6,xmm6,xmm7
  2653. mov r12d,edx
  2654. shrd r14d,r14d,9
  2655. xor r13d,ecx
  2656. vpshufb xmm6,xmm6,xmm8
  2657. xor r12d,r8d
  2658. shrd r13d,r13d,5
  2659. xor r14d,r10d
  2660. vpaddd xmm0,xmm0,xmm6
  2661. and r12d,ecx
  2662. xor r13d,ecx
  2663. add r9d,DWORD[8+rsp]
  2664. vpshufd xmm7,xmm0,80
  2665. mov r15d,r10d
  2666. xor r12d,r8d
  2667. shrd r14d,r14d,11
  2668. vpsrld xmm6,xmm7,10
  2669. xor r15d,r11d
  2670. add r9d,r12d
  2671. shrd r13d,r13d,6
  2672. vpsrlq xmm7,xmm7,17
  2673. and edi,r15d
  2674. xor r14d,r10d
  2675. add r9d,r13d
  2676. vpxor xmm6,xmm6,xmm7
  2677. xor edi,r11d
  2678. shrd r14d,r14d,2
  2679. add ebx,r9d
  2680. vpsrlq xmm7,xmm7,2
  2681. add r9d,edi
  2682. mov r13d,ebx
  2683. add r14d,r9d
  2684. vpxor xmm6,xmm6,xmm7
  2685. shrd r13d,r13d,14
  2686. mov r9d,r14d
  2687. mov r12d,ecx
  2688. vpshufb xmm6,xmm6,xmm9
  2689. shrd r14d,r14d,9
  2690. xor r13d,ebx
  2691. xor r12d,edx
  2692. vpaddd xmm0,xmm0,xmm6
  2693. shrd r13d,r13d,5
  2694. xor r14d,r9d
  2695. and r12d,ebx
  2696. vpaddd xmm6,xmm0,XMMWORD[rbp]
  2697. xor r13d,ebx
  2698. add r8d,DWORD[12+rsp]
  2699. mov edi,r9d
  2700. xor r12d,edx
  2701. shrd r14d,r14d,11
  2702. xor edi,r10d
  2703. add r8d,r12d
  2704. shrd r13d,r13d,6
  2705. and r15d,edi
  2706. xor r14d,r9d
  2707. add r8d,r13d
  2708. xor r15d,r10d
  2709. shrd r14d,r14d,2
  2710. add eax,r8d
  2711. add r8d,r15d
  2712. mov r13d,eax
  2713. add r14d,r8d
  2714. vmovdqa XMMWORD[rsp],xmm6
  2715. vpalignr xmm4,xmm2,xmm1,4
  2716. shrd r13d,r13d,14
  2717. mov r8d,r14d
  2718. mov r12d,ebx
  2719. vpalignr xmm7,xmm0,xmm3,4
  2720. shrd r14d,r14d,9
  2721. xor r13d,eax
  2722. xor r12d,ecx
  2723. vpsrld xmm6,xmm4,7
  2724. shrd r13d,r13d,5
  2725. xor r14d,r8d
  2726. and r12d,eax
  2727. vpaddd xmm1,xmm1,xmm7
  2728. xor r13d,eax
  2729. add edx,DWORD[16+rsp]
  2730. mov r15d,r8d
  2731. vpsrld xmm7,xmm4,3
  2732. xor r12d,ecx
  2733. shrd r14d,r14d,11
  2734. xor r15d,r9d
  2735. vpslld xmm5,xmm4,14
  2736. add edx,r12d
  2737. shrd r13d,r13d,6
  2738. and edi,r15d
  2739. vpxor xmm4,xmm7,xmm6
  2740. xor r14d,r8d
  2741. add edx,r13d
  2742. xor edi,r9d
  2743. vpshufd xmm7,xmm0,250
  2744. shrd r14d,r14d,2
  2745. add r11d,edx
  2746. add edx,edi
  2747. vpsrld xmm6,xmm6,11
  2748. mov r13d,r11d
  2749. add r14d,edx
  2750. shrd r13d,r13d,14
  2751. vpxor xmm4,xmm4,xmm5
  2752. mov edx,r14d
  2753. mov r12d,eax
  2754. shrd r14d,r14d,9
  2755. vpslld xmm5,xmm5,11
  2756. xor r13d,r11d
  2757. xor r12d,ebx
  2758. shrd r13d,r13d,5
  2759. vpxor xmm4,xmm4,xmm6
  2760. xor r14d,edx
  2761. and r12d,r11d
  2762. xor r13d,r11d
  2763. vpsrld xmm6,xmm7,10
  2764. add ecx,DWORD[20+rsp]
  2765. mov edi,edx
  2766. xor r12d,ebx
  2767. vpxor xmm4,xmm4,xmm5
  2768. shrd r14d,r14d,11
  2769. xor edi,r8d
  2770. add ecx,r12d
  2771. vpsrlq xmm7,xmm7,17
  2772. shrd r13d,r13d,6
  2773. and r15d,edi
  2774. xor r14d,edx
  2775. vpaddd xmm1,xmm1,xmm4
  2776. add ecx,r13d
  2777. xor r15d,r8d
  2778. shrd r14d,r14d,2
  2779. vpxor xmm6,xmm6,xmm7
  2780. add r10d,ecx
  2781. add ecx,r15d
  2782. mov r13d,r10d
  2783. vpsrlq xmm7,xmm7,2
  2784. add r14d,ecx
  2785. shrd r13d,r13d,14
  2786. mov ecx,r14d
  2787. vpxor xmm6,xmm6,xmm7
  2788. mov r12d,r11d
  2789. shrd r14d,r14d,9
  2790. xor r13d,r10d
  2791. vpshufb xmm6,xmm6,xmm8
  2792. xor r12d,eax
  2793. shrd r13d,r13d,5
  2794. xor r14d,ecx
  2795. vpaddd xmm1,xmm1,xmm6
  2796. and r12d,r10d
  2797. xor r13d,r10d
  2798. add ebx,DWORD[24+rsp]
  2799. vpshufd xmm7,xmm1,80
  2800. mov r15d,ecx
  2801. xor r12d,eax
  2802. shrd r14d,r14d,11
  2803. vpsrld xmm6,xmm7,10
  2804. xor r15d,edx
  2805. add ebx,r12d
  2806. shrd r13d,r13d,6
  2807. vpsrlq xmm7,xmm7,17
  2808. and edi,r15d
  2809. xor r14d,ecx
  2810. add ebx,r13d
  2811. vpxor xmm6,xmm6,xmm7
  2812. xor edi,edx
  2813. shrd r14d,r14d,2
  2814. add r9d,ebx
  2815. vpsrlq xmm7,xmm7,2
  2816. add ebx,edi
  2817. mov r13d,r9d
  2818. add r14d,ebx
  2819. vpxor xmm6,xmm6,xmm7
  2820. shrd r13d,r13d,14
  2821. mov ebx,r14d
  2822. mov r12d,r10d
  2823. vpshufb xmm6,xmm6,xmm9
  2824. shrd r14d,r14d,9
  2825. xor r13d,r9d
  2826. xor r12d,r11d
  2827. vpaddd xmm1,xmm1,xmm6
  2828. shrd r13d,r13d,5
  2829. xor r14d,ebx
  2830. and r12d,r9d
  2831. vpaddd xmm6,xmm1,XMMWORD[32+rbp]
  2832. xor r13d,r9d
  2833. add eax,DWORD[28+rsp]
  2834. mov edi,ebx
  2835. xor r12d,r11d
  2836. shrd r14d,r14d,11
  2837. xor edi,ecx
  2838. add eax,r12d
  2839. shrd r13d,r13d,6
  2840. and r15d,edi
  2841. xor r14d,ebx
  2842. add eax,r13d
  2843. xor r15d,ecx
  2844. shrd r14d,r14d,2
  2845. add r8d,eax
  2846. add eax,r15d
  2847. mov r13d,r8d
  2848. add r14d,eax
  2849. vmovdqa XMMWORD[16+rsp],xmm6
  2850. vpalignr xmm4,xmm3,xmm2,4
  2851. shrd r13d,r13d,14
  2852. mov eax,r14d
  2853. mov r12d,r9d
  2854. vpalignr xmm7,xmm1,xmm0,4
  2855. shrd r14d,r14d,9
  2856. xor r13d,r8d
  2857. xor r12d,r10d
  2858. vpsrld xmm6,xmm4,7
  2859. shrd r13d,r13d,5
  2860. xor r14d,eax
  2861. and r12d,r8d
  2862. vpaddd xmm2,xmm2,xmm7
  2863. xor r13d,r8d
  2864. add r11d,DWORD[32+rsp]
  2865. mov r15d,eax
  2866. vpsrld xmm7,xmm4,3
  2867. xor r12d,r10d
  2868. shrd r14d,r14d,11
  2869. xor r15d,ebx
  2870. vpslld xmm5,xmm4,14
  2871. add r11d,r12d
  2872. shrd r13d,r13d,6
  2873. and edi,r15d
  2874. vpxor xmm4,xmm7,xmm6
  2875. xor r14d,eax
  2876. add r11d,r13d
  2877. xor edi,ebx
  2878. vpshufd xmm7,xmm1,250
  2879. shrd r14d,r14d,2
  2880. add edx,r11d
  2881. add r11d,edi
  2882. vpsrld xmm6,xmm6,11
  2883. mov r13d,edx
  2884. add r14d,r11d
  2885. shrd r13d,r13d,14
  2886. vpxor xmm4,xmm4,xmm5
  2887. mov r11d,r14d
  2888. mov r12d,r8d
  2889. shrd r14d,r14d,9
  2890. vpslld xmm5,xmm5,11
  2891. xor r13d,edx
  2892. xor r12d,r9d
  2893. shrd r13d,r13d,5
  2894. vpxor xmm4,xmm4,xmm6
  2895. xor r14d,r11d
  2896. and r12d,edx
  2897. xor r13d,edx
  2898. vpsrld xmm6,xmm7,10
  2899. add r10d,DWORD[36+rsp]
  2900. mov edi,r11d
  2901. xor r12d,r9d
  2902. vpxor xmm4,xmm4,xmm5
  2903. shrd r14d,r14d,11
  2904. xor edi,eax
  2905. add r10d,r12d
  2906. vpsrlq xmm7,xmm7,17
  2907. shrd r13d,r13d,6
  2908. and r15d,edi
  2909. xor r14d,r11d
  2910. vpaddd xmm2,xmm2,xmm4
  2911. add r10d,r13d
  2912. xor r15d,eax
  2913. shrd r14d,r14d,2
  2914. vpxor xmm6,xmm6,xmm7
  2915. add ecx,r10d
  2916. add r10d,r15d
  2917. mov r13d,ecx
  2918. vpsrlq xmm7,xmm7,2
  2919. add r14d,r10d
  2920. shrd r13d,r13d,14
  2921. mov r10d,r14d
  2922. vpxor xmm6,xmm6,xmm7
  2923. mov r12d,edx
  2924. shrd r14d,r14d,9
  2925. xor r13d,ecx
  2926. vpshufb xmm6,xmm6,xmm8
  2927. xor r12d,r8d
  2928. shrd r13d,r13d,5
  2929. xor r14d,r10d
  2930. vpaddd xmm2,xmm2,xmm6
  2931. and r12d,ecx
  2932. xor r13d,ecx
  2933. add r9d,DWORD[40+rsp]
  2934. vpshufd xmm7,xmm2,80
  2935. mov r15d,r10d
  2936. xor r12d,r8d
  2937. shrd r14d,r14d,11
  2938. vpsrld xmm6,xmm7,10
  2939. xor r15d,r11d
  2940. add r9d,r12d
  2941. shrd r13d,r13d,6
  2942. vpsrlq xmm7,xmm7,17
  2943. and edi,r15d
  2944. xor r14d,r10d
  2945. add r9d,r13d
  2946. vpxor xmm6,xmm6,xmm7
  2947. xor edi,r11d
  2948. shrd r14d,r14d,2
  2949. add ebx,r9d
  2950. vpsrlq xmm7,xmm7,2
  2951. add r9d,edi
  2952. mov r13d,ebx
  2953. add r14d,r9d
  2954. vpxor xmm6,xmm6,xmm7
  2955. shrd r13d,r13d,14
  2956. mov r9d,r14d
  2957. mov r12d,ecx
  2958. vpshufb xmm6,xmm6,xmm9
  2959. shrd r14d,r14d,9
  2960. xor r13d,ebx
  2961. xor r12d,edx
  2962. vpaddd xmm2,xmm2,xmm6
  2963. shrd r13d,r13d,5
  2964. xor r14d,r9d
  2965. and r12d,ebx
  2966. vpaddd xmm6,xmm2,XMMWORD[64+rbp]
  2967. xor r13d,ebx
  2968. add r8d,DWORD[44+rsp]
  2969. mov edi,r9d
  2970. xor r12d,edx
  2971. shrd r14d,r14d,11
  2972. xor edi,r10d
  2973. add r8d,r12d
  2974. shrd r13d,r13d,6
  2975. and r15d,edi
  2976. xor r14d,r9d
  2977. add r8d,r13d
  2978. xor r15d,r10d
  2979. shrd r14d,r14d,2
  2980. add eax,r8d
  2981. add r8d,r15d
  2982. mov r13d,eax
  2983. add r14d,r8d
  2984. vmovdqa XMMWORD[32+rsp],xmm6
  2985. vpalignr xmm4,xmm0,xmm3,4
  2986. shrd r13d,r13d,14
  2987. mov r8d,r14d
  2988. mov r12d,ebx
  2989. vpalignr xmm7,xmm2,xmm1,4
  2990. shrd r14d,r14d,9
  2991. xor r13d,eax
  2992. xor r12d,ecx
  2993. vpsrld xmm6,xmm4,7
  2994. shrd r13d,r13d,5
  2995. xor r14d,r8d
  2996. and r12d,eax
  2997. vpaddd xmm3,xmm3,xmm7
  2998. xor r13d,eax
  2999. add edx,DWORD[48+rsp]
  3000. mov r15d,r8d
  3001. vpsrld xmm7,xmm4,3
  3002. xor r12d,ecx
  3003. shrd r14d,r14d,11
  3004. xor r15d,r9d
  3005. vpslld xmm5,xmm4,14
  3006. add edx,r12d
  3007. shrd r13d,r13d,6
  3008. and edi,r15d
  3009. vpxor xmm4,xmm7,xmm6
  3010. xor r14d,r8d
  3011. add edx,r13d
  3012. xor edi,r9d
  3013. vpshufd xmm7,xmm2,250
  3014. shrd r14d,r14d,2
  3015. add r11d,edx
  3016. add edx,edi
  3017. vpsrld xmm6,xmm6,11
  3018. mov r13d,r11d
  3019. add r14d,edx
  3020. shrd r13d,r13d,14
  3021. vpxor xmm4,xmm4,xmm5
  3022. mov edx,r14d
  3023. mov r12d,eax
  3024. shrd r14d,r14d,9
  3025. vpslld xmm5,xmm5,11
  3026. xor r13d,r11d
  3027. xor r12d,ebx
  3028. shrd r13d,r13d,5
  3029. vpxor xmm4,xmm4,xmm6
  3030. xor r14d,edx
  3031. and r12d,r11d
  3032. xor r13d,r11d
  3033. vpsrld xmm6,xmm7,10
  3034. add ecx,DWORD[52+rsp]
  3035. mov edi,edx
  3036. xor r12d,ebx
  3037. vpxor xmm4,xmm4,xmm5
  3038. shrd r14d,r14d,11
  3039. xor edi,r8d
  3040. add ecx,r12d
  3041. vpsrlq xmm7,xmm7,17
  3042. shrd r13d,r13d,6
  3043. and r15d,edi
  3044. xor r14d,edx
  3045. vpaddd xmm3,xmm3,xmm4
  3046. add ecx,r13d
  3047. xor r15d,r8d
  3048. shrd r14d,r14d,2
  3049. vpxor xmm6,xmm6,xmm7
  3050. add r10d,ecx
  3051. add ecx,r15d
  3052. mov r13d,r10d
  3053. vpsrlq xmm7,xmm7,2
  3054. add r14d,ecx
  3055. shrd r13d,r13d,14
  3056. mov ecx,r14d
  3057. vpxor xmm6,xmm6,xmm7
  3058. mov r12d,r11d
  3059. shrd r14d,r14d,9
  3060. xor r13d,r10d
  3061. vpshufb xmm6,xmm6,xmm8
  3062. xor r12d,eax
  3063. shrd r13d,r13d,5
  3064. xor r14d,ecx
  3065. vpaddd xmm3,xmm3,xmm6
  3066. and r12d,r10d
  3067. xor r13d,r10d
  3068. add ebx,DWORD[56+rsp]
  3069. vpshufd xmm7,xmm3,80
  3070. mov r15d,ecx
  3071. xor r12d,eax
  3072. shrd r14d,r14d,11
  3073. vpsrld xmm6,xmm7,10
  3074. xor r15d,edx
  3075. add ebx,r12d
  3076. shrd r13d,r13d,6
  3077. vpsrlq xmm7,xmm7,17
  3078. and edi,r15d
  3079. xor r14d,ecx
  3080. add ebx,r13d
  3081. vpxor xmm6,xmm6,xmm7
  3082. xor edi,edx
  3083. shrd r14d,r14d,2
  3084. add r9d,ebx
  3085. vpsrlq xmm7,xmm7,2
  3086. add ebx,edi
  3087. mov r13d,r9d
  3088. add r14d,ebx
  3089. vpxor xmm6,xmm6,xmm7
  3090. shrd r13d,r13d,14
  3091. mov ebx,r14d
  3092. mov r12d,r10d
  3093. vpshufb xmm6,xmm6,xmm9
  3094. shrd r14d,r14d,9
  3095. xor r13d,r9d
  3096. xor r12d,r11d
  3097. vpaddd xmm3,xmm3,xmm6
  3098. shrd r13d,r13d,5
  3099. xor r14d,ebx
  3100. and r12d,r9d
  3101. vpaddd xmm6,xmm3,XMMWORD[96+rbp]
  3102. xor r13d,r9d
  3103. add eax,DWORD[60+rsp]
  3104. mov edi,ebx
  3105. xor r12d,r11d
  3106. shrd r14d,r14d,11
  3107. xor edi,ecx
  3108. add eax,r12d
  3109. shrd r13d,r13d,6
  3110. and r15d,edi
  3111. xor r14d,ebx
  3112. add eax,r13d
  3113. xor r15d,ecx
  3114. shrd r14d,r14d,2
  3115. add r8d,eax
  3116. add eax,r15d
  3117. mov r13d,r8d
  3118. add r14d,eax
  3119. vmovdqa XMMWORD[48+rsp],xmm6
  3120. cmp BYTE[131+rbp],0
  3121. jne NEAR $L$avx_00_47
  3122. shrd r13d,r13d,14
  3123. mov eax,r14d
  3124. mov r12d,r9d
  3125. shrd r14d,r14d,9
  3126. xor r13d,r8d
  3127. xor r12d,r10d
  3128. shrd r13d,r13d,5
  3129. xor r14d,eax
  3130. and r12d,r8d
  3131. xor r13d,r8d
  3132. add r11d,DWORD[rsp]
  3133. mov r15d,eax
  3134. xor r12d,r10d
  3135. shrd r14d,r14d,11
  3136. xor r15d,ebx
  3137. add r11d,r12d
  3138. shrd r13d,r13d,6
  3139. and edi,r15d
  3140. xor r14d,eax
  3141. add r11d,r13d
  3142. xor edi,ebx
  3143. shrd r14d,r14d,2
  3144. add edx,r11d
  3145. add r11d,edi
  3146. mov r13d,edx
  3147. add r14d,r11d
  3148. shrd r13d,r13d,14
  3149. mov r11d,r14d
  3150. mov r12d,r8d
  3151. shrd r14d,r14d,9
  3152. xor r13d,edx
  3153. xor r12d,r9d
  3154. shrd r13d,r13d,5
  3155. xor r14d,r11d
  3156. and r12d,edx
  3157. xor r13d,edx
  3158. add r10d,DWORD[4+rsp]
  3159. mov edi,r11d
  3160. xor r12d,r9d
  3161. shrd r14d,r14d,11
  3162. xor edi,eax
  3163. add r10d,r12d
  3164. shrd r13d,r13d,6
  3165. and r15d,edi
  3166. xor r14d,r11d
  3167. add r10d,r13d
  3168. xor r15d,eax
  3169. shrd r14d,r14d,2
  3170. add ecx,r10d
  3171. add r10d,r15d
  3172. mov r13d,ecx
  3173. add r14d,r10d
  3174. shrd r13d,r13d,14
  3175. mov r10d,r14d
  3176. mov r12d,edx
  3177. shrd r14d,r14d,9
  3178. xor r13d,ecx
  3179. xor r12d,r8d
  3180. shrd r13d,r13d,5
  3181. xor r14d,r10d
  3182. and r12d,ecx
  3183. xor r13d,ecx
  3184. add r9d,DWORD[8+rsp]
  3185. mov r15d,r10d
  3186. xor r12d,r8d
  3187. shrd r14d,r14d,11
  3188. xor r15d,r11d
  3189. add r9d,r12d
  3190. shrd r13d,r13d,6
  3191. and edi,r15d
  3192. xor r14d,r10d
  3193. add r9d,r13d
  3194. xor edi,r11d
  3195. shrd r14d,r14d,2
  3196. add ebx,r9d
  3197. add r9d,edi
  3198. mov r13d,ebx
  3199. add r14d,r9d
  3200. shrd r13d,r13d,14
  3201. mov r9d,r14d
  3202. mov r12d,ecx
  3203. shrd r14d,r14d,9
  3204. xor r13d,ebx
  3205. xor r12d,edx
  3206. shrd r13d,r13d,5
  3207. xor r14d,r9d
  3208. and r12d,ebx
  3209. xor r13d,ebx
  3210. add r8d,DWORD[12+rsp]
  3211. mov edi,r9d
  3212. xor r12d,edx
  3213. shrd r14d,r14d,11
  3214. xor edi,r10d
  3215. add r8d,r12d
  3216. shrd r13d,r13d,6
  3217. and r15d,edi
  3218. xor r14d,r9d
  3219. add r8d,r13d
  3220. xor r15d,r10d
  3221. shrd r14d,r14d,2
  3222. add eax,r8d
  3223. add r8d,r15d
  3224. mov r13d,eax
  3225. add r14d,r8d
  3226. shrd r13d,r13d,14
  3227. mov r8d,r14d
  3228. mov r12d,ebx
  3229. shrd r14d,r14d,9
  3230. xor r13d,eax
  3231. xor r12d,ecx
  3232. shrd r13d,r13d,5
  3233. xor r14d,r8d
  3234. and r12d,eax
  3235. xor r13d,eax
  3236. add edx,DWORD[16+rsp]
  3237. mov r15d,r8d
  3238. xor r12d,ecx
  3239. shrd r14d,r14d,11
  3240. xor r15d,r9d
  3241. add edx,r12d
  3242. shrd r13d,r13d,6
  3243. and edi,r15d
  3244. xor r14d,r8d
  3245. add edx,r13d
  3246. xor edi,r9d
  3247. shrd r14d,r14d,2
  3248. add r11d,edx
  3249. add edx,edi
  3250. mov r13d,r11d
  3251. add r14d,edx
  3252. shrd r13d,r13d,14
  3253. mov edx,r14d
  3254. mov r12d,eax
  3255. shrd r14d,r14d,9
  3256. xor r13d,r11d
  3257. xor r12d,ebx
  3258. shrd r13d,r13d,5
  3259. xor r14d,edx
  3260. and r12d,r11d
  3261. xor r13d,r11d
  3262. add ecx,DWORD[20+rsp]
  3263. mov edi,edx
  3264. xor r12d,ebx
  3265. shrd r14d,r14d,11
  3266. xor edi,r8d
  3267. add ecx,r12d
  3268. shrd r13d,r13d,6
  3269. and r15d,edi
  3270. xor r14d,edx
  3271. add ecx,r13d
  3272. xor r15d,r8d
  3273. shrd r14d,r14d,2
  3274. add r10d,ecx
  3275. add ecx,r15d
  3276. mov r13d,r10d
  3277. add r14d,ecx
  3278. shrd r13d,r13d,14
  3279. mov ecx,r14d
  3280. mov r12d,r11d
  3281. shrd r14d,r14d,9
  3282. xor r13d,r10d
  3283. xor r12d,eax
  3284. shrd r13d,r13d,5
  3285. xor r14d,ecx
  3286. and r12d,r10d
  3287. xor r13d,r10d
  3288. add ebx,DWORD[24+rsp]
  3289. mov r15d,ecx
  3290. xor r12d,eax
  3291. shrd r14d,r14d,11
  3292. xor r15d,edx
  3293. add ebx,r12d
  3294. shrd r13d,r13d,6
  3295. and edi,r15d
  3296. xor r14d,ecx
  3297. add ebx,r13d
  3298. xor edi,edx
  3299. shrd r14d,r14d,2
  3300. add r9d,ebx
  3301. add ebx,edi
  3302. mov r13d,r9d
  3303. add r14d,ebx
  3304. shrd r13d,r13d,14
  3305. mov ebx,r14d
  3306. mov r12d,r10d
  3307. shrd r14d,r14d,9
  3308. xor r13d,r9d
  3309. xor r12d,r11d
  3310. shrd r13d,r13d,5
  3311. xor r14d,ebx
  3312. and r12d,r9d
  3313. xor r13d,r9d
  3314. add eax,DWORD[28+rsp]
  3315. mov edi,ebx
  3316. xor r12d,r11d
  3317. shrd r14d,r14d,11
  3318. xor edi,ecx
  3319. add eax,r12d
  3320. shrd r13d,r13d,6
  3321. and r15d,edi
  3322. xor r14d,ebx
  3323. add eax,r13d
  3324. xor r15d,ecx
  3325. shrd r14d,r14d,2
  3326. add r8d,eax
  3327. add eax,r15d
  3328. mov r13d,r8d
  3329. add r14d,eax
  3330. shrd r13d,r13d,14
  3331. mov eax,r14d
  3332. mov r12d,r9d
  3333. shrd r14d,r14d,9
  3334. xor r13d,r8d
  3335. xor r12d,r10d
  3336. shrd r13d,r13d,5
  3337. xor r14d,eax
  3338. and r12d,r8d
  3339. xor r13d,r8d
  3340. add r11d,DWORD[32+rsp]
  3341. mov r15d,eax
  3342. xor r12d,r10d
  3343. shrd r14d,r14d,11
  3344. xor r15d,ebx
  3345. add r11d,r12d
  3346. shrd r13d,r13d,6
  3347. and edi,r15d
  3348. xor r14d,eax
  3349. add r11d,r13d
  3350. xor edi,ebx
  3351. shrd r14d,r14d,2
  3352. add edx,r11d
  3353. add r11d,edi
  3354. mov r13d,edx
  3355. add r14d,r11d
  3356. shrd r13d,r13d,14
  3357. mov r11d,r14d
  3358. mov r12d,r8d
  3359. shrd r14d,r14d,9
  3360. xor r13d,edx
  3361. xor r12d,r9d
  3362. shrd r13d,r13d,5
  3363. xor r14d,r11d
  3364. and r12d,edx
  3365. xor r13d,edx
  3366. add r10d,DWORD[36+rsp]
  3367. mov edi,r11d
  3368. xor r12d,r9d
  3369. shrd r14d,r14d,11
  3370. xor edi,eax
  3371. add r10d,r12d
  3372. shrd r13d,r13d,6
  3373. and r15d,edi
  3374. xor r14d,r11d
  3375. add r10d,r13d
  3376. xor r15d,eax
  3377. shrd r14d,r14d,2
  3378. add ecx,r10d
  3379. add r10d,r15d
  3380. mov r13d,ecx
  3381. add r14d,r10d
  3382. shrd r13d,r13d,14
  3383. mov r10d,r14d
  3384. mov r12d,edx
  3385. shrd r14d,r14d,9
  3386. xor r13d,ecx
  3387. xor r12d,r8d
  3388. shrd r13d,r13d,5
  3389. xor r14d,r10d
  3390. and r12d,ecx
  3391. xor r13d,ecx
  3392. add r9d,DWORD[40+rsp]
  3393. mov r15d,r10d
  3394. xor r12d,r8d
  3395. shrd r14d,r14d,11
  3396. xor r15d,r11d
  3397. add r9d,r12d
  3398. shrd r13d,r13d,6
  3399. and edi,r15d
  3400. xor r14d,r10d
  3401. add r9d,r13d
  3402. xor edi,r11d
  3403. shrd r14d,r14d,2
  3404. add ebx,r9d
  3405. add r9d,edi
  3406. mov r13d,ebx
  3407. add r14d,r9d
  3408. shrd r13d,r13d,14
  3409. mov r9d,r14d
  3410. mov r12d,ecx
  3411. shrd r14d,r14d,9
  3412. xor r13d,ebx
  3413. xor r12d,edx
  3414. shrd r13d,r13d,5
  3415. xor r14d,r9d
  3416. and r12d,ebx
  3417. xor r13d,ebx
  3418. add r8d,DWORD[44+rsp]
  3419. mov edi,r9d
  3420. xor r12d,edx
  3421. shrd r14d,r14d,11
  3422. xor edi,r10d
  3423. add r8d,r12d
  3424. shrd r13d,r13d,6
  3425. and r15d,edi
  3426. xor r14d,r9d
  3427. add r8d,r13d
  3428. xor r15d,r10d
  3429. shrd r14d,r14d,2
  3430. add eax,r8d
  3431. add r8d,r15d
  3432. mov r13d,eax
  3433. add r14d,r8d
  3434. shrd r13d,r13d,14
  3435. mov r8d,r14d
  3436. mov r12d,ebx
  3437. shrd r14d,r14d,9
  3438. xor r13d,eax
  3439. xor r12d,ecx
  3440. shrd r13d,r13d,5
  3441. xor r14d,r8d
  3442. and r12d,eax
  3443. xor r13d,eax
  3444. add edx,DWORD[48+rsp]
  3445. mov r15d,r8d
  3446. xor r12d,ecx
  3447. shrd r14d,r14d,11
  3448. xor r15d,r9d
  3449. add edx,r12d
  3450. shrd r13d,r13d,6
  3451. and edi,r15d
  3452. xor r14d,r8d
  3453. add edx,r13d
  3454. xor edi,r9d
  3455. shrd r14d,r14d,2
  3456. add r11d,edx
  3457. add edx,edi
  3458. mov r13d,r11d
  3459. add r14d,edx
  3460. shrd r13d,r13d,14
  3461. mov edx,r14d
  3462. mov r12d,eax
  3463. shrd r14d,r14d,9
  3464. xor r13d,r11d
  3465. xor r12d,ebx
  3466. shrd r13d,r13d,5
  3467. xor r14d,edx
  3468. and r12d,r11d
  3469. xor r13d,r11d
  3470. add ecx,DWORD[52+rsp]
  3471. mov edi,edx
  3472. xor r12d,ebx
  3473. shrd r14d,r14d,11
  3474. xor edi,r8d
  3475. add ecx,r12d
  3476. shrd r13d,r13d,6
  3477. and r15d,edi
  3478. xor r14d,edx
  3479. add ecx,r13d
  3480. xor r15d,r8d
  3481. shrd r14d,r14d,2
  3482. add r10d,ecx
  3483. add ecx,r15d
  3484. mov r13d,r10d
  3485. add r14d,ecx
  3486. shrd r13d,r13d,14
  3487. mov ecx,r14d
  3488. mov r12d,r11d
  3489. shrd r14d,r14d,9
  3490. xor r13d,r10d
  3491. xor r12d,eax
  3492. shrd r13d,r13d,5
  3493. xor r14d,ecx
  3494. and r12d,r10d
  3495. xor r13d,r10d
  3496. add ebx,DWORD[56+rsp]
  3497. mov r15d,ecx
  3498. xor r12d,eax
  3499. shrd r14d,r14d,11
  3500. xor r15d,edx
  3501. add ebx,r12d
  3502. shrd r13d,r13d,6
  3503. and edi,r15d
  3504. xor r14d,ecx
  3505. add ebx,r13d
  3506. xor edi,edx
  3507. shrd r14d,r14d,2
  3508. add r9d,ebx
  3509. add ebx,edi
  3510. mov r13d,r9d
  3511. add r14d,ebx
  3512. shrd r13d,r13d,14
  3513. mov ebx,r14d
  3514. mov r12d,r10d
  3515. shrd r14d,r14d,9
  3516. xor r13d,r9d
  3517. xor r12d,r11d
  3518. shrd r13d,r13d,5
  3519. xor r14d,ebx
  3520. and r12d,r9d
  3521. xor r13d,r9d
  3522. add eax,DWORD[60+rsp]
  3523. mov edi,ebx
  3524. xor r12d,r11d
  3525. shrd r14d,r14d,11
  3526. xor edi,ecx
  3527. add eax,r12d
  3528. shrd r13d,r13d,6
  3529. and r15d,edi
  3530. xor r14d,ebx
  3531. add eax,r13d
  3532. xor r15d,ecx
  3533. shrd r14d,r14d,2
  3534. add r8d,eax
  3535. add eax,r15d
  3536. mov r13d,r8d
  3537. add r14d,eax
  3538. mov rdi,QWORD[((64+0))+rsp]
  3539. mov eax,r14d
  3540. add eax,DWORD[rdi]
  3541. lea rsi,[64+rsi]
  3542. add ebx,DWORD[4+rdi]
  3543. add ecx,DWORD[8+rdi]
  3544. add edx,DWORD[12+rdi]
  3545. add r8d,DWORD[16+rdi]
  3546. add r9d,DWORD[20+rdi]
  3547. add r10d,DWORD[24+rdi]
  3548. add r11d,DWORD[28+rdi]
  3549. cmp rsi,QWORD[((64+16))+rsp]
  3550. mov DWORD[rdi],eax
  3551. mov DWORD[4+rdi],ebx
  3552. mov DWORD[8+rdi],ecx
  3553. mov DWORD[12+rdi],edx
  3554. mov DWORD[16+rdi],r8d
  3555. mov DWORD[20+rdi],r9d
  3556. mov DWORD[24+rdi],r10d
  3557. mov DWORD[28+rdi],r11d
  3558. jb NEAR $L$loop_avx
  3559. mov rsi,QWORD[88+rsp]
  3560. vzeroupper
  3561. movaps xmm6,XMMWORD[((64+32))+rsp]
  3562. movaps xmm7,XMMWORD[((64+48))+rsp]
  3563. movaps xmm8,XMMWORD[((64+64))+rsp]
  3564. movaps xmm9,XMMWORD[((64+80))+rsp]
  3565. mov r15,QWORD[((-48))+rsi]
  3566. mov r14,QWORD[((-40))+rsi]
  3567. mov r13,QWORD[((-32))+rsi]
  3568. mov r12,QWORD[((-24))+rsi]
  3569. mov rbp,QWORD[((-16))+rsi]
  3570. mov rbx,QWORD[((-8))+rsi]
  3571. lea rsp,[rsi]
  3572. $L$epilogue_avx:
  3573. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  3574. mov rsi,QWORD[16+rsp]
  3575. DB 0F3h,0C3h ;repret
  3576. $L$SEH_end_GFp_sha256_block_data_order_avx:
  3577. EXTERN __imp_RtlVirtualUnwind
  3578. ALIGN 16
  ; se_handler -- exception handler referenced by every $L$SEH_info_* record
  ; in .xdata. It rebuilds the caller's non-volatile register state inside the
  ; CONTEXT record, then asks RtlVirtualUnwind to continue unwinding.
  ;
  ; In (Win64 handler convention; structure field names assume the standard
  ; x64 CONTEXT / DISPATCHER_CONTEXT layouts -- confirm against winnt.h):
  ;   r8 = CONTEXT*            (120=Rax 144=Rbx 152=Rsp 160=Rbp 168=Rsi
  ;                             176=Rdi 216..240=R12..R15 248=Rip 512=Xmm6)
  ;   r9 = DISPATCHER_CONTEXT* (0=ControlPc 8=ImageBase 16=FunctionEntry
  ;                             24=EstablisherFrame 40=ContextRecord
  ;                             56=HandlerData)
  ; HandlerData points at the two image-relative DWORDs stored after the
  ; handler RVA in .xdata: [0] = end-of-prologue label, [1] = epilogue label.
  ; Out: eax = 1.
  ;
  ; Fix relative to the generated code: the "is this the scalar code path?"
  ; test used to compare rbx against $L$epilogue after rbx had already been
  ; reloaded with the faulting function's saved rbx, so it tested an arbitrary
  ; caller value instead of the faulting Rip. It now compares the Rip field of
  ; the CONTEXT directly. This file is generated, so the same change belongs
  ; in the generating Perl script.
  3579. se_handler:
  3580. push rsi ; preserve every register the handler touches
  3581. push rdi
  3582. push rbx
  3583. push rbp
  3584. push r12
  3585. push r13
  3586. push r14
  3587. push r15
  3588. pushfq ; flags too: the DD-encoded string copies below execute cld
  3589. sub rsp,64 ; room for RtlVirtualUnwind's home space + 4 stack arguments
  3590. mov rax,QWORD[120+r8] ; rax = context->Rax (the function entry does "mov rax,rsp")
  3591. mov rbx,QWORD[248+r8] ; rbx = context->Rip (faulting address)
  3592. mov rsi,QWORD[8+r9] ; rsi = disp->ImageBase
  3593. mov r11,QWORD[56+r9] ; r11 = disp->HandlerData
  3594. mov r10d,DWORD[r11] ; HandlerData[0]: RVA of the end-of-prologue label
  3595. lea r10,[r10*1+rsi] ; RVA -> absolute address
  3596. cmp rbx,r10
  3597. jb NEAR $L$in_prologue ; Rip before that label: frame not built, rax is the entry rsp
  3598. mov rax,QWORD[152+r8] ; rax = context->Rsp
  3599. mov r10d,DWORD[4+r11] ; HandlerData[1]: RVA of the epilogue label
  3600. lea r10,[r10*1+rsi]
  3601. cmp rbx,r10
  3602. jae NEAR $L$in_prologue ; Rip at/after the epilogue label: rsp is already restored
  3603. mov rsi,rax ; keep the aligned frame base for the XMM copy below
  3604. mov rax,QWORD[((64+24))+rax] ; original rsp that the function body stored at [88+rsp]
  3605. mov rbx,QWORD[((-8))+rax] ; reload the six pushed registers from below the original rsp
  3606. mov rbp,QWORD[((-16))+rax]
  3607. mov r12,QWORD[((-24))+rax]
  3608. mov r13,QWORD[((-32))+rax]
  3609. mov r14,QWORD[((-40))+rax]
  3610. mov r15,QWORD[((-48))+rax]
  3611. mov QWORD[144+r8],rbx ; context->Rbx
  3612. mov QWORD[160+r8],rbp ; context->Rbp
  3613. mov QWORD[216+r8],r12 ; context->R12
  3614. mov QWORD[224+r8],r13 ; context->R13
  3615. mov QWORD[232+r8],r14 ; context->R14
  3616. mov QWORD[240+r8],r15 ; context->R15
  3617. lea r10,[$L$epilogue] ; epilogue label of the scalar (non-SIMD) code
  3618. cmp QWORD[248+r8],r10 ; test context->Rip itself; rbx no longer holds it (see header)
  3619. jb NEAR $L$in_prologue ; Rip below $L$epilogue: skip the XMM save-area copy
  3620. lea rsi,[((64+32))+rsi] ; source: frame offset 96, where the epilogue reloads xmm6..xmm9 from
  3621. lea rdi,[512+r8] ; destination: &context->Xmm6
  3622. mov ecx,8 ; 8 qwords = four 16-byte registers
  3623. DD 0xa548f3fc ; bytes FC F3 48 A5 = cld; rep movsq
  3624. $L$in_prologue:
  3625. mov rdi,QWORD[8+rax] ; rdi/rsi as stored by the WIN64 prologue at entry rsp+8 / +16
  3626. mov rsi,QWORD[16+rax]
  3627. mov QWORD[152+r8],rax ; context->Rsp
  3628. mov QWORD[168+r8],rsi ; context->Rsi
  3629. mov QWORD[176+r8],rdi ; context->Rdi
  3630. mov rdi,QWORD[40+r9] ; destination: disp->ContextRecord
  3631. mov rsi,r8 ; source: the context patched above
  3632. mov ecx,154 ; 154 qwords = 1232 bytes (presumably sizeof(CONTEXT) -- confirm)
  3633. DD 0xa548f3fc ; cld; rep movsq
  3634. mov rsi,r9 ; rsi = disp, used to build the RtlVirtualUnwind arguments
  3635. xor rcx,rcx ; arg1 = 0 (HandlerType; 0 is UNW_FLAG_NHANDLER per the SDK)
  3636. mov rdx,QWORD[8+rsi] ; arg2 = disp->ImageBase
  3637. mov r8,QWORD[rsi] ; arg3 = disp->ControlPc
  3638. mov r9,QWORD[16+rsi] ; arg4 = disp->FunctionEntry
  3639. mov r10,QWORD[40+rsi] ; disp->ContextRecord
  3640. lea r11,[56+rsi] ; &disp->HandlerData
  3641. lea r12,[24+rsi] ; &disp->EstablisherFrame
  3642. mov QWORD[32+rsp],r10 ; arg5 = ContextRecord
  3643. mov QWORD[40+rsp],r11 ; arg6 = &HandlerData
  3644. mov QWORD[48+rsp],r12 ; arg7 = &EstablisherFrame
  3645. mov QWORD[56+rsp],rcx ; arg8 = NULL (ContextPointers)
  3646. call QWORD[__imp_RtlVirtualUnwind]
  3647. mov eax,1 ; return 1 (ExceptionContinueSearch in the SDK enumeration)
  3648. add rsp,64
  3649. popfq
  3650. pop r15
  3651. pop r14
  3652. pop r13
  3653. pop r12
  3654. pop rbp
  3655. pop rbx
  3656. pop rdi
  3657. pop rsi
  3658. DB 0F3h,0C3h ;repret
  ; .pdata: function table. One record of three image-relative DWORDs
  ; (begin, end, unwind-info) for each of the three code variants; each
  ; unwind-info entry points at the matching $L$SEH_info_* record in .xdata.
  3659. section .pdata rdata align=4
  3660. ALIGN 4
  3661. DD $L$SEH_begin_GFp_sha256_block_data_order wrt ..imagebase ; scalar variant: begin
  3662. DD $L$SEH_end_GFp_sha256_block_data_order wrt ..imagebase ; scalar variant: end
  3663. DD $L$SEH_info_GFp_sha256_block_data_order wrt ..imagebase ; scalar variant: unwind info
  3664. DD $L$SEH_begin_GFp_sha256_block_data_order_ssse3 wrt ..imagebase ; SSSE3 variant: begin
  3665. DD $L$SEH_end_GFp_sha256_block_data_order_ssse3 wrt ..imagebase ; SSSE3 variant: end
  3666. DD $L$SEH_info_GFp_sha256_block_data_order_ssse3 wrt ..imagebase ; SSSE3 variant: unwind info
  3667. DD $L$SEH_begin_GFp_sha256_block_data_order_avx wrt ..imagebase ; AVX variant: begin
  3668. DD $L$SEH_end_GFp_sha256_block_data_order_avx wrt ..imagebase ; AVX variant: end
  3669. DD $L$SEH_info_GFp_sha256_block_data_order_avx wrt ..imagebase ; AVX variant: unwind info
  ; .xdata: one unwind-info record per code variant. Each record is a 4-byte
  ; header, the image-relative address of se_handler, and two image-relative
  ; label addresses. se_handler reads those two DWORDs through the pointer at
  ; [56+r9] and adds the image base from [8+r9]: the first bounds the
  ; prologue, the second marks the epilogue of that variant.
  3670. section .xdata rdata align=8
  3671. ALIGN 8
  3672. $L$SEH_info_GFp_sha256_block_data_order:
  3673. DB 9,0,0,0 ; header; 9 presumably = version 1 with the exception-handler flag (UNWIND_INFO layout, confirm); remaining bytes zero
  3674. DD se_handler wrt ..imagebase ; handler for this variant
  3675. DD $L$prologue wrt ..imagebase,$L$epilogue wrt ..imagebase ; handler data [0],[1] for the scalar code
  3676. $L$SEH_info_GFp_sha256_block_data_order_ssse3:
  3677. DB 9,0,0,0 ; same header as above
  3678. DD se_handler wrt ..imagebase ; same handler, different bounds
  3679. DD $L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase ; handler data [0],[1] for the SSSE3 code
  3680. $L$SEH_info_GFp_sha256_block_data_order_avx:
  3681. DB 9,0,0,0 ; same header as above
  3682. DD se_handler wrt ..imagebase ; same handler, different bounds
  3683. DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase ; handler data [0],[1] for the AVX code