
# chacha-x86-macosx.S

# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
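# Entry point. The stack-argument usage below (length at 28(%esp), key at
# 32(%esp), counter/nonce at 36(%esp) after the four pushes) is consistent
# with a prototype of the form
#   GFp_ChaCha20_ctr32(out, in, in_len, key[8], counter[4]).
# Zero-length calls return immediately; otherwise GFp_ia32cap_P is consulted,
# and with FXSR (bit 24 of the first word) and SSSE3 (bit 9 of the second
# word) available control jumps to the SSSE3 code at Lssse3_shortcut,
# falling back to the scalar x86 code otherwise.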
.globl _GFp_ChaCha20_ctr32
.private_extern _GFp_ChaCha20_ctr32
.align 4
_GFp_ChaCha20_ctr32:
L_GFp_ChaCha20_ctr32_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %eax,%eax
cmpl 28(%esp),%eax
je L000no_data
call Lpic_point
Lpic_point:
popl %eax
movl L_GFp_ia32cap_P$non_lazy_ptr-Lpic_point(%eax),%ebp
testl $16777216,(%ebp)
jz L001x86
testl $512,4(%ebp)
jz L001x86
jmp Lssse3_shortcut
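# Scalar x86 path: copy the 32-byte key to 80..108(%esp) and the 16-byte
# counter/nonce to 112..124(%esp). The block counter is pre-decremented here
# because the outer loop below increments it once per 64-byte block.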
L001x86:
movl 32(%esp),%esi
movl 36(%esp),%edi
subl $132,%esp
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edx
movl %eax,80(%esp)
movl %ebx,84(%esp)
movl %ecx,88(%esp)
movl %edx,92(%esp)
movl 16(%esi),%eax
movl 20(%esi),%ebx
movl 24(%esi),%ecx
movl 28(%esi),%edx
movl %eax,96(%esp)
movl %ebx,100(%esp)
movl %ecx,104(%esp)
movl %edx,108(%esp)
movl (%edi),%eax
movl 4(%edi),%ebx
movl 8(%edi),%ecx
movl 12(%edi),%edx
subl $1,%eax
movl %eax,112(%esp)
movl %ebx,116(%esp)
movl %ecx,120(%esp)
movl %edx,124(%esp)
jmp L002entry
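# Per-block setup: build the 16-word ChaCha state at 0..60(%esp), with a few
# words cached in registers. The first row is the "expand 32-byte k" constant
# (0x61707865, 0x3320646e, 0x79622d32, 0x6b206574); the remaining rows hold
# the key, the incremented block counter and the nonce. %ebx is loaded with
# 10, i.e. ten double rounds = 20 rounds.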
.align 4,0x90
L003outer_loop:
movl %ebx,156(%esp)
movl %eax,152(%esp)
movl %ecx,160(%esp)
L002entry:
movl $1634760805,%eax
movl $857760878,4(%esp)
movl $2036477234,8(%esp)
movl $1797285236,12(%esp)
movl 84(%esp),%ebx
movl 88(%esp),%ebp
movl 104(%esp),%ecx
movl 108(%esp),%esi
movl 116(%esp),%edx
movl 120(%esp),%edi
movl %ebx,20(%esp)
movl %ebp,24(%esp)
movl %ecx,40(%esp)
movl %esi,44(%esp)
movl %edx,52(%esp)
movl %edi,56(%esp)
movl 92(%esp),%ebx
movl 124(%esp),%edi
movl 112(%esp),%edx
movl 80(%esp),%ebp
movl 96(%esp),%ecx
movl 100(%esp),%esi
addl $1,%edx
movl %ebx,28(%esp)
movl %edi,60(%esp)
movl %edx,112(%esp)
movl $10,%ebx
jmp L004loop
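# Inner loop: each iteration is one ChaCha double round (four column
# quarter-rounds followed by four diagonal quarter-rounds), with the 16-, 12-,
# 8- and 7-bit rotations done with roll. Part of the state lives in registers
# and the rest is spilled to the stack frame; 128(%esp) holds the round
# counter between iterations.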
.align 4,0x90
L004loop:
addl %ebp,%eax
movl %ebx,128(%esp)
movl %ebp,%ebx
xorl %eax,%edx
roll $16,%edx
addl %edx,%ecx
xorl %ecx,%ebx
movl 52(%esp),%edi
roll $12,%ebx
movl 20(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,48(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,32(%esp)
roll $16,%edi
movl %ebx,16(%esp)
addl %edi,%esi
movl 40(%esp),%ecx
xorl %esi,%ebp
movl 56(%esp),%edx
roll $12,%ebp
movl 24(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,52(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,36(%esp)
roll $16,%edx
movl %ebp,20(%esp)
addl %edx,%ecx
movl 44(%esp),%esi
xorl %ecx,%ebx
movl 60(%esp),%edi
roll $12,%ebx
movl 28(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,56(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,24(%esp)
addl %edi,%esi
xorl %esi,%ebp
roll $12,%ebp
movl 20(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,%edx
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
roll $16,%edx
movl %ebp,28(%esp)
addl %edx,%ecx
xorl %ecx,%ebx
movl 48(%esp),%edi
roll $12,%ebx
movl 24(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,60(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,40(%esp)
roll $16,%edi
movl %ebx,20(%esp)
addl %edi,%esi
movl 32(%esp),%ecx
xorl %esi,%ebp
movl 52(%esp),%edx
roll $12,%ebp
movl 28(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,48(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,44(%esp)
roll $16,%edx
movl %ebp,24(%esp)
addl %edx,%ecx
movl 36(%esp),%esi
xorl %ecx,%ebx
movl 56(%esp),%edi
roll $12,%ebx
movl 16(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,52(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,28(%esp)
addl %edi,%esi
xorl %esi,%ebp
movl 48(%esp),%edx
roll $12,%ebp
movl 128(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,56(%esp)
xorl %esi,%ebp
roll $7,%ebp
decl %ebx
jnz L004loop
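# 20 rounds done: add the original input words back into the state, then (for
# a full 64-byte block) XOR the keystream with the input at %ebx, store to the
# output at %eax, advance both pointers by 64 and loop while data remains.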
movl 160(%esp),%ebx
addl $1634760805,%eax
addl 80(%esp),%ebp
addl 96(%esp),%ecx
addl 100(%esp),%esi
cmpl $64,%ebx
jb L005tail
movl 156(%esp),%ebx
addl 112(%esp),%edx
addl 120(%esp),%edi
xorl (%ebx),%eax
xorl 16(%ebx),%ebp
movl %eax,(%esp)
movl 152(%esp),%eax
xorl 32(%ebx),%ecx
xorl 36(%ebx),%esi
xorl 48(%ebx),%edx
xorl 56(%ebx),%edi
movl %ebp,16(%eax)
movl %ecx,32(%eax)
movl %esi,36(%eax)
movl %edx,48(%eax)
movl %edi,56(%eax)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
xorl 4(%ebx),%ebp
xorl 8(%ebx),%ecx
xorl 12(%ebx),%esi
xorl 20(%ebx),%edx
xorl 24(%ebx),%edi
movl %ebp,4(%eax)
movl %ecx,8(%eax)
movl %esi,12(%eax)
movl %edx,20(%eax)
movl %edi,24(%eax)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
xorl 28(%ebx),%ebp
xorl 40(%ebx),%ecx
xorl 44(%ebx),%esi
xorl 52(%ebx),%edx
xorl 60(%ebx),%edi
leal 64(%ebx),%ebx
movl %ebp,28(%eax)
movl (%esp),%ebp
movl %ecx,40(%eax)
movl 160(%esp),%ecx
movl %esi,44(%eax)
movl %edx,52(%eax)
movl %edi,60(%eax)
movl %ebp,(%eax)
leal 64(%eax),%eax
subl $64,%ecx
jnz L003outer_loop
jmp L006done
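# Tail (fewer than 64 bytes left): finish the keystream block on the stack,
# then XOR it into the remaining input bytes one at a time.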
L005tail:
addl 112(%esp),%edx
addl 120(%esp),%edi
movl %eax,(%esp)
movl %ebp,16(%esp)
movl %ecx,32(%esp)
movl %esi,36(%esp)
movl %edx,48(%esp)
movl %edi,56(%esp)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
movl %ebp,4(%esp)
movl %ecx,8(%esp)
movl %esi,12(%esp)
movl %edx,20(%esp)
movl %edi,24(%esp)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
movl %ebp,28(%esp)
movl 156(%esp),%ebp
movl %ecx,40(%esp)
movl 152(%esp),%ecx
movl %esi,44(%esp)
xorl %esi,%esi
movl %edx,52(%esp)
movl %edi,60(%esp)
xorl %eax,%eax
xorl %edx,%edx
L007tail_loop:
movb (%esi,%ebp,1),%al
movb (%esp,%esi,1),%dl
leal 1(%esi),%esi
xorb %dl,%al
movb %al,-1(%ecx,%esi,1)
decl %ebx
jnz L007tail_loop
L006done:
addl $132,%esp
L000no_data:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
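# SSSE3 path: when at least 256 bytes remain, four blocks are processed per
# outer iteration, with each xmm register holding the same state word across
# all four blocks. The per-block input state is splatted with pshufd into 16
# vectors kept around %ebp; the counter lane is staggered 0,1,2,3 and biased
# by -4 so the paddd at the top of each outer iteration yields the right
# values. Shorter inputs fall through to the one-block code at L0081x.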
.private_extern __ChaCha20_ssse3
.align 4
__ChaCha20_ssse3:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
Lssse3_shortcut:
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl 36(%esp),%ebx
movl %esp,%ebp
subl $524,%esp
andl $-64,%esp
movl %ebp,512(%esp)
leal Lssse3_data-Lpic_point(%eax),%eax
movdqu (%ebx),%xmm3
cmpl $256,%ecx
jb L0081x
movl %edx,516(%esp)
movl %ebx,520(%esp)
subl $256,%ecx
leal 384(%esp),%ebp
movdqu (%edx),%xmm7
pshufd $0,%xmm3,%xmm0
pshufd $85,%xmm3,%xmm1
pshufd $170,%xmm3,%xmm2
pshufd $255,%xmm3,%xmm3
paddd 48(%eax),%xmm0
pshufd $0,%xmm7,%xmm4
pshufd $85,%xmm7,%xmm5
psubd 64(%eax),%xmm0
pshufd $170,%xmm7,%xmm6
pshufd $255,%xmm7,%xmm7
movdqa %xmm0,64(%ebp)
movdqa %xmm1,80(%ebp)
movdqa %xmm2,96(%ebp)
movdqa %xmm3,112(%ebp)
movdqu 16(%edx),%xmm3
movdqa %xmm4,-64(%ebp)
movdqa %xmm5,-48(%ebp)
movdqa %xmm6,-32(%ebp)
movdqa %xmm7,-16(%ebp)
movdqa 32(%eax),%xmm7
leal 128(%esp),%ebx
pshufd $0,%xmm3,%xmm0
pshufd $85,%xmm3,%xmm1
pshufd $170,%xmm3,%xmm2
pshufd $255,%xmm3,%xmm3
pshufd $0,%xmm7,%xmm4
pshufd $85,%xmm7,%xmm5
pshufd $170,%xmm7,%xmm6
pshufd $255,%xmm7,%xmm7
movdqa %xmm0,(%ebp)
movdqa %xmm1,16(%ebp)
movdqa %xmm2,32(%ebp)
movdqa %xmm3,48(%ebp)
movdqa %xmm4,-128(%ebp)
movdqa %xmm5,-112(%ebp)
movdqa %xmm6,-96(%ebp)
movdqa %xmm7,-80(%ebp)
leal 128(%esi),%esi
leal 128(%edi),%edi
jmp L009outer_loop
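# Outer loop for the 4-block path: refresh the working copy of the state
# (around %ebx) from the per-block input state (around %ebp) and advance the
# four counter lanes by 4.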
.align 4,0x90
L009outer_loop:
movdqa -112(%ebp),%xmm1
movdqa -96(%ebp),%xmm2
movdqa -80(%ebp),%xmm3
movdqa -48(%ebp),%xmm5
movdqa -32(%ebp),%xmm6
movdqa -16(%ebp),%xmm7
movdqa %xmm1,-112(%ebx)
movdqa %xmm2,-96(%ebx)
movdqa %xmm3,-80(%ebx)
movdqa %xmm5,-48(%ebx)
movdqa %xmm6,-32(%ebx)
movdqa %xmm7,-16(%ebx)
movdqa 32(%ebp),%xmm2
movdqa 48(%ebp),%xmm3
movdqa 64(%ebp),%xmm4
movdqa 80(%ebp),%xmm5
movdqa 96(%ebp),%xmm6
movdqa 112(%ebp),%xmm7
paddd 64(%eax),%xmm4
movdqa %xmm2,32(%ebx)
movdqa %xmm3,48(%ebx)
movdqa %xmm4,64(%ebx)
movdqa %xmm5,80(%ebx)
movdqa %xmm6,96(%ebx)
movdqa %xmm7,112(%ebx)
movdqa %xmm4,64(%ebp)
movdqa -128(%ebp),%xmm0
movdqa %xmm4,%xmm6
movdqa -64(%ebp),%xmm3
movdqa (%ebp),%xmm4
movdqa 16(%ebp),%xmm5
movl $10,%edx
nop
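# Vectorized double round over four blocks. The 16- and 8-bit rotations use
# pshufb with the byte-shuffle masks at (%eax) and 16(%eax); the 12- and 7-bit
# rotations are done with pslld/psrld/por. %edx counts ten double rounds.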
.align 4,0x90
L010loop:
paddd %xmm3,%xmm0
movdqa %xmm3,%xmm2
pxor %xmm0,%xmm6
pshufb (%eax),%xmm6
paddd %xmm6,%xmm4
pxor %xmm4,%xmm2
movdqa -48(%ebx),%xmm3
movdqa %xmm2,%xmm1
pslld $12,%xmm2
psrld $20,%xmm1
por %xmm1,%xmm2
movdqa -112(%ebx),%xmm1
paddd %xmm2,%xmm0
movdqa 80(%ebx),%xmm7
pxor %xmm0,%xmm6
movdqa %xmm0,-128(%ebx)
pshufb 16(%eax),%xmm6
paddd %xmm6,%xmm4
movdqa %xmm6,64(%ebx)
pxor %xmm4,%xmm2
paddd %xmm3,%xmm1
movdqa %xmm2,%xmm0
pslld $7,%xmm2
psrld $25,%xmm0
pxor %xmm1,%xmm7
por %xmm0,%xmm2
movdqa %xmm4,(%ebx)
pshufb (%eax),%xmm7
movdqa %xmm2,-64(%ebx)
paddd %xmm7,%xmm5
movdqa 32(%ebx),%xmm4
pxor %xmm5,%xmm3
movdqa -32(%ebx),%xmm2
movdqa %xmm3,%xmm0
pslld $12,%xmm3
psrld $20,%xmm0
por %xmm0,%xmm3
movdqa -96(%ebx),%xmm0
paddd %xmm3,%xmm1
movdqa 96(%ebx),%xmm6
pxor %xmm1,%xmm7
movdqa %xmm1,-112(%ebx)
pshufb 16(%eax),%xmm7
paddd %xmm7,%xmm5
movdqa %xmm7,80(%ebx)
pxor %xmm5,%xmm3
paddd %xmm2,%xmm0
movdqa %xmm3,%xmm1
pslld $7,%xmm3
psrld $25,%xmm1
pxor %xmm0,%xmm6
por %xmm1,%xmm3
movdqa %xmm5,16(%ebx)
pshufb (%eax),%xmm6
movdqa %xmm3,-48(%ebx)
paddd %xmm6,%xmm4
movdqa 48(%ebx),%xmm5
pxor %xmm4,%xmm2
movdqa -16(%ebx),%xmm3
movdqa %xmm2,%xmm1
pslld $12,%xmm2
psrld $20,%xmm1
por %xmm1,%xmm2
movdqa -80(%ebx),%xmm1
paddd %xmm2,%xmm0
movdqa 112(%ebx),%xmm7
pxor %xmm0,%xmm6
movdqa %xmm0,-96(%ebx)
pshufb 16(%eax),%xmm6
paddd %xmm6,%xmm4
movdqa %xmm6,96(%ebx)
pxor %xmm4,%xmm2
paddd %xmm3,%xmm1
movdqa %xmm2,%xmm0
pslld $7,%xmm2
psrld $25,%xmm0
pxor %xmm1,%xmm7
por %xmm0,%xmm2
pshufb (%eax),%xmm7
movdqa %xmm2,-32(%ebx)
paddd %xmm7,%xmm5
pxor %xmm5,%xmm3
movdqa -48(%ebx),%xmm2
movdqa %xmm3,%xmm0
pslld $12,%xmm3
psrld $20,%xmm0
por %xmm0,%xmm3
movdqa -128(%ebx),%xmm0
paddd %xmm3,%xmm1
pxor %xmm1,%xmm7
movdqa %xmm1,-80(%ebx)
pshufb 16(%eax),%xmm7
paddd %xmm7,%xmm5
movdqa %xmm7,%xmm6
pxor %xmm5,%xmm3
paddd %xmm2,%xmm0
movdqa %xmm3,%xmm1
pslld $7,%xmm3
psrld $25,%xmm1
pxor %xmm0,%xmm6
por %xmm1,%xmm3
pshufb (%eax),%xmm6
movdqa %xmm3,-16(%ebx)
paddd %xmm6,%xmm4
pxor %xmm4,%xmm2
movdqa -32(%ebx),%xmm3
movdqa %xmm2,%xmm1
pslld $12,%xmm2
psrld $20,%xmm1
por %xmm1,%xmm2
movdqa -112(%ebx),%xmm1
paddd %xmm2,%xmm0
movdqa 64(%ebx),%xmm7
pxor %xmm0,%xmm6
movdqa %xmm0,-128(%ebx)
pshufb 16(%eax),%xmm6
paddd %xmm6,%xmm4
movdqa %xmm6,112(%ebx)
pxor %xmm4,%xmm2
paddd %xmm3,%xmm1
movdqa %xmm2,%xmm0
pslld $7,%xmm2
psrld $25,%xmm0
pxor %xmm1,%xmm7
por %xmm0,%xmm2
movdqa %xmm4,32(%ebx)
pshufb (%eax),%xmm7
movdqa %xmm2,-48(%ebx)
paddd %xmm7,%xmm5
movdqa (%ebx),%xmm4
pxor %xmm5,%xmm3
movdqa -16(%ebx),%xmm2
movdqa %xmm3,%xmm0
pslld $12,%xmm3
psrld $20,%xmm0
por %xmm0,%xmm3
movdqa -96(%ebx),%xmm0
paddd %xmm3,%xmm1
movdqa 80(%ebx),%xmm6
pxor %xmm1,%xmm7
movdqa %xmm1,-112(%ebx)
pshufb 16(%eax),%xmm7
paddd %xmm7,%xmm5
movdqa %xmm7,64(%ebx)
pxor %xmm5,%xmm3
paddd %xmm2,%xmm0
movdqa %xmm3,%xmm1
pslld $7,%xmm3
psrld $25,%xmm1
pxor %xmm0,%xmm6
por %xmm1,%xmm3
movdqa %xmm5,48(%ebx)
pshufb (%eax),%xmm6
movdqa %xmm3,-32(%ebx)
paddd %xmm6,%xmm4
movdqa 16(%ebx),%xmm5
pxor %xmm4,%xmm2
movdqa -64(%ebx),%xmm3
movdqa %xmm2,%xmm1
pslld $12,%xmm2
psrld $20,%xmm1
por %xmm1,%xmm2
movdqa -80(%ebx),%xmm1
paddd %xmm2,%xmm0
movdqa 96(%ebx),%xmm7
pxor %xmm0,%xmm6
movdqa %xmm0,-96(%ebx)
pshufb 16(%eax),%xmm6
paddd %xmm6,%xmm4
movdqa %xmm6,80(%ebx)
pxor %xmm4,%xmm2
paddd %xmm3,%xmm1
movdqa %xmm2,%xmm0
pslld $7,%xmm2
psrld $25,%xmm0
pxor %xmm1,%xmm7
por %xmm0,%xmm2
pshufb (%eax),%xmm7
movdqa %xmm2,-16(%ebx)
paddd %xmm7,%xmm5
pxor %xmm5,%xmm3
movdqa %xmm3,%xmm0
pslld $12,%xmm3
psrld $20,%xmm0
por %xmm0,%xmm3
movdqa -128(%ebx),%xmm0
paddd %xmm3,%xmm1
movdqa 64(%ebx),%xmm6
pxor %xmm1,%xmm7
movdqa %xmm1,-80(%ebx)
pshufb 16(%eax),%xmm7
paddd %xmm7,%xmm5
movdqa %xmm7,96(%ebx)
pxor %xmm5,%xmm3
movdqa %xmm3,%xmm1
pslld $7,%xmm3
psrld $25,%xmm1
por %xmm1,%xmm3
decl %edx
jnz L010loop
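# Rounds done: add the per-block input state back, transpose the four lanes
# back into block order with punpck{l,h}dq/punpck{l,h}qdq, and XOR the
# resulting 256 bytes of keystream into the input, one group of four state
# rows at a time.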
movdqa %xmm3,-64(%ebx)
movdqa %xmm4,(%ebx)
movdqa %xmm5,16(%ebx)
movdqa %xmm6,64(%ebx)
movdqa %xmm7,96(%ebx)
movdqa -112(%ebx),%xmm1
movdqa -96(%ebx),%xmm2
movdqa -80(%ebx),%xmm3
paddd -128(%ebp),%xmm0
paddd -112(%ebp),%xmm1
paddd -96(%ebp),%xmm2
paddd -80(%ebp),%xmm3
movdqa %xmm0,%xmm6
punpckldq %xmm1,%xmm0
movdqa %xmm2,%xmm7
punpckldq %xmm3,%xmm2
punpckhdq %xmm1,%xmm6
punpckhdq %xmm3,%xmm7
movdqa %xmm0,%xmm1
punpcklqdq %xmm2,%xmm0
movdqa %xmm6,%xmm3
punpcklqdq %xmm7,%xmm6
punpckhqdq %xmm2,%xmm1
punpckhqdq %xmm7,%xmm3
movdqu -128(%esi),%xmm4
movdqu -64(%esi),%xmm5
movdqu (%esi),%xmm2
movdqu 64(%esi),%xmm7
leal 16(%esi),%esi
pxor %xmm0,%xmm4
movdqa -64(%ebx),%xmm0
pxor %xmm1,%xmm5
movdqa -48(%ebx),%xmm1
pxor %xmm2,%xmm6
movdqa -32(%ebx),%xmm2
pxor %xmm3,%xmm7
movdqa -16(%ebx),%xmm3
movdqu %xmm4,-128(%edi)
movdqu %xmm5,-64(%edi)
movdqu %xmm6,(%edi)
movdqu %xmm7,64(%edi)
leal 16(%edi),%edi
paddd -64(%ebp),%xmm0
paddd -48(%ebp),%xmm1
paddd -32(%ebp),%xmm2
paddd -16(%ebp),%xmm3
movdqa %xmm0,%xmm6
punpckldq %xmm1,%xmm0
movdqa %xmm2,%xmm7
punpckldq %xmm3,%xmm2
punpckhdq %xmm1,%xmm6
punpckhdq %xmm3,%xmm7
movdqa %xmm0,%xmm1
punpcklqdq %xmm2,%xmm0
movdqa %xmm6,%xmm3
punpcklqdq %xmm7,%xmm6
punpckhqdq %xmm2,%xmm1
punpckhqdq %xmm7,%xmm3
movdqu -128(%esi),%xmm4
movdqu -64(%esi),%xmm5
movdqu (%esi),%xmm2
movdqu 64(%esi),%xmm7
leal 16(%esi),%esi
pxor %xmm0,%xmm4
movdqa (%ebx),%xmm0
pxor %xmm1,%xmm5
movdqa 16(%ebx),%xmm1
pxor %xmm2,%xmm6
movdqa 32(%ebx),%xmm2
pxor %xmm3,%xmm7
movdqa 48(%ebx),%xmm3
movdqu %xmm4,-128(%edi)
movdqu %xmm5,-64(%edi)
movdqu %xmm6,(%edi)
movdqu %xmm7,64(%edi)
leal 16(%edi),%edi
paddd (%ebp),%xmm0
paddd 16(%ebp),%xmm1
paddd 32(%ebp),%xmm2
paddd 48(%ebp),%xmm3
movdqa %xmm0,%xmm6
punpckldq %xmm1,%xmm0
movdqa %xmm2,%xmm7
punpckldq %xmm3,%xmm2
punpckhdq %xmm1,%xmm6
punpckhdq %xmm3,%xmm7
movdqa %xmm0,%xmm1
punpcklqdq %xmm2,%xmm0
movdqa %xmm6,%xmm3
punpcklqdq %xmm7,%xmm6
punpckhqdq %xmm2,%xmm1
punpckhqdq %xmm7,%xmm3
movdqu -128(%esi),%xmm4
movdqu -64(%esi),%xmm5
movdqu (%esi),%xmm2
movdqu 64(%esi),%xmm7
leal 16(%esi),%esi
pxor %xmm0,%xmm4
movdqa 64(%ebx),%xmm0
pxor %xmm1,%xmm5
movdqa 80(%ebx),%xmm1
pxor %xmm2,%xmm6
movdqa 96(%ebx),%xmm2
pxor %xmm3,%xmm7
movdqa 112(%ebx),%xmm3
movdqu %xmm4,-128(%edi)
movdqu %xmm5,-64(%edi)
movdqu %xmm6,(%edi)
movdqu %xmm7,64(%edi)
leal 16(%edi),%edi
paddd 64(%ebp),%xmm0
paddd 80(%ebp),%xmm1
paddd 96(%ebp),%xmm2
paddd 112(%ebp),%xmm3
movdqa %xmm0,%xmm6
punpckldq %xmm1,%xmm0
movdqa %xmm2,%xmm7
punpckldq %xmm3,%xmm2
punpckhdq %xmm1,%xmm6
punpckhdq %xmm3,%xmm7
movdqa %xmm0,%xmm1
punpcklqdq %xmm2,%xmm0
movdqa %xmm6,%xmm3
punpcklqdq %xmm7,%xmm6
punpckhqdq %xmm2,%xmm1
punpckhqdq %xmm7,%xmm3
movdqu -128(%esi),%xmm4
movdqu -64(%esi),%xmm5
movdqu (%esi),%xmm2
movdqu 64(%esi),%xmm7
leal 208(%esi),%esi
pxor %xmm0,%xmm4
pxor %xmm1,%xmm5
pxor %xmm2,%xmm6
pxor %xmm3,%xmm7
movdqu %xmm4,-128(%edi)
movdqu %xmm5,-64(%edi)
movdqu %xmm6,(%edi)
movdqu %xmm7,64(%edi)
leal 208(%edi),%edi
subl $256,%ecx
jnc L009outer_loop
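# Fewer than 256 bytes remain: restore the key and nonce pointers saved at
# 516/520(%esp), undo the 128-byte addressing bias on the input and output
# pointers, rebuild the counter/nonce vector for the next unprocessed block
# (the counter lane is advanced past the four blocks just produced), and fall
# into the one-block path.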
addl $256,%ecx
jz L011done
movl 520(%esp),%ebx
leal -128(%esi),%esi
movl 516(%esp),%edx
leal -128(%edi),%edi
movd 64(%ebp),%xmm2
movdqu (%ebx),%xmm3
paddd 96(%eax),%xmm2
pand 112(%eax),%xmm3
por %xmm2,%xmm3
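# One-block SSSE3 path: the state is kept as four row vectors on the stack
# (constants, key[0..3], key[4..7], counter/nonce); L013outer1x bumps the
# counter by one for each subsequent 64-byte block.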
L0081x:
movdqa 32(%eax),%xmm0
movdqu (%edx),%xmm1
movdqu 16(%edx),%xmm2
movdqa (%eax),%xmm6
movdqa 16(%eax),%xmm7
movl %ebp,48(%esp)
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
movl $10,%edx
jmp L012loop1x
.align 4,0x90
L013outer1x:
movdqa 80(%eax),%xmm3
movdqa (%esp),%xmm0
movdqa 16(%esp),%xmm1
movdqa 32(%esp),%xmm2
paddd 48(%esp),%xmm3
movl $10,%edx
movdqa %xmm3,48(%esp)
jmp L012loop1x
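# Single-block double round. The .byte sequences 102,15,56,0,222 and
# 102,15,56,0,223 are the raw encodings of pshufb %xmm6,%xmm3 and
# pshufb %xmm7,%xmm3 (the rot16/rot8 masks loaded above); the pshufd lines
# rotate the rows between the column and diagonal half-rounds.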
.align 4,0x90
L012loop1x:
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $57,%xmm1,%xmm1
pshufd $147,%xmm3,%xmm3
nop
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $147,%xmm1,%xmm1
pshufd $57,%xmm3,%xmm3
decl %edx
jnz L012loop1x
paddd (%esp),%xmm0
paddd 16(%esp),%xmm1
paddd 32(%esp),%xmm2
paddd 48(%esp),%xmm3
cmpl $64,%ecx
jb L014tail
movdqu (%esi),%xmm4
movdqu 16(%esi),%xmm5
pxor %xmm4,%xmm0
movdqu 32(%esi),%xmm4
pxor %xmm5,%xmm1
movdqu 48(%esi),%xmm5
pxor %xmm4,%xmm2
pxor %xmm5,%xmm3
leal 64(%esi),%esi
movdqu %xmm0,(%edi)
movdqu %xmm1,16(%edi)
movdqu %xmm2,32(%edi)
movdqu %xmm3,48(%edi)
leal 64(%edi),%edi
subl $64,%ecx
jnz L013outer1x
jmp L011done
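# Partial final block: spill the keystream to the stack and XOR it into the
# remaining bytes one at a time; L011done then restores the caller's %esp
# saved at 512(%esp).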
L014tail:
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
xorl %eax,%eax
xorl %edx,%edx
xorl %ebp,%ebp
L015tail_loop:
movb (%esp,%ebp,1),%al
movb (%esi,%ebp,1),%dl
leal 1(%ebp),%ebp
xorb %dl,%al
movb %al,-1(%edi,%ebp,1)
decl %ecx
jnz L015tail_loop
L011done:
movl 512(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
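# Constant table for the SSSE3 code: 16-byte pshufb masks for rotate-left-16
# and rotate-left-8, the ChaCha "expand 32-byte k" words, the 0..3 counter
# stagger, the per-iteration counter increments for the 4-block and 1-block
# paths, and a mask used to splice the nonce back into the counter vector.
# The final .byte run is the CRYPTOGAMS attribution string.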
.align 6,0x90
Lssse3_data:
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long 1634760805,857760878,2036477234,1797285236
.long 0,1,2,3
.long 4,4,4,4
.long 1,0,0,0
.long 4,0,0,0
.long 0,-1,-1,-1
.align 6,0x90
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_GFp_ia32cap_P$non_lazy_ptr:
.indirect_symbol _GFp_ia32cap_P
.long 0
#endif