
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
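# GFp_gcm_init_clmul(Htable, H): precomputes the GHASH key schedule using
# carry-less multiplication. The hash key is loaded from the second
# argument, multiplied by x (one left shift with conditional reduction),
# then squared once in GF(2^128); H, H^2, and their Karatsuba-folded
# halves are stored at offsets 0, 16, and 32 of the table addressed by
# the first argument. Throughout this file, SSSE3/CLMUL instructions are
# emitted as raw .byte sequences so they assemble even where the
# mnemonics are unsupported; the decoded instruction is noted beside each.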
.globl _GFp_gcm_init_clmul
.private_extern _GFp_gcm_init_clmul
.align 4
_GFp_gcm_init_clmul:
L_GFp_gcm_init_clmul_begin:
movl 4(%esp),%edx
movl 8(%esp),%eax
call L000pic
L000pic:
popl %ecx
leal Lbswap-L000pic(%ecx),%ecx
movdqu (%eax),%xmm2
pshufd $78,%xmm2,%xmm2
pshufd $255,%xmm2,%xmm4
movdqa %xmm2,%xmm3
psllq $1,%xmm2
pxor %xmm5,%xmm5
psrlq $63,%xmm3
pcmpgtd %xmm4,%xmm5
pslldq $8,%xmm3
por %xmm3,%xmm2
pand 16(%ecx),%xmm5
pxor %xmm5,%xmm2
movdqa %xmm2,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0  # pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,202,17  # pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,220,0  # pclmulqdq $0x00,%xmm4,%xmm3
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
pshufd $78,%xmm2,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm2,%xmm3
movdqu %xmm2,(%edx)
pxor %xmm0,%xmm4
movdqu %xmm0,16(%edx)
.byte 102,15,58,15,227,8  # palignr $8,%xmm3,%xmm4
movdqu %xmm4,32(%edx)
ret
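# GFp_gcm_gmult_clmul(Xi, Htable): multiplies the 16-byte hash value Xi
# (first argument) by the key stored in Htable (second argument) in
# GF(2^128) and writes the result back over Xi: one Karatsuba
# multiplication followed by reduction modulo x^128 + x^7 + x^2 + x + 1.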
.globl _GFp_gcm_gmult_clmul
.private_extern _GFp_gcm_gmult_clmul
.align 4
_GFp_gcm_gmult_clmul:
L_GFp_gcm_gmult_clmul_begin:
movl 4(%esp),%eax
movl 8(%esp),%edx
call L001pic
L001pic:
popl %ecx
leal Lbswap-L001pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movups (%edx),%xmm2
.byte 102,15,56,0,197  # pshufb %xmm5,%xmm0
movups 32(%edx),%xmm4
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0  # pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,202,17  # pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,220,0  # pclmulqdq $0x00,%xmm4,%xmm3
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.byte 102,15,56,0,197  # pshufb %xmm5,%xmm0
movdqu %xmm0,(%eax)
ret
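# GFp_gcm_ghash_clmul(Xi, Htable, inp, len): folds len bytes of input
# (a multiple of 16) into the hash value Xi. Pairs of blocks are
# multiplied by H^2 and H so the two products can be computed in
# parallel; a lone trailing block takes the single-multiply path at
# L003odd_tail.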
.globl _GFp_gcm_ghash_clmul
.private_extern _GFp_gcm_ghash_clmul
.align 4
_GFp_gcm_ghash_clmul:
L_GFp_gcm_ghash_clmul_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%eax
movl 24(%esp),%edx
movl 28(%esp),%esi
movl 32(%esp),%ebx
call L002pic
L002pic:
popl %ecx
leal Lbswap-L002pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movdqu (%edx),%xmm2
.byte 102,15,56,0,197  # pshufb %xmm5,%xmm0
subl $16,%ebx
jz L003odd_tail
movdqu (%esi),%xmm3
movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221  # pshufb %xmm5,%xmm3
.byte 102,15,56,0,245  # pshufb %xmm5,%xmm6
movdqu 32(%edx),%xmm5
pxor %xmm3,%xmm0
pshufd $78,%xmm6,%xmm3
movdqa %xmm6,%xmm7
pxor %xmm6,%xmm3
leal 32(%esi),%esi
.byte 102,15,58,68,242,0  # pclmulqdq $0x00,%xmm2,%xmm6
.byte 102,15,58,68,250,17  # pclmulqdq $0x11,%xmm2,%xmm7
.byte 102,15,58,68,221,0  # pclmulqdq $0x00,%xmm5,%xmm3
movups 16(%edx),%xmm2
nop
subl $32,%ebx
jbe L004even_tail
jmp L005mod_loop
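# Main loop: each iteration folds 32 bytes, interleaving the reduction of
# the previous result with the multiplication of the next two blocks to
# hide instruction latency.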
.align 5,0x90
L005mod_loop:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
nop
.byte 102,15,58,68,194,0  # pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,202,17  # pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,229,16  # pclmulqdq $0x10,%xmm5,%xmm4
movups (%edx),%xmm2
xorps %xmm6,%xmm0
movdqa (%ecx),%xmm5
xorps %xmm7,%xmm1
movdqu (%esi),%xmm7
pxor %xmm0,%xmm3
movdqu 16(%esi),%xmm6
pxor %xmm1,%xmm3
.byte 102,15,56,0,253  # pshufb %xmm5,%xmm7
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
.byte 102,15,56,0,245  # pshufb %xmm5,%xmm6
pxor %xmm7,%xmm1
movdqa %xmm6,%xmm7
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
.byte 102,15,58,68,242,0  # pclmulqdq $0x00,%xmm2,%xmm6
movups 32(%edx),%xmm5
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
pshufd $78,%xmm7,%xmm3
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm7,%xmm3
pxor %xmm4,%xmm1
.byte 102,15,58,68,250,17  # pclmulqdq $0x11,%xmm2,%xmm7
movups 16(%edx),%xmm2
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.byte 102,15,58,68,221,0  # pclmulqdq $0x00,%xmm5,%xmm3
leal 32(%esi),%esi
subl $32,%ebx
ja L005mod_loop
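# Even tail: the data ended on a pair of blocks; combine the pending
# products and perform the final reduction.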
L004even_tail:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
.byte 102,15,58,68,194,0  # pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,202,17  # pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,229,16  # pclmulqdq $0x10,%xmm5,%xmm4
movdqa (%ecx),%xmm5
xorps %xmm6,%xmm0
xorps %xmm7,%xmm1
pxor %xmm0,%xmm3
pxor %xmm1,%xmm3
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
testl %ebx,%ebx
jnz L006done
movups (%edx),%xmm2
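# Odd tail: a single 16-byte block remains; one multiply-and-reduce.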
L003odd_tail:
movdqu (%esi),%xmm3
.byte 102,15,56,0,221  # pshufb %xmm5,%xmm3
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0  # pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,202,17  # pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,220,0  # pclmulqdq $0x00,%xmm4,%xmm3
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
L006done:
.byte 102,15,56,0,197  # pshufb %xmm5,%xmm0
movdqu %xmm0,(%eax)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
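# Constant pool, addressed PIC-relative as Lbswap above: a 16-byte
# endianness shuffle mask at (%ecx), followed at 16(%ecx) by the
# reduction constant for the GHASH polynomial (0xc2...01).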
.align 6,0x90
Lbswap:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align 6,0x90
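# L007rem_8bit: a remainder lookup table, presumably left over from the
# generating script's table-driven (non-CLMUL) code paths; nothing in
# this file references it.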
L007rem_8bit:
.value 0,450,900,582,1800,1738,1164,1358
.value 3600,4050,3476,3158,2328,2266,2716,2910
.value 7200,7650,8100,7782,6952,6890,6316,6510
.value 4656,5106,4532,4214,5432,5370,5820,6014
.value 14400,14722,15300,14854,16200,16010,15564,15630
.value 13904,14226,13780,13334,12632,12442,13020,13086
.value 9312,9634,10212,9766,9064,8874,8428,8494
.value 10864,11186,10740,10294,11640,11450,12028,12094
.value 28800,28994,29444,29382,30600,30282,29708,30158
.value 32400,32594,32020,31958,31128,30810,31260,31710
.value 27808,28002,28452,28390,27560,27242,26668,27118
.value 25264,25458,24884,24822,26040,25722,26172,26622
.value 18624,18690,19268,19078,20424,19978,19532,19854
.value 18128,18194,17748,17558,16856,16410,16988,17310
.value 21728,21794,22372,22182,21480,21034,20588,20910
.value 23280,23346,22900,22710,24056,23610,24188,24510
.value 57600,57538,57988,58182,58888,59338,58764,58446
.value 61200,61138,60564,60758,59416,59866,60316,59998
.value 64800,64738,65188,65382,64040,64490,63916,63598
.value 62256,62194,61620,61814,62520,62970,63420,63102
.value 55616,55426,56004,56070,56904,57226,56780,56334
.value 55120,54930,54484,54550,53336,53658,54236,53790
.value 50528,50338,50916,50982,49768,50090,49644,49198
.value 52080,51890,51444,51510,52344,52666,53244,52798
.value 37248,36930,37380,37830,38536,38730,38156,38094
.value 40848,40530,39956,40406,39064,39258,39708,39646
.value 36256,35938,36388,36838,35496,35690,35116,35054
.value 33712,33394,32820,33270,33976,34170,34620,34558
.value 43456,43010,43588,43910,44744,44810,44364,44174
.value 42960,42514,42068,42390,41176,41242,41820,41630
.value 46560,46114,46692,47014,45800,45866,45420,45230
.value 48112,47666,47220,47542,48376,48442,49020,48830
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67  # "GHASH for x86, C"
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112  # "RYPTOGAMS by <ap"
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62  # "pro@openssl.org>"
.byte 0
#endif