aesni-gcm-x86_64-nasm.asm 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025
  1. ; This file is generated from a similarly-named Perl script in the BoringSSL
  2. ; source tree. Do not edit by hand.
  3. default rel
  ; "default rel" makes symbol-based memory operands RIP-relative.
  ; The three size keywords below are defined as empty macros, so operands
  ; written as XMMWORD[...] assemble as plain [...] memory references.
  4. %define XMMWORD
  5. %define YMMWORD
  6. %define ZMMWORD
  ; Everything up to the .pdata section (code and the constant tables) is
  ; placed in this single 64-byte-aligned section.
  7. section .text code align=64
  8. ALIGN 32
  ;-----------------------------------------------------------------------
  ; _aesni_ctr32_ghash_6x  (internal helper, non-standard register interface)
  ;
  ; Main loop shared by GFp_aesni_gcm_decrypt and GFp_aesni_gcm_encrypt.
  ; Each pass interleaves the AES rounds for six counter blocks (vaesenc)
  ; with carry-less multiplications (vpclmulqdq) over six buffered blocks.
  ;
  ; Register use, as set up by the two callers in this file:
  ;   rdi    input pointer, advanced 96 bytes per pass
  ;   rsi    output pointer, advanced 96 bytes per pass
  ;   rdx    remaining length in 16-byte blocks (callers shift the byte
  ;          length right by 4); decremented by 6 per pass
  ;   rcx    key schedule + 128; round keys are read at [(16*i-128)+rcx]
  ;   ebp    dword the callers read from offset 240 of the key schedule
  ;   r8     counter block; the next counter is written back every pass
  ;   ebx    dword at [12+r8]; only used to detect counter-byte overflow
  ;   r9     caller's r9 argument + 64; multipliers read at [(k-32)+r9]
  ;   r11    $L$bswap_mask; the tables after it are reached as
  ;          [16+r11] $L$poly, [32+r11] $L$one_msb, [48+r11] $L$two_lsb,
  ;          [64+r11] $L$one_lsb
  ;   r14    read pointer for the data buffered for the next pass
  ;   r15    limit for r14 (r14 advances by 0x60 only while r14 <= r15)
  ;   r10    running byte count, +0x60 per pass
  ;   xmm1   current counter block
  ;   xmm7   block multiplied first on each pass
  ;   xmm8   hash accumulator
  ;   xmm9..xmm14  the six blocks being encrypted
  ;
  ; Stack slots are written as (n+8)+rsp: the extra 8 skips the return
  ; address, so (48+8)+rsp here is the caller's 48+rsp.
  ;
  ; On return xmm9..xmm14 hold the last six output blocks, which are NOT
  ; yet stored (both callers store them at -96..-16 relative to rsi), and
  ; xmm8 holds the accumulator including the final pass.
  ; Clobbers: r12, r13, xmm0..xmm7, xmm15, flags.
  ;-----------------------------------------------------------------------
  9. _aesni_ctr32_ghash_6x:
  ; xmm2 = $L$one_msb: vpaddb with it increments byte 15 of a counter block.
  10. vmovdqu xmm2,XMMWORD[32+r11]
  11. sub rdx,6
  ; Zero xmm4 and the slot at (16+8)+rsp; both are folded into xmm8 at the
  ; start of every pass, so they must be neutral for the first one.
  12. vpxor xmm4,xmm4,xmm4
  ; xmm15 = round key 0.
  13. vmovdqu xmm15,XMMWORD[((0-128))+rcx]
  ; Counters +1..+5 for the first pass (byte-wise add on byte 15 only).
  14. vpaddb xmm10,xmm1,xmm2
  15. vpaddb xmm11,xmm10,xmm2
  16. vpaddb xmm12,xmm11,xmm2
  17. vpaddb xmm13,xmm12,xmm2
  18. vpaddb xmm14,xmm13,xmm2
  19. vpxor xmm9,xmm1,xmm15
  20. vmovdqu XMMWORD[(16+8)+rsp],xmm4
  21. jmp NEAR $L$oop6x
  22. ALIGN 32
  23. $L$oop6x:
  ; 100663296 = 6 << 24: adds 6 to the top byte of ebx, which is byte 15 of
  ; the counter block. A carry means the byte-wise vpaddb increments
  ; wrapped, so the counters are rebuilt with 32-bit adds in $L$handle_ctr32.
  24. add ebx,100663296
  25. jc NEAR $L$handle_ctr32
  26. vmovdqu xmm3,XMMWORD[((0-32))+r9]
  ; xmm1 = counter for the following pass (xmm14 + 1).
  27. vpaddb xmm1,xmm14,xmm2
  28. vpxor xmm10,xmm10,xmm15
  29. vpxor xmm11,xmm11,xmm15
  30. $L$resume_ctr32:
  ; Publish the next counter to [r8]; it is reloaded from there in $L$enc_tail.
  31. vmovdqu XMMWORD[r8],xmm1
  32. vpclmulqdq xmm5,xmm7,xmm3,0x10
  33. vpxor xmm12,xmm12,xmm15
  ; xmm2 is reused as a round-key register from here on.
  34. vmovups xmm2,XMMWORD[((16-128))+rcx]
  35. vpclmulqdq xmm6,xmm7,xmm3,0x01
  ; r12 = (r15 >= r14, unsigned) ? 0x60 : 0, built branch-free with
  ; setnc / neg / and; it is added to r14 further down.
  36. xor r12,r12
  37. cmp r15,r14
  38. vaesenc xmm9,xmm9,xmm2
  39. vmovdqu xmm0,XMMWORD[((48+8))+rsp]
  40. vpxor xmm13,xmm13,xmm15
  41. vpclmulqdq xmm1,xmm7,xmm3,0x00
  42. vaesenc xmm10,xmm10,xmm2
  43. vpxor xmm14,xmm14,xmm15
  44. setnc r12b
  45. vpclmulqdq xmm7,xmm7,xmm3,0x11
  46. vaesenc xmm11,xmm11,xmm2
  47. vmovdqu xmm3,XMMWORD[((16-32))+r9]
  48. neg r12
  49. vaesenc xmm12,xmm12,xmm2
  50. vpxor xmm6,xmm6,xmm5
  51. vpclmulqdq xmm5,xmm0,xmm3,0x00
  ; Fold the values carried over from the previous pass into xmm8
  ; (xmm4 here, the (16+8)+rsp slot a few lines below).
  52. vpxor xmm8,xmm8,xmm4
  53. vaesenc xmm13,xmm13,xmm2
  54. vpxor xmm4,xmm1,xmm5
  55. and r12,0x60
  56. vmovups xmm15,XMMWORD[((32-128))+rcx]
  57. vpclmulqdq xmm1,xmm0,xmm3,0x10
  58. vaesenc xmm14,xmm14,xmm2
  59. vpclmulqdq xmm2,xmm0,xmm3,0x01
  60. lea r14,[r12*1+r14]
  61. vaesenc xmm9,xmm9,xmm15
  62. vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp]
  63. vpclmulqdq xmm3,xmm0,xmm3,0x11
  64. vmovdqu xmm0,XMMWORD[((64+8))+rsp]
  65. vaesenc xmm10,xmm10,xmm15
  ; movbe pairs: each 16-byte block at [r14+k] is loaded as two
  ; byte-reversed qwords and stored with the halves swapped, i.e. fully
  ; byte-reversed, into the stack buffer consumed by the next pass.
  ; Block at 80+r14 -> slot (32+8)+rsp.
  66. movbe r13,QWORD[88+r14]
  67. vaesenc xmm11,xmm11,xmm15
  68. movbe r12,QWORD[80+r14]
  69. vaesenc xmm12,xmm12,xmm15
  70. mov QWORD[((32+8))+rsp],r13
  71. vaesenc xmm13,xmm13,xmm15
  72. mov QWORD[((40+8))+rsp],r12
  73. vmovdqu xmm5,XMMWORD[((48-32))+r9]
  74. vaesenc xmm14,xmm14,xmm15
  75. vmovups xmm15,XMMWORD[((48-128))+rcx]
  76. vpxor xmm6,xmm6,xmm1
  77. vpclmulqdq xmm1,xmm0,xmm5,0x00
  78. vaesenc xmm9,xmm9,xmm15
  79. vpxor xmm6,xmm6,xmm2
  80. vpclmulqdq xmm2,xmm0,xmm5,0x10
  81. vaesenc xmm10,xmm10,xmm15
  82. vpxor xmm7,xmm7,xmm3
  83. vpclmulqdq xmm3,xmm0,xmm5,0x01
  84. vaesenc xmm11,xmm11,xmm15
  85. vpclmulqdq xmm5,xmm0,xmm5,0x11
  86. vmovdqu xmm0,XMMWORD[((80+8))+rsp]
  87. vaesenc xmm12,xmm12,xmm15
  88. vaesenc xmm13,xmm13,xmm15
  89. vpxor xmm4,xmm4,xmm1
  90. vmovdqu xmm1,XMMWORD[((64-32))+r9]
  91. vaesenc xmm14,xmm14,xmm15
  92. vmovups xmm15,XMMWORD[((64-128))+rcx]
  93. vpxor xmm6,xmm6,xmm2
  94. vpclmulqdq xmm2,xmm0,xmm1,0x00
  95. vaesenc xmm9,xmm9,xmm15
  96. vpxor xmm6,xmm6,xmm3
  97. vpclmulqdq xmm3,xmm0,xmm1,0x10
  98. vaesenc xmm10,xmm10,xmm15
  ; Block at 64+r14 -> slot (48+8)+rsp.
  99. movbe r13,QWORD[72+r14]
  100. vpxor xmm7,xmm7,xmm5
  101. vpclmulqdq xmm5,xmm0,xmm1,0x01
  102. vaesenc xmm11,xmm11,xmm15
  103. movbe r12,QWORD[64+r14]
  104. vpclmulqdq xmm1,xmm0,xmm1,0x11
  105. vmovdqu xmm0,XMMWORD[((96+8))+rsp]
  106. vaesenc xmm12,xmm12,xmm15
  107. mov QWORD[((48+8))+rsp],r13
  108. vaesenc xmm13,xmm13,xmm15
  109. mov QWORD[((56+8))+rsp],r12
  110. vpxor xmm4,xmm4,xmm2
  111. vmovdqu xmm2,XMMWORD[((96-32))+r9]
  112. vaesenc xmm14,xmm14,xmm15
  113. vmovups xmm15,XMMWORD[((80-128))+rcx]
  114. vpxor xmm6,xmm6,xmm3
  115. vpclmulqdq xmm3,xmm0,xmm2,0x00
  116. vaesenc xmm9,xmm9,xmm15
  117. vpxor xmm6,xmm6,xmm5
  118. vpclmulqdq xmm5,xmm0,xmm2,0x10
  119. vaesenc xmm10,xmm10,xmm15
  ; Block at 48+r14 -> slot (64+8)+rsp.
  120. movbe r13,QWORD[56+r14]
  121. vpxor xmm7,xmm7,xmm1
  122. vpclmulqdq xmm1,xmm0,xmm2,0x01
  ; The last buffered block is combined with the accumulator before it is
  ; multiplied by the entry at (112-32)+r9.
  123. vpxor xmm8,xmm8,XMMWORD[((112+8))+rsp]
  124. vaesenc xmm11,xmm11,xmm15
  125. movbe r12,QWORD[48+r14]
  126. vpclmulqdq xmm2,xmm0,xmm2,0x11
  127. vaesenc xmm12,xmm12,xmm15
  128. mov QWORD[((64+8))+rsp],r13
  129. vaesenc xmm13,xmm13,xmm15
  130. mov QWORD[((72+8))+rsp],r12
  131. vpxor xmm4,xmm4,xmm3
  132. vmovdqu xmm3,XMMWORD[((112-32))+r9]
  133. vaesenc xmm14,xmm14,xmm15
  134. vmovups xmm15,XMMWORD[((96-128))+rcx]
  135. vpxor xmm6,xmm6,xmm5
  136. vpclmulqdq xmm5,xmm8,xmm3,0x10
  137. vaesenc xmm9,xmm9,xmm15
  138. vpxor xmm6,xmm6,xmm1
  139. vpclmulqdq xmm1,xmm8,xmm3,0x01
  140. vaesenc xmm10,xmm10,xmm15
  ; Block at 32+r14 -> slot (80+8)+rsp.
  141. movbe r13,QWORD[40+r14]
  142. vpxor xmm7,xmm7,xmm2
  143. vpclmulqdq xmm2,xmm8,xmm3,0x00
  144. vaesenc xmm11,xmm11,xmm15
  145. movbe r12,QWORD[32+r14]
  146. vpclmulqdq xmm8,xmm8,xmm3,0x11
  147. vaesenc xmm12,xmm12,xmm15
  148. mov QWORD[((80+8))+rsp],r13
  149. vaesenc xmm13,xmm13,xmm15
  150. mov QWORD[((88+8))+rsp],r12
  151. vpxor xmm6,xmm6,xmm5
  152. vaesenc xmm14,xmm14,xmm15
  153. vpxor xmm6,xmm6,xmm1
  154. vmovups xmm15,XMMWORD[((112-128))+rcx]
  ; xmm6 holds the summed cross terms: its low half is shifted up into the
  ; low sum (xmm4) here, its high half is shifted down into the high sum
  ; (xmm7) further below.
  155. vpslldq xmm5,xmm6,8
  156. vpxor xmm4,xmm4,xmm2
  ; xmm3 = $L$poly, the multiplier used by both folding steps.
  157. vmovdqu xmm3,XMMWORD[16+r11]
  158. vaesenc xmm9,xmm9,xmm15
  159. vpxor xmm7,xmm7,xmm8
  160. vaesenc xmm10,xmm10,xmm15
  161. vpxor xmm4,xmm4,xmm5
  ; Block at 16+r14 -> slot (96+8)+rsp.
  162. movbe r13,QWORD[24+r14]
  163. vaesenc xmm11,xmm11,xmm15
  164. movbe r12,QWORD[16+r14]
  ; First folding step: swap the halves of xmm4 into xmm0 and multiply
  ; xmm4 by $L$poly; the two are XORed together below.
  165. vpalignr xmm0,xmm4,xmm4,8
  166. vpclmulqdq xmm4,xmm4,xmm3,0x10
  167. mov QWORD[((96+8))+rsp],r13
  168. vaesenc xmm12,xmm12,xmm15
  169. mov QWORD[((104+8))+rsp],r12
  170. vaesenc xmm13,xmm13,xmm15
  171. vmovups xmm1,XMMWORD[((128-128))+rcx]
  172. vaesenc xmm14,xmm14,xmm15
  173. vaesenc xmm9,xmm9,xmm1
  174. vmovups xmm15,XMMWORD[((144-128))+rcx]
  175. vaesenc xmm10,xmm10,xmm1
  176. vpsrldq xmm6,xmm6,8
  177. vaesenc xmm11,xmm11,xmm1
  178. vpxor xmm7,xmm7,xmm6
  179. vaesenc xmm12,xmm12,xmm1
  180. vpxor xmm4,xmm4,xmm0
  ; Block at r14 is loaded here; it is stored to the (112+8)/(120+8)+rsp
  ; slots in $L$enc_tail.
  181. movbe r13,QWORD[8+r14]
  182. vaesenc xmm13,xmm13,xmm1
  183. movbe r12,QWORD[r14]
  184. vaesenc xmm14,xmm14,xmm1
  185. vmovups xmm1,XMMWORD[((160-128))+rcx]
  ; Eight rounds are done (keys at offsets 16..128). With ebp < 11 the keys
  ; at 144 and 160 finish the block in $L$enc_tail (10 rounds in total).
  ; Otherwise four more rounds run here, and $L$enc_tail finishes with the
  ; keys at 208 and 224 (14 rounds in total).
  ; NOTE(review): there is no separate exit for ebp == 11 between the two
  ; extra round pairs, so every ebp >= 11 takes the 14-round path;
  ; presumably callers only pass schedules for which that is right - confirm.
  186. cmp ebp,11
  187. jb NEAR $L$enc_tail
  188. vaesenc xmm9,xmm9,xmm15
  189. vaesenc xmm10,xmm10,xmm15
  190. vaesenc xmm11,xmm11,xmm15
  191. vaesenc xmm12,xmm12,xmm15
  192. vaesenc xmm13,xmm13,xmm15
  193. vaesenc xmm14,xmm14,xmm15
  194. vaesenc xmm9,xmm9,xmm1
  195. vaesenc xmm10,xmm10,xmm1
  196. vaesenc xmm11,xmm11,xmm1
  197. vaesenc xmm12,xmm12,xmm1
  198. vaesenc xmm13,xmm13,xmm1
  199. vmovups xmm15,XMMWORD[((176-128))+rcx]
  200. vaesenc xmm14,xmm14,xmm1
  201. vmovups xmm1,XMMWORD[((192-128))+rcx]
  202. vaesenc xmm9,xmm9,xmm15
  203. vaesenc xmm10,xmm10,xmm15
  204. vaesenc xmm11,xmm11,xmm15
  205. vaesenc xmm12,xmm12,xmm15
  206. vaesenc xmm13,xmm13,xmm15
  207. vaesenc xmm14,xmm14,xmm15
  208. vaesenc xmm9,xmm9,xmm1
  209. vaesenc xmm10,xmm10,xmm1
  210. vaesenc xmm11,xmm11,xmm1
  211. vaesenc xmm12,xmm12,xmm1
  212. vaesenc xmm13,xmm13,xmm1
  213. vmovups xmm15,XMMWORD[((208-128))+rcx]
  214. vaesenc xmm14,xmm14,xmm1
  215. vmovups xmm1,XMMWORD[((224-128))+rcx]
  216. jmp NEAR $L$enc_tail
  217. ALIGN 32
  218. $L$handle_ctr32:
  ; Slow path taken when byte 15 of the counter would wrap within this
  ; pass: byte-reverse the counter (mask at [r11]), build +1..+6 with 32-bit
  ; adds of $L$one_lsb / $L$two_lsb, and reverse each result back.
  ; xmm10 and xmm11 are also XORed with round key 0 (xmm15) here, matching
  ; what the fast path has done by the time it reaches $L$resume_ctr32.
  219. vmovdqu xmm0,XMMWORD[r11]
  220. vpshufb xmm6,xmm1,xmm0
  221. vmovdqu xmm5,XMMWORD[48+r11]
  222. vpaddd xmm10,xmm6,XMMWORD[64+r11]
  223. vpaddd xmm11,xmm6,xmm5
  224. vmovdqu xmm3,XMMWORD[((0-32))+r9]
  225. vpaddd xmm12,xmm10,xmm5
  226. vpshufb xmm10,xmm10,xmm0
  227. vpaddd xmm13,xmm11,xmm5
  228. vpshufb xmm11,xmm11,xmm0
  229. vpxor xmm10,xmm10,xmm15
  230. vpaddd xmm14,xmm12,xmm5
  231. vpshufb xmm12,xmm12,xmm0
  232. vpxor xmm11,xmm11,xmm15
  233. vpaddd xmm1,xmm13,xmm5
  234. vpshufb xmm13,xmm13,xmm0
  235. vpshufb xmm14,xmm14,xmm0
  236. vpshufb xmm1,xmm1,xmm0
  237. jmp NEAR $L$resume_ctr32
  238. ALIGN 32
  239. $L$enc_tail:
  ; Here xmm15 is the second-to-last round key and xmm1 the last one.
  ; The last key is XORed with the six input blocks first, so each
  ; vaesenclast below both finishes the cipher and applies the input XOR.
  240. vaesenc xmm9,xmm9,xmm15
  ; Park the high sum (xmm7) in the (16+8)+rsp slot; it is folded into xmm8
  ; at the top of the next pass or in $L$6x_done.
  241. vmovdqu XMMWORD[(16+8)+rsp],xmm7
  ; Second folding step with $L$poly (xmm3); the half-swapped copy goes to xmm8.
  242. vpalignr xmm8,xmm4,xmm4,8
  243. vaesenc xmm10,xmm10,xmm15
  244. vpclmulqdq xmm4,xmm4,xmm3,0x10
  245. vpxor xmm2,xmm1,XMMWORD[rdi]
  246. vaesenc xmm11,xmm11,xmm15
  247. vpxor xmm0,xmm1,XMMWORD[16+rdi]
  248. vaesenc xmm12,xmm12,xmm15
  249. vpxor xmm5,xmm1,XMMWORD[32+rdi]
  250. vaesenc xmm13,xmm13,xmm15
  251. vpxor xmm6,xmm1,XMMWORD[48+rdi]
  252. vaesenc xmm14,xmm14,xmm15
  253. vpxor xmm7,xmm1,XMMWORD[64+rdi]
  254. vpxor xmm3,xmm1,XMMWORD[80+rdi]
  ; Reload the counter stored at $L$resume_ctr32 and precompute counters
  ; +1..+5 (xmm0, xmm5, xmm6, xmm7, xmm3) for the next pass.
  255. vmovdqu xmm1,XMMWORD[r8]
  256. vaesenclast xmm9,xmm9,xmm2
  257. vmovdqu xmm2,XMMWORD[32+r11]
  258. vaesenclast xmm10,xmm10,xmm0
  259. vpaddb xmm0,xmm1,xmm2
  260. mov QWORD[((112+8))+rsp],r13
  261. lea rdi,[96+rdi]
  262. vaesenclast xmm11,xmm11,xmm5
  263. vpaddb xmm5,xmm0,xmm2
  264. mov QWORD[((120+8))+rsp],r12
  265. lea rsi,[96+rsi]
  266. vmovdqu xmm15,XMMWORD[((0-128))+rcx]
  267. vaesenclast xmm12,xmm12,xmm6
  268. vpaddb xmm6,xmm5,xmm2
  269. vaesenclast xmm13,xmm13,xmm7
  270. vpaddb xmm7,xmm6,xmm2
  271. vaesenclast xmm14,xmm14,xmm3
  272. vpaddb xmm3,xmm7,xmm2
  273. add r10,0x60
  ; Borrow means fewer than six blocks remain: leave with the outputs
  ; still in xmm9..xmm14.
  274. sub rdx,0x6
  275. jc NEAR $L$6x_done
  ; Store this pass's six output blocks while moving the precomputed
  ; counters into xmm9..xmm14 (xmm9 is already XORed with round key 0).
  276. vmovups XMMWORD[(-96)+rsi],xmm9
  277. vpxor xmm9,xmm1,xmm15
  278. vmovups XMMWORD[(-80)+rsi],xmm10
  279. vmovdqa xmm10,xmm0
  280. vmovups XMMWORD[(-64)+rsi],xmm11
  281. vmovdqa xmm11,xmm5
  282. vmovups XMMWORD[(-48)+rsi],xmm12
  283. vmovdqa xmm12,xmm6
  284. vmovups XMMWORD[(-32)+rsi],xmm13
  285. vmovdqa xmm13,xmm7
  286. vmovups XMMWORD[(-16)+rsi],xmm14
  287. vmovdqa xmm14,xmm3
  ; xmm7 = first buffered block for the next pass.
  288. vmovdqu xmm7,XMMWORD[((32+8))+rsp]
  289. jmp NEAR $L$oop6x
  290. $L$6x_done:
  ; Fold the two pending values (the (16+8)+rsp slot and xmm4) into xmm8.
  291. vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp]
  292. vpxor xmm8,xmm8,xmm4
  293. DB 0F3h,0C3h ;repret
  294. global GFp_aesni_gcm_decrypt
  ;-----------------------------------------------------------------------
  ; GFp_aesni_gcm_decrypt  (Win64 entry point)
  ;
  ; The prologue moves the Win64 arguments (rcx, rdx, r8, r9, [40+rsp],
  ; [48+rsp]) into rdi, rsi, rdx, rcx, r8, r9. As used below:
  ;   rdi = input, rsi = output, rdx = length in bytes,
  ;   rcx = key schedule (the dword at offset 240 is loaded into ebp),
  ;   r8  = 16-byte counter block (rewritten by _aesni_ctr32_ghash_6x),
  ;   r9  = 16-byte hash state, read at entry and written back at exit;
  ;         the multiplier table is read from r9+32 onward.
  ; Returns in rax the byte count accumulated in r10 (0x60 per pass of the
  ; helper); returns 0 without processing anything when rdx < 0x60.
  ; rdi/rsi, rbx, rbp, r12-r15 and xmm6-xmm15 are saved and restored.
  ;-----------------------------------------------------------------------
  295. ALIGN 32
  296. GFp_aesni_gcm_decrypt:
  297. mov QWORD[8+rsp],rdi ;WIN64 prologue
  298. mov QWORD[16+rsp],rsi
  299. mov rax,rsp
  300. $L$SEH_begin_GFp_aesni_gcm_decrypt:
  301. mov rdi,rcx
  302. mov rsi,rdx
  303. mov rdx,r8
  304. mov rcx,r9
  305. mov r8,QWORD[40+rsp]
  306. mov r9,QWORD[48+rsp]
  ; r10 = return value; stays 0 on the early-out path.
  307. xor r10,r10
  308. cmp rdx,0x60
  309. jb NEAR $L$gcm_dec_abort
  ; rax keeps the entry rsp: the epilogue and gcm_se_handler both locate the
  ; saved registers relative to it.
  310. lea rax,[rsp]
  311. push rbx
  312. push rbp
  313. push r12
  314. push r13
  315. push r14
  316. push r15
  ; 168 bytes below the six pushes hold xmm6..xmm15, at -216..-72 from rax.
  317. lea rsp,[((-168))+rsp]
  318. movaps XMMWORD[(-216)+rax],xmm6
  319. movaps XMMWORD[(-200)+rax],xmm7
  320. movaps XMMWORD[(-184)+rax],xmm8
  321. movaps XMMWORD[(-168)+rax],xmm9
  322. movaps XMMWORD[(-152)+rax],xmm10
  323. movaps XMMWORD[(-136)+rax],xmm11
  324. movaps XMMWORD[(-120)+rax],xmm12
  325. movaps XMMWORD[(-104)+rax],xmm13
  326. movaps XMMWORD[(-88)+rax],xmm14
  327. movaps XMMWORD[(-72)+rax],xmm15
  328. $L$gcm_dec_body:
  329. vzeroupper
  ; xmm1 = counter block; ebx = its last dword (see the overflow check in
  ; _aesni_ctr32_ghash_6x).
  330. vmovdqu xmm1,XMMWORD[r8]
  ; Reserve a 128-byte scratch frame and align it to 128 bytes.
  331. add rsp,-128
  332. mov ebx,DWORD[12+r8]
  333. lea r11,[$L$bswap_mask]
  ; r14 = (key - 128) & 0xf80 and r15 = rsp & 0xf80. If 0 <= r15 - r14 < 768
  ; the stack pointer is lowered by that difference. Judging by the label
  ; name this keeps the scratch frame from aliasing the key schedule -
  ; NOTE(review): the rationale is not stated in this file.
  334. lea r14,[((-128))+rcx]
  335. mov r15,0xf80
  ; xmm8 = hash state, byte-reversed below with the mask in xmm0.
  336. vmovdqu xmm8,XMMWORD[r9]
  337. and rsp,-128
  338. vmovdqu xmm0,XMMWORD[r11]
  ; Bias rcx by +128 and r9 by +64, as the helper's addressing expects.
  339. lea rcx,[128+rcx]
  340. lea r9,[((32+32))+r9]
  341. mov ebp,DWORD[((240-128))+rcx]
  342. vpshufb xmm8,xmm8,xmm0
  343. and r14,r15
  344. and r15,rsp
  345. sub r15,r14
  346. jc NEAR $L$dec_no_key_aliasing
  347. cmp r15,768
  348. jnc NEAR $L$dec_no_key_aliasing
  349. sub rsp,r15
  350. $L$dec_no_key_aliasing:
  ; Preload the first six input blocks, byte-reversed: the block at 80+rdi
  ; stays in xmm7, the others go to stack slots 48..112 in reverse order
  ; (64+rdi -> 48+rsp, ..., rdi -> 112+rsp).
  351. vmovdqu xmm7,XMMWORD[80+rdi]
  ; r14 = start of the input; r15 = input + length - 192, the limit the
  ; helper compares r14 against.
  352. lea r14,[rdi]
  353. vmovdqu xmm4,XMMWORD[64+rdi]
  354. lea r15,[((-192))+rdx*1+rdi]
  355. vmovdqu xmm5,XMMWORD[48+rdi]
  ; rdx = length in 16-byte blocks.
  356. shr rdx,4
  357. xor r10,r10
  358. vmovdqu xmm6,XMMWORD[32+rdi]
  359. vpshufb xmm7,xmm7,xmm0
  360. vmovdqu xmm2,XMMWORD[16+rdi]
  361. vpshufb xmm4,xmm4,xmm0
  362. vmovdqu xmm3,XMMWORD[rdi]
  363. vpshufb xmm5,xmm5,xmm0
  364. vmovdqu XMMWORD[48+rsp],xmm4
  365. vpshufb xmm6,xmm6,xmm0
  366. vmovdqu XMMWORD[64+rsp],xmm5
  367. vpshufb xmm2,xmm2,xmm0
  368. vmovdqu XMMWORD[80+rsp],xmm6
  369. vpshufb xmm3,xmm3,xmm0
  370. vmovdqu XMMWORD[96+rsp],xmm2
  371. vmovdqu XMMWORD[112+rsp],xmm3
  372. call _aesni_ctr32_ghash_6x
  ; The helper returns with its final six output blocks still in registers.
  373. vmovups XMMWORD[(-96)+rsi],xmm9
  374. vmovups XMMWORD[(-80)+rsi],xmm10
  375. vmovups XMMWORD[(-64)+rsi],xmm11
  376. vmovups XMMWORD[(-48)+rsi],xmm12
  377. vmovups XMMWORD[(-32)+rsi],xmm13
  378. vmovups XMMWORD[(-16)+rsi],xmm14
  ; Byte-reverse the accumulator and write it back to the hash state
  ; (r9 was advanced by 64 above, so -64+r9 is the original pointer).
  379. vpshufb xmm8,xmm8,XMMWORD[r11]
  380. vmovdqu XMMWORD[(-64)+r9],xmm8
  381. vzeroupper
  382. movaps xmm6,XMMWORD[((-216))+rax]
  383. movaps xmm7,XMMWORD[((-200))+rax]
  384. movaps xmm8,XMMWORD[((-184))+rax]
  385. movaps xmm9,XMMWORD[((-168))+rax]
  386. movaps xmm10,XMMWORD[((-152))+rax]
  387. movaps xmm11,XMMWORD[((-136))+rax]
  388. movaps xmm12,XMMWORD[((-120))+rax]
  389. movaps xmm13,XMMWORD[((-104))+rax]
  390. movaps xmm14,XMMWORD[((-88))+rax]
  391. movaps xmm15,XMMWORD[((-72))+rax]
  392. mov r15,QWORD[((-48))+rax]
  393. mov r14,QWORD[((-40))+rax]
  394. mov r13,QWORD[((-32))+rax]
  395. mov r12,QWORD[((-24))+rax]
  396. mov rbp,QWORD[((-16))+rax]
  397. mov rbx,QWORD[((-8))+rax]
  398. lea rsp,[rax]
  399. $L$gcm_dec_abort:
  ; Common exit: rax = bytes processed, rdi/rsi restored from the slots
  ; written by the prologue.
  400. mov rax,r10
  401. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  402. mov rsi,QWORD[16+rsp]
  403. DB 0F3h,0C3h ;repret
  404. $L$SEH_end_GFp_aesni_gcm_decrypt:
  405. ALIGN 32
  ;-----------------------------------------------------------------------
  ; _aesni_ctr32_6x  (internal helper, non-standard register interface)
  ;
  ; Encrypts six consecutive counter blocks and XORs them with 96 bytes of
  ; input, with no hashing. Called twice by GFp_aesni_gcm_encrypt.
  ;
  ; In:   xmm1 = counter block, ebx = dword at byte 12 of the counter block,
  ;       xmm0 = byte-reversal mask (used on the overflow path only),
  ;       rcx  = key schedule + 128, ebp = dword from schedule offset 240,
  ;       r11  = $L$bswap_mask, rdi = input, rsi = output
  ; Out:  96 bytes stored at the incoming rsi; rdi and rsi advanced by 96;
  ;       xmm9..xmm14 = the six output blocks; xmm1 = counter + 6
  ; Clobbers: r12, r13, xmm2..xmm6, xmm8, xmm15, flags
  ;
  ; Round structure: (ebp - 1) loop iterations, then one more vaesenc and a
  ; vaesenclast, i.e. ebp + 1 rounds in total.
  ;-----------------------------------------------------------------------
  406. _aesni_ctr32_6x:
  ; xmm4 = round key 0, xmm2 = $L$one_msb, xmm15 = round key 1.
  407. vmovdqu xmm4,XMMWORD[((0-128))+rcx]
  408. vmovdqu xmm2,XMMWORD[32+r11]
  ; r13 = loop count; r12 = address of round key 2.
  409. lea r13,[((-1))+rbp]
  410. vmovups xmm15,XMMWORD[((16-128))+rcx]
  411. lea r12,[((32-128))+rcx]
  412. vpxor xmm9,xmm1,xmm4
  ; Same check as in _aesni_ctr32_ghash_6x: 100663296 = 6 << 24; a carry
  ; means byte 15 of the counter wraps within these six blocks.
  413. add ebx,100663296
  414. jc NEAR $L$handle_ctr32_2
  ; Fast path: byte-wise increments, each counter XORed with round key 0.
  415. vpaddb xmm10,xmm1,xmm2
  416. vpaddb xmm11,xmm10,xmm2
  417. vpxor xmm10,xmm10,xmm4
  418. vpaddb xmm12,xmm11,xmm2
  419. vpxor xmm11,xmm11,xmm4
  420. vpaddb xmm13,xmm12,xmm2
  421. vpxor xmm12,xmm12,xmm4
  422. vpaddb xmm14,xmm13,xmm2
  423. vpxor xmm13,xmm13,xmm4
  424. vpaddb xmm1,xmm14,xmm2
  425. vpxor xmm14,xmm14,xmm4
  426. jmp NEAR $L$oop_ctr32
  427. ALIGN 16
  428. $L$oop_ctr32:
  ; One AES round on all six blocks per iteration; the next round key is
  ; fetched through r12.
  429. vaesenc xmm9,xmm9,xmm15
  430. vaesenc xmm10,xmm10,xmm15
  431. vaesenc xmm11,xmm11,xmm15
  432. vaesenc xmm12,xmm12,xmm15
  433. vaesenc xmm13,xmm13,xmm15
  434. vaesenc xmm14,xmm14,xmm15
  435. vmovups xmm15,XMMWORD[r12]
  436. lea r12,[16+r12]
  437. dec r13d
  438. jnz NEAR $L$oop_ctr32
  ; xmm3 = last round key. It is XORed with the input blocks first, so the
  ; vaesenclast instructions finish the cipher and apply the input XOR.
  439. vmovdqu xmm3,XMMWORD[r12]
  440. vaesenc xmm9,xmm9,xmm15
  441. vpxor xmm4,xmm3,XMMWORD[rdi]
  442. vaesenc xmm10,xmm10,xmm15
  443. vpxor xmm5,xmm3,XMMWORD[16+rdi]
  444. vaesenc xmm11,xmm11,xmm15
  445. vpxor xmm6,xmm3,XMMWORD[32+rdi]
  446. vaesenc xmm12,xmm12,xmm15
  447. vpxor xmm8,xmm3,XMMWORD[48+rdi]
  448. vaesenc xmm13,xmm13,xmm15
  449. vpxor xmm2,xmm3,XMMWORD[64+rdi]
  450. vaesenc xmm14,xmm14,xmm15
  451. vpxor xmm3,xmm3,XMMWORD[80+rdi]
  452. lea rdi,[96+rdi]
  453. vaesenclast xmm9,xmm9,xmm4
  454. vaesenclast xmm10,xmm10,xmm5
  455. vaesenclast xmm11,xmm11,xmm6
  456. vaesenclast xmm12,xmm12,xmm8
  457. vaesenclast xmm13,xmm13,xmm2
  458. vaesenclast xmm14,xmm14,xmm3
  459. vmovups XMMWORD[rsi],xmm9
  460. vmovups XMMWORD[16+rsi],xmm10
  461. vmovups XMMWORD[32+rsi],xmm11
  462. vmovups XMMWORD[48+rsi],xmm12
  463. vmovups XMMWORD[64+rsi],xmm13
  464. vmovups XMMWORD[80+rsi],xmm14
  465. lea rsi,[96+rsi]
  466. DB 0F3h,0C3h ;repret
  467. ALIGN 32
  468. $L$handle_ctr32_2:
  ; Overflow path: byte-reverse the counter with the mask in xmm0, build
  ; +1..+6 using 32-bit adds of $L$one_lsb / $L$two_lsb, reverse each back
  ; and XOR with round key 0 (xmm4), then join the common round loop.
  469. vpshufb xmm6,xmm1,xmm0
  470. vmovdqu xmm5,XMMWORD[48+r11]
  471. vpaddd xmm10,xmm6,XMMWORD[64+r11]
  472. vpaddd xmm11,xmm6,xmm5
  473. vpaddd xmm12,xmm10,xmm5
  474. vpshufb xmm10,xmm10,xmm0
  475. vpaddd xmm13,xmm11,xmm5
  476. vpshufb xmm11,xmm11,xmm0
  477. vpxor xmm10,xmm10,xmm4
  478. vpaddd xmm14,xmm12,xmm5
  479. vpshufb xmm12,xmm12,xmm0
  480. vpxor xmm11,xmm11,xmm4
  481. vpaddd xmm1,xmm13,xmm5
  482. vpshufb xmm13,xmm13,xmm0
  483. vpxor xmm12,xmm12,xmm4
  484. vpshufb xmm14,xmm14,xmm0
  485. vpxor xmm13,xmm13,xmm4
  486. vpshufb xmm1,xmm1,xmm0
  487. vpxor xmm14,xmm14,xmm4
  488. jmp NEAR $L$oop_ctr32
  489. global GFp_aesni_gcm_encrypt
  ;-----------------------------------------------------------------------
  ; GFp_aesni_gcm_encrypt  (Win64 entry point)
  ;
  ; Same argument mapping as GFp_aesni_gcm_decrypt: after the prologue
  ;   rdi = input, rsi = output, rdx = length in bytes,
  ;   rcx = key schedule, r8 = 16-byte counter block,
  ;   r9  = 16-byte hash state followed (from r9+32) by the multiplier table.
  ; Returns in rax the byte count accumulated in r10; returns 0 without
  ; processing anything when rdx < 0x60*3 (288 bytes).
  ;
  ; Structure: two calls to _aesni_ctr32_6x produce the first twelve output
  ; blocks, _aesni_ctr32_ghash_6x then runs the interleaved loop, and the
  ; straight-line code after it hashes the blocks that are still buffered
  ; (six on the stack, six in xmm9..xmm14).
  ;-----------------------------------------------------------------------
  490. ALIGN 32
  491. GFp_aesni_gcm_encrypt:
  492. mov QWORD[8+rsp],rdi ;WIN64 prologue
  493. mov QWORD[16+rsp],rsi
  494. mov rax,rsp
  495. $L$SEH_begin_GFp_aesni_gcm_encrypt:
  496. mov rdi,rcx
  497. mov rsi,rdx
  498. mov rdx,r8
  499. mov rcx,r9
  500. mov r8,QWORD[40+rsp]
  501. mov r9,QWORD[48+rsp]
  ; r10 = return value; stays 0 on the early-out path.
  502. xor r10,r10
  503. cmp rdx,0x60*3
  504. jb NEAR $L$gcm_enc_abort
  ; rax keeps the entry rsp for the epilogue and for gcm_se_handler.
  505. lea rax,[rsp]
  506. push rbx
  507. push rbp
  508. push r12
  509. push r13
  510. push r14
  511. push r15
  ; xmm6..xmm15 are saved at -216..-72 from rax.
  512. lea rsp,[((-168))+rsp]
  513. movaps XMMWORD[(-216)+rax],xmm6
  514. movaps XMMWORD[(-200)+rax],xmm7
  515. movaps XMMWORD[(-184)+rax],xmm8
  516. movaps XMMWORD[(-168)+rax],xmm9
  517. movaps XMMWORD[(-152)+rax],xmm10
  518. movaps XMMWORD[(-136)+rax],xmm11
  519. movaps XMMWORD[(-120)+rax],xmm12
  520. movaps XMMWORD[(-104)+rax],xmm13
  521. movaps XMMWORD[(-88)+rax],xmm14
  522. movaps XMMWORD[(-72)+rax],xmm15
  523. $L$gcm_enc_body:
  524. vzeroupper
  ; xmm1 = counter block; ebx = its last dword.
  525. vmovdqu xmm1,XMMWORD[r8]
  ; 128-byte scratch frame, aligned to 128 bytes.
  526. add rsp,-128
  527. mov ebx,DWORD[12+r8]
  528. lea r11,[$L$bswap_mask]
  ; Same stack adjustment as in the decrypt entry point: if
  ; 0 <= (rsp & 0xf80) - ((key - 128) & 0xf80) < 768, rsp is lowered by
  ; that difference (per the label name, to avoid aliasing the key schedule).
  529. lea r14,[((-128))+rcx]
  530. mov r15,0xf80
  531. lea rcx,[128+rcx]
  ; xmm0 = byte-reversal mask, needed by _aesni_ctr32_6x and below.
  532. vmovdqu xmm0,XMMWORD[r11]
  533. and rsp,-128
  534. mov ebp,DWORD[((240-128))+rcx]
  535. and r14,r15
  536. and r15,rsp
  537. sub r15,r14
  538. jc NEAR $L$enc_no_key_aliasing
  539. cmp r15,768
  540. jnc NEAR $L$enc_no_key_aliasing
  541. sub rsp,r15
  542. $L$enc_no_key_aliasing:
  ; r14 = start of the output, which is the data read back for hashing;
  ; r15 = output + length - 192; rdx = length in 16-byte blocks.
  543. lea r14,[rsi]
  544. lea r15,[((-192))+rdx*1+rsi]
  545. shr rdx,4
  ; First six blocks. Byte-reversed copies go to stack slots 112..48 in
  ; order (xmm9 -> 112+rsp, ..., xmm13 -> 48+rsp); the sixth stays in xmm7,
  ; which the second _aesni_ctr32_6x call does not modify.
  546. call _aesni_ctr32_6x
  547. vpshufb xmm8,xmm9,xmm0
  548. vpshufb xmm2,xmm10,xmm0
  549. vmovdqu XMMWORD[112+rsp],xmm8
  550. vpshufb xmm4,xmm11,xmm0
  551. vmovdqu XMMWORD[96+rsp],xmm2
  552. vpshufb xmm5,xmm12,xmm0
  553. vmovdqu XMMWORD[80+rsp],xmm4
  554. vpshufb xmm6,xmm13,xmm0
  555. vmovdqu XMMWORD[64+rsp],xmm5
  556. vpshufb xmm7,xmm14,xmm0
  557. vmovdqu XMMWORD[48+rsp],xmm6
  ; Second six blocks (already written to the output by the helper).
  558. call _aesni_ctr32_6x
  ; xmm8 = hash state (byte-reversed below); r9 biased by +64 for the
  ; helper. Twelve blocks are already encrypted, so rdx drops by 12 and
  ; r10 starts at 0x60*2.
  559. vmovdqu xmm8,XMMWORD[r9]
  560. lea r9,[((32+32))+r9]
  561. sub rdx,12
  562. mov r10,0x60*2
  563. vpshufb xmm8,xmm8,xmm0
  564. call _aesni_ctr32_ghash_6x
  ; Tail: hash what is still buffered. First the six byte-reversed blocks
  ; in stack slots 32..112, while the final six output blocks
  ; (xmm9..xmm14) are stored to the output and byte-reversed in place.
  565. vmovdqu xmm7,XMMWORD[32+rsp]
  566. vmovdqu xmm0,XMMWORD[r11]
  567. vmovdqu xmm3,XMMWORD[((0-32))+r9]
  ; Each block also contributes a product of (high half XOR low half),
  ; built with vpunpckhqdq + vpxor and multiplied by the table entries
  ; loaded into xmm15.
  568. vpunpckhqdq xmm1,xmm7,xmm7
  569. vmovdqu xmm15,XMMWORD[((32-32))+r9]
  570. vmovups XMMWORD[(-96)+rsi],xmm9
  571. vpshufb xmm9,xmm9,xmm0
  572. vpxor xmm1,xmm1,xmm7
  573. vmovups XMMWORD[(-80)+rsi],xmm10
  574. vpshufb xmm10,xmm10,xmm0
  575. vmovups XMMWORD[(-64)+rsi],xmm11
  576. vpshufb xmm11,xmm11,xmm0
  577. vmovups XMMWORD[(-48)+rsi],xmm12
  578. vpshufb xmm12,xmm12,xmm0
  579. vmovups XMMWORD[(-32)+rsi],xmm13
  580. vpshufb xmm13,xmm13,xmm0
  581. vmovups XMMWORD[(-16)+rsi],xmm14
  582. vpshufb xmm14,xmm14,xmm0
  ; xmm9 is needed as scratch below; its byte-reversed value is parked at
  ; 16+rsp and folded in later (vxorps with [16+rsp]).
  583. vmovdqu XMMWORD[16+rsp],xmm9
  584. vmovdqu xmm6,XMMWORD[48+rsp]
  585. vmovdqu xmm0,XMMWORD[((16-32))+r9]
  586. vpunpckhqdq xmm2,xmm6,xmm6
  587. vpclmulqdq xmm5,xmm7,xmm3,0x00
  588. vpxor xmm2,xmm2,xmm6
  589. vpclmulqdq xmm7,xmm7,xmm3,0x11
  590. vpclmulqdq xmm1,xmm1,xmm15,0x00
  591. vmovdqu xmm9,XMMWORD[64+rsp]
  592. vpclmulqdq xmm4,xmm6,xmm0,0x00
  593. vmovdqu xmm3,XMMWORD[((48-32))+r9]
  594. vpxor xmm4,xmm4,xmm5
  595. vpunpckhqdq xmm5,xmm9,xmm9
  596. vpclmulqdq xmm6,xmm6,xmm0,0x11
  597. vpxor xmm5,xmm5,xmm9
  598. vpxor xmm6,xmm6,xmm7
  599. vpclmulqdq xmm2,xmm2,xmm15,0x10
  600. vmovdqu xmm15,XMMWORD[((80-32))+r9]
  601. vpxor xmm2,xmm2,xmm1
  602. vmovdqu xmm1,XMMWORD[80+rsp]
  603. vpclmulqdq xmm7,xmm9,xmm3,0x00
  604. vmovdqu xmm0,XMMWORD[((64-32))+r9]
  605. vpxor xmm7,xmm7,xmm4
  606. vpunpckhqdq xmm4,xmm1,xmm1
  607. vpclmulqdq xmm9,xmm9,xmm3,0x11
  608. vpxor xmm4,xmm4,xmm1
  609. vpxor xmm9,xmm9,xmm6
  610. vpclmulqdq xmm5,xmm5,xmm15,0x00
  611. vpxor xmm5,xmm5,xmm2
  612. vmovdqu xmm2,XMMWORD[96+rsp]
  613. vpclmulqdq xmm6,xmm1,xmm0,0x00
  614. vmovdqu xmm3,XMMWORD[((96-32))+r9]
  615. vpxor xmm6,xmm6,xmm7
  616. vpunpckhqdq xmm7,xmm2,xmm2
  617. vpclmulqdq xmm1,xmm1,xmm0,0x11
  618. vpxor xmm7,xmm7,xmm2
  619. vpxor xmm1,xmm1,xmm9
  620. vpclmulqdq xmm4,xmm4,xmm15,0x10
  621. vmovdqu xmm15,XMMWORD[((128-32))+r9]
  622. vpxor xmm4,xmm4,xmm5
  ; The block in slot 112 is combined with the accumulator before being
  ; multiplied by the entry at (112-32)+r9.
  623. vpxor xmm8,xmm8,XMMWORD[112+rsp]
  624. vpclmulqdq xmm5,xmm2,xmm3,0x00
  625. vmovdqu xmm0,XMMWORD[((112-32))+r9]
  626. vpunpckhqdq xmm9,xmm8,xmm8
  627. vpxor xmm5,xmm5,xmm6
  628. vpclmulqdq xmm2,xmm2,xmm3,0x11
  629. vpxor xmm9,xmm9,xmm8
  630. vpxor xmm2,xmm2,xmm1
  631. vpclmulqdq xmm7,xmm7,xmm15,0x00
  632. vpxor xmm4,xmm7,xmm4
  ; From here the second group (xmm14 down to xmm10) is started while the
  ; first group's sums are being combined.
  633. vpclmulqdq xmm6,xmm8,xmm0,0x00
  634. vmovdqu xmm3,XMMWORD[((0-32))+r9]
  635. vpunpckhqdq xmm1,xmm14,xmm14
  636. vpclmulqdq xmm8,xmm8,xmm0,0x11
  637. vpxor xmm1,xmm1,xmm14
  638. vpxor xmm5,xmm6,xmm5
  639. vpclmulqdq xmm9,xmm9,xmm15,0x10
  640. vmovdqu xmm15,XMMWORD[((32-32))+r9]
  641. vpxor xmm7,xmm8,xmm2
  642. vpxor xmm6,xmm9,xmm4
  643. vmovdqu xmm0,XMMWORD[((16-32))+r9]
  ; First group: xmm5 = low sum, xmm7 = high sum, xmm6 = middle sum.
  ; The middle sum has low and high XORed in, then is split across the
  ; two halves (vpslldq into xmm8 = low, vpsrldq into xmm7 = high).
  644. vpxor xmm9,xmm7,xmm5
  645. vpclmulqdq xmm4,xmm14,xmm3,0x00
  646. vpxor xmm6,xmm6,xmm9
  647. vpunpckhqdq xmm2,xmm13,xmm13
  648. vpclmulqdq xmm14,xmm14,xmm3,0x11
  649. vpxor xmm2,xmm2,xmm13
  650. vpslldq xmm9,xmm6,8
  651. vpclmulqdq xmm1,xmm1,xmm15,0x00
  652. vpxor xmm8,xmm5,xmm9
  653. vpsrldq xmm6,xmm6,8
  654. vpxor xmm7,xmm7,xmm6
  655. vpclmulqdq xmm5,xmm13,xmm0,0x00
  656. vmovdqu xmm3,XMMWORD[((48-32))+r9]
  657. vpxor xmm5,xmm5,xmm4
  658. vpunpckhqdq xmm9,xmm12,xmm12
  659. vpclmulqdq xmm13,xmm13,xmm0,0x11
  660. vpxor xmm9,xmm9,xmm12
  661. vpxor xmm13,xmm13,xmm14
  ; Two folding steps on the first group's result: each swaps the halves
  ; of xmm8 into xmm14 and multiplies xmm8 by $L$poly at [16+r11].
  662. vpalignr xmm14,xmm8,xmm8,8
  663. vpclmulqdq xmm2,xmm2,xmm15,0x10
  664. vmovdqu xmm15,XMMWORD[((80-32))+r9]
  665. vpxor xmm2,xmm2,xmm1
  666. vpclmulqdq xmm4,xmm12,xmm3,0x00
  667. vmovdqu xmm0,XMMWORD[((64-32))+r9]
  668. vpxor xmm4,xmm4,xmm5
  669. vpunpckhqdq xmm1,xmm11,xmm11
  670. vpclmulqdq xmm12,xmm12,xmm3,0x11
  671. vpxor xmm1,xmm1,xmm11
  672. vpxor xmm12,xmm12,xmm13
  ; Fold in the block parked at 16+rsp (byte-reversed former xmm9).
  673. vxorps xmm7,xmm7,XMMWORD[16+rsp]
  674. vpclmulqdq xmm9,xmm9,xmm15,0x00
  675. vpxor xmm9,xmm9,xmm2
  676. vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10
  677. vxorps xmm8,xmm8,xmm14
  678. vpclmulqdq xmm5,xmm11,xmm0,0x00
  679. vmovdqu xmm3,XMMWORD[((96-32))+r9]
  680. vpxor xmm5,xmm5,xmm4
  681. vpunpckhqdq xmm2,xmm10,xmm10
  682. vpclmulqdq xmm11,xmm11,xmm0,0x11
  683. vpxor xmm2,xmm2,xmm10
  684. vpalignr xmm14,xmm8,xmm8,8
  685. vpxor xmm11,xmm11,xmm12
  686. vpclmulqdq xmm1,xmm1,xmm15,0x10
  687. vmovdqu xmm15,XMMWORD[((128-32))+r9]
  688. vpxor xmm1,xmm1,xmm9
  689. vxorps xmm14,xmm14,xmm7
  690. vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10
  691. vxorps xmm8,xmm8,xmm14
  ; xmm8 now carries the first group's folded result together with the
  ; parked block; it is multiplied by the entry at (112-32)+r9 as the last
  ; term of the second group.
  692. vpclmulqdq xmm4,xmm10,xmm3,0x00
  693. vmovdqu xmm0,XMMWORD[((112-32))+r9]
  694. vpxor xmm4,xmm4,xmm5
  695. vpunpckhqdq xmm9,xmm8,xmm8
  696. vpclmulqdq xmm10,xmm10,xmm3,0x11
  697. vpxor xmm9,xmm9,xmm8
  698. vpxor xmm10,xmm10,xmm11
  699. vpclmulqdq xmm2,xmm2,xmm15,0x00
  700. vpxor xmm2,xmm2,xmm1
  701. vpclmulqdq xmm5,xmm8,xmm0,0x00
  702. vpclmulqdq xmm7,xmm8,xmm0,0x11
  703. vpxor xmm5,xmm5,xmm4
  704. vpclmulqdq xmm6,xmm9,xmm15,0x10
  705. vpxor xmm7,xmm7,xmm10
  706. vpxor xmm6,xmm6,xmm2
  ; Second group: same recombination (xmm5 low, xmm7 high, xmm6 middle)
  ; followed by the same two folding steps with $L$poly (xmm3).
  707. vpxor xmm4,xmm7,xmm5
  708. vpxor xmm6,xmm6,xmm4
  709. vpslldq xmm1,xmm6,8
  710. vmovdqu xmm3,XMMWORD[16+r11]
  711. vpsrldq xmm6,xmm6,8
  712. vpxor xmm8,xmm5,xmm1
  713. vpxor xmm7,xmm7,xmm6
  714. vpalignr xmm2,xmm8,xmm8,8
  715. vpclmulqdq xmm8,xmm8,xmm3,0x10
  716. vpxor xmm8,xmm8,xmm2
  717. vpalignr xmm2,xmm8,xmm8,8
  718. vpclmulqdq xmm8,xmm8,xmm3,0x10
  719. vpxor xmm2,xmm2,xmm7
  720. vpxor xmm8,xmm8,xmm2
  ; Byte-reverse the result and write it back to the hash state
  ; (-64+r9 is the r9 value passed by the caller).
  721. vpshufb xmm8,xmm8,XMMWORD[r11]
  722. vmovdqu XMMWORD[(-64)+r9],xmm8
  723. vzeroupper
  724. movaps xmm6,XMMWORD[((-216))+rax]
  725. movaps xmm7,XMMWORD[((-200))+rax]
  726. movaps xmm8,XMMWORD[((-184))+rax]
  727. movaps xmm9,XMMWORD[((-168))+rax]
  728. movaps xmm10,XMMWORD[((-152))+rax]
  729. movaps xmm11,XMMWORD[((-136))+rax]
  730. movaps xmm12,XMMWORD[((-120))+rax]
  731. movaps xmm13,XMMWORD[((-104))+rax]
  732. movaps xmm14,XMMWORD[((-88))+rax]
  733. movaps xmm15,XMMWORD[((-72))+rax]
  734. mov r15,QWORD[((-48))+rax]
  735. mov r14,QWORD[((-40))+rax]
  736. mov r13,QWORD[((-32))+rax]
  737. mov r12,QWORD[((-24))+rax]
  738. mov rbp,QWORD[((-16))+rax]
  739. mov rbx,QWORD[((-8))+rax]
  740. lea rsp,[rax]
  741. $L$gcm_enc_abort:
  ; Common exit: rax = bytes processed, rdi/rsi restored from the slots
  ; written by the prologue.
  742. mov rax,r10
  743. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  744. mov rsi,QWORD[16+rsp]
  745. DB 0F3h,0C3h ;repret
  746. $L$SEH_end_GFp_aesni_gcm_encrypt:
  747. ALIGN 64
  ; Constant tables. The code keeps r11 = $L$bswap_mask and reaches the
  ; other 16-byte entries by fixed offset, so their order and size must not
  ; change: +16 $L$poly, +32 $L$one_msb, +48 $L$two_lsb, +64 $L$one_lsb.
  748. $L$bswap_mask:
  ; vpshufb control that reverses the 16 bytes of a register.
  749. DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
  750. $L$poly:
  ; Multiplier for the folding (reduction) steps: 0xc2 in the top byte.
  751. DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
  752. $L$one_msb:
  ; 1 in byte 15: byte-wise counter increment, used with vpaddb.
  753. DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
  754. $L$two_lsb:
  ; 2 in byte 0: used with vpaddd on a byte-reversed counter.
  755. DB 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
  756. $L$one_lsb:
  ; 1 in byte 0: used with vpaddd on a byte-reversed counter.
  757. DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
  ; NUL-terminated ASCII identification string:
  ; "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>"
  758. DB 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108
  759. DB 101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82
  760. DB 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
  761. DB 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
  762. ALIGN 64
  763. EXTERN __imp_RtlVirtualUnwind
  ;-----------------------------------------------------------------------
  ; gcm_se_handler - Win64 language-specific exception handler registered
  ; for both GFp_aesni_gcm_* entry points through the .xdata records.
  ;
  ; In (Win64 handler convention): r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*.
  ; The offsets used below follow the Windows x64 CONTEXT layout
  ; (120 Rax, 144 Rbx, 152 Rsp, 160 Rbp, 168 Rsi, 176 Rdi, 216..240
  ; R12..R15, 248 Rip, 512 Xmm6) and DISPATCHER_CONTEXT layout (0 ControlPc,
  ; 8 ImageBase, 16 FunctionEntry, 24 EstablisherFrame, 40 ContextRecord,
  ; 56 HandlerData).
  ;
  ; HandlerData holds two image-relative addresses: the body label and the
  ; abort label of the faulting function. If Rip lies in [body, abort) the
  ; full frame was set up, so the callee-saved registers are recovered from
  ; the save area addressed through the function's rax (the entry rsp).
  ; In every case Rsp/Rsi/Rdi are fixed up, the CONTEXT is copied into the
  ; dispatcher's record and RtlVirtualUnwind is called.
  ; Returns eax = 1 (ExceptionContinueSearch).
  ;-----------------------------------------------------------------------
  764. ALIGN 16
  765. gcm_se_handler:
  766. push rsi
  767. push rdi
  768. push rbx
  769. push rbp
  770. push r12
  771. push r13
  772. push r14
  773. push r15
  774. pushfq
  ; Room for the stack-passed arguments of RtlVirtualUnwind.
  775. sub rsp,64
  ; rax = context->Rax, rbx = context->Rip.
  776. mov rax,QWORD[120+r8]
  777. mov rbx,QWORD[248+r8]
  ; rsi = disp->ImageBase, r11 = disp->HandlerData.
  778. mov rsi,QWORD[8+r9]
  779. mov r11,QWORD[56+r9]
  ; r10 = ImageBase + HandlerData[0] (body label). Rip below it: the frame
  ; is not set up yet and rax still holds the entry rsp.
  780. mov r10d,DWORD[r11]
  781. lea r10,[r10*1+rsi]
  782. cmp rbx,r10
  783. jb NEAR $L$common_seh_tail
  ; rax = context->Rsp; r10 = ImageBase + HandlerData[1] (abort label).
  ; Rip at or past it: the frame is already torn down, so Rsp is used as is.
  784. mov rax,QWORD[152+r8]
  785. mov r10d,DWORD[4+r11]
  786. lea r10,[r10*1+rsi]
  787. cmp rbx,r10
  788. jae NEAR $L$common_seh_tail
  ; Inside the body: the function's rax is the entry rsp. Reload the six
  ; pushed registers from below it and patch them into the CONTEXT.
  789. mov rax,QWORD[120+r8]
  790. mov r15,QWORD[((-48))+rax]
  791. mov r14,QWORD[((-40))+rax]
  792. mov r13,QWORD[((-32))+rax]
  793. mov r12,QWORD[((-24))+rax]
  794. mov rbp,QWORD[((-16))+rax]
  795. mov rbx,QWORD[((-8))+rax]
  796. mov QWORD[240+r8],r15
  797. mov QWORD[232+r8],r14
  798. mov QWORD[224+r8],r13
  799. mov QWORD[216+r8],r12
  800. mov QWORD[160+r8],rbp
  801. mov QWORD[144+r8],rbx
  ; Copy the ten saved xmm registers (20 qwords) from the save area at
  ; -216+rax to context->Xmm6 onward.
  802. lea rsi,[((-216))+rax]
  803. lea rdi,[512+r8]
  804. mov ecx,20
  ; Bytes FC F3 48 A5 = cld ; rep movsq
  805. DD 0xa548f3fc
  806. $L$common_seh_tail:
  ; rax = rsp at function entry: the prologue stored rdi/rsi at 8 and 16
  ; above it. Put the recovered Rsp, Rsi and Rdi into the CONTEXT.
  807. mov rdi,QWORD[8+rax]
  808. mov rsi,QWORD[16+rax]
  809. mov QWORD[152+r8],rax
  810. mov QWORD[168+r8],rsi
  811. mov QWORD[176+r8],rdi
  ; Copy the whole CONTEXT (154 qwords) into disp->ContextRecord.
  812. mov rdi,QWORD[40+r9]
  813. mov rsi,r8
  814. mov ecx,154
  ; Bytes FC F3 48 A5 = cld ; rep movsq
  815. DD 0xa548f3fc
  ; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry, ContextRecord,
  ;                  &HandlerData, &EstablisherFrame, NULL)
  816. mov rsi,r9
  817. xor rcx,rcx
  818. mov rdx,QWORD[8+rsi]
  819. mov r8,QWORD[rsi]
  820. mov r9,QWORD[16+rsi]
  821. mov r10,QWORD[40+rsi]
  822. lea r11,[56+rsi]
  823. lea r12,[24+rsi]
  824. mov QWORD[32+rsp],r10
  825. mov QWORD[40+rsp],r11
  826. mov QWORD[48+rsp],r12
  827. mov QWORD[56+rsp],rcx
  828. call QWORD[__imp_RtlVirtualUnwind]
  ; eax = 1: ExceptionContinueSearch.
  829. mov eax,1
  830. add rsp,64
  831. popfq
  832. pop r15
  833. pop r14
  834. pop r13
  835. pop r12
  836. pop rbp
  837. pop rbx
  838. pop rdi
  839. pop rsi
  840. DB 0F3h,0C3h ;repret
  841. section .pdata rdata align=4
  ; Function table: one entry per public function, each made of three
  ; image-relative addresses (begin label, end label, unwind-info record).
  842. ALIGN 4
  843. DD $L$SEH_begin_GFp_aesni_gcm_decrypt wrt ..imagebase
  844. DD $L$SEH_end_GFp_aesni_gcm_decrypt wrt ..imagebase
  845. DD $L$SEH_gcm_dec_info wrt ..imagebase
  846. DD $L$SEH_begin_GFp_aesni_gcm_encrypt wrt ..imagebase
  847. DD $L$SEH_end_GFp_aesni_gcm_encrypt wrt ..imagebase
  848. DD $L$SEH_GFp_gcm_enc_info wrt ..imagebase
  849. section .xdata rdata align=8
  ; Unwind-info records. First byte 9 = version 1 with the exception-handler
  ; flag set; the remaining header bytes are zero (no unwind codes), so all
  ; unwinding is done by gcm_se_handler. The two addresses after the handler
  ; are the HandlerData it reads: the body label and the abort label.
  850. ALIGN 8
  851. $L$SEH_gcm_dec_info:
  852. DB 9,0,0,0
  853. DD gcm_se_handler wrt ..imagebase
  854. DD $L$gcm_dec_body wrt ..imagebase,$L$gcm_dec_abort wrt ..imagebase
  855. $L$SEH_GFp_gcm_enc_info:
  856. DB 9,0,0,0
  857. DD gcm_se_handler wrt ..imagebase
  858. DD $L$gcm_enc_body wrt ..imagebase,$L$gcm_enc_abort wrt ..imagebase