12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922 |
- ; This file is generated from a similarly-named Perl script in the BoringSSL
- ; source tree. Do not edit by hand.
- ; NOTE(review): every line in this chunk carries a leading "- " marker --
- ; this appears to be the deletion side of a diff; the NASM source proper
- ; follows the marker.  TODO confirm against the original file.
- default rel
- %define XMMWORD
- %define YMMWORD
- %define ZMMWORD
- section .text code align=64
- EXTERN GFp_ia32cap_P
- ; --- Constant tables shared by the scalar / SSSE3 / AVX2 ChaCha20 paths ---
- ALIGN 64
- $L$zero:
- DD 0,0,0,0
- ; Counter increment of 1 (low lane only) for the one-block-at-a-time paths.
- $L$one:
- DD 1,0,0,0
- ; Per-lane counter offsets 0..3 for the 4-way interleaved path.
- $L$inc:
- DD 0,1,2,3
- ; Counter step of 4 per outer iteration of the 4-way path.
- $L$four:
- DD 4,4,4,4
- ; Per-lane counter offsets for the 8-way (AVX2) path, even lanes first.
- $L$incy:
- DD 0,2,4,6,1,3,5,7
- ; Counter step of 8 per outer iteration of the 8-way path.
- $L$eight:
- DD 8,8,8,8,8,8,8,8
- ; pshufb masks implementing 16-bit and 24-bit left-rotates of each dword.
- $L$rot16:
- DB 0x2,0x3,0x0,0x1,0x6,0x7,0x4,0x5,0xa,0xb,0x8,0x9,0xe,0xf,0xc,0xd
- $L$rot24:
- DB 0x3,0x0,0x1,0x2,0x7,0x4,0x5,0x6,0xb,0x8,0x9,0xa,0xf,0xc,0xd,0xe
- ; "expand 32-byte k" -- the ChaCha20 sigma constant (RFC 8439).
- $L$sigma:
- DB 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107
- DB 0
- ALIGN 64
- ; The *z / sixteen tables below are laid out for a wider SIMD path that is
- ; not visible in this chunk (presumably AVX-512) -- TODO confirm.
- $L$zeroz:
- DD 0,0,0,0,1,0,0,0,2,0,0,0,3,0,0,0
- $L$fourz:
- DD 4,0,0,0,4,0,0,0,4,0,0,0,4,0,0,0
- $L$incz:
- DD 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
- $L$sixteen:
- DD 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
- ; ASCII: "ChaCha20 for x86_64, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated.
- DB 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
- DB 95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32
- DB 98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115
- DB 108,46,111,114,103,62,0
- ; -----------------------------------------------------------------------
- ; void GFp_ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len,
- ;                         const uint32_t key[8], const uint32_t counter[4]);
- ; Win64 entry point.  The prologue spills rdi/rsi (nonvolatile on Win64)
- ; and remaps the Microsoft-x64 arguments (rcx,rdx,r8,r9,[40+rsp]) onto
- ; the registers the body uses: rdi=out, rsi=in, rdx=len, rcx=key, r8=ctr.
- ; Dispatches to the SSSE3 path when the CPU advertises SSSE3; otherwise
- ; runs this scalar integer-register implementation (20 rounds, 64-byte
- ; blocks, byte-granular tail).
- ; -----------------------------------------------------------------------
- global GFp_ChaCha20_ctr32
- ALIGN 64
- GFp_ChaCha20_ctr32:
- mov QWORD[8+rsp],rdi ;WIN64 prologue
- mov QWORD[16+rsp],rsi
- mov rax,rsp
- $L$SEH_begin_GFp_ChaCha20_ctr32:
- mov rdi,rcx
- mov rsi,rdx
- mov rdx,r8
- mov rcx,r9
- mov r8,QWORD[40+rsp]
- cmp rdx,0 ; nothing to do for len == 0
- je NEAR $L$no_data
- mov r10,QWORD[((GFp_ia32cap_P+4))] ; CPU capability words
- test r10d,512 ; bit 9: SSSE3 available?
- jnz NEAR $L$ChaCha20_ssse3
- push rbx
- push rbp
- push r12
- push r13
- push r14
- push r15
- sub rsp,64+24 ; 64-byte state/keystream scratch + 3 qword spill slots
- $L$ctr32_body:
- ; Stack layout: [16..47+rsp] = key words 0..7, [48..63+rsp] = ctr||nonce.
- ; xmm3 keeps the live counter block; xmm4 = $L$one (counter increment).
- movdqu xmm1,XMMWORD[rcx]
- movdqu xmm2,XMMWORD[16+rcx]
- movdqu xmm3,XMMWORD[r8]
- movdqa xmm4,XMMWORD[$L$one]
- movdqa XMMWORD[16+rsp],xmm1
- movdqa XMMWORD[32+rsp],xmm2
- movdqa XMMWORD[48+rsp],xmm3
- mov rbp,rdx ; rbp = bytes remaining
- jmp NEAR $L$oop_outer
- ALIGN 32
- $L$oop_outer:
- ; Load the 16-word state: x0..x3 = sigma in eax..edx, x4..x7 in
- ; r8d..r11d, x8/x9 in esi/edi (unpacked from xmm2 below), x10/x11
- ; spilled at [40/44+rsp], x12 (counter) in r12d, x13..x15 in r13d..r15d.
- mov eax,0x61707865
- mov ebx,0x3320646e
- mov ecx,0x79622d32
- mov edx,0x6b206574
- mov r8d,DWORD[16+rsp]
- mov r9d,DWORD[20+rsp]
- mov r10d,DWORD[24+rsp]
- mov r11d,DWORD[28+rsp]
- movd r12d,xmm3 ; x12 = current block counter
- mov r13d,DWORD[52+rsp]
- mov r14d,DWORD[56+rsp]
- mov r15d,DWORD[60+rsp]
- mov QWORD[((64+0))+rsp],rbp ; spill remaining-length
- mov ebp,10 ; 10 double rounds = 20 ChaCha rounds
- mov QWORD[((64+8))+rsp],rsi ; spill input pointer
- DB 102,72,15,126,214 ; movq rsi,xmm2 (hand-encoded): rsi = x8 | x9<<32
- mov QWORD[((64+16))+rsp],rdi ; spill output pointer
- mov rdi,rsi
- shr rdi,32 ; esi = x8, edi = x9
- jmp NEAR $L$oop
- ALIGN 32
- $L$oop:
- ; Column rounds, columns 0/1:
- ;   a+=b; d^=a; d<<<=16; c+=d; b^=c; b<<<=12;
- ;   a+=b; d^=a; d<<<=8;  c+=d; b^=c; b<<<=7.
- add eax,r8d
- xor r12d,eax
- rol r12d,16
- add ebx,r9d
- xor r13d,ebx
- rol r13d,16
- add esi,r12d
- xor r8d,esi
- rol r8d,12
- add edi,r13d
- xor r9d,edi
- rol r9d,12
- add eax,r8d
- xor r12d,eax
- rol r12d,8
- add ebx,r9d
- xor r13d,ebx
- rol r13d,8
- add esi,r12d
- xor r8d,esi
- rol r8d,7
- add edi,r13d
- xor r9d,edi
- rol r9d,7
- ; Swap the live "c" registers: spill x8/x9, load x10/x11 for columns 2/3.
- mov DWORD[32+rsp],esi
- mov DWORD[36+rsp],edi
- mov esi,DWORD[40+rsp]
- mov edi,DWORD[44+rsp]
- add ecx,r10d
- xor r14d,ecx
- rol r14d,16
- add edx,r11d
- xor r15d,edx
- rol r15d,16
- add esi,r14d
- xor r10d,esi
- rol r10d,12
- add edi,r15d
- xor r11d,edi
- rol r11d,12
- add ecx,r10d
- xor r14d,ecx
- rol r14d,8
- add edx,r11d
- xor r15d,edx
- rol r15d,8
- add esi,r14d
- xor r10d,esi
- rol r10d,7
- add edi,r15d
- xor r11d,edi
- rol r11d,7
- ; Diagonal rounds: (x0,x5,x10,x15) and (x1,x6,x11,x12) first ...
- add eax,r9d
- xor r15d,eax
- rol r15d,16
- add ebx,r10d
- xor r12d,ebx
- rol r12d,16
- add esi,r15d
- xor r9d,esi
- rol r9d,12
- add edi,r12d
- xor r10d,edi
- rol r10d,12
- add eax,r9d
- xor r15d,eax
- rol r15d,8
- add ebx,r10d
- xor r12d,ebx
- rol r12d,8
- add esi,r15d
- xor r9d,esi
- rol r9d,7
- add edi,r12d
- xor r10d,edi
- rol r10d,7
- ; ... swap back (spill x10/x11, reload x8/x9) for (x2,x7,x8,x13) and
- ; (x3,x4,x9,x14).
- mov DWORD[40+rsp],esi
- mov DWORD[44+rsp],edi
- mov esi,DWORD[32+rsp]
- mov edi,DWORD[36+rsp]
- add ecx,r11d
- xor r13d,ecx
- rol r13d,16
- add edx,r8d
- xor r14d,edx
- rol r14d,16
- add esi,r13d
- xor r11d,esi
- rol r11d,12
- add edi,r14d
- xor r8d,edi
- rol r8d,12
- add ecx,r11d
- xor r13d,ecx
- rol r13d,8
- add edx,r8d
- xor r14d,edx
- rol r14d,8
- add esi,r13d
- xor r11d,esi
- rol r11d,7
- add edi,r14d
- xor r8d,edi
- rol r8d,7
- dec ebp
- jnz NEAR $L$oop
- ; Rounds done.  [32..47+rsp] now holds the final x8..x11; restore the
- ; spilled length/pointers and add the input state back in (feed-forward).
- mov DWORD[36+rsp],edi
- mov DWORD[32+rsp],esi
- mov rbp,QWORD[64+rsp]
- movdqa xmm1,xmm2
- mov rsi,QWORD[((64+8))+rsp]
- paddd xmm3,xmm4 ; advance block counter by 1
- mov rdi,QWORD[((64+16))+rsp]
- add eax,0x61707865
- add ebx,0x3320646e
- add ecx,0x79622d32
- add edx,0x6b206574
- add r8d,DWORD[16+rsp]
- add r9d,DWORD[20+rsp]
- add r10d,DWORD[24+rsp]
- add r11d,DWORD[28+rsp]
- add r12d,DWORD[48+rsp]
- add r13d,DWORD[52+rsp]
- add r14d,DWORD[56+rsp]
- add r15d,DWORD[60+rsp]
- paddd xmm1,XMMWORD[32+rsp] ; x8..x11 += key words 4..7, as one vector
- cmp rbp,64
- jb NEAR $L$tail
- ; Full 64-byte block: xor keystream with input and store.
- xor eax,DWORD[rsi]
- xor ebx,DWORD[4+rsi]
- xor ecx,DWORD[8+rsi]
- xor edx,DWORD[12+rsi]
- xor r8d,DWORD[16+rsi]
- xor r9d,DWORD[20+rsi]
- xor r10d,DWORD[24+rsi]
- xor r11d,DWORD[28+rsi]
- movdqu xmm0,XMMWORD[32+rsi]
- xor r12d,DWORD[48+rsi]
- xor r13d,DWORD[52+rsi]
- xor r14d,DWORD[56+rsi]
- xor r15d,DWORD[60+rsi]
- lea rsi,[64+rsi]
- pxor xmm0,xmm1
- movdqa XMMWORD[32+rsp],xmm2 ; re-seed x8..x11 slots for next block
- movd DWORD[48+rsp],xmm3 ; store advanced counter
- mov DWORD[rdi],eax
- mov DWORD[4+rdi],ebx
- mov DWORD[8+rdi],ecx
- mov DWORD[12+rdi],edx
- mov DWORD[16+rdi],r8d
- mov DWORD[20+rdi],r9d
- mov DWORD[24+rdi],r10d
- mov DWORD[28+rdi],r11d
- movdqu XMMWORD[32+rdi],xmm0
- mov DWORD[48+rdi],r12d
- mov DWORD[52+rdi],r13d
- mov DWORD[56+rdi],r14d
- mov DWORD[60+rdi],r15d
- lea rdi,[64+rdi]
- sub rbp,64
- jnz NEAR $L$oop_outer
- jmp NEAR $L$done
- ALIGN 16
- $L$tail:
- ; Partial block: materialize the whole keystream block at [rsp] and xor
- ; it byte-by-byte (rbx = byte index, rbp = bytes left, rbp < 64).
- mov DWORD[rsp],eax
- mov DWORD[4+rsp],ebx
- xor rbx,rbx
- mov DWORD[8+rsp],ecx
- mov DWORD[12+rsp],edx
- mov DWORD[16+rsp],r8d
- mov DWORD[20+rsp],r9d
- mov DWORD[24+rsp],r10d
- mov DWORD[28+rsp],r11d
- movdqa XMMWORD[32+rsp],xmm1
- mov DWORD[48+rsp],r12d
- mov DWORD[52+rsp],r13d
- mov DWORD[56+rsp],r14d
- mov DWORD[60+rsp],r15d
- $L$oop_tail:
- movzx eax,BYTE[rbx*1+rsi]
- movzx edx,BYTE[rbx*1+rsp]
- lea rbx,[1+rbx]
- xor eax,edx
- mov BYTE[((-1))+rbx*1+rdi],al
- dec rbp
- jnz NEAR $L$oop_tail
- $L$done:
- ; Restore callee-saved registers pushed above and unwind the frame.
- lea rsi,[((64+24+48))+rsp]
- mov r15,QWORD[((-48))+rsi]
- mov r14,QWORD[((-40))+rsi]
- mov r13,QWORD[((-32))+rsi]
- mov r12,QWORD[((-24))+rsi]
- mov rbp,QWORD[((-16))+rsi]
- mov rbx,QWORD[((-8))+rsi]
- lea rsp,[rsi]
- $L$no_data:
- mov rdi,QWORD[8+rsp] ;WIN64 epilogue
- mov rsi,QWORD[16+rsp]
- DB 0F3h,0C3h ;repret
- $L$SEH_end_GFp_ChaCha20_ctr32:
- ALIGN 32
- ; -----------------------------------------------------------------------
- ; SSSE3 path: one 64-byte block per iteration, state in xmm0..xmm3, with
- ; the 16/24-bit rotates done via pshufb on the $L$rot16/$L$rot24 masks.
- ; ChaCha20_ssse3 is a Win64 entry (unused directly here); the internal
- ; entry $L$ChaCha20_ssse3 is reached from GFp_ChaCha20_ctr32 with the
- ; arguments already remapped (rdi=out, rsi=in, rdx=len, rcx=key, r8=ctr).
- ; Inputs longer than 128 bytes are passed on to the 4-way path.
- ; -----------------------------------------------------------------------
- ChaCha20_ssse3:
- mov QWORD[8+rsp],rdi ;WIN64 prologue
- mov QWORD[16+rsp],rsi
- mov rax,rsp
- $L$SEH_begin_ChaCha20_ssse3:
- mov rdi,rcx
- mov rsi,rdx
- mov rdx,r8
- mov rcx,r9
- mov r8,QWORD[40+rsp]
- $L$ChaCha20_ssse3:
- mov r9,rsp ; r9 = frame base for restore
- cmp rdx,128
- ja NEAR $L$ChaCha20_4x
- $L$do_sse3_after_all:
- sub rsp,64+40
- movaps XMMWORD[(-40)+r9],xmm6 ; xmm6/xmm7 are nonvolatile on Win64
- movaps XMMWORD[(-24)+r9],xmm7
- $L$ssse3_body:
- ; State rows: xmm0=sigma, xmm1=key[0..3], xmm2=key[4..7], xmm3=ctr||nonce.
- ; A copy of the input state is kept at [rsp..63+rsp] for the feed-forward.
- movdqa xmm0,XMMWORD[$L$sigma]
- movdqu xmm1,XMMWORD[rcx]
- movdqu xmm2,XMMWORD[16+rcx]
- movdqu xmm3,XMMWORD[r8]
- movdqa xmm6,XMMWORD[$L$rot16]
- movdqa xmm7,XMMWORD[$L$rot24]
- movdqa XMMWORD[rsp],xmm0
- movdqa XMMWORD[16+rsp],xmm1
- movdqa XMMWORD[32+rsp],xmm2
- movdqa XMMWORD[48+rsp],xmm3
- mov r8,10 ; 10 double rounds
- jmp NEAR $L$oop_ssse3
- ALIGN 32
- $L$oop_outer_ssse3:
- ; Next block: reload saved state and bump the counter row by 1.
- movdqa xmm3,XMMWORD[$L$one]
- movdqa xmm0,XMMWORD[rsp]
- movdqa xmm1,XMMWORD[16+rsp]
- movdqa xmm2,XMMWORD[32+rsp]
- paddd xmm3,XMMWORD[48+rsp]
- mov r8,10
- movdqa XMMWORD[48+rsp],xmm3
- jmp NEAR $L$oop_ssse3
- ALIGN 32
- $L$oop_ssse3:
- ; Column half: a+=b; d^=a; d<<<=16 (pshufb rot16); c+=d; b^=c; b<<<=12;
- ; then the <<<8 (pshufb rot24) / <<<7 half.
- paddd xmm0,xmm1
- pxor xmm3,xmm0
- DB 102,15,56,0,222 ; pshufb xmm3,xmm6 (rotate each dword left 16)
- paddd xmm2,xmm3
- pxor xmm1,xmm2
- movdqa xmm4,xmm1
- psrld xmm1,20
- pslld xmm4,12
- por xmm1,xmm4
- paddd xmm0,xmm1
- pxor xmm3,xmm0
- DB 102,15,56,0,223 ; pshufb xmm3,xmm7 (rotate each dword left 24)
- paddd xmm2,xmm3
- pxor xmm1,xmm2
- movdqa xmm4,xmm1
- psrld xmm1,25
- pslld xmm4,7
- por xmm1,xmm4
- ; Re-diagonalize rows for the diagonal half (lane rotates by 1/2/3).
- pshufd xmm2,xmm2,78
- pshufd xmm1,xmm1,57
- pshufd xmm3,xmm3,147
- nop
- paddd xmm0,xmm1
- pxor xmm3,xmm0
- DB 102,15,56,0,222 ; pshufb xmm3,xmm6 (<<<16)
- paddd xmm2,xmm3
- pxor xmm1,xmm2
- movdqa xmm4,xmm1
- psrld xmm1,20
- pslld xmm4,12
- por xmm1,xmm4
- paddd xmm0,xmm1
- pxor xmm3,xmm0
- DB 102,15,56,0,223 ; pshufb xmm3,xmm7 (<<<24)
- paddd xmm2,xmm3
- pxor xmm1,xmm2
- movdqa xmm4,xmm1
- psrld xmm1,25
- pslld xmm4,7
- por xmm1,xmm4
- ; Undo the diagonalization.
- pshufd xmm2,xmm2,78
- pshufd xmm1,xmm1,147
- pshufd xmm3,xmm3,57
- dec r8
- jnz NEAR $L$oop_ssse3
- ; Feed-forward: add the saved input state back in.
- paddd xmm0,XMMWORD[rsp]
- paddd xmm1,XMMWORD[16+rsp]
- paddd xmm2,XMMWORD[32+rsp]
- paddd xmm3,XMMWORD[48+rsp]
- cmp rdx,64
- jb NEAR $L$tail_ssse3
- ; Full block: xor 64 input bytes with the keystream and store.
- movdqu xmm4,XMMWORD[rsi]
- movdqu xmm5,XMMWORD[16+rsi]
- pxor xmm0,xmm4
- movdqu xmm4,XMMWORD[32+rsi]
- pxor xmm1,xmm5
- movdqu xmm5,XMMWORD[48+rsi]
- lea rsi,[64+rsi]
- pxor xmm2,xmm4
- pxor xmm3,xmm5
- movdqu XMMWORD[rdi],xmm0
- movdqu XMMWORD[16+rdi],xmm1
- movdqu XMMWORD[32+rdi],xmm2
- movdqu XMMWORD[48+rdi],xmm3
- lea rdi,[64+rdi]
- sub rdx,64
- jnz NEAR $L$oop_outer_ssse3
- jmp NEAR $L$done_ssse3
- ALIGN 16
- $L$tail_ssse3:
- ; Partial block: park the keystream at [rsp] and xor byte-by-byte.
- movdqa XMMWORD[rsp],xmm0
- movdqa XMMWORD[16+rsp],xmm1
- movdqa XMMWORD[32+rsp],xmm2
- movdqa XMMWORD[48+rsp],xmm3
- xor r8,r8
- $L$oop_tail_ssse3:
- movzx eax,BYTE[r8*1+rsi]
- movzx ecx,BYTE[r8*1+rsp]
- lea r8,[1+r8]
- xor eax,ecx
- mov BYTE[((-1))+r8*1+rdi],al
- dec rdx
- jnz NEAR $L$oop_tail_ssse3
- $L$done_ssse3:
- movaps xmm6,XMMWORD[((-40))+r9]
- movaps xmm7,XMMWORD[((-24))+r9]
- lea rsp,[r9]
- $L$ssse3_epilogue:
- mov rdi,QWORD[8+rsp] ;WIN64 epilogue
- mov rsi,QWORD[16+rsp]
- DB 0F3h,0C3h ;repret
- $L$SEH_end_ChaCha20_ssse3:
- ALIGN 32
- ; -----------------------------------------------------------------------
- ; 4-way interleaved SSSE3 path: four 64-byte blocks are computed in
- ; parallel, with each xmm register holding one state word across the four
- ; lanes (word-sliced layout).  Reached from the SSSE3 path for len > 128;
- ; may itself defer to the AVX2 8-way path or back to the one-block path
- ; depending on the capability bits left in r10 by the caller.
- ; -----------------------------------------------------------------------
- ChaCha20_4x:
- mov QWORD[8+rsp],rdi ;WIN64 prologue
- mov QWORD[16+rsp],rsi
- mov rax,rsp
- $L$SEH_begin_ChaCha20_4x:
- mov rdi,rcx
- mov rsi,rdx
- mov rdx,r8
- mov rcx,r9
- mov r8,QWORD[40+rsp]
- $L$ChaCha20_4x:
- mov r9,rsp ; r9 = frame base for restore
- ; r10 still holds QWORD[GFp_ia32cap_P+4] from the dispatcher; pick a
- ; wider or narrower path from those feature bits (exact bit meanings not
- ; visible in this chunk -- presumably AVX2 for the 8x jump and a
- ; slow-shuffle CPU class for the fallback; TODO confirm).
- mov r11,r10
- shr r10,32
- test r10,32
- jnz NEAR $L$ChaCha20_8x
- cmp rdx,192 ; below 192 bytes, 4x may not pay off
- ja NEAR $L$proceed4x
- and r11,71303168
- cmp r11,4194304
- je NEAR $L$do_sse3_after_all
- $L$proceed4x:
- sub rsp,0x140+168
- movaps XMMWORD[(-168)+r9],xmm6 ; xmm6..xmm15 are nonvolatile on Win64
- movaps XMMWORD[(-152)+r9],xmm7
- movaps XMMWORD[(-136)+r9],xmm8
- movaps XMMWORD[(-120)+r9],xmm9
- movaps XMMWORD[(-104)+r9],xmm10
- movaps XMMWORD[(-88)+r9],xmm11
- movaps XMMWORD[(-72)+r9],xmm12
- movaps XMMWORD[(-56)+r9],xmm13
- movaps XMMWORD[(-40)+r9],xmm14
- movaps XMMWORD[(-24)+r9],xmm15
- $L$4x_body:
- ; Splat each of the 16 state words across 4 lanes and save the splatted
- ; state: sigma words at [64..127+rsp]; key/counter words via rcx, which
- ; is re-pointed at rsp+256 so they can be addressed as (128-256)+rcx etc.
- movdqa xmm11,XMMWORD[$L$sigma]
- movdqu xmm15,XMMWORD[rcx]
- movdqu xmm7,XMMWORD[16+rcx]
- movdqu xmm3,XMMWORD[r8]
- lea rcx,[256+rsp]
- lea r10,[$L$rot16]
- lea r11,[$L$rot24]
- pshufd xmm8,xmm11,0x00
- pshufd xmm9,xmm11,0x55
- movdqa XMMWORD[64+rsp],xmm8
- pshufd xmm10,xmm11,0xaa
- movdqa XMMWORD[80+rsp],xmm9
- pshufd xmm11,xmm11,0xff
- movdqa XMMWORD[96+rsp],xmm10
- movdqa XMMWORD[112+rsp],xmm11
- pshufd xmm12,xmm15,0x00
- pshufd xmm13,xmm15,0x55
- movdqa XMMWORD[(128-256)+rcx],xmm12
- pshufd xmm14,xmm15,0xaa
- movdqa XMMWORD[(144-256)+rcx],xmm13
- pshufd xmm15,xmm15,0xff
- movdqa XMMWORD[(160-256)+rcx],xmm14
- movdqa XMMWORD[(176-256)+rcx],xmm15
- pshufd xmm4,xmm7,0x00
- pshufd xmm5,xmm7,0x55
- movdqa XMMWORD[(192-256)+rcx],xmm4
- pshufd xmm6,xmm7,0xaa
- movdqa XMMWORD[(208-256)+rcx],xmm5
- pshufd xmm7,xmm7,0xff
- movdqa XMMWORD[(224-256)+rcx],xmm6
- movdqa XMMWORD[(240-256)+rcx],xmm7
- ; Counter lanes get offsets 0..3 so the four blocks are consecutive.
- pshufd xmm0,xmm3,0x00
- pshufd xmm1,xmm3,0x55
- paddd xmm0,XMMWORD[$L$inc]
- pshufd xmm2,xmm3,0xaa
- movdqa XMMWORD[(272-256)+rcx],xmm1
- pshufd xmm3,xmm3,0xff
- movdqa XMMWORD[(288-256)+rcx],xmm2
- movdqa XMMWORD[(304-256)+rcx],xmm3
- jmp NEAR $L$oop_enter4x
- ALIGN 32
- $L$oop_outer4x:
- ; Reload the splatted state for the next group of 4 blocks and advance
- ; the per-lane counters by 4.
- movdqa xmm8,XMMWORD[64+rsp]
- movdqa xmm9,XMMWORD[80+rsp]
- movdqa xmm10,XMMWORD[96+rsp]
- movdqa xmm11,XMMWORD[112+rsp]
- movdqa xmm12,XMMWORD[((128-256))+rcx]
- movdqa xmm13,XMMWORD[((144-256))+rcx]
- movdqa xmm14,XMMWORD[((160-256))+rcx]
- movdqa xmm15,XMMWORD[((176-256))+rcx]
- movdqa xmm4,XMMWORD[((192-256))+rcx]
- movdqa xmm5,XMMWORD[((208-256))+rcx]
- movdqa xmm6,XMMWORD[((224-256))+rcx]
- movdqa xmm7,XMMWORD[((240-256))+rcx]
- movdqa xmm0,XMMWORD[((256-256))+rcx]
- movdqa xmm1,XMMWORD[((272-256))+rcx]
- movdqa xmm2,XMMWORD[((288-256))+rcx]
- movdqa xmm3,XMMWORD[((304-256))+rcx]
- paddd xmm0,XMMWORD[$L$four]
- $L$oop_enter4x:
- ; Only 16 xmm regs for 16 state words + scratch, so x6/x7 (xmm6/xmm7)
- ; live at [32/48+rsp] while xmm6/xmm7 serve as rotate masks/scratch.
- movdqa XMMWORD[32+rsp],xmm6
- movdqa XMMWORD[48+rsp],xmm7
- movdqa xmm7,XMMWORD[r10] ; rot16 mask
- mov eax,10 ; 10 double rounds
- movdqa XMMWORD[(256-256)+rcx],xmm0 ; save current counter lanes
- jmp NEAR $L$oop4x
- ALIGN 32
- $L$oop4x:
- ; Quarter rounds on two column pairs at a time; the hand-encoded
- ; DB 102,15,56,0,NN bytes are pshufb ops applying the rot16/rot24 masks.
- paddd xmm8,xmm12
- paddd xmm9,xmm13
- pxor xmm0,xmm8
- pxor xmm1,xmm9
- DB 102,15,56,0,199 ; pshufb xmm0,xmm7 (<<<16)
- DB 102,15,56,0,207 ; pshufb xmm1,xmm7 (<<<16)
- paddd xmm4,xmm0
- paddd xmm5,xmm1
- pxor xmm12,xmm4
- pxor xmm13,xmm5
- movdqa xmm6,xmm12
- pslld xmm12,12
- psrld xmm6,20
- movdqa xmm7,xmm13
- pslld xmm13,12
- por xmm12,xmm6
- psrld xmm7,20
- movdqa xmm6,XMMWORD[r11] ; rot24 mask
- por xmm13,xmm7
- paddd xmm8,xmm12
- paddd xmm9,xmm13
- pxor xmm0,xmm8
- pxor xmm1,xmm9
- DB 102,15,56,0,198 ; pshufb xmm0,xmm6 (<<<24)
- DB 102,15,56,0,206 ; pshufb xmm1,xmm6 (<<<24)
- paddd xmm4,xmm0
- paddd xmm5,xmm1
- pxor xmm12,xmm4
- pxor xmm13,xmm5
- movdqa xmm7,xmm12
- pslld xmm12,7
- psrld xmm7,25
- movdqa xmm6,xmm13
- pslld xmm13,7
- por xmm12,xmm7
- psrld xmm6,25
- movdqa xmm7,XMMWORD[r10] ; rot16 mask
- por xmm13,xmm6
- ; Swap the live c-row pair with the spilled one.
- movdqa XMMWORD[rsp],xmm4
- movdqa XMMWORD[16+rsp],xmm5
- movdqa xmm4,XMMWORD[32+rsp]
- movdqa xmm5,XMMWORD[48+rsp]
- paddd xmm10,xmm14
- paddd xmm11,xmm15
- pxor xmm2,xmm10
- pxor xmm3,xmm11
- DB 102,15,56,0,215 ; pshufb xmm2,xmm7 (<<<16)
- DB 102,15,56,0,223 ; pshufb xmm3,xmm7 (<<<16)
- paddd xmm4,xmm2
- paddd xmm5,xmm3
- pxor xmm14,xmm4
- pxor xmm15,xmm5
- movdqa xmm6,xmm14
- pslld xmm14,12
- psrld xmm6,20
- movdqa xmm7,xmm15
- pslld xmm15,12
- por xmm14,xmm6
- psrld xmm7,20
- movdqa xmm6,XMMWORD[r11]
- por xmm15,xmm7
- paddd xmm10,xmm14
- paddd xmm11,xmm15
- pxor xmm2,xmm10
- pxor xmm3,xmm11
- DB 102,15,56,0,214 ; pshufb xmm2,xmm6 (<<<24)
- DB 102,15,56,0,222 ; pshufb xmm3,xmm6 (<<<24)
- paddd xmm4,xmm2
- paddd xmm5,xmm3
- pxor xmm14,xmm4
- pxor xmm15,xmm5
- movdqa xmm7,xmm14
- pslld xmm14,7
- psrld xmm7,25
- movdqa xmm6,xmm15
- pslld xmm15,7
- por xmm14,xmm7
- psrld xmm6,25
- movdqa xmm7,XMMWORD[r10]
- por xmm15,xmm6
- ; Diagonal rounds (same pattern, rotated operand pairing).
- paddd xmm8,xmm13
- paddd xmm9,xmm14
- pxor xmm3,xmm8
- pxor xmm0,xmm9
- DB 102,15,56,0,223 ; pshufb xmm3,xmm7 (<<<16)
- DB 102,15,56,0,199 ; pshufb xmm0,xmm7 (<<<16)
- paddd xmm4,xmm3
- paddd xmm5,xmm0
- pxor xmm13,xmm4
- pxor xmm14,xmm5
- movdqa xmm6,xmm13
- pslld xmm13,12
- psrld xmm6,20
- movdqa xmm7,xmm14
- pslld xmm14,12
- por xmm13,xmm6
- psrld xmm7,20
- movdqa xmm6,XMMWORD[r11]
- por xmm14,xmm7
- paddd xmm8,xmm13
- paddd xmm9,xmm14
- pxor xmm3,xmm8
- pxor xmm0,xmm9
- DB 102,15,56,0,222 ; pshufb xmm3,xmm6 (<<<24)
- DB 102,15,56,0,198 ; pshufb xmm0,xmm6 (<<<24)
- paddd xmm4,xmm3
- paddd xmm5,xmm0
- pxor xmm13,xmm4
- pxor xmm14,xmm5
- movdqa xmm7,xmm13
- pslld xmm13,7
- psrld xmm7,25
- movdqa xmm6,xmm14
- pslld xmm14,7
- por xmm13,xmm7
- psrld xmm6,25
- movdqa xmm7,XMMWORD[r10]
- por xmm14,xmm6
- movdqa XMMWORD[32+rsp],xmm4
- movdqa XMMWORD[48+rsp],xmm5
- movdqa xmm4,XMMWORD[rsp]
- movdqa xmm5,XMMWORD[16+rsp]
- paddd xmm10,xmm15
- paddd xmm11,xmm12
- pxor xmm1,xmm10
- pxor xmm2,xmm11
- DB 102,15,56,0,207 ; pshufb xmm1,xmm7 (<<<16)
- DB 102,15,56,0,215 ; pshufb xmm2,xmm7 (<<<16)
- paddd xmm4,xmm1
- paddd xmm5,xmm2
- pxor xmm15,xmm4
- pxor xmm12,xmm5
- movdqa xmm6,xmm15
- pslld xmm15,12
- psrld xmm6,20
- movdqa xmm7,xmm12
- pslld xmm12,12
- por xmm15,xmm6
- psrld xmm7,20
- movdqa xmm6,XMMWORD[r11]
- por xmm12,xmm7
- paddd xmm10,xmm15
- paddd xmm11,xmm12
- pxor xmm1,xmm10
- pxor xmm2,xmm11
- DB 102,15,56,0,206 ; pshufb xmm1,xmm6 (<<<24)
- DB 102,15,56,0,214 ; pshufb xmm2,xmm6 (<<<24)
- paddd xmm4,xmm1
- paddd xmm5,xmm2
- pxor xmm15,xmm4
- pxor xmm12,xmm5
- movdqa xmm7,xmm15
- pslld xmm15,7
- psrld xmm7,25
- movdqa xmm6,xmm12
- pslld xmm12,7
- por xmm15,xmm7
- psrld xmm6,25
- movdqa xmm7,XMMWORD[r10]
- por xmm12,xmm6
- dec eax
- jnz NEAR $L$oop4x
- ; Feed-forward + transpose: add saved input words back, then interleave
- ; the word-sliced lanes (punpck*) back into four contiguous 64-byte
- ; keystream blocks, a 4x4 dword transpose per row group.
- paddd xmm8,XMMWORD[64+rsp]
- paddd xmm9,XMMWORD[80+rsp]
- paddd xmm10,XMMWORD[96+rsp]
- paddd xmm11,XMMWORD[112+rsp]
- movdqa xmm6,xmm8
- punpckldq xmm8,xmm9
- movdqa xmm7,xmm10
- punpckldq xmm10,xmm11
- punpckhdq xmm6,xmm9
- punpckhdq xmm7,xmm11
- movdqa xmm9,xmm8
- punpcklqdq xmm8,xmm10
- movdqa xmm11,xmm6
- punpcklqdq xmm6,xmm7
- punpckhqdq xmm9,xmm10
- punpckhqdq xmm11,xmm7
- paddd xmm12,XMMWORD[((128-256))+rcx]
- paddd xmm13,XMMWORD[((144-256))+rcx]
- paddd xmm14,XMMWORD[((160-256))+rcx]
- paddd xmm15,XMMWORD[((176-256))+rcx]
- movdqa XMMWORD[rsp],xmm8
- movdqa XMMWORD[16+rsp],xmm9
- movdqa xmm8,XMMWORD[32+rsp]
- movdqa xmm9,XMMWORD[48+rsp]
- movdqa xmm10,xmm12
- punpckldq xmm12,xmm13
- movdqa xmm7,xmm14
- punpckldq xmm14,xmm15
- punpckhdq xmm10,xmm13
- punpckhdq xmm7,xmm15
- movdqa xmm13,xmm12
- punpcklqdq xmm12,xmm14
- movdqa xmm15,xmm10
- punpcklqdq xmm10,xmm7
- punpckhqdq xmm13,xmm14
- punpckhqdq xmm15,xmm7
- paddd xmm4,XMMWORD[((192-256))+rcx]
- paddd xmm5,XMMWORD[((208-256))+rcx]
- paddd xmm8,XMMWORD[((224-256))+rcx]
- paddd xmm9,XMMWORD[((240-256))+rcx]
- movdqa XMMWORD[32+rsp],xmm6
- movdqa XMMWORD[48+rsp],xmm11
- movdqa xmm14,xmm4
- punpckldq xmm4,xmm5
- movdqa xmm7,xmm8
- punpckldq xmm8,xmm9
- punpckhdq xmm14,xmm5
- punpckhdq xmm7,xmm9
- movdqa xmm5,xmm4
- punpcklqdq xmm4,xmm8
- movdqa xmm9,xmm14
- punpcklqdq xmm14,xmm7
- punpckhqdq xmm5,xmm8
- punpckhqdq xmm9,xmm7
- paddd xmm0,XMMWORD[((256-256))+rcx]
- paddd xmm1,XMMWORD[((272-256))+rcx]
- paddd xmm2,XMMWORD[((288-256))+rcx]
- paddd xmm3,XMMWORD[((304-256))+rcx]
- movdqa xmm8,xmm0
- punpckldq xmm0,xmm1
- movdqa xmm7,xmm2
- punpckldq xmm2,xmm3
- punpckhdq xmm8,xmm1
- punpckhdq xmm7,xmm3
- movdqa xmm1,xmm0
- punpcklqdq xmm0,xmm2
- movdqa xmm3,xmm8
- punpcklqdq xmm8,xmm7
- punpckhqdq xmm1,xmm2
- punpckhqdq xmm3,xmm7
- cmp rdx,64*4
- jb NEAR $L$tail4x
- ; Fast path: a full 256 bytes available -- xor and store all 4 blocks.
- movdqu xmm6,XMMWORD[rsi]
- movdqu xmm11,XMMWORD[16+rsi]
- movdqu xmm2,XMMWORD[32+rsi]
- movdqu xmm7,XMMWORD[48+rsi]
- pxor xmm6,XMMWORD[rsp]
- pxor xmm11,xmm12
- pxor xmm2,xmm4
- pxor xmm7,xmm0
- movdqu XMMWORD[rdi],xmm6
- movdqu xmm6,XMMWORD[64+rsi]
- movdqu XMMWORD[16+rdi],xmm11
- movdqu xmm11,XMMWORD[80+rsi]
- movdqu XMMWORD[32+rdi],xmm2
- movdqu xmm2,XMMWORD[96+rsi]
- movdqu XMMWORD[48+rdi],xmm7
- movdqu xmm7,XMMWORD[112+rsi]
- lea rsi,[128+rsi]
- pxor xmm6,XMMWORD[16+rsp]
- pxor xmm11,xmm13
- pxor xmm2,xmm5
- pxor xmm7,xmm1
- movdqu XMMWORD[64+rdi],xmm6
- movdqu xmm6,XMMWORD[rsi]
- movdqu XMMWORD[80+rdi],xmm11
- movdqu xmm11,XMMWORD[16+rsi]
- movdqu XMMWORD[96+rdi],xmm2
- movdqu xmm2,XMMWORD[32+rsi]
- movdqu XMMWORD[112+rdi],xmm7
- lea rdi,[128+rdi]
- movdqu xmm7,XMMWORD[48+rsi]
- pxor xmm6,XMMWORD[32+rsp]
- pxor xmm11,xmm10
- pxor xmm2,xmm14
- pxor xmm7,xmm8
- movdqu XMMWORD[rdi],xmm6
- movdqu xmm6,XMMWORD[64+rsi]
- movdqu XMMWORD[16+rdi],xmm11
- movdqu xmm11,XMMWORD[80+rsi]
- movdqu XMMWORD[32+rdi],xmm2
- movdqu xmm2,XMMWORD[96+rsi]
- movdqu XMMWORD[48+rdi],xmm7
- movdqu xmm7,XMMWORD[112+rsi]
- lea rsi,[128+rsi]
- pxor xmm6,XMMWORD[48+rsp]
- pxor xmm11,xmm15
- pxor xmm2,xmm9
- pxor xmm7,xmm3
- movdqu XMMWORD[64+rdi],xmm6
- movdqu XMMWORD[80+rdi],xmm11
- movdqu XMMWORD[96+rdi],xmm2
- movdqu XMMWORD[112+rdi],xmm7
- lea rdi,[128+rdi]
- sub rdx,64*4
- jnz NEAR $L$oop_outer4x
- jmp NEAR $L$done4x
- $L$tail4x:
- ; Tail ladder: handle as many whole 64-byte blocks as remain, then park
- ; the next keystream block at [rsp..63+rsp] for the byte loop.
- cmp rdx,192
- jae NEAR $L$192_or_more4x
- cmp rdx,128
- jae NEAR $L$128_or_more4x
- cmp rdx,64
- jae NEAR $L$64_or_more4x
- xor r10,r10
- movdqa XMMWORD[16+rsp],xmm12
- movdqa XMMWORD[32+rsp],xmm4
- movdqa XMMWORD[48+rsp],xmm0
- jmp NEAR $L$oop_tail4x
- ALIGN 32
- $L$64_or_more4x:
- movdqu xmm6,XMMWORD[rsi]
- movdqu xmm11,XMMWORD[16+rsi]
- movdqu xmm2,XMMWORD[32+rsi]
- movdqu xmm7,XMMWORD[48+rsi]
- pxor xmm6,XMMWORD[rsp]
- pxor xmm11,xmm12
- pxor xmm2,xmm4
- pxor xmm7,xmm0
- movdqu XMMWORD[rdi],xmm6
- movdqu XMMWORD[16+rdi],xmm11
- movdqu XMMWORD[32+rdi],xmm2
- movdqu XMMWORD[48+rdi],xmm7
- je NEAR $L$done4x ; exactly 64 bytes: done
- movdqa xmm6,XMMWORD[16+rsp]
- lea rsi,[64+rsi]
- xor r10,r10
- movdqa XMMWORD[rsp],xmm6
- movdqa XMMWORD[16+rsp],xmm13
- lea rdi,[64+rdi]
- movdqa XMMWORD[32+rsp],xmm5
- sub rdx,64
- movdqa XMMWORD[48+rsp],xmm1
- jmp NEAR $L$oop_tail4x
- ALIGN 32
- $L$128_or_more4x:
- movdqu xmm6,XMMWORD[rsi]
- movdqu xmm11,XMMWORD[16+rsi]
- movdqu xmm2,XMMWORD[32+rsi]
- movdqu xmm7,XMMWORD[48+rsi]
- pxor xmm6,XMMWORD[rsp]
- pxor xmm11,xmm12
- pxor xmm2,xmm4
- pxor xmm7,xmm0
- movdqu XMMWORD[rdi],xmm6
- movdqu xmm6,XMMWORD[64+rsi]
- movdqu XMMWORD[16+rdi],xmm11
- movdqu xmm11,XMMWORD[80+rsi]
- movdqu XMMWORD[32+rdi],xmm2
- movdqu xmm2,XMMWORD[96+rsi]
- movdqu XMMWORD[48+rdi],xmm7
- movdqu xmm7,XMMWORD[112+rsi]
- pxor xmm6,XMMWORD[16+rsp]
- pxor xmm11,xmm13
- pxor xmm2,xmm5
- pxor xmm7,xmm1
- movdqu XMMWORD[64+rdi],xmm6
- movdqu XMMWORD[80+rdi],xmm11
- movdqu XMMWORD[96+rdi],xmm2
- movdqu XMMWORD[112+rdi],xmm7
- je NEAR $L$done4x ; exactly 128 bytes: done
- movdqa xmm6,XMMWORD[32+rsp]
- lea rsi,[128+rsi]
- xor r10,r10
- movdqa XMMWORD[rsp],xmm6
- movdqa XMMWORD[16+rsp],xmm10
- lea rdi,[128+rdi]
- movdqa XMMWORD[32+rsp],xmm14
- sub rdx,128
- movdqa XMMWORD[48+rsp],xmm8
- jmp NEAR $L$oop_tail4x
- ALIGN 32
- $L$192_or_more4x:
- movdqu xmm6,XMMWORD[rsi]
- movdqu xmm11,XMMWORD[16+rsi]
- movdqu xmm2,XMMWORD[32+rsi]
- movdqu xmm7,XMMWORD[48+rsi]
- pxor xmm6,XMMWORD[rsp]
- pxor xmm11,xmm12
- pxor xmm2,xmm4
- pxor xmm7,xmm0
- movdqu XMMWORD[rdi],xmm6
- movdqu xmm6,XMMWORD[64+rsi]
- movdqu XMMWORD[16+rdi],xmm11
- movdqu xmm11,XMMWORD[80+rsi]
- movdqu XMMWORD[32+rdi],xmm2
- movdqu xmm2,XMMWORD[96+rsi]
- movdqu XMMWORD[48+rdi],xmm7
- movdqu xmm7,XMMWORD[112+rsi]
- lea rsi,[128+rsi]
- pxor xmm6,XMMWORD[16+rsp]
- pxor xmm11,xmm13
- pxor xmm2,xmm5
- pxor xmm7,xmm1
- movdqu XMMWORD[64+rdi],xmm6
- movdqu xmm6,XMMWORD[rsi]
- movdqu XMMWORD[80+rdi],xmm11
- movdqu xmm11,XMMWORD[16+rsi]
- movdqu XMMWORD[96+rdi],xmm2
- movdqu xmm2,XMMWORD[32+rsi]
- movdqu XMMWORD[112+rdi],xmm7
- lea rdi,[128+rdi]
- movdqu xmm7,XMMWORD[48+rsi]
- pxor xmm6,XMMWORD[32+rsp]
- pxor xmm11,xmm10
- pxor xmm2,xmm14
- pxor xmm7,xmm8
- movdqu XMMWORD[rdi],xmm6
- movdqu XMMWORD[16+rdi],xmm11
- movdqu XMMWORD[32+rdi],xmm2
- movdqu XMMWORD[48+rdi],xmm7
- je NEAR $L$done4x ; exactly 192 bytes: done
- movdqa xmm6,XMMWORD[48+rsp]
- lea rsi,[64+rsi]
- xor r10,r10
- movdqa XMMWORD[rsp],xmm6
- movdqa XMMWORD[16+rsp],xmm15
- lea rdi,[64+rdi]
- movdqa XMMWORD[32+rsp],xmm9
- sub rdx,192
- movdqa XMMWORD[48+rsp],xmm3
- $L$oop_tail4x:
- ; Byte-granular tail: r10 = byte index, rdx = bytes left (< 64).
- movzx eax,BYTE[r10*1+rsi]
- movzx ecx,BYTE[r10*1+rsp]
- lea r10,[1+r10]
- xor eax,ecx
- mov BYTE[((-1))+r10*1+rdi],al
- dec rdx
- jnz NEAR $L$oop_tail4x
- $L$done4x:
- ; Restore the Win64-nonvolatile xmm registers and unwind.
- movaps xmm6,XMMWORD[((-168))+r9]
- movaps xmm7,XMMWORD[((-152))+r9]
- movaps xmm8,XMMWORD[((-136))+r9]
- movaps xmm9,XMMWORD[((-120))+r9]
- movaps xmm10,XMMWORD[((-104))+r9]
- movaps xmm11,XMMWORD[((-88))+r9]
- movaps xmm12,XMMWORD[((-72))+r9]
- movaps xmm13,XMMWORD[((-56))+r9]
- movaps xmm14,XMMWORD[((-40))+r9]
- movaps xmm15,XMMWORD[((-24))+r9]
- lea rsp,[r9]
- $L$4x_epilogue:
- mov rdi,QWORD[8+rsp] ;WIN64 epilogue
- mov rsi,QWORD[16+rsp]
- DB 0F3h,0C3h ;repret
- $L$SEH_end_ChaCha20_4x:
- ALIGN 32
- ChaCha20_8x:
- mov QWORD[8+rsp],rdi ;WIN64 prologue
- mov QWORD[16+rsp],rsi
- mov rax,rsp
- $L$SEH_begin_ChaCha20_8x:
- mov rdi,rcx
- mov rsi,rdx
- mov rdx,r8
- mov rcx,r9
- mov r8,QWORD[40+rsp]
- $L$ChaCha20_8x:
- mov r9,rsp
- sub rsp,0x280+168
- and rsp,-32
- movaps XMMWORD[(-168)+r9],xmm6
- movaps XMMWORD[(-152)+r9],xmm7
- movaps XMMWORD[(-136)+r9],xmm8
- movaps XMMWORD[(-120)+r9],xmm9
- movaps XMMWORD[(-104)+r9],xmm10
- movaps XMMWORD[(-88)+r9],xmm11
- movaps XMMWORD[(-72)+r9],xmm12
- movaps XMMWORD[(-56)+r9],xmm13
- movaps XMMWORD[(-40)+r9],xmm14
- movaps XMMWORD[(-24)+r9],xmm15
- $L$8x_body:
- vzeroupper
- vbroadcasti128 ymm11,XMMWORD[$L$sigma]
- vbroadcasti128 ymm3,XMMWORD[rcx]
- vbroadcasti128 ymm15,XMMWORD[16+rcx]
- vbroadcasti128 ymm7,XMMWORD[r8]
- lea rcx,[256+rsp]
- lea rax,[512+rsp]
- lea r10,[$L$rot16]
- lea r11,[$L$rot24]
- vpshufd ymm8,ymm11,0x00
- vpshufd ymm9,ymm11,0x55
- vmovdqa YMMWORD[(128-256)+rcx],ymm8
- vpshufd ymm10,ymm11,0xaa
- vmovdqa YMMWORD[(160-256)+rcx],ymm9
- vpshufd ymm11,ymm11,0xff
- vmovdqa YMMWORD[(192-256)+rcx],ymm10
- vmovdqa YMMWORD[(224-256)+rcx],ymm11
- vpshufd ymm0,ymm3,0x00
- vpshufd ymm1,ymm3,0x55
- vmovdqa YMMWORD[(256-256)+rcx],ymm0
- vpshufd ymm2,ymm3,0xaa
- vmovdqa YMMWORD[(288-256)+rcx],ymm1
- vpshufd ymm3,ymm3,0xff
- vmovdqa YMMWORD[(320-256)+rcx],ymm2
- vmovdqa YMMWORD[(352-256)+rcx],ymm3
- vpshufd ymm12,ymm15,0x00
- vpshufd ymm13,ymm15,0x55
- vmovdqa YMMWORD[(384-512)+rax],ymm12
- vpshufd ymm14,ymm15,0xaa
- vmovdqa YMMWORD[(416-512)+rax],ymm13
- vpshufd ymm15,ymm15,0xff
- vmovdqa YMMWORD[(448-512)+rax],ymm14
- vmovdqa YMMWORD[(480-512)+rax],ymm15
- vpshufd ymm4,ymm7,0x00
- vpshufd ymm5,ymm7,0x55
- vpaddd ymm4,ymm4,YMMWORD[$L$incy]
- vpshufd ymm6,ymm7,0xaa
- vmovdqa YMMWORD[(544-512)+rax],ymm5
- vpshufd ymm7,ymm7,0xff
- vmovdqa YMMWORD[(576-512)+rax],ymm6
- vmovdqa YMMWORD[(608-512)+rax],ymm7
- jmp NEAR $L$oop_enter8x
- ALIGN 32
- $L$oop_outer8x:
- vmovdqa ymm8,YMMWORD[((128-256))+rcx]
- vmovdqa ymm9,YMMWORD[((160-256))+rcx]
- vmovdqa ymm10,YMMWORD[((192-256))+rcx]
- vmovdqa ymm11,YMMWORD[((224-256))+rcx]
- vmovdqa ymm0,YMMWORD[((256-256))+rcx]
- vmovdqa ymm1,YMMWORD[((288-256))+rcx]
- vmovdqa ymm2,YMMWORD[((320-256))+rcx]
- vmovdqa ymm3,YMMWORD[((352-256))+rcx]
- vmovdqa ymm12,YMMWORD[((384-512))+rax]
- vmovdqa ymm13,YMMWORD[((416-512))+rax]
- vmovdqa ymm14,YMMWORD[((448-512))+rax]
- vmovdqa ymm15,YMMWORD[((480-512))+rax]
- vmovdqa ymm4,YMMWORD[((512-512))+rax]
- vmovdqa ymm5,YMMWORD[((544-512))+rax]
- vmovdqa ymm6,YMMWORD[((576-512))+rax]
- vmovdqa ymm7,YMMWORD[((608-512))+rax]
- vpaddd ymm4,ymm4,YMMWORD[$L$eight]
- $L$oop_enter8x:
- vmovdqa YMMWORD[64+rsp],ymm14
- vmovdqa YMMWORD[96+rsp],ymm15
- vbroadcasti128 ymm15,XMMWORD[r10]
- vmovdqa YMMWORD[(512-512)+rax],ymm4
- mov eax,10
- jmp NEAR $L$oop8x
- ALIGN 32
- $L$oop8x:
- vpaddd ymm8,ymm8,ymm0
- vpxor ymm4,ymm8,ymm4
- vpshufb ymm4,ymm4,ymm15
- vpaddd ymm9,ymm9,ymm1
- vpxor ymm5,ymm9,ymm5
- vpshufb ymm5,ymm5,ymm15
- vpaddd ymm12,ymm12,ymm4
- vpxor ymm0,ymm12,ymm0
- vpslld ymm14,ymm0,12
- vpsrld ymm0,ymm0,20
- vpor ymm0,ymm14,ymm0
- vbroadcasti128 ymm14,XMMWORD[r11]
- vpaddd ymm13,ymm13,ymm5
- vpxor ymm1,ymm13,ymm1
- vpslld ymm15,ymm1,12
- vpsrld ymm1,ymm1,20
- vpor ymm1,ymm15,ymm1
- vpaddd ymm8,ymm8,ymm0
- vpxor ymm4,ymm8,ymm4
- vpshufb ymm4,ymm4,ymm14
- vpaddd ymm9,ymm9,ymm1
- vpxor ymm5,ymm9,ymm5
- vpshufb ymm5,ymm5,ymm14
- vpaddd ymm12,ymm12,ymm4
- vpxor ymm0,ymm12,ymm0
- vpslld ymm15,ymm0,7
- vpsrld ymm0,ymm0,25
- vpor ymm0,ymm15,ymm0
- vbroadcasti128 ymm15,XMMWORD[r10]
- vpaddd ymm13,ymm13,ymm5
- vpxor ymm1,ymm13,ymm1
- vpslld ymm14,ymm1,7
- vpsrld ymm1,ymm1,25
- vpor ymm1,ymm14,ymm1
- vmovdqa YMMWORD[rsp],ymm12
- vmovdqa YMMWORD[32+rsp],ymm13
- vmovdqa ymm12,YMMWORD[64+rsp]
- vmovdqa ymm13,YMMWORD[96+rsp]
- vpaddd ymm10,ymm10,ymm2
- vpxor ymm6,ymm10,ymm6
- vpshufb ymm6,ymm6,ymm15
- vpaddd ymm11,ymm11,ymm3
- vpxor ymm7,ymm11,ymm7
- vpshufb ymm7,ymm7,ymm15
- vpaddd ymm12,ymm12,ymm6
- vpxor ymm2,ymm12,ymm2
- vpslld ymm14,ymm2,12
- vpsrld ymm2,ymm2,20
- vpor ymm2,ymm14,ymm2
- vbroadcasti128 ymm14,XMMWORD[r11]
- vpaddd ymm13,ymm13,ymm7
- vpxor ymm3,ymm13,ymm3
- vpslld ymm15,ymm3,12
- vpsrld ymm3,ymm3,20
- vpor ymm3,ymm15,ymm3
- vpaddd ymm10,ymm10,ymm2
- vpxor ymm6,ymm10,ymm6
- vpshufb ymm6,ymm6,ymm14
- vpaddd ymm11,ymm11,ymm3
- vpxor ymm7,ymm11,ymm7
- vpshufb ymm7,ymm7,ymm14
- vpaddd ymm12,ymm12,ymm6
- vpxor ymm2,ymm12,ymm2
- vpslld ymm15,ymm2,7
- vpsrld ymm2,ymm2,25
- vpor ymm2,ymm15,ymm2
- vbroadcasti128 ymm15,XMMWORD[r10]
- vpaddd ymm13,ymm13,ymm7
- vpxor ymm3,ymm13,ymm3
- vpslld ymm14,ymm3,7
- vpsrld ymm3,ymm3,25
- vpor ymm3,ymm14,ymm3
- vpaddd ymm8,ymm8,ymm1
- vpxor ymm7,ymm8,ymm7
- vpshufb ymm7,ymm7,ymm15
- vpaddd ymm9,ymm9,ymm2
- vpxor ymm4,ymm9,ymm4
- vpshufb ymm4,ymm4,ymm15
- vpaddd ymm12,ymm12,ymm7
- vpxor ymm1,ymm12,ymm1
- vpslld ymm14,ymm1,12
- vpsrld ymm1,ymm1,20
- vpor ymm1,ymm14,ymm1
- vbroadcasti128 ymm14,XMMWORD[r11]
- vpaddd ymm13,ymm13,ymm4
- vpxor ymm2,ymm13,ymm2
- vpslld ymm15,ymm2,12
- vpsrld ymm2,ymm2,20
- vpor ymm2,ymm15,ymm2
- vpaddd ymm8,ymm8,ymm1
- vpxor ymm7,ymm8,ymm7
- vpshufb ymm7,ymm7,ymm14
- vpaddd ymm9,ymm9,ymm2
- vpxor ymm4,ymm9,ymm4
- vpshufb ymm4,ymm4,ymm14
- vpaddd ymm12,ymm12,ymm7
- vpxor ymm1,ymm12,ymm1
- vpslld ymm15,ymm1,7
- vpsrld ymm1,ymm1,25
- vpor ymm1,ymm15,ymm1
- vbroadcasti128 ymm15,XMMWORD[r10]
- vpaddd ymm13,ymm13,ymm4
- vpxor ymm2,ymm13,ymm2
- vpslld ymm14,ymm2,7
- vpsrld ymm2,ymm2,25
- vpor ymm2,ymm14,ymm2
- vmovdqa YMMWORD[64+rsp],ymm12
- vmovdqa YMMWORD[96+rsp],ymm13
- vmovdqa ymm12,YMMWORD[rsp]
- vmovdqa ymm13,YMMWORD[32+rsp]
- vpaddd ymm10,ymm10,ymm3
- vpxor ymm5,ymm10,ymm5
- vpshufb ymm5,ymm5,ymm15
- vpaddd ymm11,ymm11,ymm0
- vpxor ymm6,ymm11,ymm6
- vpshufb ymm6,ymm6,ymm15
- vpaddd ymm12,ymm12,ymm5
- vpxor ymm3,ymm12,ymm3
- vpslld ymm14,ymm3,12
- vpsrld ymm3,ymm3,20
- vpor ymm3,ymm14,ymm3
- vbroadcasti128 ymm14,XMMWORD[r11]
- vpaddd ymm13,ymm13,ymm6
- vpxor ymm0,ymm13,ymm0
- vpslld ymm15,ymm0,12
- vpsrld ymm0,ymm0,20
- vpor ymm0,ymm15,ymm0
- vpaddd ymm10,ymm10,ymm3
- vpxor ymm5,ymm10,ymm5
- vpshufb ymm5,ymm5,ymm14
- vpaddd ymm11,ymm11,ymm0
- vpxor ymm6,ymm11,ymm6
- vpshufb ymm6,ymm6,ymm14
- vpaddd ymm12,ymm12,ymm5
- vpxor ymm3,ymm12,ymm3
- vpslld ymm15,ymm3,7
- vpsrld ymm3,ymm3,25
- vpor ymm3,ymm15,ymm3
- vbroadcasti128 ymm15,XMMWORD[r10]
- vpaddd ymm13,ymm13,ymm6
- vpxor ymm0,ymm13,ymm0
- vpslld ymm14,ymm0,7
- vpsrld ymm0,ymm0,25
- vpor ymm0,ymm14,ymm0
- dec eax
- jnz NEAR $L$oop8x
- lea rax,[512+rsp]
- vpaddd ymm8,ymm8,YMMWORD[((128-256))+rcx]
- vpaddd ymm9,ymm9,YMMWORD[((160-256))+rcx]
- vpaddd ymm10,ymm10,YMMWORD[((192-256))+rcx]
- vpaddd ymm11,ymm11,YMMWORD[((224-256))+rcx]
- vpunpckldq ymm14,ymm8,ymm9
- vpunpckldq ymm15,ymm10,ymm11
- vpunpckhdq ymm8,ymm8,ymm9
- vpunpckhdq ymm10,ymm10,ymm11
- vpunpcklqdq ymm9,ymm14,ymm15
- vpunpckhqdq ymm14,ymm14,ymm15
- vpunpcklqdq ymm11,ymm8,ymm10
- vpunpckhqdq ymm8,ymm8,ymm10
- vpaddd ymm0,ymm0,YMMWORD[((256-256))+rcx]
- vpaddd ymm1,ymm1,YMMWORD[((288-256))+rcx]
- vpaddd ymm2,ymm2,YMMWORD[((320-256))+rcx]
- vpaddd ymm3,ymm3,YMMWORD[((352-256))+rcx]
- vpunpckldq ymm10,ymm0,ymm1
- vpunpckldq ymm15,ymm2,ymm3
- vpunpckhdq ymm0,ymm0,ymm1
- vpunpckhdq ymm2,ymm2,ymm3
- vpunpcklqdq ymm1,ymm10,ymm15
- vpunpckhqdq ymm10,ymm10,ymm15
- vpunpcklqdq ymm3,ymm0,ymm2
- vpunpckhqdq ymm0,ymm0,ymm2
- vperm2i128 ymm15,ymm9,ymm1,0x20
- vperm2i128 ymm1,ymm9,ymm1,0x31
- vperm2i128 ymm9,ymm14,ymm10,0x20
- vperm2i128 ymm10,ymm14,ymm10,0x31
- vperm2i128 ymm14,ymm11,ymm3,0x20
- vperm2i128 ymm3,ymm11,ymm3,0x31
- vperm2i128 ymm11,ymm8,ymm0,0x20
- vperm2i128 ymm0,ymm8,ymm0,0x31
- vmovdqa YMMWORD[rsp],ymm15
- vmovdqa YMMWORD[32+rsp],ymm9
- vmovdqa ymm15,YMMWORD[64+rsp]
- vmovdqa ymm9,YMMWORD[96+rsp]
- vpaddd ymm12,ymm12,YMMWORD[((384-512))+rax]
- vpaddd ymm13,ymm13,YMMWORD[((416-512))+rax]
- vpaddd ymm15,ymm15,YMMWORD[((448-512))+rax]
- vpaddd ymm9,ymm9,YMMWORD[((480-512))+rax]
- vpunpckldq ymm2,ymm12,ymm13
- vpunpckldq ymm8,ymm15,ymm9
- vpunpckhdq ymm12,ymm12,ymm13
- vpunpckhdq ymm15,ymm15,ymm9
- vpunpcklqdq ymm13,ymm2,ymm8
- vpunpckhqdq ymm2,ymm2,ymm8
- vpunpcklqdq ymm9,ymm12,ymm15
- vpunpckhqdq ymm12,ymm12,ymm15
- vpaddd ymm4,ymm4,YMMWORD[((512-512))+rax]
- vpaddd ymm5,ymm5,YMMWORD[((544-512))+rax]
- vpaddd ymm6,ymm6,YMMWORD[((576-512))+rax]
- vpaddd ymm7,ymm7,YMMWORD[((608-512))+rax]
- vpunpckldq ymm15,ymm4,ymm5
- vpunpckldq ymm8,ymm6,ymm7
- vpunpckhdq ymm4,ymm4,ymm5
- vpunpckhdq ymm6,ymm6,ymm7
- vpunpcklqdq ymm5,ymm15,ymm8
- vpunpckhqdq ymm15,ymm15,ymm8
- vpunpcklqdq ymm7,ymm4,ymm6
- vpunpckhqdq ymm4,ymm4,ymm6
- vperm2i128 ymm8,ymm13,ymm5,0x20
- vperm2i128 ymm5,ymm13,ymm5,0x31
- vperm2i128 ymm13,ymm2,ymm15,0x20
- vperm2i128 ymm15,ymm2,ymm15,0x31
- vperm2i128 ymm2,ymm9,ymm7,0x20
- vperm2i128 ymm7,ymm9,ymm7,0x31
- vperm2i128 ymm9,ymm12,ymm4,0x20
- vperm2i128 ymm4,ymm12,ymm4,0x31
- vmovdqa ymm6,YMMWORD[rsp]
- vmovdqa ymm12,YMMWORD[32+rsp]
- cmp rdx,64*8
- jb NEAR $L$tail8x
- vpxor ymm6,ymm6,YMMWORD[rsi]
- vpxor ymm8,ymm8,YMMWORD[32+rsi]
- vpxor ymm1,ymm1,YMMWORD[64+rsi]
- vpxor ymm5,ymm5,YMMWORD[96+rsi]
- lea rsi,[128+rsi]
- vmovdqu YMMWORD[rdi],ymm6
- vmovdqu YMMWORD[32+rdi],ymm8
- vmovdqu YMMWORD[64+rdi],ymm1
- vmovdqu YMMWORD[96+rdi],ymm5
- lea rdi,[128+rdi]
- vpxor ymm12,ymm12,YMMWORD[rsi]
- vpxor ymm13,ymm13,YMMWORD[32+rsi]
- vpxor ymm10,ymm10,YMMWORD[64+rsi]
- vpxor ymm15,ymm15,YMMWORD[96+rsi]
- lea rsi,[128+rsi]
- vmovdqu YMMWORD[rdi],ymm12
- vmovdqu YMMWORD[32+rdi],ymm13
- vmovdqu YMMWORD[64+rdi],ymm10
- vmovdqu YMMWORD[96+rdi],ymm15
- lea rdi,[128+rdi]
- vpxor ymm14,ymm14,YMMWORD[rsi]
- vpxor ymm2,ymm2,YMMWORD[32+rsi]
- vpxor ymm3,ymm3,YMMWORD[64+rsi]
- vpxor ymm7,ymm7,YMMWORD[96+rsi]
- lea rsi,[128+rsi]
- vmovdqu YMMWORD[rdi],ymm14
- vmovdqu YMMWORD[32+rdi],ymm2
- vmovdqu YMMWORD[64+rdi],ymm3
- vmovdqu YMMWORD[96+rdi],ymm7
- lea rdi,[128+rdi]
- vpxor ymm11,ymm11,YMMWORD[rsi]
- vpxor ymm9,ymm9,YMMWORD[32+rsi]
- vpxor ymm0,ymm0,YMMWORD[64+rsi]
- vpxor ymm4,ymm4,YMMWORD[96+rsi]
- lea rsi,[128+rsi]
- vmovdqu YMMWORD[rdi],ymm11
- vmovdqu YMMWORD[32+rdi],ymm9
- vmovdqu YMMWORD[64+rdi],ymm0
- vmovdqu YMMWORD[96+rdi],ymm4
- lea rdi,[128+rdi]
- sub rdx,64*8
- jnz NEAR $L$oop_outer8x
- jmp NEAR $L$done8x
- $L$tail8x:
- cmp rdx,448
- jae NEAR $L$448_or_more8x
- cmp rdx,384
- jae NEAR $L$384_or_more8x
- cmp rdx,320
- jae NEAR $L$320_or_more8x
- cmp rdx,256
- jae NEAR $L$256_or_more8x
- cmp rdx,192
- jae NEAR $L$192_or_more8x
- cmp rdx,128
- jae NEAR $L$128_or_more8x
- cmp rdx,64
- jae NEAR $L$64_or_more8x
- xor r10,r10
- vmovdqa YMMWORD[rsp],ymm6
- vmovdqa YMMWORD[32+rsp],ymm8
- jmp NEAR $L$oop_tail8x
- ALIGN 32
- $L$64_or_more8x:
- vpxor ymm6,ymm6,YMMWORD[rsi]
- vpxor ymm8,ymm8,YMMWORD[32+rsi]
- vmovdqu YMMWORD[rdi],ymm6
- vmovdqu YMMWORD[32+rdi],ymm8
- je NEAR $L$done8x
- lea rsi,[64+rsi]
- xor r10,r10
- vmovdqa YMMWORD[rsp],ymm1
- lea rdi,[64+rdi]
- sub rdx,64
- vmovdqa YMMWORD[32+rsp],ymm5
- jmp NEAR $L$oop_tail8x
- ALIGN 32
- $L$128_or_more8x:
- vpxor ymm6,ymm6,YMMWORD[rsi]
- vpxor ymm8,ymm8,YMMWORD[32+rsi]
- vpxor ymm1,ymm1,YMMWORD[64+rsi]
- vpxor ymm5,ymm5,YMMWORD[96+rsi]
- vmovdqu YMMWORD[rdi],ymm6
- vmovdqu YMMWORD[32+rdi],ymm8
- vmovdqu YMMWORD[64+rdi],ymm1
- vmovdqu YMMWORD[96+rdi],ymm5
- je NEAR $L$done8x
- lea rsi,[128+rsi]
- xor r10,r10
- vmovdqa YMMWORD[rsp],ymm12
- lea rdi,[128+rdi]
- sub rdx,128
- vmovdqa YMMWORD[32+rsp],ymm13
- jmp NEAR $L$oop_tail8x
- ALIGN 32
- $L$192_or_more8x:
- vpxor ymm6,ymm6,YMMWORD[rsi]
- vpxor ymm8,ymm8,YMMWORD[32+rsi]
- vpxor ymm1,ymm1,YMMWORD[64+rsi]
- vpxor ymm5,ymm5,YMMWORD[96+rsi]
- vpxor ymm12,ymm12,YMMWORD[128+rsi]
- vpxor ymm13,ymm13,YMMWORD[160+rsi]
- vmovdqu YMMWORD[rdi],ymm6
- vmovdqu YMMWORD[32+rdi],ymm8
- vmovdqu YMMWORD[64+rdi],ymm1
- vmovdqu YMMWORD[96+rdi],ymm5
- vmovdqu YMMWORD[128+rdi],ymm12
- vmovdqu YMMWORD[160+rdi],ymm13
- je NEAR $L$done8x
- lea rsi,[192+rsi]
- xor r10,r10
- vmovdqa YMMWORD[rsp],ymm10
- lea rdi,[192+rdi]
- sub rdx,192
- vmovdqa YMMWORD[32+rsp],ymm15
- jmp NEAR $L$oop_tail8x
- ALIGN 32
- $L$256_or_more8x:
- vpxor ymm6,ymm6,YMMWORD[rsi]
- vpxor ymm8,ymm8,YMMWORD[32+rsi]
- vpxor ymm1,ymm1,YMMWORD[64+rsi]
- vpxor ymm5,ymm5,YMMWORD[96+rsi]
- vpxor ymm12,ymm12,YMMWORD[128+rsi]
- vpxor ymm13,ymm13,YMMWORD[160+rsi]
- vpxor ymm10,ymm10,YMMWORD[192+rsi]
- vpxor ymm15,ymm15,YMMWORD[224+rsi]
- vmovdqu YMMWORD[rdi],ymm6
- vmovdqu YMMWORD[32+rdi],ymm8
- vmovdqu YMMWORD[64+rdi],ymm1
- vmovdqu YMMWORD[96+rdi],ymm5
- vmovdqu YMMWORD[128+rdi],ymm12
- vmovdqu YMMWORD[160+rdi],ymm13
- vmovdqu YMMWORD[192+rdi],ymm10
- vmovdqu YMMWORD[224+rdi],ymm15
- je NEAR $L$done8x
- lea rsi,[256+rsi]
- xor r10,r10
- vmovdqa YMMWORD[rsp],ymm14
- lea rdi,[256+rdi]
- sub rdx,256
- vmovdqa YMMWORD[32+rsp],ymm2
- jmp NEAR $L$oop_tail8x
- ALIGN 32
- $L$320_or_more8x:
- vpxor ymm6,ymm6,YMMWORD[rsi]
- vpxor ymm8,ymm8,YMMWORD[32+rsi]
- vpxor ymm1,ymm1,YMMWORD[64+rsi]
- vpxor ymm5,ymm5,YMMWORD[96+rsi]
- vpxor ymm12,ymm12,YMMWORD[128+rsi]
- vpxor ymm13,ymm13,YMMWORD[160+rsi]
- vpxor ymm10,ymm10,YMMWORD[192+rsi]
- vpxor ymm15,ymm15,YMMWORD[224+rsi]
- vpxor ymm14,ymm14,YMMWORD[256+rsi]
- vpxor ymm2,ymm2,YMMWORD[288+rsi]
- vmovdqu YMMWORD[rdi],ymm6
- vmovdqu YMMWORD[32+rdi],ymm8
- vmovdqu YMMWORD[64+rdi],ymm1
- vmovdqu YMMWORD[96+rdi],ymm5
- vmovdqu YMMWORD[128+rdi],ymm12
- vmovdqu YMMWORD[160+rdi],ymm13
- vmovdqu YMMWORD[192+rdi],ymm10
- vmovdqu YMMWORD[224+rdi],ymm15
- vmovdqu YMMWORD[256+rdi],ymm14
- vmovdqu YMMWORD[288+rdi],ymm2
- je NEAR $L$done8x
- lea rsi,[320+rsi]
- xor r10,r10
- vmovdqa YMMWORD[rsp],ymm3
- lea rdi,[320+rdi]
- sub rdx,320
- vmovdqa YMMWORD[32+rsp],ymm7
- jmp NEAR $L$oop_tail8x
- ALIGN 32
- $L$384_or_more8x:
- vpxor ymm6,ymm6,YMMWORD[rsi]
- vpxor ymm8,ymm8,YMMWORD[32+rsi]
- vpxor ymm1,ymm1,YMMWORD[64+rsi]
- vpxor ymm5,ymm5,YMMWORD[96+rsi]
- vpxor ymm12,ymm12,YMMWORD[128+rsi]
- vpxor ymm13,ymm13,YMMWORD[160+rsi]
- vpxor ymm10,ymm10,YMMWORD[192+rsi]
- vpxor ymm15,ymm15,YMMWORD[224+rsi]
- vpxor ymm14,ymm14,YMMWORD[256+rsi]
- vpxor ymm2,ymm2,YMMWORD[288+rsi]
- vpxor ymm3,ymm3,YMMWORD[320+rsi]
- vpxor ymm7,ymm7,YMMWORD[352+rsi]
- vmovdqu YMMWORD[rdi],ymm6
- vmovdqu YMMWORD[32+rdi],ymm8
- vmovdqu YMMWORD[64+rdi],ymm1
- vmovdqu YMMWORD[96+rdi],ymm5
- vmovdqu YMMWORD[128+rdi],ymm12
- vmovdqu YMMWORD[160+rdi],ymm13
- vmovdqu YMMWORD[192+rdi],ymm10
- vmovdqu YMMWORD[224+rdi],ymm15
- vmovdqu YMMWORD[256+rdi],ymm14
- vmovdqu YMMWORD[288+rdi],ymm2
- vmovdqu YMMWORD[320+rdi],ymm3
- vmovdqu YMMWORD[352+rdi],ymm7
- je NEAR $L$done8x
- lea rsi,[384+rsi]
- xor r10,r10
- vmovdqa YMMWORD[rsp],ymm11
- lea rdi,[384+rdi]
- sub rdx,384
- vmovdqa YMMWORD[32+rsp],ymm9
- jmp NEAR $L$oop_tail8x
- ALIGN 32
- $L$448_or_more8x:
- vpxor ymm6,ymm6,YMMWORD[rsi]
- vpxor ymm8,ymm8,YMMWORD[32+rsi]
- vpxor ymm1,ymm1,YMMWORD[64+rsi]
- vpxor ymm5,ymm5,YMMWORD[96+rsi]
- vpxor ymm12,ymm12,YMMWORD[128+rsi]
- vpxor ymm13,ymm13,YMMWORD[160+rsi]
- vpxor ymm10,ymm10,YMMWORD[192+rsi]
- vpxor ymm15,ymm15,YMMWORD[224+rsi]
- vpxor ymm14,ymm14,YMMWORD[256+rsi]
- vpxor ymm2,ymm2,YMMWORD[288+rsi]
- vpxor ymm3,ymm3,YMMWORD[320+rsi]
- vpxor ymm7,ymm7,YMMWORD[352+rsi]
- vpxor ymm11,ymm11,YMMWORD[384+rsi]
- vpxor ymm9,ymm9,YMMWORD[416+rsi]
- vmovdqu YMMWORD[rdi],ymm6
- vmovdqu YMMWORD[32+rdi],ymm8
- vmovdqu YMMWORD[64+rdi],ymm1
- vmovdqu YMMWORD[96+rdi],ymm5
- vmovdqu YMMWORD[128+rdi],ymm12
- vmovdqu YMMWORD[160+rdi],ymm13
- vmovdqu YMMWORD[192+rdi],ymm10
- vmovdqu YMMWORD[224+rdi],ymm15
- vmovdqu YMMWORD[256+rdi],ymm14
- vmovdqu YMMWORD[288+rdi],ymm2
- vmovdqu YMMWORD[320+rdi],ymm3
- vmovdqu YMMWORD[352+rdi],ymm7
- vmovdqu YMMWORD[384+rdi],ymm11
- vmovdqu YMMWORD[416+rdi],ymm9
- je NEAR $L$done8x
- lea rsi,[448+rsi]
- xor r10,r10
- vmovdqa YMMWORD[rsp],ymm0
- lea rdi,[448+rdi]
- sub rdx,448
- vmovdqa YMMWORD[32+rsp],ymm4
- $L$oop_tail8x:
- movzx eax,BYTE[r10*1+rsi]
- movzx ecx,BYTE[r10*1+rsp]
- lea r10,[1+r10]
- xor eax,ecx
- mov BYTE[((-1))+r10*1+rdi],al
- dec rdx
- jnz NEAR $L$oop_tail8x
- $L$done8x:
- vzeroall
- movaps xmm6,XMMWORD[((-168))+r9]
- movaps xmm7,XMMWORD[((-152))+r9]
- movaps xmm8,XMMWORD[((-136))+r9]
- movaps xmm9,XMMWORD[((-120))+r9]
- movaps xmm10,XMMWORD[((-104))+r9]
- movaps xmm11,XMMWORD[((-88))+r9]
- movaps xmm12,XMMWORD[((-72))+r9]
- movaps xmm13,XMMWORD[((-56))+r9]
- movaps xmm14,XMMWORD[((-40))+r9]
- movaps xmm15,XMMWORD[((-24))+r9]
- lea rsp,[r9]
- $L$8x_epilogue:
- mov rdi,QWORD[8+rsp] ;WIN64 epilogue
- mov rsi,QWORD[16+rsp]
- DB 0F3h,0C3h ;repret
- $L$SEH_end_ChaCha20_8x:
- EXTERN __imp_RtlVirtualUnwind
- ALIGN 16
- ; se_handler: Win64 SEH language handler for GFp_ChaCha20_ctr32.
- ; x64 handler ABI (winnt.h): rcx = EXCEPTION_RECORD*, rdx = establisher
- ; frame, r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*.  If the fault lies inside
- ; the function body it reconstructs the callee-saved registers in *CONTEXT,
- ; then falls into $L$common_seh_tail (shared with the handlers below) which
- ; copies the context and calls RtlVirtualUnwind to continue the unwind.
- se_handler:
- push rsi
- push rdi
- push rbx
- push rbp
- push r12
- push r13
- push r14
- push r15
- pushfq
- sub rsp,64 ; scratch + 4 stack-argument slots for the RtlVirtualUnwind call
- mov rax,QWORD[120+r8] ; rax = CONTEXT.Rax
- mov rbx,QWORD[248+r8] ; rbx = CONTEXT.Rip (where the exception hit)
- mov rsi,QWORD[8+r9] ; DISPATCHER_CONTEXT.ImageBase (not used by this handler)
- mov r11,QWORD[56+r9] ; DISPATCHER_CONTEXT.HandlerData (not used by this handler)
- lea r10,[$L$ctr32_body]
- cmp rbx,r10 ; faulted before the body label: prologue not finished,
- jb NEAR $L$common_seh_tail ; nothing saved on the stack yet
- mov rax,QWORD[152+r8] ; rax = CONTEXT.Rsp
- lea r10,[$L$no_data]
- cmp rbx,r10 ; faulted at/after the epilogue: frame already torn down
- jae NEAR $L$common_seh_tail
- lea rax,[((64+24+48))+rax] ; step over the locals to the base of the pushed regs
- mov rbx,QWORD[((-8))+rax] ; recover the callee-saved registers that the
- mov rbp,QWORD[((-16))+rax] ; function prologue pushed ...
- mov r12,QWORD[((-24))+rax]
- mov r13,QWORD[((-32))+rax]
- mov r14,QWORD[((-40))+rax]
- mov r15,QWORD[((-48))+rax]
- mov QWORD[144+r8],rbx ; ... and publish them back into CONTEXT at their
- mov QWORD[160+r8],rbp ; winnt.h offsets (Rbx,Rbp,R12..R15)
- mov QWORD[216+r8],r12
- mov QWORD[224+r8],r13
- mov QWORD[232+r8],r14
- mov QWORD[240+r8],r15
- $L$common_seh_tail:
- ; Shared tail: rax = unwound stack pointer.  The caller's rdi/rsi live in
- ; the Win64 home slots at [8+rax]/[16+rax] (stored by the function
- ; prologues, cf. the "WIN64 epilogue" loads above).
- mov rdi,QWORD[8+rax]
- mov rsi,QWORD[16+rax]
- mov QWORD[152+r8],rax ; CONTEXT.Rsp
- mov QWORD[168+r8],rsi ; CONTEXT.Rsi
- mov QWORD[176+r8],rdi ; CONTEXT.Rdi
- mov rdi,QWORD[40+r9] ; rdi = DISPATCHER_CONTEXT.ContextRecord
- mov rsi,r8
- mov ecx,154 ; 154 qwords == sizeof(CONTEXT)
- DD 0xa548f3fc ; cld; rep movsq - copy our CONTEXT over disp->ContextRecord
- mov rsi,r9
- xor rcx,rcx ; arg1: HandlerType = UNW_FLAG_NHANDLER
- mov rdx,QWORD[8+rsi] ; arg2: disp->ImageBase
- mov r8,QWORD[rsi] ; arg3: disp->ControlPc
- mov r9,QWORD[16+rsi] ; arg4: disp->FunctionEntry
- mov r10,QWORD[40+rsi] ; disp->ContextRecord
- lea r11,[56+rsi] ; &disp->HandlerData
- lea r12,[24+rsi] ; &disp->EstablisherFrame
- mov QWORD[32+rsp],r10 ; arguments 5..8 go in the stack slots
- mov QWORD[40+rsp],r11
- mov QWORD[48+rsp],r12
- mov QWORD[56+rsp],rcx ; arg8: HistoryTable = NULL
- call QWORD[__imp_RtlVirtualUnwind]
- mov eax,1 ; ExceptionContinueSearch
- add rsp,64
- popfq
- pop r15
- pop r14
- pop r13
- pop r12
- pop rbp
- pop rbx
- pop rdi
- pop rsi
- DB 0F3h,0C3h ;repret
- ALIGN 16
- ; ssse3_handler: SEH handler for ChaCha20_ssse3.  Unlike se_handler it takes
- ; the body's start/end from HandlerData: two image-relative DWORDs emitted in
- ; .xdata right after the handler RVA (see $L$SEH_info_ChaCha20_ssse3 below).
- ; Restores the two non-volatile XMM registers the function spills.
- ssse3_handler:
- push rsi
- push rdi
- push rbx
- push rbp
- push r12
- push r13
- push r14
- push r15
- pushfq
- sub rsp,64 ; scratch + stack-argument slots for RtlVirtualUnwind
- mov rax,QWORD[120+r8] ; CONTEXT.Rax
- mov rbx,QWORD[248+r8] ; CONTEXT.Rip
- mov rsi,QWORD[8+r9] ; DISPATCHER_CONTEXT.ImageBase
- mov r11,QWORD[56+r9] ; DISPATCHER_CONTEXT.HandlerData
- mov r10d,DWORD[r11] ; HandlerData[0]: body-start RVA
- lea r10,[r10*1+rsi] ; + ImageBase -> absolute address
- cmp rbx,r10 ; still in the prologue: nothing to restore
- jb NEAR $L$common_seh_tail
- mov rax,QWORD[192+r8] ; CONTEXT.R9 - the function keeps its frame base in r9
- mov r10d,DWORD[4+r11] ; HandlerData[1]: epilogue-label RVA
- lea r10,[r10*1+rsi]
- cmp rbx,r10 ; past the epilogue: frame already unwound
- jae NEAR $L$common_seh_tail
- lea rsi,[((-40))+rax] ; saved xmm6/xmm7 sit 40 bytes below the frame base
- lea rdi,[512+r8] ; &CONTEXT.Xmm6
- mov ecx,4 ; 4 qwords == two XMM registers
- DD 0xa548f3fc ; cld; rep movsq - restore xmm6/xmm7 into CONTEXT
- jmp NEAR $L$common_seh_tail
- ALIGN 16
- ; full_handler: SEH handler for ChaCha20_4x/ChaCha20_8x, which spill the
- ; full xmm6..xmm15 set (cf. the movaps reloads from -168..-24+r9 in the
- ; $L$done8x epilogue above).  Body bounds come from HandlerData, as in
- ; ssse3_handler.
- full_handler:
- push rsi
- push rdi
- push rbx
- push rbp
- push r12
- push r13
- push r14
- push r15
- pushfq
- sub rsp,64 ; scratch + stack-argument slots for RtlVirtualUnwind
- mov rax,QWORD[120+r8] ; CONTEXT.Rax
- mov rbx,QWORD[248+r8] ; CONTEXT.Rip
- mov rsi,QWORD[8+r9] ; DISPATCHER_CONTEXT.ImageBase
- mov r11,QWORD[56+r9] ; DISPATCHER_CONTEXT.HandlerData
- mov r10d,DWORD[r11] ; HandlerData[0]: body-start RVA
- lea r10,[r10*1+rsi] ; + ImageBase -> absolute address
- cmp rbx,r10 ; still in the prologue: nothing to restore
- jb NEAR $L$common_seh_tail
- mov rax,QWORD[192+r8] ; CONTEXT.R9 - frame base saved in r9 by the function
- mov r10d,DWORD[4+r11] ; HandlerData[1]: epilogue-label RVA
- lea r10,[r10*1+rsi]
- cmp rbx,r10 ; past the epilogue: frame already unwound
- jae NEAR $L$common_seh_tail
- lea rsi,[((-168))+rax] ; saved xmm6..xmm15 start 168 bytes below frame base
- lea rdi,[512+r8] ; &CONTEXT.Xmm6
- mov ecx,20 ; 20 qwords == ten XMM registers (xmm6..xmm15)
- DD 0xa548f3fc ; cld; rep movsq - restore xmm6..xmm15 into CONTEXT
- jmp NEAR $L$common_seh_tail
- section .pdata rdata align=4
- ; .pdata: one RUNTIME_FUNCTION record per function - begin RVA, end RVA,
- ; and the RVA of its UNWIND_INFO in .xdata - so the Windows unwinder can
- ; locate the handlers above.
- ALIGN 4
- DD $L$SEH_begin_GFp_ChaCha20_ctr32 wrt ..imagebase
- DD $L$SEH_end_GFp_ChaCha20_ctr32 wrt ..imagebase
- DD $L$SEH_info_GFp_ChaCha20_ctr32 wrt ..imagebase
- DD $L$SEH_begin_ChaCha20_ssse3 wrt ..imagebase
- DD $L$SEH_end_ChaCha20_ssse3 wrt ..imagebase
- DD $L$SEH_info_ChaCha20_ssse3 wrt ..imagebase
- DD $L$SEH_begin_ChaCha20_4x wrt ..imagebase
- DD $L$SEH_end_ChaCha20_4x wrt ..imagebase
- DD $L$SEH_info_ChaCha20_4x wrt ..imagebase
- DD $L$SEH_begin_ChaCha20_8x wrt ..imagebase
- DD $L$SEH_end_ChaCha20_8x wrt ..imagebase
- DD $L$SEH_info_ChaCha20_8x wrt ..imagebase
- section .xdata rdata align=8
- ; .xdata: UNWIND_INFO blocks.  The leading DB 9,0,0,0 encodes
- ; (Flags<<3)|Version = UNW_FLAG_EHANDLER|1 with an empty unwind-code array,
- ; so all unwinding is delegated to the language handler whose RVA follows.
- ; The trailing label pairs are the HandlerData (body start / epilogue RVAs)
- ; consumed by ssse3_handler and full_handler.
- ALIGN 8
- $L$SEH_info_GFp_ChaCha20_ctr32:
- DB 9,0,0,0
- DD se_handler wrt ..imagebase
- $L$SEH_info_ChaCha20_ssse3:
- DB 9,0,0,0
- DD ssse3_handler wrt ..imagebase
- DD $L$ssse3_body wrt ..imagebase,$L$ssse3_epilogue wrt ..imagebase
- $L$SEH_info_ChaCha20_4x:
- DB 9,0,0,0
- DD full_handler wrt ..imagebase
- DD $L$4x_body wrt ..imagebase,$L$4x_epilogue wrt ..imagebase
- $L$SEH_info_ChaCha20_8x:
- DB 9,0,0,0
- DD full_handler wrt ..imagebase
- DD $L$8x_body wrt ..imagebase,$L$8x_epilogue wrt ..imagebase
|