; p256-x86_64-asm-nasm.asm (81 KB)
  ; This file is generated from a similarly-named Perl script in the BoringSSL
  ; source tree. Do not edit by hand.
  ;
  ; Syntax: NASM, Intel operand order (op dst,src). The functions in this file
  ; are entered with the Win64 register convention (see each ";WIN64 prologue").

  ; Make bare [label] memory operands assemble as RIP-relative.
  default rel

  ; Operand-size keywords defined as empty so that NASM ignores them wherever
  ; they appear in operands.
  %define XMMWORD
  %define YMMWORD
  %define ZMMWORD

  section .text code align=64

  ; Capability words defined outside this file; the dword at offset 8 is tested
  ; by the scalar routines below to choose between code paths.
  EXTERN GFp_ia32cap_P

  ; ---------------------------------------------------------------------------
  ; Read-only constants. Multi-limb values are stored as four 64-bit limbs,
  ; least-significant limb first (the carry chains in the code start at offset 0).
  ; ---------------------------------------------------------------------------
  ALIGN 64
  ; P-256 field prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1.
  $L$poly:
        DQ      0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001
  ; Eight dwords each of 1, 2 and 3. Not referenced in the part of the file
  ; shown here.
  $L$One:
        DD      1,1,1,1,1,1,1,1
  $L$Two:
        DD      2,2,2,2,2,2,2,2
  $L$Three:
        DD      3,3,3,3,3,3,3,3
  ; 2^256 mod p, i.e. the value 1 in Montgomery form for the field prime.
  $L$ONE_mont:
        DQ      0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe
  ; Modulus used by the GFp_p256_scalar_* routines (the P-256 group order).
  ; Note limb 2 is 2^64-1 and limb 3 is 2^64-2^32; the reduction code exploits
  ; that by using shifts and subtractions for those two limbs.
  $L$ord:
        DQ      0xf3b9cac2fc632551,0xbce6faada7179e84,0xffffffffffffffff,0xffffffff00000000
  ; Montgomery reduction multiplier for $L$ord (-1/ord mod 2^64).
  ; LAYOUT DEPENDENCY: GFp_p256_scalar_sqr_rep_mont reads this value as
  ; QWORD[32+$L$ord], so it must stay immediately after the four $L$ord limbs.
  $L$ordK:
        DQ      0xccd1c8aaee00bc4f
  ;-----------------------------------------------------------------------
  ; GFp_nistz256_add
  ; ABI:   Win64 entry (rcx, rdx, r8); arguments are remapped to rdi, rsi,
  ;        rdx immediately after the prologue.
  ; In:    rcx = destination, 4 qwords written
  ;        rdx = operand a,   4 qwords read, least-significant limb first
  ;        r8  = operand b,   4 qwords read
  ; Out:   destination = a + b, with $L$poly subtracted once unless that
  ;        subtraction borrows out of the 257-bit sum (in which case the
  ;        plain sum is stored).
  ; Clobb: rax, rcx, rdx, r8-r11, flags. r12/r13 are pushed and popped;
  ;        rdi/rsi are parked in the caller's home slots and reloaded.
  ; Notes: straight-line code; the final selection uses cmovc, not a branch.
  ;-----------------------------------------------------------------------
  global GFp_nistz256_add
  ALIGN 32
  GFp_nistz256_add:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue: rdi/rsi are callee-saved on Win64
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp                 ; generated prologue; rax is overwritten before use here
  $L$SEH_begin_GFp_nistz256_add:
        mov     rdi,rcx                 ; rdi = destination
        mov     rsi,rdx                 ; rsi = a
        mov     rdx,r8                  ; rdx = b
        push    r12
        push    r13

        ; Load a into r8..r11; r13 will collect the carry out of the sum.
        mov     r8,QWORD[rsi]
        xor     r13,r13
        mov     r9,QWORD[8+rsi]
        mov     r10,QWORD[16+rsi]
        mov     r11,QWORD[24+rsi]
        lea     rsi,[$L$poly]           ; rsi now points at the modulus

        ; r13:r11:r10:r9:r8 = a + b. Copies of the unreduced limbs are taken
        ; with mov (flags untouched) in rax, rdx, rcx, r12.
        add     r8,QWORD[rdx]
        adc     r9,QWORD[8+rdx]
        mov     rax,r8
        adc     r10,QWORD[16+rdx]
        adc     r11,QWORD[24+rdx]
        mov     rdx,r9                  ; safe: all loads through rdx are done
        adc     r13,0

        ; Trial subtraction of the modulus across all five limbs.
        sub     r8,QWORD[rsi]
        mov     rcx,r10
        sbb     r9,QWORD[8+rsi]
        sbb     r10,QWORD[16+rsi]
        mov     r12,r11
        sbb     r11,QWORD[24+rsi]
        sbb     r13,0                   ; CF=1 here means sum < modulus

        ; If the subtraction borrowed, restore the unreduced limbs.
        cmovc   r8,rax
        cmovc   r9,rdx
        mov     QWORD[rdi],r8
        cmovc   r10,rcx
        mov     QWORD[8+rdi],r9
        cmovc   r11,r12
        mov     QWORD[16+rdi],r10
        mov     QWORD[24+rdi],r11

        pop     r13
        pop     r12
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret
  $L$SEH_end_GFp_nistz256_add:
  ;-----------------------------------------------------------------------
  ; GFp_nistz256_neg
  ; ABI:   Win64 entry (rcx, rdx); arguments are remapped to rdi, rsi.
  ; In:    rcx = destination, 4 qwords written
  ;        rdx = operand a,   4 qwords read, least-significant limb first
  ; Out:   destination = 0 - a, with $L$poly added back when the subtraction
  ;        borrowed; when it did not borrow (a == 0) the plain difference
  ;        (zero) is stored.
  ; Clobb: rax, rcx, rdx, r8-r11, flags. r12/r13 are saved on the stack and
  ;        reloaded; rdi/rsi are parked in the caller's home slots.
  ; Notes: straight-line code; the final selection uses cmovz, not a branch.
  ;-----------------------------------------------------------------------
  global GFp_nistz256_neg
  ALIGN 32
  GFp_nistz256_neg:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue: rdi/rsi are callee-saved on Win64
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp                 ; generated prologue; rax is overwritten before use here
  $L$SEH_begin_GFp_nistz256_neg:
        mov     rdi,rcx                 ; rdi = destination
        mov     rsi,rdx                 ; rsi = a
        push    r12
        push    r13
  $L$neg_body:
        ; r13:r11:r10:r9:r8 = 0 - a. r13 ends up 0 (no borrow) or all-ones.
        xor     r8,r8
        xor     r9,r9
        xor     r10,r10
        xor     r11,r11
        xor     r13,r13
        sub     r8,QWORD[rsi]
        sbb     r9,QWORD[8+rsi]
        sbb     r10,QWORD[16+rsi]
        mov     rax,r8                  ; rax, rdx, rcx, r12 keep the raw difference
        sbb     r11,QWORD[24+rsi]
        lea     rsi,[$L$poly]           ; lea/mov leave the borrow flag intact
        mov     rdx,r9
        sbb     r13,0

        ; Candidate result: raw difference + modulus.
        add     r8,QWORD[rsi]
        mov     rcx,r10
        adc     r9,QWORD[8+rsi]
        adc     r10,QWORD[16+rsi]
        mov     r12,r11
        adc     r11,QWORD[24+rsi]

        ; No borrow (r13 == 0): keep the raw difference instead.
        test    r13,r13
        cmovz   r8,rax
        cmovz   r9,rdx
        mov     QWORD[rdi],r8
        cmovz   r10,rcx
        mov     QWORD[8+rdi],r9
        cmovz   r11,r12
        mov     QWORD[16+rdi],r10
        mov     QWORD[24+rdi],r11

        ; Reload r13/r12 with mov and release the stack with one lea, so rsp
        ; changes only in the last instruction before $L$neg_epilogue.
        ; NOTE(review): presumably required by unwind data keyed on
        ; $L$neg_body/$L$neg_epilogue - the handler is not visible here.
        mov     r13,QWORD[rsp]
        mov     r12,QWORD[8+rsp]
        lea     rsp,[16+rsp]
  $L$neg_epilogue:
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret
  $L$SEH_end_GFp_nistz256_neg:
  ;-----------------------------------------------------------------------
  ; GFp_p256_scalar_mul_mont
  ; Montgomery multiplication modulo $L$ord: four rounds of
  ; "multiply a by one limb of b, accumulate, then fold in m * ord" where
  ; m = (lowest accumulator limb) * $L$ordK, followed by one conditional
  ; subtraction of $L$ord.
  ;
  ; ABI:   Win64 entry (rcx, rdx, r8); remapped to rdi, rsi, rdx.
  ; In:    rcx = destination, 4 qwords written
  ;        rdx = operand a,   4 qwords read, least-significant limb first
  ;        r8  = operand b,   4 qwords read
  ; Out:   destination = 4-limb result of the reduction described above.
  ; Clobb: rax, rcx, rdx, r8-r11, flags. rbp, rbx, r12-r15 are pushed and
  ;        reloaded; rdi/rsi are parked in the caller's home slots.
  ;
  ; Dispatch: when bits 8 and 19 (mask 0x80100) of the dword at
  ; GFp_ia32cap_P+8 are both set, control jumps to
  ; $L$ecp_nistz256_ord_mul_montx, which is written with mulx/adcx/adox
  ; (BMI2 and ADX in the OPENSSL_ia32cap bit layout). The jump happens
  ; before any register is pushed; the target pushes its own.
  ;
  ; Register roles after $L$ord_mul_body:
  ;   rsi = a, rbx = b, r14 = &$L$ord, r15 = $L$ordK, rcx/rbp = scratch.
  ;   r8-r13 hold the running accumulator; the limb-to-register mapping
  ;   rotates by one register every round.
  ;
  ; Reduction shortcut: ord[2] = 2^64-1 and ord[3] = 2^64-2^32, so m*ord[2]
  ; and m*ord[3] are formed with sub/sbb and shl/shr instead of mul. Only
  ; ord[0] and ord[1] are multiplied.
  ;-----------------------------------------------------------------------
  global GFp_p256_scalar_mul_mont
  ALIGN 32
  GFp_p256_scalar_mul_mont:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue: rdi/rsi are callee-saved on Win64
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp
  $L$SEH_begin_GFp_p256_scalar_mul_mont:
        mov     rdi,rcx                 ; rdi = destination
        mov     rsi,rdx                 ; rsi = a
        mov     rdx,r8                  ; rdx = b
        lea     rcx,[GFp_ia32cap_P]
        mov     rcx,QWORD[8+rcx]
        and     ecx,0x80100             ; keep bits 8 and 19 only
        cmp     ecx,0x80100             ; both set?
        je      NEAR $L$ecp_nistz256_ord_mul_montx
        push    rbp
        push    rbx
        push    r12
        push    r13
        push    r14
        push    r15
  $L$ord_mul_body:
        mov     rax,QWORD[rdx]          ; rax = b[0]
        mov     rbx,rdx                 ; rbx = b (rdx is consumed by mul)
        lea     r14,[$L$ord]
        mov     r15,QWORD[$L$ordK]

        ; ---- round 0: r12:r11:r10:r9:r8 = a * b[0]
        mov     rcx,rax                 ; rcx = b[0]
        mul     QWORD[rsi]
        mov     r8,rax
        mov     rax,rcx
        mov     r9,rdx
        mul     QWORD[8+rsi]
        add     r9,rax
        mov     rax,rcx
        adc     rdx,0
        mov     r10,rdx
        mul     QWORD[16+rsi]
        add     r10,rax
        mov     rax,rcx
        adc     rdx,0
        mov     r13,r8                  ; r13 = copy of limb 0
        imul    r8,r15                  ; r8 = m = limb0 * ordK (low 64 bits)
        mov     r11,rdx
        mul     QWORD[24+rsi]
        add     r11,rax
        mov     rax,r8
        adc     rdx,0
        mov     r12,rdx

        ; ---- reduction 0: add m * ord. The low limb (r13) is expected to
        ; cancel to zero; r13 is then reused as the new top limb.
        mul     QWORD[r14]              ; m * ord[0]
        mov     rbp,r8                  ; rbp = m
        add     r13,rax
        mov     rax,r8
        adc     rdx,0
        mov     rcx,rdx                 ; rcx = carry into limb 1
        sub     r10,r8                  ; m*ord[2] = m*2^64 - m: subtract m here...
        sbb     r8,0                    ; ...and carry (m - borrow) into the next limb
        mul     QWORD[8+r14]            ; m * ord[1]
        add     r9,rcx
        adc     rdx,0
        add     r9,rax
        mov     rax,rbp
        adc     r10,rdx
        mov     rdx,rbp
        adc     r8,0
        shl     rax,32                  ; m*ord[3] = m*2^64 - m*2^32:
        shr     rdx,32                  ;   rax = low half of m*2^32, rdx = high half
        sub     r11,rax
        mov     rax,QWORD[8+rbx]        ; prefetch b[1] for the next round
        sbb     rbp,rdx
        add     r11,r8
        adc     r12,rbp
        adc     r13,0

        ; ---- round 1: r9..r13 += a * b[1], carry into r8
        mov     rcx,rax                 ; rcx = b[1]
        mul     QWORD[rsi]
        add     r9,rax
        mov     rax,rcx
        adc     rdx,0
        mov     rbp,rdx
        mul     QWORD[8+rsi]
        add     r10,rbp
        adc     rdx,0
        add     r10,rax
        mov     rax,rcx
        adc     rdx,0
        mov     rbp,rdx
        mul     QWORD[16+rsi]
        add     r11,rbp
        adc     rdx,0
        add     r11,rax
        mov     rax,rcx
        adc     rdx,0
        mov     rcx,r9                  ; rcx = copy of limb 0
        imul    r9,r15                  ; r9 = m
        mov     rbp,rdx
        mul     QWORD[24+rsi]
        add     r12,rbp
        adc     rdx,0
        xor     r8,r8
        add     r12,rax
        mov     rax,r9
        adc     r13,rdx
        adc     r8,0

        ; ---- reduction 1 (same pattern, registers rotated by one)
        mul     QWORD[r14]
        mov     rbp,r9
        add     rcx,rax
        mov     rax,r9
        adc     rcx,rdx                 ; rcx = carry into limb 1
        sub     r11,r9
        sbb     r9,0
        mul     QWORD[8+r14]
        add     r10,rcx
        adc     rdx,0
        add     r10,rax
        mov     rax,rbp
        adc     r11,rdx
        mov     rdx,rbp
        adc     r9,0
        shl     rax,32
        shr     rdx,32
        sub     r12,rax
        mov     rax,QWORD[16+rbx]       ; prefetch b[2]
        sbb     rbp,rdx
        add     r12,r9
        adc     r13,rbp
        adc     r8,0

        ; ---- round 2: r10..r13,r8 += a * b[2], carry into r9
        mov     rcx,rax                 ; rcx = b[2]
        mul     QWORD[rsi]
        add     r10,rax
        mov     rax,rcx
        adc     rdx,0
        mov     rbp,rdx
        mul     QWORD[8+rsi]
        add     r11,rbp
        adc     rdx,0
        add     r11,rax
        mov     rax,rcx
        adc     rdx,0
        mov     rbp,rdx
        mul     QWORD[16+rsi]
        add     r12,rbp
        adc     rdx,0
        add     r12,rax
        mov     rax,rcx
        adc     rdx,0
        mov     rcx,r10                 ; rcx = copy of limb 0
        imul    r10,r15                 ; r10 = m
        mov     rbp,rdx
        mul     QWORD[24+rsi]
        add     r13,rbp
        adc     rdx,0
        xor     r9,r9
        add     r13,rax
        mov     rax,r10
        adc     r8,rdx
        adc     r9,0

        ; ---- reduction 2
        mul     QWORD[r14]
        mov     rbp,r10
        add     rcx,rax
        mov     rax,r10
        adc     rcx,rdx
        sub     r12,r10
        sbb     r10,0
        mul     QWORD[8+r14]
        add     r11,rcx
        adc     rdx,0
        add     r11,rax
        mov     rax,rbp
        adc     r12,rdx
        mov     rdx,rbp
        adc     r10,0
        shl     rax,32
        shr     rdx,32
        sub     r13,rax
        mov     rax,QWORD[24+rbx]       ; prefetch b[3]
        sbb     rbp,rdx
        add     r13,r10
        adc     r8,rbp
        adc     r9,0

        ; ---- round 3: r11..r13,r8,r9 += a * b[3], carry into r10
        mov     rcx,rax                 ; rcx = b[3]
        mul     QWORD[rsi]
        add     r11,rax
        mov     rax,rcx
        adc     rdx,0
        mov     rbp,rdx
        mul     QWORD[8+rsi]
        add     r12,rbp
        adc     rdx,0
        add     r12,rax
        mov     rax,rcx
        adc     rdx,0
        mov     rbp,rdx
        mul     QWORD[16+rsi]
        add     r13,rbp
        adc     rdx,0
        add     r13,rax
        mov     rax,rcx
        adc     rdx,0
        mov     rcx,r11                 ; rcx = copy of limb 0
        imul    r11,r15                 ; r11 = m
        mov     rbp,rdx
        mul     QWORD[24+rsi]
        add     r8,rbp
        adc     rdx,0
        xor     r10,r10
        add     r8,rax
        mov     rax,r11
        adc     r9,rdx
        adc     r10,0

        ; ---- reduction 3
        mul     QWORD[r14]
        mov     rbp,r11
        add     rcx,rax
        mov     rax,r11
        adc     rcx,rdx
        sub     r13,r11
        sbb     r11,0
        mul     QWORD[8+r14]
        add     r12,rcx
        adc     rdx,0
        add     r12,rax
        mov     rax,rbp
        adc     r13,rdx
        mov     rdx,rbp
        adc     r11,0
        shl     rax,32
        shr     rdx,32
        sub     r8,rax
        sbb     rbp,rdx
        add     r8,r11
        adc     r9,rbp
        adc     r10,0

        ; ---- final step: the value is r10:r9:r8:r13:r12. Subtract ord and
        ; keep the unreduced copy (rsi, r11, rcx, rbp) if that borrows.
        mov     rsi,r12
        sub     r12,QWORD[r14]
        mov     r11,r13
        sbb     r13,QWORD[8+r14]
        mov     rcx,r8
        sbb     r8,QWORD[16+r14]
        mov     rbp,r9
        sbb     r9,QWORD[24+r14]
        sbb     r10,0
        cmovc   r12,rsi
        cmovc   r13,r11
        cmovc   r8,rcx
        cmovc   r9,rbp
        mov     QWORD[rdi],r12
        mov     QWORD[8+rdi],r13
        mov     QWORD[16+rdi],r8
        mov     QWORD[24+rdi],r9

        ; Reload the six saved registers with mov, then release the stack with
        ; a single lea immediately before $L$ord_mul_epilogue.
        mov     r15,QWORD[rsp]
        mov     r14,QWORD[8+rsp]
        mov     r13,QWORD[16+rsp]
        mov     r12,QWORD[24+rsp]
        mov     rbx,QWORD[32+rsp]
        mov     rbp,QWORD[40+rsp]
        lea     rsp,[48+rsp]
  $L$ord_mul_epilogue:
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret
  $L$SEH_end_GFp_p256_scalar_mul_mont:
  ;-----------------------------------------------------------------------
  ; GFp_p256_scalar_sqr_rep_mont
  ; Repeated Montgomery squaring modulo $L$ord: each loop pass squares the
  ; current 4-limb value, runs four reduction steps on the low half, adds
  ; the high half and conditionally subtracts $L$ord.
  ;
  ; ABI:   Win64 entry (rcx, rdx, r8); remapped to rdi, rsi, rdx.
  ; In:    rcx = destination, 4 qwords written
  ;        rdx = operand a,   4 qwords read, least-significant limb first
  ;        r8  = number of squarings. The loop body runs before the counter
  ;              is decremented and tested, so a count of 0 is NOT handled
  ;              (the counter would wrap); callers must pass at least 1.
  ; Out:   destination = value after the last squaring.
  ; Clobb: rax, rcx, rdx, r8-r11, xmm1-xmm3, flags. rbp, rbx, r12-r15 are
  ;        pushed and reloaded; rdi/rsi are parked in the caller's home slots.
  ;
  ; Dispatch: when bits 8 and 19 (mask 0x80100) of the dword at
  ; GFp_ia32cap_P+8 are both set, control jumps to
  ; $L$ecp_nistz256_ord_sqr_montx (defined outside this part of the file)
  ; before any register is pushed.
  ;
  ; Loop-carried state at $L$oop_ord_sqr:
  ;   r8 = a[0], rax = a[1], r14 = a[2], r15 = a[3],
  ;   rbx = squarings remaining, rsi = &$L$ord, QWORD[32+rsi] = $L$ordK.
  ; xmm1-xmm3 temporarily hold a[1]-a[3] and are zeroed before returning.
  ;-----------------------------------------------------------------------
  global GFp_p256_scalar_sqr_rep_mont
  ALIGN 32
  GFp_p256_scalar_sqr_rep_mont:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue: rdi/rsi are callee-saved on Win64
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp
  $L$SEH_begin_GFp_p256_scalar_sqr_rep_mont:
        mov     rdi,rcx                 ; rdi = destination
        mov     rsi,rdx                 ; rsi = a
        mov     rdx,r8                  ; rdx = repetition count
        lea     rcx,[GFp_ia32cap_P]
        mov     rcx,QWORD[8+rcx]
        and     ecx,0x80100             ; keep bits 8 and 19 only
        cmp     ecx,0x80100             ; both set?
        je      NEAR $L$ecp_nistz256_ord_sqr_montx
        push    rbp
        push    rbx
        push    r12
        push    r13
        push    r14
        push    r15
  $L$ord_sqr_body:
        mov     r8,QWORD[rsi]           ; a[0]
        mov     rax,QWORD[8+rsi]        ; a[1]
        mov     r14,QWORD[16+rsi]       ; a[2]
        mov     r15,QWORD[24+rsi]       ; a[3]
        lea     rsi,[$L$ord]            ; rsi = modulus from here on
        mov     rbx,rdx                 ; rbx = loop counter
        jmp     NEAR $L$oop_ord_sqr
  ALIGN 32
  $L$oop_ord_sqr:
        ; ---- cross products a[i]*a[j], i < j, accumulated in r9..r14
        mov     rbp,rax                 ; rbp = a[1]
        mul     r8                      ; a[1] * a[0]
        mov     r9,rax
        DB      102,72,15,110,205       ; movq xmm1,rbp  (stash a[1])
        mov     rax,r14
        mov     r10,rdx
        mul     r8                      ; a[2] * a[0]
        add     r10,rax
        mov     rax,r15
        DB      102,73,15,110,214       ; movq xmm2,r14  (stash a[2])
        adc     rdx,0
        mov     r11,rdx
        mul     r8                      ; a[3] * a[0]
        add     r11,rax
        mov     rax,r15
        DB      102,73,15,110,223       ; movq xmm3,r15  (stash a[3])
        adc     rdx,0
        mov     r12,rdx
        mul     r14                     ; a[3] * a[2]
        mov     r13,rax
        mov     rax,r14
        mov     r14,rdx
        mul     rbp                     ; a[2] * a[1]
        add     r11,rax
        mov     rax,r15
        adc     rdx,0
        mov     r15,rdx
        mul     rbp                     ; a[3] * a[1]
        add     r12,rax
        adc     rdx,0
        add     r12,r15
        adc     r13,rdx
        adc     r14,0

        ; ---- double the cross products; r15 receives the carry out
        xor     r15,r15
        mov     rax,r8
        add     r9,r9
        adc     r10,r10
        adc     r11,r11
        adc     r12,r12
        adc     r13,r13
        adc     r14,r14
        adc     r15,0

        ; ---- add the squares a[i]^2; the 8-limb square ends up in r8..r15
        mul     rax                     ; a[0]^2
        mov     r8,rax
        DB      102,72,15,126,200       ; movq rax,xmm1  (a[1])
        mov     rbp,rdx
        mul     rax                     ; a[1]^2
        add     r9,rbp
        adc     r10,rax
        DB      102,72,15,126,208       ; movq rax,xmm2  (a[2])
        adc     rdx,0
        mov     rbp,rdx
        mul     rax                     ; a[2]^2
        add     r11,rbp
        adc     r12,rax
        DB      102,72,15,126,216       ; movq rax,xmm3  (a[3])
        adc     rdx,0
        mov     rbp,rdx
        mov     rcx,r8                  ; rcx = copy of limb 0
        imul    r8,QWORD[32+rsi]        ; r8 = m = limb0 * $L$ordK
        mul     rax                     ; a[3]^2
        add     r13,rbp
        adc     r14,rax
        mov     rax,QWORD[rsi]          ; ord[0]
        adc     r15,rdx

        ; ---- reduction step 0 on the low half (r8..r11). As in the multiply
        ; routine, ord[2] and ord[3] are handled with sub/sbb and shifts.
        mul     r8                      ; m * ord[0]
        mov     rbp,r8
        add     rcx,rax
        mov     rax,QWORD[8+rsi]        ; ord[1]
        adc     rcx,rdx                 ; rcx = carry into limb 1
        sub     r10,r8
        sbb     rbp,0
        mul     r8                      ; m * ord[1]
        add     r9,rcx
        adc     rdx,0
        add     r9,rax
        mov     rax,r8
        adc     r10,rdx
        mov     rdx,r8
        adc     rbp,0
        mov     rcx,r9                  ; next step: copy of limb 0
        imul    r9,QWORD[32+rsi]        ; next m
        shl     rax,32
        shr     rdx,32
        sub     r11,rax
        mov     rax,QWORD[rsi]
        sbb     r8,rdx
        add     r11,rbp
        adc     r8,0

        ; ---- reduction step 1 (registers rotated by one)
        mul     r9
        mov     rbp,r9
        add     rcx,rax
        mov     rax,QWORD[8+rsi]
        adc     rcx,rdx
        sub     r11,r9
        sbb     rbp,0
        mul     r9
        add     r10,rcx
        adc     rdx,0
        add     r10,rax
        mov     rax,r9
        adc     r11,rdx
        mov     rdx,r9
        adc     rbp,0
        mov     rcx,r10
        imul    r10,QWORD[32+rsi]
        shl     rax,32
        shr     rdx,32
        sub     r8,rax
        mov     rax,QWORD[rsi]
        sbb     r9,rdx
        add     r8,rbp
        adc     r9,0

        ; ---- reduction step 2
        mul     r10
        mov     rbp,r10
        add     rcx,rax
        mov     rax,QWORD[8+rsi]
        adc     rcx,rdx
        sub     r8,r10
        sbb     rbp,0
        mul     r10
        add     r11,rcx
        adc     rdx,0
        add     r11,rax
        mov     rax,r10
        adc     r8,rdx
        mov     rdx,r10
        adc     rbp,0
        mov     rcx,r11
        imul    r11,QWORD[32+rsi]
        shl     rax,32
        shr     rdx,32
        sub     r9,rax
        mov     rax,QWORD[rsi]
        sbb     r10,rdx
        add     r9,rbp
        adc     r10,0

        ; ---- reduction step 3
        mul     r11
        mov     rbp,r11
        add     rcx,rax
        mov     rax,QWORD[8+rsi]
        adc     rcx,rdx
        sub     r9,r11
        sbb     rbp,0
        mul     r11
        add     r8,rcx
        adc     rdx,0
        add     r8,rax
        mov     rax,r11
        adc     r9,rdx
        mov     rdx,r11
        adc     rbp,0
        shl     rax,32
        shr     rdx,32
        sub     r10,rax
        sbb     r11,rdx
        add     r10,rbp
        adc     r11,0

        ; ---- add the high half of the square (r12..r15); rdx = carry out.
        ; Unreduced copies are kept in r12, rax, r14, r15.
        xor     rdx,rdx
        add     r8,r12
        adc     r9,r13
        mov     r12,r8
        adc     r10,r14
        adc     r11,r15
        mov     rax,r9
        adc     rdx,0

        ; ---- trial subtraction of ord, then select into the loop-carried
        ; registers r8 / rax / r14 / r15 without branching.
        sub     r8,QWORD[rsi]
        mov     r14,r10
        sbb     r9,QWORD[8+rsi]
        sbb     r10,QWORD[16+rsi]
        mov     r15,r11
        sbb     r11,QWORD[24+rsi]
        sbb     rdx,0
        cmovc   r8,r12                  ; borrow: restore the unreduced limb 0
        cmovnc  rax,r9                  ; no borrow: take the reduced limbs 1..3
        cmovnc  r14,r10
        cmovnc  r15,r11
        dec     rbx
        jnz     NEAR $L$oop_ord_sqr

        ; ---- store the result and zero the xmm registers used as scratch
        mov     QWORD[rdi],r8
        mov     QWORD[8+rdi],rax
        pxor    xmm1,xmm1
        mov     QWORD[16+rdi],r14
        pxor    xmm2,xmm2
        mov     QWORD[24+rdi],r15
        pxor    xmm3,xmm3

        ; Reload the six saved registers with mov, then release the stack with
        ; a single lea immediately before $L$ord_sqr_epilogue.
        mov     r15,QWORD[rsp]
        mov     r14,QWORD[8+rsp]
        mov     r13,QWORD[16+rsp]
        mov     r12,QWORD[24+rsp]
        mov     rbx,QWORD[32+rsp]
        mov     rbp,QWORD[40+rsp]
        lea     rsp,[48+rsp]
  $L$ord_sqr_epilogue:
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret
  $L$SEH_end_GFp_p256_scalar_sqr_rep_mont:
  606. ALIGN 32
  ; ---------------------------------------------------------------------
  ; ecp_nistz256_ord_mul_montx
  ; Montgomery-style multiplication of two 4x64-bit operands using
  ; mulx/adcx/adox. After each partial product the accumulator is reduced
  ; with the four qwords at $L$ord as modulus and the qword at $L$ordK as
  ; the per-round reduction constant.
  ; NOTE(review): $L$ord / $L$ordK are defined outside this chunk;
  ; presumably the P-256 group order and -1/ord mod 2^64 -- confirm.
  ;
  ; ABI:  Win64 at the first label (rcx = result, rdx = a, r8 = b); the
  ;       arguments are moved to rdi/rsi/rdx before the body.
  ; In:   [rsi] = a[0..3], [rdx] = b[0..3] (little-endian limbs)
  ; Out:  [rdi] = 4 result limbs, conditionally reduced once by $L$ord
  ; Clobbers: rax, rcx, rdx, r8-r11, flags. rbx, rbp, r12-r15 are pushed
  ;       and restored; rdi/rsi are reloaded from the caller's home slots.
  ; ---------------------------------------------------------------------
  607. ecp_nistz256_ord_mul_montx:
  608. mov QWORD[8+rsp],rdi ;WIN64 prologue
  609. mov QWORD[16+rsp],rsi
  ; rax = rsp at entry; it is overwritten below without being read here
  ; (presumably kept for the SEH unwind info -- confirm).
  610. mov rax,rsp
  611. $L$SEH_begin_ecp_nistz256_ord_mul_montx:
  612. mov rdi,rcx
  613. mov rsi,rdx
  614. mov rdx,r8
  ; Secondary entry with the arguments already in rdi/rsi/rdx. Code that
  ; jumps here must have saved rdi/rsi to [8+rsp]/[16+rsp] itself, because
  ; the epilogue reloads them from those slots.
  615. $L$ecp_nistz256_ord_mul_montx:
  616. push rbp
  617. push rbx
  618. push r12
  619. push r13
  620. push r14
  621. push r15
  622. $L$ord_mulx_body:
  ; rbx = b pointer, rdx = b[0] (implicit mulx multiplicand),
  ; r9..r12 = a[0..3].
  623. mov rbx,rdx
  624. mov rdx,QWORD[rdx]
  625. mov r9,QWORD[rsi]
  626. mov r10,QWORD[8+rsi]
  627. mov r11,QWORD[16+rsi]
  628. mov r12,QWORD[24+rsi]
  ; Both table pointers are biased by -128; every later access adds +128.
  629. lea rsi,[((-128))+rsi]
  630. lea r14,[(($L$ord-128))]
  631. mov r15,QWORD[$L$ordK]
  ; ---- round 0: acc (r8..r12) = a * b[0] ----
  632. mulx r9,r8,r9
  633. mulx r10,rcx,r10
  634. mulx r11,rbp,r11
  635. add r9,rcx
  636. mulx r12,rcx,r12
  ; rdx = low 64 bits of acc[0] * $L$ordK (reduction multiplier m);
  ; the high half lands in rax and is not used.
  637. mov rdx,r8
  638. mulx rax,rdx,r15
  639. adc r10,rbp
  640. adc r11,rcx
  641. adc r12,0
  ; Zero r13 and clear CF/OF so both adcx and adox chains start clean.
  642. xor r13,r13
  ; ---- reduction 0: acc += m * ord[0..3] (adcx = low halves, adox = high) ----
  643. mulx rbp,rcx,QWORD[((0+128))+r14]
  644. adcx r8,rcx
  645. adox r9,rbp
  646. mulx rbp,rcx,QWORD[((8+128))+r14]
  647. adcx r9,rcx
  648. adox r10,rbp
  649. mulx rbp,rcx,QWORD[((16+128))+r14]
  650. adcx r10,rcx
  651. adox r11,rbp
  652. mulx rbp,rcx,QWORD[((24+128))+r14]
  ; Next multiplier b[1] is loaded early; mov does not touch the flags.
  653. mov rdx,QWORD[8+rbx]
  654. adcx r11,rcx
  655. adox r12,rbp
  ; Fold both carry chains into r12/r13. r8 is used as the addend here;
  ; it is expected to be zero at this point (low limb cancelled by the
  ; reduction, assuming $L$ordK is the matching constant).
  656. adcx r12,r8
  657. adox r13,r8
  658. adc r13,0
  ; ---- round 1: acc (r9..r13, top limb r8) += a * b[1] ----
  659. mulx rbp,rcx,QWORD[((0+128))+rsi]
  660. adcx r9,rcx
  661. adox r10,rbp
  662. mulx rbp,rcx,QWORD[((8+128))+rsi]
  663. adcx r10,rcx
  664. adox r11,rbp
  665. mulx rbp,rcx,QWORD[((16+128))+rsi]
  666. adcx r11,rcx
  667. adox r12,rbp
  668. mulx rbp,rcx,QWORD[((24+128))+rsi]
  ; m = low64(acc[0] * $L$ordK) for this round.
  669. mov rdx,r9
  670. mulx rax,rdx,r15
  671. adcx r12,rcx
  672. adox r13,rbp
  673. adcx r13,r8
  674. adox r8,r8
  675. adc r8,0
  ; ---- reduction 1 ----
  676. mulx rbp,rcx,QWORD[((0+128))+r14]
  677. adcx r9,rcx
  678. adox r10,rbp
  679. mulx rbp,rcx,QWORD[((8+128))+r14]
  680. adcx r10,rcx
  681. adox r11,rbp
  682. mulx rbp,rcx,QWORD[((16+128))+r14]
  683. adcx r11,rcx
  684. adox r12,rbp
  685. mulx rbp,rcx,QWORD[((24+128))+r14]
  ; rdx = b[2]
  686. mov rdx,QWORD[16+rbx]
  687. adcx r12,rcx
  688. adox r13,rbp
  689. adcx r13,r9
  690. adox r8,r9
  691. adc r8,0
  ; ---- round 2: acc (r10..r13, r8, top limb r9) += a * b[2] ----
  692. mulx rbp,rcx,QWORD[((0+128))+rsi]
  693. adcx r10,rcx
  694. adox r11,rbp
  695. mulx rbp,rcx,QWORD[((8+128))+rsi]
  696. adcx r11,rcx
  697. adox r12,rbp
  698. mulx rbp,rcx,QWORD[((16+128))+rsi]
  699. adcx r12,rcx
  700. adox r13,rbp
  701. mulx rbp,rcx,QWORD[((24+128))+rsi]
  702. mov rdx,r10
  703. mulx rax,rdx,r15
  704. adcx r13,rcx
  705. adox r8,rbp
  706. adcx r8,r9
  707. adox r9,r9
  708. adc r9,0
  ; ---- reduction 2 ----
  709. mulx rbp,rcx,QWORD[((0+128))+r14]
  710. adcx r10,rcx
  711. adox r11,rbp
  712. mulx rbp,rcx,QWORD[((8+128))+r14]
  713. adcx r11,rcx
  714. adox r12,rbp
  715. mulx rbp,rcx,QWORD[((16+128))+r14]
  716. adcx r12,rcx
  717. adox r13,rbp
  718. mulx rbp,rcx,QWORD[((24+128))+r14]
  ; rdx = b[3]
  719. mov rdx,QWORD[24+rbx]
  720. adcx r13,rcx
  721. adox r8,rbp
  722. adcx r8,r10
  723. adox r9,r10
  724. adc r9,0
  ; ---- round 3: acc (r11..r13, r8, r9, top limb r10) += a * b[3] ----
  725. mulx rbp,rcx,QWORD[((0+128))+rsi]
  726. adcx r11,rcx
  727. adox r12,rbp
  728. mulx rbp,rcx,QWORD[((8+128))+rsi]
  729. adcx r12,rcx
  730. adox r13,rbp
  731. mulx rbp,rcx,QWORD[((16+128))+rsi]
  732. adcx r13,rcx
  733. adox r8,rbp
  734. mulx rbp,rcx,QWORD[((24+128))+rsi]
  735. mov rdx,r11
  736. mulx rax,rdx,r15
  737. adcx r8,rcx
  738. adox r9,rbp
  739. adcx r9,r10
  740. adox r10,r10
  741. adc r10,0
  ; ---- reduction 3 ----
  742. mulx rbp,rcx,QWORD[((0+128))+r14]
  743. adcx r11,rcx
  744. adox r12,rbp
  745. mulx rbp,rcx,QWORD[((8+128))+r14]
  746. adcx r12,rcx
  747. adox r13,rbp
  748. mulx rbp,rcx,QWORD[((16+128))+r14]
  749. adcx r13,rcx
  750. adox r8,rbp
  751. mulx rbp,rcx,QWORD[((24+128))+r14]
  ; Undo the -128 bias: r14 = $L$ord for the final subtraction.
  752. lea r14,[128+r14]
  ; Keep copies of the unreduced limbs (rbx, rdx, rcx, rbp) so they can
  ; be selected back if the trial subtraction borrows.
  753. mov rbx,r12
  754. adcx r8,rcx
  755. adox r9,rbp
  756. mov rdx,r13
  757. adcx r9,r11
  758. adox r10,r11
  759. adc r10,0
  760. mov rcx,r8
  ; ---- final step: (r12,r13,r8,r9 + carry limb r10) - ord ----
  761. sub r12,QWORD[r14]
  762. sbb r13,QWORD[8+r14]
  763. sbb r8,QWORD[16+r14]
  764. mov rbp,r9
  765. sbb r9,QWORD[24+r14]
  766. sbb r10,0
  ; Borrow => value was already below ord: branch-free restore of the
  ; saved copies.
  767. cmovc r12,rbx
  768. cmovc r13,rdx
  769. cmovc r8,rcx
  770. cmovc r9,rbp
  771. mov QWORD[rdi],r12
  772. mov QWORD[8+rdi],r13
  773. mov QWORD[16+rdi],r8
  774. mov QWORD[24+rdi],r9
  ; Reload the six pushed registers (reverse push order) and drop the
  ; 48-byte save area.
  775. mov r15,QWORD[rsp]
  776. mov r14,QWORD[8+rsp]
  777. mov r13,QWORD[16+rsp]
  778. mov r12,QWORD[24+rsp]
  779. mov rbx,QWORD[32+rsp]
  780. mov rbp,QWORD[40+rsp]
  781. lea rsp,[48+rsp]
  782. $L$ord_mulx_epilogue:
  783. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  784. mov rsi,QWORD[16+rsp]
  ; F3 C3 = "rep ret"
  785. DB 0F3h,0C3h ;repret
  786. $L$SEH_end_ecp_nistz256_ord_mul_montx:
  787. ALIGN 32
  ; ---------------------------------------------------------------------
  ; ecp_nistz256_ord_sqr_montx
  ; Repeated Montgomery-style squaring of a 4x64-bit value using
  ; mulx/adcx/adox, with the four qwords at $L$ord as modulus and the
  ; qword at $L$ord+32 as the per-round reduction constant.
  ; NOTE(review): $L$ord is defined outside this chunk; presumably the
  ; P-256 group order, with $L$ordK stored immediately after it -- confirm.
  ;
  ; ABI:  Win64 at the first label (rcx = result, rdx = a, r8 = rep
  ;       count); arguments are moved to rdi/rsi/rdx before the body.
  ; In:   [rsi] = a[0..3]; rdx = number of squarings. The count is
  ;       decremented before it is tested, so it must be >= 1.
  ; Out:  [rdi] = 4 result limbs
  ; Clobbers: rax, rcx, rdx, r8-r11, xmm1-xmm3 (zeroed on exit), flags.
  ; ---------------------------------------------------------------------
  788. ecp_nistz256_ord_sqr_montx:
  789. mov QWORD[8+rsp],rdi ;WIN64 prologue
  790. mov QWORD[16+rsp],rsi
  791. mov rax,rsp
  792. $L$SEH_begin_ecp_nistz256_ord_sqr_montx:
  793. mov rdi,rcx
  794. mov rsi,rdx
  795. mov rdx,r8
  ; Secondary entry with arguments already in rdi/rsi/rdx; the caller's
  ; home slots must already hold rdi/rsi (the epilogue reloads them).
  796. $L$ecp_nistz256_ord_sqr_montx:
  797. push rbp
  798. push rbx
  799. push r12
  800. push r13
  801. push r14
  802. push r15
  803. $L$ord_sqrx_body:
  ; rbx = loop counter; rdx, r14, r15, r8 = a[0..3]; rsi = modulus table.
  804. mov rbx,rdx
  805. mov rdx,QWORD[rsi]
  806. mov r14,QWORD[8+rsi]
  807. mov r15,QWORD[16+rsi]
  808. mov r8,QWORD[24+rsi]
  809. lea rsi,[$L$ord]
  810. jmp NEAR $L$oop_ord_sqrx
  811. ALIGN 32
  ; Loop invariant: the current value is in rdx, r14, r15, r8 (limbs 0..3).
  812. $L$oop_ord_sqrx:
  ; ---- off-diagonal products a[i]*a[j], i < j ----
  813. mulx r10,r9,r14
  814. mulx r11,rcx,r15
  ; rax keeps a[0] while rdx is reused as multiplicand.
  815. mov rax,rdx
  ; 66 49 0F 6E CE = movq xmm1,r14 (stash a[1])
  816. DB 102,73,15,110,206
  817. mulx r12,rbp,r8
  818. mov rdx,r14
  819. add r10,rcx
  ; 66 49 0F 6E D7 = movq xmm2,r15 (stash a[2])
  820. DB 102,73,15,110,215
  821. adc r11,rbp
  822. adc r12,0
  823. xor r13,r13
  824. mulx rbp,rcx,r15
  825. adcx r11,rcx
  826. adox r12,rbp
  827. mulx rbp,rcx,r8
  828. mov rdx,r15
  829. adcx r12,rcx
  830. adox r13,rbp
  831. adc r13,0
  832. mulx r14,rcx,r8
  833. mov rdx,rax
  ; 66 49 0F 6E D8 = movq xmm3,r8 (stash a[3])
  834. DB 102,73,15,110,216
  ; ---- double the off-diagonal sum (adcx chain) while adding the
  ;      squares a[i]^2 (adox chain); result is 8 limbs in r8..r15 ----
  835. xor r15,r15
  836. adcx r9,r9
  837. adox r13,rcx
  838. adcx r10,r10
  839. adox r14,r15
  840. mulx rbp,r8,rdx
  ; 66 48 0F 7E CA = movq rdx,xmm1 (a[1])
  841. DB 102,72,15,126,202
  842. adcx r11,r11
  843. adox r9,rbp
  844. adcx r12,r12
  845. mulx rax,rcx,rdx
  ; 66 48 0F 7E D2 = movq rdx,xmm2 (a[2])
  846. DB 102,72,15,126,210
  847. adcx r13,r13
  848. adox r10,rcx
  849. adcx r14,r14
  850. mulx rbp,rcx,rdx
  ; 0x67 is an address-size prefix on the register-only movq below; it
  ; has no effect there (apparently code-size padding).
  851. DB 0x67
  ; 66 48 0F 7E DA = movq rdx,xmm3 (a[3])
  852. DB 102,72,15,126,218
  853. adox r11,rax
  854. adcx r15,r15
  855. adox r12,rcx
  856. adox r13,rbp
  857. mulx rax,rcx,rdx
  858. adox r14,rcx
  859. adox r15,rax
  ; ---- reduction round 0: m = low64(r8 * [rsi+32]); acc += m * ord.
  ;      The cancelled low limb register is reused as the new top limb. ----
  860. mov rdx,r8
  861. mulx rcx,rdx,QWORD[32+rsi]
  ; rax = 0 for folding carries; also clears CF/OF.
  862. xor rax,rax
  863. mulx rbp,rcx,QWORD[rsi]
  864. adcx r8,rcx
  865. adox r9,rbp
  866. mulx rbp,rcx,QWORD[8+rsi]
  867. adcx r9,rcx
  868. adox r10,rbp
  869. mulx rbp,rcx,QWORD[16+rsi]
  870. adcx r10,rcx
  871. adox r11,rbp
  872. mulx rbp,rcx,QWORD[24+rsi]
  873. adcx r11,rcx
  874. adox r8,rbp
  875. adcx r8,rax
  ; ---- reduction round 1 (CF/OF chain roles swapped vs. round 0) ----
  876. mov rdx,r9
  877. mulx rcx,rdx,QWORD[32+rsi]
  878. mulx rbp,rcx,QWORD[rsi]
  879. adox r9,rcx
  880. adcx r10,rbp
  881. mulx rbp,rcx,QWORD[8+rsi]
  882. adox r10,rcx
  883. adcx r11,rbp
  884. mulx rbp,rcx,QWORD[16+rsi]
  885. adox r11,rcx
  886. adcx r8,rbp
  887. mulx rbp,rcx,QWORD[24+rsi]
  888. adox r8,rcx
  889. adcx r9,rbp
  890. adox r9,rax
  ; ---- reduction round 2 ----
  891. mov rdx,r10
  892. mulx rcx,rdx,QWORD[32+rsi]
  893. mulx rbp,rcx,QWORD[rsi]
  894. adcx r10,rcx
  895. adox r11,rbp
  896. mulx rbp,rcx,QWORD[8+rsi]
  897. adcx r11,rcx
  898. adox r8,rbp
  899. mulx rbp,rcx,QWORD[16+rsi]
  900. adcx r8,rcx
  901. adox r9,rbp
  902. mulx rbp,rcx,QWORD[24+rsi]
  903. adcx r9,rcx
  904. adox r10,rbp
  905. adcx r10,rax
  ; ---- reduction round 3 ----
  906. mov rdx,r11
  907. mulx rcx,rdx,QWORD[32+rsi]
  908. mulx rbp,rcx,QWORD[rsi]
  909. adox r11,rcx
  910. adcx r8,rbp
  911. mulx rbp,rcx,QWORD[8+rsi]
  912. adox r8,rcx
  913. adcx r9,rbp
  914. mulx rbp,rcx,QWORD[16+rsi]
  915. adox r9,rcx
  916. adcx r10,rbp
  917. mulx rbp,rcx,QWORD[24+rsi]
  918. adox r10,rcx
  919. adcx r11,rbp
  920. adox r11,rax
  ; ---- add the reduced low half (r8..r11) to the high half (r12..r15).
  ;      Sum limbs: r12, r9, r10, r11, carry in rax; copies of the
  ;      unsubtracted sum go to rdx, r14, r15, r8. ----
  921. add r12,r8
  922. adc r9,r13
  923. mov rdx,r12
  924. adc r10,r14
  925. adc r11,r15
  926. mov r14,r9
  927. adc rax,0
  ; Trial subtraction of the modulus.
  928. sub r12,QWORD[rsi]
  929. mov r15,r10
  930. sbb r9,QWORD[8+rsi]
  931. sbb r10,QWORD[16+rsi]
  932. mov r8,r11
  933. sbb r11,QWORD[24+rsi]
  934. sbb rax,0
  ; No borrow => keep the subtracted value. The result ends up in
  ; rdx, r14, r15, r8, i.e. the loop-input registers.
  935. cmovnc rdx,r12
  936. cmovnc r14,r9
  937. cmovnc r15,r10
  938. cmovnc r8,r11
  939. dec rbx
  940. jnz NEAR $L$oop_ord_sqrx
  ; Store the result; xmm1-xmm3 (which held copies of the limbs) are
  ; zeroed in between.
  941. mov QWORD[rdi],rdx
  942. mov QWORD[8+rdi],r14
  943. pxor xmm1,xmm1
  944. mov QWORD[16+rdi],r15
  945. pxor xmm2,xmm2
  946. mov QWORD[24+rdi],r8
  947. pxor xmm3,xmm3
  ; Reload the six pushed registers and drop the 48-byte save area.
  948. mov r15,QWORD[rsp]
  949. mov r14,QWORD[8+rsp]
  950. mov r13,QWORD[16+rsp]
  951. mov r12,QWORD[24+rsp]
  952. mov rbx,QWORD[32+rsp]
  953. mov rbp,QWORD[40+rsp]
  954. lea rsp,[48+rsp]
  955. $L$ord_sqrx_epilogue:
  956. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  957. mov rsi,QWORD[16+rsp]
  958. DB 0F3h,0C3h ;repret
  959. $L$SEH_end_ecp_nistz256_ord_sqr_montx:
  960. global GFp_nistz256_mul_mont
  961. ALIGN 32
  ; ---------------------------------------------------------------------
  ; GFp_nistz256_mul_mont
  ; Public wrapper: picks the mul-based or the mulx-based 4-limb
  ; Montgomery multiplication helper at run time and calls it.
  ;
  ; ABI:  Win64 (rcx = result, rdx = a, r8 = b), remapped to rdi/rsi/rdx.
  ; Out:  [rdi] = 4 result limbs (written by the helper)
  ; Clobbers: rax, rcx, rdx, r8-r11, flags. rbx, rbp, r12-r15 are pushed
  ;       and restored; rdi/rsi are reloaded from the caller's home slots.
  ; ---------------------------------------------------------------------
  962. GFp_nistz256_mul_mont:
  963. mov QWORD[8+rsp],rdi ;WIN64 prologue
  964. mov QWORD[16+rsp],rsi
  965. mov rax,rsp
  966. $L$SEH_begin_GFp_nistz256_mul_mont:
  967. mov rdi,rcx
  968. mov rsi,rdx
  969. mov rdx,r8
  ; ecx = bits 8 and 19 of the dword at GFp_ia32cap_P+8.
  ; NOTE(review): presumably the BMI2 and ADX feature flags (the mulx
  ; path uses mulx/shlx and adcx/adox) -- confirm against the cap layout.
  970. lea rcx,[GFp_ia32cap_P]
  971. mov rcx,QWORD[8+rcx]
  972. and ecx,0x80100
  ; Internal entry: expects rdi/rsi/rdx and ecx set up as above.
  973. $L$mul_mont:
  974. push rbp
  975. push rbx
  976. push r12
  977. push r13
  978. push r14
  979. push r15
  980. $L$mul_body:
  ; Take the mulx path only when both masked bits are set.
  981. cmp ecx,0x80100
  982. je NEAR $L$mul_montx
  ; mul path: rbx = b, rax = b[0], r9..r12 = a[0..3], rsi = a.
  983. mov rbx,rdx
  984. mov rax,QWORD[rdx]
  985. mov r9,QWORD[rsi]
  986. mov r10,QWORD[8+rsi]
  987. mov r11,QWORD[16+rsi]
  988. mov r12,QWORD[24+rsi]
  989. call __ecp_nistz256_mul_montq
  990. jmp NEAR $L$mul_mont_done
  991. ALIGN 32
  992. $L$mul_montx:
  ; mulx path: rbx = b, rdx = b[0], r9..r12 = a[0..3]; rsi is biased by
  ; -128 because the helper addresses a[] as [rsi+128+k].
  993. mov rbx,rdx
  994. mov rdx,QWORD[rdx]
  995. mov r9,QWORD[rsi]
  996. mov r10,QWORD[8+rsi]
  997. mov r11,QWORD[16+rsi]
  998. mov r12,QWORD[24+rsi]
  999. lea rsi,[((-128))+rsi]
  1000. call __ecp_nistz256_mul_montx
  1001. $L$mul_mont_done:
  ; Reload the six pushed registers and drop the 48-byte save area.
  1002. mov r15,QWORD[rsp]
  1003. mov r14,QWORD[8+rsp]
  1004. mov r13,QWORD[16+rsp]
  1005. mov r12,QWORD[24+rsp]
  1006. mov rbx,QWORD[32+rsp]
  1007. mov rbp,QWORD[40+rsp]
  1008. lea rsp,[48+rsp]
  1009. $L$mul_epilogue:
  1010. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  1011. mov rsi,QWORD[16+rsp]
  1012. DB 0F3h,0C3h ;repret
  1013. $L$SEH_end_GFp_nistz256_mul_mont:
  1014. ALIGN 32
  ; ---------------------------------------------------------------------
  ; __ecp_nistz256_mul_montq  (internal helper, register calling convention)
  ; 4x4-limb Montgomery-style multiplication using plain mul/adc.
  ;
  ; In:   rax = b[0], rbx = pointer to b, rsi = pointer to a,
  ;       r9..r12 = a[0..3], rdi = result pointer
  ; Out:  [rdi] = result; the same limbs are left in r12, r13, r8, r9
  ; Clobbers: rax, rbx, rcx, rdx, rbp, r8-r15, flags
  ;
  ; The modulus limbs used are (-1, [$L$poly+8], 0, [$L$poly+24]); limbs
  ; 0 and 2 appear as immediates in the final subtraction. Each reduction
  ; round adds acc[0]*2^96 (the shl/shr by 32 pair) plus
  ; acc[0]*[$L$poly+24]*2^192 and drops limb 0.
  ; NOTE(review): this matches adding acc[0]*p when $L$poly is the P-256
  ; prime (limb 1 = 2^32-1); $L$poly is outside this chunk -- confirm.
  ; ---------------------------------------------------------------------
  1015. __ecp_nistz256_mul_montq:
  ; ---- round 0: acc (r8..r12) = a * b[0]; rbp keeps b[0] ----
  1016. mov rbp,rax
  1017. mul r9
  1018. mov r14,QWORD[(($L$poly+8))]
  1019. mov r8,rax
  1020. mov rax,rbp
  1021. mov r9,rdx
  1022. mul r10
  1023. mov r15,QWORD[(($L$poly+24))]
  1024. add r9,rax
  1025. mov rax,rbp
  1026. adc rdx,0
  1027. mov r10,rdx
  1028. mul r11
  1029. add r10,rax
  1030. mov rax,rbp
  1031. adc rdx,0
  1032. mov r11,rdx
  1033. mul r12
  1034. add r11,rax
  1035. mov rax,r8
  1036. adc rdx,0
  1037. xor r13,r13
  1038. mov r12,rdx
  ; ---- reduction 0: fold acc[0] (r8) into r9..r13 ----
  1039. mov rbp,r8
  1040. shl r8,32
  ; rdx:rax = acc[0] * [$L$poly+24]
  1041. mul r15
  1042. shr rbp,32
  1043. add r9,r8
  1044. adc r10,rbp
  1045. adc r11,rax
  ; rax = b[1] (mov leaves the carry chain intact)
  1046. mov rax,QWORD[8+rbx]
  1047. adc r12,rdx
  1048. adc r13,0
  1049. xor r8,r8
  ; ---- round 1: acc (r9..r13, r8) += a * b[1]; rbp keeps b[1] ----
  1050. mov rbp,rax
  1051. mul QWORD[rsi]
  1052. add r9,rax
  1053. mov rax,rbp
  1054. adc rdx,0
  1055. mov rcx,rdx
  1056. mul QWORD[8+rsi]
  1057. add r10,rcx
  1058. adc rdx,0
  1059. add r10,rax
  1060. mov rax,rbp
  1061. adc rdx,0
  1062. mov rcx,rdx
  1063. mul QWORD[16+rsi]
  1064. add r11,rcx
  1065. adc rdx,0
  1066. add r11,rax
  1067. mov rax,rbp
  1068. adc rdx,0
  1069. mov rcx,rdx
  1070. mul QWORD[24+rsi]
  1071. add r12,rcx
  1072. adc rdx,0
  1073. add r12,rax
  1074. mov rax,r9
  1075. adc r13,rdx
  1076. adc r8,0
  ; ---- reduction 1: fold acc[0] (r9) into r10..r13, r8 ----
  1077. mov rbp,r9
  1078. shl r9,32
  1079. mul r15
  1080. shr rbp,32
  1081. add r10,r9
  1082. adc r11,rbp
  1083. adc r12,rax
  ; rax = b[2]
  1084. mov rax,QWORD[16+rbx]
  1085. adc r13,rdx
  1086. adc r8,0
  1087. xor r9,r9
  ; ---- round 2: acc (r10..r13, r8, r9) += a * b[2] ----
  1088. mov rbp,rax
  1089. mul QWORD[rsi]
  1090. add r10,rax
  1091. mov rax,rbp
  1092. adc rdx,0
  1093. mov rcx,rdx
  1094. mul QWORD[8+rsi]
  1095. add r11,rcx
  1096. adc rdx,0
  1097. add r11,rax
  1098. mov rax,rbp
  1099. adc rdx,0
  1100. mov rcx,rdx
  1101. mul QWORD[16+rsi]
  1102. add r12,rcx
  1103. adc rdx,0
  1104. add r12,rax
  1105. mov rax,rbp
  1106. adc rdx,0
  1107. mov rcx,rdx
  1108. mul QWORD[24+rsi]
  1109. add r13,rcx
  1110. adc rdx,0
  1111. add r13,rax
  1112. mov rax,r10
  1113. adc r8,rdx
  1114. adc r9,0
  ; ---- reduction 2: fold acc[0] (r10) into r11..r13, r8, r9 ----
  1115. mov rbp,r10
  1116. shl r10,32
  1117. mul r15
  1118. shr rbp,32
  1119. add r11,r10
  1120. adc r12,rbp
  1121. adc r13,rax
  ; rax = b[3]
  1122. mov rax,QWORD[24+rbx]
  1123. adc r8,rdx
  1124. adc r9,0
  1125. xor r10,r10
  ; ---- round 3: acc (r11..r13, r8, r9, r10) += a * b[3] ----
  1126. mov rbp,rax
  1127. mul QWORD[rsi]
  1128. add r11,rax
  1129. mov rax,rbp
  1130. adc rdx,0
  1131. mov rcx,rdx
  1132. mul QWORD[8+rsi]
  1133. add r12,rcx
  1134. adc rdx,0
  1135. add r12,rax
  1136. mov rax,rbp
  1137. adc rdx,0
  1138. mov rcx,rdx
  1139. mul QWORD[16+rsi]
  1140. add r13,rcx
  1141. adc rdx,0
  1142. add r13,rax
  1143. mov rax,rbp
  1144. adc rdx,0
  1145. mov rcx,rdx
  1146. mul QWORD[24+rsi]
  1147. add r8,rcx
  1148. adc rdx,0
  1149. add r8,rax
  1150. mov rax,r11
  1151. adc r9,rdx
  1152. adc r10,0
  ; ---- reduction 3: fold acc[0] (r11); copies of the unreduced limbs
  ;      are taken into rcx, rbp, rbx, rdx along the way ----
  1153. mov rbp,r11
  1154. shl r11,32
  1155. mul r15
  1156. shr rbp,32
  1157. add r12,r11
  1158. adc r13,rbp
  1159. mov rcx,r12
  1160. adc r8,rax
  1161. adc r9,rdx
  1162. mov rbp,r13
  1163. adc r10,0
  ; ---- trial subtraction of the modulus (-1, r14, 0, r15), with the
  ;      carry limb r10 absorbing the final borrow ----
  1164. sub r12,-1
  1165. mov rbx,r8
  1166. sbb r13,r14
  1167. sbb r8,0
  1168. mov rdx,r9
  1169. sbb r9,r15
  1170. sbb r10,0
  ; Borrow => restore the unsubtracted limbs; selection is branch-free.
  1171. cmovc r12,rcx
  1172. cmovc r13,rbp
  1173. mov QWORD[rdi],r12
  1174. cmovc r8,rbx
  1175. mov QWORD[8+rdi],r13
  1176. cmovc r9,rdx
  1177. mov QWORD[16+rdi],r8
  1178. mov QWORD[24+rdi],r9
  1179. DB 0F3h,0C3h ;repret
  1180. global GFp_nistz256_sqr_mont
  1181. ALIGN 32
  ; ---------------------------------------------------------------------
  ; GFp_nistz256_sqr_mont
  ; Public wrapper: picks the mul-based or the mulx-based 4-limb
  ; Montgomery squaring helper at run time and calls it.
  ;
  ; ABI:  Win64 (rcx = result, rdx = a), remapped to rdi/rsi.
  ; Out:  [rdi] = 4 result limbs (written by the helper)
  ; Clobbers: rax, rcx, rdx, r8-r11, flags. rbx, rbp, r12-r15 are pushed
  ;       and restored; rdi/rsi are reloaded from the caller's home slots.
  ; ---------------------------------------------------------------------
  1182. GFp_nistz256_sqr_mont:
  1183. mov QWORD[8+rsp],rdi ;WIN64 prologue
  1184. mov QWORD[16+rsp],rsi
  1185. mov rax,rsp
  1186. $L$SEH_begin_GFp_nistz256_sqr_mont:
  1187. mov rdi,rcx
  1188. mov rsi,rdx
  ; ecx = bits 8 and 19 of the dword at GFp_ia32cap_P+8.
  ; NOTE(review): presumably the BMI2 and ADX feature flags -- confirm.
  1189. lea rcx,[GFp_ia32cap_P]
  1190. mov rcx,QWORD[8+rcx]
  1191. and ecx,0x80100
  1192. push rbp
  1193. push rbx
  1194. push r12
  1195. push r13
  1196. push r14
  1197. push r15
  1198. $L$sqr_body:
  ; Take the mulx path only when both masked bits are set.
  1199. cmp ecx,0x80100
  1200. je NEAR $L$sqr_montx
  ; mul path: rax, r14, r15, r8 = a[0..3]; rsi = a.
  1201. mov rax,QWORD[rsi]
  1202. mov r14,QWORD[8+rsi]
  1203. mov r15,QWORD[16+rsi]
  1204. mov r8,QWORD[24+rsi]
  1205. call __ecp_nistz256_sqr_montq
  1206. jmp NEAR $L$sqr_mont_done
  1207. ALIGN 32
  1208. $L$sqr_montx:
  ; mulx path: rdx, r14, r15, r8 = a[0..3]; rsi is biased by -128
  ; because the helper addresses a[] as [rsi+128+k].
  1209. mov rdx,QWORD[rsi]
  1210. mov r14,QWORD[8+rsi]
  1211. mov r15,QWORD[16+rsi]
  1212. mov r8,QWORD[24+rsi]
  1213. lea rsi,[((-128))+rsi]
  1214. call __ecp_nistz256_sqr_montx
  1215. $L$sqr_mont_done:
  ; Reload the six pushed registers and drop the 48-byte save area.
  1216. mov r15,QWORD[rsp]
  1217. mov r14,QWORD[8+rsp]
  1218. mov r13,QWORD[16+rsp]
  1219. mov r12,QWORD[24+rsp]
  1220. mov rbx,QWORD[32+rsp]
  1221. mov rbp,QWORD[40+rsp]
  1222. lea rsp,[48+rsp]
  1223. $L$sqr_epilogue:
  1224. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  1225. mov rsi,QWORD[16+rsp]
  1226. DB 0F3h,0C3h ;repret
  1227. $L$SEH_end_GFp_nistz256_sqr_mont:
  1228. ALIGN 32
  ; ---------------------------------------------------------------------
  ; __ecp_nistz256_sqr_montq  (internal helper, register calling convention)
  ; 4-limb Montgomery-style squaring using plain mul/adc.
  ;
  ; In:   rax, r14, r15, r8 = a[0..3]; rsi = pointer to a (the limbs are
  ;       re-read from memory for the squares); rdi = result pointer
  ; Out:  [rdi] = result; the same limbs are left in r12..r15
  ; Clobbers: rax, rcx, rdx, rbp, rsi, r8-r15, flags
  ;
  ; The modulus limbs used are (-1, [$L$poly+8], 0, [$L$poly+24]).
  ; NOTE(review): $L$poly is outside this chunk; presumably the P-256
  ; prime -- confirm.
  ; ---------------------------------------------------------------------
  1229. __ecp_nistz256_sqr_montq:
  ; ---- off-diagonal products a[i]*a[j], i < j, summed into r9..r14 ----
  ; r13 = a[0]
  1230. mov r13,rax
  ; a[0]*a[1]
  1231. mul r14
  1232. mov r9,rax
  1233. mov rax,r15
  1234. mov r10,rdx
  ; a[0]*a[2]
  1235. mul r13
  1236. add r10,rax
  1237. mov rax,r8
  1238. adc rdx,0
  1239. mov r11,rdx
  ; a[0]*a[3]
  1240. mul r13
  1241. add r11,rax
  1242. mov rax,r15
  1243. adc rdx,0
  1244. mov r12,rdx
  ; a[1]*a[2]
  1245. mul r14
  1246. add r11,rax
  1247. mov rax,r8
  1248. adc rdx,0
  1249. mov rbp,rdx
  ; a[1]*a[3]
  1250. mul r14
  1251. add r12,rax
  1252. mov rax,r8
  1253. adc rdx,0
  1254. add r12,rbp
  1255. mov r13,rdx
  1256. adc r13,0
  ; a[2]*a[3]
  1257. mul r15
  1258. xor r15,r15
  1259. add r13,rax
  1260. mov rax,QWORD[rsi]
  1261. mov r14,rdx
  1262. adc r14,0
  ; ---- double the off-diagonal sum; r15 receives the top carry ----
  1263. add r9,r9
  1264. adc r10,r10
  1265. adc r11,r11
  1266. adc r12,r12
  1267. adc r13,r13
  1268. adc r14,r14
  1269. adc r15,0
  ; ---- add the squares a[i]^2; the 8-limb result is in r8..r15 ----
  1270. mul rax
  1271. mov r8,rax
  1272. mov rax,QWORD[8+rsi]
  1273. mov rcx,rdx
  1274. mul rax
  1275. add r9,rcx
  1276. adc r10,rax
  1277. mov rax,QWORD[16+rsi]
  1278. adc rdx,0
  1279. mov rcx,rdx
  1280. mul rax
  1281. add r11,rcx
  1282. adc r12,rax
  1283. mov rax,QWORD[24+rsi]
  1284. adc rdx,0
  1285. mov rcx,rdx
  1286. mul rax
  1287. add r13,rcx
  1288. adc r14,rax
  1289. mov rax,r8
  1290. adc r15,rdx
  ; rsi is repurposed from here on: rsi = modulus limb 1, rbp = limb 3.
  1291. mov rsi,QWORD[(($L$poly+8))]
  1292. mov rbp,QWORD[(($L$poly+24))]
  ; ---- reduction round 0: add acc[0]*2^96 (shl/shr by 32) and
  ;      acc[0]*rbp*2^192; the freed limb register becomes the new top ----
  1293. mov rcx,r8
  1294. shl r8,32
  1295. mul rbp
  1296. shr rcx,32
  1297. add r9,r8
  1298. adc r10,rcx
  1299. adc r11,rax
  1300. mov rax,r9
  1301. adc rdx,0
  ; ---- reduction round 1 ----
  1302. mov rcx,r9
  1303. shl r9,32
  1304. mov r8,rdx
  1305. mul rbp
  1306. shr rcx,32
  1307. add r10,r9
  1308. adc r11,rcx
  1309. adc r8,rax
  1310. mov rax,r10
  1311. adc rdx,0
  ; ---- reduction round 2 ----
  1312. mov rcx,r10
  1313. shl r10,32
  1314. mov r9,rdx
  1315. mul rbp
  1316. shr rcx,32
  1317. add r11,r10
  1318. adc r8,rcx
  1319. adc r9,rax
  1320. mov rax,r11
  1321. adc rdx,0
  ; ---- reduction round 3 ----
  1322. mov rcx,r11
  1323. shl r11,32
  1324. mov r10,rdx
  1325. mul rbp
  1326. shr rcx,32
  1327. add r8,r11
  1328. adc r9,rcx
  1329. adc r10,rax
  1330. adc rdx,0
  ; ---- add the reduced low half (r8, r9, r10, rdx) to the high half
  ;      (r12..r15); r11 = carry. Copies of the sum go to r8, r9, r10, rcx ----
  1331. xor r11,r11
  1332. add r12,r8
  1333. adc r13,r9
  1334. mov r8,r12
  1335. adc r14,r10
  1336. adc r15,rdx
  1337. mov r9,r13
  1338. adc r11,0
  ; ---- trial subtraction of the modulus (-1, rsi, 0, rbp) ----
  1339. sub r12,-1
  1340. mov r10,r14
  1341. sbb r13,rsi
  1342. sbb r14,0
  1343. mov rcx,r15
  1344. sbb r15,rbp
  1345. sbb r11,0
  ; Borrow => restore the unsubtracted limbs; selection is branch-free.
  1346. cmovc r12,r8
  1347. cmovc r13,r9
  1348. mov QWORD[rdi],r12
  1349. cmovc r14,r10
  1350. mov QWORD[8+rdi],r13
  1351. cmovc r15,rcx
  1352. mov QWORD[16+rdi],r14
  1353. mov QWORD[24+rdi],r15
  1354. DB 0F3h,0C3h ;repret
  1355. ALIGN 32
  ; ---------------------------------------------------------------------
  ; __ecp_nistz256_mul_montx  (internal helper, register calling convention)
  ; 4x4-limb Montgomery-style multiplication using mulx/adcx/adox and
  ; shlx/shrx.
  ;
  ; In:   rdx = b[0], rbx = pointer to b, rsi = pointer to a minus 128,
  ;       r9..r12 = a[0..3], rdi = result pointer
  ; Out:  [rdi] = result; the same limbs are left in r12, r13, r8, r9
  ; Clobbers: rax, rbx, rcx, rdx, rbp, r8-r15, flags
  ;
  ; The modulus limbs used are (-1, [$L$poly+8], 0, [$L$poly+24]). Each
  ; reduction round adds acc[0]*2^96 (shlx/shrx by 32) plus
  ; acc[0]*[$L$poly+24]*2^192 and drops limb 0.
  ; NOTE(review): $L$poly is outside this chunk; presumably the P-256
  ; prime -- confirm.
  ; ---------------------------------------------------------------------
  1356. __ecp_nistz256_mul_montx:
  ; ---- round 0: acc (r8..r12) = a * b[0] ----
  1357. mulx r9,r8,r9
  1358. mulx r10,rcx,r10
  ; r14 = shift count 32 for shlx/shrx
  1359. mov r14,32
  ; r13 = 0; also clears CF so the first adc below acts as a plain add.
  1360. xor r13,r13
  1361. mulx r11,rbp,r11
  1362. mov r15,QWORD[(($L$poly+24))]
  1363. adc r9,rcx
  1364. mulx r12,rcx,r12
  ; ---- reduction 0: fold acc[0] (r8) ----
  1365. mov rdx,r8
  1366. adc r10,rbp
  1367. shlx rbp,r8,r14
  1368. adc r11,rcx
  1369. shrx rcx,r8,r14
  1370. adc r12,0
  1371. add r9,rbp
  1372. adc r10,rcx
  ; rbp:rcx = acc[0] * [$L$poly+24]
  1373. mulx rbp,rcx,r15
  ; rdx = b[1]
  1374. mov rdx,QWORD[8+rbx]
  1375. adc r11,rcx
  1376. adc r12,rbp
  1377. adc r13,0
  ; r8 = 0 (becomes the new top limb); clears CF/OF for adcx/adox.
  1378. xor r8,r8
  ; ---- round 1: acc (r9..r13, r8) += a * b[1] ----
  1379. mulx rbp,rcx,QWORD[((0+128))+rsi]
  1380. adcx r9,rcx
  1381. adox r10,rbp
  1382. mulx rbp,rcx,QWORD[((8+128))+rsi]
  1383. adcx r10,rcx
  1384. adox r11,rbp
  1385. mulx rbp,rcx,QWORD[((16+128))+rsi]
  1386. adcx r11,rcx
  1387. adox r12,rbp
  1388. mulx rbp,rcx,QWORD[((24+128))+rsi]
  ; ---- reduction 1 interleaved: fold acc[0] (r9) ----
  1389. mov rdx,r9
  1390. adcx r12,rcx
  1391. shlx rcx,r9,r14
  1392. adox r13,rbp
  1393. shrx rbp,r9,r14
  1394. adcx r13,r8
  1395. adox r8,r8
  1396. adc r8,0
  1397. add r10,rcx
  1398. adc r11,rbp
  1399. mulx rbp,rcx,r15
  ; rdx = b[2]
  1400. mov rdx,QWORD[16+rbx]
  1401. adc r12,rcx
  1402. adc r13,rbp
  1403. adc r8,0
  1404. xor r9,r9
  ; ---- round 2: acc (r10..r13, r8, r9) += a * b[2] ----
  1405. mulx rbp,rcx,QWORD[((0+128))+rsi]
  1406. adcx r10,rcx
  1407. adox r11,rbp
  1408. mulx rbp,rcx,QWORD[((8+128))+rsi]
  1409. adcx r11,rcx
  1410. adox r12,rbp
  1411. mulx rbp,rcx,QWORD[((16+128))+rsi]
  1412. adcx r12,rcx
  1413. adox r13,rbp
  1414. mulx rbp,rcx,QWORD[((24+128))+rsi]
  ; ---- reduction 2 interleaved: fold acc[0] (r10) ----
  1415. mov rdx,r10
  1416. adcx r13,rcx
  1417. shlx rcx,r10,r14
  1418. adox r8,rbp
  1419. shrx rbp,r10,r14
  1420. adcx r8,r9
  1421. adox r9,r9
  1422. adc r9,0
  1423. add r11,rcx
  1424. adc r12,rbp
  1425. mulx rbp,rcx,r15
  ; rdx = b[3]
  1426. mov rdx,QWORD[24+rbx]
  1427. adc r13,rcx
  1428. adc r8,rbp
  1429. adc r9,0
  1430. xor r10,r10
  ; ---- round 3: acc (r11..r13, r8, r9, r10) += a * b[3] ----
  1431. mulx rbp,rcx,QWORD[((0+128))+rsi]
  1432. adcx r11,rcx
  1433. adox r12,rbp
  1434. mulx rbp,rcx,QWORD[((8+128))+rsi]
  1435. adcx r12,rcx
  1436. adox r13,rbp
  1437. mulx rbp,rcx,QWORD[((16+128))+rsi]
  1438. adcx r13,rcx
  1439. adox r8,rbp
  1440. mulx rbp,rcx,QWORD[((24+128))+rsi]
  ; ---- reduction 3 interleaved: fold acc[0] (r11) ----
  1441. mov rdx,r11
  1442. adcx r8,rcx
  1443. shlx rcx,r11,r14
  1444. adox r9,rbp
  1445. shrx rbp,r11,r14
  1446. adcx r9,r10
  1447. adox r10,r10
  1448. adc r10,0
  1449. add r12,rcx
  1450. adc r13,rbp
  1451. mulx rbp,rcx,r15
  ; From here rbx, rdx, rcx, rbp hold copies of the unreduced limbs and
  ; r14 is reloaded with modulus limb 1.
  1452. mov rbx,r12
  1453. mov r14,QWORD[(($L$poly+8))]
  1454. adc r8,rcx
  1455. mov rdx,r13
  1456. adc r9,rbp
  1457. adc r10,0
  ; xor clears CF so the sbb chain below starts without a borrow.
  1458. xor eax,eax
  1459. mov rcx,r8
  ; ---- trial subtraction of the modulus (-1, r14, 0, r15) ----
  1460. sbb r12,-1
  1461. sbb r13,r14
  1462. sbb r8,0
  1463. mov rbp,r9
  1464. sbb r9,r15
  1465. sbb r10,0
  ; Borrow => restore the unsubtracted limbs; selection is branch-free.
  1466. cmovc r12,rbx
  1467. cmovc r13,rdx
  1468. mov QWORD[rdi],r12
  1469. cmovc r8,rcx
  1470. mov QWORD[8+rdi],r13
  1471. cmovc r9,rbp
  1472. mov QWORD[16+rdi],r8
  1473. mov QWORD[24+rdi],r9
  1474. DB 0F3h,0C3h ;repret
  1475. ALIGN 32
  ; ---------------------------------------------------------------------
  ; __ecp_nistz256_sqr_montx  (internal helper, register calling convention)
  ; 4-limb Montgomery-style squaring using mulx/adcx/adox and shlx/shrx.
  ;
  ; In:   rdx, r14, r15, r8 = a[0..3]; rsi = pointer to a minus 128 (the
  ;       limbs are re-read from [rsi+128+k] for the squares);
  ;       rdi = result pointer
  ; Out:  [rdi] = result; the same limbs are left in r12..r15
  ; Clobbers: rax, rcx, rdx, rbp, rsi, r8-r15, flags
  ;
  ; The modulus limbs used are (-1, [$L$poly+8], 0, [$L$poly+24]).
  ; NOTE(review): $L$poly is outside this chunk; presumably the P-256
  ; prime -- confirm.
  ; ---------------------------------------------------------------------
  1476. __ecp_nistz256_sqr_montx:
  ; ---- off-diagonal products a[i]*a[j], i < j ----
  1477. mulx r10,r9,r14
  1478. mulx r11,rcx,r15
  ; Clears CF so the first adc below acts as a plain add.
  1479. xor eax,eax
  1480. adc r10,rcx
  1481. mulx r12,rbp,r8
  1482. mov rdx,r14
  1483. adc r11,rbp
  1484. adc r12,0
  1485. xor r13,r13
  1486. mulx rbp,rcx,r15
  1487. adcx r11,rcx
  1488. adox r12,rbp
  1489. mulx rbp,rcx,r8
  1490. mov rdx,r15
  1491. adcx r12,rcx
  1492. adox r13,rbp
  1493. adc r13,0
  1494. mulx r14,rcx,r8
  ; rdx = a[0], re-read from memory
  1495. mov rdx,QWORD[((0+128))+rsi]
  ; ---- double the off-diagonal sum (adcx chain) while adding the
  ;      squares a[i]^2 (adox chain); result is 8 limbs in r8..r15 ----
  1496. xor r15,r15
  1497. adcx r9,r9
  1498. adox r13,rcx
  1499. adcx r10,r10
  1500. adox r14,r15
  1501. mulx rbp,r8,rdx
  1502. mov rdx,QWORD[((8+128))+rsi]
  1503. adcx r11,r11
  1504. adox r9,rbp
  1505. adcx r12,r12
  1506. mulx rax,rcx,rdx
  1507. mov rdx,QWORD[((16+128))+rsi]
  1508. adcx r13,r13
  1509. adox r10,rcx
  1510. adcx r14,r14
  ; Stray 0x67 (address-size) prefix bytes precede the next instructions;
  ; apparently code-size padding -- they do not change which operands
  ; are used.
  1511. DB 0x67
  1512. mulx rbp,rcx,rdx
  1513. mov rdx,QWORD[((24+128))+rsi]
  1514. adox r11,rax
  1515. adcx r15,r15
  1516. adox r12,rcx
  ; rsi is repurposed: shift count 32 for shlx/shrx.
  1517. mov rsi,32
  1518. adox r13,rbp
  1519. DB 0x67,0x67
  1520. mulx rax,rcx,rdx
  ; rdx = rbp = modulus limb 3, the implicit mulx multiplicand below.
  1521. mov rdx,QWORD[(($L$poly+24))]
  1522. adox r14,rcx
  1523. shlx rcx,r8,rsi
  1524. adox r15,rax
  1525. shrx rax,r8,rsi
  1526. mov rbp,rdx
  ; ---- reduction round 0: add acc[0]*2^96 and acc[0]*limb3*2^192; the
  ;      freed limb register takes the high product as new top limb ----
  1527. add r9,rcx
  1528. adc r10,rax
  1529. mulx r8,rcx,r8
  1530. adc r11,rcx
  1531. shlx rcx,r9,rsi
  1532. adc r8,0
  1533. shrx rax,r9,rsi
  ; ---- reduction round 1 ----
  1534. add r10,rcx
  1535. adc r11,rax
  1536. mulx r9,rcx,r9
  1537. adc r8,rcx
  1538. shlx rcx,r10,rsi
  1539. adc r9,0
  1540. shrx rax,r10,rsi
  ; ---- reduction round 2 ----
  1541. add r11,rcx
  1542. adc r8,rax
  1543. mulx r10,rcx,r10
  1544. adc r9,rcx
  1545. shlx rcx,r11,rsi
  1546. adc r10,0
  1547. shrx rax,r11,rsi
  ; ---- reduction round 3 ----
  1548. add r8,rcx
  1549. adc r9,rax
  1550. mulx r11,rcx,r11
  1551. adc r10,rcx
  1552. adc r11,0
  ; ---- add the reduced low half (r8..r11) to the high half (r12..r15);
  ;      rdx = carry. Copies of the sum go to r8..r11 ----
  1553. xor rdx,rdx
  1554. add r12,r8
  ; rsi = modulus limb 1
  1555. mov rsi,QWORD[(($L$poly+8))]
  1556. adc r13,r9
  1557. mov r8,r12
  1558. adc r14,r10
  1559. adc r15,r11
  1560. mov r9,r13
  1561. adc rdx,0
  ; ---- trial subtraction of the modulus (-1, rsi, 0, rbp) ----
  1562. sub r12,-1
  1563. mov r10,r14
  1564. sbb r13,rsi
  1565. sbb r14,0
  1566. mov r11,r15
  1567. sbb r15,rbp
  1568. sbb rdx,0
  ; Borrow => restore the unsubtracted limbs; selection is branch-free.
  1569. cmovc r12,r8
  1570. cmovc r13,r9
  1571. mov QWORD[rdi],r12
  1572. cmovc r14,r10
  1573. mov QWORD[8+rdi],r13
  1574. cmovc r15,r11
  1575. mov QWORD[16+rdi],r14
  1576. mov QWORD[24+rdi],r15
  1577. DB 0F3h,0C3h ;repret
  1578. global GFp_nistz256_select_w5
  1579. ALIGN 32
  ; ---------------------------------------------------------------------
  ; GFp_nistz256_select_w5
  ; Copies one 96-byte entry out of a 16-entry table by scanning the
  ; whole table and masking each entry with (counter == index). Every
  ; entry is read whatever the index is, so the memory access pattern
  ; does not depend on it.
  ;
  ; ABI:  Win64, arguments used in place: rcx = output (96 bytes, may be
  ;       unaligned), rdx = table (read with movdqa, so it must be
  ;       16-byte aligned), r8d = index.
  ; The counter starts at $L$One and is advanced by $L$One per entry.
  ; NOTE(review): $L$One is outside this chunk; presumably dword lanes of
  ; 1, making entries 1-based and index 0 yield all zeros -- confirm.
  ; Clobbers: rax, rdx, xmm0-xmm5, flags (xmm6-xmm15 are saved/restored).
  ; ---------------------------------------------------------------------
  1580. GFp_nistz256_select_w5:
  ; Bit 5 of the dword at GFp_ia32cap_P+8 selects the AVX2 variant.
  ; NOTE(review): presumably the AVX2 feature flag -- confirm.
  1581. lea rax,[GFp_ia32cap_P]
  1582. mov rax,QWORD[8+rax]
  1583. test eax,32
  1584. jnz NEAR $L$avx2_select_w5
  1585. lea rax,[((-136))+rsp]
  1586. $L$SEH_begin_GFp_nistz256_select_w5:
  ; Hand-encoded prologue: reserve 168 bytes and save xmm6-xmm15.
  ; 48 8D 60 E0 = lea rsp,[rax-0x20]
  1587. DB 0x48,0x8d,0x60,0xe0
  ; 0F 29 70 E0 = movaps [rax-0x20],xmm6
  1588. DB 0x0f,0x29,0x70,0xe0
  ; 0F 29 78 F0 = movaps [rax-0x10],xmm7
  1589. DB 0x0f,0x29,0x78,0xf0
  ; 44 0F 29 00 = movaps [rax],xmm8
  1590. DB 0x44,0x0f,0x29,0x00
  ; 44 0F 29 /r disp8 = movaps [rax+0x10..0x70],xmm9..xmm15
  1591. DB 0x44,0x0f,0x29,0x48,0x10
  1592. DB 0x44,0x0f,0x29,0x50,0x20
  1593. DB 0x44,0x0f,0x29,0x58,0x30
  1594. DB 0x44,0x0f,0x29,0x60,0x40
  1595. DB 0x44,0x0f,0x29,0x68,0x50
  1596. DB 0x44,0x0f,0x29,0x70,0x60
  1597. DB 0x44,0x0f,0x29,0x78,0x70
  ; xmm0 = counter increment, xmm1 = index broadcast to all four dword
  ; lanes, xmm2-xmm7 = zeroed accumulators, xmm8 = running counter.
  1598. movdqa xmm0,XMMWORD[$L$One]
  1599. movd xmm1,r8d
  1600. pxor xmm2,xmm2
  1601. pxor xmm3,xmm3
  1602. pxor xmm4,xmm4
  1603. pxor xmm5,xmm5
  1604. pxor xmm6,xmm6
  1605. pxor xmm7,xmm7
  1606. movdqa xmm8,xmm0
  1607. pshufd xmm1,xmm1,0
  ; 16 table entries
  1608. mov rax,16
  1609. $L$select_loop_sse_w5:
  ; xmm15 = all-ones if the current counter equals the index, else zero.
  1610. movdqa xmm15,xmm8
  1611. paddd xmm8,xmm0
  1612. pcmpeqd xmm15,xmm1
  ; Load the 96-byte entry and advance the table pointer.
  1613. movdqa xmm9,XMMWORD[rdx]
  1614. movdqa xmm10,XMMWORD[16+rdx]
  1615. movdqa xmm11,XMMWORD[32+rdx]
  1616. movdqa xmm12,XMMWORD[48+rdx]
  1617. movdqa xmm13,XMMWORD[64+rdx]
  1618. movdqa xmm14,XMMWORD[80+rdx]
  1619. lea rdx,[96+rdx]
  ; Mask the entry and OR it into the accumulators.
  1620. pand xmm9,xmm15
  1621. pand xmm10,xmm15
  1622. por xmm2,xmm9
  1623. pand xmm11,xmm15
  1624. por xmm3,xmm10
  1625. pand xmm12,xmm15
  1626. por xmm4,xmm11
  1627. pand xmm13,xmm15
  1628. por xmm5,xmm12
  1629. pand xmm14,xmm15
  1630. por xmm6,xmm13
  1631. por xmm7,xmm14
  1632. dec rax
  1633. jnz NEAR $L$select_loop_sse_w5
  ; Write the selected 96 bytes (unaligned stores).
  1634. movdqu XMMWORD[rcx],xmm2
  1635. movdqu XMMWORD[16+rcx],xmm3
  1636. movdqu XMMWORD[32+rcx],xmm4
  1637. movdqu XMMWORD[48+rcx],xmm5
  1638. movdqu XMMWORD[64+rcx],xmm6
  1639. movdqu XMMWORD[80+rcx],xmm7
  ; Restore xmm6-xmm15 and release the 168-byte frame.
  1640. movaps xmm6,XMMWORD[rsp]
  1641. movaps xmm7,XMMWORD[16+rsp]
  1642. movaps xmm8,XMMWORD[32+rsp]
  1643. movaps xmm9,XMMWORD[48+rsp]
  1644. movaps xmm10,XMMWORD[64+rsp]
  1645. movaps xmm11,XMMWORD[80+rsp]
  1646. movaps xmm12,XMMWORD[96+rsp]
  1647. movaps xmm13,XMMWORD[112+rsp]
  1648. movaps xmm14,XMMWORD[128+rsp]
  1649. movaps xmm15,XMMWORD[144+rsp]
  1650. lea rsp,[168+rsp]
  1651. DB 0F3h,0C3h ;repret
  1652. $L$SEH_end_GFp_nistz256_select_w5:
  1653. global GFp_nistz256_select_w7
  1654. ALIGN 32
  ; ---------------------------------------------------------------------
  ; GFp_nistz256_select_w7
  ; Copies one 64-byte entry out of a 64-entry table by scanning the
  ; whole table and masking each entry with (counter == index). Every
  ; entry is read whatever the index is, so the memory access pattern
  ; does not depend on it.
  ;
  ; ABI:  Win64, arguments used in place: rcx = output (64 bytes, may be
  ;       unaligned), rdx = table (read with movdqa, so it must be
  ;       16-byte aligned), r8d = index.
  ; The counter starts at $L$One and is advanced by $L$One per entry.
  ; NOTE(review): $L$One is outside this chunk; presumably dword lanes of
  ; 1, making entries 1-based and index 0 yield all zeros -- confirm.
  ; Clobbers: rax, rdx, xmm0-xmm5, flags (xmm6-xmm15 are saved/restored).
  ; ---------------------------------------------------------------------
  1655. GFp_nistz256_select_w7:
  ; Bit 5 of the dword at GFp_ia32cap_P+8 selects the AVX2 variant.
  ; NOTE(review): presumably the AVX2 feature flag -- confirm.
  1656. lea rax,[GFp_ia32cap_P]
  1657. mov rax,QWORD[8+rax]
  1658. test eax,32
  1659. jnz NEAR $L$avx2_select_w7
  1660. lea rax,[((-136))+rsp]
  1661. $L$SEH_begin_GFp_nistz256_select_w7:
  ; Hand-encoded prologue: reserve 168 bytes and save xmm6-xmm15.
  ; 48 8D 60 E0 = lea rsp,[rax-0x20]
  1662. DB 0x48,0x8d,0x60,0xe0
  ; 0F 29 70 E0 = movaps [rax-0x20],xmm6
  1663. DB 0x0f,0x29,0x70,0xe0
  ; 0F 29 78 F0 = movaps [rax-0x10],xmm7
  1664. DB 0x0f,0x29,0x78,0xf0
  ; 44 0F 29 00 = movaps [rax],xmm8
  1665. DB 0x44,0x0f,0x29,0x00
  ; 44 0F 29 /r disp8 = movaps [rax+0x10..0x70],xmm9..xmm15
  1666. DB 0x44,0x0f,0x29,0x48,0x10
  1667. DB 0x44,0x0f,0x29,0x50,0x20
  1668. DB 0x44,0x0f,0x29,0x58,0x30
  1669. DB 0x44,0x0f,0x29,0x60,0x40
  1670. DB 0x44,0x0f,0x29,0x68,0x50
  1671. DB 0x44,0x0f,0x29,0x70,0x60
  1672. DB 0x44,0x0f,0x29,0x78,0x70
  ; xmm8 = running counter, xmm0 = counter increment (copy of $L$One),
  ; xmm1 = index broadcast to all four dword lanes,
  ; xmm2-xmm5 = zeroed accumulators.
  1673. movdqa xmm8,XMMWORD[$L$One]
  1674. movd xmm1,r8d
  1675. pxor xmm2,xmm2
  1676. pxor xmm3,xmm3
  1677. pxor xmm4,xmm4
  1678. pxor xmm5,xmm5
  1679. movdqa xmm0,xmm8
  1680. pshufd xmm1,xmm1,0
  ; 64 table entries
  1681. mov rax,64
  1682. $L$select_loop_sse_w7:
  ; xmm15 = all-ones if the current counter equals the index, else zero.
  1683. movdqa xmm15,xmm8
  1684. paddd xmm8,xmm0
  1685. movdqa xmm9,XMMWORD[rdx]
  1686. movdqa xmm10,XMMWORD[16+rdx]
  1687. pcmpeqd xmm15,xmm1
  1688. movdqa xmm11,XMMWORD[32+rdx]
  1689. movdqa xmm12,XMMWORD[48+rdx]
  1690. lea rdx,[64+rdx]
  ; Mask the entry and OR it into the accumulators.
  1691. pand xmm9,xmm15
  1692. pand xmm10,xmm15
  1693. por xmm2,xmm9
  1694. pand xmm11,xmm15
  1695. por xmm3,xmm10
  1696. pand xmm12,xmm15
  1697. por xmm4,xmm11
  ; Prefetch hint 255 bytes past the already-advanced table pointer.
  1698. prefetcht0 [255+rdx]
  1699. por xmm5,xmm12
  1700. dec rax
  1701. jnz NEAR $L$select_loop_sse_w7
  ; Write the selected 64 bytes (unaligned stores).
  1702. movdqu XMMWORD[rcx],xmm2
  1703. movdqu XMMWORD[16+rcx],xmm3
  1704. movdqu XMMWORD[32+rcx],xmm4
  1705. movdqu XMMWORD[48+rcx],xmm5
  ; Restore xmm6-xmm15 and release the 168-byte frame.
  1706. movaps xmm6,XMMWORD[rsp]
  1707. movaps xmm7,XMMWORD[16+rsp]
  1708. movaps xmm8,XMMWORD[32+rsp]
  1709. movaps xmm9,XMMWORD[48+rsp]
  1710. movaps xmm10,XMMWORD[64+rsp]
  1711. movaps xmm11,XMMWORD[80+rsp]
  1712. movaps xmm12,XMMWORD[96+rsp]
  1713. movaps xmm13,XMMWORD[112+rsp]
  1714. movaps xmm14,XMMWORD[128+rsp]
  1715. movaps xmm15,XMMWORD[144+rsp]
  1716. lea rsp,[168+rsp]
  1717. DB 0F3h,0C3h ;repret
  1718. $L$SEH_end_GFp_nistz256_select_w7:
  1719. ALIGN 32
  ; ---------------------------------------------------------------------
  ; GFp_nistz256_avx2_select_w5  (no `global` directive is visible for
  ; this label; it is reached from GFp_nistz256_select_w5 via
  ; $L$avx2_select_w5)
  ; AVX2 variant of the 16-entry, 96-byte-per-entry table select. Two
  ; entries are processed per iteration and all entries are read
  ; whatever the index is.
  ;
  ; In:   rcx = output (96 bytes, may be unaligned), rdx = table (read
  ;       with vmovdqa ymm, so it must be 32-byte aligned), r8d = index.
  ; Counters start at $L$One and $L$Two and both advance by $L$Two.
  ; NOTE(review): $L$One / $L$Two are outside this chunk; presumably
  ; dword lanes of 1 and 2, making entries 1-based -- confirm.
  ; Clobbers: rax, rdx, r11, ymm0-ymm5, flags (xmm6-xmm15 saved/restored).
  ; ---------------------------------------------------------------------
  1720. GFp_nistz256_avx2_select_w5:
  1721. $L$avx2_select_w5:
  1722. vzeroupper
  1723. lea rax,[((-136))+rsp]
  ; r11 = entry rsp, used to restore rsp on exit.
  1724. mov r11,rsp
  1725. $L$SEH_begin_GFp_nistz256_avx2_select_w5:
  ; Hand-encoded prologue: reserve 168 bytes and save xmm6-xmm15.
  ; 48 8D 60 E0 = lea rsp,[rax-0x20]
  1726. DB 0x48,0x8d,0x60,0xe0
  ; C5 F8 29 /r disp8 = vmovaps [rax-0x20],xmm6 / [rax-0x10],xmm7
  1727. DB 0xc5,0xf8,0x29,0x70,0xe0
  1728. DB 0xc5,0xf8,0x29,0x78,0xf0
  ; C5 78 29 /r disp8 = vmovaps [rax+0x00..0x70],xmm8..xmm15
  1729. DB 0xc5,0x78,0x29,0x40,0x00
  1730. DB 0xc5,0x78,0x29,0x48,0x10
  1731. DB 0xc5,0x78,0x29,0x50,0x20
  1732. DB 0xc5,0x78,0x29,0x58,0x30
  1733. DB 0xc5,0x78,0x29,0x60,0x40
  1734. DB 0xc5,0x78,0x29,0x68,0x50
  1735. DB 0xc5,0x78,0x29,0x70,0x60
  1736. DB 0xc5,0x78,0x29,0x78,0x70
  ; ymm0 = counter increment, ymm2-ymm4 = zeroed accumulators,
  ; ymm5 / ymm10 = counters for the first / second entry of each pair.
  1737. vmovdqa ymm0,YMMWORD[$L$Two]
  1738. vpxor ymm2,ymm2,ymm2
  1739. vpxor ymm3,ymm3,ymm3
  1740. vpxor ymm4,ymm4,ymm4
  1741. vmovdqa ymm5,YMMWORD[$L$One]
  1742. vmovdqa ymm10,YMMWORD[$L$Two]
  ; Broadcast the index to all eight dword lanes: vpermd with the
  ; all-zero ymm2 as permutation indices picks lane 0 everywhere.
  1743. vmovd xmm1,r8d
  1744. vpermd ymm1,ymm2,ymm1
  ; 8 iterations x 2 entries = 16 entries
  1745. mov rax,8
  1746. $L$select_loop_avx2_w5:
  ; Load two 96-byte entries.
  1747. vmovdqa ymm6,YMMWORD[rdx]
  1748. vmovdqa ymm7,YMMWORD[32+rdx]
  1749. vmovdqa ymm8,YMMWORD[64+rdx]
  1750. vmovdqa ymm11,YMMWORD[96+rdx]
  1751. vmovdqa ymm12,YMMWORD[128+rdx]
  1752. vmovdqa ymm13,YMMWORD[160+rdx]
  ; ymm9 / ymm14 = match masks for the first / second entry.
  1753. vpcmpeqd ymm9,ymm5,ymm1
  1754. vpcmpeqd ymm14,ymm10,ymm1
  1755. vpaddd ymm5,ymm5,ymm0
  1756. vpaddd ymm10,ymm10,ymm0
  1757. lea rdx,[192+rdx]
  1758. vpand ymm6,ymm6,ymm9
  1759. vpand ymm7,ymm7,ymm9
  1760. vpand ymm8,ymm8,ymm9
  1761. vpand ymm11,ymm11,ymm14
  1762. vpand ymm12,ymm12,ymm14
  1763. vpand ymm13,ymm13,ymm14
  ; Merge the masked entries into the accumulators with XOR.
  1764. vpxor ymm2,ymm2,ymm6
  1765. vpxor ymm3,ymm3,ymm7
  1766. vpxor ymm4,ymm4,ymm8
  1767. vpxor ymm2,ymm2,ymm11
  1768. vpxor ymm3,ymm3,ymm12
  1769. vpxor ymm4,ymm4,ymm13
  1770. dec rax
  1771. jnz NEAR $L$select_loop_avx2_w5
  ; Write the selected 96 bytes (unaligned stores).
  1772. vmovdqu YMMWORD[rcx],ymm2
  1773. vmovdqu YMMWORD[32+rcx],ymm3
  1774. vmovdqu YMMWORD[64+rcx],ymm4
  ; Clear the upper ymm halves before the legacy-SSE restores below.
  1775. vzeroupper
  1776. movaps xmm6,XMMWORD[rsp]
  1777. movaps xmm7,XMMWORD[16+rsp]
  1778. movaps xmm8,XMMWORD[32+rsp]
  1779. movaps xmm9,XMMWORD[48+rsp]
  1780. movaps xmm10,XMMWORD[64+rsp]
  1781. movaps xmm11,XMMWORD[80+rsp]
  1782. movaps xmm12,XMMWORD[96+rsp]
  1783. movaps xmm13,XMMWORD[112+rsp]
  1784. movaps xmm14,XMMWORD[128+rsp]
  1785. movaps xmm15,XMMWORD[144+rsp]
  ; rsp = value saved at entry
  1786. lea rsp,[r11]
  1787. DB 0F3h,0C3h ;repret
  1788. $L$SEH_end_GFp_nistz256_avx2_select_w5:
  1789. global GFp_nistz256_avx2_select_w7
  1790. ALIGN 32
  ; ---------------------------------------------------------------------
  ; GFp_nistz256_avx2_select_w7
  ; AVX2 variant of the 64-entry, 64-byte-per-entry table select. Three
  ; entries are processed per loop iteration (21 x 3 = 63) and the 64th
  ; entry is handled after the loop; all entries are read whatever the
  ; index is.
  ;
  ; In:   rcx = output (64 bytes, may be unaligned), rdx = table (read
  ;       with vmovdqa ymm, so it must be 32-byte aligned), r8d = index.
  ; Counters start at $L$One, $L$Two, $L$Three and advance by $L$Three.
  ; NOTE(review): these constants are outside this chunk; presumably
  ; dword lanes of 1, 2 and 3, making entries 1-based -- confirm.
  ; Clobbers: rax, rdx, r11, ymm0-ymm5, flags (xmm6-xmm15 saved/restored).
  ; ---------------------------------------------------------------------
  1791. GFp_nistz256_avx2_select_w7:
  1792. $L$avx2_select_w7:
  1793. vzeroupper
  ; r11 = entry rsp, used to restore rsp on exit.
  1794. mov r11,rsp
  1795. lea rax,[((-136))+rsp]
  1796. $L$SEH_begin_GFp_nistz256_avx2_select_w7:
  ; Hand-encoded prologue: reserve 168 bytes and save xmm6-xmm15.
  ; 48 8D 60 E0 = lea rsp,[rax-0x20]
  1797. DB 0x48,0x8d,0x60,0xe0
  ; C5 F8 29 /r disp8 = vmovaps [rax-0x20],xmm6 / [rax-0x10],xmm7
  1798. DB 0xc5,0xf8,0x29,0x70,0xe0
  1799. DB 0xc5,0xf8,0x29,0x78,0xf0
  ; C5 78 29 /r disp8 = vmovaps [rax+0x00..0x70],xmm8..xmm15
  1800. DB 0xc5,0x78,0x29,0x40,0x00
  1801. DB 0xc5,0x78,0x29,0x48,0x10
  1802. DB 0xc5,0x78,0x29,0x50,0x20
  1803. DB 0xc5,0x78,0x29,0x58,0x30
  1804. DB 0xc5,0x78,0x29,0x60,0x40
  1805. DB 0xc5,0x78,0x29,0x68,0x50
  1806. DB 0xc5,0x78,0x29,0x70,0x60
  1807. DB 0xc5,0x78,0x29,0x78,0x70
  ; ymm0 = counter increment, ymm2-ymm3 = zeroed accumulators,
  ; ymm4 / ymm8 / ymm12 = counters for the three entries of each group.
  1808. vmovdqa ymm0,YMMWORD[$L$Three]
  1809. vpxor ymm2,ymm2,ymm2
  1810. vpxor ymm3,ymm3,ymm3
  1811. vmovdqa ymm4,YMMWORD[$L$One]
  1812. vmovdqa ymm8,YMMWORD[$L$Two]
  1813. vmovdqa ymm12,YMMWORD[$L$Three]
  ; Broadcast the index to all eight dword lanes: vpermd with the
  ; all-zero ymm2 as permutation indices picks lane 0 everywhere.
  1814. vmovd xmm1,r8d
  1815. vpermd ymm1,ymm2,ymm1
  ; 21 iterations x 3 entries = 63 entries
  1816. mov rax,21
  1817. $L$select_loop_avx2_w7:
  ; Load three 64-byte entries.
  1818. vmovdqa ymm5,YMMWORD[rdx]
  1819. vmovdqa ymm6,YMMWORD[32+rdx]
  1820. vmovdqa ymm9,YMMWORD[64+rdx]
  1821. vmovdqa ymm10,YMMWORD[96+rdx]
  1822. vmovdqa ymm13,YMMWORD[128+rdx]
  1823. vmovdqa ymm14,YMMWORD[160+rdx]
  ; ymm7 / ymm11 / ymm15 = match masks for the three entries.
  1824. vpcmpeqd ymm7,ymm4,ymm1
  1825. vpcmpeqd ymm11,ymm8,ymm1
  1826. vpcmpeqd ymm15,ymm12,ymm1
  1827. vpaddd ymm4,ymm4,ymm0
  1828. vpaddd ymm8,ymm8,ymm0
  1829. vpaddd ymm12,ymm12,ymm0
  1830. lea rdx,[192+rdx]
  1831. vpand ymm5,ymm5,ymm7
  1832. vpand ymm6,ymm6,ymm7
  1833. vpand ymm9,ymm9,ymm11
  1834. vpand ymm10,ymm10,ymm11
  1835. vpand ymm13,ymm13,ymm15
  1836. vpand ymm14,ymm14,ymm15
  ; Merge the masked entries into the accumulators with XOR.
  1837. vpxor ymm2,ymm2,ymm5
  1838. vpxor ymm3,ymm3,ymm6
  1839. vpxor ymm2,ymm2,ymm9
  1840. vpxor ymm3,ymm3,ymm10
  1841. vpxor ymm2,ymm2,ymm13
  1842. vpxor ymm3,ymm3,ymm14
  1843. dec rax
  1844. jnz NEAR $L$select_loop_avx2_w7
  ; Tail: the one remaining (64th) entry, compared against ymm4.
  1845. vmovdqa ymm5,YMMWORD[rdx]
  1846. vmovdqa ymm6,YMMWORD[32+rdx]
  1847. vpcmpeqd ymm7,ymm4,ymm1
  1848. vpand ymm5,ymm5,ymm7
  1849. vpand ymm6,ymm6,ymm7
  1850. vpxor ymm2,ymm2,ymm5
  1851. vpxor ymm3,ymm3,ymm6
  ; Write the selected 64 bytes (unaligned stores).
  1852. vmovdqu YMMWORD[rcx],ymm2
  1853. vmovdqu YMMWORD[32+rcx],ymm3
  ; Clear the upper ymm halves before the legacy-SSE restores below.
  1854. vzeroupper
  1855. movaps xmm6,XMMWORD[rsp]
  1856. movaps xmm7,XMMWORD[16+rsp]
  1857. movaps xmm8,XMMWORD[32+rsp]
  1858. movaps xmm9,XMMWORD[48+rsp]
  1859. movaps xmm10,XMMWORD[64+rsp]
  1860. movaps xmm11,XMMWORD[80+rsp]
  1861. movaps xmm12,XMMWORD[96+rsp]
  1862. movaps xmm13,XMMWORD[112+rsp]
  1863. movaps xmm14,XMMWORD[128+rsp]
  1864. movaps xmm15,XMMWORD[144+rsp]
  ; rsp = value saved at entry
  1865. lea rsp,[r11]
  1866. DB 0F3h,0C3h ;repret
  1867. $L$SEH_end_GFp_nistz256_avx2_select_w7:
  1868. ALIGN 32
  ; ---------------------------------------------------------------------
  ; __ecp_nistz256_add_toq  (internal helper, register calling convention)
  ; Modular addition: (r12,r13,r8,r9) + [rbx], followed by one
  ; conditional subtraction of the modulus (-1, r14, 0, r15).
  ;
  ; In:   r12, r13, r8, r9 = first operand (limbs 0..3); rbx = pointer to
  ;       the second operand; r14 / r15 = modulus limbs 1 / 3 (set up by
  ;       the caller); rdi = result pointer
  ; Out:  [rdi] = result; the same limbs are left in r12, r13, r8, r9
  ; Clobbers: rax, rbp, rcx, r10, r11, flags
  ; ---------------------------------------------------------------------
  1869. __ecp_nistz256_add_toq:
  ; r11 collects the carry out of the 256-bit addition.
  1870. xor r11,r11
  1871. add r12,QWORD[rbx]
  1872. adc r13,QWORD[8+rbx]
  1873. mov rax,r12
  1874. adc r8,QWORD[16+rbx]
  1875. adc r9,QWORD[24+rbx]
  1876. mov rbp,r13
  1877. adc r11,0
  ; Trial subtraction of the modulus; rax, rbp, rcx, r10 keep the
  ; unsubtracted sum.
  1878. sub r12,-1
  1879. mov rcx,r8
  1880. sbb r13,r14
  1881. sbb r8,0
  1882. mov r10,r9
  1883. sbb r9,r15
  1884. sbb r11,0
  ; Borrow => the sum was below the modulus: restore it (branch-free).
  1885. cmovc r12,rax
  1886. cmovc r13,rbp
  1887. mov QWORD[rdi],r12
  1888. cmovc r8,rcx
  1889. mov QWORD[8+rdi],r13
  1890. cmovc r9,r10
  1891. mov QWORD[16+rdi],r8
  1892. mov QWORD[24+rdi],r9
  1893. DB 0F3h,0C3h ;repret
  1894. ALIGN 32
  ; __ecp_nistz256_sub_fromq -- 256-bit subtraction of a memory operand from the
  ; register operand, with the modulus added back when the subtraction borrows.
  ; In:   r12,r13,r8,r9 = minuend, four 64-bit limbs, least significant first
  ;       rbx           = pointer to the subtrahend (4 qwords)
  ;       rdi           = pointer to the 4-qword destination
  ;       r14,r15       = limbs 1 and 3 of the modulus (limbs 0 and 2 are the
  ;                       immediates -1 and 0 used below)
  ; Out:  r12,r13,r8,r9 = difference (plus the modulus if the subtraction
  ;                       borrowed); the same four limbs are stored at [rdi]
  ; Clobbers: rax, rbp, rcx, r10, r11, flags
  1895. __ecp_nistz256_sub_fromq:
  1896. sub r12,QWORD[rbx]
  1897. sbb r13,QWORD[8+rbx]
  1898. mov rax,r12 ; rax,rbp,rcx,r10 keep a copy of the raw difference
  1899. sbb r8,QWORD[16+rbx]
  1900. sbb r9,QWORD[24+rbx]
  1901. mov rbp,r13
  1902. sbb r11,r11 ; r11 = all ones if the subtraction borrowed, else 0
  1903. add r12,-1 ; start of difference + modulus
  1904. mov rcx,r8
  1905. adc r13,r14
  1906. adc r8,0
  1907. mov r10,r9
  1908. adc r9,r15
  1909. test r11,r11
  1910. cmovz r12,rax ; no borrow: keep the raw difference
  1911. cmovz r13,rbp
  1912. mov QWORD[rdi],r12
  1913. cmovz r8,rcx
  1914. mov QWORD[8+rdi],r13
  1915. cmovz r9,r10
  1916. mov QWORD[16+rdi],r8
  1917. mov QWORD[24+rdi],r9
  1918. DB 0F3h,0C3h ;repret
  1919. ALIGN 32
  ; __ecp_nistz256_subq -- register-only 256-bit subtraction with the modulus
  ; added back when the subtraction borrows.  Nothing is written to memory and
  ; rdi is not read.
  ; In:   rax,rbp,rcx,r10 = minuend, four 64-bit limbs, least significant first
  ;       r12,r13,r8,r9   = subtrahend
  ;       r14,r15         = limbs 1 and 3 of the modulus (limbs 0 and 2 are the
  ;                         immediates -1 and 0 used below)
  ; Out:  r12,r13,r8,r9   = minuend - subtrahend (plus the modulus on borrow)
  ; Clobbers: rax, rbp, rcx, r10, r11, flags
  1920. __ecp_nistz256_subq:
  1921. sub rax,r12
  1922. sbb rbp,r13
  1923. mov r12,rax ; r12,r13,r8,r9 first receive the raw difference
  1924. sbb rcx,r8
  1925. sbb r10,r9
  1926. mov r13,rbp
  1927. sbb r11,r11 ; r11 = all ones if the subtraction borrowed, else 0
  1928. add rax,-1 ; rax,rbp,rcx,r10 become difference + modulus
  1929. mov r8,rcx
  1930. adc rbp,r14
  1931. adc rcx,0
  1932. mov r9,r10
  1933. adc r10,r15
  1934. test r11,r11
  1935. cmovnz r12,rax ; borrow: select the corrected value
  1936. cmovnz r13,rbp
  1937. cmovnz r8,rcx
  1938. cmovnz r9,r10
  1939. DB 0F3h,0C3h ;repret
  1940. ALIGN 32
  ; __ecp_nistz256_mul_by_2q -- doubles a 256-bit value and performs one
  ; conditional subtraction of the modulus.
  ; In:   r12,r13,r8,r9 = operand, four 64-bit limbs, least significant first
  ;       rdi           = pointer to the 4-qword destination
  ;       r14,r15       = limbs 1 and 3 of the modulus (limbs 0 and 2 are the
  ;                       immediates -1 and 0 used below)
  ; Out:  r12,r13,r8,r9 = 2*operand, minus the modulus unless that subtraction
  ;                       borrows; the same four limbs are stored at [rdi]
  ; Clobbers: rax, rbp, rcx, r10, r11, flags
  1941. __ecp_nistz256_mul_by_2q:
  1942. xor r11,r11 ; r11 will receive the bit shifted out of the top limb
  1943. add r12,r12
  1944. adc r13,r13
  1945. mov rax,r12 ; rax,rbp,rcx,r10 keep a copy of the unreduced double
  1946. adc r8,r8
  1947. adc r9,r9
  1948. mov rbp,r13
  1949. adc r11,0
  1950. sub r12,-1 ; start of double - modulus
  1951. mov rcx,r8
  1952. sbb r13,r14
  1953. sbb r8,0
  1954. mov r10,r9
  1955. sbb r9,r15
  1956. sbb r11,0 ; CF set here means double < modulus
  1957. cmovc r12,rax ; on borrow, fall back to the unreduced double
  1958. cmovc r13,rbp
  1959. mov QWORD[rdi],r12
  1960. cmovc r8,rcx
  1961. mov QWORD[8+rdi],r13
  1962. cmovc r9,r10
  1963. mov QWORD[16+rdi],r8
  1964. mov QWORD[24+rdi],r9
  1965. DB 0F3h,0C3h ;repret
  1966. global GFp_nistz256_point_double
  1967. ALIGN 32
  ; GFp_nistz256_point_double -- Win64 entry point.
  ; In:   rcx = out  (three 32-byte values are written at offsets 0, 32, 64)
  ;       rdx = in   (three 32-byte values are read from offsets 0, 32, 64;
  ;                   called X, Y, Z in the comments below)
  ; The Win64 arguments are moved into rdi/rsi, and rdi/rsi themselves are
  ; saved in the caller's home space at [8+rsp]/[16+rsp] and restored on exit.
  ; rbp, rbx and r12-r15 are pushed and restored.
  ;
  ; Stack frame after the prologue (32*5+8 bytes), slot names used below:
  ;   S    = [rsp+0]    M     = [rsp+32]   Zsqr = [rsp+64]
  ;   in_x = [rsp+96]   tmp0  = [rsp+128]
  ; xmm0, xmm1, xmm2 carry the pointers out, out+32, out+64 across the calls.
  ;
  ; NOTE(review): __ecp_nistz256_mul_montq / __ecp_nistz256_sqr_montq are defined
  ; outside this block.  The formulas in the comments assume they compute a
  ; modular (Montgomery) product / square, store it at [rdi], return a product in
  ; r12,r13,r8,r9 and a square in r12,r13,r14,r15, and that sqr_montq leaves
  ; modulus limbs 1 and 3 in rsi and rbp -- this matches how the results are
  ; consumed here, but confirm against the helper definitions.
  1968. GFp_nistz256_point_double:
  1969. mov QWORD[8+rsp],rdi ;WIN64 prologue
  1970. mov QWORD[16+rsp],rsi
  1971. mov rax,rsp
  1972. $L$SEH_begin_GFp_nistz256_point_double:
  1973. mov rdi,rcx
  1974. mov rsi,rdx
  ; Take the $L$point_doublex path when both bits of mask 0x80100 are set in the
  ; dword at GFp_ia32cap_P+8 (presumably the BMI2 and ADX feature bits -- confirm
  ; against the capability vector layout).
  1975. lea rcx,[GFp_ia32cap_P]
  1976. mov rcx,QWORD[8+rcx]
  1977. and ecx,0x80100
  1978. cmp ecx,0x80100
  1979. je NEAR $L$point_doublex
  1980. push rbp
  1981. push rbx
  1982. push r12
  1983. push r13
  1984. push r14
  1985. push r15
  1986. sub rsp,32*5+8
  1987. $L$point_doubleq_body:
  ; GFp_nistz256_point_add jumps to the next label (after shrinking its own
  ; frame to this size) with rdi = out and rsi = in.
  1988. $L$point_double_shortcutq:
  1989. movdqu xmm0,XMMWORD[rsi]
  1990. mov rbx,rsi
  1991. movdqu xmm1,XMMWORD[16+rsi]
  ; r12,r13,r8,r9 = Y
  1992. mov r12,QWORD[((32+0))+rsi]
  1993. mov r13,QWORD[((32+8))+rsi]
  1994. mov r8,QWORD[((32+16))+rsi]
  1995. mov r9,QWORD[((32+24))+rsi]
  ; r14,r15 = modulus limbs 1 and 3, as expected by the add/sub/mul_by_2 helpers
  1996. mov r14,QWORD[(($L$poly+8))]
  1997. mov r15,QWORD[(($L$poly+24))]
  ; in_x = X
  1998. movdqa XMMWORD[96+rsp],xmm0
  1999. movdqa XMMWORD[(96+16)+rsp],xmm1
  2000. lea r10,[32+rdi]
  2001. lea r11,[64+rdi]
  2002. DB 102,72,15,110,199 ; movq xmm0,rdi  (out)
  2003. DB 102,73,15,110,202 ; movq xmm1,r10  (out+32)
  2004. DB 102,73,15,110,211 ; movq xmm2,r11  (out+64)
  ; S = 2*Y
  2005. lea rdi,[rsp]
  2006. call __ecp_nistz256_mul_by_2q
  ; Zsqr = sqr(Z)
  2007. mov rax,QWORD[((64+0))+rsi]
  2008. mov r14,QWORD[((64+8))+rsi]
  2009. mov r15,QWORD[((64+16))+rsi]
  2010. mov r8,QWORD[((64+24))+rsi]
  2011. lea rsi,[((64-0))+rsi]
  2012. lea rdi,[64+rsp]
  2013. call __ecp_nistz256_sqr_montq
  ; S = sqr(S)
  2014. mov rax,QWORD[((0+0))+rsp]
  2015. mov r14,QWORD[((8+0))+rsp]
  2016. lea rsi,[((0+0))+rsp]
  2017. mov r15,QWORD[((16+0))+rsp]
  2018. mov r8,QWORD[((24+0))+rsp]
  2019. lea rdi,[rsp]
  2020. call __ecp_nistz256_sqr_montq
  ; out[64..95] = mul(Z, Y), then doubled in place by mul_by_2q
  2021. mov rax,QWORD[32+rbx]
  2022. mov r9,QWORD[((64+0))+rbx]
  2023. mov r10,QWORD[((64+8))+rbx]
  2024. mov r11,QWORD[((64+16))+rbx]
  2025. mov r12,QWORD[((64+24))+rbx]
  2026. lea rsi,[((64-0))+rbx]
  2027. lea rbx,[32+rbx]
  2028. DB 102,72,15,126,215 ; movq rdi,xmm2  (out+64)
  2029. call __ecp_nistz256_mul_montq
  2030. call __ecp_nistz256_mul_by_2q
  ; M = in_x + Zsqr
  2031. mov r12,QWORD[((96+0))+rsp]
  2032. mov r13,QWORD[((96+8))+rsp]
  2033. lea rbx,[64+rsp]
  2034. mov r8,QWORD[((96+16))+rsp]
  2035. mov r9,QWORD[((96+24))+rsp]
  2036. lea rdi,[32+rsp]
  2037. call __ecp_nistz256_add_toq
  ; Zsqr = in_x - Zsqr
  2038. mov r12,QWORD[((96+0))+rsp]
  2039. mov r13,QWORD[((96+8))+rsp]
  2040. lea rbx,[64+rsp]
  2041. mov r8,QWORD[((96+16))+rsp]
  2042. mov r9,QWORD[((96+24))+rsp]
  2043. lea rdi,[64+rsp]
  2044. call __ecp_nistz256_sub_fromq
  ; out[32..63] = sqr(S)
  2045. mov rax,QWORD[((0+0))+rsp]
  2046. mov r14,QWORD[((8+0))+rsp]
  2047. lea rsi,[((0+0))+rsp]
  2048. mov r15,QWORD[((16+0))+rsp]
  2049. mov r8,QWORD[((24+0))+rsp]
  2050. DB 102,72,15,126,207 ; movq rdi,xmm1  (out+32)
  2051. call __ecp_nistz256_sqr_montq
  ; Halve the value in r12,r13,r14,r15 modulo the modulus and store it at [rdi]:
  ; add the modulus (-1, rsi, 0, rbp) with the carry collected in r9, keep that
  ; sum only if the original low bit (rax bit 0) was set, then shift the 257-bit
  ; quantity r9:r15:r14:r13:r12 right by one bit.
  2052. xor r9,r9
  2053. mov rax,r12
  2054. add r12,-1
  2055. mov r10,r13
  2056. adc r13,rsi
  2057. mov rcx,r14
  2058. adc r14,0
  2059. mov r8,r15
  2060. adc r15,rbp
  2061. adc r9,0
  2062. xor rsi,rsi
  2063. test rax,1
  2064. cmovz r12,rax
  2065. cmovz r13,r10
  2066. cmovz r14,rcx
  2067. cmovz r15,r8
  2068. cmovz r9,rsi
  2069. mov rax,r13
  2070. shr r12,1
  2071. shl rax,63
  2072. mov r10,r14
  2073. shr r13,1
  2074. or r12,rax
  2075. shl r10,63
  2076. mov rcx,r15
  2077. shr r14,1
  2078. or r13,r10
  2079. shl rcx,63
  2080. mov QWORD[rdi],r12
  2081. shr r15,1
  2082. mov QWORD[8+rdi],r13
  2083. shl r9,63
  2084. or r14,rcx
  2085. or r15,r9
  2086. mov QWORD[16+rdi],r14
  2087. mov QWORD[24+rdi],r15
  ; M = mul(M, Zsqr)
  2088. mov rax,QWORD[64+rsp]
  2089. lea rbx,[64+rsp]
  2090. mov r9,QWORD[((0+32))+rsp]
  2091. mov r10,QWORD[((8+32))+rsp]
  2092. lea rsi,[((0+32))+rsp]
  2093. mov r11,QWORD[((16+32))+rsp]
  2094. mov r12,QWORD[((24+32))+rsp]
  2095. lea rdi,[32+rsp]
  2096. call __ecp_nistz256_mul_montq
  ; tmp0 = 2*M, then M = tmp0 + M (i.e. three times the product)
  2097. lea rdi,[128+rsp]
  2098. call __ecp_nistz256_mul_by_2q
  2099. lea rbx,[32+rsp]
  2100. lea rdi,[32+rsp]
  2101. call __ecp_nistz256_add_toq
  ; S = mul(S, in_x)
  2102. mov rax,QWORD[96+rsp]
  2103. lea rbx,[96+rsp]
  2104. mov r9,QWORD[((0+0))+rsp]
  2105. mov r10,QWORD[((8+0))+rsp]
  2106. lea rsi,[((0+0))+rsp]
  2107. mov r11,QWORD[((16+0))+rsp]
  2108. mov r12,QWORD[((24+0))+rsp]
  2109. lea rdi,[rsp]
  2110. call __ecp_nistz256_mul_montq
  ; tmp0 = 2*S
  2111. lea rdi,[128+rsp]
  2112. call __ecp_nistz256_mul_by_2q
  ; out[0..31] = sqr(M)
  2113. mov rax,QWORD[((0+32))+rsp]
  2114. mov r14,QWORD[((8+32))+rsp]
  2115. lea rsi,[((0+32))+rsp]
  2116. mov r15,QWORD[((16+32))+rsp]
  2117. mov r8,QWORD[((24+32))+rsp]
  2118. DB 102,72,15,126,199 ; movq rdi,xmm0  (out)
  2119. call __ecp_nistz256_sqr_montq
  ; out[0..31] = sqr(M) - tmp0.  The square's upper limbs are moved from r14,r15
  ; to r8,r9 and r14,r15 are reloaded from rsi,rbp before calling sub_fromq.
  2120. lea rbx,[128+rsp]
  2121. mov r8,r14
  2122. mov r9,r15
  2123. mov r14,rsi
  2124. mov r15,rbp
  2125. call __ecp_nistz256_sub_fromq
  ; r12,r13,r8,r9 = S - out[0..31]
  2126. mov rax,QWORD[((0+0))+rsp]
  2127. mov rbp,QWORD[((0+8))+rsp]
  2128. mov rcx,QWORD[((0+16))+rsp]
  2129. mov r10,QWORD[((0+24))+rsp]
  2130. lea rdi,[rsp]
  2131. call __ecp_nistz256_subq
  ; Store that difference in S and move it into r9,r10,r11,r12 for the multiply.
  ; "xor ecx,ecx" sets ZF, so the two cmovz below always move.
  2132. mov rax,QWORD[32+rsp]
  2133. lea rbx,[32+rsp]
  2134. mov r14,r12
  2135. xor ecx,ecx
  2136. mov QWORD[((0+0))+rsp],r12
  2137. mov r10,r13
  2138. mov QWORD[((0+8))+rsp],r13
  2139. cmovz r11,r8
  2140. mov QWORD[((0+16))+rsp],r8
  2141. lea rsi,[((0-0))+rsp]
  2142. cmovz r12,r9
  2143. mov QWORD[((0+24))+rsp],r9
  2144. mov r9,r14
  ; S = mul(S, M)
  2145. lea rdi,[rsp]
  2146. call __ecp_nistz256_mul_montq
  ; out[32..63] = S - out[32..63]
  2147. DB 102,72,15,126,203 ; movq rbx,xmm1  (out+32)
  2148. DB 102,72,15,126,207 ; movq rdi,xmm1  (out+32)
  2149. call __ecp_nistz256_sub_fromq
  ; Epilogue: rsi = rsp + frame (32*5+8) + 6 pushes (48); reload the saved
  ; registers relative to it and release the frame.
  2150. lea rsi,[((160+56))+rsp]
  2151. mov r15,QWORD[((-48))+rsi]
  2152. mov r14,QWORD[((-40))+rsi]
  2153. mov r13,QWORD[((-32))+rsi]
  2154. mov r12,QWORD[((-24))+rsi]
  2155. mov rbx,QWORD[((-16))+rsi]
  2156. mov rbp,QWORD[((-8))+rsi]
  2157. lea rsp,[rsi]
  2158. $L$point_doubleq_epilogue:
  2159. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  2160. mov rsi,QWORD[16+rsp]
  2161. DB 0F3h,0C3h ;repret
  2162. $L$SEH_end_GFp_nistz256_point_double:
  2163. global GFp_nistz256_point_add
  2164. ALIGN 32
  ; GFp_nistz256_point_add -- Win64 entry point.
  ; In:   rcx = out  (three 32-byte values written at offsets 0, 32, 64)
  ;       rdx = in1  (three 32-byte values X1, Y1, Z1 at offsets 0, 32, 64)
  ;       r8  = in2  (three 32-byte values X2, Y2, Z2 at offsets 0, 32, 64)
  ; The Win64 arguments are moved into rdi/rsi/rdx; rdi/rsi are saved in the
  ; caller's home space and restored on exit.  rbp, rbx, r12-r15 are pushed and
  ; restored.
  ;
  ; Stack frame after the prologue (32*18+8 bytes), slot names used below:
  ;   H     = [rsp+0]     Z1sqr/Hsqr = [rsp+32]   R     = [rsp+64]
  ;   Z2sqr/Rsqr = [rsp+96]  Hcub   = [rsp+128]   U1    = [rsp+160]
  ;   U2    = [rsp+192]   S1    = [rsp+224]       S2    = [rsp+256]
  ;   res_x = [rsp+288]   res_y = [rsp+320]       res_z = [rsp+352]
  ;   in1_x = [rsp+384]   in1_y = [rsp+416]       in1_z = [rsp+448]
  ;   in2_x = [rsp+480]   in2_y = [rsp+512]       in2_z = [rsp+544]
  ; xmm0 holds the out pointer, xmm1 the in1 pointer.
  ; xmm5 = all-ones mask iff every dword of Z1 is zero, xmm4 likewise for Z2.
  ;
  ; NOTE(review): __ecp_nistz256_mul_montq / __ecp_nistz256_sqr_montq are defined
  ; outside this block.  The formulas in the comments assume they compute a
  ; modular (Montgomery) product / square, store it at [rdi], leave a product in
  ; r12,r13,r8,r9 and leave modulus limbs 1 and 3 in r14,r15 for the add/sub
  ; helpers -- confirm against the helper definitions.
  2165. GFp_nistz256_point_add:
  2166. mov QWORD[8+rsp],rdi ;WIN64 prologue
  2167. mov QWORD[16+rsp],rsi
  2168. mov rax,rsp
  2169. $L$SEH_begin_GFp_nistz256_point_add:
  2170. mov rdi,rcx
  2171. mov rsi,rdx
  2172. mov rdx,r8
  ; Take the $L$point_addx path when both bits of mask 0x80100 are set in the
  ; dword at GFp_ia32cap_P+8 (presumably the BMI2 and ADX feature bits -- confirm
  ; against the capability vector layout).
  2173. lea rcx,[GFp_ia32cap_P]
  2174. mov rcx,QWORD[8+rcx]
  2175. and ecx,0x80100
  2176. cmp ecx,0x80100
  2177. je NEAR $L$point_addx
  2178. push rbp
  2179. push rbx
  2180. push r12
  2181. push r13
  2182. push r14
  2183. push r15
  2184. sub rsp,32*18+8
  2185. $L$point_addq_body:
  ; Copy in1 (96 bytes) to in1_x/in1_y/in1_z; rbx = in1, rsi = in2.
  2186. movdqu xmm0,XMMWORD[rsi]
  2187. movdqu xmm1,XMMWORD[16+rsi]
  2188. movdqu xmm2,XMMWORD[32+rsi]
  2189. movdqu xmm3,XMMWORD[48+rsi]
  2190. movdqu xmm4,XMMWORD[64+rsi]
  2191. movdqu xmm5,XMMWORD[80+rsi]
  2192. mov rbx,rsi
  2193. mov rsi,rdx
  2194. movdqa XMMWORD[384+rsp],xmm0
  2195. movdqa XMMWORD[(384+16)+rsp],xmm1
  2196. movdqa XMMWORD[416+rsp],xmm2
  2197. movdqa XMMWORD[(416+16)+rsp],xmm3
  2198. movdqa XMMWORD[448+rsp],xmm4
  2199. movdqa XMMWORD[(448+16)+rsp],xmm5
  ; Interleaved from here: (a) OR-fold the dwords of Z1 into dword 0 of xmm5,
  ; (b) copy X2/Y2/Z2 to in2_x/in2_y/in2_z, (c) load Z2 into rax,r14,r15,r8.
  2200. por xmm5,xmm4
  2201. movdqu xmm0,XMMWORD[rsi]
  2202. pshufd xmm3,xmm5,0xb1
  2203. movdqu xmm1,XMMWORD[16+rsi]
  2204. movdqu xmm2,XMMWORD[32+rsi]
  2205. por xmm5,xmm3
  2206. movdqu xmm3,XMMWORD[48+rsi]
  2207. mov rax,QWORD[((64+0))+rsi]
  2208. mov r14,QWORD[((64+8))+rsi]
  2209. mov r15,QWORD[((64+16))+rsi]
  2210. mov r8,QWORD[((64+24))+rsi]
  2211. movdqa XMMWORD[480+rsp],xmm0
  2212. pshufd xmm4,xmm5,0x1e
  2213. movdqa XMMWORD[(480+16)+rsp],xmm1
  2214. movdqu xmm0,XMMWORD[64+rsi]
  2215. movdqu xmm1,XMMWORD[80+rsi]
  2216. movdqa XMMWORD[512+rsp],xmm2
  2217. movdqa XMMWORD[(512+16)+rsp],xmm3
  2218. por xmm5,xmm4
  2219. pxor xmm4,xmm4
  2220. por xmm1,xmm0 ; xmm1 = low half of Z2 | high half of Z2
  2221. DB 102,72,15,110,199 ; movq xmm0,rdi  (out)
  ; Z2sqr = sqr(Z2); in2_z is filled from the limbs just loaded.
  2222. lea rsi,[((64-0))+rsi]
  2223. mov QWORD[((544+0))+rsp],rax
  2224. mov QWORD[((544+8))+rsp],r14
  2225. mov QWORD[((544+16))+rsp],r15
  2226. mov QWORD[((544+24))+rsp],r8
  2227. lea rdi,[96+rsp]
  2228. call __ecp_nistz256_sqr_montq
  ; Finish the masks: compare the folded dword with zero and broadcast it.
  2229. pcmpeqd xmm5,xmm4
  2230. pshufd xmm4,xmm1,0xb1
  2231. por xmm4,xmm1
  2232. pshufd xmm5,xmm5,0 ; xmm5 = all ones iff Z1 is all zero
  2233. pshufd xmm3,xmm4,0x1e
  2234. por xmm4,xmm3
  2235. pxor xmm3,xmm3
  2236. pcmpeqd xmm4,xmm3
  2237. pshufd xmm4,xmm4,0 ; xmm4 = all ones iff Z2 is all zero
  ; Z1sqr = sqr(Z1)
  2238. mov rax,QWORD[((64+0))+rbx]
  2239. mov r14,QWORD[((64+8))+rbx]
  2240. mov r15,QWORD[((64+16))+rbx]
  2241. mov r8,QWORD[((64+24))+rbx]
  2242. DB 102,72,15,110,203 ; movq xmm1,rbx  (in1 pointer)
  2243. lea rsi,[((64-0))+rbx]
  2244. lea rdi,[32+rsp]
  2245. call __ecp_nistz256_sqr_montq
  ; S1 = mul(Z2sqr, in2_z)
  2246. mov rax,QWORD[544+rsp]
  2247. lea rbx,[544+rsp]
  2248. mov r9,QWORD[((0+96))+rsp]
  2249. mov r10,QWORD[((8+96))+rsp]
  2250. lea rsi,[((0+96))+rsp]
  2251. mov r11,QWORD[((16+96))+rsp]
  2252. mov r12,QWORD[((24+96))+rsp]
  2253. lea rdi,[224+rsp]
  2254. call __ecp_nistz256_mul_montq
  ; S2 = mul(Z1sqr, in1_z)
  2255. mov rax,QWORD[448+rsp]
  2256. lea rbx,[448+rsp]
  2257. mov r9,QWORD[((0+32))+rsp]
  2258. mov r10,QWORD[((8+32))+rsp]
  2259. lea rsi,[((0+32))+rsp]
  2260. mov r11,QWORD[((16+32))+rsp]
  2261. mov r12,QWORD[((24+32))+rsp]
  2262. lea rdi,[256+rsp]
  2263. call __ecp_nistz256_mul_montq
  ; S1 = mul(S1, in1_y)
  2264. mov rax,QWORD[416+rsp]
  2265. lea rbx,[416+rsp]
  2266. mov r9,QWORD[((0+224))+rsp]
  2267. mov r10,QWORD[((8+224))+rsp]
  2268. lea rsi,[((0+224))+rsp]
  2269. mov r11,QWORD[((16+224))+rsp]
  2270. mov r12,QWORD[((24+224))+rsp]
  2271. lea rdi,[224+rsp]
  2272. call __ecp_nistz256_mul_montq
  ; S2 = mul(S2, in2_y)
  2273. mov rax,QWORD[512+rsp]
  2274. lea rbx,[512+rsp]
  2275. mov r9,QWORD[((0+256))+rsp]
  2276. mov r10,QWORD[((8+256))+rsp]
  2277. lea rsi,[((0+256))+rsp]
  2278. mov r11,QWORD[((16+256))+rsp]
  2279. mov r12,QWORD[((24+256))+rsp]
  2280. lea rdi,[256+rsp]
  2281. call __ecp_nistz256_mul_montq
  ; R = S2 - S1
  2282. lea rbx,[224+rsp]
  2283. lea rdi,[64+rsp]
  2284. call __ecp_nistz256_sub_fromq
  ; r12 = OR of the limbs of R (zero iff R == 0), parked in xmm3;
  ; xmm2 = xmm4 | xmm5 (either Z is zero).
  2285. or r12,r13
  2286. movdqa xmm2,xmm4
  2287. or r12,r8
  2288. or r12,r9
  2289. por xmm2,xmm5
  2290. DB 102,73,15,110,220 ; movq xmm3,r12
  ; U1 = mul(Z2sqr, in1_x)
  2291. mov rax,QWORD[384+rsp]
  2292. lea rbx,[384+rsp]
  2293. mov r9,QWORD[((0+96))+rsp]
  2294. mov r10,QWORD[((8+96))+rsp]
  2295. lea rsi,[((0+96))+rsp]
  2296. mov r11,QWORD[((16+96))+rsp]
  2297. mov r12,QWORD[((24+96))+rsp]
  2298. lea rdi,[160+rsp]
  2299. call __ecp_nistz256_mul_montq
  ; U2 = mul(Z1sqr, in2_x)
  2300. mov rax,QWORD[480+rsp]
  2301. lea rbx,[480+rsp]
  2302. mov r9,QWORD[((0+32))+rsp]
  2303. mov r10,QWORD[((8+32))+rsp]
  2304. lea rsi,[((0+32))+rsp]
  2305. mov r11,QWORD[((16+32))+rsp]
  2306. mov r12,QWORD[((24+32))+rsp]
  2307. lea rdi,[192+rsp]
  2308. call __ecp_nistz256_mul_montq
  ; H = U2 - U1
  2309. lea rbx,[160+rsp]
  2310. lea rdi,[rsp]
  2311. call __ecp_nistz256_sub_fromq
  ; Special-case dispatch.  r12 = OR of the limbs of H, then OR-ed with the low
  ; qword of the "either Z is zero" mask (r8); r9 = OR of the limbs of R.
  ;   r12 != 0            -> generic path; the masks select the result at the end
  ;   r12 == 0, r9 == 0   -> H and R are both zero: jump to the doubling code
  ;   r12 == 0, r9 != 0   -> write 96 zero bytes to out
  2312. or r12,r13
  2313. or r12,r8
  2314. or r12,r9
  2315. DB 102,73,15,126,208 ; movq r8,xmm2
  2316. DB 102,73,15,126,217 ; movq r9,xmm3
  2317. or r12,r8
  2318. DB 0x3e ; prefix byte emitted in front of the following jnz
  2319. jnz NEAR $L$add_proceedq
  2320. test r9,r9
  2321. jz NEAR $L$add_doubleq
  2322. DB 102,72,15,126,199 ; movq rdi,xmm0  (out)
  2323. pxor xmm0,xmm0
  2324. movdqu XMMWORD[rdi],xmm0
  2325. movdqu XMMWORD[16+rdi],xmm0
  2326. movdqu XMMWORD[32+rdi],xmm0
  2327. movdqu XMMWORD[48+rdi],xmm0
  2328. movdqu XMMWORD[64+rdi],xmm0
  2329. movdqu XMMWORD[80+rdi],xmm0
  2330. jmp NEAR $L$add_doneq
  2331. ALIGN 32
  ; Doubling shortcut: restore rsi = in1 and rdi = out, shrink this frame from
  ; 32*18+8 to the 32*5+8 bytes used by GFp_nistz256_point_double (584-416=168),
  ; and continue inside that routine, whose epilogue then unwinds the frame.
  2332. $L$add_doubleq:
  2333. DB 102,72,15,126,206 ; movq rsi,xmm1  (in1)
  2334. DB 102,72,15,126,199 ; movq rdi,xmm0  (out)
  2335. add rsp,416
  2336. jmp NEAR $L$point_double_shortcutq
  2337. ALIGN 32
  2338. $L$add_proceedq:
  ; Rsqr = sqr(R)
  2339. mov rax,QWORD[((0+64))+rsp]
  2340. mov r14,QWORD[((8+64))+rsp]
  2341. lea rsi,[((0+64))+rsp]
  2342. mov r15,QWORD[((16+64))+rsp]
  2343. mov r8,QWORD[((24+64))+rsp]
  2344. lea rdi,[96+rsp]
  2345. call __ecp_nistz256_sqr_montq
  ; res_z = mul(H, in1_z)
  2346. mov rax,QWORD[448+rsp]
  2347. lea rbx,[448+rsp]
  2348. mov r9,QWORD[((0+0))+rsp]
  2349. mov r10,QWORD[((8+0))+rsp]
  2350. lea rsi,[((0+0))+rsp]
  2351. mov r11,QWORD[((16+0))+rsp]
  2352. mov r12,QWORD[((24+0))+rsp]
  2353. lea rdi,[352+rsp]
  2354. call __ecp_nistz256_mul_montq
  ; Hsqr = sqr(H)
  2355. mov rax,QWORD[((0+0))+rsp]
  2356. mov r14,QWORD[((8+0))+rsp]
  2357. lea rsi,[((0+0))+rsp]
  2358. mov r15,QWORD[((16+0))+rsp]
  2359. mov r8,QWORD[((24+0))+rsp]
  2360. lea rdi,[32+rsp]
  2361. call __ecp_nistz256_sqr_montq
  ; res_z = mul(res_z, in2_z)
  2362. mov rax,QWORD[544+rsp]
  2363. lea rbx,[544+rsp]
  2364. mov r9,QWORD[((0+352))+rsp]
  2365. mov r10,QWORD[((8+352))+rsp]
  2366. lea rsi,[((0+352))+rsp]
  2367. mov r11,QWORD[((16+352))+rsp]
  2368. mov r12,QWORD[((24+352))+rsp]
  2369. lea rdi,[352+rsp]
  2370. call __ecp_nistz256_mul_montq
  ; Hcub = mul(Hsqr, H)
  2371. mov rax,QWORD[rsp]
  2372. lea rbx,[rsp]
  2373. mov r9,QWORD[((0+32))+rsp]
  2374. mov r10,QWORD[((8+32))+rsp]
  2375. lea rsi,[((0+32))+rsp]
  2376. mov r11,QWORD[((16+32))+rsp]
  2377. mov r12,QWORD[((24+32))+rsp]
  2378. lea rdi,[128+rsp]
  2379. call __ecp_nistz256_mul_montq
  ; U2 = mul(Hsqr, U1)
  2380. mov rax,QWORD[160+rsp]
  2381. lea rbx,[160+rsp]
  2382. mov r9,QWORD[((0+32))+rsp]
  2383. mov r10,QWORD[((8+32))+rsp]
  2384. lea rsi,[((0+32))+rsp]
  2385. mov r11,QWORD[((16+32))+rsp]
  2386. mov r12,QWORD[((24+32))+rsp]
  2387. lea rdi,[192+rsp]
  2388. call __ecp_nistz256_mul_montq
  ; Inline doubling of the product in r12,r13,r8,r9 with one conditional
  ; subtraction of the modulus (same sequence as __ecp_nistz256_mul_by_2q but
  ; without the stores), interleaved with loading Rsqr into rax,rbp,rcx,r10.
  2389. xor r11,r11
  2390. add r12,r12
  2391. lea rsi,[96+rsp]
  2392. adc r13,r13
  2393. mov rax,r12
  2394. adc r8,r8
  2395. adc r9,r9
  2396. mov rbp,r13
  2397. adc r11,0
  2398. sub r12,-1
  2399. mov rcx,r8
  2400. sbb r13,r14
  2401. sbb r8,0
  2402. mov r10,r9
  2403. sbb r9,r15
  2404. sbb r11,0
  2405. cmovc r12,rax
  2406. mov rax,QWORD[rsi]
  2407. cmovc r13,rbp
  2408. mov rbp,QWORD[8+rsi]
  2409. cmovc r8,rcx
  2410. mov rcx,QWORD[16+rsi]
  2411. cmovc r9,r10
  2412. mov r10,QWORD[24+rsi]
  ; r12,r13,r8,r9 = Rsqr - 2*U2
  2413. call __ecp_nistz256_subq
  ; res_x = (Rsqr - 2*U2) - Hcub
  2414. lea rbx,[128+rsp]
  2415. lea rdi,[288+rsp]
  2416. call __ecp_nistz256_sub_fromq
  ; res_y = U2 - res_x (subq leaves it in registers; stored explicitly below)
  2417. mov rax,QWORD[((192+0))+rsp]
  2418. mov rbp,QWORD[((192+8))+rsp]
  2419. mov rcx,QWORD[((192+16))+rsp]
  2420. mov r10,QWORD[((192+24))+rsp]
  2421. lea rdi,[320+rsp]
  2422. call __ecp_nistz256_subq
  2423. mov QWORD[rdi],r12
  2424. mov QWORD[8+rdi],r13
  2425. mov QWORD[16+rdi],r8
  2426. mov QWORD[24+rdi],r9
  ; S2 = mul(S1, Hcub)
  2427. mov rax,QWORD[128+rsp]
  2428. lea rbx,[128+rsp]
  2429. mov r9,QWORD[((0+224))+rsp]
  2430. mov r10,QWORD[((8+224))+rsp]
  2431. lea rsi,[((0+224))+rsp]
  2432. mov r11,QWORD[((16+224))+rsp]
  2433. mov r12,QWORD[((24+224))+rsp]
  2434. lea rdi,[256+rsp]
  2435. call __ecp_nistz256_mul_montq
  ; res_y = mul(R, res_y)
  2436. mov rax,QWORD[320+rsp]
  2437. lea rbx,[320+rsp]
  2438. mov r9,QWORD[((0+64))+rsp]
  2439. mov r10,QWORD[((8+64))+rsp]
  2440. lea rsi,[((0+64))+rsp]
  2441. mov r11,QWORD[((16+64))+rsp]
  2442. mov r12,QWORD[((24+64))+rsp]
  2443. lea rdi,[320+rsp]
  2444. call __ecp_nistz256_mul_montq
  ; res_y = res_y - S2
  2445. lea rbx,[256+rsp]
  2446. lea rdi,[320+rsp]
  2447. call __ecp_nistz256_sub_fromq
  2448. DB 102,72,15,126,199 ; movq rdi,xmm0  (out)
  ; Branch-free selection of each output coordinate:
  ;   t   = xmm5 ? in2 coordinate : computed coordinate   (Z1 all zero -> in2)
  ;   out = xmm4 ? in1 coordinate : t                     (Z2 all zero -> in1)
  ; out[64..95]: res_z / in2_z / in1_z
  2449. movdqa xmm0,xmm5
  2450. movdqa xmm1,xmm5
  2451. pandn xmm0,XMMWORD[352+rsp]
  2452. movdqa xmm2,xmm5
  2453. pandn xmm1,XMMWORD[((352+16))+rsp]
  2454. movdqa xmm3,xmm5
  2455. pand xmm2,XMMWORD[544+rsp]
  2456. pand xmm3,XMMWORD[((544+16))+rsp]
  2457. por xmm2,xmm0
  2458. por xmm3,xmm1
  2459. movdqa xmm0,xmm4
  2460. movdqa xmm1,xmm4
  2461. pandn xmm0,xmm2
  2462. movdqa xmm2,xmm4
  2463. pandn xmm1,xmm3
  2464. movdqa xmm3,xmm4
  2465. pand xmm2,XMMWORD[448+rsp]
  2466. pand xmm3,XMMWORD[((448+16))+rsp]
  2467. por xmm2,xmm0
  2468. por xmm3,xmm1
  2469. movdqu XMMWORD[64+rdi],xmm2
  2470. movdqu XMMWORD[80+rdi],xmm3
  ; out[0..31]: res_x / in2_x / in1_x
  2471. movdqa xmm0,xmm5
  2472. movdqa xmm1,xmm5
  2473. pandn xmm0,XMMWORD[288+rsp]
  2474. movdqa xmm2,xmm5
  2475. pandn xmm1,XMMWORD[((288+16))+rsp]
  2476. movdqa xmm3,xmm5
  2477. pand xmm2,XMMWORD[480+rsp]
  2478. pand xmm3,XMMWORD[((480+16))+rsp]
  2479. por xmm2,xmm0
  2480. por xmm3,xmm1
  2481. movdqa xmm0,xmm4
  2482. movdqa xmm1,xmm4
  2483. pandn xmm0,xmm2
  2484. movdqa xmm2,xmm4
  2485. pandn xmm1,xmm3
  2486. movdqa xmm3,xmm4
  2487. pand xmm2,XMMWORD[384+rsp]
  2488. pand xmm3,XMMWORD[((384+16))+rsp]
  2489. por xmm2,xmm0
  2490. por xmm3,xmm1
  2491. movdqu XMMWORD[rdi],xmm2
  2492. movdqu XMMWORD[16+rdi],xmm3
  ; out[32..63]: res_y / in2_y / in1_y
  2493. movdqa xmm0,xmm5
  2494. movdqa xmm1,xmm5
  2495. pandn xmm0,XMMWORD[320+rsp]
  2496. movdqa xmm2,xmm5
  2497. pandn xmm1,XMMWORD[((320+16))+rsp]
  2498. movdqa xmm3,xmm5
  2499. pand xmm2,XMMWORD[512+rsp]
  2500. pand xmm3,XMMWORD[((512+16))+rsp]
  2501. por xmm2,xmm0
  2502. por xmm3,xmm1
  2503. movdqa xmm0,xmm4
  2504. movdqa xmm1,xmm4
  2505. pandn xmm0,xmm2
  2506. movdqa xmm2,xmm4
  2507. pandn xmm1,xmm3
  2508. movdqa xmm3,xmm4
  2509. pand xmm2,XMMWORD[416+rsp]
  2510. pand xmm3,XMMWORD[((416+16))+rsp]
  2511. por xmm2,xmm0
  2512. por xmm3,xmm1
  2513. movdqu XMMWORD[32+rdi],xmm2
  2514. movdqu XMMWORD[48+rdi],xmm3
  ; Epilogue: rsi = rsp + frame (32*18+8) + 6 pushes (48); reload the saved
  ; registers relative to it and release the frame.
  2515. $L$add_doneq:
  2516. lea rsi,[((576+56))+rsp]
  2517. mov r15,QWORD[((-48))+rsi]
  2518. mov r14,QWORD[((-40))+rsi]
  2519. mov r13,QWORD[((-32))+rsi]
  2520. mov r12,QWORD[((-24))+rsi]
  2521. mov rbx,QWORD[((-16))+rsi]
  2522. mov rbp,QWORD[((-8))+rsi]
  2523. lea rsp,[rsi]
  2524. $L$point_addq_epilogue:
  2525. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  2526. mov rsi,QWORD[16+rsp]
  2527. DB 0F3h,0C3h ;repret
  2528. $L$SEH_end_GFp_nistz256_point_add:
  2529. global GFp_nistz256_point_add_affine
  2530. ALIGN 32
  ; GFp_nistz256_point_add_affine -- Win64 entry point.
  ; In:   rcx = out  (three 32-byte values written at offsets 0, 32, 64)
  ;       rdx = in1  (three 32-byte values X1, Y1, Z1 at offsets 0, 32, 64)
  ;       r8  = in2  (two 32-byte values X2, Y2 at offsets 0, 32; no third
  ;                   value is read from in2)
  ; The Win64 arguments are moved into rdi/rsi/rdx; rdi/rsi are saved in the
  ; caller's home space and restored on exit.  rbp, rbx, r12-r15 are pushed and
  ; restored.
  ;
  ; Stack frame after the prologue (32*15+8 bytes), slot names used below:
  ;   U2    = [rsp+0]     S2    = [rsp+32]    H     = [rsp+64]
  ;   R     = [rsp+96]    Hsqr  = [rsp+128]   Hcub  = [rsp+160]
  ;   Rsqr  = [rsp+192]   res_x = [rsp+224]   res_y = [rsp+256]
  ;   res_z = [rsp+288]   in1_x = [rsp+320]   in1_y = [rsp+352]
  ;   in1_z = [rsp+384]   in2_x = [rsp+416]   in2_y = [rsp+448]
  ; xmm0 holds the out pointer.
  ; xmm5 = all-ones mask iff every dword of Z1 is zero;
  ; xmm4 = all-ones mask iff every dword of X2 and Y2 is zero.
  ; Apart from the CPU-feature dispatch there is no branch in this routine; in
  ; particular H == 0 is not special-cased here.
  ;
  ; NOTE(review): __ecp_nistz256_mul_montq / __ecp_nistz256_sqr_montq are defined
  ; outside this block.  The formulas in the comments assume they compute a
  ; modular (Montgomery) product / square, store it at [rdi], leave a product in
  ; r12,r13,r8,r9 and a square in r12,r13,r14,r15, and leave modulus limbs 1 and
  ; 3 in r14,r15 after a multiply -- confirm against the helper definitions.
  2531. GFp_nistz256_point_add_affine:
  2532. mov QWORD[8+rsp],rdi ;WIN64 prologue
  2533. mov QWORD[16+rsp],rsi
  2534. mov rax,rsp
  2535. $L$SEH_begin_GFp_nistz256_point_add_affine:
  2536. mov rdi,rcx
  2537. mov rsi,rdx
  2538. mov rdx,r8
  ; Take the $L$point_add_affinex path when both bits of mask 0x80100 are set in
  ; the dword at GFp_ia32cap_P+8 (presumably the BMI2 and ADX feature bits --
  ; confirm against the capability vector layout).
  2539. lea rcx,[GFp_ia32cap_P]
  2540. mov rcx,QWORD[8+rcx]
  2541. and ecx,0x80100
  2542. cmp ecx,0x80100
  2543. je NEAR $L$point_add_affinex
  2544. push rbp
  2545. push rbx
  2546. push r12
  2547. push r13
  2548. push r14
  2549. push r15
  2550. sub rsp,32*15+8
  2551. $L$add_affineq_body:
  ; Load in1 (96 bytes); rbx = in2; rax,r14,r15,r8 = Z1 for the first square.
  2552. movdqu xmm0,XMMWORD[rsi]
  2553. mov rbx,rdx
  2554. movdqu xmm1,XMMWORD[16+rsi]
  2555. movdqu xmm2,XMMWORD[32+rsi]
  2556. movdqu xmm3,XMMWORD[48+rsi]
  2557. movdqu xmm4,XMMWORD[64+rsi]
  2558. movdqu xmm5,XMMWORD[80+rsi]
  2559. mov rax,QWORD[((64+0))+rsi]
  2560. mov r14,QWORD[((64+8))+rsi]
  2561. mov r15,QWORD[((64+16))+rsi]
  2562. mov r8,QWORD[((64+24))+rsi]
  2563. movdqa XMMWORD[320+rsp],xmm0
  2564. movdqa XMMWORD[(320+16)+rsp],xmm1
  2565. movdqa XMMWORD[352+rsp],xmm2
  2566. movdqa XMMWORD[(352+16)+rsp],xmm3
  2567. movdqa XMMWORD[384+rsp],xmm4
  2568. movdqa XMMWORD[(384+16)+rsp],xmm5
  ; Interleaved from here: (a) OR-fold the dwords of Z1 into dword 0 of xmm5,
  ; (b) copy X2/Y2 to in2_x/in2_y, (c) OR all 64 bytes of in2 into xmm3.
  2569. por xmm5,xmm4
  2570. movdqu xmm0,XMMWORD[rbx]
  2571. pshufd xmm3,xmm5,0xb1
  2572. movdqu xmm1,XMMWORD[16+rbx]
  2573. movdqu xmm2,XMMWORD[32+rbx]
  2574. por xmm5,xmm3
  2575. movdqu xmm3,XMMWORD[48+rbx]
  2576. movdqa XMMWORD[416+rsp],xmm0
  2577. pshufd xmm4,xmm5,0x1e
  2578. movdqa XMMWORD[(416+16)+rsp],xmm1
  2579. por xmm1,xmm0
  2580. DB 102,72,15,110,199 ; movq xmm0,rdi  (out)
  2581. movdqa XMMWORD[448+rsp],xmm2
  2582. movdqa XMMWORD[(448+16)+rsp],xmm3
  2583. por xmm3,xmm2
  2584. por xmm5,xmm4
  2585. pxor xmm4,xmm4
  2586. por xmm3,xmm1
  ; S2 slot = sqr(Z1)
  2587. lea rsi,[((64-0))+rsi]
  2588. lea rdi,[32+rsp]
  2589. call __ecp_nistz256_sqr_montq
  ; Finish the two masks while moving the square from r12,r13,r14,r15 into the
  ; multiply inputs r9,r10,r11,r12; rax = first limb of X2.
  2590. pcmpeqd xmm5,xmm4
  2591. pshufd xmm4,xmm3,0xb1
  2592. mov rax,QWORD[rbx]
  2593. mov r9,r12
  2594. por xmm4,xmm3
  2595. pshufd xmm5,xmm5,0 ; xmm5 = all ones iff Z1 is all zero
  2596. pshufd xmm3,xmm4,0x1e
  2597. mov r10,r13
  2598. por xmm4,xmm3
  2599. pxor xmm3,xmm3
  2600. mov r11,r14
  2601. pcmpeqd xmm4,xmm3
  2602. pshufd xmm4,xmm4,0 ; xmm4 = all ones iff X2 and Y2 are all zero
  ; U2 = mul(sqr(Z1), X2)   (rbx still points at in2)
  2603. lea rsi,[((32-0))+rsp]
  2604. mov r12,r15
  2605. lea rdi,[rsp]
  2606. call __ecp_nistz256_mul_montq
  ; H = U2 - in1_x
  2607. lea rbx,[320+rsp]
  2608. lea rdi,[64+rsp]
  2609. call __ecp_nistz256_sub_fromq
  ; S2 = mul(sqr(Z1), in1_z)
  2610. mov rax,QWORD[384+rsp]
  2611. lea rbx,[384+rsp]
  2612. mov r9,QWORD[((0+32))+rsp]
  2613. mov r10,QWORD[((8+32))+rsp]
  2614. lea rsi,[((0+32))+rsp]
  2615. mov r11,QWORD[((16+32))+rsp]
  2616. mov r12,QWORD[((24+32))+rsp]
  2617. lea rdi,[32+rsp]
  2618. call __ecp_nistz256_mul_montq
  ; res_z = mul(H, in1_z)
  2619. mov rax,QWORD[384+rsp]
  2620. lea rbx,[384+rsp]
  2621. mov r9,QWORD[((0+64))+rsp]
  2622. mov r10,QWORD[((8+64))+rsp]
  2623. lea rsi,[((0+64))+rsp]
  2624. mov r11,QWORD[((16+64))+rsp]
  2625. mov r12,QWORD[((24+64))+rsp]
  2626. lea rdi,[288+rsp]
  2627. call __ecp_nistz256_mul_montq
  ; S2 = mul(S2, in2_y)
  2628. mov rax,QWORD[448+rsp]
  2629. lea rbx,[448+rsp]
  2630. mov r9,QWORD[((0+32))+rsp]
  2631. mov r10,QWORD[((8+32))+rsp]
  2632. lea rsi,[((0+32))+rsp]
  2633. mov r11,QWORD[((16+32))+rsp]
  2634. mov r12,QWORD[((24+32))+rsp]
  2635. lea rdi,[32+rsp]
  2636. call __ecp_nistz256_mul_montq
  ; R = S2 - in1_y
  2637. lea rbx,[352+rsp]
  2638. lea rdi,[96+rsp]
  2639. call __ecp_nistz256_sub_fromq
  ; Hsqr = sqr(H)
  2640. mov rax,QWORD[((0+64))+rsp]
  2641. mov r14,QWORD[((8+64))+rsp]
  2642. lea rsi,[((0+64))+rsp]
  2643. mov r15,QWORD[((16+64))+rsp]
  2644. mov r8,QWORD[((24+64))+rsp]
  2645. lea rdi,[128+rsp]
  2646. call __ecp_nistz256_sqr_montq
  ; Rsqr = sqr(R)
  2647. mov rax,QWORD[((0+96))+rsp]
  2648. mov r14,QWORD[((8+96))+rsp]
  2649. lea rsi,[((0+96))+rsp]
  2650. mov r15,QWORD[((16+96))+rsp]
  2651. mov r8,QWORD[((24+96))+rsp]
  2652. lea rdi,[192+rsp]
  2653. call __ecp_nistz256_sqr_montq
  ; Hcub = mul(H, Hsqr)
  2654. mov rax,QWORD[128+rsp]
  2655. lea rbx,[128+rsp]
  2656. mov r9,QWORD[((0+64))+rsp]
  2657. mov r10,QWORD[((8+64))+rsp]
  2658. lea rsi,[((0+64))+rsp]
  2659. mov r11,QWORD[((16+64))+rsp]
  2660. mov r12,QWORD[((24+64))+rsp]
  2661. lea rdi,[160+rsp]
  2662. call __ecp_nistz256_mul_montq
  ; U2 = mul(Hsqr, in1_x)
  2663. mov rax,QWORD[320+rsp]
  2664. lea rbx,[320+rsp]
  2665. mov r9,QWORD[((0+128))+rsp]
  2666. mov r10,QWORD[((8+128))+rsp]
  2667. lea rsi,[((0+128))+rsp]
  2668. mov r11,QWORD[((16+128))+rsp]
  2669. mov r12,QWORD[((24+128))+rsp]
  2670. lea rdi,[rsp]
  2671. call __ecp_nistz256_mul_montq
  ; Inline doubling of the product in r12,r13,r8,r9 with one conditional
  ; subtraction of the modulus (same sequence as __ecp_nistz256_mul_by_2q but
  ; without the stores), interleaved with loading Rsqr into rax,rbp,rcx,r10.
  2672. xor r11,r11
  2673. add r12,r12
  2674. lea rsi,[192+rsp]
  2675. adc r13,r13
  2676. mov rax,r12
  2677. adc r8,r8
  2678. adc r9,r9
  2679. mov rbp,r13
  2680. adc r11,0
  2681. sub r12,-1
  2682. mov rcx,r8
  2683. sbb r13,r14
  2684. sbb r8,0
  2685. mov r10,r9
  2686. sbb r9,r15
  2687. sbb r11,0
  2688. cmovc r12,rax
  2689. mov rax,QWORD[rsi]
  2690. cmovc r13,rbp
  2691. mov rbp,QWORD[8+rsi]
  2692. cmovc r8,rcx
  2693. mov rcx,QWORD[16+rsi]
  2694. cmovc r9,r10
  2695. mov r10,QWORD[24+rsi]
  ; r12,r13,r8,r9 = Rsqr - 2*U2
  2696. call __ecp_nistz256_subq
  ; res_x = (Rsqr - 2*U2) - Hcub
  2697. lea rbx,[160+rsp]
  2698. lea rdi,[224+rsp]
  2699. call __ecp_nistz256_sub_fromq
  ; H slot = U2 - res_x (subq leaves it in registers; stored explicitly below)
  2700. mov rax,QWORD[((0+0))+rsp]
  2701. mov rbp,QWORD[((0+8))+rsp]
  2702. mov rcx,QWORD[((0+16))+rsp]
  2703. mov r10,QWORD[((0+24))+rsp]
  2704. lea rdi,[64+rsp]
  2705. call __ecp_nistz256_subq
  2706. mov QWORD[rdi],r12
  2707. mov QWORD[8+rdi],r13
  2708. mov QWORD[16+rdi],r8
  2709. mov QWORD[24+rdi],r9
  ; S2 = mul(Hcub, in1_y)
  2710. mov rax,QWORD[352+rsp]
  2711. lea rbx,[352+rsp]
  2712. mov r9,QWORD[((0+160))+rsp]
  2713. mov r10,QWORD[((8+160))+rsp]
  2714. lea rsi,[((0+160))+rsp]
  2715. mov r11,QWORD[((16+160))+rsp]
  2716. mov r12,QWORD[((24+160))+rsp]
  2717. lea rdi,[32+rsp]
  2718. call __ecp_nistz256_mul_montq
  ; H slot = mul(H slot, R)
  2719. mov rax,QWORD[96+rsp]
  2720. lea rbx,[96+rsp]
  2721. mov r9,QWORD[((0+64))+rsp]
  2722. mov r10,QWORD[((8+64))+rsp]
  2723. lea rsi,[((0+64))+rsp]
  2724. mov r11,QWORD[((16+64))+rsp]
  2725. mov r12,QWORD[((24+64))+rsp]
  2726. lea rdi,[64+rsp]
  2727. call __ecp_nistz256_mul_montq
  ; res_y = H slot - S2
  2728. lea rbx,[32+rsp]
  2729. lea rdi,[256+rsp]
  2730. call __ecp_nistz256_sub_fromq
  2731. DB 102,72,15,126,199 ; movq rdi,xmm0  (out)
  ; Branch-free selection of each output coordinate:
  ;   t   = xmm5 ? in2 value : computed value   (Z1 all zero -> take in2)
  ;   out = xmm4 ? in1 value : t                (in2 all zero -> take in1)
  ; out[64..95]: res_z / $L$ONE_mont (in2 carries no third value) / in1_z
  2732. movdqa xmm0,xmm5
  2733. movdqa xmm1,xmm5
  2734. pandn xmm0,XMMWORD[288+rsp]
  2735. movdqa xmm2,xmm5
  2736. pandn xmm1,XMMWORD[((288+16))+rsp]
  2737. movdqa xmm3,xmm5
  2738. pand xmm2,XMMWORD[$L$ONE_mont]
  2739. pand xmm3,XMMWORD[(($L$ONE_mont+16))]
  2740. por xmm2,xmm0
  2741. por xmm3,xmm1
  2742. movdqa xmm0,xmm4
  2743. movdqa xmm1,xmm4
  2744. pandn xmm0,xmm2
  2745. movdqa xmm2,xmm4
  2746. pandn xmm1,xmm3
  2747. movdqa xmm3,xmm4
  2748. pand xmm2,XMMWORD[384+rsp]
  2749. pand xmm3,XMMWORD[((384+16))+rsp]
  2750. por xmm2,xmm0
  2751. por xmm3,xmm1
  2752. movdqu XMMWORD[64+rdi],xmm2
  2753. movdqu XMMWORD[80+rdi],xmm3
  ; out[0..31]: res_x / in2_x / in1_x
  2754. movdqa xmm0,xmm5
  2755. movdqa xmm1,xmm5
  2756. pandn xmm0,XMMWORD[224+rsp]
  2757. movdqa xmm2,xmm5
  2758. pandn xmm1,XMMWORD[((224+16))+rsp]
  2759. movdqa xmm3,xmm5
  2760. pand xmm2,XMMWORD[416+rsp]
  2761. pand xmm3,XMMWORD[((416+16))+rsp]
  2762. por xmm2,xmm0
  2763. por xmm3,xmm1
  2764. movdqa xmm0,xmm4
  2765. movdqa xmm1,xmm4
  2766. pandn xmm0,xmm2
  2767. movdqa xmm2,xmm4
  2768. pandn xmm1,xmm3
  2769. movdqa xmm3,xmm4
  2770. pand xmm2,XMMWORD[320+rsp]
  2771. pand xmm3,XMMWORD[((320+16))+rsp]
  2772. por xmm2,xmm0
  2773. por xmm3,xmm1
  2774. movdqu XMMWORD[rdi],xmm2
  2775. movdqu XMMWORD[16+rdi],xmm3
  ; out[32..63]: res_y / in2_y / in1_y
  2776. movdqa xmm0,xmm5
  2777. movdqa xmm1,xmm5
  2778. pandn xmm0,XMMWORD[256+rsp]
  2779. movdqa xmm2,xmm5
  2780. pandn xmm1,XMMWORD[((256+16))+rsp]
  2781. movdqa xmm3,xmm5
  2782. pand xmm2,XMMWORD[448+rsp]
  2783. pand xmm3,XMMWORD[((448+16))+rsp]
  2784. por xmm2,xmm0
  2785. por xmm3,xmm1
  2786. movdqa xmm0,xmm4
  2787. movdqa xmm1,xmm4
  2788. pandn xmm0,xmm2
  2789. movdqa xmm2,xmm4
  2790. pandn xmm1,xmm3
  2791. movdqa xmm3,xmm4
  2792. pand xmm2,XMMWORD[352+rsp]
  2793. pand xmm3,XMMWORD[((352+16))+rsp]
  2794. por xmm2,xmm0
  2795. por xmm3,xmm1
  2796. movdqu XMMWORD[32+rdi],xmm2
  2797. movdqu XMMWORD[48+rdi],xmm3
  ; Epilogue: rsi = rsp + frame (32*15+8) + 6 pushes (48); reload the saved
  ; registers relative to it and release the frame.
  2798. lea rsi,[((480+56))+rsp]
  2799. mov r15,QWORD[((-48))+rsi]
  2800. mov r14,QWORD[((-40))+rsi]
  2801. mov r13,QWORD[((-32))+rsi]
  2802. mov r12,QWORD[((-24))+rsi]
  2803. mov rbx,QWORD[((-16))+rsi]
  2804. mov rbp,QWORD[((-8))+rsi]
  2805. lea rsp,[rsi]
  2806. $L$add_affineq_epilogue:
  2807. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  2808. mov rsi,QWORD[16+rsp]
  2809. DB 0F3h,0C3h ;repret
  2810. $L$SEH_end_GFp_nistz256_point_add_affine:
  2811. ALIGN 32
  ; __ecp_nistz256_add_tox -- 256-bit addition followed by one conditional
  ; subtraction of the modulus.  Each chain starts with an xor that clears CF,
  ; so the first limb can use adc/sbb like the rest.
  ; In:   r12,r13,r8,r9 = first operand, four 64-bit limbs, least significant first
  ;       rbx           = pointer to the second operand (4 qwords)
  ;       rdi           = pointer to the 4-qword destination
  ;       r14,r15       = limbs 1 and 3 of the modulus (limbs 0 and 2 are the
  ;                       immediates -1 and 0 used below)
  ; Out:  r12,r13,r8,r9 = sum, minus the modulus unless that subtraction borrows;
  ;                       the same four limbs are stored at [rdi]
  ; Clobbers: rax, rbp, rcx, r10, r11, flags
  2812. __ecp_nistz256_add_tox:
  2813. xor r11,r11 ; clears CF; r11 will receive the carry out of the add
  2814. adc r12,QWORD[rbx]
  2815. adc r13,QWORD[8+rbx]
  2816. mov rax,r12 ; rax,rbp,rcx,r10 keep a copy of the unreduced sum
  2817. adc r8,QWORD[16+rbx]
  2818. adc r9,QWORD[24+rbx]
  2819. mov rbp,r13
  2820. adc r11,0
  2821. xor r10,r10 ; clears CF for the sbb chain; r10 is overwritten below
  2822. sbb r12,-1
  2823. mov rcx,r8
  2824. sbb r13,r14
  2825. sbb r8,0
  2826. mov r10,r9
  2827. sbb r9,r15
  2828. sbb r11,0 ; CF set here means sum < modulus
  2829. cmovc r12,rax ; on borrow, fall back to the unreduced sum
  2830. cmovc r13,rbp
  2831. mov QWORD[rdi],r12
  2832. cmovc r8,rcx
  2833. mov QWORD[8+rdi],r13
  2834. cmovc r9,r10
  2835. mov QWORD[16+rdi],r8
  2836. mov QWORD[24+rdi],r9
  2837. DB 0F3h,0C3h ;repret
  2838. ALIGN 32
  ; __ecp_nistz256_sub_fromx -- 256-bit subtraction of a memory operand from the
  ; register operand, with the modulus added back when the subtraction borrows.
  ; Each chain starts with an xor that clears CF, so the first limb can use
  ; sbb/adc like the rest.
  ; In:   r12,r13,r8,r9 = minuend, four 64-bit limbs, least significant first
  ;       rbx           = pointer to the subtrahend (4 qwords)
  ;       rdi           = pointer to the 4-qword destination
  ;       r14,r15       = limbs 1 and 3 of the modulus (limbs 0 and 2 are the
  ;                       immediates -1 and 0 used below)
  ; Out:  r12,r13,r8,r9 = difference (plus the modulus if the subtraction
  ;                       borrowed); the same four limbs are stored at [rdi]
  ; Clobbers: rax, rbp, rcx, r10, r11, flags
  2839. __ecp_nistz256_sub_fromx:
  2840. xor r11,r11 ; clears CF and r11
  2841. sbb r12,QWORD[rbx]
  2842. sbb r13,QWORD[8+rbx]
  2843. mov rax,r12 ; rax,rbp,rcx,r10 keep a copy of the raw difference
  2844. sbb r8,QWORD[16+rbx]
  2845. sbb r9,QWORD[24+rbx]
  2846. mov rbp,r13
  2847. sbb r11,0 ; r11 = all ones if the subtraction borrowed, else 0
  2848. xor r10,r10 ; clears CF for the adc chain; r10 is overwritten below
  2849. adc r12,-1
  2850. mov rcx,r8
  2851. adc r13,r14
  2852. adc r8,0
  2853. mov r10,r9
  2854. adc r9,r15
  2855. bt r11,0 ; CF = 1 iff the subtraction borrowed
  2856. cmovnc r12,rax ; no borrow: keep the raw difference
  2857. cmovnc r13,rbp
  2858. mov QWORD[rdi],r12
  2859. cmovnc r8,rcx
  2860. mov QWORD[8+rdi],r13
  2861. cmovnc r9,r10
  2862. mov QWORD[16+rdi],r8
  2863. mov QWORD[24+rdi],r9
  2864. DB 0F3h,0C3h ;repret
  2865. ALIGN 32
  ; __ecp_nistz256_subx -- register-only 256-bit subtraction with the modulus
  ; added back when the subtraction borrows.  Nothing is written to memory and
  ; rdi is not read.  Each chain starts with an xor that clears CF.
  ; In:   rax,rbp,rcx,r10 = minuend, four 64-bit limbs, least significant first
  ;       r12,r13,r8,r9   = subtrahend
  ;       r14,r15         = limbs 1 and 3 of the modulus (limbs 0 and 2 are the
  ;                         immediates -1 and 0 used below)
  ; Out:  r12,r13,r8,r9   = minuend - subtrahend (plus the modulus on borrow)
  ; Clobbers: rax, rbp, rcx, r10, r11, flags
  2866. __ecp_nistz256_subx:
  2867. xor r11,r11 ; clears CF and r11
  2868. sbb rax,r12
  2869. sbb rbp,r13
  2870. mov r12,rax ; r12,r13,r8,r9 first receive the raw difference
  2871. sbb rcx,r8
  2872. sbb r10,r9
  2873. mov r13,rbp
  2874. sbb r11,0 ; r11 = all ones if the subtraction borrowed, else 0
  2875. xor r9,r9 ; clears CF for the adc chain; r9 is overwritten below
  2876. adc rax,-1
  2877. mov r8,rcx
  2878. adc rbp,r14
  2879. adc rcx,0
  2880. mov r9,r10
  2881. adc r10,r15
  2882. bt r11,0 ; CF = 1 iff the subtraction borrowed
  2883. cmovc r12,rax ; borrow: select the corrected value
  2884. cmovc r13,rbp
  2885. cmovc r8,rcx
  2886. cmovc r9,r10
  2887. DB 0F3h,0C3h ;repret
  2888. ALIGN 32
  ; __ecp_nistz256_mul_by_2x -- doubles a 256-bit value and performs one
  ; conditional subtraction of the modulus.  Each chain starts with an xor that
  ; clears CF, so the first limb can use adc/sbb like the rest.
  ; In:   r12,r13,r8,r9 = operand, four 64-bit limbs, least significant first
  ;       rdi           = pointer to the 4-qword destination
  ;       r14,r15       = limbs 1 and 3 of the modulus (limbs 0 and 2 are the
  ;                       immediates -1 and 0 used below)
  ; Out:  r12,r13,r8,r9 = 2*operand, minus the modulus unless that subtraction
  ;                       borrows; the same four limbs are stored at [rdi]
  ; Clobbers: rax, rbp, rcx, r10, r11, flags
  2889. __ecp_nistz256_mul_by_2x:
  2890. xor r11,r11 ; clears CF; r11 will receive the bit shifted out of the top
  2891. adc r12,r12
  2892. adc r13,r13
  2893. mov rax,r12 ; rax,rbp,rcx,r10 keep a copy of the unreduced double
  2894. adc r8,r8
  2895. adc r9,r9
  2896. mov rbp,r13
  2897. adc r11,0
  2898. xor r10,r10 ; clears CF for the sbb chain; r10 is overwritten below
  2899. sbb r12,-1
  2900. mov rcx,r8
  2901. sbb r13,r14
  2902. sbb r8,0
  2903. mov r10,r9
  2904. sbb r9,r15
  2905. sbb r11,0 ; CF set here means double < modulus
  2906. cmovc r12,rax ; on borrow, fall back to the unreduced double
  2907. cmovc r13,rbp
  2908. mov QWORD[rdi],r12
  2909. cmovc r8,rcx
  2910. mov QWORD[8+rdi],r13
  2911. cmovc r9,r10
  2912. mov QWORD[16+rdi],r8
  2913. mov QWORD[24+rdi],r9
  2914. DB 0F3h,0C3h ;repret
  2915. ALIGN 32
  2916. GFp_nistz256_point_doublex:
  2917. mov QWORD[8+rsp],rdi ;WIN64 prologue
  2918. mov QWORD[16+rsp],rsi
  2919. mov rax,rsp
  2920. $L$SEH_begin_GFp_nistz256_point_doublex:
  2921. mov rdi,rcx
  2922. mov rsi,rdx
  2923. $L$point_doublex:
  2924. push rbp
  2925. push rbx
  2926. push r12
  2927. push r13
  2928. push r14
  2929. push r15
  2930. sub rsp,32*5+8
  2931. $L$point_doublex_body:
  2932. $L$point_double_shortcutx:
  2933. movdqu xmm0,XMMWORD[rsi]
  2934. mov rbx,rsi
  2935. movdqu xmm1,XMMWORD[16+rsi]
  2936. mov r12,QWORD[((32+0))+rsi]
  2937. mov r13,QWORD[((32+8))+rsi]
  2938. mov r8,QWORD[((32+16))+rsi]
  2939. mov r9,QWORD[((32+24))+rsi]
  2940. mov r14,QWORD[(($L$poly+8))]
  2941. mov r15,QWORD[(($L$poly+24))]
  2942. movdqa XMMWORD[96+rsp],xmm0
  2943. movdqa XMMWORD[(96+16)+rsp],xmm1
  2944. lea r10,[32+rdi]
  2945. lea r11,[64+rdi]
  2946. DB 102,72,15,110,199
  2947. DB 102,73,15,110,202
  2948. DB 102,73,15,110,211
  2949. lea rdi,[rsp]
  2950. call __ecp_nistz256_mul_by_2x
  2951. mov rdx,QWORD[((64+0))+rsi]
  2952. mov r14,QWORD[((64+8))+rsi]
  2953. mov r15,QWORD[((64+16))+rsi]
  2954. mov r8,QWORD[((64+24))+rsi]
  2955. lea rsi,[((64-128))+rsi]
  2956. lea rdi,[64+rsp]
  2957. call __ecp_nistz256_sqr_montx
  2958. mov rdx,QWORD[((0+0))+rsp]
  2959. mov r14,QWORD[((8+0))+rsp]
  2960. lea rsi,[((-128+0))+rsp]
  2961. mov r15,QWORD[((16+0))+rsp]
  2962. mov r8,QWORD[((24+0))+rsp]
  2963. lea rdi,[rsp]
  2964. call __ecp_nistz256_sqr_montx
  2965. mov rdx,QWORD[32+rbx]
  2966. mov r9,QWORD[((64+0))+rbx]
  2967. mov r10,QWORD[((64+8))+rbx]
  2968. mov r11,QWORD[((64+16))+rbx]
  2969. mov r12,QWORD[((64+24))+rbx]
  2970. lea rsi,[((64-128))+rbx]
  2971. lea rbx,[32+rbx]
  2972. DB 102,72,15,126,215
  2973. call __ecp_nistz256_mul_montx
  2974. call __ecp_nistz256_mul_by_2x
  2975. mov r12,QWORD[((96+0))+rsp]
  2976. mov r13,QWORD[((96+8))+rsp]
  2977. lea rbx,[64+rsp]
  2978. mov r8,QWORD[((96+16))+rsp]
  2979. mov r9,QWORD[((96+24))+rsp]
  2980. lea rdi,[32+rsp]
  2981. call __ecp_nistz256_add_tox
  2982. mov r12,QWORD[((96+0))+rsp]
  2983. mov r13,QWORD[((96+8))+rsp]
  2984. lea rbx,[64+rsp]
  2985. mov r8,QWORD[((96+16))+rsp]
  2986. mov r9,QWORD[((96+24))+rsp]
  2987. lea rdi,[64+rsp]
  2988. call __ecp_nistz256_sub_fromx
  2989. mov rdx,QWORD[((0+0))+rsp]
  2990. mov r14,QWORD[((8+0))+rsp]
  2991. lea rsi,[((-128+0))+rsp]
  2992. mov r15,QWORD[((16+0))+rsp]
  2993. mov r8,QWORD[((24+0))+rsp]
  2994. DB 102,72,15,126,207
  2995. call __ecp_nistz256_sqr_montx
  2996. xor r9,r9
  2997. mov rax,r12
  2998. add r12,-1
  2999. mov r10,r13
  3000. adc r13,rsi
  3001. mov rcx,r14
  3002. adc r14,0
  3003. mov r8,r15
  3004. adc r15,rbp
  3005. adc r9,0
  3006. xor rsi,rsi
  3007. test rax,1
  3008. cmovz r12,rax
  3009. cmovz r13,r10
  3010. cmovz r14,rcx
  3011. cmovz r15,r8
  3012. cmovz r9,rsi
  3013. mov rax,r13
  3014. shr r12,1
  3015. shl rax,63
  3016. mov r10,r14
  3017. shr r13,1
  3018. or r12,rax
  3019. shl r10,63
  3020. mov rcx,r15
  3021. shr r14,1
  3022. or r13,r10
  3023. shl rcx,63
  3024. mov QWORD[rdi],r12
  3025. shr r15,1
  3026. mov QWORD[8+rdi],r13
  3027. shl r9,63
  3028. or r14,rcx
  3029. or r15,r9
  3030. mov QWORD[16+rdi],r14
  3031. mov QWORD[24+rdi],r15
  3032. mov rdx,QWORD[64+rsp]
  3033. lea rbx,[64+rsp]
  3034. mov r9,QWORD[((0+32))+rsp]
  3035. mov r10,QWORD[((8+32))+rsp]
  3036. lea rsi,[((-128+32))+rsp]
  3037. mov r11,QWORD[((16+32))+rsp]
  3038. mov r12,QWORD[((24+32))+rsp]
  3039. lea rdi,[32+rsp]
  3040. call __ecp_nistz256_mul_montx
  3041. lea rdi,[128+rsp]
  3042. call __ecp_nistz256_mul_by_2x
  3043. lea rbx,[32+rsp]
  3044. lea rdi,[32+rsp]
  3045. call __ecp_nistz256_add_tox
  3046. mov rdx,QWORD[96+rsp]
  3047. lea rbx,[96+rsp]
  3048. mov r9,QWORD[((0+0))+rsp]
  3049. mov r10,QWORD[((8+0))+rsp]
  3050. lea rsi,[((-128+0))+rsp]
  3051. mov r11,QWORD[((16+0))+rsp]
  3052. mov r12,QWORD[((24+0))+rsp]
  3053. lea rdi,[rsp]
  3054. call __ecp_nistz256_mul_montx
  3055. lea rdi,[128+rsp]
  3056. call __ecp_nistz256_mul_by_2x
  3057. mov rdx,QWORD[((0+32))+rsp]
  3058. mov r14,QWORD[((8+32))+rsp]
  3059. lea rsi,[((-128+32))+rsp]
  3060. mov r15,QWORD[((16+32))+rsp]
  3061. mov r8,QWORD[((24+32))+rsp]
  3062. DB 102,72,15,126,199
  3063. call __ecp_nistz256_sqr_montx
  3064. lea rbx,[128+rsp]
  3065. mov r8,r14
  3066. mov r9,r15
  3067. mov r14,rsi
  3068. mov r15,rbp
  3069. call __ecp_nistz256_sub_fromx
  3070. mov rax,QWORD[((0+0))+rsp]
  3071. mov rbp,QWORD[((0+8))+rsp]
  3072. mov rcx,QWORD[((0+16))+rsp]
  3073. mov r10,QWORD[((0+24))+rsp]
  3074. lea rdi,[rsp]
  3075. call __ecp_nistz256_subx
  3076. mov rdx,QWORD[32+rsp]
  3077. lea rbx,[32+rsp]
  3078. mov r14,r12
  3079. xor ecx,ecx
  3080. mov QWORD[((0+0))+rsp],r12
  3081. mov r10,r13
  3082. mov QWORD[((0+8))+rsp],r13
  3083. cmovz r11,r8
  3084. mov QWORD[((0+16))+rsp],r8
  3085. lea rsi,[((0-128))+rsp]
  3086. cmovz r12,r9
  3087. mov QWORD[((0+24))+rsp],r9
  3088. mov r9,r14
  3089. lea rdi,[rsp]
  3090. call __ecp_nistz256_mul_montx
  3091. DB 102,72,15,126,203
  3092. DB 102,72,15,126,207
  3093. call __ecp_nistz256_sub_fromx
  3094. lea rsi,[((160+56))+rsp]
  3095. mov r15,QWORD[((-48))+rsi]
  3096. mov r14,QWORD[((-40))+rsi]
  3097. mov r13,QWORD[((-32))+rsi]
  3098. mov r12,QWORD[((-24))+rsi]
  3099. mov rbx,QWORD[((-16))+rsi]
  3100. mov rbp,QWORD[((-8))+rsi]
  3101. lea rsp,[rsi]
  3102. $L$point_doublex_epilogue:
  3103. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  3104. mov rsi,QWORD[16+rsp]
  3105. DB 0F3h,0C3h ;repret
  3106. $L$SEH_end_GFp_nistz256_point_doublex:
  3107. ALIGN 32
  ; GFp_nistz256_point_addx -- Win64 entry point; the body proper starts at
  ; $L$point_addx.
  ;
  ; The three arguments arrive in rcx, rdx, r8 and are moved to rdi, rsi, rdx:
  ;   rdi = destination (96 bytes are written)
  ;   rsi = first input  (96 bytes are read)
  ;   rdx = second input (96 bytes are read)
  ; Each 96-byte operand is treated as three 32-byte fields at offsets 0/32/64
  ; (presumably the X/Y/Z coordinates of a P-256 point -- confirm with callers).
  ;
  ; Frame: six pushed registers plus 32*18+8 bytes of scratch. Scratch offsets
  ; used below: 384..479 = copy of the first input, 480..575 = copy of the
  ; second input, 0..383 = intermediates.
  ;
  ; All field arithmetic is delegated to the __ecp_nistz256_*x helpers, which
  ; are defined outside this part of the file. As used here:
  ;   sqr_montx : rdx,r14,r15,r8 = operand qwords, rsi = operand address-128,
  ;               rdi = destination
  ;   mul_montx : rbx = address of one operand (rdx = its first qword),
  ;               r9,r10,r11,r12 = qwords of the other operand,
  ;               rsi = that operand's address-128, rdi = destination
  ;   sub_fromx : rbx = address of a memory operand, rdi = destination
  ;   subx      : rax,rbp,rcx,r10 = qwords of one operand
  ; NOTE(review): the exact helper semantics (Montgomery multiply/square,
  ; modular subtract) are inferred from names and call sites -- confirm
  ; against the helper definitions.
  3108. GFp_nistz256_point_addx:
  3109. mov QWORD[8+rsp],rdi ;WIN64 prologue
  3110. mov QWORD[16+rsp],rsi
  ; rax = rsp at entry; the SEH handlers in this file read context->Rax when
  ; the fault address is before the body label.
  3111. mov rax,rsp
  3112. $L$SEH_begin_GFp_nistz256_point_addx:
  3113. mov rdi,rcx
  3114. mov rsi,rdx
  3115. mov rdx,r8
  3116. $L$point_addx:
  3117. push rbp
  3118. push rbx
  3119. push r12
  3120. push r13
  3121. push r14
  3122. push r15
  3123. sub rsp,32*18+8
  3124. $L$point_addx_body:
  ; Load the whole first input (96 bytes) into xmm0..xmm5.
  3125. movdqu xmm0,XMMWORD[rsi]
  3126. movdqu xmm1,XMMWORD[16+rsi]
  3127. movdqu xmm2,XMMWORD[32+rsi]
  3128. movdqu xmm3,XMMWORD[48+rsi]
  3129. movdqu xmm4,XMMWORD[64+rsi]
  3130. movdqu xmm5,XMMWORD[80+rsi]
  ; rbx = first input pointer, rsi = second input pointer.
  3131. mov rbx,rsi
  3132. mov rsi,rdx
  ; Stack copy of the first input at rsp+384..479.
  3133. movdqa XMMWORD[384+rsp],xmm0
  3134. movdqa XMMWORD[(384+16)+rsp],xmm1
  3135. movdqa XMMWORD[416+rsp],xmm2
  3136. movdqa XMMWORD[(416+16)+rsp],xmm3
  3137. movdqa XMMWORD[448+rsp],xmm4
  3138. movdqa XMMWORD[(448+16)+rsp],xmm5
  ; Start OR-folding bytes 64..95 of the first input into one dword of xmm5
  ; (finished after the first sqr call). Interleaved with loading the second
  ; input and copying it to rsp+480..575.
  3139. por xmm5,xmm4
  3140. movdqu xmm0,XMMWORD[rsi]
  3141. pshufd xmm3,xmm5,0xb1
  3142. movdqu xmm1,XMMWORD[16+rsi]
  3143. movdqu xmm2,XMMWORD[32+rsi]
  3144. por xmm5,xmm3
  3145. movdqu xmm3,XMMWORD[48+rsi]
  ; Bytes 64..95 of the second input go to rdx,r14,r15,r8: they are both the
  ; operand of the first squaring and the source of the copy at rsp+544.
  3146. mov rdx,QWORD[((64+0))+rsi]
  3147. mov r14,QWORD[((64+8))+rsi]
  3148. mov r15,QWORD[((64+16))+rsi]
  3149. mov r8,QWORD[((64+24))+rsi]
  3150. movdqa XMMWORD[480+rsp],xmm0
  3151. pshufd xmm4,xmm5,0x1e
  3152. movdqa XMMWORD[(480+16)+rsp],xmm1
  3153. movdqu xmm0,XMMWORD[64+rsi]
  3154. movdqu xmm1,XMMWORD[80+rsi]
  3155. movdqa XMMWORD[512+rsp],xmm2
  3156. movdqa XMMWORD[(512+16)+rsp],xmm3
  3157. por xmm5,xmm4
  3158. pxor xmm4,xmm4
  ; xmm1 = OR of the two 16-byte halves of second-input bytes 64..95.
  3159. por xmm1,xmm0
  3160. DB 102,72,15,110,199 ; hand-encoded movq xmm0,rdi (park the destination pointer)
  3161. lea rsi,[((64-128))+rsi]
  3162. mov QWORD[((544+0))+rsp],rdx
  3163. mov QWORD[((544+8))+rsp],r14
  3164. mov QWORD[((544+16))+rsp],r15
  3165. mov QWORD[((544+24))+rsp],r8
  ; rsp+96 <- sqr(second input bytes 64..95)
  3166. lea rdi,[96+rsp]
  3167. call __ecp_nistz256_sqr_montx
  ; xmm5 = all-ones if bytes 64..95 of the first input are all zero, else 0.
  ; xmm4 = the same test for bytes 64..95 of the second input.
  ; (Presumably "input is the point at infinity" masks -- they drive the
  ; final selection at the end of the function.)
  3168. pcmpeqd xmm5,xmm4
  3169. pshufd xmm4,xmm1,0xb1
  3170. por xmm4,xmm1
  3171. pshufd xmm5,xmm5,0
  3172. pshufd xmm3,xmm4,0x1e
  3173. por xmm4,xmm3
  3174. pxor xmm3,xmm3
  3175. pcmpeqd xmm4,xmm3
  3176. pshufd xmm4,xmm4,0
  3177. mov rdx,QWORD[((64+0))+rbx]
  3178. mov r14,QWORD[((64+8))+rbx]
  3179. mov r15,QWORD[((64+16))+rbx]
  3180. mov r8,QWORD[((64+24))+rbx]
  3181. DB 102,72,15,110,203 ; hand-encoded movq xmm1,rbx (park the first input pointer)
  ; rsp+32 <- sqr(first input bytes 64..95)
  3182. lea rsi,[((64-128))+rbx]
  3183. lea rdi,[32+rsp]
  3184. call __ecp_nistz256_sqr_montx
  ; rsp+224 <- mul([rsp+544], [rsp+96])
  3185. mov rdx,QWORD[544+rsp]
  3186. lea rbx,[544+rsp]
  3187. mov r9,QWORD[((0+96))+rsp]
  3188. mov r10,QWORD[((8+96))+rsp]
  3189. lea rsi,[((-128+96))+rsp]
  3190. mov r11,QWORD[((16+96))+rsp]
  3191. mov r12,QWORD[((24+96))+rsp]
  3192. lea rdi,[224+rsp]
  3193. call __ecp_nistz256_mul_montx
  ; rsp+256 <- mul([rsp+448], [rsp+32])
  3194. mov rdx,QWORD[448+rsp]
  3195. lea rbx,[448+rsp]
  3196. mov r9,QWORD[((0+32))+rsp]
  3197. mov r10,QWORD[((8+32))+rsp]
  3198. lea rsi,[((-128+32))+rsp]
  3199. mov r11,QWORD[((16+32))+rsp]
  3200. mov r12,QWORD[((24+32))+rsp]
  3201. lea rdi,[256+rsp]
  3202. call __ecp_nistz256_mul_montx
  ; rsp+224 <- mul([rsp+416], [rsp+224])
  3203. mov rdx,QWORD[416+rsp]
  3204. lea rbx,[416+rsp]
  3205. mov r9,QWORD[((0+224))+rsp]
  3206. mov r10,QWORD[((8+224))+rsp]
  3207. lea rsi,[((-128+224))+rsp]
  3208. mov r11,QWORD[((16+224))+rsp]
  3209. mov r12,QWORD[((24+224))+rsp]
  3210. lea rdi,[224+rsp]
  3211. call __ecp_nistz256_mul_montx
  ; rsp+256 <- mul([rsp+512], [rsp+256])
  3212. mov rdx,QWORD[512+rsp]
  3213. lea rbx,[512+rsp]
  3214. mov r9,QWORD[((0+256))+rsp]
  3215. mov r10,QWORD[((8+256))+rsp]
  3216. lea rsi,[((-128+256))+rsp]
  3217. mov r11,QWORD[((16+256))+rsp]
  3218. mov r12,QWORD[((24+256))+rsp]
  3219. lea rdi,[256+rsp]
  3220. call __ecp_nistz256_mul_montx
  ; rsp+64 <- sub_fromx with memory operand [rsp+224]; the other operand is
  ; presumably the product still held in registers from the call above.
  3221. lea rbx,[224+rsp]
  3222. lea rdi,[64+rsp]
  3223. call __ecp_nistz256_sub_fromx
  ; r12 = OR of r12,r13,r8,r9 after the subtraction (presumably its four
  ; result limbs, so r12 == 0 iff the difference is zero). Parked in xmm3.
  ; xmm2 = xmm4 | xmm5 (either input flagged as all-zero in bytes 64..95).
  3224. or r12,r13
  3225. movdqa xmm2,xmm4
  3226. or r12,r8
  3227. or r12,r9
  3228. por xmm2,xmm5
  3229. DB 102,73,15,110,220 ; hand-encoded movq xmm3,r12
  ; rsp+160 <- mul([rsp+384], [rsp+96])
  3230. mov rdx,QWORD[384+rsp]
  3231. lea rbx,[384+rsp]
  3232. mov r9,QWORD[((0+96))+rsp]
  3233. mov r10,QWORD[((8+96))+rsp]
  3234. lea rsi,[((-128+96))+rsp]
  3235. mov r11,QWORD[((16+96))+rsp]
  3236. mov r12,QWORD[((24+96))+rsp]
  3237. lea rdi,[160+rsp]
  3238. call __ecp_nistz256_mul_montx
  ; rsp+192 <- mul([rsp+480], [rsp+32])
  3239. mov rdx,QWORD[480+rsp]
  3240. lea rbx,[480+rsp]
  3241. mov r9,QWORD[((0+32))+rsp]
  3242. mov r10,QWORD[((8+32))+rsp]
  3243. lea rsi,[((-128+32))+rsp]
  3244. mov r11,QWORD[((16+32))+rsp]
  3245. mov r12,QWORD[((24+32))+rsp]
  3246. lea rdi,[192+rsp]
  3247. call __ecp_nistz256_mul_montx
  ; rsp+0 <- sub_fromx with memory operand [rsp+160]
  3248. lea rbx,[160+rsp]
  3249. lea rdi,[rsp]
  3250. call __ecp_nistz256_sub_fromx
  ; Dispatch on the two differences and the combined mask:
  ;   r12 = OR of this second difference, then |= low qword of xmm2
  ;   r9  = OR of the first difference (parked in xmm3 above)
  ;   r12 != 0            -> $L$add_proceedx (general path)
  ;   r12 == 0, r9 == 0   -> $L$add_doublex
  ;   r12 == 0, r9 != 0   -> write 96 zero bytes to the destination
  3251. or r12,r13
  3252. or r12,r8
  3253. or r12,r9
  3254. DB 102,73,15,126,208 ; hand-encoded movq r8,xmm2
  3255. DB 102,73,15,126,217 ; hand-encoded movq r9,xmm3
  3256. or r12,r8
  3257. DB 0x3e ; prefix byte for the jnz below (legacy "branch taken" hint encoding)
  3258. jnz NEAR $L$add_proceedx
  3259. test r9,r9
  3260. jz NEAR $L$add_doublex
  3261. DB 102,72,15,126,199 ; hand-encoded movq rdi,xmm0 (recover the destination pointer)
  3262. pxor xmm0,xmm0
  3263. movdqu XMMWORD[rdi],xmm0
  3264. movdqu XMMWORD[16+rdi],xmm0
  3265. movdqu XMMWORD[32+rdi],xmm0
  3266. movdqu XMMWORD[48+rdi],xmm0
  3267. movdqu XMMWORD[64+rdi],xmm0
  3268. movdqu XMMWORD[80+rdi],xmm0
  3269. jmp NEAR $L$add_donex
  3270. ALIGN 32
  ; Hand over to the doubling routine: rsi = first input pointer, rdi =
  ; destination, and the frame is shrunk by 416 bytes (32*18+8 - 416 = 32*5+8,
  ; which matches the 32*5+56 recorded for point_doublex in .xdata once the
  ; 48 bytes of pushes are added). The jump target is defined outside this
  ; part of the file.
  3271. $L$add_doublex:
  3272. DB 102,72,15,126,206 ; hand-encoded movq rsi,xmm1
  3273. DB 102,72,15,126,199 ; hand-encoded movq rdi,xmm0
  3274. add rsp,416
  3275. jmp NEAR $L$point_double_shortcutx
  3276. ALIGN 32
  ; General path.
  3277. $L$add_proceedx:
  ; rsp+96 <- sqr([rsp+64])
  3278. mov rdx,QWORD[((0+64))+rsp]
  3279. mov r14,QWORD[((8+64))+rsp]
  3280. lea rsi,[((-128+64))+rsp]
  3281. mov r15,QWORD[((16+64))+rsp]
  3282. mov r8,QWORD[((24+64))+rsp]
  3283. lea rdi,[96+rsp]
  3284. call __ecp_nistz256_sqr_montx
  ; rsp+352 <- mul([rsp+448], [rsp+0])
  3285. mov rdx,QWORD[448+rsp]
  3286. lea rbx,[448+rsp]
  3287. mov r9,QWORD[((0+0))+rsp]
  3288. mov r10,QWORD[((8+0))+rsp]
  3289. lea rsi,[((-128+0))+rsp]
  3290. mov r11,QWORD[((16+0))+rsp]
  3291. mov r12,QWORD[((24+0))+rsp]
  3292. lea rdi,[352+rsp]
  3293. call __ecp_nistz256_mul_montx
  ; rsp+32 <- sqr([rsp+0])
  3294. mov rdx,QWORD[((0+0))+rsp]
  3295. mov r14,QWORD[((8+0))+rsp]
  3296. lea rsi,[((-128+0))+rsp]
  3297. mov r15,QWORD[((16+0))+rsp]
  3298. mov r8,QWORD[((24+0))+rsp]
  3299. lea rdi,[32+rsp]
  3300. call __ecp_nistz256_sqr_montx
  ; rsp+352 <- mul([rsp+544], [rsp+352])  (becomes output bytes 64..95)
  3301. mov rdx,QWORD[544+rsp]
  3302. lea rbx,[544+rsp]
  3303. mov r9,QWORD[((0+352))+rsp]
  3304. mov r10,QWORD[((8+352))+rsp]
  3305. lea rsi,[((-128+352))+rsp]
  3306. mov r11,QWORD[((16+352))+rsp]
  3307. mov r12,QWORD[((24+352))+rsp]
  3308. lea rdi,[352+rsp]
  3309. call __ecp_nistz256_mul_montx
  ; rsp+128 <- mul([rsp+0], [rsp+32])
  3310. mov rdx,QWORD[rsp]
  3311. lea rbx,[rsp]
  3312. mov r9,QWORD[((0+32))+rsp]
  3313. mov r10,QWORD[((8+32))+rsp]
  3314. lea rsi,[((-128+32))+rsp]
  3315. mov r11,QWORD[((16+32))+rsp]
  3316. mov r12,QWORD[((24+32))+rsp]
  3317. lea rdi,[128+rsp]
  3318. call __ecp_nistz256_mul_montx
  ; rsp+192 <- mul([rsp+160], [rsp+32])
  3319. mov rdx,QWORD[160+rsp]
  3320. lea rbx,[160+rsp]
  3321. mov r9,QWORD[((0+32))+rsp]
  3322. mov r10,QWORD[((8+32))+rsp]
  3323. lea rsi,[((-128+32))+rsp]
  3324. mov r11,QWORD[((16+32))+rsp]
  3325. mov r12,QWORD[((24+32))+rsp]
  3326. lea rdi,[192+rsp]
  3327. call __ecp_nistz256_mul_montx
  ; Inline doubling of the 4-limb value in r12,r13,r8,r9 (r11 catches the
  ; carry-out), followed by a trial subtraction of the constant with limbs
  ; (-1, r14, 0, r15); the cmovc's restore the unsubtracted value when that
  ; borrows. r14/r15 are not set in this function -- presumably the helper
  ; leaves the corresponding modulus limbs there (TODO confirm).
  ; Interleaved: rax,rbp,rcx,r10 <- the four qwords at rsp+96 for subx.
  3328. xor r11,r11
  3329. add r12,r12
  3330. lea rsi,[96+rsp]
  3331. adc r13,r13
  3332. mov rax,r12
  3333. adc r8,r8
  3334. adc r9,r9
  3335. mov rbp,r13
  3336. adc r11,0
  3337. sub r12,-1
  3338. mov rcx,r8
  3339. sbb r13,r14
  3340. sbb r8,0
  3341. mov r10,r9
  3342. sbb r9,r15
  3343. sbb r11,0
  3344. cmovc r12,rax
  3345. mov rax,QWORD[rsi]
  3346. cmovc r13,rbp
  3347. mov rbp,QWORD[8+rsi]
  3348. cmovc r8,rcx
  3349. mov rcx,QWORD[16+rsi]
  3350. cmovc r9,r10
  3351. mov r10,QWORD[24+rsi]
  3352. call __ecp_nistz256_subx
  ; rsp+288 <- sub_fromx with memory operand [rsp+128]  (becomes output bytes 0..31)
  3353. lea rbx,[128+rsp]
  3354. lea rdi,[288+rsp]
  3355. call __ecp_nistz256_sub_fromx
  ; subx against the four qwords at rsp+192; unlike sub_fromx, the result is
  ; stored explicitly here from r12,r13,r8,r9 to rsp+320.
  3356. mov rax,QWORD[((192+0))+rsp]
  3357. mov rbp,QWORD[((192+8))+rsp]
  3358. mov rcx,QWORD[((192+16))+rsp]
  3359. mov r10,QWORD[((192+24))+rsp]
  3360. lea rdi,[320+rsp]
  3361. call __ecp_nistz256_subx
  3362. mov QWORD[rdi],r12
  3363. mov QWORD[8+rdi],r13
  3364. mov QWORD[16+rdi],r8
  3365. mov QWORD[24+rdi],r9
  ; rsp+256 <- mul([rsp+128], [rsp+224])
  3366. mov rdx,QWORD[128+rsp]
  3367. lea rbx,[128+rsp]
  3368. mov r9,QWORD[((0+224))+rsp]
  3369. mov r10,QWORD[((8+224))+rsp]
  3370. lea rsi,[((-128+224))+rsp]
  3371. mov r11,QWORD[((16+224))+rsp]
  3372. mov r12,QWORD[((24+224))+rsp]
  3373. lea rdi,[256+rsp]
  3374. call __ecp_nistz256_mul_montx
  ; rsp+320 <- mul([rsp+320], [rsp+64])
  3375. mov rdx,QWORD[320+rsp]
  3376. lea rbx,[320+rsp]
  3377. mov r9,QWORD[((0+64))+rsp]
  3378. mov r10,QWORD[((8+64))+rsp]
  3379. lea rsi,[((-128+64))+rsp]
  3380. mov r11,QWORD[((16+64))+rsp]
  3381. mov r12,QWORD[((24+64))+rsp]
  3382. lea rdi,[320+rsp]
  3383. call __ecp_nistz256_mul_montx
  ; rsp+320 <- sub_fromx with memory operand [rsp+256]  (becomes output bytes 32..63)
  3384. lea rbx,[256+rsp]
  3385. lea rdi,[320+rsp]
  3386. call __ecp_nistz256_sub_fromx
  3387. DB 102,72,15,126,199 ; hand-encoded movq rdi,xmm0 (recover the destination pointer)
  ; Branch-free selection of each 32-byte output field using the two masks:
  ;   xmm4 set -> field copied from the first input  (rsp+384/416/448)
  ;   else xmm5 set -> field copied from the second input (rsp+480/512/544)
  ;   else -> the computed field (rsp+288/320/352)
  ; Output bytes 64..95:
  3388. movdqa xmm0,xmm5
  3389. movdqa xmm1,xmm5
  3390. pandn xmm0,XMMWORD[352+rsp]
  3391. movdqa xmm2,xmm5
  3392. pandn xmm1,XMMWORD[((352+16))+rsp]
  3393. movdqa xmm3,xmm5
  3394. pand xmm2,XMMWORD[544+rsp]
  3395. pand xmm3,XMMWORD[((544+16))+rsp]
  3396. por xmm2,xmm0
  3397. por xmm3,xmm1
  3398. movdqa xmm0,xmm4
  3399. movdqa xmm1,xmm4
  3400. pandn xmm0,xmm2
  3401. movdqa xmm2,xmm4
  3402. pandn xmm1,xmm3
  3403. movdqa xmm3,xmm4
  3404. pand xmm2,XMMWORD[448+rsp]
  3405. pand xmm3,XMMWORD[((448+16))+rsp]
  3406. por xmm2,xmm0
  3407. por xmm3,xmm1
  3408. movdqu XMMWORD[64+rdi],xmm2
  3409. movdqu XMMWORD[80+rdi],xmm3
  ; Output bytes 0..31:
  3410. movdqa xmm0,xmm5
  3411. movdqa xmm1,xmm5
  3412. pandn xmm0,XMMWORD[288+rsp]
  3413. movdqa xmm2,xmm5
  3414. pandn xmm1,XMMWORD[((288+16))+rsp]
  3415. movdqa xmm3,xmm5
  3416. pand xmm2,XMMWORD[480+rsp]
  3417. pand xmm3,XMMWORD[((480+16))+rsp]
  3418. por xmm2,xmm0
  3419. por xmm3,xmm1
  3420. movdqa xmm0,xmm4
  3421. movdqa xmm1,xmm4
  3422. pandn xmm0,xmm2
  3423. movdqa xmm2,xmm4
  3424. pandn xmm1,xmm3
  3425. movdqa xmm3,xmm4
  3426. pand xmm2,XMMWORD[384+rsp]
  3427. pand xmm3,XMMWORD[((384+16))+rsp]
  3428. por xmm2,xmm0
  3429. por xmm3,xmm1
  3430. movdqu XMMWORD[rdi],xmm2
  3431. movdqu XMMWORD[16+rdi],xmm3
  ; Output bytes 32..63:
  3432. movdqa xmm0,xmm5
  3433. movdqa xmm1,xmm5
  3434. pandn xmm0,XMMWORD[320+rsp]
  3435. movdqa xmm2,xmm5
  3436. pandn xmm1,XMMWORD[((320+16))+rsp]
  3437. movdqa xmm3,xmm5
  3438. pand xmm2,XMMWORD[512+rsp]
  3439. pand xmm3,XMMWORD[((512+16))+rsp]
  3440. por xmm2,xmm0
  3441. por xmm3,xmm1
  3442. movdqa xmm0,xmm4
  3443. movdqa xmm1,xmm4
  3444. pandn xmm0,xmm2
  3445. movdqa xmm2,xmm4
  3446. pandn xmm1,xmm3
  3447. movdqa xmm3,xmm4
  3448. pand xmm2,XMMWORD[416+rsp]
  3449. pand xmm3,XMMWORD[((416+16))+rsp]
  3450. por xmm2,xmm0
  3451. por xmm3,xmm1
  3452. movdqu XMMWORD[32+rdi],xmm2
  3453. movdqu XMMWORD[48+rdi],xmm3
  ; Epilogue: rsi = rsp as it was at entry (32*18+8 scratch + 48 bytes of
  ; pushes = 576+56); reload the six saved registers from just below it.
  3454. $L$add_donex:
  3455. lea rsi,[((576+56))+rsp]
  3456. mov r15,QWORD[((-48))+rsi]
  3457. mov r14,QWORD[((-40))+rsi]
  3458. mov r13,QWORD[((-32))+rsi]
  3459. mov r12,QWORD[((-24))+rsi]
  3460. mov rbx,QWORD[((-16))+rsi]
  3461. mov rbp,QWORD[((-8))+rsi]
  3462. lea rsp,[rsi]
  3463. $L$point_addx_epilogue:
  3464. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  3465. mov rsi,QWORD[16+rsp]
  3466. DB 0F3h,0C3h ;repret
  3467. $L$SEH_end_GFp_nistz256_point_addx:
  3468. ALIGN 32
  ; GFp_nistz256_point_add_affinex -- Win64 entry point; the body proper
  ; starts at $L$point_add_affinex.
  ;
  ; The three arguments arrive in rcx, rdx, r8 and are moved to rdi, rsi, rdx:
  ;   rdi = destination (96 bytes are written)
  ;   rsi = first input  (96 bytes are read)
  ;   rdx = second input (only 64 bytes are read)
  ; The operands are handled as 32-byte fields; the second input has only two
  ; of them (presumably an affine X/Y point added to a projective X/Y/Z
  ; point -- confirm with callers).
  ;
  ; Frame: six pushed registers plus 32*15+8 bytes of scratch. Scratch offsets
  ; used below: 320..415 = copy of the first input, 416..479 = copy of the
  ; second input, 0..319 = intermediates.
  ;
  ; Field arithmetic is delegated to the __ecp_nistz256_*x helpers defined
  ; outside this part of the file. As used here:
  ;   sqr_montx : rdx,r14,r15,r8 = operand qwords, rsi = operand address-128,
  ;               rdi = destination
  ;   mul_montx : rbx = address of one operand (rdx = its first qword),
  ;               r9,r10,r11,r12 = qwords of the other operand,
  ;               rsi = that operand's address-128, rdi = destination
  ;   sub_fromx : rbx = address of a memory operand, rdi = destination
  ;   subx      : rax,rbp,rcx,r10 = qwords of one operand
  ; NOTE(review): helper semantics are inferred from names and call sites --
  ; confirm against the helper definitions.
  3469. GFp_nistz256_point_add_affinex:
  3470. mov QWORD[8+rsp],rdi ;WIN64 prologue
  3471. mov QWORD[16+rsp],rsi
  ; rax = rsp at entry; the SEH handlers in this file read context->Rax when
  ; the fault address is before the body label.
  3472. mov rax,rsp
  3473. $L$SEH_begin_GFp_nistz256_point_add_affinex:
  3474. mov rdi,rcx
  3475. mov rsi,rdx
  3476. mov rdx,r8
  3477. $L$point_add_affinex:
  3478. push rbp
  3479. push rbx
  3480. push r12
  3481. push r13
  3482. push r14
  3483. push r15
  3484. sub rsp,32*15+8
  3485. $L$add_affinex_body:
  ; Load the first input (96 bytes) into xmm0..xmm5; rbx = second input pointer.
  3486. movdqu xmm0,XMMWORD[rsi]
  3487. mov rbx,rdx
  3488. movdqu xmm1,XMMWORD[16+rsi]
  3489. movdqu xmm2,XMMWORD[32+rsi]
  3490. movdqu xmm3,XMMWORD[48+rsi]
  3491. movdqu xmm4,XMMWORD[64+rsi]
  3492. movdqu xmm5,XMMWORD[80+rsi]
  ; Bytes 64..95 of the first input: operand of the squaring below.
  3493. mov rdx,QWORD[((64+0))+rsi]
  3494. mov r14,QWORD[((64+8))+rsi]
  3495. mov r15,QWORD[((64+16))+rsi]
  3496. mov r8,QWORD[((64+24))+rsi]
  ; Stack copy of the first input at rsp+320..415.
  3497. movdqa XMMWORD[320+rsp],xmm0
  3498. movdqa XMMWORD[(320+16)+rsp],xmm1
  3499. movdqa XMMWORD[352+rsp],xmm2
  3500. movdqa XMMWORD[(352+16)+rsp],xmm3
  3501. movdqa XMMWORD[384+rsp],xmm4
  3502. movdqa XMMWORD[(384+16)+rsp],xmm5
  ; Start OR-folding bytes 64..95 of the first input into one dword of xmm5,
  ; interleaved with loading the second input (64 bytes) and copying it to
  ; rsp+416..479. xmm3 accumulates the OR of all 64 bytes of the second input.
  3503. por xmm5,xmm4
  3504. movdqu xmm0,XMMWORD[rbx]
  3505. pshufd xmm3,xmm5,0xb1
  3506. movdqu xmm1,XMMWORD[16+rbx]
  3507. movdqu xmm2,XMMWORD[32+rbx]
  3508. por xmm5,xmm3
  3509. movdqu xmm3,XMMWORD[48+rbx]
  3510. movdqa XMMWORD[416+rsp],xmm0
  3511. pshufd xmm4,xmm5,0x1e
  3512. movdqa XMMWORD[(416+16)+rsp],xmm1
  3513. por xmm1,xmm0
  3514. DB 102,72,15,110,199 ; hand-encoded movq xmm0,rdi (park the destination pointer)
  3515. movdqa XMMWORD[448+rsp],xmm2
  3516. movdqa XMMWORD[(448+16)+rsp],xmm3
  3517. por xmm3,xmm2
  3518. por xmm5,xmm4
  3519. pxor xmm4,xmm4
  3520. por xmm3,xmm1
  ; rsp+32 <- sqr(first input bytes 64..95)
  3521. lea rsi,[((64-128))+rsi]
  3522. lea rdi,[32+rsp]
  3523. call __ecp_nistz256_sqr_montx
  ; xmm5 = all-ones if bytes 64..95 of the first input are all zero, else 0.
  ; xmm4 = all-ones if all 64 bytes of the second input are zero, else 0.
  ; (Presumably "input is the point at infinity" masks -- they drive the
  ; final selection at the end of the function.)
  ; Interleaved: set up the next multiplication, taking r12..r15 as left by
  ; the sqr call above as the register operand (presumably its result) and
  ; the first field of the second input ([rbx]) as the memory operand.
  3524. pcmpeqd xmm5,xmm4
  3525. pshufd xmm4,xmm3,0xb1
  3526. mov rdx,QWORD[rbx]
  3527. mov r9,r12
  3528. por xmm4,xmm3
  3529. pshufd xmm5,xmm5,0
  3530. pshufd xmm3,xmm4,0x1e
  3531. mov r10,r13
  3532. por xmm4,xmm3
  3533. pxor xmm3,xmm3
  3534. mov r11,r14
  3535. pcmpeqd xmm4,xmm3
  3536. pshufd xmm4,xmm4,0
  ; rsp+0 <- mul([rbx], value at rsp+32)
  3537. lea rsi,[((32-128))+rsp]
  3538. mov r12,r15
  3539. lea rdi,[rsp]
  3540. call __ecp_nistz256_mul_montx
  ; rsp+64 <- sub_fromx with memory operand [rsp+320]
  3541. lea rbx,[320+rsp]
  3542. lea rdi,[64+rsp]
  3543. call __ecp_nistz256_sub_fromx
  ; rsp+32 <- mul([rsp+384], [rsp+32])
  3544. mov rdx,QWORD[384+rsp]
  3545. lea rbx,[384+rsp]
  3546. mov r9,QWORD[((0+32))+rsp]
  3547. mov r10,QWORD[((8+32))+rsp]
  3548. lea rsi,[((-128+32))+rsp]
  3549. mov r11,QWORD[((16+32))+rsp]
  3550. mov r12,QWORD[((24+32))+rsp]
  3551. lea rdi,[32+rsp]
  3552. call __ecp_nistz256_mul_montx
  ; rsp+288 <- mul([rsp+384], [rsp+64])  (becomes output bytes 64..95)
  3553. mov rdx,QWORD[384+rsp]
  3554. lea rbx,[384+rsp]
  3555. mov r9,QWORD[((0+64))+rsp]
  3556. mov r10,QWORD[((8+64))+rsp]
  3557. lea rsi,[((-128+64))+rsp]
  3558. mov r11,QWORD[((16+64))+rsp]
  3559. mov r12,QWORD[((24+64))+rsp]
  3560. lea rdi,[288+rsp]
  3561. call __ecp_nistz256_mul_montx
  ; rsp+32 <- mul([rsp+448], [rsp+32])
  3562. mov rdx,QWORD[448+rsp]
  3563. lea rbx,[448+rsp]
  3564. mov r9,QWORD[((0+32))+rsp]
  3565. mov r10,QWORD[((8+32))+rsp]
  3566. lea rsi,[((-128+32))+rsp]
  3567. mov r11,QWORD[((16+32))+rsp]
  3568. mov r12,QWORD[((24+32))+rsp]
  3569. lea rdi,[32+rsp]
  3570. call __ecp_nistz256_mul_montx
  ; rsp+96 <- sub_fromx with memory operand [rsp+352]
  3571. lea rbx,[352+rsp]
  3572. lea rdi,[96+rsp]
  3573. call __ecp_nistz256_sub_fromx
  ; rsp+128 <- sqr([rsp+64])
  3574. mov rdx,QWORD[((0+64))+rsp]
  3575. mov r14,QWORD[((8+64))+rsp]
  3576. lea rsi,[((-128+64))+rsp]
  3577. mov r15,QWORD[((16+64))+rsp]
  3578. mov r8,QWORD[((24+64))+rsp]
  3579. lea rdi,[128+rsp]
  3580. call __ecp_nistz256_sqr_montx
  ; rsp+192 <- sqr([rsp+96])
  3581. mov rdx,QWORD[((0+96))+rsp]
  3582. mov r14,QWORD[((8+96))+rsp]
  3583. lea rsi,[((-128+96))+rsp]
  3584. mov r15,QWORD[((16+96))+rsp]
  3585. mov r8,QWORD[((24+96))+rsp]
  3586. lea rdi,[192+rsp]
  3587. call __ecp_nistz256_sqr_montx
  ; rsp+160 <- mul([rsp+128], [rsp+64])
  3588. mov rdx,QWORD[128+rsp]
  3589. lea rbx,[128+rsp]
  3590. mov r9,QWORD[((0+64))+rsp]
  3591. mov r10,QWORD[((8+64))+rsp]
  3592. lea rsi,[((-128+64))+rsp]
  3593. mov r11,QWORD[((16+64))+rsp]
  3594. mov r12,QWORD[((24+64))+rsp]
  3595. lea rdi,[160+rsp]
  3596. call __ecp_nistz256_mul_montx
  ; rsp+0 <- mul([rsp+320], [rsp+128])
  3597. mov rdx,QWORD[320+rsp]
  3598. lea rbx,[320+rsp]
  3599. mov r9,QWORD[((0+128))+rsp]
  3600. mov r10,QWORD[((8+128))+rsp]
  3601. lea rsi,[((-128+128))+rsp]
  3602. mov r11,QWORD[((16+128))+rsp]
  3603. mov r12,QWORD[((24+128))+rsp]
  3604. lea rdi,[rsp]
  3605. call __ecp_nistz256_mul_montx
  ; Inline doubling of the 4-limb value in r12,r13,r8,r9 (r11 catches the
  ; carry-out), followed by a trial subtraction of the constant with limbs
  ; (-1, r14, 0, r15); the cmovc's restore the unsubtracted value when that
  ; borrows. r14/r15 are not set in this function -- presumably the helper
  ; leaves the corresponding modulus limbs there (TODO confirm).
  ; Interleaved: rax,rbp,rcx,r10 <- the four qwords at rsp+192 for subx.
  3606. xor r11,r11
  3607. add r12,r12
  3608. lea rsi,[192+rsp]
  3609. adc r13,r13
  3610. mov rax,r12
  3611. adc r8,r8
  3612. adc r9,r9
  3613. mov rbp,r13
  3614. adc r11,0
  3615. sub r12,-1
  3616. mov rcx,r8
  3617. sbb r13,r14
  3618. sbb r8,0
  3619. mov r10,r9
  3620. sbb r9,r15
  3621. sbb r11,0
  3622. cmovc r12,rax
  3623. mov rax,QWORD[rsi]
  3624. cmovc r13,rbp
  3625. mov rbp,QWORD[8+rsi]
  3626. cmovc r8,rcx
  3627. mov rcx,QWORD[16+rsi]
  3628. cmovc r9,r10
  3629. mov r10,QWORD[24+rsi]
  3630. call __ecp_nistz256_subx
  ; rsp+224 <- sub_fromx with memory operand [rsp+160]  (becomes output bytes 0..31)
  3631. lea rbx,[160+rsp]
  3632. lea rdi,[224+rsp]
  3633. call __ecp_nistz256_sub_fromx
  ; subx against the four qwords at rsp+0; the result is stored explicitly
  ; here from r12,r13,r8,r9 to rsp+64.
  3634. mov rax,QWORD[((0+0))+rsp]
  3635. mov rbp,QWORD[((0+8))+rsp]
  3636. mov rcx,QWORD[((0+16))+rsp]
  3637. mov r10,QWORD[((0+24))+rsp]
  3638. lea rdi,[64+rsp]
  3639. call __ecp_nistz256_subx
  3640. mov QWORD[rdi],r12
  3641. mov QWORD[8+rdi],r13
  3642. mov QWORD[16+rdi],r8
  3643. mov QWORD[24+rdi],r9
  ; rsp+32 <- mul([rsp+352], [rsp+160])
  3644. mov rdx,QWORD[352+rsp]
  3645. lea rbx,[352+rsp]
  3646. mov r9,QWORD[((0+160))+rsp]
  3647. mov r10,QWORD[((8+160))+rsp]
  3648. lea rsi,[((-128+160))+rsp]
  3649. mov r11,QWORD[((16+160))+rsp]
  3650. mov r12,QWORD[((24+160))+rsp]
  3651. lea rdi,[32+rsp]
  3652. call __ecp_nistz256_mul_montx
  ; rsp+64 <- mul([rsp+96], [rsp+64])
  3653. mov rdx,QWORD[96+rsp]
  3654. lea rbx,[96+rsp]
  3655. mov r9,QWORD[((0+64))+rsp]
  3656. mov r10,QWORD[((8+64))+rsp]
  3657. lea rsi,[((-128+64))+rsp]
  3658. mov r11,QWORD[((16+64))+rsp]
  3659. mov r12,QWORD[((24+64))+rsp]
  3660. lea rdi,[64+rsp]
  3661. call __ecp_nistz256_mul_montx
  ; rsp+256 <- sub_fromx with memory operand [rsp+32]  (becomes output bytes 32..63)
  3662. lea rbx,[32+rsp]
  3663. lea rdi,[256+rsp]
  3664. call __ecp_nistz256_sub_fromx
  3665. DB 102,72,15,126,199 ; hand-encoded movq rdi,xmm0 (recover the destination pointer)
  ; Branch-free selection of each 32-byte output field using the two masks:
  ;   xmm4 set -> field copied from the first input (rsp+320/352/384)
  ;   else xmm5 set -> field taken from the second input (rsp+416/448), or
  ;                    from the constant $L$ONE_mont for bytes 64..95, since
  ;                    the second input has no third field
  ;   else -> the computed field (rsp+224/256/288)
  ; Output bytes 64..95:
  3666. movdqa xmm0,xmm5
  3667. movdqa xmm1,xmm5
  3668. pandn xmm0,XMMWORD[288+rsp]
  3669. movdqa xmm2,xmm5
  3670. pandn xmm1,XMMWORD[((288+16))+rsp]
  3671. movdqa xmm3,xmm5
  3672. pand xmm2,XMMWORD[$L$ONE_mont]
  3673. pand xmm3,XMMWORD[(($L$ONE_mont+16))]
  3674. por xmm2,xmm0
  3675. por xmm3,xmm1
  3676. movdqa xmm0,xmm4
  3677. movdqa xmm1,xmm4
  3678. pandn xmm0,xmm2
  3679. movdqa xmm2,xmm4
  3680. pandn xmm1,xmm3
  3681. movdqa xmm3,xmm4
  3682. pand xmm2,XMMWORD[384+rsp]
  3683. pand xmm3,XMMWORD[((384+16))+rsp]
  3684. por xmm2,xmm0
  3685. por xmm3,xmm1
  3686. movdqu XMMWORD[64+rdi],xmm2
  3687. movdqu XMMWORD[80+rdi],xmm3
  ; Output bytes 0..31:
  3688. movdqa xmm0,xmm5
  3689. movdqa xmm1,xmm5
  3690. pandn xmm0,XMMWORD[224+rsp]
  3691. movdqa xmm2,xmm5
  3692. pandn xmm1,XMMWORD[((224+16))+rsp]
  3693. movdqa xmm3,xmm5
  3694. pand xmm2,XMMWORD[416+rsp]
  3695. pand xmm3,XMMWORD[((416+16))+rsp]
  3696. por xmm2,xmm0
  3697. por xmm3,xmm1
  3698. movdqa xmm0,xmm4
  3699. movdqa xmm1,xmm4
  3700. pandn xmm0,xmm2
  3701. movdqa xmm2,xmm4
  3702. pandn xmm1,xmm3
  3703. movdqa xmm3,xmm4
  3704. pand xmm2,XMMWORD[320+rsp]
  3705. pand xmm3,XMMWORD[((320+16))+rsp]
  3706. por xmm2,xmm0
  3707. por xmm3,xmm1
  3708. movdqu XMMWORD[rdi],xmm2
  3709. movdqu XMMWORD[16+rdi],xmm3
  ; Output bytes 32..63:
  3710. movdqa xmm0,xmm5
  3711. movdqa xmm1,xmm5
  3712. pandn xmm0,XMMWORD[256+rsp]
  3713. movdqa xmm2,xmm5
  3714. pandn xmm1,XMMWORD[((256+16))+rsp]
  3715. movdqa xmm3,xmm5
  3716. pand xmm2,XMMWORD[448+rsp]
  3717. pand xmm3,XMMWORD[((448+16))+rsp]
  3718. por xmm2,xmm0
  3719. por xmm3,xmm1
  3720. movdqa xmm0,xmm4
  3721. movdqa xmm1,xmm4
  3722. pandn xmm0,xmm2
  3723. movdqa xmm2,xmm4
  3724. pandn xmm1,xmm3
  3725. movdqa xmm3,xmm4
  3726. pand xmm2,XMMWORD[352+rsp]
  3727. pand xmm3,XMMWORD[((352+16))+rsp]
  3728. por xmm2,xmm0
  3729. por xmm3,xmm1
  3730. movdqu XMMWORD[32+rdi],xmm2
  3731. movdqu XMMWORD[48+rdi],xmm3
  ; Epilogue: rsi = rsp as it was at entry (32*15+8 scratch + 48 bytes of
  ; pushes = 480+56); reload the six saved registers from just below it.
  3732. lea rsi,[((480+56))+rsp]
  3733. mov r15,QWORD[((-48))+rsi]
  3734. mov r14,QWORD[((-40))+rsi]
  3735. mov r13,QWORD[((-32))+rsi]
  3736. mov r12,QWORD[((-24))+rsi]
  3737. mov rbx,QWORD[((-16))+rsi]
  3738. mov rbp,QWORD[((-8))+rsi]
  3739. lea rsp,[rsi]
  3740. $L$add_affinex_epilogue:
  3741. mov rdi,QWORD[8+rsp] ;WIN64 epilogue
  3742. mov rsi,QWORD[16+rsp]
  3743. DB 0F3h,0C3h ;repret
  3744. $L$SEH_end_GFp_nistz256_point_add_affinex:
  3745. EXTERN __imp_RtlVirtualUnwind
  3746. ALIGN 16
  ; short_handler -- Win64 language-specific exception handler, referenced
  ; from .xdata for GFp_nistz256_neg.
  ;
  ; Uses r8 and r9 as pointers to the CONTEXT record and the
  ; DISPATCHER_CONTEXT (the offsets below follow the Windows x64 layouts of
  ; those structures). HandlerData (at [56+r9]) is read as two image-relative
  ; DWORDs: the function's body label and its epilogue label.
  ;
  ;   Rip <  body               : frame = context->Rax, nothing else to restore
  ;   body <= Rip < epilogue    : frame = context->Rsp + 16; R12 and R13 are
  ;                               reloaded from the two qwords below that frame
  ;   Rip >= epilogue           : frame = context->Rsp, nothing else to restore
  ;
  ; All three cases continue at $L$common_seh_tail (inside full_handler) with
  ; rax = frame.
  3747. short_handler:
  3748. push rsi
  3749. push rdi
  3750. push rbx
  3751. push rbp
  3752. push r12
  3753. push r13
  3754. push r14
  3755. push r15
  3756. pushfq
  3757. sub rsp,64
  3758. mov rax,QWORD[120+r8] ; context->Rax
  3759. mov rbx,QWORD[248+r8] ; context->Rip
  3760. mov rsi,QWORD[8+r9] ; disp->ImageBase
  3761. mov r11,QWORD[56+r9] ; disp->HandlerData
  3762. mov r10d,DWORD[r11] ; HandlerData[0]: RVA of the body label
  3763. lea r10,[r10*1+rsi]
  3764. cmp rbx,r10
  3765. jb NEAR $L$common_seh_tail
  3766. mov rax,QWORD[152+r8] ; context->Rsp
  3767. mov r10d,DWORD[4+r11] ; HandlerData[1]: RVA of the epilogue label
  3768. lea r10,[r10*1+rsi]
  3769. cmp rbx,r10
  3770. jae NEAR $L$common_seh_tail
  3771. lea rax,[16+rax]
  3772. mov r12,QWORD[((-8))+rax]
  3773. mov r13,QWORD[((-16))+rax]
  3774. mov QWORD[216+r8],r12 ; context->R12
  3775. mov QWORD[224+r8],r13 ; context->R13
  3776. jmp NEAR $L$common_seh_tail
  3777. ALIGN 16
  ; full_handler -- Win64 language-specific exception handler, referenced
  ; from .xdata for the functions that push rbp, rbx, r12..r15 and then
  ; allocate a fixed amount of stack.
  ;
  ; Uses r8 and r9 as pointers to the CONTEXT record and the
  ; DISPATCHER_CONTEXT (the offsets below follow the Windows x64 layouts of
  ; those structures). HandlerData (at [56+r9]) is read as three DWORDs:
  ; RVA of the body label, RVA of the epilogue label, and the distance from
  ; the in-body rsp up to the rsp at function entry (e.g. 32*18+56).
  ;
  ;   Rip <  body               : frame = context->Rax (the prologues in this
  ;                               file execute "mov rax,rsp" before pushing)
  ;   body <= Rip < epilogue    : frame = context->Rsp + HandlerData[2]; the
  ;                               six pushed registers are reloaded from below
  ;                               the frame and written back into the context
  ;   Rip >= epilogue           : frame = context->Rsp
  ;
  ; $L$common_seh_tail (also the join point for short_handler) takes
  ; rax = frame, restores Rsi/Rdi from the slots the WIN64 prologue saved them
  ; in, copies the fixed-up context to disp->ContextRecord, calls
  ; RtlVirtualUnwind, and returns 1 in eax (ExceptionContinueSearch in the
  ; Windows EXCEPTION_DISPOSITION enum).
  3778. full_handler:
  3779. push rsi
  3780. push rdi
  3781. push rbx
  3782. push rbp
  3783. push r12
  3784. push r13
  3785. push r14
  3786. push r15
  3787. pushfq
  3788. sub rsp,64
  3789. mov rax,QWORD[120+r8] ; context->Rax
  3790. mov rbx,QWORD[248+r8] ; context->Rip
  3791. mov rsi,QWORD[8+r9] ; disp->ImageBase
  3792. mov r11,QWORD[56+r9] ; disp->HandlerData
  3793. mov r10d,DWORD[r11] ; HandlerData[0]: RVA of the body label
  3794. lea r10,[r10*1+rsi]
  3795. cmp rbx,r10
  3796. jb NEAR $L$common_seh_tail
  3797. mov rax,QWORD[152+r8] ; context->Rsp
  3798. mov r10d,DWORD[4+r11] ; HandlerData[1]: RVA of the epilogue label
  3799. lea r10,[r10*1+rsi]
  3800. cmp rbx,r10
  3801. jae NEAR $L$common_seh_tail
  3802. mov r10d,DWORD[8+r11] ; HandlerData[2]: frame size
  3803. lea rax,[r10*1+rax]
  ; Reload the registers in the reverse of the push order used by the
  ; prologues (rbp was pushed first, so it sits at frame-8).
  3804. mov rbp,QWORD[((-8))+rax]
  3805. mov rbx,QWORD[((-16))+rax]
  3806. mov r12,QWORD[((-24))+rax]
  3807. mov r13,QWORD[((-32))+rax]
  3808. mov r14,QWORD[((-40))+rax]
  3809. mov r15,QWORD[((-48))+rax]
  3810. mov QWORD[144+r8],rbx ; context->Rbx
  3811. mov QWORD[160+r8],rbp ; context->Rbp
  3812. mov QWORD[216+r8],r12 ; context->R12
  3813. mov QWORD[224+r8],r13 ; context->R13
  3814. mov QWORD[232+r8],r14 ; context->R14
  3815. mov QWORD[240+r8],r15 ; context->R15
  3816. $L$common_seh_tail:
  ; [8+frame] and [16+frame] are where the WIN64 prologue stored rdi and rsi.
  3817. mov rdi,QWORD[8+rax]
  3818. mov rsi,QWORD[16+rax]
  3819. mov QWORD[152+r8],rax ; context->Rsp
  3820. mov QWORD[168+r8],rsi ; context->Rsi
  3821. mov QWORD[176+r8],rdi ; context->Rdi
  ; Copy 154 qwords (the CONTEXT record) from r8 to disp->ContextRecord.
  3822. mov rdi,QWORD[40+r9]
  3823. mov rsi,r8
  3824. mov ecx,154
  3825. DD 0xa548f3fc ; bytes FC F3 48 A5 = cld ; rep movsq
  ; RtlVirtualUnwind(rcx = 0 (HandlerType), rdx = ImageBase, r8 = ControlPc,
  ;   r9 = FunctionEntry, [32+rsp] = ContextRecord, [40+rsp] = &HandlerData,
  ;   [48+rsp] = &EstablisherFrame, [56+rsp] = NULL (ContextPointers)).
  ; The first 32 bytes at rsp are the callee's shadow space.
  3826. mov rsi,r9
  3827. xor rcx,rcx
  3828. mov rdx,QWORD[8+rsi]
  3829. mov r8,QWORD[rsi]
  3830. mov r9,QWORD[16+rsi]
  3831. mov r10,QWORD[40+rsi]
  3832. lea r11,[56+rsi]
  3833. lea r12,[24+rsi]
  3834. mov QWORD[32+rsp],r10
  3835. mov QWORD[40+rsp],r11
  3836. mov QWORD[48+rsp],r12
  3837. mov QWORD[56+rsp],rcx
  3838. call QWORD[__imp_RtlVirtualUnwind]
  3839. mov eax,1
  3840. add rsp,64
  3841. popfq
  3842. pop r15
  3843. pop r14
  3844. pop r13
  3845. pop r12
  3846. pop rbp
  3847. pop rbx
  3848. pop rdi
  3849. pop rsi
  3850. DB 0F3h,0C3h ;repret
  ; .pdata -- Win64 function table. Each group of three image-relative DWORDs
  ; is one entry: start of the function, end of the function, and the
  ; function's unwind-info record in .xdata.
  ; The two select_w5/select_w7 pairs share one unwind-info record each
  ; ($L$SEH_info_..._select_wX).
  3851. section .pdata rdata align=4
  3852. ALIGN 4
  3853. DD $L$SEH_begin_GFp_nistz256_neg wrt ..imagebase
  3854. DD $L$SEH_end_GFp_nistz256_neg wrt ..imagebase
  3855. DD $L$SEH_info_GFp_nistz256_neg wrt ..imagebase
  3856. DD $L$SEH_begin_GFp_p256_scalar_mul_mont wrt ..imagebase
  3857. DD $L$SEH_end_GFp_p256_scalar_mul_mont wrt ..imagebase
  3858. DD $L$SEH_info_GFp_p256_scalar_mul_mont wrt ..imagebase
  3859. DD $L$SEH_begin_GFp_p256_scalar_sqr_rep_mont wrt ..imagebase
  3860. DD $L$SEH_end_GFp_p256_scalar_sqr_rep_mont wrt ..imagebase
  3861. DD $L$SEH_info_GFp_p256_scalar_sqr_rep_mont wrt ..imagebase
  3862. DD $L$SEH_begin_ecp_nistz256_ord_mul_montx wrt ..imagebase
  3863. DD $L$SEH_end_ecp_nistz256_ord_mul_montx wrt ..imagebase
  3864. DD $L$SEH_info_ecp_nistz256_ord_mul_montx wrt ..imagebase
  3865. DD $L$SEH_begin_ecp_nistz256_ord_sqr_montx wrt ..imagebase
  3866. DD $L$SEH_end_ecp_nistz256_ord_sqr_montx wrt ..imagebase
  3867. DD $L$SEH_info_ecp_nistz256_ord_sqr_montx wrt ..imagebase
  3868. DD $L$SEH_begin_GFp_nistz256_mul_mont wrt ..imagebase
  3869. DD $L$SEH_end_GFp_nistz256_mul_mont wrt ..imagebase
  3870. DD $L$SEH_info_GFp_nistz256_mul_mont wrt ..imagebase
  3871. DD $L$SEH_begin_GFp_nistz256_sqr_mont wrt ..imagebase
  3872. DD $L$SEH_end_GFp_nistz256_sqr_mont wrt ..imagebase
  3873. DD $L$SEH_info_GFp_nistz256_sqr_mont wrt ..imagebase
  3874. DD $L$SEH_begin_GFp_nistz256_select_w5 wrt ..imagebase
  3875. DD $L$SEH_end_GFp_nistz256_select_w5 wrt ..imagebase
  3876. DD $L$SEH_info_GFp_nistz256_select_wX wrt ..imagebase
  3877. DD $L$SEH_begin_GFp_nistz256_select_w7 wrt ..imagebase
  3878. DD $L$SEH_end_GFp_nistz256_select_w7 wrt ..imagebase
  3879. DD $L$SEH_info_GFp_nistz256_select_wX wrt ..imagebase
  3880. DD $L$SEH_begin_GFp_nistz256_avx2_select_w5 wrt ..imagebase
  3881. DD $L$SEH_end_GFp_nistz256_avx2_select_w5 wrt ..imagebase
  3882. DD $L$SEH_info_GFp_nistz256_avx2_select_wX wrt ..imagebase
  3883. DD $L$SEH_begin_GFp_nistz256_avx2_select_w7 wrt ..imagebase
  3884. DD $L$SEH_end_GFp_nistz256_avx2_select_w7 wrt ..imagebase
  3885. DD $L$SEH_info_GFp_nistz256_avx2_select_wX wrt ..imagebase
  3886. DD $L$SEH_begin_GFp_nistz256_point_double wrt ..imagebase
  3887. DD $L$SEH_end_GFp_nistz256_point_double wrt ..imagebase
  3888. DD $L$SEH_info_GFp_nistz256_point_double wrt ..imagebase
  3889. DD $L$SEH_begin_GFp_nistz256_point_add wrt ..imagebase
  3890. DD $L$SEH_end_GFp_nistz256_point_add wrt ..imagebase
  3891. DD $L$SEH_info_GFp_nistz256_point_add wrt ..imagebase
  3892. DD $L$SEH_begin_GFp_nistz256_point_add_affine wrt ..imagebase
  3893. DD $L$SEH_end_GFp_nistz256_point_add_affine wrt ..imagebase
  3894. DD $L$SEH_info_GFp_nistz256_point_add_affine wrt ..imagebase
  3895. DD $L$SEH_begin_GFp_nistz256_point_doublex wrt ..imagebase
  3896. DD $L$SEH_end_GFp_nistz256_point_doublex wrt ..imagebase
  3897. DD $L$SEH_info_GFp_nistz256_point_doublex wrt ..imagebase
  3898. DD $L$SEH_begin_GFp_nistz256_point_addx wrt ..imagebase
  3899. DD $L$SEH_end_GFp_nistz256_point_addx wrt ..imagebase
  3900. DD $L$SEH_info_GFp_nistz256_point_addx wrt ..imagebase
  3901. DD $L$SEH_begin_GFp_nistz256_point_add_affinex wrt ..imagebase
  3902. DD $L$SEH_end_GFp_nistz256_point_add_affinex wrt ..imagebase
  3903. DD $L$SEH_info_GFp_nistz256_point_add_affinex wrt ..imagebase
  ; .xdata -- Win64 unwind-info records referenced from .pdata.
  ;
  ; Two record shapes appear below:
  ;
  ; 1. Handler-based records, "DB 9,0,0,0": first byte 9 = version 1 with the
  ;    exception-handler flag set; prolog size, unwind-code count and frame
  ;    register are all 0. They are followed by the handler's RVA and the
  ;    handler data consumed by short_handler / full_handler:
  ;      DD <body label>, <epilogue label>
  ;      DD <frame size>, 0      (full_handler only)
  ;    where <frame size> is the distance from the in-body rsp to the rsp at
  ;    function entry: 48 for functions that only push six registers, or
  ;    32*N+56 for those that also allocate 32*N+8 bytes of scratch.
  ;
  ; 2. Plain unwind-code records (the two select_wX entries), which carry no
  ;    handler and describe the prologue with explicit unwind codes.
  3904. section .xdata rdata align=8
  3905. ALIGN 8
  3906. $L$SEH_info_GFp_nistz256_neg:
  3907. DB 9,0,0,0
  3908. DD short_handler wrt ..imagebase
  3909. DD $L$neg_body wrt ..imagebase,$L$neg_epilogue wrt ..imagebase
  3910. $L$SEH_info_GFp_p256_scalar_mul_mont:
  3911. DB 9,0,0,0
  3912. DD full_handler wrt ..imagebase
  3913. DD $L$ord_mul_body wrt ..imagebase,$L$ord_mul_epilogue wrt ..imagebase
  3914. DD 48,0
  3915. $L$SEH_info_GFp_p256_scalar_sqr_rep_mont:
  3916. DB 9,0,0,0
  3917. DD full_handler wrt ..imagebase
  3918. DD $L$ord_sqr_body wrt ..imagebase,$L$ord_sqr_epilogue wrt ..imagebase
  3919. DD 48,0
  3920. $L$SEH_info_ecp_nistz256_ord_mul_montx:
  3921. DB 9,0,0,0
  3922. DD full_handler wrt ..imagebase
  3923. DD $L$ord_mulx_body wrt ..imagebase,$L$ord_mulx_epilogue wrt ..imagebase
  3924. DD 48,0
  3925. $L$SEH_info_ecp_nistz256_ord_sqr_montx:
  3926. DB 9,0,0,0
  3927. DD full_handler wrt ..imagebase
  3928. DD $L$ord_sqrx_body wrt ..imagebase,$L$ord_sqrx_epilogue wrt ..imagebase
  3929. DD 48,0
  3930. $L$SEH_info_GFp_nistz256_mul_mont:
  3931. DB 9,0,0,0
  3932. DD full_handler wrt ..imagebase
  3933. DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
  3934. DD 48,0
  3935. $L$SEH_info_GFp_nistz256_sqr_mont:
  3936. DB 9,0,0,0
  3937. DD full_handler wrt ..imagebase
  3938. DD $L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase
  3939. DD 48,0
  ; Shared by select_w5 and select_w7. Header: version 1, no handler,
  ; prolog size 0x33, 0x16 (22) unwind-code slots, no frame register.
  ; Each following line is one unwind code: prolog offset, (info<<4)|op, and
  ; a 16-bit operand. Op 8 saves an XMM register at operand*16 from rsp
  ; (xmm15 down to xmm6); op 1 is a large stack allocation of operand*8
  ; bytes (0x15*8 = 168).
  3940. $L$SEH_info_GFp_nistz256_select_wX:
  3941. DB 0x01,0x33,0x16,0x00
  3942. DB 0x33,0xf8,0x09,0x00
  3943. DB 0x2e,0xe8,0x08,0x00
  3944. DB 0x29,0xd8,0x07,0x00
  3945. DB 0x24,0xc8,0x06,0x00
  3946. DB 0x1f,0xb8,0x05,0x00
  3947. DB 0x1a,0xa8,0x04,0x00
  3948. DB 0x15,0x98,0x03,0x00
  3949. DB 0x10,0x88,0x02,0x00
  3950. DB 0x0c,0x78,0x01,0x00
  3951. DB 0x08,0x68,0x00,0x00
  3952. DB 0x04,0x01,0x15,0x00
  3953. ALIGN 8
  ; Shared by avx2_select_w5 and avx2_select_w7. Header: version 1, no
  ; handler, prolog size 0x36, 0x17 (23) unwind-code slots, frame register
  ; field 0x0b (register 11 = r11, offset 0). Same code layout as above, plus
  ; a final one-slot code (op 3, info 0xb: set frame pointer register) padded
  ; with two zero bytes.
  3954. $L$SEH_info_GFp_nistz256_avx2_select_wX:
  3955. DB 0x01,0x36,0x17,0x0b
  3956. DB 0x36,0xf8,0x09,0x00
  3957. DB 0x31,0xe8,0x08,0x00
  3958. DB 0x2c,0xd8,0x07,0x00
  3959. DB 0x27,0xc8,0x06,0x00
  3960. DB 0x22,0xb8,0x05,0x00
  3961. DB 0x1d,0xa8,0x04,0x00
  3962. DB 0x18,0x98,0x03,0x00
  3963. DB 0x13,0x88,0x02,0x00
  3964. DB 0x0e,0x78,0x01,0x00
  3965. DB 0x09,0x68,0x00,0x00
  3966. DB 0x04,0x01,0x15,0x00
  3967. DB 0x00,0xb3,0x00,0x00
  3968. ALIGN 8
  ; Point routines: frame size = scratch allocation + 8 + 48 bytes of pushes.
  3969. $L$SEH_info_GFp_nistz256_point_double:
  3970. DB 9,0,0,0
  3971. DD full_handler wrt ..imagebase
  3972. DD $L$point_doubleq_body wrt ..imagebase,$L$point_doubleq_epilogue wrt ..imagebase
  3973. DD 32*5+56,0
  3974. $L$SEH_info_GFp_nistz256_point_add:
  3975. DB 9,0,0,0
  3976. DD full_handler wrt ..imagebase
  3977. DD $L$point_addq_body wrt ..imagebase,$L$point_addq_epilogue wrt ..imagebase
  3978. DD 32*18+56,0
  3979. $L$SEH_info_GFp_nistz256_point_add_affine:
  3980. DB 9,0,0,0
  3981. DD full_handler wrt ..imagebase
  3982. DD $L$add_affineq_body wrt ..imagebase,$L$add_affineq_epilogue wrt ..imagebase
  3983. DD 32*15+56,0
  3984. ALIGN 8
  3985. $L$SEH_info_GFp_nistz256_point_doublex:
  3986. DB 9,0,0,0
  3987. DD full_handler wrt ..imagebase
  3988. DD $L$point_doublex_body wrt ..imagebase,$L$point_doublex_epilogue wrt ..imagebase
  3989. DD 32*5+56,0
  3990. $L$SEH_info_GFp_nistz256_point_addx:
  3991. DB 9,0,0,0
  3992. DD full_handler wrt ..imagebase
  3993. DD $L$point_addx_body wrt ..imagebase,$L$point_addx_epilogue wrt ..imagebase
  3994. DD 32*18+56,0
  3995. $L$SEH_info_GFp_nistz256_point_add_affinex:
  3996. DB 9,0,0,0
  3997. DD full_handler wrt ..imagebase
  3998. DD $L$add_affinex_body wrt ..imagebase,$L$add_affinex_epilogue wrt ..imagebase
  3999. DD 32*15+56,0