salsa2012.s

# qhasm: int32 input_0
# qhasm: int32 input_1
# qhasm: int32 input_2
# qhasm: int32 input_3
# qhasm: stack32 input_4
# qhasm: stack32 input_5
# qhasm: stack32 input_6
# qhasm: stack32 input_7
# qhasm: int32 caller_r4
# qhasm: int32 caller_r5
# qhasm: int32 caller_r6
# qhasm: int32 caller_r7
# qhasm: int32 caller_r8
# qhasm: int32 caller_r9
# qhasm: int32 caller_r10
# qhasm: int32 caller_r11
# qhasm: int32 caller_r14
# qhasm: reg128 caller_q4
# qhasm: reg128 caller_q5
# qhasm: reg128 caller_q6
# qhasm: reg128 caller_q7
# qhasm: startcode
.fpu neon
.text
# qhasm: constant sigma:
.align 2
sigma:
# qhasm: const32 1634760805
.word 1634760805
# qhasm: const32 857760878
.word 857760878
# qhasm: const32 2036477234
.word 2036477234
# qhasm: const32 1797285236
.word 1797285236
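# sigma is the Salsa20 constant "expand 32-byte k":
# 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574,
# occupying state words 0, 5, 10 and 15.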
# qhasm: int128 abab
# qhasm: int128 diag0
# qhasm: int128 diag1
# qhasm: int128 diag2
# qhasm: int128 diag3
# qhasm: int128 a0
# qhasm: int128 a1
# qhasm: int128 a2
# qhasm: int128 a3
# qhasm: int128 b0
# qhasm: int128 b1
# qhasm: int128 b2
# qhasm: int128 b3
# qhasm: int128 next_diag0
# qhasm: int128 next_diag1
# qhasm: int128 next_diag2
# qhasm: int128 next_diag3
# qhasm: int128 next_a0
# qhasm: int128 next_a1
# qhasm: int128 next_a2
# qhasm: int128 next_a3
# qhasm: int128 next_b0
# qhasm: int128 next_b1
# qhasm: int128 next_b2
# qhasm: int128 next_b3
# qhasm: int128 x0x5x10x15
# qhasm: int128 x12x1x6x11
# qhasm: int128 x8x13x2x7
# qhasm: int128 x4x9x14x3
# qhasm: int128 x0x1x10x11
# qhasm: int128 x12x13x6x7
# qhasm: int128 x8x9x2x3
# qhasm: int128 x4x5x14x15
# qhasm: int128 x0x1x2x3
# qhasm: int128 x4x5x6x7
# qhasm: int128 x8x9x10x11
# qhasm: int128 x12x13x14x15
# qhasm: int128 m0m1m2m3
# qhasm: int128 m4m5m6m7
# qhasm: int128 m8m9m10m11
# qhasm: int128 m12m13m14m15
# qhasm: int128 start0
# qhasm: int128 start1
# qhasm: int128 start2
# qhasm: int128 start3
# qhasm: stack128 stack_start3
# qhasm: stack128 next_start2
# qhasm: stack128 next_start3
# qhasm: int128 k0k1k2k3
# qhasm: int128 k4k5k6k7
# qhasm: int128 k1n1k7k2
# qhasm: int128 n2n3n3n2
# qhasm: int128 k2k3k6k7
# qhasm: int128 nextblock
# qhasm: stack128 stack_q4
# qhasm: stack128 stack_q5
# qhasm: stack128 stack_q6
# qhasm: stack128 stack_q7
# qhasm: stack32 stack_r4
# qhasm: stack128 k2k3k6k7_stack
# qhasm: stack128 k1n1k7k2_stack
# qhasm: stack512 tmp
# qhasm: stack32 savec
# qhasm: int32 i
# qhasm: int32 ci
# qhasm: int32 mi
# qhasm: enter zt_salsa2012_armneon3_xor
.align 2
.global _zt_salsa2012_armneon3_xor
.global zt_salsa2012_armneon3_xor
.type _zt_salsa2012_armneon3_xor STT_FUNC
.type zt_salsa2012_armneon3_xor STT_FUNC
_zt_salsa2012_armneon3_xor:
zt_salsa2012_armneon3_xor:
sub sp,sp,#256
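# Calling convention, inferred from the argument loads below:
#   r0 = c (output), r1 = m (input; 0 means generate raw keystream),
#   r2/r3 = mlenlow/mlenhigh (64-bit byte count),
#   [sp,#256] = n (8-byte nonce), [sp,#260] = k (32-byte key).
# The 256-byte frame holds a 64-byte tmp block at [sp,#0], the saved
# output pointer and r4, and the callee-saved NEON registers d8-d15.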
# qhasm: new stack_q4
# qhasm: new stack_q5
# qhasm: new stack_q6
# qhasm: new stack_q7
# qhasm: stack_q4 bot = caller_q4 bot
# asm 1: vstr <caller_q4=reg128#5%bot,<stack_q4=stack128#1
# asm 2: vstr <caller_q4=d8,<stack_q4=[sp,#96]
vstr d8,[sp,#96]
# qhasm: stack_q4 top = caller_q4 top
# asm 1: vstr <caller_q4=reg128#5%top,<stack_q4=stack128#1
# asm 2: vstr <caller_q4=d9,<stack_q4=[sp,#104]
vstr d9,[sp,#104]
# qhasm: stack_q5 bot = caller_q5 bot
# asm 1: vstr <caller_q5=reg128#6%bot,<stack_q5=stack128#2
# asm 2: vstr <caller_q5=d10,<stack_q5=[sp,#112]
vstr d10,[sp,#112]
# qhasm: stack_q5 top = caller_q5 top
# asm 1: vstr <caller_q5=reg128#6%top,<stack_q5=stack128#2
# asm 2: vstr <caller_q5=d11,<stack_q5=[sp,#120]
vstr d11,[sp,#120]
# qhasm: stack_q6 bot = caller_q6 bot
# asm 1: vstr <caller_q6=reg128#7%bot,<stack_q6=stack128#3
# asm 2: vstr <caller_q6=d12,<stack_q6=[sp,#128]
vstr d12,[sp,#128]
# qhasm: stack_q6 top = caller_q6 top
# asm 1: vstr <caller_q6=reg128#7%top,<stack_q6=stack128#3
# asm 2: vstr <caller_q6=d13,<stack_q6=[sp,#136]
vstr d13,[sp,#136]
# qhasm: stack_q7 bot = caller_q7 bot
# asm 1: vstr <caller_q7=reg128#8%bot,<stack_q7=stack128#4
# asm 2: vstr <caller_q7=d14,<stack_q7=[sp,#144]
vstr d14,[sp,#144]
# qhasm: stack_q7 top = caller_q7 top
# asm 1: vstr <caller_q7=reg128#8%top,<stack_q7=stack128#4
# asm 2: vstr <caller_q7=d15,<stack_q7=[sp,#152]
vstr d15,[sp,#152]
# qhasm: new stack_r4
# qhasm: stack_r4 = caller_r4
# asm 1: str <caller_r4=int32#5,>stack_r4=stack32#2
# asm 2: str <caller_r4=r4,>stack_r4=[sp,#68]
str r4,[sp,#68]
# qhasm: int32 c
# qhasm: c = input_0
# asm 1: mov >c=int32#1,<input_0=int32#1
# asm 2: mov >c=r0,<input_0=r0
mov r0,r0
# qhasm: int32 m
# qhasm: m = input_1
# asm 1: mov >m=int32#2,<input_1=int32#2
# asm 2: mov >m=r1,<input_1=r1
mov r1,r1
# qhasm: int32 mlenlow
# qhasm: mlenlow = input_2
# asm 1: mov >mlenlow=int32#3,<input_2=int32#3
# asm 2: mov >mlenlow=r2,<input_2=r2
mov r2,r2
# qhasm: int32 mlenhigh
# qhasm: mlenhigh = input_3
# asm 1: mov >mlenhigh=int32#4,<input_3=int32#4
# asm 2: mov >mlenhigh=r3,<input_3=r3
mov r3,r3
# qhasm: int32 n
# qhasm: n = input_4
# asm 1: ldr >n=int32#5,<input_4=stack32#arg1
# asm 2: ldr >n=r4,<input_4=[sp,#256]
ldr r4,[sp,#256]
# qhasm: int32 k
# qhasm: k = input_5
# asm 1: ldr >k=int32#13,<input_5=stack32#arg2
# asm 2: ldr >k=r12,<input_5=[sp,#260]
ldr r12,[sp,#260]
# qhasm: k0k1k2k3 = mem128[k]
# asm 1: vld1.8 {>k0k1k2k3=reg128#1%bot->k0k1k2k3=reg128#1%top},[<k=int32#13]
# asm 2: vld1.8 {>k0k1k2k3=d0->k0k1k2k3=d1},[<k=r12]
vld1.8 {d0-d1},[r12]
# qhasm: k += 16
# asm 1: add <k=int32#13,<k=int32#13,#16
# asm 2: add <k=r12,<k=r12,#16
add r12,r12,#16
# qhasm: k4k5k6k7 = mem128[k]
# asm 1: vld1.8 {>k4k5k6k7=reg128#2%bot->k4k5k6k7=reg128#2%top},[<k=int32#13]
# asm 2: vld1.8 {>k4k5k6k7=d2->k4k5k6k7=d3},[<k=r12]
vld1.8 {d2-d3},[r12]
# qhasm: i = sigma
# asm 1: ldr >i=int32#13,=sigma
# asm 2: ldr >i=r12,=sigma
ldr r12,=sigma
# qhasm: start0 = mem128[i]
# asm 1: vld1.8 {>start0=reg128#3%bot->start0=reg128#3%top},[<i=int32#13]
# asm 2: vld1.8 {>start0=d4->start0=d5},[<i=r12]
vld1.8 {d4-d5},[r12]
# qhasm: 2x start1 = 0
# asm 1: vmov.i64 >start1=reg128#4,#0
# asm 2: vmov.i64 >start1=q3,#0
vmov.i64 q3,#0
# qhasm: start1 bot = mem64[n]
# asm 1: vld1.8 {<start1=reg128#4%bot},[<n=int32#5]
# asm 2: vld1.8 {<start1=d6},[<n=r4]
vld1.8 {d6},[r4]
# qhasm: start1 = start1[1] start1[0] start1[2,3]
# asm 1: vext.32 <start1=reg128#4%bot,<start1=reg128#4%bot,<start1=reg128#4%bot,#1
# asm 2: vext.32 <start1=d6,<start1=d6,<start1=d6,#1
vext.32 d6,d6,d6,#1
# qhasm: start1 = start1[0,1] start1[1] k4k5k6k7[0]
# asm 1: vext.32 <start1=reg128#4%top,<start1=reg128#4%bot,<k4k5k6k7=reg128#2%bot,#1
# asm 2: vext.32 <start1=d7,<start1=d6,<k4k5k6k7=d2,#1
vext.32 d7,d6,d2,#1
# qhasm: new k1n1k7k2
# qhasm: k1n1k7k2 = k0k1k2k3[1] start1[0] k1n1k7k2[2,3]
# asm 1: vext.32 <k1n1k7k2=reg128#5%bot,<k0k1k2k3=reg128#1%bot,<start1=reg128#4%bot,#1
# asm 2: vext.32 <k1n1k7k2=d8,<k0k1k2k3=d0,<start1=d6,#1
vext.32 d8,d0,d6,#1
# qhasm: k1n1k7k2 = k1n1k7k2[0,1] k4k5k6k7[3] k0k1k2k3[2]
# asm 1: vext.32 <k1n1k7k2=reg128#5%top,<k4k5k6k7=reg128#2%top,<k0k1k2k3=reg128#1%top,#1
# asm 2: vext.32 <k1n1k7k2=d9,<k4k5k6k7=d3,<k0k1k2k3=d1,#1
vext.32 d9,d3,d1,#1
# qhasm: k2k3k6k7 = k4k5k6k7
# asm 1: vmov >k2k3k6k7=reg128#6,<k4k5k6k7=reg128#2
# asm 2: vmov >k2k3k6k7=q5,<k4k5k6k7=q1
vmov q5,q1
# qhasm: k2k3k6k7 = k0k1k2k3[2,3] k2k3k6k7[2,3]
# asm 1: vmov <k2k3k6k7=reg128#6%bot,<k0k1k2k3=reg128#1%top
# asm 2: vmov <k2k3k6k7=d10,<k0k1k2k3=d1
vmov d10,d1
# qhasm: start1 = k4k5k6k7[1] k0k1k2k3[0] start1[2,3]
# asm 1: vext.32 <start1=reg128#4%bot,<k4k5k6k7=reg128#2%bot,<k0k1k2k3=reg128#1%bot,#1
# asm 2: vext.32 <start1=d6,<k4k5k6k7=d2,<k0k1k2k3=d0,#1
vext.32 d6,d2,d0,#1
# qhasm: new k2k3k6k7_stack
# qhasm: k2k3k6k7_stack bot = k2k3k6k7 bot
# asm 1: vstr <k2k3k6k7=reg128#6%bot,<k2k3k6k7_stack=stack128#5
# asm 2: vstr <k2k3k6k7=d10,<k2k3k6k7_stack=[sp,#160]
vstr d10,[sp,#160]
# qhasm: k2k3k6k7_stack top = k2k3k6k7 top
# asm 1: vstr <k2k3k6k7=reg128#6%top,<k2k3k6k7_stack=stack128#5
# asm 2: vstr <k2k3k6k7=d11,<k2k3k6k7_stack=[sp,#168]
vstr d11,[sp,#168]
# qhasm: new k1n1k7k2_stack
# qhasm: k1n1k7k2_stack bot = k1n1k7k2 bot
# asm 1: vstr <k1n1k7k2=reg128#5%bot,<k1n1k7k2_stack=stack128#6
# asm 2: vstr <k1n1k7k2=d8,<k1n1k7k2_stack=[sp,#176]
vstr d8,[sp,#176]
# qhasm: k1n1k7k2_stack top = k1n1k7k2 top
# asm 1: vstr <k1n1k7k2=reg128#5%top,<k1n1k7k2_stack=stack128#6
# asm 2: vstr <k1n1k7k2=d9,<k1n1k7k2_stack=[sp,#184]
vstr d9,[sp,#184]
# qhasm: 2x n2n3n3n2 = 0
# asm 1: vmov.i64 >n2n3n3n2=reg128#1,#0
# asm 2: vmov.i64 >n2n3n3n2=q0,#0
vmov.i64 q0,#0
# qhasm: unsigned<? mlenlow - 128
# asm 1: cmp <mlenlow=int32#3,#128
# asm 2: cmp <mlenlow=r2,#128
cmp r2,#128
# qhasm: goto mlenlowbelow128 if unsigned<
blo ._mlenlowbelow128
# qhasm: mlenatleast128:
._mlenatleast128:
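# Main loop: each pass produces 128 bytes of output by running two
# 64-byte Salsa20/12 blocks in parallel (diag0..diag3 for one block,
# next_diag0..next_diag3 for the other). The 64-bit block counter,
# kept doubled in n2n3n3n2, is bumped once per block via nextblock.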
# qhasm: new k2k3k6k7
# qhasm: k2k3k6k7 bot = k2k3k6k7_stack bot
# asm 1: vldr <k2k3k6k7=reg128#2%bot,<k2k3k6k7_stack=stack128#5
# asm 2: vldr <k2k3k6k7=d2,<k2k3k6k7_stack=[sp,#160]
vldr d2,[sp,#160]
# qhasm: k2k3k6k7 top = k2k3k6k7_stack top
# asm 1: vldr <k2k3k6k7=reg128#2%top,<k2k3k6k7_stack=stack128#5
# asm 2: vldr <k2k3k6k7=d3,<k2k3k6k7_stack=[sp,#168]
vldr d3,[sp,#168]
# qhasm: new k1n1k7k2
# qhasm: k1n1k7k2 bot = k1n1k7k2_stack bot
# asm 1: vldr <k1n1k7k2=reg128#5%bot,<k1n1k7k2_stack=stack128#6
# asm 2: vldr <k1n1k7k2=d8,<k1n1k7k2_stack=[sp,#176]
vldr d8,[sp,#176]
# qhasm: k1n1k7k2 top = k1n1k7k2_stack top
# asm 1: vldr <k1n1k7k2=reg128#5%top,<k1n1k7k2_stack=stack128#6
# asm 2: vldr <k1n1k7k2=d9,<k1n1k7k2_stack=[sp,#184]
vldr d9,[sp,#184]
# qhasm: n2n3n3n2 = n2n3n3n2[0,1] n2n3n3n2[1] n2n3n3n2[0]
# asm 1: vext.32 <n2n3n3n2=reg128#1%top,<n2n3n3n2=reg128#1%bot,<n2n3n3n2=reg128#1%bot,#1
# asm 2: vext.32 <n2n3n3n2=d1,<n2n3n3n2=d0,<n2n3n3n2=d0,#1
vext.32 d1,d0,d0,#1
# qhasm: new diag2
# qhasm: diag2 = diag2[0,1] k1n1k7k2[0,1]
# asm 1: vmov <diag2=reg128#6%top,<k1n1k7k2=reg128#5%bot
# asm 2: vmov <diag2=d11,<k1n1k7k2=d8
vmov d11,d8
# qhasm: diag2 = n2n3n3n2[3] k2k3k6k7[2] diag2[2,3]
# asm 1: vext.32 <diag2=reg128#6%bot,<n2n3n3n2=reg128#1%top,<k2k3k6k7=reg128#2%top,#1
# asm 2: vext.32 <diag2=d10,<n2n3n3n2=d1,<k2k3k6k7=d3,#1
vext.32 d10,d1,d3,#1
# qhasm: new diag3
# qhasm: diag3 = diag3[0,1] k1n1k7k2[2,3]
# asm 1: vmov <diag3=reg128#7%top,<k1n1k7k2=reg128#5%top
# asm 2: vmov <diag3=d13,<k1n1k7k2=d9
vmov d13,d9
# qhasm: diag3 = k2k3k6k7[1] n2n3n3n2[2] diag3[2,3]
# asm 1: vext.32 <diag3=reg128#7%bot,<k2k3k6k7=reg128#2%bot,<n2n3n3n2=reg128#1%top,#1
# asm 2: vext.32 <diag3=d12,<k2k3k6k7=d2,<n2n3n3n2=d1,#1
vext.32 d12,d2,d1,#1
# qhasm: diag0 = start0
# asm 1: vmov >diag0=reg128#8,<start0=reg128#3
# asm 2: vmov >diag0=q7,<start0=q2
vmov q7,q2
# qhasm: diag1 = start1
# asm 1: vmov >diag1=reg128#9,<start1=reg128#4
# asm 2: vmov >diag1=q8,<start1=q3
vmov q8,q3
# qhasm: start2 = diag2
# asm 1: vmov >start2=reg128#10,<diag2=reg128#6
# asm 2: vmov >start2=q9,<diag2=q5
vmov q9,q5
# qhasm: new stack_start3
# qhasm: stack_start3 bot = diag3 bot
# asm 1: vstr <diag3=reg128#7%bot,<stack_start3=stack128#9
# asm 2: vstr <diag3=d12,<stack_start3=[sp,#224]
vstr d12,[sp,#224]
# qhasm: stack_start3 top = diag3 top
# asm 1: vstr <diag3=reg128#7%top,<stack_start3=stack128#9
# asm 2: vstr <diag3=d13,<stack_start3=[sp,#232]
vstr d13,[sp,#232]
# qhasm: 2x nextblock = 0xff
# asm 1: vmov.i64 >nextblock=reg128#11,#0xff
# asm 2: vmov.i64 >nextblock=q10,#0xff
vmov.i64 q10,#0xff
# qhasm: 4x nextblock unsigned>>= 7
# asm 1: vshr.u32 >nextblock=reg128#11,<nextblock=reg128#11,#7
# asm 2: vshr.u32 >nextblock=q10,<nextblock=q10,#7
vshr.u32 q10,q10,#7
# qhasm: 2x n2n3n3n2 += nextblock
# asm 1: vadd.i64 >n2n3n3n2=reg128#1,<n2n3n3n2=reg128#1,<nextblock=reg128#11
# asm 2: vadd.i64 >n2n3n3n2=q0,<n2n3n3n2=q0,<nextblock=q10
vadd.i64 q0,q0,q10
# qhasm: n2n3n3n2 = n2n3n3n2[0,1] n2n3n3n2[1] n2n3n3n2[0]
# asm 1: vext.32 <n2n3n3n2=reg128#1%top,<n2n3n3n2=reg128#1%bot,<n2n3n3n2=reg128#1%bot,#1
# asm 2: vext.32 <n2n3n3n2=d1,<n2n3n3n2=d0,<n2n3n3n2=d0,#1
vext.32 d1,d0,d0,#1
# qhasm: new next_diag2
# qhasm: next_diag2 = next_diag2[0,1] k1n1k7k2[0,1]
# asm 1: vmov <next_diag2=reg128#12%top,<k1n1k7k2=reg128#5%bot
# asm 2: vmov <next_diag2=d23,<k1n1k7k2=d8
vmov d23,d8
# qhasm: next_diag2 = n2n3n3n2[3] k2k3k6k7[2] next_diag2[2,3]
# asm 1: vext.32 <next_diag2=reg128#12%bot,<n2n3n3n2=reg128#1%top,<k2k3k6k7=reg128#2%top,#1
# asm 2: vext.32 <next_diag2=d22,<n2n3n3n2=d1,<k2k3k6k7=d3,#1
vext.32 d22,d1,d3,#1
# qhasm: new next_diag3
# qhasm: next_diag3 = next_diag3[0,1] k1n1k7k2[2,3]
# asm 1: vmov <next_diag3=reg128#13%top,<k1n1k7k2=reg128#5%top
# asm 2: vmov <next_diag3=d25,<k1n1k7k2=d9
vmov d25,d9
# qhasm: next_diag3 = k2k3k6k7[1] n2n3n3n2[2] next_diag3[2,3]
# asm 1: vext.32 <next_diag3=reg128#13%bot,<k2k3k6k7=reg128#2%bot,<n2n3n3n2=reg128#1%top,#1
# asm 2: vext.32 <next_diag3=d24,<k2k3k6k7=d2,<n2n3n3n2=d1,#1
vext.32 d24,d2,d1,#1
# qhasm: 2x n2n3n3n2 += nextblock
# asm 1: vadd.i64 >n2n3n3n2=reg128#1,<n2n3n3n2=reg128#1,<nextblock=reg128#11
# asm 2: vadd.i64 >n2n3n3n2=q0,<n2n3n3n2=q0,<nextblock=q10
vadd.i64 q0,q0,q10
# qhasm: next_diag0 = diag0
# asm 1: vmov >next_diag0=reg128#2,<diag0=reg128#8
# asm 2: vmov >next_diag0=q1,<diag0=q7
vmov q1,q7
# qhasm: next_diag1 = diag1
# asm 1: vmov >next_diag1=reg128#5,<diag1=reg128#9
# asm 2: vmov >next_diag1=q4,<diag1=q8
vmov q4,q8
# qhasm: next_start2 bot = next_diag2 bot
# asm 1: vstr <next_diag2=reg128#12%bot,<next_start2=stack128#7
# asm 2: vstr <next_diag2=d22,<next_start2=[sp,#192]
vstr d22,[sp,#192]
# qhasm: next_start2 top = next_diag2 top
# asm 1: vstr <next_diag2=reg128#12%top,<next_start2=stack128#7
# asm 2: vstr <next_diag2=d23,<next_start2=[sp,#200]
vstr d23,[sp,#200]
# qhasm: next_start3 bot = next_diag3 bot
# asm 1: vstr <next_diag3=reg128#13%bot,<next_start3=stack128#8
# asm 2: vstr <next_diag3=d24,<next_start3=[sp,#208]
vstr d24,[sp,#208]
# qhasm: next_start3 top = next_diag3 top
# asm 1: vstr <next_diag3=reg128#13%top,<next_start3=stack128#8
# asm 2: vstr <next_diag3=d25,<next_start3=[sp,#216]
vstr d25,[sp,#216]
# qhasm: i = 12
# asm 1: ldr >i=int32#5,=12
# asm 2: ldr >i=r4,=12
ldr r4,=12
# qhasm: mainloop2:
._mainloop2:
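# Round loop: two Salsa20 rounds per iteration, with i counting down
# from 12 by 2, i.e. six passes for twelve rounds. Each rotation
# (by 7, 9, 13, 18) is a vshl.i32/vsri.i32 pair, and the vext/vswp
# shuffles realign the diagonals between quarter-rounds. Work on the
# two blocks is interleaved to hide NEON instruction latency.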
# qhasm: 4x a0 = diag1 + diag0
# asm 1: vadd.i32 >a0=reg128#11,<diag1=reg128#9,<diag0=reg128#8
# asm 2: vadd.i32 >a0=q10,<diag1=q8,<diag0=q7
vadd.i32 q10,q8,q7
# qhasm: 4x next_a0 = next_diag1 + next_diag0
# asm 1: vadd.i32 >next_a0=reg128#14,<next_diag1=reg128#5,<next_diag0=reg128#2
# asm 2: vadd.i32 >next_a0=q13,<next_diag1=q4,<next_diag0=q1
vadd.i32 q13,q4,q1
# qhasm: 4x b0 = a0 << 7
# asm 1: vshl.i32 >b0=reg128#15,<a0=reg128#11,#7
# asm 2: vshl.i32 >b0=q14,<a0=q10,#7
vshl.i32 q14,q10,#7
# qhasm: 4x next_b0 = next_a0 << 7
# asm 1: vshl.i32 >next_b0=reg128#16,<next_a0=reg128#14,#7
# asm 2: vshl.i32 >next_b0=q15,<next_a0=q13,#7
vshl.i32 q15,q13,#7
# qhasm: 4x b0 insert= a0 >> 25
# asm 1: vsri.i32 <b0=reg128#15,<a0=reg128#11,#25
# asm 2: vsri.i32 <b0=q14,<a0=q10,#25
vsri.i32 q14,q10,#25
# qhasm: 4x next_b0 insert= next_a0 >> 25
# asm 1: vsri.i32 <next_b0=reg128#16,<next_a0=reg128#14,#25
# asm 2: vsri.i32 <next_b0=q15,<next_a0=q13,#25
vsri.i32 q15,q13,#25
# qhasm: diag3 ^= b0
# asm 1: veor >diag3=reg128#7,<diag3=reg128#7,<b0=reg128#15
# asm 2: veor >diag3=q6,<diag3=q6,<b0=q14
veor q6,q6,q14
# qhasm: next_diag3 ^= next_b0
# asm 1: veor >next_diag3=reg128#11,<next_diag3=reg128#13,<next_b0=reg128#16
# asm 2: veor >next_diag3=q10,<next_diag3=q12,<next_b0=q15
veor q10,q12,q15
# qhasm: 4x a1 = diag0 + diag3
# asm 1: vadd.i32 >a1=reg128#13,<diag0=reg128#8,<diag3=reg128#7
# asm 2: vadd.i32 >a1=q12,<diag0=q7,<diag3=q6
vadd.i32 q12,q7,q6
# qhasm: 4x next_a1 = next_diag0 + next_diag3
# asm 1: vadd.i32 >next_a1=reg128#14,<next_diag0=reg128#2,<next_diag3=reg128#11
# asm 2: vadd.i32 >next_a1=q13,<next_diag0=q1,<next_diag3=q10
vadd.i32 q13,q1,q10
# qhasm: 4x b1 = a1 << 9
# asm 1: vshl.i32 >b1=reg128#15,<a1=reg128#13,#9
# asm 2: vshl.i32 >b1=q14,<a1=q12,#9
vshl.i32 q14,q12,#9
# qhasm: 4x next_b1 = next_a1 << 9
# asm 1: vshl.i32 >next_b1=reg128#16,<next_a1=reg128#14,#9
# asm 2: vshl.i32 >next_b1=q15,<next_a1=q13,#9
vshl.i32 q15,q13,#9
# qhasm: 4x b1 insert= a1 >> 23
# asm 1: vsri.i32 <b1=reg128#15,<a1=reg128#13,#23
# asm 2: vsri.i32 <b1=q14,<a1=q12,#23
vsri.i32 q14,q12,#23
# qhasm: 4x next_b1 insert= next_a1 >> 23
# asm 1: vsri.i32 <next_b1=reg128#16,<next_a1=reg128#14,#23
# asm 2: vsri.i32 <next_b1=q15,<next_a1=q13,#23
vsri.i32 q15,q13,#23
# qhasm: diag2 ^= b1
# asm 1: veor >diag2=reg128#6,<diag2=reg128#6,<b1=reg128#15
# asm 2: veor >diag2=q5,<diag2=q5,<b1=q14
veor q5,q5,q14
# qhasm: next_diag2 ^= next_b1
# asm 1: veor >next_diag2=reg128#12,<next_diag2=reg128#12,<next_b1=reg128#16
# asm 2: veor >next_diag2=q11,<next_diag2=q11,<next_b1=q15
veor q11,q11,q15
# qhasm: 4x a2 = diag3 + diag2
# asm 1: vadd.i32 >a2=reg128#13,<diag3=reg128#7,<diag2=reg128#6
# asm 2: vadd.i32 >a2=q12,<diag3=q6,<diag2=q5
vadd.i32 q12,q6,q5
# qhasm: diag3 = diag3[3] diag3[0,1,2]
# asm 1: vext.32 >diag3=reg128#7,<diag3=reg128#7,<diag3=reg128#7,#3
# asm 2: vext.32 >diag3=q6,<diag3=q6,<diag3=q6,#3
vext.32 q6,q6,q6,#3
# qhasm: 4x next_a2 = next_diag3 + next_diag2
# asm 1: vadd.i32 >next_a2=reg128#14,<next_diag3=reg128#11,<next_diag2=reg128#12
# asm 2: vadd.i32 >next_a2=q13,<next_diag3=q10,<next_diag2=q11
vadd.i32 q13,q10,q11
# qhasm: 4x b2 = a2 << 13
# asm 1: vshl.i32 >b2=reg128#15,<a2=reg128#13,#13
# asm 2: vshl.i32 >b2=q14,<a2=q12,#13
vshl.i32 q14,q12,#13
# qhasm: next_diag3 = next_diag3[3] next_diag3[0,1,2]
# asm 1: vext.32 >next_diag3=reg128#11,<next_diag3=reg128#11,<next_diag3=reg128#11,#3
# asm 2: vext.32 >next_diag3=q10,<next_diag3=q10,<next_diag3=q10,#3
vext.32 q10,q10,q10,#3
# qhasm: 4x next_b2 = next_a2 << 13
# asm 1: vshl.i32 >next_b2=reg128#16,<next_a2=reg128#14,#13
# asm 2: vshl.i32 >next_b2=q15,<next_a2=q13,#13
vshl.i32 q15,q13,#13
# qhasm: 4x b2 insert= a2 >> 19
# asm 1: vsri.i32 <b2=reg128#15,<a2=reg128#13,#19
# asm 2: vsri.i32 <b2=q14,<a2=q12,#19
vsri.i32 q14,q12,#19
# qhasm: 4x next_b2 insert= next_a2 >> 19
# asm 1: vsri.i32 <next_b2=reg128#16,<next_a2=reg128#14,#19
# asm 2: vsri.i32 <next_b2=q15,<next_a2=q13,#19
vsri.i32 q15,q13,#19
# qhasm: diag1 ^= b2
# asm 1: veor >diag1=reg128#9,<diag1=reg128#9,<b2=reg128#15
# asm 2: veor >diag1=q8,<diag1=q8,<b2=q14
veor q8,q8,q14
# qhasm: next_diag1 ^= next_b2
# asm 1: veor >next_diag1=reg128#5,<next_diag1=reg128#5,<next_b2=reg128#16
# asm 2: veor >next_diag1=q4,<next_diag1=q4,<next_b2=q15
veor q4,q4,q15
# qhasm: 4x a3 = diag2 + diag1
# asm 1: vadd.i32 >a3=reg128#13,<diag2=reg128#6,<diag1=reg128#9
# asm 2: vadd.i32 >a3=q12,<diag2=q5,<diag1=q8
vadd.i32 q12,q5,q8
# qhasm: diag2 = diag2[2,3] diag2[0,1]
# asm 1: vswp <diag2=reg128#6%bot,<diag2=reg128#6%top
# asm 2: vswp <diag2=d10,<diag2=d11
vswp d10,d11
# qhasm: 4x next_a3 = next_diag2 + next_diag1
# asm 1: vadd.i32 >next_a3=reg128#14,<next_diag2=reg128#12,<next_diag1=reg128#5
# asm 2: vadd.i32 >next_a3=q13,<next_diag2=q11,<next_diag1=q4
vadd.i32 q13,q11,q4
# qhasm: 4x b3 = a3 << 18
# asm 1: vshl.i32 >b3=reg128#15,<a3=reg128#13,#18
# asm 2: vshl.i32 >b3=q14,<a3=q12,#18
vshl.i32 q14,q12,#18
# qhasm: next_diag2 = next_diag2[2,3] next_diag2[0,1]
# asm 1: vswp <next_diag2=reg128#12%bot,<next_diag2=reg128#12%top
# asm 2: vswp <next_diag2=d22,<next_diag2=d23
vswp d22,d23
# qhasm: 4x next_b3 = next_a3 << 18
# asm 1: vshl.i32 >next_b3=reg128#16,<next_a3=reg128#14,#18
# asm 2: vshl.i32 >next_b3=q15,<next_a3=q13,#18
vshl.i32 q15,q13,#18
# qhasm: 4x b3 insert= a3 >> 14
# asm 1: vsri.i32 <b3=reg128#15,<a3=reg128#13,#14
# asm 2: vsri.i32 <b3=q14,<a3=q12,#14
vsri.i32 q14,q12,#14
# qhasm: diag1 = diag1[1,2,3] diag1[0]
# asm 1: vext.32 >diag1=reg128#9,<diag1=reg128#9,<diag1=reg128#9,#1
# asm 2: vext.32 >diag1=q8,<diag1=q8,<diag1=q8,#1
vext.32 q8,q8,q8,#1
# qhasm: 4x next_b3 insert= next_a3 >> 14
# asm 1: vsri.i32 <next_b3=reg128#16,<next_a3=reg128#14,#14
# asm 2: vsri.i32 <next_b3=q15,<next_a3=q13,#14
vsri.i32 q15,q13,#14
# qhasm: diag0 ^= b3
# asm 1: veor >diag0=reg128#8,<diag0=reg128#8,<b3=reg128#15
# asm 2: veor >diag0=q7,<diag0=q7,<b3=q14
veor q7,q7,q14
# qhasm: next_diag1 = next_diag1[1,2,3] next_diag1[0]
# asm 1: vext.32 >next_diag1=reg128#5,<next_diag1=reg128#5,<next_diag1=reg128#5,#1
# asm 2: vext.32 >next_diag1=q4,<next_diag1=q4,<next_diag1=q4,#1
vext.32 q4,q4,q4,#1
# qhasm: next_diag0 ^= next_b3
# asm 1: veor >next_diag0=reg128#2,<next_diag0=reg128#2,<next_b3=reg128#16
# asm 2: veor >next_diag0=q1,<next_diag0=q1,<next_b3=q15
veor q1,q1,q15
# qhasm: 4x a0 = diag3 + diag0
# asm 1: vadd.i32 >a0=reg128#13,<diag3=reg128#7,<diag0=reg128#8
# asm 2: vadd.i32 >a0=q12,<diag3=q6,<diag0=q7
vadd.i32 q12,q6,q7
# qhasm: 4x next_a0 = next_diag3 + next_diag0
# asm 1: vadd.i32 >next_a0=reg128#14,<next_diag3=reg128#11,<next_diag0=reg128#2
# asm 2: vadd.i32 >next_a0=q13,<next_diag3=q10,<next_diag0=q1
vadd.i32 q13,q10,q1
# qhasm: 4x b0 = a0 << 7
# asm 1: vshl.i32 >b0=reg128#15,<a0=reg128#13,#7
# asm 2: vshl.i32 >b0=q14,<a0=q12,#7
vshl.i32 q14,q12,#7
# qhasm: 4x next_b0 = next_a0 << 7
# asm 1: vshl.i32 >next_b0=reg128#16,<next_a0=reg128#14,#7
# asm 2: vshl.i32 >next_b0=q15,<next_a0=q13,#7
vshl.i32 q15,q13,#7
# qhasm: 4x b0 insert= a0 >> 25
# asm 1: vsri.i32 <b0=reg128#15,<a0=reg128#13,#25
# asm 2: vsri.i32 <b0=q14,<a0=q12,#25
vsri.i32 q14,q12,#25
# qhasm: 4x next_b0 insert= next_a0 >> 25
# asm 1: vsri.i32 <next_b0=reg128#16,<next_a0=reg128#14,#25
# asm 2: vsri.i32 <next_b0=q15,<next_a0=q13,#25
vsri.i32 q15,q13,#25
# qhasm: diag1 ^= b0
# asm 1: veor >diag1=reg128#9,<diag1=reg128#9,<b0=reg128#15
# asm 2: veor >diag1=q8,<diag1=q8,<b0=q14
veor q8,q8,q14
# qhasm: next_diag1 ^= next_b0
# asm 1: veor >next_diag1=reg128#5,<next_diag1=reg128#5,<next_b0=reg128#16
# asm 2: veor >next_diag1=q4,<next_diag1=q4,<next_b0=q15
veor q4,q4,q15
# qhasm: 4x a1 = diag0 + diag1
# asm 1: vadd.i32 >a1=reg128#13,<diag0=reg128#8,<diag1=reg128#9
# asm 2: vadd.i32 >a1=q12,<diag0=q7,<diag1=q8
vadd.i32 q12,q7,q8
# qhasm: 4x next_a1 = next_diag0 + next_diag1
# asm 1: vadd.i32 >next_a1=reg128#14,<next_diag0=reg128#2,<next_diag1=reg128#5
# asm 2: vadd.i32 >next_a1=q13,<next_diag0=q1,<next_diag1=q4
vadd.i32 q13,q1,q4
# qhasm: 4x b1 = a1 << 9
# asm 1: vshl.i32 >b1=reg128#15,<a1=reg128#13,#9
# asm 2: vshl.i32 >b1=q14,<a1=q12,#9
vshl.i32 q14,q12,#9
# qhasm: 4x next_b1 = next_a1 << 9
# asm 1: vshl.i32 >next_b1=reg128#16,<next_a1=reg128#14,#9
# asm 2: vshl.i32 >next_b1=q15,<next_a1=q13,#9
vshl.i32 q15,q13,#9
# qhasm: 4x b1 insert= a1 >> 23
# asm 1: vsri.i32 <b1=reg128#15,<a1=reg128#13,#23
# asm 2: vsri.i32 <b1=q14,<a1=q12,#23
vsri.i32 q14,q12,#23
# qhasm: unsigned>? i -= 2
# asm 1: subs <i=int32#5,<i=int32#5,#2
# asm 2: subs <i=r4,<i=r4,#2
subs r4,r4,#2
# qhasm: 4x next_b1 insert= next_a1 >> 23
# asm 1: vsri.i32 <next_b1=reg128#16,<next_a1=reg128#14,#23
# asm 2: vsri.i32 <next_b1=q15,<next_a1=q13,#23
vsri.i32 q15,q13,#23
# qhasm: diag2 ^= b1
# asm 1: veor >diag2=reg128#6,<diag2=reg128#6,<b1=reg128#15
# asm 2: veor >diag2=q5,<diag2=q5,<b1=q14
veor q5,q5,q14
# qhasm: next_diag2 ^= next_b1
# asm 1: veor >next_diag2=reg128#12,<next_diag2=reg128#12,<next_b1=reg128#16
# asm 2: veor >next_diag2=q11,<next_diag2=q11,<next_b1=q15
veor q11,q11,q15
# qhasm: 4x a2 = diag1 + diag2
# asm 1: vadd.i32 >a2=reg128#13,<diag1=reg128#9,<diag2=reg128#6
# asm 2: vadd.i32 >a2=q12,<diag1=q8,<diag2=q5
vadd.i32 q12,q8,q5
# qhasm: diag1 = diag1[3] diag1[0,1,2]
# asm 1: vext.32 >diag1=reg128#9,<diag1=reg128#9,<diag1=reg128#9,#3
# asm 2: vext.32 >diag1=q8,<diag1=q8,<diag1=q8,#3
vext.32 q8,q8,q8,#3
# qhasm: 4x next_a2 = next_diag1 + next_diag2
# asm 1: vadd.i32 >next_a2=reg128#14,<next_diag1=reg128#5,<next_diag2=reg128#12
# asm 2: vadd.i32 >next_a2=q13,<next_diag1=q4,<next_diag2=q11
vadd.i32 q13,q4,q11
# qhasm: 4x b2 = a2 << 13
# asm 1: vshl.i32 >b2=reg128#15,<a2=reg128#13,#13
# asm 2: vshl.i32 >b2=q14,<a2=q12,#13
vshl.i32 q14,q12,#13
# qhasm: next_diag1 = next_diag1[3] next_diag1[0,1,2]
# asm 1: vext.32 >next_diag1=reg128#5,<next_diag1=reg128#5,<next_diag1=reg128#5,#3
# asm 2: vext.32 >next_diag1=q4,<next_diag1=q4,<next_diag1=q4,#3
vext.32 q4,q4,q4,#3
# qhasm: 4x next_b2 = next_a2 << 13
# asm 1: vshl.i32 >next_b2=reg128#16,<next_a2=reg128#14,#13
# asm 2: vshl.i32 >next_b2=q15,<next_a2=q13,#13
vshl.i32 q15,q13,#13
# qhasm: 4x b2 insert= a2 >> 19
# asm 1: vsri.i32 <b2=reg128#15,<a2=reg128#13,#19
# asm 2: vsri.i32 <b2=q14,<a2=q12,#19
vsri.i32 q14,q12,#19
# qhasm: 4x next_b2 insert= next_a2 >> 19
# asm 1: vsri.i32 <next_b2=reg128#16,<next_a2=reg128#14,#19
# asm 2: vsri.i32 <next_b2=q15,<next_a2=q13,#19
vsri.i32 q15,q13,#19
# qhasm: diag3 ^= b2
# asm 1: veor >diag3=reg128#7,<diag3=reg128#7,<b2=reg128#15
# asm 2: veor >diag3=q6,<diag3=q6,<b2=q14
veor q6,q6,q14
# qhasm: next_diag3 ^= next_b2
# asm 1: veor >next_diag3=reg128#11,<next_diag3=reg128#11,<next_b2=reg128#16
# asm 2: veor >next_diag3=q10,<next_diag3=q10,<next_b2=q15
veor q10,q10,q15
# qhasm: 4x a3 = diag2 + diag3
# asm 1: vadd.i32 >a3=reg128#13,<diag2=reg128#6,<diag3=reg128#7
# asm 2: vadd.i32 >a3=q12,<diag2=q5,<diag3=q6
vadd.i32 q12,q5,q6
# qhasm: diag2 = diag2[2,3] diag2[0,1]
# asm 1: vswp <diag2=reg128#6%bot,<diag2=reg128#6%top
# asm 2: vswp <diag2=d10,<diag2=d11
vswp d10,d11
# qhasm: 4x next_a3 = next_diag2 + next_diag3
# asm 1: vadd.i32 >next_a3=reg128#14,<next_diag2=reg128#12,<next_diag3=reg128#11
# asm 2: vadd.i32 >next_a3=q13,<next_diag2=q11,<next_diag3=q10
vadd.i32 q13,q11,q10
# qhasm: 4x b3 = a3 << 18
# asm 1: vshl.i32 >b3=reg128#15,<a3=reg128#13,#18
# asm 2: vshl.i32 >b3=q14,<a3=q12,#18
vshl.i32 q14,q12,#18
# qhasm: next_diag2 = next_diag2[2,3] next_diag2[0,1]
# asm 1: vswp <next_diag2=reg128#12%bot,<next_diag2=reg128#12%top
# asm 2: vswp <next_diag2=d22,<next_diag2=d23
vswp d22,d23
# qhasm: 4x next_b3 = next_a3 << 18
# asm 1: vshl.i32 >next_b3=reg128#16,<next_a3=reg128#14,#18
# asm 2: vshl.i32 >next_b3=q15,<next_a3=q13,#18
vshl.i32 q15,q13,#18
# qhasm: 4x b3 insert= a3 >> 14
# asm 1: vsri.i32 <b3=reg128#15,<a3=reg128#13,#14
# asm 2: vsri.i32 <b3=q14,<a3=q12,#14
vsri.i32 q14,q12,#14
# qhasm: diag3 = diag3[1,2,3] diag3[0]
# asm 1: vext.32 >diag3=reg128#7,<diag3=reg128#7,<diag3=reg128#7,#1
# asm 2: vext.32 >diag3=q6,<diag3=q6,<diag3=q6,#1
vext.32 q6,q6,q6,#1
# qhasm: 4x next_b3 insert= next_a3 >> 14
# asm 1: vsri.i32 <next_b3=reg128#16,<next_a3=reg128#14,#14
# asm 2: vsri.i32 <next_b3=q15,<next_a3=q13,#14
vsri.i32 q15,q13,#14
# qhasm: diag0 ^= b3
# asm 1: veor >diag0=reg128#8,<diag0=reg128#8,<b3=reg128#15
# asm 2: veor >diag0=q7,<diag0=q7,<b3=q14
veor q7,q7,q14
# qhasm: next_diag3 = next_diag3[1,2,3] next_diag3[0]
# asm 1: vext.32 >next_diag3=reg128#13,<next_diag3=reg128#11,<next_diag3=reg128#11,#1
# asm 2: vext.32 >next_diag3=q12,<next_diag3=q10,<next_diag3=q10,#1
vext.32 q12,q10,q10,#1
# qhasm: next_diag0 ^= next_b3
# asm 1: veor >next_diag0=reg128#2,<next_diag0=reg128#2,<next_b3=reg128#16
# asm 2: veor >next_diag0=q1,<next_diag0=q1,<next_b3=q15
veor q1,q1,q15
# qhasm: goto mainloop2 if unsigned>
bhi ._mainloop2
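# Rounds done. Below, the saved input words are added back into each
# block (the Salsa20 feed-forward); then abab (all-ones in the even
# 32-bit lanes only) drives the vbif blends that, together with the
# d-register moves, permute the diagonal layout x0x5x10x15 /
# x12x1x6x11 / x8x13x2x7 / x4x9x14x3 back into x0..x15 order.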
# qhasm: 2x abab = 0xffffffff
# asm 1: vmov.i64 >abab=reg128#11,#0xffffffff
# asm 2: vmov.i64 >abab=q10,#0xffffffff
vmov.i64 q10,#0xffffffff
# qhasm: new x4x9x14x3
# qhasm: x4x9x14x3 bot = stack_start3 bot
# asm 1: vldr <x4x9x14x3=reg128#14%bot,<stack_start3=stack128#9
# asm 2: vldr <x4x9x14x3=d26,<stack_start3=[sp,#224]
vldr d26,[sp,#224]
# qhasm: x4x9x14x3 top = stack_start3 top
# asm 1: vldr <x4x9x14x3=reg128#14%top,<stack_start3=stack128#9
# asm 2: vldr <x4x9x14x3=d27,<stack_start3=[sp,#232]
vldr d27,[sp,#232]
# qhasm: 4x x0x5x10x15 = diag0 + start0
# asm 1: vadd.i32 >x0x5x10x15=reg128#8,<diag0=reg128#8,<start0=reg128#3
# asm 2: vadd.i32 >x0x5x10x15=q7,<diag0=q7,<start0=q2
vadd.i32 q7,q7,q2
# qhasm: 4x x12x1x6x11 = diag1 + start1
# asm 1: vadd.i32 >x12x1x6x11=reg128#9,<diag1=reg128#9,<start1=reg128#4
# asm 2: vadd.i32 >x12x1x6x11=q8,<diag1=q8,<start1=q3
vadd.i32 q8,q8,q3
# qhasm: 4x x8x13x2x7 = diag2 + start2
# asm 1: vadd.i32 >x8x13x2x7=reg128#6,<diag2=reg128#6,<start2=reg128#10
# asm 2: vadd.i32 >x8x13x2x7=q5,<diag2=q5,<start2=q9
vadd.i32 q5,q5,q9
# qhasm: 4x x4x9x14x3 += diag3
# asm 1: vadd.i32 >x4x9x14x3=reg128#7,<x4x9x14x3=reg128#14,<diag3=reg128#7
# asm 2: vadd.i32 >x4x9x14x3=q6,<x4x9x14x3=q13,<diag3=q6
vadd.i32 q6,q13,q6
# qhasm: x0x1x10x11 = x0x5x10x15
# asm 1: vmov >x0x1x10x11=reg128#10,<x0x5x10x15=reg128#8
# asm 2: vmov >x0x1x10x11=q9,<x0x5x10x15=q7
vmov q9,q7
# qhasm: x12x13x6x7 = x12x1x6x11
# asm 1: vmov >x12x13x6x7=reg128#14,<x12x1x6x11=reg128#9
# asm 2: vmov >x12x13x6x7=q13,<x12x1x6x11=q8
vmov q13,q8
# qhasm: x8x9x2x3 = x8x13x2x7
# asm 1: vmov >x8x9x2x3=reg128#15,<x8x13x2x7=reg128#6
# asm 2: vmov >x8x9x2x3=q14,<x8x13x2x7=q5
vmov q14,q5
# qhasm: x4x5x14x15 = x4x9x14x3
# asm 1: vmov >x4x5x14x15=reg128#16,<x4x9x14x3=reg128#7
# asm 2: vmov >x4x5x14x15=q15,<x4x9x14x3=q6
vmov q15,q6
# qhasm: x0x1x10x11 = (abab & x0x1x10x11) | (~abab & x12x1x6x11)
# asm 1: vbif <x0x1x10x11=reg128#10,<x12x1x6x11=reg128#9,<abab=reg128#11
# asm 2: vbif <x0x1x10x11=q9,<x12x1x6x11=q8,<abab=q10
vbif q9,q8,q10
# qhasm: x12x13x6x7 = (abab & x12x13x6x7) | (~abab & x8x13x2x7)
# asm 1: vbif <x12x13x6x7=reg128#14,<x8x13x2x7=reg128#6,<abab=reg128#11
# asm 2: vbif <x12x13x6x7=q13,<x8x13x2x7=q5,<abab=q10
vbif q13,q5,q10
# qhasm: x8x9x2x3 = (abab & x8x9x2x3) | (~abab & x4x9x14x3)
# asm 1: vbif <x8x9x2x3=reg128#15,<x4x9x14x3=reg128#7,<abab=reg128#11
# asm 2: vbif <x8x9x2x3=q14,<x4x9x14x3=q6,<abab=q10
vbif q14,q6,q10
# qhasm: x4x5x14x15 = (abab & x4x5x14x15) | (~abab & x0x5x10x15)
# asm 1: vbif <x4x5x14x15=reg128#16,<x0x5x10x15=reg128#8,<abab=reg128#11
# asm 2: vbif <x4x5x14x15=q15,<x0x5x10x15=q7,<abab=q10
vbif q15,q7,q10
# qhasm: x0x1x2x3 = x0x1x10x11
# asm 1: vmov >x0x1x2x3=reg128#6,<x0x1x10x11=reg128#10
# asm 2: vmov >x0x1x2x3=q5,<x0x1x10x11=q9
vmov q5,q9
# qhasm: x4x5x6x7 = x4x5x14x15
# asm 1: vmov >x4x5x6x7=reg128#7,<x4x5x14x15=reg128#16
# asm 2: vmov >x4x5x6x7=q6,<x4x5x14x15=q15
vmov q6,q15
# qhasm: x8x9x10x11 = x8x9x2x3
# asm 1: vmov >x8x9x10x11=reg128#8,<x8x9x2x3=reg128#15
# asm 2: vmov >x8x9x10x11=q7,<x8x9x2x3=q14
vmov q7,q14
# qhasm: x12x13x14x15 = x12x13x6x7
# asm 1: vmov >x12x13x14x15=reg128#9,<x12x13x6x7=reg128#14
# asm 2: vmov >x12x13x14x15=q8,<x12x13x6x7=q13
vmov q8,q13
# qhasm: x0x1x2x3 = x0x1x2x3[0,1] x8x9x2x3[2,3]
# asm 1: vmov <x0x1x2x3=reg128#6%top,<x8x9x2x3=reg128#15%top
# asm 2: vmov <x0x1x2x3=d11,<x8x9x2x3=d29
vmov d11,d29
# qhasm: x4x5x6x7 = x4x5x6x7[0,1] x12x13x6x7[2,3]
# asm 1: vmov <x4x5x6x7=reg128#7%top,<x12x13x6x7=reg128#14%top
# asm 2: vmov <x4x5x6x7=d13,<x12x13x6x7=d27
vmov d13,d27
# qhasm: x8x9x10x11 = x8x9x10x11[0,1] x0x1x10x11[2,3]
# asm 1: vmov <x8x9x10x11=reg128#8%top,<x0x1x10x11=reg128#10%top
# asm 2: vmov <x8x9x10x11=d15,<x0x1x10x11=d19
vmov d15,d19
# qhasm: x12x13x14x15 = x12x13x14x15[0,1] x4x5x14x15[2,3]
# asm 1: vmov <x12x13x14x15=reg128#9%top,<x4x5x14x15=reg128#16%top
# asm 2: vmov <x12x13x14x15=d17,<x4x5x14x15=d31
vmov d17,d31
# qhasm: =? m - 0
# asm 1: cmp <m=int32#2,#0
# asm 2: cmp <m=r1,#0
cmp r1,#0
# qhasm: goto nomessage2 if =
beq ._nomessage2
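# m == 0 requests raw keystream: the loads and XORs are skipped and
# the block is stored unmodified at c.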
  807. # qhasm: m0m1m2m3 = mem128[m]
  808. # asm 1: vld1.8 {>m0m1m2m3=reg128#10%bot->m0m1m2m3=reg128#10%top},[<m=int32#2]
  809. # asm 2: vld1.8 {>m0m1m2m3=d18->m0m1m2m3=d19},[<m=r1]
  810. vld1.8 {d18-d19},[r1]
  811. # qhasm: m += 16
  812. # asm 1: add <m=int32#2,<m=int32#2,#16
  813. # asm 2: add <m=r1,<m=r1,#16
  814. add r1,r1,#16
  815. # qhasm: m4m5m6m7 = mem128[m]
  816. # asm 1: vld1.8 {>m4m5m6m7=reg128#14%bot->m4m5m6m7=reg128#14%top},[<m=int32#2]
  817. # asm 2: vld1.8 {>m4m5m6m7=d26->m4m5m6m7=d27},[<m=r1]
  818. vld1.8 {d26-d27},[r1]
  819. # qhasm: m += 16
  820. # asm 1: add <m=int32#2,<m=int32#2,#16
  821. # asm 2: add <m=r1,<m=r1,#16
  822. add r1,r1,#16
  823. # qhasm: m8m9m10m11 = mem128[m]
  824. # asm 1: vld1.8 {>m8m9m10m11=reg128#15%bot->m8m9m10m11=reg128#15%top},[<m=int32#2]
  825. # asm 2: vld1.8 {>m8m9m10m11=d28->m8m9m10m11=d29},[<m=r1]
  826. vld1.8 {d28-d29},[r1]
  827. # qhasm: m += 16
  828. # asm 1: add <m=int32#2,<m=int32#2,#16
  829. # asm 2: add <m=r1,<m=r1,#16
  830. add r1,r1,#16
  831. # qhasm: m12m13m14m15 = mem128[m]
  832. # asm 1: vld1.8 {>m12m13m14m15=reg128#16%bot->m12m13m14m15=reg128#16%top},[<m=int32#2]
  833. # asm 2: vld1.8 {>m12m13m14m15=d30->m12m13m14m15=d31},[<m=r1]
  834. vld1.8 {d30-d31},[r1]
  835. # qhasm: m += 16
  836. # asm 1: add <m=int32#2,<m=int32#2,#16
  837. # asm 2: add <m=r1,<m=r1,#16
  838. add r1,r1,#16
  839. # qhasm: x0x1x2x3 ^= m0m1m2m3
  840. # asm 1: veor >x0x1x2x3=reg128#6,<x0x1x2x3=reg128#6,<m0m1m2m3=reg128#10
  841. # asm 2: veor >x0x1x2x3=q5,<x0x1x2x3=q5,<m0m1m2m3=q9
  842. veor q5,q5,q9
  843. # qhasm: x4x5x6x7 ^= m4m5m6m7
  844. # asm 1: veor >x4x5x6x7=reg128#7,<x4x5x6x7=reg128#7,<m4m5m6m7=reg128#14
  845. # asm 2: veor >x4x5x6x7=q6,<x4x5x6x7=q6,<m4m5m6m7=q13
  846. veor q6,q6,q13
  847. # qhasm: x8x9x10x11 ^= m8m9m10m11
  848. # asm 1: veor >x8x9x10x11=reg128#8,<x8x9x10x11=reg128#8,<m8m9m10m11=reg128#15
  849. # asm 2: veor >x8x9x10x11=q7,<x8x9x10x11=q7,<m8m9m10m11=q14
  850. veor q7,q7,q14
  851. # qhasm: x12x13x14x15 ^= m12m13m14m15
  852. # asm 1: veor >x12x13x14x15=reg128#9,<x12x13x14x15=reg128#9,<m12m13m14m15=reg128#16
  853. # asm 2: veor >x12x13x14x15=q8,<x12x13x14x15=q8,<m12m13m14m15=q15
  854. veor q8,q8,q15
  855. # qhasm: nomessage2:
  856. ._nomessage2:
  857. # qhasm: mem128[c] = x0x1x2x3
  858. # asm 1: vst1.8 {<x0x1x2x3=reg128#6%bot-<x0x1x2x3=reg128#6%top},[<c=int32#1]
  859. # asm 2: vst1.8 {<x0x1x2x3=d10-<x0x1x2x3=d11},[<c=r0]
  860. vst1.8 {d10-d11},[r0]
  861. # qhasm: c += 16
  862. # asm 1: add <c=int32#1,<c=int32#1,#16
  863. # asm 2: add <c=r0,<c=r0,#16
  864. add r0,r0,#16
  865. # qhasm: mem128[c] = x4x5x6x7
  866. # asm 1: vst1.8 {<x4x5x6x7=reg128#7%bot-<x4x5x6x7=reg128#7%top},[<c=int32#1]
  867. # asm 2: vst1.8 {<x4x5x6x7=d12-<x4x5x6x7=d13},[<c=r0]
  868. vst1.8 {d12-d13},[r0]
  869. # qhasm: c += 16
  870. # asm 1: add <c=int32#1,<c=int32#1,#16
  871. # asm 2: add <c=r0,<c=r0,#16
  872. add r0,r0,#16
  873. # qhasm: mem128[c] = x8x9x10x11
  874. # asm 1: vst1.8 {<x8x9x10x11=reg128#8%bot-<x8x9x10x11=reg128#8%top},[<c=int32#1]
  875. # asm 2: vst1.8 {<x8x9x10x11=d14-<x8x9x10x11=d15},[<c=r0]
  876. vst1.8 {d14-d15},[r0]
  877. # qhasm: c += 16
  878. # asm 1: add <c=int32#1,<c=int32#1,#16
  879. # asm 2: add <c=r0,<c=r0,#16
  880. add r0,r0,#16
  881. # qhasm: mem128[c] = x12x13x14x15
  882. # asm 1: vst1.8 {<x12x13x14x15=reg128#9%bot-<x12x13x14x15=reg128#9%top},[<c=int32#1]
  883. # asm 2: vst1.8 {<x12x13x14x15=d16-<x12x13x14x15=d17},[<c=r0]
  884. vst1.8 {d16-d17},[r0]
  885. # qhasm: c += 16
  886. # asm 1: add <c=int32#1,<c=int32#1,#16
  887. # asm 2: add <c=r0,<c=r0,#16
  888. add r0,r0,#16
  889. # qhasm: new x8x13x2x7
  890. # qhasm: x8x13x2x7 bot = next_start2 bot
  891. # asm 1: vldr <x8x13x2x7=reg128#6%bot,<next_start2=stack128#7
  892. # asm 2: vldr <x8x13x2x7=d10,<next_start2=[sp,#192]
  893. vldr d10,[sp,#192]
  894. # qhasm: x8x13x2x7 top = next_start2 top
  895. # asm 1: vldr <x8x13x2x7=reg128#6%top,<next_start2=stack128#7
  896. # asm 2: vldr <x8x13x2x7=d11,<next_start2=[sp,#200]
  897. vldr d11,[sp,#200]
  898. # qhasm: new x4x9x14x3
  899. # qhasm: x4x9x14x3 bot = next_start3 bot
  900. # asm 1: vldr <x4x9x14x3=reg128#7%bot,<next_start3=stack128#8
  901. # asm 2: vldr <x4x9x14x3=d12,<next_start3=[sp,#208]
  902. vldr d12,[sp,#208]
  903. # qhasm: x4x9x14x3 top = next_start3 top
  904. # asm 1: vldr <x4x9x14x3=reg128#7%top,<next_start3=stack128#8
  905. # asm 2: vldr <x4x9x14x3=d13,<next_start3=[sp,#216]
  906. vldr d13,[sp,#216]
  907. # qhasm: 4x x0x5x10x15 = next_diag0 + start0
  908. # asm 1: vadd.i32 >x0x5x10x15=reg128#2,<next_diag0=reg128#2,<start0=reg128#3
  909. # asm 2: vadd.i32 >x0x5x10x15=q1,<next_diag0=q1,<start0=q2
  910. vadd.i32 q1,q1,q2
  911. # qhasm: 4x x12x1x6x11 = next_diag1 + start1
  912. # asm 1: vadd.i32 >x12x1x6x11=reg128#5,<next_diag1=reg128#5,<start1=reg128#4
  913. # asm 2: vadd.i32 >x12x1x6x11=q4,<next_diag1=q4,<start1=q3
  914. vadd.i32 q4,q4,q3
  915. # qhasm: 4x x8x13x2x7 += next_diag2
  916. # asm 1: vadd.i32 >x8x13x2x7=reg128#6,<x8x13x2x7=reg128#6,<next_diag2=reg128#12
  917. # asm 2: vadd.i32 >x8x13x2x7=q5,<x8x13x2x7=q5,<next_diag2=q11
  918. vadd.i32 q5,q5,q11
  919. # qhasm: 4x x4x9x14x3 += next_diag3
  920. # asm 1: vadd.i32 >x4x9x14x3=reg128#7,<x4x9x14x3=reg128#7,<next_diag3=reg128#13
  921. # asm 2: vadd.i32 >x4x9x14x3=q6,<x4x9x14x3=q6,<next_diag3=q12
  922. vadd.i32 q6,q6,q12
  923. # qhasm: x0x1x10x11 = x0x5x10x15
  924. # asm 1: vmov >x0x1x10x11=reg128#8,<x0x5x10x15=reg128#2
  925. # asm 2: vmov >x0x1x10x11=q7,<x0x5x10x15=q1
  926. vmov q7,q1
  927. # qhasm: x12x13x6x7 = x12x1x6x11
  928. # asm 1: vmov >x12x13x6x7=reg128#9,<x12x1x6x11=reg128#5
  929. # asm 2: vmov >x12x13x6x7=q8,<x12x1x6x11=q4
  930. vmov q8,q4
  931. # qhasm: x8x9x2x3 = x8x13x2x7
  932. # asm 1: vmov >x8x9x2x3=reg128#10,<x8x13x2x7=reg128#6
  933. # asm 2: vmov >x8x9x2x3=q9,<x8x13x2x7=q5
  934. vmov q9,q5
  935. # qhasm: x4x5x14x15 = x4x9x14x3
  936. # asm 1: vmov >x4x5x14x15=reg128#12,<x4x9x14x3=reg128#7
  937. # asm 2: vmov >x4x5x14x15=q11,<x4x9x14x3=q6
  938. vmov q11,q6
  939. # qhasm: x0x1x10x11 = (abab & x0x1x10x11) | (~abab & x12x1x6x11)
  940. # asm 1: vbif <x0x1x10x11=reg128#8,<x12x1x6x11=reg128#5,<abab=reg128#11
  941. # asm 2: vbif <x0x1x10x11=q7,<x12x1x6x11=q4,<abab=q10
  942. vbif q7,q4,q10
  943. # qhasm: x12x13x6x7 = (abab & x12x13x6x7) | (~abab & x8x13x2x7)
  944. # asm 1: vbif <x12x13x6x7=reg128#9,<x8x13x2x7=reg128#6,<abab=reg128#11
  945. # asm 2: vbif <x12x13x6x7=q8,<x8x13x2x7=q5,<abab=q10
  946. vbif q8,q5,q10
  947. # qhasm: x8x9x2x3 = (abab & x8x9x2x3) | (~abab & x4x9x14x3)
  948. # asm 1: vbif <x8x9x2x3=reg128#10,<x4x9x14x3=reg128#7,<abab=reg128#11
  949. # asm 2: vbif <x8x9x2x3=q9,<x4x9x14x3=q6,<abab=q10
  950. vbif q9,q6,q10
  951. # qhasm: x4x5x14x15 = (abab & x4x5x14x15) | (~abab & x0x5x10x15)
  952. # asm 1: vbif <x4x5x14x15=reg128#12,<x0x5x10x15=reg128#2,<abab=reg128#11
  953. # asm 2: vbif <x4x5x14x15=q11,<x0x5x10x15=q1,<abab=q10
  954. vbif q11,q1,q10
  955. # qhasm: x0x1x2x3 = x0x1x10x11
  956. # asm 1: vmov >x0x1x2x3=reg128#2,<x0x1x10x11=reg128#8
  957. # asm 2: vmov >x0x1x2x3=q1,<x0x1x10x11=q7
  958. vmov q1,q7
  959. # qhasm: x4x5x6x7 = x4x5x14x15
  960. # asm 1: vmov >x4x5x6x7=reg128#5,<x4x5x14x15=reg128#12
  961. # asm 2: vmov >x4x5x6x7=q4,<x4x5x14x15=q11
  962. vmov q4,q11
  963. # qhasm: x8x9x10x11 = x8x9x2x3
  964. # asm 1: vmov >x8x9x10x11=reg128#6,<x8x9x2x3=reg128#10
  965. # asm 2: vmov >x8x9x10x11=q5,<x8x9x2x3=q9
  966. vmov q5,q9
  967. # qhasm: x12x13x14x15 = x12x13x6x7
  968. # asm 1: vmov >x12x13x14x15=reg128#7,<x12x13x6x7=reg128#9
  969. # asm 2: vmov >x12x13x14x15=q6,<x12x13x6x7=q8
  970. vmov q6,q8
  971. # qhasm: x0x1x2x3 = x0x1x2x3[0,1] x8x9x2x3[2,3]
  972. # asm 1: vmov <x0x1x2x3=reg128#2%top,<x8x9x2x3=reg128#10%top
  973. # asm 2: vmov <x0x1x2x3=d3,<x8x9x2x3=d19
  974. vmov d3,d19
  975. # qhasm: x4x5x6x7 = x4x5x6x7[0,1] x12x13x6x7[2,3]
  976. # asm 1: vmov <x4x5x6x7=reg128#5%top,<x12x13x6x7=reg128#9%top
  977. # asm 2: vmov <x4x5x6x7=d9,<x12x13x6x7=d17
  978. vmov d9,d17
  979. # qhasm: x8x9x10x11 = x8x9x10x11[0,1] x0x1x10x11[2,3]
  980. # asm 1: vmov <x8x9x10x11=reg128#6%top,<x0x1x10x11=reg128#8%top
  981. # asm 2: vmov <x8x9x10x11=d11,<x0x1x10x11=d15
  982. vmov d11,d15
  983. # qhasm: x12x13x14x15 = x12x13x14x15[0,1] x4x5x14x15[2,3]
  984. # asm 1: vmov <x12x13x14x15=reg128#7%top,<x4x5x14x15=reg128#12%top
  985. # asm 2: vmov <x12x13x14x15=d13,<x4x5x14x15=d23
  986. vmov d13,d23
  987. # qhasm: =? m - 0
  988. # asm 1: cmp <m=int32#2,#0
  989. # asm 2: cmp <m=r1,#0
  990. cmp r1,#0
  991. # qhasm: goto nomessage2next if =
  992. beq ._nomessage2next
  993. # qhasm: m0m1m2m3 = mem128[m]
  994. # asm 1: vld1.8 {>m0m1m2m3=reg128#8%bot->m0m1m2m3=reg128#8%top},[<m=int32#2]
  995. # asm 2: vld1.8 {>m0m1m2m3=d14->m0m1m2m3=d15},[<m=r1]
  996. vld1.8 {d14-d15},[r1]
  997. # qhasm: m += 16
  998. # asm 1: add <m=int32#2,<m=int32#2,#16
  999. # asm 2: add <m=r1,<m=r1,#16
  1000. add r1,r1,#16
  1001. # qhasm: m4m5m6m7 = mem128[m]
  1002. # asm 1: vld1.8 {>m4m5m6m7=reg128#9%bot->m4m5m6m7=reg128#9%top},[<m=int32#2]
  1003. # asm 2: vld1.8 {>m4m5m6m7=d16->m4m5m6m7=d17},[<m=r1]
  1004. vld1.8 {d16-d17},[r1]
  1005. # qhasm: m += 16
  1006. # asm 1: add <m=int32#2,<m=int32#2,#16
  1007. # asm 2: add <m=r1,<m=r1,#16
  1008. add r1,r1,#16
  1009. # qhasm: m8m9m10m11 = mem128[m]
  1010. # asm 1: vld1.8 {>m8m9m10m11=reg128#10%bot->m8m9m10m11=reg128#10%top},[<m=int32#2]
  1011. # asm 2: vld1.8 {>m8m9m10m11=d18->m8m9m10m11=d19},[<m=r1]
  1012. vld1.8 {d18-d19},[r1]
  1013. # qhasm: m += 16
  1014. # asm 1: add <m=int32#2,<m=int32#2,#16
  1015. # asm 2: add <m=r1,<m=r1,#16
  1016. add r1,r1,#16
  1017. # qhasm: m12m13m14m15 = mem128[m]
  1018. # asm 1: vld1.8 {>m12m13m14m15=reg128#11%bot->m12m13m14m15=reg128#11%top},[<m=int32#2]
  1019. # asm 2: vld1.8 {>m12m13m14m15=d20->m12m13m14m15=d21},[<m=r1]
  1020. vld1.8 {d20-d21},[r1]
  1021. # qhasm: m += 16
  1022. # asm 1: add <m=int32#2,<m=int32#2,#16
  1023. # asm 2: add <m=r1,<m=r1,#16
  1024. add r1,r1,#16
  1025. # qhasm: x0x1x2x3 ^= m0m1m2m3
  1026. # asm 1: veor >x0x1x2x3=reg128#2,<x0x1x2x3=reg128#2,<m0m1m2m3=reg128#8
  1027. # asm 2: veor >x0x1x2x3=q1,<x0x1x2x3=q1,<m0m1m2m3=q7
  1028. veor q1,q1,q7
  1029. # qhasm: x4x5x6x7 ^= m4m5m6m7
  1030. # asm 1: veor >x4x5x6x7=reg128#5,<x4x5x6x7=reg128#5,<m4m5m6m7=reg128#9
  1031. # asm 2: veor >x4x5x6x7=q4,<x4x5x6x7=q4,<m4m5m6m7=q8
  1032. veor q4,q4,q8
  1033. # qhasm: x8x9x10x11 ^= m8m9m10m11
  1034. # asm 1: veor >x8x9x10x11=reg128#6,<x8x9x10x11=reg128#6,<m8m9m10m11=reg128#10
  1035. # asm 2: veor >x8x9x10x11=q5,<x8x9x10x11=q5,<m8m9m10m11=q9
  1036. veor q5,q5,q9
  1037. # qhasm: x12x13x14x15 ^= m12m13m14m15
  1038. # asm 1: veor >x12x13x14x15=reg128#7,<x12x13x14x15=reg128#7,<m12m13m14m15=reg128#11
  1039. # asm 2: veor >x12x13x14x15=q6,<x12x13x14x15=q6,<m12m13m14m15=q10
  1040. veor q6,q6,q10
  1041. # qhasm: nomessage2next:
  1042. ._nomessage2next:
  1043. # qhasm: mem128[c] = x0x1x2x3
  1044. # asm 1: vst1.8 {<x0x1x2x3=reg128#2%bot-<x0x1x2x3=reg128#2%top},[<c=int32#1]
  1045. # asm 2: vst1.8 {<x0x1x2x3=d2-<x0x1x2x3=d3},[<c=r0]
  1046. vst1.8 {d2-d3},[r0]
  1047. # qhasm: c += 16
  1048. # asm 1: add <c=int32#1,<c=int32#1,#16
  1049. # asm 2: add <c=r0,<c=r0,#16
  1050. add r0,r0,#16
  1051. # qhasm: mem128[c] = x4x5x6x7
  1052. # asm 1: vst1.8 {<x4x5x6x7=reg128#5%bot-<x4x5x6x7=reg128#5%top},[<c=int32#1]
  1053. # asm 2: vst1.8 {<x4x5x6x7=d8-<x4x5x6x7=d9},[<c=r0]
  1054. vst1.8 {d8-d9},[r0]
  1055. # qhasm: c += 16
  1056. # asm 1: add <c=int32#1,<c=int32#1,#16
  1057. # asm 2: add <c=r0,<c=r0,#16
  1058. add r0,r0,#16
  1059. # qhasm: mem128[c] = x8x9x10x11
  1060. # asm 1: vst1.8 {<x8x9x10x11=reg128#6%bot-<x8x9x10x11=reg128#6%top},[<c=int32#1]
  1061. # asm 2: vst1.8 {<x8x9x10x11=d10-<x8x9x10x11=d11},[<c=r0]
  1062. vst1.8 {d10-d11},[r0]
  1063. # qhasm: c += 16
  1064. # asm 1: add <c=int32#1,<c=int32#1,#16
  1065. # asm 2: add <c=r0,<c=r0,#16
  1066. add r0,r0,#16
  1067. # qhasm: mem128[c] = x12x13x14x15
  1068. # asm 1: vst1.8 {<x12x13x14x15=reg128#7%bot-<x12x13x14x15=reg128#7%top},[<c=int32#1]
  1069. # asm 2: vst1.8 {<x12x13x14x15=d12-<x12x13x14x15=d13},[<c=r0]
  1070. vst1.8 {d12-d13},[r0]
  1071. # qhasm: c += 16
  1072. # asm 1: add <c=int32#1,<c=int32#1,#16
  1073. # asm 2: add <c=r0,<c=r0,#16
  1074. add r0,r0,#16
  1075. # qhasm: carry? mlenlow -= 128
  1076. # asm 1: subs <mlenlow=int32#3,<mlenlow=int32#3,#128
  1077. # asm 2: subs <mlenlow=r2,<mlenlow=r2,#128
  1078. subs r2,r2,#128
  1079. # qhasm: mlenhigh -= 0 - carry
  1080. # asm 1: sbc <mlenhigh=int32#4,<mlenhigh=int32#4,#0
  1081. # asm 2: sbc <mlenhigh=r3,<mlenhigh=r3,#0
  1082. sbc r3,r3,#0
# qhasm: unsigned<? mlenlow - 128
# asm 1: cmp <mlenlow=int32#3,#128
# asm 2: cmp <mlenlow=r2,#128
cmp r2,#128
# qhasm: goto mlenatleast128 if !unsigned<
bhs ._mlenatleast128
# qhasm: mlenlowbelow128:
._mlenlowbelow128:
# qhasm: unsigned>? mlenhigh - 0
# asm 1: cmp <mlenhigh=int32#4,#0
# asm 2: cmp <mlenhigh=r3,#0
cmp r3,#0
# qhasm: goto mlenatleast128 if unsigned>
bhi ._mlenatleast128
# qhasm: =? mlenlow - 0
# asm 1: cmp <mlenlow=int32#3,#0
# asm 2: cmp <mlenlow=r2,#0
cmp r2,#0
# qhasm: goto done if =
beq ._done
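# note: from here on fewer than 128 bytes remain: the code processes
# one 64-byte block at a time, routing a final partial block through
# the on-stack tmp buffer.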
# qhasm: mlenatleast1:
._mlenatleast1:
# qhasm: unsigned<? mlenlow - 64
# asm 1: cmp <mlenlow=int32#3,#64
# asm 2: cmp <mlenlow=r2,#64
cmp r2,#64
# qhasm: goto mlenatleast64 if !unsigned<
bhs ._mlenatleast64
# qhasm: savec = c
# asm 1: str <c=int32#1,>savec=stack32#1
# asm 2: str <c=r0,>savec=[sp,#64]
str r0,[sp,#64]
# qhasm: c = &tmp
# asm 1: lea >c=int32#1,<tmp=stack512#1
# asm 2: lea >c=r0,<tmp=[sp,#0]
add r0,sp,#0
# qhasm: =? m - 0
# asm 1: cmp <m=int32#2,#0
# asm 2: cmp <m=r1,#0
cmp r1,#0
# qhasm: goto mlenatleast64 if =
beq ._mlenatleast64
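# note: partial-block setup. the real output pointer is saved in savec
# and c is redirected at the 64-byte tmp area on the stack; if a
# message is present, the mcopy/pad loops below copy the remaining
# mlenlow bytes into tmp and zero-pad it to a full 64 bytes.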
# qhasm: i = 0
# asm 1: ldr >i=int32#4,=0
# asm 2: ldr >i=r3,=0
ldr r3,=0
# qhasm: mcopy:
._mcopy:
# qhasm: mi = mem8[m + 0]
# asm 1: ldrb >mi=int32#5,[<m=int32#2,#0]
# asm 2: ldrb >mi=r4,[<m=r1,#0]
ldrb r4,[r1,#0]
# qhasm: mem8[c + 0] = mi
# asm 1: strb <mi=int32#5,[<c=int32#1,#0]
# asm 2: strb <mi=r4,[<c=r0,#0]
strb r4,[r0,#0]
# qhasm: m += 1
# asm 1: add <m=int32#2,<m=int32#2,#1
# asm 2: add <m=r1,<m=r1,#1
add r1,r1,#1
# qhasm: c += 1
# asm 1: add <c=int32#1,<c=int32#1,#1
# asm 2: add <c=r0,<c=r0,#1
add r0,r0,#1
# qhasm: i += 1
# asm 1: add <i=int32#4,<i=int32#4,#1
# asm 2: add <i=r3,<i=r3,#1
add r3,r3,#1
# qhasm: unsigned<? i - mlenlow
# asm 1: cmp <i=int32#4,<mlenlow=int32#3
# asm 2: cmp <i=r3,<mlenlow=r2
cmp r3,r2
# qhasm: goto mcopy if unsigned<
blo ._mcopy
# qhasm: mi = 0
# asm 1: ldr >mi=int32#2,=0
# asm 2: ldr >mi=r1,=0
ldr r1,=0
# qhasm: pad:
._pad:
# qhasm: mem8[c + 0] = mi
# asm 1: strb <mi=int32#2,[<c=int32#1,#0]
# asm 2: strb <mi=r1,[<c=r0,#0]
strb r1,[r0,#0]
# qhasm: c += 1
# asm 1: add <c=int32#1,<c=int32#1,#1
# asm 2: add <c=r0,<c=r0,#1
add r0,r0,#1
# qhasm: i += 1
# asm 1: add <i=int32#4,<i=int32#4,#1
# asm 2: add <i=r3,<i=r3,#1
add r3,r3,#1
# qhasm: unsigned<? i - 64
# asm 1: cmp <i=int32#4,#64
# asm 2: cmp <i=r3,#64
cmp r3,#64
# qhasm: goto pad if unsigned<
blo ._pad
# qhasm: c -= 64
# asm 1: sub <c=int32#1,<c=int32#1,#64
# asm 2: sub <c=r0,<c=r0,#64
sub r0,r0,#64
# qhasm: m = &tmp
# asm 1: lea >m=int32#2,<tmp=stack512#1
# asm 2: lea >m=r1,<tmp=[sp,#0]
add r1,sp,#0
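# note: tmp now holds mlenlow message bytes followed by zero padding,
# and both m and c point at tmp, so the 64-byte block code below runs
# unchanged on the padded final block.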
# qhasm: mlenatleast64:
._mlenatleast64:
# qhasm: new k2k3k6k7
# qhasm: k2k3k6k7 bot = k2k3k6k7_stack bot
# asm 1: vldr <k2k3k6k7=reg128#2%bot,<k2k3k6k7_stack=stack128#5
# asm 2: vldr <k2k3k6k7=d2,<k2k3k6k7_stack=[sp,#160]
vldr d2,[sp,#160]
# qhasm: k2k3k6k7 top = k2k3k6k7_stack top
# asm 1: vldr <k2k3k6k7=reg128#2%top,<k2k3k6k7_stack=stack128#5
# asm 2: vldr <k2k3k6k7=d3,<k2k3k6k7_stack=[sp,#168]
vldr d3,[sp,#168]
# qhasm: new k1n1k7k2
# qhasm: k1n1k7k2 bot = k1n1k7k2_stack bot
# asm 1: vldr <k1n1k7k2=reg128#5%bot,<k1n1k7k2_stack=stack128#6
# asm 2: vldr <k1n1k7k2=d8,<k1n1k7k2_stack=[sp,#176]
vldr d8,[sp,#176]
# qhasm: k1n1k7k2 top = k1n1k7k2_stack top
# asm 1: vldr <k1n1k7k2=reg128#5%top,<k1n1k7k2_stack=stack128#6
# asm 2: vldr <k1n1k7k2=d9,<k1n1k7k2_stack=[sp,#184]
vldr d9,[sp,#184]
# qhasm: n2n3n3n2 = n2n3n3n2[0,1] n2n3n3n2[1] n2n3n3n2[0]
# asm 1: vext.32 <n2n3n3n2=reg128#1%top,<n2n3n3n2=reg128#1%bot,<n2n3n3n2=reg128#1%bot,#1
# asm 2: vext.32 <n2n3n3n2=d1,<n2n3n3n2=d0,<n2n3n3n2=d0,#1
vext.32 d1,d0,d0,#1
# qhasm: new start2
# qhasm: start2 = start2[0,1] k1n1k7k2[0,1]
# asm 1: vmov <start2=reg128#6%top,<k1n1k7k2=reg128#5%bot
# asm 2: vmov <start2=d11,<k1n1k7k2=d8
vmov d11,d8
# qhasm: start2 = n2n3n3n2[3] k2k3k6k7[2] start2[2,3]
# asm 1: vext.32 <start2=reg128#6%bot,<n2n3n3n2=reg128#1%top,<k2k3k6k7=reg128#2%top,#1
# asm 2: vext.32 <start2=d10,<n2n3n3n2=d1,<k2k3k6k7=d3,#1
vext.32 d10,d1,d3,#1
# qhasm: new start3
# qhasm: start3 = start3[0,1] k1n1k7k2[2,3]
# asm 1: vmov <start3=reg128#7%top,<k1n1k7k2=reg128#5%top
# asm 2: vmov <start3=d13,<k1n1k7k2=d9
vmov d13,d9
# qhasm: start3 = k2k3k6k7[1] n2n3n3n2[2] start3[2,3]
# asm 1: vext.32 <start3=reg128#7%bot,<k2k3k6k7=reg128#2%bot,<n2n3n3n2=reg128#1%top,#1
# asm 2: vext.32 <start3=d12,<k2k3k6k7=d2,<n2n3n3n2=d1,#1
vext.32 d12,d2,d1,#1
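# note: the vmov/vext shuffles above finish building start2 and start3;
# start0..start3 hold the 16-word Salsa20 input state arranged along
# its diagonals, the layout the vectorized rounds below operate on.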
# qhasm: diag0 = start0
# asm 1: vmov >diag0=reg128#2,<start0=reg128#3
# asm 2: vmov >diag0=q1,<start0=q2
vmov q1,q2
# qhasm: diag1 = start1
# asm 1: vmov >diag1=reg128#5,<start1=reg128#4
# asm 2: vmov >diag1=q4,<start1=q3
vmov q4,q3
# qhasm: diag2 = start2
# asm 1: vmov >diag2=reg128#8,<start2=reg128#6
# asm 2: vmov >diag2=q7,<start2=q5
vmov q7,q5
# qhasm: diag3 = start3
# asm 1: vmov >diag3=reg128#9,<start3=reg128#7
# asm 2: vmov >diag3=q8,<start3=q6
vmov q8,q6
# qhasm: 2x nextblock = 0xff
# asm 1: vmov.i64 >nextblock=reg128#10,#0xff
# asm 2: vmov.i64 >nextblock=q9,#0xff
vmov.i64 q9,#0xff
# qhasm: 4x nextblock unsigned>>= 7
# asm 1: vshr.u32 >nextblock=reg128#10,<nextblock=reg128#10,#7
# asm 2: vshr.u32 >nextblock=q9,<nextblock=q9,#7
vshr.u32 q9,q9,#7
# qhasm: 2x n2n3n3n2 += nextblock
# asm 1: vadd.i64 >n2n3n3n2=reg128#1,<n2n3n3n2=reg128#1,<nextblock=reg128#10
# asm 2: vadd.i64 >n2n3n3n2=q0,<n2n3n3n2=q0,<nextblock=q9
vadd.i64 q0,q0,q9
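# note: block-counter increment. vmov.i64 immediates can only set each
# byte to 0x00 or 0xff, so the per-64-bit-lane constant 1 is built as
# 0xff shifted right by 7; vadd.i64 then bumps both copies of the
# 64-bit counter kept in n2n3n3n2.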
# qhasm: i = 12
# asm 1: ldr >i=int32#4,=12
# asm 2: ldr >i=r3,=12
ldr r3,=12
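# note: i counts remaining rounds. each pass through mainloop1 performs
# one double round (a column round then a row round) and subtracts 2,
# so twelve rounds of Salsa20/12 take six iterations.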
# qhasm: mainloop1:
._mainloop1:
# qhasm: 4x a0 = diag1 + diag0
# asm 1: vadd.i32 >a0=reg128#10,<diag1=reg128#5,<diag0=reg128#2
# asm 2: vadd.i32 >a0=q9,<diag1=q4,<diag0=q1
vadd.i32 q9,q4,q1
# qhasm: 4x b0 = a0 << 7
# asm 1: vshl.i32 >b0=reg128#11,<a0=reg128#10,#7
# asm 2: vshl.i32 >b0=q10,<a0=q9,#7
vshl.i32 q10,q9,#7
# qhasm: 4x b0 insert= a0 >> 25
# asm 1: vsri.i32 <b0=reg128#11,<a0=reg128#10,#25
# asm 2: vsri.i32 <b0=q10,<a0=q9,#25
vsri.i32 q10,q9,#25
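# note: vshl followed by vsri is the NEON rotate idiom: b0 ends up as
# (a0 << 7) | (a0 >> 25), i.e. each 32-bit lane of a0 rotated left by
# 7, matching the two qhasm lines above.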
# qhasm: diag3 ^= b0
# asm 1: veor >diag3=reg128#9,<diag3=reg128#9,<b0=reg128#11
# asm 2: veor >diag3=q8,<diag3=q8,<b0=q10
veor q8,q8,q10
# qhasm: 4x a1 = diag0 + diag3
# asm 1: vadd.i32 >a1=reg128#10,<diag0=reg128#2,<diag3=reg128#9
# asm 2: vadd.i32 >a1=q9,<diag0=q1,<diag3=q8
vadd.i32 q9,q1,q8
# qhasm: 4x b1 = a1 << 9
# asm 1: vshl.i32 >b1=reg128#11,<a1=reg128#10,#9
# asm 2: vshl.i32 >b1=q10,<a1=q9,#9
vshl.i32 q10,q9,#9
# qhasm: 4x b1 insert= a1 >> 23
# asm 1: vsri.i32 <b1=reg128#11,<a1=reg128#10,#23
# asm 2: vsri.i32 <b1=q10,<a1=q9,#23
vsri.i32 q10,q9,#23
# qhasm: diag2 ^= b1
# asm 1: veor >diag2=reg128#8,<diag2=reg128#8,<b1=reg128#11
# asm 2: veor >diag2=q7,<diag2=q7,<b1=q10
veor q7,q7,q10
# qhasm: 4x a2 = diag3 + diag2
# asm 1: vadd.i32 >a2=reg128#10,<diag3=reg128#9,<diag2=reg128#8
# asm 2: vadd.i32 >a2=q9,<diag3=q8,<diag2=q7
vadd.i32 q9,q8,q7
# qhasm: diag3 = diag3[3] diag3[0,1,2]
# asm 1: vext.32 >diag3=reg128#9,<diag3=reg128#9,<diag3=reg128#9,#3
# asm 2: vext.32 >diag3=q8,<diag3=q8,<diag3=q8,#3
vext.32 q8,q8,q8,#3
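# note: the vext/vswp lane rotations such as the one above realign the
# diagonals between quarter-round steps; they are interleaved with the
# arithmetic instead of being done as a separate shuffle pass.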
# qhasm: 4x b2 = a2 << 13
# asm 1: vshl.i32 >b2=reg128#11,<a2=reg128#10,#13
# asm 2: vshl.i32 >b2=q10,<a2=q9,#13
vshl.i32 q10,q9,#13
# qhasm: 4x b2 insert= a2 >> 19
# asm 1: vsri.i32 <b2=reg128#11,<a2=reg128#10,#19
# asm 2: vsri.i32 <b2=q10,<a2=q9,#19
vsri.i32 q10,q9,#19
# qhasm: diag1 ^= b2
# asm 1: veor >diag1=reg128#5,<diag1=reg128#5,<b2=reg128#11
# asm 2: veor >diag1=q4,<diag1=q4,<b2=q10
veor q4,q4,q10
# qhasm: 4x a3 = diag2 + diag1
# asm 1: vadd.i32 >a3=reg128#10,<diag2=reg128#8,<diag1=reg128#5
# asm 2: vadd.i32 >a3=q9,<diag2=q7,<diag1=q4
vadd.i32 q9,q7,q4
# qhasm: diag2 = diag2[2,3] diag2[0,1]
# asm 1: vswp <diag2=reg128#8%bot,<diag2=reg128#8%top
# asm 2: vswp <diag2=d14,<diag2=d15
vswp d14,d15
# qhasm: 4x b3 = a3 << 18
# asm 1: vshl.i32 >b3=reg128#11,<a3=reg128#10,#18
# asm 2: vshl.i32 >b3=q10,<a3=q9,#18
vshl.i32 q10,q9,#18
# qhasm: 4x b3 insert= a3 >> 14
# asm 1: vsri.i32 <b3=reg128#11,<a3=reg128#10,#14
# asm 2: vsri.i32 <b3=q10,<a3=q9,#14
vsri.i32 q10,q9,#14
# qhasm: diag1 = diag1[1,2,3] diag1[0]
# asm 1: vext.32 >diag1=reg128#5,<diag1=reg128#5,<diag1=reg128#5,#1
# asm 2: vext.32 >diag1=q4,<diag1=q4,<diag1=q4,#1
vext.32 q4,q4,q4,#1
# qhasm: diag0 ^= b3
# asm 1: veor >diag0=reg128#2,<diag0=reg128#2,<b3=reg128#11
# asm 2: veor >diag0=q1,<diag0=q1,<b3=q10
veor q1,q1,q10
# qhasm: 4x a0 = diag3 + diag0
# asm 1: vadd.i32 >a0=reg128#10,<diag3=reg128#9,<diag0=reg128#2
# asm 2: vadd.i32 >a0=q9,<diag3=q8,<diag0=q1
vadd.i32 q9,q8,q1
# qhasm: 4x b0 = a0 << 7
# asm 1: vshl.i32 >b0=reg128#11,<a0=reg128#10,#7
# asm 2: vshl.i32 >b0=q10,<a0=q9,#7
vshl.i32 q10,q9,#7
# qhasm: 4x b0 insert= a0 >> 25
# asm 1: vsri.i32 <b0=reg128#11,<a0=reg128#10,#25
# asm 2: vsri.i32 <b0=q10,<a0=q9,#25
vsri.i32 q10,q9,#25
# qhasm: diag1 ^= b0
# asm 1: veor >diag1=reg128#5,<diag1=reg128#5,<b0=reg128#11
# asm 2: veor >diag1=q4,<diag1=q4,<b0=q10
veor q4,q4,q10
# qhasm: 4x a1 = diag0 + diag1
# asm 1: vadd.i32 >a1=reg128#10,<diag0=reg128#2,<diag1=reg128#5
# asm 2: vadd.i32 >a1=q9,<diag0=q1,<diag1=q4
vadd.i32 q9,q1,q4
# qhasm: 4x b1 = a1 << 9
# asm 1: vshl.i32 >b1=reg128#11,<a1=reg128#10,#9
# asm 2: vshl.i32 >b1=q10,<a1=q9,#9
vshl.i32 q10,q9,#9
# qhasm: 4x b1 insert= a1 >> 23
# asm 1: vsri.i32 <b1=reg128#11,<a1=reg128#10,#23
# asm 2: vsri.i32 <b1=q10,<a1=q9,#23
vsri.i32 q10,q9,#23
# qhasm: unsigned>? i -= 2
# asm 1: subs <i=int32#4,<i=int32#4,#2
# asm 2: subs <i=r3,<i=r3,#2
subs r3,r3,#2
# qhasm: diag2 ^= b1
# asm 1: veor >diag2=reg128#8,<diag2=reg128#8,<b1=reg128#11
# asm 2: veor >diag2=q7,<diag2=q7,<b1=q10
veor q7,q7,q10
# qhasm: 4x a2 = diag1 + diag2
# asm 1: vadd.i32 >a2=reg128#10,<diag1=reg128#5,<diag2=reg128#8
# asm 2: vadd.i32 >a2=q9,<diag1=q4,<diag2=q7
vadd.i32 q9,q4,q7
# qhasm: diag1 = diag1[3] diag1[0,1,2]
# asm 1: vext.32 >diag1=reg128#5,<diag1=reg128#5,<diag1=reg128#5,#3
# asm 2: vext.32 >diag1=q4,<diag1=q4,<diag1=q4,#3
vext.32 q4,q4,q4,#3
# qhasm: 4x b2 = a2 << 13
# asm 1: vshl.i32 >b2=reg128#11,<a2=reg128#10,#13
# asm 2: vshl.i32 >b2=q10,<a2=q9,#13
vshl.i32 q10,q9,#13
# qhasm: 4x b2 insert= a2 >> 19
# asm 1: vsri.i32 <b2=reg128#11,<a2=reg128#10,#19
# asm 2: vsri.i32 <b2=q10,<a2=q9,#19
vsri.i32 q10,q9,#19
# qhasm: diag3 ^= b2
# asm 1: veor >diag3=reg128#9,<diag3=reg128#9,<b2=reg128#11
# asm 2: veor >diag3=q8,<diag3=q8,<b2=q10
veor q8,q8,q10
# qhasm: 4x a3 = diag2 + diag3
# asm 1: vadd.i32 >a3=reg128#10,<diag2=reg128#8,<diag3=reg128#9
# asm 2: vadd.i32 >a3=q9,<diag2=q7,<diag3=q8
vadd.i32 q9,q7,q8
# qhasm: diag2 = diag2[2,3] diag2[0,1]
# asm 1: vswp <diag2=reg128#8%bot,<diag2=reg128#8%top
# asm 2: vswp <diag2=d14,<diag2=d15
vswp d14,d15
# qhasm: 4x b3 = a3 << 18
# asm 1: vshl.i32 >b3=reg128#11,<a3=reg128#10,#18
# asm 2: vshl.i32 >b3=q10,<a3=q9,#18
vshl.i32 q10,q9,#18
# qhasm: 4x b3 insert= a3 >> 14
# asm 1: vsri.i32 <b3=reg128#11,<a3=reg128#10,#14
# asm 2: vsri.i32 <b3=q10,<a3=q9,#14
vsri.i32 q10,q9,#14
# qhasm: diag3 = diag3[1,2,3] diag3[0]
# asm 1: vext.32 >diag3=reg128#9,<diag3=reg128#9,<diag3=reg128#9,#1
# asm 2: vext.32 >diag3=q8,<diag3=q8,<diag3=q8,#1
vext.32 q8,q8,q8,#1
# qhasm: diag0 ^= b3
# asm 1: veor >diag0=reg128#2,<diag0=reg128#2,<b3=reg128#11
# asm 2: veor >diag0=q1,<diag0=q1,<b3=q10
veor q1,q1,q10
# qhasm: goto mainloop1 if unsigned>
bhi ._mainloop1
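# note: rounds complete. the vadd.i32 instructions below are the
# Salsa20 feed-forward, adding the saved input state start0..start3
# word-wise to the round output before it is used as keystream.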
# qhasm: 2x abab = 0xffffffff
# asm 1: vmov.i64 >abab=reg128#10,#0xffffffff
# asm 2: vmov.i64 >abab=q9,#0xffffffff
vmov.i64 q9,#0xffffffff
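# note: abab has its 32-bit lanes set to ~0,0,~0,0 -- an alternating
# select mask -- and drives the vbif selects below that merge the
# diagonal vectors back toward row order.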
# qhasm: 4x x0x5x10x15 = diag0 + start0
# asm 1: vadd.i32 >x0x5x10x15=reg128#2,<diag0=reg128#2,<start0=reg128#3
# asm 2: vadd.i32 >x0x5x10x15=q1,<diag0=q1,<start0=q2
vadd.i32 q1,q1,q2
# qhasm: 4x x12x1x6x11 = diag1 + start1
# asm 1: vadd.i32 >x12x1x6x11=reg128#5,<diag1=reg128#5,<start1=reg128#4
# asm 2: vadd.i32 >x12x1x6x11=q4,<diag1=q4,<start1=q3
vadd.i32 q4,q4,q3
# qhasm: 4x x8x13x2x7 = diag2 + start2
# asm 1: vadd.i32 >x8x13x2x7=reg128#6,<diag2=reg128#8,<start2=reg128#6
# asm 2: vadd.i32 >x8x13x2x7=q5,<diag2=q7,<start2=q5
vadd.i32 q5,q7,q5
# qhasm: 4x x4x9x14x3 = diag3 + start3
# asm 1: vadd.i32 >x4x9x14x3=reg128#7,<diag3=reg128#9,<start3=reg128#7
# asm 2: vadd.i32 >x4x9x14x3=q6,<diag3=q8,<start3=q6
vadd.i32 q6,q8,q6
# qhasm: x0x1x10x11 = x0x5x10x15
# asm 1: vmov >x0x1x10x11=reg128#8,<x0x5x10x15=reg128#2
# asm 2: vmov >x0x1x10x11=q7,<x0x5x10x15=q1
vmov q7,q1
# qhasm: x12x13x6x7 = x12x1x6x11
# asm 1: vmov >x12x13x6x7=reg128#9,<x12x1x6x11=reg128#5
# asm 2: vmov >x12x13x6x7=q8,<x12x1x6x11=q4
vmov q8,q4
# qhasm: x8x9x2x3 = x8x13x2x7
# asm 1: vmov >x8x9x2x3=reg128#11,<x8x13x2x7=reg128#6
# asm 2: vmov >x8x9x2x3=q10,<x8x13x2x7=q5
vmov q10,q5
# qhasm: x4x5x14x15 = x4x9x14x3
# asm 1: vmov >x4x5x14x15=reg128#12,<x4x9x14x3=reg128#7
# asm 2: vmov >x4x5x14x15=q11,<x4x9x14x3=q6
vmov q11,q6
# qhasm: x0x1x10x11 = (abab & x0x1x10x11) | (~abab & x12x1x6x11)
# asm 1: vbif <x0x1x10x11=reg128#8,<x12x1x6x11=reg128#5,<abab=reg128#10
# asm 2: vbif <x0x1x10x11=q7,<x12x1x6x11=q4,<abab=q9
vbif q7,q4,q9
# qhasm: x12x13x6x7 = (abab & x12x13x6x7) | (~abab & x8x13x2x7)
# asm 1: vbif <x12x13x6x7=reg128#9,<x8x13x2x7=reg128#6,<abab=reg128#10
# asm 2: vbif <x12x13x6x7=q8,<x8x13x2x7=q5,<abab=q9
vbif q8,q5,q9
# qhasm: x8x9x2x3 = (abab & x8x9x2x3) | (~abab & x4x9x14x3)
# asm 1: vbif <x8x9x2x3=reg128#11,<x4x9x14x3=reg128#7,<abab=reg128#10
# asm 2: vbif <x8x9x2x3=q10,<x4x9x14x3=q6,<abab=q9
vbif q10,q6,q9
# qhasm: x4x5x14x15 = (abab & x4x5x14x15) | (~abab & x0x5x10x15)
# asm 1: vbif <x4x5x14x15=reg128#12,<x0x5x10x15=reg128#2,<abab=reg128#10
# asm 2: vbif <x4x5x14x15=q11,<x0x5x10x15=q1,<abab=q9
vbif q11,q1,q9
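# note: vbif d,n,m computes d = (d & m) | (n & ~m) (bitwise insert
# where the mask is false), exactly the select written out in the qhasm
# comments above; together with the vmov copies below, these four
# selects undo the diagonal layout.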
# qhasm: x0x1x2x3 = x0x1x10x11
# asm 1: vmov >x0x1x2x3=reg128#2,<x0x1x10x11=reg128#8
# asm 2: vmov >x0x1x2x3=q1,<x0x1x10x11=q7
vmov q1,q7
# qhasm: x4x5x6x7 = x4x5x14x15
# asm 1: vmov >x4x5x6x7=reg128#5,<x4x5x14x15=reg128#12
# asm 2: vmov >x4x5x6x7=q4,<x4x5x14x15=q11
vmov q4,q11
# qhasm: x8x9x10x11 = x8x9x2x3
# asm 1: vmov >x8x9x10x11=reg128#6,<x8x9x2x3=reg128#11
# asm 2: vmov >x8x9x10x11=q5,<x8x9x2x3=q10
vmov q5,q10
# qhasm: x12x13x14x15 = x12x13x6x7
# asm 1: vmov >x12x13x14x15=reg128#7,<x12x13x6x7=reg128#9
# asm 2: vmov >x12x13x14x15=q6,<x12x13x6x7=q8
vmov q6,q8
# qhasm: x0x1x2x3 = x0x1x2x3[0,1] x8x9x2x3[2,3]
# asm 1: vmov <x0x1x2x3=reg128#2%top,<x8x9x2x3=reg128#11%top
# asm 2: vmov <x0x1x2x3=d3,<x8x9x2x3=d21
vmov d3,d21
# qhasm: x4x5x6x7 = x4x5x6x7[0,1] x12x13x6x7[2,3]
# asm 1: vmov <x4x5x6x7=reg128#5%top,<x12x13x6x7=reg128#9%top
# asm 2: vmov <x4x5x6x7=d9,<x12x13x6x7=d17
vmov d9,d17
# qhasm: x8x9x10x11 = x8x9x10x11[0,1] x0x1x10x11[2,3]
# asm 1: vmov <x8x9x10x11=reg128#6%top,<x0x1x10x11=reg128#8%top
# asm 2: vmov <x8x9x10x11=d11,<x0x1x10x11=d15
vmov d11,d15
# qhasm: x12x13x14x15 = x12x13x14x15[0,1] x4x5x14x15[2,3]
# asm 1: vmov <x12x13x14x15=reg128#7%top,<x4x5x14x15=reg128#12%top
# asm 2: vmov <x12x13x14x15=d13,<x4x5x14x15=d23
vmov d13,d23
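# note: de-interleaving is done: x0x1x2x3 .. x12x13x14x15 now hold the
# 64-byte keystream block in natural word order, ready to be xored with
# the message (if any) and stored.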
# qhasm: =? m - 0
# asm 1: cmp <m=int32#2,#0
# asm 2: cmp <m=r1,#0
cmp r1,#0
# qhasm: goto nomessage1 if =
beq ._nomessage1
# qhasm: m0m1m2m3 = mem128[m]
# asm 1: vld1.8 {>m0m1m2m3=reg128#8%bot->m0m1m2m3=reg128#8%top},[<m=int32#2]
# asm 2: vld1.8 {>m0m1m2m3=d14->m0m1m2m3=d15},[<m=r1]
vld1.8 {d14-d15},[r1]
# qhasm: m += 16
# asm 1: add <m=int32#2,<m=int32#2,#16
# asm 2: add <m=r1,<m=r1,#16
add r1,r1,#16
# qhasm: m4m5m6m7 = mem128[m]
# asm 1: vld1.8 {>m4m5m6m7=reg128#9%bot->m4m5m6m7=reg128#9%top},[<m=int32#2]
# asm 2: vld1.8 {>m4m5m6m7=d16->m4m5m6m7=d17},[<m=r1]
vld1.8 {d16-d17},[r1]
# qhasm: m += 16
# asm 1: add <m=int32#2,<m=int32#2,#16
# asm 2: add <m=r1,<m=r1,#16
add r1,r1,#16
# qhasm: m8m9m10m11 = mem128[m]
# asm 1: vld1.8 {>m8m9m10m11=reg128#10%bot->m8m9m10m11=reg128#10%top},[<m=int32#2]
# asm 2: vld1.8 {>m8m9m10m11=d18->m8m9m10m11=d19},[<m=r1]
vld1.8 {d18-d19},[r1]
# qhasm: m += 16
# asm 1: add <m=int32#2,<m=int32#2,#16
# asm 2: add <m=r1,<m=r1,#16
add r1,r1,#16
# qhasm: m12m13m14m15 = mem128[m]
# asm 1: vld1.8 {>m12m13m14m15=reg128#11%bot->m12m13m14m15=reg128#11%top},[<m=int32#2]
# asm 2: vld1.8 {>m12m13m14m15=d20->m12m13m14m15=d21},[<m=r1]
vld1.8 {d20-d21},[r1]
# qhasm: m += 16
# asm 1: add <m=int32#2,<m=int32#2,#16
# asm 2: add <m=r1,<m=r1,#16
add r1,r1,#16
# qhasm: x0x1x2x3 ^= m0m1m2m3
# asm 1: veor >x0x1x2x3=reg128#2,<x0x1x2x3=reg128#2,<m0m1m2m3=reg128#8
# asm 2: veor >x0x1x2x3=q1,<x0x1x2x3=q1,<m0m1m2m3=q7
veor q1,q1,q7
# qhasm: x4x5x6x7 ^= m4m5m6m7
# asm 1: veor >x4x5x6x7=reg128#5,<x4x5x6x7=reg128#5,<m4m5m6m7=reg128#9
# asm 2: veor >x4x5x6x7=q4,<x4x5x6x7=q4,<m4m5m6m7=q8
veor q4,q4,q8
# qhasm: x8x9x10x11 ^= m8m9m10m11
# asm 1: veor >x8x9x10x11=reg128#6,<x8x9x10x11=reg128#6,<m8m9m10m11=reg128#10
# asm 2: veor >x8x9x10x11=q5,<x8x9x10x11=q5,<m8m9m10m11=q9
veor q5,q5,q9
# qhasm: x12x13x14x15 ^= m12m13m14m15
# asm 1: veor >x12x13x14x15=reg128#7,<x12x13x14x15=reg128#7,<m12m13m14m15=reg128#11
# asm 2: veor >x12x13x14x15=q6,<x12x13x14x15=q6,<m12m13m14m15=q10
veor q6,q6,q10
# qhasm: nomessage1:
._nomessage1:
# qhasm: mem128[c] = x0x1x2x3
# asm 1: vst1.8 {<x0x1x2x3=reg128#2%bot-<x0x1x2x3=reg128#2%top},[<c=int32#1]
# asm 2: vst1.8 {<x0x1x2x3=d2-<x0x1x2x3=d3},[<c=r0]
vst1.8 {d2-d3},[r0]
# qhasm: c += 16
# asm 1: add <c=int32#1,<c=int32#1,#16
# asm 2: add <c=r0,<c=r0,#16
add r0,r0,#16
# qhasm: mem128[c] = x4x5x6x7
# asm 1: vst1.8 {<x4x5x6x7=reg128#5%bot-<x4x5x6x7=reg128#5%top},[<c=int32#1]
# asm 2: vst1.8 {<x4x5x6x7=d8-<x4x5x6x7=d9},[<c=r0]
vst1.8 {d8-d9},[r0]
# qhasm: c += 16
# asm 1: add <c=int32#1,<c=int32#1,#16
# asm 2: add <c=r0,<c=r0,#16
add r0,r0,#16
# qhasm: mem128[c] = x8x9x10x11
# asm 1: vst1.8 {<x8x9x10x11=reg128#6%bot-<x8x9x10x11=reg128#6%top},[<c=int32#1]
# asm 2: vst1.8 {<x8x9x10x11=d10-<x8x9x10x11=d11},[<c=r0]
vst1.8 {d10-d11},[r0]
# qhasm: c += 16
# asm 1: add <c=int32#1,<c=int32#1,#16
# asm 2: add <c=r0,<c=r0,#16
add r0,r0,#16
# qhasm: mem128[c] = x12x13x14x15
# asm 1: vst1.8 {<x12x13x14x15=reg128#7%bot-<x12x13x14x15=reg128#7%top},[<c=int32#1]
# asm 2: vst1.8 {<x12x13x14x15=d12-<x12x13x14x15=d13},[<c=r0]
vst1.8 {d12-d13},[r0]
# qhasm: c += 16
# asm 1: add <c=int32#1,<c=int32#1,#16
# asm 2: add <c=r0,<c=r0,#16
add r0,r0,#16
# qhasm: unsigned<? mlenlow - 64
# asm 1: cmp <mlenlow=int32#3,#64
# asm 2: cmp <mlenlow=r2,#64
cmp r2,#64
# qhasm: goto xmlenatleast64 if !unsigned<
bhs ._xmlenatleast64
# qhasm: i = 0
# asm 1: ldr >i=int32#4,=0
# asm 2: ldr >i=r3,=0
ldr r3,=0
# qhasm: m = c - 64
# asm 1: sub >m=int32#2,<c=int32#1,#64
# asm 2: sub >m=r1,<c=r0,#64
sub r1,r0,#64
# qhasm: c = savec
# asm 1: ldr >c=int32#1,<savec=stack32#1
# asm 2: ldr >c=r0,<savec=[sp,#64]
ldr r0,[sp,#64]
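# note: if this final block was shorter than 64 bytes, its output went
# to tmp; m is pointed back at those 64 bytes and c is restored from
# savec, so the ccopy loop below moves only the mlenlow valid bytes to
# the real destination.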
# qhasm: ccopy:
._ccopy:
# qhasm: ci = mem8[m + 0]
# asm 1: ldrb >ci=int32#5,[<m=int32#2,#0]
# asm 2: ldrb >ci=r4,[<m=r1,#0]
ldrb r4,[r1,#0]
# qhasm: mem8[c + 0] = ci
# asm 1: strb <ci=int32#5,[<c=int32#1,#0]
# asm 2: strb <ci=r4,[<c=r0,#0]
strb r4,[r0,#0]
# qhasm: m += 1
# asm 1: add <m=int32#2,<m=int32#2,#1
# asm 2: add <m=r1,<m=r1,#1
add r1,r1,#1
# qhasm: c += 1
# asm 1: add <c=int32#1,<c=int32#1,#1
# asm 2: add <c=r0,<c=r0,#1
add r0,r0,#1
# qhasm: i += 1
# asm 1: add <i=int32#4,<i=int32#4,#1
# asm 2: add <i=r3,<i=r3,#1
add r3,r3,#1
# qhasm: unsigned<? i - mlenlow
# asm 1: cmp <i=int32#4,<mlenlow=int32#3
# asm 2: cmp <i=r3,<mlenlow=r2
cmp r3,r2
# qhasm: goto ccopy if unsigned<
blo ._ccopy
# qhasm: xmlenatleast64:
._xmlenatleast64:
# qhasm: unsigned>? mlenlow -= 64
# asm 1: subs <mlenlow=int32#3,<mlenlow=int32#3,#64
# asm 2: subs <mlenlow=r2,<mlenlow=r2,#64
subs r2,r2,#64
# qhasm: goto mlenatleast1 if unsigned>
bhi ._mlenatleast1
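# note: subtract this block's 64 bytes and loop back while any remain;
# otherwise fall through to the epilogue at done.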
# qhasm: done:
._done:
# qhasm: new caller_r4
# qhasm: caller_r4 = stack_r4
# asm 1: ldr >caller_r4=int32#5,<stack_r4=stack32#2
# asm 2: ldr >caller_r4=r4,<stack_r4=[sp,#68]
ldr r4,[sp,#68]
# qhasm: new caller_q4
# qhasm: new caller_q5
# qhasm: new caller_q6
# qhasm: new caller_q7
# qhasm: caller_q4 bot = stack_q4 bot
# asm 1: vldr <caller_q4=reg128#5%bot,<stack_q4=stack128#1
# asm 2: vldr <caller_q4=d8,<stack_q4=[sp,#96]
vldr d8,[sp,#96]
# qhasm: caller_q4 top = stack_q4 top
# asm 1: vldr <caller_q4=reg128#5%top,<stack_q4=stack128#1
# asm 2: vldr <caller_q4=d9,<stack_q4=[sp,#104]
vldr d9,[sp,#104]
# qhasm: caller_q5 bot = stack_q5 bot
# asm 1: vldr <caller_q5=reg128#6%bot,<stack_q5=stack128#2
# asm 2: vldr <caller_q5=d10,<stack_q5=[sp,#112]
vldr d10,[sp,#112]
# qhasm: caller_q5 top = stack_q5 top
# asm 1: vldr <caller_q5=reg128#6%top,<stack_q5=stack128#2
# asm 2: vldr <caller_q5=d11,<stack_q5=[sp,#120]
vldr d11,[sp,#120]
# qhasm: caller_q6 bot = stack_q6 bot
# asm 1: vldr <caller_q6=reg128#7%bot,<stack_q6=stack128#3
# asm 2: vldr <caller_q6=d12,<stack_q6=[sp,#128]
vldr d12,[sp,#128]
# qhasm: caller_q6 top = stack_q6 top
# asm 1: vldr <caller_q6=reg128#7%top,<stack_q6=stack128#3
# asm 2: vldr <caller_q6=d13,<stack_q6=[sp,#136]
vldr d13,[sp,#136]
# qhasm: caller_q7 bot = stack_q7 bot
# asm 1: vldr <caller_q7=reg128#8%bot,<stack_q7=stack128#4
# asm 2: vldr <caller_q7=d14,<stack_q7=[sp,#144]
vldr d14,[sp,#144]
# qhasm: caller_q7 top = stack_q7 top
# asm 1: vldr <caller_q7=reg128#8%top,<stack_q7=stack128#4
# asm 2: vldr <caller_q7=d15,<stack_q7=[sp,#152]
vldr d15,[sp,#152]
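# note: epilogue. r4 and q4-q7 (d8-d15) are callee-saved under the ARM
# AAPCS, so they are restored from the stack before the 256-byte frame
# is released and 0 is returned.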
# qhasm: int32 result
# qhasm: result = 0
# asm 1: ldr >result=int32#1,=0
# asm 2: ldr >result=r0,=0
ldr r0,=0
# qhasm: return result
add sp,sp,#256
bx lr