Browse Source

more cleanup

Adam Ierymenko 6 years ago
parent
commit
7f301c44b7

+ 0 - 6
ext/arm32-neon-salsa2012-asm/README.md

@@ -1,6 +0,0 @@
-ARM NEON (32-bit) ASM implementation of Salsa20/12
-======
-
-This is from [supercop](http://bench.cr.yp.to/supercop.html) and was originally written by Daniel J. Bernstein. The code is in the public domain, like the rest of Salsa20. It is much faster than the naive implementation.
-
-It is included automatically in 32-bit Linux ARM builds. It will likely not work on 64-bit ARM as-is and would, at a minimum, need to be ported. Unfortunately, that keeps it out of the mobile versions for now, since those are all moving to 64-bit.

+ 0 - 25
ext/arm32-neon-salsa2012-asm/salsa2012.h

@@ -1,25 +0,0 @@
-#ifndef ZT_SALSA2012_ARM32NEON_ASM
-#define ZT_SALSA2012_ARM32NEON_ASM
-
-// zt_arm_has_neon() evaluates to nonzero when NEON is reported as available
-// and to zero otherwise. The fallback branches use plain integer constants
-// rather than `true`/`false` so that this header also compiles as C without
-// <stdbool.h> (the declaration below is already wrapped for C linkage).
-#if defined(__linux__) || defined(linux) || defined(__LINUX__) || defined(__linux)
-// Linux: query the hardware capability bits from the auxiliary vector at run time.
-#include <sys/auxv.h>
-#include <asm/hwcap.h>
-#define zt_arm_has_neon() ((getauxval(AT_HWCAP) & HWCAP_NEON) != 0)
-#elif defined(__ARM_NEON__) || defined(__ARM_NEON)
-// Other platforms: fall back to the compiler's compile-time NEON feature macros.
-#define zt_arm_has_neon() (1)
-#else
-#define zt_arm_has_neon() (0)
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Salsa20/12 XOR routine implemented in salsa2012.s (ARM NEON assembly).
-//   c   - ciphertext (output) buffer
-//   m   - message (input) buffer, or NULL
-//         NOTE(review): NULL presumably requests raw keystream output -- confirm against the assembly
-//   len - length in bytes
-//   n   - nonce (8 bytes)
-//   k   - key (32 bytes)
-// The meaning of the int return value is not visible from this header.
-extern int zt_salsa2012_armneon3_xor(unsigned char *c,const unsigned char *m,unsigned long long len,const unsigned char *n,const unsigned char *k);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif

+ 0 - 2231
ext/arm32-neon-salsa2012-asm/salsa2012.s

@@ -1,2231 +0,0 @@
-
-# qhasm: int32 input_0
-
-# qhasm: int32 input_1
-
-# qhasm: int32 input_2
-
-# qhasm: int32 input_3
-
-# qhasm: stack32 input_4
-
-# qhasm: stack32 input_5
-
-# qhasm: stack32 input_6
-
-# qhasm: stack32 input_7
-
-# qhasm: int32 caller_r4
-
-# qhasm: int32 caller_r5
-
-# qhasm: int32 caller_r6
-
-# qhasm: int32 caller_r7
-
-# qhasm: int32 caller_r8
-
-# qhasm: int32 caller_r9
-
-# qhasm: int32 caller_r10
-
-# qhasm: int32 caller_r11
-
-# qhasm: int32 caller_r14
-
-# qhasm: reg128 caller_q4
-
-# qhasm: reg128 caller_q5
-
-# qhasm: reg128 caller_q6
-
-# qhasm: reg128 caller_q7
-
-# qhasm: startcode
-.fpu neon
-.text
-
-# qhasm: constant sigma:
-# Four 32-bit constant words. Stored little-endian, their bytes spell the
-# ASCII string "expand 32-byte k" (0x61707865, 0x3320646e, 0x79622d32,
-# 0x6b206574). The function below loads all 16 bytes into start0 via
-# "ldr r12,=sigma" followed by a vld1.8 into d4-d5.
-.align 2
-sigma:
-
-# qhasm:   const32 1634760805
-# 0x61707865 = "expa"
-.word 1634760805
-
-# qhasm:   const32 857760878
-# 0x3320646e = "nd 3"
-.word 857760878
-
-# qhasm:   const32 2036477234
-# 0x79622d32 = "2-by"
-.word 2036477234
-
-# qhasm:   const32 1797285236
-# 0x6b206574 = "te k"
-.word 1797285236
-
-# qhasm: int128 abab
-
-# qhasm: int128 diag0
-
-# qhasm: int128 diag1
-
-# qhasm: int128 diag2
-
-# qhasm: int128 diag3
-
-# qhasm: int128 a0
-
-# qhasm: int128 a1
-
-# qhasm: int128 a2
-
-# qhasm: int128 a3
-
-# qhasm: int128 b0
-
-# qhasm: int128 b1
-
-# qhasm: int128 b2
-
-# qhasm: int128 b3
-
-# qhasm: int128 next_diag0
-
-# qhasm: int128 next_diag1
-
-# qhasm: int128 next_diag2
-
-# qhasm: int128 next_diag3
-
-# qhasm: int128 next_a0
-
-# qhasm: int128 next_a1
-
-# qhasm: int128 next_a2
-
-# qhasm: int128 next_a3
-
-# qhasm: int128 next_b0
-
-# qhasm: int128 next_b1
-
-# qhasm: int128 next_b2
-
-# qhasm: int128 next_b3
-
-# qhasm: int128 x0x5x10x15
-
-# qhasm: int128 x12x1x6x11
-
-# qhasm: int128 x8x13x2x7
-
-# qhasm: int128 x4x9x14x3
-
-# qhasm: int128 x0x1x10x11
-
-# qhasm: int128 x12x13x6x7
-
-# qhasm: int128 x8x9x2x3
-
-# qhasm: int128 x4x5x14x15
-
-# qhasm: int128 x0x1x2x3
-
-# qhasm: int128 x4x5x6x7
-
-# qhasm: int128 x8x9x10x11
-
-# qhasm: int128 x12x13x14x15
-
-# qhasm: int128 m0m1m2m3
-
-# qhasm: int128 m4m5m6m7
-
-# qhasm: int128 m8m9m10m11
-
-# qhasm: int128 m12m13m14m15
-
-# qhasm: int128 start0
-
-# qhasm: int128 start1
-
-# qhasm: int128 start2
-
-# qhasm: int128 start3
-
-# qhasm: stack128 stack_start3
-
-# qhasm: stack128 next_start2
-
-# qhasm: stack128 next_start3
-
-# qhasm: int128 k0k1k2k3
-
-# qhasm: int128 k4k5k6k7
-
-# qhasm: int128 k1n1k7k2
-
-# qhasm: int128 n2n3n3n2
-
-# qhasm: int128 k2k3k6k7
-
-# qhasm: int128 nextblock
-
-# qhasm: stack128 stack_q4
-
-# qhasm: stack128 stack_q5
-
-# qhasm: stack128 stack_q6
-
-# qhasm: stack128 stack_q7
-
-# qhasm: stack32 stack_r4
-
-# qhasm: stack128 k2k3k6k7_stack
-
-# qhasm: stack128 k1n1k7k2_stack
-
-# qhasm: stack512 tmp
-
-# qhasm: stack32 savec
-
-# qhasm: int32 i
-
-# qhasm: int32 ci
-
-# qhasm: int32 mi
-
-# qhasm: enter zt_salsa2012_armneon3_xor
-.align 2
-.global _zt_salsa2012_armneon3_xor
-.global zt_salsa2012_armneon3_xor
-.type _zt_salsa2012_armneon3_xor STT_FUNC
-.type zt_salsa2012_armneon3_xor STT_FUNC
-_zt_salsa2012_armneon3_xor:
-zt_salsa2012_armneon3_xor:
-sub sp,sp,#256
-
-# qhasm: new stack_q4
-
-# qhasm: new stack_q5
-
-# qhasm: new stack_q6
-
-# qhasm: new stack_q7
-
-# qhasm: stack_q4 bot = caller_q4 bot
-# asm 1: vstr <caller_q4=reg128#5%bot,<stack_q4=stack128#1
-# asm 2: vstr <caller_q4=d8,<stack_q4=[sp,#96]
-vstr d8,[sp,#96]
-
-# qhasm: stack_q4 top = caller_q4 top
-# asm 1: vstr <caller_q4=reg128#5%top,<stack_q4=stack128#1
-# asm 2: vstr <caller_q4=d9,<stack_q4=[sp,#104]
-vstr d9,[sp,#104]
-
-# qhasm: stack_q5 bot = caller_q5 bot
-# asm 1: vstr <caller_q5=reg128#6%bot,<stack_q5=stack128#2
-# asm 2: vstr <caller_q5=d10,<stack_q5=[sp,#112]
-vstr d10,[sp,#112]
-
-# qhasm: stack_q5 top = caller_q5 top
-# asm 1: vstr <caller_q5=reg128#6%top,<stack_q5=stack128#2
-# asm 2: vstr <caller_q5=d11,<stack_q5=[sp,#120]
-vstr d11,[sp,#120]
-
-# qhasm: stack_q6 bot = caller_q6 bot
-# asm 1: vstr <caller_q6=reg128#7%bot,<stack_q6=stack128#3
-# asm 2: vstr <caller_q6=d12,<stack_q6=[sp,#128]
-vstr d12,[sp,#128]
-
-# qhasm: stack_q6 top = caller_q6 top
-# asm 1: vstr <caller_q6=reg128#7%top,<stack_q6=stack128#3
-# asm 2: vstr <caller_q6=d13,<stack_q6=[sp,#136]
-vstr d13,[sp,#136]
-
-# qhasm: stack_q7 bot = caller_q7 bot
-# asm 1: vstr <caller_q7=reg128#8%bot,<stack_q7=stack128#4
-# asm 2: vstr <caller_q7=d14,<stack_q7=[sp,#144]
-vstr d14,[sp,#144]
-
-# qhasm: stack_q7 top = caller_q7 top
-# asm 1: vstr <caller_q7=reg128#8%top,<stack_q7=stack128#4
-# asm 2: vstr <caller_q7=d15,<stack_q7=[sp,#152]
-vstr d15,[sp,#152]
-
-# qhasm: new stack_r4
-
-# qhasm: stack_r4 = caller_r4
-# asm 1: str <caller_r4=int32#5,>stack_r4=stack32#2
-# asm 2: str <caller_r4=r4,>stack_r4=[sp,#68]
-str r4,[sp,#68]
-
-# qhasm: int32 c
-
-# qhasm: c = input_0
-# asm 1: mov >c=int32#1,<input_0=int32#1
-# asm 2: mov >c=r0,<input_0=r0
-mov r0,r0
-
-# qhasm: int32 m
-
-# qhasm: m = input_1
-# asm 1: mov >m=int32#2,<input_1=int32#2
-# asm 2: mov >m=r1,<input_1=r1
-mov r1,r1
-
-# qhasm: int32 mlenlow
-
-# qhasm: mlenlow = input_2
-# asm 1: mov >mlenlow=int32#3,<input_2=int32#3
-# asm 2: mov >mlenlow=r2,<input_2=r2
-mov r2,r2
-
-# qhasm: int32 mlenhigh
-
-# qhasm: mlenhigh = input_3
-# asm 1: mov >mlenhigh=int32#4,<input_3=int32#4
-# asm 2: mov >mlenhigh=r3,<input_3=r3
-mov r3,r3
-
-# qhasm: int32 n
-
-# qhasm: n = input_4
-# asm 1: ldr >n=int32#5,<input_4=stack32#arg1
-# asm 2: ldr >n=r4,<input_4=[sp,#256]
-ldr r4,[sp,#256]
-
-# qhasm: int32 k
-
-# qhasm: k = input_5
-# asm 1: ldr >k=int32#13,<input_5=stack32#arg2
-# asm 2: ldr >k=r12,<input_5=[sp,#260]
-ldr r12,[sp,#260]
-
-# qhasm: k0k1k2k3 = mem128[k]
-# asm 1: vld1.8 {>k0k1k2k3=reg128#1%bot->k0k1k2k3=reg128#1%top},[<k=int32#13]
-# asm 2: vld1.8 {>k0k1k2k3=d0->k0k1k2k3=d1},[<k=r12]
-vld1.8 {d0-d1},[r12]
-
-# qhasm: k += 16
-# asm 1: add <k=int32#13,<k=int32#13,#16
-# asm 2: add <k=r12,<k=r12,#16
-add r12,r12,#16
-
-# qhasm: k4k5k6k7 = mem128[k]
-# asm 1: vld1.8 {>k4k5k6k7=reg128#2%bot->k4k5k6k7=reg128#2%top},[<k=int32#13]
-# asm 2: vld1.8 {>k4k5k6k7=d2->k4k5k6k7=d3},[<k=r12]
-vld1.8 {d2-d3},[r12]
-
-# qhasm: i = sigma
-# asm 1: ldr >i=int32#13,=sigma
-# asm 2: ldr >i=r12,=sigma
-ldr r12,=sigma
-
-# qhasm: start0 = mem128[i]
-# asm 1: vld1.8 {>start0=reg128#3%bot->start0=reg128#3%top},[<i=int32#13]
-# asm 2: vld1.8 {>start0=d4->start0=d5},[<i=r12]
-vld1.8 {d4-d5},[r12]
-
-# qhasm: 2x start1 = 0
-# asm 1: vmov.i64 >start1=reg128#4,#0
-# asm 2: vmov.i64 >start1=q3,#0
-vmov.i64 q3,#0
-
-# qhasm: start1 bot = mem64[n]                            
-# asm 1: vld1.8 {<start1=reg128#4%bot},[<n=int32#5]
-# asm 2: vld1.8 {<start1=d6},[<n=r4]
-vld1.8 {d6},[r4]
-
-# qhasm: start1 = start1[1] start1[0] start1[2,3]         
-# asm 1: vext.32 <start1=reg128#4%bot,<start1=reg128#4%bot,<start1=reg128#4%bot,#1
-# asm 2: vext.32 <start1=d6,<start1=d6,<start1=d6,#1
-vext.32 d6,d6,d6,#1
-
-# qhasm: start1 = start1[0,1] start1[1] k4k5k6k7[0]       
-# asm 1: vext.32 <start1=reg128#4%top,<start1=reg128#4%bot,<k4k5k6k7=reg128#2%bot,#1
-# asm 2: vext.32 <start1=d7,<start1=d6,<k4k5k6k7=d2,#1
-vext.32 d7,d6,d2,#1
-
-# qhasm: new k1n1k7k2
-
-# qhasm: k1n1k7k2 = k0k1k2k3[1] start1[0] k1n1k7k2[2,3]   
-# asm 1: vext.32 <k1n1k7k2=reg128#5%bot,<k0k1k2k3=reg128#1%bot,<start1=reg128#4%bot,#1
-# asm 2: vext.32 <k1n1k7k2=d8,<k0k1k2k3=d0,<start1=d6,#1
-vext.32 d8,d0,d6,#1
-
-# qhasm: k1n1k7k2 = k1n1k7k2[0,1] k4k5k6k7[3] k0k1k2k3[2] 
-# asm 1: vext.32 <k1n1k7k2=reg128#5%top,<k4k5k6k7=reg128#2%top,<k0k1k2k3=reg128#1%top,#1
-# asm 2: vext.32 <k1n1k7k2=d9,<k4k5k6k7=d3,<k0k1k2k3=d1,#1
-vext.32 d9,d3,d1,#1
-
-# qhasm: k2k3k6k7 = k4k5k6k7
-# asm 1: vmov >k2k3k6k7=reg128#6,<k4k5k6k7=reg128#2
-# asm 2: vmov >k2k3k6k7=q5,<k4k5k6k7=q1
-vmov q5,q1
-
-# qhasm: k2k3k6k7 = k0k1k2k3[2,3] k2k3k6k7[2,3]
-# asm 1: vmov <k2k3k6k7=reg128#6%bot,<k0k1k2k3=reg128#1%top
-# asm 2: vmov <k2k3k6k7=d10,<k0k1k2k3=d1
-vmov d10,d1
-
-# qhasm: start1 = k4k5k6k7[1] k0k1k2k3[0] start1[2,3]     
-# asm 1: vext.32 <start1=reg128#4%bot,<k4k5k6k7=reg128#2%bot,<k0k1k2k3=reg128#1%bot,#1
-# asm 2: vext.32 <start1=d6,<k4k5k6k7=d2,<k0k1k2k3=d0,#1
-vext.32 d6,d2,d0,#1
-
-# qhasm: new k2k3k6k7_stack
-
-# qhasm: k2k3k6k7_stack bot = k2k3k6k7 bot
-# asm 1: vstr <k2k3k6k7=reg128#6%bot,<k2k3k6k7_stack=stack128#5
-# asm 2: vstr <k2k3k6k7=d10,<k2k3k6k7_stack=[sp,#160]
-vstr d10,[sp,#160]
-
-# qhasm: k2k3k6k7_stack top = k2k3k6k7 top
-# asm 1: vstr <k2k3k6k7=reg128#6%top,<k2k3k6k7_stack=stack128#5
-# asm 2: vstr <k2k3k6k7=d11,<k2k3k6k7_stack=[sp,#168]
-vstr d11,[sp,#168]
-
-# qhasm: new k1n1k7k2_stack
-
-# qhasm: k1n1k7k2_stack bot = k1n1k7k2 bot
-# asm 1: vstr <k1n1k7k2=reg128#5%bot,<k1n1k7k2_stack=stack128#6
-# asm 2: vstr <k1n1k7k2=d8,<k1n1k7k2_stack=[sp,#176]
-vstr d8,[sp,#176]
-
-# qhasm: k1n1k7k2_stack top = k1n1k7k2 top
-# asm 1: vstr <k1n1k7k2=reg128#5%top,<k1n1k7k2_stack=stack128#6
-# asm 2: vstr <k1n1k7k2=d9,<k1n1k7k2_stack=[sp,#184]
-vstr d9,[sp,#184]
-
-# qhasm: 2x n2n3n3n2 = 0
-# asm 1: vmov.i64 >n2n3n3n2=reg128#1,#0
-# asm 2: vmov.i64 >n2n3n3n2=q0,#0
-vmov.i64 q0,#0
-
-# qhasm:                         unsigned<? mlenlow - 128
-# asm 1: cmp <mlenlow=int32#3,#128
-# asm 2: cmp <mlenlow=r2,#128
-cmp r2,#128
-
-# qhasm: goto mlenlowbelow128 if unsigned<
-blo ._mlenlowbelow128
-
-# qhasm: mlenatleast128:
-._mlenatleast128:
-
-# qhasm:   new k2k3k6k7
-
-# qhasm:   k2k3k6k7 bot = k2k3k6k7_stack bot
-# asm 1: vldr <k2k3k6k7=reg128#2%bot,<k2k3k6k7_stack=stack128#5
-# asm 2: vldr <k2k3k6k7=d2,<k2k3k6k7_stack=[sp,#160]
-vldr d2,[sp,#160]
-
-# qhasm:   k2k3k6k7 top = k2k3k6k7_stack top
-# asm 1: vldr <k2k3k6k7=reg128#2%top,<k2k3k6k7_stack=stack128#5
-# asm 2: vldr <k2k3k6k7=d3,<k2k3k6k7_stack=[sp,#168]
-vldr d3,[sp,#168]
-
-# qhasm:   new k1n1k7k2
-
-# qhasm:   k1n1k7k2 bot = k1n1k7k2_stack bot
-# asm 1: vldr <k1n1k7k2=reg128#5%bot,<k1n1k7k2_stack=stack128#6
-# asm 2: vldr <k1n1k7k2=d8,<k1n1k7k2_stack=[sp,#176]
-vldr d8,[sp,#176]
-
-# qhasm:   k1n1k7k2 top = k1n1k7k2_stack top
-# asm 1: vldr <k1n1k7k2=reg128#5%top,<k1n1k7k2_stack=stack128#6
-# asm 2: vldr <k1n1k7k2=d9,<k1n1k7k2_stack=[sp,#184]
-vldr d9,[sp,#184]
-
-# qhasm:   n2n3n3n2 = n2n3n3n2[0,1] n2n3n3n2[1] n2n3n3n2[0]
-# asm 1: vext.32 <n2n3n3n2=reg128#1%top,<n2n3n3n2=reg128#1%bot,<n2n3n3n2=reg128#1%bot,#1
-# asm 2: vext.32 <n2n3n3n2=d1,<n2n3n3n2=d0,<n2n3n3n2=d0,#1
-vext.32 d1,d0,d0,#1
-
-# qhasm:   new diag2
-
-# qhasm:   diag2 = diag2[0,1] k1n1k7k2[0,1]             
-# asm 1: vmov <diag2=reg128#6%top,<k1n1k7k2=reg128#5%bot
-# asm 2: vmov <diag2=d11,<k1n1k7k2=d8
-vmov d11,d8
-
-# qhasm:   diag2 = n2n3n3n2[3] k2k3k6k7[2] diag2[2,3]   
-# asm 1: vext.32 <diag2=reg128#6%bot,<n2n3n3n2=reg128#1%top,<k2k3k6k7=reg128#2%top,#1
-# asm 2: vext.32 <diag2=d10,<n2n3n3n2=d1,<k2k3k6k7=d3,#1
-vext.32 d10,d1,d3,#1
-
-# qhasm:   new diag3
-
-# qhasm:   diag3 = diag3[0,1] k1n1k7k2[2,3]             
-# asm 1: vmov <diag3=reg128#7%top,<k1n1k7k2=reg128#5%top
-# asm 2: vmov <diag3=d13,<k1n1k7k2=d9
-vmov d13,d9
-
-# qhasm:   diag3 = k2k3k6k7[1] n2n3n3n2[2] diag3[2,3]   
-# asm 1: vext.32 <diag3=reg128#7%bot,<k2k3k6k7=reg128#2%bot,<n2n3n3n2=reg128#1%top,#1
-# asm 2: vext.32 <diag3=d12,<k2k3k6k7=d2,<n2n3n3n2=d1,#1
-vext.32 d12,d2,d1,#1
-
-# qhasm:   diag0 = start0
-# asm 1: vmov >diag0=reg128#8,<start0=reg128#3
-# asm 2: vmov >diag0=q7,<start0=q2
-vmov q7,q2
-
-# qhasm:   diag1 = start1
-# asm 1: vmov >diag1=reg128#9,<start1=reg128#4
-# asm 2: vmov >diag1=q8,<start1=q3
-vmov q8,q3
-
-# qhasm:   start2 = diag2
-# asm 1: vmov >start2=reg128#10,<diag2=reg128#6
-# asm 2: vmov >start2=q9,<diag2=q5
-vmov q9,q5
-
-# qhasm:   new stack_start3
-
-# qhasm:   stack_start3 bot = diag3 bot
-# asm 1: vstr <diag3=reg128#7%bot,<stack_start3=stack128#9
-# asm 2: vstr <diag3=d12,<stack_start3=[sp,#224]
-vstr d12,[sp,#224]
-
-# qhasm:   stack_start3 top = diag3 top
-# asm 1: vstr <diag3=reg128#7%top,<stack_start3=stack128#9
-# asm 2: vstr <diag3=d13,<stack_start3=[sp,#232]
-vstr d13,[sp,#232]
-
-# qhasm:   2x nextblock = 0xff
-# asm 1: vmov.i64 >nextblock=reg128#11,#0xff
-# asm 2: vmov.i64 >nextblock=q10,#0xff
-vmov.i64 q10,#0xff
-
-# qhasm:   4x nextblock unsigned>>= 7
-# asm 1: vshr.u32 >nextblock=reg128#11,<nextblock=reg128#11,#7
-# asm 2: vshr.u32 >nextblock=q10,<nextblock=q10,#7
-vshr.u32 q10,q10,#7
-
-# qhasm:   2x n2n3n3n2 += nextblock
-# asm 1: vadd.i64 >n2n3n3n2=reg128#1,<n2n3n3n2=reg128#1,<nextblock=reg128#11
-# asm 2: vadd.i64 >n2n3n3n2=q0,<n2n3n3n2=q0,<nextblock=q10
-vadd.i64 q0,q0,q10
-
-# qhasm:   n2n3n3n2 = n2n3n3n2[0,1] n2n3n3n2[1] n2n3n3n2[0]
-# asm 1: vext.32 <n2n3n3n2=reg128#1%top,<n2n3n3n2=reg128#1%bot,<n2n3n3n2=reg128#1%bot,#1
-# asm 2: vext.32 <n2n3n3n2=d1,<n2n3n3n2=d0,<n2n3n3n2=d0,#1
-vext.32 d1,d0,d0,#1
-
-# qhasm:   new next_diag2
-
-# qhasm:   next_diag2 = next_diag2[0,1] k1n1k7k2[0,1]
-# asm 1: vmov <next_diag2=reg128#12%top,<k1n1k7k2=reg128#5%bot
-# asm 2: vmov <next_diag2=d23,<k1n1k7k2=d8
-vmov d23,d8
-
-# qhasm:   next_diag2 = n2n3n3n2[3] k2k3k6k7[2] next_diag2[2,3]
-# asm 1: vext.32 <next_diag2=reg128#12%bot,<n2n3n3n2=reg128#1%top,<k2k3k6k7=reg128#2%top,#1
-# asm 2: vext.32 <next_diag2=d22,<n2n3n3n2=d1,<k2k3k6k7=d3,#1
-vext.32 d22,d1,d3,#1
-
-# qhasm:   new next_diag3
-
-# qhasm:   next_diag3 = next_diag3[0,1] k1n1k7k2[2,3]
-# asm 1: vmov <next_diag3=reg128#13%top,<k1n1k7k2=reg128#5%top
-# asm 2: vmov <next_diag3=d25,<k1n1k7k2=d9
-vmov d25,d9
-
-# qhasm:   next_diag3 = k2k3k6k7[1] n2n3n3n2[2] next_diag3[2,3]
-# asm 1: vext.32 <next_diag3=reg128#13%bot,<k2k3k6k7=reg128#2%bot,<n2n3n3n2=reg128#1%top,#1
-# asm 2: vext.32 <next_diag3=d24,<k2k3k6k7=d2,<n2n3n3n2=d1,#1
-vext.32 d24,d2,d1,#1
-
-# qhasm:   2x n2n3n3n2 += nextblock
-# asm 1: vadd.i64 >n2n3n3n2=reg128#1,<n2n3n3n2=reg128#1,<nextblock=reg128#11
-# asm 2: vadd.i64 >n2n3n3n2=q0,<n2n3n3n2=q0,<nextblock=q10
-vadd.i64 q0,q0,q10
-
-# qhasm:   next_diag0 = diag0
-# asm 1: vmov >next_diag0=reg128#2,<diag0=reg128#8
-# asm 2: vmov >next_diag0=q1,<diag0=q7
-vmov q1,q7
-
-# qhasm:   next_diag1 = diag1
-# asm 1: vmov >next_diag1=reg128#5,<diag1=reg128#9
-# asm 2: vmov >next_diag1=q4,<diag1=q8
-vmov q4,q8
-
-# qhasm:   next_start2 bot = next_diag2 bot
-# asm 1: vstr <next_diag2=reg128#12%bot,<next_start2=stack128#7
-# asm 2: vstr <next_diag2=d22,<next_start2=[sp,#192]
-vstr d22,[sp,#192]
-
-# qhasm:   next_start2 top = next_diag2 top
-# asm 1: vstr <next_diag2=reg128#12%top,<next_start2=stack128#7
-# asm 2: vstr <next_diag2=d23,<next_start2=[sp,#200]
-vstr d23,[sp,#200]
-
-# qhasm:   next_start3 bot = next_diag3 bot
-# asm 1: vstr <next_diag3=reg128#13%bot,<next_start3=stack128#8
-# asm 2: vstr <next_diag3=d24,<next_start3=[sp,#208]
-vstr d24,[sp,#208]
-
-# qhasm:   next_start3 top = next_diag3 top
-# asm 1: vstr <next_diag3=reg128#13%top,<next_start3=stack128#8
-# asm 2: vstr <next_diag3=d25,<next_start3=[sp,#216]
-vstr d25,[sp,#216]
-
-# qhasm:   i = 12
-# asm 1: ldr >i=int32#5,=12
-# asm 2: ldr >i=r4,=12
-ldr r4,=12
-
-# qhasm:   mainloop2:
-._mainloop2:
-
-# qhasm:     4x a0 = diag1 + diag0
-# asm 1: vadd.i32 >a0=reg128#11,<diag1=reg128#9,<diag0=reg128#8
-# asm 2: vadd.i32 >a0=q10,<diag1=q8,<diag0=q7
-vadd.i32 q10,q8,q7
-
-# qhasm:    					4x next_a0 = next_diag1 + next_diag0
-# asm 1: vadd.i32 >next_a0=reg128#14,<next_diag1=reg128#5,<next_diag0=reg128#2
-# asm 2: vadd.i32 >next_a0=q13,<next_diag1=q4,<next_diag0=q1
-vadd.i32 q13,q4,q1
-
-# qhasm:     4x b0 = a0 << 7
-# asm 1: vshl.i32 >b0=reg128#15,<a0=reg128#11,#7
-# asm 2: vshl.i32 >b0=q14,<a0=q10,#7
-vshl.i32 q14,q10,#7
-
-# qhasm:     					4x next_b0 = next_a0 << 7
-# asm 1: vshl.i32 >next_b0=reg128#16,<next_a0=reg128#14,#7
-# asm 2: vshl.i32 >next_b0=q15,<next_a0=q13,#7
-vshl.i32 q15,q13,#7
-
-# qhasm:     4x b0 insert= a0 >> 25
-# asm 1: vsri.i32 <b0=reg128#15,<a0=reg128#11,#25
-# asm 2: vsri.i32 <b0=q14,<a0=q10,#25
-vsri.i32 q14,q10,#25
-
-# qhasm:     					4x next_b0 insert= next_a0 >> 25
-# asm 1: vsri.i32 <next_b0=reg128#16,<next_a0=reg128#14,#25
-# asm 2: vsri.i32 <next_b0=q15,<next_a0=q13,#25
-vsri.i32 q15,q13,#25
-
-# qhasm:        diag3 ^= b0
-# asm 1: veor >diag3=reg128#7,<diag3=reg128#7,<b0=reg128#15
-# asm 2: veor >diag3=q6,<diag3=q6,<b0=q14
-veor q6,q6,q14
-
-# qhasm:        					next_diag3 ^= next_b0
-# asm 1: veor >next_diag3=reg128#11,<next_diag3=reg128#13,<next_b0=reg128#16
-# asm 2: veor >next_diag3=q10,<next_diag3=q12,<next_b0=q15
-veor q10,q12,q15
-
-# qhasm:     4x a1 = diag0 + diag3
-# asm 1: vadd.i32 >a1=reg128#13,<diag0=reg128#8,<diag3=reg128#7
-# asm 2: vadd.i32 >a1=q12,<diag0=q7,<diag3=q6
-vadd.i32 q12,q7,q6
-
-# qhasm:     					4x next_a1 = next_diag0 + next_diag3
-# asm 1: vadd.i32 >next_a1=reg128#14,<next_diag0=reg128#2,<next_diag3=reg128#11
-# asm 2: vadd.i32 >next_a1=q13,<next_diag0=q1,<next_diag3=q10
-vadd.i32 q13,q1,q10
-
-# qhasm:     4x b1 = a1 << 9
-# asm 1: vshl.i32 >b1=reg128#15,<a1=reg128#13,#9
-# asm 2: vshl.i32 >b1=q14,<a1=q12,#9
-vshl.i32 q14,q12,#9
-
-# qhasm:     					4x next_b1 = next_a1 << 9
-# asm 1: vshl.i32 >next_b1=reg128#16,<next_a1=reg128#14,#9
-# asm 2: vshl.i32 >next_b1=q15,<next_a1=q13,#9
-vshl.i32 q15,q13,#9
-
-# qhasm:     4x b1 insert= a1 >> 23
-# asm 1: vsri.i32 <b1=reg128#15,<a1=reg128#13,#23
-# asm 2: vsri.i32 <b1=q14,<a1=q12,#23
-vsri.i32 q14,q12,#23
-
-# qhasm:     					4x next_b1 insert= next_a1 >> 23
-# asm 1: vsri.i32 <next_b1=reg128#16,<next_a1=reg128#14,#23
-# asm 2: vsri.i32 <next_b1=q15,<next_a1=q13,#23
-vsri.i32 q15,q13,#23
-
-# qhasm:        diag2 ^= b1
-# asm 1: veor >diag2=reg128#6,<diag2=reg128#6,<b1=reg128#15
-# asm 2: veor >diag2=q5,<diag2=q5,<b1=q14
-veor q5,q5,q14
-
-# qhasm:        					next_diag2 ^= next_b1
-# asm 1: veor >next_diag2=reg128#12,<next_diag2=reg128#12,<next_b1=reg128#16
-# asm 2: veor >next_diag2=q11,<next_diag2=q11,<next_b1=q15
-veor q11,q11,q15
-
-# qhasm:     4x a2 = diag3 + diag2
-# asm 1: vadd.i32 >a2=reg128#13,<diag3=reg128#7,<diag2=reg128#6
-# asm 2: vadd.i32 >a2=q12,<diag3=q6,<diag2=q5
-vadd.i32 q12,q6,q5
-
-# qhasm:             diag3 = diag3[3] diag3[0,1,2]
-# asm 1: vext.32 >diag3=reg128#7,<diag3=reg128#7,<diag3=reg128#7,#3
-# asm 2: vext.32 >diag3=q6,<diag3=q6,<diag3=q6,#3
-vext.32 q6,q6,q6,#3
-
-# qhasm:     					4x next_a2 = next_diag3 + next_diag2
-# asm 1: vadd.i32 >next_a2=reg128#14,<next_diag3=reg128#11,<next_diag2=reg128#12
-# asm 2: vadd.i32 >next_a2=q13,<next_diag3=q10,<next_diag2=q11
-vadd.i32 q13,q10,q11
-
-# qhasm:     4x b2 = a2 << 13
-# asm 1: vshl.i32 >b2=reg128#15,<a2=reg128#13,#13
-# asm 2: vshl.i32 >b2=q14,<a2=q12,#13
-vshl.i32 q14,q12,#13
-
-# qhasm:             					next_diag3 = next_diag3[3] next_diag3[0,1,2]
-# asm 1: vext.32 >next_diag3=reg128#11,<next_diag3=reg128#11,<next_diag3=reg128#11,#3
-# asm 2: vext.32 >next_diag3=q10,<next_diag3=q10,<next_diag3=q10,#3
-vext.32 q10,q10,q10,#3
-
-# qhasm:     					4x next_b2 = next_a2 << 13
-# asm 1: vshl.i32 >next_b2=reg128#16,<next_a2=reg128#14,#13
-# asm 2: vshl.i32 >next_b2=q15,<next_a2=q13,#13
-vshl.i32 q15,q13,#13
-
-# qhasm:     4x b2 insert= a2 >> 19
-# asm 1: vsri.i32 <b2=reg128#15,<a2=reg128#13,#19
-# asm 2: vsri.i32 <b2=q14,<a2=q12,#19
-vsri.i32 q14,q12,#19
-
-# qhasm:     					4x next_b2 insert= next_a2 >> 19
-# asm 1: vsri.i32 <next_b2=reg128#16,<next_a2=reg128#14,#19
-# asm 2: vsri.i32 <next_b2=q15,<next_a2=q13,#19
-vsri.i32 q15,q13,#19
-
-# qhasm:        diag1 ^= b2
-# asm 1: veor >diag1=reg128#9,<diag1=reg128#9,<b2=reg128#15
-# asm 2: veor >diag1=q8,<diag1=q8,<b2=q14
-veor q8,q8,q14
-
-# qhasm:        					next_diag1 ^= next_b2
-# asm 1: veor >next_diag1=reg128#5,<next_diag1=reg128#5,<next_b2=reg128#16
-# asm 2: veor >next_diag1=q4,<next_diag1=q4,<next_b2=q15
-veor q4,q4,q15
-
-# qhasm:     4x a3 = diag2 + diag1
-# asm 1: vadd.i32 >a3=reg128#13,<diag2=reg128#6,<diag1=reg128#9
-# asm 2: vadd.i32 >a3=q12,<diag2=q5,<diag1=q8
-vadd.i32 q12,q5,q8
-
-# qhasm:             diag2 = diag2[2,3] diag2[0,1]
-# asm 1: vswp <diag2=reg128#6%bot,<diag2=reg128#6%top
-# asm 2: vswp <diag2=d10,<diag2=d11
-vswp d10,d11
-
-# qhasm:     					4x next_a3 = next_diag2 + next_diag1
-# asm 1: vadd.i32 >next_a3=reg128#14,<next_diag2=reg128#12,<next_diag1=reg128#5
-# asm 2: vadd.i32 >next_a3=q13,<next_diag2=q11,<next_diag1=q4
-vadd.i32 q13,q11,q4
-
-# qhasm:     4x b3 = a3 << 18
-# asm 1: vshl.i32 >b3=reg128#15,<a3=reg128#13,#18
-# asm 2: vshl.i32 >b3=q14,<a3=q12,#18
-vshl.i32 q14,q12,#18
-
-# qhasm:             					next_diag2 = next_diag2[2,3] next_diag2[0,1]
-# asm 1: vswp <next_diag2=reg128#12%bot,<next_diag2=reg128#12%top
-# asm 2: vswp <next_diag2=d22,<next_diag2=d23
-vswp d22,d23
-
-# qhasm:     					4x next_b3 = next_a3 << 18
-# asm 1: vshl.i32 >next_b3=reg128#16,<next_a3=reg128#14,#18
-# asm 2: vshl.i32 >next_b3=q15,<next_a3=q13,#18
-vshl.i32 q15,q13,#18
-
-# qhasm:     4x b3 insert= a3 >> 14
-# asm 1: vsri.i32 <b3=reg128#15,<a3=reg128#13,#14
-# asm 2: vsri.i32 <b3=q14,<a3=q12,#14
-vsri.i32 q14,q12,#14
-
-# qhasm:             diag1 = diag1[1,2,3] diag1[0]
-# asm 1: vext.32 >diag1=reg128#9,<diag1=reg128#9,<diag1=reg128#9,#1
-# asm 2: vext.32 >diag1=q8,<diag1=q8,<diag1=q8,#1
-vext.32 q8,q8,q8,#1
-
-# qhasm:     					4x next_b3 insert= next_a3 >> 14
-# asm 1: vsri.i32 <next_b3=reg128#16,<next_a3=reg128#14,#14
-# asm 2: vsri.i32 <next_b3=q15,<next_a3=q13,#14
-vsri.i32 q15,q13,#14
-
-# qhasm:        diag0 ^= b3
-# asm 1: veor >diag0=reg128#8,<diag0=reg128#8,<b3=reg128#15
-# asm 2: veor >diag0=q7,<diag0=q7,<b3=q14
-veor q7,q7,q14
-
-# qhasm:             					next_diag1 = next_diag1[1,2,3] next_diag1[0]
-# asm 1: vext.32 >next_diag1=reg128#5,<next_diag1=reg128#5,<next_diag1=reg128#5,#1
-# asm 2: vext.32 >next_diag1=q4,<next_diag1=q4,<next_diag1=q4,#1
-vext.32 q4,q4,q4,#1
-
-# qhasm:        					next_diag0 ^= next_b3
-# asm 1: veor >next_diag0=reg128#2,<next_diag0=reg128#2,<next_b3=reg128#16
-# asm 2: veor >next_diag0=q1,<next_diag0=q1,<next_b3=q15
-veor q1,q1,q15
-
-# qhasm:     4x a0 = diag3 + diag0
-# asm 1: vadd.i32 >a0=reg128#13,<diag3=reg128#7,<diag0=reg128#8
-# asm 2: vadd.i32 >a0=q12,<diag3=q6,<diag0=q7
-vadd.i32 q12,q6,q7
-
-# qhasm:     					4x next_a0 = next_diag3 + next_diag0
-# asm 1: vadd.i32 >next_a0=reg128#14,<next_diag3=reg128#11,<next_diag0=reg128#2
-# asm 2: vadd.i32 >next_a0=q13,<next_diag3=q10,<next_diag0=q1
-vadd.i32 q13,q10,q1
-
-# qhasm:     4x b0 = a0 << 7
-# asm 1: vshl.i32 >b0=reg128#15,<a0=reg128#13,#7
-# asm 2: vshl.i32 >b0=q14,<a0=q12,#7
-vshl.i32 q14,q12,#7
-
-# qhasm:     					4x next_b0 = next_a0 << 7
-# asm 1: vshl.i32 >next_b0=reg128#16,<next_a0=reg128#14,#7
-# asm 2: vshl.i32 >next_b0=q15,<next_a0=q13,#7
-vshl.i32 q15,q13,#7
-
-# qhasm:     4x b0 insert= a0 >> 25
-# asm 1: vsri.i32 <b0=reg128#15,<a0=reg128#13,#25
-# asm 2: vsri.i32 <b0=q14,<a0=q12,#25
-vsri.i32 q14,q12,#25
-
-# qhasm:     					4x next_b0 insert= next_a0 >> 25
-# asm 1: vsri.i32 <next_b0=reg128#16,<next_a0=reg128#14,#25
-# asm 2: vsri.i32 <next_b0=q15,<next_a0=q13,#25
-vsri.i32 q15,q13,#25
-
-# qhasm:        diag1 ^= b0
-# asm 1: veor >diag1=reg128#9,<diag1=reg128#9,<b0=reg128#15
-# asm 2: veor >diag1=q8,<diag1=q8,<b0=q14
-veor q8,q8,q14
-
-# qhasm:        					next_diag1 ^= next_b0
-# asm 1: veor >next_diag1=reg128#5,<next_diag1=reg128#5,<next_b0=reg128#16
-# asm 2: veor >next_diag1=q4,<next_diag1=q4,<next_b0=q15
-veor q4,q4,q15
-
-# qhasm:     4x a1 = diag0 + diag1
-# asm 1: vadd.i32 >a1=reg128#13,<diag0=reg128#8,<diag1=reg128#9
-# asm 2: vadd.i32 >a1=q12,<diag0=q7,<diag1=q8
-vadd.i32 q12,q7,q8
-
-# qhasm:     					4x next_a1 = next_diag0 + next_diag1
-# asm 1: vadd.i32 >next_a1=reg128#14,<next_diag0=reg128#2,<next_diag1=reg128#5
-# asm 2: vadd.i32 >next_a1=q13,<next_diag0=q1,<next_diag1=q4
-vadd.i32 q13,q1,q4
-
-# qhasm:     4x b1 = a1 << 9
-# asm 1: vshl.i32 >b1=reg128#15,<a1=reg128#13,#9
-# asm 2: vshl.i32 >b1=q14,<a1=q12,#9
-vshl.i32 q14,q12,#9
-
-# qhasm:     					4x next_b1 = next_a1 << 9
-# asm 1: vshl.i32 >next_b1=reg128#16,<next_a1=reg128#14,#9
-# asm 2: vshl.i32 >next_b1=q15,<next_a1=q13,#9
-vshl.i32 q15,q13,#9
-
-# qhasm:     4x b1 insert= a1 >> 23
-# asm 1: vsri.i32 <b1=reg128#15,<a1=reg128#13,#23
-# asm 2: vsri.i32 <b1=q14,<a1=q12,#23
-vsri.i32 q14,q12,#23
-
-# qhasm:                   						unsigned>? i -= 2
-# asm 1: subs <i=int32#5,<i=int32#5,#2
-# asm 2: subs <i=r4,<i=r4,#2
-subs r4,r4,#2
-
-# qhasm:     					4x next_b1 insert= next_a1 >> 23
-# asm 1: vsri.i32 <next_b1=reg128#16,<next_a1=reg128#14,#23
-# asm 2: vsri.i32 <next_b1=q15,<next_a1=q13,#23
-vsri.i32 q15,q13,#23
-
-# qhasm:        diag2 ^= b1
-# asm 1: veor >diag2=reg128#6,<diag2=reg128#6,<b1=reg128#15
-# asm 2: veor >diag2=q5,<diag2=q5,<b1=q14
-veor q5,q5,q14
-
-# qhasm:        					next_diag2 ^= next_b1
-# asm 1: veor >next_diag2=reg128#12,<next_diag2=reg128#12,<next_b1=reg128#16
-# asm 2: veor >next_diag2=q11,<next_diag2=q11,<next_b1=q15
-veor q11,q11,q15
-
-# qhasm:     4x a2 = diag1 + diag2
-# asm 1: vadd.i32 >a2=reg128#13,<diag1=reg128#9,<diag2=reg128#6
-# asm 2: vadd.i32 >a2=q12,<diag1=q8,<diag2=q5
-vadd.i32 q12,q8,q5
-
-# qhasm:             diag1 = diag1[3] diag1[0,1,2]
-# asm 1: vext.32 >diag1=reg128#9,<diag1=reg128#9,<diag1=reg128#9,#3
-# asm 2: vext.32 >diag1=q8,<diag1=q8,<diag1=q8,#3
-vext.32 q8,q8,q8,#3
-
-# qhasm:     					4x next_a2 = next_diag1 + next_diag2
-# asm 1: vadd.i32 >next_a2=reg128#14,<next_diag1=reg128#5,<next_diag2=reg128#12
-# asm 2: vadd.i32 >next_a2=q13,<next_diag1=q4,<next_diag2=q11
-vadd.i32 q13,q4,q11
-
-# qhasm:     4x b2 = a2 << 13
-# asm 1: vshl.i32 >b2=reg128#15,<a2=reg128#13,#13
-# asm 2: vshl.i32 >b2=q14,<a2=q12,#13
-vshl.i32 q14,q12,#13
-
-# qhasm:             					next_diag1 = next_diag1[3] next_diag1[0,1,2]
-# asm 1: vext.32 >next_diag1=reg128#5,<next_diag1=reg128#5,<next_diag1=reg128#5,#3
-# asm 2: vext.32 >next_diag1=q4,<next_diag1=q4,<next_diag1=q4,#3
-vext.32 q4,q4,q4,#3
-
-# qhasm:     					4x next_b2 = next_a2 << 13
-# asm 1: vshl.i32 >next_b2=reg128#16,<next_a2=reg128#14,#13
-# asm 2: vshl.i32 >next_b2=q15,<next_a2=q13,#13
-vshl.i32 q15,q13,#13
-
-# qhasm:     4x b2 insert= a2 >> 19
-# asm 1: vsri.i32 <b2=reg128#15,<a2=reg128#13,#19
-# asm 2: vsri.i32 <b2=q14,<a2=q12,#19
-vsri.i32 q14,q12,#19
-
-# qhasm:     					4x next_b2 insert= next_a2 >> 19
-# asm 1: vsri.i32 <next_b2=reg128#16,<next_a2=reg128#14,#19
-# asm 2: vsri.i32 <next_b2=q15,<next_a2=q13,#19
-vsri.i32 q15,q13,#19
-
-# qhasm:        diag3 ^= b2
-# asm 1: veor >diag3=reg128#7,<diag3=reg128#7,<b2=reg128#15
-# asm 2: veor >diag3=q6,<diag3=q6,<b2=q14
-veor q6,q6,q14
-
-# qhasm:        					next_diag3 ^= next_b2
-# asm 1: veor >next_diag3=reg128#11,<next_diag3=reg128#11,<next_b2=reg128#16
-# asm 2: veor >next_diag3=q10,<next_diag3=q10,<next_b2=q15
-veor q10,q10,q15
-
-# qhasm:     4x a3 = diag2 + diag3
-# asm 1: vadd.i32 >a3=reg128#13,<diag2=reg128#6,<diag3=reg128#7
-# asm 2: vadd.i32 >a3=q12,<diag2=q5,<diag3=q6
-vadd.i32 q12,q5,q6
-
-# qhasm:             diag2 = diag2[2,3] diag2[0,1]
-# asm 1: vswp <diag2=reg128#6%bot,<diag2=reg128#6%top
-# asm 2: vswp <diag2=d10,<diag2=d11
-vswp d10,d11
-
-# qhasm:     					4x next_a3 = next_diag2 + next_diag3
-# asm 1: vadd.i32 >next_a3=reg128#14,<next_diag2=reg128#12,<next_diag3=reg128#11
-# asm 2: vadd.i32 >next_a3=q13,<next_diag2=q11,<next_diag3=q10
-vadd.i32 q13,q11,q10
-
-# qhasm:     4x b3 = a3 << 18
-# asm 1: vshl.i32 >b3=reg128#15,<a3=reg128#13,#18
-# asm 2: vshl.i32 >b3=q14,<a3=q12,#18
-vshl.i32 q14,q12,#18
-
-# qhasm:             					next_diag2 = next_diag2[2,3] next_diag2[0,1]
-# asm 1: vswp <next_diag2=reg128#12%bot,<next_diag2=reg128#12%top
-# asm 2: vswp <next_diag2=d22,<next_diag2=d23
-vswp d22,d23
-
-# qhasm:     					4x next_b3 = next_a3 << 18
-# asm 1: vshl.i32 >next_b3=reg128#16,<next_a3=reg128#14,#18
-# asm 2: vshl.i32 >next_b3=q15,<next_a3=q13,#18
-vshl.i32 q15,q13,#18
-
-# qhasm:     4x b3 insert= a3 >> 14
-# asm 1: vsri.i32 <b3=reg128#15,<a3=reg128#13,#14
-# asm 2: vsri.i32 <b3=q14,<a3=q12,#14
-vsri.i32 q14,q12,#14
-
-# qhasm:             diag3 = diag3[1,2,3] diag3[0]
-# asm 1: vext.32 >diag3=reg128#7,<diag3=reg128#7,<diag3=reg128#7,#1
-# asm 2: vext.32 >diag3=q6,<diag3=q6,<diag3=q6,#1
-vext.32 q6,q6,q6,#1
-
-# qhasm:     					4x next_b3 insert= next_a3 >> 14
-# asm 1: vsri.i32 <next_b3=reg128#16,<next_a3=reg128#14,#14
-# asm 2: vsri.i32 <next_b3=q15,<next_a3=q13,#14
-vsri.i32 q15,q13,#14
-
-# qhasm:        diag0 ^= b3
-# asm 1: veor >diag0=reg128#8,<diag0=reg128#8,<b3=reg128#15
-# asm 2: veor >diag0=q7,<diag0=q7,<b3=q14
-veor q7,q7,q14
-
-# qhasm:             					next_diag3 = next_diag3[1,2,3] next_diag3[0]
-# asm 1: vext.32 >next_diag3=reg128#13,<next_diag3=reg128#11,<next_diag3=reg128#11,#1
-# asm 2: vext.32 >next_diag3=q12,<next_diag3=q10,<next_diag3=q10,#1
-vext.32 q12,q10,q10,#1
-
-# qhasm:        					next_diag0 ^= next_b3
-# asm 1: veor >next_diag0=reg128#2,<next_diag0=reg128#2,<next_b3=reg128#16
-# asm 2: veor >next_diag0=q1,<next_diag0=q1,<next_b3=q15
-veor q1,q1,q15
-
-# qhasm:   goto mainloop2 if unsigned>
-bhi ._mainloop2
-
-# qhasm:   2x abab = 0xffffffff
-# asm 1: vmov.i64 >abab=reg128#11,#0xffffffff
-# asm 2: vmov.i64 >abab=q10,#0xffffffff
-vmov.i64 q10,#0xffffffff
-
-# qhasm:   new x4x9x14x3
-
-# qhasm:   x4x9x14x3 bot = stack_start3 bot
-# asm 1: vldr <x4x9x14x3=reg128#14%bot,<stack_start3=stack128#9
-# asm 2: vldr <x4x9x14x3=d26,<stack_start3=[sp,#224]
-vldr d26,[sp,#224]
-
-# qhasm:   x4x9x14x3 top = stack_start3 top
-# asm 1: vldr <x4x9x14x3=reg128#14%top,<stack_start3=stack128#9
-# asm 2: vldr <x4x9x14x3=d27,<stack_start3=[sp,#232]
-vldr d27,[sp,#232]
-
-# qhasm:   4x x0x5x10x15 = diag0 + start0
-# asm 1: vadd.i32 >x0x5x10x15=reg128#8,<diag0=reg128#8,<start0=reg128#3
-# asm 2: vadd.i32 >x0x5x10x15=q7,<diag0=q7,<start0=q2
-vadd.i32 q7,q7,q2
-
-# qhasm:   4x x12x1x6x11 = diag1 + start1
-# asm 1: vadd.i32 >x12x1x6x11=reg128#9,<diag1=reg128#9,<start1=reg128#4
-# asm 2: vadd.i32 >x12x1x6x11=q8,<diag1=q8,<start1=q3
-vadd.i32 q8,q8,q3
-
-# qhasm:   4x x8x13x2x7 = diag2 + start2
-# asm 1: vadd.i32 >x8x13x2x7=reg128#6,<diag2=reg128#6,<start2=reg128#10
-# asm 2: vadd.i32 >x8x13x2x7=q5,<diag2=q5,<start2=q9
-vadd.i32 q5,q5,q9
-
-# qhasm:   4x x4x9x14x3 += diag3
-# asm 1: vadd.i32 >x4x9x14x3=reg128#7,<x4x9x14x3=reg128#14,<diag3=reg128#7
-# asm 2: vadd.i32 >x4x9x14x3=q6,<x4x9x14x3=q13,<diag3=q6
-vadd.i32 q6,q13,q6
-
-# qhasm:   x0x1x10x11 = x0x5x10x15
-# asm 1: vmov >x0x1x10x11=reg128#10,<x0x5x10x15=reg128#8
-# asm 2: vmov >x0x1x10x11=q9,<x0x5x10x15=q7
-vmov q9,q7
-
-# qhasm:   x12x13x6x7 = x12x1x6x11
-# asm 1: vmov >x12x13x6x7=reg128#14,<x12x1x6x11=reg128#9
-# asm 2: vmov >x12x13x6x7=q13,<x12x1x6x11=q8
-vmov q13,q8
-
-# qhasm:   x8x9x2x3 = x8x13x2x7
-# asm 1: vmov >x8x9x2x3=reg128#15,<x8x13x2x7=reg128#6
-# asm 2: vmov >x8x9x2x3=q14,<x8x13x2x7=q5
-vmov q14,q5
-
-# qhasm:   x4x5x14x15 = x4x9x14x3
-# asm 1: vmov >x4x5x14x15=reg128#16,<x4x9x14x3=reg128#7
-# asm 2: vmov >x4x5x14x15=q15,<x4x9x14x3=q6
-vmov q15,q6
-
-# qhasm:   x0x1x10x11 = (abab & x0x1x10x11) | (~abab & x12x1x6x11)
-# asm 1: vbif <x0x1x10x11=reg128#10,<x12x1x6x11=reg128#9,<abab=reg128#11
-# asm 2: vbif <x0x1x10x11=q9,<x12x1x6x11=q8,<abab=q10
-vbif q9,q8,q10
-
-# qhasm:   x12x13x6x7 = (abab & x12x13x6x7) | (~abab & x8x13x2x7)
-# asm 1: vbif <x12x13x6x7=reg128#14,<x8x13x2x7=reg128#6,<abab=reg128#11
-# asm 2: vbif <x12x13x6x7=q13,<x8x13x2x7=q5,<abab=q10
-vbif q13,q5,q10
-
-# qhasm:   x8x9x2x3 = (abab & x8x9x2x3) | (~abab & x4x9x14x3)
-# asm 1: vbif <x8x9x2x3=reg128#15,<x4x9x14x3=reg128#7,<abab=reg128#11
-# asm 2: vbif <x8x9x2x3=q14,<x4x9x14x3=q6,<abab=q10
-vbif q14,q6,q10
-
-# qhasm:   x4x5x14x15 = (abab & x4x5x14x15) | (~abab & x0x5x10x15)
-# asm 1: vbif <x4x5x14x15=reg128#16,<x0x5x10x15=reg128#8,<abab=reg128#11
-# asm 2: vbif <x4x5x14x15=q15,<x0x5x10x15=q7,<abab=q10
-vbif q15,q7,q10
-
-# qhasm:   x0x1x2x3 = x0x1x10x11
-# asm 1: vmov >x0x1x2x3=reg128#6,<x0x1x10x11=reg128#10
-# asm 2: vmov >x0x1x2x3=q5,<x0x1x10x11=q9
-vmov q5,q9
-
-# qhasm:   x4x5x6x7 = x4x5x14x15
-# asm 1: vmov >x4x5x6x7=reg128#7,<x4x5x14x15=reg128#16
-# asm 2: vmov >x4x5x6x7=q6,<x4x5x14x15=q15
-vmov q6,q15
-
-# qhasm:   x8x9x10x11 = x8x9x2x3
-# asm 1: vmov >x8x9x10x11=reg128#8,<x8x9x2x3=reg128#15
-# asm 2: vmov >x8x9x10x11=q7,<x8x9x2x3=q14
-vmov q7,q14
-
-# qhasm:   x12x13x14x15 = x12x13x6x7
-# asm 1: vmov >x12x13x14x15=reg128#9,<x12x13x6x7=reg128#14
-# asm 2: vmov >x12x13x14x15=q8,<x12x13x6x7=q13
-vmov q8,q13
-
-# qhasm:   x0x1x2x3 = x0x1x2x3[0,1] x8x9x2x3[2,3]
-# asm 1: vmov <x0x1x2x3=reg128#6%top,<x8x9x2x3=reg128#15%top
-# asm 2: vmov <x0x1x2x3=d11,<x8x9x2x3=d29
-vmov d11,d29
-
-# qhasm:   x4x5x6x7 = x4x5x6x7[0,1] x12x13x6x7[2,3]
-# asm 1: vmov <x4x5x6x7=reg128#7%top,<x12x13x6x7=reg128#14%top
-# asm 2: vmov <x4x5x6x7=d13,<x12x13x6x7=d27
-vmov d13,d27
-
-# qhasm:   x8x9x10x11 = x8x9x10x11[0,1] x0x1x10x11[2,3]
-# asm 1: vmov <x8x9x10x11=reg128#8%top,<x0x1x10x11=reg128#10%top
-# asm 2: vmov <x8x9x10x11=d15,<x0x1x10x11=d19
-vmov d15,d19
-
-# qhasm:   x12x13x14x15 = x12x13x14x15[0,1] x4x5x14x15[2,3]
-# asm 1: vmov <x12x13x14x15=reg128#9%top,<x4x5x14x15=reg128#16%top
-# asm 2: vmov <x12x13x14x15=d17,<x4x5x14x15=d31
-vmov d17,d31
-
-# qhasm:                      =? m - 0
-# asm 1: cmp <m=int32#2,#0
-# asm 2: cmp <m=r1,#0
-cmp r1,#0
-
-# qhasm:   goto nomessage2 if =
-beq ._nomessage2
-
-# qhasm:     m0m1m2m3 = mem128[m]
-# asm 1: vld1.8 {>m0m1m2m3=reg128#10%bot->m0m1m2m3=reg128#10%top},[<m=int32#2]
-# asm 2: vld1.8 {>m0m1m2m3=d18->m0m1m2m3=d19},[<m=r1]
-vld1.8 {d18-d19},[r1]
-
-# qhasm:     m += 16
-# asm 1: add <m=int32#2,<m=int32#2,#16
-# asm 2: add <m=r1,<m=r1,#16
-add r1,r1,#16
-
-# qhasm:     m4m5m6m7 = mem128[m]
-# asm 1: vld1.8 {>m4m5m6m7=reg128#14%bot->m4m5m6m7=reg128#14%top},[<m=int32#2]
-# asm 2: vld1.8 {>m4m5m6m7=d26->m4m5m6m7=d27},[<m=r1]
-vld1.8 {d26-d27},[r1]
-
-# qhasm:     m += 16
-# asm 1: add <m=int32#2,<m=int32#2,#16
-# asm 2: add <m=r1,<m=r1,#16
-add r1,r1,#16
-
-# qhasm:     m8m9m10m11 = mem128[m]
-# asm 1: vld1.8 {>m8m9m10m11=reg128#15%bot->m8m9m10m11=reg128#15%top},[<m=int32#2]
-# asm 2: vld1.8 {>m8m9m10m11=d28->m8m9m10m11=d29},[<m=r1]
-vld1.8 {d28-d29},[r1]
-
-# qhasm:     m += 16
-# asm 1: add <m=int32#2,<m=int32#2,#16
-# asm 2: add <m=r1,<m=r1,#16
-add r1,r1,#16
-
-# qhasm:     m12m13m14m15 = mem128[m]
-# asm 1: vld1.8 {>m12m13m14m15=reg128#16%bot->m12m13m14m15=reg128#16%top},[<m=int32#2]
-# asm 2: vld1.8 {>m12m13m14m15=d30->m12m13m14m15=d31},[<m=r1]
-vld1.8 {d30-d31},[r1]
-
-# qhasm:     m += 16
-# asm 1: add <m=int32#2,<m=int32#2,#16
-# asm 2: add <m=r1,<m=r1,#16
-add r1,r1,#16
-
-# qhasm:     x0x1x2x3 ^= m0m1m2m3
-# asm 1: veor >x0x1x2x3=reg128#6,<x0x1x2x3=reg128#6,<m0m1m2m3=reg128#10
-# asm 2: veor >x0x1x2x3=q5,<x0x1x2x3=q5,<m0m1m2m3=q9
-veor q5,q5,q9
-
-# qhasm:     x4x5x6x7 ^= m4m5m6m7
-# asm 1: veor >x4x5x6x7=reg128#7,<x4x5x6x7=reg128#7,<m4m5m6m7=reg128#14
-# asm 2: veor >x4x5x6x7=q6,<x4x5x6x7=q6,<m4m5m6m7=q13
-veor q6,q6,q13
-
-# qhasm:     x8x9x10x11 ^= m8m9m10m11
-# asm 1: veor >x8x9x10x11=reg128#8,<x8x9x10x11=reg128#8,<m8m9m10m11=reg128#15
-# asm 2: veor >x8x9x10x11=q7,<x8x9x10x11=q7,<m8m9m10m11=q14
-veor q7,q7,q14
-
-# qhasm:     x12x13x14x15 ^= m12m13m14m15
-# asm 1: veor >x12x13x14x15=reg128#9,<x12x13x14x15=reg128#9,<m12m13m14m15=reg128#16
-# asm 2: veor >x12x13x14x15=q8,<x12x13x14x15=q8,<m12m13m14m15=q15
-veor q8,q8,q15
-
-# ---------------------------------------------------------------------
-# nomessage2: write out the first 64-byte block of the two-block path,
-# then finish the second block (feed-forward, word reordering, optional
-# XOR with the message).
-#   r0 = c (output pointer), r1 = m (input pointer; 0 means no message)
-#   q5,q6,q7,q8   = x0..x3, x4..x7, x8..x11, x12..x15 of the first block
-#   q1,q4,q11,q12 = next_diag0..next_diag3 (second block after the rounds)
-#   q2,q3         = start0,start1
-#   q10           = abab mask (vmov.i64 #0xffffffff issued after mainloop2)
-# ---------------------------------------------------------------------
-# qhasm:   nomessage2:
-._nomessage2:
-
-# Store the first block: four 16-byte stores, c advanced by 16 after each.
-# qhasm:   mem128[c] = x0x1x2x3
-# asm 1: vst1.8 {<x0x1x2x3=reg128#6%bot-<x0x1x2x3=reg128#6%top},[<c=int32#1]
-# asm 2: vst1.8 {<x0x1x2x3=d10-<x0x1x2x3=d11},[<c=r0]
-vst1.8 {d10-d11},[r0]
-
-# qhasm:   c += 16
-# asm 1: add <c=int32#1,<c=int32#1,#16
-# asm 2: add <c=r0,<c=r0,#16
-add r0,r0,#16
-
-# qhasm:   mem128[c] = x4x5x6x7
-# asm 1: vst1.8 {<x4x5x6x7=reg128#7%bot-<x4x5x6x7=reg128#7%top},[<c=int32#1]
-# asm 2: vst1.8 {<x4x5x6x7=d12-<x4x5x6x7=d13},[<c=r0]
-vst1.8 {d12-d13},[r0]
-
-# qhasm:   c += 16
-# asm 1: add <c=int32#1,<c=int32#1,#16
-# asm 2: add <c=r0,<c=r0,#16
-add r0,r0,#16
-
-# qhasm:   mem128[c] = x8x9x10x11
-# asm 1: vst1.8 {<x8x9x10x11=reg128#8%bot-<x8x9x10x11=reg128#8%top},[<c=int32#1]
-# asm 2: vst1.8 {<x8x9x10x11=d14-<x8x9x10x11=d15},[<c=r0]
-vst1.8 {d14-d15},[r0]
-
-# qhasm:   c += 16
-# asm 1: add <c=int32#1,<c=int32#1,#16
-# asm 2: add <c=r0,<c=r0,#16
-add r0,r0,#16
-
-# qhasm:   mem128[c] = x12x13x14x15
-# asm 1: vst1.8 {<x12x13x14x15=reg128#9%bot-<x12x13x14x15=reg128#9%top},[<c=int32#1]
-# asm 2: vst1.8 {<x12x13x14x15=d16-<x12x13x14x15=d17},[<c=r0]
-vst1.8 {d16-d17},[r0]
-
-# qhasm:   c += 16
-# asm 1: add <c=int32#1,<c=int32#1,#16
-# asm 2: add <c=r0,<c=r0,#16
-add r0,r0,#16
-
-# Second block feed-forward. next_start2/next_start3 live on the stack
-# ([sp,#192] and [sp,#208]); they are reloaded into q5/q6, which are free
-# again now that the first block has been stored.
-# qhasm:   new x8x13x2x7
-
-# qhasm:   x8x13x2x7 bot = next_start2 bot
-# asm 1: vldr <x8x13x2x7=reg128#6%bot,<next_start2=stack128#7
-# asm 2: vldr <x8x13x2x7=d10,<next_start2=[sp,#192]
-vldr d10,[sp,#192]
-
-# qhasm:   x8x13x2x7 top = next_start2 top
-# asm 1: vldr <x8x13x2x7=reg128#6%top,<next_start2=stack128#7
-# asm 2: vldr <x8x13x2x7=d11,<next_start2=[sp,#200]
-vldr d11,[sp,#200]
-
-# qhasm:   new x4x9x14x3
-
-# qhasm:   x4x9x14x3 bot = next_start3 bot
-# asm 1: vldr <x4x9x14x3=reg128#7%bot,<next_start3=stack128#8
-# asm 2: vldr <x4x9x14x3=d12,<next_start3=[sp,#208]
-vldr d12,[sp,#208]
-
-# qhasm:   x4x9x14x3 top = next_start3 top
-# asm 1: vldr <x4x9x14x3=reg128#7%top,<next_start3=stack128#8
-# asm 2: vldr <x4x9x14x3=d13,<next_start3=[sp,#216]
-vldr d13,[sp,#216]
-
-# Add the initial state words onto the post-round state, 32 bits per lane.
-# qhasm:   4x x0x5x10x15 = next_diag0 + start0
-# asm 1: vadd.i32 >x0x5x10x15=reg128#2,<next_diag0=reg128#2,<start0=reg128#3
-# asm 2: vadd.i32 >x0x5x10x15=q1,<next_diag0=q1,<start0=q2
-vadd.i32 q1,q1,q2
-
-# qhasm:   4x x12x1x6x11 = next_diag1 + start1
-# asm 1: vadd.i32 >x12x1x6x11=reg128#5,<next_diag1=reg128#5,<start1=reg128#4
-# asm 2: vadd.i32 >x12x1x6x11=q4,<next_diag1=q4,<start1=q3
-vadd.i32 q4,q4,q3
-
-# qhasm:   4x x8x13x2x7 += next_diag2
-# asm 1: vadd.i32 >x8x13x2x7=reg128#6,<x8x13x2x7=reg128#6,<next_diag2=reg128#12
-# asm 2: vadd.i32 >x8x13x2x7=q5,<x8x13x2x7=q5,<next_diag2=q11
-vadd.i32 q5,q5,q11
-
-# qhasm:   4x x4x9x14x3 += next_diag3
-# asm 1: vadd.i32 >x4x9x14x3=reg128#7,<x4x9x14x3=reg128#7,<next_diag3=reg128#13
-# asm 2: vadd.i32 >x4x9x14x3=q6,<x4x9x14x3=q6,<next_diag3=q12
-vadd.i32 q6,q6,q12
-
-# Convert from the diagonal register layout to x0..x15 order in three
-# steps: (1) copy each register, (2) vbif with the abab mask replaces the
-# 32-bit words at positions 1 and 3 with words from a neighbouring
-# register, (3) d-register moves swap in the correct upper halves.
-# qhasm:   x0x1x10x11 = x0x5x10x15
-# asm 1: vmov >x0x1x10x11=reg128#8,<x0x5x10x15=reg128#2
-# asm 2: vmov >x0x1x10x11=q7,<x0x5x10x15=q1
-vmov q7,q1
-
-# qhasm:   x12x13x6x7 = x12x1x6x11
-# asm 1: vmov >x12x13x6x7=reg128#9,<x12x1x6x11=reg128#5
-# asm 2: vmov >x12x13x6x7=q8,<x12x1x6x11=q4
-vmov q8,q4
-
-# qhasm:   x8x9x2x3 = x8x13x2x7
-# asm 1: vmov >x8x9x2x3=reg128#10,<x8x13x2x7=reg128#6
-# asm 2: vmov >x8x9x2x3=q9,<x8x13x2x7=q5
-vmov q9,q5
-
-# qhasm:   x4x5x14x15 = x4x9x14x3
-# asm 1: vmov >x4x5x14x15=reg128#12,<x4x9x14x3=reg128#7
-# asm 2: vmov >x4x5x14x15=q11,<x4x9x14x3=q6
-vmov q11,q6
-
-# qhasm:   x0x1x10x11 = (abab & x0x1x10x11) | (~abab & x12x1x6x11)
-# asm 1: vbif <x0x1x10x11=reg128#8,<x12x1x6x11=reg128#5,<abab=reg128#11
-# asm 2: vbif <x0x1x10x11=q7,<x12x1x6x11=q4,<abab=q10
-vbif q7,q4,q10
-
-# qhasm:   x12x13x6x7 = (abab & x12x13x6x7) | (~abab & x8x13x2x7)
-# asm 1: vbif <x12x13x6x7=reg128#9,<x8x13x2x7=reg128#6,<abab=reg128#11
-# asm 2: vbif <x12x13x6x7=q8,<x8x13x2x7=q5,<abab=q10
-vbif q8,q5,q10
-
-# qhasm:   x8x9x2x3 = (abab & x8x9x2x3) | (~abab & x4x9x14x3)
-# asm 1: vbif <x8x9x2x3=reg128#10,<x4x9x14x3=reg128#7,<abab=reg128#11
-# asm 2: vbif <x8x9x2x3=q9,<x4x9x14x3=q6,<abab=q10
-vbif q9,q6,q10
-
-# qhasm:   x4x5x14x15 = (abab & x4x5x14x15) | (~abab & x0x5x10x15)
-# asm 1: vbif <x4x5x14x15=reg128#12,<x0x5x10x15=reg128#2,<abab=reg128#11
-# asm 2: vbif <x4x5x14x15=q11,<x0x5x10x15=q1,<abab=q10
-vbif q11,q1,q10
-
-# qhasm:   x0x1x2x3 = x0x1x10x11
-# asm 1: vmov >x0x1x2x3=reg128#2,<x0x1x10x11=reg128#8
-# asm 2: vmov >x0x1x2x3=q1,<x0x1x10x11=q7
-vmov q1,q7
-
-# qhasm:   x4x5x6x7 = x4x5x14x15
-# asm 1: vmov >x4x5x6x7=reg128#5,<x4x5x14x15=reg128#12
-# asm 2: vmov >x4x5x6x7=q4,<x4x5x14x15=q11
-vmov q4,q11
-
-# qhasm:   x8x9x10x11 = x8x9x2x3
-# asm 1: vmov >x8x9x10x11=reg128#6,<x8x9x2x3=reg128#10
-# asm 2: vmov >x8x9x10x11=q5,<x8x9x2x3=q9
-vmov q5,q9
-
-# qhasm:   x12x13x14x15 = x12x13x6x7
-# asm 1: vmov >x12x13x14x15=reg128#7,<x12x13x6x7=reg128#9
-# asm 2: vmov >x12x13x14x15=q6,<x12x13x6x7=q8
-vmov q6,q8
-
-# qhasm:   x0x1x2x3 = x0x1x2x3[0,1] x8x9x2x3[2,3]
-# asm 1: vmov <x0x1x2x3=reg128#2%top,<x8x9x2x3=reg128#10%top
-# asm 2: vmov <x0x1x2x3=d3,<x8x9x2x3=d19
-vmov d3,d19
-
-# qhasm:   x4x5x6x7 = x4x5x6x7[0,1] x12x13x6x7[2,3]
-# asm 1: vmov <x4x5x6x7=reg128#5%top,<x12x13x6x7=reg128#9%top
-# asm 2: vmov <x4x5x6x7=d9,<x12x13x6x7=d17
-vmov d9,d17
-
-# qhasm:   x8x9x10x11 = x8x9x10x11[0,1] x0x1x10x11[2,3]
-# asm 1: vmov <x8x9x10x11=reg128#6%top,<x0x1x10x11=reg128#8%top
-# asm 2: vmov <x8x9x10x11=d11,<x0x1x10x11=d15
-vmov d11,d15
-
-# qhasm:   x12x13x14x15 = x12x13x14x15[0,1] x4x5x14x15[2,3]
-# asm 1: vmov <x12x13x14x15=reg128#7%top,<x4x5x14x15=reg128#12%top
-# asm 2: vmov <x12x13x14x15=d13,<x4x5x14x15=d23
-vmov d13,d23
-
-# A null m means "no message": skip the XOR so the raw block is stored.
-# qhasm:                      =? m - 0
-# asm 1: cmp <m=int32#2,#0
-# asm 2: cmp <m=r1,#0
-cmp r1,#0
-
-# qhasm:   goto nomessage2next if =
-beq ._nomessage2next
-
-# Load 64 message bytes (m advanced by 16 after each load) and XOR them
-# into the block held in q1,q4,q5,q6.
-# qhasm:     m0m1m2m3 = mem128[m]
-# asm 1: vld1.8 {>m0m1m2m3=reg128#8%bot->m0m1m2m3=reg128#8%top},[<m=int32#2]
-# asm 2: vld1.8 {>m0m1m2m3=d14->m0m1m2m3=d15},[<m=r1]
-vld1.8 {d14-d15},[r1]
-
-# qhasm:     m += 16
-# asm 1: add <m=int32#2,<m=int32#2,#16
-# asm 2: add <m=r1,<m=r1,#16
-add r1,r1,#16
-
-# qhasm:     m4m5m6m7 = mem128[m]
-# asm 1: vld1.8 {>m4m5m6m7=reg128#9%bot->m4m5m6m7=reg128#9%top},[<m=int32#2]
-# asm 2: vld1.8 {>m4m5m6m7=d16->m4m5m6m7=d17},[<m=r1]
-vld1.8 {d16-d17},[r1]
-
-# qhasm:     m += 16
-# asm 1: add <m=int32#2,<m=int32#2,#16
-# asm 2: add <m=r1,<m=r1,#16
-add r1,r1,#16
-
-# qhasm:     m8m9m10m11 = mem128[m]
-# asm 1: vld1.8 {>m8m9m10m11=reg128#10%bot->m8m9m10m11=reg128#10%top},[<m=int32#2]
-# asm 2: vld1.8 {>m8m9m10m11=d18->m8m9m10m11=d19},[<m=r1]
-vld1.8 {d18-d19},[r1]
-
-# qhasm:     m += 16
-# asm 1: add <m=int32#2,<m=int32#2,#16
-# asm 2: add <m=r1,<m=r1,#16
-add r1,r1,#16
-
-# This load reuses q10 (d20-d21), so the abab mask is overwritten here.
-# qhasm:     m12m13m14m15 = mem128[m]
-# asm 1: vld1.8 {>m12m13m14m15=reg128#11%bot->m12m13m14m15=reg128#11%top},[<m=int32#2]
-# asm 2: vld1.8 {>m12m13m14m15=d20->m12m13m14m15=d21},[<m=r1]
-vld1.8 {d20-d21},[r1]
-
-# qhasm:     m += 16
-# asm 1: add <m=int32#2,<m=int32#2,#16
-# asm 2: add <m=r1,<m=r1,#16
-add r1,r1,#16
-
-# qhasm:     x0x1x2x3 ^= m0m1m2m3
-# asm 1: veor >x0x1x2x3=reg128#2,<x0x1x2x3=reg128#2,<m0m1m2m3=reg128#8
-# asm 2: veor >x0x1x2x3=q1,<x0x1x2x3=q1,<m0m1m2m3=q7
-veor q1,q1,q7
-
-# qhasm:     x4x5x6x7 ^= m4m5m6m7
-# asm 1: veor >x4x5x6x7=reg128#5,<x4x5x6x7=reg128#5,<m4m5m6m7=reg128#9
-# asm 2: veor >x4x5x6x7=q4,<x4x5x6x7=q4,<m4m5m6m7=q8
-veor q4,q4,q8
-
-# qhasm:     x8x9x10x11 ^= m8m9m10m11
-# asm 1: veor >x8x9x10x11=reg128#6,<x8x9x10x11=reg128#6,<m8m9m10m11=reg128#10
-# asm 2: veor >x8x9x10x11=q5,<x8x9x10x11=q5,<m8m9m10m11=q9
-veor q5,q5,q9
-
-# qhasm:     x12x13x14x15 ^= m12m13m14m15
-# asm 1: veor >x12x13x14x15=reg128#7,<x12x13x14x15=reg128#7,<m12m13m14m15=reg128#11
-# asm 2: veor >x12x13x14x15=q6,<x12x13x14x15=q6,<m12m13m14m15=q10
-veor q6,q6,q10
-
-# ---------------------------------------------------------------------
-# nomessage2next: store the second 64-byte block (q1,q4,q5,q6) to c, then
-# subtract 128 from the 64-bit remaining length (r2 = low word, r3 = high
-# word) and decide whether another 128-byte iteration is possible.
-# ---------------------------------------------------------------------
-# qhasm:   nomessage2next:
-._nomessage2next:
-
-# qhasm:   mem128[c] = x0x1x2x3
-# asm 1: vst1.8 {<x0x1x2x3=reg128#2%bot-<x0x1x2x3=reg128#2%top},[<c=int32#1]
-# asm 2: vst1.8 {<x0x1x2x3=d2-<x0x1x2x3=d3},[<c=r0]
-vst1.8 {d2-d3},[r0]
-
-# qhasm:   c += 16
-# asm 1: add <c=int32#1,<c=int32#1,#16
-# asm 2: add <c=r0,<c=r0,#16
-add r0,r0,#16
-
-# qhasm:   mem128[c] = x4x5x6x7
-# asm 1: vst1.8 {<x4x5x6x7=reg128#5%bot-<x4x5x6x7=reg128#5%top},[<c=int32#1]
-# asm 2: vst1.8 {<x4x5x6x7=d8-<x4x5x6x7=d9},[<c=r0]
-vst1.8 {d8-d9},[r0]
-
-# qhasm:   c += 16
-# asm 1: add <c=int32#1,<c=int32#1,#16
-# asm 2: add <c=r0,<c=r0,#16
-add r0,r0,#16
-
-# qhasm:   mem128[c] = x8x9x10x11
-# asm 1: vst1.8 {<x8x9x10x11=reg128#6%bot-<x8x9x10x11=reg128#6%top},[<c=int32#1]
-# asm 2: vst1.8 {<x8x9x10x11=d10-<x8x9x10x11=d11},[<c=r0]
-vst1.8 {d10-d11},[r0]
-
-# qhasm:   c += 16
-# asm 1: add <c=int32#1,<c=int32#1,#16
-# asm 2: add <c=r0,<c=r0,#16
-add r0,r0,#16
-
-# qhasm:   mem128[c] = x12x13x14x15
-# asm 1: vst1.8 {<x12x13x14x15=reg128#7%bot-<x12x13x14x15=reg128#7%top},[<c=int32#1]
-# asm 2: vst1.8 {<x12x13x14x15=d12-<x12x13x14x15=d13},[<c=r0]
-vst1.8 {d12-d13},[r0]
-
-# qhasm:   c += 16
-# asm 1: add <c=int32#1,<c=int32#1,#16
-# asm 2: add <c=r0,<c=r0,#16
-add r0,r0,#16
-
-# 64-bit subtract: subs sets the carry/borrow flag, sbc propagates it into
-# the high word.
-# qhasm:   carry? mlenlow -= 128
-# asm 1: subs <mlenlow=int32#3,<mlenlow=int32#3,#128
-# asm 2: subs <mlenlow=r2,<mlenlow=r2,#128
-subs r2,r2,#128
-
-# qhasm:   mlenhigh -= 0 - carry
-# asm 1: sbc <mlenhigh=int32#4,<mlenhigh=int32#4,#0
-# asm 2: sbc <mlenhigh=r3,<mlenhigh=r3,#0
-sbc r3,r3,#0
-
-# Fast path: low word alone is already >= 128. Otherwise fall through to
-# mlenlowbelow128, which also inspects the high word.
-# qhasm:                         unsigned<? mlenlow - 128
-# asm 1: cmp <mlenlow=int32#3,#128
-# asm 2: cmp <mlenlow=r2,#128
-cmp r2,#128
-
-# qhasm: goto mlenatleast128 if !unsigned<
-bhs ._mlenatleast128
-
-# ---------------------------------------------------------------------
-# mlenlowbelow128: the low length word (r2) is below 128.
-#   - high word (r3) non-zero -> at least 128 bytes remain after all,
-#     go back to the 128-byte path;
-#   - low word zero           -> nothing left, go to done;
-#   - otherwise               -> fall through to mlenatleast1.
-# ---------------------------------------------------------------------
-# qhasm: mlenlowbelow128:
-._mlenlowbelow128:
-
-# qhasm:                        unsigned>? mlenhigh - 0
-# asm 1: cmp <mlenhigh=int32#4,#0
-# asm 2: cmp <mlenhigh=r3,#0
-cmp r3,#0
-
-# qhasm: goto mlenatleast128 if unsigned>
-bhi ._mlenatleast128
-
-# qhasm:              =? mlenlow - 0
-# asm 1: cmp <mlenlow=int32#3,#0
-# asm 2: cmp <mlenlow=r2,#0
-cmp r2,#0
-
-# qhasm: goto done if =
-beq ._done
-
-# ---------------------------------------------------------------------
-# mlenatleast1: prepare one 64-byte block.
-# If at least 64 bytes remain (r2 >= 64) the block is processed in place.
-# Otherwise the real output pointer is saved at [sp,#64] and c is pointed
-# at the 64-byte tmp buffer at [sp,#0]; when m is non-null the remaining
-# r2 message bytes are copied into tmp and zero-padded to 64 bytes, and m
-# is redirected to tmp as well.
-# r3 is reused as the byte counter i and r4 as the byte being moved.
-# ---------------------------------------------------------------------
-# qhasm: mlenatleast1:
-._mlenatleast1:
-
-# qhasm:                          unsigned<? mlenlow - 64
-# asm 1: cmp <mlenlow=int32#3,#64
-# asm 2: cmp <mlenlow=r2,#64
-cmp r2,#64
-
-# qhasm:   goto mlenatleast64 if !unsigned<
-bhs ._mlenatleast64
-
-# qhasm:     savec = c
-# asm 1: str <c=int32#1,>savec=stack32#1
-# asm 2: str <c=r0,>savec=[sp,#64]
-str r0,[sp,#64]
-
-# qhasm:     c = &tmp
-# asm 1: lea >c=int32#1,<tmp=stack512#1
-# asm 2: lea >c=r0,<tmp=[sp,#0]
-add r0,sp,#0
-
-# With no message there is nothing to stage; c already points at tmp.
-# qhasm:                           =? m - 0
-# asm 1: cmp <m=int32#2,#0
-# asm 2: cmp <m=r1,#0
-cmp r1,#0
-
-# qhasm:     goto mlenatleast64 if =
-beq ._mlenatleast64
-
-# qhasm:     i = 0
-# asm 1: ldr >i=int32#4,=0
-# asm 2: ldr >i=r3,=0
-ldr r3,=0
-
-# mcopy: byte-wise copy of the message tail into tmp. The condition is
-# tested after the copy, so at least one byte is always moved.
-# NOTE(review): this relies on mlenlow being non-zero on entry; the only
-# visible fall-through into this block guarantees that, but other
-# branches to mlenatleast1 are outside this view - confirm.
-# qhasm:     mcopy:
-._mcopy:
-
-# qhasm:       mi = mem8[m + 0]
-# asm 1: ldrb >mi=int32#5,[<m=int32#2,#0]
-# asm 2: ldrb >mi=r4,[<m=r1,#0]
-ldrb r4,[r1,#0]
-
-# qhasm:       mem8[c + 0] = mi
-# asm 1: strb <mi=int32#5,[<c=int32#1,#0]
-# asm 2: strb <mi=r4,[<c=r0,#0]
-strb r4,[r0,#0]
-
-# qhasm:       m += 1
-# asm 1: add <m=int32#2,<m=int32#2,#1
-# asm 2: add <m=r1,<m=r1,#1
-add r1,r1,#1
-
-# qhasm:       c += 1
-# asm 1: add <c=int32#1,<c=int32#1,#1
-# asm 2: add <c=r0,<c=r0,#1
-add r0,r0,#1
-
-# qhasm:       i += 1
-# asm 1: add <i=int32#4,<i=int32#4,#1
-# asm 2: add <i=r3,<i=r3,#1
-add r3,r3,#1
-
-# qhasm:                   unsigned<? i - mlenlow
-# asm 1: cmp <i=int32#4,<mlenlow=int32#3
-# asm 2: cmp <i=r3,<mlenlow=r2
-cmp r3,r2
-
-# qhasm:     goto mcopy if unsigned<
-blo ._mcopy
-
-# m is no longer needed as a pointer, so r1 is reused to hold the zero
-# pad byte.
-# qhasm:     mi = 0
-# asm 1: ldr >mi=int32#2,=0
-# asm 2: ldr >mi=r1,=0
-ldr r1,=0
-
-# pad: continue the same counter i up to 64, writing zero bytes, so tmp
-# ends up holding exactly 64 bytes.
-# qhasm:     pad:
-._pad:
-
-# qhasm:       mem8[c + 0] = mi
-# asm 1: strb <mi=int32#2,[<c=int32#1,#0]
-# asm 2: strb <mi=r1,[<c=r0,#0]
-strb r1,[r0,#0]
-
-# qhasm:       c += 1
-# asm 1: add <c=int32#1,<c=int32#1,#1
-# asm 2: add <c=r0,<c=r0,#1
-add r0,r0,#1
-
-# qhasm:       i += 1
-# asm 1: add <i=int32#4,<i=int32#4,#1
-# asm 2: add <i=r3,<i=r3,#1
-add r3,r3,#1
-
-# qhasm:                 unsigned<? i - 64
-# asm 1: cmp <i=int32#4,#64
-# asm 2: cmp <i=r3,#64
-cmp r3,#64
-
-# qhasm:     goto pad if unsigned<
-blo ._pad
-
-# c advanced by 64 during copy+pad; rewind it to the start of tmp and
-# make m read from tmp too.
-# qhasm:     c -= 64
-# asm 1: sub <c=int32#1,<c=int32#1,#64
-# asm 2: sub <c=r0,<c=r0,#64
-sub r0,r0,#64
-
-# qhasm:     m = &tmp
-# asm 1: lea >m=int32#2,<tmp=stack512#1
-# asm 2: lea >m=r1,<tmp=[sp,#0]
-add r1,sp,#0
-
-# ---------------------------------------------------------------------
-# mlenatleast64: set up the working state for a single 64-byte block.
-#   q0      = n2n3n3n2 (its low 64 bits are advanced by one below;
-#             presumably the block counter - confirm against the setup
-#             code outside this view)
-#   q2,q3   = start0,start1 (already in registers)
-#   q5,q6   = start2,start3 (assembled here)
-#   q1,q4,q7,q8 = diag0..diag3 working copies for the round loop
-#   r3      = i, the round counter, initialised to 12
-# ---------------------------------------------------------------------
-# qhasm:   mlenatleast64:
-._mlenatleast64:
-
-# Reload the key-word vectors that were spilled to the stack.
-# qhasm:   new k2k3k6k7
-
-# qhasm:   k2k3k6k7 bot = k2k3k6k7_stack bot
-# asm 1: vldr <k2k3k6k7=reg128#2%bot,<k2k3k6k7_stack=stack128#5
-# asm 2: vldr <k2k3k6k7=d2,<k2k3k6k7_stack=[sp,#160]
-vldr d2,[sp,#160]
-
-# qhasm:   k2k3k6k7 top = k2k3k6k7_stack top
-# asm 1: vldr <k2k3k6k7=reg128#2%top,<k2k3k6k7_stack=stack128#5
-# asm 2: vldr <k2k3k6k7=d3,<k2k3k6k7_stack=[sp,#168]
-vldr d3,[sp,#168]
-
-# qhasm:   new k1n1k7k2
-
-# qhasm:   k1n1k7k2 bot = k1n1k7k2_stack bot
-# asm 1: vldr <k1n1k7k2=reg128#5%bot,<k1n1k7k2_stack=stack128#6
-# asm 2: vldr <k1n1k7k2=d8,<k1n1k7k2_stack=[sp,#176]
-vldr d8,[sp,#176]
-
-# qhasm:   k1n1k7k2 top = k1n1k7k2_stack top
-# asm 1: vldr <k1n1k7k2=reg128#5%top,<k1n1k7k2_stack=stack128#6
-# asm 2: vldr <k1n1k7k2=d9,<k1n1k7k2_stack=[sp,#184]
-vldr d9,[sp,#184]
-
-# Rebuild the upper half of q0 as the word-swapped lower half (d1 =
-# d0[1],d0[0]); d1 is recomputed from d0 on every pass through this label.
-# qhasm:   n2n3n3n2 = n2n3n3n2[0,1] n2n3n3n2[1] n2n3n3n2[0]
-# asm 1: vext.32 <n2n3n3n2=reg128#1%top,<n2n3n3n2=reg128#1%bot,<n2n3n3n2=reg128#1%bot,#1
-# asm 2: vext.32 <n2n3n3n2=d1,<n2n3n3n2=d0,<n2n3n3n2=d0,#1
-vext.32 d1,d0,d0,#1
-
-# Assemble start2 (q5) and start3 (q6) from the key words and q0.
-# qhasm:   new start2
-
-# qhasm:   start2 = start2[0,1] k1n1k7k2[0,1]             
-# asm 1: vmov <start2=reg128#6%top,<k1n1k7k2=reg128#5%bot
-# asm 2: vmov <start2=d11,<k1n1k7k2=d8
-vmov d11,d8
-
-# qhasm:   start2 = n2n3n3n2[3] k2k3k6k7[2] start2[2,3]   
-# asm 1: vext.32 <start2=reg128#6%bot,<n2n3n3n2=reg128#1%top,<k2k3k6k7=reg128#2%top,#1
-# asm 2: vext.32 <start2=d10,<n2n3n3n2=d1,<k2k3k6k7=d3,#1
-vext.32 d10,d1,d3,#1
-
-# qhasm:   new start3
-
-# qhasm:   start3 = start3[0,1] k1n1k7k2[2,3]             
-# asm 1: vmov <start3=reg128#7%top,<k1n1k7k2=reg128#5%top
-# asm 2: vmov <start3=d13,<k1n1k7k2=d9
-vmov d13,d9
-
-# qhasm:   start3 = k2k3k6k7[1] n2n3n3n2[2] start3[2,3]   
-# asm 1: vext.32 <start3=reg128#7%bot,<k2k3k6k7=reg128#2%bot,<n2n3n3n2=reg128#1%top,#1
-# asm 2: vext.32 <start3=d12,<k2k3k6k7=d2,<n2n3n3n2=d1,#1
-vext.32 d12,d2,d1,#1
-
-# Working copies: the loop mutates diag0..3 while start0..3 stay intact
-# for the feed-forward addition after the loop. The first two moves
-# overwrite q1/q4, which held the key words loaded above.
-# qhasm:   diag0 = start0
-# asm 1: vmov >diag0=reg128#2,<start0=reg128#3
-# asm 2: vmov >diag0=q1,<start0=q2
-vmov q1,q2
-
-# qhasm:   diag1 = start1
-# asm 1: vmov >diag1=reg128#5,<start1=reg128#4
-# asm 2: vmov >diag1=q4,<start1=q3
-vmov q4,q3
-
-# qhasm:   diag2 = start2
-# asm 1: vmov >diag2=reg128#8,<start2=reg128#6
-# asm 2: vmov >diag2=q7,<start2=q5
-vmov q7,q5
-
-# qhasm:   diag3 = start3
-# asm 1: vmov >diag3=reg128#9,<start3=reg128#7
-# asm 2: vmov >diag3=q8,<start3=q6
-vmov q8,q6
-
-# Build the constant 1 in each 64-bit lane (0xff >> 7 == 1) and add it to
-# q0 as 64-bit lanes, advancing the value for the next block.
-# qhasm:   2x nextblock = 0xff
-# asm 1: vmov.i64 >nextblock=reg128#10,#0xff
-# asm 2: vmov.i64 >nextblock=q9,#0xff
-vmov.i64 q9,#0xff
-
-# qhasm:   4x nextblock unsigned>>= 7
-# asm 1: vshr.u32 >nextblock=reg128#10,<nextblock=reg128#10,#7
-# asm 2: vshr.u32 >nextblock=q9,<nextblock=q9,#7
-vshr.u32 q9,q9,#7
-
-# qhasm:   2x n2n3n3n2 += nextblock
-# asm 1: vadd.i64 >n2n3n3n2=reg128#1,<n2n3n3n2=reg128#1,<nextblock=reg128#10
-# asm 2: vadd.i64 >n2n3n3n2=q0,<n2n3n3n2=q0,<nextblock=q9
-vadd.i64 q0,q0,q9
-
-# Round counter: the loop subtracts 2 per pass.
-# qhasm:   i = 12
-# asm 1: ldr >i=int32#4,=12
-# asm 2: ldr >i=r3,=12
-ldr r3,=12
-
-# ---------------------------------------------------------------------
-# mainloop1: round loop for a single 64-byte block, followed by the
-# feed-forward addition, word reordering and optional XOR with m.
-#   q1,q4,q7,q8 = diag0,diag1,diag2,diag3 (four 32-bit words each)
-#   q9,q10      = scratch (a* sums and b* rotated values)
-#   r3          = i; each pass subtracts 2 and the loop repeats while the
-#                 result is still above zero (six passes from i = 12)
-# Each pass performs two groups of four add/rotate/xor steps with rotation
-# amounts 7, 9, 13 and 18. A rotate-left by n is built from
-# vshl #n followed by vsri #(32-n) into the same destination.
-# The vext/vswp instructions rotate the word positions inside diag1..3
-# between the two groups and back again at the end of the pass.
-# ---------------------------------------------------------------------
-# qhasm:   mainloop1:
-._mainloop1:
-
-# First group: diag3 ^= rotl(diag1+diag0, 7)
-# qhasm:     4x a0 = diag1 + diag0
-# asm 1: vadd.i32 >a0=reg128#10,<diag1=reg128#5,<diag0=reg128#2
-# asm 2: vadd.i32 >a0=q9,<diag1=q4,<diag0=q1
-vadd.i32 q9,q4,q1
-
-# qhasm:     4x b0 = a0 << 7
-# asm 1: vshl.i32 >b0=reg128#11,<a0=reg128#10,#7
-# asm 2: vshl.i32 >b0=q10,<a0=q9,#7
-vshl.i32 q10,q9,#7
-
-# qhasm:     4x b0 insert= a0 >> 25
-# asm 1: vsri.i32 <b0=reg128#11,<a0=reg128#10,#25
-# asm 2: vsri.i32 <b0=q10,<a0=q9,#25
-vsri.i32 q10,q9,#25
-
-# qhasm:        diag3 ^= b0
-# asm 1: veor >diag3=reg128#9,<diag3=reg128#9,<b0=reg128#11
-# asm 2: veor >diag3=q8,<diag3=q8,<b0=q10
-veor q8,q8,q10
-
-# diag2 ^= rotl(diag0+diag3, 9)
-# qhasm:     4x a1 = diag0 + diag3
-# asm 1: vadd.i32 >a1=reg128#10,<diag0=reg128#2,<diag3=reg128#9
-# asm 2: vadd.i32 >a1=q9,<diag0=q1,<diag3=q8
-vadd.i32 q9,q1,q8
-
-# qhasm:     4x b1 = a1 << 9
-# asm 1: vshl.i32 >b1=reg128#11,<a1=reg128#10,#9
-# asm 2: vshl.i32 >b1=q10,<a1=q9,#9
-vshl.i32 q10,q9,#9
-
-# qhasm:     4x b1 insert= a1 >> 23
-# asm 1: vsri.i32 <b1=reg128#11,<a1=reg128#10,#23
-# asm 2: vsri.i32 <b1=q10,<a1=q9,#23
-vsri.i32 q10,q9,#23
-
-# qhasm:        diag2 ^= b1
-# asm 1: veor >diag2=reg128#8,<diag2=reg128#8,<b1=reg128#11
-# asm 2: veor >diag2=q7,<diag2=q7,<b1=q10
-veor q7,q7,q10
-
-# diag1 ^= rotl(diag3+diag2, 13); the sum is taken before diag3 is
-# rotated by one word position.
-# qhasm:     4x a2 = diag3 + diag2
-# asm 1: vadd.i32 >a2=reg128#10,<diag3=reg128#9,<diag2=reg128#8
-# asm 2: vadd.i32 >a2=q9,<diag3=q8,<diag2=q7
-vadd.i32 q9,q8,q7
-
-# qhasm:             diag3 = diag3[3] diag3[0,1,2]
-# asm 1: vext.32 >diag3=reg128#9,<diag3=reg128#9,<diag3=reg128#9,#3
-# asm 2: vext.32 >diag3=q8,<diag3=q8,<diag3=q8,#3
-vext.32 q8,q8,q8,#3
-
-# qhasm:     4x b2 = a2 << 13
-# asm 1: vshl.i32 >b2=reg128#11,<a2=reg128#10,#13
-# asm 2: vshl.i32 >b2=q10,<a2=q9,#13
-vshl.i32 q10,q9,#13
-
-# qhasm:     4x b2 insert= a2 >> 19
-# asm 1: vsri.i32 <b2=reg128#11,<a2=reg128#10,#19
-# asm 2: vsri.i32 <b2=q10,<a2=q9,#19
-vsri.i32 q10,q9,#19
-
-# qhasm:        diag1 ^= b2
-# asm 1: veor >diag1=reg128#5,<diag1=reg128#5,<b2=reg128#11
-# asm 2: veor >diag1=q4,<diag1=q4,<b2=q10
-veor q4,q4,q10
-
-# diag0 ^= rotl(diag2+diag1, 18); diag2 halves are swapped and diag1 is
-# rotated by one word once their old values have been consumed.
-# qhasm:     4x a3 = diag2 + diag1
-# asm 1: vadd.i32 >a3=reg128#10,<diag2=reg128#8,<diag1=reg128#5
-# asm 2: vadd.i32 >a3=q9,<diag2=q7,<diag1=q4
-vadd.i32 q9,q7,q4
-
-# qhasm:             diag2 = diag2[2,3] diag2[0,1]
-# asm 1: vswp <diag2=reg128#8%bot,<diag2=reg128#8%top
-# asm 2: vswp <diag2=d14,<diag2=d15
-vswp d14,d15
-
-# qhasm:     4x b3 = a3 << 18
-# asm 1: vshl.i32 >b3=reg128#11,<a3=reg128#10,#18
-# asm 2: vshl.i32 >b3=q10,<a3=q9,#18
-vshl.i32 q10,q9,#18
-
-# qhasm:     4x b3 insert= a3 >> 14
-# asm 1: vsri.i32 <b3=reg128#11,<a3=reg128#10,#14
-# asm 2: vsri.i32 <b3=q10,<a3=q9,#14
-vsri.i32 q10,q9,#14
-
-# qhasm:             diag1 = diag1[1,2,3] diag1[0]
-# asm 1: vext.32 >diag1=reg128#5,<diag1=reg128#5,<diag1=reg128#5,#1
-# asm 2: vext.32 >diag1=q4,<diag1=q4,<diag1=q4,#1
-vext.32 q4,q4,q4,#1
-
-# qhasm:        diag0 ^= b3
-# asm 1: veor >diag0=reg128#2,<diag0=reg128#2,<b3=reg128#11
-# asm 2: veor >diag0=q1,<diag0=q1,<b3=q10
-veor q1,q1,q10
-
-# Second group: same four steps with the roles of diag1 and diag3
-# exchanged.
-# qhasm:     4x a0 = diag3 + diag0
-# asm 1: vadd.i32 >a0=reg128#10,<diag3=reg128#9,<diag0=reg128#2
-# asm 2: vadd.i32 >a0=q9,<diag3=q8,<diag0=q1
-vadd.i32 q9,q8,q1
-
-# qhasm:     4x b0 = a0 << 7
-# asm 1: vshl.i32 >b0=reg128#11,<a0=reg128#10,#7
-# asm 2: vshl.i32 >b0=q10,<a0=q9,#7
-vshl.i32 q10,q9,#7
-
-# qhasm:     4x b0 insert= a0 >> 25
-# asm 1: vsri.i32 <b0=reg128#11,<a0=reg128#10,#25
-# asm 2: vsri.i32 <b0=q10,<a0=q9,#25
-vsri.i32 q10,q9,#25
-
-# qhasm:        diag1 ^= b0
-# asm 1: veor >diag1=reg128#5,<diag1=reg128#5,<b0=reg128#11
-# asm 2: veor >diag1=q4,<diag1=q4,<b0=q10
-veor q4,q4,q10
-
-# qhasm:     4x a1 = diag0 + diag1
-# asm 1: vadd.i32 >a1=reg128#10,<diag0=reg128#2,<diag1=reg128#5
-# asm 2: vadd.i32 >a1=q9,<diag0=q1,<diag1=q4
-vadd.i32 q9,q1,q4
-
-# qhasm:     4x b1 = a1 << 9
-# asm 1: vshl.i32 >b1=reg128#11,<a1=reg128#10,#9
-# asm 2: vshl.i32 >b1=q10,<a1=q9,#9
-vshl.i32 q10,q9,#9
-
-# qhasm:     4x b1 insert= a1 >> 23
-# asm 1: vsri.i32 <b1=reg128#11,<a1=reg128#10,#23
-# asm 2: vsri.i32 <b1=q10,<a1=q9,#23
-vsri.i32 q10,q9,#23
-
-# Loop counter update, placed mid-loop. The flags it sets are consumed by
-# the bhi at the bottom; every instruction in between is a NEON vector
-# operation, none of which updates the integer condition flags.
-# qhasm:                   						unsigned>? i -= 2
-# asm 1: subs <i=int32#4,<i=int32#4,#2
-# asm 2: subs <i=r3,<i=r3,#2
-subs r3,r3,#2
-
-# qhasm:        diag2 ^= b1
-# asm 1: veor >diag2=reg128#8,<diag2=reg128#8,<b1=reg128#11
-# asm 2: veor >diag2=q7,<diag2=q7,<b1=q10
-veor q7,q7,q10
-
-# qhasm:     4x a2 = diag1 + diag2
-# asm 1: vadd.i32 >a2=reg128#10,<diag1=reg128#5,<diag2=reg128#8
-# asm 2: vadd.i32 >a2=q9,<diag1=q4,<diag2=q7
-vadd.i32 q9,q4,q7
-
-# qhasm:             diag1 = diag1[3] diag1[0,1,2]
-# asm 1: vext.32 >diag1=reg128#5,<diag1=reg128#5,<diag1=reg128#5,#3
-# asm 2: vext.32 >diag1=q4,<diag1=q4,<diag1=q4,#3
-vext.32 q4,q4,q4,#3
-
-# qhasm:     4x b2 = a2 << 13
-# asm 1: vshl.i32 >b2=reg128#11,<a2=reg128#10,#13
-# asm 2: vshl.i32 >b2=q10,<a2=q9,#13
-vshl.i32 q10,q9,#13
-
-# qhasm:     4x b2 insert= a2 >> 19
-# asm 1: vsri.i32 <b2=reg128#11,<a2=reg128#10,#19
-# asm 2: vsri.i32 <b2=q10,<a2=q9,#19
-vsri.i32 q10,q9,#19
-
-# qhasm:        diag3 ^= b2
-# asm 1: veor >diag3=reg128#9,<diag3=reg128#9,<b2=reg128#11
-# asm 2: veor >diag3=q8,<diag3=q8,<b2=q10
-veor q8,q8,q10
-
-# qhasm:     4x a3 = diag2 + diag3
-# asm 1: vadd.i32 >a3=reg128#10,<diag2=reg128#8,<diag3=reg128#9
-# asm 2: vadd.i32 >a3=q9,<diag2=q7,<diag3=q8
-vadd.i32 q9,q7,q8
-
-# qhasm:             diag2 = diag2[2,3] diag2[0,1]
-# asm 1: vswp <diag2=reg128#8%bot,<diag2=reg128#8%top
-# asm 2: vswp <diag2=d14,<diag2=d15
-vswp d14,d15
-
-# qhasm:     4x b3 = a3 << 18
-# asm 1: vshl.i32 >b3=reg128#11,<a3=reg128#10,#18
-# asm 2: vshl.i32 >b3=q10,<a3=q9,#18
-vshl.i32 q10,q9,#18
-
-# qhasm:     4x b3 insert= a3 >> 14
-# asm 1: vsri.i32 <b3=reg128#11,<a3=reg128#10,#14
-# asm 2: vsri.i32 <b3=q10,<a3=q9,#14
-vsri.i32 q10,q9,#14
-
-# qhasm:             diag3 = diag3[1,2,3] diag3[0]
-# asm 1: vext.32 >diag3=reg128#9,<diag3=reg128#9,<diag3=reg128#9,#1
-# asm 2: vext.32 >diag3=q8,<diag3=q8,<diag3=q8,#1
-vext.32 q8,q8,q8,#1
-
-# qhasm:        diag0 ^= b3
-# asm 1: veor >diag0=reg128#2,<diag0=reg128#2,<b3=reg128#11
-# asm 2: veor >diag0=q1,<diag0=q1,<b3=q10
-veor q1,q1,q10
-
-# qhasm:   goto mainloop1 if unsigned>
-bhi ._mainloop1
-
-# Rounds finished. abab has the low 32 bits of every 64-bit lane set; it
-# drives the vbif word selection below.
-# qhasm:   2x abab = 0xffffffff
-# asm 1: vmov.i64 >abab=reg128#10,#0xffffffff
-# asm 2: vmov.i64 >abab=q9,#0xffffffff
-vmov.i64 q9,#0xffffffff
-
-# Feed-forward: add the untouched start0..3 onto the mixed state. The
-# start2/start3 registers (q5/q6) are overwritten by the results.
-# qhasm:   4x x0x5x10x15 = diag0 + start0
-# asm 1: vadd.i32 >x0x5x10x15=reg128#2,<diag0=reg128#2,<start0=reg128#3
-# asm 2: vadd.i32 >x0x5x10x15=q1,<diag0=q1,<start0=q2
-vadd.i32 q1,q1,q2
-
-# qhasm:   4x x12x1x6x11 = diag1 + start1
-# asm 1: vadd.i32 >x12x1x6x11=reg128#5,<diag1=reg128#5,<start1=reg128#4
-# asm 2: vadd.i32 >x12x1x6x11=q4,<diag1=q4,<start1=q3
-vadd.i32 q4,q4,q3
-
-# qhasm:   4x x8x13x2x7 = diag2 + start2
-# asm 1: vadd.i32 >x8x13x2x7=reg128#6,<diag2=reg128#8,<start2=reg128#6
-# asm 2: vadd.i32 >x8x13x2x7=q5,<diag2=q7,<start2=q5
-vadd.i32 q5,q7,q5
-
-# qhasm:   4x x4x9x14x3 = diag3 + start3
-# asm 1: vadd.i32 >x4x9x14x3=reg128#7,<diag3=reg128#9,<start3=reg128#7
-# asm 2: vadd.i32 >x4x9x14x3=q6,<diag3=q8,<start3=q6
-vadd.i32 q6,q8,q6
-
-# Reorder from diagonal layout to x0..x15: copy, vbif to take the words
-# at positions 1 and 3 from the neighbouring register, then move the
-# correct upper d-register halves into place.
-# qhasm:   x0x1x10x11 = x0x5x10x15
-# asm 1: vmov >x0x1x10x11=reg128#8,<x0x5x10x15=reg128#2
-# asm 2: vmov >x0x1x10x11=q7,<x0x5x10x15=q1
-vmov q7,q1
-
-# qhasm:   x12x13x6x7 = x12x1x6x11
-# asm 1: vmov >x12x13x6x7=reg128#9,<x12x1x6x11=reg128#5
-# asm 2: vmov >x12x13x6x7=q8,<x12x1x6x11=q4
-vmov q8,q4
-
-# qhasm:   x8x9x2x3 = x8x13x2x7
-# asm 1: vmov >x8x9x2x3=reg128#11,<x8x13x2x7=reg128#6
-# asm 2: vmov >x8x9x2x3=q10,<x8x13x2x7=q5
-vmov q10,q5
-
-# qhasm:   x4x5x14x15 = x4x9x14x3
-# asm 1: vmov >x4x5x14x15=reg128#12,<x4x9x14x3=reg128#7
-# asm 2: vmov >x4x5x14x15=q11,<x4x9x14x3=q6
-vmov q11,q6
-
-# qhasm:   x0x1x10x11 = (abab & x0x1x10x11) | (~abab & x12x1x6x11)
-# asm 1: vbif <x0x1x10x11=reg128#8,<x12x1x6x11=reg128#5,<abab=reg128#10
-# asm 2: vbif <x0x1x10x11=q7,<x12x1x6x11=q4,<abab=q9
-vbif q7,q4,q9
-
-# qhasm:   x12x13x6x7 = (abab & x12x13x6x7) | (~abab & x8x13x2x7)
-# asm 1: vbif <x12x13x6x7=reg128#9,<x8x13x2x7=reg128#6,<abab=reg128#10
-# asm 2: vbif <x12x13x6x7=q8,<x8x13x2x7=q5,<abab=q9
-vbif q8,q5,q9
-
-# qhasm:   x8x9x2x3 = (abab & x8x9x2x3) | (~abab & x4x9x14x3)
-# asm 1: vbif <x8x9x2x3=reg128#11,<x4x9x14x3=reg128#7,<abab=reg128#10
-# asm 2: vbif <x8x9x2x3=q10,<x4x9x14x3=q6,<abab=q9
-vbif q10,q6,q9
-
-# qhasm:   x4x5x14x15 = (abab & x4x5x14x15) | (~abab & x0x5x10x15)
-# asm 1: vbif <x4x5x14x15=reg128#12,<x0x5x10x15=reg128#2,<abab=reg128#10
-# asm 2: vbif <x4x5x14x15=q11,<x0x5x10x15=q1,<abab=q9
-vbif q11,q1,q9
-
-# qhasm:   x0x1x2x3 = x0x1x10x11
-# asm 1: vmov >x0x1x2x3=reg128#2,<x0x1x10x11=reg128#8
-# asm 2: vmov >x0x1x2x3=q1,<x0x1x10x11=q7
-vmov q1,q7
-
-# qhasm:   x4x5x6x7 = x4x5x14x15
-# asm 1: vmov >x4x5x6x7=reg128#5,<x4x5x14x15=reg128#12
-# asm 2: vmov >x4x5x6x7=q4,<x4x5x14x15=q11
-vmov q4,q11
-
-# qhasm:   x8x9x10x11 = x8x9x2x3
-# asm 1: vmov >x8x9x10x11=reg128#6,<x8x9x2x3=reg128#11
-# asm 2: vmov >x8x9x10x11=q5,<x8x9x2x3=q10
-vmov q5,q10
-
-# qhasm:   x12x13x14x15 = x12x13x6x7
-# asm 1: vmov >x12x13x14x15=reg128#7,<x12x13x6x7=reg128#9
-# asm 2: vmov >x12x13x14x15=q6,<x12x13x6x7=q8
-vmov q6,q8
-
-# qhasm:   x0x1x2x3 = x0x1x2x3[0,1] x8x9x2x3[2,3]
-# asm 1: vmov <x0x1x2x3=reg128#2%top,<x8x9x2x3=reg128#11%top
-# asm 2: vmov <x0x1x2x3=d3,<x8x9x2x3=d21
-vmov d3,d21
-
-# qhasm:   x4x5x6x7 = x4x5x6x7[0,1] x12x13x6x7[2,3]
-# asm 1: vmov <x4x5x6x7=reg128#5%top,<x12x13x6x7=reg128#9%top
-# asm 2: vmov <x4x5x6x7=d9,<x12x13x6x7=d17
-vmov d9,d17
-
-# qhasm:   x8x9x10x11 = x8x9x10x11[0,1] x0x1x10x11[2,3]
-# asm 1: vmov <x8x9x10x11=reg128#6%top,<x0x1x10x11=reg128#8%top
-# asm 2: vmov <x8x9x10x11=d11,<x0x1x10x11=d15
-vmov d11,d15
-
-# qhasm:   x12x13x14x15 = x12x13x14x15[0,1] x4x5x14x15[2,3]
-# asm 1: vmov <x12x13x14x15=reg128#7%top,<x4x5x14x15=reg128#12%top
-# asm 2: vmov <x12x13x14x15=d13,<x4x5x14x15=d23
-vmov d13,d23
-
-# A null m means "no message": skip the XOR and store the block as is.
-# qhasm:                      =? m - 0
-# asm 1: cmp <m=int32#2,#0
-# asm 2: cmp <m=r1,#0
-cmp r1,#0
-
-# qhasm:   goto nomessage1 if =
-beq ._nomessage1
-
-# Load 64 bytes from m (advancing m by 16 after each load) and XOR them
-# into q1,q4,q5,q6.
-# qhasm:     m0m1m2m3 = mem128[m]
-# asm 1: vld1.8 {>m0m1m2m3=reg128#8%bot->m0m1m2m3=reg128#8%top},[<m=int32#2]
-# asm 2: vld1.8 {>m0m1m2m3=d14->m0m1m2m3=d15},[<m=r1]
-vld1.8 {d14-d15},[r1]
-
-# qhasm:     m += 16
-# asm 1: add <m=int32#2,<m=int32#2,#16
-# asm 2: add <m=r1,<m=r1,#16
-add r1,r1,#16
-
-# qhasm:     m4m5m6m7 = mem128[m]
-# asm 1: vld1.8 {>m4m5m6m7=reg128#9%bot->m4m5m6m7=reg128#9%top},[<m=int32#2]
-# asm 2: vld1.8 {>m4m5m6m7=d16->m4m5m6m7=d17},[<m=r1]
-vld1.8 {d16-d17},[r1]
-
-# qhasm:     m += 16
-# asm 1: add <m=int32#2,<m=int32#2,#16
-# asm 2: add <m=r1,<m=r1,#16
-add r1,r1,#16
-
-# qhasm:     m8m9m10m11 = mem128[m]
-# asm 1: vld1.8 {>m8m9m10m11=reg128#10%bot->m8m9m10m11=reg128#10%top},[<m=int32#2]
-# asm 2: vld1.8 {>m8m9m10m11=d18->m8m9m10m11=d19},[<m=r1]
-vld1.8 {d18-d19},[r1]
-
-# qhasm:     m += 16
-# asm 1: add <m=int32#2,<m=int32#2,#16
-# asm 2: add <m=r1,<m=r1,#16
-add r1,r1,#16
-
-# qhasm:     m12m13m14m15 = mem128[m]
-# asm 1: vld1.8 {>m12m13m14m15=reg128#11%bot->m12m13m14m15=reg128#11%top},[<m=int32#2]
-# asm 2: vld1.8 {>m12m13m14m15=d20->m12m13m14m15=d21},[<m=r1]
-vld1.8 {d20-d21},[r1]
-
-# qhasm:     m += 16
-# asm 1: add <m=int32#2,<m=int32#2,#16
-# asm 2: add <m=r1,<m=r1,#16
-add r1,r1,#16
-
-# qhasm:     x0x1x2x3 ^= m0m1m2m3
-# asm 1: veor >x0x1x2x3=reg128#2,<x0x1x2x3=reg128#2,<m0m1m2m3=reg128#8
-# asm 2: veor >x0x1x2x3=q1,<x0x1x2x3=q1,<m0m1m2m3=q7
-veor q1,q1,q7
-
-# qhasm:     x4x5x6x7 ^= m4m5m6m7
-# asm 1: veor >x4x5x6x7=reg128#5,<x4x5x6x7=reg128#5,<m4m5m6m7=reg128#9
-# asm 2: veor >x4x5x6x7=q4,<x4x5x6x7=q4,<m4m5m6m7=q8
-veor q4,q4,q8
-
-# qhasm:     x8x9x10x11 ^= m8m9m10m11
-# asm 1: veor >x8x9x10x11=reg128#6,<x8x9x10x11=reg128#6,<m8m9m10m11=reg128#10
-# asm 2: veor >x8x9x10x11=q5,<x8x9x10x11=q5,<m8m9m10m11=q9
-veor q5,q5,q9
-
-# qhasm:     x12x13x14x15 ^= m12m13m14m15
-# asm 1: veor >x12x13x14x15=reg128#7,<x12x13x14x15=reg128#7,<m12m13m14m15=reg128#11
-# asm 2: veor >x12x13x14x15=q6,<x12x13x14x15=q6,<m12m13m14m15=q10
-veor q6,q6,q10
-
-# ---------------------------------------------------------------------
-# nomessage1: store the finished 64-byte block (q1,q4,q5,q6) to c, then
-# check whether this was a full block.
-#   r2 >= 64 -> continue at xmlenatleast64.
-#   r2 <  64 -> the block was written via c, which the short-length setup
-#               at mlenatleast1 pointed at the tmp buffer; set up a
-#               byte-wise copy from that buffer to the saved destination.
-# ---------------------------------------------------------------------
-# qhasm:   nomessage1:
-._nomessage1:
-
-# qhasm:   mem128[c] = x0x1x2x3
-# asm 1: vst1.8 {<x0x1x2x3=reg128#2%bot-<x0x1x2x3=reg128#2%top},[<c=int32#1]
-# asm 2: vst1.8 {<x0x1x2x3=d2-<x0x1x2x3=d3},[<c=r0]
-vst1.8 {d2-d3},[r0]
-
-# qhasm:   c += 16
-# asm 1: add <c=int32#1,<c=int32#1,#16
-# asm 2: add <c=r0,<c=r0,#16
-add r0,r0,#16
-
-# qhasm:   mem128[c] = x4x5x6x7
-# asm 1: vst1.8 {<x4x5x6x7=reg128#5%bot-<x4x5x6x7=reg128#5%top},[<c=int32#1]
-# asm 2: vst1.8 {<x4x5x6x7=d8-<x4x5x6x7=d9},[<c=r0]
-vst1.8 {d8-d9},[r0]
-
-# qhasm:   c += 16
-# asm 1: add <c=int32#1,<c=int32#1,#16
-# asm 2: add <c=r0,<c=r0,#16
-add r0,r0,#16
-
-# qhasm:   mem128[c] = x8x9x10x11
-# asm 1: vst1.8 {<x8x9x10x11=reg128#6%bot-<x8x9x10x11=reg128#6%top},[<c=int32#1]
-# asm 2: vst1.8 {<x8x9x10x11=d10-<x8x9x10x11=d11},[<c=r0]
-vst1.8 {d10-d11},[r0]
-
-# qhasm:   c += 16
-# asm 1: add <c=int32#1,<c=int32#1,#16
-# asm 2: add <c=r0,<c=r0,#16
-add r0,r0,#16
-
-# qhasm:   mem128[c] = x12x13x14x15
-# asm 1: vst1.8 {<x12x13x14x15=reg128#7%bot-<x12x13x14x15=reg128#7%top},[<c=int32#1]
-# asm 2: vst1.8 {<x12x13x14x15=d12-<x12x13x14x15=d13},[<c=r0]
-vst1.8 {d12-d13},[r0]
-
-# qhasm:   c += 16
-# asm 1: add <c=int32#1,<c=int32#1,#16
-# asm 2: add <c=r0,<c=r0,#16
-add r0,r0,#16
-
-# qhasm:                           unsigned<? mlenlow - 64
-# asm 1: cmp <mlenlow=int32#3,#64
-# asm 2: cmp <mlenlow=r2,#64
-cmp r2,#64
-
-# qhasm:   goto xmlenatleast64 if !unsigned<
-bhs ._xmlenatleast64
-
-# Short final block: r3 becomes the copy index i, r1 (m) is repointed at
-# the start of the 64 bytes just stored (c - 64), and c is restored from
-# the slot at [sp,#64] where mlenatleast1 saved it.
-# qhasm:     i = 0
-# asm 1: ldr >i=int32#4,=0
-# asm 2: ldr >i=r3,=0
-ldr r3,=0
-
-# qhasm:     m = c - 64
-# asm 1: sub >m=int32#2,<c=int32#1,#64
-# asm 2: sub >m=r1,<c=r0,#64
-sub r1,r0,#64
-
-# qhasm:     c = savec
-# asm 1: ldr >c=int32#1,<savec=stack32#1
-# asm 2: ldr >c=r0,<savec=[sp,#64]
-ldr r0,[sp,#64]
-
-# qhasm:     ccopy:
-._ccopy:
-
-# qhasm:       ci = mem8[m + 0]
-# asm 1: ldrb >ci=int32#5,[<m=int32#2,#0]
-# asm 2: ldrb >ci=r4,[<m=r1,#0]
-ldrb r4,[r1,#0]
-
-# qhasm:       mem8[c + 0] = ci
-# asm 1: strb <ci=int32#5,[<c=int32#1,#0]
-# asm 2: strb <ci=r4,[<c=r0,#0]
-strb r4,[r0,#0]
-
-# qhasm:       m += 1
-# asm 1: add <m=int32#2,<m=int32#2,#1
-# asm 2: add <m=r1,<m=r1,#1
-add r1,r1,#1
-
-# qhasm:       c += 1
-# asm 1: add <c=int32#1,<c=int32#1,#1
-# asm 2: add <c=r0,<c=r0,#1
-add r0,r0,#1
-
-# qhasm:       i += 1
-# asm 1: add <i=int32#4,<i=int32#4,#1
-# asm 2: add <i=r3,<i=r3,#1
-add r3,r3,#1
-
-# qhasm:                   unsigned<? i - mlenlow
-# asm 1: cmp <i=int32#4,<mlenlow=int32#3
-# asm 2: cmp <i=r3,<mlenlow=r2
-cmp r3,r2
-
-# qhasm:     goto ccopy if unsigned<
-blo ._ccopy
-
-# qhasm:   xmlenatleast64:
-._xmlenatleast64:
-
-# qhasm:                      unsigned>? mlenlow -= 64
-# asm 1: subs <mlenlow=int32#3,<mlenlow=int32#3,#64
-# asm 2: subs <mlenlow=r2,<mlenlow=r2,#64
-subs r2,r2,#64
-
-# qhasm: goto mlenatleast1 if unsigned>
-bhi ._mlenatleast1
-
-# qhasm: done:
-._done:
-
-# qhasm: new caller_r4
-
-# qhasm: caller_r4 = stack_r4
-# asm 1: ldr >caller_r4=int32#5,<stack_r4=stack32#2
-# asm 2: ldr >caller_r4=r4,<stack_r4=[sp,#68]
-ldr r4,[sp,#68]
-
-# qhasm: new caller_q4
-
-# qhasm: new caller_q5
-
-# qhasm: new caller_q6
-
-# qhasm: new caller_q7
-
-# qhasm: caller_q4 bot = stack_q4 bot
-# asm 1: vldr <caller_q4=reg128#5%bot,<stack_q4=stack128#1
-# asm 2: vldr <caller_q4=d8,<stack_q4=[sp,#96]
-vldr d8,[sp,#96]
-
-# qhasm: caller_q4 top = stack_q4 top
-# asm 1: vldr <caller_q4=reg128#5%top,<stack_q4=stack128#1
-# asm 2: vldr <caller_q4=d9,<stack_q4=[sp,#104]
-vldr d9,[sp,#104]
-
-# qhasm: caller_q5 bot = stack_q5 bot
-# asm 1: vldr <caller_q5=reg128#6%bot,<stack_q5=stack128#2
-# asm 2: vldr <caller_q5=d10,<stack_q5=[sp,#112]
-vldr d10,[sp,#112]
-
-# qhasm: caller_q5 top = stack_q5 top
-# asm 1: vldr <caller_q5=reg128#6%top,<stack_q5=stack128#2
-# asm 2: vldr <caller_q5=d11,<stack_q5=[sp,#120]
-vldr d11,[sp,#120]
-
-# qhasm: caller_q6 bot = stack_q6 bot
-# asm 1: vldr <caller_q6=reg128#7%bot,<stack_q6=stack128#3
-# asm 2: vldr <caller_q6=d12,<stack_q6=[sp,#128]
-vldr d12,[sp,#128]
-
-# qhasm: caller_q6 top = stack_q6 top
-# asm 1: vldr <caller_q6=reg128#7%top,<stack_q6=stack128#3
-# asm 2: vldr <caller_q6=d13,<stack_q6=[sp,#136]
-vldr d13,[sp,#136]
-
-# qhasm: caller_q7 bot = stack_q7 bot
-# asm 1: vldr <caller_q7=reg128#8%bot,<stack_q7=stack128#4
-# asm 2: vldr <caller_q7=d14,<stack_q7=[sp,#144]
-vldr d14,[sp,#144]
-
-# qhasm: caller_q7 top = stack_q7 top
-# asm 1: vldr <caller_q7=reg128#8%top,<stack_q7=stack128#4
-# asm 2: vldr <caller_q7=d15,<stack_q7=[sp,#152]
-vldr d15,[sp,#152]
-
-# qhasm: int32 result
-
-# qhasm: result = 0
-# asm 1: ldr >result=int32#1,=0
-# asm 2: ldr >result=r0,=0
-ldr r0,=0
-
-# qhasm: return result
-add sp,sp,#256
-bx lr

+ 0 - 6
ext/x64-salsa2012-asm/README.md

@@ -1,6 +0,0 @@
-Blazingly fast X64 ASM implementation of Salsa20/12
-======
-
-This is ripped from the [cnacl](https://github.com/cjdelisle/cnacl) source. The actual code is by Daniel J. Bernstein and is in the public domain.
-
-This is included on Linux and Mac 64-bit builds and is significantly faster than the SSE intrinsics or C versions. It's used for packet encode/decode only since its use differs a bit from the regular Salsa20 C++ class. Specifically it lacks the ability to be called on multiple blocks, preferring instead to take a key and a single stream to encrypt and that's it.

+ 0 - 16
ext/x64-salsa2012-asm/salsa2012.h

@@ -1,16 +0,0 @@
-#ifndef ZT_X64_SALSA2012_ASM
-#define ZT_X64_SALSA2012_ASM
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Generates Salsa20/12 key stream into the output buffer (implemented in salsa2012.s).
-// Arguments, in order: output, outlen in bytes, nonce (8 bytes are read), key (256-bit / 32-byte)
-extern int zt_salsa2012_amd64_xmm6(unsigned char *, unsigned long long, const unsigned char *, const unsigned char *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif

+ 0 - 4488
ext/x64-salsa2012-asm/salsa2012.s

@@ -1,4488 +0,0 @@
-# qhasm: enter zt_salsa2012_amd64_xmm6 (keystream entry: zero-fills out, then joins the _xor routine at ._start)
-.text
-.p2align 5
-.globl _zt_salsa2012_amd64_xmm6
-.globl zt_salsa2012_amd64_xmm6
-_zt_salsa2012_amd64_xmm6:
-zt_salsa2012_amd64_xmm6:
-mov %rsp,%r11
-and $31,%r11
-add $480,%r11
-sub %r11,%rsp
-# r11 = (rsp & 31) + 480 was just subtracted from rsp, leaving rsp 32-byte aligned; r11 and rbx/rbp/r12-r15 are spilled to the frame below.
-# qhasm: r11_stack = r11_caller
-# asm 1: movq <r11_caller=int64#9,>r11_stack=stack64#1
-# asm 2: movq <r11_caller=%r11,>r11_stack=352(%rsp)
-movq %r11,352(%rsp)
-
-# qhasm: r12_stack = r12_caller
-# asm 1: movq <r12_caller=int64#10,>r12_stack=stack64#2
-# asm 2: movq <r12_caller=%r12,>r12_stack=360(%rsp)
-movq %r12,360(%rsp)
-
-# qhasm: r13_stack = r13_caller
-# asm 1: movq <r13_caller=int64#11,>r13_stack=stack64#3
-# asm 2: movq <r13_caller=%r13,>r13_stack=368(%rsp)
-movq %r13,368(%rsp)
-
-# qhasm: r14_stack = r14_caller
-# asm 1: movq <r14_caller=int64#12,>r14_stack=stack64#4
-# asm 2: movq <r14_caller=%r14,>r14_stack=376(%rsp)
-movq %r14,376(%rsp)
-
-# qhasm: r15_stack = r15_caller
-# asm 1: movq <r15_caller=int64#13,>r15_stack=stack64#5
-# asm 2: movq <r15_caller=%r15,>r15_stack=384(%rsp)
-movq %r15,384(%rsp)
-
-# qhasm: rbx_stack = rbx_caller
-# asm 1: movq <rbx_caller=int64#14,>rbx_stack=stack64#6
-# asm 2: movq <rbx_caller=%rbx,>rbx_stack=392(%rsp)
-movq %rbx,392(%rsp)
-
-# qhasm: rbp_stack = rbp_caller
-# asm 1: movq <rbp_caller=int64#15,>rbp_stack=stack64#7
-# asm 2: movq <rbp_caller=%rbp,>rbp_stack=400(%rsp)
-movq %rbp,400(%rsp)
-# Argument shuffle: bytes <- arg2 (rsi), out stays in rdi, m <- out, iv stays in rdx, k <- arg4 (rcx); the self-moves are no-ops.
-# qhasm: bytes = arg2
-# asm 1: mov  <arg2=int64#2,>bytes=int64#6
-# asm 2: mov  <arg2=%rsi,>bytes=%r9
-mov  %rsi,%r9
-
-# qhasm: out = arg1
-# asm 1: mov  <arg1=int64#1,>out=int64#1
-# asm 2: mov  <arg1=%rdi,>out=%rdi
-mov  %rdi,%rdi
-# The message pointer m is set to the output buffer itself, so m aliases out from here on.
-# qhasm: m = out
-# asm 1: mov  <out=int64#1,>m=int64#2
-# asm 2: mov  <out=%rdi,>m=%rsi
-mov  %rdi,%rsi
-
-# qhasm: iv = arg3
-# asm 1: mov  <arg3=int64#3,>iv=int64#3
-# asm 2: mov  <arg3=%rdx,>iv=%rdx
-mov  %rdx,%rdx
-
-# qhasm: k = arg4
-# asm 1: mov  <arg4=int64#4,>k=int64#8
-# asm 2: mov  <arg4=%rcx,>k=%r10
-mov  %rcx,%r10
-# Zero-length request: the unsigned compare against 0 sends bytes == 0 straight to ._done.
-# qhasm:               unsigned>? bytes - 0
-# asm 1: cmp  $0,<bytes=int64#6
-# asm 2: cmp  $0,<bytes=%r9
-cmp  $0,%r9
-# comment:fp stack unchanged by jump
-
-# qhasm: goto done if !unsigned>
-jbe ._done
-# Zero-fill out[0..bytes) with rep stosb (al = 0, rcx = bytes), then rewind rdi to the start of the buffer.
-# qhasm: a = 0
-# asm 1: mov  $0,>a=int64#7
-# asm 2: mov  $0,>a=%rax
-mov  $0,%rax
-
-# qhasm: i = bytes
-# asm 1: mov  <bytes=int64#6,>i=int64#4
-# asm 2: mov  <bytes=%r9,>i=%rcx
-mov  %r9,%rcx
-
-# qhasm: while (i) { *out++ = a; --i }
-rep stosb
-
-# qhasm: out -= bytes
-# asm 1: sub  <bytes=int64#6,<out=int64#1
-# asm 2: sub  <bytes=%r9,<out=%rdi
-sub  %r9,%rdi
-# comment:fp stack unchanged by jump
-# Join the shared code at ._start (defined inside zt_salsa2012_amd64_xmm6_xor); presumably XORing into the zeroed buffer yields the raw keystream - TODO confirm against the xor path.
-# qhasm: goto start
-jmp ._start
-
-# qhasm: enter zt_salsa2012_amd64_xmm6_xor
-.text
-.p2align 5
-.globl _zt_salsa2012_amd64_xmm6_xor
-.globl zt_salsa2012_amd64_xmm6_xor
-_zt_salsa2012_amd64_xmm6_xor:
-zt_salsa2012_amd64_xmm6_xor:
-mov %rsp,%r11
-and $31,%r11
-add $480,%r11
-sub %r11,%rsp
-
-# qhasm: r11_stack = r11_caller
-# asm 1: movq <r11_caller=int64#9,>r11_stack=stack64#1
-# asm 2: movq <r11_caller=%r11,>r11_stack=352(%rsp)
-movq %r11,352(%rsp)
-
-# qhasm: r12_stack = r12_caller
-# asm 1: movq <r12_caller=int64#10,>r12_stack=stack64#2
-# asm 2: movq <r12_caller=%r12,>r12_stack=360(%rsp)
-movq %r12,360(%rsp)
-
-# qhasm: r13_stack = r13_caller
-# asm 1: movq <r13_caller=int64#11,>r13_stack=stack64#3
-# asm 2: movq <r13_caller=%r13,>r13_stack=368(%rsp)
-movq %r13,368(%rsp)
-
-# qhasm: r14_stack = r14_caller
-# asm 1: movq <r14_caller=int64#12,>r14_stack=stack64#4
-# asm 2: movq <r14_caller=%r14,>r14_stack=376(%rsp)
-movq %r14,376(%rsp)
-
-# qhasm: r15_stack = r15_caller
-# asm 1: movq <r15_caller=int64#13,>r15_stack=stack64#5
-# asm 2: movq <r15_caller=%r15,>r15_stack=384(%rsp)
-movq %r15,384(%rsp)
-
-# qhasm: rbx_stack = rbx_caller
-# asm 1: movq <rbx_caller=int64#14,>rbx_stack=stack64#6
-# asm 2: movq <rbx_caller=%rbx,>rbx_stack=392(%rsp)
-movq %rbx,392(%rsp)
-
-# qhasm: rbp_stack = rbp_caller
-# asm 1: movq <rbp_caller=int64#15,>rbp_stack=stack64#7
-# asm 2: movq <rbp_caller=%rbp,>rbp_stack=400(%rsp)
-movq %rbp,400(%rsp)
-
-# qhasm: out = arg1
-# asm 1: mov  <arg1=int64#1,>out=int64#1
-# asm 2: mov  <arg1=%rdi,>out=%rdi
-mov  %rdi,%rdi
-
-# qhasm: m = arg2
-# asm 1: mov  <arg2=int64#2,>m=int64#2
-# asm 2: mov  <arg2=%rsi,>m=%rsi
-mov  %rsi,%rsi
-
-# qhasm: bytes = arg3
-# asm 1: mov  <arg3=int64#3,>bytes=int64#6
-# asm 2: mov  <arg3=%rdx,>bytes=%r9
-mov  %rdx,%r9
-
-# qhasm: iv = arg4
-# asm 1: mov  <arg4=int64#4,>iv=int64#3
-# asm 2: mov  <arg4=%rcx,>iv=%rdx
-mov  %rcx,%rdx
-
-# qhasm: k = arg5
-# asm 1: mov  <arg5=int64#5,>k=int64#8
-# asm 2: mov  <arg5=%r8,>k=%r10
-mov  %r8,%r10
-
-# qhasm:               unsigned>? bytes - 0
-# asm 1: cmp  $0,<bytes=int64#6
-# asm 2: cmp  $0,<bytes=%r9
-cmp  $0,%r9
-# comment:fp stack unchanged by jump
-
-# qhasm: goto done if !unsigned>
-jbe ._done
-# comment:fp stack unchanged by fallthrough
-
-# qhasm: start:
-._start:
-
-# qhasm:   in12 = *(uint32 *) (k + 20)
-# asm 1: movl   20(<k=int64#8),>in12=int64#4d
-# asm 2: movl   20(<k=%r10),>in12=%ecx
-movl   20(%r10),%ecx
-
-# qhasm:   in1 = *(uint32 *) (k + 0)
-# asm 1: movl   0(<k=int64#8),>in1=int64#5d
-# asm 2: movl   0(<k=%r10),>in1=%r8d
-movl   0(%r10),%r8d
-
-# qhasm:   in6 = *(uint32 *) (iv + 0)
-# asm 1: movl   0(<iv=int64#3),>in6=int64#7d
-# asm 2: movl   0(<iv=%rdx),>in6=%eax
-movl   0(%rdx),%eax
-
-# qhasm:   in11 = *(uint32 *) (k + 16)
-# asm 1: movl   16(<k=int64#8),>in11=int64#9d
-# asm 2: movl   16(<k=%r10),>in11=%r11d
-movl   16(%r10),%r11d
-
-# qhasm:   ((uint32 *)&x1)[0] = in12
-# asm 1: movl <in12=int64#4d,>x1=stack128#1
-# asm 2: movl <in12=%ecx,>x1=0(%rsp)
-movl %ecx,0(%rsp)
-
-# qhasm:   ((uint32 *)&x1)[1] = in1
-# asm 1: movl <in1=int64#5d,4+<x1=stack128#1
-# asm 2: movl <in1=%r8d,4+<x1=0(%rsp)
-movl %r8d,4+0(%rsp)
-
-# qhasm:   ((uint32 *)&x1)[2] = in6
-# asm 1: movl <in6=int64#7d,8+<x1=stack128#1
-# asm 2: movl <in6=%eax,8+<x1=0(%rsp)
-movl %eax,8+0(%rsp)
-
-# qhasm:   ((uint32 *)&x1)[3] = in11
-# asm 1: movl <in11=int64#9d,12+<x1=stack128#1
-# asm 2: movl <in11=%r11d,12+<x1=0(%rsp)
-movl %r11d,12+0(%rsp)
-
-# qhasm:   in8 = 0
-# asm 1: mov  $0,>in8=int64#4
-# asm 2: mov  $0,>in8=%rcx
-mov  $0,%rcx
-
-# qhasm:   in13 = *(uint32 *) (k + 24)
-# asm 1: movl   24(<k=int64#8),>in13=int64#5d
-# asm 2: movl   24(<k=%r10),>in13=%r8d
-movl   24(%r10),%r8d
-
-# qhasm:   in2 = *(uint32 *) (k + 4)
-# asm 1: movl   4(<k=int64#8),>in2=int64#7d
-# asm 2: movl   4(<k=%r10),>in2=%eax
-movl   4(%r10),%eax
-
-# qhasm:   in7 = *(uint32 *) (iv + 4)
-# asm 1: movl   4(<iv=int64#3),>in7=int64#3d
-# asm 2: movl   4(<iv=%rdx),>in7=%edx
-movl   4(%rdx),%edx
-
-# qhasm:   ((uint32 *)&x2)[0] = in8
-# asm 1: movl <in8=int64#4d,>x2=stack128#2
-# asm 2: movl <in8=%ecx,>x2=16(%rsp)
-movl %ecx,16(%rsp)
-
-# qhasm:   ((uint32 *)&x2)[1] = in13
-# asm 1: movl <in13=int64#5d,4+<x2=stack128#2
-# asm 2: movl <in13=%r8d,4+<x2=16(%rsp)
-movl %r8d,4+16(%rsp)
-
-# qhasm:   ((uint32 *)&x2)[2] = in2
-# asm 1: movl <in2=int64#7d,8+<x2=stack128#2
-# asm 2: movl <in2=%eax,8+<x2=16(%rsp)
-movl %eax,8+16(%rsp)
-
-# qhasm:   ((uint32 *)&x2)[3] = in7
-# asm 1: movl <in7=int64#3d,12+<x2=stack128#2
-# asm 2: movl <in7=%edx,12+<x2=16(%rsp)
-movl %edx,12+16(%rsp)
-
-# qhasm:   in4 = *(uint32 *) (k + 12)
-# asm 1: movl   12(<k=int64#8),>in4=int64#3d
-# asm 2: movl   12(<k=%r10),>in4=%edx
-movl   12(%r10),%edx
-
-# qhasm:   in9 = 0
-# asm 1: mov  $0,>in9=int64#4
-# asm 2: mov  $0,>in9=%rcx
-mov  $0,%rcx
-
-# qhasm:   in14 = *(uint32 *) (k + 28)
-# asm 1: movl   28(<k=int64#8),>in14=int64#5d
-# asm 2: movl   28(<k=%r10),>in14=%r8d
-movl   28(%r10),%r8d
-
-# qhasm:   in3 = *(uint32 *) (k + 8)
-# asm 1: movl   8(<k=int64#8),>in3=int64#7d
-# asm 2: movl   8(<k=%r10),>in3=%eax
-movl   8(%r10),%eax
-
-# qhasm:   ((uint32 *)&x3)[0] = in4
-# asm 1: movl <in4=int64#3d,>x3=stack128#3
-# asm 2: movl <in4=%edx,>x3=32(%rsp)
-movl %edx,32(%rsp)
-
-# qhasm:   ((uint32 *)&x3)[1] = in9
-# asm 1: movl <in9=int64#4d,4+<x3=stack128#3
-# asm 2: movl <in9=%ecx,4+<x3=32(%rsp)
-movl %ecx,4+32(%rsp)
-
-# qhasm:   ((uint32 *)&x3)[2] = in14
-# asm 1: movl <in14=int64#5d,8+<x3=stack128#3
-# asm 2: movl <in14=%r8d,8+<x3=32(%rsp)
-movl %r8d,8+32(%rsp)
-
-# qhasm:   ((uint32 *)&x3)[3] = in3
-# asm 1: movl <in3=int64#7d,12+<x3=stack128#3
-# asm 2: movl <in3=%eax,12+<x3=32(%rsp)
-movl %eax,12+32(%rsp)
-
-# qhasm:   in0 = 1634760805
-# asm 1: mov  $1634760805,>in0=int64#3
-# asm 2: mov  $1634760805,>in0=%rdx
-mov  $1634760805,%rdx
-
-# qhasm:   in5 = 857760878
-# asm 1: mov  $857760878,>in5=int64#4
-# asm 2: mov  $857760878,>in5=%rcx
-mov  $857760878,%rcx
-
-# qhasm:   in10 = 2036477234
-# asm 1: mov  $2036477234,>in10=int64#5
-# asm 2: mov  $2036477234,>in10=%r8
-mov  $2036477234,%r8
-
-# qhasm:   in15 = 1797285236
-# asm 1: mov  $1797285236,>in15=int64#7
-# asm 2: mov  $1797285236,>in15=%rax
-mov  $1797285236,%rax
-
-# qhasm:   ((uint32 *)&x0)[0] = in0
-# asm 1: movl <in0=int64#3d,>x0=stack128#4
-# asm 2: movl <in0=%edx,>x0=48(%rsp)
-movl %edx,48(%rsp)
-
-# qhasm:   ((uint32 *)&x0)[1] = in5
-# asm 1: movl <in5=int64#4d,4+<x0=stack128#4
-# asm 2: movl <in5=%ecx,4+<x0=48(%rsp)
-movl %ecx,4+48(%rsp)
-
-# qhasm:   ((uint32 *)&x0)[2] = in10
-# asm 1: movl <in10=int64#5d,8+<x0=stack128#4
-# asm 2: movl <in10=%r8d,8+<x0=48(%rsp)
-movl %r8d,8+48(%rsp)
-
-# qhasm:   ((uint32 *)&x0)[3] = in15
-# asm 1: movl <in15=int64#7d,12+<x0=stack128#4
-# asm 2: movl <in15=%eax,12+<x0=48(%rsp)
-movl %eax,12+48(%rsp)
-
-# qhasm:                               unsigned<? bytes - 256
-# asm 1: cmp  $256,<bytes=int64#6
-# asm 2: cmp  $256,<bytes=%r9
-cmp  $256,%r9
-# comment:fp stack unchanged by jump
-
-# qhasm:   goto bytesbetween1and255 if unsigned<
-jb ._bytesbetween1and255
-
-# qhasm:   z0 = x0
-# asm 1: movdqa <x0=stack128#4,>z0=int6464#1
-# asm 2: movdqa <x0=48(%rsp),>z0=%xmm0
-movdqa 48(%rsp),%xmm0
-
-# qhasm:   z5 = z0[1,1,1,1]
-# asm 1: pshufd $0x55,<z0=int6464#1,>z5=int6464#2
-# asm 2: pshufd $0x55,<z0=%xmm0,>z5=%xmm1
-pshufd $0x55,%xmm0,%xmm1
-
-# qhasm:   z10 = z0[2,2,2,2]
-# asm 1: pshufd $0xaa,<z0=int6464#1,>z10=int6464#3
-# asm 2: pshufd $0xaa,<z0=%xmm0,>z10=%xmm2
-pshufd $0xaa,%xmm0,%xmm2
-
-# qhasm:   z15 = z0[3,3,3,3]
-# asm 1: pshufd $0xff,<z0=int6464#1,>z15=int6464#4
-# asm 2: pshufd $0xff,<z0=%xmm0,>z15=%xmm3
-pshufd $0xff,%xmm0,%xmm3
-
-# qhasm:   z0 = z0[0,0,0,0]
-# asm 1: pshufd $0x00,<z0=int6464#1,>z0=int6464#1
-# asm 2: pshufd $0x00,<z0=%xmm0,>z0=%xmm0
-pshufd $0x00,%xmm0,%xmm0
-
-# qhasm:   orig5 = z5
-# asm 1: movdqa <z5=int6464#2,>orig5=stack128#5
-# asm 2: movdqa <z5=%xmm1,>orig5=64(%rsp)
-movdqa %xmm1,64(%rsp)
-
-# qhasm:   orig10 = z10
-# asm 1: movdqa <z10=int6464#3,>orig10=stack128#6
-# asm 2: movdqa <z10=%xmm2,>orig10=80(%rsp)
-movdqa %xmm2,80(%rsp)
-
-# qhasm:   orig15 = z15
-# asm 1: movdqa <z15=int6464#4,>orig15=stack128#7
-# asm 2: movdqa <z15=%xmm3,>orig15=96(%rsp)
-movdqa %xmm3,96(%rsp)
-
-# qhasm:   orig0 = z0
-# asm 1: movdqa <z0=int6464#1,>orig0=stack128#8
-# asm 2: movdqa <z0=%xmm0,>orig0=112(%rsp)
-movdqa %xmm0,112(%rsp)
-
-# qhasm:   z1 = x1
-# asm 1: movdqa <x1=stack128#1,>z1=int6464#1
-# asm 2: movdqa <x1=0(%rsp),>z1=%xmm0
-movdqa 0(%rsp),%xmm0
-
-# qhasm:   z6 = z1[2,2,2,2]
-# asm 1: pshufd $0xaa,<z1=int6464#1,>z6=int6464#2
-# asm 2: pshufd $0xaa,<z1=%xmm0,>z6=%xmm1
-pshufd $0xaa,%xmm0,%xmm1
-
-# qhasm:   z11 = z1[3,3,3,3]
-# asm 1: pshufd $0xff,<z1=int6464#1,>z11=int6464#3
-# asm 2: pshufd $0xff,<z1=%xmm0,>z11=%xmm2
-pshufd $0xff,%xmm0,%xmm2
-
-# qhasm:   z12 = z1[0,0,0,0]
-# asm 1: pshufd $0x00,<z1=int6464#1,>z12=int6464#4
-# asm 2: pshufd $0x00,<z1=%xmm0,>z12=%xmm3
-pshufd $0x00,%xmm0,%xmm3
-
-# qhasm:   z1 = z1[1,1,1,1]
-# asm 1: pshufd $0x55,<z1=int6464#1,>z1=int6464#1
-# asm 2: pshufd $0x55,<z1=%xmm0,>z1=%xmm0
-pshufd $0x55,%xmm0,%xmm0
-
-# qhasm:   orig6 = z6
-# asm 1: movdqa <z6=int6464#2,>orig6=stack128#9
-# asm 2: movdqa <z6=%xmm1,>orig6=128(%rsp)
-movdqa %xmm1,128(%rsp)
-
-# qhasm:   orig11 = z11
-# asm 1: movdqa <z11=int6464#3,>orig11=stack128#10
-# asm 2: movdqa <z11=%xmm2,>orig11=144(%rsp)
-movdqa %xmm2,144(%rsp)
-
-# qhasm:   orig12 = z12
-# asm 1: movdqa <z12=int6464#4,>orig12=stack128#11
-# asm 2: movdqa <z12=%xmm3,>orig12=160(%rsp)
-movdqa %xmm3,160(%rsp)
-
-# qhasm:   orig1 = z1
-# asm 1: movdqa <z1=int6464#1,>orig1=stack128#12
-# asm 2: movdqa <z1=%xmm0,>orig1=176(%rsp)
-movdqa %xmm0,176(%rsp)
-
-# qhasm:   z2 = x2
-# asm 1: movdqa <x2=stack128#2,>z2=int6464#1
-# asm 2: movdqa <x2=16(%rsp),>z2=%xmm0
-movdqa 16(%rsp),%xmm0
-
-# qhasm:   z7 = z2[3,3,3,3]
-# asm 1: pshufd $0xff,<z2=int6464#1,>z7=int6464#2
-# asm 2: pshufd $0xff,<z2=%xmm0,>z7=%xmm1
-pshufd $0xff,%xmm0,%xmm1
-
-# qhasm:   z13 = z2[1,1,1,1]
-# asm 1: pshufd $0x55,<z2=int6464#1,>z13=int6464#3
-# asm 2: pshufd $0x55,<z2=%xmm0,>z13=%xmm2
-pshufd $0x55,%xmm0,%xmm2
-
-# qhasm:   z2 = z2[2,2,2,2]
-# asm 1: pshufd $0xaa,<z2=int6464#1,>z2=int6464#1
-# asm 2: pshufd $0xaa,<z2=%xmm0,>z2=%xmm0
-pshufd $0xaa,%xmm0,%xmm0
-
-# qhasm:   orig7 = z7
-# asm 1: movdqa <z7=int6464#2,>orig7=stack128#13
-# asm 2: movdqa <z7=%xmm1,>orig7=192(%rsp)
-movdqa %xmm1,192(%rsp)
-
-# qhasm:   orig13 = z13
-# asm 1: movdqa <z13=int6464#3,>orig13=stack128#14
-# asm 2: movdqa <z13=%xmm2,>orig13=208(%rsp)
-movdqa %xmm2,208(%rsp)
-
-# qhasm:   orig2 = z2
-# asm 1: movdqa <z2=int6464#1,>orig2=stack128#15
-# asm 2: movdqa <z2=%xmm0,>orig2=224(%rsp)
-movdqa %xmm0,224(%rsp)
-
-# qhasm:   z3 = x3
-# asm 1: movdqa <x3=stack128#3,>z3=int6464#1
-# asm 2: movdqa <x3=32(%rsp),>z3=%xmm0
-movdqa 32(%rsp),%xmm0
-
-# qhasm:   z4 = z3[0,0,0,0]
-# asm 1: pshufd $0x00,<z3=int6464#1,>z4=int6464#2
-# asm 2: pshufd $0x00,<z3=%xmm0,>z4=%xmm1
-pshufd $0x00,%xmm0,%xmm1
-
-# qhasm:   z14 = z3[2,2,2,2]
-# asm 1: pshufd $0xaa,<z3=int6464#1,>z14=int6464#3
-# asm 2: pshufd $0xaa,<z3=%xmm0,>z14=%xmm2
-pshufd $0xaa,%xmm0,%xmm2
-
-# qhasm:   z3 = z3[3,3,3,3]
-# asm 1: pshufd $0xff,<z3=int6464#1,>z3=int6464#1
-# asm 2: pshufd $0xff,<z3=%xmm0,>z3=%xmm0
-pshufd $0xff,%xmm0,%xmm0
-
-# qhasm:   orig4 = z4
-# asm 1: movdqa <z4=int6464#2,>orig4=stack128#16
-# asm 2: movdqa <z4=%xmm1,>orig4=240(%rsp)
-movdqa %xmm1,240(%rsp)
-
-# qhasm:   orig14 = z14
-# asm 1: movdqa <z14=int6464#3,>orig14=stack128#17
-# asm 2: movdqa <z14=%xmm2,>orig14=256(%rsp)
-movdqa %xmm2,256(%rsp)
-
-# qhasm:   orig3 = z3
-# asm 1: movdqa <z3=int6464#1,>orig3=stack128#18
-# asm 2: movdqa <z3=%xmm0,>orig3=272(%rsp)
-movdqa %xmm0,272(%rsp)
-
-# qhasm: bytesatleast256:
-._bytesatleast256:
-
-# qhasm:   in8 = ((uint32 *)&x2)[0]
-# asm 1: movl <x2=stack128#2,>in8=int64#3d
-# asm 2: movl <x2=16(%rsp),>in8=%edx
-movl 16(%rsp),%edx
-
-# qhasm:   in9 = ((uint32 *)&x3)[1]
-# asm 1: movl 4+<x3=stack128#3,>in9=int64#4d
-# asm 2: movl 4+<x3=32(%rsp),>in9=%ecx
-movl 4+32(%rsp),%ecx
-
-# qhasm:   ((uint32 *) &orig8)[0] = in8
-# asm 1: movl <in8=int64#3d,>orig8=stack128#19
-# asm 2: movl <in8=%edx,>orig8=288(%rsp)
-movl %edx,288(%rsp)
-
-# qhasm:   ((uint32 *) &orig9)[0] = in9
-# asm 1: movl <in9=int64#4d,>orig9=stack128#20
-# asm 2: movl <in9=%ecx,>orig9=304(%rsp)
-movl %ecx,304(%rsp)
-
-# qhasm:   in8 += 1
-# asm 1: add  $1,<in8=int64#3
-# asm 2: add  $1,<in8=%rdx
-add  $1,%rdx
-
-# qhasm:   in9 <<= 32
-# asm 1: shl  $32,<in9=int64#4
-# asm 2: shl  $32,<in9=%rcx
-shl  $32,%rcx
-
-# qhasm:   in8 += in9
-# asm 1: add  <in9=int64#4,<in8=int64#3
-# asm 2: add  <in9=%rcx,<in8=%rdx
-add  %rcx,%rdx
-
-# qhasm:   in9 = in8
-# asm 1: mov  <in8=int64#3,>in9=int64#4
-# asm 2: mov  <in8=%rdx,>in9=%rcx
-mov  %rdx,%rcx
-
-# qhasm:   (uint64) in9 >>= 32
-# asm 1: shr  $32,<in9=int64#4
-# asm 2: shr  $32,<in9=%rcx
-shr  $32,%rcx
-
-# qhasm:   ((uint32 *) &orig8)[1] = in8
-# asm 1: movl <in8=int64#3d,4+<orig8=stack128#19
-# asm 2: movl <in8=%edx,4+<orig8=288(%rsp)
-movl %edx,4+288(%rsp)
-
-# qhasm:   ((uint32 *) &orig9)[1] = in9
-# asm 1: movl <in9=int64#4d,4+<orig9=stack128#20
-# asm 2: movl <in9=%ecx,4+<orig9=304(%rsp)
-movl %ecx,4+304(%rsp)
-
-# qhasm:   in8 += 1
-# asm 1: add  $1,<in8=int64#3
-# asm 2: add  $1,<in8=%rdx
-add  $1,%rdx
-
-# qhasm:   in9 <<= 32
-# asm 1: shl  $32,<in9=int64#4
-# asm 2: shl  $32,<in9=%rcx
-shl  $32,%rcx
-
-# qhasm:   in8 += in9
-# asm 1: add  <in9=int64#4,<in8=int64#3
-# asm 2: add  <in9=%rcx,<in8=%rdx
-add  %rcx,%rdx
-
-# qhasm:   in9 = in8
-# asm 1: mov  <in8=int64#3,>in9=int64#4
-# asm 2: mov  <in8=%rdx,>in9=%rcx
-mov  %rdx,%rcx
-
-# qhasm:   (uint64) in9 >>= 32
-# asm 1: shr  $32,<in9=int64#4
-# asm 2: shr  $32,<in9=%rcx
-shr  $32,%rcx
-
-# qhasm:   ((uint32 *) &orig8)[2] = in8
-# asm 1: movl <in8=int64#3d,8+<orig8=stack128#19
-# asm 2: movl <in8=%edx,8+<orig8=288(%rsp)
-movl %edx,8+288(%rsp)
-
-# qhasm:   ((uint32 *) &orig9)[2] = in9
-# asm 1: movl <in9=int64#4d,8+<orig9=stack128#20
-# asm 2: movl <in9=%ecx,8+<orig9=304(%rsp)
-movl %ecx,8+304(%rsp)
-
-# qhasm:   in8 += 1
-# asm 1: add  $1,<in8=int64#3
-# asm 2: add  $1,<in8=%rdx
-add  $1,%rdx
-
-# qhasm:   in9 <<= 32
-# asm 1: shl  $32,<in9=int64#4
-# asm 2: shl  $32,<in9=%rcx
-shl  $32,%rcx
-
-# qhasm:   in8 += in9
-# asm 1: add  <in9=int64#4,<in8=int64#3
-# asm 2: add  <in9=%rcx,<in8=%rdx
-add  %rcx,%rdx
-
-# qhasm:   in9 = in8
-# asm 1: mov  <in8=int64#3,>in9=int64#4
-# asm 2: mov  <in8=%rdx,>in9=%rcx
-mov  %rdx,%rcx
-
-# qhasm:   (uint64) in9 >>= 32
-# asm 1: shr  $32,<in9=int64#4
-# asm 2: shr  $32,<in9=%rcx
-shr  $32,%rcx
-
-# qhasm:   ((uint32 *) &orig8)[3] = in8
-# asm 1: movl <in8=int64#3d,12+<orig8=stack128#19
-# asm 2: movl <in8=%edx,12+<orig8=288(%rsp)
-movl %edx,12+288(%rsp)
-
-# qhasm:   ((uint32 *) &orig9)[3] = in9
-# asm 1: movl <in9=int64#4d,12+<orig9=stack128#20
-# asm 2: movl <in9=%ecx,12+<orig9=304(%rsp)
-movl %ecx,12+304(%rsp)
-
-# qhasm:   in8 += 1
-# asm 1: add  $1,<in8=int64#3
-# asm 2: add  $1,<in8=%rdx
-add  $1,%rdx
-
-# qhasm:   in9 <<= 32
-# asm 1: shl  $32,<in9=int64#4
-# asm 2: shl  $32,<in9=%rcx
-shl  $32,%rcx
-
-# qhasm:   in8 += in9
-# asm 1: add  <in9=int64#4,<in8=int64#3
-# asm 2: add  <in9=%rcx,<in8=%rdx
-add  %rcx,%rdx
-
-# qhasm:   in9 = in8
-# asm 1: mov  <in8=int64#3,>in9=int64#4
-# asm 2: mov  <in8=%rdx,>in9=%rcx
-mov  %rdx,%rcx
-
-# qhasm:   (uint64) in9 >>= 32
-# asm 1: shr  $32,<in9=int64#4
-# asm 2: shr  $32,<in9=%rcx
-shr  $32,%rcx
-
-# qhasm:   ((uint32 *)&x2)[0] = in8
-# asm 1: movl <in8=int64#3d,>x2=stack128#2
-# asm 2: movl <in8=%edx,>x2=16(%rsp)
-movl %edx,16(%rsp)
-
-# qhasm:   ((uint32 *)&x3)[1] = in9
-# asm 1: movl <in9=int64#4d,4+<x3=stack128#3
-# asm 2: movl <in9=%ecx,4+<x3=32(%rsp)
-movl %ecx,4+32(%rsp)
-
-# qhasm:   bytes_backup = bytes
-# asm 1: movq <bytes=int64#6,>bytes_backup=stack64#8
-# asm 2: movq <bytes=%r9,>bytes_backup=408(%rsp)
-movq %r9,408(%rsp)
-
-# qhasm: i = 12
-# asm 1: mov  $12,>i=int64#3
-# asm 2: mov  $12,>i=%rdx
-mov  $12,%rdx
-
-# qhasm:   z5 = orig5
-# asm 1: movdqa <orig5=stack128#5,>z5=int6464#1
-# asm 2: movdqa <orig5=64(%rsp),>z5=%xmm0
-movdqa 64(%rsp),%xmm0
-
-# qhasm:   z10 = orig10
-# asm 1: movdqa <orig10=stack128#6,>z10=int6464#2
-# asm 2: movdqa <orig10=80(%rsp),>z10=%xmm1
-movdqa 80(%rsp),%xmm1
-
-# qhasm:   z15 = orig15
-# asm 1: movdqa <orig15=stack128#7,>z15=int6464#3
-# asm 2: movdqa <orig15=96(%rsp),>z15=%xmm2
-movdqa 96(%rsp),%xmm2
-
-# qhasm:   z14 = orig14
-# asm 1: movdqa <orig14=stack128#17,>z14=int6464#4
-# asm 2: movdqa <orig14=256(%rsp),>z14=%xmm3
-movdqa 256(%rsp),%xmm3
-
-# qhasm:   z3 = orig3
-# asm 1: movdqa <orig3=stack128#18,>z3=int6464#5
-# asm 2: movdqa <orig3=272(%rsp),>z3=%xmm4
-movdqa 272(%rsp),%xmm4
-
-# qhasm:   z6 = orig6
-# asm 1: movdqa <orig6=stack128#9,>z6=int6464#6
-# asm 2: movdqa <orig6=128(%rsp),>z6=%xmm5
-movdqa 128(%rsp),%xmm5
-
-# qhasm:   z11 = orig11
-# asm 1: movdqa <orig11=stack128#10,>z11=int6464#7
-# asm 2: movdqa <orig11=144(%rsp),>z11=%xmm6
-movdqa 144(%rsp),%xmm6
-
-# qhasm:   z1 = orig1
-# asm 1: movdqa <orig1=stack128#12,>z1=int6464#8
-# asm 2: movdqa <orig1=176(%rsp),>z1=%xmm7
-movdqa 176(%rsp),%xmm7
-
-# qhasm:   z7 = orig7
-# asm 1: movdqa <orig7=stack128#13,>z7=int6464#9
-# asm 2: movdqa <orig7=192(%rsp),>z7=%xmm8
-movdqa 192(%rsp),%xmm8
-
-# qhasm:   z13 = orig13
-# asm 1: movdqa <orig13=stack128#14,>z13=int6464#10
-# asm 2: movdqa <orig13=208(%rsp),>z13=%xmm9
-movdqa 208(%rsp),%xmm9
-
-# qhasm:   z2 = orig2
-# asm 1: movdqa <orig2=stack128#15,>z2=int6464#11
-# asm 2: movdqa <orig2=224(%rsp),>z2=%xmm10
-movdqa 224(%rsp),%xmm10
-
-# qhasm:   z9 = orig9
-# asm 1: movdqa <orig9=stack128#20,>z9=int6464#12
-# asm 2: movdqa <orig9=304(%rsp),>z9=%xmm11
-movdqa 304(%rsp),%xmm11
-
-# qhasm:   z0 = orig0
-# asm 1: movdqa <orig0=stack128#8,>z0=int6464#13
-# asm 2: movdqa <orig0=112(%rsp),>z0=%xmm12
-movdqa 112(%rsp),%xmm12
-
-# qhasm:   z12 = orig12
-# asm 1: movdqa <orig12=stack128#11,>z12=int6464#14
-# asm 2: movdqa <orig12=160(%rsp),>z12=%xmm13
-movdqa 160(%rsp),%xmm13
-
-# qhasm:   z4 = orig4
-# asm 1: movdqa <orig4=stack128#16,>z4=int6464#15
-# asm 2: movdqa <orig4=240(%rsp),>z4=%xmm14
-movdqa 240(%rsp),%xmm14
-
-# qhasm:   z8 = orig8
-# asm 1: movdqa <orig8=stack128#19,>z8=int6464#16
-# asm 2: movdqa <orig8=288(%rsp),>z8=%xmm15
-movdqa 288(%rsp),%xmm15
-
-# qhasm: mainloop1:
-._mainloop1:
-
-# qhasm: 						z10_stack = z10
-# asm 1: movdqa <z10=int6464#2,>z10_stack=stack128#21
-# asm 2: movdqa <z10=%xmm1,>z10_stack=320(%rsp)
-movdqa %xmm1,320(%rsp)
-
-# qhasm: 								z15_stack = z15
-# asm 1: movdqa <z15=int6464#3,>z15_stack=stack128#22
-# asm 2: movdqa <z15=%xmm2,>z15_stack=336(%rsp)
-movdqa %xmm2,336(%rsp)
-
-# qhasm: 		y4 = z12
-# asm 1: movdqa <z12=int6464#14,>y4=int6464#2
-# asm 2: movdqa <z12=%xmm13,>y4=%xmm1
-movdqa %xmm13,%xmm1
-
-# qhasm: uint32323232	y4 += z0
-# asm 1: paddd <z0=int6464#13,<y4=int6464#2
-# asm 2: paddd <z0=%xmm12,<y4=%xmm1
-paddd %xmm12,%xmm1
-
-# qhasm: 		r4 = y4
-# asm 1: movdqa <y4=int6464#2,>r4=int6464#3
-# asm 2: movdqa <y4=%xmm1,>r4=%xmm2
-movdqa %xmm1,%xmm2
-
-# qhasm: uint32323232	y4 <<= 7
-# asm 1: pslld $7,<y4=int6464#2
-# asm 2: pslld $7,<y4=%xmm1
-pslld $7,%xmm1
-
-# qhasm: 		z4 ^= y4
-# asm 1: pxor  <y4=int6464#2,<z4=int6464#15
-# asm 2: pxor  <y4=%xmm1,<z4=%xmm14
-pxor  %xmm1,%xmm14
-
-# qhasm: uint32323232	r4 >>= 25
-# asm 1: psrld $25,<r4=int6464#3
-# asm 2: psrld $25,<r4=%xmm2
-psrld $25,%xmm2
-
-# qhasm: 		z4 ^= r4
-# asm 1: pxor  <r4=int6464#3,<z4=int6464#15
-# asm 2: pxor  <r4=%xmm2,<z4=%xmm14
-pxor  %xmm2,%xmm14
-
-# qhasm: 				y9 = z1
-# asm 1: movdqa <z1=int6464#8,>y9=int6464#2
-# asm 2: movdqa <z1=%xmm7,>y9=%xmm1
-movdqa %xmm7,%xmm1
-
-# qhasm: uint32323232			y9 += z5
-# asm 1: paddd <z5=int6464#1,<y9=int6464#2
-# asm 2: paddd <z5=%xmm0,<y9=%xmm1
-paddd %xmm0,%xmm1
-
-# qhasm: 				r9 = y9
-# asm 1: movdqa <y9=int6464#2,>r9=int6464#3
-# asm 2: movdqa <y9=%xmm1,>r9=%xmm2
-movdqa %xmm1,%xmm2
-
-# qhasm: uint32323232			y9 <<= 7
-# asm 1: pslld $7,<y9=int6464#2
-# asm 2: pslld $7,<y9=%xmm1
-pslld $7,%xmm1
-
-# qhasm: 				z9 ^= y9
-# asm 1: pxor  <y9=int6464#2,<z9=int6464#12
-# asm 2: pxor  <y9=%xmm1,<z9=%xmm11
-pxor  %xmm1,%xmm11
-
-# qhasm: uint32323232			r9 >>= 25
-# asm 1: psrld $25,<r9=int6464#3
-# asm 2: psrld $25,<r9=%xmm2
-psrld $25,%xmm2
-
-# qhasm: 				z9 ^= r9
-# asm 1: pxor  <r9=int6464#3,<z9=int6464#12
-# asm 2: pxor  <r9=%xmm2,<z9=%xmm11
-pxor  %xmm2,%xmm11
-
-# qhasm: 		y8 = z0
-# asm 1: movdqa <z0=int6464#13,>y8=int6464#2
-# asm 2: movdqa <z0=%xmm12,>y8=%xmm1
-movdqa %xmm12,%xmm1
-
-# qhasm: uint32323232	y8 += z4
-# asm 1: paddd <z4=int6464#15,<y8=int6464#2
-# asm 2: paddd <z4=%xmm14,<y8=%xmm1
-paddd %xmm14,%xmm1
-
-# qhasm: 		r8 = y8
-# asm 1: movdqa <y8=int6464#2,>r8=int6464#3
-# asm 2: movdqa <y8=%xmm1,>r8=%xmm2
-movdqa %xmm1,%xmm2
-
-# qhasm: uint32323232	y8 <<= 9
-# asm 1: pslld $9,<y8=int6464#2
-# asm 2: pslld $9,<y8=%xmm1
-pslld $9,%xmm1
-
-# qhasm: 		z8 ^= y8
-# asm 1: pxor  <y8=int6464#2,<z8=int6464#16
-# asm 2: pxor  <y8=%xmm1,<z8=%xmm15
-pxor  %xmm1,%xmm15
-
-# qhasm: uint32323232	r8 >>= 23
-# asm 1: psrld $23,<r8=int6464#3
-# asm 2: psrld $23,<r8=%xmm2
-psrld $23,%xmm2
-
-# qhasm: 		z8 ^= r8
-# asm 1: pxor  <r8=int6464#3,<z8=int6464#16
-# asm 2: pxor  <r8=%xmm2,<z8=%xmm15
-pxor  %xmm2,%xmm15
-
-# qhasm: 				y13 = z5
-# asm 1: movdqa <z5=int6464#1,>y13=int6464#2
-# asm 2: movdqa <z5=%xmm0,>y13=%xmm1
-movdqa %xmm0,%xmm1
-
-# qhasm: uint32323232			y13 += z9
-# asm 1: paddd <z9=int6464#12,<y13=int6464#2
-# asm 2: paddd <z9=%xmm11,<y13=%xmm1
-paddd %xmm11,%xmm1
-
-# qhasm: 				r13 = y13
-# asm 1: movdqa <y13=int6464#2,>r13=int6464#3
-# asm 2: movdqa <y13=%xmm1,>r13=%xmm2
-movdqa %xmm1,%xmm2
-
-# qhasm: uint32323232			y13 <<= 9
-# asm 1: pslld $9,<y13=int6464#2
-# asm 2: pslld $9,<y13=%xmm1
-pslld $9,%xmm1
-
-# qhasm: 				z13 ^= y13
-# asm 1: pxor  <y13=int6464#2,<z13=int6464#10
-# asm 2: pxor  <y13=%xmm1,<z13=%xmm9
-pxor  %xmm1,%xmm9
-
-# qhasm: uint32323232			r13 >>= 23
-# asm 1: psrld $23,<r13=int6464#3
-# asm 2: psrld $23,<r13=%xmm2
-psrld $23,%xmm2
-
-# qhasm: 				z13 ^= r13
-# asm 1: pxor  <r13=int6464#3,<z13=int6464#10
-# asm 2: pxor  <r13=%xmm2,<z13=%xmm9
-pxor  %xmm2,%xmm9
-
-# qhasm: 		y12 = z4
-# asm 1: movdqa <z4=int6464#15,>y12=int6464#2
-# asm 2: movdqa <z4=%xmm14,>y12=%xmm1
-movdqa %xmm14,%xmm1
-
-# qhasm: uint32323232	y12 += z8
-# asm 1: paddd <z8=int6464#16,<y12=int6464#2
-# asm 2: paddd <z8=%xmm15,<y12=%xmm1
-paddd %xmm15,%xmm1
-
-# qhasm: 		r12 = y12
-# asm 1: movdqa <y12=int6464#2,>r12=int6464#3
-# asm 2: movdqa <y12=%xmm1,>r12=%xmm2
-movdqa %xmm1,%xmm2
-
-# qhasm: uint32323232	y12 <<= 13
-# asm 1: pslld $13,<y12=int6464#2
-# asm 2: pslld $13,<y12=%xmm1
-pslld $13,%xmm1
-
-# qhasm: 		z12 ^= y12
-# asm 1: pxor  <y12=int6464#2,<z12=int6464#14
-# asm 2: pxor  <y12=%xmm1,<z12=%xmm13
-pxor  %xmm1,%xmm13
-
-# qhasm: uint32323232	r12 >>= 19
-# asm 1: psrld $19,<r12=int6464#3
-# asm 2: psrld $19,<r12=%xmm2
-psrld $19,%xmm2
-
-# qhasm: 		z12 ^= r12
-# asm 1: pxor  <r12=int6464#3,<z12=int6464#14
-# asm 2: pxor  <r12=%xmm2,<z12=%xmm13
-pxor  %xmm2,%xmm13
-
-# qhasm: 				y1 = z9
-# asm 1: movdqa <z9=int6464#12,>y1=int6464#2
-# asm 2: movdqa <z9=%xmm11,>y1=%xmm1
-movdqa %xmm11,%xmm1
-
-# qhasm: uint32323232			y1 += z13
-# asm 1: paddd <z13=int6464#10,<y1=int6464#2
-# asm 2: paddd <z13=%xmm9,<y1=%xmm1
-paddd %xmm9,%xmm1
-
-# qhasm: 				r1 = y1
-# asm 1: movdqa <y1=int6464#2,>r1=int6464#3
-# asm 2: movdqa <y1=%xmm1,>r1=%xmm2
-movdqa %xmm1,%xmm2
-
-# qhasm: uint32323232			y1 <<= 13
-# asm 1: pslld $13,<y1=int6464#2
-# asm 2: pslld $13,<y1=%xmm1
-pslld $13,%xmm1
-
-# qhasm: 				z1 ^= y1
-# asm 1: pxor  <y1=int6464#2,<z1=int6464#8
-# asm 2: pxor  <y1=%xmm1,<z1=%xmm7
-pxor  %xmm1,%xmm7
-
-# qhasm: uint32323232			r1 >>= 19
-# asm 1: psrld $19,<r1=int6464#3
-# asm 2: psrld $19,<r1=%xmm2
-psrld $19,%xmm2
-
-# qhasm: 				z1 ^= r1
-# asm 1: pxor  <r1=int6464#3,<z1=int6464#8
-# asm 2: pxor  <r1=%xmm2,<z1=%xmm7
-pxor  %xmm2,%xmm7
-
-# qhasm: 		y0 = z8
-# asm 1: movdqa <z8=int6464#16,>y0=int6464#2
-# asm 2: movdqa <z8=%xmm15,>y0=%xmm1
-movdqa %xmm15,%xmm1
-
-# qhasm: uint32323232	y0 += z12
-# asm 1: paddd <z12=int6464#14,<y0=int6464#2
-# asm 2: paddd <z12=%xmm13,<y0=%xmm1
-paddd %xmm13,%xmm1
-
-# qhasm: 		r0 = y0
-# asm 1: movdqa <y0=int6464#2,>r0=int6464#3
-# asm 2: movdqa <y0=%xmm1,>r0=%xmm2
-movdqa %xmm1,%xmm2
-
-# qhasm: uint32323232	y0 <<= 18
-# asm 1: pslld $18,<y0=int6464#2
-# asm 2: pslld $18,<y0=%xmm1
-pslld $18,%xmm1
-
-# qhasm: 		z0 ^= y0
-# asm 1: pxor  <y0=int6464#2,<z0=int6464#13
-# asm 2: pxor  <y0=%xmm1,<z0=%xmm12
-pxor  %xmm1,%xmm12
-
-# qhasm: uint32323232	r0 >>= 14
-# asm 1: psrld $14,<r0=int6464#3
-# asm 2: psrld $14,<r0=%xmm2
-psrld $14,%xmm2
-
-# qhasm: 		z0 ^= r0
-# asm 1: pxor  <r0=int6464#3,<z0=int6464#13
-# asm 2: pxor  <r0=%xmm2,<z0=%xmm12
-pxor  %xmm2,%xmm12
-
-# qhasm: 						z10 = z10_stack
-# asm 1: movdqa <z10_stack=stack128#21,>z10=int6464#2
-# asm 2: movdqa <z10_stack=320(%rsp),>z10=%xmm1
-movdqa 320(%rsp),%xmm1
-
-# qhasm: 		z0_stack = z0
-# asm 1: movdqa <z0=int6464#13,>z0_stack=stack128#21
-# asm 2: movdqa <z0=%xmm12,>z0_stack=320(%rsp)
-movdqa %xmm12,320(%rsp)
-
-# qhasm: 				y5 = z13
-# asm 1: movdqa <z13=int6464#10,>y5=int6464#3
-# asm 2: movdqa <z13=%xmm9,>y5=%xmm2
-movdqa %xmm9,%xmm2
-
-# qhasm: uint32323232			y5 += z1
-# asm 1: paddd <z1=int6464#8,<y5=int6464#3
-# asm 2: paddd <z1=%xmm7,<y5=%xmm2
-paddd %xmm7,%xmm2
-
-# qhasm: 				r5 = y5
-# asm 1: movdqa <y5=int6464#3,>r5=int6464#13
-# asm 2: movdqa <y5=%xmm2,>r5=%xmm12
-movdqa %xmm2,%xmm12
-
-# qhasm: uint32323232			y5 <<= 18
-# asm 1: pslld $18,<y5=int6464#3
-# asm 2: pslld $18,<y5=%xmm2
-pslld $18,%xmm2
-
-# qhasm: 				z5 ^= y5
-# asm 1: pxor  <y5=int6464#3,<z5=int6464#1
-# asm 2: pxor  <y5=%xmm2,<z5=%xmm0
-pxor  %xmm2,%xmm0
-
-# qhasm: uint32323232			r5 >>= 14
-# asm 1: psrld $14,<r5=int6464#13
-# asm 2: psrld $14,<r5=%xmm12
-psrld $14,%xmm12
-
-# qhasm: 				z5 ^= r5
-# asm 1: pxor  <r5=int6464#13,<z5=int6464#1
-# asm 2: pxor  <r5=%xmm12,<z5=%xmm0
-pxor  %xmm12,%xmm0
-
-# qhasm: 						y14 = z6
-# asm 1: movdqa <z6=int6464#6,>y14=int6464#3
-# asm 2: movdqa <z6=%xmm5,>y14=%xmm2
-movdqa %xmm5,%xmm2
-
-# qhasm: uint32323232					y14 += z10
-# asm 1: paddd <z10=int6464#2,<y14=int6464#3
-# asm 2: paddd <z10=%xmm1,<y14=%xmm2
-paddd %xmm1,%xmm2
-
-# qhasm: 						r14 = y14
-# asm 1: movdqa <y14=int6464#3,>r14=int6464#13
-# asm 2: movdqa <y14=%xmm2,>r14=%xmm12
-movdqa %xmm2,%xmm12
-
-# qhasm: uint32323232					y14 <<= 7
-# asm 1: pslld $7,<y14=int6464#3
-# asm 2: pslld $7,<y14=%xmm2
-pslld $7,%xmm2
-
-# qhasm: 						z14 ^= y14
-# asm 1: pxor  <y14=int6464#3,<z14=int6464#4
-# asm 2: pxor  <y14=%xmm2,<z14=%xmm3
-pxor  %xmm2,%xmm3
-
-# qhasm: uint32323232					r14 >>= 25
-# asm 1: psrld $25,<r14=int6464#13
-# asm 2: psrld $25,<r14=%xmm12
-psrld $25,%xmm12
-
-# qhasm: 						z14 ^= r14
-# asm 1: pxor  <r14=int6464#13,<z14=int6464#4
-# asm 2: pxor  <r14=%xmm12,<z14=%xmm3
-pxor  %xmm12,%xmm3
-
-# qhasm: 								z15 = z15_stack
-# asm 1: movdqa <z15_stack=stack128#22,>z15=int6464#3
-# asm 2: movdqa <z15_stack=336(%rsp),>z15=%xmm2
-movdqa 336(%rsp),%xmm2
-
-# qhasm: 				z5_stack = z5
-# asm 1: movdqa <z5=int6464#1,>z5_stack=stack128#22
-# asm 2: movdqa <z5=%xmm0,>z5_stack=336(%rsp)
-movdqa %xmm0,336(%rsp)
-
-# qhasm: 								y3 = z11
-# asm 1: movdqa <z11=int6464#7,>y3=int6464#1
-# asm 2: movdqa <z11=%xmm6,>y3=%xmm0
-movdqa %xmm6,%xmm0
-
-# qhasm: uint32323232							y3 += z15
-# asm 1: paddd <z15=int6464#3,<y3=int6464#1
-# asm 2: paddd <z15=%xmm2,<y3=%xmm0
-paddd %xmm2,%xmm0
-
-# qhasm: 								r3 = y3
-# asm 1: movdqa <y3=int6464#1,>r3=int6464#13
-# asm 2: movdqa <y3=%xmm0,>r3=%xmm12
-movdqa %xmm0,%xmm12
-
-# qhasm: uint32323232							y3 <<= 7
-# asm 1: pslld $7,<y3=int6464#1
-# asm 2: pslld $7,<y3=%xmm0
-pslld $7,%xmm0
-
-# qhasm: 								z3 ^= y3
-# asm 1: pxor  <y3=int6464#1,<z3=int6464#5
-# asm 2: pxor  <y3=%xmm0,<z3=%xmm4
-pxor  %xmm0,%xmm4
-
-# qhasm: uint32323232							r3 >>= 25
-# asm 1: psrld $25,<r3=int6464#13
-# asm 2: psrld $25,<r3=%xmm12
-psrld $25,%xmm12
-
-# qhasm: 								z3 ^= r3
-# asm 1: pxor  <r3=int6464#13,<z3=int6464#5
-# asm 2: pxor  <r3=%xmm12,<z3=%xmm4
-pxor  %xmm12,%xmm4
-
-# qhasm: 						y2 = z10
-# asm 1: movdqa <z10=int6464#2,>y2=int6464#1
-# asm 2: movdqa <z10=%xmm1,>y2=%xmm0
-movdqa %xmm1,%xmm0
-
-# qhasm: uint32323232					y2 += z14
-# asm 1: paddd <z14=int6464#4,<y2=int6464#1
-# asm 2: paddd <z14=%xmm3,<y2=%xmm0
-paddd %xmm3,%xmm0
-
-# qhasm: 						r2 = y2
-# asm 1: movdqa <y2=int6464#1,>r2=int6464#13
-# asm 2: movdqa <y2=%xmm0,>r2=%xmm12
-movdqa %xmm0,%xmm12
-
-# qhasm: uint32323232					y2 <<= 9
-# asm 1: pslld $9,<y2=int6464#1
-# asm 2: pslld $9,<y2=%xmm0
-pslld $9,%xmm0
-
-# qhasm: 						z2 ^= y2
-# asm 1: pxor  <y2=int6464#1,<z2=int6464#11
-# asm 2: pxor  <y2=%xmm0,<z2=%xmm10
-pxor  %xmm0,%xmm10
-
-# qhasm: uint32323232					r2 >>= 23
-# asm 1: psrld $23,<r2=int6464#13
-# asm 2: psrld $23,<r2=%xmm12
-psrld $23,%xmm12
-
-# qhasm: 						z2 ^= r2
-# asm 1: pxor  <r2=int6464#13,<z2=int6464#11
-# asm 2: pxor  <r2=%xmm12,<z2=%xmm10
-pxor  %xmm12,%xmm10
-
-# qhasm: 								y7 = z15
-# asm 1: movdqa <z15=int6464#3,>y7=int6464#1
-# asm 2: movdqa <z15=%xmm2,>y7=%xmm0
-movdqa %xmm2,%xmm0
-
-# qhasm: uint32323232							y7 += z3
-# asm 1: paddd <z3=int6464#5,<y7=int6464#1
-# asm 2: paddd <z3=%xmm4,<y7=%xmm0
-paddd %xmm4,%xmm0
-
-# qhasm: 								r7 = y7
-# asm 1: movdqa <y7=int6464#1,>r7=int6464#13
-# asm 2: movdqa <y7=%xmm0,>r7=%xmm12
-movdqa %xmm0,%xmm12
-
-# qhasm: uint32323232							y7 <<= 9
-# asm 1: pslld $9,<y7=int6464#1
-# asm 2: pslld $9,<y7=%xmm0
-pslld $9,%xmm0
-
-# qhasm: 								z7 ^= y7
-# asm 1: pxor  <y7=int6464#1,<z7=int6464#9
-# asm 2: pxor  <y7=%xmm0,<z7=%xmm8
-pxor  %xmm0,%xmm8
-
-# qhasm: uint32323232							r7 >>= 23
-# asm 1: psrld $23,<r7=int6464#13
-# asm 2: psrld $23,<r7=%xmm12
-psrld $23,%xmm12
-
-# qhasm: 								z7 ^= r7
-# asm 1: pxor  <r7=int6464#13,<z7=int6464#9
-# asm 2: pxor  <r7=%xmm12,<z7=%xmm8
-pxor  %xmm12,%xmm8
-
-# qhasm: 						y6 = z14
-# asm 1: movdqa <z14=int6464#4,>y6=int6464#1
-# asm 2: movdqa <z14=%xmm3,>y6=%xmm0
-movdqa %xmm3,%xmm0
-
-# qhasm: uint32323232					y6 += z2
-# asm 1: paddd <z2=int6464#11,<y6=int6464#1
-# asm 2: paddd <z2=%xmm10,<y6=%xmm0
-paddd %xmm10,%xmm0
-
-# qhasm: 						r6 = y6
-# asm 1: movdqa <y6=int6464#1,>r6=int6464#13
-# asm 2: movdqa <y6=%xmm0,>r6=%xmm12
-movdqa %xmm0,%xmm12
-
-# qhasm: uint32323232					y6 <<= 13
-# asm 1: pslld $13,<y6=int6464#1
-# asm 2: pslld $13,<y6=%xmm0
-pslld $13,%xmm0
-
-# qhasm: 						z6 ^= y6
-# asm 1: pxor  <y6=int6464#1,<z6=int6464#6
-# asm 2: pxor  <y6=%xmm0,<z6=%xmm5
-pxor  %xmm0,%xmm5
-
-# qhasm: uint32323232					r6 >>= 19
-# asm 1: psrld $19,<r6=int6464#13
-# asm 2: psrld $19,<r6=%xmm12
-psrld $19,%xmm12
-
-# qhasm: 						z6 ^= r6
-# asm 1: pxor  <r6=int6464#13,<z6=int6464#6
-# asm 2: pxor  <r6=%xmm12,<z6=%xmm5
-pxor  %xmm12,%xmm5
-
-# qhasm: 								y11 = z3
-# asm 1: movdqa <z3=int6464#5,>y11=int6464#1
-# asm 2: movdqa <z3=%xmm4,>y11=%xmm0
-movdqa %xmm4,%xmm0
-
-# qhasm: uint32323232							y11 += z7
-# asm 1: paddd <z7=int6464#9,<y11=int6464#1
-# asm 2: paddd <z7=%xmm8,<y11=%xmm0
-paddd %xmm8,%xmm0
-
-# qhasm: 								r11 = y11
-# asm 1: movdqa <y11=int6464#1,>r11=int6464#13
-# asm 2: movdqa <y11=%xmm0,>r11=%xmm12
-movdqa %xmm0,%xmm12
-
-# qhasm: uint32323232							y11 <<= 13
-# asm 1: pslld $13,<y11=int6464#1
-# asm 2: pslld $13,<y11=%xmm0
-pslld $13,%xmm0
-
-# qhasm: 								z11 ^= y11
-# asm 1: pxor  <y11=int6464#1,<z11=int6464#7
-# asm 2: pxor  <y11=%xmm0,<z11=%xmm6
-pxor  %xmm0,%xmm6
-
-# qhasm: uint32323232							r11 >>= 19
-# asm 1: psrld $19,<r11=int6464#13
-# asm 2: psrld $19,<r11=%xmm12
-psrld $19,%xmm12
-
-# qhasm: 								z11 ^= r11
-# asm 1: pxor  <r11=int6464#13,<z11=int6464#7
-# asm 2: pxor  <r11=%xmm12,<z11=%xmm6
-pxor  %xmm12,%xmm6
-
-# qhasm: 						y10 = z2
-# asm 1: movdqa <z2=int6464#11,>y10=int6464#1
-# asm 2: movdqa <z2=%xmm10,>y10=%xmm0
-movdqa %xmm10,%xmm0
-
-# qhasm: uint32323232					y10 += z6
-# asm 1: paddd <z6=int6464#6,<y10=int6464#1
-# asm 2: paddd <z6=%xmm5,<y10=%xmm0
-paddd %xmm5,%xmm0
-
-# qhasm: 						r10 = y10
-# asm 1: movdqa <y10=int6464#1,>r10=int6464#13
-# asm 2: movdqa <y10=%xmm0,>r10=%xmm12
-movdqa %xmm0,%xmm12
-
-# qhasm: uint32323232					y10 <<= 18
-# asm 1: pslld $18,<y10=int6464#1
-# asm 2: pslld $18,<y10=%xmm0
-pslld $18,%xmm0
-
-# qhasm: 						z10 ^= y10
-# asm 1: pxor  <y10=int6464#1,<z10=int6464#2
-# asm 2: pxor  <y10=%xmm0,<z10=%xmm1
-pxor  %xmm0,%xmm1
-
-# qhasm: uint32323232					r10 >>= 14
-# asm 1: psrld $14,<r10=int6464#13
-# asm 2: psrld $14,<r10=%xmm12
-psrld $14,%xmm12
-
-# qhasm: 						z10 ^= r10
-# asm 1: pxor  <r10=int6464#13,<z10=int6464#2
-# asm 2: pxor  <r10=%xmm12,<z10=%xmm1
-pxor  %xmm12,%xmm1
-
-# qhasm: 		z0 = z0_stack
-# asm 1: movdqa <z0_stack=stack128#21,>z0=int6464#1
-# asm 2: movdqa <z0_stack=320(%rsp),>z0=%xmm0
-movdqa 320(%rsp),%xmm0
-
-# qhasm: 						z10_stack = z10
-# asm 1: movdqa <z10=int6464#2,>z10_stack=stack128#21
-# asm 2: movdqa <z10=%xmm1,>z10_stack=320(%rsp)
-movdqa %xmm1,320(%rsp)
-
-# qhasm: 		y1 = z3
-# asm 1: movdqa <z3=int6464#5,>y1=int6464#2
-# asm 2: movdqa <z3=%xmm4,>y1=%xmm1
-movdqa %xmm4,%xmm1
-
-# qhasm: uint32323232	y1 += z0
-# asm 1: paddd <z0=int6464#1,<y1=int6464#2
-# asm 2: paddd <z0=%xmm0,<y1=%xmm1
-paddd %xmm0,%xmm1
-
-# qhasm: 		r1 = y1
-# asm 1: movdqa <y1=int6464#2,>r1=int6464#13
-# asm 2: movdqa <y1=%xmm1,>r1=%xmm12
-movdqa %xmm1,%xmm12
-
-# qhasm: uint32323232	y1 <<= 7
-# asm 1: pslld $7,<y1=int6464#2
-# asm 2: pslld $7,<y1=%xmm1
-pslld $7,%xmm1
-
-# qhasm: 		z1 ^= y1
-# asm 1: pxor  <y1=int6464#2,<z1=int6464#8
-# asm 2: pxor  <y1=%xmm1,<z1=%xmm7
-pxor  %xmm1,%xmm7
-
-# qhasm: uint32323232	r1 >>= 25
-# asm 1: psrld $25,<r1=int6464#13
-# asm 2: psrld $25,<r1=%xmm12
-psrld $25,%xmm12
-
-# qhasm: 		z1 ^= r1
-# asm 1: pxor  <r1=int6464#13,<z1=int6464#8
-# asm 2: pxor  <r1=%xmm12,<z1=%xmm7
-pxor  %xmm12,%xmm7
-
-# qhasm: 								y15 = z7
-# asm 1: movdqa <z7=int6464#9,>y15=int6464#2
-# asm 2: movdqa <z7=%xmm8,>y15=%xmm1
-movdqa %xmm8,%xmm1
-
-# qhasm: uint32323232							y15 += z11
-# asm 1: paddd <z11=int6464#7,<y15=int6464#2
-# asm 2: paddd <z11=%xmm6,<y15=%xmm1
-paddd %xmm6,%xmm1
-
-# qhasm: 								r15 = y15
-# asm 1: movdqa <y15=int6464#2,>r15=int6464#13
-# asm 2: movdqa <y15=%xmm1,>r15=%xmm12
-movdqa %xmm1,%xmm12
-
-# qhasm: uint32323232							y15 <<= 18
-# asm 1: pslld $18,<y15=int6464#2
-# asm 2: pslld $18,<y15=%xmm1
-pslld $18,%xmm1
-
-# qhasm: 								z15 ^= y15
-# asm 1: pxor  <y15=int6464#2,<z15=int6464#3
-# asm 2: pxor  <y15=%xmm1,<z15=%xmm2
-pxor  %xmm1,%xmm2
-
-# qhasm: uint32323232							r15 >>= 14
-# asm 1: psrld $14,<r15=int6464#13
-# asm 2: psrld $14,<r15=%xmm12
-psrld $14,%xmm12
-
-# qhasm: 								z15 ^= r15
-# asm 1: pxor  <r15=int6464#13,<z15=int6464#3
-# asm 2: pxor  <r15=%xmm12,<z15=%xmm2
-pxor  %xmm12,%xmm2
-
-# qhasm: 				z5 = z5_stack
-# asm 1: movdqa <z5_stack=stack128#22,>z5=int6464#13
-# asm 2: movdqa <z5_stack=336(%rsp),>z5=%xmm12
-movdqa 336(%rsp),%xmm12
-
-# qhasm: 								z15_stack = z15
-# asm 1: movdqa <z15=int6464#3,>z15_stack=stack128#22
-# asm 2: movdqa <z15=%xmm2,>z15_stack=336(%rsp)
-movdqa %xmm2,336(%rsp)
-
-# qhasm: 				y6 = z4
-# asm 1: movdqa <z4=int6464#15,>y6=int6464#2
-# asm 2: movdqa <z4=%xmm14,>y6=%xmm1
-movdqa %xmm14,%xmm1
-
-# qhasm: uint32323232			y6 += z5
-# asm 1: paddd <z5=int6464#13,<y6=int6464#2
-# asm 2: paddd <z5=%xmm12,<y6=%xmm1
-paddd %xmm12,%xmm1
-
-# qhasm: 				r6 = y6
-# asm 1: movdqa <y6=int6464#2,>r6=int6464#3
-# asm 2: movdqa <y6=%xmm1,>r6=%xmm2
-movdqa %xmm1,%xmm2
-
-# qhasm: uint32323232			y6 <<= 7
-# asm 1: pslld $7,<y6=int6464#2
-# asm 2: pslld $7,<y6=%xmm1
-pslld $7,%xmm1
-
-# qhasm: 				z6 ^= y6
-# asm 1: pxor  <y6=int6464#2,<z6=int6464#6
-# asm 2: pxor  <y6=%xmm1,<z6=%xmm5
-pxor  %xmm1,%xmm5
-
-# qhasm: uint32323232			r6 >>= 25
-# asm 1: psrld $25,<r6=int6464#3
-# asm 2: psrld $25,<r6=%xmm2
-psrld $25,%xmm2
-
-# qhasm: 				z6 ^= r6
-# asm 1: pxor  <r6=int6464#3,<z6=int6464#6
-# asm 2: pxor  <r6=%xmm2,<z6=%xmm5
-pxor  %xmm2,%xmm5
-
-# qhasm: 		y2 = z0
-# asm 1: movdqa <z0=int6464#1,>y2=int6464#2
-# asm 2: movdqa <z0=%xmm0,>y2=%xmm1
-movdqa %xmm0,%xmm1
-
-# qhasm: uint32323232	y2 += z1
-# asm 1: paddd <z1=int6464#8,<y2=int6464#2
-# asm 2: paddd <z1=%xmm7,<y2=%xmm1
-paddd %xmm7,%xmm1
-
-# qhasm: 		r2 = y2
-# asm 1: movdqa <y2=int6464#2,>r2=int6464#3
-# asm 2: movdqa <y2=%xmm1,>r2=%xmm2
-movdqa %xmm1,%xmm2
-
-# qhasm: uint32323232	y2 <<= 9
-# asm 1: pslld $9,<y2=int6464#2
-# asm 2: pslld $9,<y2=%xmm1
-pslld $9,%xmm1
-
-# qhasm: 		z2 ^= y2
-# asm 1: pxor  <y2=int6464#2,<z2=int6464#11
-# asm 2: pxor  <y2=%xmm1,<z2=%xmm10
-pxor  %xmm1,%xmm10
-
-# qhasm: uint32323232	r2 >>= 23
-# asm 1: psrld $23,<r2=int6464#3
-# asm 2: psrld $23,<r2=%xmm2
-psrld $23,%xmm2
-
-# qhasm: 		z2 ^= r2
-# asm 1: pxor  <r2=int6464#3,<z2=int6464#11
-# asm 2: pxor  <r2=%xmm2,<z2=%xmm10
-pxor  %xmm2,%xmm10
-
-# qhasm: 				y7 = z5
-# asm 1: movdqa <z5=int6464#13,>y7=int6464#2
-# asm 2: movdqa <z5=%xmm12,>y7=%xmm1
-movdqa %xmm12,%xmm1
-
-# qhasm: uint32323232			y7 += z6
-# asm 1: paddd <z6=int6464#6,<y7=int6464#2
-# asm 2: paddd <z6=%xmm5,<y7=%xmm1
-paddd %xmm5,%xmm1
-
-# qhasm: 				r7 = y7
-# asm 1: movdqa <y7=int6464#2,>r7=int6464#3
-# asm 2: movdqa <y7=%xmm1,>r7=%xmm2
-movdqa %xmm1,%xmm2
-
-# qhasm: uint32323232			y7 <<= 9
-# asm 1: pslld $9,<y7=int6464#2
-# asm 2: pslld $9,<y7=%xmm1
-pslld $9,%xmm1
-
-# qhasm: 				z7 ^= y7
-# asm 1: pxor  <y7=int6464#2,<z7=int6464#9
-# asm 2: pxor  <y7=%xmm1,<z7=%xmm8
-pxor  %xmm1,%xmm8
-
-# qhasm: uint32323232			r7 >>= 23
-# asm 1: psrld $23,<r7=int6464#3
-# asm 2: psrld $23,<r7=%xmm2
-psrld $23,%xmm2
-
-# qhasm: 				z7 ^= r7
-# asm 1: pxor  <r7=int6464#3,<z7=int6464#9
-# asm 2: pxor  <r7=%xmm2,<z7=%xmm8
-pxor  %xmm2,%xmm8
-
-# qhasm: 		y3 = z1
-# asm 1: movdqa <z1=int6464#8,>y3=int6464#2
-# asm 2: movdqa <z1=%xmm7,>y3=%xmm1
-movdqa %xmm7,%xmm1
-
-# qhasm: uint32323232	y3 += z2
-# asm 1: paddd <z2=int6464#11,<y3=int6464#2
-# asm 2: paddd <z2=%xmm10,<y3=%xmm1
-paddd %xmm10,%xmm1
-
-# qhasm: 		r3 = y3
-# asm 1: movdqa <y3=int6464#2,>r3=int6464#3
-# asm 2: movdqa <y3=%xmm1,>r3=%xmm2
-movdqa %xmm1,%xmm2
-
-# qhasm: uint32323232	y3 <<= 13
-# asm 1: pslld $13,<y3=int6464#2
-# asm 2: pslld $13,<y3=%xmm1
-pslld $13,%xmm1
-
-# qhasm: 		z3 ^= y3
-# asm 1: pxor  <y3=int6464#2,<z3=int6464#5
-# asm 2: pxor  <y3=%xmm1,<z3=%xmm4
-pxor  %xmm1,%xmm4
-
-# qhasm: uint32323232	r3 >>= 19
-# asm 1: psrld $19,<r3=int6464#3
-# asm 2: psrld $19,<r3=%xmm2
-psrld $19,%xmm2
-
-# qhasm: 		z3 ^= r3
-# asm 1: pxor  <r3=int6464#3,<z3=int6464#5
-# asm 2: pxor  <r3=%xmm2,<z3=%xmm4
-pxor  %xmm2,%xmm4
-
-# qhasm: 				y4 = z6
-# asm 1: movdqa <z6=int6464#6,>y4=int6464#2
-# asm 2: movdqa <z6=%xmm5,>y4=%xmm1
-movdqa %xmm5,%xmm1
-
-# qhasm: uint32323232			y4 += z7
-# asm 1: paddd <z7=int6464#9,<y4=int6464#2
-# asm 2: paddd <z7=%xmm8,<y4=%xmm1
-paddd %xmm8,%xmm1
-
-# qhasm: 				r4 = y4
-# asm 1: movdqa <y4=int6464#2,>r4=int6464#3
-# asm 2: movdqa <y4=%xmm1,>r4=%xmm2
-movdqa %xmm1,%xmm2
-
-# qhasm: uint32323232			y4 <<= 13
-# asm 1: pslld $13,<y4=int6464#2
-# asm 2: pslld $13,<y4=%xmm1
-pslld $13,%xmm1
-
-# qhasm: 				z4 ^= y4
-# asm 1: pxor  <y4=int6464#2,<z4=int6464#15
-# asm 2: pxor  <y4=%xmm1,<z4=%xmm14
-pxor  %xmm1,%xmm14
-
-# qhasm: uint32323232			r4 >>= 19
-# asm 1: psrld $19,<r4=int6464#3
-# asm 2: psrld $19,<r4=%xmm2
-psrld $19,%xmm2
-
-# qhasm: 				z4 ^= r4
-# asm 1: pxor  <r4=int6464#3,<z4=int6464#15
-# asm 2: pxor  <r4=%xmm2,<z4=%xmm14
-pxor  %xmm2,%xmm14
-
-# qhasm: 		y0 = z2
-# asm 1: movdqa <z2=int6464#11,>y0=int6464#2
-# asm 2: movdqa <z2=%xmm10,>y0=%xmm1
-movdqa %xmm10,%xmm1
-
-# qhasm: uint32323232	y0 += z3
-# asm 1: paddd <z3=int6464#5,<y0=int6464#2
-# asm 2: paddd <z3=%xmm4,<y0=%xmm1
-paddd %xmm4,%xmm1
-
-# qhasm: 		r0 = y0
-# asm 1: movdqa <y0=int6464#2,>r0=int6464#3
-# asm 2: movdqa <y0=%xmm1,>r0=%xmm2
-movdqa %xmm1,%xmm2
-
-# qhasm: uint32323232	y0 <<= 18
-# asm 1: pslld $18,<y0=int6464#2
-# asm 2: pslld $18,<y0=%xmm1
-pslld $18,%xmm1
-
-# qhasm: 		z0 ^= y0
-# asm 1: pxor  <y0=int6464#2,<z0=int6464#1
-# asm 2: pxor  <y0=%xmm1,<z0=%xmm0
-pxor  %xmm1,%xmm0
-
-# qhasm: uint32323232	r0 >>= 14
-# asm 1: psrld $14,<r0=int6464#3
-# asm 2: psrld $14,<r0=%xmm2
-psrld $14,%xmm2
-
-# qhasm: 		z0 ^= r0
-# asm 1: pxor  <r0=int6464#3,<z0=int6464#1
-# asm 2: pxor  <r0=%xmm2,<z0=%xmm0
-pxor  %xmm2,%xmm0
-
-# qhasm: 						z10 = z10_stack
-# asm 1: movdqa <z10_stack=stack128#21,>z10=int6464#2
-# asm 2: movdqa <z10_stack=320(%rsp),>z10=%xmm1
-movdqa 320(%rsp),%xmm1
-
-# qhasm: 		z0_stack = z0
-# asm 1: movdqa <z0=int6464#1,>z0_stack=stack128#21
-# asm 2: movdqa <z0=%xmm0,>z0_stack=320(%rsp)
-movdqa %xmm0,320(%rsp)
-
-# qhasm: 				y5 = z7
-# asm 1: movdqa <z7=int6464#9,>y5=int6464#1
-# asm 2: movdqa <z7=%xmm8,>y5=%xmm0
-movdqa %xmm8,%xmm0
-
-# qhasm: uint32323232			y5 += z4
-# asm 1: paddd <z4=int6464#15,<y5=int6464#1
-# asm 2: paddd <z4=%xmm14,<y5=%xmm0
-paddd %xmm14,%xmm0
-
-# qhasm: 				r5 = y5
-# asm 1: movdqa <y5=int6464#1,>r5=int6464#3
-# asm 2: movdqa <y5=%xmm0,>r5=%xmm2
-movdqa %xmm0,%xmm2
-
-# qhasm: uint32323232			y5 <<= 18
-# asm 1: pslld $18,<y5=int6464#1
-# asm 2: pslld $18,<y5=%xmm0
-pslld $18,%xmm0
-
-# qhasm: 				z5 ^= y5
-# asm 1: pxor  <y5=int6464#1,<z5=int6464#13
-# asm 2: pxor  <y5=%xmm0,<z5=%xmm12
-pxor  %xmm0,%xmm12
-
-# qhasm: uint32323232			r5 >>= 14
-# asm 1: psrld $14,<r5=int6464#3
-# asm 2: psrld $14,<r5=%xmm2
-psrld $14,%xmm2
-
-# qhasm: 				z5 ^= r5
-# asm 1: pxor  <r5=int6464#3,<z5=int6464#13
-# asm 2: pxor  <r5=%xmm2,<z5=%xmm12
-pxor  %xmm2,%xmm12
-
-# qhasm: 						y11 = z9
-# asm 1: movdqa <z9=int6464#12,>y11=int6464#1
-# asm 2: movdqa <z9=%xmm11,>y11=%xmm0
-movdqa %xmm11,%xmm0
-
-# qhasm: uint32323232					y11 += z10
-# asm 1: paddd <z10=int6464#2,<y11=int6464#1
-# asm 2: paddd <z10=%xmm1,<y11=%xmm0
-paddd %xmm1,%xmm0
-
-# qhasm: 						r11 = y11
-# asm 1: movdqa <y11=int6464#1,>r11=int6464#3
-# asm 2: movdqa <y11=%xmm0,>r11=%xmm2
-movdqa %xmm0,%xmm2
-
-# qhasm: uint32323232					y11 <<= 7
-# asm 1: pslld $7,<y11=int6464#1
-# asm 2: pslld $7,<y11=%xmm0
-pslld $7,%xmm0
-
-# qhasm: 						z11 ^= y11
-# asm 1: pxor  <y11=int6464#1,<z11=int6464#7
-# asm 2: pxor  <y11=%xmm0,<z11=%xmm6
-pxor  %xmm0,%xmm6
-
-# qhasm: uint32323232					r11 >>= 25
-# asm 1: psrld $25,<r11=int6464#3
-# asm 2: psrld $25,<r11=%xmm2
-psrld $25,%xmm2
-
-# qhasm: 						z11 ^= r11
-# asm 1: pxor  <r11=int6464#3,<z11=int6464#7
-# asm 2: pxor  <r11=%xmm2,<z11=%xmm6
-pxor  %xmm2,%xmm6
-
-# qhasm: 								z15 = z15_stack
-# asm 1: movdqa <z15_stack=stack128#22,>z15=int6464#3
-# asm 2: movdqa <z15_stack=336(%rsp),>z15=%xmm2
-movdqa 336(%rsp),%xmm2
-
-# qhasm: 				z5_stack = z5
-# asm 1: movdqa <z5=int6464#13,>z5_stack=stack128#22
-# asm 2: movdqa <z5=%xmm12,>z5_stack=336(%rsp)
-movdqa %xmm12,336(%rsp)
-
-# qhasm: 								y12 = z14
-# asm 1: movdqa <z14=int6464#4,>y12=int6464#1
-# asm 2: movdqa <z14=%xmm3,>y12=%xmm0
-movdqa %xmm3,%xmm0
-
-# qhasm: uint32323232							y12 += z15
-# asm 1: paddd <z15=int6464#3,<y12=int6464#1
-# asm 2: paddd <z15=%xmm2,<y12=%xmm0
-paddd %xmm2,%xmm0
-
-# qhasm: 								r12 = y12
-# asm 1: movdqa <y12=int6464#1,>r12=int6464#13
-# asm 2: movdqa <y12=%xmm0,>r12=%xmm12
-movdqa %xmm0,%xmm12
-
-# qhasm: uint32323232							y12 <<= 7
-# asm 1: pslld $7,<y12=int6464#1
-# asm 2: pslld $7,<y12=%xmm0
-pslld $7,%xmm0
-
-# qhasm: 								z12 ^= y12
-# asm 1: pxor  <y12=int6464#1,<z12=int6464#14
-# asm 2: pxor  <y12=%xmm0,<z12=%xmm13
-pxor  %xmm0,%xmm13
-
-# qhasm: uint32323232							r12 >>= 25
-# asm 1: psrld $25,<r12=int6464#13
-# asm 2: psrld $25,<r12=%xmm12
-psrld $25,%xmm12
-
-# qhasm: 								z12 ^= r12
-# asm 1: pxor  <r12=int6464#13,<z12=int6464#14
-# asm 2: pxor  <r12=%xmm12,<z12=%xmm13
-pxor  %xmm12,%xmm13
-
-# qhasm: 						y8 = z10
-# asm 1: movdqa <z10=int6464#2,>y8=int6464#1
-# asm 2: movdqa <z10=%xmm1,>y8=%xmm0
-movdqa %xmm1,%xmm0
-
-# qhasm: uint32323232					y8 += z11
-# asm 1: paddd <z11=int6464#7,<y8=int6464#1
-# asm 2: paddd <z11=%xmm6,<y8=%xmm0
-paddd %xmm6,%xmm0
-
-# qhasm: 						r8 = y8
-# asm 1: movdqa <y8=int6464#1,>r8=int6464#13
-# asm 2: movdqa <y8=%xmm0,>r8=%xmm12
-movdqa %xmm0,%xmm12
-
-# qhasm: uint32323232					y8 <<= 9
-# asm 1: pslld $9,<y8=int6464#1
-# asm 2: pslld $9,<y8=%xmm0
-pslld $9,%xmm0
-
-# qhasm: 						z8 ^= y8
-# asm 1: pxor  <y8=int6464#1,<z8=int6464#16
-# asm 2: pxor  <y8=%xmm0,<z8=%xmm15
-pxor  %xmm0,%xmm15
-
-# qhasm: uint32323232					r8 >>= 23
-# asm 1: psrld $23,<r8=int6464#13
-# asm 2: psrld $23,<r8=%xmm12
-psrld $23,%xmm12
-
-# qhasm: 						z8 ^= r8
-# asm 1: pxor  <r8=int6464#13,<z8=int6464#16
-# asm 2: pxor  <r8=%xmm12,<z8=%xmm15
-pxor  %xmm12,%xmm15
-
-# qhasm: 								y13 = z15
-# asm 1: movdqa <z15=int6464#3,>y13=int6464#1
-# asm 2: movdqa <z15=%xmm2,>y13=%xmm0
-movdqa %xmm2,%xmm0
-
-# qhasm: uint32323232							y13 += z12
-# asm 1: paddd <z12=int6464#14,<y13=int6464#1
-# asm 2: paddd <z12=%xmm13,<y13=%xmm0
-paddd %xmm13,%xmm0
-
-# qhasm: 								r13 = y13
-# asm 1: movdqa <y13=int6464#1,>r13=int6464#13
-# asm 2: movdqa <y13=%xmm0,>r13=%xmm12
-movdqa %xmm0,%xmm12
-
-# qhasm: uint32323232							y13 <<= 9
-# asm 1: pslld $9,<y13=int6464#1
-# asm 2: pslld $9,<y13=%xmm0
-pslld $9,%xmm0
-
-# qhasm: 								z13 ^= y13
-# asm 1: pxor  <y13=int6464#1,<z13=int6464#10
-# asm 2: pxor  <y13=%xmm0,<z13=%xmm9
-pxor  %xmm0,%xmm9
-
-# qhasm: uint32323232							r13 >>= 23
-# asm 1: psrld $23,<r13=int6464#13
-# asm 2: psrld $23,<r13=%xmm12
-psrld $23,%xmm12
-
-# qhasm: 								z13 ^= r13
-# asm 1: pxor  <r13=int6464#13,<z13=int6464#10
-# asm 2: pxor  <r13=%xmm12,<z13=%xmm9
-pxor  %xmm12,%xmm9
-
-# qhasm: 						y9 = z11
-# asm 1: movdqa <z11=int6464#7,>y9=int6464#1
-# asm 2: movdqa <z11=%xmm6,>y9=%xmm0
-movdqa %xmm6,%xmm0
-
-# qhasm: uint32323232					y9 += z8
-# asm 1: paddd <z8=int6464#16,<y9=int6464#1
-# asm 2: paddd <z8=%xmm15,<y9=%xmm0
-paddd %xmm15,%xmm0
-
-# qhasm: 						r9 = y9
-# asm 1: movdqa <y9=int6464#1,>r9=int6464#13
-# asm 2: movdqa <y9=%xmm0,>r9=%xmm12
-movdqa %xmm0,%xmm12
-
-# qhasm: uint32323232					y9 <<= 13
-# asm 1: pslld $13,<y9=int6464#1
-# asm 2: pslld $13,<y9=%xmm0
-pslld $13,%xmm0
-
-# qhasm: 						z9 ^= y9
-# asm 1: pxor  <y9=int6464#1,<z9=int6464#12
-# asm 2: pxor  <y9=%xmm0,<z9=%xmm11
-pxor  %xmm0,%xmm11
-
-# qhasm: uint32323232					r9 >>= 19
-# asm 1: psrld $19,<r9=int6464#13
-# asm 2: psrld $19,<r9=%xmm12
-psrld $19,%xmm12
-
-# qhasm: 						z9 ^= r9
-# asm 1: pxor  <r9=int6464#13,<z9=int6464#12
-# asm 2: pxor  <r9=%xmm12,<z9=%xmm11
-pxor  %xmm12,%xmm11
-
-# qhasm: 								y14 = z12
-# asm 1: movdqa <z12=int6464#14,>y14=int6464#1
-# asm 2: movdqa <z12=%xmm13,>y14=%xmm0
-movdqa %xmm13,%xmm0
-
-# qhasm: uint32323232							y14 += z13
-# asm 1: paddd <z13=int6464#10,<y14=int6464#1
-# asm 2: paddd <z13=%xmm9,<y14=%xmm0
-paddd %xmm9,%xmm0
-
-# qhasm: 								r14 = y14
-# asm 1: movdqa <y14=int6464#1,>r14=int6464#13
-# asm 2: movdqa <y14=%xmm0,>r14=%xmm12
-movdqa %xmm0,%xmm12
-
-# qhasm: uint32323232							y14 <<= 13
-# asm 1: pslld $13,<y14=int6464#1
-# asm 2: pslld $13,<y14=%xmm0
-pslld $13,%xmm0
-
-# qhasm: 								z14 ^= y14
-# asm 1: pxor  <y14=int6464#1,<z14=int6464#4
-# asm 2: pxor  <y14=%xmm0,<z14=%xmm3
-pxor  %xmm0,%xmm3
-
-# qhasm: uint32323232							r14 >>= 19
-# asm 1: psrld $19,<r14=int6464#13
-# asm 2: psrld $19,<r14=%xmm12
-psrld $19,%xmm12
-
-# qhasm: 								z14 ^= r14
-# asm 1: pxor  <r14=int6464#13,<z14=int6464#4
-# asm 2: pxor  <r14=%xmm12,<z14=%xmm3
-pxor  %xmm12,%xmm3
-
-# qhasm: 						y10 = z8
-# asm 1: movdqa <z8=int6464#16,>y10=int6464#1
-# asm 2: movdqa <z8=%xmm15,>y10=%xmm0
-movdqa %xmm15,%xmm0
-
-# qhasm: uint32323232					y10 += z9
-# asm 1: paddd <z9=int6464#12,<y10=int6464#1
-# asm 2: paddd <z9=%xmm11,<y10=%xmm0
-paddd %xmm11,%xmm0
-
-# qhasm: 						r10 = y10
-# asm 1: movdqa <y10=int6464#1,>r10=int6464#13
-# asm 2: movdqa <y10=%xmm0,>r10=%xmm12
-movdqa %xmm0,%xmm12
-
-# qhasm: uint32323232					y10 <<= 18
-# asm 1: pslld $18,<y10=int6464#1
-# asm 2: pslld $18,<y10=%xmm0
-pslld $18,%xmm0
-
-# qhasm: 						z10 ^= y10
-# asm 1: pxor  <y10=int6464#1,<z10=int6464#2
-# asm 2: pxor  <y10=%xmm0,<z10=%xmm1
-pxor  %xmm0,%xmm1
-
-# qhasm: uint32323232					r10 >>= 14
-# asm 1: psrld $14,<r10=int6464#13
-# asm 2: psrld $14,<r10=%xmm12
-psrld $14,%xmm12
-
-# qhasm: 						z10 ^= r10
-# asm 1: pxor  <r10=int6464#13,<z10=int6464#2
-# asm 2: pxor  <r10=%xmm12,<z10=%xmm1
-pxor  %xmm12,%xmm1
-
-# qhasm: 								y15 = z13
-# asm 1: movdqa <z13=int6464#10,>y15=int6464#1
-# asm 2: movdqa <z13=%xmm9,>y15=%xmm0
-movdqa %xmm9,%xmm0
-
-# qhasm: uint32323232							y15 += z14
-# asm 1: paddd <z14=int6464#4,<y15=int6464#1
-# asm 2: paddd <z14=%xmm3,<y15=%xmm0
-paddd %xmm3,%xmm0
-
-# qhasm: 								r15 = y15
-# asm 1: movdqa <y15=int6464#1,>r15=int6464#13
-# asm 2: movdqa <y15=%xmm0,>r15=%xmm12
-movdqa %xmm0,%xmm12
-
-# qhasm: uint32323232							y15 <<= 18
-# asm 1: pslld $18,<y15=int6464#1
-# asm 2: pslld $18,<y15=%xmm0
-pslld $18,%xmm0
-
-# qhasm: 								z15 ^= y15
-# asm 1: pxor  <y15=int6464#1,<z15=int6464#3
-# asm 2: pxor  <y15=%xmm0,<z15=%xmm2
-pxor  %xmm0,%xmm2
-
-# qhasm: uint32323232							r15 >>= 14
-# asm 1: psrld $14,<r15=int6464#13
-# asm 2: psrld $14,<r15=%xmm12
-psrld $14,%xmm12
-
-# qhasm: 								z15 ^= r15
-# asm 1: pxor  <r15=int6464#13,<z15=int6464#3
-# asm 2: pxor  <r15=%xmm12,<z15=%xmm2
-pxor  %xmm12,%xmm2
-
-# qhasm: 		z0 = z0_stack
-# asm 1: movdqa <z0_stack=stack128#21,>z0=int6464#13
-# asm 2: movdqa <z0_stack=320(%rsp),>z0=%xmm12
-movdqa 320(%rsp),%xmm12
-
-# qhasm: 				z5 = z5_stack
-# asm 1: movdqa <z5_stack=stack128#22,>z5=int6464#1
-# asm 2: movdqa <z5_stack=336(%rsp),>z5=%xmm0
-movdqa 336(%rsp),%xmm0
-
-# qhasm:                   unsigned>? i -= 2
-# asm 1: sub  $2,<i=int64#3
-# asm 2: sub  $2,<i=%rdx
-sub  $2,%rdx
-# comment:fp stack unchanged by jump
-
-# qhasm: goto mainloop1 if unsigned>
-ja ._mainloop1
-
-# qhasm:   uint32323232 z0 += orig0
-# asm 1: paddd <orig0=stack128#8,<z0=int6464#13
-# asm 2: paddd <orig0=112(%rsp),<z0=%xmm12
-paddd 112(%rsp),%xmm12
-
-# qhasm:   uint32323232 z1 += orig1
-# asm 1: paddd <orig1=stack128#12,<z1=int6464#8
-# asm 2: paddd <orig1=176(%rsp),<z1=%xmm7
-paddd 176(%rsp),%xmm7
-
-# qhasm:   uint32323232 z2 += orig2
-# asm 1: paddd <orig2=stack128#15,<z2=int6464#11
-# asm 2: paddd <orig2=224(%rsp),<z2=%xmm10
-paddd 224(%rsp),%xmm10
-
-# qhasm:   uint32323232 z3 += orig3
-# asm 1: paddd <orig3=stack128#18,<z3=int6464#5
-# asm 2: paddd <orig3=272(%rsp),<z3=%xmm4
-paddd 272(%rsp),%xmm4
-
-# qhasm:   in0 = z0
-# asm 1: movd   <z0=int6464#13,>in0=int64#3
-# asm 2: movd   <z0=%xmm12,>in0=%rdx
-movd   %xmm12,%rdx
-
-# qhasm:   in1 = z1
-# asm 1: movd   <z1=int6464#8,>in1=int64#4
-# asm 2: movd   <z1=%xmm7,>in1=%rcx
-movd   %xmm7,%rcx
-
-# qhasm:   in2 = z2
-# asm 1: movd   <z2=int6464#11,>in2=int64#5
-# asm 2: movd   <z2=%xmm10,>in2=%r8
-movd   %xmm10,%r8
-
-# qhasm:   in3 = z3
-# asm 1: movd   <z3=int6464#5,>in3=int64#6
-# asm 2: movd   <z3=%xmm4,>in3=%r9
-movd   %xmm4,%r9
-
-# qhasm:   z0 <<<= 96
-# asm 1: pshufd $0x39,<z0=int6464#13,<z0=int6464#13
-# asm 2: pshufd $0x39,<z0=%xmm12,<z0=%xmm12
-pshufd $0x39,%xmm12,%xmm12
-
-# qhasm:   z1 <<<= 96
-# asm 1: pshufd $0x39,<z1=int6464#8,<z1=int6464#8
-# asm 2: pshufd $0x39,<z1=%xmm7,<z1=%xmm7
-pshufd $0x39,%xmm7,%xmm7
-
-# qhasm:   z2 <<<= 96
-# asm 1: pshufd $0x39,<z2=int6464#11,<z2=int6464#11
-# asm 2: pshufd $0x39,<z2=%xmm10,<z2=%xmm10
-pshufd $0x39,%xmm10,%xmm10
-
-# qhasm:   z3 <<<= 96
-# asm 1: pshufd $0x39,<z3=int6464#5,<z3=int6464#5
-# asm 2: pshufd $0x39,<z3=%xmm4,<z3=%xmm4
-pshufd $0x39,%xmm4,%xmm4
-
-# qhasm:   (uint32) in0 ^= *(uint32 *) (m + 0)
-# asm 1: xorl 0(<m=int64#2),<in0=int64#3d
-# asm 2: xorl 0(<m=%rsi),<in0=%edx
-xorl 0(%rsi),%edx
-
-# qhasm:   (uint32) in1 ^= *(uint32 *) (m + 4)
-# asm 1: xorl 4(<m=int64#2),<in1=int64#4d
-# asm 2: xorl 4(<m=%rsi),<in1=%ecx
-xorl 4(%rsi),%ecx
-
-# qhasm:   (uint32) in2 ^= *(uint32 *) (m + 8)
-# asm 1: xorl 8(<m=int64#2),<in2=int64#5d
-# asm 2: xorl 8(<m=%rsi),<in2=%r8d
-xorl 8(%rsi),%r8d
-
-# qhasm:   (uint32) in3 ^= *(uint32 *) (m + 12)
-# asm 1: xorl 12(<m=int64#2),<in3=int64#6d
-# asm 2: xorl 12(<m=%rsi),<in3=%r9d
-xorl 12(%rsi),%r9d
-
-# qhasm:   *(uint32 *) (out + 0) = in0
-# asm 1: movl   <in0=int64#3d,0(<out=int64#1)
-# asm 2: movl   <in0=%edx,0(<out=%rdi)
-movl   %edx,0(%rdi)
-
-# qhasm:   *(uint32 *) (out + 4) = in1
-# asm 1: movl   <in1=int64#4d,4(<out=int64#1)
-# asm 2: movl   <in1=%ecx,4(<out=%rdi)
-movl   %ecx,4(%rdi)
-
-# qhasm:   *(uint32 *) (out + 8) = in2
-# asm 1: movl   <in2=int64#5d,8(<out=int64#1)
-# asm 2: movl   <in2=%r8d,8(<out=%rdi)
-movl   %r8d,8(%rdi)
-
-# qhasm:   *(uint32 *) (out + 12) = in3
-# asm 1: movl   <in3=int64#6d,12(<out=int64#1)
-# asm 2: movl   <in3=%r9d,12(<out=%rdi)
-movl   %r9d,12(%rdi)
-
-# qhasm:   in0 = z0
-# asm 1: movd   <z0=int6464#13,>in0=int64#3
-# asm 2: movd   <z0=%xmm12,>in0=%rdx
-movd   %xmm12,%rdx
-
-# qhasm:   in1 = z1
-# asm 1: movd   <z1=int6464#8,>in1=int64#4
-# asm 2: movd   <z1=%xmm7,>in1=%rcx
-movd   %xmm7,%rcx
-
-# qhasm:   in2 = z2
-# asm 1: movd   <z2=int6464#11,>in2=int64#5
-# asm 2: movd   <z2=%xmm10,>in2=%r8
-movd   %xmm10,%r8
-
-# qhasm:   in3 = z3
-# asm 1: movd   <z3=int6464#5,>in3=int64#6
-# asm 2: movd   <z3=%xmm4,>in3=%r9
-movd   %xmm4,%r9
-
-# qhasm:   z0 <<<= 96
-# asm 1: pshufd $0x39,<z0=int6464#13,<z0=int6464#13
-# asm 2: pshufd $0x39,<z0=%xmm12,<z0=%xmm12
-pshufd $0x39,%xmm12,%xmm12
-
-# qhasm:   z1 <<<= 96
-# asm 1: pshufd $0x39,<z1=int6464#8,<z1=int6464#8
-# asm 2: pshufd $0x39,<z1=%xmm7,<z1=%xmm7
-pshufd $0x39,%xmm7,%xmm7
-
-# qhasm:   z2 <<<= 96
-# asm 1: pshufd $0x39,<z2=int6464#11,<z2=int6464#11
-# asm 2: pshufd $0x39,<z2=%xmm10,<z2=%xmm10
-pshufd $0x39,%xmm10,%xmm10
-
-# qhasm:   z3 <<<= 96
-# asm 1: pshufd $0x39,<z3=int6464#5,<z3=int6464#5
-# asm 2: pshufd $0x39,<z3=%xmm4,<z3=%xmm4
-pshufd $0x39,%xmm4,%xmm4
-
-# qhasm:   (uint32) in0 ^= *(uint32 *) (m + 64)
-# asm 1: xorl 64(<m=int64#2),<in0=int64#3d
-# asm 2: xorl 64(<m=%rsi),<in0=%edx
-xorl 64(%rsi),%edx
-
-# qhasm:   (uint32) in1 ^= *(uint32 *) (m + 68)
-# asm 1: xorl 68(<m=int64#2),<in1=int64#4d
-# asm 2: xorl 68(<m=%rsi),<in1=%ecx
-xorl 68(%rsi),%ecx
-
-# qhasm:   (uint32) in2 ^= *(uint32 *) (m + 72)
-# asm 1: xorl 72(<m=int64#2),<in2=int64#5d
-# asm 2: xorl 72(<m=%rsi),<in2=%r8d
-xorl 72(%rsi),%r8d
-
-# qhasm:   (uint32) in3 ^= *(uint32 *) (m + 76)
-# asm 1: xorl 76(<m=int64#2),<in3=int64#6d
-# asm 2: xorl 76(<m=%rsi),<in3=%r9d
-xorl 76(%rsi),%r9d
-
-# qhasm:   *(uint32 *) (out + 64) = in0
-# asm 1: movl   <in0=int64#3d,64(<out=int64#1)
-# asm 2: movl   <in0=%edx,64(<out=%rdi)
-movl   %edx,64(%rdi)
-
-# qhasm:   *(uint32 *) (out + 68) = in1
-# asm 1: movl   <in1=int64#4d,68(<out=int64#1)
-# asm 2: movl   <in1=%ecx,68(<out=%rdi)
-movl   %ecx,68(%rdi)
-
-# qhasm:   *(uint32 *) (out + 72) = in2
-# asm 1: movl   <in2=int64#5d,72(<out=int64#1)
-# asm 2: movl   <in2=%r8d,72(<out=%rdi)
-movl   %r8d,72(%rdi)
-
-# qhasm:   *(uint32 *) (out + 76) = in3
-# asm 1: movl   <in3=int64#6d,76(<out=int64#1)
-# asm 2: movl   <in3=%r9d,76(<out=%rdi)
-movl   %r9d,76(%rdi)
-
-# qhasm:   in0 = z0
-# asm 1: movd   <z0=int6464#13,>in0=int64#3
-# asm 2: movd   <z0=%xmm12,>in0=%rdx
-movd   %xmm12,%rdx
-
-# qhasm:   in1 = z1
-# asm 1: movd   <z1=int6464#8,>in1=int64#4
-# asm 2: movd   <z1=%xmm7,>in1=%rcx
-movd   %xmm7,%rcx
-
-# qhasm:   in2 = z2
-# asm 1: movd   <z2=int6464#11,>in2=int64#5
-# asm 2: movd   <z2=%xmm10,>in2=%r8
-movd   %xmm10,%r8
-
-# qhasm:   in3 = z3
-# asm 1: movd   <z3=int6464#5,>in3=int64#6
-# asm 2: movd   <z3=%xmm4,>in3=%r9
-movd   %xmm4,%r9
-
-# qhasm:   z0 <<<= 96
-# asm 1: pshufd $0x39,<z0=int6464#13,<z0=int6464#13
-# asm 2: pshufd $0x39,<z0=%xmm12,<z0=%xmm12
-pshufd $0x39,%xmm12,%xmm12
-
-# qhasm:   z1 <<<= 96
-# asm 1: pshufd $0x39,<z1=int6464#8,<z1=int6464#8
-# asm 2: pshufd $0x39,<z1=%xmm7,<z1=%xmm7
-pshufd $0x39,%xmm7,%xmm7
-
-# qhasm:   z2 <<<= 96
-# asm 1: pshufd $0x39,<z2=int6464#11,<z2=int6464#11
-# asm 2: pshufd $0x39,<z2=%xmm10,<z2=%xmm10
-pshufd $0x39,%xmm10,%xmm10
-
-# qhasm:   z3 <<<= 96
-# asm 1: pshufd $0x39,<z3=int6464#5,<z3=int6464#5
-# asm 2: pshufd $0x39,<z3=%xmm4,<z3=%xmm4
-pshufd $0x39,%xmm4,%xmm4
-
-# qhasm:   (uint32) in0 ^= *(uint32 *) (m + 128)
-# asm 1: xorl 128(<m=int64#2),<in0=int64#3d
-# asm 2: xorl 128(<m=%rsi),<in0=%edx
-xorl 128(%rsi),%edx
-
-# qhasm:   (uint32) in1 ^= *(uint32 *) (m + 132)
-# asm 1: xorl 132(<m=int64#2),<in1=int64#4d
-# asm 2: xorl 132(<m=%rsi),<in1=%ecx
-xorl 132(%rsi),%ecx
-
-# qhasm:   (uint32) in2 ^= *(uint32 *) (m + 136)
-# asm 1: xorl 136(<m=int64#2),<in2=int64#5d
-# asm 2: xorl 136(<m=%rsi),<in2=%r8d
-xorl 136(%rsi),%r8d
-
-# qhasm:   (uint32) in3 ^= *(uint32 *) (m + 140)
-# asm 1: xorl 140(<m=int64#2),<in3=int64#6d
-# asm 2: xorl 140(<m=%rsi),<in3=%r9d
-xorl 140(%rsi),%r9d
-
-# qhasm:   *(uint32 *) (out + 128) = in0
-# asm 1: movl   <in0=int64#3d,128(<out=int64#1)
-# asm 2: movl   <in0=%edx,128(<out=%rdi)
-movl   %edx,128(%rdi)
-
-# qhasm:   *(uint32 *) (out + 132) = in1
-# asm 1: movl   <in1=int64#4d,132(<out=int64#1)
-# asm 2: movl   <in1=%ecx,132(<out=%rdi)
-movl   %ecx,132(%rdi)
-
-# qhasm:   *(uint32 *) (out + 136) = in2
-# asm 1: movl   <in2=int64#5d,136(<out=int64#1)
-# asm 2: movl   <in2=%r8d,136(<out=%rdi)
-movl   %r8d,136(%rdi)
-
-# qhasm:   *(uint32 *) (out + 140) = in3
-# asm 1: movl   <in3=int64#6d,140(<out=int64#1)
-# asm 2: movl   <in3=%r9d,140(<out=%rdi)
-movl   %r9d,140(%rdi)
-
-# qhasm:   in0 = z0
-# asm 1: movd   <z0=int6464#13,>in0=int64#3
-# asm 2: movd   <z0=%xmm12,>in0=%rdx
-movd   %xmm12,%rdx
-
-# qhasm:   in1 = z1
-# asm 1: movd   <z1=int6464#8,>in1=int64#4
-# asm 2: movd   <z1=%xmm7,>in1=%rcx
-movd   %xmm7,%rcx
-
-# qhasm:   in2 = z2
-# asm 1: movd   <z2=int6464#11,>in2=int64#5
-# asm 2: movd   <z2=%xmm10,>in2=%r8
-movd   %xmm10,%r8
-
-# qhasm:   in3 = z3
-# asm 1: movd   <z3=int6464#5,>in3=int64#6
-# asm 2: movd   <z3=%xmm4,>in3=%r9
-movd   %xmm4,%r9
-
-# qhasm:   (uint32) in0 ^= *(uint32 *) (m + 192)
-# asm 1: xorl 192(<m=int64#2),<in0=int64#3d
-# asm 2: xorl 192(<m=%rsi),<in0=%edx
-xorl 192(%rsi),%edx
-
-# qhasm:   (uint32) in1 ^= *(uint32 *) (m + 196)
-# asm 1: xorl 196(<m=int64#2),<in1=int64#4d
-# asm 2: xorl 196(<m=%rsi),<in1=%ecx
-xorl 196(%rsi),%ecx
-
-# qhasm:   (uint32) in2 ^= *(uint32 *) (m + 200)
-# asm 1: xorl 200(<m=int64#2),<in2=int64#5d
-# asm 2: xorl 200(<m=%rsi),<in2=%r8d
-xorl 200(%rsi),%r8d
-
-# qhasm:   (uint32) in3 ^= *(uint32 *) (m + 204)
-# asm 1: xorl 204(<m=int64#2),<in3=int64#6d
-# asm 2: xorl 204(<m=%rsi),<in3=%r9d
-xorl 204(%rsi),%r9d
-
-# qhasm:   *(uint32 *) (out + 192) = in0
-# asm 1: movl   <in0=int64#3d,192(<out=int64#1)
-# asm 2: movl   <in0=%edx,192(<out=%rdi)
-movl   %edx,192(%rdi)
-
-# qhasm:   *(uint32 *) (out + 196) = in1
-# asm 1: movl   <in1=int64#4d,196(<out=int64#1)
-# asm 2: movl   <in1=%ecx,196(<out=%rdi)
-movl   %ecx,196(%rdi)
-
-# qhasm:   *(uint32 *) (out + 200) = in2
-# asm 1: movl   <in2=int64#5d,200(<out=int64#1)
-# asm 2: movl   <in2=%r8d,200(<out=%rdi)
-movl   %r8d,200(%rdi)
-
-# qhasm:   *(uint32 *) (out + 204) = in3
-# asm 1: movl   <in3=int64#6d,204(<out=int64#1)
-# asm 2: movl   <in3=%r9d,204(<out=%rdi)
-movl   %r9d,204(%rdi)
-
-# qhasm:   uint32323232 z4 += orig4
-# asm 1: paddd <orig4=stack128#16,<z4=int6464#15
-# asm 2: paddd <orig4=240(%rsp),<z4=%xmm14
-paddd 240(%rsp),%xmm14
-
-# qhasm:   uint32323232 z5 += orig5
-# asm 1: paddd <orig5=stack128#5,<z5=int6464#1
-# asm 2: paddd <orig5=64(%rsp),<z5=%xmm0
-paddd 64(%rsp),%xmm0
-
-# qhasm:   uint32323232 z6 += orig6
-# asm 1: paddd <orig6=stack128#9,<z6=int6464#6
-# asm 2: paddd <orig6=128(%rsp),<z6=%xmm5
-paddd 128(%rsp),%xmm5
-
-# qhasm:   uint32323232 z7 += orig7
-# asm 1: paddd <orig7=stack128#13,<z7=int6464#9
-# asm 2: paddd <orig7=192(%rsp),<z7=%xmm8
-paddd 192(%rsp),%xmm8
-
-# qhasm:   in4 = z4
-# asm 1: movd   <z4=int6464#15,>in4=int64#3
-# asm 2: movd   <z4=%xmm14,>in4=%rdx
-movd   %xmm14,%rdx
-
-# qhasm:   in5 = z5
-# asm 1: movd   <z5=int6464#1,>in5=int64#4
-# asm 2: movd   <z5=%xmm0,>in5=%rcx
-movd   %xmm0,%rcx
-
-# qhasm:   in6 = z6
-# asm 1: movd   <z6=int6464#6,>in6=int64#5
-# asm 2: movd   <z6=%xmm5,>in6=%r8
-movd   %xmm5,%r8
-
-# qhasm:   in7 = z7
-# asm 1: movd   <z7=int6464#9,>in7=int64#6
-# asm 2: movd   <z7=%xmm8,>in7=%r9
-movd   %xmm8,%r9
-
-# qhasm:   z4 <<<= 96
-# asm 1: pshufd $0x39,<z4=int6464#15,<z4=int6464#15
-# asm 2: pshufd $0x39,<z4=%xmm14,<z4=%xmm14
-pshufd $0x39,%xmm14,%xmm14
-
-# qhasm:   z5 <<<= 96
-# asm 1: pshufd $0x39,<z5=int6464#1,<z5=int6464#1
-# asm 2: pshufd $0x39,<z5=%xmm0,<z5=%xmm0
-pshufd $0x39,%xmm0,%xmm0
-
-# qhasm:   z6 <<<= 96
-# asm 1: pshufd $0x39,<z6=int6464#6,<z6=int6464#6
-# asm 2: pshufd $0x39,<z6=%xmm5,<z6=%xmm5
-pshufd $0x39,%xmm5,%xmm5
-
-# qhasm:   z7 <<<= 96
-# asm 1: pshufd $0x39,<z7=int6464#9,<z7=int6464#9
-# asm 2: pshufd $0x39,<z7=%xmm8,<z7=%xmm8
-pshufd $0x39,%xmm8,%xmm8
-
-# qhasm:   (uint32) in4 ^= *(uint32 *) (m + 16)
-# asm 1: xorl 16(<m=int64#2),<in4=int64#3d
-# asm 2: xorl 16(<m=%rsi),<in4=%edx
-xorl 16(%rsi),%edx
-
-# qhasm:   (uint32) in5 ^= *(uint32 *) (m + 20)
-# asm 1: xorl 20(<m=int64#2),<in5=int64#4d
-# asm 2: xorl 20(<m=%rsi),<in5=%ecx
-xorl 20(%rsi),%ecx
-
-# qhasm:   (uint32) in6 ^= *(uint32 *) (m + 24)
-# asm 1: xorl 24(<m=int64#2),<in6=int64#5d
-# asm 2: xorl 24(<m=%rsi),<in6=%r8d
-xorl 24(%rsi),%r8d
-
-# qhasm:   (uint32) in7 ^= *(uint32 *) (m + 28)
-# asm 1: xorl 28(<m=int64#2),<in7=int64#6d
-# asm 2: xorl 28(<m=%rsi),<in7=%r9d
-xorl 28(%rsi),%r9d
-
-# qhasm:   *(uint32 *) (out + 16) = in4
-# asm 1: movl   <in4=int64#3d,16(<out=int64#1)
-# asm 2: movl   <in4=%edx,16(<out=%rdi)
-movl   %edx,16(%rdi)
-
-# qhasm:   *(uint32 *) (out + 20) = in5
-# asm 1: movl   <in5=int64#4d,20(<out=int64#1)
-# asm 2: movl   <in5=%ecx,20(<out=%rdi)
-movl   %ecx,20(%rdi)
-
-# qhasm:   *(uint32 *) (out + 24) = in6
-# asm 1: movl   <in6=int64#5d,24(<out=int64#1)
-# asm 2: movl   <in6=%r8d,24(<out=%rdi)
-movl   %r8d,24(%rdi)
-
-# qhasm:   *(uint32 *) (out + 28) = in7
-# asm 1: movl   <in7=int64#6d,28(<out=int64#1)
-# asm 2: movl   <in7=%r9d,28(<out=%rdi)
-movl   %r9d,28(%rdi)
-
-# qhasm:   in4 = z4
-# asm 1: movd   <z4=int6464#15,>in4=int64#3
-# asm 2: movd   <z4=%xmm14,>in4=%rdx
-movd   %xmm14,%rdx
-
-# qhasm:   in5 = z5
-# asm 1: movd   <z5=int6464#1,>in5=int64#4
-# asm 2: movd   <z5=%xmm0,>in5=%rcx
-movd   %xmm0,%rcx
-
-# qhasm:   in6 = z6
-# asm 1: movd   <z6=int6464#6,>in6=int64#5
-# asm 2: movd   <z6=%xmm5,>in6=%r8
-movd   %xmm5,%r8
-
-# qhasm:   in7 = z7
-# asm 1: movd   <z7=int6464#9,>in7=int64#6
-# asm 2: movd   <z7=%xmm8,>in7=%r9
-movd   %xmm8,%r9
-
-# qhasm:   z4 <<<= 96
-# asm 1: pshufd $0x39,<z4=int6464#15,<z4=int6464#15
-# asm 2: pshufd $0x39,<z4=%xmm14,<z4=%xmm14
-pshufd $0x39,%xmm14,%xmm14
-
-# qhasm:   z5 <<<= 96
-# asm 1: pshufd $0x39,<z5=int6464#1,<z5=int6464#1
-# asm 2: pshufd $0x39,<z5=%xmm0,<z5=%xmm0
-pshufd $0x39,%xmm0,%xmm0
-
-# qhasm:   z6 <<<= 96
-# asm 1: pshufd $0x39,<z6=int6464#6,<z6=int6464#6
-# asm 2: pshufd $0x39,<z6=%xmm5,<z6=%xmm5
-pshufd $0x39,%xmm5,%xmm5
-
-# qhasm:   z7 <<<= 96
-# asm 1: pshufd $0x39,<z7=int6464#9,<z7=int6464#9
-# asm 2: pshufd $0x39,<z7=%xmm8,<z7=%xmm8
-pshufd $0x39,%xmm8,%xmm8
-
-# qhasm:   (uint32) in4 ^= *(uint32 *) (m + 80)
-# asm 1: xorl 80(<m=int64#2),<in4=int64#3d
-# asm 2: xorl 80(<m=%rsi),<in4=%edx
-xorl 80(%rsi),%edx
-
-# qhasm:   (uint32) in5 ^= *(uint32 *) (m + 84)
-# asm 1: xorl 84(<m=int64#2),<in5=int64#4d
-# asm 2: xorl 84(<m=%rsi),<in5=%ecx
-xorl 84(%rsi),%ecx
-
-# qhasm:   (uint32) in6 ^= *(uint32 *) (m + 88)
-# asm 1: xorl 88(<m=int64#2),<in6=int64#5d
-# asm 2: xorl 88(<m=%rsi),<in6=%r8d
-xorl 88(%rsi),%r8d
-
-# qhasm:   (uint32) in7 ^= *(uint32 *) (m + 92)
-# asm 1: xorl 92(<m=int64#2),<in7=int64#6d
-# asm 2: xorl 92(<m=%rsi),<in7=%r9d
-xorl 92(%rsi),%r9d
-
-# qhasm:   *(uint32 *) (out + 80) = in4
-# asm 1: movl   <in4=int64#3d,80(<out=int64#1)
-# asm 2: movl   <in4=%edx,80(<out=%rdi)
-movl   %edx,80(%rdi)
-
-# qhasm:   *(uint32 *) (out + 84) = in5
-# asm 1: movl   <in5=int64#4d,84(<out=int64#1)
-# asm 2: movl   <in5=%ecx,84(<out=%rdi)
-movl   %ecx,84(%rdi)
-
-# qhasm:   *(uint32 *) (out + 88) = in6
-# asm 1: movl   <in6=int64#5d,88(<out=int64#1)
-# asm 2: movl   <in6=%r8d,88(<out=%rdi)
-movl   %r8d,88(%rdi)
-
-# qhasm:   *(uint32 *) (out + 92) = in7
-# asm 1: movl   <in7=int64#6d,92(<out=int64#1)
-# asm 2: movl   <in7=%r9d,92(<out=%rdi)
-movl   %r9d,92(%rdi)
-
-# qhasm:   in4 = z4
-# asm 1: movd   <z4=int6464#15,>in4=int64#3
-# asm 2: movd   <z4=%xmm14,>in4=%rdx
-movd   %xmm14,%rdx
-
-# qhasm:   in5 = z5
-# asm 1: movd   <z5=int6464#1,>in5=int64#4
-# asm 2: movd   <z5=%xmm0,>in5=%rcx
-movd   %xmm0,%rcx
-
-# qhasm:   in6 = z6
-# asm 1: movd   <z6=int6464#6,>in6=int64#5
-# asm 2: movd   <z6=%xmm5,>in6=%r8
-movd   %xmm5,%r8
-
-# qhasm:   in7 = z7
-# asm 1: movd   <z7=int6464#9,>in7=int64#6
-# asm 2: movd   <z7=%xmm8,>in7=%r9
-movd   %xmm8,%r9
-
-# qhasm:   z4 <<<= 96
-# asm 1: pshufd $0x39,<z4=int6464#15,<z4=int6464#15
-# asm 2: pshufd $0x39,<z4=%xmm14,<z4=%xmm14
-pshufd $0x39,%xmm14,%xmm14
-
-# qhasm:   z5 <<<= 96
-# asm 1: pshufd $0x39,<z5=int6464#1,<z5=int6464#1
-# asm 2: pshufd $0x39,<z5=%xmm0,<z5=%xmm0
-pshufd $0x39,%xmm0,%xmm0
-
-# qhasm:   z6 <<<= 96
-# asm 1: pshufd $0x39,<z6=int6464#6,<z6=int6464#6
-# asm 2: pshufd $0x39,<z6=%xmm5,<z6=%xmm5
-pshufd $0x39,%xmm5,%xmm5
-
-# qhasm:   z7 <<<= 96
-# asm 1: pshufd $0x39,<z7=int6464#9,<z7=int6464#9
-# asm 2: pshufd $0x39,<z7=%xmm8,<z7=%xmm8
-pshufd $0x39,%xmm8,%xmm8
-
-# qhasm:   (uint32) in4 ^= *(uint32 *) (m + 144)
-# asm 1: xorl 144(<m=int64#2),<in4=int64#3d
-# asm 2: xorl 144(<m=%rsi),<in4=%edx
-xorl 144(%rsi),%edx
-
-# qhasm:   (uint32) in5 ^= *(uint32 *) (m + 148)
-# asm 1: xorl 148(<m=int64#2),<in5=int64#4d
-# asm 2: xorl 148(<m=%rsi),<in5=%ecx
-xorl 148(%rsi),%ecx
-
-# qhasm:   (uint32) in6 ^= *(uint32 *) (m + 152)
-# asm 1: xorl 152(<m=int64#2),<in6=int64#5d
-# asm 2: xorl 152(<m=%rsi),<in6=%r8d
-xorl 152(%rsi),%r8d
-
-# qhasm:   (uint32) in7 ^= *(uint32 *) (m + 156)
-# asm 1: xorl 156(<m=int64#2),<in7=int64#6d
-# asm 2: xorl 156(<m=%rsi),<in7=%r9d
-xorl 156(%rsi),%r9d
-
-# qhasm:   *(uint32 *) (out + 144) = in4
-# asm 1: movl   <in4=int64#3d,144(<out=int64#1)
-# asm 2: movl   <in4=%edx,144(<out=%rdi)
-movl   %edx,144(%rdi)
-
-# qhasm:   *(uint32 *) (out + 148) = in5
-# asm 1: movl   <in5=int64#4d,148(<out=int64#1)
-# asm 2: movl   <in5=%ecx,148(<out=%rdi)
-movl   %ecx,148(%rdi)
-
-# qhasm:   *(uint32 *) (out + 152) = in6
-# asm 1: movl   <in6=int64#5d,152(<out=int64#1)
-# asm 2: movl   <in6=%r8d,152(<out=%rdi)
-movl   %r8d,152(%rdi)
-
-# qhasm:   *(uint32 *) (out + 156) = in7
-# asm 1: movl   <in7=int64#6d,156(<out=int64#1)
-# asm 2: movl   <in7=%r9d,156(<out=%rdi)
-movl   %r9d,156(%rdi)
-
-# qhasm:   in4 = z4
-# asm 1: movd   <z4=int6464#15,>in4=int64#3
-# asm 2: movd   <z4=%xmm14,>in4=%rdx
-movd   %xmm14,%rdx
-
-# qhasm:   in5 = z5
-# asm 1: movd   <z5=int6464#1,>in5=int64#4
-# asm 2: movd   <z5=%xmm0,>in5=%rcx
-movd   %xmm0,%rcx
-
-# qhasm:   in6 = z6
-# asm 1: movd   <z6=int6464#6,>in6=int64#5
-# asm 2: movd   <z6=%xmm5,>in6=%r8
-movd   %xmm5,%r8
-
-# qhasm:   in7 = z7
-# asm 1: movd   <z7=int6464#9,>in7=int64#6
-# asm 2: movd   <z7=%xmm8,>in7=%r9
-movd   %xmm8,%r9
-
-# qhasm:   (uint32) in4 ^= *(uint32 *) (m + 208)
-# asm 1: xorl 208(<m=int64#2),<in4=int64#3d
-# asm 2: xorl 208(<m=%rsi),<in4=%edx
-xorl 208(%rsi),%edx
-
-# qhasm:   (uint32) in5 ^= *(uint32 *) (m + 212)
-# asm 1: xorl 212(<m=int64#2),<in5=int64#4d
-# asm 2: xorl 212(<m=%rsi),<in5=%ecx
-xorl 212(%rsi),%ecx
-
-# qhasm:   (uint32) in6 ^= *(uint32 *) (m + 216)
-# asm 1: xorl 216(<m=int64#2),<in6=int64#5d
-# asm 2: xorl 216(<m=%rsi),<in6=%r8d
-xorl 216(%rsi),%r8d
-
-# qhasm:   (uint32) in7 ^= *(uint32 *) (m + 220)
-# asm 1: xorl 220(<m=int64#2),<in7=int64#6d
-# asm 2: xorl 220(<m=%rsi),<in7=%r9d
-xorl 220(%rsi),%r9d
-
-# qhasm:   *(uint32 *) (out + 208) = in4
-# asm 1: movl   <in4=int64#3d,208(<out=int64#1)
-# asm 2: movl   <in4=%edx,208(<out=%rdi)
-movl   %edx,208(%rdi)
-
-# qhasm:   *(uint32 *) (out + 212) = in5
-# asm 1: movl   <in5=int64#4d,212(<out=int64#1)
-# asm 2: movl   <in5=%ecx,212(<out=%rdi)
-movl   %ecx,212(%rdi)
-
-# qhasm:   *(uint32 *) (out + 216) = in6
-# asm 1: movl   <in6=int64#5d,216(<out=int64#1)
-# asm 2: movl   <in6=%r8d,216(<out=%rdi)
-movl   %r8d,216(%rdi)
-
-# qhasm:   *(uint32 *) (out + 220) = in7
-# asm 1: movl   <in7=int64#6d,220(<out=int64#1)
-# asm 2: movl   <in7=%r9d,220(<out=%rdi)
-movl   %r9d,220(%rdi)
-
-# qhasm:   uint32323232 z8 += orig8
-# asm 1: paddd <orig8=stack128#19,<z8=int6464#16
-# asm 2: paddd <orig8=288(%rsp),<z8=%xmm15
-paddd 288(%rsp),%xmm15
-
-# qhasm:   uint32323232 z9 += orig9
-# asm 1: paddd <orig9=stack128#20,<z9=int6464#12
-# asm 2: paddd <orig9=304(%rsp),<z9=%xmm11
-paddd 304(%rsp),%xmm11
-
-# qhasm:   uint32323232 z10 += orig10
-# asm 1: paddd <orig10=stack128#6,<z10=int6464#2
-# asm 2: paddd <orig10=80(%rsp),<z10=%xmm1
-paddd 80(%rsp),%xmm1
-
-# qhasm:   uint32323232 z11 += orig11
-# asm 1: paddd <orig11=stack128#10,<z11=int6464#7
-# asm 2: paddd <orig11=144(%rsp),<z11=%xmm6
-paddd 144(%rsp),%xmm6
-
-# qhasm:   in8 = z8
-# asm 1: movd   <z8=int6464#16,>in8=int64#3
-# asm 2: movd   <z8=%xmm15,>in8=%rdx
-movd   %xmm15,%rdx
-
-# qhasm:   in9 = z9
-# asm 1: movd   <z9=int6464#12,>in9=int64#4
-# asm 2: movd   <z9=%xmm11,>in9=%rcx
-movd   %xmm11,%rcx
-
-# qhasm:   in10 = z10
-# asm 1: movd   <z10=int6464#2,>in10=int64#5
-# asm 2: movd   <z10=%xmm1,>in10=%r8
-movd   %xmm1,%r8
-
-# qhasm:   in11 = z11
-# asm 1: movd   <z11=int6464#7,>in11=int64#6
-# asm 2: movd   <z11=%xmm6,>in11=%r9
-movd   %xmm6,%r9
-
-# qhasm:   z8 <<<= 96
-# asm 1: pshufd $0x39,<z8=int6464#16,<z8=int6464#16
-# asm 2: pshufd $0x39,<z8=%xmm15,<z8=%xmm15
-pshufd $0x39,%xmm15,%xmm15
-
-# qhasm:   z9 <<<= 96
-# asm 1: pshufd $0x39,<z9=int6464#12,<z9=int6464#12
-# asm 2: pshufd $0x39,<z9=%xmm11,<z9=%xmm11
-pshufd $0x39,%xmm11,%xmm11
-
-# qhasm:   z10 <<<= 96
-# asm 1: pshufd $0x39,<z10=int6464#2,<z10=int6464#2
-# asm 2: pshufd $0x39,<z10=%xmm1,<z10=%xmm1
-pshufd $0x39,%xmm1,%xmm1
-
-# qhasm:   z11 <<<= 96
-# asm 1: pshufd $0x39,<z11=int6464#7,<z11=int6464#7
-# asm 2: pshufd $0x39,<z11=%xmm6,<z11=%xmm6
-pshufd $0x39,%xmm6,%xmm6
-
-# qhasm:   (uint32) in8 ^= *(uint32 *) (m + 32)
-# asm 1: xorl 32(<m=int64#2),<in8=int64#3d
-# asm 2: xorl 32(<m=%rsi),<in8=%edx
-xorl 32(%rsi),%edx
-
-# qhasm:   (uint32) in9 ^= *(uint32 *) (m + 36)
-# asm 1: xorl 36(<m=int64#2),<in9=int64#4d
-# asm 2: xorl 36(<m=%rsi),<in9=%ecx
-xorl 36(%rsi),%ecx
-
-# qhasm:   (uint32) in10 ^= *(uint32 *) (m + 40)
-# asm 1: xorl 40(<m=int64#2),<in10=int64#5d
-# asm 2: xorl 40(<m=%rsi),<in10=%r8d
-xorl 40(%rsi),%r8d
-
-# qhasm:   (uint32) in11 ^= *(uint32 *) (m + 44)
-# asm 1: xorl 44(<m=int64#2),<in11=int64#6d
-# asm 2: xorl 44(<m=%rsi),<in11=%r9d
-xorl 44(%rsi),%r9d
-
-# qhasm:   *(uint32 *) (out + 32) = in8
-# asm 1: movl   <in8=int64#3d,32(<out=int64#1)
-# asm 2: movl   <in8=%edx,32(<out=%rdi)
-movl   %edx,32(%rdi)
-
-# qhasm:   *(uint32 *) (out + 36) = in9
-# asm 1: movl   <in9=int64#4d,36(<out=int64#1)
-# asm 2: movl   <in9=%ecx,36(<out=%rdi)
-movl   %ecx,36(%rdi)
-
-# qhasm:   *(uint32 *) (out + 40) = in10
-# asm 1: movl   <in10=int64#5d,40(<out=int64#1)
-# asm 2: movl   <in10=%r8d,40(<out=%rdi)
-movl   %r8d,40(%rdi)
-
-# qhasm:   *(uint32 *) (out + 44) = in11
-# asm 1: movl   <in11=int64#6d,44(<out=int64#1)
-# asm 2: movl   <in11=%r9d,44(<out=%rdi)
-movl   %r9d,44(%rdi)
-
-# qhasm:   in8 = z8
-# asm 1: movd   <z8=int6464#16,>in8=int64#3
-# asm 2: movd   <z8=%xmm15,>in8=%rdx
-movd   %xmm15,%rdx
-
-# qhasm:   in9 = z9
-# asm 1: movd   <z9=int6464#12,>in9=int64#4
-# asm 2: movd   <z9=%xmm11,>in9=%rcx
-movd   %xmm11,%rcx
-
-# qhasm:   in10 = z10
-# asm 1: movd   <z10=int6464#2,>in10=int64#5
-# asm 2: movd   <z10=%xmm1,>in10=%r8
-movd   %xmm1,%r8
-
-# qhasm:   in11 = z11
-# asm 1: movd   <z11=int6464#7,>in11=int64#6
-# asm 2: movd   <z11=%xmm6,>in11=%r9
-movd   %xmm6,%r9
-
-# qhasm:   z8 <<<= 96
-# asm 1: pshufd $0x39,<z8=int6464#16,<z8=int6464#16
-# asm 2: pshufd $0x39,<z8=%xmm15,<z8=%xmm15
-pshufd $0x39,%xmm15,%xmm15
-
-# qhasm:   z9 <<<= 96
-# asm 1: pshufd $0x39,<z9=int6464#12,<z9=int6464#12
-# asm 2: pshufd $0x39,<z9=%xmm11,<z9=%xmm11
-pshufd $0x39,%xmm11,%xmm11
-
-# qhasm:   z10 <<<= 96
-# asm 1: pshufd $0x39,<z10=int6464#2,<z10=int6464#2
-# asm 2: pshufd $0x39,<z10=%xmm1,<z10=%xmm1
-pshufd $0x39,%xmm1,%xmm1
-
-# qhasm:   z11 <<<= 96
-# asm 1: pshufd $0x39,<z11=int6464#7,<z11=int6464#7
-# asm 2: pshufd $0x39,<z11=%xmm6,<z11=%xmm6
-pshufd $0x39,%xmm6,%xmm6
-
-# qhasm:   (uint32) in8 ^= *(uint32 *) (m + 96)
-# asm 1: xorl 96(<m=int64#2),<in8=int64#3d
-# asm 2: xorl 96(<m=%rsi),<in8=%edx
-xorl 96(%rsi),%edx
-
-# qhasm:   (uint32) in9 ^= *(uint32 *) (m + 100)
-# asm 1: xorl 100(<m=int64#2),<in9=int64#4d
-# asm 2: xorl 100(<m=%rsi),<in9=%ecx
-xorl 100(%rsi),%ecx
-
-# qhasm:   (uint32) in10 ^= *(uint32 *) (m + 104)
-# asm 1: xorl 104(<m=int64#2),<in10=int64#5d
-# asm 2: xorl 104(<m=%rsi),<in10=%r8d
-xorl 104(%rsi),%r8d
-
-# qhasm:   (uint32) in11 ^= *(uint32 *) (m + 108)
-# asm 1: xorl 108(<m=int64#2),<in11=int64#6d
-# asm 2: xorl 108(<m=%rsi),<in11=%r9d
-xorl 108(%rsi),%r9d
-
-# qhasm:   *(uint32 *) (out + 96) = in8
-# asm 1: movl   <in8=int64#3d,96(<out=int64#1)
-# asm 2: movl   <in8=%edx,96(<out=%rdi)
-movl   %edx,96(%rdi)
-
-# qhasm:   *(uint32 *) (out + 100) = in9
-# asm 1: movl   <in9=int64#4d,100(<out=int64#1)
-# asm 2: movl   <in9=%ecx,100(<out=%rdi)
-movl   %ecx,100(%rdi)
-
-# qhasm:   *(uint32 *) (out + 104) = in10
-# asm 1: movl   <in10=int64#5d,104(<out=int64#1)
-# asm 2: movl   <in10=%r8d,104(<out=%rdi)
-movl   %r8d,104(%rdi)
-
-# qhasm:   *(uint32 *) (out + 108) = in11
-# asm 1: movl   <in11=int64#6d,108(<out=int64#1)
-# asm 2: movl   <in11=%r9d,108(<out=%rdi)
-movl   %r9d,108(%rdi)
-
-# qhasm:   in8 = z8
-# asm 1: movd   <z8=int6464#16,>in8=int64#3
-# asm 2: movd   <z8=%xmm15,>in8=%rdx
-movd   %xmm15,%rdx
-
-# qhasm:   in9 = z9
-# asm 1: movd   <z9=int6464#12,>in9=int64#4
-# asm 2: movd   <z9=%xmm11,>in9=%rcx
-movd   %xmm11,%rcx
-
-# qhasm:   in10 = z10
-# asm 1: movd   <z10=int6464#2,>in10=int64#5
-# asm 2: movd   <z10=%xmm1,>in10=%r8
-movd   %xmm1,%r8
-
-# qhasm:   in11 = z11
-# asm 1: movd   <z11=int6464#7,>in11=int64#6
-# asm 2: movd   <z11=%xmm6,>in11=%r9
-movd   %xmm6,%r9
-
-# qhasm:   z8 <<<= 96
-# asm 1: pshufd $0x39,<z8=int6464#16,<z8=int6464#16
-# asm 2: pshufd $0x39,<z8=%xmm15,<z8=%xmm15
-pshufd $0x39,%xmm15,%xmm15
-
-# qhasm:   z9 <<<= 96
-# asm 1: pshufd $0x39,<z9=int6464#12,<z9=int6464#12
-# asm 2: pshufd $0x39,<z9=%xmm11,<z9=%xmm11
-pshufd $0x39,%xmm11,%xmm11
-
-# qhasm:   z10 <<<= 96
-# asm 1: pshufd $0x39,<z10=int6464#2,<z10=int6464#2
-# asm 2: pshufd $0x39,<z10=%xmm1,<z10=%xmm1
-pshufd $0x39,%xmm1,%xmm1
-
-# qhasm:   z11 <<<= 96
-# asm 1: pshufd $0x39,<z11=int6464#7,<z11=int6464#7
-# asm 2: pshufd $0x39,<z11=%xmm6,<z11=%xmm6
-pshufd $0x39,%xmm6,%xmm6
-
-# qhasm:   (uint32) in8 ^= *(uint32 *) (m + 160)
-# asm 1: xorl 160(<m=int64#2),<in8=int64#3d
-# asm 2: xorl 160(<m=%rsi),<in8=%edx
-xorl 160(%rsi),%edx
-
-# qhasm:   (uint32) in9 ^= *(uint32 *) (m + 164)
-# asm 1: xorl 164(<m=int64#2),<in9=int64#4d
-# asm 2: xorl 164(<m=%rsi),<in9=%ecx
-xorl 164(%rsi),%ecx
-
-# qhasm:   (uint32) in10 ^= *(uint32 *) (m + 168)
-# asm 1: xorl 168(<m=int64#2),<in10=int64#5d
-# asm 2: xorl 168(<m=%rsi),<in10=%r8d
-xorl 168(%rsi),%r8d
-
-# qhasm:   (uint32) in11 ^= *(uint32 *) (m + 172)
-# asm 1: xorl 172(<m=int64#2),<in11=int64#6d
-# asm 2: xorl 172(<m=%rsi),<in11=%r9d
-xorl 172(%rsi),%r9d
-
-# qhasm:   *(uint32 *) (out + 160) = in8
-# asm 1: movl   <in8=int64#3d,160(<out=int64#1)
-# asm 2: movl   <in8=%edx,160(<out=%rdi)
-movl   %edx,160(%rdi)
-
-# qhasm:   *(uint32 *) (out + 164) = in9
-# asm 1: movl   <in9=int64#4d,164(<out=int64#1)
-# asm 2: movl   <in9=%ecx,164(<out=%rdi)
-movl   %ecx,164(%rdi)
-
-# qhasm:   *(uint32 *) (out + 168) = in10
-# asm 1: movl   <in10=int64#5d,168(<out=int64#1)
-# asm 2: movl   <in10=%r8d,168(<out=%rdi)
-movl   %r8d,168(%rdi)
-
-# qhasm:   *(uint32 *) (out + 172) = in11
-# asm 1: movl   <in11=int64#6d,172(<out=int64#1)
-# asm 2: movl   <in11=%r9d,172(<out=%rdi)
-movl   %r9d,172(%rdi)
-
-# qhasm:   in8 = z8
-# asm 1: movd   <z8=int6464#16,>in8=int64#3
-# asm 2: movd   <z8=%xmm15,>in8=%rdx
-movd   %xmm15,%rdx
-
-# qhasm:   in9 = z9
-# asm 1: movd   <z9=int6464#12,>in9=int64#4
-# asm 2: movd   <z9=%xmm11,>in9=%rcx
-movd   %xmm11,%rcx
-
-# qhasm:   in10 = z10
-# asm 1: movd   <z10=int6464#2,>in10=int64#5
-# asm 2: movd   <z10=%xmm1,>in10=%r8
-movd   %xmm1,%r8
-
-# qhasm:   in11 = z11
-# asm 1: movd   <z11=int6464#7,>in11=int64#6
-# asm 2: movd   <z11=%xmm6,>in11=%r9
-movd   %xmm6,%r9
-
-# qhasm:   (uint32) in8 ^= *(uint32 *) (m + 224)
-# asm 1: xorl 224(<m=int64#2),<in8=int64#3d
-# asm 2: xorl 224(<m=%rsi),<in8=%edx
-xorl 224(%rsi),%edx
-
-# qhasm:   (uint32) in9 ^= *(uint32 *) (m + 228)
-# asm 1: xorl 228(<m=int64#2),<in9=int64#4d
-# asm 2: xorl 228(<m=%rsi),<in9=%ecx
-xorl 228(%rsi),%ecx
-
-# qhasm:   (uint32) in10 ^= *(uint32 *) (m + 232)
-# asm 1: xorl 232(<m=int64#2),<in10=int64#5d
-# asm 2: xorl 232(<m=%rsi),<in10=%r8d
-xorl 232(%rsi),%r8d
-
-# qhasm:   (uint32) in11 ^= *(uint32 *) (m + 236)
-# asm 1: xorl 236(<m=int64#2),<in11=int64#6d
-# asm 2: xorl 236(<m=%rsi),<in11=%r9d
-xorl 236(%rsi),%r9d
-
-# qhasm:   *(uint32 *) (out + 224) = in8
-# asm 1: movl   <in8=int64#3d,224(<out=int64#1)
-# asm 2: movl   <in8=%edx,224(<out=%rdi)
-movl   %edx,224(%rdi)
-
-# qhasm:   *(uint32 *) (out + 228) = in9
-# asm 1: movl   <in9=int64#4d,228(<out=int64#1)
-# asm 2: movl   <in9=%ecx,228(<out=%rdi)
-movl   %ecx,228(%rdi)
-
-# qhasm:   *(uint32 *) (out + 232) = in10
-# asm 1: movl   <in10=int64#5d,232(<out=int64#1)
-# asm 2: movl   <in10=%r8d,232(<out=%rdi)
-movl   %r8d,232(%rdi)
-
-# qhasm:   *(uint32 *) (out + 236) = in11
-# asm 1: movl   <in11=int64#6d,236(<out=int64#1)
-# asm 2: movl   <in11=%r9d,236(<out=%rdi)
-movl   %r9d,236(%rdi)
-
-# qhasm:   uint32323232 z12 += orig12
-# asm 1: paddd <orig12=stack128#11,<z12=int6464#14
-# asm 2: paddd <orig12=160(%rsp),<z12=%xmm13
-paddd 160(%rsp),%xmm13
-
-# qhasm:   uint32323232 z13 += orig13
-# asm 1: paddd <orig13=stack128#14,<z13=int6464#10
-# asm 2: paddd <orig13=208(%rsp),<z13=%xmm9
-paddd 208(%rsp),%xmm9
-
-# qhasm:   uint32323232 z14 += orig14
-# asm 1: paddd <orig14=stack128#17,<z14=int6464#4
-# asm 2: paddd <orig14=256(%rsp),<z14=%xmm3
-paddd 256(%rsp),%xmm3
-
-# qhasm:   uint32323232 z15 += orig15
-# asm 1: paddd <orig15=stack128#7,<z15=int6464#3
-# asm 2: paddd <orig15=96(%rsp),<z15=%xmm2
-paddd 96(%rsp),%xmm2
-
-# qhasm:   in12 = z12
-# asm 1: movd   <z12=int6464#14,>in12=int64#3
-# asm 2: movd   <z12=%xmm13,>in12=%rdx
-movd   %xmm13,%rdx
-
-# qhasm:   in13 = z13
-# asm 1: movd   <z13=int6464#10,>in13=int64#4
-# asm 2: movd   <z13=%xmm9,>in13=%rcx
-movd   %xmm9,%rcx
-
-# qhasm:   in14 = z14
-# asm 1: movd   <z14=int6464#4,>in14=int64#5
-# asm 2: movd   <z14=%xmm3,>in14=%r8
-movd   %xmm3,%r8
-
-# qhasm:   in15 = z15
-# asm 1: movd   <z15=int6464#3,>in15=int64#6
-# asm 2: movd   <z15=%xmm2,>in15=%r9
-movd   %xmm2,%r9
-
-# qhasm:   z12 <<<= 96
-# asm 1: pshufd $0x39,<z12=int6464#14,<z12=int6464#14
-# asm 2: pshufd $0x39,<z12=%xmm13,<z12=%xmm13
-pshufd $0x39,%xmm13,%xmm13
-
-# qhasm:   z13 <<<= 96
-# asm 1: pshufd $0x39,<z13=int6464#10,<z13=int6464#10
-# asm 2: pshufd $0x39,<z13=%xmm9,<z13=%xmm9
-pshufd $0x39,%xmm9,%xmm9
-
-# qhasm:   z14 <<<= 96
-# asm 1: pshufd $0x39,<z14=int6464#4,<z14=int6464#4
-# asm 2: pshufd $0x39,<z14=%xmm3,<z14=%xmm3
-pshufd $0x39,%xmm3,%xmm3
-
-# qhasm:   z15 <<<= 96
-# asm 1: pshufd $0x39,<z15=int6464#3,<z15=int6464#3
-# asm 2: pshufd $0x39,<z15=%xmm2,<z15=%xmm2
-pshufd $0x39,%xmm2,%xmm2
-
-# qhasm:   (uint32) in12 ^= *(uint32 *) (m + 48)
-# asm 1: xorl 48(<m=int64#2),<in12=int64#3d
-# asm 2: xorl 48(<m=%rsi),<in12=%edx
-xorl 48(%rsi),%edx
-
-# qhasm:   (uint32) in13 ^= *(uint32 *) (m + 52)
-# asm 1: xorl 52(<m=int64#2),<in13=int64#4d
-# asm 2: xorl 52(<m=%rsi),<in13=%ecx
-xorl 52(%rsi),%ecx
-
-# qhasm:   (uint32) in14 ^= *(uint32 *) (m + 56)
-# asm 1: xorl 56(<m=int64#2),<in14=int64#5d
-# asm 2: xorl 56(<m=%rsi),<in14=%r8d
-xorl 56(%rsi),%r8d
-
-# qhasm:   (uint32) in15 ^= *(uint32 *) (m + 60)
-# asm 1: xorl 60(<m=int64#2),<in15=int64#6d
-# asm 2: xorl 60(<m=%rsi),<in15=%r9d
-xorl 60(%rsi),%r9d
-
-# qhasm:   *(uint32 *) (out + 48) = in12
-# asm 1: movl   <in12=int64#3d,48(<out=int64#1)
-# asm 2: movl   <in12=%edx,48(<out=%rdi)
-movl   %edx,48(%rdi)
-
-# qhasm:   *(uint32 *) (out + 52) = in13
-# asm 1: movl   <in13=int64#4d,52(<out=int64#1)
-# asm 2: movl   <in13=%ecx,52(<out=%rdi)
-movl   %ecx,52(%rdi)
-
-# qhasm:   *(uint32 *) (out + 56) = in14
-# asm 1: movl   <in14=int64#5d,56(<out=int64#1)
-# asm 2: movl   <in14=%r8d,56(<out=%rdi)
-movl   %r8d,56(%rdi)
-
-# qhasm:   *(uint32 *) (out + 60) = in15
-# asm 1: movl   <in15=int64#6d,60(<out=int64#1)
-# asm 2: movl   <in15=%r9d,60(<out=%rdi)
-movl   %r9d,60(%rdi)
-
-# qhasm:   in12 = z12
-# asm 1: movd   <z12=int6464#14,>in12=int64#3
-# asm 2: movd   <z12=%xmm13,>in12=%rdx
-movd   %xmm13,%rdx
-
-# qhasm:   in13 = z13
-# asm 1: movd   <z13=int6464#10,>in13=int64#4
-# asm 2: movd   <z13=%xmm9,>in13=%rcx
-movd   %xmm9,%rcx
-
-# qhasm:   in14 = z14
-# asm 1: movd   <z14=int6464#4,>in14=int64#5
-# asm 2: movd   <z14=%xmm3,>in14=%r8
-movd   %xmm3,%r8
-
-# qhasm:   in15 = z15
-# asm 1: movd   <z15=int6464#3,>in15=int64#6
-# asm 2: movd   <z15=%xmm2,>in15=%r9
-movd   %xmm2,%r9
-
-# qhasm:   z12 <<<= 96
-# asm 1: pshufd $0x39,<z12=int6464#14,<z12=int6464#14
-# asm 2: pshufd $0x39,<z12=%xmm13,<z12=%xmm13
-pshufd $0x39,%xmm13,%xmm13
-
-# qhasm:   z13 <<<= 96
-# asm 1: pshufd $0x39,<z13=int6464#10,<z13=int6464#10
-# asm 2: pshufd $0x39,<z13=%xmm9,<z13=%xmm9
-pshufd $0x39,%xmm9,%xmm9
-
-# qhasm:   z14 <<<= 96
-# asm 1: pshufd $0x39,<z14=int6464#4,<z14=int6464#4
-# asm 2: pshufd $0x39,<z14=%xmm3,<z14=%xmm3
-pshufd $0x39,%xmm3,%xmm3
-
-# qhasm:   z15 <<<= 96
-# asm 1: pshufd $0x39,<z15=int6464#3,<z15=int6464#3
-# asm 2: pshufd $0x39,<z15=%xmm2,<z15=%xmm2
-pshufd $0x39,%xmm2,%xmm2
-
-# qhasm:   (uint32) in12 ^= *(uint32 *) (m + 112)
-# asm 1: xorl 112(<m=int64#2),<in12=int64#3d
-# asm 2: xorl 112(<m=%rsi),<in12=%edx
-xorl 112(%rsi),%edx
-
-# qhasm:   (uint32) in13 ^= *(uint32 *) (m + 116)
-# asm 1: xorl 116(<m=int64#2),<in13=int64#4d
-# asm 2: xorl 116(<m=%rsi),<in13=%ecx
-xorl 116(%rsi),%ecx
-
-# qhasm:   (uint32) in14 ^= *(uint32 *) (m + 120)
-# asm 1: xorl 120(<m=int64#2),<in14=int64#5d
-# asm 2: xorl 120(<m=%rsi),<in14=%r8d
-xorl 120(%rsi),%r8d
-
-# qhasm:   (uint32) in15 ^= *(uint32 *) (m + 124)
-# asm 1: xorl 124(<m=int64#2),<in15=int64#6d
-# asm 2: xorl 124(<m=%rsi),<in15=%r9d
-xorl 124(%rsi),%r9d
-
-# qhasm:   *(uint32 *) (out + 112) = in12
-# asm 1: movl   <in12=int64#3d,112(<out=int64#1)
-# asm 2: movl   <in12=%edx,112(<out=%rdi)
-movl   %edx,112(%rdi)
-
-# qhasm:   *(uint32 *) (out + 116) = in13
-# asm 1: movl   <in13=int64#4d,116(<out=int64#1)
-# asm 2: movl   <in13=%ecx,116(<out=%rdi)
-movl   %ecx,116(%rdi)
-
-# qhasm:   *(uint32 *) (out + 120) = in14
-# asm 1: movl   <in14=int64#5d,120(<out=int64#1)
-# asm 2: movl   <in14=%r8d,120(<out=%rdi)
-movl   %r8d,120(%rdi)
-
-# qhasm:   *(uint32 *) (out + 124) = in15
-# asm 1: movl   <in15=int64#6d,124(<out=int64#1)
-# asm 2: movl   <in15=%r9d,124(<out=%rdi)
-movl   %r9d,124(%rdi)
-
-# qhasm:   in12 = z12
-# asm 1: movd   <z12=int6464#14,>in12=int64#3
-# asm 2: movd   <z12=%xmm13,>in12=%rdx
-movd   %xmm13,%rdx
-
-# qhasm:   in13 = z13
-# asm 1: movd   <z13=int6464#10,>in13=int64#4
-# asm 2: movd   <z13=%xmm9,>in13=%rcx
-movd   %xmm9,%rcx
-
-# qhasm:   in14 = z14
-# asm 1: movd   <z14=int6464#4,>in14=int64#5
-# asm 2: movd   <z14=%xmm3,>in14=%r8
-movd   %xmm3,%r8
-
-# qhasm:   in15 = z15
-# asm 1: movd   <z15=int6464#3,>in15=int64#6
-# asm 2: movd   <z15=%xmm2,>in15=%r9
-movd   %xmm2,%r9
-
-# qhasm:   z12 <<<= 96
-# asm 1: pshufd $0x39,<z12=int6464#14,<z12=int6464#14
-# asm 2: pshufd $0x39,<z12=%xmm13,<z12=%xmm13
-pshufd $0x39,%xmm13,%xmm13
-
-# qhasm:   z13 <<<= 96
-# asm 1: pshufd $0x39,<z13=int6464#10,<z13=int6464#10
-# asm 2: pshufd $0x39,<z13=%xmm9,<z13=%xmm9
-pshufd $0x39,%xmm9,%xmm9
-
-# qhasm:   z14 <<<= 96
-# asm 1: pshufd $0x39,<z14=int6464#4,<z14=int6464#4
-# asm 2: pshufd $0x39,<z14=%xmm3,<z14=%xmm3
-pshufd $0x39,%xmm3,%xmm3
-
-# qhasm:   z15 <<<= 96
-# asm 1: pshufd $0x39,<z15=int6464#3,<z15=int6464#3
-# asm 2: pshufd $0x39,<z15=%xmm2,<z15=%xmm2
-pshufd $0x39,%xmm2,%xmm2
-
-# qhasm:   (uint32) in12 ^= *(uint32 *) (m + 176)
-# asm 1: xorl 176(<m=int64#2),<in12=int64#3d
-# asm 2: xorl 176(<m=%rsi),<in12=%edx
-xorl 176(%rsi),%edx
-
-# qhasm:   (uint32) in13 ^= *(uint32 *) (m + 180)
-# asm 1: xorl 180(<m=int64#2),<in13=int64#4d
-# asm 2: xorl 180(<m=%rsi),<in13=%ecx
-xorl 180(%rsi),%ecx
-
-# qhasm:   (uint32) in14 ^= *(uint32 *) (m + 184)
-# asm 1: xorl 184(<m=int64#2),<in14=int64#5d
-# asm 2: xorl 184(<m=%rsi),<in14=%r8d
-xorl 184(%rsi),%r8d
-
-# qhasm:   (uint32) in15 ^= *(uint32 *) (m + 188)
-# asm 1: xorl 188(<m=int64#2),<in15=int64#6d
-# asm 2: xorl 188(<m=%rsi),<in15=%r9d
-xorl 188(%rsi),%r9d
-
-# qhasm:   *(uint32 *) (out + 176) = in12
-# asm 1: movl   <in12=int64#3d,176(<out=int64#1)
-# asm 2: movl   <in12=%edx,176(<out=%rdi)
-movl   %edx,176(%rdi)
-
-# qhasm:   *(uint32 *) (out + 180) = in13
-# asm 1: movl   <in13=int64#4d,180(<out=int64#1)
-# asm 2: movl   <in13=%ecx,180(<out=%rdi)
-movl   %ecx,180(%rdi)
-
-# qhasm:   *(uint32 *) (out + 184) = in14
-# asm 1: movl   <in14=int64#5d,184(<out=int64#1)
-# asm 2: movl   <in14=%r8d,184(<out=%rdi)
-movl   %r8d,184(%rdi)
-
-# qhasm:   *(uint32 *) (out + 188) = in15
-# asm 1: movl   <in15=int64#6d,188(<out=int64#1)
-# asm 2: movl   <in15=%r9d,188(<out=%rdi)
-movl   %r9d,188(%rdi)
-
-# qhasm:   in12 = z12
-# asm 1: movd   <z12=int6464#14,>in12=int64#3
-# asm 2: movd   <z12=%xmm13,>in12=%rdx
-movd   %xmm13,%rdx
-
-# qhasm:   in13 = z13
-# asm 1: movd   <z13=int6464#10,>in13=int64#4
-# asm 2: movd   <z13=%xmm9,>in13=%rcx
-movd   %xmm9,%rcx
-
-# qhasm:   in14 = z14
-# asm 1: movd   <z14=int6464#4,>in14=int64#5
-# asm 2: movd   <z14=%xmm3,>in14=%r8
-movd   %xmm3,%r8
-
-# qhasm:   in15 = z15
-# asm 1: movd   <z15=int6464#3,>in15=int64#6
-# asm 2: movd   <z15=%xmm2,>in15=%r9
-movd   %xmm2,%r9
-
-# qhasm:   (uint32) in12 ^= *(uint32 *) (m + 240)
-# asm 1: xorl 240(<m=int64#2),<in12=int64#3d
-# asm 2: xorl 240(<m=%rsi),<in12=%edx
-xorl 240(%rsi),%edx
-
-# qhasm:   (uint32) in13 ^= *(uint32 *) (m + 244)
-# asm 1: xorl 244(<m=int64#2),<in13=int64#4d
-# asm 2: xorl 244(<m=%rsi),<in13=%ecx
-xorl 244(%rsi),%ecx
-
-# qhasm:   (uint32) in14 ^= *(uint32 *) (m + 248)
-# asm 1: xorl 248(<m=int64#2),<in14=int64#5d
-# asm 2: xorl 248(<m=%rsi),<in14=%r8d
-xorl 248(%rsi),%r8d
-
-# qhasm:   (uint32) in15 ^= *(uint32 *) (m + 252)
-# asm 1: xorl 252(<m=int64#2),<in15=int64#6d
-# asm 2: xorl 252(<m=%rsi),<in15=%r9d
-xorl 252(%rsi),%r9d
-
-# qhasm:   *(uint32 *) (out + 240) = in12
-# asm 1: movl   <in12=int64#3d,240(<out=int64#1)
-# asm 2: movl   <in12=%edx,240(<out=%rdi)
-movl   %edx,240(%rdi)
-
-# qhasm:   *(uint32 *) (out + 244) = in13
-# asm 1: movl   <in13=int64#4d,244(<out=int64#1)
-# asm 2: movl   <in13=%ecx,244(<out=%rdi)
-movl   %ecx,244(%rdi)
-
-# qhasm:   *(uint32 *) (out + 248) = in14
-# asm 1: movl   <in14=int64#5d,248(<out=int64#1)
-# asm 2: movl   <in14=%r8d,248(<out=%rdi)
-movl   %r8d,248(%rdi)
-
-# qhasm:   *(uint32 *) (out + 252) = in15
-# asm 1: movl   <in15=int64#6d,252(<out=int64#1)
-# asm 2: movl   <in15=%r9d,252(<out=%rdi)
-movl   %r9d,252(%rdi)
-
-# qhasm:   bytes = bytes_backup
-# asm 1: movq <bytes_backup=stack64#8,>bytes=int64#6
-# asm 2: movq <bytes_backup=408(%rsp),>bytes=%r9
-movq 408(%rsp),%r9
-
-# qhasm:   bytes -= 256
-# asm 1: sub  $256,<bytes=int64#6
-# asm 2: sub  $256,<bytes=%r9
-sub  $256,%r9
-
-# qhasm:   m += 256
-# asm 1: add  $256,<m=int64#2
-# asm 2: add  $256,<m=%rsi
-add  $256,%rsi
-
-# qhasm:   out += 256
-# asm 1: add  $256,<out=int64#1
-# asm 2: add  $256,<out=%rdi
-add  $256,%rdi
-
-# qhasm:                            unsigned<? bytes - 256
-# asm 1: cmp  $256,<bytes=int64#6
-# asm 2: cmp  $256,<bytes=%r9
-cmp  $256,%r9
-# comment:fp stack unchanged by jump
-
-# qhasm:   goto bytesatleast256 if !unsigned<
-jae ._bytesatleast256
-
-# qhasm:                 unsigned>? bytes - 0
-# asm 1: cmp  $0,<bytes=int64#6
-# asm 2: cmp  $0,<bytes=%r9
-cmp  $0,%r9
-# comment:fp stack unchanged by jump
-
-# qhasm:   goto done if !unsigned>
-jbe ._done
-# comment:fp stack unchanged by fallthrough
-
-# qhasm: bytesbetween1and255:
-._bytesbetween1and255:   # r9 = bytes still to process; the fall-through entry just above guarantees 0 < bytes < 256
-
-# qhasm:                   unsigned<? bytes - 64
-# asm 1: cmp  $64,<bytes=int64#6
-# asm 2: cmp  $64,<bytes=%r9
-cmp  $64,%r9   # is there at least one whole 64-byte block left?
-# comment:fp stack unchanged by jump
-
-# qhasm:   goto nocopy if !unsigned<
-jae ._nocopy   # bytes >= 64 (unsigned): work directly on the caller's buffers
-
-# qhasm:     ctarget = out
-# asm 1: mov  <out=int64#1,>ctarget=int64#3
-# asm 2: mov  <out=%rdi,>ctarget=%rdx
-mov  %rdi,%rdx   # keep the real destination in rdx; presumably the result is copied there from tmp later (outside this view)
-
-# qhasm:     out = &tmp
-# asm 1: leaq <tmp=stack512#1,>out=int64#1
-# asm 2: leaq <tmp=416(%rsp),>out=%rdi
-leaq 416(%rsp),%rdi   # rdi = &tmp (512-bit = 64-byte stack slot), destination for the copy below
-
-# qhasm:     i = bytes
-# asm 1: mov  <bytes=int64#6,>i=int64#4
-# asm 2: mov  <bytes=%r9,>i=%rcx
-mov  %r9,%rcx   # rcx = repeat count for rep movsb (fewer than 64 on this path)
-
-# qhasm:     while (i) { *out++ = *m++; --i }
-rep movsb   # copy rcx bytes from (%rsi) to (%rdi); advances rsi and rdi, leaves rcx = 0; forward copy assumes DF is clear
-
-# qhasm:     out = &tmp
-# asm 1: leaq <tmp=stack512#1,>out=int64#1
-# asm 2: leaq <tmp=416(%rsp),>out=%rdi
-leaq 416(%rsp),%rdi   # rep movsb moved rdi past the copied bytes; point out back at the start of tmp
-
-# qhasm:     m = &tmp
-# asm 1: leaq <tmp=stack512#1,>m=int64#2
-# asm 2: leaq <tmp=416(%rsp),>m=%rsi
-leaq 416(%rsp),%rsi   # input now also comes from tmp, so the 64-byte block code below reads and writes only inside tmp
-# comment:fp stack unchanged by fallthrough
-
-# qhasm:   nocopy:
-._nocopy:   # set up one 64-byte block: working copy of the state in xmm0-xmm3, round counter in rcx
-
-# qhasm:   bytes_backup = bytes
-# asm 1: movq <bytes=int64#6,>bytes_backup=stack64#8
-# asm 2: movq <bytes=%r9,>bytes_backup=408(%rsp)
-movq %r9,408(%rsp)   # r9 is reused as scratch while the block is written out, so park the byte count; it is reloaded after the stores
-
-# qhasm: diag0 = x0
-# asm 1: movdqa <x0=stack128#4,>diag0=int6464#1
-# asm 2: movdqa <x0=48(%rsp),>diag0=%xmm0
-movdqa 48(%rsp),%xmm0   # movdqa requires a 16-byte-aligned address: %rsp is assumed 16-byte aligned here (set up outside this view)
-
-# qhasm: diag1 = x1
-# asm 1: movdqa <x1=stack128#1,>diag1=int6464#2
-# asm 2: movdqa <x1=0(%rsp),>diag1=%xmm1
-movdqa 0(%rsp),%xmm1
-
-# qhasm: diag2 = x2
-# asm 1: movdqa <x2=stack128#2,>diag2=int6464#3
-# asm 2: movdqa <x2=16(%rsp),>diag2=%xmm2
-movdqa 16(%rsp),%xmm2
-
-# qhasm: diag3 = x3
-# asm 1: movdqa <x3=stack128#3,>diag3=int6464#4
-# asm 2: movdqa <x3=32(%rsp),>diag3=%xmm3
-movdqa 32(%rsp),%xmm3   # x0..x3 stay unmodified on the stack; they are added back to diag0..diag3 after the rounds
-
-# qhasm:                     a0 = diag1
-# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
-# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
-movdqa %xmm1,%xmm4   # pre-load a0 for the first step of the loop; every loop pass re-creates it near its end
-
-# qhasm: i = 12
-# asm 1: mov  $12,>i=int64#4
-# asm 2: mov  $12,>i=%rcx
-mov  $12,%rcx   # 12 rounds in total; each loop pass performs 4 and subtracts 4, i.e. 3 passes
-
-# qhasm: mainloop2:
-._mainloop2:   # one pass = 4 rounds over the four 32-bit lanes of diag0..diag3 (xmm0..xmm3); rcx = rounds still to do
-
-# qhasm: uint32323232        a0 += diag0
-# asm 1: paddd <diag0=int6464#1,<a0=int6464#5
-# asm 2: paddd <diag0=%xmm0,<a0=%xmm4
-paddd %xmm0,%xmm4   # round 1: a0 = diag1 + diag0 (a0 arrives pre-loaded with diag1)
-
-# qhasm:                                 a1 = diag0
-# asm 1: movdqa <diag0=int6464#1,>a1=int6464#6
-# asm 2: movdqa <diag0=%xmm0,>a1=%xmm5
-movdqa %xmm0,%xmm5   # a1 = diag0, first operand of the next step
-
-# qhasm:                     b0 = a0
-# asm 1: movdqa <a0=int6464#5,>b0=int6464#7
-# asm 2: movdqa <a0=%xmm4,>b0=%xmm6
-movdqa %xmm4,%xmm6   # second copy of the sum, for the right-shift half of the rotate
-
-# qhasm: uint32323232        a0 <<= 7
-# asm 1: pslld $7,<a0=int6464#5
-# asm 2: pslld $7,<a0=%xmm4
-pslld $7,%xmm4   # a 32-bit lane rotate by k is built from pslld k, psrld (32-k) and two pxor into the target
-
-# qhasm: uint32323232        b0 >>= 25
-# asm 1: psrld $25,<b0=int6464#7
-# asm 2: psrld $25,<b0=%xmm6
-psrld $25,%xmm6
-
-# qhasm:                 diag3 ^= a0
-# asm 1: pxor  <a0=int6464#5,<diag3=int6464#4
-# asm 2: pxor  <a0=%xmm4,<diag3=%xmm3
-pxor  %xmm4,%xmm3
-
-# qhasm:                 diag3 ^= b0
-# asm 1: pxor  <b0=int6464#7,<diag3=int6464#4
-# asm 2: pxor  <b0=%xmm6,<diag3=%xmm3
-pxor  %xmm6,%xmm3   # diag3 ^= rotl32(diag1 + diag0, 7) is now complete
-
-# qhasm: uint32323232                        a1 += diag3
-# asm 1: paddd <diag3=int6464#4,<a1=int6464#6
-# asm 2: paddd <diag3=%xmm3,<a1=%xmm5
-paddd %xmm3,%xmm5
-
-# qhasm:                                                 a2 = diag3
-# asm 1: movdqa <diag3=int6464#4,>a2=int6464#5
-# asm 2: movdqa <diag3=%xmm3,>a2=%xmm4
-movdqa %xmm3,%xmm4
-
-# qhasm:                                     b1 = a1
-# asm 1: movdqa <a1=int6464#6,>b1=int6464#7
-# asm 2: movdqa <a1=%xmm5,>b1=%xmm6
-movdqa %xmm5,%xmm6
-
-# qhasm: uint32323232                        a1 <<= 9
-# asm 1: pslld $9,<a1=int6464#6
-# asm 2: pslld $9,<a1=%xmm5
-pslld $9,%xmm5
-
-# qhasm: uint32323232                        b1 >>= 23
-# asm 1: psrld $23,<b1=int6464#7
-# asm 2: psrld $23,<b1=%xmm6
-psrld $23,%xmm6
-
-# qhasm:                                 diag2 ^= a1
-# asm 1: pxor  <a1=int6464#6,<diag2=int6464#3
-# asm 2: pxor  <a1=%xmm5,<diag2=%xmm2
-pxor  %xmm5,%xmm2
-
-# qhasm:                 diag3 <<<= 32
-# asm 1: pshufd $0x93,<diag3=int6464#4,<diag3=int6464#4
-# asm 2: pshufd $0x93,<diag3=%xmm3,<diag3=%xmm3
-pshufd $0x93,%xmm3,%xmm3   # move every 32-bit lane up one position (lane 3 wraps to lane 0)
-
-# qhasm:                                 diag2 ^= b1
-# asm 1: pxor  <b1=int6464#7,<diag2=int6464#3
-# asm 2: pxor  <b1=%xmm6,<diag2=%xmm2
-pxor  %xmm6,%xmm2
-
-# qhasm: uint32323232                                        a2 += diag2
-# asm 1: paddd <diag2=int6464#3,<a2=int6464#5
-# asm 2: paddd <diag2=%xmm2,<a2=%xmm4
-paddd %xmm2,%xmm4
-
-# qhasm:                                                                 a3 = diag2
-# asm 1: movdqa <diag2=int6464#3,>a3=int6464#6
-# asm 2: movdqa <diag2=%xmm2,>a3=%xmm5
-movdqa %xmm2,%xmm5
-
-# qhasm:                                                     b2 = a2
-# asm 1: movdqa <a2=int6464#5,>b2=int6464#7
-# asm 2: movdqa <a2=%xmm4,>b2=%xmm6
-movdqa %xmm4,%xmm6
-
-# qhasm: uint32323232                                        a2 <<= 13
-# asm 1: pslld $13,<a2=int6464#5
-# asm 2: pslld $13,<a2=%xmm4
-pslld $13,%xmm4
-
-# qhasm: uint32323232                                        b2 >>= 19
-# asm 1: psrld $19,<b2=int6464#7
-# asm 2: psrld $19,<b2=%xmm6
-psrld $19,%xmm6
-
-# qhasm:                                                 diag1 ^= a2
-# asm 1: pxor  <a2=int6464#5,<diag1=int6464#2
-# asm 2: pxor  <a2=%xmm4,<diag1=%xmm1
-pxor  %xmm4,%xmm1
-
-# qhasm:                                 diag2 <<<= 64
-# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
-# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
-pshufd $0x4e,%xmm2,%xmm2   # swap the two 64-bit halves
-
-# qhasm:                                                 diag1 ^= b2
-# asm 1: pxor  <b2=int6464#7,<diag1=int6464#2
-# asm 2: pxor  <b2=%xmm6,<diag1=%xmm1
-pxor  %xmm6,%xmm1
-
-# qhasm: uint32323232                                                        a3 += diag1
-# asm 1: paddd <diag1=int6464#2,<a3=int6464#6
-# asm 2: paddd <diag1=%xmm1,<a3=%xmm5
-paddd %xmm1,%xmm5
-
-# qhasm:                 a4 = diag3
-# asm 1: movdqa <diag3=int6464#4,>a4=int6464#5
-# asm 2: movdqa <diag3=%xmm3,>a4=%xmm4
-movdqa %xmm3,%xmm4   # a4 = (already lane-rotated) diag3, first operand of round 2
-
-# qhasm:                                                                     b3 = a3
-# asm 1: movdqa <a3=int6464#6,>b3=int6464#7
-# asm 2: movdqa <a3=%xmm5,>b3=%xmm6
-movdqa %xmm5,%xmm6
-
-# qhasm: uint32323232                                                        a3 <<= 18
-# asm 1: pslld $18,<a3=int6464#6
-# asm 2: pslld $18,<a3=%xmm5
-pslld $18,%xmm5
-
-# qhasm: uint32323232                                                        b3 >>= 14
-# asm 1: psrld $14,<b3=int6464#7
-# asm 2: psrld $14,<b3=%xmm6
-psrld $14,%xmm6
-
-# qhasm:                                                                 diag0 ^= a3
-# asm 1: pxor  <a3=int6464#6,<diag0=int6464#1
-# asm 2: pxor  <a3=%xmm5,<diag0=%xmm0
-pxor  %xmm5,%xmm0
-
-# qhasm:                                                 diag1 <<<= 96
-# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
-# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
-pshufd $0x39,%xmm1,%xmm1   # move every 32-bit lane down one position (lane 0 wraps to lane 3)
-
-# qhasm:                                                                 diag0 ^= b3
-# asm 1: pxor  <b3=int6464#7,<diag0=int6464#1
-# asm 2: pxor  <b3=%xmm6,<diag0=%xmm0
-pxor  %xmm6,%xmm0   # last step (rotate by 18) of round 1
-
-# qhasm: uint32323232        a4 += diag0
-# asm 1: paddd <diag0=int6464#1,<a4=int6464#5
-# asm 2: paddd <diag0=%xmm0,<a4=%xmm4
-paddd %xmm0,%xmm4   # round 2: the same four steps with the roles of diag1 and diag3 exchanged
-
-# qhasm:                                 a5 = diag0
-# asm 1: movdqa <diag0=int6464#1,>a5=int6464#6
-# asm 2: movdqa <diag0=%xmm0,>a5=%xmm5
-movdqa %xmm0,%xmm5
-
-# qhasm:                     b4 = a4
-# asm 1: movdqa <a4=int6464#5,>b4=int6464#7
-# asm 2: movdqa <a4=%xmm4,>b4=%xmm6
-movdqa %xmm4,%xmm6
-
-# qhasm: uint32323232        a4 <<= 7
-# asm 1: pslld $7,<a4=int6464#5
-# asm 2: pslld $7,<a4=%xmm4
-pslld $7,%xmm4
-
-# qhasm: uint32323232        b4 >>= 25
-# asm 1: psrld $25,<b4=int6464#7
-# asm 2: psrld $25,<b4=%xmm6
-psrld $25,%xmm6
-
-# qhasm:                 diag1 ^= a4
-# asm 1: pxor  <a4=int6464#5,<diag1=int6464#2
-# asm 2: pxor  <a4=%xmm4,<diag1=%xmm1
-pxor  %xmm4,%xmm1
-
-# qhasm:                 diag1 ^= b4
-# asm 1: pxor  <b4=int6464#7,<diag1=int6464#2
-# asm 2: pxor  <b4=%xmm6,<diag1=%xmm1
-pxor  %xmm6,%xmm1
-
-# qhasm: uint32323232                        a5 += diag1
-# asm 1: paddd <diag1=int6464#2,<a5=int6464#6
-# asm 2: paddd <diag1=%xmm1,<a5=%xmm5
-paddd %xmm1,%xmm5
-
-# qhasm:                                                 a6 = diag1
-# asm 1: movdqa <diag1=int6464#2,>a6=int6464#5
-# asm 2: movdqa <diag1=%xmm1,>a6=%xmm4
-movdqa %xmm1,%xmm4
-
-# qhasm:                                     b5 = a5
-# asm 1: movdqa <a5=int6464#6,>b5=int6464#7
-# asm 2: movdqa <a5=%xmm5,>b5=%xmm6
-movdqa %xmm5,%xmm6
-
-# qhasm: uint32323232                        a5 <<= 9
-# asm 1: pslld $9,<a5=int6464#6
-# asm 2: pslld $9,<a5=%xmm5
-pslld $9,%xmm5
-
-# qhasm: uint32323232                        b5 >>= 23
-# asm 1: psrld $23,<b5=int6464#7
-# asm 2: psrld $23,<b5=%xmm6
-psrld $23,%xmm6
-
-# qhasm:                                 diag2 ^= a5
-# asm 1: pxor  <a5=int6464#6,<diag2=int6464#3
-# asm 2: pxor  <a5=%xmm5,<diag2=%xmm2
-pxor  %xmm5,%xmm2
-
-# qhasm:                 diag1 <<<= 32
-# asm 1: pshufd $0x93,<diag1=int6464#2,<diag1=int6464#2
-# asm 2: pshufd $0x93,<diag1=%xmm1,<diag1=%xmm1
-pshufd $0x93,%xmm1,%xmm1
-
-# qhasm:                                 diag2 ^= b5
-# asm 1: pxor  <b5=int6464#7,<diag2=int6464#3
-# asm 2: pxor  <b5=%xmm6,<diag2=%xmm2
-pxor  %xmm6,%xmm2
-
-# qhasm: uint32323232                                        a6 += diag2
-# asm 1: paddd <diag2=int6464#3,<a6=int6464#5
-# asm 2: paddd <diag2=%xmm2,<a6=%xmm4
-paddd %xmm2,%xmm4
-
-# qhasm:                                                                 a7 = diag2
-# asm 1: movdqa <diag2=int6464#3,>a7=int6464#6
-# asm 2: movdqa <diag2=%xmm2,>a7=%xmm5
-movdqa %xmm2,%xmm5
-
-# qhasm:                                                     b6 = a6
-# asm 1: movdqa <a6=int6464#5,>b6=int6464#7
-# asm 2: movdqa <a6=%xmm4,>b6=%xmm6
-movdqa %xmm4,%xmm6
-
-# qhasm: uint32323232                                        a6 <<= 13
-# asm 1: pslld $13,<a6=int6464#5
-# asm 2: pslld $13,<a6=%xmm4
-pslld $13,%xmm4
-
-# qhasm: uint32323232                                        b6 >>= 19
-# asm 1: psrld $19,<b6=int6464#7
-# asm 2: psrld $19,<b6=%xmm6
-psrld $19,%xmm6
-
-# qhasm:                                                 diag3 ^= a6
-# asm 1: pxor  <a6=int6464#5,<diag3=int6464#4
-# asm 2: pxor  <a6=%xmm4,<diag3=%xmm3
-pxor  %xmm4,%xmm3
-
-# qhasm:                                 diag2 <<<= 64
-# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
-# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
-pshufd $0x4e,%xmm2,%xmm2
-
-# qhasm:                                                 diag3 ^= b6
-# asm 1: pxor  <b6=int6464#7,<diag3=int6464#4
-# asm 2: pxor  <b6=%xmm6,<diag3=%xmm3
-pxor  %xmm6,%xmm3
-
-# qhasm: uint32323232                                                        a7 += diag3
-# asm 1: paddd <diag3=int6464#4,<a7=int6464#6
-# asm 2: paddd <diag3=%xmm3,<a7=%xmm5
-paddd %xmm3,%xmm5
-
-# qhasm:                 a0 = diag1
-# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
-# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
-movdqa %xmm1,%xmm4   # a0 = diag1 again, first operand of round 3
-
-# qhasm:                                                                     b7 = a7
-# asm 1: movdqa <a7=int6464#6,>b7=int6464#7
-# asm 2: movdqa <a7=%xmm5,>b7=%xmm6
-movdqa %xmm5,%xmm6
-
-# qhasm: uint32323232                                                        a7 <<= 18
-# asm 1: pslld $18,<a7=int6464#6
-# asm 2: pslld $18,<a7=%xmm5
-pslld $18,%xmm5
-
-# qhasm: uint32323232                                                        b7 >>= 14
-# asm 1: psrld $14,<b7=int6464#7
-# asm 2: psrld $14,<b7=%xmm6
-psrld $14,%xmm6
-
-# qhasm:                                                                 diag0 ^= a7
-# asm 1: pxor  <a7=int6464#6,<diag0=int6464#1
-# asm 2: pxor  <a7=%xmm5,<diag0=%xmm0
-pxor  %xmm5,%xmm0
-
-# qhasm:                                                 diag3 <<<= 96
-# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
-# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
-pshufd $0x39,%xmm3,%xmm3
-
-# qhasm:                                                                 diag0 ^= b7
-# asm 1: pxor  <b7=int6464#7,<diag0=int6464#1
-# asm 2: pxor  <b7=%xmm6,<diag0=%xmm0
-pxor  %xmm6,%xmm0   # last step (rotate by 18) of round 2
-
-# qhasm: uint32323232        a0 += diag0
-# asm 1: paddd <diag0=int6464#1,<a0=int6464#5
-# asm 2: paddd <diag0=%xmm0,<a0=%xmm4
-paddd %xmm0,%xmm4   # round 3: same instruction sequence as round 1
-
-# qhasm:                                 a1 = diag0
-# asm 1: movdqa <diag0=int6464#1,>a1=int6464#6
-# asm 2: movdqa <diag0=%xmm0,>a1=%xmm5
-movdqa %xmm0,%xmm5
-
-# qhasm:                     b0 = a0
-# asm 1: movdqa <a0=int6464#5,>b0=int6464#7
-# asm 2: movdqa <a0=%xmm4,>b0=%xmm6
-movdqa %xmm4,%xmm6
-
-# qhasm: uint32323232        a0 <<= 7
-# asm 1: pslld $7,<a0=int6464#5
-# asm 2: pslld $7,<a0=%xmm4
-pslld $7,%xmm4
-
-# qhasm: uint32323232        b0 >>= 25
-# asm 1: psrld $25,<b0=int6464#7
-# asm 2: psrld $25,<b0=%xmm6
-psrld $25,%xmm6
-
-# qhasm:                 diag3 ^= a0
-# asm 1: pxor  <a0=int6464#5,<diag3=int6464#4
-# asm 2: pxor  <a0=%xmm4,<diag3=%xmm3
-pxor  %xmm4,%xmm3
-
-# qhasm:                 diag3 ^= b0
-# asm 1: pxor  <b0=int6464#7,<diag3=int6464#4
-# asm 2: pxor  <b0=%xmm6,<diag3=%xmm3
-pxor  %xmm6,%xmm3
-
-# qhasm: uint32323232                        a1 += diag3
-# asm 1: paddd <diag3=int6464#4,<a1=int6464#6
-# asm 2: paddd <diag3=%xmm3,<a1=%xmm5
-paddd %xmm3,%xmm5
-
-# qhasm:                                                 a2 = diag3
-# asm 1: movdqa <diag3=int6464#4,>a2=int6464#5
-# asm 2: movdqa <diag3=%xmm3,>a2=%xmm4
-movdqa %xmm3,%xmm4
-
-# qhasm:                                     b1 = a1
-# asm 1: movdqa <a1=int6464#6,>b1=int6464#7
-# asm 2: movdqa <a1=%xmm5,>b1=%xmm6
-movdqa %xmm5,%xmm6
-
-# qhasm: uint32323232                        a1 <<= 9
-# asm 1: pslld $9,<a1=int6464#6
-# asm 2: pslld $9,<a1=%xmm5
-pslld $9,%xmm5
-
-# qhasm: uint32323232                        b1 >>= 23
-# asm 1: psrld $23,<b1=int6464#7
-# asm 2: psrld $23,<b1=%xmm6
-psrld $23,%xmm6
-
-# qhasm:                                 diag2 ^= a1
-# asm 1: pxor  <a1=int6464#6,<diag2=int6464#3
-# asm 2: pxor  <a1=%xmm5,<diag2=%xmm2
-pxor  %xmm5,%xmm2
-
-# qhasm:                 diag3 <<<= 32
-# asm 1: pshufd $0x93,<diag3=int6464#4,<diag3=int6464#4
-# asm 2: pshufd $0x93,<diag3=%xmm3,<diag3=%xmm3
-pshufd $0x93,%xmm3,%xmm3
-
-# qhasm:                                 diag2 ^= b1
-# asm 1: pxor  <b1=int6464#7,<diag2=int6464#3
-# asm 2: pxor  <b1=%xmm6,<diag2=%xmm2
-pxor  %xmm6,%xmm2
-
-# qhasm: uint32323232                                        a2 += diag2
-# asm 1: paddd <diag2=int6464#3,<a2=int6464#5
-# asm 2: paddd <diag2=%xmm2,<a2=%xmm4
-paddd %xmm2,%xmm4
-
-# qhasm:                                                                 a3 = diag2
-# asm 1: movdqa <diag2=int6464#3,>a3=int6464#6
-# asm 2: movdqa <diag2=%xmm2,>a3=%xmm5
-movdqa %xmm2,%xmm5
-
-# qhasm:                                                     b2 = a2
-# asm 1: movdqa <a2=int6464#5,>b2=int6464#7
-# asm 2: movdqa <a2=%xmm4,>b2=%xmm6
-movdqa %xmm4,%xmm6
-
-# qhasm: uint32323232                                        a2 <<= 13
-# asm 1: pslld $13,<a2=int6464#5
-# asm 2: pslld $13,<a2=%xmm4
-pslld $13,%xmm4
-
-# qhasm: uint32323232                                        b2 >>= 19
-# asm 1: psrld $19,<b2=int6464#7
-# asm 2: psrld $19,<b2=%xmm6
-psrld $19,%xmm6
-
-# qhasm:                                                 diag1 ^= a2
-# asm 1: pxor  <a2=int6464#5,<diag1=int6464#2
-# asm 2: pxor  <a2=%xmm4,<diag1=%xmm1
-pxor  %xmm4,%xmm1
-
-# qhasm:                                 diag2 <<<= 64
-# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
-# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
-pshufd $0x4e,%xmm2,%xmm2
-
-# qhasm:                                                 diag1 ^= b2
-# asm 1: pxor  <b2=int6464#7,<diag1=int6464#2
-# asm 2: pxor  <b2=%xmm6,<diag1=%xmm1
-pxor  %xmm6,%xmm1
-
-# qhasm: uint32323232                                                        a3 += diag1
-# asm 1: paddd <diag1=int6464#2,<a3=int6464#6
-# asm 2: paddd <diag1=%xmm1,<a3=%xmm5
-paddd %xmm1,%xmm5
-
-# qhasm:                 a4 = diag3
-# asm 1: movdqa <diag3=int6464#4,>a4=int6464#5
-# asm 2: movdqa <diag3=%xmm3,>a4=%xmm4
-movdqa %xmm3,%xmm4
-
-# qhasm:                                                                     b3 = a3
-# asm 1: movdqa <a3=int6464#6,>b3=int6464#7
-# asm 2: movdqa <a3=%xmm5,>b3=%xmm6
-movdqa %xmm5,%xmm6
-
-# qhasm: uint32323232                                                        a3 <<= 18
-# asm 1: pslld $18,<a3=int6464#6
-# asm 2: pslld $18,<a3=%xmm5
-pslld $18,%xmm5
-
-# qhasm: uint32323232                                                        b3 >>= 14
-# asm 1: psrld $14,<b3=int6464#7
-# asm 2: psrld $14,<b3=%xmm6
-psrld $14,%xmm6
-
-# qhasm:                                                                 diag0 ^= a3
-# asm 1: pxor  <a3=int6464#6,<diag0=int6464#1
-# asm 2: pxor  <a3=%xmm5,<diag0=%xmm0
-pxor  %xmm5,%xmm0
-
-# qhasm:                                                 diag1 <<<= 96
-# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
-# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
-pshufd $0x39,%xmm1,%xmm1
-
-# qhasm:                                                                 diag0 ^= b3
-# asm 1: pxor  <b3=int6464#7,<diag0=int6464#1
-# asm 2: pxor  <b3=%xmm6,<diag0=%xmm0
-pxor  %xmm6,%xmm0   # last step (rotate by 18) of round 3
-
-# qhasm: uint32323232        a4 += diag0
-# asm 1: paddd <diag0=int6464#1,<a4=int6464#5
-# asm 2: paddd <diag0=%xmm0,<a4=%xmm4
-paddd %xmm0,%xmm4   # round 4: same steps as round 2, with the loop bookkeeping interleaved
-
-# qhasm:                                 a5 = diag0
-# asm 1: movdqa <diag0=int6464#1,>a5=int6464#6
-# asm 2: movdqa <diag0=%xmm0,>a5=%xmm5
-movdqa %xmm0,%xmm5
-
-# qhasm:                     b4 = a4
-# asm 1: movdqa <a4=int6464#5,>b4=int6464#7
-# asm 2: movdqa <a4=%xmm4,>b4=%xmm6
-movdqa %xmm4,%xmm6
-
-# qhasm: uint32323232        a4 <<= 7
-# asm 1: pslld $7,<a4=int6464#5
-# asm 2: pslld $7,<a4=%xmm4
-pslld $7,%xmm4
-
-# qhasm: uint32323232        b4 >>= 25
-# asm 1: psrld $25,<b4=int6464#7
-# asm 2: psrld $25,<b4=%xmm6
-psrld $25,%xmm6
-
-# qhasm:                 diag1 ^= a4
-# asm 1: pxor  <a4=int6464#5,<diag1=int6464#2
-# asm 2: pxor  <a4=%xmm4,<diag1=%xmm1
-pxor  %xmm4,%xmm1
-
-# qhasm:                 diag1 ^= b4
-# asm 1: pxor  <b4=int6464#7,<diag1=int6464#2
-# asm 2: pxor  <b4=%xmm6,<diag1=%xmm1
-pxor  %xmm6,%xmm1
-
-# qhasm: uint32323232                        a5 += diag1
-# asm 1: paddd <diag1=int6464#2,<a5=int6464#6
-# asm 2: paddd <diag1=%xmm1,<a5=%xmm5
-paddd %xmm1,%xmm5
-
-# qhasm:                                                 a6 = diag1
-# asm 1: movdqa <diag1=int6464#2,>a6=int6464#5
-# asm 2: movdqa <diag1=%xmm1,>a6=%xmm4
-movdqa %xmm1,%xmm4
-
-# qhasm:                                     b5 = a5
-# asm 1: movdqa <a5=int6464#6,>b5=int6464#7
-# asm 2: movdqa <a5=%xmm5,>b5=%xmm6
-movdqa %xmm5,%xmm6
-
-# qhasm: uint32323232                        a5 <<= 9
-# asm 1: pslld $9,<a5=int6464#6
-# asm 2: pslld $9,<a5=%xmm5
-pslld $9,%xmm5
-
-# qhasm: uint32323232                        b5 >>= 23
-# asm 1: psrld $23,<b5=int6464#7
-# asm 2: psrld $23,<b5=%xmm6
-psrld $23,%xmm6
-
-# qhasm:                                 diag2 ^= a5
-# asm 1: pxor  <a5=int6464#6,<diag2=int6464#3
-# asm 2: pxor  <a5=%xmm5,<diag2=%xmm2
-pxor  %xmm5,%xmm2
-
-# qhasm:                 diag1 <<<= 32
-# asm 1: pshufd $0x93,<diag1=int6464#2,<diag1=int6464#2
-# asm 2: pshufd $0x93,<diag1=%xmm1,<diag1=%xmm1
-pshufd $0x93,%xmm1,%xmm1
-
-# qhasm:                                 diag2 ^= b5
-# asm 1: pxor  <b5=int6464#7,<diag2=int6464#3
-# asm 2: pxor  <b5=%xmm6,<diag2=%xmm2
-pxor  %xmm6,%xmm2
-
-# qhasm: uint32323232                                        a6 += diag2
-# asm 1: paddd <diag2=int6464#3,<a6=int6464#5
-# asm 2: paddd <diag2=%xmm2,<a6=%xmm4
-paddd %xmm2,%xmm4
-
-# qhasm:                                                                 a7 = diag2
-# asm 1: movdqa <diag2=int6464#3,>a7=int6464#6
-# asm 2: movdqa <diag2=%xmm2,>a7=%xmm5
-movdqa %xmm2,%xmm5
-
-# qhasm:                                                     b6 = a6
-# asm 1: movdqa <a6=int6464#5,>b6=int6464#7
-# asm 2: movdqa <a6=%xmm4,>b6=%xmm6
-movdqa %xmm4,%xmm6
-
-# qhasm: uint32323232                                        a6 <<= 13
-# asm 1: pslld $13,<a6=int6464#5
-# asm 2: pslld $13,<a6=%xmm4
-pslld $13,%xmm4
-
-# qhasm: uint32323232                                        b6 >>= 19
-# asm 1: psrld $19,<b6=int6464#7
-# asm 2: psrld $19,<b6=%xmm6
-psrld $19,%xmm6
-
-# qhasm:                                                 diag3 ^= a6
-# asm 1: pxor  <a6=int6464#5,<diag3=int6464#4
-# asm 2: pxor  <a6=%xmm4,<diag3=%xmm3
-pxor  %xmm4,%xmm3
-
-# qhasm:                                 diag2 <<<= 64
-# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
-# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
-pshufd $0x4e,%xmm2,%xmm2
-
-# qhasm:                                                 diag3 ^= b6
-# asm 1: pxor  <b6=int6464#7,<diag3=int6464#4
-# asm 2: pxor  <b6=%xmm6,<diag3=%xmm3
-pxor  %xmm6,%xmm3
-
-# qhasm:                  unsigned>? i -= 4
-# asm 1: sub  $4,<i=int64#4
-# asm 2: sub  $4,<i=%rcx
-sub  $4,%rcx   # sets the flags tested by ja at the loop bottom; only SSE instructions follow, and they leave EFLAGS untouched
-
-# qhasm: uint32323232                                                        a7 += diag3
-# asm 1: paddd <diag3=int6464#4,<a7=int6464#6
-# asm 2: paddd <diag3=%xmm3,<a7=%xmm5
-paddd %xmm3,%xmm5
-
-# qhasm:                 a0 = diag1
-# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
-# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
-movdqa %xmm1,%xmm4   # a0 = diag1 for the next pass, matching the pre-load done before loop entry
-
-# qhasm:                                                                     b7 = a7
-# asm 1: movdqa <a7=int6464#6,>b7=int6464#7
-# asm 2: movdqa <a7=%xmm5,>b7=%xmm6
-movdqa %xmm5,%xmm6
-
-# qhasm: uint32323232                                                        a7 <<= 18
-# asm 1: pslld $18,<a7=int6464#6
-# asm 2: pslld $18,<a7=%xmm5
-pslld $18,%xmm5
-
-# qhasm:                 b0 = 0
-# asm 1: pxor   >b0=int6464#8,>b0=int6464#8
-# asm 2: pxor   >b0=%xmm7,>b0=%xmm7
-pxor   %xmm7,%xmm7   # zeroes xmm7; NOTE(review): xmm7 is not read anywhere in the visible code - looks like a leftover, confirm before removing
-
-# qhasm: uint32323232                                                        b7 >>= 14
-# asm 1: psrld $14,<b7=int6464#7
-# asm 2: psrld $14,<b7=%xmm6
-psrld $14,%xmm6
-
-# qhasm:                                                                 diag0 ^= a7
-# asm 1: pxor  <a7=int6464#6,<diag0=int6464#1
-# asm 2: pxor  <a7=%xmm5,<diag0=%xmm0
-pxor  %xmm5,%xmm0
-
-# qhasm:                                                 diag3 <<<= 96
-# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
-# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
-pshufd $0x39,%xmm3,%xmm3
-
-# qhasm:                                                                 diag0 ^= b7
-# asm 1: pxor  <b7=int6464#7,<diag0=int6464#1
-# asm 2: pxor  <b7=%xmm6,<diag0=%xmm0
-pxor  %xmm6,%xmm0   # last step (rotate by 18) of round 4
-# comment:fp stack unchanged by jump
-
-# qhasm: goto mainloop2 if unsigned>
-ja ._mainloop2   # taken while rcx was still above zero after the subtract, i.e. more rounds remain
-
-# qhasm: uint32323232 diag0 += x0
-# asm 1: paddd <x0=stack128#4,<diag0=int6464#1
-# asm 2: paddd <x0=48(%rsp),<diag0=%xmm0
-paddd 48(%rsp),%xmm0
-
-# qhasm: uint32323232 diag1 += x1
-# asm 1: paddd <x1=stack128#1,<diag1=int6464#2
-# asm 2: paddd <x1=0(%rsp),<diag1=%xmm1
-paddd 0(%rsp),%xmm1
-
-# qhasm: uint32323232 diag2 += x2
-# asm 1: paddd <x2=stack128#2,<diag2=int6464#3
-# asm 2: paddd <x2=16(%rsp),<diag2=%xmm2
-paddd 16(%rsp),%xmm2
-
-# qhasm: uint32323232 diag3 += x3
-# asm 1: paddd <x3=stack128#3,<diag3=int6464#4
-# asm 2: paddd <x3=32(%rsp),<diag3=%xmm3
-paddd 32(%rsp),%xmm3
-
-# qhasm: in0 = diag0
-# asm 1: movd   <diag0=int6464#1,>in0=int64#4
-# asm 2: movd   <diag0=%xmm0,>in0=%rcx
-movd   %xmm0,%rcx
-
-# qhasm: in12 = diag1
-# asm 1: movd   <diag1=int6464#2,>in12=int64#5
-# asm 2: movd   <diag1=%xmm1,>in12=%r8
-movd   %xmm1,%r8
-
-# qhasm: in8 = diag2
-# asm 1: movd   <diag2=int6464#3,>in8=int64#6
-# asm 2: movd   <diag2=%xmm2,>in8=%r9
-movd   %xmm2,%r9
-
-# qhasm: in4 = diag3
-# asm 1: movd   <diag3=int6464#4,>in4=int64#7
-# asm 2: movd   <diag3=%xmm3,>in4=%rax
-movd   %xmm3,%rax
-
-# qhasm: diag0 <<<= 96
-# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
-# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
-pshufd $0x39,%xmm0,%xmm0
-
-# qhasm: diag1 <<<= 96
-# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
-# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
-pshufd $0x39,%xmm1,%xmm1
-
-# qhasm: diag2 <<<= 96
-# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
-# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
-pshufd $0x39,%xmm2,%xmm2
-
-# qhasm: diag3 <<<= 96
-# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
-# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
-pshufd $0x39,%xmm3,%xmm3
-
-# qhasm: (uint32) in0 ^= *(uint32 *) (m + 0)
-# asm 1: xorl 0(<m=int64#2),<in0=int64#4d
-# asm 2: xorl 0(<m=%rsi),<in0=%ecx
-xorl 0(%rsi),%ecx
-
-# qhasm: (uint32) in12 ^= *(uint32 *) (m + 48)
-# asm 1: xorl 48(<m=int64#2),<in12=int64#5d
-# asm 2: xorl 48(<m=%rsi),<in12=%r8d
-xorl 48(%rsi),%r8d
-
-# qhasm: (uint32) in8 ^= *(uint32 *) (m + 32)
-# asm 1: xorl 32(<m=int64#2),<in8=int64#6d
-# asm 2: xorl 32(<m=%rsi),<in8=%r9d
-xorl 32(%rsi),%r9d
-
-# qhasm: (uint32) in4 ^= *(uint32 *) (m + 16)
-# asm 1: xorl 16(<m=int64#2),<in4=int64#7d
-# asm 2: xorl 16(<m=%rsi),<in4=%eax
-xorl 16(%rsi),%eax
-
-# qhasm: *(uint32 *) (out + 0) = in0
-# asm 1: movl   <in0=int64#4d,0(<out=int64#1)
-# asm 2: movl   <in0=%ecx,0(<out=%rdi)
-movl   %ecx,0(%rdi)
-
-# qhasm: *(uint32 *) (out + 48) = in12
-# asm 1: movl   <in12=int64#5d,48(<out=int64#1)
-# asm 2: movl   <in12=%r8d,48(<out=%rdi)
-movl   %r8d,48(%rdi)
-
-# qhasm: *(uint32 *) (out + 32) = in8
-# asm 1: movl   <in8=int64#6d,32(<out=int64#1)
-# asm 2: movl   <in8=%r9d,32(<out=%rdi)
-movl   %r9d,32(%rdi)
-
-# qhasm: *(uint32 *) (out + 16) = in4
-# asm 1: movl   <in4=int64#7d,16(<out=int64#1)
-# asm 2: movl   <in4=%eax,16(<out=%rdi)
-movl   %eax,16(%rdi)
-
-# qhasm: in5 = diag0
-# asm 1: movd   <diag0=int6464#1,>in5=int64#4
-# asm 2: movd   <diag0=%xmm0,>in5=%rcx
-movd   %xmm0,%rcx
-
-# qhasm: in1 = diag1
-# asm 1: movd   <diag1=int6464#2,>in1=int64#5
-# asm 2: movd   <diag1=%xmm1,>in1=%r8
-movd   %xmm1,%r8
-
-# qhasm: in13 = diag2
-# asm 1: movd   <diag2=int6464#3,>in13=int64#6
-# asm 2: movd   <diag2=%xmm2,>in13=%r9
-movd   %xmm2,%r9
-
-# qhasm: in9 = diag3
-# asm 1: movd   <diag3=int6464#4,>in9=int64#7
-# asm 2: movd   <diag3=%xmm3,>in9=%rax
-movd   %xmm3,%rax
-
-# qhasm: diag0 <<<= 96
-# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
-# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
-pshufd $0x39,%xmm0,%xmm0
-
-# qhasm: diag1 <<<= 96
-# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
-# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
-pshufd $0x39,%xmm1,%xmm1
-
-# qhasm: diag2 <<<= 96
-# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
-# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
-pshufd $0x39,%xmm2,%xmm2
-
-# qhasm: diag3 <<<= 96
-# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
-# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
-pshufd $0x39,%xmm3,%xmm3
-
-# qhasm: (uint32) in5 ^= *(uint32 *) (m + 20)
-# asm 1: xorl 20(<m=int64#2),<in5=int64#4d
-# asm 2: xorl 20(<m=%rsi),<in5=%ecx
-xorl 20(%rsi),%ecx
-
-# qhasm: (uint32) in1 ^= *(uint32 *) (m + 4)
-# asm 1: xorl 4(<m=int64#2),<in1=int64#5d
-# asm 2: xorl 4(<m=%rsi),<in1=%r8d
-xorl 4(%rsi),%r8d
-
-# qhasm: (uint32) in13 ^= *(uint32 *) (m + 52)
-# asm 1: xorl 52(<m=int64#2),<in13=int64#6d
-# asm 2: xorl 52(<m=%rsi),<in13=%r9d
-xorl 52(%rsi),%r9d
-
-# qhasm: (uint32) in9 ^= *(uint32 *) (m + 36)
-# asm 1: xorl 36(<m=int64#2),<in9=int64#7d
-# asm 2: xorl 36(<m=%rsi),<in9=%eax
-xorl 36(%rsi),%eax
-
-# qhasm: *(uint32 *) (out + 20) = in5
-# asm 1: movl   <in5=int64#4d,20(<out=int64#1)
-# asm 2: movl   <in5=%ecx,20(<out=%rdi)
-movl   %ecx,20(%rdi)
-
-# qhasm: *(uint32 *) (out + 4) = in1
-# asm 1: movl   <in1=int64#5d,4(<out=int64#1)
-# asm 2: movl   <in1=%r8d,4(<out=%rdi)
-movl   %r8d,4(%rdi)
-
-# qhasm: *(uint32 *) (out + 52) = in13
-# asm 1: movl   <in13=int64#6d,52(<out=int64#1)
-# asm 2: movl   <in13=%r9d,52(<out=%rdi)
-movl   %r9d,52(%rdi)
-
-# qhasm: *(uint32 *) (out + 36) = in9
-# asm 1: movl   <in9=int64#7d,36(<out=int64#1)
-# asm 2: movl   <in9=%eax,36(<out=%rdi)
-movl   %eax,36(%rdi)
-
-# qhasm: in10 = diag0
-# asm 1: movd   <diag0=int6464#1,>in10=int64#4
-# asm 2: movd   <diag0=%xmm0,>in10=%rcx
-movd   %xmm0,%rcx
-
-# qhasm: in6 = diag1
-# asm 1: movd   <diag1=int6464#2,>in6=int64#5
-# asm 2: movd   <diag1=%xmm1,>in6=%r8
-movd   %xmm1,%r8
-
-# qhasm: in2 = diag2
-# asm 1: movd   <diag2=int6464#3,>in2=int64#6
-# asm 2: movd   <diag2=%xmm2,>in2=%r9
-movd   %xmm2,%r9
-
-# qhasm: in14 = diag3
-# asm 1: movd   <diag3=int6464#4,>in14=int64#7
-# asm 2: movd   <diag3=%xmm3,>in14=%rax
-movd   %xmm3,%rax
-
-# qhasm: diag0 <<<= 96
-# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
-# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
-pshufd $0x39,%xmm0,%xmm0
-
-# qhasm: diag1 <<<= 96
-# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
-# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
-pshufd $0x39,%xmm1,%xmm1
-
-# qhasm: diag2 <<<= 96
-# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
-# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
-pshufd $0x39,%xmm2,%xmm2
-
-# qhasm: diag3 <<<= 96
-# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
-# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
-pshufd $0x39,%xmm3,%xmm3
-
-# qhasm: (uint32) in10 ^= *(uint32 *) (m + 40)
-# asm 1: xorl 40(<m=int64#2),<in10=int64#4d
-# asm 2: xorl 40(<m=%rsi),<in10=%ecx
-xorl 40(%rsi),%ecx
-
-# qhasm: (uint32) in6 ^= *(uint32 *) (m + 24)
-# asm 1: xorl 24(<m=int64#2),<in6=int64#5d
-# asm 2: xorl 24(<m=%rsi),<in6=%r8d
-xorl 24(%rsi),%r8d
-
-# qhasm: (uint32) in2 ^= *(uint32 *) (m + 8)
-# asm 1: xorl 8(<m=int64#2),<in2=int64#6d
-# asm 2: xorl 8(<m=%rsi),<in2=%r9d
-xorl 8(%rsi),%r9d
-
-# qhasm: (uint32) in14 ^= *(uint32 *) (m + 56)
-# asm 1: xorl 56(<m=int64#2),<in14=int64#7d
-# asm 2: xorl 56(<m=%rsi),<in14=%eax
-xorl 56(%rsi),%eax
-
-# qhasm: *(uint32 *) (out + 40) = in10
-# asm 1: movl   <in10=int64#4d,40(<out=int64#1)
-# asm 2: movl   <in10=%ecx,40(<out=%rdi)
-movl   %ecx,40(%rdi)
-
-# qhasm: *(uint32 *) (out + 24) = in6
-# asm 1: movl   <in6=int64#5d,24(<out=int64#1)
-# asm 2: movl   <in6=%r8d,24(<out=%rdi)
-movl   %r8d,24(%rdi)
-
-# qhasm: *(uint32 *) (out + 8) = in2
-# asm 1: movl   <in2=int64#6d,8(<out=int64#1)
-# asm 2: movl   <in2=%r9d,8(<out=%rdi)
-movl   %r9d,8(%rdi)
-
-# qhasm: *(uint32 *) (out + 56) = in14
-# asm 1: movl   <in14=int64#7d,56(<out=int64#1)
-# asm 2: movl   <in14=%eax,56(<out=%rdi)
-movl   %eax,56(%rdi)
-
-# qhasm: in15 = diag0
-# asm 1: movd   <diag0=int6464#1,>in15=int64#4
-# asm 2: movd   <diag0=%xmm0,>in15=%rcx
-movd   %xmm0,%rcx
-
-# qhasm: in11 = diag1
-# asm 1: movd   <diag1=int6464#2,>in11=int64#5
-# asm 2: movd   <diag1=%xmm1,>in11=%r8
-movd   %xmm1,%r8
-
-# qhasm: in7 = diag2
-# asm 1: movd   <diag2=int6464#3,>in7=int64#6
-# asm 2: movd   <diag2=%xmm2,>in7=%r9
-movd   %xmm2,%r9
-
-# qhasm: in3 = diag3
-# asm 1: movd   <diag3=int6464#4,>in3=int64#7
-# asm 2: movd   <diag3=%xmm3,>in3=%rax
-movd   %xmm3,%rax
-
-# qhasm: (uint32) in15 ^= *(uint32 *) (m + 60)
-# asm 1: xorl 60(<m=int64#2),<in15=int64#4d
-# asm 2: xorl 60(<m=%rsi),<in15=%ecx
-xorl 60(%rsi),%ecx
-
-# qhasm: (uint32) in11 ^= *(uint32 *) (m + 44)
-# asm 1: xorl 44(<m=int64#2),<in11=int64#5d
-# asm 2: xorl 44(<m=%rsi),<in11=%r8d
-xorl 44(%rsi),%r8d
-
-# qhasm: (uint32) in7 ^= *(uint32 *) (m + 28)
-# asm 1: xorl 28(<m=int64#2),<in7=int64#6d
-# asm 2: xorl 28(<m=%rsi),<in7=%r9d
-xorl 28(%rsi),%r9d
-
-# qhasm: (uint32) in3 ^= *(uint32 *) (m + 12)
-# asm 1: xorl 12(<m=int64#2),<in3=int64#7d
-# asm 2: xorl 12(<m=%rsi),<in3=%eax
-xorl 12(%rsi),%eax
-
-# qhasm: *(uint32 *) (out + 60) = in15
-# asm 1: movl   <in15=int64#4d,60(<out=int64#1)
-# asm 2: movl   <in15=%ecx,60(<out=%rdi)
-movl   %ecx,60(%rdi)
-
-# qhasm: *(uint32 *) (out + 44) = in11
-# asm 1: movl   <in11=int64#5d,44(<out=int64#1)
-# asm 2: movl   <in11=%r8d,44(<out=%rdi)
-movl   %r8d,44(%rdi)
-
-# qhasm: *(uint32 *) (out + 28) = in7
-# asm 1: movl   <in7=int64#6d,28(<out=int64#1)
-# asm 2: movl   <in7=%r9d,28(<out=%rdi)
-movl   %r9d,28(%rdi)
-
-# qhasm: *(uint32 *) (out + 12) = in3
-# asm 1: movl   <in3=int64#7d,12(<out=int64#1)
-# asm 2: movl   <in3=%eax,12(<out=%rdi)
-movl   %eax,12(%rdi)
-
-# qhasm:   bytes = bytes_backup
-# asm 1: movq <bytes_backup=stack64#8,>bytes=int64#6
-# asm 2: movq <bytes_backup=408(%rsp),>bytes=%r9
-movq 408(%rsp),%r9
-
-# qhasm:   in8 = ((uint32 *)&x2)[0]
-# asm 1: movl <x2=stack128#2,>in8=int64#4d
-# asm 2: movl <x2=16(%rsp),>in8=%ecx
-movl 16(%rsp),%ecx
-
-# qhasm:   in9 = ((uint32 *)&x3)[1]
-# asm 1: movl 4+<x3=stack128#3,>in9=int64#5d
-# asm 2: movl 4+<x3=32(%rsp),>in9=%r8d
-movl 4+32(%rsp),%r8d
-
-# qhasm:   in8 += 1
-# asm 1: add  $1,<in8=int64#4
-# asm 2: add  $1,<in8=%rcx
-add  $1,%rcx
-
-# qhasm:   in9 <<= 32
-# asm 1: shl  $32,<in9=int64#5
-# asm 2: shl  $32,<in9=%r8
-shl  $32,%r8
-
-# qhasm:   in8 += in9
-# asm 1: add  <in9=int64#5,<in8=int64#4
-# asm 2: add  <in9=%r8,<in8=%rcx
-add  %r8,%rcx
-
-# qhasm:   in9 = in8
-# asm 1: mov  <in8=int64#4,>in9=int64#5
-# asm 2: mov  <in8=%rcx,>in9=%r8
-mov  %rcx,%r8
-
-# qhasm:   (uint64) in9 >>= 32
-# asm 1: shr  $32,<in9=int64#5
-# asm 2: shr  $32,<in9=%r8
-shr  $32,%r8
-
-# qhasm:   ((uint32 *)&x2)[0] = in8
-# asm 1: movl <in8=int64#4d,>x2=stack128#2
-# asm 2: movl <in8=%ecx,>x2=16(%rsp)
-movl %ecx,16(%rsp)
-
-# qhasm:   ((uint32 *)&x3)[1] = in9
-# asm 1: movl <in9=int64#5d,4+<x3=stack128#3
-# asm 2: movl <in9=%r8d,4+<x3=32(%rsp)
-movl %r8d,4+32(%rsp)
-
-# qhasm:                          unsigned>? unsigned<? bytes - 64
-# asm 1: cmp  $64,<bytes=int64#6
-# asm 2: cmp  $64,<bytes=%r9
-cmp  $64,%r9
-# comment:fp stack unchanged by jump
-
-# qhasm:   goto bytesatleast65 if unsigned>
-ja ._bytesatleast65
-# comment:fp stack unchanged by jump
-
-# qhasm:     goto bytesatleast64 if !unsigned<
-jae ._bytesatleast64
-
-# qhasm:       m = out
-# asm 1: mov  <out=int64#1,>m=int64#2
-# asm 2: mov  <out=%rdi,>m=%rsi
-mov  %rdi,%rsi
-
-# qhasm:       out = ctarget
-# asm 1: mov  <ctarget=int64#3,>out=int64#1
-# asm 2: mov  <ctarget=%rdx,>out=%rdi
-mov  %rdx,%rdi
-
-# qhasm:       i = bytes
-# asm 1: mov  <bytes=int64#6,>i=int64#4
-# asm 2: mov  <bytes=%r9,>i=%rcx
-mov  %r9,%rcx
-
-# qhasm:       while (i) { *out++ = *m++; --i }
-rep movsb
-# comment:fp stack unchanged by fallthrough
-
-# qhasm:     bytesatleast64:
-._bytesatleast64:
-# comment:fp stack unchanged by fallthrough
-
-# qhasm:     done:
-._done:
-
-# qhasm:     r11_caller = r11_stack
-# asm 1: movq <r11_stack=stack64#1,>r11_caller=int64#9
-# asm 2: movq <r11_stack=352(%rsp),>r11_caller=%r11
-movq 352(%rsp),%r11
-
-# qhasm:     r12_caller = r12_stack
-# asm 1: movq <r12_stack=stack64#2,>r12_caller=int64#10
-# asm 2: movq <r12_stack=360(%rsp),>r12_caller=%r12
-movq 360(%rsp),%r12
-
-# qhasm:     r13_caller = r13_stack
-# asm 1: movq <r13_stack=stack64#3,>r13_caller=int64#11
-# asm 2: movq <r13_stack=368(%rsp),>r13_caller=%r13
-movq 368(%rsp),%r13
-
-# qhasm:     r14_caller = r14_stack
-# asm 1: movq <r14_stack=stack64#4,>r14_caller=int64#12
-# asm 2: movq <r14_stack=376(%rsp),>r14_caller=%r14
-movq 376(%rsp),%r14
-
-# qhasm:     r15_caller = r15_stack
-# asm 1: movq <r15_stack=stack64#5,>r15_caller=int64#13
-# asm 2: movq <r15_stack=384(%rsp),>r15_caller=%r15
-movq 384(%rsp),%r15
-
-# qhasm:     rbx_caller = rbx_stack
-# asm 1: movq <rbx_stack=stack64#6,>rbx_caller=int64#14
-# asm 2: movq <rbx_stack=392(%rsp),>rbx_caller=%rbx
-movq 392(%rsp),%rbx
-
-# qhasm:     rbp_caller = rbp_stack
-# asm 1: movq <rbp_stack=stack64#7,>rbp_caller=int64#15
-# asm 2: movq <rbp_stack=400(%rsp),>rbp_caller=%rbp
-movq 400(%rsp),%rbp
-
-# qhasm:     leave
-add %r11,%rsp
-xor %rax,%rax
-xor %rdx,%rdx
-ret
-
-# qhasm:   bytesatleast65:
-._bytesatleast65:
-
-# qhasm:   bytes -= 64
-# asm 1: sub  $64,<bytes=int64#6
-# asm 2: sub  $64,<bytes=%r9
-sub  $64,%r9
-
-# qhasm:   out += 64
-# asm 1: add  $64,<out=int64#1
-# asm 2: add  $64,<out=%rdi
-add  $64,%rdi
-
-# qhasm:   m += 64
-# asm 1: add  $64,<m=int64#2
-# asm 2: add  $64,<m=%rsi
-add  $64,%rsi
-# comment:fp stack unchanged by jump
-
-# qhasm: goto bytesbetween1and255
-jmp ._bytesbetween1and255