|
@@ -18,24 +18,24 @@ movhps[to_mem](r0: ptr64; r1: xmm)
|
|
|
movlhps(var r0: xmm; r1: xmm)
|
|
|
movhlps(var r0: xmm; r1: xmm)
|
|
|
|
|
|
-addss(var r0: xmm; r1: xmm)
|
|
|
-addss[from_mem](var r0: xmm; r1: ptr32)
|
|
|
-subss(var r0: xmm; r1: xmm)
|
|
|
-subss[from_mem](var r0: xmm; r1: ptr32)
|
|
|
-mulss(var r0: xmm; r1: xmm)
|
|
|
-mulss[from_mem](var r0: xmm; r1: ptr32)
|
|
|
-divss(var r0: xmm; r1: xmm)
|
|
|
-divss[from_mem](var r0: xmm; r1: ptr32)
|
|
|
-rcpss(var r0: xmm; r1: xmm)
|
|
|
-rcpss[from_mem](var r0: xmm; r1: ptr32)
|
|
|
-sqrtss(var r0: xmm; r1: xmm)
|
|
|
-sqrtss[from_mem](var r0: xmm; r1: ptr32)
|
|
|
-maxss(var r0: xmm; r1: xmm)
|
|
|
-maxss[from_mem](var r0: xmm; r1: ptr32)
|
|
|
-minss(var r0: xmm; r1: xmm)
|
|
|
-minss[from_mem](var r0: xmm; r1: ptr32)
|
|
|
-rsqrtss(var r0: xmm; r1: xmm)
|
|
|
-rsqrtss[from_mem](var r0: xmm; r1: ptr32)
|
|
|
+addss(var r0: f32; r1: f32)
|
|
|
+addss[from_mem](var r0: f32; r1: ptr32)
|
|
|
+subss(var r0: f32; r1: f32)
|
|
|
+subss[from_mem](var r0: f32; r1: ptr32)
|
|
|
+mulss(var r0: f32; r1: f32)
|
|
|
+mulss[from_mem](var r0: f32; r1: ptr32)
|
|
|
+divss(var r0: f32; r1: f32)
|
|
|
+divss[from_mem](var r0: f32; r1: ptr32)
|
|
|
+rcpss(var r0: f32; r1: f32)
|
|
|
+rcpss[from_mem](var r0: f32; r1: ptr32)
|
|
|
+sqrtss(var r0: f32; r1: f32)
|
|
|
+sqrtss[from_mem](var r0: f32; r1: ptr32)
|
|
|
+maxss(var r0: f32; r1: f32)
|
|
|
+maxss[from_mem](var r0: f32; r1: ptr32)
|
|
|
+minss(var r0: f32; r1: f32)
|
|
|
+minss[from_mem](var r0: f32; r1: ptr32)
|
|
|
+rsqrtss(var r0: f32; r1: f32)
|
|
|
+rsqrtss[from_mem](var r0: f32; r1: ptr32)
|
|
|
|
|
|
addps(var r0: xmm; r1: xmm)
|
|
|
addps[from_mem](var r0: xmm; r1: ptr128)
|
|
@@ -65,8 +65,8 @@ xorps[from_mem](var r0: xmm; r1: ptr128)
|
|
|
andnps(var r0: xmm; r1: xmm)
|
|
|
andnps[from_mem](var r0: xmm; r1: ptr128)
|
|
|
|
|
|
-cmpss(var r0: xmm; r1: xmm; imm: i32) (imm in [0..7])
|
|
|
-cmpss[from_mem](var r0: xmm; r1: ptr32; imm: i32) (imm in [0..7])
|
|
|
+cmpss(var r0: f32; r1: f32; imm: i32) (imm in [0..7])
|
|
|
+cmpss[from_mem](var r0: f32; r1: ptr32; imm: i32) (imm in [0..7])
|
|
|
cmpps(var r0: xmm; r1: xmm; imm: i32) (imm in [0..7])
|
|
|
cmpps[from_mem](var r0: xmm; r1: ptr128; imm: i32) (imm in [0..7])
|
|
|
|
|
@@ -77,11 +77,11 @@ unpckhps[from_mem](var r0: xmm; r1: ptr128)
|
|
|
unpcklps(var r0: xmm; r1: xmm)
|
|
|
unpcklps[from_mem](var r0: xmm; r1: ptr128)
|
|
|
|
|
|
-cvtsi2ss(var r0: xmm; r1: reg)
|
|
|
-cvtsi2ss[from_mem](var r0: xmm; r1: ptr32)
|
|
|
-cvtss2si(out r0: reg; r1: xmm)
|
|
|
+cvtsi2ss(var r0: f32; r1: reg)
|
|
|
+cvtsi2ss[from_mem](var r0: f32; r1: ptr32)
|
|
|
+cvtss2si(out r0: reg; r1: f32)
|
|
|
cvtss2si[from_mem](out r0: reg; r1: ptr32)
|
|
|
-cvttss2si(out r0: reg; r1: xmm)
|
|
|
+cvttss2si(out r0: reg; r1: f32)
|
|
|
cvttss2si[from_mem](out r0: reg; r1: ptr32)
|
|
|
|
|
|
cvtpi2ps(var r0: xmm; r1: mm)
|
|
@@ -134,32 +134,32 @@ movsd[from_val](out r0: xmm; r1: f64)
|
|
|
; SSE2 packed arithmetic instructions
|
|
|
addpd(var r0: xmm; r1: xmm)
|
|
|
addpd[from_mem](var r0: xmm; r1: ptr128)
|
|
|
-addsd(var r0: xmm; r1: xmm)
|
|
|
-addsd[from_mem](var r0: xmm; r1: ptr64)
|
|
|
+addsd(var r0: f64; r1: f64)
|
|
|
+addsd[from_mem](var r0: f64; r1: ptr64)
|
|
|
divpd(var r0: xmm; r1: xmm)
|
|
|
divpd[from_mem](var r0: xmm; r1: ptr128)
|
|
|
-divsd(var r0: xmm; r1: xmm)
|
|
|
-divsd[from_mem](var r0: xmm; r1: ptr64)
|
|
|
+divsd(var r0: f64; r1: f64)
|
|
|
+divsd[from_mem](var r0: f64; r1: ptr64)
|
|
|
maxpd(var r0: xmm; r1: xmm)
|
|
|
maxpd[from_mem](var r0: xmm; r1: ptr128)
|
|
|
-maxsd(var r0: xmm; r1: xmm)
|
|
|
+maxsd(var r0: f64; r1: f64)
|
|
|
-maxsd[from_mem](var r0: xmm; r1: ptr64)
|
|
|
+maxsd[from_mem](var r0: f64; r1: ptr64)
|
|
|
minpd(var r0: xmm; r1: xmm)
|
|
|
minpd[from_mem](var r0: xmm; r1: ptr128)
|
|
|
-minsd(var r0: xmm; r1: xmm)
|
|
|
-minsd[from_mem](var r0: xmm; r1: ptr64)
|
|
|
+minsd(var r0: f64; r1: f64)
|
|
|
+minsd[from_mem](var r0: f64; r1: ptr64)
|
|
|
mulpd(var r0: xmm; r1: xmm)
|
|
|
mulpd[from_mem](var r0: xmm; r1: ptr128)
|
|
|
-mulsd(var r0: xmm; r1: xmm)
|
|
|
-mulsd[from_mem](var r0: xmm; r1: ptr64)
|
|
|
+mulsd(var r0: f64; r1: f64)
|
|
|
+mulsd[from_mem](var r0: f64; r1: ptr64)
|
|
|
sqrtpd(out r0: xmm; r1: xmm)
|
|
|
sqrtpd[from_mem](out r0: xmm; r1: ptr128)
|
|
|
-sqrtsd(out r0: xmm; r1: xmm)
|
|
|
-sqrtsd[from_mem](out r0: xmm; r1: ptr64)
|
|
|
+sqrtsd(out r0: f64; r1: f64)
|
|
|
+sqrtsd[from_mem](out r0: f64; r1: ptr64)
|
|
|
subpd(var r0: xmm; r1: xmm)
|
|
|
subpd[from_mem](var r0: xmm; r1: ptr128)
|
|
|
-subsd(var r0: xmm; r1: xmm)
|
|
|
-subsd[from_mem](var r0: xmm; r1: ptr64)
|
|
|
+subsd(var r0: f64; r1: f64)
|
|
|
+subsd[from_mem](var r0: f64; r1: ptr64)
|
|
|
|
|
|
; SSE2 logical instructions
|
|
|
andpd(var r0: xmm; r1: xmm)
|
|
@@ -174,12 +174,12 @@ xorpd[from_mem](var r0: xmm; r1: ptr128)
|
|
|
; SSE2 compare instructions
|
|
|
cmppd(var r0: xmm; r1: xmm; imm: i32)
|
|
|
cmppd[from_mem](var r0: xmm; r1: ptr128; imm: i32)
|
|
|
-cmpsd(var r0: xmm; r1: xmm; imm: i32)
|
|
|
-cmpsd[from_mem](var r0: xmm; r1: ptr64; imm: i32)
|
|
|
-comisd(var r0: xmm; r1: xmm)
|
|
|
-comisd[from_mem](var r0: xmm; r1: ptr64)
|
|
|
-ucomisd(var r0: xmm; r1: xmm)
|
|
|
-ucomisd[from_mem](var r0: xmm; r1: ptr64)
|
|
|
+cmpsd(var r0: f64; r1: f64; imm: i32)
|
|
|
+cmpsd[from_mem](var r0: f64; r1: ptr64; imm: i32)
|
|
|
+comisd(var r0: f64; r1: f64)
|
|
|
+comisd[from_mem](var r0: f64; r1: ptr64)
|
|
|
+ucomisd(var r0: f64; r1: f64)
|
|
|
+ucomisd[from_mem](var r0: f64; r1: ptr64)
|
|
|
|
|
|
; SSE2 shuffle and unpack instructions
|
|
|
shufpd(var r0: xmm; r1: xmm; imm: i32)
|
|
@@ -210,10 +210,10 @@ cvtsd2si(var r0: sreg; r1: xmm)
|
|
|
cvtsd2si[from_mem](var r0: sreg; r1: ptr64)
|
|
|
cvtsd2ss(var r0: xmm; r1: xmm)
|
|
|
cvtsd2ss[from_mem](var r0: xmm; r1: ptr64)
|
|
|
-cvtsi2sd(var r0: xmm; r1: r32)
|
|
|
-cvtsi2sd[from_mem](var r0: xmm; r1: ptr32)
|
|
|
-cvtss2sd(var r0: xmm; r1: xmm)
|
|
|
-cvtss2sd[from_mem](var r0: xmm; r1: ptr32)
|
|
|
+cvtsi2sd(var r0: f64; r1: r32)
|
|
|
+cvtsi2sd[from_mem](var r0: f64; r1: ptr32)
|
|
|
+cvtss2sd(var r0: f64; r1: f32)
|
|
|
+cvtss2sd[from_mem](var r0: f64; r1: ptr32)
|
|
|
cvttpd2dq(var r0: xmm; r1: xmm)
|
|
|
cvttpd2dq[from_mem](var r0: xmm; r1: ptr128)
|
|
|
cvttpd2pi(var r0: mm; r1: xmm)
|
|
@@ -451,8 +451,8 @@ roundss(out r0: xmm; r1: xmm; imm: i32)
|
|
|
roundss[from_mem](out r0: xmm; r1: ptr32; imm: i32)
|
|
|
roundpd(out r0: xmm; r1: xmm; imm: i32)
|
|
|
roundpd[from_mem](out r0: xmm; r1: ptr128; imm: i32)
|
|
|
-roundsd(out r0: xmm; r1: xmm; imm: i32)
|
|
|
-roundsd[from_mem](out r0: xmm; r1: ptr64; imm: i32)
|
|
|
+roundsd(out r0: f64; r1: f64; imm: i32)
|
|
|
+roundsd[from_mem](out r0: f64; r1: ptr64; imm: i32)
|
|
|
insertps(var r0: xmm; r1: xmm; imm: i32)
|
|
|
insertps[from_mem](var r0: xmm; r1: ptr32; imm: i32)
|
|
|
extractps(out r0: r32; r1: xmm; imm: i32)
|