x86/x64: Drop internal x87 math functions. Use libm functions.

Mike Pall 10 years ago
parent commit ad03eba715
7 files changed: 114 additions and 455 deletions
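The change in substance: transcendental math that the x86/x64 VM previously computed with hand-written x87 sequences (fsin, fldln2/fyl2x, f2xm1/fscale, fpatan) is now routed through plain C library calls on both x86 and x64. A minimal sketch of the resulting mapping, assuming only the standard <math.h> declarations (the concrete function set matches the wrappers added to src/lj_vmmath.c below):

#include <math.h>

/* Sketch only: the real dispatch goes through the IRCALL_* table and
** the .dasc fast-function macros, not through a switch like this. */
static double fpmath_via_libm(int op, double x, double y)
{
  switch (op) {
  case 0: return sin(x);        /* previously: fsin */
  case 1: return log(x);        /* previously: fldln2; fyl2x */
  case 2: return pow(x, y);     /* previously: fyl2x; f2xm1; fscale */
  default: return atan2(x, y);  /* previously: fpatan */
  }
}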
  1. src/lj_arch.h (+3 -3)
  2. src/lj_asm.c (+0 -4)
  3. src/lj_asm_x86.h (+20 -62)
  4. src/lj_ircall.h (+12 -12)
  5. src/lj_vm.h (+4 -8)
  6. src/lj_vmmath.c (+13 -3)
  7. src/vm_x86.dasc (+62 -363)

src/lj_arch.h (+3 -3)

@@ -426,11 +426,11 @@
 #define LJ_TARGET_UNALIGNED	0
 #endif
 
-/* Various workarounds for embedded operating systems. */
-#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360
+/* Various workarounds for embedded operating systems or weak C runtimes. */
+#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS
 #define LUAJIT_NO_LOG2
 #endif
-#if defined(__symbian__)
+#if defined(__symbian__) || LJ_TARGET_WINDOWS
 #define LUAJIT_NO_EXP2
 #endif
 

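The new LJ_TARGET_WINDOWS condition presumably accounts for MSVC runtimes that predate C99's log2() and exp2(). With LUAJIT_NO_LOG2 defined, lj_vm.h (below) declares LuaJIT's own lj_vm_log2 instead of aliasing it to libm's log2. A fallback along these lines suffices (a sketch, not the literal implementation):

#include <math.h>

/* Derive log2 from log when the C runtime lacks it (sketch). */
double lj_vm_log2_sketch(double x)
{
  return log(x) * 1.4426950408889634074;  /* log(x) * log2(e) */
}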
src/lj_asm.c (+0 -4)

@@ -1262,9 +1262,6 @@ static void asm_call(ASMState *as, IRIns *ir)
 }
 
 #if !LJ_SOFTFP
-static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref);
-
-#if !LJ_TARGET_X86ORX64
 static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
@@ -1274,7 +1271,6 @@ static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
   asm_setupresult(as, ir, ci);
   asm_gencall(as, ci, args);
 }
-#endif
 
 static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
 {

src/lj_asm_x86.h (+20 -62)

@@ -1593,26 +1593,9 @@ static void asm_x87load(ASMState *as, IRRef ref)
   }
 }
 
-static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
-{
-  /* The modified regs must match with the *.dasc implementation. */
-  RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-  IRIns *irx;
-  if (ra_hasreg(ir->r))
-    rset_clear(drop, ir->r);  /* Dest reg handled below. */
-  ra_evictset(as, drop);
-  ra_destreg(as, ir, RID_XMM0);
-  emit_call(as, lj_vm_pow_sse);
-  irx = IR(lref);
-  if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
-    irx->r = RID_INIT;  /* Avoid allocating xmm1 for x. */
-  ra_left(as, RID_XMM0, lref);
-  ra_left(as, RID_XMM1, rref);
-}
-
 static void asm_fpmath(ASMState *as, IRIns *ir)
 {
-  IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER;
+  IRFPMathOp fpm = (IRFPMathOp)ir->op2;
   if (fpm == IRFPM_SQRT) {
     Reg dest = ra_dest(as, ir, RSET_FPR);
     Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
@@ -1645,53 +1628,28 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
     }
   } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
     /* Rejoined to pow(). */
-  } else {  /* Handle x87 ops. */
-    int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
-    Reg dest = ir->r;
-    if (ra_hasreg(dest)) {
-      ra_free(as, dest);
-      ra_modified(as, dest);
-      emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
-    }
-    emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
-    switch (fpm) {  /* st0 = lj_vm_*(st0) */
-    case IRFPM_EXP: emit_call(as, lj_vm_exp_x87); break;
-    case IRFPM_EXP2: emit_call(as, lj_vm_exp2_x87); break;
-    case IRFPM_SIN: emit_x87op(as, XI_FSIN); break;
-    case IRFPM_COS: emit_x87op(as, XI_FCOS); break;
-    case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break;
-    case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10:
-      /* Note: the use of fyl2xp1 would be pointless here. When computing
-      ** log(1.0+eps) the precision is already lost after 1.0 is added.
-      ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense.
-      */
-      emit_x87op(as, XI_FYL2X); break;
-    case IRFPM_OTHER:
-      switch (ir->o) {
-      case IR_ATAN2:
-	emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break;
-      case IR_LDEXP:
-	emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break;
-      default: lua_assert(0); break;
-      }
-      break;
-    default: lua_assert(0); break;
-    }
-    asm_x87load(as, ir->op1);
-    switch (fpm) {
-    case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break;
-    case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break;
-    case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break;
-    case IRFPM_OTHER:
-      if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2);
-      break;
-    default: break;
-    }
+  } else {
+    asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
   }
 }
 
-#define asm_atan2(as, ir)	asm_fpmath(as, ir)
-#define asm_ldexp(as, ir)	asm_fpmath(as, ir)
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
+
+static void asm_ldexp(ASMState *as, IRIns *ir)
+{
+  int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
+  Reg dest = ir->r;
+  if (ra_hasreg(dest)) {
+    ra_free(as, dest);
+    ra_modified(as, dest);
+    emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
+  }
+  emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
+  emit_x87op(as, XI_FPOP1);
+  emit_x87op(as, XI_FSCALE);
+  asm_x87load(as, ir->op1);
+  asm_x87load(as, ir->op2);
+}
 
 static void asm_fppowi(ASMState *as, IRIns *ir)
 {
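The one x87 remnant left in this file is asm_ldexp: fscale multiplies st(0) by 2^trunc(st(1)), which is exactly ldexp() semantics. A reference statement of the computed value (a sketch; the emitter above works in reverse order, so it loads op2 then op1 and pops the exponent with XI_FPOP1):

#include <math.h>

/* What the emitted fscale sequence computes (sketch). */
double ldexp_reference(double x, double e)
{
  return x * pow(2.0, trunc(e));  /* fscale: st0 = st0 * 2^trunc(st1) */
}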

src/lj_ircall.h (+12 -12)

@@ -169,18 +169,18 @@ typedef struct CCallInfo {
   _(FPMATH,	lj_vm_ceil,		1,   N, NUM, XA_FP) \
   _(FPMATH,	lj_vm_trunc,		1,   N, NUM, XA_FP) \
   _(FPMATH,	sqrt,			1,   N, NUM, XA_FP) \
-  _(FPMATH,	exp,			1,   N, NUM, XA_FP) \
-  _(FPMATH,	lj_vm_exp2,		1,   N, NUM, XA_FP) \
-  _(FPMATH,	log,			1,   N, NUM, XA_FP) \
-  _(FPMATH,	lj_vm_log2,		1,   N, NUM, XA_FP) \
-  _(FPMATH,	log10,			1,   N, NUM, XA_FP) \
-  _(FPMATH,	sin,			1,   N, NUM, XA_FP) \
-  _(FPMATH,	cos,			1,   N, NUM, XA_FP) \
-  _(FPMATH,	tan,			1,   N, NUM, XA_FP) \
-  _(FPMATH,	lj_vm_powi,		2,   N, NUM, XA_FP) \
-  _(FPMATH,	pow,			2,   N, NUM, XA2_FP) \
-  _(FPMATH,	atan2,			2,   N, NUM, XA2_FP) \
-  _(FPMATH,	ldexp,			2,   N, NUM, XA_FP) \
+  _(ANY,	exp,			1,   N, NUM, XA_FP) \
+  _(ANY,	lj_vm_exp2,		1,   N, NUM, XA_FP) \
+  _(ANY,	log,			1,   N, NUM, XA_FP) \
+  _(ANY,	lj_vm_log2,		1,   N, NUM, XA_FP) \
+  _(ANY,	log10,			1,   N, NUM, XA_FP) \
+  _(ANY,	sin,			1,   N, NUM, XA_FP) \
+  _(ANY,	cos,			1,   N, NUM, XA_FP) \
+  _(ANY,	tan,			1,   N, NUM, XA_FP) \
+  _(ANY,	lj_vm_powi,		2,   N, NUM, XA_FP) \
+  _(ANY,	pow,			2,   N, NUM, XA2_FP) \
+  _(ANY,	atan2,			2,   N, NUM, XA2_FP) \
+  _(ANY,	ldexp,			2,   N, NUM, XA_FP) \
   _(SOFTFP,	lj_vm_tobit,		2,   N, INT, 0) \
   _(SOFTFP,	softfp_add,		4,   N, NUM, 0) \
   _(SOFTFP,	softfp_sub,		4,   N, NUM, 0) \
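Retagging these entries from FPMATH to ANY makes them available on x86/x64 too, where asm_fpmath now picks its callee as IRCALL_lj_vm_floor + fpm (see src/lj_asm_x86.h above). That indexing only works while the rows stay in IRFPM_* enum order; an illustration with assumed values:

/* Illustration only; the actual enum values are assumptions. */
enum { IRFPM_FLOOR, IRFPM_CEIL, IRFPM_TRUNC, IRFPM_SQRT, IRFPM_EXP };
enum { IRCALL_lj_vm_floor, IRCALL_lj_vm_ceil, IRCALL_lj_vm_trunc,
       IRCALL_sqrt, IRCALL_exp };
/* asm_callid(as, ir, IRCALL_lj_vm_floor + fpm) selects the matching row. */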

src/lj_vm.h (+4 -8)

@@ -55,15 +55,13 @@ LJ_ASMF void lj_vm_exit_interp(void);
 #define lj_vm_ceil	ceil
 #else
 LJ_ASMF double lj_vm_floor(double);
-#if !LJ_TARGET_X86ORX64
 LJ_ASMF double lj_vm_ceil(double);
-#endif
 #if LJ_TARGET_ARM
 LJ_ASMF double lj_vm_floor_sf(double);
 LJ_ASMF double lj_vm_ceil_sf(double);
 #endif
 #endif
-#if defined(LUAJIT_NO_LOG2) || LJ_TARGET_X86ORX64
+#ifdef LUAJIT_NO_LOG2
 LJ_ASMF double lj_vm_log2(double);
 #else
 #define lj_vm_log2	log2
@@ -74,11 +72,11 @@ LJ_ASMF double lj_vm_log2(double);
 LJ_ASMF void lj_vm_floor_sse(void);
 LJ_ASMF void lj_vm_ceil_sse(void);
 LJ_ASMF void lj_vm_trunc_sse(void);
-LJ_ASMF void lj_vm_exp_x87(void);
-LJ_ASMF void lj_vm_exp2_x87(void);
-LJ_ASMF void lj_vm_pow_sse(void);
 LJ_ASMF void lj_vm_powi_sse(void);
+#define lj_vm_powi	NULL
 #else
+LJ_ASMF double lj_vm_powi(double, int32_t);
+#endif
 #if LJ_TARGET_PPC
 #define lj_vm_trunc	trunc
 #else
@@ -87,13 +85,11 @@ LJ_ASMF double lj_vm_trunc(double);
 LJ_ASMF double lj_vm_trunc_sf(double);
 #endif
 #endif
-LJ_ASMF double lj_vm_powi(double, int32_t);
 #ifdef LUAJIT_NO_EXP2
 LJ_ASMF double lj_vm_exp2(double);
 #else
 #define lj_vm_exp2	exp2
 #endif
-#endif
 LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
 #if LJ_HASFFI
 LJ_ASMF int lj_vm_errno(void);

src/lj_vmmath.c (+13 -3)

@@ -17,14 +17,25 @@
 
 #if LJ_TARGET_X86 && __ELF__ && __PIC__
 /* Wrapper functions to deal with the ELF/x86 PIC disaster. */
+LJ_FUNCA double lj_wrap_log(double x) { return log(x); }
+LJ_FUNCA double lj_wrap_log10(double x) { return log10(x); }
+LJ_FUNCA double lj_wrap_exp(double x) { return exp(x); }
+LJ_FUNCA double lj_wrap_sin(double x) { return sin(x); }
+LJ_FUNCA double lj_wrap_cos(double x) { return cos(x); }
+LJ_FUNCA double lj_wrap_tan(double x) { return tan(x); }
+LJ_FUNCA double lj_wrap_asin(double x) { return asin(x); }
+LJ_FUNCA double lj_wrap_acos(double x) { return acos(x); }
+LJ_FUNCA double lj_wrap_atan(double x) { return atan(x); }
 LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); }
 LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); }
 LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); }
+LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); }
+LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); }
+LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); }
 #endif
 
 /* -- Helper functions for generated machine code ------------------------- */
 
-#if !LJ_TARGET_X86ORX64
 double lj_vm_foldarith(double x, double y, int op)
 {
   switch (op) {
@@ -45,7 +56,6 @@ double lj_vm_foldarith(double x, double y, int op)
   default: return x;
   }
 }
-#endif
 
 #if LJ_HASJIT
 
@@ -109,6 +119,7 @@ double lj_vm_powi(double x, int32_t k)
   else
     return 1.0 / lj_vm_powui(x, (uint32_t)-k);
 }
+#endif
 
 /* Computes fpm(x) for extended math functions. */
 double lj_vm_foldfpm(double x, int fpm)
@@ -130,7 +141,6 @@ double lj_vm_foldfpm(double x, int fpm)
   }
   return 0;
 }
-#endif
 
 #if LJ_HASFFI
 int lj_vm_errno(void)

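lj_vm_powi, partly visible above, splits on the sign of k and defers to lj_vm_powui for the magnitude. A self-contained sketch of the scheme, assuming lj_vm_powui is square-and-multiply:

#include <stdint.h>

/* x^k for k >= 1 with O(log k) multiplies (sketch). */
static double powui_sketch(double x, uint32_t k)
{
  double y = 1.0;
  for (; k; k >>= 1) {
    if (k & 1) y *= x;    /* multiply in the bits of k, low to high */
    x *= x;
  }
  return y;
}

double powi_sketch(double x, int32_t k)
{
  if (k >= 1) return powui_sketch(x, (uint32_t)k);
  else if (k == 0) return 1.0;
  else return 1.0 / powui_sketch(x, 0u - (uint32_t)k);  /* mirrors the code above */
}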
src/vm_x86.dasc (+62 -363)

@@ -373,7 +373,6 @@
 |  fpop
 |.endmacro
 |
-|.macro fdup; fld st0; .endmacro
 |.macro fpop1; fstp st1; .endmacro
 |
 |// Synthesize SSE FP constants.
@@ -1329,19 +1328,6 @@ static void build_subroutines(BuildCtx *ctx)
   |  cmp NARGS:RD, 2+1;  jb ->fff_fallback
   |.endmacro
   |
-  |.macro .ffunc_n, name
-  |  .ffunc_1 name
-  |  cmp dword [BASE+4], LJ_TISNUM;  jae ->fff_fallback
-  |  fld qword [BASE]
-  |.endmacro
-  |
-  |.macro .ffunc_n, name, op
-  |  .ffunc_1 name
-  |  cmp dword [BASE+4], LJ_TISNUM;  jae ->fff_fallback
-  |  op
-  |  fld qword [BASE]
-  |.endmacro
-  |
   |.macro .ffunc_nsse, name, op
   |  .ffunc_1 name
   |  cmp dword [BASE+4], LJ_TISNUM;  jae ->fff_fallback
@@ -1352,14 +1338,6 @@ static void build_subroutines(BuildCtx *ctx)
   |  .ffunc_nsse name, movsd
   |.endmacro
   |
-  |.macro .ffunc_nn, name
-  |  .ffunc_2 name
-  |  cmp dword [BASE+4], LJ_TISNUM;  jae ->fff_fallback
-  |  cmp dword [BASE+12], LJ_TISNUM;  jae ->fff_fallback
-  |  fld qword [BASE]
-  |  fld qword [BASE+8]
-  |.endmacro
-  |
   |.macro .ffunc_nnsse, name
   |  .ffunc_2 name
   |  cmp dword [BASE+4], LJ_TISNUM;  jae ->fff_fallback
@@ -2029,6 +2007,12 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov RAa, -8			// Results start at BASE+RA = BASE-8.
   |  jmp ->vm_return
   |
+  |.if X64
+  |.define fff_resfp, fff_resxmm0
+  |.else
+  |.define fff_resfp, fff_resn
+  |.endif
+  |
   |.macro math_round, func
   |  .ffunc math_ .. func
   |.if DUALNUM
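fff_resfp abstracts the fast-function result path over the ABI difference: under the x64 convention a libm call returns its double in xmm0 (fff_resxmm0), while x86 cdecl returns it on the x87 stack in st(0), which fff_resn handles. In C the divergence is invisible; hand-written VM code has to pick an epilogue explicitly (sketch):

#include <math.h>

/* Same source, different result registers: xmm0 on x64, st(0) on x86. */
double libm_result_example(double x) { return exp(x); }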
@@ -2061,22 +2045,14 @@ static void build_subroutines(BuildCtx *ctx)
   |.ffunc math_log
   |  cmp NARGS:RD, 1+1; jne ->fff_fallback	// Exactly one argument.
   |  cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-  |  fldln2; fld qword [BASE]; fyl2x; jmp ->fff_resn
-  |
-  |.ffunc_n math_log10, fldlg2;	fyl2x;		jmp ->fff_resn
-  |.ffunc_n math_exp;	call ->vm_exp_x87;	jmp ->fff_resn
-  |
-  |.ffunc_n math_sin;	fsin;			jmp ->fff_resn
-  |.ffunc_n math_cos;	fcos;			jmp ->fff_resn
-  |.ffunc_n math_tan;	fptan; fpop;		jmp ->fff_resn
-  |
-  |.ffunc_n math_asin
-  |  fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan
-  |  jmp ->fff_resn
-  |.ffunc_n math_acos
-  |  fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan
-  |  jmp ->fff_resn
-  |.ffunc_n math_atan;	fld1; fpatan;		jmp ->fff_resn
+  |  movsd xmm0, qword [BASE]
+  |.if not X64
+  |  movsd FPARG1, xmm0
+  |.endif
+  |  mov RB, BASE
+  |  call extern log
+  |  mov BASE, RB
+  |  jmp ->fff_resfp
   |
   |.macro math_extern, func
   |  .ffunc_nsse math_ .. func
@@ -2086,18 +2062,36 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov RB, BASE
   |  call extern func
   |  mov BASE, RB
-  |.if X64
-  |  jmp ->fff_resxmm0
-  |.else
-  |  jmp ->fff_resn
+  |  jmp ->fff_resfp
+  |.endmacro
+  |
+  |.macro math_extern2, func
+  |  .ffunc_nnsse math_ .. func
+  |.if not X64
+  |  movsd FPARG1, xmm0
+  |  movsd FPARG3, xmm1
   |.endif
+  |  mov RB, BASE
+  |  call extern func
+  |  mov BASE, RB
+  |  jmp ->fff_resfp
   |.endmacro
   |
+  |  math_extern log10
+  |  math_extern exp
+  |  math_extern sin
+  |  math_extern cos
+  |  math_extern tan
+  |  math_extern asin
+  |  math_extern acos
+  |  math_extern atan
   |  math_extern sinh
   |  math_extern cosh
   |  math_extern tanh
+  |  math_extern2 pow
+  |  math_extern2 atan2
+  |  math_extern2 fmod
   |
-  |.ffunc_nn math_atan2;	fpatan;		jmp ->fff_resn
   |.ffunc_nnr math_ldexp;	fscale; fpop1;	jmp ->fff_resn
   |
   |.ffunc_1 math_frexp
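math_extern and math_extern2 generate the one- and two-argument math.* fast functions: check the argument tags, load the operands into xmm0/xmm1, call libm, and continue at fff_resfp. The `.if not X64` stores exist because x86 cdecl passes doubles on the stack; FPARG1/FPARG3 are taken here to name the outgoing [esp+ofs] argument slots (an assumption about definitions elsewhere in this file). BASE is parked in RB across the call because the callee may clobber it. Conceptually, the generated math.pow fast path reduces to (sketch):

#include <math.h>

/* The tag checks and interpreter fallback are handled by .ffunc_nnsse
** in the real code; only the call itself is shown (sketch). */
static double ffh_math_pow_sketch(double x, double y)
{
  return pow(x, y);  /* math_extern2 pow: call extern pow; jmp ->fff_resfp */
}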
@@ -2151,13 +2145,6 @@ static void build_subroutines(BuildCtx *ctx)
   |4:
   |  xorps xmm4, xmm4; jmp <1			// Return +-Inf and +-0.
   |
-  |.ffunc_nnr math_fmod
-  |1: ; fprem; fnstsw ax; and ax, 0x400; jnz <1
-  |  fpop1
-  |  jmp ->fff_resn
-  |
-  |.ffunc_nnsse math_pow;	call ->vm_pow_sse;	jmp ->fff_resxmm0
-  |
   |.macro math_minmax, name, cmovop, sseop
   |  .ffunc name
   |  mov RA, 2
@@ -2899,7 +2886,16 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |// FP value rounding. Called by math.floor/math.ceil fast functions
   |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
-  |.macro vm_round, name, mode
+  |.macro vm_round, name, mode, cond
+  |->name:
+  |.if not X64 and cond
+  |  movsd xmm0, qword [esp+4]
+  |  call ->name .. _sse
+  |  movsd qword [esp+4], xmm0  // Overwrite callee-owned arg.
+  |  fld qword [esp+4]
+  |  ret
+  |.endif
+  |
   |->name .. _sse:
   |  sseconst_abs xmm2, RDa
   |  sseconst_2p52 xmm3, RDa
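The _sse bodies (note the sseconst_abs/sseconst_2p52 loads above) round without x87 help by exploiting double precision: for 0 <= x < 2^52, adding and then subtracting 2^52 snaps the value to an integer under the current rounding mode; the surrounding code handles sign via the abs mask and patches the result per rounding mode. The core trick in C (sketch; assumes default round-to-nearest and 0 <= x < 2^52):

/* Round to nearest integer via the 2^52 trick (sketch). */
double round_nearest_sketch(double x)
{
  const double two52 = 4503599627370496.0;  /* 2^52 */
  volatile double t = x + two52;  /* volatile: keep the compiler from folding */
  return t - two52;
}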
@@ -2936,18 +2932,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  ret
   |.endmacro
   |
-  |->vm_floor:
-  |.if not X64
-  |  movsd xmm0, qword [esp+4]
-  |  call ->vm_floor_sse
-  |  movsd qword [esp+4], xmm0  // Overwrite callee-owned arg.
-  |  fld qword [esp+4]
-  |  ret
-  |.endif
-  |
-  |  vm_round vm_floor, 0
-  |  vm_round vm_ceil,  1
-  |  vm_round vm_trunc, 2
+  |  vm_round vm_floor, 0, 1
+  |  vm_round vm_ceil,  1, JIT
+  |  vm_round vm_trunc, 2, JIT
   |
   |// FP modulo x%y. Called by BC_MOD* and vm_arith.
   |->vm_mod:
@@ -2979,65 +2966,6 @@ static void build_subroutines(BuildCtx *ctx)
   |  subsd xmm0, xmm1
   |  ret
   |
-  |// FP log2(x). Called by math.log(x, base).
-  |->vm_log2:
-  |.if X64WIN
-  |  movsd qword [rsp+8], xmm0		// Use scratch area.
-  |  fld1
-  |  fld qword [rsp+8]
-  |  fyl2x
-  |  fstp qword [rsp+8]
-  |  movsd xmm0, qword [rsp+8]
-  |.elif X64
-  |  movsd qword [rsp-8], xmm0		// Use red zone.
-  |  fld1
-  |  fld qword [rsp-8]
-  |  fyl2x
-  |  fstp qword [rsp-8]
-  |  movsd xmm0, qword [rsp-8]
-  |.else
-  |  fld1
-  |  fld qword [esp+4]
-  |  fyl2x
-  |.endif
-  |  ret
-  |
-  |// FP exponentiation e^x and 2^x. Called by math.exp fast function and
-  |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified.
-  |// Caveat: needs 3 slots on x87 stack!
-  |->vm_exp_x87:
-  |  fldl2e; fmulp st1				// e^x ==> 2^(x*log2(e))
-  |->vm_exp2_x87:
-  |  .if X64WIN
-  |    .define expscratch, dword [rsp+8]	// Use scratch area.
-  |  .elif X64
-  |    .define expscratch, dword [rsp-8]	// Use red zone.
-  |  .else
-  |    .define expscratch, dword [esp+4]	// Needs 4 byte scratch area.
-  |  .endif
-  |  fst expscratch				// Caveat: overwrites ARG1.
-  |  cmp expscratch, 0x7f800000; je >1		// Special case: e^+Inf = +Inf
-  |  cmp expscratch, 0xff800000; je >2		// Special case: e^-Inf = 0
-  |->vm_exp2raw:  // Entry point for vm_pow. Without +-Inf check.
-  |  fdup; frndint; fsub st1, st0; fxch		// Split into frac/int part.
-  |  f2xm1; fld1; faddp st1; fscale; fpop1	// ==> (2^frac-1 +1) << int
-  |1:
-  |  ret
-  |2:
-  |  fpop; fldz; ret
-  |
-  |// Generic power function x^y. Called by BC_POW, math.pow fast function,
-  |// and vm_arith.
-  |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
-  |// Needs 16 byte scratch area for x86. Also called from JIT code.
-  |->vm_pow_sse:
-  |  cvttsd2si eax, xmm1
-  |  cvtsi2sd xmm2, eax
-  |  ucomisd xmm1, xmm2
-  |  jnz >8				// Branch for FP exponents.
-  |  jp >9				// Branch for NaN exponent.
-  |  // Fallthrough.
-  |
   |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
   |->vm_powi_sse:
   |  cmp eax, 1; jle >6			// i<=1?
@@ -3073,246 +3001,6 @@ static void build_subroutines(BuildCtx *ctx)
   |  sseconst_1 xmm0, RDa
   |  ret
   |
-  |8:  // FP/FP power function x^y.
-  |.if X64
-  |  movd rax, xmm1; shl rax, 1
-  |  rol rax, 12; cmp rax, 0xffe; je >2		// x^+-Inf?
-  |  movd rax, xmm0; shl rax, 1; je >4		// +-0^y?
-  |  rol rax, 12; cmp rax, 0xffe; je >5		// +-Inf^y?
-  |  .if X64WIN
-  |    movsd qword [rsp+16], xmm1		// Use scratch area.
-  |    movsd qword [rsp+8], xmm0
-  |    fld qword [rsp+16]
-  |    fld qword [rsp+8]
-  |  .else
-  |    movsd qword [rsp-16], xmm1		// Use red zone.
-  |    movsd qword [rsp-8], xmm0
-  |    fld qword [rsp-16]
-  |    fld qword [rsp-8]
-  |  .endif
-  |.else
-  |  movsd qword [esp+12], xmm1			// Needs 16 byte scratch area.
-  |  movsd qword [esp+4], xmm0
-  |  cmp dword [esp+12], 0; jne >1
-  |  mov eax, [esp+16]; shl eax, 1
-  |  cmp eax, 0xffe00000; je >2			// x^+-Inf?
-  |1:
-  |  cmp dword [esp+4], 0; jne >1
-  |  mov eax, [esp+8]; shl eax, 1; je >4	// +-0^y?
-  |  cmp eax, 0xffe00000; je >5			// +-Inf^y?
-  |1:
-  |  fld qword [esp+12]
-  |  fld qword [esp+4]
-  |.endif
-  |  fyl2x					// y*log2(x)
-  |  fdup; frndint; fsub st1, st0; fxch		// Split into frac/int part.
-  |  f2xm1; fld1; faddp st1; fscale; fpop1	// ==> (2^frac-1 +1) << int
-  |.if X64WIN
-  |  fstp qword [rsp+8]				// Use scratch area.
-  |  movsd xmm0, qword [rsp+8]
-  |.elif X64
-  |  fstp qword [rsp-8]				// Use red zone.
-  |  movsd xmm0, qword [rsp-8]
-  |.else
-  |  fstp qword [esp+4]				// Needs 8 byte scratch area.
-  |  movsd xmm0, qword [esp+4]
-  |.endif
-  |  ret
-  |
-  |9:  // Handle x^NaN.
-  |  sseconst_1 xmm2, RDa
-  |  ucomisd xmm0, xmm2; je >1			// 1^NaN ==> 1
-  |  movaps xmm0, xmm1				// x^NaN ==> NaN
-  |1:
-  |  ret
-  |
-  |2:  // Handle x^+-Inf.
-  |  sseconst_abs xmm2, RDa
-  |  andpd xmm0, xmm2				// |x|
-  |  sseconst_1 xmm2, RDa
-  |  ucomisd xmm0, xmm2; je <1			// +-1^+-Inf ==> 1
-  |  movmskpd eax, xmm1
-  |  xorps xmm0, xmm0
-  |  mov ah, al; setc al; xor al, ah; jne <1	// |x|<>1, x^+-Inf ==> +Inf/0
-  |3:
-  |  sseconst_hi xmm0, RDa, 7ff00000  // +Inf
-  |  ret
-  |
-  |4:  // Handle +-0^y.
-  |  movmskpd eax, xmm1; test eax, eax; jnz <3	// y < 0, +-0^y ==> +Inf
-  |  xorps xmm0, xmm0				// y >= 0, +-0^y ==> 0
-  |  ret
-  |
-  |5:  // Handle +-Inf^y.
-  |  movmskpd eax, xmm1; test eax, eax; jz <3	// y >= 0, +-Inf^y ==> +Inf
-  |  xorps xmm0, xmm0				// y < 0, +-Inf^y ==> 0
-  |  ret
-  |
-  |// Callable from C: double lj_vm_foldfpm(double x, int fpm)
-  |// Computes fpm(x) for extended math functions. ORDER FPM.
-  |->vm_foldfpm:
-  |.if JIT
-  |.if X64
-  |  .if X64WIN
-  |    .define fpmop, CARG2d
-  |  .else
-  |    .define fpmop, CARG1d
-  |  .endif
-  |  cmp fpmop, 1; jb ->vm_floor_sse; je ->vm_ceil_sse
-  |  cmp fpmop, 3; jb ->vm_trunc_sse; ja >2
-  |  sqrtsd xmm0, xmm0; ret
-  |2:
-  |  .if X64WIN
-  |    movsd qword [rsp+8], xmm0	// Use scratch area.
-  |    fld qword [rsp+8]
-  |  .else
-  |    movsd qword [rsp-8], xmm0	// Use red zone.
-  |    fld qword [rsp-8]
-  |  .endif
-  |  cmp fpmop, 5; ja >2
-  |  .if X64WIN; pop rax; .endif
-  |  je >1
-  |  call ->vm_exp_x87
-  |  .if X64WIN; push rax; .endif
-  |  jmp >7
-  |1:
-  |  call ->vm_exp2_x87
-  |  .if X64WIN; push rax; .endif
-  |  jmp >7
-  |2: ; cmp fpmop, 7; je >1; ja >2
-  |  fldln2; fxch; fyl2x; jmp >7
-  |1: ; fld1; fxch; fyl2x; jmp >7
-  |2: ; cmp fpmop, 9; je >1; ja >2
-  |  fldlg2; fxch; fyl2x; jmp >7
-  |1: ; fsin; jmp >7
-  |2: ; cmp fpmop, 11; je >1; ja >9
-  |   fcos; jmp >7
-  |1: ; fptan; fpop
-  |7:
-  |  .if X64WIN
-  |    fstp qword [rsp+8]		// Use scratch area.
-  |    movsd xmm0, qword [rsp+8]
-  |  .else
-  |    fstp qword [rsp-8]		// Use red zone.
-  |    movsd xmm0, qword [rsp-8]
-  |  .endif
-  |  ret
-  |.else  // x86 calling convention.
-  |  .define fpmop, eax
-  |  mov fpmop, [esp+12]
-  |  movsd xmm0, qword [esp+4]
-  |  cmp fpmop, 1; je >1; ja >2
-  |  call ->vm_floor_sse; jmp >7
-  |1: ; call ->vm_ceil_sse; jmp >7
-  |2: ; cmp fpmop, 3; je >1; ja >2
-  |  call ->vm_trunc_sse; jmp >7
-  |1:
-  |  sqrtsd xmm0, xmm0
-  |7:
-  |  movsd qword [esp+4], xmm0	// Overwrite callee-owned args.
-  |  fld qword [esp+4]
-  |  ret
-  |2: ; fld qword [esp+4]
-  |  cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
-  |2: ; cmp fpmop, 7; je >1; ja >2
-  |  fldln2; fxch; fyl2x; ret
-  |1: ; fld1; fxch; fyl2x; ret
-  |2: ; cmp fpmop, 9; je >1; ja >2
-  |  fldlg2; fxch; fyl2x; ret
-  |1: ; fsin; ret
-  |2: ; cmp fpmop, 11; je >1; ja >9
-  |   fcos; ret
-  |1: ; fptan; fpop; ret
-  |.endif
-  |9: ; int3					// Bad fpm.
-  |.endif
-  |
-  |// Callable from C: double lj_vm_foldarith(double x, double y, int op)
-  |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
-  |// and basic math functions. ORDER ARITH
-  |->vm_foldarith:
-  |.if X64
-  |
-  |  .if X64WIN
-  |    .define foldop, CARG3d
-  |  .else
-  |    .define foldop, CARG1d
-  |  .endif
-  |  cmp foldop, 1; je >1; ja >2
-  |  addsd xmm0, xmm1; ret
-  |1: ; subsd xmm0, xmm1; ret
-  |2: ; cmp foldop, 3; je >1; ja >2
-  |  mulsd xmm0, xmm1; ret
-  |1: ; divsd xmm0, xmm1; ret
-  |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow_sse
-  |  cmp foldop, 7; je >1; ja >2
-  |  sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret
-  |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret
-  |2: ; cmp foldop, 9; ja >2
-  |.if X64WIN
-  |  movsd qword [rsp+8], xmm0	// Use scratch area.
-  |  movsd qword [rsp+16], xmm1
-  |  fld qword [rsp+8]
-  |  fld qword [rsp+16]
-  |.else
-  |  movsd qword [rsp-8], xmm0	// Use red zone.
-  |  movsd qword [rsp-16], xmm1
-  |  fld qword [rsp-8]
-  |  fld qword [rsp-16]
-  |.endif
-  |  je >1
-  |  fpatan
-  |7:
-  |.if X64WIN
-  |  fstp qword [rsp+8]		// Use scratch area.
-  |  movsd xmm0, qword [rsp+8]
-  |.else
-  |  fstp qword [rsp-8]		// Use red zone.
-  |  movsd xmm0, qword [rsp-8]
-  |.endif
-  |  ret
-  |1: ; fxch; fscale; fpop1; jmp <7
-  |2: ; cmp foldop, 11; je >1; ja >9
-  |  minsd xmm0, xmm1; ret
-  |1: ; maxsd xmm0, xmm1; ret
-  |9: ; int3				// Bad op.
-  |
-  |.else  // x86 calling convention.
-  |
-  |  .define foldop, eax
-  |  mov foldop, [esp+20]
-  |  movsd xmm0, qword [esp+4]
-  |  movsd xmm1, qword [esp+12]
-  |  cmp foldop, 1; je >1; ja >2
-  |  addsd xmm0, xmm1
-  |7:
-  |  movsd qword [esp+4], xmm0	// Overwrite callee-owned args.
-  |  fld qword [esp+4]
-  |  ret
-  |1: ; subsd xmm0, xmm1; jmp <7
-  |2: ; cmp foldop, 3; je >1; ja >2
-  |  mulsd xmm0, xmm1; jmp <7
-  |1: ; divsd xmm0, xmm1; jmp <7
-  |2: ; cmp foldop, 5
-  |  je >1; ja >2
-  |  call ->vm_mod; jmp <7
-  |1: ; pop edx; call ->vm_pow_sse; push edx; jmp <7  // Writes to scratch area.
-  |2: ; cmp foldop, 7; je >1; ja >2
-  |  sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7
-  |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7
-  |2: ; cmp foldop, 9; ja >2
-  |  fld qword [esp+4]		// Reload from stack
-  |  fld qword [esp+12]
-  |  je >1
-  |  fpatan; ret
-  |1: ; fxch; fscale; fpop1; ret
-  |2: ; cmp foldop, 11; je >1; ja >9
-  |  minsd xmm0, xmm1; jmp <7
-  |1: ; maxsd xmm0, xmm1; jmp <7
-  |9: ; int3				// Bad op.
-  |
-  |.endif
-  |
   |//-----------------------------------------------------------------------
   |//-- Miscellaneous functions --------------------------------------------
   |//-----------------------------------------------------------------------
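The block deleted above (the x87 pow path plus the assembler versions of vm_foldfpm and vm_foldarith) hand-classified pow's special operands (x^+-Inf, +-0^y, +-Inf^y, x^NaN) before falling into an fyl2x/f2xm1 computation. C99 Annex F specifies exactly these cases for libm's pow(), which is why the whole block can go; the C counterparts of the fold functions in src/lj_vmmath.c are now compiled for x86/x64 as well. A spot check (sketch):

#include <math.h>
#include <stdio.h>

int main(void)
{
  printf("%g\n", pow(0.0, -1.0));       /* +-0^y, y<0   -> inf */
  printf("%g\n", pow(-1.0, INFINITY));  /* +-1^+-Inf    -> 1 */
  printf("%g\n", pow(2.0, -INFINITY));  /* |x|>1 ^ -Inf -> 0 */
  printf("%g\n", pow(1.0, NAN));        /* 1^NaN        -> 1 */
  return 0;
}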
@@ -4107,8 +3795,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     break;
   case BC_POW:
     |  ins_arithpre movsd, xmm1
-    |  call ->vm_pow_sse
+    |  mov RB, BASE
+    |.if not X64
+    |  movsd FPARG1, xmm0
+    |  movsd FPARG3, xmm1
+    |.endif
+    |  call extern pow
+    |  movzx RA, PC_RA
+    |  mov BASE, RB
+    |.if X64
     |  ins_arithpost
+    |.else
+    |  fstp qword [BASE+RA*8]
+    |.endif
     |  ins_next
     break;
     break;