浏览代码

Remove pow() splitting and cleanup backends.

Mike Pall 5 年之前
父节点
当前提交
b2307c8ad8
共有 14 个文件被更改,包括 95 次插入和 222 次删除
  1. 0 3
      src/lj_arch.h
  2. 68 38
      src/lj_asm.c
  3. 1 9
      src/lj_asm_arm.h
  4. 1 38
      src/lj_asm_arm64.h
  5. 2 36
      src/lj_asm_mips.h
  6. 1 8
      src/lj_asm_ppc.h
  7. 1 36
      src/lj_asm_x86.h
  8. 1 1
      src/lj_ir.h
  9. 0 1
      src/lj_ircall.h
  10. 14 4
      src/lj_opt_fold.c
  11. 6 14
      src/lj_opt_narrow.c
  12. 0 21
      src/lj_opt_split.c
  13. 0 5
      src/lj_vm.h
  14. 0 8
      src/lj_vmmath.c

+ 0 - 3
src/lj_arch.h

@@ -586,9 +586,6 @@
 #if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS
 #define LUAJIT_NO_LOG2
 #endif
-#if defined(__symbian__) || LJ_TARGET_WINDOWS
-#define LUAJIT_NO_EXP2
-#endif
 #if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0)
 #define LJ_NO_SYSTEM		1
 #endif

+ 68 - 38
src/lj_asm.c

@@ -1308,32 +1308,6 @@ static void asm_call(ASMState *as, IRIns *ir)
   asm_gencall(as, ci, args);
 }
 
-#if !LJ_SOFTFP32
-static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
-  IRRef args[2];
-  args[0] = lref;
-  args[1] = rref;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
-{
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      asm_fppow(as, ir, irpp->op1, irp->op2);
-      return 1;
-    }
-  }
-  return 0;
-}
-#endif
-
 /* -- PHI and loop handling ----------------------------------------------- */
 
 /* Break a PHI cycle by renaming to a free register (evict if needed). */
@@ -1604,6 +1578,62 @@ static void asm_loop(ASMState *as)
 #error "Missing assembler for target CPU"
 #endif
 
+/* -- Common instruction helpers ------------------------------------------ */
+
+#if !LJ_SOFTFP32
+#if !LJ_TARGET_X86ORX64
+#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
+#define asm_fppowi(as, ir)	asm_callid(as, ir, IRCALL_lj_vm_powi)
+#endif
+
+static void asm_pow(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+					  IRCALL_lj_carith_powu64);
+  else
+#endif
+  if (irt_isnum(IR(ir->op2)->t))
+    asm_callid(as, ir, IRCALL_pow);
+  else
+    asm_fppowi(as, ir);
+}
+
+static void asm_div(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+					  IRCALL_lj_carith_divu64);
+  else
+#endif
+    asm_fpdiv(as, ir);
+}
+#endif
+
+static void asm_mod(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isint(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+					  IRCALL_lj_carith_modu64);
+  else
+#endif
+    asm_callid(as, ir, IRCALL_lj_vm_modi);
+}
+
+static void asm_fuseequal(ASMState *as, IRIns *ir)
+{
+  /* Fuse HREF + EQ/NE. */
+  if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
+    as->curins--;
+    asm_href(as, ir-1, (IROp)ir->o);
+  } else {
+    asm_equal(as, ir);
+  }
+}
+
 /* -- Instruction dispatch ------------------------------------------------ */
 
 /* Assemble a single instruction. */
@@ -1626,14 +1656,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
   case IR_ABC:
     asm_comp(as, ir);
     break;
-  case IR_EQ: case IR_NE:
-    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
-      as->curins--;
-      asm_href(as, ir-1, (IROp)ir->o);
-    } else {
-      asm_equal(as, ir);
-    }
-    break;
+  case IR_EQ: case IR_NE: asm_fuseequal(as, ir); break;
 
   case IR_RETF: asm_retf(as, ir); break;
 
@@ -1702,7 +1725,13 @@ static void asm_ir(ASMState *as, IRIns *ir)
   case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
   case IR_TNEW: asm_tnew(as, ir); break;
   case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
+  case IR_CNEW: case IR_CNEWI:
+#if LJ_HASFFI
+    asm_cnew(as, ir);
+#else
+    lua_assert(0);
+#endif
+    break;
 
   /* Buffer operations. */
   case IR_BUFHDR: asm_bufhdr(as, ir); break;
@@ -2167,6 +2196,10 @@ static void asm_setup_regsp(ASMState *as)
 	if (inloop)
 	  as->modset |= RSET_SCRATCH;
 #if LJ_TARGET_X86
+	if (irt_isnum(IR(ir->op2)->t)) {
+	  if (as->evenspill < 4)  /* Leave room to call pow(). */
+	    as->evenspill = 4;
+	}
 	break;
 #else
 	ir->prev = REGSP_HINT(RID_FPRET);
@@ -2192,9 +2225,6 @@ static void asm_setup_regsp(ASMState *as)
 	  continue;
 	}
 	break;
-      } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) {
-	if (as->evenspill < 4)  /* Leave room to call pow(). */
-	  as->evenspill = 4;
       }
 #endif
       if (inloop)

+ 1 - 9
src/lj_asm_arm.h

@@ -1268,8 +1268,6 @@ static void asm_cnew(ASMState *as, IRIns *ir)
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
 	       ra_releasetmp(as, ASMREF_TMP1));
 }
-#else
-#define asm_cnew(as, ir)	((void)0)
 #endif
 
 /* -- Write barriers ------------------------------------------------------ */
@@ -1364,8 +1362,6 @@ static void asm_callround(ASMState *as, IRIns *ir, int id)
 
 static void asm_fpmath(ASMState *as, IRIns *ir)
 {
-  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
-    return;
   if (ir->op2 <= IRFPM_TRUNC)
     asm_callround(as, ir, ir->op2);
   else if (ir->op2 == IRFPM_SQRT)
@@ -1507,14 +1503,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
 #define asm_mulov(as, ir)	asm_mul(as, ir)
 
 #if !LJ_SOFTFP
-#define asm_div(as, ir)		asm_fparith(as, ir, ARMI_VDIV_D)
-#define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)
+#define asm_fpdiv(as, ir)	asm_fparith(as, ir, ARMI_VDIV_D)
 #define asm_abs(as, ir)		asm_fpunary(as, ir, ARMI_VABS_D)
-#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
 #endif
 
-#define asm_mod(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_modi)
-
 static void asm_neg(ASMState *as, IRIns *ir)
 {
 #if !LJ_SOFTFP

+ 1 - 38
src/lj_asm_arm64.h

@@ -1242,8 +1242,6 @@ static void asm_cnew(ASMState *as, IRIns *ir)
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
 	       ra_releasetmp(as, ASMREF_TMP1));
 }
-#else
-#define asm_cnew(as, ir)	((void)0)
 #endif
 
 /* -- Write barriers ------------------------------------------------------ */
@@ -1320,8 +1318,6 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
   } else if (fpm <= IRFPM_TRUNC) {
     asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd :
 			fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd);
-  } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
-    return;
   } else {
     asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
   }
@@ -1428,45 +1424,12 @@ static void asm_mul(ASMState *as, IRIns *ir)
   asm_intmul(as, ir);
 }
 
-static void asm_div(ASMState *as, IRIns *ir)
-{
-#if LJ_HASFFI
-  if (!irt_isnum(ir->t))
-    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
-					  IRCALL_lj_carith_divu64);
-  else
-#endif
-    asm_fparith(as, ir, A64I_FDIVd);
-}
-
-static void asm_pow(ASMState *as, IRIns *ir)
-{
-#if LJ_HASFFI
-  if (!irt_isnum(ir->t))
-    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
-					  IRCALL_lj_carith_powu64);
-  else
-#endif
-    asm_callid(as, ir, IRCALL_lj_vm_powi);
-}
-
 #define asm_addov(as, ir)	asm_add(as, ir)
 #define asm_subov(as, ir)	asm_sub(as, ir)
 #define asm_mulov(as, ir)	asm_mul(as, ir)
 
+#define asm_fpdiv(as, ir)	asm_fparith(as, ir, A64I_FDIVd)
 #define asm_abs(as, ir)		asm_fpunary(as, ir, A64I_FABS)
-#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
-
-static void asm_mod(ASMState *as, IRIns *ir)
-{
-#if LJ_HASFFI
-  if (!irt_isint(ir->t))
-    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
-					  IRCALL_lj_carith_modu64);
-  else
-#endif
-    asm_callid(as, ir, IRCALL_lj_vm_modi);
-}
 
 static void asm_neg(ASMState *as, IRIns *ir)
 {

+ 2 - 36
src/lj_asm_mips.h

@@ -1607,8 +1607,6 @@ static void asm_cnew(ASMState *as, IRIns *ir)
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
 	       ra_releasetmp(as, ASMREF_TMP1));
 }
-#else
-#define asm_cnew(as, ir)	((void)0)
 #endif
 
 /* -- Write barriers ------------------------------------------------------ */
@@ -1677,8 +1675,6 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
 #if !LJ_SOFTFP32
 static void asm_fpmath(ASMState *as, IRIns *ir)
 {
-  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
-    return;
 #if !LJ_SOFTFP
   if (ir->op2 <= IRFPM_TRUNC)
     asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
@@ -1766,41 +1762,13 @@ static void asm_mul(ASMState *as, IRIns *ir)
   }
 }
 
-static void asm_mod(ASMState *as, IRIns *ir)
-{
-#if LJ_64 && LJ_HASFFI
-  if (!irt_isint(ir->t))
-    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
-					  IRCALL_lj_carith_modu64);
-  else
-#endif
-    asm_callid(as, ir, IRCALL_lj_vm_modi);
-}
-
 #if !LJ_SOFTFP32
-static void asm_pow(ASMState *as, IRIns *ir)
-{
-#if LJ_64 && LJ_HASFFI
-  if (!irt_isnum(ir->t))
-    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
-					  IRCALL_lj_carith_powu64);
-  else
-#endif
-    asm_callid(as, ir, IRCALL_lj_vm_powi);
-}
-
-static void asm_div(ASMState *as, IRIns *ir)
+static void asm_fpdiv(ASMState *as, IRIns *ir)
 {
-#if LJ_64 && LJ_HASFFI
-  if (!irt_isnum(ir->t))
-    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
-					  IRCALL_lj_carith_divu64);
-  else
-#endif
 #if !LJ_SOFTFP
     asm_fparith(as, ir, MIPSI_DIV_D);
 #else
-  asm_callid(as, ir, IRCALL_softfp_div);
+    asm_callid(as, ir, IRCALL_softfp_div);
 #endif
 }
 #endif
@@ -1838,8 +1806,6 @@ static void asm_abs(ASMState *as, IRIns *ir)
 }
 #endif
 
-#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
-
 static void asm_arithov(ASMState *as, IRIns *ir)
 {
   /* TODO MIPSR6: bovc/bnvc. Caveat: no delay slot to load RID_TMP. */

+ 1 - 8
src/lj_asm_ppc.h

@@ -1174,8 +1174,6 @@ static void asm_cnew(ASMState *as, IRIns *ir)
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
 	       ra_releasetmp(as, ASMREF_TMP1));
 }
-#else
-#define asm_cnew(as, ir)	((void)0)
 #endif
 
 /* -- Write barriers ------------------------------------------------------ */
@@ -1246,8 +1244,6 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
 
 static void asm_fpmath(ASMState *as, IRIns *ir)
 {
-  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
-    return;
   if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
     asm_fpunary(as, ir, PPCI_FSQRT);
   else
@@ -1361,9 +1357,7 @@ static void asm_mul(ASMState *as, IRIns *ir)
   }
 }
 
-#define asm_div(as, ir)		asm_fparith(as, ir, PPCI_FDIV)
-#define asm_mod(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_modi)
-#define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)
+#define asm_fpdiv(as, ir)	asm_fparith(as, ir, PPCI_FDIV)
 
 static void asm_neg(ASMState *as, IRIns *ir)
 {
@@ -1387,7 +1381,6 @@ static void asm_neg(ASMState *as, IRIns *ir)
 }
 
 #define asm_abs(as, ir)		asm_fpunary(as, ir, PPCI_FABS)
-#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
 
 static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
 {

+ 1 - 36
src/lj_asm_x86.h

@@ -1843,8 +1843,6 @@ static void asm_cnew(ASMState *as, IRIns *ir)
   asm_gencall(as, ci, args);
   emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
 }
-#else
-#define asm_cnew(as, ir)	((void)0)
 #endif
 
 /* -- Write barriers ------------------------------------------------------ */
@@ -1950,8 +1948,6 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
 		    fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
       ra_left(as, RID_XMM0, ir->op1);
     }
-  } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
-    /* Rejoined to pow(). */
   } else {
     asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
   }
@@ -1986,17 +1982,6 @@ static void asm_fppowi(ASMState *as, IRIns *ir)
   ra_left(as, RID_EAX, ir->op2);
 }
 
-static void asm_pow(ASMState *as, IRIns *ir)
-{
-#if LJ_64 && LJ_HASFFI
-  if (!irt_isnum(ir->t))
-    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
-					  IRCALL_lj_carith_powu64);
-  else
-#endif
-    asm_fppowi(as, ir);
-}
-
 static int asm_swapops(ASMState *as, IRIns *ir)
 {
   IRIns *irl = IR(ir->op1);
@@ -2193,27 +2178,7 @@ static void asm_mul(ASMState *as, IRIns *ir)
     asm_intarith(as, ir, XOg_X_IMUL);
 }
 
-static void asm_div(ASMState *as, IRIns *ir)
-{
-#if LJ_64 && LJ_HASFFI
-  if (!irt_isnum(ir->t))
-    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
-					  IRCALL_lj_carith_divu64);
-  else
-#endif
-    asm_fparith(as, ir, XO_DIVSD);
-}
-
-static void asm_mod(ASMState *as, IRIns *ir)
-{
-#if LJ_64 && LJ_HASFFI
-  if (!irt_isint(ir->t))
-    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
-					  IRCALL_lj_carith_modu64);
-  else
-#endif
-    asm_callid(as, ir, IRCALL_lj_vm_modi);
-}
+#define asm_fpdiv(as, ir)	asm_fparith(as, ir, XO_DIVSD)
 
 static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
 {

+ 1 - 1
src/lj_ir.h

@@ -177,7 +177,7 @@ LJ_STATIC_ASSERT((int)IR_XLOAD + IRDELTA_L2S == (int)IR_XSTORE);
 /* FPMATH sub-functions. ORDER FPM. */
 #define IRFPMDEF(_) \
   _(FLOOR) _(CEIL) _(TRUNC)  /* Must be first and in this order. */ \
-  _(SQRT) _(EXP2) _(LOG) _(LOG2) \
+  _(SQRT) _(LOG) _(LOG2) \
   _(OTHER)
 
 typedef enum {

+ 0 - 1
src/lj_ircall.h

@@ -192,7 +192,6 @@ typedef struct CCallInfo {
   _(FPMATH,	lj_vm_ceil,		1,   N, NUM, XA_FP) \
   _(FPMATH,	lj_vm_trunc,		1,   N, NUM, XA_FP) \
   _(FPMATH,	sqrt,			1,   N, NUM, XA_FP) \
-  _(ANY,	lj_vm_exp2,		1,   N, NUM, XA_FP) \
   _(ANY,	log,			1,   N, NUM, XA_FP) \
   _(ANY,	lj_vm_log2,		1,   N, NUM, XA_FP) \
   _(ANY,	lj_vm_powi,		2,   N, NUM, XA_FP) \

+ 14 - 4
src/lj_opt_fold.c

@@ -237,10 +237,11 @@ LJFOLDF(kfold_fpcall2)
 }
 
 LJFOLD(POW KNUM KINT)
+LJFOLD(POW KNUM KNUM)
 LJFOLDF(kfold_numpow)
 {
   lua_Number a = knumleft;
-  lua_Number b = (lua_Number)fright->i;
+  lua_Number b = fright->o == IR_KINT ? (lua_Number)fright->i : knumright;
   lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
   return lj_ir_knum(J, y);
 }
@@ -1077,7 +1078,7 @@ LJFOLDF(simplify_nummuldiv_negneg)
 }
 
 LJFOLD(POW any KINT)
-LJFOLDF(simplify_numpow_xk)
+LJFOLDF(simplify_numpow_xkint)
 {
   int32_t k = fright->i;
   TRef ref = fins->op1;
@@ -1106,13 +1107,22 @@ LJFOLDF(simplify_numpow_xk)
   return ref;
 }
 
+LJFOLD(POW any KNUM)
+LJFOLDF(simplify_numpow_xknum)
+{
+  if (knumright == 0.5)  /* x ^ 0.5 ==> sqrt(x) */
+    return emitir(IRTN(IR_FPMATH), fins->op1, IRFPM_SQRT);
+  return NEXTFOLD;
+}
+
 LJFOLD(POW KNUM any)
 LJFOLDF(simplify_numpow_kx)
 {
   lua_Number n = knumleft;
-  if (n == 2.0) {  /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
-    fins->o = IR_CONV;
+  if (n == 2.0 && irt_isint(fright->t)) {  /* 2.0 ^ i ==> ldexp(1.0, i) */
 #if LJ_TARGET_X86ORX64
+    /* Different IR_LDEXP calling convention on x86/x64 requires conversion. */
+    fins->o = IR_CONV;
     fins->op1 = fins->op2;
     fins->op2 = IRCONV_NUM_INT;
     fins->op2 = (IRRef1)lj_opt_fold(J);

+ 6 - 14
src/lj_opt_narrow.c

@@ -593,10 +593,10 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
   /* Narrowing must be unconditional to preserve (-x)^i semantics. */
   if (tvisint(vc) || numisint(numV(vc))) {
     int checkrange = 0;
-    /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */
+    /* pow() is faster for bigger exponents. But do this only for (+k)^i. */
     if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) {
       int32_t k = numberVint(vc);
-      if (!(k >= -65536 && k <= 65536)) goto split_pow;
+      if (!(k >= -65536 && k <= 65536)) goto force_pow_num;
       checkrange = 1;
     }
     if (!tref_isinteger(rc)) {
@@ -607,19 +607,11 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
       TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536));
       emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536));
     }
-    return emitir(IRTN(IR_POW), rb, rc);
+  } else {
+force_pow_num:
+    rc = lj_ir_tonum(J, rc);  /* Want POW(num, num), not POW(num, int). */
   }
-split_pow:
-  /* FOLD covers most cases, but some are easier to do here. */
-  if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb)))))
-    return rb;  /* 1 ^ x ==> 1 */
-  rc = lj_ir_tonum(J, rc);
-  if (tref_isk(rc) && ir_knum(IR(tref_ref(rc)))->n == 0.5)
-    return emitir(IRTN(IR_FPMATH), rb, IRFPM_SQRT);  /* x ^ 0.5 ==> sqrt(x) */
-  /* Split up b^c into exp2(c*log2(b)). Assembler may rejoin later. */
-  rb = emitir(IRTN(IR_FPMATH), rb, IRFPM_LOG2);
-  rc = emitir(IRTN(IR_MUL), rb, rc);
-  return emitir(IRTN(IR_FPMATH), rc, IRFPM_EXP2);
+  return emitir(IRTN(IR_POW), rb, rc);
 }
 
 /* -- Predictive narrowing of induction variables ------------------------- */

+ 0 - 21
src/lj_opt_split.c

@@ -403,27 +403,6 @@ static void split_ir(jit_State *J)
 	hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
 	break;
       case IR_FPMATH:
-	/* Try to rejoin pow from EXP2, MUL and LOG2. */
-	if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) {
-	  IRIns *irp = IR(nir->op1);
-	  if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) {
-	    IRIns *irm4 = IR(irp->op1);
-	    IRIns *irm3 = IR(irm4->op1);
-	    IRIns *irm12 = IR(irm3->op1);
-	    IRIns *irl1 = IR(irm12->op1);
-	    if (irm12->op1 > J->loopref && irl1->o == IR_CALLN &&
-		irl1->op2 == IRCALL_lj_vm_log2) {
-	      IRRef tmp = irl1->op1;  /* Recycle first two args from LOG2. */
-	      IRRef arg3 = irm3->op2, arg4 = irm4->op2;
-	      J->cur.nins--;
-	      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3);
-	      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4);
-	      ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow);
-	      hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
-	      break;
-	    }
-	  }
-	}
 	hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
 	break;
       case IR_LDEXP:

+ 0 - 5
src/lj_vm.h

@@ -92,11 +92,6 @@ LJ_ASMF double lj_vm_trunc(double);
 LJ_ASMF double lj_vm_trunc_sf(double);
 #endif
 #endif
-#ifdef LUAJIT_NO_EXP2
-LJ_ASMF double lj_vm_exp2(double);
-#else
-#define lj_vm_exp2	exp2
-#endif
 #if LJ_HASFFI
 LJ_ASMF int lj_vm_errno(void);
 #endif

+ 0 - 8
src/lj_vmmath.c

@@ -79,13 +79,6 @@ double lj_vm_log2(double a)
 }
 #endif
 
-#ifdef LUAJIT_NO_EXP2
-double lj_vm_exp2(double a)
-{
-  return exp(a * 0.6931471805599453);
-}
-#endif
-
 #if !LJ_TARGET_X86ORX64
 /* Unsigned x^k. */
 static double lj_vm_powui(double x, uint32_t k)
@@ -128,7 +121,6 @@ double lj_vm_foldfpm(double x, int fpm)
   case IRFPM_CEIL: return lj_vm_ceil(x);
   case IRFPM_TRUNC: return lj_vm_trunc(x);
   case IRFPM_SQRT: return sqrt(x);
-  case IRFPM_EXP2: return lj_vm_exp2(x);
   case IRFPM_LOG: return log(x);
   case IRFPM_LOG2: return lj_vm_log2(x);
   default: lua_assert(0);