Browse Source

Add narrowing of modulo operator.

Mike Pall 14 years ago
parent
commit
8c5935000d
9 changed files with 76 additions and 16 deletions
  1. 12 1
      src/lj_asm_arm.h
  2. 15 4
      src/lj_asm_x86.h
  3. 1 0
      src/lj_ircall.h
  4. 1 1
      src/lj_iropt.h
  5. 23 1
      src/lj_opt_fold.c
  6. 8 8
      src/lj_opt_narrow.c
  7. 1 1
      src/lj_record.c
  8. 1 0
      src/lj_vm.h
  9. 14 0
      src/lj_vmmath.c

+ 12 - 1
src/lj_asm_arm.h

@@ -1110,6 +1110,16 @@ static void asm_intmul(ASMState *as, IRIns *ir)
   if (ra_hasreg(tmp)) emit_dm(as, ARMI_MOV, tmp, right);
 }
 
+static void asm_intmod(ASMState *as, IRIns *ir)  /* Assemble the IR instruction `ir` as a call to lj_vm_modi. */
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi];  /* Call descriptor selected by IRCALL_lj_vm_modi. */
+  IRRef args[2];  /* The two call arguments, in operand order. */
+  args[0] = ir->op1;  /* First argument: left operand of the instruction. */
+  args[1] = ir->op2;  /* Second argument: right operand of the instruction. */
+  asm_setupresult(as, ir, ci);  /* NOTE(review): presumably binds the call result to ir; helper not visible here. */
+  asm_gencall(as, ci, args);  /* NOTE(review): presumably emits the call itself; helper not visible here. */
+}
+
 static void asm_bitswap(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1652,6 +1662,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
   case IR_ADD: case IR_ADDOV: asm_arithop(as, ir, ARMI_ADD); break;
   case IR_SUB: case IR_SUBOV: asm_arithop(as, ir, ARMI_SUB); break;
   case IR_MUL: case IR_MULOV: asm_intmul(as, ir); break;
+  case IR_MOD: asm_intmod(as, ir); break;
 
   case IR_NEG: asm_intneg(as, ir, ARMI_RSB); break;
 
@@ -1659,7 +1670,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
   case IR_MAX: asm_intmin_max(as, ir, CC_LT); break;
 
   case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
-  case IR_DIV: case IR_MOD: case IR_POW: case IR_ABS: case IR_TOBIT:
+  case IR_DIV: case IR_POW: case IR_ABS: case IR_TOBIT:
     lua_assert(0);  /* Unused for LJ_SOFTFP. */
     break;
 

+ 15 - 4
src/lj_asm_x86.h

@@ -1651,6 +1651,16 @@ static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id)
 }
 #endif
 
+static void asm_intmod(ASMState *as, IRIns *ir)  /* Assemble the IR instruction `ir` as a call to lj_vm_modi. */
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi];  /* Call descriptor selected by IRCALL_lj_vm_modi. */
+  IRRef args[2];  /* The two call arguments, in operand order. */
+  args[0] = ir->op1;  /* First argument: left operand of the instruction. */
+  args[1] = ir->op2;  /* Second argument: right operand of the instruction. */
+  asm_setupresult(as, ir, ci);  /* NOTE(review): presumably binds the call result to ir; helper not visible here. */
+  asm_gencall(as, ci, args);  /* NOTE(review): presumably emits the call itself; helper not visible here. */
+}
+
 static int asm_swapops(ASMState *as, IRIns *ir)
 {
   IRIns *irl = IR(ir->op1);
@@ -2499,11 +2509,12 @@ static void asm_ir(ASMState *as, IRIns *ir)
     break;
   case IR_MOD:
 #if LJ_64 && LJ_HASFFI
-    asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
-					   IRCALL_lj_carith_modu64);
-#else
-    lua_assert(0);
+    if (!irt_isint(ir->t))
+      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+					     IRCALL_lj_carith_modu64);
+    else
 #endif
+      asm_intmod(as, ir);
     break;
 
   case IR_NEG:

+ 1 - 0
src/lj_ircall.h

@@ -143,6 +143,7 @@ typedef struct CCallInfo {
   _(lj_gc_barrieruv,	2,  FS, NIL, 0) \
   _(lj_mem_newgco,	2,  FS, P32, CCI_L) \
   _(lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \
+  _(lj_vm_modi,		2,  FN, INT, 0) \
   IRCALLDEF_SOFTFP(_) \
   IRCALLDEF_FPMATH(_) \
   IRCALLDEF_FFI(_) \

+ 1 - 1
src/lj_iropt.h

@@ -141,7 +141,7 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef key);
 LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
 				 TValue *vb, TValue *vc, IROp op);
 LJ_FUNC TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc);
-LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc);
+LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc);
 LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc);
 LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase);
 

+ 23 - 1
src/lj_opt_fold.c

@@ -231,6 +231,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
   case IR_ADD: k1 += k2; break;
   case IR_SUB: k1 -= k2; break;
   case IR_MUL: k1 *= k2; break;
+  case IR_MOD: k1 = lj_vm_modi(k1, k2); break;
   case IR_BAND: k1 &= k2; break;
   case IR_BOR: k1 |= k2; break;
   case IR_BXOR: k1 ^= k2; break;
@@ -249,6 +250,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
 LJFOLD(ADD KINT KINT)
 LJFOLD(SUB KINT KINT)
 LJFOLD(MUL KINT KINT)
+LJFOLD(MOD KINT KINT)
 LJFOLD(BAND KINT KINT)
 LJFOLD(BOR KINT KINT)
 LJFOLD(BXOR KINT KINT)
@@ -1148,7 +1150,6 @@ LJFOLDF(simplify_intmul_k32)
 
 LJFOLD(MUL any KINT64)
 LJFOLDF(simplify_intmul_k64)
-
 {
   if (ir_kint64(fright)->u64 == 0)  /* i * 0 ==> 0 */
     return INT64FOLD(0);
@@ -1160,6 +1161,27 @@ LJFOLDF(simplify_intmul_k64)
   return NEXTFOLD;
 }
 
+LJFOLD(MOD any KINT)
+LJFOLDF(simplify_intmod_k)
+{  /* Rewrites MOD by a constant positive power of two into a BAND with (constant - 1). */
+  int32_t k = fright->i;  /* Value of the constant right operand. */
+  lua_assert(k != 0);  /* A zero divisor is not expected to reach this rule. */
+  if (k > 0 && (k & (k-1)) == 0) {  /* k is a positive power of two: i % k ==> i & (k-1) */
+    fins->o = IR_BAND;  /* Change the opcode of the instruction being folded in place. */
+    fins->op2 = lj_ir_kint(J, k-1);  /* Replace the right operand with the constant mask k-1. */
+    return RETRYFOLD;  /* NOTE(review): presumably re-runs folding on the rewritten instruction; confirm. */
+  }
+  return NEXTFOLD;  /* No rewrite applied here. */
+}
+
+LJFOLD(MOD KINT any)
+LJFOLDF(simplify_intmod_kleft)
+{  /* Folds MOD with a constant zero left operand to the integer constant 0. */
+  if (fleft->i == 0)  /* 0 % i ==> 0 */
+    return INTFOLD(0);
+  return NEXTFOLD;  /* Non-zero left constant: no rewrite applied here. */
+}
+
 LJFOLD(SUB any any)
 LJFOLD(SUBOV any any)
 LJFOLDF(simplify_intsub)

+ 8 - 8
src/lj_opt_narrow.c

@@ -551,16 +551,16 @@ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc)
 }
 
 /* Narrowing of modulo operator. */
-TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc)
+TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc)
 {
   TRef tmp;
-  if ((J->flags & JIT_F_OPT_NARROW) &&
-      tref_isk(rc) && tref_isint(rc)) {  /* Optimize x % k. */
-    int32_t k = IR(tref_ref(rc))->i;
-    if (k > 0 && (k & (k-1)) == 0) {  /* i % 2^k ==> band(i, 2^k-1) */
-      if (tref_isinteger(rb))
-	return emitir(IRTI(IR_BAND), rb, lj_ir_kint(J, k-1));
-    }
+  if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc))
+    lj_trace_err(J, LJ_TRERR_BADTYPE);
+  if ((LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) &&
+      tref_isinteger(rb) && tref_isinteger(rc) &&
+      (tvisint(vc) ? intV(vc) != 0 : !tviszero(vc))) {
+    emitir(IRTGI(IR_NE), rc, lj_ir_kint(J, 0));
+    return emitir(IRTI(IR_MOD), rb, rc);
   }
   /* b % c ==> b - floor(b/c)*c */
   rb = lj_ir_tonum(J, rb);

+ 1 - 1
src/lj_record.c

@@ -1715,7 +1715,7 @@ void lj_record_ins(jit_State *J)
   case BC_MODVN: case BC_MODVV:
   recmod:
     if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
-      rc = lj_opt_narrow_mod(J, rb, rc);
+      rc = lj_opt_narrow_mod(J, rb, rc, rcv);
     else
       rc = rec_mm_arith(J, &ix, MM_mod);
     break;

+ 1 - 0
src/lj_vm.h

@@ -71,6 +71,7 @@ LJ_ASMF double lj_vm_exp2(double);
 #define lj_vm_exp2	exp2
 #endif
 #endif
+LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
 #endif
 
 /* Continuations for metamethods. */

+ 14 - 0
src/lj_vmmath.c

@@ -39,6 +39,20 @@ double lj_vm_exp2(double a)
 }
 #endif
 
+#if !LJ_TARGET_ARM  /* This C version is compiled only when LJ_TARGET_ARM is not set. */
+int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)  /* Floored integer modulo: the result is zero or has the sign of b. */
+{
+  uint32_t y, ua, ub;  /* Work in unsigned arithmetic so negation wraps instead of overflowing. */
+  lua_assert(b != 0);  /* This must be checked before using this function. */
+  ua = a < 0 ? -(uint32_t)a : (uint32_t)a;  /* Magnitude of a. */
+  ub = b < 0 ? -(uint32_t)b : (uint32_t)b;  /* Magnitude of b. */
+  y = ua % ub;  /* Remainder of the magnitudes, in the range 0 .. ub-1. */
+  if (y != 0 && (a^b) < 0) y = y - ub;  /* Operand signs differ and remainder is non-zero: subtract |b|. */
+  if (((int32_t)y^b) < 0) y = -y;  /* Sign bit of y differs from that of b: negate y. */
+  return (int32_t)y;
+}
+#endif
+
 #if !LJ_TARGET_X86ORX64
 /* Unsigned x^k. */
 static double lj_vm_powui(double x, uint32_t k)