
Refactoring of conversion ops, part 3: add FOLD rules for IR_CONV.

Mike Pall, 14 years ago
parent commit 1f26961092

2 changed files with 226 additions and 35 deletions:
  1. src/buildvm_fold.c  (+4 -4)
  2. src/lj_opt_fold.c  (+222 -31)

+4 -4   src/buildvm_fold.c

@@ -124,7 +124,7 @@ static uint32_t nexttoken(char **pp, int allowlit, int allowany)
       if (*p == '\0')
 	return i;
     } else if (allowany && !strcmp("any", p)) {
-      return 0xff;
+      return allowany;
     } else {
       for (i = 0; ir_names[i]; i++)
 	if (!strcmp(ir_names[i], p))
@@ -140,9 +140,9 @@ static uint32_t nexttoken(char **pp, int allowlit, int allowany)
 static void foldrule(char *p)
 {
   uint32_t op = nexttoken(&p, 0, 0);
-  uint32_t left = nexttoken(&p, 0, 1);
-  uint32_t right = nexttoken(&p, 1, 1);
-  uint32_t key = (funcidx << 24) | (op << 16) | (left << 8) | right;
+  uint32_t left = nexttoken(&p, 0, 0x7f);
+  uint32_t right = nexttoken(&p, 1, 0x3ff);
+  uint32_t key = (funcidx << 24) | (op << 17) | (left << 10) | right;
   uint32_t i;
   if (nkeys >= BUILD_MAX_FOLD) {
     fprintf(stderr, "Error: too many fold rules, increase BUILD_MAX_FOLD.\n");
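
The foldrule() change above widens the key layout: the opcode fields shrink to 7 bits each and the right-hand slot grows to 10 bits so that CONV's larger mode literal still fits, with "any" wildcards encoded as all-ones fields (0x7f on the left, 0x3ff on the right). A minimal sketch of the packing, using an invented helper name but the exact shifts from the hunk:

  #include <stdint.h>

  /* make_foldkey() is a name made up for this sketch; the shifts mirror
  ** the foldrule() change above: 8 bit funcidx, 7 bit op, 7 bit left
  ** opcode, 10 bit right opcode/literal field.
  */
  static uint32_t make_foldkey(uint32_t funcidx, uint32_t op,
                               uint32_t left, uint32_t right)
  {
    return (funcidx << 24) | (op << 17) | (left << 10) | right;
  }

  enum { FOLD_ANY_LEFT = 0x7f, FOLD_ANY_RIGHT = 0x3ff };  /* "any" wildcards */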

+222 -31   src/lj_opt_fold.c

@@ -489,6 +489,73 @@ LJFOLDF(kfold_toi64_knum)
   return INT64FOLD((uint64_t)(int64_t)knumleft);
 }
 
+LJFOLD(CONV KINT IRCONV_NUM_INT)
+LJFOLDF(kfold_conv_kint_num)
+{
+  return lj_ir_knum(J, cast_num(fleft->i));
+}
+
+LJFOLD(CONV KINT IRCONV_I64_INT)
+LJFOLD(CONV KINT IRCONV_U64_INT)
+LJFOLDF(kfold_conv_kint_i64)
+{
+  return INT64FOLD((uint64_t)(int64_t)fleft->i);
+}
+
+LJFOLD(CONV KINT64 IRCONV_NUM_I64)
+LJFOLDF(kfold_conv_kint64_num_i64)
+{
+  return lj_ir_knum(J, cast_num((int64_t)ir_kint64(fleft)->u64));
+}
+
+LJFOLD(CONV KINT64 IRCONV_NUM_U64)
+LJFOLDF(kfold_conv_kint64_num_u64)
+{
+  return lj_ir_knum(J, cast_num(ir_kint64(fleft)->u64));
+}
+
+LJFOLD(CONV KINT64 IRCONV_INT_I64)
+LJFOLD(CONV KINT64 IRCONV_U32_I64)
+LJFOLDF(kfold_conv_kint64_int_i64)
+{
+  return INTFOLD((int32_t)ir_kint64(fleft)->u64);
+}
+
+LJFOLD(CONV KNUM IRCONV_INT_NUM)
+LJFOLDF(kfold_conv_knum_int_num)
+{
+  lua_Number n = knumleft;
+  if (!(fins->op2 & IRCONV_TRUNC)) {
+    int32_t k = lj_num2int(n);
+    if (irt_isguard(fins->t) && n != cast_num(k)) {
+      /* We're about to create a guard which always fails, like CONV +1.5.
+      ** Some pathological loops cause this during LICM, e.g.:
+      **   local x,k,t = 0,1.5,{1,[1.5]=2}
+      **   for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
+      **   assert(x == 300)
+      */
+      return FAILFOLD;
+    }
+    return INTFOLD(k);
+  } else {
+    return INTFOLD((int32_t)n);
+  }
+}
+
+LJFOLD(CONV KNUM IRCONV_I64_NUM)
+LJFOLDF(kfold_conv_knum_i64_num)
+{
+  lua_assert((fins->op2 & IRCONV_TRUNC));
+  return INT64FOLD((uint64_t)(int64_t)knumleft);
+}
+
+LJFOLD(CONV KNUM IRCONV_U64_NUM)
+LJFOLDF(kfold_conv_knum_u64_num)
+{
+  lua_assert((fins->op2 & IRCONV_TRUNC));
+  return INT64FOLD(lj_num2u64(knumleft));
+}
+
 LJFOLD(TOSTR KNUM)
 LJFOLDF(kfold_tostr_knum)
 {
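
Among the constant-folding rules added above, kfold_conv_knum_int_num is the only one that can refuse to fold: without IRCONV_TRUNC, a guarded conversion of a non-integral constant could never pass its guard, so it returns FAILFOLD. A standalone sketch of that decision, with an invented function name and a plain cast standing in for lj_num2int():

  #include <stdint.h>

  /* Sketch only, not LuaJIT code: returns 0 where the rule above returns
  ** FAILFOLD (the guard could never succeed) and 1 where it folds the
  ** constant to *out.
  */
  static int fold_knum_to_int(double n, int truncating, int guarded,
                              int32_t *out)
  {
    if (!truncating) {
      int32_t k = (int32_t)n;        /* stand-in for lj_num2int(n) */
      if (guarded && n != (double)k)
        return 0;                    /* e.g. n = 1.5: guard always fails */
      *out = k;
    } else {
      *out = (int32_t)n;             /* truncating conversion */
    }
    return 1;
  }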
@@ -740,8 +807,152 @@ LJFOLDF(simplify_powi_kx)
   return NEXTFOLD;
 }
 
-/* -- FP conversion narrowing --------------------------------------------- */
+/* -- Simplify conversions ------------------------------------------------ */
 
+LJFOLD(CONV CONV IRCONV_NUM_INT)  /* _NUM */
+LJFOLDF(shortcut_conv_num_int)
+{
+  PHIBARRIER(fleft);
+  /* Only safe with a guarded conversion to int. */
+  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t))
+    return fleft->op1;  /* f(g(x)) ==> x */
+  return NEXTFOLD;
+}
+
+LJFOLD(CONV CONV IRCONV_INT_NUM)  /* _INT */
+LJFOLDF(simplify_conv_int_num)
+{
+  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
+  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT)
+    return fleft->op1;
+  return NEXTFOLD;
+}
+
+LJFOLD(CONV CONV IRCONV_U32_NUM)  /* _U32*/
+LJFOLDF(simplify_conv_u32_num)
+{
+  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
+  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
+    return fleft->op1;
+  return NEXTFOLD;
+}
+
+LJFOLD(CONV CONV IRCONV_I64_NUM)  /* _INT or _U32*/
+LJFOLDF(simplify_conv_i64_num)
+{
+  PHIBARRIER(fleft);
+  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
+    /* Reduce to a sign-extension. */
+    fins->op1 = fleft->op1;
+    fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
+    return RETRYFOLD;
+  } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
+#if LJ_TARGET_X64
+    return fleft->op1;
+#else
+    /* Reduce to a zero-extension. */
+    fins->op1 = fleft->op1;
+    fins->op2 = (IRT_I64<<5)|IRT_U32;
+    return RETRYFOLD;
+#endif
+  }
+  return NEXTFOLD;
+}
+
+LJFOLD(CONV CONV IRCONV_U64_NUM)  /* _U32*/
+LJFOLDF(simplify_conv_u64_num)
+{
+  PHIBARRIER(fleft);
+  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
+#if LJ_TARGET_X64
+    return fleft->op1;
+#else
+    /* Reduce to a zero-extension. */
+    fins->op1 = fleft->op1;
+    fins->op2 = (IRT_U64<<5)|IRT_U32;
+    return RETRYFOLD;
+#endif
+  }
+  return NEXTFOLD;
+}
+
+/* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
+LJFOLD(TOBIT CONV KNUM)
+LJFOLDF(simplify_tobit_conv)
+{
+  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
+      (fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
+    /* Fold even across PHI to avoid expensive num->int conversions in loop. */
+    lua_assert(irt_isnum(fleft->t));
+    return fleft->op1;
+  }
+  return NEXTFOLD;
+}
+
+/* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
+LJFOLD(FPMATH CONV IRFPM_FLOOR)
+LJFOLD(FPMATH CONV IRFPM_CEIL)
+LJFOLD(FPMATH CONV IRFPM_TRUNC)
+LJFOLDF(simplify_floor_conv)
+{
+  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
+      (fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
+    return LEFTFOLD;
+  return NEXTFOLD;
+}
+
+/* Strength reduction of widening. */
+LJFOLD(CONV any IRCONV_I64_INT)
+LJFOLDF(simplify_conv_sext)
+{
+  IRRef ref = fins->op1;
+  int64_t ofs = 0;
+  if (!(fins->op2 & IRCONV_SEXT))
+    return NEXTFOLD;
+  PHIBARRIER(fleft);
+  if (fleft->o == IR_ADD && irref_isk(fleft->op2)) {
+    ofs = (int64_t)IR(fleft->op2)->i;
+    ref = fleft->op1;
+  }
+  /* Use scalar evolution analysis results to strength-reduce sign-extension. */
+  if (ref == J->scev.idx) {
+    IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
+    lua_assert(irt_isint(J->scev.t));
+    if (lo && IR(lo)->i + ofs >= 0) {
+#if LJ_TARGET_X64
+      /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
+      return LEFTFOLD;
+#else
+      /* Reduce to a (cheaper) zero-extension. */
+      fins->op2 &= ~IRCONV_SEXT;
+      return RETRYFOLD;
+#endif
+    }
+  }
+  return NEXTFOLD;
+}
+
+/* Special CSE rule for CONV. */
+LJFOLD(CONV any any)
+LJFOLDF(cse_conv)
+{
+  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
+    IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
+    uint8_t guard = irt_isguard(fins->t);
+    IRRef ref = J->chain[IR_CONV];
+    while (ref > op1) {
+      IRIns *ir = IR(ref);
+      /* Commoning with stronger checks is ok. */
+      if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
+	  irt_isguard(ir->t) >= guard)
+	return ref;
+      ref = ir->prev;
+    }
+  }
+  return EMITFOLD;  /* No fallthrough to regular CSE. */
+}
+
+/* FP conversion narrowing. */
 LJFOLD(TOINT ADD any)
 LJFOLD(TOINT SUB any)
 LJFOLD(TOBIT ADD KNUM)
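
All of the simplification rules above key off the CONV operand word. Going by the constants used in the hunk, e.g. (IRT_I64<<5)|IRT_INT|IRCONV_SEXT and (op2 & IRCONV_SRCMASK) == IRT_INT, op2 packs the destination IR type shifted left by 5, the source type in the low bits, and mode flags such as IRCONV_SEXT or IRCONV_TRUNC on top. A small illustration with invented helper names; the 5-bit source mask is an assumption read off those checks:

  #include <stdint.h>

  #define CONV_DSTSHIFT  5      /* destination type field, as in (IRT_I64<<5) */
  #define CONV_SRCMASK   0x1fu  /* assumed width of the IRCONV_SRCMASK field */

  /* conv_mode()/conv_srctype() exist only for this sketch. */
  static uint32_t conv_mode(uint32_t dsttype, uint32_t srctype, uint32_t flags)
  {
    return (dsttype << CONV_DSTSHIFT) | srctype | flags;
  }

  static uint32_t conv_srctype(uint32_t op2)
  {
    return op2 & CONV_SRCMASK;
  }

So simplify_conv_i64_num, for instance, turns CONV.i64.num(CONV.num.int(x)) into a single sign-extending CONV.i64.int(x) by rewriting op1/op2 and retrying the fold.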
@@ -771,26 +982,6 @@ LJFOLDF(cse_toint)
   return EMITFOLD;  /* No fallthrough to regular CSE. */
 }
 
-/* Special CSE rule for CONV. */
-LJFOLD(CONV any any)
-LJFOLDF(cse_conv)
-{
-  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
-    IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
-    uint8_t guard = irt_isguard(fins->t);
-    IRRef ref = J->chain[IR_CONV];
-    while (ref > op1) {
-      IRIns *ir = IR(ref);
-      /* Commoning with stronger checks is ok. */
-      if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
-	  irt_isguard(ir->t) >= guard)
-	return ref;
-      ref = ir->prev;
-    }
-  }
-  return EMITFOLD;  /* No fallthrough to regular CSE. */
-}
-
 /* -- Strength reduction of widening -------------------------------------- */
 
 LJFOLD(TOI64 any 3)  /* IRTOINT_ZEXT64 */
@@ -1723,12 +1914,12 @@ LJFOLDX(lj_ir_emit)
 
 /* Every entry in the generated hash table is a 32 bit pattern:
 **
-** xxxxxxxx iiiiiiii llllllll rrrrrrrr
+** xxxxxxxx iiiiiii lllllll rrrrrrrrrr
 **
-** xxxxxxxx = 8 bit index into fold function table
-** iiiiiiii = 8 bit folded instruction opcode
-** llllllll = 8 bit left instruction opcode
-** rrrrrrrr = 8 bit right instruction opcode or 8 bits from literal field
+**   xxxxxxxx = 8 bit index into fold function table
+**    iiiiiii = 7 bit folded instruction opcode
+**    lllllll = 7 bit left instruction opcode
+** rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field
 */
 
 #include "lj_folddef.h"
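
To make the widened layout concrete, here is a small sketch that unpacks one table entry using the field widths documented in the comment above; the struct and function names are invented for illustration:

  #include <stdint.h>

  typedef struct {
    uint32_t funcidx;  /* 8 bit index into fold function table */
    uint32_t op;       /* 7 bit folded instruction opcode */
    uint32_t left;     /* 7 bit left instruction opcode */
    uint32_t right;    /* 10 bit right opcode or literal bits */
  } FoldKeyFields;

  static FoldKeyFields foldkey_decode(uint32_t entry)
  {
    FoldKeyFields f;
    f.funcidx = entry >> 24;
    f.op = (entry >> 17) & 0x7f;
    f.left = (entry >> 10) & 0x7f;
    f.right = entry & 0x3ff;
    return f;
  }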
@@ -1762,9 +1953,9 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
   /* Fold engine start/retry point. */
 retry:
   /* Construct key from opcode and operand opcodes (unless literal/none). */
-  key = ((uint32_t)fins->o << 16);
+  key = ((uint32_t)fins->o << 17);
   if (fins->op1 >= J->cur.nk) {
-    key += (uint32_t)IR(fins->op1)->o << 8;
+    key += (uint32_t)IR(fins->op1)->o << 10;
     *fleft = *IR(fins->op1);
   }
   if (fins->op2 >= J->cur.nk) {
@@ -1777,7 +1968,7 @@ retry:
   /* Check for a match in order from most specific to least specific. */
   any = 0;
   for (;;) {
-    uint32_t k = key | any;
+    uint32_t k = key | (any & 0x1ffff);
     uint32_t h = fold_hashkey(k);
     uint32_t fh = fold_hash[h];  /* Lookup key in semi-perfect hash table. */
     if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
@@ -1785,9 +1976,9 @@ retry:
       if (ref != NEXTFOLD)
 	break;
     }
-    if (any == 0xffff)  /* Exhausted folding. Pass on to CSE. */
+    if (any == 0xfffff)  /* Exhausted folding. Pass on to CSE. */
       return lj_opt_cse(J);
-    any = (any | (any >> 8)) ^ 0xff00;
+    any = (any | (any >> 10)) ^ 0xffc00;
   }
 
   /* Return value processing, ordered by frequency. */
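
The rewritten wildcard stepping in the lookup loop above tries the key from most to least specific before handing the instruction to regular CSE. A standalone program (not LuaJIT code) that reproduces the sequence of OR-masks, assuming the field layout from the key construction earlier in the hunk (left opcode at bits 10-16, right at bits 0-9):

  #include <stdio.h>
  #include <stdint.h>

  int main(void)
  {
    uint32_t any = 0;
    for (;;) {
      printf("OR-mask 0x%05x  (left %s, right %s)\n", any & 0x1ffff,
             (any & 0x1fc00) ? "any" : "exact",
             (any & 0x3ff) ? "any" : "exact");
      if (any == 0xfffff) break;           /* exhausted: fall back to CSE */
      any = (any | (any >> 10)) ^ 0xffc00;
    }
    return 0;
  }

This prints 0x00000 (exact/exact), 0x1fc00 (any left), 0x003ff (any right) and 0x1ffff (any/any), i.e. an exact match on both operands is tried first and the fully wildcarded rule last.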