Ver código fonte

Improve FOR loop const specialization and integerness checks.

Mike Pall 15 anos atrás
pai
commit
b6e4fde0dc
2 arquivos alterados com 68 adições e 34 exclusões
  1. 7 5
      src/lj_asm.c
  2. 61 29
      src/lj_record.c

+ 7 - 5
src/lj_asm.c

@@ -1893,9 +1893,8 @@ static void asm_sload(ASMState *as, IRIns *ir)
   IRType1 t = ir->t;
   Reg base;
   lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
-  lua_assert(!irt_isguard(ir->t) ==
-	     !((ir->op2 & IRSLOAD_TYPECHECK) || irt_isint(t)));
-  if (irt_isint(t)) {
+  lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
+  if (irt_isint(t) && irt_isguard(t)) {
     Reg left = ra_scratch(as, RSET_FPR);
     asm_tointg(as, ir, left);  /* Frees dest reg. Do this before base alloc. */
     base = ra_alloc1(as, REF_BASE, RSET_GPR);
@@ -1904,9 +1903,12 @@ static void asm_sload(ASMState *as, IRIns *ir)
   } else if (ra_used(ir)) {
     RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     Reg dest = ra_dest(as, ir, allow);
-    lua_assert(irt_isnum(ir->t) || irt_isaddr(ir->t));
     base = ra_alloc1(as, REF_BASE, RSET_GPR);
-    emit_movrmro(as, dest, base, ofs);
+    lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
+    if (irt_isint(t))
+      emit_rmro(as, XO_CVTSD2SI, dest, base, ofs);
+    else
+      emit_movrmro(as, dest, base, ofs);
   } else {
     if (!(ir->op2 & IRSLOAD_TYPECHECK))
       return;  /* No type check: avoid base alloc. */

+ 61 - 29
src/lj_record.c

@@ -249,33 +249,65 @@ nocanon:
   J->mergesnap = 1;  /* In case recording continues. */
 }
 
-/* Peek before FORI to find a const initializer, otherwise load from slot. */
-static TRef fori_arg(jit_State *J, const BCIns *pc, BCReg slot, IRType t)
-{
-  /* A store to slot-1 means there's no conditional assignment for slot. */
-  if (bc_a(pc[-1]) == slot-1 && bcmode_a(bc_op(pc[-1])) == BCMdst) {
-    BCIns ins = pc[0];
-    if (bc_a(ins) == slot) {
-      if (bc_op(ins) == BC_KSHORT) {
-	int32_t k = (int32_t)(int16_t)bc_d(ins);
-	if (t == IRT_INT)
-	  return lj_ir_kint(J, k);
-	else
-	  return lj_ir_knum(J, cast_num(k));
-      } else if (bc_op(ins) == BC_KNUM) {
-	lua_Number n = proto_knum(J->pt, bc_d(ins));
-	if (t == IRT_INT)
-	  return lj_ir_kint(J, lj_num2int(n));
-	else
-	  return lj_ir_knum(J, n);
+/* Search bytecode backwards for an int/num constant slot initializer. */
+static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t)
+{
+  /* This algorithm is rather simplistic and assumes quite a bit about
+  ** how the bytecode is generated. It works fine for FORI initializers,
+  ** but it won't necessarily work in other cases (e.g. iterator arguments).
+  ** It doesn't do anything fancy, either (like backpropagating MOVs).
+  */
+  const BCIns *pc, *startpc = proto_bc(J->pt);
+  for (pc = endpc-1; pc > startpc; pc--) {
+    BCIns ins = *pc;
+    BCOp op = bc_op(ins);
+    /* First try to find the last instruction that stores to this slot. */
+    if (bcmode_a(op) == BCMbase && bc_a(ins) <= slot) {
+      return 0;  /* Multiple results, e.g. from a CALL or KNIL. */
+    } else if (bcmode_a(op) == BCMdst && bc_a(ins) == slot) {
+      if (op == BC_KSHORT || op == BC_KNUM) {  /* Found const. initializer. */
+	/* Now try to verify there's no forward jump across it. */
+	const BCIns *kpc = pc;
+	for ( ; pc > startpc; pc--)
+	  if (bc_op(*pc) == BC_JMP) {
+	    const BCIns *target = pc+bc_j(*pc)+1;
+	    if (target > kpc && target <= endpc)
+	      return 0;  /* Conditional assignment. */
+	  }
+	if (op == BC_KSHORT) {
+	  int32_t k = (int32_t)(int16_t)bc_d(ins);
+	  return t == IRT_INT ? lj_ir_kint(J, k) : lj_ir_knum(J, cast_num(k));
+	} else {
+	  lua_Number n = proto_knum(J->pt, bc_d(ins));
+	  if (t == IRT_INT) {
+	    int32_t k = lj_num2int(n);
+	    if (n == cast_num(k))  /* -0 is ok here. */
+	      return lj_ir_kint(J, k);
+	    return 0;  /* Type mismatch. */
+	  } else {
+	    return lj_ir_knum(J, n);
+	  }
+	}
       }
+      return 0;  /* Non-constant initializer. */
     }
   }
-  if (J->base[slot])
-    return J->base[slot];
-  if (t == IRT_INT)
-    t |= IRT_GUARD;
-  return sloadt(J, (int32_t)slot, t, IRSLOAD_READONLY|IRSLOAD_INHERIT);
+  return 0;  /* No assignment to this slot found? */
+}
+
+/* Peek before FORI to find a const initializer. Otherwise load from slot. */
+static TRef fori_arg(jit_State *J, const BCIns *fori, BCReg slot, IRType t)
+{
+  TRef tr = find_kinit(J, fori, slot, t);
+  if (!tr) {
+    tr = J->base[slot];
+    if (!tr) {
+      if (t == IRT_INT)
+	t |= IRT_GUARD;
+      tr = sloadt(J, (int32_t)slot, t, IRSLOAD_READONLY|IRSLOAD_INHERIT);
+    }
+  }
+  return tr;
 }
 
 /* Simulate the runtime behavior of the FOR loop iterator.
@@ -311,8 +343,8 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
     idx = tr[FORL_IDX];
     if (!idx) idx = sloadt(J, (int32_t)(ra+FORL_IDX), IRT_NUM, 0);
     t = tref_type(idx);
-    stop = fori_arg(J, fori-2, ra+FORL_STOP, t);
-    step = fori_arg(J, fori-1, ra+FORL_STEP, t);
+    stop = fori_arg(J, fori, ra+FORL_STOP, t);
+    step = fori_arg(J, fori, ra+FORL_STEP, t);
     tr[FORL_IDX] = idx = emitir(IRT(IR_ADD, t), idx, step);
   } else {  /* Handle FORI/JFORI opcodes. */
     BCReg i;
@@ -2134,8 +2166,8 @@ static void rec_setup_forl(jit_State *J, const BCIns *fori)
   cTValue *forbase = &J->L->base[ra];
   IRType t = (J->flags & JIT_F_OPT_NARROW) ? lj_opt_narrow_forl(forbase)
 					   : IRT_NUM;
-  TRef stop = fori_arg(J, fori-2, ra+FORL_STOP, t);
-  TRef step = fori_arg(J, fori-1, ra+FORL_STEP, t);
+  TRef stop = fori_arg(J, fori, ra+FORL_STOP, t);
+  TRef step = fori_arg(J, fori, ra+FORL_STEP, t);
   int dir = (0 <= numV(&forbase[FORL_STEP]));
   lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI);
   if (!tref_isk(step)) {
@@ -2165,7 +2197,7 @@ static void rec_setup_forl(jit_State *J, const BCIns *fori)
     k = (int32_t)(dir ? 0x7fffffff : 0x80000000) - k;
     emitir(IRTGI(dir ? IR_LE : IR_GE), stop, lj_ir_kint(J, k));
   }
-  if (t == IRT_INT)
+  if (t == IRT_INT && !find_kinit(J, fori, ra+FORL_IDX, IRT_INT))
     t |= IRT_GUARD;
   J->base[ra+FORL_EXT] = sloadt(J, (int32_t)(ra+FORL_IDX), t, IRSLOAD_INHERIT);
   J->maxslot = ra+FORL_EXT+1;