
ARM: Fuse XLOAD/XSTORE operands.

Mike Pall · 14 years ago · commit 7ad4908a48
2 changed files with 145 additions and 52 deletions:
  src/lj_asm_arm.h  +138 -52
  src/lj_crecord.c  +7 -0

src/lj_asm_arm.h  +138 -52

@@ -261,6 +261,83 @@ static uint32_t asm_fuseopm(ASMState *as, ARMIns ai, IRRef ref, RegSet allow)
   return ra_allocref(as, ref, allow);
 }
 
+/* Fuse shifts into loads/stores. Only bother with BSHL 2 => lsl #2. */
+static IRRef asm_fuselsl2(ASMState *as, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  if (ra_noreg(ir->r) && mayfuse(as, ref) && ir->o == IR_BSHL &&
+      irref_isk(ir->op2) && IR(ir->op2)->i == 2)
+    return ir->op1;
+  return 0;  /* No fusion. */
+}
+
+/* Fuse XLOAD/XSTORE reference into load/store operand. */
+static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,
+			 RegSet allow)
+{
+  IRIns *ir = IR(ref);
+  int32_t ofs = 0;
+  Reg base;
+  if (ra_noreg(ir->r) && mayfuse(as, ref)) {
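+    /* Bit 26 set: word/byte class (LDR/STR/LDRB/STRB), 12 bit offsets.
+    ** Bit 26 clear: halfword/signed class (LDRH/LDRSB/LDRSH), 8 bit. */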
+    int32_t lim = (ai & 0x04000000) ? 4096 : 256;
+    if (ir->o == IR_ADD) {
+      if (irref_isk(ir->op2) && (ofs = IR(ir->op2)->i) > -lim && ofs < lim) {
+	ref = ir->op1;
+      } else {
+	IRRef lref = ir->op1, rref = ir->op2;
+	Reg rn, rm;
+	if ((ai & 0x04000000)) {
+	  IRRef sref = asm_fuselsl2(as, rref);
+	  if (sref) {
+	    rref = sref;
+	    ai |= ARMF_SH(ARMSH_LSL, 2);
+	  } else if ((sref = asm_fuselsl2(as, lref)) != 0) {
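+	    /* The shift is on the left operand: swap it into the index rm. */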
+	    lref = rref;
+	    rref = sref;
+	    ai |= ARMF_SH(ARMSH_LSL, 2);
+	  }
+	}
+	rn = ra_alloc1(as, lref, allow);
+	rm = ra_alloc1(as, rref, rset_exclude(allow, rn));
+	if ((ai & 0x04000000)) ai |= ARMI_LS_R;
+	emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm);
+	return;
+      }
+    } else if (ir->o == IR_STRREF) {
+      ofs = (int32_t)sizeof(GCstr);
+      if (irref_isk(ir->op2)) {
+	ofs += IR(ir->op2)->i;
+	ref = ir->op1;
+      } else if (irref_isk(ir->op1)) {
+	ofs += IR(ir->op1)->i;
+	ref = ir->op2;
+      } else {
+	/* NYI: Fuse ADD with constant. */
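+	/* Emitted backwards: the ADD below executes before the load/store. */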
+	Reg rn = ra_alloc1(as, ir->op1, allow);
+	uint32_t m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn));
+	if ((ai & 0x04000000))
+	  emit_lso(as, ai, rd, rd, ofs);
+	else
+	  emit_lsox(as, ai, rd, rd, ofs);
+	emit_dn(as, ARMI_ADD^m, rd, rn);
+	return;
+      }
+      if (ofs <= -lim || ofs >= lim) {
+	Reg rn = ra_alloc1(as, ref, allow);
+	Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn));
+	if ((ai & 0x04000000)) ai |= ARMI_LS_R;
+	emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm);
+	return;
+      }
+    }
+  }
+  base = ra_alloc1(as, ref, allow);
+  if ((ai & 0x04000000))
+    emit_lso(as, ai, rd, base, ofs);
+  else
+    emit_lsox(as, ai, rd, base, ofs);
+}
+
 /* -- Calls --------------------------------------------------------------- */
 
 /* Generate a call to a C function. */
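
Note: asm_fusexref folds the address computation of an XLOAD/XSTORE into
the access itself wherever the encoding allows. A minimal sketch of the
pattern it targets, with illustrative names that are not from the source:

  int32_t *p;   /* FFI pointer */
  x = p[i];     /* IR: roughly XLOAD(ADD(p, BSHL(i, 2))) */

For the word/byte class this now becomes a single instruction, roughly
ldr rd, [rn, rm, lsl #2]. The halfword/signed class has no
shifted-register addressing mode, which is why asm_fuselsl2 is only
consulted when (ai & 0x04000000) is set.
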
@@ -749,68 +826,75 @@ static void asm_strref(ASMState *as, IRIns *ir)
 
 /* -- Loads and stores ---------------------------------------------------- */
 
-static void asm_fxload(ASMState *as, IRIns *ir)
+static ARMIns asm_fxloadins(IRIns *ir)
 {
-  Reg idx, dest = ra_dest(as, ir, RSET_GPR);
-  int32_t ofs;
-  ARMIns ai;
-  if (ir->o == IR_FLOAD) {
-    idx = ra_alloc1(as, ir->op1, RSET_GPR);
-    if (ir->op2 == IRFL_TAB_ARRAY) {
-      ofs = asm_fuseabase(as, ir->op1);
-      if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
-	emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
-	return;
-      }
-    }
-    ofs = field_ofs[ir->op2];
-  } else {
-    /* NYI: Fuse xload operands. */
-    lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
-    idx = ra_alloc1(as, ir->op1, RSET_GPR);
-    ofs = 0;
-  }
   switch (irt_type(ir->t)) {
-  case IRT_I8: ai = ARMI_LDRSB; break;
-  case IRT_U8: ai = ARMI_LDRB; goto use_lso;
-  case IRT_I16: ai = ARMI_LDRSH; break;
-  case IRT_U16: ai = ARMI_LDRH; break;
+  case IRT_I8: return ARMI_LDRSB;
+  case IRT_U8: return ARMI_LDRB;
+  case IRT_I16: return ARMI_LDRSH;
+  case IRT_U16: return ARMI_LDRH;
   case IRT_NUM: lua_assert(0);
   case IRT_FLOAT:
-  default: ai = ARMI_LDR;
-  use_lso:
-    emit_lso(as, ai, dest, idx, ofs);
-    return;
+  default: return ARMI_LDR;
   }
-  emit_lsox(as, ai, dest, idx, ofs);
 }
 
-static void asm_fxstore(ASMState *as, IRIns *ir)
+static ARMIns asm_fxstoreins(IRIns *ir)
 {
-  Reg idx, src = ra_alloc1(as, ir->op2, RSET_GPR);
-  RegSet allow = rset_exclude(RSET_GPR, src);
-  int32_t ofs;
-  ARMIns ai;
-  if (ir->o == IR_FSTORE) {
-    IRIns *irf = IR(ir->op1);
-    idx = ra_alloc1(as, irf->op1, allow);
-    ofs = field_ofs[irf->op2];
-  } else {
-    /* NYI: Fuse xstore operands. */
-    idx = ra_alloc1(as, ir->op1, allow);
-    ofs = 0;
-  }
   switch (irt_type(ir->t)) {
-  case IRT_I8: case IRT_U8: ai = ARMI_STRB; goto use_lso;
-  case IRT_I16: case IRT_U16: ai = ARMI_STRH; break;
+  case IRT_I8: case IRT_U8: return ARMI_STRB;
+  case IRT_I16: case IRT_U16: return ARMI_STRH;
   case IRT_NUM: lua_assert(0);
   case IRT_FLOAT:
-  default: ai = ARMI_STR;
-  use_lso:
-    emit_lso(as, ai, src, idx, ofs);
-    return;
+  default: return ARMI_STR;
+  }
+}
+
+static void asm_fload(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
+  ARMIns ai = asm_fxloadins(ir);
+  int32_t ofs;
+  if (ir->op2 == IRFL_TAB_ARRAY) {
+    ofs = asm_fuseabase(as, ir->op1);
+    if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
+      emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
+      return;
+    }
   }
-  emit_lsox(as, ai, src, idx, ofs);
+  ofs = field_ofs[ir->op2];
+  if ((ai & 0x04000000))
+    emit_lso(as, ai, dest, idx, ofs);
+  else
+    emit_lsox(as, ai, dest, idx, ofs);
+}
+
+static void asm_fstore(ASMState *as, IRIns *ir)
+{
+  Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
+  IRIns *irf = IR(ir->op1);
+  Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
+  int32_t ofs = field_ofs[irf->op2];
+  ARMIns ai = asm_fxstoreins(ir);
+  if ((ai & 0x04000000))
+    emit_lso(as, ai, src, idx, ofs);
+  else
+    emit_lsox(as, ai, src, idx, ofs);
+}
+
+static void asm_xload(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
+  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
+}
+
+static void asm_xstore(ASMState *as, IRIns *ir)
+{
+  Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
+  asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
+	       rset_exclude(RSET_GPR, src));
 }
 
 static void asm_ahuvload(ASMState *as, IRIns *ir)
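
Note: the old combined asm_fxload/asm_fxstore are split so that only
XLOAD/XSTORE go through asm_fusexref, while FLOAD/FSTORE keep the simple
field_ofs path. What can be fused depends on the instruction class; an
illustrative sketch (pointer types and names are assumptions):

  uint32_t *wp; uint8_t *bp; uint16_t *hp;
  w = wp[i];  /* word class:  ldr  rd, [rn, rm, lsl #2]; imm up to +-4095 */
  b = bp[i];  /* byte class:  ldrb rd, [rn, rm]; same 12 bit immediates   */
  h = hp[i];  /* halfword:    ldrh rd, [rn, rm]; only +-255 immediates,
                 and no shifted index, so the i*2 stays a separate shift  */
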
@@ -1687,11 +1771,13 @@ static void asm_ir(ASMState *as, IRIns *ir)
   case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
     asm_ahuvload(as, ir);
     break;
-  case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
+  case IR_FLOAD: asm_fload(as, ir); break;
+  case IR_XLOAD: asm_xload(as, ir); break;
   case IR_SLOAD: asm_sload(as, ir); break;
 
   case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
+  case IR_FSTORE: asm_fstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir); break;
 
   /* Allocations. */
   case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;

src/lj_crecord.c  +7 -0

@@ -526,6 +526,13 @@ again:
 	idx = emitir(IRT(IR_BAND, IRT_INTP), idx, lj_ir_kintp(J, 1));
       sz = lj_ctype_size(cts, (sid = ctype_cid(ct->info)));
       idx = crec_reassoc_ofs(J, idx, &ofs, sz);
+#if LJ_TARGET_ARM
+      /* Hoist base add to allow fusion of shifts into operands. */
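+      /* Only sz 1 (plain register index) and sz 4 (lsl #2) can be fused. */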
+      if (LJ_LIKELY(J->flags & JIT_F_OPT_LOOP) && ofs && (sz == 1 || sz == 4)) {
+	ptr = emitir(IRT(IR_ADD, IRT_PTR), ptr, lj_ir_kintp(J, ofs));
+	ofs = 0;
+      }
+#endif
       idx = emitir(IRT(IR_MUL, IRT_INTP), idx, lj_ir_kintp(J, sz));
       ptr = emitir(IRT(IR_ADD, IRT_PTR), idx, ptr);
     }
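
Note: a worked example of the hoist, with illustrative names. For FFI
code like x = p[i+1] with int32_t *p, crec_reassoc_ofs moves the
constant part of the index into ofs, leaving idx = i and ofs = 4.
asm_fusexref can fuse a small constant offset or a (possibly shifted)
register index into one access, but not both, so the constant add is
pushed onto the loop-invariant base instead:

  tmp  = ADD(p, 4)            /* invariant; hoisted by LOOP optimization */
  addr = ADD(MUL(i, 4), tmp)  /* MUL by 4 folds to BSHL(i, 2)            */
  x    = XLOAD(addr)          /* fuses to ldr rd, [rtmp, ri, lsl #2]     */

Without the hoist the shift and add are recomputed on every iteration;
with it, the address computation in the loop body folds entirely into
the load.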