
Use HIOP for XSTORE in SPLIT pass.

Mike Pall, 13 years ago
Commit 264177b0d0
5 files changed, 77 additions and 62 deletions
  1. src/lj_asm_arm.h (+27 -17)
  2. src/lj_asm_mips.h (+24 -14)
  3. src/lj_asm_ppc.h (+16 -9)
  4. src/lj_asm_x86.h (+7 -3)
  5. src/lj_opt_split.c (+3 -19)

+ 27 - 17
src/lj_asm_arm.h

@@ -206,17 +206,19 @@ static IRRef asm_fuselsl2(ASMState *as, IRRef ref)
 
 /* Fuse XLOAD/XSTORE reference into load/store operand. */
 static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,
-			 RegSet allow)
+			 RegSet allow, int32_t ofs)
 {
   IRIns *ir = IR(ref);
-  int32_t ofs = 0;
   Reg base;
   if (ra_noreg(ir->r) && mayfuse(as, ref)) {
     int32_t lim = (ai & 0x04000000) ? 4096 : 256;
     if (ir->o == IR_ADD) {
-      if (irref_isk(ir->op2) && (ofs = IR(ir->op2)->i) > -lim && ofs < lim) {
+      int32_t ofs2;
+      if (irref_isk(ir->op2) &&
+	  (ofs2 = ofs + IR(ir->op2)->i) > -lim && ofs2 < lim) {
+	ofs = ofs2;
 	ref = ir->op1;
-      } else {
+      } else if (ofs == 0) {
 	IRRef lref = ir->op1, rref = ir->op2;
 	Reg rn, rm;
 	if ((ai & 0x04000000)) {
@@ -237,6 +239,7 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,
 	return;
       }
     } else if (ir->o == IR_STRREF) {
+      lua_assert(ofs == 0);
       ofs = (int32_t)sizeof(GCstr);
       if (irref_isk(ir->op2)) {
 	ofs += IR(ir->op2)->i;
@@ -809,29 +812,33 @@ static void asm_fload(ASMState *as, IRIns *ir)
 
 static void asm_fstore(ASMState *as, IRIns *ir)
 {
-  Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
-  IRIns *irf = IR(ir->op1);
-  Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
-  int32_t ofs = field_ofs[irf->op2];
-  ARMIns ai = asm_fxstoreins(ir);
-  if ((ai & 0x04000000))
-    emit_lso(as, ai, src, idx, ofs);
-  else
-    emit_lsox(as, ai, src, idx, ofs);
+  if (ir->r == RID_SINK) {  /* Sink store. */
+    asm_snap_prep(as);
+  } else {
+    Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
+    IRIns *irf = IR(ir->op1);
+    Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
+    int32_t ofs = field_ofs[irf->op2];
+    ARMIns ai = asm_fxstoreins(ir);
+    if ((ai & 0x04000000))
+      emit_lso(as, ai, src, idx, ofs);
+    else
+      emit_lsox(as, ai, src, idx, ofs);
+  }
 }
 
 static void asm_xload(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
-  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
+  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
 }
 
-static void asm_xstore(ASMState *as, IRIns *ir)
+static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
 {
   Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
   asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
-	       rset_exclude(RSET_GPR, src));
+	       rset_exclude(RSET_GPR, src), ofs);
 }
 
 static void asm_ahuvload(ASMState *as, IRIns *ir)
@@ -1374,6 +1381,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
     if (uselo || usehi)
       asm_fpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO);
     return;
+  } else if ((ir-1)->o == IR_XSTORE) {
+    asm_xstore(as, ir, 4);
+    return;
   }
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
   switch ((ir-1)->o) {
@@ -1702,7 +1712,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
 
   case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
   case IR_FSTORE: asm_fstore(as, ir); break;
-  case IR_XSTORE: asm_xstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir, 0); break;
 
   /* Allocations. */
   case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
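
On ARM, asm_fusexref() now receives the incoming displacement (0 for the loword store, 4 for the HIOP half) and only folds a constant ADD operand into it while the sum still fits the addressing-mode range; register-register addressing stays restricted to ofs == 0. A hypothetical C sketch of that range check (fold_ofs is an invented helper, and lim mirrors the 4096/256 limits in the diff):

#include <stdint.h>
#include <stdio.h>

/* fold_ofs is not LuaJIT code: fold a constant ADD operand 'k' into the
** incoming displacement 'ofs', but only while the sum still fits the
** load/store immediate range (+/- lim).  Returns 1 on success. */
static int fold_ofs(int32_t ofs, int32_t k, int32_t lim, int32_t *ofs_out)
{
  int32_t ofs2 = ofs + k;
  if (ofs2 > -lim && ofs2 < lim) { *ofs_out = ofs2; return 1; }
  *ofs_out = ofs;  /* caller must fall back to a materialized address */
  return 0;
}

int main(void)
{
  int32_t o;
  printf("%d\n", fold_ofs(4, 250, 256, &o));  /* 1: 254 fits in +/-256 */
  printf("%d\n", fold_ofs(4, 254, 256, &o));  /* 0: 258 does not fit */
  return 0;
}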

+ 24 - 14
src/lj_asm_mips.h

@@ -183,20 +183,20 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
 
 /* Fuse XLOAD/XSTORE reference into load/store operand. */
 static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
-			 RegSet allow)
+			 RegSet allow, int32_t ofs)
 {
   IRIns *ir = IR(ref);
-  int32_t ofs = 0;
   Reg base;
   if (ra_noreg(ir->r) && mayfuse(as, ref)) {
     if (ir->o == IR_ADD) {
       int32_t ofs2;
-      if (irref_isk(ir->op2) && (ofs2 = IR(ir->op2)->i, checki16(ofs2))) {
+      if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) {
 	ref = ir->op1;
 	ofs = ofs2;
       }
     } else if (ir->o == IR_STRREF) {
       int32_t ofs2 = 65536;
+      lua_assert(ofs == 0);
       ofs = (int32_t)sizeof(GCstr);
       if (irref_isk(ir->op2)) {
 	ofs2 = ofs + IR(ir->op2)->i;
@@ -889,27 +889,32 @@ static void asm_fload(ASMState *as, IRIns *ir)
 
 static void asm_fstore(ASMState *as, IRIns *ir)
 {
-  Reg src = ra_alloc1z(as, ir->op2, RSET_GPR);
-  IRIns *irf = IR(ir->op1);
-  Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
-  int32_t ofs = field_ofs[irf->op2];
-  MIPSIns mi = asm_fxstoreins(ir);
-  lua_assert(!irt_isfp(ir->t));
-  emit_tsi(as, mi, src, idx, ofs);
+  if (ir->r == RID_SINK) {  /* Sink store. */
+    asm_snap_prep(as);
+    return;
+  } else {
+    Reg src = ra_alloc1z(as, ir->op2, RSET_GPR);
+    IRIns *irf = IR(ir->op1);
+    Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
+    int32_t ofs = field_ofs[irf->op2];
+    MIPSIns mi = asm_fxstoreins(ir);
+    lua_assert(!irt_isfp(ir->t));
+    emit_tsi(as, mi, src, idx, ofs);
+  }
 }
 
 static void asm_xload(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
   lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
-  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
+  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
 }
 
-static void asm_xstore(ASMState *as, IRIns *ir)
+static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
 {
   Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
   asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
-	       rset_exclude(RSET_GPR, src));
+	       rset_exclude(RSET_GPR, src), ofs);
 }
 
 static void asm_ahuvload(ASMState *as, IRIns *ir)
@@ -1554,6 +1559,11 @@ static void asm_hiop(ASMState *as, IRIns *ir)
     as->curins--;  /* Always skip the loword comparison. */
     asm_comp64eq(as, ir);
     return;
+  } else if ((ir-1)->o == IR_XSTORE) {
+    as->curins--;  /* Handle both stores here. */
+    asm_xstore(as, ir, LJ_LE ? 4 : 0);
+    asm_xstore(as, ir-1, LJ_LE ? 0 : 4);
+    return;
   }
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
   switch ((ir-1)->o) {
@@ -1832,7 +1842,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
 
   case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
   case IR_FSTORE: asm_fstore(as, ir); break;
-  case IR_XSTORE: asm_xstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir, 0); break;
 
   /* Allocations. */
   case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
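
On MIPS, asm_hiop() now consumes both halves at once (as->curins--) and issues the two stores with offsets chosen by endianness, LJ_LE ? 4 : 0 for the hiword. The constant-offset fusion is guarded by checki16(); the sketch below (fits_i16 is a stand-in reimplementation, an assumption) shows why the hiword's extra +4 can push an address out of the 16-bit displacement field:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for LuaJIT's checki16(): true if a value fits the signed
** 16-bit displacement of a MIPS load/store. */
static int fits_i16(int32_t k)
{
  return k == (int32_t)(int16_t)k;
}

int main(void)
{
  printf("%d\n", fits_i16(32763 + 4));  /* 1: 32767 is the maximum */
  printf("%d\n", fits_i16(32764 + 4));  /* 0: 32768 overflows, keep base reg */
  return 0;
}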

+ 16 - 9
src/lj_asm_ppc.h

@@ -162,22 +162,24 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
 
 /* Fuse XLOAD/XSTORE reference into load/store operand. */
 static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
-			 RegSet allow)
+			 RegSet allow, int32_t ofs)
 {
   IRIns *ir = IR(ref);
-  int32_t ofs = 0;
   Reg base;
   if (ra_noreg(ir->r) && mayfuse(as, ref)) {
     if (ir->o == IR_ADD) {
-      if (irref_isk(ir->op2) && (ofs = IR(ir->op2)->i, checki16(ofs))) {
+      int32_t ofs2;
+      if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) {
+	ofs = ofs2;
 	ref = ir->op1;
-      } else {
+      } else if (ofs == 0) {
 	Reg right, left = ra_alloc2(as, ir, allow);
 	right = (left >> 8); left &= 255;
 	emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right);
 	return;
       }
     } else if (ir->o == IR_STRREF) {
+      lua_assert(ofs == 0);
       ofs = (int32_t)sizeof(GCstr);
       if (irref_isk(ir->op2)) {
 	ofs += IR(ir->op2)->i;
@@ -904,13 +906,13 @@ static void asm_xload(ASMState *as, IRIns *ir)
   lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
   if (irt_isi8(ir->t))
     emit_as(as, PPCI_EXTSB, dest, dest);
-  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
+  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
 }
 
-static void asm_xstore(ASMState *as, IRIns *ir)
+static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
 {
   IRIns *irb;
-  if (mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP &&
+  if (ofs == 0 && mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP &&
       ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) {
     /* Fuse BSWAP with XSTORE to stwbrx. */
     Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
@@ -918,7 +920,7 @@ static void asm_xstore(ASMState *as, IRIns *ir)
   } else {
     Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
     asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
-		 rset_exclude(RSET_GPR, src));
+		 rset_exclude(RSET_GPR, src), ofs);
   }
 }
 
@@ -1743,6 +1745,11 @@ static void asm_hiop(ASMState *as, IRIns *ir)
     as->curins--;  /* Always skip the loword comparison. */
     asm_comp64(as, ir);
     return;
+  } else if ((ir-1)->o == IR_XSTORE) {
+    as->curins--;  /* Handle both stores here. */
+    asm_xstore(as, ir, 0);
+    asm_xstore(as, ir-1, 4);
+    return;
   }
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
   switch ((ir-1)->o) {
@@ -2035,7 +2042,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
 
   case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
   case IR_FSTORE: asm_fstore(as, ir); break;
-  case IR_XSTORE: asm_xstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir, 0); break;
 
   /* Allocations. */
   case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
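
PPC is big-endian, so asm_hiop() stores the HIOP word at +0 and the loword at +4. The BSWAP-to-stwbrx fusion is also limited to ofs == 0, since stwbrx is an indexed-only form without a displacement. The portable model below (an assumption, not the emitter) shows what a byte-reversed word store produces:

#include <stdint.h>
#include <stdio.h>

/* Write a 32-bit value in little-endian byte order, which on big-endian
** PPC is what stwbrx produces for a fused BSWAP operand. */
static void store_u32_bytereversed(uint8_t *p, uint32_t v)
{
  p[0] = (uint8_t)(v & 0xff);
  p[1] = (uint8_t)((v >> 8) & 0xff);
  p[2] = (uint8_t)((v >> 16) & 0xff);
  p[3] = (uint8_t)((v >> 24) & 0xff);
}

int main(void)
{
  uint8_t b[4];
  store_u32_bytereversed(b, 0x11223344);
  printf("%02x %02x %02x %02x\n", b[0], b[1], b[2], b[3]);  /* 44 33 22 11 */
  return 0;
}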

+ 7 - 3
src/lj_asm_x86.h

@@ -1273,11 +1273,12 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
     }
     rset_clear(allow, src);
   }
-  if (ir->o == IR_FSTORE)
+  if (ir->o == IR_FSTORE) {
     asm_fusefref(as, IR(ir->op1), allow);
-  else
+  } else {
     asm_fusexref(as, ir->op1, allow);
-    /* ir->op2 is ignored -- unaligned stores are ok on x86. */
+    if (LJ_32 && ir->o == IR_HIOP) as->mrm.ofs += 4;
+  }
   if (ra_hasreg(src)) {
     x86Op xo;
     switch (irt_type(ir->t)) {
@@ -2249,6 +2250,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
     asm_comp_int64(as, ir);
     return;
+  } else if ((ir-1)->o == IR_XSTORE) {
+    asm_fxstore(as, ir);
+    return;
   }
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
   switch ((ir-1)->o) {
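
On x86 (32 bit), asm_fxstore() handles the HIOP half by fusing the same address and bumping the fused memory operand's displacement by 4 (as->mrm.ofs += 4); unaligned 32-bit stores are fine on x86, so nothing else changes. A hypothetical model of that displacement bump (MemOperand is an invented type, loosely inspired by as->mrm):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical model of a fused x86 memory operand: base register plus
** a 32-bit displacement. */
typedef struct MemOperand { int base_reg; int32_t ofs; } MemOperand;

int main(void)
{
  MemOperand m = { 6, 16 };  /* e.g. [esi+16], the loword address */
  int is_hiop = 1;           /* assembling the HIOP half of the XSTORE */
  if (is_hiop)
    m.ofs += 4;              /* hiword lives 4 bytes above the loword */
  printf("mov [r%d+%d], src\n", m.base_reg, m.ofs);  /* [r6+20] */
  return 0;
}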

+ 3 - 19
src/lj_opt_split.c

@@ -78,8 +78,7 @@
 ** 0105    int HIOP   0103  +0
 ** 0106    p32 ADD    base  +16
 ** 0107    int XSTORE 0106  0104
-** 0108    p32 ADD    base  +20
-** 0109    int XSTORE 0108  0105
+** 0108    int HIOP   0106  0105
 **
 **         mov eax, [esi+0x8]
 **         mov ecx, [esi+0xc]
@@ -328,19 +327,9 @@ static void split_ir(jit_State *J)
 #endif
 	break;
 	}
-      case IR_ASTORE: case IR_HSTORE: case IR_USTORE:
+      case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE:
 	split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
 	break;
-      case IR_XSTORE: {
-#if LJ_LE
-	IRRef hiref = hisubst[ir->op2];
-#else
-	IRRef hiref = nir->op2; nir->op2 = hisubst[ir->op2];
-#endif
-	split_emit(J, IRT(IR_XSTORE, IRT_SOFTFP),
-		   split_ptr(J, oir, ir->op1), hiref);
-	break;
-	}
       case IR_CONV: {  /* Conversion to number. Others handled below. */
 	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
 	UNUSED(st);
@@ -434,12 +423,7 @@ static void split_ir(jit_State *J)
 #endif
 	break;
       case IR_XSTORE:
-#if LJ_LE
-	hiref = hisubst[ir->op2];
-#else
-	hiref = nir->op2; nir->op2 = hisubst[ir->op2];
-#endif
-	split_emit(J, IRTI(IR_XSTORE), split_ptr(J, oir, ir->op1), hiref);
+	split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
 	break;
       case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
 	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
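
Net effect of the lj_opt_split.c change, per the updated IR comment: the high word of a 64-bit XSTORE no longer needs its own ADD base+20 and second XSTORE; a single HIOP reuses the loword address and the backend applies the +4 itself. A toy model (an assumption, not lj_opt_split.c) contrasting the old and new lowering:

#include <stdio.h>

/* Print the IR shape of lowering one 64-bit XSTORE at base+ofs. */
static void lower_old(int ofs)
{
  printf("ADD    base +%d\nXSTORE lo\n", ofs);
  printf("ADD    base +%d\nXSTORE hi\n", ofs + 4);  /* extra ADD per store */
}

static void lower_new(int ofs)
{
  printf("ADD    base +%d\nXSTORE lo\nHIOP   hi\n", ofs);  /* backend adds +4 */
}

int main(void)
{
  lower_old(16);
  printf("--\n");
  lower_new(16);
  return 0;
}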