فهرست منبع

String buffers, part 3a: Add IR_TMPREF for passing TValues to helpers.

Sponsored by fmad.io.
Mike Pall 4 سال پیش
والد
کامیت
6df650fe3f
12فایلهای تغییر یافته به همراه249 افزوده شده و 125 حذف شده
  1. 1 0
      src/jit/dump.lua
  2. 15 4
      src/lj_asm.c
  3. 46 23
      src/lj_asm_arm.h
  4. 16 12
      src/lj_asm_arm64.h
  5. 57 25
      src/lj_asm_mips.h
  6. 50 21
      src/lj_asm_ppc.h
  7. 45 34
      src/lj_asm_x86.h
  8. 6 0
      src/lj_ir.h
  9. 1 0
      src/lj_opt_fold.c
  10. 1 1
      src/lj_opt_split.c
  11. 10 5
      src/lj_record.c
  12. 1 0
      src/lj_record.h

+ 1 - 0
src/jit/dump.lua

@@ -287,6 +287,7 @@ local litname = {
   ["FLOAD "] = vmdef.irfield,
   ["FREF  "] = vmdef.irfield,
   ["FPMATH"] = vmdef.irfpm,
+  ["TMPREF"] = { [0] = "", "IN", "OUT", "INOUT", "", "", "OUT2", "INOUT2" },
   ["BUFHDR"] = { [0] = "RESET", "APPEND" },
   ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" },
 }

+ 15 - 4
src/lj_asm.c

@@ -1144,7 +1144,7 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
 
 /* -- Buffer operations --------------------------------------------------- */
 
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode);
 
 static void asm_bufhdr(ASMState *as, IRIns *ir)
 {
@@ -1218,7 +1218,7 @@ static void asm_bufput(ASMState *as, IRIns *ir)
   if (args[1] == ASMREF_TMP1) {
     Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
     if (kchar == -129)
-      asm_tvptr(as, tmp, irs->op1);
+      asm_tvptr(as, tmp, irs->op1, IRTMPREF_IN1);
     else
       ra_allockreg(as, kchar, tmp);
   }
@@ -1256,7 +1256,7 @@ static void asm_tostr(ASMState *as, IRIns *ir)
   asm_setupresult(as, ir, ci);  /* GCstr * */
   asm_gencall(as, ci, args);
   if (ir->op2 == IRTOSTR_NUM)
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
+    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1, IRTMPREF_IN1);
 }
 
 #if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
@@ -1303,7 +1303,13 @@ static void asm_newref(ASMState *as, IRIns *ir)
   args[2] = ASMREF_TMP1;  /* cTValue *key */
   asm_setupresult(as, ir, ci);  /* TValue * */
   asm_gencall(as, ci, args);
-  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
+  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2, IRTMPREF_IN1);
+}
+
+static void asm_tmpref(ASMState *as, IRIns *ir)
+{
+  Reg r = ra_dest(as, ir, RSET_GPR);
+  asm_tvptr(as, r, ir->op1, ir->op2);
 }
 
 static void asm_lref(ASMState *as, IRIns *ir)
@@ -1785,6 +1791,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
   case IR_NEWREF: asm_newref(as, ir); break;
   case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
   case IR_FREF: asm_fref(as, ir); break;
+  case IR_TMPREF: asm_tmpref(as, ir); break;
   case IR_STRREF: asm_strref(as, ir); break;
   case IR_LREF: asm_lref(as, ir); break;
 
@@ -2192,6 +2199,10 @@ static void asm_setup_regsp(ASMState *as)
       ir->prev = (uint16_t)REGSP_HINT((rload & 15));
       rload = lj_ror(rload, 4);
       continue;
+    case IR_TMPREF:
+      if ((ir->op2 & IRTMPREF_OUT2) && as->evenspill < 4)
+	as->evenspill = 4;  /* TMPREF OUT2 needs two TValues on the stack. */
+      break;
 #endif
     case IR_CALLXS: {
       CCallInfo ci;

+ 46 - 23
src/lj_asm_arm.h

@@ -185,6 +185,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
 	*ofsp = (ofs & 255);  /* Mask out less bits to allow LDRD. */
 	return ra_allock(as, (ofs & ~255), allow);
       }
+    } else if (ir->o == IR_TMPREF) {
+      *ofsp = 0;
+      return RID_SP;
     }
   }
   *ofsp = 0;
@@ -666,35 +669,55 @@ static void asm_strto(ASMState *as, IRIns *ir)
 /* -- Memory references --------------------------------------------------- */
 
 /* Get pointer to TValue. */
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
 {
-  IRIns *ir = IR(ref);
-  if (irt_isnum(ir->t)) {
-    if (irref_isk(ref)) {
-      /* Use the number constant itself as a TValue. */
-      ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
-    } else {
+  if ((mode & IRTMPREF_IN1)) {
+    IRIns *ir = IR(ref);
+    if (irt_isnum(ir->t)) {
+      if ((mode & IRTMPREF_OUT1)) {
+#if LJ_SOFTFP
+	lj_assertA(irref_isk(ref), "unsplit FP op");
+	emit_dm(as, ARMI_MOV, dest, RID_SP);
+	emit_lso(as, ARMI_STR,
+		 ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
+		 RID_SP, 0);
+	emit_lso(as, ARMI_STR,
+		 ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
+		 RID_SP, 4);
+#else
+	Reg src = ra_alloc1(as, ref, RSET_FPR);
+	emit_dm(as, ARMI_MOV, dest, RID_SP);
+	emit_vlso(as, ARMI_VSTR_D, src, RID_SP, 0);
+#endif
+      } else if (irref_isk(ref)) {
+	/* Use the number constant itself as a TValue. */
+	ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
+      } else {
 #if LJ_SOFTFP
-      lj_assertA(0, "unsplit FP op");
+	lj_assertA(0, "unsplit FP op");
 #else
-      /* Otherwise force a spill and use the spill slot. */
-      emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
+	/* Otherwise force a spill and use the spill slot. */
+	emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
 #endif
+      }
+    } else {
+      /* Otherwise use [sp] and [sp+4] to hold the TValue.
+      ** This assumes the following call has max. 4 args.
+      */
+      Reg type;
+      emit_dm(as, ARMI_MOV, dest, RID_SP);
+      if (!irt_ispri(ir->t)) {
+	Reg src = ra_alloc1(as, ref, RSET_GPR);
+	emit_lso(as, ARMI_STR, src, RID_SP, 0);
+      }
+      if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
+	type = ra_alloc1(as, ref+1, RSET_GPR);
+      else
+	type = ra_allock(as, irt_toitype(ir->t), RSET_GPR);
+      emit_lso(as, ARMI_STR, type, RID_SP, 4);
     }
   } else {
-    /* Otherwise use [sp] and [sp+4] to hold the TValue. */
-    RegSet allow = rset_exclude(RSET_GPR, dest);
-    Reg type;
     emit_dm(as, ARMI_MOV, dest, RID_SP);
-    if (!irt_ispri(ir->t)) {
-      Reg src = ra_alloc1(as, ref, allow);
-      emit_lso(as, ARMI_STR, src, RID_SP, 0);
-    }
-    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
-      type = ra_alloc1(as, ref+1, allow);
-    else
-      type = ra_allock(as, irt_toitype(ir->t), allow);
-    emit_lso(as, ARMI_STR, type, RID_SP, 4);
   }
 }
 
@@ -1909,7 +1932,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
       ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
     break;
 #if LJ_SOFTFP
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
 #endif
   case IR_CNEWI:
     /* Nothing to do here. Handled by lo op itself. */

+ 16 - 12
src/lj_asm_arm64.h

@@ -198,6 +198,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
 	  return RID_GL;
 	}
       }
+    } else if (ir->o == IR_TMPREF) {
+      *ofsp = (int32_t)glofs(as, &J2G(as->J)->tmptv);
+      return RID_GL;
     }
   }
   *ofsp = 0;
@@ -675,22 +678,23 @@ static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
 }
 
 /* Get pointer to TValue. */
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
 {
-  IRIns *ir = IR(ref);
-  if (irt_isnum(ir->t)) {
-    if (irref_isk(ref)) {
-      /* Use the number constant itself as a TValue. */
-      ra_allockreg(as, i64ptr(ir_knum(ir)), dest);
+  if ((mode & IRTMPREF_IN1)) {
+    IRIns *ir = IR(ref);
+    if (irt_isnum(ir->t)) {
+      if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) {
+	/* Use the number constant itself as a TValue. */
+	ra_allockreg(as, i64ptr(ir_knum(ir)), dest);
+	return;
+      }
+      emit_lso(as, A64I_STRd, (ra_alloc1(as, ref, RSET_FPR) & 31), dest, 0);
     } else {
-      /* Otherwise force a spill and use the spill slot. */
-      emit_opk(as, A64I_ADDx, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
+      asm_tvstore64(as, dest, 0, ref);
     }
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
-    asm_tvstore64(as, dest, 0, ref);
-    emit_dn(as, A64I_ADDx^emit_isk12(glofs(as, &J2G(as->J)->tmptv)), dest, RID_GL);
   }
+  /* g->tmptv holds the TValue(s). */
+  emit_dn(as, A64I_ADDx^emit_isk12(glofs(as, &J2G(as->J)->tmptv)), dest, RID_GL);
 }
 
 static void asm_aref(ASMState *as, IRIns *ir)

+ 57 - 25
src/lj_asm_mips.h

@@ -193,6 +193,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
 	  return ra_allock(as, ofs-(int16_t)ofs, allow);
 	}
       }
+    } else if (ir->o == IR_TMPREF) {
+      *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768);
+      return RID_JGL;
     }
   }
   *ofsp = 0;
@@ -839,34 +842,63 @@ static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
 #endif
 
 /* Get pointer to TValue. */
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
 {
-  IRIns *ir = IR(ref);
-  if (irt_isnum(ir->t)) {
-    if (irref_isk(ref))  /* Use the number constant itself as a TValue. */
-      ra_allockreg(as, igcptr(ir_knum(ir)), dest);
-    else  /* Otherwise force a spill and use the spill slot. */
-      emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir));
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
+  int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768);
+  if ((mode & IRTMPREF_IN1)) {
+    IRIns *ir = IR(ref);
+    if (irt_isnum(ir->t)) {
+      if ((mode & IRTMPREF_OUT1)) {
+#if LJ_SOFTFP
+	emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs);
+#if LJ_64
+	emit_setgl(as, ra_alloc1(as, ref, RSET_GPR), tmptv.u64);
+#else
+	lj_assertA(irref_isk(ref), "unsplit FP op");
+	emit_setgl(as,
+		   ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
+		   tmptv.u32.lo);
+	emit_setgl(as,
+		   ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
+		   tmptv.u32.hi);
+#endif
+#else
+	Reg src = ra_alloc1(as, ref, RSET_FPR);
+	emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs);
+	emit_tsi(as, MIPSI_SDC1, (src & 31),  RID_JGL, tmpofs);
+#endif
+      } else if (irref_isk(ref)) {
+	/* Use the number constant itself as a TValue. */
+	ra_allockreg(as, igcptr(ir_knum(ir)), dest);
+      } else {
+#if LJ_SOFTFP
+	lj_assertA(0, "unsplit FP op");
+#else
+	/* Otherwise force a spill and use the spill slot. */
+	emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir));
+#endif
+      }
+    } else {
+      /* Otherwise use g->tmptv to hold the TValue. */
 #if LJ_32
-    RegSet allow = rset_exclude(RSET_GPR, dest);
-    Reg type;
-    emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, (int32_t)(offsetof(global_State, tmptv)-32768));
-    if (!irt_ispri(ir->t)) {
-      Reg src = ra_alloc1(as, ref, allow);
-      emit_setgl(as, src, tmptv.gcr);
-    }
-    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
-      type = ra_alloc1(as, ref+1, allow);
-    else
-      type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
-    emit_setgl(as, type, tmptv.it);
+      Reg type;
+      emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, tmpofs);
+      if (!irt_ispri(ir->t)) {
+	Reg src = ra_alloc1(as, ref, RSET_GPR);
+	emit_setgl(as, src, tmptv.gcr);
+      }
+      if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
+	type = ra_alloc1(as, ref+1, RSET_GPR);
+      else
+	type = ra_allock(as, (int32_t)irt_toitype(ir->t), RSET_GPR);
+      emit_setgl(as, type, tmptv.it);
 #else
-    asm_tvstore64(as, dest, 0, ref);
-    emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL,
-	     (int32_t)(offsetof(global_State, tmptv)-32768));
+      asm_tvstore64(as, dest, 0, ref);
+      emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL, tmpofs);
 #endif
+    }
+  } else {
+    emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs);
   }
 }
 
@@ -2415,7 +2447,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
       ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
     break;
 #if LJ_SOFTFP
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
 #endif
   case IR_CNEWI:
     /* Nothing to do here. Handled by lo op itself. */

+ 50 - 21
src/lj_asm_ppc.h

@@ -156,6 +156,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
 	  return ra_allock(as, ofs-(int16_t)ofs, allow);
 	}
       }
+    } else if (ir->o == IR_TMPREF) {
+      *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768);
+      return RID_JGL;
     }
   }
   *ofsp = 0;
@@ -567,28 +570,54 @@ static void asm_strto(ASMState *as, IRIns *ir)
 /* -- Memory references --------------------------------------------------- */
 
 /* Get pointer to TValue. */
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
 {
-  IRIns *ir = IR(ref);
-  if (irt_isnum(ir->t)) {
-    if (irref_isk(ref))  /* Use the number constant itself as a TValue. */
-      ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
-    else  /* Otherwise force a spill and use the spill slot. */
-      emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir));
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
-    RegSet allow = rset_exclude(RSET_GPR, dest);
-    Reg type;
-    emit_tai(as, PPCI_ADDI, dest, RID_JGL, (int32_t)offsetof(global_State, tmptv)-32768);
-    if (!irt_ispri(ir->t)) {
-      Reg src = ra_alloc1(as, ref, allow);
-      emit_setgl(as, src, tmptv.gcr);
+  int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768);
+  if ((mode & IRTMPREF_IN1)) {
+    IRIns *ir = IR(ref);
+    if (irt_isnum(ir->t)) {
+      if ((mode & IRTMPREF_OUT1)) {
+#if LJ_SOFTFP
+	lj_assertA(irref_isk(ref), "unsplit FP op");
+	emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
+	emit_setgl(as,
+		   ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
+		   tmptv.u32.lo);
+	emit_setgl(as,
+		   ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
+		   tmptv.u32.hi);
+#else
+	Reg src = ra_alloc1(as, ref, RSET_FPR);
+	emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
+	emit_fai(as, PPCI_STFD, src, RID_JGL, tmpofs);
+#endif
+      } else if (irref_isk(ref)) {
+	/* Use the number constant itself as a TValue. */
+	ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
+      } else {
+#if LJ_SOFTFP
+	lj_assertA(0, "unsplit FP op");
+#else
+	/* Otherwise force a spill and use the spill slot. */
+	emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir));
+#endif
+      }
+    } else {
+      /* Otherwise use g->tmptv to hold the TValue. */
+      Reg type;
+      emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
+      if (!irt_ispri(ir->t)) {
+	Reg src = ra_alloc1(as, ref, RSET_GPR);
+	emit_setgl(as, src, tmptv.gcr);
+      }
+      if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
+	type = ra_alloc1(as, ref+1, RSET_GPR);
+      else
+	type = ra_allock(as, irt_toitype(ir->t), RSET_GPR);
+      emit_setgl(as, type, tmptv.it);
     }
-    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
-      type = ra_alloc1(as, ref+1, allow);
-    else
-      type = ra_allock(as, irt_toitype(ir->t), allow);
-    emit_setgl(as, type, tmptv.it);
+  } else {
+    emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
   }
 }
 
@@ -1958,7 +1987,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
       ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
     break;
 #if LJ_SOFTFP
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
 #endif
   case IR_CNEWI:
     /* Nothing to do here. Handled by lo op itself. */

+ 45 - 34
src/lj_asm_x86.h

@@ -216,6 +216,16 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
 #endif
       }
       break;
+    case IR_TMPREF:
+#if LJ_GC64
+      as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->tmptv);
+      as->mrm.base = RID_DISPATCH;
+      as->mrm.idx = RID_NONE;
+#else
+      as->mrm.ofs = igcptr(&J2G(as->J)->tmptv);
+      as->mrm.base = as->mrm.idx = RID_NONE;
+#endif
+      return;
     default:
       lj_assertA(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_UREFO ||
 		 ir->o == IR_KKPTR,
@@ -1050,47 +1060,48 @@ static void asm_strto(ASMState *as, IRIns *ir)
 /* -- Memory references --------------------------------------------------- */
 
 /* Get pointer to TValue. */
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
 {
-  IRIns *ir = IR(ref);
-  if (irt_isnum(ir->t)) {
-    /* For numbers use the constant itself or a spill slot as a TValue. */
-    if (irref_isk(ref))
-      emit_loada(as, dest, ir_knum(ir));
-    else
-      emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
-#if LJ_GC64
-    if (irref_isk(ref)) {
-      TValue k;
-      lj_ir_kvalue(as->J->L, &k, ir);
-      emit_movmroi(as, dest, 4, k.u32.hi);
-      emit_movmroi(as, dest, 0, k.u32.lo);
+  if ((mode & IRTMPREF_IN1)) {
+    IRIns *ir = IR(ref);
+    if (irt_isnum(ir->t)) {
+      if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) {
+	/* Use the number constant itself as a TValue. */
+	emit_loada(as, dest, ir_knum(ir));
+	return;
+      }
+      emit_rmro(as, XO_MOVSDto, ra_alloc1(as, ref, RSET_FPR), dest, 0);
     } else {
-      /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
-      Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
-      if (irt_is64(ir->t)) {
-	emit_u32(as, irt_toitype(ir->t) << 15);
-	emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4);
+#if LJ_GC64
+      if (irref_isk(ref)) {
+	TValue k;
+	lj_ir_kvalue(as->J->L, &k, ir);
+	emit_movmroi(as, dest, 4, k.u32.hi);
+	emit_movmroi(as, dest, 0, k.u32.lo);
       } else {
-	/* Currently, no caller passes integers that might end up here. */
-	emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15));
+	/* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
+	Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+	if (irt_is64(ir->t)) {
+	  emit_u32(as, irt_toitype(ir->t) << 15);
+	  emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4);
+	} else {
+	  emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15));
+	}
+	emit_movtomro(as, REX_64IR(ir, src), dest, 0);
       }
-      emit_movtomro(as, REX_64IR(ir, src), dest, 0);
-    }
 #else
-    if (!irref_isk(ref)) {
-      Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
-      emit_movtomro(as, REX_64IR(ir, src), dest, 0);
-    } else if (!irt_ispri(ir->t)) {
-      emit_movmroi(as, dest, 0, ir->i);
-    }
-    if (!(LJ_64 && irt_islightud(ir->t)))
-      emit_movmroi(as, dest, 4, irt_toitype(ir->t));
+      if (!irref_isk(ref)) {
+	Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+	emit_movtomro(as, REX_64IR(ir, src), dest, 0);
+      } else if (!irt_ispri(ir->t)) {
+	emit_movmroi(as, dest, 0, ir->i);
+      }
+      if (!(LJ_64 && irt_islightud(ir->t)))
+	emit_movmroi(as, dest, 4, irt_toitype(ir->t));
 #endif
-    emit_loada(as, dest, &J2G(as->J)->tmptv);
+    }
   }
+  emit_loada(as, dest, &J2G(as->J)->tmptv); /* g->tmptv holds the TValue(s). */
 }
 
 static void asm_aref(ASMState *as, IRIns *ir)

+ 6 - 0
src/lj_ir.h

@@ -95,6 +95,7 @@
   _(UREFO,	LW, ref, lit) \
   _(UREFC,	LW, ref, lit) \
   _(FREF,	R , ref, lit) \
+  _(TMPREF,	S , ref, lit) \
   _(STRREF,	N , ref, ref) \
   _(LREF,	L , ___, ___) \
   \
@@ -218,6 +219,11 @@ IRFLDEF(FLENUM)
   IRFL__MAX
 } IRFieldID;
 
+/* TMPREF mode bits, stored in op2. */
+#define IRTMPREF_IN1		0x01	/* First input value. */
+#define IRTMPREF_OUT1		0x02	/* First output value. */
+#define IRTMPREF_OUT2		0x04	/* Second output value. */
+
 /* SLOAD mode bits, stored in op2. */
 #define IRSLOAD_PARENT		0x01	/* Coalesce with parent trace. */
 #define IRSLOAD_FRAME		0x02	/* Load 32 bits of ftsz. */

+ 1 - 0
src/lj_opt_fold.c

@@ -2421,6 +2421,7 @@ LJFOLD(XSTORE any any)
 LJFOLDX(lj_opt_dse_xstore)
 
 LJFOLD(NEWREF any any)  /* Treated like a store. */
+LJFOLD(TMPREF any any)
 LJFOLD(CALLA any any)
 LJFOLD(CALLL any any)  /* Safeguard fallback. */
 LJFOLD(CALLS any any)

+ 1 - 1
src/lj_opt_split.c

@@ -645,7 +645,7 @@ static void split_ir(jit_State *J)
       tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
 #endif
       ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
-    } else if (ir->o == IR_TOSTR) {
+    } else if (ir->o == IR_TOSTR || ir->o == IR_TMPREF) {
       if (hisubst[ir->op1]) {
 	if (irref_isk(ir->op1))
 	  nir->op1 = ir->op1;

+ 10 - 5
src/lj_record.c

@@ -259,6 +259,14 @@ TRef lj_record_constify(jit_State *J, cTValue *o)
     return 0;  /* Can't represent lightuserdata (pointless). */
 }
 
+/* Emit a VLOAD with the correct type. */
+TRef lj_record_vload(jit_State *J, TRef ref, IRType t)
+{
+  TRef tr = emitir(IRTG(IR_VLOAD, t), ref, 0);
+  if (irtype_ispri(t)) tr = TREF_PRI(t);  /* Canonicalize primitives. */
+  return tr;
+}
+
 /* -- Record loop ops ----------------------------------------------------- */
 
 /* Loop event. */
@@ -1832,9 +1840,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
 	  IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]);
 	  TRef aref = emitir(IRT(IR_AREF, IRT_PGC),
 			     vbase, lj_ir_kint(J, (int32_t)i));
-	  TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
-	  if (irtype_ispri(t)) tr = TREF_PRI(t);  /* Canonicalize primitives. */
-	  J->base[dst+i] = tr;
+	  J->base[dst+i] = lj_record_vload(J, aref, t);
 	}
       } else {
 	emitir(IRTGI(IR_LE), fr, lj_ir_kint(J, frofs));
@@ -1881,8 +1887,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
 		       lj_ir_kint(J, frofs-(8<<LJ_FR2)));
 	t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]);
 	aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx);
-	tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
-	if (irtype_ispri(t)) tr = TREF_PRI(t);  /* Canonicalize primitives. */
+	tr = lj_record_vload(J, aref, t);
       }
       J->base[dst-2-LJ_FR2] = tr;
       J->maxslot = dst-1-LJ_FR2;

+ 1 - 0
src/lj_record.h

@@ -30,6 +30,7 @@ LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b,
 			     cTValue *av, cTValue *bv);
 LJ_FUNC void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk);
 LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o);
+LJ_FUNC TRef lj_record_vload(jit_State *J, TRef ref, IRType t);
 
 LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs);
 LJ_FUNC void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs);