
Update LuaJIT to commit 43d0a19

Sasha Szpakowski 2 years ago
commit 3b08388623

+ 1 - 1
libs/LuaJIT/.relver

@@ -1 +1 @@
-1697887905
+1700008891

+ 1 - 2
libs/LuaJIT/src/jit/dis_arm64.lua

@@ -985,8 +985,7 @@ local function disass_ins(ctx)
 	x = x.."]"
 	x = x.."]"
       end
       end
     elseif p == "P" then
     elseif p == "P" then
-      local opcv, sh = rshift(op, 26), 2
-      if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end
+      local sh = 2 + rshift(op, 31 - band(rshift(op, 26), 1))
       local imm7 = lshift(arshift(lshift(op, 10), 25), sh)
       local imm7 = lshift(arshift(lshift(op, 10), 25), sh)
       local rn = map_regs.x[band(rshift(op, 5), 31)]
       local rn = map_regs.x[band(rshift(op, 5), 31)]
       local ind = band(rshift(op, 23), 3)
       local ind = band(rshift(op, 23), 3)

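The dis_arm64.lua change above folds the old opcode-threshold tests into a single branchless expression that reads the scale straight out of the instruction word. A quick Lua sketch of what it decodes, assuming the standard A64 load/store-pair layout (opc field in bits 30-31, V/SIMD bit at 26); the sample encodings below are assembled here for illustration and are not taken from the patch:

local bit = require("bit")
local band, rshift, lshift = bit.band, bit.rshift, bit.lshift

-- sh = 2 + (V set: opc; V clear: high bit of opc). GPR pairs scale the
-- imm7 offset by 4 or 8 bytes, SIMD pairs by 4, 8 or 16 bytes.
local function pair_scale(op)
  return 2 + rshift(op, 31 - band(rshift(op, 26), 1))
end

local LDP_W = lshift(0x0A5, 22)  -- opc=00, V=0: 4-byte slots
local LDP_X = lshift(0x2A5, 22)  -- opc=10, V=0: 8-byte slots
local LDP_Q = lshift(0x2B5, 22)  -- opc=10, V=1: 16-byte slots
print(pair_scale(LDP_W), pair_scale(LDP_X), pair_scale(LDP_Q))  --> 2 3 4
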
+ 18 - 10
libs/LuaJIT/src/lj_asm_arm.h

@@ -969,24 +969,32 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
-  if (irref_isk(ir->op1)) {
+  int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
+  if (irref_isk(ir->op1) && !guarded) {
     GCfunc *fn = ir_kfunc(IR(ir->op1));
     MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
     emit_lsptr(as, ARMI_LDR, dest, v);
   } else {
-    Reg uv = ra_scratch(as, RSET_GPR);
-    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
-    if (ir->o == IR_UREFC) {
-      asm_guardcc(as, CC_NE);
+    if (guarded) {
+      asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
       emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP);
-      emit_opk(as, ARMI_ADD, dest, uv,
+    }
+    if (ir->o == IR_UREFC)
+      emit_opk(as, ARMI_ADD, dest, dest,
 	       (int32_t)offsetof(GCupval, tv), RSET_GPR);
-      emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
+    else
+      emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(GCupval, v));
+    if (guarded)
+      emit_lso(as, ARMI_LDRB, RID_TMP, dest,
+	       (int32_t)offsetof(GCupval, closed));
+    if (irref_isk(ir->op1)) {
+      GCfunc *fn = ir_kfunc(IR(ir->op1));
+      int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
+      emit_loadi(as, dest, k);
     } else {
-      emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v));
+      emit_lso(as, ARMI_LDR, dest, ra_alloc1(as, ir->op1, RSET_GPR),
+	       (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
     }
-    emit_lso(as, ARMI_LDR, uv, func,
-	     (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
   }
 }
 

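The same restructuring recurs in each backend below: the constant-address fast path is only taken for unguarded refs, the loads are chained through dest instead of a scratch register, and the upvalue-closed check is now emitted for guarded UREFO (trace exits if the upvalue has closed) as well as UREFC (exits if it is still open). A Lua sketch of the shared guard predicate; the IRT_* values are hypothetical stand-ins, not the real lj_ir.h constants:

local bit = require("bit")
local band = bit.band

local IRT_TYPE, IRT_GUARD, IRT_PGC = 0x1f, 0x80, 0x09  -- illustrative only

-- Mirrors (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC):
-- only refs carrying both the guard flag and the checked PGC type get the
-- runtime closed-ness test.
local function uref_guarded(irt)
  return band(irt, IRT_GUARD + IRT_TYPE) == IRT_GUARD + IRT_PGC
end

print(uref_guarded(IRT_GUARD + IRT_PGC))  --> true
print(uref_guarded(IRT_PGC))              --> false (unguarded ref)
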
+ 14 - 6
libs/LuaJIT/src/lj_asm_arm64.h

@@ -931,22 +931,30 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
-  if (irref_isk(ir->op1)) {
+  int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
+  if (irref_isk(ir->op1) && !guarded) {
     GCfunc *fn = ir_kfunc(IR(ir->op1));
     MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
     emit_lsptr(as, A64I_LDRx, dest, v);
   } else {
-    if (ir->o == IR_UREFC) {
-      asm_guardcnb(as, A64I_CBZ, RID_TMP);
+    if (guarded)
+      asm_guardcnb(as, ir->o == IR_UREFC ? A64I_CBZ : A64I_CBNZ, RID_TMP);
+    if (ir->o == IR_UREFC)
       emit_opk(as, A64I_ADDx, dest, dest,
 	       (int32_t)offsetof(GCupval, tv), RSET_GPR);
+    else
+      emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v));
+    if (guarded)
       emit_lso(as, A64I_LDRB, RID_TMP, dest,
 	       (int32_t)offsetof(GCupval, closed));
+    if (irref_isk(ir->op1)) {
+      GCfunc *fn = ir_kfunc(IR(ir->op1));
+      uint64_t k = gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
+      emit_loadu64(as, dest, k);
     } else {
-      emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v));
+      emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR),
+	       (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
     }
-    emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR),
-	     (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
   }
 }
 

+ 17 - 10
libs/LuaJIT/src/lj_asm_mips.h

@@ -1207,22 +1207,29 @@ nolo:
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
-  if (irref_isk(ir->op1)) {
+  int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
+  if (irref_isk(ir->op1) && !guarded) {
     GCfunc *fn = ir_kfunc(IR(ir->op1));
     MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
     emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR);
   } else {
-    Reg uv = ra_scratch(as, RSET_GPR);
-    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
-    if (ir->o == IR_UREFC) {
-      asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
-      emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv));
-      emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
+    if (guarded)
+      asm_guard(as, ir->o == IR_UREFC ? MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO);
+    if (ir->o == IR_UREFC)
+      emit_tsi(as, MIPSI_AADDIU, dest, dest, (int32_t)offsetof(GCupval, tv));
+    else
+      emit_tsi(as, MIPSI_AL, dest, dest, (int32_t)offsetof(GCupval, v));
+    if (guarded)
+      emit_tsi(as, MIPSI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
+    if (irref_isk(ir->op1)) {
+      GCfunc *fn = ir_kfunc(IR(ir->op1));
+      GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
+      emit_loada(as, dest, o);
     } else {
-      emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v));
+      emit_tsi(as, MIPSI_AL, dest, ra_alloc1(as, ir->op1, RSET_GPR),
+	       (int32_t)offsetof(GCfuncL, uvptr) +
+	       (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
     }
-    emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) +
-	     (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
   }
 }
 

+ 17 - 10
libs/LuaJIT/src/lj_asm_ppc.h

@@ -840,23 +840,30 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
-  if (irref_isk(ir->op1)) {
+  int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
+  if (irref_isk(ir->op1) && !guarded) {
     GCfunc *fn = ir_kfunc(IR(ir->op1));
     MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
     emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR);
   } else {
-    Reg uv = ra_scratch(as, RSET_GPR);
-    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
-    if (ir->o == IR_UREFC) {
-      asm_guardcc(as, CC_NE);
+    if (guarded) {
+      asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
       emit_ai(as, PPCI_CMPWI, RID_TMP, 1);
-      emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv));
-      emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
+    }
+    if (ir->o == IR_UREFC)
+      emit_tai(as, PPCI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv));
+    else
+      emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(GCupval, v));
+    if (guarded)
+      emit_tai(as, PPCI_LBZ, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
+    if (irref_isk(ir->op1)) {
+      GCfunc *fn = ir_kfunc(IR(ir->op1));
+      int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
+      emit_loadi(as, dest, k);
     } else {
-      emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v));
+      emit_tai(as, PPCI_LWZ, dest, ra_alloc1(as, ir->op1, RSET_GPR),
+	       (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
     }
-    emit_tai(as, PPCI_LWZ, uv, func,
-	     (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
   }
 }
 

+ 31 - 18
libs/LuaJIT/src/lj_asm_x86.h

@@ -109,7 +109,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
 /* Check if there's no conflicting instruction between curins and ref.
 ** Also avoid fusing loads if there are multiple references.
 */
-static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
+static int noconflict(ASMState *as, IRRef ref, IROp conflict, int check)
 {
   IRIns *ir = as->ir;
   IRRef i = as->curins;
@@ -118,7 +118,9 @@ static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
   while (--i > ref) {
     if (ir[i].o == conflict)
       return 0;  /* Conflict found. */
-    else if (!noload && (ir[i].op1 == ref || ir[i].op2 == ref))
+    else if ((check & 1) && (ir[i].o == IR_NEWREF || ir[i].o == IR_CALLS))
+      return 0;
+    else if ((check & 2) && (ir[i].op1 == ref || ir[i].op2 == ref))
       return 0;
   }
   return 1;  /* Ok, no conflict. */
@@ -134,7 +136,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
     lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY");
     /* We can avoid the FLOAD of t->array for colocated arrays. */
     if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
-	!neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) {
+	!neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 0)) {
       as->mrm.ofs = (int32_t)sizeof(GCtab);  /* Ofs to colocated array. */
       return irb->op1;  /* Table obj. */
     }
@@ -456,7 +458,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
     RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
     if (ir->o == IR_SLOAD) {
       if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
-	  noconflict(as, ref, IR_RETF, 0) &&
+	  noconflict(as, ref, IR_RETF, 2) &&
 	  !(LJ_GC64 && irt_isaddr(ir->t))) {
 	as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
 	as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
@@ -467,12 +469,12 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
     } else if (ir->o == IR_FLOAD) {
       /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
       if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) &&
-	  noconflict(as, ref, IR_FSTORE, 0)) {
+	  noconflict(as, ref, IR_FSTORE, 2)) {
 	asm_fusefref(as, ir, xallow);
 	return RID_MRM;
       }
     } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
-      if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) &&
+      if (noconflict(as, ref, ir->o + IRDELTA_L2S, 2+(ir->o != IR_ULOAD)) &&
 	  !(LJ_GC64 && irt_isaddr(ir->t))) {
 	asm_fuseahuref(as, ir->op1, xallow);
 	return RID_MRM;
@@ -482,7 +484,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
       ** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
       */
       if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) &&
-	  noconflict(as, ref, IR_XSTORE, 0)) {
+	  noconflict(as, ref, IR_XSTORE, 2)) {
 	asm_fusexref(as, ir->op1, xallow);
 	return RID_MRM;
       }
@@ -815,6 +817,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
   emit_rr(as, XO_UCOMISD, left, tmp);
   emit_rr(as, XO_CVTSI2SD, tmp, dest);
   emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
+  checkmclim(as);
   emit_rr(as, XO_CVTTSD2SI, dest, left);
   /* Can't fuse since left is needed twice. */
 }
@@ -857,6 +860,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       emit_rr(as, XO_SUBSD, dest, bias);  /* Subtract 2^52+2^51 bias. */
       emit_rr(as, XO_XORPS, dest, bias);  /* Merge bias and integer. */
       emit_rma(as, XO_MOVSD, bias, k);
+      checkmclim(as);
       emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
       return;
     } else {  /* Integer to FP conversion. */
@@ -1173,6 +1177,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
     asm_guardcc(as, CC_E);
   else
     emit_sjcc(as, CC_E, l_end);
+  checkmclim(as);
   if (irt_isnum(kt)) {
     if (isk) {
       /* Assumes -0.0 is already canonicalized to +0.0. */
@@ -1232,7 +1237,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
 #endif
   }
   emit_sfixup(as, l_loop);
-  checkmclim(as);
 #if LJ_GC64
   if (!isk && irt_isaddr(kt)) {
     emit_rr(as, XO_OR, tmp|REX_64, key);
@@ -1259,6 +1263,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
       emit_rr(as, XO_ARITH(XOg_SUB), dest, tmp);
       emit_shifti(as, XOg_ROL, tmp, HASH_ROT3);
       emit_rr(as, XO_ARITH(XOg_XOR), dest, tmp);
+      checkmclim(as);
       emit_shifti(as, XOg_ROL, dest, HASH_ROT2);
       emit_rr(as, XO_ARITH(XOg_SUB), tmp, dest);
       emit_shifti(as, XOg_ROL, dest, HASH_ROT1);
@@ -1276,7 +1281,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
       } else {
 	emit_rr(as, XO_MOV, tmp, key);
 #if LJ_GC64
-	checkmclim(as);
 	emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15);
 	if ((as->flags & JIT_F_BMI2)) {
 	  emit_i8(as, 32);
@@ -1373,24 +1377,31 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
-  if (irref_isk(ir->op1)) {
+  int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
+  if (irref_isk(ir->op1) && !guarded) {
     GCfunc *fn = ir_kfunc(IR(ir->op1));
     MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
     emit_rma(as, XO_MOV, dest|REX_GC64, v);
   } else {
     Reg uv = ra_scratch(as, RSET_GPR);
-    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
-    if (ir->o == IR_UREFC) {
+    if (ir->o == IR_UREFC)
       emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
-      asm_guardcc(as, CC_NE);
-      emit_i8(as, 1);
+    else
+      emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
+    if (guarded) {
+      asm_guardcc(as, ir->o == IR_UREFC ? CC_E : CC_NE);
+      emit_i8(as, 0);
       emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
+    }
+    if (irref_isk(ir->op1)) {
+      GCfunc *fn = ir_kfunc(IR(ir->op1));
+      GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
+      emit_loada(as, uv, o);
     } else {
-      emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
+      emit_rmro(as, XO_MOV, uv|REX_GC64, ra_alloc1(as, ir->op1, RSET_GPR),
+	        (int32_t)offsetof(GCfuncL, uvptr) +
+	        (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
     }
-    emit_rmro(as, XO_MOV, uv|REX_GC64, func,
-	      (int32_t)offsetof(GCfuncL, uvptr) +
-	      (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
   }
 }
 
@@ -1547,6 +1558,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
   if (irt_islightud(ir->t)) {
     Reg dest = asm_load_lightud64(as, ir, 1);
     if (ra_hasreg(dest)) {
+      checkmclim(as);
       asm_fuseahuref(as, ir->op1, RSET_GPR);
       if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
       emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
@@ -1594,6 +1606,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
   if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
     lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
 	       "bad load type %d", irt_type(ir->t));
+    checkmclim(as);
 #if LJ_GC64
     emit_u32(as, LJ_TISNUM << 15);
 #else

+ 3 - 1
libs/LuaJIT/src/lj_cparse.c

@@ -1766,9 +1766,11 @@ static void cp_pragma(CPState *cp, BCLine pragmaline)
     cp_check(cp, '(');
     if (cp->tok == CTOK_IDENT) {
       if (cp_str_is(cp->str, "push")) {
-	if (cp->curpack < CPARSE_MAX_PACKSTACK) {
+	if (cp->curpack < CPARSE_MAX_PACKSTACK-1) {
 	  cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack];
 	  cp->curpack++;
+	} else {
+	  cp_errmsg(cp, cp->tok, LJ_ERR_XLEVELS);
 	}
       } else if (cp_str_is(cp->str, "pop")) {
 	if (cp->curpack > 0) cp->curpack--;

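The old bound let cp->curpack reach CPARSE_MAX_PACKSTACK and write one slot past the end of packstack[]; the corrected check raises LJ_ERR_XLEVELS instead of overflowing. A hypothetical way to poke at the limit from Lua (the pack stack is small, 7 entries in current sources, and the exact error text is not specified by this diff):

local ffi = require("ffi")
local decl = string.rep("#pragma pack(push, 1)\n", 16) .. "typedef int probe_t;"
print(pcall(ffi.cdef, decl))  -- false + an error once the push stack is full
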
+ 7 - 4
libs/LuaJIT/src/lj_def.h

@@ -259,12 +259,8 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x)
 #else
 unsigned char _BitScanForward(unsigned long *, unsigned long);
 unsigned char _BitScanReverse(unsigned long *, unsigned long);
-unsigned char _BitScanForward64(unsigned long *, uint64_t);
-unsigned char _BitScanReverse64(unsigned long *, uint64_t);
 #pragma intrinsic(_BitScanForward)
 #pragma intrinsic(_BitScanReverse)
-#pragma intrinsic(_BitScanForward64)
-#pragma intrinsic(_BitScanReverse64)
 
 static LJ_AINLINE uint32_t lj_ffs(uint32_t x)
 {
@@ -276,6 +272,12 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x)
   unsigned long r; _BitScanReverse(&r, x); return (uint32_t)r;
 }
 
+#if defined(_M_X64) || defined(_M_ARM64)
+unsigned char _BitScanForward64(unsigned long *, uint64_t);
+unsigned char _BitScanReverse64(unsigned long *, uint64_t);
+#pragma intrinsic(_BitScanForward64)
+#pragma intrinsic(_BitScanReverse64)
+
 static LJ_AINLINE uint32_t lj_ffs64(uint64_t x)
 {
   unsigned long r; _BitScanForward64(&r, x); return (uint32_t)r;
@@ -286,6 +288,7 @@ static LJ_AINLINE uint32_t lj_fls64(uint64_t x)
   unsigned long r; _BitScanReverse64(&r, x); return (uint32_t)r;
 }
 #endif
+#endif
 
 unsigned long _byteswap_ulong(unsigned long);
 uint64_t _byteswap_uint64(uint64_t);

+ 38 - 9
libs/LuaJIT/src/lj_opt_fold.c

@@ -2134,8 +2134,26 @@ LJFOLDX(lj_opt_fwd_uload)
 LJFOLD(ALEN any any)
 LJFOLDX(lj_opt_fwd_alen)
 
+/* Try to merge UREFO/UREFC into referenced instruction. */
+static TRef merge_uref(jit_State *J, IRRef ref, IRIns* ir)
+{
+  if (ir->o == IR_UREFO && irt_isguard(ir->t)) {
+    /* Might be pointing to some other coroutine's stack.
+    ** And GC might shrink said stack, thereby repointing the upvalue.
+    ** GC might even collect said coroutine, thereby closing the upvalue.
+    */
+    if (gcstep_barrier(J, ref))
+      return EMITFOLD;  /* So cannot merge. */
+    /* Current fins wants a check, but ir doesn't have one. */
+    if ((irt_t(fins->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC) &&
+	irt_type(ir->t) == IRT_IGC)
+      ir->t.irt += IRT_PGC-IRT_IGC;  /* So install a check. */
+  }
+  return ref;  /* Not a TRef, but the caller doesn't care. */
+}
+
 /* Upvalue refs are really loads, but there are no corresponding stores.
-** So CSE is ok for them, except for UREFO across a GC step (see below).
+** So CSE is ok for them, except for guarded UREFO across a GC step.
 ** If the referenced function is const, its upvalue addresses are const, too.
 ** This can be used to improve CSE by looking for the same address,
 ** even if the upvalues originate from a different function.
@@ -2153,9 +2171,7 @@ LJFOLDF(cse_uref)
       if (irref_isk(ir->op1)) {
 	GCfunc *fn2 = ir_kfunc(IR(ir->op1));
 	if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
-	  if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
-	    break;
-	  return ref;
+	  return merge_uref(J, ref, ir);
 	}
       }
       ref = ir->prev;
@@ -2164,6 +2180,24 @@ LJFOLDF(cse_uref)
   return EMITFOLD;
 }
 
+/* Custom CSE for UREFO. */
+LJFOLD(UREFO any any)
+LJFOLDF(cse_urefo)
+{
+  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
+    IRRef ref = J->chain[IR_UREFO];
+    IRRef lim = fins->op1;
+    IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
+    while (ref > lim) {
+      IRIns *ir = IR(ref);
+      if (ir->op12 == op12)
+	return merge_uref(J, ref, ir);
+      ref = ir->prev;
+    }
+  }
+  return EMITFOLD;
+}
+
 LJFOLD(HREFK any any)
 LJFOLDX(lj_opt_fwd_hrefk)
 
@@ -2384,14 +2418,9 @@ LJFOLDF(fold_base)
 
 /* Write barriers are amenable to CSE, but not across any incremental
 ** GC steps.
-**
-** The same logic applies to open upvalue references, because a stack
-** may be resized during a GC step (not the current stack, but maybe that
-** of a coroutine).
 */
 LJFOLD(TBAR any)
 LJFOLD(OBAR any any)
-LJFOLD(UREFO any any)
 LJFOLDF(barrier_tab)
 {
   TRef tr = lj_opt_cse(J);

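The comment inside merge_uref is the heart of this fix: an open upvalue reached through UREFO may live on another coroutine's stack, so a GC step between two uses can repoint or even close it behind the trace's back. A sketch of the code shape that sets this up (an assumed scenario based on that comment, not a regression test shipped with the patch):

-- n stays an *open* upvalue on the coroutine's stack across yields, so a
-- trace through tick() references it via UREFO while the main thread's GC
-- steps run between resumes.
local function run()
  local n = 0
  local function tick() n = n + 1 return n end
  for i = 1, 1e5 do tick(); coroutine.yield() end
end
local co = coroutine.create(run)
for i = 1, 1e5 do coroutine.resume(co) end
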
+ 10 - 5
libs/LuaJIT/src/lj_opt_mem.c

@@ -464,18 +464,23 @@ doemit:
 */
 static AliasRet aa_uref(IRIns *refa, IRIns *refb)
 {
-  if (refa->o != refb->o)
-    return ALIAS_NO;  /* Different UREFx type. */
   if (refa->op1 == refb->op1) {  /* Same function. */
     if (refa->op2 == refb->op2)
       return ALIAS_MUST;  /* Same function, same upvalue idx. */
     else
       return ALIAS_NO;  /* Same function, different upvalue idx. */
   } else {  /* Different functions, check disambiguation hash values. */
-    if (((refa->op2 ^ refb->op2) & 0xff))
+    if (((refa->op2 ^ refb->op2) & 0xff)) {
       return ALIAS_NO;  /* Upvalues with different hash values cannot alias. */
-    else
-      return ALIAS_MAY;  /* No conclusion can be drawn for same hash value. */
+    } else if (refa->o != refb->o) {
+      /* Different UREFx type, but need to confirm the UREFO really is open. */
+      if (irt_type(refa->t) == IRT_IGC) refa->t.irt += IRT_PGC-IRT_IGC;
+      else if (irt_type(refb->t) == IRT_IGC) refb->t.irt += IRT_PGC-IRT_IGC;
+      return ALIAS_NO;
+    } else {
+      /* No conclusion can be drawn for same hash value and same UREFx type. */
+      return ALIAS_MAY;
+    }
   }
 }
 

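Rewritten, aa_uref now consults the UREFO/UREFC distinction only after the disambiguation hash has failed to separate the refs, and only with a runtime openness check in place. The new decision tree, as a small Lua rendering (a sketch: ALIAS results as strings, IR instructions as plain tables):

local bit = require("bit")
local band, bxor = bit.band, bit.bxor

local function aa_uref(refa, refb)
  if refa.op1 == refb.op1 then                      -- same function
    return refa.op2 == refb.op2 and "MUST" or "NO"  -- same/different uv idx
  elseif band(bxor(refa.op2, refb.op2), 0xff) ~= 0 then
    return "NO"                                     -- hash bytes differ
  elseif refa.o ~= refb.o then
    -- Open vs. closed ref: disjoint only if the UREFO is checked for
    -- openness at runtime, hence the IRT_IGC -> IRT_PGC promotion above.
    return "NO"
  else
    return "MAY"                   -- same hash byte, same UREFx type
  end
end
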
+ 12 - 2
libs/LuaJIT/src/lj_record.c

@@ -976,6 +976,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
       emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc);
       J->retdepth++;
       J->needsnap = 1;
+      J->scev.idx = REF_NIL;
       lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot for return");
       /* Shift result slots up and clear the slots of the new frame below. */
       memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults);
@@ -1772,12 +1773,12 @@ noconstify:
   /* Note: this effectively limits LJ_MAX_UPVAL to 127. */
   uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff);
   if (!uvp->closed) {
-    uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PGC), fn, uv));
     /* In current stack? */
     if (uvval(uvp) >= tvref(J->L->stack) &&
 	uvval(uvp) < tvref(J->L->maxstack)) {
       int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot));
       if (slot >= 0) {  /* Aliases an SSA slot? */
+	uref = tref_ref(emitir(IRT(IR_UREFO, IRT_PGC), fn, uv));
 	emitir(IRTG(IR_EQ, IRT_PGC),
 	       REF_BASE,
 	       emitir(IRT(IR_ADD, IRT_PGC), uref,
@@ -1792,12 +1793,21 @@ noconstify:
 	}
       }
     }
+    /* IR_UREFO+IRT_IGC is not checked for open-ness at runtime.
+    ** Always marked as a guard, since it might get promoted to IRT_PGC later.
+    */
+    uref = emitir(IRTG(IR_UREFO, tref_isgcv(val) ? IRT_PGC : IRT_IGC), fn, uv);
+    uref = tref_ref(uref);
     emitir(IRTG(IR_UGT, IRT_PGC),
 	   emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE),
 	   lj_ir_kintpgc(J, (J->baseslot + J->maxslot) * 8));
   } else {
+    /* If fn is constant, then so is the GCupval*, and the upvalue cannot
+    ** transition back to open, so no guard is required in this case.
+    */
+    IRType t = (tref_isk(fn) ? 0 : IRT_GUARD) | IRT_PGC;
+    uref = tref_ref(emitir(IRT(IR_UREFC, t), fn, uv));
     needbarrier = 1;
-    uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv));
   }
   if (val == 0) {  /* Upvalue load */
     IRType t = itype2irt(uvval(uvp));

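The closed-upvalue branch's new comment has a direct Lua-side reading: once a closure has escaped its enclosing function, its upvalues are closed for good, so when the trace specializes on the closure as a constant, the UREFC address needs no guard. The shape of code that takes this unguarded path (nothing here is specific to the patch):

local function make_counter()
  local n = 0
  return function() n = n + 1 return n end
end
local counter = make_counter()  -- n is closed from this point on
local sum = 0
for i = 1, 200 do sum = sum + counter() end  -- hot loop: constant fn, UREFC
print(sum)
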
+ 5 - 2
libs/LuaJIT/src/lj_state.c

@@ -346,8 +346,11 @@ void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L)
   lj_assertG(L != mainthread(g), "free of main thread");
   if (obj2gco(L) == gcref(g->cur_L))
     setgcrefnull(g->cur_L);
-  lj_func_closeuv(L, tvref(L->stack));
-  lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues");
+  if (gcref(L->openupval) != NULL) {
+    lj_func_closeuv(L, tvref(L->stack));
+    lj_trace_abort(g);  /* For aa_uref soundness. */
+    lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues");
+  }
   lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
   lj_mem_freet(g, L);
 }

+ 7 - 0
libs/LuaJIT/src/vm_arm.dasc

@@ -1195,8 +1195,11 @@ static void build_subroutines(BuildCtx *ctx)
   |//-- Base library: catch errors ----------------------------------------
   |
   |.ffunc pcall
+  |   ldr RB, L->maxstack
+  |   add INS, BASE, NARGS8:RC
   |  ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)]
   |   cmp NARGS8:RC, #8
+  |   cmphs RB, INS
   |   blo ->fff_fallback
   |  tst RA, #HOOK_ACTIVE		// Remember active hook before pcall.
   |   mov RB, BASE
@@ -1207,7 +1210,11 @@ static void build_subroutines(BuildCtx *ctx)
   |  b ->vm_call_dispatch
   |
   |.ffunc_2 xpcall
+  |   ldr RB, L->maxstack
+  |   add INS, BASE, NARGS8:RC
   |  ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)]
+  |   cmp RB, INS
+  |   blo ->fff_fallback
   |  checkfunc CARG4, ->fff_fallback	// Traceback must be a function.
   |   mov RB, BASE
   |  strd CARG12, [BASE, #8]		// Swap function and traceback.

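This and every remaining vm_*.dasc hunk below add the same entry guard to the pcall/xpcall fast path: bail out to the C fallback unless the argument block still fits under L->maxstack. The condition, reduced to runnable Lua with plain numbers standing in for the BASE, NARGS8:RC and L->maxstack registers:

local function fastpath_ok(base, nargs8, maxstack)
  return base + nargs8 <= maxstack  -- otherwise branch to ->fff_fallback
end
print(fastpath_ok(1000, 64, 8192))  --> true: stay on the fast path
print(fastpath_ok(8160, 64, 8192))  --> false: take the stack-growing C path
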
+ 8 - 0
libs/LuaJIT/src/vm_arm64.dasc

@@ -1211,6 +1211,10 @@ static void build_subroutines(BuildCtx *ctx)
   |//-- Base library: catch errors ----------------------------------------
   |
   |.ffunc pcall
+  |  ldr TMP1, L->maxstack
+  |  add TMP2, BASE, NARGS8:RC
+  |  cmp TMP1, TMP2
+  |  blo ->fff_fallback
   |   cmp NARGS8:RC, #8
   |  ldrb TMP0w, GL->hookmask
   |   blo ->fff_fallback
@@ -1230,6 +1234,10 @@ static void build_subroutines(BuildCtx *ctx)
   |  b ->vm_call_dispatch
   |
   |.ffunc xpcall
+  |  ldr TMP1, L->maxstack
+  |  add TMP2, BASE, NARGS8:RC
+  |  cmp TMP1, TMP2
+  |  blo ->fff_fallback
   |     ldp CARG1, CARG2, [BASE]
   |  ldrb TMP0w, GL->hookmask
   |   subs NARGS8:TMP1, NARGS8:RC, #16

+ 9 - 1
libs/LuaJIT/src/vm_mips.dasc

@@ -1374,9 +1374,13 @@ static void build_subroutines(BuildCtx *ctx)
   |//-- Base library: catch errors ----------------------------------------
   |
   |.ffunc pcall
+  |   lw TMP1, L->maxstack
+  |   addu TMP2, BASE, NARGS8:RC
   |  lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
   |  beqz NARGS8:RC, ->fff_fallback
-  |   move TMP2, BASE
+  |.  sltu AT, TMP1, TMP2
+  |   bnez AT, ->fff_fallback
+  |.   move TMP2, BASE
   |   addiu BASE, BASE, 8
   |  // Remember active hook before pcall.
   |  srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
@@ -1386,8 +1390,12 @@ static void build_subroutines(BuildCtx *ctx)
   |.  addiu NARGS8:RC, NARGS8:RC, -8
   |
   |.ffunc xpcall
+  |   lw TMP1, L->maxstack
+  |   addu TMP2, BASE, NARGS8:RC
   |    sltiu AT, NARGS8:RC, 16
   |  lw CARG4, 8+HI(BASE)
+  |   sltu TMP1, TMP1, TMP2
+  |    or AT, AT, TMP1
   |    bnez AT, ->fff_fallback
   |.  lw CARG3, 8+LO(BASE)
   |   lw CARG1, LO(BASE)

+ 10 - 2
libs/LuaJIT/src/vm_mips64.dasc

@@ -1415,8 +1415,12 @@ static void build_subroutines(BuildCtx *ctx)
   |//-- Base library: catch errors ----------------------------------------
   |
   |.ffunc pcall
+  |  ld TMP1, L->maxstack
+  |  daddu TMP2, BASE, NARGS8:RC
+  |  sltu AT, TMP1, TMP2
+  |  bnez AT, ->fff_fallback
+  |.  lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
   |  daddiu NARGS8:RC, NARGS8:RC, -8
-  |  lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
   |  bltz NARGS8:RC, ->fff_fallback
   |.   move TMP2, BASE
   |   daddiu BASE, BASE, 16
@@ -1437,8 +1441,12 @@ static void build_subroutines(BuildCtx *ctx)
   |.  nop
   |
   |.ffunc xpcall
+  |  ld TMP1, L->maxstack
+  |  daddu TMP2, BASE, NARGS8:RC
+  |  sltu AT, TMP1, TMP2
+  |  bnez AT, ->fff_fallback
+  |.  ld CARG1, 0(BASE)
   |  daddiu NARGS8:TMP0, NARGS8:RC, -16
-  |  ld CARG1, 0(BASE)
   |   ld CARG2, 8(BASE)
   |    bltz NARGS8:TMP0, ->fff_fallback
   |.    lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)

+ 9 - 0
libs/LuaJIT/src/vm_ppc.dasc

@@ -1735,8 +1735,12 @@ static void build_subroutines(BuildCtx *ctx)
   |//-- Base library: catch errors ----------------------------------------
   |
   |.ffunc pcall
+  |    lwz TMP1, L->maxstack
+  |    add TMP2, BASE, NARGS8:RC
   |  cmplwi NARGS8:RC, 8
   |   lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
+  |    cmplw cr1, TMP1, TMP2
+  |  cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
   |  blt ->fff_fallback
   |   mr TMP2, BASE
   |   la BASE, 8(BASE)
@@ -1747,14 +1751,19 @@ static void build_subroutines(BuildCtx *ctx)
   |  b ->vm_call_dispatch
   |
   |.ffunc xpcall
+  |     lwz TMP1, L->maxstack
+  |     add TMP2, BASE, NARGS8:RC
   |  cmplwi NARGS8:RC, 16
   |   lwz CARG3, 8(BASE)
+  |     cmplw cr1, TMP1, TMP2
   |.if FPU
   |    lfd FARG2, 8(BASE)
+  |  cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
   |    lfd FARG1, 0(BASE)
   |.else
   |    lwz CARG1, 0(BASE)
   |    lwz CARG2, 4(BASE)
+  |  cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
   |    lwz CARG4, 12(BASE)
   |.endif
   |  blt ->fff_fallback

+ 6 - 0
libs/LuaJIT/src/vm_x64.dasc

@@ -1463,6 +1463,9 @@ static void build_subroutines(BuildCtx *ctx)
   |//-- Base library: catch errors ----------------------------------------
   |
   |.ffunc_1 pcall
+  |  mov L:RB, SAVE_L
+  |  lea RA, [BASE+NARGS:RD*8]
+  |  cmp RA, L:RB->maxstack; ja ->fff_fallback
   |  lea RA, [BASE+16]
   |  sub NARGS:RDd, 1
   |  mov PCd, 16+FRAME_PCALL
@@ -1481,6 +1484,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  jmp ->vm_call_dispatch
   |
   |.ffunc_2 xpcall
+  |  mov L:RB, SAVE_L
+  |  lea RA, [BASE+NARGS:RD*8]
+  |  cmp RA, L:RB->maxstack; ja ->fff_fallback
   |  mov LFUNC:RA, [BASE+8]
   |  checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback
   |  mov LFUNC:RB, [BASE]		// Swap function and traceback.

+ 7 - 1
libs/LuaJIT/src/vm_x86.dasc

@@ -1369,7 +1369,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov LFUNC:RB, [RA-8]
   |  add NARGS:RD, 1
   |  // This is fragile. L->base must not move, KBASE must always be defined.
-  |.if x64
+  |.if X64
   |  cmp KBASEa, rdx			// Continue with CALLT if flag set.
   |.else
   |  cmp KBASE, BASE			// Continue with CALLT if flag set.
@@ -1793,6 +1793,9 @@ static void build_subroutines(BuildCtx *ctx)
   |//-- Base library: catch errors ----------------------------------------
   |
   |.ffunc_1 pcall
+  |  mov L:RB, SAVE_L
+  |  lea RA, [BASE+NARGS:RD*8]
+  |  cmp RA, L:RB->maxstack; ja ->fff_fallback
   |  lea RA, [BASE+8]
   |  sub NARGS:RD, 1
   |  mov PC, 8+FRAME_PCALL
@@ -1804,6 +1807,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  jmp ->vm_call_dispatch
   |
   |.ffunc_2 xpcall
+  |  mov L:RB, SAVE_L
+  |  lea RA, [BASE+NARGS:RD*8]
+  |  cmp RA, L:RB->maxstack; ja ->fff_fallback
   |  cmp dword [BASE+12], LJ_TFUNC;  jne ->fff_fallback
   |  mov RB, [BASE+4]			// Swap function and traceback.
   |  mov [BASE+12], RB