Răsfoiți Sursa

Update LuaJIT to commit e826d0c

Sasha Szpakowski 2 ani în urmă
părinte
comite
69d3c41bfc

+ 1 - 1
CMakeLists.txt

@@ -167,7 +167,7 @@ endif()
 
 set(MEGA_ZLIB_VER "1.2.12")
 set(MEGA_LUA51_VER "5.1.5")
-set(MEGA_LUAJIT_VER "2.1.0-1c27912")
+set(MEGA_LUAJIT_VER "2.1.0-e826d0c")
 set(MEGA_LIBOGG_VER "1.3.2")
 set(MEGA_LIBVORBIS_VER "1.3.5")
 set(MEGA_LIBTHEORA_VER "1.1.1")

+ 1 - 1
libs/LuaJIT/.relver

@@ -1 +1 @@
-1694316387
+1697887905

+ 1 - 3
libs/LuaJIT/doc/extensions.html

@@ -426,9 +426,7 @@ the toolchain used to compile LuaJIT:
 on the C stack. The contents of the C++ exception object
 pass through unmodified.</li>
 <li>Lua errors can be caught on the C++ side with <tt>catch(...)</tt>.
-The corresponding Lua error message can be retrieved from the Lua stack.<br>
-For MSVC for Windows 64 bit this requires compilation of your C++ code
-with <tt>/EHa</tt>.</li>
+The corresponding Lua error message can be retrieved from the Lua stack.</li>
 <li>Throwing Lua errors across C++ frames is safe. C++ destructors
 will be called.</li>
 </ul>

+ 4 - 1
libs/LuaJIT/doc/install.html

@@ -203,7 +203,7 @@ Or install Microsoft's Visual Studio (MSVC).
 </p>
 <h3>Building with MSVC</h3>
 <p>
-Open a "Visual Studio Command Prompt" (either x86 or x64), <tt>cd</tt> to the
+Open a "Visual Studio Command Prompt" (x86, x64 or ARM64), <tt>cd</tt> to the
 directory with the source code and run these commands:
 </p>
 <pre class="code">
@@ -214,6 +214,9 @@ msvcbuild
 Check the <tt>msvcbuild.bat</tt> file for more options.
 Then follow the installation instructions below.
 </p>
+<p>
+For an x64 to ARM64 cross-build run this first: <tt>vcvarsall.bat x64_arm64</tt>
+</p>
 <h3>Building with MinGW or Cygwin</h3>
 <p>
 Open a command prompt window and make sure the MinGW or Cygwin programs

+ 2 - 1
libs/LuaJIT/doc/running.html

@@ -120,7 +120,8 @@ file name:
 </p>
 <ul>
 <li><tt>c</tt> &mdash; C source file, exported bytecode data.</li>
-<li><tt>h</tt> &mdash; C header file, static bytecode data.</li>
+<li><tt>cc</tt> &mdash; C++ source file, exported bytecode data.</li>
+<li><tt>h</tt> &mdash; C/C++ header file, static bytecode data.</li>
 <li><tt>obj</tt> or <tt>o</tt> &mdash; Object file, exported bytecode data
 (OS- and architecture-specific).</li>
 <li><tt>raw</tt> or any other extension &mdash; Raw bytecode file (portable).

+ 4 - 4
libs/LuaJIT/dynasm/dasm_arm64.lua

@@ -549,7 +549,7 @@ end
 local function parse_load_pair(params, nparams, n, op)
   if params[n+2] then werror("too many operands") end
   local pn, p2 = params[n], params[n+1]
-  local scale = shr(op, 30) == 0 and 2 or 3
+  local scale = 2 + shr(op, 31 - band(shr(op, 26), 1))
   local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
   if not p1 then
     if not p2 then
@@ -806,8 +806,8 @@ map_op = {
   ["ldrsw_*"] = "98000000DxB|b8800000DxL",
   -- NOTE: ldur etc. are handled by ldr et al.
 
-  ["stp_*"]   = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
-  ["ldp_*"]   = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
+  ["stp_*"]   = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP|ac000000DAqP",
+  ["ldp_*"]   = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP|ac400000DAqP",
   ["ldpsw_*"] = "68400000DAxP",
 
   -- Branches.
@@ -942,7 +942,7 @@ local function parse_template(params, template, nparams, pos)
 	werror("bad register type")
       end
       parse_reg_type = false
-    elseif p == "x" or p == "w" or p == "d" or p == "s" then
+    elseif p == "x" or p == "w" or p == "d" or p == "s" or p == "q" then
       if parse_reg_type ~= p then
 	werror("register size mismatch")
       end

+ 10 - 11
libs/LuaJIT/src/host/buildvm_peobj.c

@@ -9,7 +9,7 @@
 #include "buildvm.h"
 #include "lj_bc.h"
 
-#if LJ_TARGET_WINDOWS
+#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
 
 /* Context for PE object emitter. */
 static char *strtab;
@@ -354,15 +354,15 @@ void emit_peobj(BuildCtx *ctx)
 #define CBE16(x)	(*p = ((x) >> 8) & 0xff, p[1] = (x) & 0xff, p += 2)
 #define CALLOC_S(s)	(*p++ = ((s) >> 4))  /* s < 512 */
 #define CSAVE_FPLR(o)	(*p++ = 0x40 | ((o) >> 3))  /* o <= 504 */
-#define CSAVE_REGP(r,o)	CBE16(0xc800 | (((r)-19)<< 6) | ((o) >> 3))
+#define CSAVE_REGP(r,o)	CBE16(0xc800 | (((r) - 19) << 6) | ((o) >> 3))
 #define CSAVE_REGS(r1,r2,o1) do { \
   int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_REGP(r, o); \
 } while (0)
+#define CSAVE_REGPX(r,o) CBE16(0xcc00 | (((r) - 19) << 6) | (~(o) >> 3))
 #define CSAVE_FREGP(r,o) CBE16(0xd800 | (((r) - 8) << 6) | ((o) >> 3))
 #define CSAVE_FREGS(r1,r2,o1) do { \
   int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_FREGP(r, o); \
 } while (0)
-#define CSAVE_REGX(r,o)	CBE16(0xd400 | (((r) - 19) << 5) | (~(o) >> 3))
 #define CADD_FP(s)	CBE16(0xe200 | ((s) >> 3))  /* s < 8*256 */
 #define CODE_NOP	0xe3
 #define CODE_END	0xe4
@@ -373,11 +373,11 @@ void emit_peobj(BuildCtx *ctx)
 
     /* Unwind codes for .text section with handler. */
     p = uwc;
-    CALLOC_S(208);		/* +1 */
-    CSAVE_FPLR(192);		/* +1 */
     CADD_FP(192);		/* +2 */
-    CSAVE_REGS(19, 28, 184);	/* +5*2 */
-    CSAVE_FREGS(8, 15, 104);	/* +4*2 */
+    CSAVE_REGS(19, 28, 176);	/* +5*2 */
+    CSAVE_FREGS(8, 15, 96);	/* +4*2 */
+    CSAVE_FPLR(192);		/* +1 */
+    CALLOC_S(208);		/* +1 */
     CEND_ALIGN;			/* +1 +1 -> 24 */
 
     u32 = ((24u >> 2) << 27) | (1u << 20) | (fcofs >> 2);
@@ -389,11 +389,10 @@ void emit_peobj(BuildCtx *ctx)
 
     /* Unwind codes for vm_ffi_call without handler. */
     p = uwc;
-    CSAVE_FPLR(16);		/* +1 */
     CADD_FP(16);		/* +2 */
-    CSAVE_REGX(19, -24);	/* +2 */
-    CSAVE_REGX(20, -32);	/* +2 */
-    CEND_ALIGN;			/* +1 +0 -> 8 */
+    CSAVE_FPLR(16);		/* +1 */
+    CSAVE_REGPX(19, -32);	/* +2 */
+    CEND_ALIGN;			/* +1 +2 -> 8 */
 
     u32 = ((8u >> 2) << 27) | (((uint32_t)ctx->codesz - fcofs) >> 2);
     owrite(ctx, &u32, 4);

+ 2 - 2
libs/LuaJIT/src/jit/bcsave.lua

@@ -38,7 +38,7 @@ Save LuaJIT bytecode: luajit -b[options] input output
   --        Stop handling options.
   -         Use stdin as input and/or stdout as output.
 
-File types: c h obj o raw (default)
+File types: c cc h obj o raw (default)
 ]]
   os.exit(1)
 end
@@ -81,7 +81,7 @@ end
 ------------------------------------------------------------------------------
 
 local map_type = {
-  raw = "raw", c = "c", h = "h", o = "obj", obj = "obj",
+  raw = "raw", c = "c", cc = "c", h = "h", o = "obj", obj = "obj",
 }
 
 local map_arch = {

+ 1 - 1
libs/LuaJIT/src/jit/dis_arm64.lua

@@ -948,7 +948,7 @@ local function disass_ins(ctx)
     elseif p == "U" then
       local rn = map_regs.x[band(rshift(op, 5), 31)]
       local sz = band(rshift(op, 30), 3)
-      local imm12 = lshift(arshift(lshift(op, 10), 20), sz)
+      local imm12 = lshift(rshift(lshift(op, 10), 20), sz)
       if imm12 ~= 0 then
 	x = "["..rn..", #"..imm12.."]"
       else

+ 4 - 1
libs/LuaJIT/src/lib_base.c

@@ -616,7 +616,10 @@ static int ffh_resume(lua_State *L, lua_State *co, int wrap)
     setstrV(L, L->base-LJ_FR2, lj_err_str(L, em));
     return FFH_RES(2);
   }
-  lj_state_growstack(co, (MSize)(L->top - L->base));
+  if (lj_state_cpgrowstack(co, (MSize)(L->top - L->base)) != LUA_OK) {
+    cTValue *msg = --co->top;
+    lj_err_callermsg(L, strVdata(msg));
+  }
   return FFH_RETRY;
 }
 

+ 1 - 1
libs/LuaJIT/src/lib_ffi.c

@@ -746,7 +746,7 @@ LJLIB_CF(ffi_abi)	LJLIB_REC(.)
     "\003win"
 #endif
 #if LJ_ABI_PAUTH
-    "\007pauth"
+    "\005pauth"
 #endif
 #if LJ_TARGET_UWP
     "\003uwp"

+ 6 - 1
libs/LuaJIT/src/lj_api.c

@@ -104,7 +104,12 @@ LUA_API int lua_checkstack(lua_State *L, int size)
   if (size > LUAI_MAXCSTACK || (L->top - L->base + size) > LUAI_MAXCSTACK) {
     return 0;  /* Stack overflow. */
   } else if (size > 0) {
-    lj_state_checkstack(L, (MSize)size);
+    int avail = (int)(mref(L->maxstack, TValue) - L->top);
+    if (size > avail &&
+	lj_state_cpgrowstack(L, (MSize)(size - avail)) != LUA_OK) {
+      L->top--;
+      return 0;  /* Out of memory. */
+    }
   }
   return 1;
 }

+ 1 - 1
libs/LuaJIT/src/lj_asm_arm.h

@@ -2255,7 +2255,7 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
   }
   if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
     as->evenspill = nslots;
-  return REGSP_HINT(RID_RET);
+  return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET);
 }
 
 static void asm_setup_target(ASMState *as)

+ 33 - 18
libs/LuaJIT/src/lj_asm_arm64.h

@@ -222,7 +222,8 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
     return A64F_M(ir->r);
   } else if (irref_isk(ref)) {
     int64_t k = get_k64val(as, ref);
-    uint32_t m = logical ? emit_isk13(k, irt_is64(ir->t)) : emit_isk12(k);
+    uint32_t m = logical ? emit_isk13(k, irt_is64(ir->t)) :
+			   emit_isk12(irt_is64(ir->t) ? k : (int32_t)k);
     if (m)
       return m;
   } else if (mayfuse(as, ref)) {
@@ -432,6 +433,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
   for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
     as->cost[gpr] = REGCOST(~0u, ASMREF_L);
   gpr = REGARG_FIRSTGPR;
+#if LJ_HASFFI && LJ_ABI_WIN
+  if ((ci->flags & CCI_VARARG)) {
+    fpr = REGARG_LASTFPR+1;
+  }
+#endif
   for (n = 0; n < nargs; n++) { /* Setup args. */
     IRRef ref = args[n];
     IRIns *ir = IR(ref);
@@ -442,6 +448,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 		     "reg %d not free", fpr);  /* Must have been evicted. */
 	  ra_leftov(as, fpr, ref);
 	  fpr++;
+#if LJ_HASFFI && LJ_ABI_WIN
+	} else if ((ci->flags & CCI_VARARG) && (gpr <= REGARG_LASTGPR)) {
+	  Reg rf = ra_alloc1(as, ref, RSET_FPR);
+	  emit_dn(as, A64I_FMOV_R_D, gpr++, rf & 31);
+#endif
 	} else {
 	  Reg r = ra_alloc1(as, ref, RSET_FPR);
 	  int32_t al = spalign;
@@ -776,7 +787,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
   int destused = ra_used(ir);
   Reg dest = ra_dest(as, ir, allow);
   Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
-  Reg key = 0, tmp = RID_TMP, type = RID_NONE, tkey;
+  Reg tmp = RID_TMP, type = RID_NONE, key, tkey;
   IRRef refkey = ir->op2;
   IRIns *irkey = IR(refkey);
   int isk = irref_isk(refkey);
@@ -786,26 +797,22 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
   MCLabel l_end, l_loop;
   rset_clear(allow, tab);
 
-  /* Allocate registers outside of the loop. */
-  if (irkey->o != IR_KNUM || !(k = emit_isk12((int64_t)ir_knum(irkey)->u64))) {
-    key = ra_alloc1(as, refkey, irt_isnum(kt) ? RSET_FPR : allow);
-    rset_clear(allow, key);
-  }
-  if (!isk) {
-    tkey = ra_scratch(as, allow);
-    rset_clear(allow, tkey);
-  } else if (irt_isnum(kt)) {
-    tkey = key; /* Assumes -0.0 is already canonicalized to +0.0. */
-  } else {
+  /* Allocate register for tkey outside of the loop. */
+  if (isk) {
     int64_t kk;
     if (irt_isaddr(kt)) {
       kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
+    } else if (irt_isnum(kt)) {
+      kk = (int64_t)ir_knum(irkey)->u64;
+      /* Assumes -0.0 is already canonicalized to +0.0. */
     } else {
       lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
       kk = ~((int64_t)~irt_toitype(kt) << 47);
     }
-    tkey = ra_allock(as, kk, allow);
-    rset_clear(allow, tkey);
+    k = emit_isk12(kk);
+    tkey = k ? 0 : ra_allock(as, kk, allow);
+  } else {
+    tkey = ra_scratch(as, allow);
   }
 
   /* Key not found in chain: jump to exit (if merged) or load niltv. */
@@ -838,10 +845,13 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
   /* Construct tkey as canonicalized or tagged key. */
   if (!isk) {
     if (irt_isnum(kt)) {
+      key = ra_alloc1(as, refkey, RSET_FPR);
       emit_dnm(as, A64I_CSELx | A64F_CC(CC_EQ), tkey, RID_ZERO, tkey);
+      /* A64I_FMOV_R_D from key to tkey done below. */
     } else {
       lj_assertA(irt_isaddr(kt), "bad HREF key type");
-      type = ra_allock(as, irt_toitype(kt) << 15, allow);
+      key = ra_alloc1(as, refkey, allow);
+      type = ra_allock(as, irt_toitype(kt) << 15, rset_clear(allow, key));
       emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 32), tkey, key, type);
     }
   }
@@ -1943,6 +1953,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
     int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
     int spofs = 0, spalign = LJ_TARGET_OSX ? 0 : 7, nslots;
     asm_collectargs(as, ir, ci, args);
+#if LJ_ABI_WIN
+    if ((ci->flags & CCI_VARARG)) nfpr = 0;
+#endif
     for (i = 0; i < nargs; i++) {
       int al = spalign;
       if (!args[i]) {
@@ -1954,7 +1967,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 #endif
       } else if (irt_isfp(IR(args[i])->t)) {
 	if (nfpr > 0) { nfpr--; continue; }
-#if LJ_TARGET_OSX
+#if LJ_ABI_WIN
+	if ((ci->flags & CCI_VARARG) && ngpr > 0) { ngpr--; continue; }
+#elif LJ_TARGET_OSX
 	al |= irt_isnum(IR(args[i])->t) ? 7 : 3;
 #endif
       } else {
@@ -1970,7 +1985,7 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
       as->evenspill = nslots;
   }
 #endif
-  return REGSP_HINT(RID_RET);
+  return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET);
 }
 
 static void asm_setup_target(ASMState *as)

+ 2 - 1
libs/LuaJIT/src/lj_asm_x86.h

@@ -140,7 +140,8 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
     }
   } else if (irb->o == IR_ADD && irref_isk(irb->op2)) {
     /* Fuse base offset (vararg load). */
-    as->mrm.ofs = IR(irb->op2)->i;
+    IRIns *irk = IR(irb->op2);
+    as->mrm.ofs = irk->o == IR_KINT ? irk->i : (int32_t)ir_kint64(irk)->u64;
     return irb->op1;
   }
   return ref;  /* Otherwise use the given array base. */

+ 4 - 0
libs/LuaJIT/src/lj_carith.c

@@ -44,9 +44,13 @@ static int carith_checkarg(lua_State *L, CTState *cts, CDArith *ca)
 	p = (uint8_t *)cdata_getptr(p, ct->size);
 	if (ctype_isref(ct->info)) ct = ctype_rawchild(cts, ct);
       } else if (ctype_isfunc(ct->info)) {
+	CTypeID id0 = i ? ctype_typeid(cts, ca->ct[0]) : 0;
 	p = (uint8_t *)*(void **)p;
 	ct = ctype_get(cts,
 	  lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|id), CTSIZE_PTR));
+	if (i) {  /* cts->tab may have been reallocated. */
+	  ca->ct[0] = ctype_get(cts, id0);
+	}
       }
       if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
       ca->ct[i] = ct;

+ 17 - 1
libs/LuaJIT/src/lj_ccall.c

@@ -985,6 +985,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
     fid = ctf->sib;
   }
 
+#if LJ_TARGET_ARM64 && LJ_ABI_WIN
+  if ((ct->info & CTF_VARARG)) {
+    nsp -= maxgpr * CTSIZE_PTR;  /* May end up with negative nsp. */
+    ngpr = maxgpr;
+    nfpr = CCALL_NARG_FPR;
+  }
+#endif
+
   /* Walk through all passed arguments. */
   for (o = L->base+1, narg = 1; o < top; o++, narg++) {
     CTypeID did;
@@ -1035,9 +1043,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
 	align = CTSIZE_PTR-1;
       nsp = (nsp + align) & ~align;
     }
+#if LJ_TARGET_ARM64 && LJ_ABI_WIN
+    /* A negative nsp points into cc->gpr. Blame MS for their messy ABI. */
+    dp = ((uint8_t *)cc->stack) + (int32_t)nsp;
+#else
     dp = ((uint8_t *)cc->stack) + nsp;
+#endif
     nsp += CCALL_PACK_STACKARG ? sz : n * CTSIZE_PTR;
-    if (nsp > CCALL_SIZE_STACK) {  /* Too many arguments. */
+    if ((int32_t)nsp > CCALL_SIZE_STACK) {  /* Too many arguments. */
     err_nyi:
       lj_err_caller(L, LJ_ERR_FFI_NYICALL);
     }
@@ -1099,6 +1112,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
 #endif
   }
   if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG);  /* Too few arguments. */
+#if LJ_TARGET_ARM64 && LJ_ABI_WIN
+  if ((int32_t)nsp < 0) nsp = 0;
+#endif
 
 #if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
   cc->nfpr = nfpr;  /* Required for vararg functions. */

+ 1 - 5
libs/LuaJIT/src/lj_crecord.c

@@ -1118,12 +1118,8 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
     ngpr = 1;
   else if (ctype_cconv(ct->info) == CTCC_FASTCALL)
     ngpr = 2;
-#elif LJ_TARGET_ARM64
-#if LJ_ABI_WIN
-#error "NYI: ARM64 Windows ABI calling conventions"
-#elif LJ_TARGET_OSX
+#elif LJ_TARGET_ARM64 && LJ_TARGET_OSX
   int ngpr = CCALL_NARG_GPR;
-#endif
 #endif
 
   /* Skip initial attributes. */

+ 4 - 2
libs/LuaJIT/src/lj_ctype.h

@@ -276,6 +276,8 @@ typedef struct CTState {
 #define CTTYDEFP(_)
 #endif
 
+#define CTF_LONG_IF8		(CTF_LONG * (sizeof(long) == 8))
+
 /* Common types. */
 #define CTTYDEF(_) \
   _(NONE,		0,	CT_ATTRIB, CTATTRIB(CTA_BAD)) \
@@ -289,8 +291,8 @@ typedef struct CTState {
   _(UINT16,		2,	CT_NUM, CTF_UNSIGNED|CTALIGN(1)) \
   _(INT32,		4,	CT_NUM, CTALIGN(2)) \
   _(UINT32,		4,	CT_NUM, CTF_UNSIGNED|CTALIGN(2)) \
-  _(INT64,		8,	CT_NUM, CTF_LONG|CTALIGN(3)) \
-  _(UINT64,		8,	CT_NUM, CTF_UNSIGNED|CTF_LONG|CTALIGN(3)) \
+  _(INT64,		8,	CT_NUM, CTF_LONG_IF8|CTALIGN(3)) \
+  _(UINT64,		8,	CT_NUM, CTF_UNSIGNED|CTF_LONG_IF8|CTALIGN(3)) \
   _(FLOAT,		4,	CT_NUM, CTF_FP|CTALIGN(2)) \
   _(DOUBLE,		8,	CT_NUM, CTF_FP|CTALIGN(3)) \
   _(COMPLEX_FLOAT,	8,	CT_ARRAY, CTF_COMPLEX|CTALIGN(2)|CTID_FLOAT) \

+ 1 - 1
libs/LuaJIT/src/lj_def.h

@@ -69,7 +69,7 @@ typedef unsigned int uintptr_t;
 #define LJ_MAX_UPVAL	60		/* Max. # of upvalues. */
 
 #define LJ_MAX_IDXCHAIN	100		/* __index/__newindex chain limit. */
-#define LJ_STACK_EXTRA	(5+2*LJ_FR2)	/* Extra stack space (metamethods). */
+#define LJ_STACK_EXTRA	(5+3*LJ_FR2)	/* Extra stack space (metamethods). */
 
 #define LJ_NUM_CBPAGE	1		/* Number of FFI callback pages. */
 

+ 1 - 1
libs/LuaJIT/src/lj_dispatch.c

@@ -453,7 +453,7 @@ static int call_init(lua_State *L, GCfunc *fn)
     int numparams = pt->numparams;
     int gotparams = (int)(L->top - L->base);
     int need = pt->framesize;
-    if ((pt->flags & PROTO_VARARG)) need += 1+gotparams;
+    if ((pt->flags & PROTO_VARARG)) need += 1+LJ_FR2+gotparams;
     lj_state_checkstack(L, (MSize)need);
     numparams -= gotparams;
     return numparams >= 0 ? numparams : 0;

+ 31 - 9
libs/LuaJIT/src/lj_emit_arm64.h

@@ -124,9 +124,9 @@ static LJ_AINLINE uint32_t emit_lso_pair_candidate(A64Ins ai, int ofs, int sc)
   }
 }
 
-static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs)
+static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs64)
 {
-  int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3;
+  int ot = emit_checkofs(ai, ofs64), sc = (ai >> 30) & 3, ofs = (int)ofs64;
   lj_assertA(ot, "load/store offset %d out of range", ofs);
   /* Combine LDR/STR pairs to LDP/STP. */
   if ((sc == 2 || sc == 3) &&
@@ -193,6 +193,32 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int is64)
   return 0;  /* Failed. */
 }
 
+#define glofs(as, k) \
+  ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g))
+#define mcpofs(as, k) \
+  ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1)))
+#define checkmcpofs(as, k) \
+  (A64F_S_OK(mcpofs(as, k)>>2, 19))
+
+/* Try to form a const as ADR or ADRP or ADRP + ADD. */
+static int emit_kadrp(ASMState *as, Reg rd, uint64_t k)
+{
+  A64Ins ai = A64I_ADR;
+  int64_t ofs = mcpofs(as, k);
+  if (!A64F_S_OK((uint64_t)ofs, 21)) {
+    uint64_t kpage = k & ~0xfffull;
+    MCode *adrp = as->mcp - 1 - (k != kpage);
+    ofs = (int64_t)(kpage - ((uint64_t)adrp & ~0xfffull)) >> 12;
+    if (!A64F_S_OK(ofs, 21))
+      return 0;  /* Failed. */
+    if (k != kpage)
+      emit_dn(as, (A64I_ADDx^A64I_K12)|A64F_U12(k - kpage), rd, rd);
+    ai = A64I_ADRP;
+  }
+  emit_d(as, ai|(((uint32_t)ofs&3)<<29)|A64F_S19(ofs>>2), rd);
+  return 1;
+}
+
 static void emit_loadk(ASMState *as, Reg rd, uint64_t u64)
 {
   int zeros = 0, ones = 0, neg, lshift = 0;
@@ -213,6 +239,9 @@ static void emit_loadk(ASMState *as, Reg rd, uint64_t u64)
     if (emit_kdelta(as, rd, u64, is64)) {
       return;
     }
+    if (emit_kadrp(as, rd, u64)) {  /* Either 1 or 2 ins. */
+      return;
+    }
   }
   if (neg) {
     u64 = ~u64;
@@ -240,13 +269,6 @@ static void emit_loadk(ASMState *as, Reg rd, uint64_t u64)
 /* Load a 64 bit constant into a GPR. */
 #define emit_loadu64(as, rd, i)	emit_loadk(as, rd, i)
 
-#define glofs(as, k) \
-  ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g))
-#define mcpofs(as, k) \
-  ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1)))
-#define checkmcpofs(as, k) \
-  (A64F_S_OK(mcpofs(as, k)>>2, 19))
-
 static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
 
 /* Get/set from constant pointer. */

+ 40 - 11
libs/LuaJIT/src/lj_err.c

@@ -174,12 +174,15 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
     case FRAME_PCALL:  /* FF pcall() frame. */
     case FRAME_PCALLH:  /* FF pcall() frame inside hook. */
       if (errcode) {
+	global_State *g;
 	if (errcode == LUA_YIELD) {
 	  frame = frame_prevd(frame);
 	  break;
 	}
+	g = G(L);
+	setgcref(g->cur_L, obj2gco(L));
 	if (frame_typep(frame) == FRAME_PCALL)
-	  hook_leave(G(L));
+	  hook_leave(g);
 	L->base = frame_prevd(frame) + 1;
 	L->cframe = cf;
 	unwindstack(L, L->base);
@@ -209,11 +212,6 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
 ** from 3rd party docs or must be found by trial-and-error. They really
 ** don't want you to write your own language-specific exception handler
 ** or to interact gracefully with MSVC. :-(
-**
-** Apparently MSVC doesn't call C++ destructors for foreign exceptions
-** unless you compile your C++ code with /EHa. Unfortunately this means
-** catch (...) also catches things like access violations. The use of
-** _set_se_translator doesn't really help, because it requires /EHa, too.
 */
 
 #define WIN32_LEAN_AND_MEAN
@@ -261,6 +259,8 @@ LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
 {
 #if LJ_TARGET_X86
   void *cf = (char *)f - CFRAME_OFS_SEH;
+#elif LJ_TARGET_ARM64
+  void *cf = (char *)f - CFRAME_SIZE;
 #else
   void *cf = f;
 #endif
@@ -268,11 +268,25 @@ LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
   int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
 		LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
   if ((rec->ExceptionFlags & 6)) {  /* EH_UNWINDING|EH_EXIT_UNWIND */
+    if (rec->ExceptionCode == STATUS_LONGJUMP &&
+	rec->ExceptionRecord &&
+	LJ_EXCODE_CHECK(rec->ExceptionRecord->ExceptionCode)) {
+      errcode = LJ_EXCODE_ERRCODE(rec->ExceptionRecord->ExceptionCode);
+      if ((rec->ExceptionFlags & 0x20)) {  /* EH_TARGET_UNWIND */
+	/* Unwinding is about to finish; revert the ExceptionCode so that
+	** RtlRestoreContext does not try to restore from a _JUMP_BUFFER.
+	*/
+	rec->ExceptionCode = 0;
+      }
+    }
     /* Unwind internal frames. */
     err_unwind(L, cf, errcode);
   } else {
     void *cf2 = err_unwind(L, cf, 0);
     if (cf2) {  /* We catch it, so start unwinding the upper frames. */
+#if !LJ_TARGET_X86
+      EXCEPTION_RECORD rec2;
+#endif
       if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
 	  rec->ExceptionCode == LJ_GCC_EXCODE) {
 #if !LJ_TARGET_CYGWIN
@@ -295,14 +309,29 @@ LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
 	(void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
       /* lj_vm_rtlunwind does not return. */
 #else
+      if (LJ_EXCODE_CHECK(rec->ExceptionCode)) {
+	/* For unwind purposes, wrap the EXCEPTION_RECORD in something that
+	** looks like a longjmp, so that MSVC will execute C++ destructors in
+	** the frames we unwind over. ExceptionInformation[0] should really
+	** contain a _JUMP_BUFFER*, but hopefully nobody is looking too closely
+	** at this point.
+	*/
+	rec2.ExceptionCode = STATUS_LONGJUMP;
+	rec2.ExceptionRecord = rec;
+	rec2.ExceptionAddress = 0;
+	rec2.NumberParameters = 1;
+	rec2.ExceptionInformation[0] = (ULONG_PTR)ctx;
+	rec = &rec2;
+      }
       /* Unwind the stack and call all handlers for all lower C frames
       ** (including ourselves) again with EH_UNWINDING set. Then set
-      ** stack pointer = cf, result = errcode and jump to the specified target.
+      ** stack pointer = f, result = errcode and jump to the specified target.
       */
-      RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
-			       lj_vm_unwind_ff_eh :
-			       lj_vm_unwind_c_eh),
-		  rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
+      RtlUnwindEx(f, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
+			      lj_vm_unwind_ff_eh :
+			      lj_vm_unwind_c_eh),
+		  rec, (void *)(uintptr_t)errcode, dispatch->ContextRecord,
+		  dispatch->HistoryTable);
       /* RtlUnwindEx should never return. */
 #endif
     }

+ 11 - 12
libs/LuaJIT/src/lj_ffrecord.c

@@ -1130,7 +1130,7 @@ static TRef recff_sbufx_check(jit_State *J, RecordFFData *rd, ptrdiff_t arg)
 /* Emit BUFHDR for write to extended string buffer. */
 static TRef recff_sbufx_write(jit_State *J, TRef ud)
 {
-  TRef trbuf = emitir(IRT(IR_ADD, IRT_PGC), ud, lj_ir_kint(J, sizeof(GCudata)));
+  TRef trbuf = emitir(IRT(IR_ADD, IRT_PGC), ud, lj_ir_kintpgc(J, sizeof(GCudata)));
   return emitir(IRT(IR_BUFHDR, IRT_PGC), trbuf, IRBUFHDR_WRITE);
 }
 
@@ -1164,20 +1164,19 @@ static void LJ_FASTCALL recff_buffer_method_reset(jit_State *J, RecordFFData *rd
   SBufExt *sbx = bufV(&rd->argv[0]);
   int iscow = (int)sbufiscow(sbx);
   TRef trl = recff_sbufx_get_L(J, ud);
-  TRef trcow = emitir(IRT(IR_BAND, IRT_IGC), trl, lj_ir_kint(J, SBUF_FLAG_COW));
-  TRef zero = lj_ir_kint(J, 0);
-  emitir(IRTG(iscow ? IR_NE : IR_EQ, IRT_IGC), trcow, zero);
+  TRef trcow = emitir(IRT(IR_BAND, IRT_IGC), trl, lj_ir_kintpgc(J, SBUF_FLAG_COW));
+  TRef zeropgc = lj_ir_kintpgc(J, 0);
+  emitir(IRTG(iscow ? IR_NE : IR_EQ, IRT_IGC), trcow, zeropgc);
   if (iscow) {
-    trl = emitir(IRT(IR_BXOR, IRT_IGC), trl,
-		 LJ_GC64 ? lj_ir_kint64(J, SBUF_FLAG_COW) :
-			   lj_ir_kint(J, SBUF_FLAG_COW));
-    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, zero);
-    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_E, zero);
-    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_B, zero);
+    TRef zerop = lj_ir_kintp(J, 0);
+    trl = emitir(IRT(IR_BXOR, IRT_IGC), trl, lj_ir_kintpgc(J, SBUF_FLAG_COW));
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, zerop);
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_E, zerop);
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_B, zerop);
     recff_sbufx_set_L(J, ud, trl);
     emitir(IRT(IR_FSTORE, IRT_PGC),
-	   emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_REF), zero);
-    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, zero);
+	   emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_REF), zeropgc);
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, zerop);
   } else {
     TRef trb = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_B);
     recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trb);

+ 2 - 2
libs/LuaJIT/src/lj_ir.h

@@ -76,8 +76,8 @@
   \
   _(ABS,	N , ref, ref) \
   _(LDEXP,	N , ref, ref) \
-  _(MIN,	C , ref, ref) \
-  _(MAX,	C , ref, ref) \
+  _(MIN,	N , ref, ref) \
+  _(MAX,	N , ref, ref) \
   _(FPMATH,	N , ref, lit) \
   \
   /* Overflow-checking arithmetic ops. */ \

+ 1 - 1
libs/LuaJIT/src/lj_ircall.h

@@ -63,7 +63,7 @@ typedef struct CCallInfo {
 /* Helpers for conditional function definitions. */
 #define IRCALLCOND_ANY(x)		x
 
-#if LJ_TARGET_X86ORX64
+#if LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64
 #define IRCALLCOND_FPMATH(x)		NULL
 #else
 #define IRCALLCOND_FPMATH(x)		x

+ 6 - 0
libs/LuaJIT/src/lj_iropt.h

@@ -56,6 +56,12 @@ LJ_FUNC TRef lj_ir_ktrace(jit_State *J);
 #define lj_ir_kintp(J, k)	lj_ir_kint(J, (int32_t)(k))
 #endif
 
+#if LJ_GC64
+#define lj_ir_kintpgc		lj_ir_kintp
+#else
+#define lj_ir_kintpgc		lj_ir_kint
+#endif
+
 static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n)
 {
   TValue tv;

+ 1 - 1
libs/LuaJIT/src/lj_opt_dce.c

@@ -44,12 +44,12 @@ static void dce_propagate(jit_State *J)
     IRIns *ir = IR(ins);
     if (irt_ismarked(ir->t)) {
       irt_clearmark(ir->t);
-      pchain[ir->o] = &ir->prev;
     } else if (!ir_sideeff(ir)) {
       *pchain[ir->o] = ir->prev;  /* Reroute original instruction chain. */
       lj_ir_nop(ir);
       continue;
     }
+    pchain[ir->o] = &ir->prev;
     if (ir->op1 >= REF_FIRST) irt_setmark(IR(ir->op1)->t);
     if (ir->op2 >= REF_FIRST) irt_setmark(IR(ir->op2)->t);
   }

+ 4 - 4
libs/LuaJIT/src/lj_opt_fold.c

@@ -377,10 +377,10 @@ static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
   case IR_BOR: k1 |= k2; break;
   case IR_BXOR: k1 ^= k2; break;
   case IR_BSHL: k1 <<= (k2 & 63); break;
-  case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 63)); break;
-  case IR_BSAR: k1 >>= (k2 & 63); break;
-  case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break;
-  case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break;
+  case IR_BSHR: k1 >>= (k2 & 63); break;
+  case IR_BSAR: k1 = (uint64_t)((int64_t)k1 >> (k2 & 63)); break;
+  case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break;
+  case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break;
   default: lj_assertJ(0, "bad IR op %d", op); break;
   }
 #else

+ 9 - 5
libs/LuaJIT/src/lj_record.c

@@ -1781,7 +1781,7 @@ noconstify:
 	emitir(IRTG(IR_EQ, IRT_PGC),
 	       REF_BASE,
 	       emitir(IRT(IR_ADD, IRT_PGC), uref,
-		      lj_ir_kint(J, (slot - 1 - LJ_FR2) * -8)));
+		      lj_ir_kintpgc(J, (slot - 1 - LJ_FR2) * -8)));
 	slot -= (int32_t)J->baseslot;  /* Note: slot number may be negative! */
 	if (val == 0) {
 	  return getslot(J, slot);
@@ -1794,7 +1794,7 @@ noconstify:
     }
     emitir(IRTG(IR_UGT, IRT_PGC),
 	   emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE),
-	   lj_ir_kint(J, (J->baseslot + J->maxslot) * 8));
+	   lj_ir_kintpgc(J, (J->baseslot + J->maxslot) * 8));
   } else {
     needbarrier = 1;
     uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv));
@@ -1972,7 +1972,8 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
 	  emitir(IRTGI(IR_EQ), fr,
 		 lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1)));
 	vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
-	vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8*(1+LJ_FR2)));
+	vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase,
+		       lj_ir_kintpgc(J, frofs-8*(1+LJ_FR2)));
 	for (i = 0; i < nload; i++) {
 	  IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]);
 	  J->base[dst+i] = lj_record_vload(J, vbase, (MSize)i, t);
@@ -1991,8 +1992,11 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
       TRef tr = TREF_NIL;
       ptrdiff_t idx = lj_ffrecord_select_mode(J, tridx, &J->L->base[dst-1]);
       if (idx < 0) goto nyivarg;
-      if (idx != 0 && !tref_isinteger(tridx))
+      if (idx != 0 && !tref_isinteger(tridx)) {
+	if (tref_isstr(tridx))
+	  tridx = emitir(IRTG(IR_STRTO, IRT_NUM), tridx, 0);
 	tridx = emitir(IRTGI(IR_CONV), tridx, IRCONV_INT_NUM|IRCONV_INDEX);
+      }
       if (idx != 0 && tref_isk(tridx)) {
 	emitir(IRTGI(idx <= nvararg ? IR_GE : IR_LT),
 	       fr, lj_ir_kint(J, frofs+8*(int32_t)idx));
@@ -2020,7 +2024,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
 	IRType t;
 	TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
 	vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase,
-		       lj_ir_kint(J, frofs-(8<<LJ_FR2)));
+		       lj_ir_kintpgc(J, frofs-(8<<LJ_FR2)));
 	t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]);
 	aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx);
 	tr = lj_record_vload(J, aref, 0, t);

+ 23 - 4
libs/LuaJIT/src/lj_state.c

@@ -103,8 +103,17 @@ void lj_state_shrinkstack(lua_State *L, MSize used)
 void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need)
 {
   MSize n;
-  if (L->stacksize > LJ_STACK_MAXEX)  /* Overflow while handling overflow? */
-    lj_err_throw(L, LUA_ERRERR);
+  if (L->stacksize >= LJ_STACK_MAXEX) {
+    /* 4. Throw 'error in error handling' when we are _over_ the limit. */
+    if (L->stacksize > LJ_STACK_MAXEX)
+      lj_err_throw(L, LUA_ERRERR);  /* Does not invoke an error handler. */
+    /* 1. We are _at_ the limit after the last growth. */
+    if (L->status < LUA_ERRRUN) {  /* 2. Throw 'stack overflow'. */
+      L->status = LUA_ERRRUN;  /* Prevent ending here again for pushed msg. */
+      lj_err_msg(L, LJ_ERR_STKOV);  /* May invoke an error handler. */
+    }
+    /* 3. Add space (over the limit) for pushed message and error handler. */
+  }
   n = L->stacksize + need;
   if (n > LJ_STACK_MAX) {
     n += 2*LUA_MINSTACK;
@@ -114,8 +123,6 @@ void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need)
       n = LJ_STACK_MAX;
   }
   resizestack(L, n);
-  if (L->stacksize >= LJ_STACK_MAXEX)
-    lj_err_msg(L, LJ_ERR_STKOV);
 }
 
 void LJ_FASTCALL lj_state_growstack1(lua_State *L)
@@ -123,6 +130,18 @@ void LJ_FASTCALL lj_state_growstack1(lua_State *L)
   lj_state_growstack(L, 1);
 }
 
+static TValue *cpgrowstack(lua_State *co, lua_CFunction dummy, void *ud)
+{  /* Callback handed to lj_vm_cpcall() by lj_state_cpgrowstack(). */
+  UNUSED(dummy);  /* The lua_CFunction argument is ignored here. */
+  lj_state_growstack(co, *(MSize *)ud);  /* ud points to the MSize 'need'. */
+  return NULL;  /* Always returns NULL to the caller of the callback. */
+}
+
+int LJ_FASTCALL lj_state_cpgrowstack(lua_State *L, MSize need)
+{  /* Run lj_state_growstack(L, need) through lj_vm_cpcall(). */
+  return lj_vm_cpcall(L, NULL, &need, cpgrowstack);  /* &need is passed as ud. */
+}  /* NOTE(review): result is presumably 0 or a LUA_ERR* status -- confirm. */
+
 /* Allocate basic stack for new state. */
 static void stack_init(lua_State *L1, lua_State *L)
 {

+ 1 - 0
libs/LuaJIT/src/lj_state.h

@@ -18,6 +18,7 @@ LJ_FUNC void lj_state_relimitstack(lua_State *L);
 LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used);
 LJ_FUNCA void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need);
 LJ_FUNC void LJ_FASTCALL lj_state_growstack1(lua_State *L);
+LJ_FUNC int LJ_FASTCALL lj_state_cpgrowstack(lua_State *L, MSize need);
 
 static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need)
 {

+ 2 - 0
libs/LuaJIT/src/lj_target_arm64.h

@@ -234,6 +234,8 @@ typedef enum A64Ins {
   A64I_MOVZx = 0xd2800000,
   A64I_MOVNw = 0x12800000,
   A64I_MOVNx = 0x92800000,
+  A64I_ADR = 0x10000000,
+  A64I_ADRP = 0x90000000,
 
   A64I_LDRB = 0x39400000,
   A64I_LDRH = 0x79400000,

+ 16 - 10
libs/LuaJIT/src/lj_trace.c

@@ -613,21 +613,27 @@ static int trace_abort(jit_State *J)
     J->cur.link = 0;
     J->cur.linktype = LJ_TRLINK_NONE;
     lj_vmevent_send(L, TRACE,
-      TValue *frame;
+      cTValue *bot = tvref(L->stack)+LJ_FR2;
+      cTValue *frame;
       const BCIns *pc;
-      GCfunc *fn;
+      BCPos pos = 0;
       setstrV(L, L->top++, lj_str_newlit(L, "abort"));
       setintV(L->top++, traceno);
       /* Find original Lua function call to generate a better error message. */
-      frame = J->L->base-1;
-      pc = J->pc;
-      while (!isluafunc(frame_func(frame))) {
-	pc = (frame_iscont(frame) ? frame_contpc(frame) : frame_pc(frame)) - 1;
-	frame = frame_prev(frame);
+      for (frame = J->L->base-1, pc = J->pc; ; frame = frame_prev(frame)) {
+	if (isluafunc(frame_func(frame))) {
+	  pos = proto_bcpos(funcproto(frame_func(frame)), pc);
+	  break;
+	} else if (frame_prev(frame) <= bot) {
+	  break;
+	} else if (frame_iscont(frame)) {
+	  pc = frame_contpc(frame) - 1;
+	} else {
+	  pc = frame_pc(frame) - 1;
+	}
       }
-      fn = frame_func(frame);
-      setfuncV(L, L->top++, fn);
-      setintV(L->top++, proto_bcpos(funcproto(fn), pc));
+      setfuncV(L, L->top++, frame_func(frame));
+      setintV(L->top++, pos);
       copyTV(L, L->top++, restorestack(L, errobj));
       copyTV(L, L->top++, &J->errinfo);
     );

+ 14 - 1
libs/LuaJIT/src/msvcbuild.bat

@@ -27,12 +27,15 @@
 @set BUILDTYPE=release
 @set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
 
+@setlocal
+@call :SETHOSTVARS
 %LJCOMPILE% host\minilua.c
 @if errorlevel 1 goto :BAD
 %LJLINK% /out:minilua.exe minilua.obj
 @if errorlevel 1 goto :BAD
 if exist minilua.exe.manifest^
   %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
+@endlocal
 
 @set DASMFLAGS=-D WIN -D JIT -D FFI -D ENDIAN_LE -D FPU -D P64
 @set LJARCH=x64
@@ -46,6 +49,7 @@ if exist minilua.exe.manifest^
 :NO32
 @if "%VSCMD_ARG_TGT_ARCH%" neq "arm64" goto :X64
 @set DASC=vm_arm64.dasc
+@set DASMTARGET=-D LUAJIT_TARGET=LUAJIT_ARCH_ARM64
 @set LJARCH=arm64
 @goto :DA
 :X64
@@ -60,12 +64,15 @@ minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
 if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
 minilua host\genversion.lua
 
-%LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c
+@setlocal
+@call :SETHOSTVARS
+%LJCOMPILE% /I "." /I %DASMDIR% %DASMTARGET% host\buildvm*.c
 @if errorlevel 1 goto :BAD
 %LJLINK% /out:buildvm.exe buildvm*.obj
 @if errorlevel 1 goto :BAD
 if exist buildvm.exe.manifest^
   %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
+@endlocal
 
 buildvm -m peobj -o lj_vm.obj
 @if errorlevel 1 goto :BAD
@@ -124,6 +131,12 @@ if exist luajit.exe.manifest^
 @echo.
 @echo === Successfully built LuaJIT for Windows/%LJARCH% ===
 
+@goto :END
+:SETHOSTVARS
+@if "%VSCMD_ARG_HOST_ARCH%_%VSCMD_ARG_TGT_ARCH%" equ "x64_arm64" (
+  call "%VSINSTALLDIR%Common7\Tools\VsDevCmd.bat" -arch=%VSCMD_ARG_HOST_ARCH% -no_logo
+  echo on
+)
 @goto :END
 :BAD
 @echo.

+ 115 - 92
libs/LuaJIT/src/vm_arm64.dasc

@@ -113,13 +113,37 @@
 |
 |.define TMPDofs,	#24
 |
+|.if WIN
+|// Windows unwind data is suited to r1 stored first.
+|.macro stp_unwind, r1, r2, where  // Store pair, operands in the given order.
+|  stp r1, r2, where
+|.endmacro
+|.macro ldp_unwind, r1, r2, where  // Load pair, operands in the given order.
+|  ldp r1, r2, where
+|.endmacro
+|.macro ldp_unwind, r1, r2, where, post_index  // Variant with an extra post-index operand.
+|  ldp r1, r2, where, post_index
+|.endmacro
+|.else
+|// Otherwise store r2 first for compact unwind info (OSX).
+|.macro stp_unwind, r1, r2, where  // Store pair, operands swapped (r2, r1).
+|  stp r2, r1, where
+|.endmacro
+|.macro ldp_unwind, r1, r2, where  // Load pair, operands swapped to match stp_unwind.
+|  ldp r2, r1, where
+|.endmacro
+|.macro ldp_unwind, r1, r2, where, post_index  // Swapped variant with a post-index operand.
+|  ldp r2, r1, where, post_index
+|.endmacro
+|.endif
+|
 |.macro save_, gpr1, gpr2, fpr1, fpr2
-|  stp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]
-|  stp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]
+|  stp_unwind d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(14-fpr1)*8]
+|  stp_unwind x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(27-gpr1)*8]
 |.endmacro
 |.macro rest_, gpr1, gpr2, fpr1, fpr2
-|  ldp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]
-|  ldp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]
+|  ldp_unwind d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(14-fpr1)*8]
+|  ldp_unwind x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(27-gpr1)*8]
 |.endmacro
 |
 |.macro saveregs
@@ -127,14 +151,14 @@
 |  sub sp, sp, # CFRAME_SPACE
 |  stp fp, lr, [sp, # SAVE_FP_LR_]
 |  add fp, sp, # SAVE_FP_LR_
-|  stp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
+|  stp_unwind x19, x20, [sp, # SAVE_GPR_+(27-19)*8]
 |  save_ 21, 22, 8, 9
 |  save_ 23, 24, 10, 11
 |  save_ 25, 26, 12, 13
 |  save_ 27, 28, 14, 15
 |.endmacro
 |.macro restoreregs
-|  ldp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
+|  ldp_unwind x19, x20, [sp, # SAVE_GPR_+(27-19)*8]
 |  rest_ 21, 22, 8, 9
 |  rest_ 23, 24, 10, 11
 |  rest_ 25, 26, 12, 13
@@ -267,8 +291,17 @@
 |  blo target
 |.endmacro
 |
+|.macro init_constants  // (Re)load the constant registers TISNIL, TISNUM and TISNUMhi.
+|  movn TISNIL, #0  // TISNIL = ~0 (all bits set).
+|  movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48  // 16-bit value placed in bits 48-63, rest zeroed.
+|  movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16  // Same 16-bit value placed in bits 16-31.
+|.endmacro
+|
 |.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro
 |.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro
+|.macro mov_nil, reg; mov reg, TISNIL; .endmacro  // reg = TISNIL.
+|.macro cmp_nil, reg; cmp reg, TISNIL; .endmacro  // Compare reg with TISNIL (sets flags).
+|.macro add_TISNUM, dst, src; add dst, src, TISNUM; .endmacro  // dst = src + TISNUM.
 |
 #define GL_J(field)	(GG_G2J + (int)offsetof(jit_State, field))
 |
@@ -406,26 +439,26 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |->vm_unwind_c:			// Unwind C stack, return from vm_pcall.
   |  // (void *cframe, int errcode)
+  |  add fp, CARG1, # SAVE_FP_LR_
   |  mov sp, CARG1
   |  mov CRET1, CARG2
-  |->vm_unwind_c_eh:			// Landing pad for external unwinder.
   |  ldr L, SAVE_L
-  |   mv_vmstate TMP0w, C
   |  ldr GL, L->glref
+  |->vm_unwind_c_eh:			// Landing pad for external unwinder.
+  |   mv_vmstate TMP0w, C
   |   st_vmstate TMP0w
   |  b ->vm_leave_unw
   |
   |->vm_unwind_ff:			// Unwind C stack, return from ff pcall.
   |  // (void *cframe)
-  |  and sp, CARG1, #CFRAME_RAWMASK
-  |->vm_unwind_ff_eh:			// Landing pad for external unwinder.
+  |  add fp, CARG1, # SAVE_FP_LR_
+  |  mov sp, CARG1
   |  ldr L, SAVE_L
-  |    movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
-  |    movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
-  |    movn TISNIL, #0
+  |    init_constants
+  |   ldr GL, L->glref			// Setup pointer to global state.
+  |->vm_unwind_ff_eh:			// Landing pad for external unwinder.
   |    mov RC, #16			// 2 results: false + error message.
   |  ldr BASE, L->base
-  |   ldr GL, L->glref			// Setup pointer to global state.
   |    mov_false TMP0
   |  sub RA, BASE, #8			// Results start at BASE-8.
   |  ldr PC, [BASE, FRAME_PC]		// Fetch PC of previous frame.
@@ -486,11 +519,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  str L, GL->cur_L
   |  mov RA, BASE
   |   ldp BASE, CARG1, L->base
-  |    movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
-  |    movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
+  |    init_constants
   |  ldr PC, [BASE, FRAME_PC]
   |     strb wzr, L->status
-  |    movn TISNIL, #0
   |   sub RC, CARG1, BASE
   |  ands CARG1, PC, #FRAME_TYPE
   |   add RC, RC, #8
@@ -526,10 +557,8 @@ static void build_subroutines(BuildCtx *ctx)
   |3:  // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
   |  str L, GL->cur_L
   |  ldp RB, CARG1, L->base		// RB = old base (for vmeta_call).
-  |    movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
-  |    movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
   |  add PC, PC, BASE
-  |    movn TISNIL, #0
+  |    init_constants
   |  sub PC, PC, RB			// PC = frame delta + frame type
   |   sub NARGS8:RC, CARG1, BASE
   |    st_vmstate ST_INTERP
@@ -638,7 +667,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  b >1
   |
   |->vmeta_tgetb:			// RB = table, RC = index
-  |  add RC, RC, TISNUM
+  |  add_TISNUM RC, RC
   |   add CARG2, BASE, RB, lsl #3
   |   add CARG3, sp, TMPDofs
   |  str RC, TMPD
@@ -673,7 +702,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  sxtw CARG2, TMP1w
   |  bl extern lj_tab_getinth		// (GCtab *t, int32_t key)
   |  // Returns cTValue * or NULL.
-  |  mov TMP0, TISNIL
+  |  mov_nil TMP0
   |  cbz CRET1, ->BC_TGETR_Z
   |  ldr TMP0, [CRET1]
   |  b ->BC_TGETR_Z
@@ -696,7 +725,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  b >1
   |
   |->vmeta_tsetb:			// RB = table, RC = index
-  |  add RC, RC, TISNUM
+  |  add_TISNUM RC, RC
   |   add CARG2, BASE, RB, lsl #3
   |   add CARG3, sp, TMPDofs
   |  str RC, TMPD
@@ -1010,7 +1039,7 @@ static void build_subroutines(BuildCtx *ctx)
   |1:  // Field metatable must be at same offset for GCtab and GCudata!
   |  ldr TAB:RB, TAB:CARG1->metatable
   |2:
-  |   mov CARG1, TISNIL
+  |   mov_nil CARG1
   |   ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable]
   |  cbz TAB:RB, ->fff_restv
   |  ldr TMP1w, TAB:RB->hmask
@@ -1032,7 +1061,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  movk CARG1, #(LJ_TTAB>>1)&0xffff, lsl #48
   |  b ->fff_restv
   |5:
-  |  cmp TMP0, TISNIL
+  |  cmp_nil TMP0
   |  bne ->fff_restv
   |  b <4
   |
@@ -1132,8 +1161,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  cbnz TAB:CARG2, ->fff_fallback
 #endif
   |  mov RC, #(3+1)*8
-  |  stp CARG1, TISNIL, [BASE, #-8]
-  |   str CFUNC:CARG4, [BASE, #-16]
+  |  stp CFUNC:CARG4, CARG1, [BASE, #-16]
+  |   str TISNIL, [BASE]
   |  b ->fff_res
   |
   |.ffunc_2 ipairs_aux
@@ -1145,14 +1174,14 @@ static void build_subroutines(BuildCtx *ctx)
   |  add CARG2w, CARG2w, #1
   |  cmp CARG2w, TMP1w
   |    ldr PC, [BASE, FRAME_PC]
-  |     add TMP2, CARG2, TISNUM
+  |     add_TISNUM TMP2, CARG2
   |   mov RC, #(0+1)*8
   |     str TMP2, [BASE, #-16]
   |  bhs >2				// Not in array part?
   |  ldr TMP0, [CARG3, CARG2, lsl #3]
   |1:
   |   mov TMP1, #(2+1)*8
-  |   cmp TMP0, TISNIL
+  |   cmp_nil TMP0
   |  str TMP0, [BASE, #-8]
   |   csel RC, RC, TMP1, eq
   |  b ->fff_res
@@ -1175,8 +1204,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  cbnz TAB:CARG2, ->fff_fallback
 #endif
   |  mov RC, #(3+1)*8
-  |  stp CARG1, TISNUM, [BASE, #-8]
-  |   str CFUNC:CARG4, [BASE, #-16]
+  |  stp CFUNC:CARG4, CARG1, [BASE, #-16]
+  |   str TISNUM, [BASE]
   |  b ->fff_res
   |
   |//-- Base library: catch errors ----------------------------------------
@@ -1366,7 +1395,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  eor CARG2w, CARG1w, CARG1w, asr #31
   |   movz CARG3, #0x41e0, lsl #48	// 2^31.
   |  subs CARG1w, CARG2w, CARG1w, asr #31
-  |   add CARG1, CARG1, TISNUM
+  |   add_TISNUM CARG1, CARG1
   |  csel CARG1, CARG1, CARG3, pl
   |  // Fallthrough.
   |
@@ -1457,7 +1486,7 @@ static void build_subroutines(BuildCtx *ctx)
   |    ldr PC, [BASE, FRAME_PC]
   |  str d0, [BASE, #-16]
   |    mov RC, #(2+1)*8
-  |   add CARG2, CARG2, TISNUM
+  |   add_TISNUM CARG2, CARG2
   |   str CARG2, [BASE, #-8]
   |  b ->fff_res
   |
@@ -1523,7 +1552,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  bne ->fff_fallback
   |  ldrb TMP0w, STR:CARG1[1]		// Access is always ok (NUL at end).
   |   ldr CARG3w, STR:CARG1->len
-  |  add TMP0, TMP0, TISNUM
+  |  add_TISNUM TMP0, TMP0
   |  str TMP0, [BASE, #-16]
   |  mov RC, #(0+1)*8
   |   cbz CARG3, ->fff_res
@@ -1669,17 +1698,17 @@ static void build_subroutines(BuildCtx *ctx)
   |.ffunc_bit tobit
   |  mov TMP0w, CARG1w
   |9:  // Label reused by .ffunc_bit_op users.
-  |  add CARG1, TMP0, TISNUM
+  |  add_TISNUM CARG1, TMP0
   |  b ->fff_restv
   |
   |.ffunc_bit bswap
   |  rev TMP0w, CARG1w
-  |  add CARG1, TMP0, TISNUM
+  |  add_TISNUM CARG1, TMP0
   |  b ->fff_restv
   |
   |.ffunc_bit bnot
   |  mvn TMP0w, CARG1w
-  |  add CARG1, TMP0, TISNUM
+  |  add_TISNUM CARG1, TMP0
   |  b ->fff_restv
   |
   |.macro .ffunc_bit_sh, name, ins, shmod
@@ -1700,7 +1729,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  checkint CARG1, ->vm_tobit_fb
   |2:
   |  ins TMP0w, CARG1w, TMP1w
-  |  add CARG1, TMP0, TISNUM
+  |  add_TISNUM CARG1, TMP0
   |  b ->fff_restv
   |.endmacro
   |
@@ -1889,8 +1918,7 @@ static void build_subroutines(BuildCtx *ctx)
   |    and CARG3, CARG3, #LJ_GCVMASK
   |   beq >2
   |1:  // Move results down.
-  |  ldr CARG1, [RA]
-  |    add RA, RA, #8
+  |  ldr CARG1, [RA], #8
   |   subs RB, RB, #8
   |  str CARG1, [BASE, RC, lsl #3]
   |    add RC, RC, #1
@@ -2005,13 +2033,11 @@ static void build_subroutines(BuildCtx *ctx)
   |.if JIT
   |  ldr L, SAVE_L
   |1:
+  |   init_constants
   |  cmn CARG1w, #LUA_ERRERR
   |  bhs >9				// Check for error from exit.
-  |   lsl RC, CARG1, #3
   |  ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
-  |    movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
-  |    movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
-  |    movn TISNIL, #0
+  |   lsl RC, CARG1, #3
   |  and LFUNC:CARG2, CARG2, #LJ_GCVMASK
   |   str RCw, SAVE_MULTRES
   |   str BASE, L->base
@@ -2162,7 +2188,7 @@ static void build_subroutines(BuildCtx *ctx)
   |//-----------------------------------------------------------------------
   |
   |// Handler for callback functions.
-  |// Saveregs already performed. Callback slot number in [sp], g in r12.
+  |// Saveregs already performed. Callback slot number in w9, g in x10.
   |->vm_ffi_callback:
   |.if FFI
   |.type CTSTATE, CTState, PC
@@ -2186,9 +2212,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  bl extern lj_ccallback_enter	// (CTState *cts, void *cf)
   |  // Returns lua_State *.
   |  ldp BASE, RC, L:CRET1->base
-  |   movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
-  |   movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
-  |   movn TISNIL, #0
+  |   init_constants
   |   mov L, CRET1
   |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
   |  sub RC, RC, BASE
@@ -2215,7 +2239,7 @@ static void build_subroutines(BuildCtx *ctx)
   |.if FFI
   |  .type CCSTATE, CCallState, x19
   |  sp_auth
-  |  stp x20, CCSTATE, [sp, #-32]!
+  |  stp_unwind CCSTATE, x20, [sp, #-32]!
   |  stp fp, lr, [sp, #16]
   |  add fp, sp, #16
   |  mov CCSTATE, x0
@@ -2247,7 +2271,7 @@ static void build_subroutines(BuildCtx *ctx)
   |   stp d0, d1, CCSTATE->fpr[0]
   |   stp d2, d3, CCSTATE->fpr[2]
   |  ldp fp, lr, [sp, #16]
-  |  ldp x20, CCSTATE, [sp], #32
+  |  ldp_unwind CCSTATE, x20, [sp], #32
   |  ret_auth
   |.endif
   |// Note: vm_ffi_call must be the last function in this object file!
@@ -2567,7 +2591,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  bne >5
     |  negs TMP0w, TMP0w
     |   movz CARG3, #0x41e0, lsl #48	// 2^31.
-    |   add TMP0, TMP0, TISNUM
+    |   add_TISNUM TMP0, TMP0
     |  csel TMP0, TMP0, CARG3, vc
     |5:
     |  str TMP0, [BASE, RA, lsl #3]
@@ -2582,7 +2606,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  bne >2
     |  ldr CARG1w, STR:CARG1->len
     |1:
-    |  add CARG1, CARG1, TISNUM
+    |  add_TISNUM CARG1, CARG1
     |  str CARG1, [BASE, RA, lsl #3]
     |  ins_next
     |
@@ -2690,7 +2714,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  intins CARG1w, CARG1w, CARG2w
     |  ins_arithfallback bvs
     |.endif
-    |  add CARG1, CARG1, TISNUM
+    |  add_TISNUM CARG1, CARG1
     |  str CARG1, [BASE, RA, lsl #3]
     |4:
     |  ins_next
@@ -2783,7 +2807,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
   case BC_KSHORT:
     |  // RA = dst, RC = int16_literal
     |  sxth RCw, RCw
-    |  add TMP0, RC, TISNUM
+    |  add_TISNUM TMP0, RC
     |  str TMP0, [BASE, RA, lsl #3]
     |  ins_next
     break;
@@ -3006,7 +3030,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |   cmp TMP1w, CARG1w		// In array part?
     |   bhs ->vmeta_tgetv
     |  ldr TMP0, [CARG3]
-    |  cmp TMP0, TISNIL
+    |  cmp_nil TMP0
     |  beq >5
     |1:
     |  str TMP0, [BASE, RA, lsl #3]
@@ -3049,7 +3073,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |   ldr NODE:CARG3, NODE:CARG3->next
     |  cmp CARG1, CARG4
     |  bne >4
-    |  cmp TMP0, TISNIL
+    |  cmp_nil TMP0
     |  beq >5
     |3:
     |  str TMP0, [BASE, RA, lsl #3]
@@ -3058,7 +3082,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |4:  // Follow hash chain.
     |  cbnz NODE:CARG3, <1
     |  // End of hash chain: key not found, nil result.
-    |   mov TMP0, TISNIL
+    |   mov_nil TMP0
     |
     |5:  // Check for __index if table value is nil.
     |  ldr TAB:CARG1, TAB:CARG2->metatable
@@ -3079,7 +3103,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |   cmp RCw, CARG1w			// In array part?
     |   bhs ->vmeta_tgetb
     |  ldr TMP0, [CARG3]
-    |  cmp TMP0, TISNIL
+    |  cmp_nil TMP0
     |  beq >5
     |1:
     |  str TMP0, [BASE, RA, lsl #3]
@@ -3126,7 +3150,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  ldr TMP1, [CARG3]
     |   ldr TMP0, [BASE, RA, lsl #3]
     |    ldrb TMP2w, TAB:CARG2->marked
-    |  cmp TMP1, TISNIL			// Previous value is nil?
+    |  cmp_nil TMP1			// Previous value is nil?
     |  beq >5
     |1:
     |   str TMP0, [CARG3]
@@ -3178,7 +3202,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  cmp CARG1, CARG4
     |  bne >5
     |   ldr TMP0, [BASE, RA, lsl #3]
-    |  cmp TMP1, TISNIL			// Previous value is nil?
+    |  cmp_nil TMP1			// Previous value is nil?
     |  beq >4
     |2:
     |   str TMP0, NODE:CARG3->val
@@ -3237,7 +3261,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  ldr TMP1, [CARG3]
     |   ldr TMP0, [BASE, RA, lsl #3]
     |    ldrb TMP2w, TAB:CARG2->marked
-    |  cmp TMP1, TISNIL			// Previous value is nil?
+    |  cmp_nil TMP1			// Previous value is nil?
     |  beq >5
     |1:
     |   str TMP0, [CARG3]
@@ -3336,9 +3360,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |->BC_CALL_Z:
     |  mov RB, BASE			// Save old BASE for vmeta_call.
     |  add BASE, BASE, RA, lsl #3
-    |  ldr CARG3, [BASE]
+    |  ldr CARG3, [BASE], #16
     |   sub NARGS8:RC, NARGS8:RC, #8
-    |   add BASE, BASE, #16
     |  checkfunc CARG3, ->vmeta_call
     |  ins_call
     break;
@@ -3354,9 +3377,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  // RA = base, (RB = 0,) RC = (nargs+1)*8
     |->BC_CALLT1_Z:
     |  add RA, BASE, RA, lsl #3
-    |  ldr TMP1, [RA]
+    |  ldr TMP1, [RA], #16
     |   sub NARGS8:RC, NARGS8:RC, #8
-    |   add RA, RA, #16
     |  checktp CARG3, TMP1, LJ_TFUNC, ->vmeta_callt
     |  ldr PC, [BASE, FRAME_PC]
     |->BC_CALLT2_Z:
@@ -3436,10 +3458,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |   add CARG3, CARG2, CARG1, lsl #3
     |  bhs >5				// Index points after array part?
     |   ldr TMP0, [CARG3]
-    |   cmp TMP0, TISNIL
+    |   cmp_nil TMP0
     |   cinc CARG1, CARG1, eq		// Skip holes in array part.
     |   beq <1
-    |   add CARG1, CARG1, TISNUM
+    |   add_TISNUM CARG1, CARG1
     |   stp CARG1, TMP0, [RA]
     |    add CARG1, CARG1, #1
     |3:
@@ -3457,7 +3479,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |   add NODE:CARG3, NODE:RB, CARG1, lsl #3  // node = tab->node + idx*3*8
     |  bhi <4
     |  ldp TMP0, CARG1, NODE:CARG3->val
-    |  cmp TMP0, TISNIL
+    |  cmp_nil TMP0
     |   add RC, RC, #1
     |  beq <6				// Skip holes in hash part.
     |  stp CARG1, TMP0, [RA]
@@ -3475,8 +3497,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  checkfunc CFUNC:CARG1, >5
     |   asr TMP0, TAB:CARG3, #47
     |  ldrb TMP1w, CFUNC:CARG1->ffid
-    |   cmn TMP0, #-LJ_TTAB
-    |   ccmp CARG4, TISNIL, #0, eq
+    |   cmp_nil CARG4
+    |   ccmn TMP0, #-LJ_TTAB, #0, eq
     |  ccmp TMP1w, #FF_next_N, #0, eq
     |  bne >5
     |  mov TMP0w, #0xfffe7fff		// LJ_KEYINDEX
@@ -3516,51 +3538,51 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |   and RC, RC, #255
     |  // RA = base, RB = (nresults+1), RC = numparams
     |  ldr TMP1, [BASE, FRAME_PC]
-    |  add RC, BASE, RC, lsl #3
-    |   add RA, BASE, RA, lsl #3
-    |  add RC, RC, #FRAME_VARG
-    |   add TMP2, RA, RB, lsl #3
-    |  sub RC, RC, TMP1			// RC = vbase
-    |  // Note: RC may now be even _above_ BASE if nargs was < numparams.
+    |  add TMP0, BASE, RC, lsl #3
+    |   add RC, BASE, RA, lsl #3	// RC = destination
+    |  add TMP0, TMP0, #FRAME_VARG
+    |   add TMP2, RC, RB, lsl #3
+    |  sub RA, TMP0, TMP1		// RA = vbase
+    |  // Note: RA may now be even _above_ BASE if nargs was < numparams.
     |   sub TMP3, BASE, #16		// TMP3 = vtop
     |  cbz RB, >5
     |   sub TMP2, TMP2, #16
     |1:  // Copy vararg slots to destination slots.
-    |  cmp RC, TMP3
-    |  ldr TMP0, [RC], #8
-    |  csel TMP0, TMP0, TISNIL, lo
-    |   cmp RA, TMP2
-    |  str TMP0, [RA], #8
+    |  cmp RA, TMP3
+    |  ldr TMP0, [RA], #8
+    |  csinv TMP0, TMP0, xzr, lo	// TISNIL = ~xzr
+    |   cmp RC, TMP2
+    |  str TMP0, [RC], #8
     |   blo <1
     |2:
     |  ins_next
     |
     |5:  // Copy all varargs.
     |  ldr TMP0, L->maxstack
-    |   subs TMP2, TMP3, RC
+    |   subs TMP2, TMP3, RA
     |   csel RB, xzr, TMP2, le		// MULTRES = (max(vtop-vbase,0)+1)*8
     |   add RB, RB, #8
-    |  add TMP1, RA, TMP2
+    |  add TMP1, RC, TMP2
     |   str RBw, SAVE_MULTRES
     |   ble <2				// Nothing to copy.
     |  cmp TMP1, TMP0
     |  bhi >7
     |6:
-    |  ldr TMP0, [RC], #8
-    |  str TMP0, [RA], #8
-    |  cmp RC, TMP3
+    |  ldr TMP0, [RA], #8
+    |  str TMP0, [RC], #8
+    |  cmp RA, TMP3
     |  blo <6
     |  b <2
     |
     |7:  // Grow stack for varargs.
     |  lsr CARG2, TMP2, #3
-    |   stp BASE, RA, L->base
+    |   stp BASE, RC, L->base
     |  mov CARG1, L
-    |  sub RC, RC, BASE			// Need delta, because BASE may change.
+    |  sub RA, RA, BASE			// Need delta, because BASE may change.
     |   str PC, SAVE_PC
     |  bl extern lj_state_growstack	// (lua_State *L, int n)
-    |  ldp BASE, RA, L->base
-    |  add RC, BASE, RC
+    |  ldp BASE, RC, L->base
+    |  add RA, BASE, RA
     |  sub TMP3, BASE, #16
     |  b <6
     break;
@@ -3704,7 +3726,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     } else {
       |  adds CARG1w, CARG1w, CARG3w
       |  bvs >2
-      |   add TMP0, CARG1, TISNUM
+      |   add_TISNUM TMP0, CARG1
       |  tbnz CARG3w, #31, >4
       |  cmp CARG1w, CARG2w
     }
@@ -3783,7 +3805,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  // RA = base, RC = target
     |  ldr CARG1, [BASE, RA, lsl #3]
     |   add TMP1, BASE, RA, lsl #3
-    |  cmp CARG1, TISNIL
+    |  cmp_nil CARG1
     |  beq >1				// Stop if iterator returned nil.
     if (op == BC_JITERL) {
       |  str CARG1, [TMP1, #-8]
@@ -3892,6 +3914,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |   add TMP2, BASE, RC
     |   add LFUNC:CARG3, CARG3, TMP0, lsl #47
     |  add RA, RA, RC
+    |  sub CARG1, CARG1, #8
     |   add TMP0, RC, #16+FRAME_VARG
     |   str LFUNC:CARG3, [TMP2], #8	// Store (tagged) copy of LFUNC.
     |    ldr KBASE, [PC, #-4+PC2PROTO(k)]

+ 1 - 0
libs/LuaJIT/src/vm_mips64.dasc

@@ -5396,6 +5396,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |   settp LFUNC:RB, TMP0
     |  daddu TMP0, RA, RC
     |   sd LFUNC:RB, 0(TMP1)		// Store (tagged) copy of LFUNC.
+    |  daddiu TMP2, TMP2, -8
     |   daddiu TMP3, RC, 16+FRAME_VARG
     |  sltu AT, TMP0, TMP2
     |    ld KBASE, -4+PC2PROTO(k)(PC)