2
0
Эх сурвалжийг харах

update LuaJIT to latest commit (72efc42)

Sasha Szpakowski 2 жил өмнө
parent
commit
e650bfa0bd
39 өөрчлөгдсөн 366 нэмэгдсэн , 164 устгасан
  1. 3 1
      libs/LuaJIT/Makefile
  2. 3 1
      libs/LuaJIT/doc/ext_ffi_api.html
  3. 6 11
      libs/LuaJIT/dynasm/dasm_arm.h
  4. 6 11
      libs/LuaJIT/dynasm/dasm_arm64.h
  5. 7 0
      libs/LuaJIT/dynasm/dasm_arm64.lua
  6. 6 11
      libs/LuaJIT/dynasm/dasm_mips.h
  7. 6 11
      libs/LuaJIT/dynasm/dasm_ppc.h
  8. 7 12
      libs/LuaJIT/dynasm/dasm_x86.h
  9. 4 0
      libs/LuaJIT/src/Makefile
  10. 6 3
      libs/LuaJIT/src/host/buildvm_asm.c
  11. 2 1
      libs/LuaJIT/src/host/buildvm_lib.c
  12. 4 1
      libs/LuaJIT/src/lib_ffi.c
  13. 2 1
      libs/LuaJIT/src/lib_jit.c
  14. 7 0
      libs/LuaJIT/src/lj_arch.h
  15. 9 2
      libs/LuaJIT/src/lj_asm.c
  16. 3 4
      libs/LuaJIT/src/lj_asm_arm.h
  17. 8 9
      libs/LuaJIT/src/lj_asm_arm64.h
  18. 5 5
      libs/LuaJIT/src/lj_asm_mips.h
  19. 4 4
      libs/LuaJIT/src/lj_asm_ppc.h
  20. 4 4
      libs/LuaJIT/src/lj_asm_x86.h
  21. 2 1
      libs/LuaJIT/src/lj_bcwrite.c
  22. 3 3
      libs/LuaJIT/src/lj_ccallback.c
  23. 10 4
      libs/LuaJIT/src/lj_emit_arm64.h
  24. 21 5
      libs/LuaJIT/src/lj_err.c
  25. 3 0
      libs/LuaJIT/src/lj_jit.h
  26. 14 0
      libs/LuaJIT/src/lj_obj.h
  27. 2 2
      libs/LuaJIT/src/lj_opt_mem.c
  28. 1 0
      libs/LuaJIT/src/lj_parse.c
  29. 8 5
      libs/LuaJIT/src/lj_record.c
  30. 6 0
      libs/LuaJIT/src/lj_target_arm64.h
  31. 23 17
      libs/LuaJIT/src/lj_trace.c
  32. 3 3
      libs/LuaJIT/src/lj_vm.h
  33. 15 2
      libs/LuaJIT/src/vm_arm.dasc
  34. 59 18
      libs/LuaJIT/src/vm_arm64.dasc
  35. 23 4
      libs/LuaJIT/src/vm_mips.dasc
  36. 23 4
      libs/LuaJIT/src/vm_mips64.dasc
  37. 20 2
      libs/LuaJIT/src/vm_ppc.dasc
  38. 12 1
      libs/LuaJIT/src/vm_x64.dasc
  39. 16 1
      libs/LuaJIT/src/vm_x86.dasc

+ 3 - 1
libs/LuaJIT/Makefile

@@ -92,7 +92,9 @@ FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
 FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
 	      dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
 	      dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
-	      dis_mips64.lua dis_mips64el.lua vmdef.lua
+	      dis_mips64.lua dis_mips64el.lua \
+	      dis_mips64r6.lua dis_mips64r6el.lua \
+	      vmdef.lua
 
 ifeq (,$(findstring Windows,$(OS)))
   HOST_SYS:= $(shell uname -s)

+ 3 - 1
libs/LuaJIT/doc/ext_ffi_api.html

@@ -463,8 +463,10 @@ otherwise. The following parameters are currently defined:
 <tr class="odd">
 <td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr>
 <tr class="even">
-<td class="abiparam">uwp</td><td class="abidesc">Universal Windows Platform</td></tr>
+<td class="abiparam">pauth</td><td class="abidesc">Pointer authentication ABI</td></tr>
 <tr class="odd">
+<td class="abiparam">uwp</td><td class="abidesc">Universal Windows Platform</td></tr>
+<tr class="even">
 <td class="abiparam">gc64</td><td class="abidesc">64 bit GC references</td></tr>
 </table>
 

+ 6 - 11
libs/LuaJIT/dynasm/dasm_arm.h

@@ -70,7 +70,7 @@ struct dasm_State {
   size_t lgsize;
   int *pclabels;		/* PC label chains/pos ptrs. */
   size_t pcsize;
-  void **globals;		/* Array of globals (bias -10). */
+  void **globals;		/* Array of globals. */
   dasm_Section *section;	/* Pointer to active section. */
   size_t codesize;		/* Total size of all code sections. */
   int maxsection;		/* 0 <= sectionidx < maxsection. */
@@ -87,7 +87,6 @@ void dasm_init(Dst_DECL, int maxsection)
 {
   dasm_State *D;
   size_t psz = 0;
-  int i;
   Dst_REF = NULL;
   DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
   D = Dst_REF;
@@ -98,12 +97,7 @@ void dasm_init(Dst_DECL, int maxsection)
   D->pcsize = 0;
   D->globals = NULL;
   D->maxsection = maxsection;
-  for (i = 0; i < maxsection; i++) {
-    D->sections[i].buf = NULL;  /* Need this for pass3. */
-    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
-    D->sections[i].bsize = 0;
-    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
-  }
+  memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
 }
 
 /* Free DynASM state. */
@@ -123,7 +117,7 @@ void dasm_free(Dst_DECL)
 void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
 {
   dasm_State *D = Dst_REF;
-  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  D->globals = gl;
   DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
 }
 
@@ -148,6 +142,7 @@ void dasm_setup(Dst_DECL, const void *actionlist)
   if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
   for (i = 0; i < D->maxsection; i++) {
     D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
     D->sections[i].ofs = 0;
   }
 }
@@ -372,7 +367,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  break;
 	case DASM_REL_LG:
 	  if (n < 0) {
-	    n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp - 4);
+	    n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp - 4);
 	    goto patchrel;
 	  }
 	  /* fallthrough */
@@ -396,7 +391,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  }
 	  break;
 	case DASM_LABEL_LG:
-	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+	  ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
 	  break;
 	case DASM_LABEL_PC: break;
 	case DASM_IMM:

+ 6 - 11
libs/LuaJIT/dynasm/dasm_arm64.h

@@ -72,7 +72,7 @@ struct dasm_State {
   size_t lgsize;
   int *pclabels;		/* PC label chains/pos ptrs. */
   size_t pcsize;
-  void **globals;		/* Array of globals (bias -10). */
+  void **globals;		/* Array of globals. */
   dasm_Section *section;	/* Pointer to active section. */
   size_t codesize;		/* Total size of all code sections. */
   int maxsection;		/* 0 <= sectionidx < maxsection. */
@@ -89,7 +89,6 @@ void dasm_init(Dst_DECL, int maxsection)
 {
   dasm_State *D;
   size_t psz = 0;
-  int i;
   Dst_REF = NULL;
   DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
   D = Dst_REF;
@@ -100,12 +99,7 @@ void dasm_init(Dst_DECL, int maxsection)
   D->pcsize = 0;
   D->globals = NULL;
   D->maxsection = maxsection;
-  for (i = 0; i < maxsection; i++) {
-    D->sections[i].buf = NULL;  /* Need this for pass3. */
-    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
-    D->sections[i].bsize = 0;
-    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
-  }
+  memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
 }
 
 /* Free DynASM state. */
@@ -125,7 +119,7 @@ void dasm_free(Dst_DECL)
 void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
 {
   dasm_State *D = Dst_REF;
-  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  D->globals = gl;
   DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
 }
 
@@ -150,6 +144,7 @@ void dasm_setup(Dst_DECL, const void *actionlist)
   if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
   for (i = 0; i < D->maxsection; i++) {
     D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
     D->sections[i].ofs = 0;
   }
 }
@@ -444,7 +439,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  break;
 	case DASM_REL_LG:
 	  if (n < 0) {
-	    ptrdiff_t na = (ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp + 4;
+	    ptrdiff_t na = (ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp + 4;
 	    n = (int)na;
 	    CK((ptrdiff_t)n == na, RANGE_REL);
 	    goto patchrel;
@@ -487,7 +482,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  goto patchrel;
 	}
 	case DASM_LABEL_LG:
-	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+	  ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
 	  break;
 	case DASM_LABEL_PC: break;
 	case DASM_IMM:

+ 7 - 0
libs/LuaJIT/dynasm/dasm_arm64.lua

@@ -823,6 +823,13 @@ map_op = {
   tbz_3  = "36000000DTBw|36000000DTBx",
   tbnz_3 = "37000000DTBw|37000000DTBx",
 
+  -- ARM64e: Pointer authentication codes (PAC).
+  blraaz_1  = "d63f081fNx",
+  braa_2    = "d71f0800NDx",
+  braaz_1   = "d61f081fNx",
+  pacibsp_0 = "d503237f",
+  retab_0   = "d65f0fff",
+
   -- Miscellaneous instructions.
   -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr
   -- TODO: sys, sysl, ic, dc, at, tlbi

+ 6 - 11
libs/LuaJIT/dynasm/dasm_mips.h

@@ -69,7 +69,7 @@ struct dasm_State {
   size_t lgsize;
   int *pclabels;		/* PC label chains/pos ptrs. */
   size_t pcsize;
-  void **globals;		/* Array of globals (bias -10). */
+  void **globals;		/* Array of globals. */
   dasm_Section *section;	/* Pointer to active section. */
   size_t codesize;		/* Total size of all code sections. */
   int maxsection;		/* 0 <= sectionidx < maxsection. */
@@ -86,7 +86,6 @@ void dasm_init(Dst_DECL, int maxsection)
 {
   dasm_State *D;
   size_t psz = 0;
-  int i;
   Dst_REF = NULL;
   DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
   D = Dst_REF;
@@ -97,12 +96,7 @@ void dasm_init(Dst_DECL, int maxsection)
   D->pcsize = 0;
   D->globals = NULL;
   D->maxsection = maxsection;
-  for (i = 0; i < maxsection; i++) {
-    D->sections[i].buf = NULL;  /* Need this for pass3. */
-    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
-    D->sections[i].bsize = 0;
-    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
-  }
+  memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
 }
 
 /* Free DynASM state. */
@@ -122,7 +116,7 @@ void dasm_free(Dst_DECL)
 void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
 {
   dasm_State *D = Dst_REF;
-  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  D->globals = gl;
   DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
 }
 
@@ -147,6 +141,7 @@ void dasm_setup(Dst_DECL, const void *actionlist)
   if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
   for (i = 0; i < D->maxsection; i++) {
     D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
     D->sections[i].ofs = 0;
   }
 }
@@ -350,7 +345,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  break;
 	case DASM_REL_LG:
 	  if (n < 0) {
-	    n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp);
+	    n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp);
 	    goto patchrel;
 	  }
 	  /* fallthrough */
@@ -369,7 +364,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  }
 	  break;
 	case DASM_LABEL_LG:
-	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+	  ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
 	  break;
 	case DASM_LABEL_PC: break;
 	case DASM_IMMS:

+ 6 - 11
libs/LuaJIT/dynasm/dasm_ppc.h

@@ -69,7 +69,7 @@ struct dasm_State {
   size_t lgsize;
   int *pclabels;		/* PC label chains/pos ptrs. */
   size_t pcsize;
-  void **globals;		/* Array of globals (bias -10). */
+  void **globals;		/* Array of globals. */
   dasm_Section *section;	/* Pointer to active section. */
   size_t codesize;		/* Total size of all code sections. */
   int maxsection;		/* 0 <= sectionidx < maxsection. */
@@ -86,7 +86,6 @@ void dasm_init(Dst_DECL, int maxsection)
 {
   dasm_State *D;
   size_t psz = 0;
-  int i;
   Dst_REF = NULL;
   DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
   D = Dst_REF;
@@ -97,12 +96,7 @@ void dasm_init(Dst_DECL, int maxsection)
   D->pcsize = 0;
   D->globals = NULL;
   D->maxsection = maxsection;
-  for (i = 0; i < maxsection; i++) {
-    D->sections[i].buf = NULL;  /* Need this for pass3. */
-    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
-    D->sections[i].bsize = 0;
-    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
-  }
+  memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
 }
 
 /* Free DynASM state. */
@@ -122,7 +116,7 @@ void dasm_free(Dst_DECL)
 void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
 {
   dasm_State *D = Dst_REF;
-  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  D->globals = gl;
   DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
 }
 
@@ -147,6 +141,7 @@ void dasm_setup(Dst_DECL, const void *actionlist)
   if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
   for (i = 0; i < D->maxsection; i++) {
     D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
     D->sections[i].ofs = 0;
   }
 }
@@ -354,7 +349,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  break;
 	case DASM_REL_LG:
 	  if (n < 0) {
-	    n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp);
+	    n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp);
 	    goto patchrel;
 	  }
 	  /* fallthrough */
@@ -368,7 +363,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc));
 	  break;
 	case DASM_LABEL_LG:
-	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+	  ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
 	  break;
 	case DASM_LABEL_PC: break;
 	case DASM_IMM:

+ 7 - 12
libs/LuaJIT/dynasm/dasm_x86.h

@@ -68,7 +68,7 @@ struct dasm_State {
   size_t lgsize;
   int *pclabels;		/* PC label chains/pos ptrs. */
   size_t pcsize;
-  void **globals;		/* Array of globals (bias -10). */
+  void **globals;		/* Array of globals. */
   dasm_Section *section;	/* Pointer to active section. */
   size_t codesize;		/* Total size of all code sections. */
   int maxsection;		/* 0 <= sectionidx < maxsection. */
@@ -85,7 +85,6 @@ void dasm_init(Dst_DECL, int maxsection)
 {
   dasm_State *D;
   size_t psz = 0;
-  int i;
   Dst_REF = NULL;
   DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
   D = Dst_REF;
@@ -96,12 +95,7 @@ void dasm_init(Dst_DECL, int maxsection)
   D->pcsize = 0;
   D->globals = NULL;
   D->maxsection = maxsection;
-  for (i = 0; i < maxsection; i++) {
-    D->sections[i].buf = NULL;  /* Need this for pass3. */
-    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
-    D->sections[i].bsize = 0;
-    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
-  }
+  memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
 }
 
 /* Free DynASM state. */
@@ -121,7 +115,7 @@ void dasm_free(Dst_DECL)
 void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
 {
   dasm_State *D = Dst_REF;
-  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  D->globals = gl;
   DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
 }
 
@@ -146,6 +140,7 @@ void dasm_setup(Dst_DECL, const void *actionlist)
   if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
   for (i = 0; i < D->maxsection; i++) {
     D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
     D->sections[i].ofs = 0;
   }
 }
@@ -445,7 +440,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  break;
 	}
 	case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
-	  b++; n = (int)(ptrdiff_t)D->globals[-n];
+	  b++; n = (int)(ptrdiff_t)D->globals[-n-10];
 	  /* fallthrough */
 	case DASM_REL_A: rel_a:
 	  n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
@@ -459,7 +454,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	}
 	case DASM_IMM_LG:
 	  p++;
-	  if (n < 0) { dasma((ptrdiff_t)D->globals[-n]); break; }
+	  if (n < 0) { dasma((ptrdiff_t)D->globals[-n-10]); break; }
 	  /* fallthrough */
 	case DASM_IMM_PC: {
 	  int *pb = DASM_POS2PTR(D, n);
@@ -469,7 +464,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	case DASM_LABEL_LG: {
 	  int idx = *p++;
 	  if (idx >= 10)
-	    D->globals[idx] = (void *)(base + (*p == DASM_SETLABEL ? *b : n));
+	    D->globals[idx-10] = (void *)(base + (*p == DASM_SETLABEL ? *b : n));
 	  break;
 	}
 	case DASM_LABEL_PC: case DASM_SETLABEL: break;

+ 4 - 0
libs/LuaJIT/src/Makefile

@@ -433,6 +433,10 @@ ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
   DASM_AFLAGS+= -D NO_UNWIND
   TARGET_ARCH+= -DLUAJIT_NO_UNWIND
 endif
+ifneq (,$(findstring LJ_ABI_PAUTH 1,$(TARGET_TESTARCH)))
+  DASM_AFLAGS+= -D PAUTH
+  TARGET_ARCH+= -DLJ_ABI_PAUTH=1
+endif
 DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH))))
 ifeq (Windows,$(TARGET_SYS))
   DASM_AFLAGS+= -D WIN

+ 6 - 3
libs/LuaJIT/src/host/buildvm_asm.c

@@ -243,6 +243,12 @@ void emit_asm(BuildCtx *ctx)
 
   fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
   fprintf(ctx->fp, "\t.text\n");
+#if LJ_TARGET_MIPS32 && !LJ_ABI_SOFTFP
+  fprintf(ctx->fp, "\t.module fp=32\n");
+#endif
+#if LJ_TARGET_MIPS
+  fprintf(ctx->fp, "\t.set nomips16\n\t.abicalls\n\t.set noreorder\n\t.set nomacro\n");
+#endif
   emit_asm_align(ctx, 4);
 
 #if LJ_TARGET_PS3
@@ -268,9 +274,6 @@ void emit_asm(BuildCtx *ctx)
 	  ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n"
 	  ".pad #28\n");
 #endif
-#endif
-#if LJ_TARGET_MIPS
-  fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n");
 #endif
 
   for (i = rel = 0; i < ctx->nsym; i++) {

+ 2 - 1
libs/LuaJIT/src/host/buildvm_lib.c

@@ -378,7 +378,8 @@ void emit_lib(BuildCtx *ctx)
       char *p;
       /* Simplistic pre-processor. Only handles top-level #if/#endif. */
       if (buf[0] == '#' && buf[1] == 'i' && buf[2] == 'f') {
-	int ok = 1, len = strlen(buf);
+	int ok = 1;
+	size_t len = strlen(buf);
 	if (buf[len-1] == '\n') {
 	  buf[len-1] = 0;
 	  if (buf[len-2] == '\r') {

+ 4 - 1
libs/LuaJIT/src/lib_ffi.c

@@ -745,6 +745,9 @@ LJLIB_CF(ffi_abi)	LJLIB_REC(.)
 #if LJ_ABI_WIN
     "\003win"
 #endif
+#if LJ_ABI_PAUTH
+    "\007pauth"
+#endif
 #if LJ_TARGET_UWP
     "\003uwp"
 #endif
@@ -776,7 +779,7 @@ LJLIB_CF(ffi_metatype)
   if (!(ctype_isstruct(ct->info) || ctype_iscomplex(ct->info) ||
 	ctype_isvector(ct->info)))
     lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
-  tv = lj_tab_setinth(L, t, -(int32_t)id);
+  tv = lj_tab_setinth(L, t, -(int32_t)ctype_typeid(cts, ct));
   if (!tvisnil(tv))
     lj_err_caller(L, LJ_ERR_PROTMT);
   settabV(L, tv, mt);

+ 2 - 1
libs/LuaJIT/src/lib_jit.c

@@ -422,7 +422,8 @@ LJLIB_CF(jit_util_ircalladdr)
 {
   uint32_t idx = (uint32_t)lj_lib_checkint(L, 1);
   if (idx < IRCALL__MAX) {
-    setintptrV(L->top-1, (intptr_t)(void *)lj_ir_callinfo[idx].func);
+    ASMFunction func = lj_ir_callinfo[idx].func;
+    setintptrV(L->top-1, (intptr_t)(void *)lj_ptr_strip(func));
     return 1;
   }
   return 0;

+ 7 - 0
libs/LuaJIT/src/lj_arch.h

@@ -259,6 +259,9 @@
 #define LJ_ARCH_NAME		"arm64"
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
 #endif
+#if !defined(LJ_ABI_PAUTH) && defined(__arm64e__)
+#define LJ_ABI_PAUTH		1
+#endif
 #define LJ_TARGET_ARM64		1
 #define LJ_TARGET_EHRETREG	0
 #define LJ_TARGET_EHRAREG	30
@@ -603,6 +606,10 @@
 #define LJ_SOFTFP		(!LJ_ARCH_HASFPU)
 #define LJ_SOFTFP32		(LJ_SOFTFP && LJ_32)
 
+#ifndef LJ_ABI_PAUTH
+#define LJ_ABI_PAUTH		0
+#endif
+
 #if LJ_ARCH_ENDIAN == LUAJIT_BE
 #define LJ_LE			0
 #define LJ_BE			1

+ 9 - 2
libs/LuaJIT/src/lj_asm.c

@@ -1888,6 +1888,8 @@ static void asm_head_side(ASMState *as)
   IRRef1 sloadins[RID_MAX];
   RegSet allow = RSET_ALL;  /* Inverse of all coalesced registers. */
   RegSet live = RSET_EMPTY;  /* Live parent registers. */
+  RegSet pallow = RSET_GPR;  /* Registers needed by the parent stack check. */
+  Reg pbase;
   IRIns *irp = &as->parent->ir[REF_BASE];  /* Parent base. */
   int32_t spadj, spdelta;
   int pass2 = 0;
@@ -1898,7 +1900,11 @@ static void asm_head_side(ASMState *as)
     /* Force snap #0 alloc to prevent register overwrite in stack check. */
     asm_snap_alloc(as, 0);
   }
-  allow = asm_head_side_base(as, irp, allow);
+  pbase = asm_head_side_base(as, irp);
+  if (pbase != RID_NONE) {
+    rset_clear(allow, pbase);
+    rset_clear(pallow, pbase);
+  }
 
   /* Scan all parent SLOADs and collect register dependencies. */
   for (i = as->stopins; i > REF_BASE; i--) {
@@ -1928,6 +1934,7 @@ static void asm_head_side(ASMState *as)
       sloadins[rs] = (IRRef1)i;
       rset_set(live, rs);  /* Block live parent register. */
     }
+    if (!ra_hasspill(regsp_spill(rs))) rset_clear(pallow, regsp_reg(rs));
   }
 
   /* Calculate stack frame adjustment. */
@@ -2044,7 +2051,7 @@ static void asm_head_side(ASMState *as)
     ExitNo exitno = as->J->exitno;
 #endif
     as->T->topslot = (uint8_t)as->topslot;  /* Remember for child traces. */
-    asm_stack_check(as, as->topslot, irp, allow & RSET_GPR, exitno);
+    asm_stack_check(as, as->topslot, irp, pallow, exitno);
   }
 }
 

+ 3 - 4
libs/LuaJIT/src/lj_asm_arm.h

@@ -2167,7 +2167,7 @@ static void asm_head_root_base(ASMState *as)
 }
 
 /* Coalesce BASE register for a side trace. */
-static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
 {
   IRIns *ir;
   asm_head_lreg(as);
@@ -2175,16 +2175,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
   if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
     ra_spill(as, ir);
   if (ra_hasspill(irp->s)) {
-    rset_clear(allow, ra_dest(as, ir, allow));
+    return ra_dest(as, ir, RSET_GPR);
   } else {
     Reg r = irp->r;
     lj_assertA(ra_hasreg(r), "base reg lost");
-    rset_clear(allow, r);
     if (r != ir->r && !rset_test(as->freeset, r))
       ra_restore(as, regcost_ref(as->cost[r]));
     ra_destreg(as, ir, r);
+    return r;
   }
-  return allow;
 }
 
 /* -- Tail of trace ------------------------------------------------------- */

+ 8 - 9
libs/LuaJIT/src/lj_asm_arm64.h

@@ -421,8 +421,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
   uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 0;
   Reg gpr, fpr = REGARG_FIRSTFPR;
-  if ((void *)ci->func)
-    emit_call(as, (void *)ci->func);
+  if (ci->func)
+    emit_call(as, ci->func);
   for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
     as->cost[gpr] = REGCOST(~0u, ASMREF_L);
   gpr = REGARG_FIRSTGPR;
@@ -501,7 +501,7 @@ static void asm_callx(ASMState *as, IRIns *ir)
     ci.func = (ASMFunction)(ir_k64(irf)->u64);
   } else {  /* Need a non-argument register for indirect calls. */
     Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
-    emit_n(as, A64I_BLR, freg);
+    emit_n(as, A64I_BLR_AUTH, freg);
     ci.func = (ASMFunction)(void *)0;
   }
   asm_gencall(as, &ci, args);
@@ -909,7 +909,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
   IRIns *irkey = IR(kslot->op1);
   int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
   int32_t kofs = ofs + (int32_t)offsetof(Node, key);
-  int bigofs = !emit_checkofs(A64I_LDRx, ofs);
+  int bigofs = !emit_checkofs(A64I_LDRx, kofs);
   Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
   Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
   Reg key, idx = node;
@@ -935,7 +935,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
   emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key)));
   emit_lso(as, A64I_LDRx, key, idx, kofs);
   if (bigofs)
-    emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR);
+    emit_opk(as, A64I_ADDx, dest, node, ofs, rset_exclude(RSET_GPR, node));
 }
 
 static void asm_uref(ASMState *as, IRIns *ir)
@@ -1915,7 +1915,7 @@ static void asm_head_root_base(ASMState *as)
 }
 
 /* Coalesce BASE register for a side trace. */
-static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
 {
   IRIns *ir;
   asm_head_lreg(as);
@@ -1923,16 +1923,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
   if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
     ra_spill(as, ir);
   if (ra_hasspill(irp->s)) {
-    rset_clear(allow, ra_dest(as, ir, allow));
+    return ra_dest(as, ir, RSET_GPR);
   } else {
     Reg r = irp->r;
     lj_assertA(ra_hasreg(r), "base reg lost");
-    rset_clear(allow, r);
     if (r != ir->r && !rset_test(as->freeset, r))
       ra_restore(as, regcost_ref(as->cost[r]));
     ra_destreg(as, ir, r);
+    return r;
   }
-  return allow;
 }
 
 /* -- Tail of trace ------------------------------------------------------- */

+ 5 - 5
libs/LuaJIT/src/lj_asm_mips.h

@@ -1337,8 +1337,8 @@ static void asm_fload(ASMState *as, IRIns *ir)
       }
     }
     ofs = field_ofs[ir->op2];
+    lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD");
   }
-  lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD");
   emit_tsi(as, mi, dest, idx, ofs);
 }
 
@@ -2667,7 +2667,7 @@ static void asm_head_root_base(ASMState *as)
 }
 
 /* Coalesce BASE register for a side trace. */
-static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
 {
   IRIns *ir = IR(REF_BASE);
   Reg r = ir->r;
@@ -2676,15 +2676,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (irp->r == r) {
-      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
+      return r;  /* Same BASE register already coalesced. */
     } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
-      rset_clear(allow, irp->r);
       emit_move(as, r, irp->r);  /* Move from coalesced parent reg. */
+      return irp->r;
     } else {
       emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
     }
   }
-  return allow;
+  return RID_NONE;
 }
 
 /* -- Tail of trace ------------------------------------------------------- */

+ 4 - 4
libs/LuaJIT/src/lj_asm_ppc.h

@@ -2186,7 +2186,7 @@ static void asm_head_root_base(ASMState *as)
 }
 
 /* Coalesce BASE register for a side trace. */
-static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
 {
   IRIns *ir = IR(REF_BASE);
   Reg r = ir->r;
@@ -2195,15 +2195,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (irp->r == r) {
-      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
+      return r;  /* Same BASE register already coalesced. */
     } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
-      rset_clear(allow, irp->r);
       emit_mr(as, r, irp->r);  /* Move from coalesced parent reg. */
+      return irp->r;
     } else {
       emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
     }
   }
-  return allow;
+  return RID_NONE;
 }
 
 /* -- Tail of trace ------------------------------------------------------- */

+ 4 - 4
libs/LuaJIT/src/lj_asm_x86.h

@@ -2877,7 +2877,7 @@ static void asm_head_root_base(ASMState *as)
 }
 
 /* Coalesce or reload BASE register for a side trace. */
-static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
 {
   IRIns *ir = IR(REF_BASE);
   Reg r = ir->r;
@@ -2886,16 +2886,16 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (irp->r == r) {
-      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
+      return r;  /* Same BASE register already coalesced. */
     } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
       /* Move from coalesced parent reg. */
-      rset_clear(allow, irp->r);
       emit_rr(as, XO_MOV, r|REX_GC64, irp->r);
+      return irp->r;
     } else {
       emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
     }
   }
-  return allow;
+  return RID_NONE;
 }
 
 /* -- Tail of trace ------------------------------------------------------- */

+ 2 - 1
libs/LuaJIT/src/lj_bcwrite.c

@@ -189,7 +189,8 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
       goto save_int;
     } else {
       /* Write a 33 bit ULEB128 for the int (lsb=0) or loword (lsb=1). */
-      if (!LJ_DUALNUM) {  /* Narrow number constants to integers. */
+      if (!LJ_DUALNUM && o->u32.hi != LJ_KEYINDEX) {
+	/* Narrow number constants to integers. */
 	lua_Number num = numV(o);
 	k = lj_num2int(num);
 	if (num == (lua_Number)k) {  /* -0 is never a constant. */

+ 3 - 3
libs/LuaJIT/src/lj_ccallback.c

@@ -171,13 +171,13 @@ static void *callback_mcode_init(global_State *g, uint32_t *page)
 static void *callback_mcode_init(global_State *g, uint32_t *page)
 {
   uint32_t *p = page;
-  void *target = (void *)lj_vm_ffi_callback;
+  ASMFunction target = lj_vm_ffi_callback;
   MSize slot;
   *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4));
   *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5));
-  *p++ = A64I_LE(A64I_BR | A64F_N(RID_X11));
+  *p++ = A64I_LE(A64I_BR_AUTH | A64F_N(RID_X11));
   *p++ = A64I_LE(A64I_NOP);
-  ((void **)p)[0] = target;
+  ((ASMFunction *)p)[0] = target;
   ((void **)p)[1] = g;
   p += 4;
   for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {

+ 10 - 4
libs/LuaJIT/src/lj_emit_arm64.h

@@ -143,7 +143,7 @@ static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs)
       goto nopair;
     }
     if (ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (63<<sc)) {
-      *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) |
+      *as->mcp = aip | A64F_N(rn) | (((ofsm >> sc) & 0x7f) << 15) |
 	(ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000));
       return;
     }
@@ -348,16 +348,22 @@ static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target)
 
 #define emit_jmp(as, target)	emit_branch(as, A64I_B, (target))
 
-static void emit_call(ASMState *as, void *target)
+static void emit_call(ASMState *as, ASMFunction target)
 {
   MCode *p = --as->mcp;
-  ptrdiff_t delta = (char *)target - (char *)p;
+#if LJ_ABI_PAUTH
+  char *targetp = ptrauth_auth_data((char *)target,
+				    ptrauth_key_function_pointer, 0);
+#else
+  char *targetp = (char *)target;
+#endif
+  ptrdiff_t delta = targetp - (char *)p;
   if (A64F_S_OK(delta>>2, 26)) {
     *p = A64I_BL | A64F_S26(delta>>2);
   } else {  /* Target out of range: need indirect call. But don't use R0-R7. */
     Reg r = ra_allock(as, i64ptr(target),
 		      RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
-    *p = A64I_BLR | A64F_N(r);
+    *p = A64I_BLR_AUTH | A64F_N(r);
   }
 }
 

+ 21 - 5
libs/LuaJIT/src/lj_err.c

@@ -444,10 +444,10 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
     if ((actions & _UA_FORCE_UNWIND)) {
       return _URC_CONTINUE_UNWIND;
     } else if (cf) {
+      ASMFunction ip;
       _Unwind_SetGR(ctx, LJ_TARGET_EHRETREG, errcode);
-      _Unwind_SetIP(ctx, (uintptr_t)(cframe_unwind_ff(cf) ?
-				     lj_vm_unwind_ff_eh :
-				     lj_vm_unwind_c_eh));
+      ip = cframe_unwind_ff(cf) ? lj_vm_unwind_ff_eh : lj_vm_unwind_c_eh;
+      _Unwind_SetIP(ctx, (uintptr_t)lj_ptr_strip(ip));
       return _URC_INSTALL_CONTEXT;
     }
 #if LJ_TARGET_X86ORX64
@@ -580,9 +580,17 @@ extern void __deregister_frame(const void *);
 
 uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info)
 {
-  void **handler;
+  ASMFunction handler = (ASMFunction)err_unwind_jit;
   memcpy(info, err_frame_jit_template, sizeof(err_frame_jit_template));
-  handler = (void *)err_unwind_jit;
+#if LJ_ABI_PAUTH
+#if LJ_TARGET_ARM64
+  handler = ptrauth_auth_and_resign(handler,
+    ptrauth_key_function_pointer, 0,
+    ptrauth_key_process_independent_code, info + ERR_FRAME_JIT_OFS_HANDLER);
+#else
+#error "missing pointer authentication support for this architecture"
+#endif
+#endif
   memcpy(info + ERR_FRAME_JIT_OFS_HANDLER, &handler, sizeof(handler));
   *(uint32_t *)(info + ERR_FRAME_JIT_OFS_CODE_SIZE) =
     (uint32_t)(sz - sizeof(err_frame_jit_template) - (info - (uint8_t *)base));
@@ -777,6 +785,10 @@ LJ_NOINLINE void lj_err_mem(lua_State *L)
 {
   if (L->status == LUA_ERRERR+1)  /* Don't touch the stack during lua_open. */
     lj_vm_unwind_c(L->cframe, LUA_ERRMEM);
+  if (LJ_HASJIT) {
+    TValue *base = tvref(G(L)->jit_base);
+    if (base) L->base = base;
+  }
   if (curr_funcisL(L)) L->top = curr_topL(L);
   setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRMEM));
   lj_err_throw(L, LUA_ERRMEM);
@@ -871,6 +883,10 @@ LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...)
   const char *msg;
   va_list argp;
   va_start(argp, em);
+  if (LJ_HASJIT) {
+    TValue *base = tvref(G(L)->jit_base);
+    if (base) L->base = base;
+  }
   if (curr_funcisL(L)) L->top = curr_topL(L);
   msg = lj_strfmt_pushvf(L, err2msg(em), argp);
   va_end(argp);

+ 3 - 0
libs/LuaJIT/src/lj_jit.h

@@ -273,6 +273,9 @@ typedef struct GCtrace {
   BCIns startins;	/* Original bytecode of starting instruction. */
   MSize szmcode;	/* Size of machine code. */
   MCode *mcode;		/* Start of machine code. */
+#if LJ_ABI_PAUTH
+  ASMFunction mcauth;	/* Start of machine code, with ptr auth applied. */
+#endif
   MSize mcloop;		/* Offset of loop start in machine code. */
   uint16_t nchild;	/* Number of child traces (root trace only). */
   uint16_t spadjust;	/* Stack pointer adjustment (offset in bytes). */

+ 14 - 0
libs/LuaJIT/src/lj_obj.h

@@ -1042,4 +1042,18 @@ LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1];
 LJ_FUNC int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2);
 LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(global_State *g, cTValue *o);
 
+#if LJ_ABI_PAUTH
+#if LJ_TARGET_ARM64
+#include <ptrauth.h>
+#define lj_ptr_sign(ptr, ctx) \
+  ptrauth_sign_unauthenticated((ptr), ptrauth_key_function_pointer, (ctx))
+#define lj_ptr_strip(ptr) ptrauth_strip((ptr), ptrauth_key_function_pointer)
+#else
+#error "No support for pointer authentication for this architecture"
+#endif
+#else
+#define lj_ptr_sign(ptr, ctx) (ptr)
+#define lj_ptr_strip(ptr) (ptr)
+#endif
+
 #endif

+ 2 - 2
libs/LuaJIT/src/lj_opt_mem.c

@@ -229,8 +229,8 @@ static TRef fwd_ahload(jit_State *J, IRRef xref)
 	if (key->o == IR_KSLOT) key = IR(key->op1);
 	lj_ir_kvalue(J->L, &keyv, key);
 	tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv);
-	lj_assertJ(itype2irt(tv) == irt_type(fins->t),
-		   "mismatched type in constant table");
+	if (itype2irt(tv) != irt_type(fins->t))
+	  return 0;  /* Type instability in loop-carried dependency. */
 	if (irt_isnum(fins->t))
 	  return lj_ir_knum_u64(J, tv->u64);
 	else if (LJ_DUALNUM && irt_isint(fins->t))

+ 1 - 0
libs/LuaJIT/src/lj_parse.c

@@ -2518,6 +2518,7 @@ static int predict_next(LexState *ls, FuncState *fs, BCPos pc)
   cTValue *o;
   switch (bc_op(ins)) {
   case BC_MOV:
+    if (bc_d(ins) >= fs->nactvar) return 0;
     name = gco2str(gcref(var_get(ls, fs, bc_d(ins)).name));
     break;
   case BC_UGET:

+ 8 - 5
libs/LuaJIT/src/lj_record.c

@@ -116,6 +116,7 @@ static void rec_check_slots(jit_State *J)
       cTValue *tv = &base[s];
       IRRef ref = tref_ref(tr);
       IRIns *ir = NULL;  /* Silence compiler. */
+      lj_assertJ(tv < J->L->top, "slot %d above top of Lua stack", s);
       if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) {
 	lj_assertJ(ref >= J->cur.nk && ref < J->cur.nins,
 		   "slot %d ref %04d out of range", s, ref - REF_BIAS);
@@ -1941,11 +1942,11 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
   if (J->framedepth > 0) {  /* Simple case: varargs defined on-trace. */
     ptrdiff_t i;
     if (nvararg < 0) nvararg = 0;
-    if (nresults == -1) {
-      nresults = nvararg;
-      J->maxslot = dst + (BCReg)nvararg;
-    } else if (dst + nresults > J->maxslot) {
+    if (nresults != 1) {
+      if (nresults == -1) nresults = nvararg;
       J->maxslot = dst + (BCReg)nresults;
+    } else if (dst >= J->maxslot) {
+      J->maxslot = dst + 1;
     }
     if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS)
       lj_trace_err(J, LJ_TRERR_STACKOV);
@@ -1976,8 +1977,9 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
       }
       for (i = nvararg; i < nresults; i++)
 	J->base[dst+i] = TREF_NIL;
-      if (dst + (BCReg)nresults > J->maxslot)
+      if (nresults != 1 || dst >= J->maxslot) {
 	J->maxslot = dst + (BCReg)nresults;
+      }
     } else if (select_detect(J)) {  /* y = select(x, ...) */
       TRef tridx = J->base[dst-1];
       TRef tr = TREF_NIL;
@@ -2477,6 +2479,7 @@ void lj_record_ins(jit_State *J)
 
   case BC_TSETM:
     rec_tsetm(J, ra, (BCReg)(J->L->top - J->L->base), (int32_t)rcv->u32.lo);
+    J->maxslot = ra;  /* The table slot at ra-1 is the highest used slot. */
     break;
 
   case BC_TNEW:

+ 6 - 0
libs/LuaJIT/src/lj_target_arm64.h

@@ -260,6 +260,9 @@ typedef enum A64Ins {
   A64I_CBZ = 0x34000000,
   A64I_CBNZ = 0x35000000,
 
+  A64I_BRAAZ = 0xd61f081f,
+  A64I_BLRAAZ = 0xd63f081f,
+
   A64I_NOP = 0xd503201f,
 
   /* FP */
@@ -317,6 +320,9 @@ typedef enum A64Ins {
   A64I_FMOV_DI = 0x1e601000,
 } A64Ins;
 
+#define A64I_BR_AUTH	(LJ_ABI_PAUTH ? A64I_BRAAZ : A64I_BR)
+#define A64I_BLR_AUTH	(LJ_ABI_PAUTH ? A64I_BLRAAZ : A64I_BLR)
+
 typedef enum A64Shift {
   A64SH_LSL, A64SH_LSR, A64SH_ASR, A64SH_ROR
 } A64Shift;

+ 23 - 17
libs/LuaJIT/src/lj_trace.c

@@ -153,6 +153,9 @@ static void trace_save(jit_State *J, GCtrace *T)
   newwhite(J2G(J), T);
   T->gct = ~LJ_TTRACE;
   T->ir = (IRIns *)p - J->cur.nk;  /* The IR has already been copied above. */
+#if LJ_ABI_PAUTH
+  T->mcauth = lj_ptr_sign((ASMFunction)T->mcode, T);
+#endif
   p += szins;
   TRACE_APPENDVEC(snap, nsnap, SnapShot)
   TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry)
@@ -428,6 +431,12 @@ static void trace_start(jit_State *J)
     return;
   }
 
+  /* Ensuring forward progress for BC_ITERN can trigger hotcount again. */
+  if (!J->parent && bc_op(*J->pc) == BC_JLOOP) {  /* Already compiled. */
+    J->state = LJ_TRACE_IDLE;  /* Silently ignored. */
+    return;
+  }
+
   /* Get a new trace number. */
   traceno = trace_findfree(J);
   if (LJ_UNLIKELY(traceno == 0)) {  /* No free trace? */
@@ -864,7 +873,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
   ExitDataCP exd;
   int errcode, exitcode = J->exitcode;
   TValue exiterr;
-  const BCIns *pc;
+  const BCIns *pc, *retpc;
   void *cf;
   GCtrace *T;
 
@@ -916,22 +925,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
   } else {
     trace_hotside(J, pc);
   }
-  if (bc_op(*pc) == BC_JLOOP) {
-    BCIns *retpc = &traceref(J, bc_d(*pc))->startins;
-    int isret = bc_isret(bc_op(*retpc));
-    if (isret || bc_op(*retpc) == BC_ITERN) {
-      if (J->state == LJ_TRACE_RECORD) {
-	J->patchins = *pc;
-	J->patchpc = (BCIns *)pc;
-	*J->patchpc = *retpc;
-	J->bcskip = 1;
-      } else if (isret) {
-	pc = retpc;
-	setcframe_pc(cf, pc);
-      }
-    }
-  }
-  /* Return MULTRES or 0. */
+  /* Return MULTRES or 0 or -17. */
   ERRNO_RESTORE
   switch (bc_op(*pc)) {
   case BC_CALLM: case BC_CALLMT:
@@ -940,6 +934,18 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
     return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc));
   case BC_TSETM:
     return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc));
+  case BC_JLOOP:
+    retpc = &traceref(J, bc_d(*pc))->startins;
+    if (bc_isret(bc_op(*retpc)) || bc_op(*retpc) == BC_ITERN) {
+      /* Dispatch to original ins to ensure forward progress. */
+      if (J->state != LJ_TRACE_RECORD) return -17;
+      /* Unpatch bytecode when recording. */
+      J->patchins = *pc;
+      J->patchpc = (BCIns *)pc;
+      *J->patchpc = *retpc;
+      J->bcskip = 1;
+    }
+    return 0;
   default:
     if (bc_op(*pc) >= BC_FUNCF)
       return (int)((BCReg)(L->top - L->base) + 1);

+ 3 - 3
libs/LuaJIT/src/lj_vm.h

@@ -54,8 +54,8 @@ LJ_ASMF void lj_vm_profhook(void);
 LJ_ASMF void lj_vm_IITERN(void);
 
 /* Trace exit handling. */
-LJ_ASMF void lj_vm_exit_handler(void);
-LJ_ASMF void lj_vm_exit_interp(void);
+LJ_ASMF char lj_vm_exit_handler[];
+LJ_ASMF char lj_vm_exit_interp[];
 
 /* Internal math helper functions. */
 #if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP)
@@ -111,6 +111,6 @@ LJ_ASMF void lj_cont_stitch(void);  /* Trace stitching. */
 LJ_ASMF char lj_vm_asm_begin[];
 
 /* Bytecode offsets are relative to lj_vm_asm_begin. */
-#define makeasmfunc(ofs)	((ASMFunction)(lj_vm_asm_begin + (ofs)))
+#define makeasmfunc(ofs) lj_ptr_sign((ASMFunction)(lj_vm_asm_begin + (ofs)), 0)
 
 #endif

+ 15 - 2
libs/LuaJIT/src/vm_arm.dasc

@@ -2196,8 +2196,8 @@ static void build_subroutines(BuildCtx *ctx)
   |.if JIT
   |  ldr L, SAVE_L
   |1:
-  |  cmp CARG1, #0
-  |  blt >9				// Check for error from exit.
+  |  cmn CARG1, #LUA_ERRERR
+  |  bhs >9				// Check for error from exit.
   |   lsl RC, CARG1, #3
   |  ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
   |   str RC, SAVE_MULTRES
@@ -2213,6 +2213,8 @@ static void build_subroutines(BuildCtx *ctx)
   |   ldr INS, [PC], #4
   |     lsl MASKR8, MASKR8, #3		// MASKR8 = 255*8.
   |    st_vmstate CARG4
+  |  cmn CARG1, #17			// Static dispatch?
+  |  beq >5
   |  cmp OP, #BC_FUNCC+2		// Fast function?
   |  bhs >4
   |2:
@@ -2238,6 +2240,17 @@ static void build_subroutines(BuildCtx *ctx)
   |  ldr KBASE, [CARG3, #PC2PROTO(k)]
   |  b <2
   |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  ldr CARG1, [DISPATCH, #DISPATCH_J(trace)]
+  |  decode_RD RC, INS
+  |  ldr TRACE:CARG1, [CARG1, RC, lsl #2]
+  |  ldr INS, TRACE:CARG1->startins
+  |  decode_OP OP, INS
+  |   decode_RA8 RA, INS
+  |  add OP, DISPATCH, OP, lsl #2
+  |   decode_RD RC, INS
+  |  ldr pc, [OP, #GG_DISP2STATIC]
+  |
   |9:  // Rethrow error from the right C frame.
   |  rsb CARG2, CARG1, #0
   |  mov CARG1, L

+ 59 - 18
libs/LuaJIT/src/vm_arm64.dasc

@@ -77,6 +77,23 @@
 |.define CRET1,		x0
 |.define CRET1w,	w0
 |
+|//-----------------------------------------------------------------------
+|
+|// ARM64e pointer authentication codes (PAC).
+|.if PAUTH
+|.macro sp_auth; pacibsp; .endmacro
+|.macro br_auth, reg; braaz reg; .endmacro
+|.macro blr_auth, reg; blraaz reg; .endmacro
+|.macro ret_auth; retab; .endmacro
+|.else
+|.macro sp_auth; .endmacro
+|.macro br_auth, reg; br reg; .endmacro
+|.macro blr_auth, reg; blr reg; .endmacro
+|.macro ret_auth; ret; .endmacro
+|.endif
+|
+|//-----------------------------------------------------------------------
+|
 |// Stack layout while in interpreter. Must match with lj_frame.h.
 |
 |.define CFRAME_SPACE,	208
@@ -106,6 +123,7 @@
 |.endmacro
 |
 |.macro saveregs
+|  sp_auth
 |  sub sp, sp, # CFRAME_SPACE
 |  stp fp, lr, [sp, # SAVE_FP_LR_]
 |  add fp, sp, # SAVE_FP_LR_
@@ -180,7 +198,7 @@
 |   decode_RA RA, INS
 |  ldr TMP0, [TMP1, #GG_G2DISP]
 |   decode_RD RC, INS
-|  br TMP0
+|  br_auth TMP0
 |.endmacro
 |
 |// Instruction footer.
@@ -209,7 +227,7 @@
 |   decode_RA RA, INS
 |  ldr TMP0, [TMP1, #GG_G2DISP]
 |   add RA, BASE, RA, lsl #3
-|  br TMP0
+|  br_auth TMP0
 |.endmacro
 |
 |.macro ins_call
@@ -356,7 +374,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |->vm_leave_unw:
   |  restoreregs
-  |  ret
+  |  ret_auth
   |
   |6:
   |  bgt >7				// Less results wanted?
@@ -542,7 +560,7 @@ static void build_subroutines(BuildCtx *ctx)
   |   str RC, SAVE_CFRAME
   |  str TMP0, L->cframe		// Add our C frame to cframe chain.
   |    str L, GL->cur_L
-  |  blr CARG4			// (lua_State *L, lua_CFunction func, void *ud)
+  |  blr_auth CARG4		// (lua_State *L, lua_CFunction func, void *ud)
   |  mov BASE, CRET1
   |   mov PC, #FRAME_CP
   |  cbnz BASE, <3			// Else continue with the call.
@@ -573,7 +591,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  ldr CARG3, LFUNC:CARG3->pc
   |  ldr KBASE, [CARG3, #PC2PROTO(k)]
   |  // BASE = base, RA = resultptr, CARG4 = meta base
-  |    br CARG1
+  |    br_auth CARG1
   |
   |.if FFI
   |1:
@@ -1707,7 +1725,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  cmp TMP1, TMP2
   |   mov CARG1, L
   |  bhi >5				// Need to grow stack.
-  |   blr CARG3				// (lua_State *L)
+  |   blr_auth CARG3			// (lua_State *L)
   |  // Either throws an error, or recovers and returns -1, 0 or nresults+1.
   |   ldr BASE, L->base
   |  cmp CRET1w, #0
@@ -1743,6 +1761,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |->fff_gcstep:			// Call GC step function.
   |  // BASE = new base, RC = nargs*8
+  |  sp_auth
   |   add CARG2, BASE, NARGS8:RC	// Calculate L->top.
   |  mov RA, lr
   |   stp BASE, CARG2, L->base
@@ -1754,7 +1773,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov lr, RA				// Help return address predictor.
   |  sub NARGS8:RC, CARG2, BASE		// Calculate nargs*8.
   |   and CFUNC:CARG3, CARG3, #LJ_GCVMASK
-  |  ret
+  |  ret_auth
   |
   |//-----------------------------------------------------------------------
   |//-- Special dispatch targets -------------------------------------------
@@ -1781,7 +1800,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1	// Hook already active?
   |5:  // Re-dispatch to static ins.
   |  ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC]
-  |  br TMP0
+  |  br_auth TMP0
   |
   |->vm_inshook:			// Dispatch target for instr/line hooks.
   |  ldrb TMP2w, GL->hookmask
@@ -1807,7 +1826,7 @@ static void build_subroutines(BuildCtx *ctx)
   |   decode_RA RA, INS
   |  ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC]
   |   decode_RD RC, INS
-  |  br TMP0
+  |  br_auth TMP0
   |
   |->cont_hook:				// Continue from hook yield.
   |  ldr CARG1, [CARG4, #-40]
@@ -1857,7 +1876,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  sub NARGS8:RC, TMP1, BASE
   |   ldr INSw, [PC, #-4]
   |  and LFUNC:CARG3, CARG3, #LJ_GCVMASK
-  |  br CRET1
+  |  br_auth CRET1
   |
   |->cont_stitch:			// Trace stitching.
   |.if JIT
@@ -1986,8 +2005,8 @@ static void build_subroutines(BuildCtx *ctx)
   |.if JIT
   |  ldr L, SAVE_L
   |1:
-  |  cmp CARG1w, #0
-  |  blt >9				// Check for error from exit.
+  |  cmn CARG1w, #LUA_ERRERR
+  |  bhs >9				// Check for error from exit.
   |   lsl RC, CARG1, #3
   |  ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
   |    movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
@@ -2004,6 +2023,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  ldrb RBw, [PC, # OFS_OP]
   |   ldr INSw, [PC], #4
   |    st_vmstate CARG4w
+  |  cmn CARG1w, #17			// Static dispatch?
+  |  beq >5
   |  cmp RBw, #BC_FUNCC+2		// Fast function?
   |   add TMP1, GL, INS, uxtb #3
   |  bhs >4
@@ -2014,13 +2035,13 @@ static void build_subroutines(BuildCtx *ctx)
   |   decode_RA RA, INS
   |   lsr TMP0, INS, #16
   |   csel RC, TMP0, RC, lo
-  |   blo >5
+  |   blo >3
   |   ldr CARG3, [BASE, FRAME_FUNC]
   |   sub RC, RC, #8
   |   add RA, BASE, RA, lsl #3	// Yes: RA = BASE+framesize*8, RC = nargs*8
   |   and LFUNC:CARG3, CARG3, #LJ_GCVMASK
-  |5:
-  |  br RB
+  |3:
+  |  br_auth RB
   |
   |4:  // Check frame below fast function.
   |  ldr CARG1, [BASE, FRAME_PC]
@@ -2036,6 +2057,17 @@ static void build_subroutines(BuildCtx *ctx)
   |  ldr KBASE, [CARG3, #PC2PROTO(k)]
   |  b <2
   |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  ldr RA, [GL, #GL_J(trace)]
+  |  decode_RD RC, INS
+  |  ldr TRACE:RA, [RA, RC, lsl #3]
+  |  ldr INSw, TRACE:RA->startins
+  |  add TMP0, GL, INS, uxtb #3
+  |   decode_RA RA, INS
+  |  ldr RB, [TMP0, #GG_G2DISP+GG_DISP2STATIC]
+  |   decode_RD RC, INS
+  |  br_auth RB
+  |
   |9:  // Rethrow error from the right C frame.
   |  neg CARG2w, CARG1w
   |  mov CARG1, L
@@ -2182,6 +2214,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  // Caveat: needs special frame unwinding, see below.
   |.if FFI
   |  .type CCSTATE, CCallState, x19
+  |  sp_auth
   |  stp x20, CCSTATE, [sp, #-32]!
   |  stp fp, lr, [sp, #16]
   |  add fp, sp, #16
@@ -2208,14 +2241,14 @@ static void build_subroutines(BuildCtx *ctx)
   |  ldp x6, x7, CCSTATE->gpr[6]
   |   ldp d6, d7, CCSTATE->fpr[6]
   |  ldr x8, CCSTATE->retp
-  |  blr TMP3
+  |  blr_auth TMP3
   |  sub sp, fp, #16
   |  stp x0, x1, CCSTATE->gpr[0]
   |   stp d0, d1, CCSTATE->fpr[0]
   |   stp d2, d3, CCSTATE->fpr[2]
   |  ldp fp, lr, [sp, #16]
   |  ldp x20, CCSTATE, [sp], #32
-  |  ret
+  |  ret_auth
   |.endif
   |// Note: vm_ffi_call must be the last function in this object file!
   |
@@ -3786,12 +3819,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |   mov CARG2w, #0  // Traces on ARM64 don't store the trace #, so use 0.
     |  ldr TRACE:RC, [CARG1, RC, lsl #3]
     |   st_vmstate CARG2w
+    |.if PAUTH
+    |  ldr RA, TRACE:RC->mcauth
+    |.else
     |  ldr RA, TRACE:RC->mcode
+    |.endif
     |   str BASE, GL->jit_base
     |   str L, GL->tmpbuf.L
     |  sub sp, sp, #16	// See SPS_FIXED. Avoids sp adjust in every root trace.
+    |.if PAUTH
+    |  braa RA, RC
+    |.else
     |  br RA
     |.endif
+    |.endif
     break;
 
   case BC_JMP:
@@ -3901,7 +3942,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  mov CARG1, L
     |   bhi ->vm_growstack_c		// Need to grow stack.
     |    st_vmstate TMP0w
-    |  blr CARG4			// (lua_State *L [, lua_CFunction f])
+    |  blr_auth CARG4			// (lua_State *L [, lua_CFunction f])
     |  // Returns nresults.
     |  ldp BASE, TMP1, L->base
     |    str L, GL->cur_L

+ 23 - 4
libs/LuaJIT/src/vm_mips.dasc

@@ -2466,7 +2466,8 @@ static void build_subroutines(BuildCtx *ctx)
   |   addiu DISPATCH, JGL, -GG_DISP2G-32768
   |  sw BASE, L->base
   |1:
-  |  bltz CRET1, >9			// Check for error from exit.
+  |  sltiu TMP0, CRET1, -LUA_ERRERR	// Check for error from exit.
+  |  beqz TMP0, >9
   |.  lw LFUNC:RB, FRAME_FUNC(BASE)
   |    .FPU lui TMP3, 0x59c0			// TOBIT = 2^52 + 2^51 (float).
   |  sll MULTRES, CRET1, 3
@@ -2480,14 +2481,16 @@ static void build_subroutines(BuildCtx *ctx)
   |    .FPU cvt.d.s TOBIT, TOBIT
   |  // Modified copy of ins_next which handles function header dispatch, too.
   |  lw INS, 0(PC)
-  |   addiu PC, PC, 4
+  |  addiu CRET1, CRET1, 17		// Static dispatch?
   |    // Assumes TISNIL == ~LJ_VMST_INTERP == -1
   |    sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
+  |   decode_RD8a RD, INS
+  |  beqz CRET1, >5
+  |.  addiu PC, PC, 4
   |  decode_OP4a TMP1, INS
   |  decode_OP4b TMP1
-  |    sltiu TMP2, TMP1, BC_FUNCF*4
   |  addu TMP0, DISPATCH, TMP1
-  |   decode_RD8a RD, INS
+  |    sltiu TMP2, TMP1, BC_FUNCF*4
   |  lw AT, 0(TMP0)
   |   decode_RA8a RA, INS
   |    beqz TMP2, >2
@@ -2515,6 +2518,22 @@ static void build_subroutines(BuildCtx *ctx)
   |  jr AT
   |.  addu RA, RA, BASE
   |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  lw TMP0, DISPATCH_J(trace)(DISPATCH)
+  |  decode_RD4b RD
+  |  addu TMP0, TMP0, RD
+  |  lw TRACE:TMP2, 0(TMP0)
+  |  lw INS, TRACE:TMP2->startins
+  |  decode_OP4a TMP1, INS
+  |  decode_OP4b TMP1
+  |  addu TMP0, DISPATCH, TMP1
+  |   decode_RD8a RD, INS
+  |  lw AT, GG_DISP2STATIC(TMP0)
+  |   decode_RA8a RA, INS
+  |   decode_RD8b RD
+  |  jr AT
+  |.  decode_RA8b RA
+  |
   |9:  // Rethrow error from the right C frame.
   |  load_got lj_err_trace
   |  sub CARG2, r0, CRET1

+ 23 - 4
libs/LuaJIT/src/vm_mips64.dasc

@@ -2571,7 +2571,8 @@ static void build_subroutines(BuildCtx *ctx)
   |   daddiu DISPATCH, JGL, -GG_DISP2G-32768
   |  sd BASE, L->base
   |1:
-  |  bltz CRET1, >9			// Check for error from exit.
+  |  sltiu TMP0, CRET1, -LUA_ERRERR	// Check for error from exit.
+  |  beqz TMP0, >9
   |.  ld LFUNC:RB, FRAME_FUNC(BASE)
   |    .FPU lui TMP3, 0x59c0		// TOBIT = 2^52 + 2^51 (float).
   |  dsll MULTRES, CRET1, 3
@@ -2586,14 +2587,16 @@ static void build_subroutines(BuildCtx *ctx)
   |    .FPU cvt.d.s TOBIT, TOBIT
   |  // Modified copy of ins_next which handles function header dispatch, too.
   |  lw INS, 0(PC)
-  |   daddiu PC, PC, 4
+  |  addiu CRET1, CRET1, 17		// Static dispatch?
   |    // Assumes TISNIL == ~LJ_VMST_INTERP == -1
   |    sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
+  |   decode_RD8a RD, INS
+  |  beqz CRET1, >5
+  |.  daddiu PC, PC, 4
   |  decode_OP8a TMP1, INS
   |  decode_OP8b TMP1
-  |    sltiu TMP2, TMP1, BC_FUNCF*8
   |  daddu TMP0, DISPATCH, TMP1
-  |   decode_RD8a RD, INS
+  |    sltiu TMP2, TMP1, BC_FUNCF*8
   |  ld AT, 0(TMP0)
   |   decode_RA8a RA, INS
   |    beqz TMP2, >2
@@ -2622,6 +2625,22 @@ static void build_subroutines(BuildCtx *ctx)
   |  jr AT
   |.  daddu RA, RA, BASE
   |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  ld TMP0, DISPATCH_J(trace)(DISPATCH)
+  |  decode_RD8b RD
+  |  daddu TMP0, TMP0, RD
+  |  ld TRACE:TMP2, 0(TMP0)
+  |  lw INS, TRACE:TMP2->startins
+  |  decode_OP8a TMP1, INS
+  |  decode_OP8b TMP1
+  |  daddu TMP0, DISPATCH, TMP1
+  |   decode_RD8a RD, INS
+  |  ld AT, GG_DISP2STATIC(TMP0)
+  |   decode_RA8a RA, INS
+  |   decode_RD8b RD
+  |  jr AT
+  |.  decode_RA8b RA
+  |
   |9:  // Rethrow error from the right C frame.
   |  load_got lj_err_trace
   |  sub CARG2, r0, CRET1

+ 20 - 2
libs/LuaJIT/src/vm_ppc.dasc

@@ -3015,8 +3015,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  addi DISPATCH, JGL, -GG_DISP2G-32768
   |  stp BASE, L->base
   |1:
-  |  cmpwi CARG1, 0
-  |  blt >9				// Check for error from exit.
+  |  li TMP2, -LUA_ERRERR
+  |  cmplw CARG1, TMP2
+  |  bge >9				// Check for error from exit.
   |  lwz LFUNC:RB, FRAME_FUNC(BASE)
   |   slwi MULTRES, CARG1, 3
   |    li TMP2, 0
@@ -3041,6 +3042,8 @@ static void build_subroutines(BuildCtx *ctx)
   |   addi PC, PC, 4
   |    // Assumes TISNIL == ~LJ_VMST_INTERP == -1.
   |    stw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
+  |  cmpwi CARG1, -17			// Static dispatch?
+  |  beq >5
   |  decode_OPP TMP1, INS
   |   decode_RA8 RA, INS
   |  lpx TMP0, DISPATCH, TMP1
@@ -3070,6 +3073,21 @@ static void build_subroutines(BuildCtx *ctx)
   |   add RA, RA, BASE
   |  bctr
   |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  lwz TMP1, DISPATCH_J(trace)(DISPATCH)
+  |  decode_RD4 RD, INS
+  |  lwzx TRACE:TMP1, TMP1, RD
+  |  lwz INS, TRACE:TMP1->startins
+  |  decode_OPP TMP1, INS
+  |  addi TMP1, TMP1, GG_DISP2STATIC
+  |  lpx TMP0, DISPATCH, TMP1
+  |  mtctr TMP0
+  |   decode_RB8 RB, INS
+  |   decode_RD8 RD, INS
+  |   decode_RA8 RA, INS
+  |   decode_RC8 RC, INS
+  |  bctr
+  |
   |9:  // Rethrow error from the right C frame.
   |  neg CARG2, CARG1
   |  mr CARG1, L

+ 12 - 1
libs/LuaJIT/src/vm_x64.dasc

@@ -2453,7 +2453,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov r12, [RA]
   |  mov rsp, RA			// Reposition stack to C frame.
   |.endif
-  |  test RDd, RDd; js >9		// Check for error from exit.
+  |  cmp RDd, -LUA_ERRERR; jae >9	// Check for error from exit.
   |  mov L:RB, SAVE_L
   |  mov MULTRES, RDd
   |  mov LFUNC:KBASE, [BASE-16]
@@ -2469,6 +2469,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  movzx OP, RCL
   |  add PC, 4
   |  shr RCd, 16
+  |  cmp MULTRES, -17			// Static dispatch?
+  |  je >5
   |  cmp OP, BC_FUNCF			// Function header?
   |  jb >3
   |  cmp OP, BC_FUNCC+2			// Fast function?
@@ -2491,6 +2493,15 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov KBASE, [KBASE+PC2PROTO(k)]
   |  jmp <2
   |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  mov RA, [DISPATCH+DISPATCH_J(trace)]
+  |  mov TRACE:RA, [RA+RD*8]
+  |  mov RCd, TRACE:RA->startins
+  |  movzx RAd, RCH
+  |  movzx OP, RCL
+  |  shr RCd, 16
+  |  jmp aword [DISPATCH+OP*8+GG_DISP2STATIC]
+  |
   |9:  // Rethrow error from the right C frame.
   |  mov CARG2d, RDd
   |  mov CARG1, L:RB

+ 16 - 1
libs/LuaJIT/src/vm_x86.dasc

@@ -2902,7 +2902,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov r13, TMPa
   |  mov r12, TMPQ
   |.endif
-  |  test RD, RD; js >9			// Check for error from exit.
+  |  cmp RD, -LUA_ERRERR; jae >9	// Check for error from exit.
   |  mov L:RB, SAVE_L
   |  mov MULTRES, RD
   |  mov LFUNC:KBASE, [BASE-8]
@@ -2917,6 +2917,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  movzx OP, RCL
   |  add PC, 4
   |  shr RC, 16
+  |  cmp MULTRES, -17			// Static dispatch?
+  |  je >5
   |  cmp OP, BC_FUNCF			// Function header?
   |  jb >3
   |  cmp OP, BC_FUNCC+2			// Fast function?
@@ -2942,6 +2944,19 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov KBASE, [KBASE+PC2PROTO(k)]
   |  jmp <2
   |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  mov RA, [DISPATCH+DISPATCH_J(trace)]
+  |  mov TRACE:RA, [RA+RD*4]
+  |  mov RC, TRACE:RA->startins
+  |  movzx RA, RCH
+  |  movzx OP, RCL
+  |  shr RC, 16
+  |.if X64
+  |  jmp aword [DISPATCH+OP*8+GG_DISP2STATIC]
+  |.else
+  |  jmp aword [DISPATCH+OP*4+GG_DISP2STATIC]
+  |.endif
+  |
   |9:  // Rethrow error from the right C frame.
   |  mov FCARG2, RD
   |  mov FCARG1, L:RB