
Update LuaJIT to commit 72efc42

Sasha Szpakowski committed 2 years ago · commit e7e82e2da5
61 changed files with 573 additions and 254 deletions
  1. libs/LuaJIT/Makefile (+3 -1)
  2. libs/LuaJIT/doc/contact.html (+7 -0)
  3. libs/LuaJIT/doc/ext_ffi_api.html (+3 -1)
  4. libs/LuaJIT/doc/running.html (+8 -0)
  5. libs/LuaJIT/dynasm/dasm_arm.h (+6 -11)
  6. libs/LuaJIT/dynasm/dasm_arm64.h (+6 -11)
  7. libs/LuaJIT/dynasm/dasm_arm64.lua (+7 -0)
  8. libs/LuaJIT/dynasm/dasm_mips.h (+6 -11)
  9. libs/LuaJIT/dynasm/dasm_ppc.h (+6 -11)
  10. libs/LuaJIT/dynasm/dasm_x86.h (+7 -12)
  11. libs/LuaJIT/src/Makefile (+4 -0)
  12. libs/LuaJIT/src/Makefile.dep (+21 -21)
  13. libs/LuaJIT/src/host/buildvm_asm.c (+6 -3)
  14. libs/LuaJIT/src/host/buildvm_lib.c (+11 -4)
  15. libs/LuaJIT/src/lib_aux.c (+33 -2)
  16. libs/LuaJIT/src/lib_base.c (+1 -1)
  17. libs/LuaJIT/src/lib_bit.c (+2 -1)
  18. libs/LuaJIT/src/lib_ffi.c (+4 -1)
  19. libs/LuaJIT/src/lib_jit.c (+2 -1)
  20. libs/LuaJIT/src/lj_arch.h (+13 -0)
  21. libs/LuaJIT/src/lj_asm.c (+9 -2)
  22. libs/LuaJIT/src/lj_asm_arm.h (+8 -5)
  23. libs/LuaJIT/src/lj_asm_arm64.h (+11 -11)
  24. libs/LuaJIT/src/lj_asm_mips.h (+6 -6)
  25. libs/LuaJIT/src/lj_asm_ppc.h (+6 -5)
  26. libs/LuaJIT/src/lj_asm_x86.h (+4 -4)
  27. libs/LuaJIT/src/lj_bcwrite.c (+2 -1)
  28. libs/LuaJIT/src/lj_carith.c (+1 -1)
  29. libs/LuaJIT/src/lj_ccallback.c (+3 -3)
  30. libs/LuaJIT/src/lj_cparse.c (+1 -1)
  31. libs/LuaJIT/src/lj_crecord.c (+6 -1)
  32. libs/LuaJIT/src/lj_ctype.c (+13 -1)
  33. libs/LuaJIT/src/lj_debug.c (+6 -3)
  34. libs/LuaJIT/src/lj_emit_arm.h (+1 -1)
  35. libs/LuaJIT/src/lj_emit_arm64.h (+15 -8)
  36. libs/LuaJIT/src/lj_err.c (+23 -7)
  37. libs/LuaJIT/src/lj_gc.c (+8 -2)
  38. libs/LuaJIT/src/lj_jit.h (+6 -1)
  39. libs/LuaJIT/src/lj_obj.h (+15 -1)
  40. libs/LuaJIT/src/lj_opt_fold.c (+3 -3)
  41. libs/LuaJIT/src/lj_opt_mem.c (+8 -2)
  42. libs/LuaJIT/src/lj_parse.c (+7 -6)
  43. libs/LuaJIT/src/lj_record.c (+12 -7)
  44. libs/LuaJIT/src/lj_state.c (+1 -1)
  45. libs/LuaJIT/src/lj_strfmt.c (+2 -2)
  46. libs/LuaJIT/src/lj_strscan.c (+13 -13)
  47. libs/LuaJIT/src/lj_target_arm64.h (+6 -0)
  48. libs/LuaJIT/src/lj_trace.c (+23 -17)
  49. libs/LuaJIT/src/lj_vm.h (+3 -3)
  50. libs/LuaJIT/src/lj_vmevent.h (+4 -3)
  51. libs/LuaJIT/src/lj_vmmath.c (+15 -4)
  52. libs/LuaJIT/src/nxbuild.bat (+1 -0)
  53. libs/LuaJIT/src/ps4build.bat (+2 -2)
  54. libs/LuaJIT/src/ps5build.bat (+2 -2)
  55. libs/LuaJIT/src/vm_arm.dasc (+15 -2)
  56. libs/LuaJIT/src/vm_arm64.dasc (+62 -19)
  57. libs/LuaJIT/src/vm_mips.dasc (+23 -4)
  58. libs/LuaJIT/src/vm_mips64.dasc (+23 -4)
  59. libs/LuaJIT/src/vm_ppc.dasc (+20 -2)
  60. libs/LuaJIT/src/vm_x64.dasc (+12 -1)
  61. libs/LuaJIT/src/vm_x86.dasc (+16 -1)

+ 3 - 1
libs/LuaJIT/Makefile

@@ -92,7 +92,9 @@ FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
 FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
 	      dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
 	      dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
-	      dis_mips64.lua dis_mips64el.lua vmdef.lua
+	      dis_mips64.lua dis_mips64el.lua \
+	      dis_mips64r6.lua dis_mips64r6el.lua \
+	      vmdef.lua
 
 ifeq (,$(findstring Windows,$(OS)))
   HOST_SYS:= $(shell uname -s)

+ 7 - 0
libs/LuaJIT/doc/contact.html

@@ -84,6 +84,13 @@ xD("fyZKB8xv\"FJytmz8.KAB0u52D")
 </p>
 </noscript>
 
+<p><i>
+Note: I cannot reply to GMail, Google Workplace, Outlook or Office365
+mail addresses, since they prefer to mindlessly filter out mails sent
+from small domains using independent mail servers, such as mine. If you
+don't like that, please complain to Google or Microsoft, not me.
+</i></p>
+
 <h2>Copyright</h2>
 <p>
 All documentation is

+ 3 - 1
libs/LuaJIT/doc/ext_ffi_api.html

@@ -463,8 +463,10 @@ otherwise. The following parameters are currently defined:
 <tr class="odd">
 <td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr>
 <tr class="even">
-<td class="abiparam">uwp</td><td class="abidesc">Universal Windows Platform</td></tr>
+<td class="abiparam">pauth</td><td class="abidesc">Pointer authentication ABI</td></tr>
 <tr class="odd">
+<td class="abiparam">uwp</td><td class="abidesc">Universal Windows Platform</td></tr>
+<tr class="even">
 <td class="abiparam">gc64</td><td class="abidesc">64 bit GC references</td></tr>
 </table>
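
The new "pauth" parameter is queried the same way as the existing ABI flags. A minimal sketch, assuming a LuaJIT build with this change (each call returns a boolean, and "pauth" should only report true on a pointer-authentication ABI such as arm64e):

  local ffi = require("ffi")
  -- Each ffi.abi() call reports an ABI trait of the running VM as a boolean.
  print(ffi.abi("64bit"))   -- true on any 64 bit build
  print(ffi.abi("pauth"))   -- true only on a pointer-authentication ABI (e.g. arm64e)
  print(ffi.abi("gc64"))    -- true when 64 bit GC references are in use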
 

+ 8 - 0
libs/LuaJIT/doc/running.html

@@ -220,6 +220,12 @@ mix the three forms, but note that setting an optimization level
 overrides all earlier flags.
 </p>
 <p>
+Note that <tt>-Ofma</tt> is not enabled by default at any level,
+because it affects floating-point result accuracy. Only enable this,
+if you fully understand the trade-offs of FMA for performance (higher),
+determinism (lower) and numerical accuracy (higher).
+</p>
+<p>
 Here are the available flags and at what optimization levels they
 are enabled:
 </p>
@@ -251,6 +257,8 @@ are enabled:
 <td class="flag_name">sink</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_desc">Allocation/Store Sinking</td></tr>
 <tr class="even">
 <td class="flag_name">fuse</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_desc">Fusion of operands into instructions</td></tr>
+<tr class="odd">
+<td class="flag_name">fma </td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_desc">Fused multiply-add</td></tr>
 </table>
 <p>
 Here are the parameters and their default settings:
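
Because -Ofma is not part of any optimization level, it has to be requested explicitly. A minimal sketch, assuming a build that includes this flag: pass -O+fma on the luajit command line, or enable it from Lua at runtime via the standard jit.opt module:

  -- Enable fused multiply-add for traces compiled from now on (off by default).
  jit.opt.start("+fma")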

+ 6 - 11
libs/LuaJIT/dynasm/dasm_arm.h

@@ -70,7 +70,7 @@ struct dasm_State {
   size_t lgsize;
   int *pclabels;		/* PC label chains/pos ptrs. */
   size_t pcsize;
-  void **globals;		/* Array of globals (bias -10). */
+  void **globals;		/* Array of globals. */
   dasm_Section *section;	/* Pointer to active section. */
   size_t codesize;		/* Total size of all code sections. */
   int maxsection;		/* 0 <= sectionidx < maxsection. */
@@ -87,7 +87,6 @@ void dasm_init(Dst_DECL, int maxsection)
 {
   dasm_State *D;
   size_t psz = 0;
-  int i;
   Dst_REF = NULL;
   DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
   D = Dst_REF;
@@ -98,12 +97,7 @@ void dasm_init(Dst_DECL, int maxsection)
   D->pcsize = 0;
   D->globals = NULL;
   D->maxsection = maxsection;
-  for (i = 0; i < maxsection; i++) {
-    D->sections[i].buf = NULL;  /* Need this for pass3. */
-    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
-    D->sections[i].bsize = 0;
-    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
-  }
+  memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
 }
 
 /* Free DynASM state. */
@@ -123,7 +117,7 @@ void dasm_free(Dst_DECL)
 void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
 {
   dasm_State *D = Dst_REF;
-  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  D->globals = gl;
   DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
 }
 
@@ -148,6 +142,7 @@ void dasm_setup(Dst_DECL, const void *actionlist)
   if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
   for (i = 0; i < D->maxsection; i++) {
     D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
     D->sections[i].ofs = 0;
   }
 }
@@ -372,7 +367,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  break;
 	case DASM_REL_LG:
 	  if (n < 0) {
-	    n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp - 4);
+	    n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp - 4);
 	    goto patchrel;
 	  }
 	  /* fallthrough */
@@ -396,7 +391,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  }
 	  break;
 	case DASM_LABEL_LG:
-	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+	  ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
 	  break;
 	case DASM_LABEL_PC: break;
 	case DASM_IMM:

+ 6 - 11
libs/LuaJIT/dynasm/dasm_arm64.h

@@ -72,7 +72,7 @@ struct dasm_State {
   size_t lgsize;
   int *pclabels;		/* PC label chains/pos ptrs. */
   size_t pcsize;
-  void **globals;		/* Array of globals (bias -10). */
+  void **globals;		/* Array of globals. */
   dasm_Section *section;	/* Pointer to active section. */
   size_t codesize;		/* Total size of all code sections. */
   int maxsection;		/* 0 <= sectionidx < maxsection. */
@@ -89,7 +89,6 @@ void dasm_init(Dst_DECL, int maxsection)
 {
   dasm_State *D;
   size_t psz = 0;
-  int i;
   Dst_REF = NULL;
   DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
   D = Dst_REF;
@@ -100,12 +99,7 @@ void dasm_init(Dst_DECL, int maxsection)
   D->pcsize = 0;
   D->globals = NULL;
   D->maxsection = maxsection;
-  for (i = 0; i < maxsection; i++) {
-    D->sections[i].buf = NULL;  /* Need this for pass3. */
-    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
-    D->sections[i].bsize = 0;
-    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
-  }
+  memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
 }
 
 /* Free DynASM state. */
@@ -125,7 +119,7 @@ void dasm_free(Dst_DECL)
 void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
 {
   dasm_State *D = Dst_REF;
-  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  D->globals = gl;
   DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
 }
 
@@ -150,6 +144,7 @@ void dasm_setup(Dst_DECL, const void *actionlist)
   if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
   for (i = 0; i < D->maxsection; i++) {
     D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
     D->sections[i].ofs = 0;
   }
 }
@@ -444,7 +439,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  break;
 	case DASM_REL_LG:
 	  if (n < 0) {
-	    ptrdiff_t na = (ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp + 4;
+	    ptrdiff_t na = (ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp + 4;
 	    n = (int)na;
 	    CK((ptrdiff_t)n == na, RANGE_REL);
 	    goto patchrel;
@@ -487,7 +482,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  goto patchrel;
 	}
 	case DASM_LABEL_LG:
-	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+	  ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
 	  break;
 	case DASM_LABEL_PC: break;
 	case DASM_IMM:

+ 7 - 0
libs/LuaJIT/dynasm/dasm_arm64.lua

@@ -823,6 +823,13 @@ map_op = {
   tbz_3  = "36000000DTBw|36000000DTBx",
   tbnz_3 = "37000000DTBw|37000000DTBx",
 
+  -- ARM64e: Pointer authentication codes (PAC).
+  blraaz_1  = "d63f081fNx",
+  braa_2    = "d71f0800NDx",
+  braaz_1   = "d61f081fNx",
+  pacibsp_0 = "d503237f",
+  retab_0   = "d65f0fff",
+
   -- Miscellaneous instructions.
   -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr
   -- TODO: sys, sysl, ic, dc, at, tlbi

+ 6 - 11
libs/LuaJIT/dynasm/dasm_mips.h

@@ -69,7 +69,7 @@ struct dasm_State {
   size_t lgsize;
   int *pclabels;		/* PC label chains/pos ptrs. */
   size_t pcsize;
-  void **globals;		/* Array of globals (bias -10). */
+  void **globals;		/* Array of globals. */
   dasm_Section *section;	/* Pointer to active section. */
   size_t codesize;		/* Total size of all code sections. */
   int maxsection;		/* 0 <= sectionidx < maxsection. */
@@ -86,7 +86,6 @@ void dasm_init(Dst_DECL, int maxsection)
 {
   dasm_State *D;
   size_t psz = 0;
-  int i;
   Dst_REF = NULL;
   DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
   D = Dst_REF;
@@ -97,12 +96,7 @@ void dasm_init(Dst_DECL, int maxsection)
   D->pcsize = 0;
   D->globals = NULL;
   D->maxsection = maxsection;
-  for (i = 0; i < maxsection; i++) {
-    D->sections[i].buf = NULL;  /* Need this for pass3. */
-    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
-    D->sections[i].bsize = 0;
-    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
-  }
+  memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
 }
 
 /* Free DynASM state. */
@@ -122,7 +116,7 @@ void dasm_free(Dst_DECL)
 void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
 {
   dasm_State *D = Dst_REF;
-  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  D->globals = gl;
   DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
 }
 
@@ -147,6 +141,7 @@ void dasm_setup(Dst_DECL, const void *actionlist)
   if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
   for (i = 0; i < D->maxsection; i++) {
     D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
     D->sections[i].ofs = 0;
   }
 }
@@ -350,7 +345,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  break;
 	case DASM_REL_LG:
 	  if (n < 0) {
-	    n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp);
+	    n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp);
 	    goto patchrel;
 	  }
 	  /* fallthrough */
@@ -369,7 +364,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  }
 	  break;
 	case DASM_LABEL_LG:
-	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+	  ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
 	  break;
 	case DASM_LABEL_PC: break;
 	case DASM_IMMS:

+ 6 - 11
libs/LuaJIT/dynasm/dasm_ppc.h

@@ -69,7 +69,7 @@ struct dasm_State {
   size_t lgsize;
   int *pclabels;		/* PC label chains/pos ptrs. */
   size_t pcsize;
-  void **globals;		/* Array of globals (bias -10). */
+  void **globals;		/* Array of globals. */
   dasm_Section *section;	/* Pointer to active section. */
   size_t codesize;		/* Total size of all code sections. */
   int maxsection;		/* 0 <= sectionidx < maxsection. */
@@ -86,7 +86,6 @@ void dasm_init(Dst_DECL, int maxsection)
 {
   dasm_State *D;
   size_t psz = 0;
-  int i;
   Dst_REF = NULL;
   DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
   D = Dst_REF;
@@ -97,12 +96,7 @@ void dasm_init(Dst_DECL, int maxsection)
   D->pcsize = 0;
   D->globals = NULL;
   D->maxsection = maxsection;
-  for (i = 0; i < maxsection; i++) {
-    D->sections[i].buf = NULL;  /* Need this for pass3. */
-    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
-    D->sections[i].bsize = 0;
-    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
-  }
+  memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
 }
 
 /* Free DynASM state. */
@@ -122,7 +116,7 @@ void dasm_free(Dst_DECL)
 void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
 {
   dasm_State *D = Dst_REF;
-  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  D->globals = gl;
   DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
 }
 
@@ -147,6 +141,7 @@ void dasm_setup(Dst_DECL, const void *actionlist)
   if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
   for (i = 0; i < D->maxsection; i++) {
     D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
     D->sections[i].ofs = 0;
   }
 }
@@ -354,7 +349,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  break;
 	case DASM_REL_LG:
 	  if (n < 0) {
-	    n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp);
+	    n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp);
 	    goto patchrel;
 	  }
 	  /* fallthrough */
@@ -368,7 +363,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc));
 	  break;
 	case DASM_LABEL_LG:
-	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+	  ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
 	  break;
 	case DASM_LABEL_PC: break;
 	case DASM_IMM:

+ 7 - 12
libs/LuaJIT/dynasm/dasm_x86.h

@@ -68,7 +68,7 @@ struct dasm_State {
   size_t lgsize;
   int *pclabels;		/* PC label chains/pos ptrs. */
   size_t pcsize;
-  void **globals;		/* Array of globals (bias -10). */
+  void **globals;		/* Array of globals. */
   dasm_Section *section;	/* Pointer to active section. */
   size_t codesize;		/* Total size of all code sections. */
   int maxsection;		/* 0 <= sectionidx < maxsection. */
@@ -85,7 +85,6 @@ void dasm_init(Dst_DECL, int maxsection)
 {
   dasm_State *D;
   size_t psz = 0;
-  int i;
   Dst_REF = NULL;
   DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
   D = Dst_REF;
@@ -96,12 +95,7 @@ void dasm_init(Dst_DECL, int maxsection)
   D->pcsize = 0;
   D->globals = NULL;
   D->maxsection = maxsection;
-  for (i = 0; i < maxsection; i++) {
-    D->sections[i].buf = NULL;  /* Need this for pass3. */
-    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
-    D->sections[i].bsize = 0;
-    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
-  }
+  memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
 }
 
 /* Free DynASM state. */
@@ -121,7 +115,7 @@ void dasm_free(Dst_DECL)
 void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
 {
   dasm_State *D = Dst_REF;
-  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  D->globals = gl;
   DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
 }
 
@@ -146,6 +140,7 @@ void dasm_setup(Dst_DECL, const void *actionlist)
   if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
   for (i = 0; i < D->maxsection; i++) {
     D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
     D->sections[i].ofs = 0;
   }
 }
@@ -445,7 +440,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  break;
 	}
 	case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
-	  b++; n = (int)(ptrdiff_t)D->globals[-n];
+	  b++; n = (int)(ptrdiff_t)D->globals[-n-10];
 	  /* fallthrough */
 	case DASM_REL_A: rel_a:
 	  n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
@@ -459,7 +454,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	}
 	case DASM_IMM_LG:
 	  p++;
-	  if (n < 0) { dasma((ptrdiff_t)D->globals[-n]); break; }
+	  if (n < 0) { dasma((ptrdiff_t)D->globals[-n-10]); break; }
 	  /* fallthrough */
 	case DASM_IMM_PC: {
 	  int *pb = DASM_POS2PTR(D, n);
@@ -469,7 +464,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	case DASM_LABEL_LG: {
 	  int idx = *p++;
 	  if (idx >= 10)
-	    D->globals[idx] = (void *)(base + (*p == DASM_SETLABEL ? *b : n));
+	    D->globals[idx-10] = (void *)(base + (*p == DASM_SETLABEL ? *b : n));
 	  break;
 	}
 	case DASM_LABEL_PC: case DASM_SETLABEL: break;

+ 4 - 0
libs/LuaJIT/src/Makefile

@@ -433,6 +433,10 @@ ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
   DASM_AFLAGS+= -D NO_UNWIND
   TARGET_ARCH+= -DLUAJIT_NO_UNWIND
 endif
+ifneq (,$(findstring LJ_ABI_PAUTH 1,$(TARGET_TESTARCH)))
+  DASM_AFLAGS+= -D PAUTH
+  TARGET_ARCH+= -DLJ_ABI_PAUTH=1
+endif
 DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH))))
 ifeq (Windows,$(TARGET_SYS))
   DASM_AFLAGS+= -D WIN

+ 21 - 21
libs/LuaJIT/src/Makefile.dep

@@ -1,6 +1,6 @@
 lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
  lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_trace.h lj_jit.h lj_ir.h \
- lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h
+ lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_vmevent.h
 lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h \
  lj_str.h lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \
@@ -124,7 +124,7 @@ lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
  lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h \
- lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
+ lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_vmevent.h
 lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \
  lj_str.h lj_strfmt.h lj_jit.h lj_ir.h lj_dispatch.h
@@ -222,25 +222,25 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_assert.c lj_obj.h \
  lj_def.h lj_arch.h lj_gc.c lj_gc.h lj_err.h lj_errmsg.h lj_buf.h \
  lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h \
  lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
- lj_traceerr.h lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h \
- lj_char.c lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c \
- lj_prng.h lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h \
- lj_debug.c lj_prng.c lj_state.c lj_lex.h lj_alloc.h luajit.h \
- lj_dispatch.c lj_ccallback.h lj_profile.h lj_vmevent.c lj_vmevent.h \
- lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c lj_serialize.c \
- lj_serialize.h lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \
- lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \
- lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \
- lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \
- lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \
- lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
- lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \
- lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
- lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
- lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
- lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
- lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \
- lib_buffer.c lib_init.c
+ lj_traceerr.h lj_vm.h lj_vmevent.h lj_err.c lj_debug.h lj_ff.h \
+ lj_ffdef.h lj_strfmt.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h lj_obj.c \
+ lj_buf.c lj_str.c lj_prng.h lj_tab.c lj_func.c lj_udata.c lj_meta.c \
+ lj_strscan.h lj_lib.h lj_debug.c lj_prng.c lj_state.c lj_lex.h \
+ lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h lj_profile.h \
+ lj_vmevent.c lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c \
+ lj_serialize.c lj_serialize.h lj_api.c lj_profile.c lj_lex.c lualib.h \
+ lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \
+ lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \
+ lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \
+ lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \
+ lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \
+ lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \
+ lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \
+ lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \
+ lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \
+ lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \
+ lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \
+ lib_ffi.c lib_buffer.c lib_init.c
 luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
 host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
  lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \

+ 6 - 3
libs/LuaJIT/src/host/buildvm_asm.c

@@ -243,6 +243,12 @@ void emit_asm(BuildCtx *ctx)
 
   fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
   fprintf(ctx->fp, "\t.text\n");
+#if LJ_TARGET_MIPS32 && !LJ_ABI_SOFTFP
+  fprintf(ctx->fp, "\t.module fp=32\n");
+#endif
+#if LJ_TARGET_MIPS
+  fprintf(ctx->fp, "\t.set nomips16\n\t.abicalls\n\t.set noreorder\n\t.set nomacro\n");
+#endif
   emit_asm_align(ctx, 4);
 
 #if LJ_TARGET_PS3
@@ -268,9 +274,6 @@ void emit_asm(BuildCtx *ctx)
 	  ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n"
 	  ".pad #28\n");
 #endif
-#endif
-#if LJ_TARGET_MIPS
-  fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n");
 #endif
 
   for (i = rel = 0; i < ctx->nsym; i++) {

+ 11 - 4
libs/LuaJIT/src/host/buildvm_lib.c

@@ -379,13 +379,20 @@ void emit_lib(BuildCtx *ctx)
       /* Simplistic pre-processor. Only handles top-level #if/#endif. */
       if (buf[0] == '#' && buf[1] == 'i' && buf[2] == 'f') {
 	int ok = 1;
-	if (!strcmp(buf, "#if LJ_52\n"))
+	size_t len = strlen(buf);
+	if (buf[len-1] == '\n') {
+	  buf[len-1] = 0;
+	  if (buf[len-2] == '\r') {
+	    buf[len-2] = 0;
+	  }
+	}
+	if (!strcmp(buf, "#if LJ_52"))
 	  ok = LJ_52;
-	else if (!strcmp(buf, "#if LJ_HASJIT\n"))
+	else if (!strcmp(buf, "#if LJ_HASJIT"))
 	  ok = LJ_HASJIT;
-	else if (!strcmp(buf, "#if LJ_HASFFI\n"))
+	else if (!strcmp(buf, "#if LJ_HASFFI"))
 	  ok = LJ_HASFFI;
-	else if (!strcmp(buf, "#if LJ_HASBUFFER\n"))
+	else if (!strcmp(buf, "#if LJ_HASBUFFER"))
 	  ok = LJ_HASBUFFER;
 	if (!ok) {
 	  int lvl = 1;

+ 33 - 2
libs/LuaJIT/src/lib_aux.c

@@ -21,6 +21,7 @@
 #include "lj_state.h"
 #include "lj_trace.h"
 #include "lj_lib.h"
+#include "lj_vmevent.h"
 
 #if LJ_TARGET_POSIX
 #include <sys/wait.h>
@@ -318,6 +319,18 @@ static int panic(lua_State *L)
   return 0;
 }
 
+#ifndef LUAJIT_DISABLE_VMEVENT
+static int error_finalizer(lua_State *L)
+{
+  const char *s = lua_tostring(L, -1);
+  fputs("ERROR in finalizer: ", stderr);
+  fputs(s ? s : "?", stderr);
+  fputc('\n', stderr);
+  fflush(stderr);
+  return 0;
+}
+#endif
+
 #ifdef LUAJIT_USE_SYSMALLOC
 
 #if LJ_64 && !LJ_GC64 && !defined(LUAJIT_USE_VALGRIND)
@@ -339,7 +352,16 @@ static void *mem_alloc(void *ud, void *ptr, size_t osize, size_t nsize)
 LUALIB_API lua_State *luaL_newstate(void)
 {
   lua_State *L = lua_newstate(mem_alloc, NULL);
-  if (L) G(L)->panic = panic;
+  if (L) {
+    G(L)->panic = panic;
+#ifndef LUAJIT_DISABLE_VMEVENT
+    luaL_findtable(L, LUA_REGISTRYINDEX, LJ_VMEVENTS_REGKEY, LJ_VMEVENTS_HSIZE);
+    lua_pushcfunction(L, error_finalizer);
+    lua_rawseti(L, -2, VMEVENT_HASH(LJ_VMEVENT_ERRFIN));
+    G(L)->vmevmask = VMEVENT_MASK(LJ_VMEVENT_ERRFIN);
+    L->top--;
+#endif
+  }
   return L;
 }
 
@@ -353,7 +375,16 @@ LUALIB_API lua_State *luaL_newstate(void)
 #else
   L = lua_newstate(LJ_ALLOCF_INTERNAL, NULL);
 #endif
-  if (L) G(L)->panic = panic;
+  if (L) {
+    G(L)->panic = panic;
+#ifndef LUAJIT_DISABLE_VMEVENT
+    luaL_findtable(L, LUA_REGISTRYINDEX, LJ_VMEVENTS_REGKEY, LJ_VMEVENTS_HSIZE);
+    lua_pushcfunction(L, error_finalizer);
+    lua_rawseti(L, -2, VMEVENT_HASH(LJ_VMEVENT_ERRFIN));
+    G(L)->vmevmask = VMEVENT_MASK(LJ_VMEVENT_ERRFIN);
+    L->top--;
+#endif
+  }
   return L;
 }
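
A hedged sketch of the observable effect: for states created through luaL_newstate(), an error raised inside a GC finalizer is now delivered to the default ERRFIN handler and printed to stderr instead of propagating out of the collector. ffi.gc() and collectgarbage() are standard APIs; the error text below is illustrative:

  local ffi = require("ffi")
  -- Attach a finalizer that fails; its error no longer escapes the GC step.
  local obj = ffi.gc(ffi.new("int[1]"), function() error("cleanup failed") end)
  obj = nil
  collectgarbage()  -- expected to print "ERROR in finalizer: ..." to stderr and continue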
 

+ 1 - 1
libs/LuaJIT/src/lib_base.c

@@ -303,7 +303,7 @@ LJLIB_ASM(tonumber)		LJLIB_REC(.)
 	while (lj_char_isspace((unsigned char)(*ep))) ep++;
 	if (*ep == '\0') {
 	  if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) {
-	    if (neg) ul = (unsigned long)-(long)ul;
+	    if (neg) ul = ~ul+1u;
 	    setintV(L->base-1-LJ_FR2, (int32_t)ul);
 	  } else {
 	    lua_Number n = (lua_Number)ul;

+ 2 - 1
libs/LuaJIT/src/lib_bit.c

@@ -155,7 +155,8 @@ LJLIB_CF(bit_tohex)		LJLIB_REC(.)
 #endif
   SBuf *sb = lj_buf_tmp_(L);
   SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
-  if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
+  if (n < 0) { n = (int32_t)(~(uint32_t)n+1u); sf |= STRFMT_F_UPPER; }
+  if ((uint32_t)n > 254) n = 254;
   sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
 #if LJ_HASFFI
   if (n < 16) b &= ((uint64_t)1 << 4*n)-1;
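
For context, a short sketch of the documented bit.tohex() behavior that this negation and clamping fix preserves (outputs in the comments follow the BitOp documentation):

  local bit = require("bit")
  print(bit.tohex(0xcafe))      --> 0000cafe  (default: 8 lowercase digits)
  print(bit.tohex(0xcafe, 4))   --> cafe      (n selects the number of digits)
  print(bit.tohex(0xcafe, -4))  --> CAFE      (negative n selects uppercase digits)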

+ 4 - 1
libs/LuaJIT/src/lib_ffi.c

@@ -745,6 +745,9 @@ LJLIB_CF(ffi_abi)	LJLIB_REC(.)
 #if LJ_ABI_WIN
     "\003win"
 #endif
+#if LJ_ABI_PAUTH
+    "\007pauth"
+#endif
 #if LJ_TARGET_UWP
     "\003uwp"
 #endif
@@ -776,7 +779,7 @@ LJLIB_CF(ffi_metatype)
   if (!(ctype_isstruct(ct->info) || ctype_iscomplex(ct->info) ||
 	ctype_isvector(ct->info)))
     lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
-  tv = lj_tab_setinth(L, t, -(int32_t)id);
+  tv = lj_tab_setinth(L, t, -(int32_t)ctype_typeid(cts, ct));
   if (!tvisnil(tv))
     lj_err_caller(L, LJ_ERR_PROTMT);
   settabV(L, tv, mt);

+ 2 - 1
libs/LuaJIT/src/lib_jit.c

@@ -422,7 +422,8 @@ LJLIB_CF(jit_util_ircalladdr)
 {
   uint32_t idx = (uint32_t)lj_lib_checkint(L, 1);
   if (idx < IRCALL__MAX) {
-    setintptrV(L->top-1, (intptr_t)(void *)lj_ir_callinfo[idx].func);
+    ASMFunction func = lj_ir_callinfo[idx].func;
+    setintptrV(L->top-1, (intptr_t)(void *)lj_ptr_strip(func));
     return 1;
   }
   return 0;

+ 13 - 0
libs/LuaJIT/src/lj_arch.h

@@ -259,6 +259,9 @@
 #define LJ_ARCH_NAME		"arm64"
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
 #endif
+#if !defined(LJ_ABI_PAUTH) && defined(__arm64e__)
+#define LJ_ABI_PAUTH		1
+#endif
 #define LJ_TARGET_ARM64		1
 #define LJ_TARGET_EHRETREG	0
 #define LJ_TARGET_EHRAREG	30
@@ -466,11 +469,17 @@
 #endif
 #endif
 #elif !LJ_TARGET_PS3
+#if __clang__
+#if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5))
+#error "Need at least Clang 3.5 or newer"
+#endif
+#else
 #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3)
 #error "Need at least GCC 4.3 or newer"
 #endif
 #endif
 #endif
+#endif
 
 /* Check target-specific constraints. */
 #ifndef _BUILDVM_H
@@ -597,6 +606,10 @@
 #define LJ_SOFTFP		(!LJ_ARCH_HASFPU)
 #define LJ_SOFTFP32		(LJ_SOFTFP && LJ_32)
 
+#ifndef LJ_ABI_PAUTH
+#define LJ_ABI_PAUTH		0
+#endif
+
 #if LJ_ARCH_ENDIAN == LUAJIT_BE
 #define LJ_LE			0
 #define LJ_BE			1

+ 9 - 2
libs/LuaJIT/src/lj_asm.c

@@ -1888,6 +1888,8 @@ static void asm_head_side(ASMState *as)
   IRRef1 sloadins[RID_MAX];
   RegSet allow = RSET_ALL;  /* Inverse of all coalesced registers. */
   RegSet live = RSET_EMPTY;  /* Live parent registers. */
+  RegSet pallow = RSET_GPR;  /* Registers needed by the parent stack check. */
+  Reg pbase;
   IRIns *irp = &as->parent->ir[REF_BASE];  /* Parent base. */
   int32_t spadj, spdelta;
   int pass2 = 0;
@@ -1898,7 +1900,11 @@ static void asm_head_side(ASMState *as)
     /* Force snap #0 alloc to prevent register overwrite in stack check. */
     asm_snap_alloc(as, 0);
   }
-  allow = asm_head_side_base(as, irp, allow);
+  pbase = asm_head_side_base(as, irp);
+  if (pbase != RID_NONE) {
+    rset_clear(allow, pbase);
+    rset_clear(pallow, pbase);
+  }
 
   /* Scan all parent SLOADs and collect register dependencies. */
   for (i = as->stopins; i > REF_BASE; i--) {
@@ -1928,6 +1934,7 @@ static void asm_head_side(ASMState *as)
       sloadins[rs] = (IRRef1)i;
       rset_set(live, rs);  /* Block live parent register. */
     }
+    if (!ra_hasspill(regsp_spill(rs))) rset_clear(pallow, regsp_reg(rs));
   }
 
   /* Calculate stack frame adjustment. */
@@ -2044,7 +2051,7 @@ static void asm_head_side(ASMState *as)
     ExitNo exitno = as->J->exitno;
 #endif
     as->T->topslot = (uint8_t)as->topslot;  /* Remember for child traces. */
-    asm_stack_check(as, as->topslot, irp, allow & RSET_GPR, exitno);
+    asm_stack_check(as, as->topslot, irp, pallow, exitno);
   }
 }
 

+ 8 - 5
libs/LuaJIT/src/lj_asm_arm.h

@@ -313,7 +313,11 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,
 }
 
 #if !LJ_SOFTFP
-/* Fuse to multiply-add/sub instruction. */
+/*
+** Fuse to multiply-add/sub instruction.
+** VMLA rounds twice (UMA, not FMA) -- no need to check for JIT_F_OPT_FMA.
+** VFMA needs VFPv4, which is uncommon on the remaining ARM32 targets.
+*/
 static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
 {
   IRRef lref = ir->op1, rref = ir->op2;
@@ -2163,7 +2167,7 @@ static void asm_head_root_base(ASMState *as)
 }
 
 /* Coalesce BASE register for a side trace. */
-static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
 {
   IRIns *ir;
   asm_head_lreg(as);
@@ -2171,16 +2175,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
   if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
     ra_spill(as, ir);
   if (ra_hasspill(irp->s)) {
-    rset_clear(allow, ra_dest(as, ir, allow));
+    return ra_dest(as, ir, RSET_GPR);
   } else {
     Reg r = irp->r;
     lj_assertA(ra_hasreg(r), "base reg lost");
-    rset_clear(allow, r);
     if (r != ir->r && !rset_test(as->freeset, r))
       ra_restore(as, regcost_ref(as->cost[r]));
     ra_destreg(as, ir, r);
+    return r;
   }
-  return allow;
 }
 
 /* -- Tail of trace ------------------------------------------------------- */

+ 11 - 11
libs/LuaJIT/src/lj_asm_arm64.h

@@ -337,7 +337,8 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
 {
   IRRef lref = ir->op1, rref = ir->op2;
   IRIns *irm;
-  if (lref != rref &&
+  if ((as->flags & JIT_F_OPT_FMA) &&
+      lref != rref &&
       ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
        ra_noreg(irm->r)) ||
        (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
@@ -420,8 +421,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
   uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 0;
   Reg gpr, fpr = REGARG_FIRSTFPR;
-  if ((void *)ci->func)
-    emit_call(as, (void *)ci->func);
+  if (ci->func)
+    emit_call(as, ci->func);
   for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
     as->cost[gpr] = REGCOST(~0u, ASMREF_L);
   gpr = REGARG_FIRSTGPR;
@@ -500,7 +501,7 @@ static void asm_callx(ASMState *as, IRIns *ir)
     ci.func = (ASMFunction)(ir_k64(irf)->u64);
   } else {  /* Need a non-argument register for indirect calls. */
     Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
-    emit_n(as, A64I_BLR, freg);
+    emit_n(as, A64I_BLR_AUTH, freg);
     ci.func = (ASMFunction)(void *)0;
   }
   asm_gencall(as, &ci, args);
@@ -908,7 +909,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
   IRIns *irkey = IR(kslot->op1);
   int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
   int32_t kofs = ofs + (int32_t)offsetof(Node, key);
-  int bigofs = !emit_checkofs(A64I_LDRx, ofs);
+  int bigofs = !emit_checkofs(A64I_LDRx, kofs);
   Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
   Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
   Reg key, idx = node;
@@ -934,7 +935,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
   emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key)));
   emit_lso(as, A64I_LDRx, key, idx, kofs);
   if (bigofs)
-    emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR);
+    emit_opk(as, A64I_ADDx, dest, node, ofs, rset_exclude(RSET_GPR, node));
 }
 
 static void asm_uref(ASMState *as, IRIns *ir)
@@ -1201,7 +1202,7 @@ dotypecheck:
       tmp = ra_scratch(as, allow);
       rset_clear(allow, tmp);
     }
-    if (ra_hasreg(dest) && irt_isnum(t) && !(ir->op2 & IRSLOAD_CONVERT))
+    if (ra_hasreg(dest) && tmp != dest)
       emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
     /* Need type check, even if the load result is unused. */
     asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE);
@@ -1914,7 +1915,7 @@ static void asm_head_root_base(ASMState *as)
 }
 
 /* Coalesce BASE register for a side trace. */
-static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
 {
   IRIns *ir;
   asm_head_lreg(as);
@@ -1922,16 +1923,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
   if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
     ra_spill(as, ir);
   if (ra_hasspill(irp->s)) {
-    rset_clear(allow, ra_dest(as, ir, allow));
+    return ra_dest(as, ir, RSET_GPR);
   } else {
     Reg r = irp->r;
     lj_assertA(ra_hasreg(r), "base reg lost");
-    rset_clear(allow, r);
     if (r != ir->r && !rset_test(as->freeset, r))
       ra_restore(as, regcost_ref(as->cost[r]));
     ra_destreg(as, ir, r);
+    return r;
   }
-  return allow;
 }
 
 /* -- Tail of trace ------------------------------------------------------- */

+ 6 - 6
libs/LuaJIT/src/lj_asm_mips.h

@@ -1337,8 +1337,8 @@ static void asm_fload(ASMState *as, IRIns *ir)
       }
     }
     ofs = field_ofs[ir->op2];
+    lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD");
   }
-  lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD");
   emit_tsi(as, mi, dest, idx, ofs);
 }
 
@@ -1894,7 +1894,7 @@ static void asm_arithov(ASMState *as, IRIns *ir)
   lj_assertA(!irt_is64(ir->t), "bad usage");
   if (irref_isk(ir->op2)) {
     int k = IR(ir->op2)->i;
-    if (ir->o == IR_SUBOV) k = -k;
+    if (ir->o == IR_SUBOV) k = (int)(~(unsigned int)k+1u);
     if (checki16(k)) {  /* (dest < left) == (k >= 0 ? 1 : 0) */
       left = ra_alloc1(as, ir->op1, RSET_GPR);
       asm_guard(as, k >= 0 ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
@@ -2667,7 +2667,7 @@ static void asm_head_root_base(ASMState *as)
 }
 
 /* Coalesce BASE register for a side trace. */
-static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
 {
   IRIns *ir = IR(REF_BASE);
   Reg r = ir->r;
@@ -2676,15 +2676,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (irp->r == r) {
-      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
+      return r;  /* Same BASE register already coalesced. */
     } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
-      rset_clear(allow, irp->r);
       emit_move(as, r, irp->r);  /* Move from coalesced parent reg. */
+      return irp->r;
     } else {
       emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
     }
   }
-  return allow;
+  return RID_NONE;
 }
 
 /* -- Tail of trace ------------------------------------------------------- */

+ 6 - 5
libs/LuaJIT/src/lj_asm_ppc.h

@@ -235,7 +235,8 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
 {
   IRRef lref = ir->op1, rref = ir->op2;
   IRIns *irm;
-  if (lref != rref &&
+  if ((as->flags & JIT_F_OPT_FMA) &&
+      lref != rref &&
       ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
 	ra_noreg(irm->r)) ||
        (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
@@ -2185,7 +2186,7 @@ static void asm_head_root_base(ASMState *as)
 }
 
 /* Coalesce BASE register for a side trace. */
-static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
 {
   IRIns *ir = IR(REF_BASE);
   Reg r = ir->r;
@@ -2194,15 +2195,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (irp->r == r) {
-      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
+      return r;  /* Same BASE register already coalesced. */
     } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
-      rset_clear(allow, irp->r);
       emit_mr(as, r, irp->r);  /* Move from coalesced parent reg. */
+      return irp->r;
     } else {
       emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
     }
   }
-  return allow;
+  return RID_NONE;
 }
 
 /* -- Tail of trace ------------------------------------------------------- */

+ 4 - 4
libs/LuaJIT/src/lj_asm_x86.h

@@ -2877,7 +2877,7 @@ static void asm_head_root_base(ASMState *as)
 }
 
 /* Coalesce or reload BASE register for a side trace. */
-static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
 {
   IRIns *ir = IR(REF_BASE);
   Reg r = ir->r;
@@ -2886,16 +2886,16 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (irp->r == r) {
-      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
+      return r;  /* Same BASE register already coalesced. */
     } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
       /* Move from coalesced parent reg. */
-      rset_clear(allow, irp->r);
       emit_rr(as, XO_MOV, r|REX_GC64, irp->r);
+      return irp->r;
     } else {
       emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
     }
   }
-  return allow;
+  return RID_NONE;
 }
 
 /* -- Tail of trace ------------------------------------------------------- */

+ 2 - 1
libs/LuaJIT/src/lj_bcwrite.c

@@ -189,7 +189,8 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
       goto save_int;
     } else {
       /* Write a 33 bit ULEB128 for the int (lsb=0) or loword (lsb=1). */
-      if (!LJ_DUALNUM) {  /* Narrow number constants to integers. */
+      if (!LJ_DUALNUM && o->u32.hi != LJ_KEYINDEX) {
+	/* Narrow number constants to integers. */
 	lua_Number num = numV(o);
 	k = lj_num2int(num);
 	if (num == (lua_Number)k) {  /* -0 is never a constant. */

+ 1 - 1
libs/LuaJIT/src/lj_carith.c

@@ -207,7 +207,7 @@ static int carith_int64(lua_State *L, CTState *cts, CDArith *ca, MMS mm)
       else
 	*up = lj_carith_powu64(u0, u1);
       break;
-    case MM_unm: *up = (uint64_t)-(int64_t)u0; break;
+    case MM_unm: *up = ~u0+1u; break;
     default:
       lj_assertL(0, "bad metamethod %d", mm);
       break;

+ 3 - 3
libs/LuaJIT/src/lj_ccallback.c

@@ -171,13 +171,13 @@ static void *callback_mcode_init(global_State *g, uint32_t *page)
 static void *callback_mcode_init(global_State *g, uint32_t *page)
 {
   uint32_t *p = page;
-  void *target = (void *)lj_vm_ffi_callback;
+  ASMFunction target = lj_vm_ffi_callback;
   MSize slot;
   *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4));
   *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5));
-  *p++ = A64I_LE(A64I_BR | A64F_N(RID_X11));
+  *p++ = A64I_LE(A64I_BR_AUTH | A64F_N(RID_X11));
   *p++ = A64I_LE(A64I_NOP);
-  ((void **)p)[0] = target;
+  ((ASMFunction *)p)[0] = target;
   ((void **)p)[1] = g;
   p += 4;
   for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {

+ 1 - 1
libs/LuaJIT/src/lj_cparse.c

@@ -488,7 +488,7 @@ static void cp_expr_prefix(CPState *cp, CPValue *k)
   } else if (cp_opt(cp, '+')) {
     cp_expr_unary(cp, k);  /* Nothing to do (well, integer promotion). */
   } else if (cp_opt(cp, '-')) {
-    cp_expr_unary(cp, k); k->i32 = -k->i32;
+    cp_expr_unary(cp, k); k->i32 = (int32_t)(~(uint32_t)k->i32+1);
   } else if (cp_opt(cp, '~')) {
     cp_expr_unary(cp, k); k->i32 = ~k->i32;
   } else if (cp_opt(cp, '!')) {

+ 6 - 1
libs/LuaJIT/src/lj_crecord.c

@@ -1504,9 +1504,13 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd)
 	if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
 	goto ok;
       } else if (ctype_isfunc(ct->info)) {
+	CTypeID id0 = i ? ctype_typeid(cts, s[0]) : 0;
 	tr = emitir(IRT(IR_FLOAD, IRT_PTR), tr, IRFL_CDATA_PTR);
 	ct = ctype_get(cts,
 	  lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|id), CTSIZE_PTR));
+	if (i) {
+	  s[0] = ctype_get(cts, id0);  /* cts->tab may have been reallocated. */
+	}
 	goto ok;
       } else {
 	tr = emitir(IRT(IR_ADD, IRT_PTR), tr, lj_ir_kintp(J, sizeof(GCcdata)));
@@ -1875,7 +1879,8 @@ TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr)
   } else {
     n = id ? 16 : 8;
   }
-  if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
+  if (n < 0) { n = (int32_t)(~n+1u); sf |= STRFMT_F_UPPER; }
+  if ((uint32_t)n > 254) n = 254;
   sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
   if (id) {
     tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);

+ 13 - 1
libs/LuaJIT/src/lj_ctype.c

@@ -191,8 +191,20 @@ CTypeID lj_ctype_intern(CTState *cts, CTInfo info, CTSize size)
   }
   id = cts->top;
   if (LJ_UNLIKELY(id >= cts->sizetab)) {
+#ifdef LUAJIT_CTYPE_CHECK_ANCHOR
+    CType *ct;
+#endif
     if (id >= CTID_MAX) lj_err_msg(cts->L, LJ_ERR_TABOV);
+#ifdef LUAJIT_CTYPE_CHECK_ANCHOR
+    ct = lj_mem_newvec(cts->L, id+1, CType);
+    memcpy(ct, cts->tab, id*sizeof(CType));
+    memset(cts->tab, 0, id*sizeof(CType));
+    lj_mem_freevec(cts->g, cts->tab, cts->sizetab, CType);
+    cts->tab = ct;
+    cts->sizetab = id+1;
+#else
     lj_mem_growvec(cts->L, cts->tab, cts->sizetab, CTID_MAX, CType);
+#endif
   }
   cts->top = id+1;
   cts->tab[id].info = info;
@@ -570,7 +582,7 @@ GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned)
   if (isunsigned) {
     *--p = 'U';
   } else if ((int64_t)n < 0) {
-    n = (uint64_t)-(int64_t)n;
+    n = ~n+1u;
     sign = 1;
   }
   do { *--p = (char)('0' + n % 10); } while (n /= 10);

+ 6 - 3
libs/LuaJIT/src/lj_debug.c

@@ -101,9 +101,12 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
   pos = proto_bcpos(pt, ins) - 1;
 #if LJ_HASJIT
   if (pos > pt->sizebc) {  /* Undo the effects of lj_trace_exit for JLOOP. */
-    GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins));
-    lj_assertL(bc_isret(bc_op(ins[-1])), "return bytecode expected");
-    pos = proto_bcpos(pt, mref(T->startpc, const BCIns));
+    if (bc_isret(bc_op(ins[-1]))) {
+      GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins));
+      pos = proto_bcpos(pt, mref(T->startpc, const BCIns));
+    } else {
+      pos = NO_BCPOS;  /* Punt in case of stack overflow for stitched trace. */
+    }
   }
 #endif
   return pos;

+ 1 - 1
libs/LuaJIT/src/lj_emit_arm.h

@@ -157,7 +157,7 @@ static int emit_kdelta2(ASMState *as, Reg rd, int32_t i)
       if (other) {
 	int32_t delta = i - other;
 	uint32_t sh, inv = 0, k2, k;
-	if (delta < 0) { delta = -delta; inv = ARMI_ADD^ARMI_SUB; }
+	if (delta < 0) { delta = (int32_t)(~(uint32_t)delta+1u); inv = ARMI_ADD^ARMI_SUB; }
 	sh = lj_ffs(delta) & ~1;
 	k2 = emit_isk12(0, delta & (255 << sh));
 	k = emit_isk12(0, delta & ~(255 << sh));

+ 15 - 8
libs/LuaJIT/src/lj_emit_arm64.h

@@ -27,8 +27,8 @@ static uint64_t get_k64val(ASMState *as, IRRef ref)
 /* Encode constant in K12 format for data processing instructions. */
 static uint32_t emit_isk12(int64_t n)
 {
-  uint64_t k = (n < 0) ? -n : n;
-  uint32_t m = (n < 0) ? 0x40000000 : 0;
+  uint64_t k = n < 0 ? ~(uint64_t)n+1u : (uint64_t)n;
+  uint32_t m = n < 0 ? 0x40000000 : 0;
   if (k < 0x1000) {
     return A64I_K12|m|A64F_U12(k);
   } else if ((k & 0xfff000) == k) {
@@ -143,7 +143,7 @@ static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs)
       goto nopair;
     }
     if (ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (63<<sc)) {
-      *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) |
+      *as->mcp = aip | A64F_N(rn) | (((ofsm >> sc) & 0x7f) << 15) |
 	(ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000));
       return;
     }
@@ -177,7 +177,7 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
 	emit_dm(as, A64I_MOVx, rd, r);
 	return 1;
       } else {
-	uint32_t k12 = emit_isk12(delta < 0 ? -delta : delta);
+	uint32_t k12 = emit_isk12(delta < 0 ? (int64_t)(~(uint64_t)delta+1u) : delta);
 	if (k12) {
 	  emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r);
 	  return 1;
@@ -348,16 +348,22 @@ static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target)
 
 #define emit_jmp(as, target)	emit_branch(as, A64I_B, (target))
 
-static void emit_call(ASMState *as, void *target)
+static void emit_call(ASMState *as, ASMFunction target)
 {
   MCode *p = --as->mcp;
-  ptrdiff_t delta = (char *)target - (char *)p;
+#if LJ_ABI_PAUTH
+  char *targetp = ptrauth_auth_data((char *)target,
+				    ptrauth_key_function_pointer, 0);
+#else
+  char *targetp = (char *)target;
+#endif
+  ptrdiff_t delta = targetp - (char *)p;
   if (A64F_S_OK(delta>>2, 26)) {
     *p = A64I_BL | A64F_S26(delta>>2);
   } else {  /* Target out of range: need indirect call. But don't use R0-R7. */
     Reg r = ra_allock(as, i64ptr(target),
 		      RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
-    *p = A64I_BLR | A64F_N(r);
+    *p = A64I_BLR_AUTH | A64F_N(r);
   }
 }
 
@@ -417,7 +423,8 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
 {
   if (ofs)
     emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r,
-		 ofs < 0 ? -ofs : ofs, rset_exclude(RSET_GPR, r));
+		 ofs < 0 ? (int32_t)(~(uint32_t)ofs+1u) : ofs,
+		 rset_exclude(RSET_GPR, r));
 }
 
 #define emit_spsub(as, ofs)	emit_addptr(as, RID_SP, -(ofs))

+ 23 - 7
libs/LuaJIT/src/lj_err.c

@@ -329,12 +329,12 @@ static void err_unwind_win_jit(global_State *g, int errcode)
   memset(&hist, 0, sizeof(hist));
   RtlCaptureContext(&ctx);
   while (1) {
-    uintptr_t frame, base, addr = ctx.CONTEXT_REG_PC;
+    DWORD64 frame, base, addr = ctx.CONTEXT_REG_PC;
     void *hdata;
     PRUNTIME_FUNCTION func = RtlLookupFunctionEntry(addr, &base, &hist);
     if (!func) {  /* Found frame without .pdata: must be JIT-compiled code. */
       ExitNo exitno;
-      uintptr_t stub = lj_trace_unwind(G2J(g), addr - sizeof(MCode), &exitno);
+      uintptr_t stub = lj_trace_unwind(G2J(g), (uintptr_t)(addr - sizeof(MCode)), &exitno);
       if (stub) {  /* Jump to side exit to unwind the trace. */
 	ctx.CONTEXT_REG_PC = stub;
 	G2J(g)->exitcode = errcode;
@@ -444,10 +444,10 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
     if ((actions & _UA_FORCE_UNWIND)) {
       return _URC_CONTINUE_UNWIND;
     } else if (cf) {
+      ASMFunction ip;
       _Unwind_SetGR(ctx, LJ_TARGET_EHRETREG, errcode);
-      _Unwind_SetIP(ctx, (uintptr_t)(cframe_unwind_ff(cf) ?
-				     lj_vm_unwind_ff_eh :
-				     lj_vm_unwind_c_eh));
+      ip = cframe_unwind_ff(cf) ? lj_vm_unwind_ff_eh : lj_vm_unwind_c_eh;
+      _Unwind_SetIP(ctx, (uintptr_t)lj_ptr_strip(ip));
       return _URC_INSTALL_CONTEXT;
     }
 #if LJ_TARGET_X86ORX64
@@ -580,9 +580,17 @@ extern void __deregister_frame(const void *);
 
 uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info)
 {
-  void **handler;
+  ASMFunction handler = (ASMFunction)err_unwind_jit;
   memcpy(info, err_frame_jit_template, sizeof(err_frame_jit_template));
-  handler = (void *)err_unwind_jit;
+#if LJ_ABI_PAUTH
+#if LJ_TARGET_ARM64
+  handler = ptrauth_auth_and_resign(handler,
+    ptrauth_key_function_pointer, 0,
+    ptrauth_key_process_independent_code, info + ERR_FRAME_JIT_OFS_HANDLER);
+#else
+#error "missing pointer authentication support for this architecture"
+#endif
+#endif
   memcpy(info + ERR_FRAME_JIT_OFS_HANDLER, &handler, sizeof(handler));
   *(uint32_t *)(info + ERR_FRAME_JIT_OFS_CODE_SIZE) =
     (uint32_t)(sz - sizeof(err_frame_jit_template) - (info - (uint8_t *)base));
@@ -777,6 +785,10 @@ LJ_NOINLINE void lj_err_mem(lua_State *L)
 {
   if (L->status == LUA_ERRERR+1)  /* Don't touch the stack during lua_open. */
     lj_vm_unwind_c(L->cframe, LUA_ERRMEM);
+  if (LJ_HASJIT) {
+    TValue *base = tvref(G(L)->jit_base);
+    if (base) L->base = base;
+  }
   if (curr_funcisL(L)) L->top = curr_topL(L);
   setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRMEM));
   lj_err_throw(L, LUA_ERRMEM);
@@ -871,6 +883,10 @@ LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...)
   const char *msg;
   va_list argp;
   va_start(argp, em);
+  if (LJ_HASJIT) {
+    TValue *base = tvref(G(L)->jit_base);
+    if (base) L->base = base;
+  }
   if (curr_funcisL(L)) L->top = curr_topL(L);
   msg = lj_strfmt_pushvf(L, err2msg(em), argp);
   va_end(argp);

+ 8 - 2
libs/LuaJIT/src/lj_gc.c

@@ -27,6 +27,7 @@
 #include "lj_trace.h"
 #include "lj_dispatch.h"
 #include "lj_vm.h"
+#include "lj_vmevent.h"
 
 #define GCSTEPSIZE	1024u
 #define GCSWEEPMAX	40
@@ -521,8 +522,13 @@ static void gc_call_finalizer(global_State *g, lua_State *L,
   hook_restore(g, oldh);
   if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g);
   g->gc.threshold = oldt;  /* Restore GC threshold. */
-  if (errcode)
-    lj_err_throw(L, errcode);  /* Propagate errors. */
+  if (errcode) {
+    ptrdiff_t errobj = savestack(L, L->top-1);  /* Stack may be resized. */
+    lj_vmevent_send(L, ERRFIN,
+      copyTV(L, L->top++, restorestack(L, errobj));
+    );
+    L->top--;
+  }
 }
 
 /* Finalize one userdata or cdata object from the mmudata list. */

+ 6 - 1
libs/LuaJIT/src/lj_jit.h

@@ -87,10 +87,11 @@
 #define JIT_F_OPT_ABC		(JIT_F_OPT << 7)
 #define JIT_F_OPT_SINK		(JIT_F_OPT << 8)
 #define JIT_F_OPT_FUSE		(JIT_F_OPT << 9)
+#define JIT_F_OPT_FMA		(JIT_F_OPT << 10)
 
 /* Optimizations names for -O. Must match the order above. */
 #define JIT_F_OPTSTRING	\
-  "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse"
+  "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse\3fma"
 
 /* Optimization levels set a fixed combination of flags. */
 #define JIT_F_OPT_0	0
@@ -99,6 +100,7 @@
 #define JIT_F_OPT_3	(JIT_F_OPT_2|\
   JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE)
 #define JIT_F_OPT_DEFAULT	JIT_F_OPT_3
+/* Note: FMA is not set by default. */
 
 /* -- JIT engine parameters ----------------------------------------------- */
 
@@ -271,6 +273,9 @@ typedef struct GCtrace {
   BCIns startins;	/* Original bytecode of starting instruction. */
   MSize szmcode;	/* Size of machine code. */
   MCode *mcode;		/* Start of machine code. */
+#if LJ_ABI_PAUTH
+  ASMFunction mcauth;	/* Start of machine code, with ptr auth applied. */
+#endif
   MSize mcloop;		/* Offset of loop start in machine code. */
   uint16_t nchild;	/* Number of child traces (root trace only). */
   uint16_t spadjust;	/* Stack pointer adjustment (offset in bytes). */

+ 15 - 1
libs/LuaJIT/src/lj_obj.h

@@ -413,7 +413,7 @@ typedef struct GCproto {
 #define PROTO_UV_IMMUTABLE	0x4000	/* Immutable upvalue. */
 
 #define proto_kgc(pt, idx) \
-  check_exp((uintptr_t)(intptr_t)(idx) >= (uintptr_t)-(intptr_t)(pt)->sizekgc, \
+  check_exp((uintptr_t)(intptr_t)(idx) >= ~(uintptr_t)(pt)->sizekgc+1u, \
 	    gcref(mref((pt)->k, GCRef)[(idx)]))
 #define proto_knumtv(pt, idx) \
   check_exp((uintptr_t)(idx) < (pt)->sizekn, &mref((pt)->k, TValue)[(idx)])
@@ -1042,4 +1042,18 @@ LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1];
 LJ_FUNC int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2);
 LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(global_State *g, cTValue *o);
 
+#if LJ_ABI_PAUTH
+#if LJ_TARGET_ARM64
+#include <ptrauth.h>
+#define lj_ptr_sign(ptr, ctx) \
+  ptrauth_sign_unauthenticated((ptr), ptrauth_key_function_pointer, (ctx))
+#define lj_ptr_strip(ptr) ptrauth_strip((ptr), ptrauth_key_function_pointer)
+#else
+#error "No support for pointer authentication for this architecture"
+#endif
+#else
+#define lj_ptr_sign(ptr, ctx) (ptr)
+#define lj_ptr_strip(ptr) (ptr)
+#endif
+
 #endif

+ 3 - 3
libs/LuaJIT/src/lj_opt_fold.c

@@ -267,7 +267,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
   case IR_SUB: k1 -= k2; break;
   case IR_MUL: k1 *= k2; break;
   case IR_MOD: k1 = lj_vm_modi(k1, k2); break;
-  case IR_NEG: k1 = -k1; break;
+  case IR_NEG: k1 = (int32_t)(~(uint32_t)k1+1u); break;
   case IR_BAND: k1 &= k2; break;
   case IR_BOR: k1 |= k2; break;
   case IR_BXOR: k1 ^= k2; break;
@@ -1366,7 +1366,7 @@ LJFOLDF(simplify_intsub_k)
   if (fright->i == 0)  /* i - 0 ==> i */
     return LEFTFOLD;
   fins->o = IR_ADD;  /* i - k ==> i + (-k) */
-  fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i);  /* Overflow for -2^31 ok. */
+  fins->op2 = (IRRef1)lj_ir_kint(J, (int32_t)(~(uint32_t)fright->i+1u));  /* Overflow for -2^31 ok. */
   return RETRYFOLD;
 }
 
@@ -1397,7 +1397,7 @@ LJFOLDF(simplify_intsub_k64)
   if (k == 0)  /* i - 0 ==> i */
     return LEFTFOLD;
   fins->o = IR_ADD;  /* i - k ==> i + (-k) */
-  fins->op2 = (IRRef1)lj_ir_kint64(J, (uint64_t)-(int64_t)k);
+  fins->op2 = (IRRef1)lj_ir_kint64(J, ~k+1u);
   return RETRYFOLD;
 }
 

+ 8 - 2
libs/LuaJIT/src/lj_opt_mem.c

@@ -186,6 +186,7 @@ static TRef fwd_ahload(jit_State *J, IRRef xref)
 	fwd_aa_tab_clear(J, tab, tab)) {
       /* A NEWREF with a number key may end up pointing to the array part.
       ** But it's referenced from HSTORE and not found in the ASTORE chain.
+      ** Or a NEWREF may rehash the table and move unrelated number keys.
       ** For now simply consider this a conflict without forwarding anything.
       */
       if (xr->o == IR_AREF) {
@@ -196,6 +197,11 @@ static TRef fwd_ahload(jit_State *J, IRRef xref)
 	    goto cselim;
 	  ref2 = newref->prev;
 	}
+      } else {
+	IRIns *key = IR(xr->op2);
+	if (key->o == IR_KSLOT) key = IR(key->op1);
+	if (irt_isnum(key->t) && J->chain[IR_NEWREF] > tab)
+	  goto cselim;
       }
       /* NEWREF inhibits CSE for HREF, and dependent FLOADs from HREFK/AREF.
       ** But the above search for conflicting stores was limited by xref.
@@ -223,8 +229,8 @@ static TRef fwd_ahload(jit_State *J, IRRef xref)
 	if (key->o == IR_KSLOT) key = IR(key->op1);
 	lj_ir_kvalue(J->L, &keyv, key);
 	tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv);
-	lj_assertJ(itype2irt(tv) == irt_type(fins->t),
-		   "mismatched type in constant table");
+	if (itype2irt(tv) != irt_type(fins->t))
+	  return 0;  /* Type instability in loop-carried dependency. */
 	if (irt_isnum(fins->t))
 	  return lj_ir_knum_u64(J, tv->u64);
 	else if (LJ_DUALNUM && irt_isint(fins->t))

+ 7 - 6
libs/LuaJIT/src/lj_parse.c

@@ -964,22 +964,22 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e)
 #if LJ_HASFFI
       if (e->k == VKCDATA) {  /* Fold in-place since cdata is not interned. */
 	GCcdata *cd = cdataV(&e->u.nval);
-	int64_t *p = (int64_t *)cdataptr(cd);
+	uint64_t *p = (uint64_t *)cdataptr(cd);
 	if (cd->ctypeid == CTID_COMPLEX_DOUBLE)
-	  p[1] ^= (int64_t)U64x(80000000,00000000);
+	  p[1] ^= U64x(80000000,00000000);
 	else
-	  *p = -*p;
+	  *p = ~*p+1u;
 	return;
       } else
 #endif
       if (expr_isnumk(e) && !expr_numiszero(e)) {  /* Avoid folding to -0. */
 	TValue *o = expr_numtv(e);
 	if (tvisint(o)) {
-	  int32_t k = intV(o);
-	  if (k == -k)
+	  int32_t k = intV(o), negk = (int32_t)(~(uint32_t)k+1u);
+	  if (k == negk)
 	    setnumV(o, -(lua_Number)k);
 	  else
-	    setintV(o, -k);
+	    setintV(o, negk);
 	  return;
 	} else {
 	  o->u64 ^= U64x(80000000,00000000);
@@ -2518,6 +2518,7 @@ static int predict_next(LexState *ls, FuncState *fs, BCPos pc)
   cTValue *o;
   switch (bc_op(ins)) {
   case BC_MOV:
+    if (bc_d(ins) >= fs->nactvar) return 0;
     name = gco2str(gcref(var_get(ls, fs, bc_d(ins)).name));
     break;
   case BC_UGET:

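Besides the same negation idiom, the unary-minus folding above negates floating-point constants by XOR-ing the IEEE-754 sign bit of the stored u64 (o->u64 ^= U64x(80000000,00000000)), which also covers -0.0 and the imaginary part of a complex double. A standalone restatement; negate_bits() is a hypothetical helper used only for illustration:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Negate a double by flipping its sign bit, as the parser does on the raw
** 64-bit representation of a number constant. */
static double negate_bits(double d)
{
  uint64_t u;
  memcpy(&u, &d, sizeof(u));
  u ^= 0x8000000000000000ull;  /* U64x(80000000,00000000) */
  memcpy(&d, &u, sizeof(d));
  return d;
}

int main(void)
{
  printf("%g %g %g\n", negate_bits(1.25), negate_bits(-0.0), negate_bits(0.0));
  /* Prints: -1.25 0 -0 */
  return 0;
}
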
+ 12 - 7
libs/LuaJIT/src/lj_record.c

@@ -116,6 +116,7 @@ static void rec_check_slots(jit_State *J)
       cTValue *tv = &base[s];
       IRRef ref = tref_ref(tr);
       IRIns *ir = NULL;  /* Silence compiler. */
+      lj_assertJ(tv < J->L->top, "slot %d above top of Lua stack", s);
       if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) {
 	lj_assertJ(ref >= J->cur.nk && ref < J->cur.nins,
 		   "slot %d ref %04d out of range", s, ref - REF_BIAS);
@@ -1600,6 +1601,8 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
 	TRef key = ix->key;
 	if (tref_isinteger(key))  /* NEWREF needs a TValue as a key. */
 	  key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT);
+	else if (tref_isnumber(key) && tref_isk(key) && tvismzero(&ix->keyv))
+	  key = lj_ir_knum_zero(J);  /* Canonicalize -0.0 to +0.0. */
 	xref = emitir(IRT(IR_NEWREF, IRT_PGC), ix->tab, key);
 	keybarrier = 0;  /* NEWREF already takes care of the key barrier. */
 #ifdef LUAJIT_ENABLE_TABLE_BUMP
@@ -1939,12 +1942,14 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
   if (J->framedepth > 0) {  /* Simple case: varargs defined on-trace. */
     ptrdiff_t i;
     if (nvararg < 0) nvararg = 0;
-    if (nresults == -1) {
-      nresults = nvararg;
-      J->maxslot = dst + (BCReg)nvararg;
-    } else if (dst + nresults > J->maxslot) {
+    if (nresults != 1) {
+      if (nresults == -1) nresults = nvararg;
       J->maxslot = dst + (BCReg)nresults;
+    } else if (dst >= J->maxslot) {
+      J->maxslot = dst + 1;
     }
+    if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS)
+      lj_trace_err(J, LJ_TRERR_STACKOV);
     for (i = 0; i < nresults; i++)
       J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1 - LJ_FR2) : TREF_NIL;
   } else {  /* Unknown number of varargs passed to trace. */
@@ -1972,8 +1977,9 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
       }
       for (i = nvararg; i < nresults; i++)
 	J->base[dst+i] = TREF_NIL;
-      if (dst + (BCReg)nresults > J->maxslot)
+      if (nresults != 1 || dst >= J->maxslot) {
 	J->maxslot = dst + (BCReg)nresults;
+      }
     } else if (select_detect(J)) {  /* y = select(x, ...) */
       TRef tridx = J->base[dst-1];
       TRef tr = TREF_NIL;
@@ -2022,8 +2028,6 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
       lj_trace_err_info(J, LJ_TRERR_NYIBC);
     }
   }
-  if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS)
-    lj_trace_err(J, LJ_TRERR_STACKOV);
 }
 
 /* -- Record allocations -------------------------------------------------- */
@@ -2475,6 +2479,7 @@ void lj_record_ins(jit_State *J)
 
   case BC_TSETM:
     rec_tsetm(J, ra, (BCReg)(J->L->top - J->L->base), (int32_t)rcv->u32.lo);
+    J->maxslot = ra;  /* The table slot at ra-1 is the highest used slot. */
     break;
 
   case BC_TNEW:

+ 1 - 1
libs/LuaJIT/src/lj_state.c

@@ -114,7 +114,7 @@ void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need)
       n = LJ_STACK_MAX;
   }
   resizestack(L, n);
-  if (L->stacksize > LJ_STACK_MAXEX)
+  if (L->stacksize >= LJ_STACK_MAXEX)
     lj_err_msg(L, LJ_ERR_STKOV);
 }
 

+ 2 - 2
libs/LuaJIT/src/lj_strfmt.c

@@ -102,7 +102,7 @@ retlit:
 char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k)
 {
   uint32_t u = (uint32_t)k;
-  if (k < 0) { u = (uint32_t)-k; *p++ = '-'; }
+  if (k < 0) { u = ~u+1u; *p++ = '-'; }
   if (u < 10000) {
     if (u < 10) goto dig1;
     if (u < 100) goto dig2;
@@ -287,7 +287,7 @@ SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
   /* Figure out signed prefixes. */
   if (STRFMT_TYPE(sf) == STRFMT_INT) {
     if ((int64_t)k < 0) {
-      k = (uint64_t)-(int64_t)k;
+      k = ~k+1u;
       prefix = 256 + '-';
     } else if ((sf & STRFMT_F_PLUS)) {
       prefix = 256 + '+';

+ 13 - 13
libs/LuaJIT/src/lj_strscan.c

@@ -124,19 +124,19 @@ static StrScanFmt strscan_hex(const uint8_t *p, TValue *o,
   case STRSCAN_INT:
     if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg &&
 	!(x == 0 && neg)) {
-      o->i = neg ? -(int32_t)x : (int32_t)x;
+      o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
       return STRSCAN_INT;  /* Fast path for 32 bit integers. */
     }
     if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; break; }
     /* fallthrough */
   case STRSCAN_U32:
     if (dig > 8) return STRSCAN_ERROR;
-    o->i = neg ? -(int32_t)x : (int32_t)x;
+    o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
     return STRSCAN_U32;
   case STRSCAN_I64:
   case STRSCAN_U64:
     if (dig > 16) return STRSCAN_ERROR;
-    o->u64 = neg ? (uint64_t)-(int64_t)x : x;
+    o->u64 = neg ? ~x+1u : x;
     return fmt;
   default:
     break;
@@ -168,12 +168,12 @@ static StrScanFmt strscan_oct(const uint8_t *p, TValue *o,
     /* fallthrough */
   case STRSCAN_U32:
     if ((x >> 32)) return STRSCAN_ERROR;
-    o->i = neg ? -(int32_t)x : (int32_t)x;
+    o->i = neg ? (int32_t)(~(uint32_t)x+1u) : (int32_t)x;
     break;
   default:
   case STRSCAN_I64:
   case STRSCAN_U64:
-    o->u64 = neg ? (uint64_t)-(int64_t)x : x;
+    o->u64 = neg ? ~x+1u : x;
     break;
   }
   return fmt;
@@ -229,18 +229,18 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o,
       switch (fmt) {
       case STRSCAN_INT:
 	if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg) {
-	  o->i = neg ? -(int32_t)x : (int32_t)x;
+	  o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
 	  return STRSCAN_INT;  /* Fast path for 32 bit integers. */
 	}
 	if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; goto plainnumber; }
 	/* fallthrough */
       case STRSCAN_U32:
 	if ((x >> 32) != 0) return STRSCAN_ERROR;
-	o->i = neg ? -(int32_t)x : (int32_t)x;
+	o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
 	return STRSCAN_U32;
       case STRSCAN_I64:
       case STRSCAN_U64:
-	o->u64 = neg ? (uint64_t)-(int64_t)x : x;
+	o->u64 = neg ? ~x+1u : x;
 	return fmt;
       default:
       plainnumber:  /* Fast path for plain numbers < 2^63. */
@@ -348,18 +348,18 @@ static StrScanFmt strscan_bin(const uint8_t *p, TValue *o,
   switch (fmt) {
   case STRSCAN_INT:
     if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg) {
-      o->i = neg ? -(int32_t)x : (int32_t)x;
+      o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
       return STRSCAN_INT;  /* Fast path for 32 bit integers. */
     }
     if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; break; }
     /* fallthrough */
   case STRSCAN_U32:
     if (dig > 32) return STRSCAN_ERROR;
-    o->i = neg ? -(int32_t)x : (int32_t)x;
+    o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
     return STRSCAN_U32;
   case STRSCAN_I64:
   case STRSCAN_U64:
-    o->u64 = neg ? (uint64_t)-(int64_t)x : x;
+    o->u64 = neg ? ~x+1u : x;
     return fmt;
   default:
     break;
@@ -468,7 +468,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
 	if (xx >= STRSCAN_MAXEXP) return STRSCAN_ERROR;
 	p++;
       }
-      ex += negx ? -(int32_t)xx : (int32_t)xx;
+      ex += negx ? (int32_t)(~xx+1u) : (int32_t)xx;
     }
 
     /* Parse suffix. */
@@ -507,7 +507,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
 	o->n = -0.0;
 	return STRSCAN_NUM;
       } else {
-	o->i = neg ? -(int32_t)x : (int32_t)x;
+	o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
 	return STRSCAN_INT;
       }
     }

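The fast paths above keep the existing asymmetric range check x < 0x80000000u+neg: a positive literal may only reach 2^31-1, but a negated one may reach 2^31, because ~x+1 then lands exactly on INT32_MIN. A small standalone check of that boundary; fits_int32() is illustrative only:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the range check in the strscan fast paths: positive values up to
** 2^31-1 are accepted, negated values up to 2^31. */
static int fits_int32(uint64_t x, int neg)
{
  return x < 0x80000000u + (uint32_t)neg;
}

int main(void)
{
  printf("%d\n", fits_int32(0x7fffffffu, 0));  /* 1:  2147483647 fits */
  printf("%d\n", fits_int32(0x80000000u, 0));  /* 0:  2147483648 does not */
  printf("%d\n", fits_int32(0x80000000u, 1));  /* 1: -2147483648 fits */
  printf("%d\n", (int)(int32_t)(~(uint32_t)0x80000000u + 1u));  /* INT32_MIN */
  return 0;
}
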
+ 6 - 0
libs/LuaJIT/src/lj_target_arm64.h

@@ -260,6 +260,9 @@ typedef enum A64Ins {
   A64I_CBZ = 0x34000000,
   A64I_CBNZ = 0x35000000,
 
+  A64I_BRAAZ = 0xd61f081f,
+  A64I_BLRAAZ = 0xd63f081f,
+
   A64I_NOP = 0xd503201f,
 
   /* FP */
@@ -317,6 +320,9 @@ typedef enum A64Ins {
   A64I_FMOV_DI = 0x1e601000,
 } A64Ins;
 
+#define A64I_BR_AUTH	(LJ_ABI_PAUTH ? A64I_BRAAZ : A64I_BR)
+#define A64I_BLR_AUTH	(LJ_ABI_PAUTH ? A64I_BLRAAZ : A64I_BLR)
+
 typedef enum A64Shift {
   A64SH_LSL, A64SH_LSR, A64SH_ASR, A64SH_ROR
 } A64Shift;

+ 23 - 17
libs/LuaJIT/src/lj_trace.c

@@ -153,6 +153,9 @@ static void trace_save(jit_State *J, GCtrace *T)
   newwhite(J2G(J), T);
   T->gct = ~LJ_TTRACE;
   T->ir = (IRIns *)p - J->cur.nk;  /* The IR has already been copied above. */
+#if LJ_ABI_PAUTH
+  T->mcauth = lj_ptr_sign((ASMFunction)T->mcode, T);
+#endif
   p += szins;
   TRACE_APPENDVEC(snap, nsnap, SnapShot)
   TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry)
@@ -428,6 +431,12 @@ static void trace_start(jit_State *J)
     return;
   }
 
+  /* Ensuring forward progress for BC_ITERN can trigger hotcount again. */
+  if (!J->parent && bc_op(*J->pc) == BC_JLOOP) {  /* Already compiled. */
+    J->state = LJ_TRACE_IDLE;  /* Silently ignored. */
+    return;
+  }
+
   /* Get a new trace number. */
   traceno = trace_findfree(J);
   if (LJ_UNLIKELY(traceno == 0)) {  /* No free trace? */
@@ -864,7 +873,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
   ExitDataCP exd;
   int errcode, exitcode = J->exitcode;
   TValue exiterr;
-  const BCIns *pc;
+  const BCIns *pc, *retpc;
   void *cf;
   GCtrace *T;
 
@@ -916,22 +925,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
   } else {
     trace_hotside(J, pc);
   }
-  if (bc_op(*pc) == BC_JLOOP) {
-    BCIns *retpc = &traceref(J, bc_d(*pc))->startins;
-    int isret = bc_isret(bc_op(*retpc));
-    if (isret || bc_op(*retpc) == BC_ITERN) {
-      if (J->state == LJ_TRACE_RECORD) {
-	J->patchins = *pc;
-	J->patchpc = (BCIns *)pc;
-	*J->patchpc = *retpc;
-	J->bcskip = 1;
-      } else if (isret) {
-	pc = retpc;
-	setcframe_pc(cf, pc);
-      }
-    }
-  }
-  /* Return MULTRES or 0. */
+  /* Return MULTRES or 0 or -17. */
   ERRNO_RESTORE
   switch (bc_op(*pc)) {
   case BC_CALLM: case BC_CALLMT:
@@ -940,6 +934,18 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
     return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc));
   case BC_TSETM:
     return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc));
+  case BC_JLOOP:
+    retpc = &traceref(J, bc_d(*pc))->startins;
+    if (bc_isret(bc_op(*retpc)) || bc_op(*retpc) == BC_ITERN) {
+      /* Dispatch to original ins to ensure forward progress. */
+      if (J->state != LJ_TRACE_RECORD) return -17;
+      /* Unpatch bytecode when recording. */
+      J->patchins = *pc;
+      J->patchpc = (BCIns *)pc;
+      *J->patchpc = *retpc;
+      J->bcskip = 1;
+    }
+    return 0;
   default:
     if (bc_op(*pc) >= BC_FUNCF)
       return (int)((BCReg)(L->top - L->base) + 1);

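The exit handling for a BC_JLOOP that replaced a return or BC_ITERN instruction moves out of lj_trace_exit: instead of redirecting the PC in C, the function now returns the -17 sentinel and the assembler exit paths (updated in the vm_*.dasc files below) dispatch to the static entry of the original instruction; while recording, the bytecode is still temporarily unpatched as before. The exit status is therefore a negated error code in [-LUA_ERRERR, -1], the -17 sentinel, or MULTRES/0, and the new unsigned compare against -LUA_ERRERR picks out the error case. A rough C restatement of that classification, assuming LUA_ERRERR == 5 as in lua.h:

#include <stdio.h>

#define LUA_ERRERR 5                 /* Highest Lua error status (lua.h). */
#define EXIT_STATIC_DISPATCH (-17)   /* Sentinel introduced by this update. */

/* Same test as "cmn CARG1, #LUA_ERRERR; bhs" or "cmp RDd, -LUA_ERRERR; jae":
** unsigned(status) >= unsigned(-LUA_ERRERR) iff status is in [-LUA_ERRERR, -1]. */
static const char *classify(int status)
{
  if ((unsigned int)status >= (unsigned int)-LUA_ERRERR)
    return "error (rethrow)";
  if (status == EXIT_STATIC_DISPATCH)
    return "dispatch to static entry";
  return "MULTRES or 0";
}

int main(void)
{
  printf("%3d -> %s\n", -2, classify(-2));    /* negated LUA_ERRRUN */
  printf("%3d -> %s\n", -17, classify(-17));  /* static dispatch */
  printf("%3d -> %s\n", 8, classify(8));      /* MULTRES */
  return 0;
}
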
+ 3 - 3
libs/LuaJIT/src/lj_vm.h

@@ -54,8 +54,8 @@ LJ_ASMF void lj_vm_profhook(void);
 LJ_ASMF void lj_vm_IITERN(void);
 
 /* Trace exit handling. */
-LJ_ASMF void lj_vm_exit_handler(void);
-LJ_ASMF void lj_vm_exit_interp(void);
+LJ_ASMF char lj_vm_exit_handler[];
+LJ_ASMF char lj_vm_exit_interp[];
 
 /* Internal math helper functions. */
 #if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP)
@@ -111,6 +111,6 @@ LJ_ASMF void lj_cont_stitch(void);  /* Trace stitching. */
 LJ_ASMF char lj_vm_asm_begin[];
 
 /* Bytecode offsets are relative to lj_vm_asm_begin. */
-#define makeasmfunc(ofs)	((ASMFunction)(lj_vm_asm_begin + (ofs)))
+#define makeasmfunc(ofs) lj_ptr_sign((ASMFunction)(lj_vm_asm_begin + (ofs)), 0)
 
 #endif

+ 4 - 3
libs/LuaJIT/src/lj_vmevent.h

@@ -24,9 +24,10 @@
 /* VM event IDs. */
 typedef enum {
   VMEVENT_DEF(BC,	0x00003883),
-  VMEVENT_DEF(TRACE,	0xb2d91467),
-  VMEVENT_DEF(RECORD,	0x9284bf4f),
-  VMEVENT_DEF(TEXIT,	0xb29df2b0),
+  VMEVENT_DEF(TRACE,	0x12d91467),
+  VMEVENT_DEF(RECORD,	0x1284bf4f),
+  VMEVENT_DEF(TEXIT,	0x129df2b0),
+  VMEVENT_DEF(ERRFIN,	0x12d93888),
   LJ_VMEVENT__MAX
 } VMEvent;
 

+ 15 - 4
libs/LuaJIT/src/lj_vmmath.c

@@ -36,6 +36,17 @@ LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); }
 
 /* -- Helper functions ---------------------------------------------------- */
 
+/* Required to prevent the C compiler from applying FMA optimizations.
+**
+** Yes, there's -ffp-contract and the FP_CONTRACT pragma ... in theory.
+** But the current state of C compilers is a mess in this regard.
+** Also, this function is not performance sensitive at all.
+*/
+LJ_NOINLINE static double lj_vm_floormul(double x, double y)
+{
+  return lj_vm_floor(x / y) * y;
+}
+
 double lj_vm_foldarith(double x, double y, int op)
 {
   switch (op) {
@@ -43,7 +54,7 @@ double lj_vm_foldarith(double x, double y, int op)
   case IR_SUB - IR_ADD: return x-y; break;
   case IR_MUL - IR_ADD: return x*y; break;
   case IR_DIV - IR_ADD: return x/y; break;
-  case IR_MOD - IR_ADD: return x-lj_vm_floor(x/y)*y; break;
+  case IR_MOD - IR_ADD: return x-lj_vm_floormul(x, y); break;
   case IR_POW - IR_ADD: return pow(x, y); break;
   case IR_NEG - IR_ADD: return -x; break;
   case IR_ABS - IR_ADD: return fabs(x); break;
@@ -64,11 +75,11 @@ int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
   uint32_t y, ua, ub;
   /* This must be checked before using this function. */
   lj_assertX(b != 0, "modulo with zero divisor");
-  ua = a < 0 ? (uint32_t)-a : (uint32_t)a;
-  ub = b < 0 ? (uint32_t)-b : (uint32_t)b;
+  ua = a < 0 ? ~(uint32_t)a+1u : (uint32_t)a;
+  ub = b < 0 ? ~(uint32_t)b+1u : (uint32_t)b;
   y = ua % ub;
   if (y != 0 && (a^b) < 0) y = y - ub;
-  if (((int32_t)y^b) < 0) y = (uint32_t)-(int32_t)y;
+  if (((int32_t)y^b) < 0) y = ~y+1u;
   return (int32_t)y;
 }
 #endif

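lj_vm_floormul exists only to keep the C compiler from contracting floor(x/y)*y and the following subtraction into a fused multiply-add, which would round the floored modulo differently from the rest of the VM (the fmsub removal in vm_arm64.dasc below addresses the same issue on the JIT side). A standalone sketch of the same idea; floormul/foldmod are stand-ins for lj_vm_floormul and the IR_MOD case of lj_vm_foldarith, and __attribute__((noinline)) stands in for LJ_NOINLINE:

#include <math.h>
#include <stdio.h>

/* Keeping the multiply in a separate, non-inlined function forces the
** product to be rounded before the subtraction, blocking FMA contraction. */
#if defined(__GNUC__)
__attribute__((noinline))
#endif
static double floormul(double x, double y)
{
  return floor(x / y) * y;
}

static double foldmod(double x, double y)
{
  return x - floormul(x, y);  /* Floored modulo, i.e. Lua's % semantics. */
}

int main(void)
{
  printf("%.17g\n", foldmod(5.5, 2.0));   /* 1.5 */
  printf("%.17g\n", foldmod(-5.5, 2.0));  /* 0.5 (floored, not truncated) */
  return 0;
}
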
+ 1 - 0
libs/LuaJIT/src/nxbuild.bat

@@ -121,6 +121,7 @@ goto :BUILD
 @set TARGETLIB=libluajit_%TARGETLIB_SUFFIX%.a
 :BUILD
 del %TARGETLIB%
+@set LJCOMPILE=%LJCOMPILE% -fPIC
 @if "%1" neq "noamalg" goto :AMALG
 for %%f in (lj_*.c lib_*.c) do (
   %LJCOMPILE% %%f

+ 2 - 2
libs/LuaJIT/src/ps4build.bat

@@ -51,7 +51,7 @@ if exist minilua.exe.manifest^
 minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
 @if errorlevel 1 goto :BAD
 
-%LJCOMPILE% /I "." /I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c
+%LJCOMPILE% /I "." /I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC -DLUAJIT_NO_UNWIND host\buildvm*.c
 @if errorlevel 1 goto :BAD
 %LJLINK% /out:buildvm.exe buildvm*.obj
 @if errorlevel 1 goto :BAD
@@ -78,7 +78,7 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
 @set LJLIB="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-ar" rcus
 @set INCLUDE=""
 
-orbis-as -o lj_vm.o lj_vm.s
+"%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-as" -o lj_vm.o lj_vm.s
 
 @if "%1" neq "debug" goto :NODEBUG
 @shift

+ 2 - 2
libs/LuaJIT/src/ps5build.bat

@@ -74,11 +74,11 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
 @if errorlevel 1 goto :BAD
 
 @rem ---- Cross compiler ----
-@set LJCOMPILE="%SCE_PROSPERO_SDK_DIR%\host_tools\bin\prospero-clang" -c -Wall -DLUAJIT_DISABLE_FFI %GC64%
+@set LJCOMPILE="%SCE_PROSPERO_SDK_DIR%\host_tools\bin\prospero-clang" -c -Wall -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC %GC64%
 @set LJLIB="%SCE_PROSPERO_SDK_DIR%\host_tools\bin\prospero-llvm-ar" rcus
 @set INCLUDE=""
 
-%SCE_PROSPERO_SDK_DIR%\host_tools\bin\prospero-llvm-as -o lj_vm.o lj_vm.s
+"%SCE_PROSPERO_SDK_DIR%\host_tools\bin\prospero-clang" -c -o lj_vm.o lj_vm.s
 
 @if "%1" neq "debug" goto :NODEBUG
 @shift

+ 15 - 2
libs/LuaJIT/src/vm_arm.dasc

@@ -2196,8 +2196,8 @@ static void build_subroutines(BuildCtx *ctx)
   |.if JIT
   |  ldr L, SAVE_L
   |1:
-  |  cmp CARG1, #0
-  |  blt >9				// Check for error from exit.
+  |  cmn CARG1, #LUA_ERRERR
+  |  bhs >9				// Check for error from exit.
   |   lsl RC, CARG1, #3
   |  ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
   |   str RC, SAVE_MULTRES
@@ -2213,6 +2213,8 @@ static void build_subroutines(BuildCtx *ctx)
   |   ldr INS, [PC], #4
   |     lsl MASKR8, MASKR8, #3		// MASKR8 = 255*8.
   |    st_vmstate CARG4
+  |  cmn CARG1, #17			// Static dispatch?
+  |  beq >5
   |  cmp OP, #BC_FUNCC+2		// Fast function?
   |  bhs >4
   |2:
@@ -2238,6 +2240,17 @@ static void build_subroutines(BuildCtx *ctx)
   |  ldr KBASE, [CARG3, #PC2PROTO(k)]
   |  b <2
   |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  ldr CARG1, [DISPATCH, #DISPATCH_J(trace)]
+  |  decode_RD RC, INS
+  |  ldr TRACE:CARG1, [CARG1, RC, lsl #2]
+  |  ldr INS, TRACE:CARG1->startins
+  |  decode_OP OP, INS
+  |   decode_RA8 RA, INS
+  |  add OP, DISPATCH, OP, lsl #2
+  |   decode_RD RC, INS
+  |  ldr pc, [OP, #GG_DISP2STATIC]
+  |
   |9:  // Rethrow error from the right C frame.
   |  rsb CARG2, CARG1, #0
   |  mov CARG1, L

+ 62 - 19
libs/LuaJIT/src/vm_arm64.dasc

@@ -77,6 +77,23 @@
 |.define CRET1,		x0
 |.define CRET1w,	w0
 |
+|//-----------------------------------------------------------------------
+|
+|// ARM64e pointer authentication codes (PAC).
+|.if PAUTH
+|.macro sp_auth; pacibsp; .endmacro
+|.macro br_auth, reg; braaz reg; .endmacro
+|.macro blr_auth, reg; blraaz reg; .endmacro
+|.macro ret_auth; retab; .endmacro
+|.else
+|.macro sp_auth; .endmacro
+|.macro br_auth, reg; br reg; .endmacro
+|.macro blr_auth, reg; blr reg; .endmacro
+|.macro ret_auth; ret; .endmacro
+|.endif
+|
+|//-----------------------------------------------------------------------
+|
 |// Stack layout while in interpreter. Must match with lj_frame.h.
 |
 |.define CFRAME_SPACE,	208
@@ -106,6 +123,7 @@
 |.endmacro
 |
 |.macro saveregs
+|  sp_auth
 |  sub sp, sp, # CFRAME_SPACE
 |  stp fp, lr, [sp, # SAVE_FP_LR_]
 |  add fp, sp, # SAVE_FP_LR_
@@ -180,7 +198,7 @@
 |   decode_RA RA, INS
 |  ldr TMP0, [TMP1, #GG_G2DISP]
 |   decode_RD RC, INS
-|  br TMP0
+|  br_auth TMP0
 |.endmacro
 |
 |// Instruction footer.
@@ -209,7 +227,7 @@
 |   decode_RA RA, INS
 |  ldr TMP0, [TMP1, #GG_G2DISP]
 |   add RA, BASE, RA, lsl #3
-|  br TMP0
+|  br_auth TMP0
 |.endmacro
 |
 |.macro ins_call
@@ -356,7 +374,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |->vm_leave_unw:
   |  restoreregs
-  |  ret
+  |  ret_auth
   |
   |6:
   |  bgt >7				// Less results wanted?
@@ -542,7 +560,7 @@ static void build_subroutines(BuildCtx *ctx)
   |   str RC, SAVE_CFRAME
   |  str TMP0, L->cframe		// Add our C frame to cframe chain.
   |    str L, GL->cur_L
-  |  blr CARG4			// (lua_State *L, lua_CFunction func, void *ud)
+  |  blr_auth CARG4		// (lua_State *L, lua_CFunction func, void *ud)
   |  mov BASE, CRET1
   |   mov PC, #FRAME_CP
   |  cbnz BASE, <3			// Else continue with the call.
@@ -573,7 +591,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  ldr CARG3, LFUNC:CARG3->pc
   |  ldr KBASE, [CARG3, #PC2PROTO(k)]
   |  // BASE = base, RA = resultptr, CARG4 = meta base
-  |    br CARG1
+  |    br_auth CARG1
   |
   |.if FFI
   |1:
@@ -1707,7 +1725,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  cmp TMP1, TMP2
   |   mov CARG1, L
   |  bhi >5				// Need to grow stack.
-  |   blr CARG3				// (lua_State *L)
+  |   blr_auth CARG3			// (lua_State *L)
   |  // Either throws an error, or recovers and returns -1, 0 or nresults+1.
   |   ldr BASE, L->base
   |  cmp CRET1w, #0
@@ -1743,6 +1761,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |->fff_gcstep:			// Call GC step function.
   |  // BASE = new base, RC = nargs*8
+  |  sp_auth
   |   add CARG2, BASE, NARGS8:RC	// Calculate L->top.
   |  mov RA, lr
   |   stp BASE, CARG2, L->base
@@ -1754,7 +1773,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov lr, RA				// Help return address predictor.
   |  sub NARGS8:RC, CARG2, BASE		// Calculate nargs*8.
   |   and CFUNC:CARG3, CARG3, #LJ_GCVMASK
-  |  ret
+  |  ret_auth
   |
   |//-----------------------------------------------------------------------
   |//-- Special dispatch targets -------------------------------------------
@@ -1781,7 +1800,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1	// Hook already active?
   |5:  // Re-dispatch to static ins.
   |  ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC]
-  |  br TMP0
+  |  br_auth TMP0
   |
   |->vm_inshook:			// Dispatch target for instr/line hooks.
   |  ldrb TMP2w, GL->hookmask
@@ -1807,7 +1826,7 @@ static void build_subroutines(BuildCtx *ctx)
   |   decode_RA RA, INS
   |  ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC]
   |   decode_RD RC, INS
-  |  br TMP0
+  |  br_auth TMP0
   |
   |->cont_hook:				// Continue from hook yield.
   |  ldr CARG1, [CARG4, #-40]
@@ -1857,7 +1876,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  sub NARGS8:RC, TMP1, BASE
   |   ldr INSw, [PC, #-4]
   |  and LFUNC:CARG3, CARG3, #LJ_GCVMASK
-  |  br CRET1
+  |  br_auth CRET1
   |
   |->cont_stitch:			// Trace stitching.
   |.if JIT
@@ -1986,8 +2005,8 @@ static void build_subroutines(BuildCtx *ctx)
   |.if JIT
   |  ldr L, SAVE_L
   |1:
-  |  cmp CARG1w, #0
-  |  blt >9				// Check for error from exit.
+  |  cmn CARG1w, #LUA_ERRERR
+  |  bhs >9				// Check for error from exit.
   |   lsl RC, CARG1, #3
   |  ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
   |    movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
@@ -2004,6 +2023,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  ldrb RBw, [PC, # OFS_OP]
   |   ldr INSw, [PC], #4
   |    st_vmstate CARG4w
+  |  cmn CARG1w, #17			// Static dispatch?
+  |  beq >5
   |  cmp RBw, #BC_FUNCC+2		// Fast function?
   |   add TMP1, GL, INS, uxtb #3
   |  bhs >4
@@ -2014,13 +2035,13 @@ static void build_subroutines(BuildCtx *ctx)
   |   decode_RA RA, INS
   |   lsr TMP0, INS, #16
   |   csel RC, TMP0, RC, lo
-  |   blo >5
+  |   blo >3
   |   ldr CARG3, [BASE, FRAME_FUNC]
   |   sub RC, RC, #8
   |   add RA, BASE, RA, lsl #3	// Yes: RA = BASE+framesize*8, RC = nargs*8
   |   and LFUNC:CARG3, CARG3, #LJ_GCVMASK
-  |5:
-  |  br RB
+  |3:
+  |  br_auth RB
   |
   |4:  // Check frame below fast function.
   |  ldr CARG1, [BASE, FRAME_PC]
@@ -2036,6 +2057,17 @@ static void build_subroutines(BuildCtx *ctx)
   |  ldr KBASE, [CARG3, #PC2PROTO(k)]
   |  b <2
   |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  ldr RA, [GL, #GL_J(trace)]
+  |  decode_RD RC, INS
+  |  ldr TRACE:RA, [RA, RC, lsl #3]
+  |  ldr INSw, TRACE:RA->startins
+  |  add TMP0, GL, INS, uxtb #3
+  |   decode_RA RA, INS
+  |  ldr RB, [TMP0, #GG_G2DISP+GG_DISP2STATIC]
+  |   decode_RD RC, INS
+  |  br_auth RB
+  |
   |9:  // Rethrow error from the right C frame.
   |  neg CARG2w, CARG1w
   |  mov CARG1, L
@@ -2182,6 +2214,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  // Caveat: needs special frame unwinding, see below.
   |.if FFI
   |  .type CCSTATE, CCallState, x19
+  |  sp_auth
   |  stp x20, CCSTATE, [sp, #-32]!
   |  stp fp, lr, [sp, #16]
   |  add fp, sp, #16
@@ -2208,14 +2241,14 @@ static void build_subroutines(BuildCtx *ctx)
   |  ldp x6, x7, CCSTATE->gpr[6]
   |   ldp d6, d7, CCSTATE->fpr[6]
   |  ldr x8, CCSTATE->retp
-  |  blr TMP3
+  |  blr_auth TMP3
   |  sub sp, fp, #16
   |  stp x0, x1, CCSTATE->gpr[0]
   |   stp d0, d1, CCSTATE->fpr[0]
   |   stp d2, d3, CCSTATE->fpr[2]
   |  ldp fp, lr, [sp, #16]
   |  ldp x20, CCSTATE, [sp], #32
-  |  ret
+  |  ret_auth
   |.endif
   |// Note: vm_ffi_call must be the last function in this object file!
   |
@@ -2636,7 +2669,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |.macro ins_arithmod, res, reg1, reg2
     |  fdiv d2, reg1, reg2
     |  frintm d2, d2
-    |  fmsub res, d2, reg2, reg1
+    |  // Cannot use fmsub, because FMA is not enabled by default.
+    |  fmul d2, d2, reg2
+    |  fsub res, reg1, d2
     |.endmacro
     |
     |.macro ins_arithdn, intins, fpins
@@ -3784,12 +3819,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |   mov CARG2w, #0  // Traces on ARM64 don't store the trace #, so use 0.
     |  ldr TRACE:RC, [CARG1, RC, lsl #3]
     |   st_vmstate CARG2w
+    |.if PAUTH
+    |  ldr RA, TRACE:RC->mcauth
+    |.else
     |  ldr RA, TRACE:RC->mcode
+    |.endif
     |   str BASE, GL->jit_base
     |   str L, GL->tmpbuf.L
     |  sub sp, sp, #16	// See SPS_FIXED. Avoids sp adjust in every root trace.
+    |.if PAUTH
+    |  braa RA, RC
+    |.else
     |  br RA
     |.endif
+    |.endif
     break;
 
   case BC_JMP:
@@ -3899,7 +3942,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  mov CARG1, L
     |   bhi ->vm_growstack_c		// Need to grow stack.
     |    st_vmstate TMP0w
-    |  blr CARG4			// (lua_State *L [, lua_CFunction f])
+    |  blr_auth CARG4			// (lua_State *L [, lua_CFunction f])
     |  // Returns nresults.
     |  ldp BASE, TMP1, L->base
     |    str L, GL->cur_L

+ 23 - 4
libs/LuaJIT/src/vm_mips.dasc

@@ -2466,7 +2466,8 @@ static void build_subroutines(BuildCtx *ctx)
   |   addiu DISPATCH, JGL, -GG_DISP2G-32768
   |  sw BASE, L->base
   |1:
-  |  bltz CRET1, >9			// Check for error from exit.
+  |  sltiu TMP0, CRET1, -LUA_ERRERR	// Check for error from exit.
+  |  beqz TMP0, >9
   |.  lw LFUNC:RB, FRAME_FUNC(BASE)
   |    .FPU lui TMP3, 0x59c0			// TOBIT = 2^52 + 2^51 (float).
   |  sll MULTRES, CRET1, 3
@@ -2480,14 +2481,16 @@ static void build_subroutines(BuildCtx *ctx)
   |    .FPU cvt.d.s TOBIT, TOBIT
   |  // Modified copy of ins_next which handles function header dispatch, too.
   |  lw INS, 0(PC)
-  |   addiu PC, PC, 4
+  |  addiu CRET1, CRET1, 17		// Static dispatch?
   |    // Assumes TISNIL == ~LJ_VMST_INTERP == -1
   |    sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
+  |   decode_RD8a RD, INS
+  |  beqz CRET1, >5
+  |.  addiu PC, PC, 4
   |  decode_OP4a TMP1, INS
   |  decode_OP4b TMP1
-  |    sltiu TMP2, TMP1, BC_FUNCF*4
   |  addu TMP0, DISPATCH, TMP1
-  |   decode_RD8a RD, INS
+  |    sltiu TMP2, TMP1, BC_FUNCF*4
   |  lw AT, 0(TMP0)
   |   decode_RA8a RA, INS
   |    beqz TMP2, >2
@@ -2515,6 +2518,22 @@ static void build_subroutines(BuildCtx *ctx)
   |  jr AT
   |.  addu RA, RA, BASE
   |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  lw TMP0, DISPATCH_J(trace)(DISPATCH)
+  |  decode_RD4b RD
+  |  addu TMP0, TMP0, RD
+  |  lw TRACE:TMP2, 0(TMP0)
+  |  lw INS, TRACE:TMP2->startins
+  |  decode_OP4a TMP1, INS
+  |  decode_OP4b TMP1
+  |  addu TMP0, DISPATCH, TMP1
+  |   decode_RD8a RD, INS
+  |  lw AT, GG_DISP2STATIC(TMP0)
+  |   decode_RA8a RA, INS
+  |   decode_RD8b RD
+  |  jr AT
+  |.  decode_RA8b RA
+  |
   |9:  // Rethrow error from the right C frame.
   |  load_got lj_err_trace
   |  sub CARG2, r0, CRET1

+ 23 - 4
libs/LuaJIT/src/vm_mips64.dasc

@@ -2571,7 +2571,8 @@ static void build_subroutines(BuildCtx *ctx)
   |   daddiu DISPATCH, JGL, -GG_DISP2G-32768
   |  sd BASE, L->base
   |1:
-  |  bltz CRET1, >9			// Check for error from exit.
+  |  sltiu TMP0, CRET1, -LUA_ERRERR	// Check for error from exit.
+  |  beqz TMP0, >9
   |.  ld LFUNC:RB, FRAME_FUNC(BASE)
   |    .FPU lui TMP3, 0x59c0		// TOBIT = 2^52 + 2^51 (float).
   |  dsll MULTRES, CRET1, 3
@@ -2586,14 +2587,16 @@ static void build_subroutines(BuildCtx *ctx)
   |    .FPU cvt.d.s TOBIT, TOBIT
   |  // Modified copy of ins_next which handles function header dispatch, too.
   |  lw INS, 0(PC)
-  |   daddiu PC, PC, 4
+  |  addiu CRET1, CRET1, 17		// Static dispatch?
   |    // Assumes TISNIL == ~LJ_VMST_INTERP == -1
   |    sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
+  |   decode_RD8a RD, INS
+  |  beqz CRET1, >5
+  |.  daddiu PC, PC, 4
   |  decode_OP8a TMP1, INS
   |  decode_OP8b TMP1
-  |    sltiu TMP2, TMP1, BC_FUNCF*8
   |  daddu TMP0, DISPATCH, TMP1
-  |   decode_RD8a RD, INS
+  |    sltiu TMP2, TMP1, BC_FUNCF*8
   |  ld AT, 0(TMP0)
   |   decode_RA8a RA, INS
   |    beqz TMP2, >2
@@ -2622,6 +2625,22 @@ static void build_subroutines(BuildCtx *ctx)
   |  jr AT
   |.  daddu RA, RA, BASE
   |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  ld TMP0, DISPATCH_J(trace)(DISPATCH)
+  |  decode_RD8b RD
+  |  daddu TMP0, TMP0, RD
+  |  ld TRACE:TMP2, 0(TMP0)
+  |  lw INS, TRACE:TMP2->startins
+  |  decode_OP8a TMP1, INS
+  |  decode_OP8b TMP1
+  |  daddu TMP0, DISPATCH, TMP1
+  |   decode_RD8a RD, INS
+  |  ld AT, GG_DISP2STATIC(TMP0)
+  |   decode_RA8a RA, INS
+  |   decode_RD8b RD
+  |  jr AT
+  |.  decode_RA8b RA
+  |
   |9:  // Rethrow error from the right C frame.
   |  load_got lj_err_trace
   |  sub CARG2, r0, CRET1

+ 20 - 2
libs/LuaJIT/src/vm_ppc.dasc

@@ -3015,8 +3015,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  addi DISPATCH, JGL, -GG_DISP2G-32768
   |  stp BASE, L->base
   |1:
-  |  cmpwi CARG1, 0
-  |  blt >9				// Check for error from exit.
+  |  li TMP2, -LUA_ERRERR
+  |  cmplw CARG1, TMP2
+  |  bge >9				// Check for error from exit.
   |  lwz LFUNC:RB, FRAME_FUNC(BASE)
   |   slwi MULTRES, CARG1, 3
   |    li TMP2, 0
@@ -3041,6 +3042,8 @@ static void build_subroutines(BuildCtx *ctx)
   |   addi PC, PC, 4
   |    // Assumes TISNIL == ~LJ_VMST_INTERP == -1.
   |    stw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
+  |  cmpwi CARG1, -17			// Static dispatch?
+  |  beq >5
   |  decode_OPP TMP1, INS
   |   decode_RA8 RA, INS
   |  lpx TMP0, DISPATCH, TMP1
@@ -3070,6 +3073,21 @@ static void build_subroutines(BuildCtx *ctx)
   |   add RA, RA, BASE
   |  bctr
   |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  lwz TMP1, DISPATCH_J(trace)(DISPATCH)
+  |  decode_RD4 RD, INS
+  |  lwzx TRACE:TMP1, TMP1, RD
+  |  lwz INS, TRACE:TMP1->startins
+  |  decode_OPP TMP1, INS
+  |  addi TMP1, TMP1, GG_DISP2STATIC
+  |  lpx TMP0, DISPATCH, TMP1
+  |  mtctr TMP0
+  |   decode_RB8 RB, INS
+  |   decode_RD8 RD, INS
+  |   decode_RA8 RA, INS
+  |   decode_RC8 RC, INS
+  |  bctr
+  |
   |9:  // Rethrow error from the right C frame.
   |  neg CARG2, CARG1
   |  mr CARG1, L

+ 12 - 1
libs/LuaJIT/src/vm_x64.dasc

@@ -2453,7 +2453,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov r12, [RA]
   |  mov rsp, RA			// Reposition stack to C frame.
   |.endif
-  |  test RDd, RDd; js >9		// Check for error from exit.
+  |  cmp RDd, -LUA_ERRERR; jae >9	// Check for error from exit.
   |  mov L:RB, SAVE_L
   |  mov MULTRES, RDd
   |  mov LFUNC:KBASE, [BASE-16]
@@ -2469,6 +2469,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  movzx OP, RCL
   |  add PC, 4
   |  shr RCd, 16
+  |  cmp MULTRES, -17			// Static dispatch?
+  |  je >5
   |  cmp OP, BC_FUNCF			// Function header?
   |  jb >3
   |  cmp OP, BC_FUNCC+2			// Fast function?
@@ -2491,6 +2493,15 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov KBASE, [KBASE+PC2PROTO(k)]
   |  jmp <2
   |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  mov RA, [DISPATCH+DISPATCH_J(trace)]
+  |  mov TRACE:RA, [RA+RD*8]
+  |  mov RCd, TRACE:RA->startins
+  |  movzx RAd, RCH
+  |  movzx OP, RCL
+  |  shr RCd, 16
+  |  jmp aword [DISPATCH+OP*8+GG_DISP2STATIC]
+  |
   |9:  // Rethrow error from the right C frame.
   |  mov CARG2d, RDd
   |  mov CARG1, L:RB

+ 16 - 1
libs/LuaJIT/src/vm_x86.dasc

@@ -2902,7 +2902,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov r13, TMPa
   |  mov r12, TMPQ
   |.endif
-  |  test RD, RD; js >9			// Check for error from exit.
+  |  cmp RD, -LUA_ERRERR; jae >9	// Check for error from exit.
   |  mov L:RB, SAVE_L
   |  mov MULTRES, RD
   |  mov LFUNC:KBASE, [BASE-8]
@@ -2917,6 +2917,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  movzx OP, RCL
   |  add PC, 4
   |  shr RC, 16
+  |  cmp MULTRES, -17			// Static dispatch?
+  |  je >5
   |  cmp OP, BC_FUNCF			// Function header?
   |  jb >3
   |  cmp OP, BC_FUNCC+2			// Fast function?
@@ -2942,6 +2944,19 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov KBASE, [KBASE+PC2PROTO(k)]
   |  jmp <2
   |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  mov RA, [DISPATCH+DISPATCH_J(trace)]
+  |  mov TRACE:RA, [RA+RD*4]
+  |  mov RC, TRACE:RA->startins
+  |  movzx RA, RCH
+  |  movzx OP, RCL
+  |  shr RC, 16
+  |.if X64
+  |  jmp aword [DISPATCH+OP*8+GG_DISP2STATIC]
+  |.else
+  |  jmp aword [DISPATCH+OP*4+GG_DISP2STATIC]
+  |.endif
+  |
   |9:  // Rethrow error from the right C frame.
   |  mov FCARG2, RD
   |  mov FCARG1, L:RB