Explorar el Código

Handle on-trace OOM errors from helper functions.

Mike Pall hace 4 años
padre
commit
a32aeadc68
Se han modificado 24 ficheros con 421 adiciones y 51 borrados
  1. 0 6
      doc/status.html
  2. 12 0
      src/lj_arch.h
  3. 54 17
      src/lj_asm.c
  4. 2 2
      src/lj_dispatch.h
  5. 236 11
      src/lj_err.c
  6. 18 1
      src/lj_err.h
  7. 2 0
      src/lj_ffrecord.c
  8. 2 0
      src/lj_jit.h
  9. 4 1
      src/lj_mcode.c
  10. 1 0
      src/lj_opt_loop.c
  11. 2 1
      src/lj_record.c
  12. 1 0
      src/lj_snap.c
  13. 1 0
      src/lj_state.c
  14. 2 0
      src/lj_target_x86.h
  15. 52 3
      src/lj_trace.c
  16. 3 0
      src/lj_trace.h
  17. 3 0
      src/lj_vm.h
  18. 2 1
      src/vm_arm.dasc
  19. 2 1
      src/vm_arm64.dasc
  20. 7 2
      src/vm_mips.dasc
  21. 7 2
      src/vm_mips64.dasc
  22. 2 1
      src/vm_ppc.dasc
  23. 3 1
      src/vm_x64.dasc
  24. 3 1
      src/vm_x86.dasc

+ 0 - 6
doc/status.html

@@ -90,12 +90,6 @@ The Lua <b>debug API</b> is missing a couple of features (return
 hooks for non-Lua functions) and shows slightly different behavior
 in LuaJIT (no per-coroutine hooks, no tail call counting).
 </li>
-<li>
-Currently some <b>out-of-memory</b> errors from <b>on-trace code</b> are not
-handled correctly. The error may fall through an on-trace
-<tt>pcall</tt> or it may be passed on to the function set with
-<tt>lua_atpanic</tt> on x64.
-</li>
 </ul>
 <br class="flush">
 </div>

+ 12 - 0
src/lj_arch.h

@@ -173,6 +173,7 @@
 #define LJ_TARGET_X86		1
 #define LJ_TARGET_X86ORX64	1
 #define LJ_TARGET_EHRETREG	0
+#define LJ_TARGET_EHRAREG	8
 #define LJ_TARGET_MASKSHIFT	1
 #define LJ_TARGET_MASKROT	1
 #define LJ_TARGET_UNALIGNED	1
@@ -186,6 +187,7 @@
 #define LJ_TARGET_X64		1
 #define LJ_TARGET_X86ORX64	1
 #define LJ_TARGET_EHRETREG	0
+#define LJ_TARGET_EHRAREG	16
 #define LJ_TARGET_JUMPRANGE	31	/* +-2^31 = +-2GB */
 #define LJ_TARGET_MASKSHIFT	1
 #define LJ_TARGET_MASKROT	1
@@ -211,6 +213,7 @@
 #define LJ_ABI_EABI		1
 #define LJ_TARGET_ARM		1
 #define LJ_TARGET_EHRETREG	0
+#define LJ_TARGET_EHRAREG	14
 #define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
 #define LJ_TARGET_MASKSHIFT	0
 #define LJ_TARGET_MASKROT	1
@@ -241,6 +244,7 @@
 #endif
 #define LJ_TARGET_ARM64		1
 #define LJ_TARGET_EHRETREG	0
+#define LJ_TARGET_EHRAREG	30
 #define LJ_TARGET_JUMPRANGE	27	/* +-2^27 = +-128MB */
 #define LJ_TARGET_MASKSHIFT	1
 #define LJ_TARGET_MASKROT	1
@@ -296,6 +300,7 @@
 
 #define LJ_TARGET_PPC		1
 #define LJ_TARGET_EHRETREG	3
+#define LJ_TARGET_EHRAREG	65
 #define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
 #define LJ_TARGET_MASKSHIFT	0
 #define LJ_TARGET_MASKROT	1
@@ -398,6 +403,7 @@
 #endif
 #define LJ_TARGET_MIPS		1
 #define LJ_TARGET_EHRETREG	4
+#define LJ_TARGET_EHRAREG	31
 #define LJ_TARGET_JUMPRANGE	27	/* 2*2^27 = 256MB-aligned region */
 #define LJ_TARGET_MASKSHIFT	1
 #define LJ_TARGET_MASKROT	1
@@ -631,6 +637,12 @@ extern void *LJ_WIN_LOADLIBA(const char *path);
 #define LJ_UNWIND_EXT		0
 #endif
 
+#if LJ_UNWIND_EXT && LJ_HASJIT && !LJ_TARGET_ARM && !(LJ_ABI_WIN && LJ_TARGET_X86)
+#define LJ_UNWIND_JIT		1
+#else
+#define LJ_UNWIND_JIT		0
+#endif
+
 /* Compatibility with Lua 5.1 vs. 5.2. */
 #ifdef LUAJIT_ENABLE_LUA52COMPAT
 #define LJ_52			1

+ 54 - 17
src/lj_asm.c

@@ -71,6 +71,7 @@ typedef struct ASMState {
   IRRef snaprename;	/* Rename highwater mark for snapshot check. */
   SnapNo snapno;	/* Current snapshot number. */
   SnapNo loopsnapno;	/* Loop snapshot number. */
+  int snapalloc;	/* Current snapshot needs allocation. */
 
   IRRef fuseref;	/* Fusion limit (loopref, 0 or FUSE_DISABLED). */
   IRRef sectref;	/* Section base reference (loopref or 0). */
@@ -84,6 +85,7 @@ typedef struct ASMState {
 
   MCode *mcbot;		/* Bottom of reserved MCode. */
   MCode *mctop;		/* Top of generated MCode. */
+  MCode *mctoporig;	/* Original top of generated MCode. */
   MCode *mcloop;	/* Pointer to loop MCode (or NULL). */
   MCode *invmcp;	/* Points to invertible loop branch (or NULL). */
   MCode *flagmcp;	/* Pending opportunity to merge flag setting ins. */
@@ -947,9 +949,9 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref)
 }
 
 /* Allocate refs escaping to a snapshot. */
-static void asm_snap_alloc(ASMState *as)
+static void asm_snap_alloc(ASMState *as, int snapno)
 {
-  SnapShot *snap = &as->T->snap[as->snapno];
+  SnapShot *snap = &as->T->snap[snapno];
   SnapEntry *map = &as->T->snapmap[snap->mapofs];
   MSize n, nent = snap->nent;
   for (n = 0; n < nent; n++) {
@@ -960,7 +962,7 @@ static void asm_snap_alloc(ASMState *as)
       if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
 	lj_assertA(irt_type(IR(ref+1)->t) == IRT_SOFTFP,
 		   "snap %d[%d] points to bad SOFTFP IR %04d",
-		   as->snapno, n, ref - REF_BIAS);
+		   snapno, n, ref - REF_BIAS);
 	asm_snap_alloc1(as, ref+1);
       }
     }
@@ -992,19 +994,16 @@ static int asm_snap_checkrename(ASMState *as, IRRef ren)
   return 0;  /* Not found. */
 }
 
-/* Prepare snapshot for next guard instruction. */
+/* Prepare snapshot for next guard or throwing instruction. */
 static void asm_snap_prep(ASMState *as)
 {
-  if (as->curins < as->snapref) {
-    do {
-      if (as->snapno == 0) return;  /* Called by sunk stores before snap #0. */
-      as->snapno--;
-      as->snapref = as->T->snap[as->snapno].ref;
-    } while (as->curins < as->snapref);
-    asm_snap_alloc(as);
+  if (as->snapalloc) {
+    /* Alloc on first invocation for each snapshot. */
+    as->snapalloc = 0;
+    asm_snap_alloc(as, as->snapno);
     as->snaprename = as->T->nins;
   } else {
-    /* Process any renames above the highwater mark. */
+    /* Check any renames above the highwater mark. */
     for (; as->snaprename < as->T->nins; as->snaprename++) {
       IRIns *ir = &as->T->ir[as->snaprename];
       if (asm_snap_checkrename(as, ir->op1))
@@ -1013,6 +1012,35 @@ static void asm_snap_prep(ASMState *as)
   }
 }
 
+/* Move to previous snapshot when we cross the current snapshot ref. */
+static void asm_snap_prev(ASMState *as)
+{
+  if (as->curins < as->snapref) {
+    ptrdiff_t ofs = as->mctoporig - as->mcp;
+    if (ofs >= 0x10000) lj_trace_err(as->J, LJ_TRERR_MCODEOV);
+    do {
+      if (as->snapno == 0) return;
+      as->snapno--;
+      as->snapref = as->T->snap[as->snapno].ref;
+      as->T->snap[as->snapno].mcofs = ofs;  /* Remember mcode offset. */
+    } while (as->curins < as->snapref);  /* May have no ins inbetween. */
+    as->snapalloc = 1;
+  }
+}
+
+/* Fixup snapshot mcode offsetst. */
+static void asm_snap_fixup_mcofs(ASMState *as)
+{
+  uint32_t sz = (uint32_t)(as->mctoporig - as->mcp);
+  SnapShot *snap = as->T->snap;
+  SnapNo i;
+  for (i = as->T->nsnap-1; i > 0; i--) {
+    /* Compute offset from mcode start and store in correct snapshot. */
+    snap[i].mcofs = (uint16_t)(sz - snap[i-1].mcofs);
+  }
+  snap[0].mcofs = 0;
+}
+
 /* -- Miscellaneous helpers ----------------------------------------------- */
 
 /* Calculate stack adjustment. */
@@ -1057,6 +1085,7 @@ static void asm_snew(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
   IRRef args[3];
+  asm_snap_prep(as);
   args[0] = ASMREF_L;  /* lua_State *L    */
   args[1] = ir->op1;   /* const char *str */
   args[2] = ir->op2;   /* size_t len      */
@@ -1069,6 +1098,7 @@ static void asm_tnew(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
   IRRef args[2];
+  asm_snap_prep(as);
   args[0] = ASMREF_L;     /* lua_State *L    */
   args[1] = ASMREF_TMP1;  /* uint32_t ahsize */
   as->gcsteps++;
@@ -1081,6 +1111,7 @@ static void asm_tdup(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
   IRRef args[2];
+  asm_snap_prep(as);
   args[0] = ASMREF_L;  /* lua_State *L    */
   args[1] = ir->op1;   /* const GCtab *kt */
   as->gcsteps++;
@@ -1201,6 +1232,7 @@ static void asm_tostr(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci;
   IRRef args[2];
+  asm_snap_prep(as);
   args[0] = ASMREF_L;
   as->gcsteps++;
   if (ir->op2 == IRTOSTR_NUM) {
@@ -1257,6 +1289,7 @@ static void asm_newref(ASMState *as, IRIns *ir)
   IRRef args[3];
   if (ir->r == RID_SINK)
     return;
+  asm_snap_prep(as);
   args[0] = ASMREF_L;     /* lua_State *L */
   args[1] = ir->op1;      /* GCtab *t     */
   args[2] = ASMREF_TMP1;  /* cTValue *key */
@@ -1838,8 +1871,7 @@ static void asm_head_side(ASMState *as)
 
   if (as->snapno && as->topslot > as->parent->topslot) {
     /* Force snap #0 alloc to prevent register overwrite in stack check. */
-    as->snapno = 0;
-    asm_snap_alloc(as);
+    asm_snap_alloc(as, 0);
   }
   allow = asm_head_side_base(as, irp, allow);
 
@@ -2100,6 +2132,7 @@ static void asm_setup_regsp(ASMState *as)
   as->snaprename = nins;
   as->snapref = nins;
   as->snapno = T->nsnap;
+  as->snapalloc = 0;
 
   as->stopins = REF_BASE;
   as->orignins = nins;
@@ -2327,7 +2360,6 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
 {
   ASMState as_;
   ASMState *as = &as_;
-  MCode *origtop;
 
   /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
   {
@@ -2355,7 +2387,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   as->parent = J->parent ? traceref(J, J->parent) : NULL;
 
   /* Reserve MCode memory. */
-  as->mctop = origtop = lj_mcode_reserve(J, &as->mcbot);
+  as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot);
   as->mcp = as->mctop;
   as->mclim = as->mcbot + MCLIM_REDZONE;
   asm_setup_target(as);
@@ -2417,6 +2449,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
       lj_assertA(!(LJ_32 && irt_isint64(ir->t)),
 		 "IR %04d has unsplit 64 bit type",
 		 (int)(ir - as->ir) - REF_BIAS);
+      asm_snap_prev(as);
       if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
 	continue;  /* Dead-code elimination can be soooo easy. */
       if (irt_isguard(ir->t))
@@ -2450,6 +2483,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
       memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
 	     (T->nins - as->orignins) * sizeof(IRIns));  /* Copy RENAMEs. */
       T->nins = J->curfinal->nins;
+      /* Fill mcofs of any unprocessed snapshots. */
+      as->curins = REF_FIRST;
+      asm_snap_prev(as);
       break;  /* Done. */
     }
 
@@ -2471,10 +2507,11 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   if (!as->loopref)
     asm_tail_fixup(as, T->link);  /* Note: this may change as->mctop! */
   T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
+  asm_snap_fixup_mcofs(as);
 #if LJ_TARGET_MCODE_FIXUP
   asm_mcode_fixup(T->mcode, T->szmcode);
 #endif
-  lj_mcode_sync(T->mcode, origtop);
+  lj_mcode_sync(T->mcode, as->mctoporig);
 }
 
 #undef IR

+ 2 - 2
src/lj_dispatch.h

@@ -31,7 +31,7 @@ extern double __divdf3(double a, double b);
 #define SFGOTDEF(_)
 #endif
 #if LJ_HASJIT
-#define JITGOTDEF(_)	_(lj_trace_exit) _(lj_trace_hot)
+#define JITGOTDEF(_)	_(lj_err_trace) _(lj_trace_exit) _(lj_trace_hot)
 #else
 #define JITGOTDEF(_)
 #endif
@@ -46,7 +46,7 @@ extern double __divdf3(double a, double b);
   _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
   _(pow) _(fmod) _(ldexp) _(lj_vm_modi) \
   _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \
-  _(lj_dispatch_profile) _(lj_err_throw) _(lj_err_run) \
+  _(lj_dispatch_profile) _(lj_err_throw) \
   _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
   _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \
   _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \

+ 236 - 11
src/lj_err.c

@@ -52,6 +52,11 @@
 **   the wrapper function feature. Lua errors thrown through C++ frames
 **   cannot be caught by C++ code and C++ destructors are not run.
 **
+** - EXT can handle errors from internal helper functions that are called
+**   from JIT-compiled code (except for Windows/x86 and 32 bit ARM).
+**   INT has no choice but to call the panic handler, if this happens.
+**   Note: this is mainly relevant for out-of-memory errors.
+**
 ** EXT is the default on all systems where the toolchain produces unwind
 ** tables by default (*). This is hard-coded and/or detected in src/Makefile.
 ** You can thwart the detection with: TARGET_XCFLAGS=-DLUAJIT_UNWIND_INTERNAL
@@ -305,12 +310,59 @@ LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
   return 1;  /* ExceptionContinueSearch */
 }
 
+#if LJ_UNWIND_JIT
+
+#if LJ_TARGET_X64
+#define CONTEXT_REG_PC	Rip
+#elif LJ_TARGET_ARM64
+#define CONTEXT_REG_PC	Pc
+#else
+#error "NYI: Windows arch-specific unwinder for JIT-compiled code"
+#endif
+
+/* Windows unwinder for JIT-compiled code. */
+static void err_unwind_win_jit(global_State *g, int errcode)
+{
+  CONTEXT ctx;
+  UNWIND_HISTORY_TABLE hist;
+
+  memset(&hist, 0, sizeof(hist));
+  RtlCaptureContext(&ctx);
+  while (1) {
+    uintptr_t frame, base, addr = ctx.CONTEXT_REG_PC;
+    void *hdata;
+    PRUNTIME_FUNCTION func = RtlLookupFunctionEntry(addr, &base, &hist);
+    if (!func) {  /* Found frame without .pdata: must be JIT-compiled code. */
+      ExitNo exitno;
+      uintptr_t stub = lj_trace_unwind(G2J(g), addr - sizeof(MCode), &exitno);
+      if (stub) {  /* Jump to side exit to unwind the trace. */
+	ctx.CONTEXT_REG_PC = stub;
+	G2J(g)->exitcode = errcode;
+	RtlRestoreContext(&ctx, NULL);  /* Does not return. */
+      }
+      break;
+    }
+    RtlVirtualUnwind(UNW_FLAG_NHANDLER, base, addr, func,
+		     &ctx, &hdata, &frame, NULL);
+    if (!addr) break;
+  }
+  /* Unwinding failed, if we end up here. */
+}
+#endif
+
 /* Raise Windows exception. */
 static void err_raise_ext(global_State *g, int errcode)
 {
-#if LJ_HASJIT
+#if LJ_UNWIND_JIT
+  if (tvref(g->jit_base)) {
+    err_unwind_win_jit(g, errcode);
+    return;  /* Unwinding failed. */
+  }
+#elif LJ_HASJIT
+  /* Cannot catch on-trace errors for Windows/x86 SEH. Unwind to interpreter. */
   setmref(g->jit_base, NULL);
 #endif
+  UNUSED(g);
   RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL);
 }
 
@@ -324,6 +376,7 @@ static void err_raise_ext(global_State *g, int errcode)
 typedef struct _Unwind_Context _Unwind_Context;
 
 #define _URC_OK			0
+#define _URC_FATAL_PHASE2_ERROR	2
 #define _URC_FATAL_PHASE1_ERROR	3
 #define _URC_HANDLER_FOUND	6
 #define _URC_INSTALL_CONTEXT	7
@@ -343,9 +396,11 @@ typedef struct _Unwind_Exception
   void (*excleanup)(int, struct _Unwind_Exception *);
   uintptr_t p1, p2;
 } __attribute__((__aligned__)) _Unwind_Exception;
+#define UNWIND_EXCEPTION_TYPE	_Unwind_Exception
 
 extern uintptr_t _Unwind_GetCFA(_Unwind_Context *);
 extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t);
+extern uintptr_t _Unwind_GetIP(_Unwind_Context *);
 extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t);
 extern void _Unwind_DeleteException(_Unwind_Exception *);
 extern int _Unwind_RaiseException(_Unwind_Exception *);
@@ -418,8 +473,130 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
   return _URC_CONTINUE_UNWIND;
 }
 
-#if LJ_UNWIND_EXT
-static __thread _Unwind_Exception static_uex;
+#if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT)
+struct dwarf_eh_bases { void *tbase, *dbase, *func; };
+extern const void *_Unwind_Find_FDE(void *pc, struct dwarf_eh_bases *bases);
+
+/* Verify that external error handling actually has a chance to work. */
+void lj_err_verify(void)
+{
+  struct dwarf_eh_bases ehb;
+  lj_assertX(_Unwind_Find_FDE((void *)lj_err_throw, &ehb), "broken build: external frame unwinding enabled, but missing -funwind-tables");
+  lj_assertX(_Unwind_Find_FDE((void *)_Unwind_RaiseException, &ehb), "broken build: external frame unwinding enabled, but system libraries have no unwind tables");
+}
+#endif
+
+#if LJ_UNWIND_JIT
+/* DWARF2 personality handler for JIT-compiled code. */
+static int err_unwind_jit(int version, int actions,
+  uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx)
+{
+  /* NYI: FFI C++ exception interoperability. */
+  if (version != 1 || !LJ_UEXCLASS_CHECK(uexclass))
+    return _URC_FATAL_PHASE1_ERROR;
+  if ((actions & _UA_SEARCH_PHASE)) {
+    return _URC_HANDLER_FOUND;
+  }
+  if ((actions & _UA_CLEANUP_PHASE)) {
+    global_State *g = *(global_State **)(uex+1);
+    ExitNo exitno;
+    uintptr_t addr = _Unwind_GetIP(ctx);  /* Return address _after_ call. */
+    uintptr_t stub = lj_trace_unwind(G2J(g), addr - sizeof(MCode), &exitno);
+    lj_assertG(tvref(g->jit_base), "unexpected throw across mcode frame");
+    if (stub) {  /* Jump to side exit to unwind the trace. */
+      G2J(g)->exitcode = LJ_UEXCLASS_ERRCODE(uexclass);
+#ifdef LJ_TARGET_MIPS
+      _Unwind_SetGR(ctx, 4, stub);
+      _Unwind_SetGR(ctx, 5, exitno);
+      _Unwind_SetIP(ctx, (uintptr_t)(void *)lj_vm_unwind_stub);
+#else
+      _Unwind_SetIP(ctx, stub);
+#endif
+      return _URC_INSTALL_CONTEXT;
+    }
+    return _URC_FATAL_PHASE2_ERROR;
+  }
+  return _URC_FATAL_PHASE1_ERROR;
+}
+
+/* DWARF2 template frame info for JIT-compiled code.
+**
+** After copying the template to the start of the mcode segment,
+** the frame handler function and the code size is patched.
+** The frame handler always installs a new context to jump to the exit,
+** so don't bother to add any unwind opcodes.
+*/
+static const uint8_t err_frame_jit_template[] = {
+#if LJ_BE
+  0,0,0,
+#endif
+  LJ_64 ? 0x1c : 0x14,  /* CIE length. */
+#if LJ_LE
+  0,0,0,
+#endif
+  0,0,0,0, 1, 'z','P','R',0,  /* CIE mark, CIE version, augmentation. */
+  1, LJ_64 ? 0x78 : 0x7c, LJ_TARGET_EHRAREG,  /* Code/data align, RA. */
+#if LJ_64
+  10, 0, 0,0,0,0,0,0,0,0, 0x1b,  /* Aug. data ABS handler, PCREL|SDATA4 code. */
+  0,0,0,0,0,  /* Alignment. */
+#else
+  6, 0, 0,0,0,0, 0x1b,  /* Aug. data ABS handler, PCREL|SDATA4 code. */
+  0,  /* Alignment. */
+#endif
+#if LJ_BE
+  0,0,0,
+#endif
+  LJ_64 ? 0x14 : 0x10,  /* FDE length. */
+  0,0,0,
+  LJ_64 ? 0x24 : 0x1c,  /* CIE offset. */
+  0,0,0,
+  LJ_64 ? 0x14 : 0x10,  /* Code offset. After Final FDE. */
+#if LJ_LE
+  0,0,0,
+#endif
+  0,0,0,0, 0, 0,0,0, /* Code size, augmentation length, alignment. */
+#if LJ_64
+  0,0,0,0,  /* Alignment. */
+#endif
+  0,0,0,0  /* Final FDE. */
+};
+
+#define ERR_FRAME_JIT_OFS_HANDLER	0x12
+#define ERR_FRAME_JIT_OFS_FDE		(LJ_64 ? 0x20 : 0x18)
+#define ERR_FRAME_JIT_OFS_CODE_SIZE	(LJ_64 ? 0x2c : 0x24)
+#if LJ_TARGET_OSX
+#define ERR_FRAME_JIT_OFS_REGISTER	ERR_FRAME_JIT_OFS_FDE
+#else
+#define ERR_FRAME_JIT_OFS_REGISTER	0
+#endif
+
+extern void __register_frame(const void *);
+extern void __deregister_frame(const void *);
+
+uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info)
+{
+  void **handler;
+  memcpy(info, err_frame_jit_template, sizeof(err_frame_jit_template));
+  handler = (void *)err_unwind_jit;
+  memcpy(info + ERR_FRAME_JIT_OFS_HANDLER, &handler, sizeof(handler));
+  *(uint32_t *)(info + ERR_FRAME_JIT_OFS_CODE_SIZE) =
+    (uint32_t)(sz - sizeof(err_frame_jit_template) - (info - (uint8_t *)base));
+  __register_frame(info + ERR_FRAME_JIT_OFS_REGISTER);
+#ifdef LUA_USE_ASSERT
+  {
+    struct dwarf_eh_bases ehb;
+    lj_assertX(_Unwind_Find_FDE(info + sizeof(err_frame_jit_template)+1, &ehb),
+	       "bad JIT unwind table registration");
+  }
+#endif
+  return info + sizeof(err_frame_jit_template);
+}
+
+void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info)
+{
+  UNUSED(base); UNUSED(sz);
+  __deregister_frame(info + ERR_FRAME_JIT_OFS_REGISTER);
+}
 #endif
 
 #else /* LJ_TARGET_ARM */
@@ -430,6 +607,7 @@ static __thread _Unwind_Exception static_uex;
 #define _US_FORCE_UNWIND		8
 
 typedef struct _Unwind_Control_Block _Unwind_Control_Block;
+#define UNWIND_EXCEPTION_TYPE	_Unwind_Control_Block
 
 struct _Unwind_Control_Block {
   uint64_t exclass;
@@ -488,25 +666,62 @@ LJ_FUNCA int lj_err_unwind_arm(int state, _Unwind_Control_Block *ucb,
   }
   if (__gnu_unwind_frame(ucb, ctx) != _URC_OK)
     return _URC_FAILURE;
+#ifdef LUA_USE_ASSERT
+  /* We should never get here unless this is a forced unwind aka backtrace. */
+  if (_Unwind_GetGR(ctx, 0) == 0xff33aa77) {
+    _Unwind_SetGR(ctx, 0, 0xff33aa88);
+  }
+#endif
   return _URC_CONTINUE_UNWIND;
 }
 
-#if LJ_UNWIND_EXT
-static __thread _Unwind_Control_Block static_uex;
+#if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT)
+typedef int (*_Unwind_Trace_Fn)(_Unwind_Context *, void *);
+extern int _Unwind_Backtrace(_Unwind_Trace_Fn, void *);
+
+static int err_verify_bt(_Unwind_Context *ctx, int *got)
+{
+  if (_Unwind_GetGR(ctx, 0) == 0xff33aa88) { *got = 2; }
+  else if (*got == 0) { *got = 1; _Unwind_SetGR(ctx, 0, 0xff33aa77); }
+  return _URC_OK;
+}
+
+/* Verify that external error handling actually has a chance to work. */
+void lj_err_verify(void)
+{
+  int got = 0;
+  _Unwind_Backtrace((_Unwind_Trace_Fn)err_verify_bt, &got);
+  lj_assertX(got == 2, "broken build: external frame unwinding enabled, but missing -funwind-tables");
+}
 #endif
+
+/*
+** Note: LJ_UNWIND_JIT is not implemented for 32 bit ARM.
+**
+** The quirky ARM unwind API doesn't have __register_frame().
+** A potential workaround might involve _Unwind_Backtrace.
+** But most 32 bit ARM targets don't qualify for LJ_UNWIND_EXT, anyway,
+** since they are built without unwind tables by default.
+*/
+
 #endif /* LJ_TARGET_ARM */
 
+
 #if LJ_UNWIND_EXT
+static __thread struct {
+  UNWIND_EXCEPTION_TYPE ex;
+  global_State *g;
+} static_uex;
+
 /* Raise external exception. */
 static void err_raise_ext(global_State *g, int errcode)
 {
-#if LJ_HASJIT
-  setmref(g->jit_base, NULL);
-#endif
   memset(&static_uex, 0, sizeof(static_uex));
-  static_uex.exclass = LJ_UEXCLASS_MAKE(errcode);
-  _Unwind_RaiseException(&static_uex);
+  static_uex.ex.exclass = LJ_UEXCLASS_MAKE(errcode);
+  static_uex.g = g;
+  _Unwind_RaiseException(&static_uex.ex);
 }
+
 #endif
 
 #endif
@@ -615,7 +830,7 @@ static ptrdiff_t finderrfunc(lua_State *L)
 /* Runtime error. */
 LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L)
 {
-  ptrdiff_t ef = finderrfunc(L);
+  ptrdiff_t ef = (LJ_HASJIT && tvref(G(L)->jit_base)) ? 0 : finderrfunc(L);
   if (ef) {
     TValue *errfunc = restorestack(L, ef);
     TValue *top = L->top;
@@ -634,6 +849,16 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L)
   lj_err_throw(L, LUA_ERRRUN);
 }
 
+#if LJ_HASJIT
+LJ_NOINLINE void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode)
+{
+  if (errcode == LUA_ERRRUN)
+    lj_err_run(L);
+  else
+    lj_err_throw(L, errcode);
+}
+#endif
+
 /* Formatted runtime error message. */
 LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...)
 {

+ 18 - 1
src/lj_err.h

@@ -23,7 +23,10 @@ LJ_DATA const char *lj_err_allmsg;
 LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em);
 LJ_FUNCA_NORET void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode);
 LJ_FUNC_NORET void lj_err_mem(lua_State *L);
-LJ_FUNCA_NORET void LJ_FASTCALL lj_err_run(lua_State *L);
+LJ_FUNC_NORET void LJ_FASTCALL lj_err_run(lua_State *L);
+#if LJ_HASJIT
+LJ_FUNCA_NORET void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode);
+#endif
 LJ_FUNC_NORET void lj_err_msg(lua_State *L, ErrMsg em);
 LJ_FUNC_NORET void lj_err_lex(lua_State *L, GCstr *src, const char *tok,
 			      BCLine line, ErrMsg em, va_list argp);
@@ -38,4 +41,18 @@ LJ_FUNC_NORET void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...);
 LJ_FUNC_NORET void lj_err_argtype(lua_State *L, int narg, const char *xname);
 LJ_FUNC_NORET void lj_err_argt(lua_State *L, int narg, int tt);
 
+#if LJ_UNWIND_JIT && !LJ_ABI_WIN
+LJ_FUNC uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info);
+LJ_FUNC void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info);
+#else
+#define lj_err_register_mcode(base, sz, info)	(info)
+#define lj_err_deregister_mcode(base, sz, info)	UNUSED(base)
+#endif
+
+#if LJ_UNWIND_EXT && !LJ_ABI_WIN && defined(LUA_USE_ASSERT)
+LJ_FUNC void lj_err_verify(void);
+#else
+#define lj_err_verify()		((void)0)
+#endif
+
 #endif

+ 2 - 0
src/lj_ffrecord.c

@@ -455,6 +455,7 @@ static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd)
 #endif
     lj_record_call(J, 0, J->maxslot - 1);
     rd->nres = -1;  /* Pending call. */
+    J->needsnap = 1;  /* Start catching on-trace errors. */
   }  /* else: Interpreter will throw. */
 }
 
@@ -490,6 +491,7 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
     if (errcode)
       lj_err_throw(J->L, errcode);  /* Propagate errors. */
     rd->nres = -1;  /* Pending call. */
+    J->needsnap = 1;  /* Start catching on-trace errors. */
   }  /* else: Interpreter will throw. */
 }
 

+ 2 - 0
src/lj_jit.h

@@ -184,6 +184,7 @@ typedef struct MCLink {
 typedef struct SnapShot {
   uint32_t mapofs;	/* Offset into snapshot map. */
   IRRef1 ref;		/* First IR ref for this snapshot. */
+  uint16_t mcofs;	/* Offset into machine code in MCode units. */
   uint8_t nslots;	/* Number of valid slots. */
   uint8_t topslot;	/* Maximum frame extent. */
   uint8_t nent;		/* Number of compressed entries. */
@@ -485,6 +486,7 @@ typedef struct jit_State {
   const BCIns *startpc;	/* Bytecode PC of starting instruction. */
   TraceNo parent;	/* Parent of current side trace (0 for root traces). */
   ExitNo exitno;	/* Exit number in parent of current side trace. */
+  int exitcode;		/* Exit code from unwound trace. */
 
   BCIns *patchpc;	/* PC for pending re-patch. */
   BCIns patchins;	/* Instruction for pending re-patch. */

+ 4 - 1
src/lj_mcode.c

@@ -269,6 +269,7 @@ static void mcode_allocarea(jit_State *J)
   ((MCLink *)J->mcarea)->next = oldarea;
   ((MCLink *)J->mcarea)->size = sz;
   J->szallmcarea += sz;
+  J->mcbot = (MCode *)lj_err_register_mcode(J->mcarea, sz, (uint8_t *)J->mcbot);
 }
 
 /* Free all MCode areas. */
@@ -279,7 +280,9 @@ void lj_mcode_free(jit_State *J)
   J->szallmcarea = 0;
   while (mc) {
     MCode *next = ((MCLink *)mc)->next;
-    mcode_free(J, mc, ((MCLink *)mc)->size);
+    size_t sz = ((MCLink *)mc)->size;
+    lj_err_deregister_mcode(mc, sz, (uint8_t *)mc + sizeof(MCLink));
+    mcode_free(J, mc, sz);
     mc = next;
   }
 }

+ 1 - 0
src/lj_opt_loop.c

@@ -225,6 +225,7 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
   /* Setup new snapshot. */
   snap->mapofs = (uint32_t)nmapofs;
   snap->ref = (IRRef1)J->cur.nins;
+  snap->mcofs = 0;
   snap->nslots = nslots;
   snap->topslot = osnap->topslot;
   snap->count = 0;

+ 2 - 1
src/lj_record.c

@@ -832,6 +832,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
     J->base -= cbase;
     J->base[--rbase] = TREF_TRUE;  /* Prepend true to results. */
     frame = frame_prevd(frame);
+    J->needsnap = 1;  /* Stop catching on-trace errors. */
   }
   /* Return to lower frame via interpreter for unhandled cases. */
   if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) &&
@@ -2050,7 +2051,7 @@ void lj_record_ins(jit_State *J)
   /* Need snapshot before recording next bytecode (e.g. after a store). */
   if (J->needsnap) {
     J->needsnap = 0;
-    lj_snap_purge(J);
+    if (J->pt) lj_snap_purge(J);
     lj_snap_add(J);
     J->mergesnap = 1;
   }

+ 1 - 0
src/lj_snap.c

@@ -171,6 +171,7 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
   nent += snapshot_framelinks(J, p + nent, &snap->topslot);
   snap->mapofs = (uint32_t)nsnapmap;
   snap->ref = (IRRef1)J->cur.nins;
+  snap->mcofs = 0;
   snap->nslots = (uint8_t)nslots;
   snap->count = 0;
   J->cur.nsnapmap = (uint32_t)(nsnapmap + nent);

+ 1 - 0
src/lj_state.c

@@ -156,6 +156,7 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud)
   fixstring(lj_err_str(L, LJ_ERR_ERRMEM));  /* Preallocate memory error msg. */
   g->gc.threshold = 4*g->gc.total;
   lj_trace_initstate(g);
+  lj_err_verify();
   return NULL;
 }
 

+ 2 - 0
src/lj_target_x86.h

@@ -165,6 +165,8 @@ typedef struct {
 #define EXITSTUB_SPACING	(2+2)
 #define EXITSTUBS_PER_GROUP	32
 
+#define EXITTRACE_VMSTATE	1	/* g->vmstate has traceno on exit. */
+
 /* -- x86 ModRM operand encoding ------------------------------------------ */
 
 typedef enum {

+ 52 - 3
src/lj_trace.c

@@ -821,7 +821,7 @@ static void trace_exit_regs(lua_State *L, ExitState *ex)
 }
 #endif
 
-#ifdef EXITSTATE_PCREG
+#if defined(EXITSTATE_PCREG) || (LJ_UNWIND_JIT && !EXITTRACE_VMSTATE)
 /* Determine trace number from pc of exit instruction. */
 static TraceNo trace_exit_find(jit_State *J, MCode *pc)
 {
@@ -843,10 +843,18 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
   lua_State *L = J->L;
   ExitState *ex = (ExitState *)exptr;
   ExitDataCP exd;
-  int errcode;
+  int errcode, exitcode = J->exitcode;
+  TValue exiterr;
   const BCIns *pc;
   void *cf;
   GCtrace *T;
+
+  setnilV(&exiterr);
+  if (exitcode) {  /* Trace unwound with error code. */
+    J->exitcode = 0;
+    copyTV(L, &exiterr, L->top-1);
+  }
+
 #ifdef EXITSTATE_PCREG
   J->parent = trace_exit_find(J, (MCode *)(intptr_t)ex->gpr[EXITSTATE_PCREG]);
 #endif
@@ -866,6 +874,8 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
   if (errcode)
     return -errcode;  /* Return negated error code. */
 
+  if (exitcode) copyTV(L, L->top++, &exiterr);  /* Anchor the error object. */
+
   if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)))
     lj_vmevent_send(L, TEXIT,
       lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK);
@@ -877,7 +887,9 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
   pc = exd.pc;
   cf = cframe_raw(L->cframe);
   setcframe_pc(cf, pc);
-  if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) {
+  if (exitcode) {
+    return -exitcode;
+  } else if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) {
     /* Just exit to interpreter. */
   } else if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) {
     if (!(G(L)->hookmask & HOOK_GC))
@@ -915,4 +927,41 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
   }
 }
 
+#if LJ_UNWIND_JIT
+/* Given an mcode address determine trace exit address for unwinding. */
+uintptr_t LJ_FASTCALL lj_trace_unwind(jit_State *J, uintptr_t addr, ExitNo *ep)
+{
+#if EXITTRACE_VMSTATE
+  TraceNo traceno = J2G(J)->vmstate;
+#else
+  TraceNo traceno = trace_exit_find(J, (MCode *)addr);
+#endif
+  GCtrace *T = traceref(J, traceno);
+  if (T
+#if EXITTRACE_VMSTATE
+      && addr >= (uintptr_t)T->mcode && addr < (uintptr_t)T->mcode + T->szmcode
+#endif
+     ) {
+    SnapShot *snap = T->snap;
+    SnapNo lo = 0, exitno = T->nsnap;
+    uintptr_t ofs = (uintptr_t)((MCode *)addr - T->mcode);  /* MCode units! */
+    /* Rightmost binary search for mcode offset to determine exit number. */
+    do {
+      SnapNo mid = (lo+exitno) >> 1;
+      if (ofs < snap[mid].mcofs) exitno = mid; else lo = mid + 1;
+    } while (lo < exitno);
+    exitno--;
+    *ep = exitno;
+#ifdef EXITSTUBS_PER_GROUP
+    return (uintptr_t)exitstub_addr(J, exitno);
+#else
+    return (uintptr_t)exitstub_trace_addr(T, exitno);
+#endif
+  }
+  /* Cannot correlate addr with trace/exit. This will be fatal. */
+  lj_assertJ(0, "bad exit pc");
+  return 0;
+}
+#endif
+
 #endif

+ 3 - 0
src/lj_trace.h

@@ -37,6 +37,9 @@ LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc);
 LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc);
 LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc);
 LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr);
+#if LJ_UNWIND_EXT
+LJ_FUNC uintptr_t LJ_FASTCALL lj_trace_unwind(jit_State *J, uintptr_t addr, ExitNo *ep);
+#endif
 
 /* Signal asynchronous abort of trace or end of trace. */
 #define lj_trace_abort(g)	(G2J(g)->state &= ~LJ_TRACE_ACTIVE)

+ 3 - 0
src/lj_vm.h

@@ -26,6 +26,9 @@ LJ_ASMF void lj_vm_unwind_ff_eh(void);
 #if LJ_TARGET_X86ORX64
 LJ_ASMF void lj_vm_unwind_rethrow(void);
 #endif
+#if LJ_TARGET_MIPS
+LJ_ASMF void lj_vm_unwind_stub(void);
+#endif
 
 /* Miscellaneous functions. */
 #if LJ_TARGET_X86ORX64

+ 2 - 1
src/vm_arm.dasc

@@ -2245,8 +2245,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  b <2
   |
   |9:  // Rethrow error from the right C frame.
+  |  rsb CARG2, CARG1, #0
   |  mov CARG1, L
-  |  bl extern lj_err_run		// (lua_State *L)
+  |  bl extern lj_err_trace		// (lua_State *L, int errcode)
   |.endif
   |
   |//-----------------------------------------------------------------------

+ 2 - 1
src/vm_arm64.dasc

@@ -2035,8 +2035,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  b <2
   |
   |9:  // Rethrow error from the right C frame.
+  |  neg CARG2w, CARG1w
   |  mov CARG1, L
-  |  bl extern lj_err_run		// (lua_State *L)
+  |  bl extern lj_err_trace		// (lua_State *L, int errcode)
   |.endif
   |
   |//-----------------------------------------------------------------------

+ 7 - 2
src/vm_mips.dasc

@@ -501,6 +501,10 @@ static void build_subroutines(BuildCtx *ctx)
   |  b ->vm_returnc
   |.  li RD, 16				// 2 results: false + error message.
   |
+  |->vm_unwind_stub:			// Jump to exit stub from unwinder.
+  |  jr CARG1
+  |.  move ra, CARG2
+  |
   |//-----------------------------------------------------------------------
   |//-- Grow stack for calls -----------------------------------------------
   |//-----------------------------------------------------------------------
@@ -2520,8 +2524,9 @@ static void build_subroutines(BuildCtx *ctx)
   |.  addu RA, RA, BASE
   |
   |9:  // Rethrow error from the right C frame.
-  |  load_got lj_err_run
-  |  call_intern lj_err_run		// (lua_State *L)
+  |  load_got lj_err_trace
+  |  sub CARG2, r0, CRET1
+  |  call_intern lj_err_trace		// (lua_State *L, int errcode)
   |.  move CARG1, L
   |.endif
   |

+ 7 - 2
src/vm_mips64.dasc

@@ -556,6 +556,10 @@ static void build_subroutines(BuildCtx *ctx)
   |  b ->vm_returnc
   |.  li RD, 16				// 2 results: false + error message.
   |
+  |->vm_unwind_stub:			// Jump to exit stub from unwinder.
+  |  jr CARG1
+  |.  move ra, CARG2
+  |
   |//-----------------------------------------------------------------------
   |//-- Grow stack for calls -----------------------------------------------
   |//-----------------------------------------------------------------------
@@ -2622,8 +2626,9 @@ static void build_subroutines(BuildCtx *ctx)
   |.  daddu RA, RA, BASE
   |
   |9:  // Rethrow error from the right C frame.
-  |  load_got lj_err_run
-  |  call_intern lj_err_run		// (lua_State *L)
+  |  load_got lj_err_trace
+  |  sub CARG2, r0, CRET1
+  |  call_intern lj_err_trace		// (lua_State *L, int errcode)
   |.  move CARG1, L
   |.endif
   |

+ 2 - 1
src/vm_ppc.dasc

@@ -3090,8 +3090,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  bctr
   |
   |9:  // Rethrow error from the right C frame.
+  |  neg CARG2, CARG1
   |  mr CARG1, L
-  |  bl extern lj_err_run		// (lua_State *L)
+  |  bl extern lj_err_trace		// (lua_State *L, int errcode)
   |.endif
   |
   |//-----------------------------------------------------------------------

+ 3 - 1
src/vm_x64.dasc

@@ -2509,8 +2509,10 @@ static void build_subroutines(BuildCtx *ctx)
   |  jmp <2
   |
   |9:  // Rethrow error from the right C frame.
+  |  mov CARG2d, RDd
   |  mov CARG1, L:RB
-  |  call extern lj_err_run		// (lua_State *L)
+  |  neg CARG2d
+  |  call extern lj_err_trace		// (lua_State *L, int errcode)
   |.endif
   |
   |//-----------------------------------------------------------------------

+ 3 - 1
src/vm_x86.dasc

@@ -2964,8 +2964,10 @@ static void build_subroutines(BuildCtx *ctx)
   |  jmp <2
   |
   |9:  // Rethrow error from the right C frame.
+  |  mov FCARG2, RD
   |  mov FCARG1, L:RB
-  |  call extern lj_err_run@4		// (lua_State *L)
+  |  neg FCARG2
+  |  call extern lj_err_trace@8		// (lua_State *L, int errcode)
   |.endif
   |
   |//-----------------------------------------------------------------------