Browse Source

Add support for WIN64 exception handling to external unwinder.

Modify unwinding to always return _ff or _c unwind type.
Generate PE object .pdata/.xdata sections for x64 interpreter.
Can drop r12-r15 saves in Windows/x64 interpreter now.
Mike Pall 15 years ago
parent
commit
c31ac26fb9
5 changed files with 177 additions and 44 deletions
  1. 69 1
      src/buildvm_peobj.c
  2. 13 17
      src/buildvm_x86.dasc
  3. 1 1
      src/buildvm_x86.h
  4. 83 15
      src/lj_err.c
  5. 11 10
      src/lj_frame.h

+ 69 - 1
src/buildvm_peobj.c

@@ -90,6 +90,7 @@ typedef struct PEsymaux {
 #define PEOBJ_ARCH_TARGET	0x8664
 #define PEOBJ_RELOC_REL32	0x04  /* MS: REL32, GNU: DISP32. */
 #define PEOBJ_RELOC_DIR32	0x02
+#define PEOBJ_RELOC_ADDR32NB	0x03
 #define PEOBJ_SYM_PREFIX	""
 #endif
 
@@ -98,7 +99,10 @@ enum {
   PEOBJ_SECT_ABS = -2,
   PEOBJ_SECT_UNDEF = -1,
   PEOBJ_SECT_TEXT,
-  /* TODO: add .pdata/.xdata for x64. */
+#if LJ_TARGET_X64
+  PEOBJ_SECT_PDATA,
+  PEOBJ_SECT_XDATA,
+#endif
   PEOBJ_SECT_RDATA,
   PEOBJ_SECT_RDATA_Z,
   PEOBJ_NSECTIONS
@@ -196,6 +200,24 @@ void emit_peobj(BuildCtx *ctx)
   /* Flags: 60 = read+execute, 50 = align16, 20 = code. */
   pesect[PEOBJ_SECT_TEXT].flags = 0x60500020;
 
+#if LJ_TARGET_X64
+  memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1);
+  pesect[PEOBJ_SECT_PDATA].ofs = sofs;
+  sofs += (pesect[PEOBJ_SECT_PDATA].size = 3*4);
+  pesect[PEOBJ_SECT_PDATA].relocofs = sofs;
+  sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = 3) * PEOBJ_RELOC_SIZE;
+  /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
+  pesect[PEOBJ_SECT_PDATA].flags = 0x40300040;
+
+  memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1);
+  pesect[PEOBJ_SECT_XDATA].ofs = sofs;
+  sofs += (pesect[PEOBJ_SECT_XDATA].size = 8*2+4);  /* See below. */
+  pesect[PEOBJ_SECT_XDATA].relocofs = sofs;
+  sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE;
+  /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
+  pesect[PEOBJ_SECT_XDATA].flags = 0x40300040;
+#endif
+
   memcpy(pesect[PEOBJ_SECT_RDATA].name, ".rdata", sizeof(".rdata")-1);
   pesect[PEOBJ_SECT_RDATA].ofs = sofs;
   sofs += (pesect[PEOBJ_SECT_RDATA].size = ctx->npc*sizeof(uint16_t));
@@ -228,6 +250,9 @@ void emit_peobj(BuildCtx *ctx)
 #if !LJ_HASJIT
   pehdr.nsyms -= 7;
 #endif
+#if LJ_TARGET_X64
+  pehdr.nsyms += 1;  /* Symbol for lj_err_unwind_win64. */
+#endif
 
   /* Write PE object header and all sections. */
   owrite(ctx, &pehdr, sizeof(PEheader));
@@ -243,6 +268,41 @@ void emit_peobj(BuildCtx *ctx)
     owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
   }
 
+#if LJ_TARGET_X64
+  { /* Write .pdata section. */
+    uint32_t pdata[3];  /* Start of .text, end of .text and .xdata. */
+    PEreloc reloc;
+    pdata[0] = 0; pdata[1] = (uint32_t)ctx->codesz; pdata[2] = 0;
+    owrite(ctx, &pdata, sizeof(pdata));
+    reloc.vaddr = 0; reloc.symidx = 1+2+relocsyms+2+2+1;
+    reloc.type = PEOBJ_RELOC_ADDR32NB;
+    owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
+    reloc.vaddr = 4; reloc.symidx = 1+2+relocsyms+2+2+1;
+    reloc.type = PEOBJ_RELOC_ADDR32NB;
+    owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
+    reloc.vaddr = 8; reloc.symidx = 1+2+relocsyms+2;
+    reloc.type = PEOBJ_RELOC_ADDR32NB;
+    owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
+  }
+  { /* Write .xdata section. */
+    uint16_t xdata[8+2];
+    PEreloc reloc;
+    xdata[0] = 0x01|0x08|0x10;  /* Ver. 1, uhandler/ehandler, prolog size 0. */
+    xdata[1] = 5;  /* Number of unwind codes, no frame pointer. */
+    xdata[2] = 0x4200;  /* Stack offset 4*8+8 = aword*5. */
+    xdata[3] = 0x3000;  /* Push rbx. */
+    xdata[4] = 0x6000;  /* Push rsi. */
+    xdata[5] = 0x7000;  /* Push rdi. */
+    xdata[6] = 0x5000;  /* Push rbp. */
+    xdata[7] = 0;  /* Alignment. */
+    xdata[8] = xdata[9] = 0;  /* Relocated address of exception handler. */
+    owrite(ctx, &xdata, sizeof(xdata));
+    reloc.vaddr = sizeof(xdata)-4; reloc.symidx = 1+2+relocsyms+2+2;
+    reloc.type = PEOBJ_RELOC_ADDR32NB;
+    owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
+  }
+#endif
+
   /* Write .rdata section. */
   for (i = 0; i < ctx->npc; i++) {
     uint16_t pcofs = (uint16_t)ctx->sym_ofs[i];
@@ -279,6 +339,14 @@ void emit_peobj(BuildCtx *ctx)
       emit_peobj_sym(ctx, name, 0,
 		     PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
     }
+
+#if LJ_TARGET_X64
+    emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA);
+    emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA);
+    emit_peobj_sym(ctx, PEOBJ_SYM_PREFIX "lj_err_unwind_win64", 0,
+		   PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
+#endif
+
     emit_peobj_sym(ctx, PEOBJ_SYM_PREFIX LABEL_ASM_BEGIN, 0,
 		   PEOBJ_SECT_TEXT, PEOBJ_TYPE_NULL, PEOBJ_SCL_EXTERN);
     for (i = nzsym; i < ctx->nsym; i++) {

+ 13 - 17
src/buildvm_x86.dasc

@@ -168,28 +168,22 @@
 |.define CFRAME_SPACE,	aword*5			// Delta for rsp (see <--).
 |.macro saveregs
 |  push rbp; push rdi; push rsi; push rbx
-|  push r15; push r14; push r13; push r12
 |  sub rsp, CFRAME_SPACE
 |.endmacro
 |.macro restoreregs
 |  add rsp, CFRAME_SPACE
-|  pop r12; pop r13; pop r14; pop r15
 |  pop rbx; pop rsi; pop rdi; pop rbp
 |.endmacro
 |
-|.define SAVE_CFRAME,	aword [rsp+aword*17]
-|.define SAVE_PC,	dword [rsp+dword*33]
-|.define SAVE_L,	dword [rsp+dword*32]
-|.define SAVE_ERRF,	dword [rsp+dword*31]
-|.define SAVE_NRES,	dword [rsp+dword*30]
-|.define TMP2,		dword [rsp+dword*29]
-|.define TMP1,		dword [rsp+dword*28]
+|.define SAVE_CFRAME,	aword [rsp+aword*13]
+|.define SAVE_PC,	dword [rsp+dword*25]
+|.define SAVE_L,	dword [rsp+dword*24]
+|.define SAVE_ERRF,	dword [rsp+dword*23]
+|.define SAVE_NRES,	dword [rsp+dword*22]
+|.define TMP2,		dword [rsp+dword*21]
+|.define TMP1,		dword [rsp+dword*20]
 |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
-|.define SAVE_RET,	aword [rsp+aword*13]	//<-- rsp entering interpreter.
-|.define SAVE_R8,	aword [rsp+aword*12]
-|.define SAVE_R7,	aword [rsp+aword*11]
-|.define SAVE_R6,	aword [rsp+aword*10]
-|.define SAVE_R5,	aword [rsp+aword*9]
+|.define SAVE_RET,	aword [rsp+aword*9]	//<-- rsp entering interpreter.
 |.define SAVE_R4,	aword [rsp+aword*8]
 |.define SAVE_R3,	aword [rsp+aword*7]
 |.define SAVE_R2,	aword [rsp+aword*6]
@@ -202,7 +196,7 @@
 |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
 |
 |// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
-|.define TMPQ,		qword [rsp+aword*14]
+|.define TMPQ,		qword [rsp+aword*10]
 |.define MULTRES,	TMP2
 |.define TMPa,		ARG5
 |.define ARG5d,		dword [rsp+aword*4]
@@ -861,10 +855,12 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  mov dword [RA+RD*8-4], LJ_TNIL	// Ensure one valid arg.
   |  mov RC, RA				// ... in [RC]
   |  mov PC, [RB-12]			// Restore PC from [cont|PC].
-  |  mov RA, dword [RB-16]
   |.if X64
+  |  movsxd RAa, dword [RB-16]		// May be negative on WIN64 with debug.
   |  lea KBASEa, qword [=>0]
   |  add RAa, KBASEa
+  |.else
+  |  mov RA, dword [RB-16]
   |.endif
   |  mov LFUNC:KBASE, [BASE-8]
   |  mov PROTO:KBASE, LFUNC:KBASE->pt
@@ -1854,7 +1850,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |.ffunc coroutine_yield
   |  mov L:RB, SAVE_L
   |  mov [RA-4], PC
-  |  test aword L:RB->cframe, CFRAME_CANYIELD
+  |  test aword L:RB->cframe, CFRAME_RESUME
   |  jz ->fff_fallback
   |  mov L:RB->base, RA
   |  lea RC, [RA+NARGS:RC*8-8]

+ 1 - 1
src/buildvm_x86.h

@@ -1177,7 +1177,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   dasm_put(Dst, 3443, Dt1(->top), Dt1(->base), Dt8(->upvalue[0].gcr), Dt1(->cframe), Dt1(->status), LUA_YIELD, Dt1(->top), Dt1(->base));
   dasm_put(Dst, 3523, Dt1(->maxstack), Dt1(->top), Dt1(->base), Dt1(->top), DISPATCH_GL(vmstate), ~LJ_VMST_INTERP, Dt1(->base));
   dasm_put(Dst, 3631, LUA_YIELD, Dt1(->base), Dt1(->top), Dt1(->top), Dt1(->maxstack), FRAME_TYPE);
-  dasm_put(Dst, 3727, Dt1(->top), Dt1(->base), Dt1(->cframe), CFRAME_CANYIELD, Dt1(->base), Dt1(->top), Dt1(->cframe), LUA_YIELD, Dt1(->status));
+  dasm_put(Dst, 3727, Dt1(->top), Dt1(->base), Dt1(->cframe), CFRAME_RESUME, Dt1(->base), Dt1(->top), Dt1(->cframe), LUA_YIELD, Dt1(->status));
   if (sse) {
     dasm_put(Dst, 3813, 1+1, LJ_TISNUM);
   } else {

+ 83 - 15
src/lj_err.c

@@ -493,7 +493,7 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
 	  L->cframe = NULL;
 	  L->status = cast_byte(errcode);
 	}
-	return cframe_raw(cf);
+	return cf;
       }
       if (errcode) {
 	L->cframe = cframe_prev(cf);
@@ -514,9 +514,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
 	L->cframe = cf;
 	L->base = frame_prevd(frame) + 1;
 	unwindstack(L, L->base);
-	return NULL;  /* Call special handler. */
       }
-      return cf;
+      return (void *)((intptr_t)cf | CFRAME_UNWIND_FF);
     }
   }
   /* No C frame. */
@@ -528,7 +527,7 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
       G(L)->panic(L);
     exit(EXIT_FAILURE);
   }
-  return L;  /* Anything not-NULL will do. */
+  return L;  /* Anything non-NULL will do. */
 }
 
 /* -- External frame unwinding -------------------------------------------- */
@@ -574,12 +573,12 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, _Unwind_Action actions,
       errcode = LUA_ERRRUN;
     }
 #if LJ_UNWIND_EXT
-    if (err_unwind(L, cf, errcode)) {
+    cf = err_unwind(L, cf, errcode);
+    if (cf) {
       _Unwind_SetGR(ctx, 0, errcode);
-      _Unwind_SetIP(ctx, (_Unwind_Ptr)lj_vm_unwind_c_eh);
-      return _URC_INSTALL_CONTEXT;
-    } else if ((actions & _UA_HANDLER_FRAME)) {
-      _Unwind_SetIP(ctx, (_Unwind_Ptr)lj_vm_unwind_ff_eh);
+      _Unwind_SetIP(ctx, (_Unwind_Ptr)(cframe_unwind_ff(cf) ?
+				       lj_vm_unwind_ff_eh :
+				       lj_vm_unwind_c_eh));
       return _URC_INSTALL_CONTEXT;
     }
 #else
@@ -607,20 +606,89 @@ static void err_raise_ext(int errcode)
 
 #elif defined(_WIN64)
 
+/*
+** Someone in Redmond owes me several days of my life. A lot of this is
+** undocumented or just plain wrong on MSDN. Some of it can be gathered
+** from 3rd party docs or must be found by trial-and-error. They really
+** don't want you to write your own language-specific exception handler
+** or to interact gracefully with MSVC. :-(
+**
+** Apparently MSVC doesn't call C++ destructors for foreign exceptions
+** unless you compile your C++ code with /EHa. Unfortunately this means
+** catch (...) also catches things like access violations. The use of
+** _set_se_translator doesn't really help, because it requires /EHa, too.
+*/
+
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 
-#define LJ_EXCODE		((DWORD)0x024c4a00)
+/*
+** Dispatcher context, received by lj_err_unwind_win64 as its last argument.
+** Only the HistoryTable field is read by that handler.
+** Taken from: http://www.nynaeve.net/?p=99
+*/
+typedef struct UndocumentedDispatcherContext {
+  ULONG64 ControlPc;
+  ULONG64 ImageBase;
+  PRUNTIME_FUNCTION FunctionEntry;
+  ULONG64 EstablisherFrame;
+  ULONG64 TargetIp;
+  PCONTEXT ContextRecord;
+  PEXCEPTION_ROUTINE LanguageHandler;
+  PVOID HandlerData;
+  PUNWIND_HISTORY_TABLE HistoryTable;
+  ULONG ScopeIndex;
+  ULONG Fill0;
+} UndocumentedDispatcherContext;
+
+#ifdef _MSC_VER
+/* Another wild guess. Return type spelled out: implicit int is not valid C. */
+extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
+#endif
+
+#define LJ_MSVC_EXCODE		((DWORD)0xe06d7363)
+
+#define LJ_EXCODE		((DWORD)0xe24c4a00)
 #define LJ_EXCODE_MAKE(c)	(LJ_EXCODE | (DWORD)(c))
 #define LJ_EXCODE_CHECK(cl)	(((cl) ^ LJ_EXCODE) <= 0xff)
 #define LJ_EXCODE_ERRCODE(cl)	(cast_int((cl) & 0xff))
 
-/* NYI: Win64 exception handler for interpreter frame. */
+/*
+** Win64 exception handler for interpreter frame.
+**
+** rec       Exception record; its flags separate the search phase from the
+**           unwind phase and its code identifies the exception's origin.
+** cf        Frame handed to the handler; cframe_L(cf) yields the lua_State.
+** ctx       Context record, passed through to RtlUnwindEx.
+** dispatch  Dispatcher context; only its HistoryTable is used here.
+**
+** Always returns ExceptionContinueSearch. When the exception is caught,
+** RtlUnwindEx transfers control and is not expected to return.
+*/
+LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec,
+  void *cf, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
+{
+  lua_State *L = cframe_L(cf);
+  if ((rec->ExceptionFlags & 6)) {  /* EH_UNWINDING|EH_EXIT_UNWIND */
+    err_unwind(L, cf, 1);  /* Unwind internal frames. */
+  } else {
+    void *cf2 = err_unwind(L, cf, 0);
+    if (cf2) {  /* We catch it, so start unwinding the upper frames. */
+      int errcode;
+      if (LJ_EXCODE_CHECK(rec->ExceptionCode)) {
+	errcode = LJ_EXCODE_ERRCODE(rec->ExceptionCode);
+      } else if (rec->ExceptionCode == LJ_MSVC_EXCODE) {
+#ifdef _MSC_VER
+	__DestructExceptionObject(rec, 1);
+#endif
+	setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
+	errcode = LUA_ERRRUN;
+      } else {  /* Don't catch access violations etc. */
+	return ExceptionContinueSearch;
+      }
+      /* Unwind the stack and call all handlers for all lower C frames
+      ** (including ourselves) again with EH_UNWINDING set. Then set
+      ** rsp = cf, rax = errcode and jump to the specified target.
+      ** The error code is widened through intptr_t, because int and
+      ** pointers differ in size on Win64.
+      */
+      RtlUnwindEx(cf, (void *)(cframe_unwind_ff(cf2) ?
+			       lj_vm_unwind_ff_eh :
+			       lj_vm_unwind_c_eh),
+		  rec, (void *)(intptr_t)errcode, ctx, dispatch->HistoryTable);
+      /* RtlUnwindEx should never return. */
+    }
+  }
+  return ExceptionContinueSearch;
+}
 
 /* Raise Windows exception. */
 static void err_raise_ext(int errcode)
 {
-  RaiseException(LJ_EXCODE_MAKE(errcode), 0, 0, NULL);
+  RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL);
 }
 
 #endif
@@ -650,10 +718,10 @@ LJ_NOINLINE void lj_err_throw(lua_State *L, int errcode)
 #else
   {
     void *cf = err_unwind(L, NULL, errcode);
-    if (cf)
-      lj_vm_unwind_c(cf, errcode);
+    if (cframe_unwind_ff(cf))
+      lj_vm_unwind_ff(cframe_raw(cf));
     else
-      lj_vm_unwind_ff(cframe_raw(L->cframe));
+      lj_vm_unwind_c(cframe_raw(cf), errcode);
   }
 #endif
   exit(EXIT_FAILURE);

+ 11 - 10
src/lj_frame.h

@@ -67,13 +67,13 @@ enum {
 #define CFRAME_SIZE		(12*4)
 #elif LJ_TARGET_X64
 #if _WIN64
-#define CFRAME_OFS_PREV		(17*8)
-#define CFRAME_OFS_PC		(33*4)
-#define CFRAME_OFS_L		(32*4)
-#define CFRAME_OFS_ERRF		(31*4)
-#define CFRAME_OFS_NRES		(30*4)
-#define CFRAME_OFS_MULTRES	(29*4)
-#define CFRAME_SIZE		(14*8)
+#define CFRAME_OFS_PREV		(13*8)
+#define CFRAME_OFS_PC		(25*4)
+#define CFRAME_OFS_L		(24*4)
+#define CFRAME_OFS_ERRF		(23*4)
+#define CFRAME_OFS_NRES		(22*4)
+#define CFRAME_OFS_MULTRES	(21*4)
+#define CFRAME_SIZE		(10*8)
 #else
 #define CFRAME_OFS_PREV		(4*8)
 #define CFRAME_OFS_PC		(5*4)
@@ -88,8 +88,8 @@ enum {
 #endif
 
 #define CFRAME_RESUME		1
-#define CFRAME_CANYIELD		((intptr_t)(CFRAME_RESUME))
-#define CFRAME_RAWMASK		(~CFRAME_CANYIELD)
+#define CFRAME_UNWIND_FF	2  /* Only used in unwinder. */
+#define CFRAME_RAWMASK		(~(intptr_t)(CFRAME_RESUME|CFRAME_UNWIND_FF))
 
 #define cframe_errfunc(cf)	(*(int32_t *)(((char *)(cf))+CFRAME_OFS_ERRF))
 #define cframe_nres(cf)		(*(int32_t *)(((char *)(cf))+CFRAME_OFS_NRES))
@@ -101,7 +101,8 @@ enum {
   (mref(*(MRef *)(((char *)(cf))+CFRAME_OFS_PC), const BCIns))
 #define setcframe_pc(cf, pc) \
   (setmref(*(MRef *)(((char *)(cf))+CFRAME_OFS_PC), (pc)))
-#define cframe_canyield(cf)	((intptr_t)(cf) & CFRAME_CANYIELD)
+#define cframe_canyield(cf)	((intptr_t)(cf) & CFRAME_RESUME)
+#define cframe_unwind_ff(cf)	((intptr_t)(cf) & CFRAME_UNWIND_FF)
 #define cframe_raw(cf)		((void *)((intptr_t)(cf) & CFRAME_RAWMASK))
 #define cframe_Lpc(L)		cframe_pc(cframe_raw(L->cframe))