Browse Source

FFI: Add unwind definitions for lj_vm_ffi_call.

Adds exception interoperability for C/C++ functions called via FFI
from the interpreter.
Mike Pall 14 years ago
parent
commit
635371c212
12 changed files with 1681 additions and 1437 deletions
  1. + 2 - 1
      doc/ext_ffi_semantics.html
  2. + 24 - 4
      src/buildvm_arm.dasc
  3. + 21 - 3
      src/buildvm_arm.h
  4. + 13 - 0
      src/buildvm_asm.c
  5. + 26 - 8
      src/buildvm_peobj.c
  6. + 63 - 13
      src/buildvm_ppc.dasc
  7. + 320 - 317
      src/buildvm_ppc.h
  8. + 406 - 407
      src/buildvm_x64.h
  9. + 348 - 349
      src/buildvm_x64win.h
  10. + 153 - 29
      src/buildvm_x86.dasc
  11. + 303 - 304
      src/buildvm_x86.h
  12. + 2 - 2
      src/lj_ccall.h

+ 2 - 1
doc/ext_ffi_semantics.html

@@ -1005,7 +1005,8 @@ Other missing features:
 <li>Callbacks from C&nbsp;code to Lua functions.</li>
 <li>Passing structs by value to vararg C&nbsp;functions.</li>
 <li><a href="extensions.html#exceptions">C++ exception interoperability</a>
-does not extend to C&nbsp;functions called via the FFI.</li>
+does not extend to C&nbsp;functions called via the FFI, if the call is
+compiled.</li>
 </ul>
 <br class="flush">
 </div>

+ 24 - 4
src/buildvm_arm.dasc

@@ -2178,7 +2178,8 @@ static void build_subroutines(BuildCtx *ctx)
   |//-- FFI helper functions -----------------------------------------------
   |//-----------------------------------------------------------------------
   |
-  |->vm_ffi_call:
+  |->vm_ffi_call:			// Call C function via FFI.
+  |  // Caveat: needs special frame unwinding, see below.
 #if LJ_HASFFI
   |  .type CCSTATE, CCallState, r4
   |  push {CCSTATE, r5, r11, lr}
@@ -2207,6 +2208,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  str CRET2, CCSTATE->gpr[1]
   |  pop {CCSTATE, r5, r11, pc}
 #endif
+  |// Note: vm_ffi_call must be the last function in this object file!
   |
   |//-----------------------------------------------------------------------
 }
@@ -4003,6 +4005,7 @@ static int build_backend(BuildCtx *ctx)
 /* Emit pseudo frame-info for all assembler functions. */
 static void emit_asm_debug(BuildCtx *ctx)
 {
+  int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
   int i;
   switch (ctx->mode) {
   case BUILD_elfasm:
@@ -4028,13 +4031,30 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long .Lbegin\n"
 	"\t.long %d\n"
 	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
-	"\t.byte 0x8e\n\t.uleb128 1\n",		/* Restore lr. */
-	(int)ctx->codesz, CFRAME_SIZE);
-    for (i = 11; i >= 4; i--)  /* Restore r4-r11. */
+	"\t.byte 0x8e\n\t.uleb128 1\n",		/* offset lr */
+	fcofs, CFRAME_SIZE);
+    for (i = 11; i >= 4; i--)  /* offset r4-r11 */
       fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i));
     fprintf(ctx->fp,
 	"\t.align 2\n"
 	".LEFDE0:\n\n");
+#if LJ_HASFFI
+    fprintf(ctx->fp,
+	".LSFDE1:\n"
+	"\t.long .LEFDE1-.LASFDE1\n"
+	".LASFDE1:\n"
+	"\t.long .Lframe0\n"
+	"\t.long lj_vm_ffi_call\n"
+	"\t.long %d\n"
+	"\t.byte 0xe\n\t.uleb128 16\n"		/* def_cfa_offset */
+	"\t.byte 0x8e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x8b\n\t.uleb128 2\n"		/* offset r11 */
+	"\t.byte 0x85\n\t.uleb128 3\n"		/* offset r5 */
+	"\t.byte 0x84\n\t.uleb128 4\n"		/* offset r4 */
+	"\t.byte 0xd\n\t.uleb128 0xb\n"		/* def_cfa_register r11 */
+	"\t.align 2\n"
+	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+#endif
     break;
   default:
     break;

+ 21 - 3
src/buildvm_arm.h

@@ -7332,6 +7332,7 @@ static int build_backend(BuildCtx *ctx)
 /* Emit pseudo frame-info for all assembler functions. */
 static void emit_asm_debug(BuildCtx *ctx)
 {
+  int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
   int i;
   switch (ctx->mode) {
   case BUILD_elfasm:
@@ -7357,13 +7358,30 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long .Lbegin\n"
 	"\t.long %d\n"
 	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
-	"\t.byte 0x8e\n\t.uleb128 1\n",		/* Restore lr. */
-	(int)ctx->codesz, CFRAME_SIZE);
-    for (i = 11; i >= 4; i--)  /* Restore r4-r11. */
+	"\t.byte 0x8e\n\t.uleb128 1\n",		/* offset lr */
+	fcofs, CFRAME_SIZE);
+    for (i = 11; i >= 4; i--)  /* offset r4-r11 */
       fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i));
     fprintf(ctx->fp,
 	"\t.align 2\n"
 	".LEFDE0:\n\n");
+#if LJ_HASFFI
+    fprintf(ctx->fp,
+	".LSFDE1:\n"
+	"\t.long .LEFDE1-.LASFDE1\n"
+	".LASFDE1:\n"
+	"\t.long .Lframe0\n"
+	"\t.long lj_vm_ffi_call\n"
+	"\t.long %d\n"
+	"\t.byte 0xe\n\t.uleb128 16\n"		/* def_cfa_offset */
+	"\t.byte 0x8e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x8b\n\t.uleb128 2\n"		/* offset r11 */
+	"\t.byte 0x85\n\t.uleb128 3\n"		/* offset r5 */
+	"\t.byte 0x84\n\t.uleb128 4\n"		/* offset r4 */
+	"\t.byte 0xd\n\t.uleb128 0xb\n"		/* def_cfa_register r11 */
+	"\t.align 2\n"
+	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+#endif
     break;
   default:
     break;

+ 13 - 0
src/buildvm_asm.c

@@ -202,6 +202,17 @@ void emit_asm(BuildCtx *ctx)
   for (i = rel = 0; i < ctx->nsym; i++) {
     int32_t ofs = ctx->sym[i].ofs;
     int32_t next = ctx->sym[i+1].ofs;
+#if LJ_TARGET_ARM && defined(__GNUC__) && !defined(LUAJIT_NO_UNWIND) && \
+    LJ_HASFFI
+    if (!strcmp(ctx->sym[i].name, "lj_vm_ffi_call"))
+      fprintf(ctx->fp,
+	      ".globl lj_err_unwind_arm\n"
+	      ".personality lj_err_unwind_arm\n"
+	      ".fnend\n"
+	      ".fnstart\n"
+	      ".save {r4, r5, r11, lr}\n"
+	      ".setfp r11, sp\n");
+#endif
     emit_asm_label(ctx, ctx->sym[i].name, next - ofs, 1);
     while (rel < ctx->nreloc && ctx->reloc[rel].ofs <= next) {
       BuildReloc *r = &ctx->reloc[rel];
@@ -229,8 +240,10 @@ void emit_asm(BuildCtx *ctx)
 
 #if LJ_TARGET_ARM && defined(__GNUC__) && !defined(LUAJIT_NO_UNWIND)
   fprintf(ctx->fp,
+#if !LJ_HASFFI
 	  ".globl lj_err_unwind_arm\n"
 	  ".personality lj_err_unwind_arm\n"
+#endif
 	  ".fnend\n");
 #endif
 

+ 26 - 8
src/buildvm_peobj.c

@@ -191,15 +191,15 @@ void emit_peobj(BuildCtx *ctx)
 #if LJ_TARGET_X64
   memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1);
   pesect[PEOBJ_SECT_PDATA].ofs = sofs;
-  sofs += (pesect[PEOBJ_SECT_PDATA].size = 3*4);
+  sofs += (pesect[PEOBJ_SECT_PDATA].size = 6*4);
   pesect[PEOBJ_SECT_PDATA].relocofs = sofs;
-  sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = 3) * PEOBJ_RELOC_SIZE;
+  sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = 6) * PEOBJ_RELOC_SIZE;
   /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
   pesect[PEOBJ_SECT_PDATA].flags = 0x40300040;
 
   memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1);
   pesect[PEOBJ_SECT_XDATA].ofs = sofs;
-  sofs += (pesect[PEOBJ_SECT_XDATA].size = 8*2+4);  /* See below. */
+  sofs += (pesect[PEOBJ_SECT_XDATA].size = 8*2+4+6*2);  /* See below. */
   pesect[PEOBJ_SECT_XDATA].relocofs = sofs;
   sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE;
   /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
@@ -247,9 +247,12 @@ void emit_peobj(BuildCtx *ctx)
 
 #if LJ_TARGET_X64
   { /* Write .pdata section. */
+    uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs;
     uint32_t pdata[3];  /* Start of .text, end of .text and .xdata. */
     PEreloc reloc;
-    pdata[0] = 0; pdata[1] = (uint32_t)ctx->codesz; pdata[2] = 0;
+    pdata[0] = 0; pdata[1] = fcofs; pdata[2] = 0;
+    owrite(ctx, &pdata, sizeof(pdata));
+    pdata[0] = fcofs; pdata[1] = (uint32_t)ctx->codesz; pdata[2] = 20;
     owrite(ctx, &pdata, sizeof(pdata));
     reloc.vaddr = 0; reloc.symidx = 1+2+nrsym+2+2+1;
     reloc.type = PEOBJ_RELOC_ADDR32NB;
@@ -260,12 +263,21 @@ void emit_peobj(BuildCtx *ctx)
     reloc.vaddr = 8; reloc.symidx = 1+2+nrsym+2;
     reloc.type = PEOBJ_RELOC_ADDR32NB;
     owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
+    reloc.vaddr = 12; reloc.symidx = 1+2+nrsym+2+2+1;
+    reloc.type = PEOBJ_RELOC_ADDR32NB;
+    owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
+    reloc.vaddr = 16; reloc.symidx = 1+2+nrsym+2+2+1;
+    reloc.type = PEOBJ_RELOC_ADDR32NB;
+    owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
+    reloc.vaddr = 20; reloc.symidx = 1+2+nrsym+2;
+    reloc.type = PEOBJ_RELOC_ADDR32NB;
+    owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
   }
   { /* Write .xdata section. */
-    uint16_t xdata[8+2];
+    uint16_t xdata[8+2+6];
     PEreloc reloc;
-    xdata[0] = 0x01|0x08|0x10;  /* Ver. 1, uhander/ehandler, prolog size 0. */
-    xdata[1] = 5;  /* Number of unwind codes, no frame pointer. */
+    xdata[0] = 0x01|0x08|0x10;  /* Ver. 1, uhandler/ehandler, prolog size 0. */
+    xdata[1] = 0x0005;  /* Number of unwind codes, no frame pointer. */
     xdata[2] = 0x4200;  /* Stack offset 4*8+8 = aword*5. */
     xdata[3] = 0x3000;  /* Push rbx. */
     xdata[4] = 0x6000;  /* Push rsi. */
@@ -273,8 +285,14 @@ void emit_peobj(BuildCtx *ctx)
     xdata[6] = 0x5000;  /* Push rbp. */
     xdata[7] = 0;  /* Alignment. */
     xdata[8] = xdata[9] = 0;  /* Relocated address of exception handler. */
+    xdata[10] = 0x01;  /* Ver. 1, no handler, prolog size 0. */
+    xdata[11] = 0x1504;  /* Number of unwind codes, fp = rbp, fpofs = 16. */
+    xdata[12] = 0x0300;  /* set_fpreg. */
+    xdata[13] = 0x0200;  /* stack offset 0*8+8 = aword*1. */
+    xdata[14] = 0x3000;  /* Push rbx. */
+    xdata[15] = 0x5000;  /* Push rbp. */
     owrite(ctx, &xdata, sizeof(xdata));
-    reloc.vaddr = sizeof(xdata)-4; reloc.symidx = 1+2+nrsym+2+2;
+    reloc.vaddr = 2*8; reloc.symidx = 1+2+nrsym+2+2;
     reloc.type = PEOBJ_RELOC_ADDR32NB;
     owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
   }

+ 63 - 13
src/buildvm_ppc.dasc

@@ -2527,7 +2527,8 @@ static void build_subroutines(BuildCtx *ctx)
   |//-- FFI helper functions -----------------------------------------------
   |//-----------------------------------------------------------------------
   |
-  |->vm_ffi_call:
+  |->vm_ffi_call:			// Call C function via FFI.
+  |  // Caveat: needs special frame unwinding, see below.
 #if LJ_HASFFI
   |  .type CCSTATE, CCallState, CARG1
   |  lwz TMP1, CCSTATE->spadj
@@ -2541,8 +2542,10 @@ static void build_subroutines(BuildCtx *ctx)
   |   addic. CARG2, CARG2, -1
   |  stwux sp, sp, TMP1
   |   crnot 4*cr1+eq, 4*cr1+eq		// For vararg calls.
-  |  stw CCSTATE, -4(TMP2)
+  |  stw r14, -4(TMP2)
   |   li TMP3, 0
+  |  stw CCSTATE, -8(TMP2)
+  |  mr r14, TMP2
   |  la TMP1, CCSTATE->stack
   |   slwi CARG2, CARG2, 2
   |   blty >2
@@ -2574,18 +2577,20 @@ static void build_subroutines(BuildCtx *ctx)
   |  lwz r10, CCSTATE->gpr[7]
   |  lwz CARG1, CCSTATE->gpr[0]		// Do this last, since CCSTATE is CARG1.
   |   bctrl
-  |  lwz TMP2, 0(sp)
-  |  lwz CCSTATE:TMP1, -4(TMP2)
-  |   lwz TMP0, 4(TMP2)
+  |  lwz CCSTATE:TMP1, -8(r14)
+  |  lwz TMP2, -4(r14)
+  |   lwz TMP0, 4(r14)
   |  stw CARG1, CCSTATE:TMP1->gpr[0]
   |  stfd FARG1, CCSTATE:TMP1->fpr[0]
   |  stw CARG2, CCSTATE:TMP1->gpr[1]
   |   mtlr TMP0
   |  stw CARG3, CCSTATE:TMP1->gpr[2]
-  |   mr sp, TMP2
+  |   mr sp, r14
   |  stw CARG4, CCSTATE:TMP1->gpr[3]
+  |   mr r14, TMP2
   |  blr
 #endif
+  |// Note: vm_ffi_call must be the last function in this object file!
   |
   |//-----------------------------------------------------------------------
 }
@@ -4665,6 +4670,7 @@ static int build_backend(BuildCtx *ctx)
 /* Emit pseudo frame-info for all assembler functions. */
 static void emit_asm_debug(BuildCtx *ctx)
 {
+  int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
   int i;
   switch (ctx->mode) {
   case BUILD_elfasm:
@@ -4692,7 +4698,7 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.byte 0xe\n\t.uleb128 %d\n"
 	"\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
 	"\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n",
-	(int)ctx->codesz, CFRAME_SIZE);
+	fcofs, CFRAME_SIZE);
     for (i = 14; i <= 31; i++)
       fprintf(ctx->fp,
 	"\t.byte %d\n\t.uleb128 %d\n"
@@ -4701,6 +4707,20 @@ static void emit_asm_debug(BuildCtx *ctx)
     fprintf(ctx->fp,
 	"\t.align 2\n"
 	".LEFDE0:\n\n");
+#if LJ_HASFFI
+    fprintf(ctx->fp,
+	".LSFDE1:\n"
+	"\t.long .LEFDE1-.LASFDE1\n"
+	".LASFDE1:\n"
+	"\t.long .Lframe0\n"
+	"\t.long lj_vm_ffi_call\n"
+	"\t.long %d\n"
+	"\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
+	"\t.byte 0x8e\n\t.uleb128 2\n"
+	"\t.byte 0xd\n\t.uleb128 0xe\n"
+	"\t.align 2\n"
+	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+#endif
     fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
     fprintf(ctx->fp,
 	".Lframe1:\n"
@@ -4720,17 +4740,17 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.align 2\n"
 	".LECIE1:\n\n");
     fprintf(ctx->fp,
-	".LSFDE1:\n"
-	"\t.long .LEFDE1-.LASFDE1\n"
-	".LASFDE1:\n"
-	"\t.long .LASFDE1-.Lframe1\n"
+	".LSFDE2:\n"
+	"\t.long .LEFDE2-.LASFDE2\n"
+	".LASFDE2:\n"
+	"\t.long .LASFDE2-.Lframe1\n"
 	"\t.long .Lbegin-.\n"
 	"\t.long %d\n"
 	"\t.uleb128 0\n"			/* augmentation length */
 	"\t.byte 0xe\n\t.uleb128 %d\n"
 	"\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
 	"\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n",
-	(int)ctx->codesz, CFRAME_SIZE);
+	fcofs, CFRAME_SIZE);
     for (i = 14; i <= 31; i++)
       fprintf(ctx->fp,
 	"\t.byte %d\n\t.uleb128 %d\n"
@@ -4738,7 +4758,37 @@ static void emit_asm_debug(BuildCtx *ctx)
 	0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i));
     fprintf(ctx->fp,
 	"\t.align 2\n"
-	".LEFDE1:\n\n");
+	".LEFDE2:\n\n");
+#if LJ_HASFFI
+    fprintf(ctx->fp,
+	".Lframe2:\n"
+	"\t.long .LECIE2-.LSCIE2\n"
+	".LSCIE2:\n"
+	"\t.long 0\n"
+	"\t.byte 0x1\n"
+	"\t.string \"zR\"\n"
+	"\t.uleb128 0x1\n"
+	"\t.sleb128 -4\n"
+	"\t.byte 65\n"
+	"\t.uleb128 1\n"			/* augmentation length */
+	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	"\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n"
+	"\t.align 2\n"
+	".LECIE2:\n\n");
+    fprintf(ctx->fp,
+	".LSFDE3:\n"
+	"\t.long .LEFDE3-.LASFDE3\n"
+	".LASFDE3:\n"
+	"\t.long .LASFDE3-.Lframe2\n"
+	"\t.long lj_vm_ffi_call-.\n"
+	"\t.long %d\n"
+	"\t.uleb128 0\n"			/* augmentation length */
+	"\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
+	"\t.byte 0x8e\n\t.uleb128 2\n"
+	"\t.byte 0xd\n\t.uleb128 0xe\n"
+	"\t.align 2\n"
+	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
+#endif
     break;
   default:
     break;

File diff suppressed because it is too large
+ 320 - 317
src/buildvm_ppc.h


File diff suppressed because it is too large
+ 406 - 407
src/buildvm_x64.h


File diff suppressed because it is too large
+ 348 - 349
src/buildvm_x64win.h


+ 153 - 29
src/buildvm_x86.dasc

@@ -3687,10 +3687,21 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |.endif
   |
   |//-----------------------------------------------------------------------
+  |//-- Assertions ---------------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->assert_bad_for_arg_type:
+#ifdef LUA_USE_ASSERT
+  |  int3
+#endif
+  |  int3
+  |
+  |//-----------------------------------------------------------------------
   |//-- FFI helper functions -----------------------------------------------
   |//-----------------------------------------------------------------------
   |
-  |->vm_ffi_call@4:
+  |->vm_ffi_call@4:			// Call C function via FFI.
+  |  // Caveat: needs special frame unwinding, see below.
 #if LJ_HASFFI
   |.if X64
   |  .type CCSTATE, CCallState, rbx
@@ -3786,16 +3797,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  mov ebx, [ebp-4]; leave; ret
   |.endif
 #endif
-  |
-  |//-----------------------------------------------------------------------
-  |//-- Assertions ---------------------------------------------------------
-  |//-----------------------------------------------------------------------
-  |
-  |->assert_bad_for_arg_type:
-#ifdef LUA_USE_ASSERT
-  |  int3
-#endif
-  |  int3
+  |// Note: vm_ffi_call must be the last function in this object file!
   |
   |//-----------------------------------------------------------------------
 }
@@ -6001,6 +6003,7 @@ static int build_backend(BuildCtx *ctx)
 /* Emit pseudo frame-info for all assembler functions. */
 static void emit_asm_debug(BuildCtx *ctx)
 {
+  int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
 #if LJ_64
 #define SZPTR	"8"
 #define BSZPTR	"3"
@@ -6034,22 +6037,49 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long .LEFDE0-.LASFDE0\n"
 	".LASFDE0:\n"
 	"\t.long .Lframe0\n"
-	"\t.long .Lbegin\n"
-	"\t.long %d\n"
-	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
 #if LJ_64
+	"\t.quad .Lbegin\n"
+	"\t.quad %d\n"
+	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
 	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
 	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
 	"\t.byte 0x8f\n\t.uleb128 0x4\n"	/* offset r15 */
 	"\t.byte 0x8e\n\t.uleb128 0x5\n"	/* offset r14 */
 #else
+	"\t.long .Lbegin\n"
+	"\t.long %d\n"
+	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
 	"\t.byte 0x85\n\t.uleb128 0x2\n"	/* offset ebp */
 	"\t.byte 0x87\n\t.uleb128 0x3\n"	/* offset edi */
 	"\t.byte 0x86\n\t.uleb128 0x4\n"	/* offset esi */
 	"\t.byte 0x83\n\t.uleb128 0x5\n"	/* offset ebx */
 #endif
 	"\t.align " SZPTR "\n"
-	".LEFDE0:\n\n", (int)ctx->codesz, CFRAME_SIZE);
+	".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
+#if LJ_HASFFI
+    fprintf(ctx->fp,
+	".LSFDE1:\n"
+	"\t.long .LEFDE1-.LASFDE1\n"
+	".LASFDE1:\n"
+	"\t.long .Lframe0\n"
+#if LJ_64
+	"\t.quad lj_vm_ffi_call\n"
+	"\t.quad %d\n"
+	"\t.byte 0xe\n\t.uleb128 16\n"		/* def_cfa_offset */
+	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
+	"\t.byte 0xd\n\t.uleb128 0x6\n"		/* def_cfa_register rbp */
+	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
+#else
+	"\t.long lj_vm_ffi_call\n"
+	"\t.long %d\n"
+	"\t.byte 0xe\n\t.uleb128 8\n"		/* def_cfa_offset */
+	"\t.byte 0x85\n\t.uleb128 0x2\n"	/* offset ebp */
+	"\t.byte 0xd\n\t.uleb128 0x5\n"		/* def_cfa_register ebp */
+	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset ebx */
+#endif
+	"\t.align " SZPTR "\n"
+	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+#endif
 #if (defined(__sun__) && defined(__svr4__)) || defined(__solaris_)
     fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n");
 #else
@@ -6074,10 +6104,10 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.align " SZPTR "\n"
 	".LECIE1:\n\n");
     fprintf(ctx->fp,
-	".LSFDE1:\n"
-	"\t.long .LEFDE1-.LASFDE1\n"
-	".LASFDE1:\n"
-	"\t.long .LASFDE1-.Lframe1\n"
+	".LSFDE2:\n"
+	"\t.long .LEFDE2-.LASFDE2\n"
+	".LASFDE2:\n"
+	"\t.long .LASFDE2-.Lframe1\n"
 	"\t.long .Lbegin-.\n"
 	"\t.long %d\n"
 	"\t.uleb128 0\n"			/* augmentation length */
@@ -6094,7 +6124,46 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.byte 0x83\n\t.uleb128 0x5\n"	/* offset ebx */
 #endif
 	"\t.align " SZPTR "\n"
-	".LEFDE1:\n\n", (int)ctx->codesz, CFRAME_SIZE);
+	".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
+#if LJ_HASFFI
+    fprintf(ctx->fp,
+	".Lframe2:\n"
+	"\t.long .LECIE2-.LSCIE2\n"
+	".LSCIE2:\n"
+	"\t.long 0\n"
+	"\t.byte 0x1\n"
+	"\t.string \"zR\"\n"
+	"\t.uleb128 0x1\n"
+	"\t.sleb128 -" SZPTR "\n"
+	"\t.byte " REG_RA "\n"
+	"\t.uleb128 1\n"			/* augmentation length */
+	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	"\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
+	"\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
+	"\t.align " SZPTR "\n"
+	".LECIE2:\n\n");
+    fprintf(ctx->fp,
+	".LSFDE3:\n"
+	"\t.long .LEFDE3-.LASFDE3\n"
+	".LASFDE3:\n"
+	"\t.long .LASFDE3-.Lframe2\n"
+	"\t.long lj_vm_ffi_call-.\n"
+	"\t.long %d\n"
+	"\t.uleb128 0\n"			/* augmentation length */
+#if LJ_64
+	"\t.byte 0xe\n\t.uleb128 16\n"		/* def_cfa_offset */
+	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
+	"\t.byte 0xd\n\t.uleb128 0x6\n"		/* def_cfa_register rbp */
+	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
+#else
+	"\t.byte 0xe\n\t.uleb128 8\n"		/* def_cfa_offset */
+	"\t.byte 0x85\n\t.uleb128 0x2\n"	/* offset ebp */
+	"\t.byte 0xd\n\t.uleb128 0x5\n"		/* def_cfa_register ebp */
+	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset ebx */
+#endif
+	"\t.align " SZPTR "\n"
+	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
+#endif
     break;
   case BUILD_coffasm:
     fprintf(ctx->fp, "\t.section .eh_frame,\"dr\"\n");
@@ -6145,6 +6214,9 @@ static void emit_asm_debug(BuildCtx *ctx)
   ** Or a linker. Or a plastic case. But I digress.
   */
   case BUILD_machasm: {
+#if LJ_HASFFI
+    int fcsize = 0;
+#endif
     int i;
     fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
     fprintf(ctx->fp,
@@ -6176,6 +6248,9 @@ static void emit_asm_debug(BuildCtx *ctx)
       const char *name = ctx->sym[i].name;
       int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;
       if (size == 0) continue;
+#if LJ_HASFFI
+      if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
+#endif
       fprintf(ctx->fp,
 	  "%s.eh:\n"
 	  "LSFDE%d:\n"
@@ -6185,23 +6260,72 @@ static void emit_asm_debug(BuildCtx *ctx)
 	  "\t.long LASFDE%d-EH_frame1\n"
 	  "\t.long %s-.\n"
 	  "\t.long %d\n"
-	  "\t.byte 0\n"			/* augmentation length */
+	  "\t.byte 0\n"				/* augmentation length */
 	  "\t.byte 0xe\n\t.byte %d\n"		/* def_cfa_offset */
 #if LJ_64
-	  "\t.byte 0x86\n\t.byte 0x2\n"	/* offset rbp */
-	  "\t.byte 0x83\n\t.byte 0x3\n"	/* offset rbx */
-	  "\t.byte 0x8f\n\t.byte 0x4\n"	/* offset r15 */
-	  "\t.byte 0x8e\n\t.byte 0x5\n"	/* offset r14 */
+	  "\t.byte 0x86\n\t.byte 0x2\n"		/* offset rbp */
+	  "\t.byte 0x83\n\t.byte 0x3\n"		/* offset rbx */
+	  "\t.byte 0x8f\n\t.byte 0x4\n"		/* offset r15 */
+	  "\t.byte 0x8e\n\t.byte 0x5\n"		/* offset r14 */
 #else
-	  "\t.byte 0x84\n\t.byte 0x2\n"	/* offset ebp (4 for MACH-O)*/
-	  "\t.byte 0x87\n\t.byte 0x3\n"	/* offset edi */
-	  "\t.byte 0x86\n\t.byte 0x4\n"	/* offset esi */
-	  "\t.byte 0x83\n\t.byte 0x5\n"	/* offset ebx */
+	  "\t.byte 0x84\n\t.byte 0x2\n"		/* offset ebp (4 for MACH-O)*/
+	  "\t.byte 0x87\n\t.byte 0x3\n"		/* offset edi */
+	  "\t.byte 0x86\n\t.byte 0x4\n"		/* offset esi */
+	  "\t.byte 0x83\n\t.byte 0x5\n"		/* offset ebx */
 #endif
 	  "\t.align " BSZPTR "\n"
 	  "LEFDE%d:\n\n",
 	  name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
     }
+#if LJ_HASFFI
+    if (fcsize) {
+      fprintf(ctx->fp,
+	  "EH_frame2:\n"
+	  "\t.set L$set$y,LECIEY-LSCIEY\n"
+	  "\t.long L$set$y\n"
+	  "LSCIEY:\n"
+	  "\t.long 0\n"
+	  "\t.byte 0x1\n"
+	  "\t.ascii \"zR\\0\"\n"
+	  "\t.byte 0x1\n"
+	  "\t.byte 128-" SZPTR "\n"
+	  "\t.byte " REG_RA "\n"
+	  "\t.byte 1\n"				/* augmentation length */
+#if LJ_64
+	  "\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	  "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
+#else
+	  "\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	  "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n"  /* esp=5 on 32 bit MACH. */
+#endif
+	  "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
+	  "\t.align " BSZPTR "\n"
+	  "LECIEY:\n\n");
+      fprintf(ctx->fp,
+	  "_lj_vm_ffi_call.eh:\n"
+	  "LSFDEY:\n"
+	  "\t.set L$set$yy,LEFDEY-LASFDEY\n"
+	  "\t.long L$set$yy\n"
+	  "LASFDEY:\n"
+	  "\t.long LASFDEY-EH_frame2\n"
+	  "\t.long _lj_vm_ffi_call-.\n"
+	  "\t.long %d\n"
+	  "\t.byte 0\n"				/* augmentation length */
+#if LJ_64
+	  "\t.byte 0xe\n\t.byte 16\n"		/* def_cfa_offset */
+	  "\t.byte 0x86\n\t.byte 0x2\n"		/* offset rbp */
+	  "\t.byte 0xd\n\t.uleb128 0x6\n"	/* def_cfa_register rbp */
+	  "\t.byte 0x83\n\t.byte 0x3\n"		/* offset rbx */
+#else
+	  "\t.byte 0xe\n\t.byte 8\n"		/* def_cfa_offset */
+	  "\t.byte 0x84\n\t.byte 0x2\n"		/* offset ebp (4 for MACH-O)*/
+	  "\t.byte 0xd\n\t.uleb128 0x4\n"	/* def_cfa_register ebp */
+	  "\t.byte 0x83\n\t.byte 0x3\n"		/* offset ebx */
+#endif
+	  "\t.align " BSZPTR "\n"
+	  "LEFDEY:\n\n", fcsize);
+    }
+#endif
 #if LJ_64
     fprintf(ctx->fp, "\t.subsections_via_symbols\n");
 #else

File diff suppressed because it is too large
+ 303 - 304
src/buildvm_x86.h


+ 2 - 2
src/lj_ccall.h

@@ -64,8 +64,8 @@ typedef intptr_t GPRArg;
 #define CCALL_NARG_FPR		8
 #define CCALL_NRET_GPR		4	/* For complex double. */
 #define CCALL_NRET_FPR		1
-#define CCALL_SPS_EXTRA		3
-#define CCALL_SPS_FREE		1
+#define CCALL_SPS_EXTRA		4
+#define CCALL_SPS_FREE		0
 
 typedef intptr_t GPRArg;
 typedef double FPRArg;

Some files were not shown because too many files changed in this diff