瀏覽代碼

FFI: Add callback support (for x86/x64).

Mike Pall 14 年之前
父節點
當前提交
71d00a56db
共有 27 個文件被更改,包括 2982 次插入和 2058 次删除
  1. 28 0
      doc/ext_ffi_api.html
  2. 125 3
      doc/ext_ffi_semantics.html
  3. 2 2
      src/Makefile
  4. 24 19
      src/Makefile.dep
  5. 1 0
      src/buildvm.c
  6. 491 480
      src/buildvm_x64.h
  7. 746 737
      src/buildvm_x64win.h
  8. 125 11
      src/buildvm_x86.dasc
  9. 798 787
      src/buildvm_x86.h
  10. 54 5
      src/lib_ffi.c
  11. 8 1
      src/lj_ccall.c
  12. 5 1
      src/lj_ccall.h
  13. 461 0
      src/lj_ccallback.c
  14. 25 0
      src/lj_ccallback.h
  15. 8 0
      src/lj_cconv.c
  16. 8 1
      src/lj_crecord.c
  17. 8 1
      src/lj_ctype.c
  18. 21 1
      src/lj_ctype.h
  19. 2 1
      src/lj_debug.c
  20. 2 0
      src/lj_def.h
  21. 26 7
      src/lj_err.c
  22. 6 0
      src/lj_errmsg.h
  23. 2 0
      src/lj_frame.h
  24. 1 1
      src/lj_meta.c
  25. 2 0
      src/lj_target_x86.h
  26. 2 0
      src/lj_vm.h
  27. 1 0
      src/ljamalg.c

+ 28 - 0
doc/ext_ffi_api.html

@@ -78,6 +78,9 @@ corresponding <b>ctype</b>.</li>
 <li><b>ct</b> &mdash; A C&nbsp;type specification which can be used for
 most of the API functions. Either a <b>cdecl</b>, a <b>ctype</b> or a
 <b>cdata</b> serving as a template type.</li>
+<li><b>cb</b> &mdash; A callback object. This is a C&nbsp;data object
+holding a special function pointer. Calling this function from
+C&nbsp;code runs an associated Lua function.</li>
 <li><b>VLA</b> &mdash; A variable-length array is declared with a
 <tt>?</tt> instead of the number of elements, e.g. <tt>"int[?]"</tt>.
 The number of elements (<tt>nelem</tt>) must be given when it's
@@ -473,6 +476,31 @@ Contains the target architecture name. Same contents as
 <a href="ext_jit.html#jit_arch"><tt>jit.arch</tt></a>.
 </p>
 
+<h2 id="callback">Methods for Callbacks</h2>
+<p>
+The C&nbsp;types for <a href="ext_ffi_semantics.html#callback">callbacks</a>
+have some extra methods:
+</p>
+
+<h3 id="callback_free"><tt>cb:free()</tt></h3>
+<p>
+Free the resources associated with a callback. The associated Lua
+function is unanchored and may be garbage collected. The callback
+function pointer is no longer valid and must not be called anymore
+(it may be reused by a subsequently created callback).
+</p>
+
+<h3 id="callback_set"><tt>cb:set(func)</tt></h3>
+<p>
+Associate a new Lua function with a callback. The C&nbsp;type of the
+callback and the callback function pointer are unchanged.
+</p>
+<p>
+This method is useful to dynamically switch the receiver of callbacks
+without creating a new callback each time and registering it again (e.g.
+with a GUI library).
+</p>
+
 <h2 id="extended">Extended Standard Library Functions</h2>
 <p>
 The following standard library functions have been extended to work

+ 125 - 3
doc/ext_ffi_semantics.html

@@ -297,10 +297,12 @@ arguments to C&nbsp;calls:
 <tr class="even">
 <td class="convin">string</td><td class="convop">string data &rarr;</td><td class="convout"><tt>const char[]</tt></td></tr>
 <tr class="odd separate">
+<td class="convin">function</td><td class="convop"><a href="#callback">create callback</a> &rarr;</td><td class="convout">C function type</td></tr>
+<tr class="even separate">
 <td class="convin">table</td><td class="convop"><a href="#init_table">table initializer</a></td><td class="convout">Array</td></tr>
-<tr class="even">
+<tr class="odd">
 <td class="convin">table</td><td class="convop"><a href="#init_table">table initializer</a></td><td class="convout"><tt>struct</tt>/<tt>union</tt></td></tr>
-<tr class="odd separate">
+<tr class="even separate">
 <td class="convin">cdata</td><td class="convop">cdata payload &rarr;</td><td class="convout">C type</td></tr>
 </table>
 <p>
@@ -821,6 +823,127 @@ cdata objects are indistinguishable from pointers returned by C
 functions (which is one of the reasons why the GC cannot follow them).
 </p>
 
+<h2 id="callback">Callbacks</h2>
+<p>
+The LuaJIT FFI automatically generates special callback functions
+whenever a Lua function is converted to a C&nbsp;function pointer. This
+associates the generated callback function pointer with the C&nbsp;type
+of the function pointer and the Lua function object (closure).
+</p>
+<p>
+This can happen implicitly due to the usual conversions, e.g. when
+passing a Lua function to a function pointer argument. Or you can use
+<tt>ffi.cast()</tt> to explicitly cast a Lua function to a
+C&nbsp;function pointer.
+</p>
+<p>
+Currently only certain C&nbsp;function types can be used as callback
+functions. Neither C&nbsp;vararg functions nor functions with
+pass-by-value aggregate argument or result types are supported. There
+are no restrictions for the kind of Lua functions that can be called
+from the callback &mdash; no checks for the proper number of arguments
+are made. The return value of the Lua function will be converted to the
+result type and an error will be thrown for invalid conversions.
+</p>
+<p>
+It's allowed to throw errors across a callback invocation, but it's not
+advisable in general. Do this only if you know that the C&nbsp;function
+which called the callback copes with the forced stack unwinding and
+doesn't leak resources.
+</p>
+
+<h3 id="callback_resources">Callback resource handling</h3>
+<p>
+Callbacks take up resources &mdash; you can only have a limited number
+of them at the same time (500&nbsp;-&nbsp;1000, depending on the
+architecture). The associated Lua functions are anchored to prevent
+garbage collection, too.
+</p>
+<p>
+<b>Callbacks due to implicit conversions are permanent!</b> There is no
+way to guess their lifetime, since the C&nbsp;side might store the
+function pointer for later use (typical for GUI toolkits). The associated
+resources cannot be reclaimed until termination:
+</p>
+<pre class="code">
+ffi.cdef[[
+typedef int (__stdcall *WNDENUMPROC)(void *hwnd, intptr_t l);
+int EnumWindows(WNDENUMPROC func, intptr_t l);
+]]
+
+-- Implicit conversion to a callback via function pointer argument.
+local count = 0
+ffi.C.EnumWindows(function(hwnd, l)
+  count = count + 1
+end, 0)
+-- The callback is permanent and its resources cannot be reclaimed!
+-- Ok, so this may not be a problem, if you do this only once.
+</pre>
+<p>
+Note: this example shows that you <em>must</em> properly declare
+<tt>__stdcall</tt> callbacks on Windows/x86 systems. The calling
+convention cannot be automatically detected, unlike for
+<tt>__stdcall</tt> calls <em>to</em> Windows functions.
+</p>
+<p>
+For some use cases it's necessary to free up the resources or to
+dynamically redirect callbacks. Use an explicit cast to a
+C&nbsp;function pointer and keep the resulting cdata object. Then use
+the <a href="ext_ffi_api.html#callback_free"><tt>cb:free()</tt></a>
+or <a href="ext_ffi_api.html#callback_set"><tt>cb:set()</tt></a> methods
+on the cdata object:
+</p>
+<pre class="code">
+-- Explicitly convert to a callback via cast.
+local count = 0
+local cb = ffi.cast("WNDENUMPROC", function(hwnd, l)
+  count = count + 1
+end)
+
+-- Pass it to a C function.
+ffi.C.EnumWindows(cb, 0)
+-- EnumWindows doesn't need the callback after it returns, so free it.
+
+cb:free()
+-- The callback function pointer is no longer valid and its resources
+-- will be reclaimed. The created Lua closure will be garbage collected.
+</pre>
+
+<h3 id="callback_performance">Callback performance</h3>
+<p>
+<b>Callbacks are slow!</b> First, the C&nbsp;to Lua transition itself
+has an unavoidable cost, similar to a <tt>lua_call()</tt> or
+<tt>lua_pcall()</tt>. Argument and result marshalling add to that cost.
+And finally, neither the C&nbsp;compiler nor LuaJIT can inline or
+optimize across the language barrier and hoist repeated computations out
+of a callback function.
+</p>
+<p>
+Do not use callbacks for performance-sensitive work: e.g. consider a
+numerical integration routine which takes a user-defined function to
+integrate over. It's a bad idea to call a user-defined Lua function from
+C&nbsp;code millions of times. The callback overhead will be absolutely
+detrimental to performance.
+</p>
+<p>
+It's considerably faster to write the numerical integration routine
+itself in Lua &mdash; the JIT compiler will be able to inline the
+user-defined function and optimize it together with its calling context,
+with very competitive performance.
+</p>
+<p>
+As a general guideline: <b>use callbacks only when you must</b>, because
+of existing C&nbsp;APIs. E.g. callback performance is irrelevant for a
+GUI application, which waits for user input most of the time, anyway.
+</p>
+<p>
+For new designs <b>avoid push-style APIs</b> (C&nbsp;function repeatedly
+calling a callback for each result). Instead <b>use pull-style APIs</b>
+(call a C&nbsp;function repeatedly to get a new result). Calls from Lua
+to C via the FFI are much faster than the other way round. Most
+well-designed libraries already use pull-style APIs (read/write, get/put).
+</p>
+
 <h2 id="clib">C Library Namespaces</h2>
 <p>
 A C&nbsp;library namespace is a special kind of object which allows
@@ -1002,7 +1125,6 @@ Other missing features:
 <ul>
 <li>Bit operations for 64&nbsp;bit types.</li>
 <li>Arithmetic for <tt>complex</tt> numbers.</li>
-<li>Callbacks from C&nbsp;code to Lua functions.</li>
 <li>Passing structs by value to vararg C&nbsp;functions.</li>
 <li><a href="extensions.html#exceptions">C++ exception interoperability</a>
 does not extend to C&nbsp;functions called via the FFI, if the call is

+ 2 - 2
src/Makefile

@@ -377,8 +377,8 @@ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \
 	  lj_opt_dce.o lj_opt_loop.o lj_opt_split.o \
 	  lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
 	  lj_asm.o lj_trace.o lj_gdbjit.o \
-	  lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_carith.o lj_clib.o \
-	  lj_cparse.o \
+	  lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \
+	  lj_carith.o lj_clib.o lj_cparse.o \
 	  lj_lib.o lj_alloc.o lib_aux.o \
 	  $(LJLIB_O) lib_init.o
 

+ 24 - 19
src/Makefile.dep

@@ -1,6 +1,6 @@
 buildvm.o: buildvm.c buildvm.h lj_def.h lua.h luaconf.h lj_arch.h \
  lj_obj.h lj_gc.h lj_bc.h lj_ir.h lj_ircall.h lj_jit.h lj_frame.h \
- lj_dispatch.h lj_ccall.h lj_ctype.h luajit.h \
+ lj_dispatch.h lj_ctype.h lj_ccall.h luajit.h \
  lj_traceerr.h
 buildvm_asm.o: buildvm_asm.c buildvm.h lj_def.h lua.h luaconf.h lj_arch.h \
  lj_bc.h
@@ -23,7 +23,7 @@ lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
 lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
  lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \
  lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \
- lj_clib.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
+ lj_ccallback.h lj_clib.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
 lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h
 lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
  lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ff.h lj_ffdef.h \
@@ -69,11 +69,16 @@ lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ctype.h lj_cconv.h \
  lj_cdata.h lj_carith.h
 lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_cconv.h lj_cdata.h \
- lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \
+ lj_cdata.h lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
  lj_traceerr.h
+lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \
+ lj_bc.h lj_ctype.h lj_cconv.h lj_ccall.h lj_ccallback.h lj_target.h \
+ lj_target_*.h lj_vm.h
 lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h
+ lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \
+ lj_ccallback.h
 lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \
  lj_cdata.h
@@ -86,11 +91,11 @@ lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_bc.h lj_vm.h lj_char.h
 lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \
- lj_gc.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ir.h lj_jit.h \
- lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \
+ lj_gc.h lj_cdata.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ir.h \
+ lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \
  lj_record.h lj_ffrecord.h lj_crecord.h
 lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_ccallback.h
 lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_state.h lj_frame.h \
  lj_bc.h lj_jit.h lj_ir.h
@@ -188,15 +193,15 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
  lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_debug.c \
  lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c luajit.h lj_vmevent.c \
  lj_vmevent.h lj_vmmath.c lj_api.c lj_bcdump.h lj_parse.h lj_lex.c \
- lualib.h lj_parse.c lj_bcread.c lj_bcwrite.c lj_ctype.c lj_cdata.c \
- lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_carith.c lj_carith.h \
- lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_lib.h lj_ir.c \
- lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \
- lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \
- lj_mcode.c lj_mcode.h lj_snap.c lj_target.h lj_target_*.h lj_record.c \
- lj_record.h lj_ffrecord.h lj_crecord.c lj_crecord.h lj_ffrecord.c \
- lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h lj_asm_*.h lj_trace.c \
- lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c lj_libdef.h \
- lib_math.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c \
- lib_debug.c lib_bit.c lib_jit.c lib_ffi.c lib_init.c
+ lualib.h lj_parse.c lj_bcread.c lj_bcwrite.c lj_ctype.c lj_ccallback.h \
+ lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \
+ lj_target.h lj_target_*.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h \
+ lj_cparse.c lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h lj_iropt.h \
+ lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \
+ lj_opt_loop.c lj_snap.h lj_opt_split.c lj_mcode.c lj_mcode.h lj_snap.c \
+ lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c lj_crecord.h \
+ lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h lj_asm_*.h \
+ lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c \
+ lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c lib_os.c \
+ lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c lib_init.c
 luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h

+ 1 - 0
src/buildvm.c

@@ -23,6 +23,7 @@
 #include "lj_frame.h"
 #include "lj_dispatch.h"
 #if LJ_HASFFI
+#include "lj_ctype.h"
 #include "lj_ccall.h"
 #endif
 #include "luajit.h"

File diff suppressed because it is too large
+ 491 - 480
src/buildvm_x64.h


File diff suppressed because it is too large
+ 746 - 737
src/buildvm_x64win.h


+ 125 - 11
src/buildvm_x86.dasc

@@ -114,10 +114,13 @@
 |.if not X64		// x86 stack layout.
 |
 |.define CFRAME_SPACE,	aword*7			// Delta for esp (see <--).
-|.macro saveregs
-|  push ebp; push edi; push esi; push ebx
+|.macro saveregs_
+|  push edi; push esi; push ebx
 |  sub esp, CFRAME_SPACE
 |.endmacro
+|.macro saveregs
+|  push ebp; saveregs_
+|.endmacro
 |.macro restoreregs
 |  add esp, CFRAME_SPACE
 |  pop ebx; pop esi; pop edi; pop ebp
@@ -166,10 +169,13 @@
 |.elif X64WIN		// x64/Windows stack layout
 |
 |.define CFRAME_SPACE,	aword*5			// Delta for rsp (see <--).
-|.macro saveregs
-|  push rbp; push rdi; push rsi; push rbx
+|.macro saveregs_
+|  push rdi; push rsi; push rbx
 |  sub rsp, CFRAME_SPACE
 |.endmacro
+|.macro saveregs
+|  push rbp; saveregs_
+|.endmacro
 |.macro restoreregs
 |  add rsp, CFRAME_SPACE
 |  pop rbx; pop rsi; pop rdi; pop rbp
@@ -206,10 +212,13 @@
 |.else			// x64/POSIX stack layout
 |
 |.define CFRAME_SPACE,	aword*5			// Delta for rsp (see <--).
-|.macro saveregs
-|  push rbp; push rbx; push r15; push r14
+|.macro saveregs_
+|  push rbx; push r15; push r14
 |  sub rsp, CFRAME_SPACE
 |.endmacro
+|.macro saveregs
+|  push rbp; saveregs_
+|.endmacro
 |.macro restoreregs
 |  add rsp, CFRAME_SPACE
 |  pop r14; pop r15; pop rbx; pop rbp
@@ -760,14 +769,18 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  mov PC, [RB-12]			// Restore PC from [cont|PC].
   |.if X64
   |  movsxd RAa, dword [RB-16]		// May be negative on WIN64 with debug.
-  |  test RA, RA
-  |  jz >1
+#if LJ_HASFFI
+  |  cmp RA, 1
+  |  jbe >1
+#endif
   |  lea KBASEa, qword [=>0]
   |  add RAa, KBASEa
   |.else
   |  mov RA, dword [RB-16]
-  |  test RA, RA
-  |  jz >1
+#if LJ_HASFFI
+  |  cmp RA, 1
+  |  jbe >1
+#endif
   |.endif
   |  mov LFUNC:KBASE, [BASE-8]
   |  mov KBASE, LFUNC:KBASE->pc
@@ -775,11 +788,15 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  // BASE = base, RC = result, RB = meta base
   |  jmp RAa				// Jump to continuation.
   |
-  |1:  // Tail call from C function.
+#if LJ_HASFFI
+  |1:
+  |  je ->cont_ffi_callback		// cont = 1: return from FFI callback.
+  |  // cont = 0: Tail call from C function.
   |  sub RB, BASE
   |  shr RB, 3
   |  lea RD, [RB-1]
   |  jmp ->vm_call_tail
+#endif
   |
   |->cont_cat:				// BASE = base, RC = result, RB = mbase
   |  movzx RA, PC_RB
@@ -3699,6 +3716,103 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |//-----------------------------------------------------------------------
   |//-- FFI helper functions -----------------------------------------------
   |//-----------------------------------------------------------------------
+  |
+  |// Handler for callback functions. Callback slot number in ah/al.
+  |->vm_ffi_callback:
+#if LJ_HASFFI
+  |.type CTSTATE, CTState, PC
+  |.if not X64
+  |  sub esp, 16			// Leave room for SAVE_ERRF etc.
+  |.endif
+  |  saveregs_	// ebp/rbp already saved. ebp now holds global_State *.
+  |  lea DISPATCH, [ebp+GG_G2DISP]
+  |  mov CTSTATE, GL:ebp->ctype_state
+  |  movzx eax, ax
+  |  mov CTSTATE->cb.slot, eax
+  |.if X64
+  |  mov CTSTATE->cb.gpr[0], CARG1
+  |  mov CTSTATE->cb.gpr[1], CARG2
+  |  mov CTSTATE->cb.gpr[2], CARG3
+  |  mov CTSTATE->cb.gpr[3], CARG4
+  |  movsd qword CTSTATE->cb.fpr[0], xmm0
+  |  movsd qword CTSTATE->cb.fpr[1], xmm1
+  |  movsd qword CTSTATE->cb.fpr[2], xmm2
+  |  movsd qword CTSTATE->cb.fpr[3], xmm3
+  |.if X64WIN
+  |  lea rax, [rsp+CFRAME_SIZE+4*8]
+  |.else
+  |  lea rax, [rsp+CFRAME_SIZE]
+  |  mov CTSTATE->cb.gpr[4], CARG5
+  |  mov CTSTATE->cb.gpr[5], CARG6
+  |  movsd qword CTSTATE->cb.fpr[4], xmm4
+  |  movsd qword CTSTATE->cb.fpr[5], xmm5
+  |  movsd qword CTSTATE->cb.fpr[6], xmm6
+  |  movsd qword CTSTATE->cb.fpr[7], xmm7
+  |.endif
+  |  mov CTSTATE->cb.stack, rax
+  |  mov CARG2, rsp
+  |.else
+  |  lea eax, [esp+CFRAME_SIZE+16]
+  |  mov CTSTATE->cb.gpr[0], FCARG1
+  |  mov CTSTATE->cb.gpr[1], FCARG2
+  |  mov CTSTATE->cb.stack, eax
+  |  mov FCARG1, [esp+CFRAME_SIZE+12]	// Move around misplaced retaddr/ebp.
+  |  mov FCARG2, [esp+CFRAME_SIZE+8]
+  |  mov SAVE_RET, FCARG1
+  |  mov SAVE_R4, FCARG2
+  |  mov FCARG2, esp
+  |.endif
+  |  mov SAVE_PC, CTSTATE		// Any value outside of bytecode is ok.
+  |  mov FCARG1, CTSTATE
+  |  call extern lj_ccallback_enter@8	// (CTState *cts, void *cf)
+  |  // lua_State * returned in eax (RD).
+  |  set_vmstate INTERP
+  |  mov BASE, L:RD->base
+  |  mov RD, L:RD->top
+  |  sub RD, BASE
+  |  mov LFUNC:RB, [BASE-8]
+  |  shr RD, 3
+  |  add RD, 1
+  |  ins_callt
+#endif
+  |
+  |->cont_ffi_callback:			// Return from FFI callback.
+#if LJ_HASFFI
+  |  mov L:RA, SAVE_L
+  |  mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
+  |  mov aword CTSTATE->L, L:RAa
+  |  mov L:RA->base, BASE
+  |  mov L:RA->top, RB
+  |  mov FCARG1, CTSTATE
+  |  mov FCARG2, RC
+  |  call extern lj_ccallback_leave@8	// (CTState *cts, TValue *o)
+  |.if X64
+  |  mov rax, CTSTATE->cb.gpr[0]
+  |  movsd xmm0, qword CTSTATE->cb.fpr[0]
+  |  jmp ->vm_leave_unw
+  |.else
+  |  mov L:RB, SAVE_L
+  |  mov eax, CTSTATE->cb.gpr[0]
+  |  mov edx, CTSTATE->cb.gpr[1]
+  |  cmp dword CTSTATE->cb.gpr[2], 1
+  |  jb >7
+  |  je >6
+  |  fld qword CTSTATE->cb.fpr[0].d
+  |  jmp >7
+  |6:
+  |  fld dword CTSTATE->cb.fpr[0].f
+  |7:
+  |  mov ecx, L:RB->top
+  |  movzx ecx, word [ecx+6]		// Get stack adjustment and copy up.
+  |  mov SAVE_L, ecx			// Must be one slot above SAVE_RET
+  |  restoreregs
+  |  pop ecx				// Move return addr from SAVE_RET.
+  |  add esp, [esp]			// Adjust stack.
+  |  add esp, 16
+  |  push ecx
+  |  ret
+  |.endif
+#endif
   |
   |->vm_ffi_call@4:			// Call C function via FFI.
   |  // Caveat: needs special frame unwinding, see below.

File diff suppressed because it is too large
+ 798 - 787
src/buildvm_x86.h


+ 54 - 5
src/lib_ffi.c

@@ -27,6 +27,7 @@
 #include "lj_cconv.h"
 #include "lj_carith.h"
 #include "lj_ccall.h"
+#include "lj_ccallback.h"
 #include "lj_clib.h"
 #include "lj_ff.h"
 #include "lj_lib.h"
@@ -384,6 +385,50 @@ LJLIB_CF(ffi_clib___gc)
 
 #include "lj_libdef.h"
 
+/* -- Callback function metamethods --------------------------------------- */
+
+#define LJLIB_MODULE_ffi_callback
+
+static int ffi_callback_set(lua_State *L, GCfunc *fn)  /* Shared worker for cb:set()/cb:free(): fn == NULL frees the slot. */
+{
+  GCcdata *cd = ffi_checkcdata(L, 1);  /* Argument 1 is the callback cdata. */
+  CTState *cts = ctype_cts(L);
+  CType *ct = ctype_raw(cts, cd->typeid);
+  if (ctype_isptr(ct->info) && (LJ_32 || ct->size == 8)) {  /* Pointer cdata only; unless LJ_32, its size must be 8. */
+    MSize slot = lj_ccallback_ptr2slot(cts, *(void **)cdataptr(cd));
+    if (slot < cts->cb.sizeid && cts->cb.cbid[slot] != 0) {  /* Slot in range with a non-zero cbid entry. */
+      GCtab *t = cts->miscmap;
+      TValue *tv = lj_tab_setint(L, t, (int32_t)slot);  /* miscmap[slot] holds the Lua function. */
+      if (fn) {
+	setfuncV(L, tv, fn);  /* Rebind the slot to the new function. */
+	lj_gc_anybarriert(L, t);  /* Barrier after storing a GC object into t. */
+      } else {
+	setnilV(tv);  /* Drop the reference to the Lua function. */
+	cts->cb.cbid[slot] = 0;  /* Clear the slot's cbid entry. */
+	cts->cb.topid = slot < cts->cb.topid ? slot : cts->cb.topid;  /* Lower topid to this slot if it is smaller. */
+      }
+      return 0;  /* No Lua results. */
+    }
+  }
+  lj_err_caller(L, LJ_ERR_FFI_BADCBACK);  /* Not a pointer cdata or not a live callback slot. */
+  return 0;  /* NOTE(review): presumably not reached if lj_err_caller() does not return -- confirm. */
+}
+
+LJLIB_CF(ffi_callback_free)
+{
+  return ffi_callback_set(L, NULL);  /* cb:free(): a NULL function makes the worker release the slot. */
+}
+
+LJLIB_CF(ffi_callback_set)
+{
+  GCfunc *fn = lj_lib_checkfunc(L, 2);  /* cb:set(func): argument 2 is checked as a function. */
+  return ffi_callback_set(L, fn);  /* Non-NULL fn makes the worker rebind the slot. */
+}
+
+LJLIB_PUSH(top-1) LJLIB_SET(__index)
+
+#include "lj_libdef.h"
+
 /* -- FFI library functions ----------------------------------------------- */
 
 #define LJLIB_MODULE_ffi
@@ -428,7 +473,7 @@ LJLIB_CF(ffi_new)	LJLIB_REC(.)
 		   o, (MSize)(L->top - o));  /* Initialize cdata. */
   if (ctype_isstruct(ct->info)) {
     /* Handle ctype __gc metamethod. Use the fast lookup here. */
-    cTValue *tv = lj_tab_getint(cts->metatype, (int32_t)id);
+    cTValue *tv = lj_tab_getinth(cts->miscmap, -(int32_t)id);
     if (tv && tvistab(tv) && (tv = lj_meta_fast(L, tabV(tv), MM_gc))) {
       GCtab *t = cts->finalizer;
       if (gcref(t->metatable)) {
@@ -650,21 +695,21 @@ LJLIB_CF(ffi_abi)	LJLIB_REC(.)
 
 #undef H_
 
-LJLIB_PUSH(top-8) LJLIB_SET(!)  /* Store reference to metatype table. */
+LJLIB_PUSH(top-8) LJLIB_SET(!)  /* Store reference to miscmap table. */
 
 LJLIB_CF(ffi_metatype)
 {
   CTState *cts = ctype_cts(L);
   CTypeID id = ffi_checkctype(L, cts);
   GCtab *mt = lj_lib_checktab(L, 2);
-  GCtab *t = cts->metatype;
+  GCtab *t = cts->miscmap;
   CType *ct = ctype_get(cts, id);  /* Only allow raw types. */
   TValue *tv;
   GCcdata *cd;
   if (!(ctype_isstruct(ct->info) || ctype_iscomplex(ct->info) ||
 	ctype_isvector(ct->info)))
     lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
-  tv = lj_tab_setint(L, t, (int32_t)id);
+  tv = lj_tab_setinth(L, t, -(int32_t)id);
   if (!tvisnil(tv))
     lj_err_caller(L, LJ_ERR_PROTMT);
   settabV(L, tv, mt);
@@ -745,12 +790,16 @@ static void ffi_register_module(lua_State *L)
 LUALIB_API int luaopen_ffi(lua_State *L)
 {
   CTState *cts = lj_ctype_init(L);
-  settabV(L, L->top++, (cts->metatype = lj_tab_new(L, 0, 0)));
+  settabV(L, L->top++, (cts->miscmap = lj_tab_new(L, 0, 1)));
   cts->finalizer = ffi_finalizer(L);
   LJ_LIB_REG(L, NULL, ffi_meta);
   /* NOBARRIER: basemt is a GC root. */
   setgcref(basemt_it(G(L), LJ_TCDATA), obj2gco(tabV(L->top-1)));
   LJ_LIB_REG(L, NULL, ffi_clib);
+  LJ_LIB_REG(L, NULL, ffi_callback);
+  /* NOBARRIER: the key is new and lj_tab_newkey() handles the barrier. */
+  settabV(L, lj_tab_setstr(L, cts->miscmap, &cts->g->strempty), tabV(L->top-1));
+  L->top--;
   lj_clib_default(L, tabV(L->top-1));  /* Create ffi.C default namespace. */
   lua_pushliteral(L, LJ_OS_NAME);
   lua_pushliteral(L, LJ_ARCH_NAME);

+ 8 - 1
src/lj_ccall.c

@@ -10,6 +10,7 @@
 #include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_str.h"
+#include "lj_tab.h"
 #include "lj_ctype.h"
 #include "lj_cconv.h"
 #include "lj_cdata.h"
@@ -290,7 +291,7 @@
   }
 
 #else
-#error "missing calling convention definitions for this architecture"
+#error "Missing calling convention definitions for this architecture"
 #endif
 
 #ifndef CCALL_HANDLE_STRUCTRET2
@@ -649,7 +650,13 @@ int lj_ccall_func(lua_State *L, GCcdata *cd)
     int gcsteps, ret;
     cc.func = (void (*)(void))cdata_getptr(cdataptr(cd), sz);
     gcsteps = ccall_set_args(L, cts, ct, &cc);
+    cts->cb.slot = ~0u;
     lj_vm_ffi_call(&cc);
+    if (cts->cb.slot != ~0u) {  /* Blacklist function that called a callback. */
+      TValue tv;
+      setlightudV(&tv, (void *)cc.func);
+      setboolV(lj_tab_set(L, cts->miscmap, &tv), 1);
+    }
     gcsteps += ccall_get_results(L, cts, ct, &cc, &ret);
 #if LJ_TARGET_X86 && LJ_ABI_WIN
     /* Automatically detect __stdcall and fix up C function declaration. */

+ 5 - 1
src/lj_ccall.h

@@ -81,7 +81,7 @@ typedef double FPRArg;
 typedef intptr_t GPRArg;
 
 #else
-#error "missing calling convention definitions for this architecture"
+#error "Missing calling convention definitions for this architecture"
 #endif
 
 #ifndef CCALL_SPS_EXTRA
@@ -99,6 +99,10 @@ typedef intptr_t GPRArg;
 #define CCALL_NUM_FPR \
   (CCALL_NARG_FPR > CCALL_NRET_FPR ? CCALL_NARG_FPR : CCALL_NRET_FPR)
 
+/* Check against constants in lj_ctype.h. */
+LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR);
+LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR);
+
 #define CCALL_MAXSTACK		32
 
 /* -- C call state -------------------------------------------------------- */

+ 461 - 0
src/lj_ccallback.c

@@ -0,0 +1,461 @@
+/*
+** FFI C callback handling.
+** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#include "lj_obj.h"
+
+#if LJ_HASFFI
+
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_tab.h"
+#include "lj_state.h"
+#include "lj_frame.h"
+#include "lj_ctype.h"
+#include "lj_cconv.h"
+#include "lj_ccall.h"
+#include "lj_ccallback.h"
+#include "lj_target.h"
+#include "lj_vm.h"
+
+/* -- Target-specific handling of callback slots -------------------------- */
+
+#define CALLBACK_MCODE_SIZE	(LJ_PAGESIZE * LJ_NUM_CBPAGE)
+
+#if LJ_TARGET_X86ORX64
+
+/* x64 only: 8 bytes at the start of the page hold the jump target address. */
+#define CALLBACK_MCODE_HEAD	(LJ_64 ? 8 : 0)
+/*
+** Extra bytes per group of 32 slots. The last stub of a group has no short
+** jmp (-2), but is followed by the group tail: push (1), mov ah (2),
+** mov ebp (5) and the final jmp (6 on x64, 5 on x86).
+*/
+#define CALLBACK_MCODE_GROUP	(-2+1+2+5+(LJ_64 ? 6 : 5))
+
+/* Byte offset of a slot's stub: 4 bytes per slot plus the group tails. */
+#define CALLBACK_SLOT2OFS(slot) \
+  (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot))
+
+/*
+** Inverse of CALLBACK_SLOT2OFS for offsets of slot stubs. The result for
+** any other offset is meaningless and must be validated by the caller.
+*/
+static MSize CALLBACK_OFS2SLOT(MSize ofs)
+{
+  /* One group holds 32 stubs of 4 bytes each plus the shared group tail. */
+  const MSize groupsz = 32*4 + CALLBACK_MCODE_GROUP;
+  MSize rel = ofs - CALLBACK_MCODE_HEAD;
+  MSize ngroup = rel / groupsz;
+  MSize inslot = (rel % groupsz) / 4;
+  return inslot + ngroup*32;
+}
+
+/* Number of slots that fit into the callback mcode area (multiple of 32). */
+#define CALLBACK_MAX_SLOT \
+  (((CALLBACK_MCODE_SIZE-CALLBACK_MCODE_HEAD)/(CALLBACK_MCODE_GROUP+4*32))*32)
+
+#else
+
+/* Missing support for this architecture. */
+#define CALLBACK_SLOT2OFS(slot)	(0*(slot))
+#define CALLBACK_OFS2SLOT(ofs)	(0*(ofs))
+#define CALLBACK_MAX_SLOT	0
+
+#endif
+
+/* Return the address of the machine code stub for a callback slot. */
+static void *callback_slot2ptr(CTState *cts, MSize slot)
+{
+  uint8_t *mcode = (uint8_t *)cts->cb.mcode;
+  return mcode + CALLBACK_SLOT2OFS(slot);
+}
+
+/*
+** Map a callback function pointer back to its slot number.
+** Returns ~0u if the pointer is not the start of a callback stub.
+*/
+MSize lj_ccallback_ptr2slot(CTState *cts, void *p)
+{
+  uintptr_t ofs = (uintptr_t)((uint8_t *)p -(uint8_t *)cts->cb.mcode);
+  MSize slot;
+  if (ofs >= CALLBACK_MCODE_SIZE)
+    return ~0u;  /* Outside of the callback mcode area. */
+  slot = CALLBACK_OFS2SLOT((MSize)ofs);
+  /* Only accept offsets that round-trip to the start of a stub. */
+  if (CALLBACK_SLOT2OFS(slot) != (MSize)ofs)
+    return ~0u;  /* Not a known callback function pointer. */
+  return slot;
+}
+
+#if LJ_TARGET_X86ORX64
+/* Initialize machine code for callback function pointers.
+**
+** Layout written to 'page': on x64 an 8 byte pointer to lj_vm_ffi_callback,
+** followed by one stub per callback slot. Every stub loads the low 8 bits
+** of its slot number into al. The last stub of each group of 32 (and the
+** very last stub) continues with the group tail, which pushes ebp/rbp,
+** loads slot>>8 into ah, loads the 32 bit address of g into ebp and jumps
+** to lj_vm_ffi_callback. All other stubs use a short jmp to that tail.
+*/
+static void callback_mcode_init(global_State *g, uint8_t *page)
+{
+  uint8_t *p = page;
+  uint8_t *target = (uint8_t *)(void *)lj_vm_ffi_callback;
+  MSize slot;
+#if LJ_64
+  *(void **)p = target; p += 8;  /* Page header: address for indirect jmp. */
+#endif
+  for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
+    /* mov al, slot; jmp group */
+    *p++ = XI_MOVrib | RID_EAX; *p++ = (uint8_t)slot;
+    if ((slot & 31) == 31 || slot == CALLBACK_MAX_SLOT-1) {
+      /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */
+      *p++ = XI_PUSH + RID_EBP;
+      *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8);
+      *p++ = XI_MOVri | RID_EBP;
+      *(int32_t *)p = i32ptr(g); p += 4;
+#if LJ_64
+      /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */
+      *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP;
+      *(int32_t *)p = (int32_t)(page-(p+4)); p += 4;
+#else
+      /* jmp lj_vm_ffi_callback. */
+      *p++ = XI_JMP; *(int32_t *)p = target-(p+4); p += 4;
+#endif
+    } else {
+      /* Skip the remaining 4 byte stubs and the 2 byte mov al of the last
+      ** stub in this group, i.e. land on the push of the group tail. */
+      *p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2);
+    }
+  }
+  lua_assert(p - page <= CALLBACK_MCODE_SIZE);  /* Must fit into the area. */
+}
+#else
+/* Missing support for this architecture. */
+#define callback_mcode_init(g, p)	UNUSED(p)
+#endif
+
+/* -- Machine code management --------------------------------------------- */
+
+#if LJ_TARGET_WINDOWS
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#elif LJ_TARGET_POSIX
+
+#include <sys/mman.h>
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS   MAP_ANON
+#endif
+
+#endif
+
+/*
+** Allocate and initialize area for callback function pointers.
+**
+** The area is allocated writable, filled with the callback stubs and then
+** switched to read+execute. Throws LJ_ERR_FFI_CBACKOV if the target has no
+** callback support, if the allocation fails or if the protection change
+** fails. cts->cb.mcode is only set once the area is ready for use.
+*/
+static void callback_mcode_new(CTState *cts)
+{
+  size_t sz = (size_t)CALLBACK_MCODE_SIZE;
+  void *p;
+  if (CALLBACK_MAX_SLOT == 0)
+    lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
+#if LJ_TARGET_WINDOWS
+  p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
+  if (!p)
+    lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
+#elif LJ_TARGET_POSIX
+  p = mmap(NULL, sz, (PROT_READ|PROT_WRITE), MAP_PRIVATE|MAP_ANONYMOUS,
+           -1, 0);
+  if (p == MAP_FAILED)
+    lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
+#else
+  /* Fallback allocator. Fails if memory is not executable by default. */
+  p = lj_mem_new(cts->L, sz);
+#endif
+  callback_mcode_init(cts->g, p);
+#if LJ_TARGET_WINDOWS
+  {
+    DWORD oprot;
+    /* A non-executable area would crash on the first callback. */
+    if (!VirtualProtect(p, sz, PAGE_EXECUTE_READ, &oprot)) {
+      VirtualFree(p, 0, MEM_RELEASE);
+      lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
+    }
+  }
+#elif LJ_TARGET_POSIX
+  /* A non-executable area would crash on the first callback. */
+  if (mprotect(p, sz, (PROT_READ|PROT_EXEC)) != 0) {
+    munmap(p, sz);
+    lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
+  }
+#endif
+  cts->cb.mcode = p;  /* Publish only after the area is fully set up. */
+}
+
+/*
+** Release the callback mcode area, if one was allocated.
+** Uses the deallocator matching the allocator of callback_mcode_new().
+*/
+void lj_ccallback_mcode_free(CTState *cts)
+{
+  void *mcode = cts->cb.mcode;
+  size_t sz = (size_t)CALLBACK_MCODE_SIZE;
+  if (mcode != NULL) {
+#if LJ_TARGET_WINDOWS
+    UNUSED(sz);
+    VirtualFree(mcode, 0, MEM_RELEASE);
+#elif LJ_TARGET_POSIX
+    munmap(mcode, sz);
+#else
+    lj_mem_free(cts->g, mcode, sz);
+#endif
+  }
+}
+
+/* -- C callback entry ---------------------------------------------------- */
+
+/* Target-specific handling of register arguments. Similar to lj_ccall.c.
+**
+** CALLBACK_HANDLE_REGARG is expanded inside the argument loop of
+** callback_conv_args() and uses its locals (cts, isfp, n, ngpr, nfpr,
+** maxgpr, sp). If the argument is taken from a register, sp is pointed to
+** the register's slot in cts->cb.gpr[] or cts->cb.fpr[] and control jumps
+** to the 'done' label. Otherwise the macro falls through to the stack
+** argument handling. The ARM and PPC variants are NYI placeholders which
+** jump to 'done' unconditionally.
+*/
+#if LJ_TARGET_X86
+
+#define CALLBACK_HANDLE_REGARG \
+  if (!isfp) {  /* Only non-FP values may be passed in registers. */ \
+    if (n > 1) {  /* Anything > 32 bit is passed on the stack. */ \
+      if (!LJ_ABI_WIN) ngpr = maxgpr;  /* Prevent reordering. */ \
+    } else if (ngpr + 1 <= maxgpr) { \
+      sp = &cts->cb.gpr[ngpr]; \
+      ngpr += n; \
+      goto done; \
+    } \
+  }
+
+#elif LJ_TARGET_X64 && LJ_ABI_WIN
+
+/* Windows/x64 argument registers are strictly positional (use ngpr). */
+#define CALLBACK_HANDLE_REGARG \
+  if (isfp) { \
+    if (ngpr < 4) { sp = &cts->cb.fpr[ngpr++]; nfpr = ngpr; goto done; } \
+  } else { \
+    if (ngpr < 4) { sp = &cts->cb.gpr[ngpr++]; goto done; } \
+  }
+
+#elif LJ_TARGET_X64
+
+#define CALLBACK_HANDLE_REGARG \
+  if (isfp) { \
+    if (nfpr + n <= CCALL_NARG_FPR) { \
+      sp = &cts->cb.fpr[nfpr]; \
+      nfpr += n; \
+      goto done; \
+    } \
+  } else { \
+    if (ngpr + n <= maxgpr) { \
+      sp = &cts->cb.gpr[ngpr]; \
+      ngpr += n; \
+      goto done; \
+    } \
+  }
+
+#elif LJ_TARGET_ARM
+
+#define CALLBACK_HANDLE_REGARG \
+  UNUSED(ngpr); UNUSED(maxgpr); goto done;  /* NYI */
+
+#elif LJ_TARGET_PPC
+
+#define CALLBACK_HANDLE_REGARG \
+  UNUSED(ngpr); UNUSED(nfpr); UNUSED(maxgpr); goto done;  /* NYI */
+#define CALLBACK_HANDLE_RET		/* NYI */
+
+#else
+#error "Missing calling convention definitions for this architecture"
+#endif
+
+/* Convert and push callback arguments to Lua stack.
+**
+** Looks up the C function type and the Lua function for cts->cb.slot,
+** pushes a continuation frame marked with LJ_CONT_FFI_CALLBACK and then
+** converts each declared C argument (taken from the saved registers in
+** cts->cb or from cts->cb.stack) to a TValue above the new base.
+** Throws LJ_ERR_FFI_BADCBACK for an unknown slot, after the frame has been
+** set up. May also throw from the stack check.
+*/
+static void callback_conv_args(CTState *cts, lua_State *L)
+{
+  TValue *o = L->top;
+  intptr_t *stack = cts->cb.stack;
+  MSize slot = cts->cb.slot;
+  CTypeID id = 0, rid, fid;
+  CType *ct;
+  GCfunc *fn;
+  MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR;
+#if CCALL_NARG_FPR
+  MSize nfpr = 0;
+#endif
+
+  if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) {
+    ct = ctype_get(cts, id);
+    rid = ctype_cid(ct->info);  /* Child id of the function type. */
+    fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot));
+  } else {  /* Must set up frame first, before throwing the error. */
+    ct = NULL;
+    rid = 0;
+    fn = (GCfunc *)L;  /* Placeholder object for the frame link. */
+  }
+  o->u32.lo = LJ_CONT_FFI_CALLBACK;  /* Continuation returns from callback. */
+  o->u32.hi = rid;  /* Return type. x86: +(spadj<<16). */
+  o++;
+  setframe_gc(o, obj2gco(fn));
+  setframe_ftsz(o, (int)((char *)(o+1) - (char *)L->base) + FRAME_CONT);
+  L->top = L->base = ++o;
+  if (!ct)
+    lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK);
+  if (isluafunc(fn))
+    setcframe_pc(L->cframe, proto_bc(funcproto(fn))+1);
+  lj_state_checkstack(L, LUA_MINSTACK);  /* May throw. */
+  o = L->base;  /* Might have been reallocated. */
+
+#if LJ_TARGET_X86
+  /* x86 has several different calling conventions. */
+  switch (ctype_cconv(ct->info)) {
+  case CTCC_FASTCALL: maxgpr = 2; break;
+  case CTCC_THISCALL: maxgpr = 1; break;
+  default: maxgpr = 0; break;
+  }
+#endif
+
+  fid = ct->sib;  /* Walk the sibling chain of the function type. */
+  while (fid) {
+    CType *ctf = ctype_get(cts, fid);
+    if (!ctype_isattrib(ctf->info)) {
+      CType *cta;
+      void *sp;
+      CTSize sz;
+      int isfp;
+      MSize n;
+      lua_assert(ctype_isfield(ctf->info));
+      cta = ctype_rawchild(cts, ctf);
+      if (ctype_isenum(cta->info)) cta = ctype_child(cts, cta);
+      isfp = ctype_isfp(cta->info);
+      sz = (cta->size + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);  /* Round up. */
+      n = sz / CTSIZE_PTR;  /* Number of GPRs or stack slots needed. */
+
+      CALLBACK_HANDLE_REGARG  /* Handle register arguments. */
+
+      /* Otherwise pass argument on stack. */
+      if (CCALL_ALIGN_STACKARG && LJ_32 && sz == 8)
+	nsp = (nsp + 1) & ~1u;  /* Align 64 bit argument on stack. */
+      sp = &stack[nsp];
+      nsp += n;
+
+    done:
+      if (LJ_BE && cta->size < CTSIZE_PTR)
+	sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size);
+      lj_cconv_tv_ct(cts, cta, 0, o++, sp);  /* C value at sp -> TValue. */
+    }
+    fid = ctf->sib;
+  }
+  L->top = o;
+#if LJ_TARGET_X86
+  /* Store stack adjustment for returns from fastcall/stdcall callbacks. */
+  switch (ctype_cconv(ct->info)) {
+  case CTCC_FASTCALL: case CTCC_STDCALL:
+    (L->base-2)->u32.hi |= (nsp << (16+2));  /* nsp*4 in the upper 16 bits. */
+    break;
+  }
+#endif
+}
+
+/* Convert Lua object to callback result.
+**
+** The return type id is taken from the low 16 bits of the high word of the
+** slot at L->base-2. The converted value is written to cts->cb.gpr[0], or
+** to cts->cb.fpr[0] for FP types if CCALL_NUM_FPR is non-zero.
+** x86 only: cts->cb.gpr[2] is set to 0 for non-FP results, 1 for float-sized
+** and 2 for other FP results (presumably read by the VM to load the result
+** into the FPU; the assembler part is not shown here).
+*/
+static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
+{
+  CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi);
+#if LJ_TARGET_X86
+  cts->cb.gpr[2] = 0;
+#endif
+  if (!ctype_isvoid(ctr->info)) {
+    uint8_t *dp = (uint8_t *)&cts->cb.gpr[0];
+#ifdef CALLBACK_HANDLE_RET
+    CALLBACK_HANDLE_RET
+#endif
+#if CCALL_NUM_FPR
+    if (ctype_isfp(ctr->info))
+      dp = (uint8_t *)&cts->cb.fpr[0];
+#endif
+    lj_cconv_ct_tv(cts, ctr, dp, o, 0);  /* TValue -> C value at dp. */
+    /* Extend returned integers to (at least) 32 bits. */
+    if (ctype_isinteger_or_bool(ctr->info) && ctr->size < 4) {
+      if (ctr->info & CTF_UNSIGNED)
+	*(uint32_t *)dp = ctr->size == 1 ? (uint32_t)*(uint8_t *)dp :
+					   (uint32_t)*(uint16_t *)dp;
+      else
+	*(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
+					  (int32_t)*(int16_t *)dp;
+    }
+#if LJ_TARGET_X86
+    if (ctype_isfp(ctr->info))
+      cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2;
+#endif
+  }
+}
+
+/*
+** Enter callback: link the C frame 'cf' into the Lua state of the CTState
+** and push the converted callback arguments. Returns the Lua state on which
+** the callback function is to be run.
+*/
+lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf)
+{
+  global_State *g = cts->g;
+  lua_State *L = cts->L;
+  lua_assert(L != NULL);
+  if (gcref(g->jit_L) != NULL) {
+    /* A thread is registered in g->jit_L: raise the error on that thread. */
+    lj_err_caller(gco2th(gcref(g->jit_L)), LJ_ERR_FFI_BADCBACK);
+  }
+  /* Fill in the new C frame, then make it the current one. */
+  cframe_nres(cf) = 0;
+  cframe_errfunc(cf) = -1;
+  setcframe_L(cf, L);
+  cframe_prev(cf) = L->cframe;
+  L->cframe = cf;
+  callback_conv_args(cts, L);
+  return L;  /* Now call the function on this stack. */
+}
+
+/*
+** Leave callback: convert the Lua result 'o' to the C return value and
+** drop the C frame and the continuation frame set up on entry.
+** May throw from the result conversion.
+*/
+void LJ_FASTCALL lj_ccallback_leave(CTState *cts, TValue *o)
+{
+  lua_State *L = cts->L;
+  GCfunc *fn;
+  TValue *obase = L->base;
+  L->base = L->top;  /* Keep continuation frame for throwing errors. */
+  /* PC of RET* is lost. Point to last line for result conv. errors. */
+  fn = curr_func(L);
+  if (isluafunc(fn)) {
+    GCproto *pt = funcproto(fn);
+    setcframe_pc(L->cframe, proto_bc(pt)+pt->sizebc);
+  }
+  callback_conv_result(cts, L, o);
+  /* Finally drop C frame and continuation frame. */
+  L->cframe = cframe_prev(L->cframe);
+  L->top -= 2;
+  L->base = obase;
+  /*
+  ** Blacklist C function that called the callback. lj_ccall_func() resets
+  ** cb.slot to ~0u before every FFI call and checks it afterwards. An FFI
+  ** call made from inside the callback resets it again, so the marker must
+  ** be restored here for the outer call to see that a callback has run.
+  */
+  cts->cb.slot = 0;
+}
+
+/* -- C callback management ----------------------------------------------- */
+
+/*
+** Get an unused slot in the callback slot table and assign the type of
+** 'ct' to it. Grows the table (and creates the mcode area on first use)
+** if no free slot is left. Throws LJ_ERR_FFI_CBACKOV if the limit is hit.
+*/
+static MSize callback_slot_new(CTState *cts, CType *ct)
+{
+  CTypeID id = ctype_typeid(cts, ct);
+  CTypeID1 *cbid = cts->cb.cbid;
+  MSize top = cts->cb.topid;
+  /* Scan for a free entry, starting at the search hint. */
+  while (top < cts->cb.sizeid && cbid[top] != 0)
+    top++;
+  if (top >= cts->cb.sizeid) {  /* Nothing free: need to grow the table. */
+#if CALLBACK_MAX_SLOT
+    if (top >= CALLBACK_MAX_SLOT)
+#endif
+      lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
+    if (!cts->cb.mcode)
+      callback_mcode_new(cts);
+    lj_mem_growvec(cts->L, cbid, cts->cb.sizeid, CALLBACK_MAX_SLOT, CTypeID1);
+    cts->cb.cbid = cbid;
+    /* New entries start out as unused. */
+    memset(cbid+top, 0, (cts->cb.sizeid-top)*sizeof(CTypeID1));
+  }
+  cbid[top] = id;
+  cts->cb.topid = top+1;
+  return top;
+}
+
+/*
+** Check for function pointer and supported argument/result types.
+** Returns the raw function type or NULL if 'ct' cannot be used as the
+** type of a callback.
+*/
+static CType *callback_checkfunc(CTState *cts, CType *ct)
+{
+  CType *ctr, *ctf;
+  CTypeID fid;
+  int narg = 0;
+  /* Must be a pointer (of full pointer size on 64 bit) to a function. */
+  if (!ctype_isptr(ct->info) || (LJ_64 && ct->size != CTSIZE_PTR))
+    return NULL;
+  ct = ctype_rawchild(cts, ct);
+  if (!ctype_isfunc(ct->info))
+    return NULL;
+  /* Result: void, enum, pointer or a number of at most 8 bytes. */
+  ctr = ctype_rawchild(cts, ct);
+  if (!ctype_isvoid(ctr->info) && !ctype_isenum(ctr->info) &&
+      !ctype_isptr(ctr->info) &&
+      !(ctype_isnum(ctr->info) && ctr->size <= 8))
+    return NULL;
+  if ((ct->info & CTF_VARARG))  /* Vararg functions are rejected. */
+    return NULL;
+  for (fid = ct->sib; fid; fid = ctf->sib) {
+    CType *cta;
+    ctf = ctype_get(cts, fid);
+    if (ctype_isattrib(ctf->info))
+      continue;  /* Attributes in the chain are not arguments. */
+    lua_assert(ctype_isfield(ctf->info));
+    cta = ctype_rawchild(cts, ctf);
+    /* Argument: enum, pointer or a number of at most 8 bytes. */
+    if (!ctype_isenum(cta->info) && !ctype_isptr(cta->info) &&
+        !(ctype_isnum(cta->info) && cta->size <= 8))
+      return NULL;
+    if (++narg >= LUA_MINSTACK-3)  /* Too many arguments. */
+      return NULL;
+  }
+  return ct;
+}
+
+/*
+** Create a new callback and return the callback function pointer.
+**
+** 'ct' must be a pointer to a function type supported for callbacks,
+** otherwise NULL is returned (bad conversion). The Lua function 'fn' is
+** stored in cts->miscmap under the slot number. May throw if no slot can
+** be allocated.
+*/
+void *lj_ccallback_new(CTState *cts, CType *ct, GCfunc *fn)
+{
+  ct = callback_checkfunc(cts, ct);
+  if (ct) {
+    MSize slot = callback_slot_new(cts, ct);
+    GCtab *t = cts->miscmap;
+    setfuncV(cts->L, lj_tab_setint(cts->L, t, (int32_t)slot), fn);
+    /* The key may already exist (reused slot), so no barrier was done yet. */
+    lj_gc_anybarriert(cts->L, t);
+    return callback_slot2ptr(cts, slot);
+  }
+  return NULL;  /* Bad conversion. */
+}
+
+#endif

+ 25 - 0
src/lj_ccallback.h

@@ -0,0 +1,25 @@
+/*
+** FFI C callback handling.
+** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_CCALLBACK_H
+#define _LJ_CCALLBACK_H
+
+#include "lj_obj.h"
+#include "lj_ctype.h"
+
+#if LJ_HASFFI
+
+/* Really belongs to lj_vm.h. Assembler entry point which all callback
+** function pointer stubs jump to. */
+LJ_ASMF void lj_vm_ffi_callback(void);
+
+LJ_FUNC MSize lj_ccallback_ptr2slot(CTState *cts, void *p);  /* ~0u: unknown. */
+LJ_FUNCA lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf);
+LJ_FUNCA void LJ_FASTCALL lj_ccallback_leave(CTState *cts, TValue *o);
+LJ_FUNC void *lj_ccallback_new(CTState *cts, CType *ct, GCfunc *fn);  /* NULL: bad type. */
+LJ_FUNC void lj_ccallback_mcode_free(CTState *cts);  /* No-op without mcode. */
+
+#endif
+
+#endif

+ 8 - 0
src/lj_cconv.c

@@ -12,6 +12,7 @@
 #include "lj_ctype.h"
 #include "lj_cdata.h"
 #include "lj_cconv.h"
+#include "lj_ccallback.h"
 
 /* -- Conversion errors --------------------------------------------------- */
 
@@ -603,6 +604,13 @@ void lj_cconv_ct_tv(CTState *cts, CType *d,
     tmpptr = uddata(udataV(o));
   } else if (tvislightud(o)) {
     tmpptr = lightudV(o);
+  } else if (tvisfunc(o)) {
+    void *p = lj_ccallback_new(cts, d, funcV(o));
+    if (p) {
+      *(void **)dp = p;
+      return;
+    }
+    goto err_conv;
   } else {
   err_conv:
     cconv_err_convtv(cts, d, o, flags);

+ 8 - 1
src/lj_crecord.c

@@ -15,6 +15,7 @@
 #include "lj_tab.h"
 #include "lj_frame.h"
 #include "lj_ctype.h"
+#include "lj_cdata.h"
 #include "lj_cparse.h"
 #include "lj_cconv.h"
 #include "lj_clib.h"
@@ -785,7 +786,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
       did = ctype_cid(ctf->info);
     } else {
       if (!(ct->info & CTF_VARARG))
-        lj_trace_err(J, LJ_TRERR_NYICALL);  /* Too many arguments. */
+	lj_trace_err(J, LJ_TRERR_NYICALL);  /* Too many arguments. */
       did = lj_ccall_ctid_vararg(cts, o);  /* Infer vararg type. */
     }
     d = ctype_raw(cts, did);
@@ -853,6 +854,12 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd)
     CType *ctr = ctype_rawchild(cts, ct);
     IRType t = crec_ct2irt(ctr);
     TRef tr;
+    TValue tv;
+    /* Check for blacklisted C functions that might call a callback. */
+    setlightudV(&tv,
+		cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 8 : 4));
+    if (tvistrue(lj_tab_get(J->L, cts->miscmap, &tv)))
+      lj_trace_err(J, LJ_TRERR_BLACKL);
     if (ctype_isvoid(ctr->info)) {
       t = IRT_NIL;
       rd->nres = 0;

+ 8 - 1
src/lj_ctype.c

@@ -12,6 +12,7 @@
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_ctype.h"
+#include "lj_ccallback.h"
 
 /* -- C type definitions -------------------------------------------------- */
 
@@ -315,7 +316,11 @@ cTValue *lj_ctype_meta(CTState *cts, CTypeID id, MMS mm)
     id = ctype_cid(ct->info);
     ct = ctype_get(cts, id);
   }
-  tv = lj_tab_getint(cts->metatype, (int32_t)id);
+  if (ctype_isptr(ct->info) &&
+      ctype_isfunc(ctype_get(cts, ctype_cid(ct->info))->info))
+    tv = lj_tab_getstr(cts->miscmap, &cts->g->strempty);
+  else
+    tv = lj_tab_getinth(cts->miscmap, -(int32_t)id);
   if (tv && tvistab(tv) &&
       (tv = lj_tab_getstr(tabV(tv), mmname_str(cts->g, mm))) && !tvisnil(tv))
     return tv;
@@ -592,7 +597,9 @@ void lj_ctype_freestate(global_State *g)
 {
   CTState *cts = ctype_ctsG(g);
   if (cts) {
+    lj_ccallback_mcode_free(cts);
     lj_mem_freevec(g, cts->tab, cts->sizetab, CType);
+    lj_mem_freevec(g, cts->cb.cbid, cts->cb.sizeid, CTypeID1);
     lj_mem_freet(g, cts);
   }
 }

+ 21 - 1
src/lj_ctype.h

@@ -151,6 +151,25 @@ typedef struct CType {
 #define CTHASH_SIZE	128	/* Number of hash anchors. */
 #define CTHASH_MASK	(CTHASH_SIZE-1)
 
+/* Simplify target-specific configuration. Checked in lj_ccall.h. */
+#define CCALL_MAX_GPR		8
+#define CCALL_MAX_FPR		8
+
+typedef LJ_ALIGN(8) union FPRCBArg { double d; float f; } FPRCBArg;
+
+/* C callback state. Defined here, to avoid dragging in lj_ccall.h. */
+
+typedef LJ_ALIGN(8) struct CCallback {
+  FPRCBArg fpr[CCALL_MAX_FPR];	/* Arguments/results in FPRs. */
+  intptr_t gpr[CCALL_MAX_GPR];	/* Arguments/results in GPRs. */
+  intptr_t *stack;		/* Pointer to arguments on stack. */
+  void *mcode;			/* Machine code for callback func. pointers. */
+  CTypeID1 *cbid;		/* Callback type table. */
+  MSize sizeid;			/* Size of callback type table. */
+  MSize topid;			/* Highest unused callback type table slot. */
+  MSize slot;			/* Current callback slot. */
+} CCallback;
+
 /* C type state. */
 typedef struct CTState {
   CType *tab;		/* C type table. */
@@ -159,7 +178,8 @@ typedef struct CTState {
   lua_State *L;		/* Lua state (needed for errors and allocations). */
   global_State *g;	/* Global state. */
   GCtab *finalizer;	/* Map of cdata to finalizer. */
-  GCtab *metatype;	/* Map of CTypeID to metatable. */
+  GCtab *miscmap;	/* Map of -CTypeID to metatable and cb slot to func. */
+  CCallback cb;		/* Temporary callback state. */
   CTypeID1 hash[CTHASH_SIZE];  /* Hash anchors for C type table. */
 } CTState;
 

+ 2 - 1
src/lj_debug.c

@@ -107,6 +107,7 @@ BCLine LJ_FASTCALL lj_debug_line(GCproto *pt, BCPos pc)
   const void *lineinfo = proto_lineinfo(pt);
   if (pc < pt->sizebc && lineinfo) {
     BCLine first = pt->firstline;
+    if (pc == pt->sizebc-1) return first + pt->numline;
     if (pc-- == 0) return first;
     if (pt->numline < 256)
       return first + (BCLine)((const uint8_t *)lineinfo)[pc];
@@ -124,7 +125,7 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe)
   BCPos pc = debug_framepc(L, fn, nextframe);
   if (pc != NO_BCPOS) {
     GCproto *pt = funcproto(fn);
-    lua_assert(pc < pt->sizebc);
+    lua_assert(pc <= pt->sizebc);
     return lj_debug_line(pt, pc);
   }
   return -1;

+ 2 - 0
src/lj_def.h

@@ -67,6 +67,8 @@ typedef unsigned int uintptr_t;
 #define LJ_MAX_IDXCHAIN	100		/* __index/__newindex chain limit. */
 #define LJ_STACK_EXTRA	5		/* Extra stack space (metamethods). */
 
+#define LJ_NUM_CBPAGE	1		/* Number of FFI callback pages. */
+
 /* Minimum table/buffer sizes. */
 #define LJ_MIN_GLOBAL	6		/* Min. global table size (hbits). */
 #define LJ_MIN_REGISTRY	2		/* Min. registry size (hbits). */

+ 26 - 7
src/lj_err.c

@@ -113,6 +113,9 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
       frame = frame_prevl(frame);
       break;
     case FRAME_C:  /* C frame. */
+#if LJ_HASFFI
+    unwind_c:
+#endif
 #if LJ_UNWIND_EXT
       if (errcode) {
 	L->cframe = cframe_prev(cf);
@@ -145,6 +148,10 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
       }
       return cf;
     case FRAME_CONT:  /* Continuation frame. */
+#if LJ_HASFFI
+      if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK)
+	goto unwind_c;
+#endif
     case FRAME_VARG:  /* Vararg frame. */
       frame = frame_prevd(frame);
       break;
@@ -464,6 +471,10 @@ static ptrdiff_t finderrfunc(lua_State *L)
       cf = cframe_prev(cf);
       /* fallthrough */
     case FRAME_CONT:
+#if LJ_HASFFI
+      if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK)
+	cf = cframe_prev(cf);
+#endif
     case FRAME_VARG:
       frame = frame_prevd(frame);
       break;
@@ -591,15 +602,23 @@ LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg)
   if (frame_islua(frame)) {
     pframe = frame_prevl(frame);
   } else if (frame_iscont(frame)) {
-    pframe = frame_prevd(frame);
 #if LJ_HASFFI
-    /* Remove frame for FFI metamethods. */
-    if (frame_func(frame)->c.ffid >= FF_ffi_meta___index &&
-	frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) {
-      L->base = pframe+1;
-      L->top = frame;
-    }
+    if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK) {
+      pframe = frame;
+      frame = NULL;
+    } else
+#endif
+    {
+      pframe = frame_prevd(frame);
+#if LJ_HASFFI
+      /* Remove frame for FFI metamethods. */
+      if (frame_func(frame)->c.ffid >= FF_ffi_meta___index &&
+	  frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) {
+	L->base = pframe+1;
+	L->top = frame;
+      }
 #endif
+    }
   }
   lj_debug_addloc(L, msg, pframe, frame);
   lj_err_run(L);

+ 6 - 0
src/lj_errmsg.h

@@ -160,6 +160,12 @@ ERRDEF(FFI_BADMEMBER,	LUA_QS " has no member named " LUA_QS)
 ERRDEF(FFI_BADIDX,	LUA_QS " cannot be indexed")
 ERRDEF(FFI_WRCONST,	"attempt to write to constant location")
 ERRDEF(FFI_NODECL,	"missing declaration for symbol " LUA_QS)
+ERRDEF(FFI_BADCBACK,	"bad callback")
+#if LJ_TARGET_X86ORX64
+ERRDEF(FFI_CBACKOV,	"too many callbacks")
+#else
+ERRDEF(FFI_CBACKOV,	"no support for callbacks (yet)")
+#endif
 ERRDEF(FFI_NYIPACKBIT,	"NYI: packed bit fields")
 ERRDEF(FFI_NYICALL,	"NYI: cannot call this C function (yet)")
 #endif

+ 2 - 0
src/lj_frame.h

@@ -138,6 +138,8 @@ enum {
   (&gcref(*(GCRef *)(((char *)(cf))+CFRAME_OFS_L))->th)
 #define cframe_pc(cf) \
   (mref(*(MRef *)(((char *)(cf))+CFRAME_OFS_PC), const BCIns))
+#define setcframe_L(cf, L) \
+  (setmref(*(MRef *)(((char *)(cf))+CFRAME_OFS_L), (L)))
 #define setcframe_pc(cf, pc) \
   (setmref(*(MRef *)(((char *)(cf))+CFRAME_OFS_PC), (pc)))
 #define cframe_canyield(cf)	((intptr_t)(cf) & CFRAME_RESUME)

+ 1 - 1
src/lj_meta.c

@@ -77,7 +77,7 @@ int lj_meta_tailcall(lua_State *L, cTValue *tv)
   TValue *top = L->top;
   const BCIns *pc = frame_pc(base-1);  /* Preserve old PC from frame. */
   copyTV(L, base-1, tv);  /* Replace frame with new object. */
-  top->u64 = 0;
+  top->u32.lo = LJ_CONT_TAILCALL;
   setframe_pc(top, pc);
   setframe_gc(top+1, obj2gco(L));  /* Dummy frame object. */
   setframe_ftsz(top+1, (int)((char *)(top+2) - (char *)base) + FRAME_CONT);

+ 2 - 0
src/lj_target_x86.h

@@ -192,6 +192,7 @@ typedef enum {
   XI_CALL =	0xe8,
   XI_JMP =	0xe9,
   XI_JMPs =	0xeb,
+  XI_PUSH =	0x50, /* Really 50+r. */
   XI_JCCs =	0x70, /* Really 7x. */
   XI_JCCn =	0x80, /* Really 0f8x. */
   XI_LEA =	0x8d,
@@ -203,6 +204,7 @@ typedef enum {
   XI_PUSHi8 =	0x6a,
   XI_TEST =	0x85,
   XI_MOVmi =	0xc7,
+  XI_GROUP5 =	0xff,
 
   /* Note: little-endian byte-order! */
   XI_FLDZ =	0xeed9,

+ 2 - 0
src/lj_vm.h

@@ -88,6 +88,8 @@ LJ_ASMF void lj_cont_condt(void);  /* Branch if result is true. */
 LJ_ASMF void lj_cont_condf(void);  /* Branch if result is false. */
 LJ_ASMF void lj_cont_hook(void);  /* Continue from hook yield. */
 
+enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
+
 /* Start of the ASM code. */
 LJ_ASMF char lj_vm_asm_begin[];
 

+ 1 - 0
src/ljamalg.c

@@ -52,6 +52,7 @@
 #include "lj_cdata.c"
 #include "lj_cconv.c"
 #include "lj_ccall.c"
+#include "lj_ccallback.c"
 #include "lj_carith.c"
 #include "lj_clib.c"
 #include "lj_cparse.c"

Some files were not shown because too many files changed in this diff