Browse Source

Major redesign of function call handling.

Drop call gates. Use function headers, dispatched like bytecodes.
Emit BC_FUNCF/BC_FUNCV bytecode at PC 0 for all Lua functions.
C functions and ASM fast functions get extra bytecodes.
Modify internal calling convention: new base in BASE (formerly in RA).
Can now use better C function wrapper semantics (dynamic on/off).
Prerequisite for call hooks with zero overhead if disabled.
Prerequisite for compiling recursive calls.
Prerequisite for efficient 32/64 bit prototype guards.
Mike Pall 15 years ago
parent
commit
c93138b59e
34 changed files with 2314 additions and 2430 deletions
  1. 13 25
      doc/api.html
  2. 2 2
      src/Makefile
  3. 26 25
      src/Makefile.dep
  4. 7 5
      src/buildvm.c
  5. 22 9
      src/buildvm_lib.c
  6. 4 3
      src/buildvm_peobj.c
  7. 620 623
      src/buildvm_x64.h
  8. 650 657
      src/buildvm_x64win.h
  9. 282 416
      src/buildvm_x86.dasc
  10. 516 534
      src/buildvm_x86.h
  11. 13 5
      src/lib_base.c
  12. 1 2
      src/lib_io.c
  13. 4 4
      src/lib_jit.c
  14. 2 2
      src/lib_package.c
  15. 1 2
      src/lib_string.c
  16. 3 2
      src/lj_api.c
  17. 2 2
      src/lj_asm.c
  18. 1 6
      src/lj_bc.c
  19. 18 3
      src/lj_bc.h
  20. 4 8
      src/lj_def.h
  21. 42 20
      src/lj_dispatch.c
  22. 15 7
      src/lj_dispatch.h
  23. 7 10
      src/lj_err.c
  24. 1 2
      src/lj_func.c
  25. 1 4
      src/lj_gdbjit.c
  26. 7 4
      src/lj_lib.c
  27. 13 0
      src/lj_lib.h
  28. 5 4
      src/lj_obj.h
  29. 21 17
      src/lj_parse.c
  30. 1 7
      src/lj_record.c
  31. 6 8
      src/lj_state.c
  32. 3 5
      src/lj_trace.c
  33. 0 6
      src/lj_vm.h
  34. 1 1
      src/msvcbuild.bat

+ 13 - 25
doc/api.html

@@ -258,13 +258,12 @@ side traces from the cache.
 
 <h3 id="mode_engine"><tt>luaJIT_setmode(L, idx, LUAJIT_MODE_WRAPCFUNC|flag)</tt></h3>
 <p>
-This mode defines a wrapper function for calls to C functions. The
-first time this is called with <tt>LUAJIT_MODE_ON</tt>, the stack
-index at <tt>idx</tt> must be a <tt>lightuserdata</tt> object holding
-a pointer to the wrapper function. All <b>subsequently created C
-functions</b> are called through the wrapper functions. After the initial
-definition <tt>idx</tt> can be left at <tt>0</tt> when turning the mode
-on or off.
+This mode defines a wrapper function for calls to C functions. If
+called with <tt>LUAJIT_MODE_ON</tt>, the stack index at <tt>idx</tt>
+must be a <tt>lightuserdata</tt> object holding a pointer to the wrapper
+function. From now on all C functions are called through the wrapper
+function. If called with <tt>LUAJIT_MODE_OFF</tt> this mode is turned
+off and all C functions are directly called.
 </p>
 <p>
 The wrapper function can be used for debugging purposes or to catch
@@ -291,38 +290,27 @@ static int wrap_exceptions(lua_State *L, lua_CFunction f)
   return lua_error(L);  // Rethrow as a Lua error.
 }
 
-static int myregister(lua_State *L)
+static int myinit(lua_State *L)
 {
   ...
   // Define wrapper function and enable it.
   lua_pushlightuserdata(L, (void *)wrap_exceptions);
   luaJIT_setmode(L, -1, LUAJIT_MODE_WRAPCFUNC|LUAJIT_MODE_ON);
   lua_pop(L, 1);
-  luaL_register(L, "mymodule", myfuncs);  // Pass luaL_Reg list.
-  luaJIT_setmode(L, 0, LUAJIT_MODE_WRAPCFUNC|LUAJIT_MODE_OFF);
-  ...
-  // Wrap some more C++ functions which might throw an exception.
-  luaJIT_setmode(L, 0, LUAJIT_MODE_WRAPCFUNC|LUAJIT_MODE_ON);
-  lua_pushcfunction(L, mythrowingfunc1);
-  lua_pushcclosure(L, mythrowingfunc2, 1);
-  luaJIT_setmode(L, 0, LUAJIT_MODE_WRAPCFUNC|LUAJIT_MODE_OFF);
   ...
 }
 </pre>
 <p>
 Note that you can only define <b>a single global wrapper function</b>,
 so be careful when using this mechanism from multiple C++ modules.
-Also note that this mechanism is not without overhead. It should only
-be enabled for definitions of C++ functions that can actually throw
-exceptions. If you're embedding LuaJIT into an application, only
-enable it <b>after</b> running <tt>luaL_openlibs</tt>.
+Also note that this mechanism is not without overhead.
 </p>
 <p>
-LuaJIT already intercepts exception handling for all x64 systems and
-for x86 systems using DWARF2 stack unwinding (e.g. Linux, OSX). This
-is a zero-cost mechanism and always enabled. You don't need to use any
-wrapper functions, except when you want to get a more specific error
-message than <tt>"C++&nbsp;exception"</tt>.
+LuaJIT already intercepts exception handling for systems using DWARF2
+stack unwinding (e.g. Linux or OSX) and for Windows/x64 (but <b>not</b>
+for Windows/x86). This is a zero-cost mechanism and always enabled.
+You don't need to use any wrapper functions, except when you want to get
+a more specific error message than <tt>"C++&nbsp;exception"</tt>.
 </p>
 <br class="flush">
 </div>

+ 2 - 2
src/Makefile

@@ -412,9 +412,9 @@ $(LJVM_BOUT): $(BUILDVM_T)
 	$(E) "BUILDVM   $@"
 	$(Q)$(BUILDVM_X) -m $(LJVM_MODE) -o $@
 
-lj_bcdef.h: $(BUILDVM_T)
+lj_bcdef.h: $(BUILDVM_T) $(LJLIB_C)
 	$(E) "BUILDVM   $@"
-	$(Q)$(BUILDVM_X) -m bcdef -o $@
+	$(Q)$(BUILDVM_X) -m bcdef -o $@ $(LJLIB_C)
 
 lj_ffdef.h: $(BUILDVM_T) $(LJLIB_C)
 	$(E) "BUILDVM   $@"

+ 26 - 25
src/Makefile.dep

@@ -14,7 +14,8 @@ lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
   lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_lib.h lj_alloc.h
 lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
   lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \
-  lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_ctype.h lj_lib.h lj_libdef.h
+  lj_meta.h lj_state.h lj_bc.h lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h \
+  lj_ir.h lj_ctype.h lj_lib.h lj_libdef.h
 lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
   lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h
 lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
@@ -22,8 +23,8 @@ lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
 lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h
 lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
   lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ff.h lj_ffdef.h \
-  lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_lib.h \
-  lj_libdef.h
+  lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h lj_traceerr.h \
+  lj_lib.h lj_libdef.h
 lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \
   lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_bc.h \
   lj_ir.h lj_jit.h lj_iropt.h lj_dispatch.h lj_vm.h lj_vmevent.h lj_lib.h \
@@ -43,7 +44,7 @@ lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
 lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h
 lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
   lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \
-  lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
+  lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
   lj_traceerr.h lj_vm.h lj_lex.h lj_parse.h
 lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
   lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
@@ -53,14 +54,15 @@ lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
   lj_bcdef.h
 lj_ctype.o: lj_ctype.c lj_ctype.h lj_def.h lua.h luaconf.h
 lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
-  lj_err.h lj_errmsg.h lj_state.h lj_frame.h lj_bc.h lj_jit.h lj_ir.h \
-  lj_trace.h lj_dispatch.h lj_traceerr.h lj_vm.h luajit.h
+  lj_err.h lj_errmsg.h lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h \
+  lj_jit.h lj_ir.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_vm.h \
+  luajit.h
 lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
   lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_state.h lj_frame.h lj_bc.h \
   lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
 lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
-  lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h \
-  lj_vm.h
+  lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
+  lj_traceerr.h lj_vm.h
 lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
   lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \
   lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
@@ -70,14 +72,14 @@ lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
   lj_ir.h lj_dispatch.h
 lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
   lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
-  lj_traceerr.h lj_lib.h
+  lj_bc.h lj_traceerr.h lj_lib.h
 lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
   lj_err.h lj_errmsg.h lj_str.h lj_lex.h lj_parse.h lj_ctype.h
 lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
-  lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_vm.h \
-  lj_lib.h
+  lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
+  lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_lib.h
 lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
-  lj_gc.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h lj_dispatch.h \
+  lj_gc.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h lj_dispatch.h lj_bc.h \
   lj_traceerr.h
 lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
   lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_bc.h lj_vm.h
@@ -85,11 +87,11 @@ lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
 lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
   lj_ir.h lj_jit.h lj_iropt.h
 lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
-  lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
+  lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \
   lj_traceerr.h lj_vm.h lj_folddef.h
 lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
   lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
-  lj_dispatch.h lj_traceerr.h lj_snap.h lj_vm.h
+  lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h
 lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
   lj_tab.h lj_ir.h lj_jit.h lj_iropt.h
 lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
@@ -122,21 +124,20 @@ lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
   lj_gc.h lj_udata.h
 lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
-  lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h \
-  lj_vmevent.h
+  lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \
+  lj_vm.h lj_vmevent.h
 ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
   lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h \
   lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h \
   lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c lj_ctype.c \
   lj_ctype.h lj_bc.c lj_bcdef.h lj_obj.c lj_str.c lj_tab.c lj_func.c \
   lj_udata.c lj_meta.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c \
-  luajit.h lj_vmevent.c lj_vmevent.h lj_api.c lj_parse.h lj_lex.c \
-  lj_parse.c lj_lib.c lj_lib.h lj_ir.c lj_iropt.h lj_opt_mem.c \
-  lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c \
-  lj_snap.h lj_mcode.c lj_mcode.h lj_snap.c lj_target.h lj_target_x86.h \
-  lj_record.c lj_ff.h lj_ffdef.h lj_record.h lj_asm.h lj_recdef.h \
-  lj_asm.c lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
-  lib_base.c lualib.h lj_libdef.h lib_math.c lib_string.c lib_table.c \
-  lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \
-  lib_init.c
+  lj_ff.h lj_ffdef.h luajit.h lj_vmevent.c lj_vmevent.h lj_api.c \
+  lj_parse.h lj_lex.c lj_parse.c lj_lib.c lj_lib.h lj_ir.c lj_iropt.h \
+  lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \
+  lj_opt_loop.c lj_snap.h lj_mcode.c lj_mcode.h lj_snap.c lj_target.h \
+  lj_target_x86.h lj_record.c lj_record.h lj_asm.h lj_recdef.h lj_asm.c \
+  lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c \
+  lualib.h lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
+  lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_init.c
 luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h

+ 7 - 5
src/buildvm.c

@@ -256,12 +256,12 @@ static void emit_bcdef(BuildCtx *ctx)
 {
   int i;
   fprintf(ctx->fp, "/* This is a generated file. DO NOT EDIT! */\n\n");
-  fprintf(ctx->fp, "LJ_DATADEF const uint16_t lj_bc_ofs[BC__MAX+1] = {\n ");
+  fprintf(ctx->fp, "LJ_DATADEF const uint16_t lj_bc_ofs[] = {\n");
   for (i = 0; i < ctx->npc; i++) {
-    fprintf(ctx->fp, " %4d,", ctx->sym_ofs[i]);
-    if ((i & 7) == 7) fprintf(ctx->fp, "\n ");
+    if (i != 0)
+      fprintf(ctx->fp, ",\n");
+    fprintf(ctx->fp, "%d", ctx->sym_ofs[i]);
   }
-  fprintf(ctx->fp, " 0\n};\n\n");
 }
 
 /* Emit VM definitions as Lua code for debug modules. */
@@ -433,10 +433,12 @@ int main(int argc, char **argv)
     break;
   case BUILD_bcdef:
     emit_bcdef(ctx);
+    emit_lib(ctx);
     break;
   case BUILD_vmdef:
     emit_vmdef(ctx);
-    /* fallthrough */
+    emit_lib(ctx);
+    break;
   case BUILD_ffdef:
   case BUILD_libdef:
   case BUILD_recdef:

+ 22 - 9
src/buildvm_lib.c

@@ -15,7 +15,7 @@ static char modname[80];
 static size_t modnamelen;
 static char funcname[80];
 static int modstate, regfunc;
-static int ffid, recffid;
+static int ffid, recffid, ffasmfunc;
 
 enum {
   REGFUNC_OK,
@@ -77,7 +77,8 @@ static void libdef_module(BuildCtx *ctx, char *p, int arg)
     libdef_endmodule(ctx);
     optr = obuf;
     *optr++ = (uint8_t)ffid;
-    *optr++ = 0;
+    *optr++ = (uint8_t)ffasmfunc;
+    *optr++ = 0;  /* Hash table size. */
     modstate = 1;
     fprintf(ctx->fp, "#ifdef %sMODULE_%s\n", LIBDEF_PREFIX, p);
     fprintf(ctx->fp, "#undef %sMODULE_%s\n", LIBDEF_PREFIX, p);
@@ -108,8 +109,9 @@ static int find_ffofs(BuildCtx *ctx, const char *name)
 
 static void libdef_func(BuildCtx *ctx, char *p, int arg)
 {
+  if (arg != LIBINIT_CF)
+    ffasmfunc++;
   if (ctx->mode == BUILD_libdef) {
-    int ofs = arg != LIBINIT_CF ? find_ffofs(ctx, p) : 0;
     if (modstate == 0) {
       fprintf(stderr, "Error: no module for function definition %s\n", p);
       exit(1);
@@ -126,12 +128,8 @@ static void libdef_func(BuildCtx *ctx, char *p, int arg)
 	modstate = 2;
 	fprintf(ctx->fp, "  %s%s", arg ? LABEL_PREFIX_FFH : LABEL_PREFIX_CF, p);
       }
-      if (regfunc != REGFUNC_NOREGUV) obuf[1]++;  /* Bump hash table size. */
+      if (regfunc != REGFUNC_NOREGUV) obuf[2]++;  /* Bump hash table size. */
       libdef_name(regfunc == REGFUNC_NOREGUV ? "" : p, arg);
-      if (arg) {
-	*optr++ = (uint8_t)ofs;
-	*optr++ = (uint8_t)(ofs >> 8);
-      }
     }
   } else if (ctx->mode == BUILD_ffdef) {
     fprintf(ctx->fp, "FFDEF(%s)\n", p);
@@ -146,6 +144,9 @@ static void libdef_func(BuildCtx *ctx, char *p, int arg)
     for (i = 1; p[i] && modname[i-1]; i++)
       if (p[i] == '_') p[i] = '.';
     fprintf(ctx->fp, "\"%s\",\n", p);
+  } else if (ctx->mode == BUILD_bcdef) {
+    if (arg != LIBINIT_CF)
+      fprintf(ctx->fp, ",\n%d", find_ffofs(ctx, p));
   }
   ffid++;
   regfunc = REGFUNC_OK;
@@ -253,7 +254,7 @@ static void libdef_set(BuildCtx *ctx, char *p, int arg)
     if (p[0] == '!' && p[1] == '\0') p[0] = '\0';  /* Set env. */
     libdef_name(p, LIBINIT_STRING);
     *optr++ = LIBINIT_SET;
-    obuf[1]++;  /* Bump hash table size. */
+    obuf[2]++;  /* Bump hash table size. */
   }
 }
 
@@ -298,6 +299,7 @@ void emit_lib(BuildCtx *ctx)
   if (ctx->mode == BUILD_recdef)
     fprintf(ctx->fp, "static const uint16_t recff_idmap[] = {\n0,\n0x0100");
   recffid = ffid = FF_C+1;
+  ffasmfunc = 0;
 
   while ((fname = *ctx->args++)) {
     char buf[256];  /* We don't care about analyzing lines longer than that. */
@@ -347,8 +349,19 @@ void emit_lib(BuildCtx *ctx)
 
   if (ctx->mode == BUILD_ffdef) {
     fprintf(ctx->fp, "\n#undef FFDEF\n\n");
+    fprintf(ctx->fp,
+      "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n",
+      ffasmfunc);
   } else if (ctx->mode == BUILD_vmdef) {
     fprintf(ctx->fp, "}\n\n");
+  } else if (ctx->mode == BUILD_bcdef) {
+    int i;
+    fprintf(ctx->fp, "\n};\n\n");
+    fprintf(ctx->fp, "LJ_DATADEF const uint16_t lj_bc_mode[] = {\n");
+    fprintf(ctx->fp, "BCDEF(BCMODE)\n");
+    for (i = ffasmfunc-1; i > 0; i--)
+      fprintf(ctx->fp, "BCMODE_FF,\n");
+    fprintf(ctx->fp, "BCMODE_FF\n};\n\n");
   } else if (ctx->mode == BUILD_recdef) {
     char *p = (char *)obuf;
     fprintf(ctx->fp, "\n};\n\n");

+ 4 - 3
src/buildvm_peobj.c

@@ -238,7 +238,7 @@ void emit_peobj(BuildCtx *ctx)
   for (relocsyms = 0; ctx->extnames[relocsyms]; relocsyms++) ;
   pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+(ctx->nsym-nzsym) + relocsyms;
 #if !LJ_HASJIT
-  pehdr.nsyms -= 7;
+  pehdr.nsyms -= 11;  /* See below, removes [IJ]* opcode symbols. */
 #endif
 #if LJ_TARGET_X64
   pehdr.nsyms += 1;  /* Symbol for lj_err_unwind_win64. */
@@ -353,8 +353,9 @@ void emit_peobj(BuildCtx *ctx)
       } else {
 #else
       } else if (!(pi == BC_JFORI || pi == BC_JFORL || pi == BC_JITERL ||
-		   pi == BC_JLOOP || pi == BC_IFORL || pi == BC_IITERL ||
-		   pi == BC_ILOOP)) {
+		   pi == BC_JLOOP || pi == BC_JFUNCF || pi == BC_JFUNCV ||
+		   pi == BC_IFORL || pi == BC_IITERL || pi == BC_ILOOP ||
+		   pi == BC_IFUNCF || pi == BC_IFUNCV)) {
 #endif
 	sprintf(name, PEOBJ_SYM_PREFIX LABEL_PREFIX_BC "%s",
 		bc_names[pi]);

File diff suppressed because it is too large
+ 620 - 623
src/buildvm_x64.h


File diff suppressed because it is too large
+ 650 - 657
src/buildvm_x64win.h


File diff suppressed because it is too large
+ 282 - 416
src/buildvm_x86.dasc


File diff suppressed because it is too large
+ 516 - 534
src/buildvm_x86.h


+ 13 - 5
src/lib_base.c

@@ -22,7 +22,9 @@
 #include "lj_tab.h"
 #include "lj_meta.h"
 #include "lj_state.h"
+#include "lj_bc.h"
 #include "lj_ff.h"
+#include "lj_dispatch.h"
 #include "lj_ctype.h"
 #include "lj_lib.h"
 
@@ -521,19 +523,25 @@ void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co)
     lj_err_run(L);
 }
 
+/* Forward declaration. */
+static void setpc_wrap_aux(lua_State *L, GCfunc *fn);
+
 LJLIB_CF(coroutine_wrap)
 {
-  GCfunc *fn;
   lj_cf_coroutine_create(L);
-  lua_pushcclosure(L, lj_ffh_coroutine_wrap_aux, 1);
-  fn = funcV(L->top-1);
-  fn->c.gate = lj_ff_coroutine_wrap_aux;
-  fn->c.ffid = FF_coroutine_wrap_aux;
+  lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1);
+  setpc_wrap_aux(L, funcV(L->top-1));
   return 1;
 }
 
 #include "lj_libdef.h"
 
+/* Fix the PC of wrap_aux. Really ugly workaround. */
+static void setpc_wrap_aux(lua_State *L, GCfunc *fn)
+{
+  setmref(fn->c.pc, &L2GG(L)->bcff[lj_lib_init_coroutine[1]+2]);
+}
+
 /* ------------------------------------------------------------------------ */
 
 static void newproxy_weaktable(lua_State *L)

+ 1 - 2
src/lib_io.c

@@ -502,8 +502,7 @@ static GCobj *io_std_new(lua_State *L, FILE *fp, const char *name)
 
 LUALIB_API int luaopen_io(lua_State *L)
 {
-  lua_pushcfunction(L, lj_cf_io_lines_iter);
-  funcV(L->top-1)->c.ffid = FF_io_lines_iter;
+  lj_lib_pushcf(L, lj_cf_io_lines_iter, FF_io_lines_iter);
   LJ_LIB_REG_(L, NULL, io_method);
   copyTV(L, L->top, L->top-1); L->top++;
   lua_setfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE);

+ 4 - 4
src/lib_jit.c

@@ -177,7 +177,7 @@ LJLIB_CF(jit_util_funcinfo)
     GCtab *t;
     lua_createtable(L, 0, 16);  /* Increment hash size if fields are added. */
     t = tabV(L->top-1);
-    setintfield(L, t, "linedefined", pt->linedefined);
+    setintfield(L, t, "linedefined", proto_line(pt, 0));
     setintfield(L, t, "lastlinedefined", pt->lastlinedefined);
     setintfield(L, t, "stackslots", pt->framesize);
     setintfield(L, t, "params", pt->numparams);
@@ -185,9 +185,9 @@ LJLIB_CF(jit_util_funcinfo)
     setintfield(L, t, "gcconsts", (int32_t)pt->sizekgc);
     setintfield(L, t, "nconsts", (int32_t)pt->sizekn);
     setintfield(L, t, "upvalues", (int32_t)pt->sizeuv);
-    if (pc-1 < pt->sizebc)
+    if (pc < pt->sizebc)
       setintfield(L, t, "currentline",
-		  proto_lineinfo(pt) ? proto_line(pt, pc-1) : 0);
+		  proto_lineinfo(pt) ? proto_line(pt, pc) : 0);
     lua_pushboolean(L, (pt->flags & PROTO_IS_VARARG));
     lua_setfield(L, -2, "isvararg");
     setstrV(L, L->top++, proto_chunkname(pt));
@@ -209,7 +209,7 @@ LJLIB_CF(jit_util_funcinfo)
 LJLIB_CF(jit_util_funcbc)
 {
   GCproto *pt = check_Lproto(L, 0);
-  BCPos pc = (BCPos)lj_lib_checkint(L, 2) - 1;
+  BCPos pc = (BCPos)lj_lib_checkint(L, 2);
   if (pc < pt->sizebc) {
     BCIns ins = proto_bc(pt)[pc];
     BCOp op = bc_op(ins);

+ 2 - 2
src/lib_package.c

@@ -482,14 +482,14 @@ LUALIB_API int luaopen_package(lua_State *L)
 {
   int i;
   luaL_newmetatable(L, "_LOADLIB");
-  lua_pushcfunction(L, lj_cf_package_unloadlib);
+  lj_lib_pushcf(L, lj_cf_package_unloadlib, 1);
   lua_setfield(L, -2, "__gc");
   luaL_register(L, LUA_LOADLIBNAME, package_lib);
   lua_pushvalue(L, -1);
   lua_replace(L, LUA_ENVIRONINDEX);
   lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0);
   for (i = 0; package_loaders[i] != NULL; i++) {
-    lua_pushcfunction(L, package_loaders[i]);
+    lj_lib_pushcf(L, package_loaders[i], 1);
     lua_rawseti(L, -2, i+1);
   }
   lua_setfield(L, -2, "loaders");

+ 1 - 2
src/lib_string.c

@@ -536,8 +536,7 @@ LJLIB_CF(string_gmatch)
   lj_lib_checkstr(L, 2);
   L->top = L->base+3;
   (L->top-1)->u64 = 0;
-  lua_pushcclosure(L, lj_cf_string_gmatch_aux, 3);
-  funcV(L->top-1)->c.ffid = FF_string_gmatch_aux;
+  lj_lib_pushcc(L, lj_cf_string_gmatch_aux, FF_string_gmatch_aux, 3);
   return 1;
 }
 

+ 3 - 2
src/lj_api.c

@@ -18,6 +18,7 @@
 #include "lj_udata.h"
 #include "lj_meta.h"
 #include "lj_state.h"
+#include "lj_bc.h"
 #include "lj_frame.h"
 #include "lj_trace.h"
 #include "lj_vm.h"
@@ -487,8 +488,8 @@ LUA_API lua_CFunction lua_tocfunction(lua_State *L, int idx)
 {
   cTValue *o = index2adr(L, idx);
   if (tvisfunc(o)) {
-    ASMFunction gate = funcV(o)->c.gate;
-    if (gate == lj_gate_c || gate == lj_gate_cwrap)
+    BCOp op = bc_op(*mref(funcV(o)->c.pc, BCIns));
+    if (op == BC_FUNCC || op == BC_FUNCCW)
       return funcV(o)->c.f;
   }
   return NULL;

+ 2 - 2
src/lj_asm.c

@@ -906,7 +906,7 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
   *mxp++ = MODRM(XM_OFS8, 0, RID_ESP);
   *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
   *mxp++ = 2*sizeof(void *);
-  *(int32_t *)mxp = ptr2addr(GG2DISP(J2GG(as->J))); mxp += 4;
+  *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4;
   /* Jump to exit handler which fills in the ExitState. */
   *mxp++ = XI_JMP; mxp += 4;
   *((int32_t *)(mxp-4)) = (int32_t)((MCode *)lj_vm_exit_handler - mxp);
@@ -3066,7 +3066,7 @@ static void asm_tail_sync(ASMState *as)
 
   if (as->T->link == TRACE_INTERP) {
     /* Setup fixed registers for exit to interpreter. */
-    emit_loada(as, RID_DISPATCH, GG2DISP(J2GG(as->J)));
+    emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch);
     emit_loadi(as, RID_PC, (int32_t)map[nent]);
   } else if (newbase) {
     /* Save modified BASE for linking to trace with higher start frame. */

+ 1 - 6
src/lj_bc.c

@@ -9,11 +9,6 @@
 #include "lj_obj.h"
 #include "lj_bc.h"
 
-/* Bytecode instruction modes. */
-LJ_DATADEF const uint16_t lj_bc_mode[BC__MAX+1] = {
-BCDEF(BCMODE)
-  0
-};
-
+/* Bytecode offsets and bytecode instruction modes. */
 #include "lj_bcdef.h"
 

+ 18 - 3
src/lj_bc.h

@@ -178,7 +178,17 @@
   _(ILOOP,	rbase,	___,	jump,	___) \
   _(JLOOP,	rbase,	___,	lit,	___) \
   \
-  _(JMP,	rbase,	___,	jump,	___)
+  _(JMP,	rbase,	___,	jump,	___) \
+  \
+  /* Function headers. I/J = interp/JIT, F/V/C = fixarg/vararg/C func. */ \
+  _(FUNCF,	rbase,	___,	___,	___) \
+  _(IFUNCF,	rbase,	___,	___,	___) \
+  _(JFUNCF,	rbase,	___,	lit,	___) \
+  _(FUNCV,	rbase,	___,	___,	___) \
+  _(IFUNCV,	rbase,	___,	___,	___) \
+  _(JFUNCV,	rbase,	___,	lit,	___) \
+  _(FUNCC,	___,	___,	___,	___) \
+  _(FUNCCW,	___,	___,	___,	___)
 
 /* Bytecode opcode numbers. */
 typedef enum {
@@ -206,6 +216,10 @@ LJ_STATIC_ASSERT((int)BC_ITERL + 1 == (int)BC_IITERL);
 LJ_STATIC_ASSERT((int)BC_ITERL + 2 == (int)BC_JITERL);
 LJ_STATIC_ASSERT((int)BC_LOOP + 1 == (int)BC_ILOOP);
 LJ_STATIC_ASSERT((int)BC_LOOP + 2 == (int)BC_JLOOP);
+LJ_STATIC_ASSERT((int)BC_FUNCF + 1 == (int)BC_IFUNCF);
+LJ_STATIC_ASSERT((int)BC_FUNCF + 2 == (int)BC_JFUNCF);
+LJ_STATIC_ASSERT((int)BC_FUNCV + 1 == (int)BC_IFUNCV);
+LJ_STATIC_ASSERT((int)BC_FUNCV + 2 == (int)BC_JFUNCV);
 
 /* Stack slots used by FORI/FORL, relative to operand A. */
 enum {
@@ -229,8 +243,9 @@ typedef enum {
 
 #define BCMODE(name, ma, mb, mc, mm) \
   (BCM##ma|(BCM##mb<<3)|(BCM##mc<<7)|(MM_##mm<<11)),
+#define BCMODE_FF	0
 
-LJ_DATA const uint16_t lj_bc_mode[BC__MAX+1];
-LJ_DATA const uint16_t lj_bc_ofs[BC__MAX+1];
+LJ_DATA const uint16_t lj_bc_mode[];
+LJ_DATA const uint16_t lj_bc_ofs[];
 
 #endif

+ 4 - 8
src/lj_def.h

@@ -193,18 +193,14 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x)
 #endif
 
 /* Attributes for internal functions. */
-#if defined(ljamalg_c)
-#define LJ_DATA		static
-#define LJ_DATADEF	static
-#define LJ_FUNC		static
-#define LJ_ASMF		LJ_NOAPI
-#define LJ_FUNCA	LJ_NOAPI
-#else
 #define LJ_DATA		LJ_NOAPI
 #define LJ_DATADEF
-#define LJ_FUNC		LJ_NOAPI
 #define LJ_ASMF		LJ_NOAPI
 #define LJ_FUNCA	LJ_NOAPI
+#if defined(ljamalg_c)
+#define LJ_FUNC		static
+#else
+#define LJ_FUNC		LJ_NOAPI
 #endif
 #define LJ_FUNC_NORET	LJ_FUNC LJ_NORET
 #define LJ_FUNCA_NORET	LJ_FUNCA LJ_NORET

+ 42 - 20
src/lj_dispatch.c

@@ -11,6 +11,7 @@
 #include "lj_state.h"
 #include "lj_frame.h"
 #include "lj_bc.h"
+#include "lj_ff.h"
 #if LJ_HASJIT
 #include "lj_jit.h"
 #endif
@@ -19,7 +20,8 @@
 #include "lj_vm.h"
 #include "luajit.h"
 
-#define GG_DISP_STATIC		BC__MAX
+/* Bump GG_NUM_ASMFF in lj_dispatch.h as needed. Ugly. */
+LJ_STATIC_ASSERT(GG_NUM_ASMFF == FF_NUM_ASMFUNC);
 
 /* -- Dispatch table management ------------------------------------------- */
 
@@ -27,13 +29,20 @@
 void lj_dispatch_init(GG_State *GG)
 {
   uint32_t i;
-  ASMFunction *disp = GG2DISP(GG);
-  for (i = 0; i < BC__MAX; i++)
-    disp[GG_DISP_STATIC+i] = disp[i] = makeasmfunc(lj_bc_ofs[i]);
+  ASMFunction *disp = GG->dispatch;
+  for (i = 0; i < GG_LEN_SDISP; i++)
+    disp[GG_LEN_DDISP+i] = disp[i] = makeasmfunc(lj_bc_ofs[i]);
+  for (i = GG_LEN_SDISP; i < GG_LEN_DDISP; i++)
+    disp[i] = makeasmfunc(lj_bc_ofs[i]);
   /* The JIT engine is off by default. luaopen_jit() turns it on. */
   disp[BC_FORL] = disp[BC_IFORL];
   disp[BC_ITERL] = disp[BC_IITERL];
   disp[BC_LOOP] = disp[BC_ILOOP];
+  disp[BC_FUNCF] = disp[BC_IFUNCF];
+  disp[BC_FUNCV] = disp[BC_IFUNCV];
+  GG->g.bc_cfunc_ext = GG->g.bc_cfunc_int = BCINS_AD(BC_FUNCC, 0, 0);
+  for (i = 0; i < GG_NUM_ASMFF; i++)
+    GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0);
 }
 
 #if LJ_HASJIT
@@ -57,39 +66,50 @@ void lj_dispatch_update(global_State *g)
   mode |= (G2J(g)->flags & JIT_F_ON) ? 1 : 0;
   mode |= G2J(g)->state != LJ_TRACE_IDLE ? 6 : 0;
 #endif
-  mode |= (g->hookmask & HOOK_EVENTMASK) ? 2 : 0;
+  mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? 2 : 0;
   if (oldmode != mode) {  /* Mode changed? */
-    ASMFunction *disp = GG2DISP(G2GG(g));
-    ASMFunction f_forl, f_iterl, f_loop;
+    ASMFunction *disp = G2GG(g)->dispatch;
+    ASMFunction f_forl, f_iterl, f_loop, f_funcf, f_funcv;
     g->dispatchmode = mode;
     if ((mode & 5) == 1) {  /* Hotcount if JIT is on, but not when recording. */
       f_forl = makeasmfunc(lj_bc_ofs[BC_FORL]);
       f_iterl = makeasmfunc(lj_bc_ofs[BC_ITERL]);
       f_loop = makeasmfunc(lj_bc_ofs[BC_LOOP]);
+      f_funcf = makeasmfunc(lj_bc_ofs[BC_FUNCF]);
+      f_funcv = makeasmfunc(lj_bc_ofs[BC_FUNCV]);
     } else {  /* Otherwise use the non-hotcounting instructions. */
-      f_forl = disp[GG_DISP_STATIC+BC_IFORL];
-      f_iterl = disp[GG_DISP_STATIC+BC_IITERL];
-      f_loop = disp[GG_DISP_STATIC+BC_ILOOP];
+      f_forl = disp[GG_LEN_DDISP+BC_IFORL];
+      f_iterl = disp[GG_LEN_DDISP+BC_IITERL];
+      f_loop = disp[GG_LEN_DDISP+BC_ILOOP];
+      f_funcf = disp[GG_LEN_DDISP+BC_IFUNCF];
+      f_funcv = disp[GG_LEN_DDISP+BC_IFUNCV];
     }
-    /* Set static loop ins first (may be copied below). */
-    disp[GG_DISP_STATIC+BC_FORL] = f_forl;
-    disp[GG_DISP_STATIC+BC_ITERL] = f_iterl;
-    disp[GG_DISP_STATIC+BC_LOOP] = f_loop;
+    /* Set static counting ins first (may be copied below). */
+    disp[GG_LEN_DDISP+BC_FORL] = f_forl;
+    disp[GG_LEN_DDISP+BC_ITERL] = f_iterl;
+    disp[GG_LEN_DDISP+BC_LOOP] = f_loop;
+    disp[GG_LEN_DDISP+BC_FUNCF] = f_funcf;
+    disp[GG_LEN_DDISP+BC_FUNCV] = f_funcv;
     if ((oldmode & 6) != (mode & 6)) {  /* Need to change whole table? */
       if ((mode & 6) == 0) {  /* No hooks and no recording? */
 	/* Copy static dispatch table to dynamic dispatch table. */
-	memcpy(&disp[0], &disp[GG_DISP_STATIC], sizeof(ASMFunction)*BC__MAX);
+	memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction));
       } else {
 	/* The recording dispatch also checks for hooks. */
 	ASMFunction f = (mode & 6) == 6 ? lj_vm_record : lj_vm_hook;
 	uint32_t i;
-	for (i = 0; i < BC__MAX; i++)
+	for (i = 0; i < BC_FUNCF; i++)
 	  disp[i] = f;
+	/* NYI: call hooks for function headers. */
+	memcpy(&disp[BC_FUNCF], &disp[GG_LEN_DDISP+BC_FUNCF],
+	       (GG_LEN_SDISP-BC_FUNCF)*sizeof(ASMFunction));
       }
-    } else if ((mode & 6) == 0) {  /* Fix dynamic loop ins unless overriden. */
+    } else if ((mode & 6) == 0) {  /* Set dynamic counting ins. */
       disp[BC_FORL] = f_forl;
       disp[BC_ITERL] = f_iterl;
       disp[BC_LOOP] = f_loop;
+      disp[BC_FUNCF] = f_funcf;
+      disp[BC_FUNCV] = f_funcv;
     }
 #if LJ_HASJIT
     if ((mode & 1) && !(oldmode & 1))  /* JIT off to on transition. */
@@ -186,14 +206,16 @@ int luaJIT_setmode(lua_State *L, int idx, int mode)
     if ((mode & LUAJIT_MODE_ON)) {
       if (idx != 0) {
 	cTValue *tv = idx > 0 ? L->base + (idx-1) : L->top + idx;
-	if (tvislightud(tv) && lightudV(tv) != NULL)
+	if (tvislightud(tv))
 	  g->wrapf = (lua_CFunction)lightudV(tv);
 	else
 	  return 0;  /* Failed. */
+      } else {
+	return 0;  /* Failed. */
       }
-      g->wrapmode = 1;
+      g->bc_cfunc_ext = BCINS_AD(BC_FUNCCW, 0, 0);
     } else {
-      g->wrapmode = 0;
+      g->bc_cfunc_ext = BCINS_AD(BC_FUNCC, 0, 0);
     }
     break;
   default:

+ 15 - 7
src/lj_dispatch.h

@@ -7,6 +7,7 @@
 #define _LJ_DISPATCH_H
 
 #include "lj_obj.h"
+#include "lj_bc.h"
 #if LJ_HASJIT
 #include "lj_jit.h"
 #endif
@@ -21,6 +22,13 @@ typedef uint16_t HotCount;
 #define HOTCOUNT_MIN_PENALTY	103
 #define HOTCOUNT_MAX_PENALTY	60000
 
+/* This solves a circular dependency problem -- bump as needed. Sigh. */
+#define GG_NUM_ASMFF	62
+
+#define GG_LEN_DDISP	(BC__MAX + GG_NUM_ASMFF)
+#define GG_LEN_SDISP	BC_FUNCC
+#define GG_LEN_DISP	(GG_LEN_DDISP + GG_LEN_SDISP)
+
 /* Global state, main thread and extra fields are allocated together. */
 typedef struct GG_State {
   lua_State L;				/* Main thread. */
@@ -29,22 +37,22 @@ typedef struct GG_State {
   jit_State J;				/* JIT state. */
   HotCount hotcount[HOTCOUNT_SIZE];	/* Hot counters. */
 #endif
-  /* Instruction dispatch tables follow. */
+  ASMFunction dispatch[GG_LEN_DISP];	/* Instruction dispatch tables. */
+  BCIns bcff[GG_NUM_ASMFF];		/* Bytecode for ASM fast functions. */
 } GG_State;
 
 #define GG_OFS(field)	((int)offsetof(GG_State, field))
-#define GG_OFS_DISP	((int)sizeof(GG_State))
-#define GG2DISP(gg)	((ASMFunction *)((char *)(gg) + GG_OFS_DISP))
 #define G2GG(gl)	((GG_State *)((char *)(gl) - GG_OFS(g)))
 #define J2GG(j)		((GG_State *)((char *)(j) - GG_OFS(J)))
 #define L2GG(L)		(G2GG(G(L)))
 #define J2G(J)		(&J2GG(J)->g)
 #define G2J(gl)		(&G2GG(gl)->J)
 #define L2J(L)		(&L2GG(L)->J)
-#define GG_G2DISP	(GG_OFS_DISP - GG_OFS(g))
-#define GG_DISP2G	(GG_OFS(g) - GG_OFS_DISP)
-#define GG_DISP2J	(GG_OFS(J) - GG_OFS_DISP)
-#define GG_DISP2HOT	(GG_OFS(hotcount) - GG_OFS_DISP)
+#define GG_G2DISP	(GG_OFS(dispatch) - GG_OFS(g))
+#define GG_DISP2G	(GG_OFS(g) - GG_OFS(dispatch))
+#define GG_DISP2J	(GG_OFS(J) - GG_OFS(dispatch))
+#define GG_DISP2HOT	(GG_OFS(hotcount) - GG_OFS(dispatch))
+#define GG_DISP2STATIC	(GG_LEN_DDISP*(int)sizeof(ASMFunction))
 
 #define hotcount_get(gg, pc) \
   (gg)->hotcount[(u32ptr(pc)>>2) & (HOTCOUNT_SIZE-1)]

+ 7 - 10
src/lj_err.c

@@ -152,7 +152,7 @@ static const char *getobjname(GCproto *pt, const BCIns *ip, BCReg slot,
 restart:
   lname = getvarname(pt, proto_bcpos(pt, ip), slot);
   if (lname != NULL) { *name = lname; return "local"; }
-  while (--ip >= proto_bc(pt)) {
+  while (--ip > proto_bc(pt)) {
     BCIns ins = *ip;
     BCOp op = bc_op(ins);
     BCReg ra = bc_a(ins);
@@ -222,11 +222,7 @@ void lj_err_pushloc(lua_State *L, GCproto *pt, BCPos pc)
   if (name) {
     const char *s = strdata(name);
     MSize i, len = name->len;
-    BCLine line;
-    if (pc)
-      line = proto_line(pt, pc-1);
-    else
-      line = pt->linedefined;
+    BCLine line = pc < pt->sizebc ? proto_line(pt, pc) : 0;
     if (*s == '@') {
       s++; len--;
       for (i = len; i > 0; i--)
@@ -345,9 +341,10 @@ LUA_API int lua_getinfo(lua_State *L, const char *what, lua_Debug *ar)
     switch (*what) {
     case 'S':
       if (isluafunc(fn)) {
-	ar->source = strdata(proto_chunkname(funcproto(fn)));
-	ar->linedefined = cast_int(funcproto(fn)->linedefined);
-	ar->lastlinedefined = cast_int(funcproto(fn)->lastlinedefined);
+	GCproto *pt = funcproto(fn);
+	ar->source = strdata(proto_chunkname(pt));
+	ar->linedefined = (int)proto_line(pt, 0);
+	ar->lastlinedefined = (int)pt->lastlinedefined;
 	ar->what = (ar->linedefined == 0) ? "main" : "Lua";
       } else {
 	ar->source = "=[C]";
@@ -380,7 +377,7 @@ LUA_API int lua_getinfo(lua_State *L, const char *what, lua_Debug *ar)
 	GCproto *pt = funcproto(fn);
 	BCLine *lineinfo = proto_lineinfo(pt);
 	MSize i, szl = pt->sizebc;
-	for (i = 0; i < szl; i++)
+	for (i = 1; i < szl; i++)
 	  setboolV(lj_tab_setint(L, t, lineinfo[i]), 1);
 	settabV(L, L->top, t);
       } else {

+ 1 - 2
src/lj_func.c

@@ -101,8 +101,8 @@ GCfunc *lj_func_newC(lua_State *L, MSize nelems, GCtab *env)
   fn->c.ffid = FF_C;
   fn->c.nupvalues = cast_byte(nelems);
   /* NOBARRIER: The GCfunc is new (marked white). */
+  setmref(fn->c.pc, &G(L)->bc_cfunc_ext);
   setgcref(fn->c.env, obj2gco(env));
-  fn->c.gate = G(L)->wrapmode ? lj_gate_cwrap : lj_gate_c;
   return fn;
 }
 
@@ -115,7 +115,6 @@ GCfunc *lj_func_newL(lua_State *L, GCproto *pt, GCtab *env)
   /* NOBARRIER: Really a setgcref. But the GCfunc is new (marked white). */
   setmref(fn->l.pc, proto_bc(pt));
   setgcref(fn->l.env, obj2gco(env));
-  fn->l.gate = (pt->flags & PROTO_IS_VARARG) ? lj_gate_lv : lj_gate_lf;
   return fn;
 }
 

+ 1 - 4
src/lj_gdbjit.c

@@ -705,10 +705,7 @@ void lj_gdbjit_addtrace(jit_State *J, Trace *T, TraceNo traceno)
   ctx.szmcode = T->szmcode;
   ctx.spadjp = CFRAME_SIZE + (MSize)(parent ? J->trace[parent]->spadjust : 0);
   ctx.spadj = CFRAME_SIZE + T->spadjust;
-  if (startpc >= proto_bc(pt))
-    ctx.lineno = proto_line(pt,proto_bcpos(pt,startpc));
-  else
-    ctx.lineno = pt->linedefined;
+  ctx.lineno = proto_line(pt, proto_bcpos(pt, startpc));
   ctx.filename = strdata(proto_chunkname(pt));
   if (*ctx.filename == '@' || *ctx.filename == '=')
     ctx.filename++;

+ 7 - 4
src/lj_lib.c

@@ -14,6 +14,8 @@
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_func.h"
+#include "lj_bc.h"
+#include "lj_dispatch.h"
 #include "lj_vm.h"
 #include "lj_lib.h"
 
@@ -46,6 +48,7 @@ void lj_lib_register(lua_State *L, const char *libname,
   GCtab *env = tabref(L->env);
   GCfunc *ofn = NULL;
   int ffid = *p++;
+  BCIns *bcff = &L2GG(L)->bcff[*p++];
   GCtab *tab = lib_create_table(L, libname, *p++);
   ptrdiff_t tpos = L->top - L->base;
 
@@ -68,10 +71,10 @@ void lj_lib_register(lua_State *L, const char *libname,
       fn->c.ffid = (uint8_t)(ffid++);
       name = (const char *)p;
       p += len;
-      if (tag != LIBINIT_CF) {
-	fn->c.gate = makeasmfunc(p[0] + (p[1] << 8));
-	p += 2;
-      }
+      if (tag == LIBINIT_CF)
+	setmref(fn->c.pc, &G(L)->bc_cfunc_int);
+      else
+	setmref(fn->c.pc, bcff++);
       if (tag == LIBINIT_ASM_)
 	fn->c.f = ofn->c.f;  /* Copy handler from previous function. */
       else

+ 13 - 0
src/lj_lib.h

@@ -57,6 +57,19 @@ LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
 #define lj_lib_checkfpu(L)	UNUSED(L)
 #endif
 
+/* Push internal C function f with n upvalues and function id on the stack. */
+static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f,
+				     int id, int n)
+{
+  GCfunc *fn;
+  lua_pushcclosure(L, f, n);  /* Leaves the new closure in the top slot. */
+  fn = funcV(L->top-1);  /* Fetch the closure that was just pushed. */
+  fn->c.ffid = (uint8_t)id;  /* Function id, truncated to 8 bits. */
+  setmref(fn->c.pc, &G(L)->bc_cfunc_int);  /* Internal C function bytecode. */
+}
+
+#define lj_lib_pushcf(L, fn, id)	(lj_lib_pushcc(L, (fn), (id), 0))
+
 /* Library function declarations. Scanned by buildvm. */
 #define LJLIB_CF(name)		static int lj_cf_##name(lua_State *L)
 #define LJLIB_ASM(name)		static int lj_ffh_##name(lua_State *L)

+ 5 - 4
src/lj_obj.h

@@ -360,7 +360,6 @@ typedef struct GCproto {
   uint16_t trace;	/* Anchor for chain of root traces. */
   /* ------ The following fields are for debugging/tracebacks only ------ */
   GCRef chunkname;	/* Name of the chunk this function was defined in. */
-  BCLine linedefined;	/* First line of the function definition. */
   BCLine lastlinedefined;  /* Last line of the function definition. */
   MSize sizevarinfo;	/* Size of local var info array. */
   MRef varinfo;		/* Names and extents of local variables. */
@@ -419,7 +418,7 @@ typedef struct GCupval {
 /* Common header for functions. env should be at same offset in GCudata. */
 #define GCfuncHeader \
   GCHeader; uint8_t ffid; uint8_t nupvalues; \
-  GCRef env; GCRef gclist; ASMFunction gate
+  GCRef env; GCRef gclist; MRef pc
 
 typedef struct GCfuncC {
   GCfuncHeader;
@@ -429,7 +428,6 @@ typedef struct GCfuncC {
 
 typedef struct GCfuncL {
   GCfuncHeader;
-  MRef pc;		/* Start PC (and GCproto reference). */
   GCRef uvptr[1];	/* Array of _pointers_ to upvalue objects (GCupval). */
 } GCfuncL;
 
@@ -558,7 +556,7 @@ typedef struct global_State {
   uint8_t hookmask;	/* Hook mask. */
   uint8_t dispatchmode;	/* Dispatch mode. */
   uint8_t vmevmask;	/* VM event mask. */
-  uint8_t wrapmode;	/* Wrap mode. */
+  uint8_t unused1;
   GCRef mainthref;	/* Link to main thread. */
   TValue registrytv;	/* Anchor for registry. */
   TValue tmptv;		/* Temporary TValue. */
@@ -569,6 +567,8 @@ typedef struct global_State {
   lua_CFunction wrapf;	/* Wrapper for C function calls. */
   lua_CFunction panic;	/* Called as a last resort for errors. */
   volatile int32_t vmstate;  /* VM state or current JIT code trace number. */
+  BCIns bc_cfunc_int;	/* Bytecode for internal C function calls. */
+  BCIns bc_cfunc_ext;	/* Bytecode for external C function calls. */
   GCRef jit_L;		/* Current JIT code lua_State or NULL. */
   MRef jit_base;	/* Current JIT code L->base. */
   GCRef gcroot[GCROOT__MAX];  /* GC roots. */
@@ -584,6 +584,7 @@ typedef struct global_State {
 /* Hook management. Hook event masks are defined in lua.h. */
 #define HOOK_EVENTMASK		0x0f
 #define HOOK_ACTIVE		0x10
+#define HOOK_ACTIVE_SHIFT	4
 #define HOOK_VMEVENT		0x20
 #define HOOK_GC			0x40
 #define hook_active(g)		((g)->hookmask & HOOK_ACTIVE)

+ 21 - 17
src/lj_parse.c

@@ -1036,7 +1036,10 @@ static void fs_fixup_bc(FuncState *fs, GCproto *pt, BCIns *bc, BCLine *lineinfo)
   setmref(pt->lineinfo, lineinfo);
   pt->sizebc = n;
   bc[n] = ~0u;  /* Close potentially uninitialized gap between bc and kgc. */
-  for (i = 0; i < n; i++) {
+  bc[0] = BCINS_AD((fs->flags & PROTO_IS_VARARG) ? BC_FUNCV : BC_FUNCF,
+		   fs->framesize, 0);
+  lineinfo[0] = fs->linedefined;
+  for (i = 1; i < n; i++) {
     bc[i] = base[i].ins;
     lineinfo[i] = base[i].line;
   }
@@ -1181,7 +1184,6 @@ static GCproto *fs_finish(LexState *ls, BCLine line)
   pt->flags = fs->flags;
   pt->numparams = fs->numparams;
   pt->framesize = fs->framesize;
-  pt->linedefined = fs->linedefined;
   pt->lastlinedefined = line;
 
   fs_fixup_bc(fs, pt, (BCIns *)((char *)pt + sizeof(GCproto)),
@@ -1416,29 +1418,30 @@ static void parse_chunk(LexState *ls);
 /* Parse body of a function. */
 static void parse_body(LexState *ls, ExpDesc *e, int needself, BCLine line)
 {
-  FuncState cfs, *fs = ls->fs;
+  FuncState fs, *pfs = ls->fs;
   BCReg kidx;
   BCLine lastline;
   GCproto *pt;
-  ptrdiff_t oldbase = fs->bcbase - ls->bcstack;
-  fs_init(ls, &cfs);
-  cfs.linedefined = line;
-  cfs.numparams = (uint8_t)parse_params(ls, needself);
-  cfs.bcbase = fs->bcbase + fs->pc;
-  cfs.bclim = fs->bclim - fs->pc;
+  ptrdiff_t oldbase = pfs->bcbase - ls->bcstack;
+  fs_init(ls, &fs);
+  fs.linedefined = line;
+  fs.numparams = (uint8_t)parse_params(ls, needself);
+  fs.bcbase = pfs->bcbase + pfs->pc;
+  fs.bclim = pfs->bclim - pfs->pc;
+  bcemit_AD(&fs, BC_FUNCF, 0, 0);  /* Placeholder. */
   parse_chunk(ls);
   lastline = ls->linenumber;
   lex_match(ls, TK_end, TK_function, line);
   pt = fs_finish(ls, lastline);
-  fs->bcbase = ls->bcstack + oldbase;  /* May have been reallocated. */
-  fs->bclim = (BCPos)(ls->sizebcstack - oldbase);
+  pfs->bcbase = ls->bcstack + oldbase;  /* May have been reallocated. */
+  pfs->bclim = (BCPos)(ls->sizebcstack - oldbase);
   /* Store new prototype in the constant array of the parent. */
-  kidx = const_gc(fs, obj2gco(pt), LJ_TPROTO);
-  expr_init(e, VRELOCABLE, bcemit_AD(fs, BC_FNEW, 0, kidx));
-  if (!(fs->flags & PROTO_HAS_FNEW)) {
-    if (fs->flags & PROTO_HAS_RETURN)
-      fs->flags |= PROTO_FIXUP_RETURN;
-    fs->flags |= PROTO_HAS_FNEW;
+  kidx = const_gc(pfs, obj2gco(pt), LJ_TPROTO);
+  expr_init(e, VRELOCABLE, bcemit_AD(pfs, BC_FNEW, 0, kidx));
+  if (!(pfs->flags & PROTO_HAS_FNEW)) {
+    if (pfs->flags & PROTO_HAS_RETURN)
+      pfs->flags |= PROTO_FIXUP_RETURN;
+    pfs->flags |= PROTO_HAS_FNEW;
   }
 }
 
@@ -2227,6 +2230,7 @@ GCproto *lj_parse(LexState *ls)
   fs.bcbase = NULL;
   fs.bclim = 0;
   fs.flags |= PROTO_IS_VARARG;  /* Main chunk is always a vararg func. */
+  bcemit_AD(&fs, BC_FUNCV, 0, 0);  /* Placeholder. */
   lj_lex_next(ls);  /* Read-ahead first token. */
   parse_chunk(ls);
   if (ls->token != TK_eof)

+ 1 - 7
src/lj_record.c

@@ -1671,14 +1671,8 @@ static int rec_call(jit_State *J, BCReg func, ptrdiff_t cres, ptrdiff_t nargs)
     GCproto *pt = funcproto(rd.fn);
     if ((pt->flags & PROTO_NO_JIT))
       lj_trace_err(J, LJ_TRERR_CJITOFF);
-    if ((pt->flags & PROTO_IS_VARARG)) {
-      if (rd.fn->l.gate != lj_gate_lv)
-	lj_trace_err(J, LJ_TRERR_NYILNKF);
+    if ((pt->flags & PROTO_IS_VARARG))
       lj_trace_err(J, LJ_TRERR_NYIVF);
-    } else {
-      if (rd.fn->l.gate != lj_gate_lf)
-	lj_trace_err(J, LJ_TRERR_NYILNKF);
-    }
     if (cres == CALLRES_TAILCALL) {
       ptrdiff_t i;
       /* Tailcalls can form a loop, so count towards the loop unroll limit. */

+ 6 - 8
src/lj_state.c

@@ -37,8 +37,8 @@
 ** Calls to metamethods store their arguments beyond the current top
 ** without checking for the stack limit. This avoids stack resizes which
 ** would invalidate passed TValue pointers. The stack check is performed
-** later by the call gate. This can safely resize the stack or raise an
-** error. Thus we need some extra slots beyond the current stack limit.
+** later by the function header. This can safely resize the stack or raise
+** an error. Thus we need some extra slots beyond the current stack limit.
 **
 ** Most metamethods need 4 slots above top (cont, mobj, arg1, arg2) plus
 ** one extra slot if mobj is not a function. Only lj_meta_tset needs 5
@@ -119,8 +119,6 @@ static void stack_init(lua_State *L1, lua_State *L)
 
 /* -- State handling ------------------------------------------------------ */
 
-#define GG_SIZE		(sizeof(GG_State)+(BC__MAX*2)*sizeof(ASMFunction))
-
 /* Open parts that may cause memory-allocation errors. */
 static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud)
 {
@@ -156,8 +154,8 @@ static void close_state(lua_State *L)
     lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
     lj_str_freebuf(g, &g->tmpbuf);
     lj_mem_freevec(g, L->stack, L->stacksize, TValue);
-    lua_assert(g->gc.total == GG_SIZE);
-    g->allocf(g->allocd, G2GG(g), GG_SIZE, 0);
+    lua_assert(g->gc.total == sizeof(GG_State));
+    g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0);
   }
 }
 
@@ -167,7 +165,7 @@ lua_State *lj_state_newstate(lua_Alloc f, void *ud)
 LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
 #endif
 {
-  GG_State *GG = cast(GG_State *, f(ud, NULL, 0, GG_SIZE));
+  GG_State *GG = cast(GG_State *, f(ud, NULL, 0, sizeof(GG_State)));
   lua_State *L = &GG->L;
   global_State *g = &GG->g;
   if (GG == NULL || !checkptr32(GG)) return NULL;
@@ -190,7 +188,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
   g->gc.state = GCSpause;
   setgcref(g->gc.root, obj2gco(L));
   g->gc.sweep = &g->gc.root;
-  g->gc.total = GG_SIZE;
+  g->gc.total = sizeof(GG_State);
   g->gc.pause = LUAI_GCPAUSE;
   g->gc.stepmul = LUAI_GCMUL;
   lj_dispatch_init((GG_State *)L);

+ 3 - 5
src/lj_trace.c

@@ -361,7 +361,7 @@ static void trace_start(jit_State *J)
     setstrV(L, L->top++, lj_str_newlit(L, "start"));
     setintV(L->top++, J->curtrace);
     setfuncV(L, L->top++, J->fn);
-    setintV(L->top++, proto_bcpos(J->pt, J->pc) + 1);
+    setintV(L->top++, proto_bcpos(J->pt, J->pc));
     if (J->parent) {
       setintV(L->top++, J->parent);
       setintV(L->top++, J->exitno);
@@ -444,7 +444,7 @@ static int trace_abort(jit_State *J)
       setstrV(L, L->top++, lj_str_newlit(L, "abort"));
       setintV(L->top++, J->curtrace);
       setfuncV(L, L->top++, J->fn);
-      setintV(L->top++, proto_bcpos(J->pt, J->pc) + 1);
+      setintV(L->top++, proto_bcpos(J->pt, J->pc));
       copyTV(L, L->top++, restorestack(L, errobj));
       copyTV(L, L->top++, &J->errinfo);
     );
@@ -478,7 +478,7 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud)
       lj_vmevent_send(L, RECORD,
 	setintV(L->top++, J->curtrace);
 	setfuncV(L, L->top++, J->fn);
-	setintV(L->top++, proto_bcpos(J->pt, J->pc) + 1);
+	setintV(L->top++, proto_bcpos(J->pt, J->pc));
 	setintV(L->top++, J->framedepth);
 	if (bcmode_mm(bc_op(*J->pc)) == MM_call) {
 	  cTValue *o = &L->base[bc_a(*J->pc)];
@@ -555,8 +555,6 @@ void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc)
   /* Only start a new trace if not recording or inside __gc call or vmevent. */
   if (J->state == LJ_TRACE_IDLE &&
       !(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) {
-    lua_State *L = J->L;
-    L->top = curr_topL(L);  /* Only called from Lua and NRESULTS is not used. */
     J->parent = 0;  /* Root trace. */
     J->exitno = 0;
     J->pc = pc-1;  /* The interpreter bytecode PC is offset by 1. */

+ 0 - 6
src/lj_vm.h

@@ -44,12 +44,6 @@ LJ_ASMF void lj_vm_exp2(void);
 LJ_ASMF void lj_vm_pow_sse(void);
 LJ_ASMF void lj_vm_powi_sse(void);
 
-/* Call gates for functions. */
-LJ_ASMF void lj_gate_lf(void);
-LJ_ASMF void lj_gate_lv(void);
-LJ_ASMF void lj_gate_c(void);
-LJ_ASMF void lj_gate_cwrap(void);
-
 /* Continuations for metamethods. */
 LJ_ASMF void lj_cont_cat(void);  /* Continue with concatenation. */
 LJ_ASMF void lj_cont_ra(void);  /* Store result in RA from instruction. */

+ 1 - 1
src/msvcbuild.bat

@@ -32,7 +32,7 @@ if exist buildvm.exe.manifest^
   %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
 
 buildvm -m peobj -o lj_vm.obj
-buildvm -m bcdef -o lj_bcdef.h
+buildvm -m bcdef -o lj_bcdef.h %ALL_LIB%
 buildvm -m ffdef -o lj_ffdef.h %ALL_LIB%
 buildvm -m libdef -o lj_libdef.h %ALL_LIB%
 buildvm -m recdef -o lj_recdef.h %ALL_LIB%

Some files were not shown because too many files changed in this diff