Browse Source

Record calls to vararg functions.

This loop is now roughly 1000x faster than the Lua interpreter:
  local function f(a,b,...) end; for i=1,2e8 do f(1,2,i) end
Yet another silly microbenchmark -- I know.
Mike Pall 15 years ago
parent
commit
c2c08ba9b3
4 changed files with 56 additions and 16 deletions
  1. 17 9
      src/lj_dispatch.c
  2. 36 2
      src/lj_record.c
  3. 3 4
      src/lj_snap.c
  4. 0 1
      src/lj_traceerr.h

+ 17 - 9
src/lj_dispatch.c

@@ -384,17 +384,18 @@ void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc)
     callhook(L, LUA_HOOKRET, -1);
     callhook(L, LUA_HOOKRET, -1);
 }
 }
 
 
-/* Initialize call. Ensure stack space and clear missing parameters. */
-static void call_init(lua_State *L, GCfunc *fn)
+/* Initialize call. Ensure stack space and return # of missing parameters. */
+static int call_init(lua_State *L, GCfunc *fn)
 {
 {
   if (isluafunc(fn)) {
   if (isluafunc(fn)) {
-    MSize numparams = funcproto(fn)->numparams;
-    TValue *o;
-    lj_state_checkstack(L, numparams);
-    for (o = L->base + numparams; L->top < o; L->top++)
-      setnilV(L->top);  /* Clear missing parameters. */
+    int numparams = funcproto(fn)->numparams;
+    int gotparams = (int)(L->top - L->base);
+    lj_state_checkstack(L, (MSize)numparams);
+    numparams -= gotparams;
+    return numparams >= 0 ? numparams : 0;
   } else {
   } else {
     lj_state_checkstack(L, LUA_MINSTACK);
     lj_state_checkstack(L, LUA_MINSTACK);
+    return 0;
   }
   }
 }
 }
 
 
@@ -407,7 +408,7 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc)
 #if LJ_HASJIT
 #if LJ_HASJIT
   jit_State *J = G2J(g);
   jit_State *J = G2J(g);
 #endif
 #endif
-  call_init(L, fn);
+  int missing = call_init(L, fn);
 #if LJ_HASJIT
 #if LJ_HASJIT
   J->L = L;
   J->L = L;
   if ((uintptr_t)pc & 1) {  /* Marker for hot call. */
   if ((uintptr_t)pc & 1) {  /* Marker for hot call. */
@@ -420,8 +421,15 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc)
     lj_trace_ins(J, pc-1);  /* The interpreter bytecode PC is offset by 1. */
     lj_trace_ins(J, pc-1);  /* The interpreter bytecode PC is offset by 1. */
   }
   }
 #endif
 #endif
-  if ((g->hookmask & LUA_MASKCALL))
+  if ((g->hookmask & LUA_MASKCALL)) {
+    int i;
+    for (i = 0; i < missing; i++)  /* Add missing parameters. */
+      setnilV(L->top++);
     callhook(L, LUA_HOOKCALL, -1);
     callhook(L, LUA_HOOKCALL, -1);
+    /* Preserve modifications of missing parameters by lua_setlocal(). */
+    while (missing-- > 0 && tvisnil(L->top - 1))
+      L->top--;
+  }
 #if LJ_HASJIT
 #if LJ_HASJIT
 out:
 out:
 #endif
 #endif

+ 36 - 2
src/lj_record.c

@@ -570,6 +570,17 @@ static void rec_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
     J->base[--rbase] = TREF_TRUE;  /* Prepend true to results. */
     J->base[--rbase] = TREF_TRUE;  /* Prepend true to results. */
     frame = frame_prevd(frame);
     frame = frame_prevd(frame);
   }
   }
+  if (frame_isvarg(frame)) {
+    BCReg cbase = (BCReg)frame_delta(frame);
+    lua_assert(J->framedepth != 1);
+    if (--J->framedepth < 0)  /* NYI: return of vararg func to lower frame. */
+      lj_trace_err(J, LJ_TRERR_NYIRETL);
+    lua_assert(J->baseslot > 1);
+    rbase += cbase;
+    J->baseslot -= (BCReg)cbase;
+    J->base -= cbase;
+    frame = frame_prevd(frame);
+  }
   if (frame_islua(frame)) {  /* Return to Lua frame. */
   if (frame_islua(frame)) {  /* Return to Lua frame. */
     BCIns callins = *(frame_pc(frame)-1);
     BCIns callins = *(frame_pc(frame)-1);
     ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults;
     ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults;
@@ -1840,7 +1851,6 @@ static void rec_func_setup(jit_State *J)
   BCReg s, numparams = pt->numparams;
   BCReg s, numparams = pt->numparams;
   if ((pt->flags & PROTO_NO_JIT))
   if ((pt->flags & PROTO_NO_JIT))
     lj_trace_err(J, LJ_TRERR_CJITOFF);
     lj_trace_err(J, LJ_TRERR_CJITOFF);
-  lua_assert(!(pt->flags & PROTO_IS_VARARG));
   if (J->baseslot + pt->framesize >= LJ_MAX_JSLOTS)
   if (J->baseslot + pt->framesize >= LJ_MAX_JSLOTS)
     lj_trace_err(J, LJ_TRERR_STACKOV);
     lj_trace_err(J, LJ_TRERR_STACKOV);
   /* Fill up missing parameters with nil. */
   /* Fill up missing parameters with nil. */
@@ -1850,6 +1860,27 @@ static void rec_func_setup(jit_State *J)
   J->maxslot = numparams;
   J->maxslot = numparams;
 }
 }
 
 
+/* Record Lua vararg function setup.
+**
+** Builds a second frame above the slots currently in use: the function
+** reference and the fixed arguments are copied upwards, their original
+** slots are cleared to nil, and the recorder's base/baseslot are moved to
+** the new frame. The frame depth is incremented by one.
+**
+** Raises LJ_TRERR_STACKOV via lj_trace_err() if the new frame would not
+** fit below LJ_MAX_JSLOTS.
+**
+** NOTE(review): presumably J->maxslot holds the number of arguments that
+** were actually passed at this point -- confirm against the caller.
+*/
+static void rec_func_vararg(jit_State *J)
+{
+  GCproto *pt = J->pt;
+  BCReg s, fixargs, vframe = J->maxslot+1;  /* Offset of the new base relative to the old one. */
+  lua_assert((pt->flags & PROTO_IS_VARARG));
+  if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS)  /* New frame must fit in the slot limit. */
+    lj_trace_err(J, LJ_TRERR_STACKOV);
+  J->base[vframe-1] = J->base[-1];  /* Copy function up. */
+  /* Copy fixarg slots up and set their original slots to nil.
+  ** Only as many fixed parameters as there are occupied slots are moved:
+  ** fixargs is the smaller of pt->numparams and J->maxslot.
+  */
+  fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot;
+  for (s = 0; s < fixargs; s++) {
+    J->base[vframe+s] = J->base[s];
+    J->base[s] = TREF_NIL;
+  }
+  J->maxslot = fixargs;  /* The new frame starts out with just the copied fixargs. */
+  J->framedepth++;  /* Account for the extra frame. */
+  J->base += vframe;  /* Switch the recorder to the new frame. */
+  J->baseslot += vframe;
+}
+
 /* Record entry to a Lua function. */
 /* Record entry to a Lua function. */
 static void rec_func_lua(jit_State *J)
 static void rec_func_lua(jit_State *J)
 {
 {
@@ -2258,8 +2289,11 @@ void lj_record_ins(jit_State *J)
     break;
     break;
 
 
   case BC_FUNCV:
   case BC_FUNCV:
+    rec_func_vararg(J);
+    rec_func_lua(J);
+    break;
   case BC_JFUNCV:
   case BC_JFUNCV:
-    lj_trace_err(J, LJ_TRERR_NYIVF);
+    lua_assert(0);  /* Cannot happen. No hotcall counting for vararg funcs. */
     break;
     break;
 
 
   case BC_FUNCC:
   case BC_FUNCC:

+ 3 - 4
src/lj_snap.c

@@ -87,15 +87,14 @@ static void snapshot_framelinks(jit_State *J, SnapEntry *map)
     if (frame_islua(frame)) {
     if (frame_islua(frame)) {
       map[f++] = SNAP_MKPC(frame_pc(frame));
       map[f++] = SNAP_MKPC(frame_pc(frame));
       frame = frame_prevl(frame);
       frame = frame_prevl(frame);
-    } else if (frame_ispcall(frame)) {
-      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
-      frame = frame_prevd(frame);
     } else if (frame_iscont(frame)) {
     } else if (frame_iscont(frame)) {
       map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
       map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
       map[f++] = SNAP_MKPC(frame_contpc(frame));
       map[f++] = SNAP_MKPC(frame_contpc(frame));
       frame = frame_prevd(frame);
       frame = frame_prevd(frame);
     } else {
     } else {
-      lua_assert(0);
+      lua_assert(!frame_isc(frame));
+      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
+      frame = frame_prevd(frame);
     }
     }
   }
   }
   lua_assert(f == (MSize)(1 + J->framedepth));
   lua_assert(f == (MSize)(1 + J->framedepth));

+ 0 - 1
src/lj_traceerr.h

@@ -23,7 +23,6 @@ TREDEF(BADTYPE,	"bad argument type")
 TREDEF(CJITOFF,	"call to JIT-disabled function")
 TREDEF(CJITOFF,	"call to JIT-disabled function")
 TREDEF(CUNROLL,	"call unroll limit reached")
 TREDEF(CUNROLL,	"call unroll limit reached")
 TREDEF(DOWNREC,	"down-recursion, restarting")
 TREDEF(DOWNREC,	"down-recursion, restarting")
-TREDEF(NYIVF,	"NYI: vararg function")
 TREDEF(NYICF,	"NYI: C function %p")
 TREDEF(NYICF,	"NYI: C function %p")
 TREDEF(NYIFF,	"NYI: FastFunc %s")
 TREDEF(NYIFF,	"NYI: FastFunc %s")
 TREDEF(NYIFFU,	"NYI: unsupported variant of FastFunc %s")
 TREDEF(NYIFFU,	"NYI: unsupported variant of FastFunc %s")