Browse Source

Specialize to prototype for non-monomorphic functions.

Solves the trace-explosion problem with closure-heavy programming.
Mike Pall 13 years ago
parent
commit
cecbe3c15f
6 changed files with 31 additions and 15 deletions
  1. 0 8
      doc/status.html
  2. 4 0
      src/lj_func.c
  3. 1 0
      src/lj_ir.h
  4. 3 0
      src/lj_obj.h
  5. 1 1
      src/lj_parse.c
  6. 22 6
      src/lj_record.c

+ 0 - 8
doc/status.html

@@ -148,14 +148,6 @@ with me, before writing major improvements, to avoid duplication of
 effort.
 </li>
 <li>
-The trace compiler currently doesn't back off specialization for
-function call dispatch. It should really fall back to specializing on
-the prototype, not the closure identity. This can lead to the so-called
-"trace explosion" problem with <b>closure-heavy programming</b>. The
-trace linking heuristics prevent this, but in the worst case this
-means the code always falls back to the interpreter.
-</li>
-<li>
 <b>Trace management</b> needs more tuning: less drastic countermeasures
 against trace explosion and better heuristics in general.
 </li>

+ 4 - 0
src/lj_func.c

@@ -118,6 +118,7 @@ GCfunc *lj_func_newC(lua_State *L, MSize nelems, GCtab *env)
 
 static GCfunc *func_newL(lua_State *L, GCproto *pt, GCtab *env)
 {
+  uint32_t count;
   GCfunc *fn = (GCfunc *)lj_mem_newgco(L, sizeLfunc((MSize)pt->sizeuv));
   fn->l.gct = ~LJ_TFUNC;
   fn->l.ffid = FF_LUA;
@@ -125,6 +126,9 @@ static GCfunc *func_newL(lua_State *L, GCproto *pt, GCtab *env)
   /* NOBARRIER: Really a setgcref. But the GCfunc is new (marked white). */
   setmref(fn->l.pc, proto_bc(pt));
   setgcref(fn->l.env, obj2gco(env));
+  /* Saturating 3 bit counter (0..7) for created closures. */
+  count = (uint32_t)pt->flags + PROTO_CLCOUNT;
+  pt->flags = (uint8_t)(count - ((count >> PROTO_CLC_BITS) & PROTO_CLCOUNT));
   return fn;
 }
 

+ 1 - 0
src/lj_ir.h

@@ -183,6 +183,7 @@ IRFPMDEF(FPMENUM)
 #define IRFLDEF(_) \
   _(STR_LEN,	offsetof(GCstr, len)) \
   _(FUNC_ENV,	offsetof(GCfunc, l.env)) \
+  _(FUNC_PC,	offsetof(GCfunc, l.pc)) \
   _(TAB_META,	offsetof(GCtab, metatable)) \
   _(TAB_ARRAY,	offsetof(GCtab, array)) \
   _(TAB_NODE,	offsetof(GCtab, node)) \

+ 3 - 0
src/lj_obj.h

@@ -318,6 +318,9 @@ typedef struct GCproto {
 /* Only used during parsing. */
 #define PROTO_HAS_RETURN	0x20	/* Already emitted a return. */
 #define PROTO_FIXUP_RETURN	0x40	/* Need to fixup emitted returns. */
+/* Top bits used for counting created closures. */
+#define PROTO_CLCOUNT		0x20	/* Base of saturating 3 bit counter. */
+#define PROTO_CLC_BITS		3
 
 #define proto_kgc(pt, idx) \
   check_exp((uintptr_t)(intptr_t)(idx) >= (uintptr_t)-(intptr_t)(pt)->sizekgc, \

+ 1 - 1
src/lj_parse.c

@@ -1392,7 +1392,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line)
   pt->gct = ~LJ_TPROTO;
   pt->sizept = (MSize)sizept;
   pt->trace = 0;
-  pt->flags = fs->flags;
+  pt->flags = (uint8_t)(fs->flags & ~(PROTO_HAS_RETURN|PROTO_FIXUP_RETURN));
   pt->numparams = fs->numparams;
   pt->framesize = fs->framesize;
   setgcref(pt->chunkname, obj2gco(ls->chunkname));

+ 22 - 6
src/lj_record.c

@@ -563,12 +563,32 @@ static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev)
 
 /* -- Record calls and returns -------------------------------------------- */
 
+/* Specialize to the runtime value of the called function or its prototype.
+**
+** fn is the function object seen while recording and tr is the IR reference
+** of the called slot (the caller passes its fbase[0]). The return value is
+** what the caller stores back into that slot (OR'ed with TREF_FRAME):
+** - the constant function reference, when specializing to the closure, or
+** - tr itself, unchanged, when specializing to the prototype.
+*/
+static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr)
+{
+  TRef kfunc;
+  if (isluafunc(fn)) {
+    GCproto *pt = funcproto(fn);
+    /* 3 or more closures created? Probably not a monomorphic function.
+    ** The closure counter sits in the top bits of pt->flags and counts in
+    ** steps of PROTO_CLCOUNT, so comparing the whole flags byte against
+    ** 3*PROTO_CLCOUNT tests for a count of at least 3. In that case the
+    ** check is made on the function's bytecode pointer (IRFL_FUNC_PC, i.e.
+    ** GCfunc l.pc) against proto_bc(pt) rather than on the closure identity.
+    */
+    if (pt->flags >= 3*PROTO_CLCOUNT) {  /* Specialize to prototype instead. */
+      TRef trpt = emitir(IRT(IR_FLOAD, IRT_P32), tr, IRFL_FUNC_PC);
+      emitir(IRTG(IR_EQ, IRT_P32), trpt, lj_ir_kptr(J, proto_bc(pt)));
+      (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO);  /* Prevent GC of proto. */
+      return tr;
+    }
+  }
+  /* Otherwise specialize to the function (closure) value itself.
+  ** This path is also taken for every function that is not a Lua function.
+  */
+  kfunc = lj_ir_kfunc(J, fn);
+  emitir(IRTG(IR_EQ, IRT_FUNC), tr, kfunc);
+  return kfunc;
+}
+
 /* Record call setup. */
 static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs)
 {
   RecordIndex ix;
   TValue *functv = &J->L->base[func];
-  TRef trfunc, *fbase = &J->base[func];
+  TRef *fbase = &J->base[func];
   ptrdiff_t i;
   for (i = 0; i <= nargs; i++)
     (void)getslot(J, func+i);  /* Ensure func and all args have a reference. */
@@ -582,11 +602,7 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs)
     fbase[0] = ix.mobj;  /* Replace function. */
     functv = &ix.mobjv;
   }
-
-  /* Specialize to the runtime value of the called function. */
-  trfunc = lj_ir_kfunc(J, funcV(functv));
-  emitir(IRTG(IR_EQ, IRT_FUNC), fbase[0], trfunc);
-  fbase[0] = trfunc | TREF_FRAME;
+  fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]);
   J->maxslot = (BCReg)nargs;
 }