Browse Source

Add shadow frame link stack for trace recorder.

Simplifies snapshots. Prerequisite for pre-call snapshots.
Increases consistency for fast function calls, too.
Mike Pall 16 năm trước
mục cha
commit
7256690364
6 tập tin đã thay đổi với 82 bổ sung và 56 xóa
  1. 3 3
      src/lj_asm.c
  2. 1 0
      src/lj_def.h
  3. 2 1
      src/lj_jit.h
  4. 6 6
      src/lj_opt_loop.c
  5. 55 8
      src/lj_record.c
  6. 15 38
      src/lj_snap.c

+ 3 - 3
src/lj_asm.c

@@ -3042,7 +3042,7 @@ static void asm_tail_sync(ASMState *as)
   SnapShot *snap = &as->T->snap[as->T->nsnap-1];  /* Last snapshot. */
   MSize n, nent = snap->nent;
   SnapEntry *map = &as->T->snapmap[snap->mapofs];
-  SnapEntry *flinks = map + nent + snap->nframelinks;
+  SnapEntry *flinks = map + nent + 1;
   BCReg newbase = 0;
   BCReg nslots, topslot = 0;
 
@@ -3116,11 +3116,11 @@ static void asm_tail_sync(ASMState *as)
       if (!(sn & (SNAP_CONT|SNAP_FRAME)))
 	emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
       else if (s != 0)  /* Do not overwrite link to previous frame. */
-	emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks));
+	emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks++));
     }
     checkmclim(as);
   }
-  lua_assert(map + nent == flinks-1);
+  lua_assert(map + nent + 1 + snap->depth == flinks);
 }
 
 /* Fixup the tail code. */

+ 1 - 0
src/lj_def.h

@@ -66,6 +66,7 @@ typedef unsigned __int32 uintptr_t;
 
 /* JIT compiler limits. */
 #define LJ_MAX_JSLOTS	250		/* Max. # of stack slots for a trace. */
+#define LJ_MAX_JFRAME	20		/* Max. # of frames for a trace. */
 #define LJ_MAX_PHI	32		/* Max. # of PHIs for a loop. */
 #define LJ_MAX_EXITSTUBGR	8	/* Max. # of exit stub groups. */
 

+ 2 - 1
src/lj_jit.h

@@ -114,7 +114,7 @@ typedef struct SnapShot {
   IRRef1 ref;		/* First IR ref for this snapshot. */
   uint8_t nslots;	/* Number of valid slots. */
   uint8_t nent;		/* Number of compressed entries. */
-  uint8_t nframelinks;	/* Number of frame links. */
+  uint8_t depth;	/* Number of frame links. */
   uint8_t count;	/* Count of taken exits for this snapshot. */
 } SnapShot;
 
@@ -252,6 +252,7 @@ typedef struct jit_State {
 
   IRRef1 chain[IR__MAX];  /* IR instruction skip-list chain anchors. */
   TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA];  /* Stack slot map. */
+  SnapEntry frame[LJ_MAX_JFRAME+2];  /* Frame link stack. */
 
   int32_t param[JIT_P__MAX];  /* JIT engine parameters. */
 

+ 6 - 6
src/lj_opt_loop.c

@@ -167,7 +167,7 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
 			    SnapEntry *loopmap, IRRef1 *subst)
 {
   SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs];
-  MSize nmapofs, nframelinks;
+  MSize nmapofs, depth;
   MSize on, ln, nn, onent = osnap->nent;
   BCReg nslots = osnap->nslots;
   SnapShot *snap = &J->cur.snap[J->cur.nsnap];
@@ -179,11 +179,11 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
     nmapofs = snap->mapofs;
   }
   J->guardemit.irt = 0;
-  nframelinks = osnap->nframelinks;
+  depth = osnap->depth;
   /* Setup new snapshot. */
   snap->mapofs = (uint16_t)nmapofs;
   snap->ref = (IRRef1)J->cur.nins;
-  snap->nframelinks = (uint8_t)nframelinks;
+  snap->depth = (uint8_t)depth;
   snap->nslots = nslots;
   snap->count = 0;
   nmap = &J->cur.snapmap[nmapofs];
@@ -205,10 +205,10 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
   while (snap_slot(loopmap[ln]) < nslots)  /* Copy remaining loop slots. */
     nmap[nn++] = loopmap[ln++];
   snap->nent = (uint8_t)nn;
-  J->cur.nsnapmap = (uint16_t)(nmapofs + nn + nframelinks);
+  J->cur.nsnapmap = (uint16_t)(nmapofs + nn + 1 + depth);
   omap += onent;
   nmap += nn;
-  for (nn = 0; nn < nframelinks; nn++)  /* Copy frame links. */
+  for (nn = 0; nn <= depth; nn++)  /* Copy PC + frame links. */
     nmap[nn] = omap[nn];
 }
 
@@ -314,7 +314,7 @@ static void loop_undo(jit_State *J, IRRef ins, MSize nsnap)
   SnapShot *snap = &J->cur.snap[nsnap-1];
   SnapEntry *map = J->cur.snapmap;
   map[snap->mapofs + snap->nent] = map[J->cur.snap[0].nent];  /* Restore PC. */
-  J->cur.nsnapmap = (uint16_t)(snap->mapofs + snap->nent + snap->nframelinks);
+  J->cur.nsnapmap = (uint16_t)(snap->mapofs + snap->nent + 1 + snap->depth);
   J->cur.nsnap = nsnap;
   J->guardemit.irt = 0;
   lj_ir_rollback(J, ins);

+ 55 - 8
src/lj_record.c

@@ -101,20 +101,45 @@ static void rec_check_ir(jit_State *J)
   }
 }
 
+/* Compare frame stack of the recorder and the VM (assertion-only check). */
+static void rec_check_frames(jit_State *J)
+{
+  cTValue *frame = J->L->base - 1;  /* Start at the slot just below L->base. */
+  cTValue *lim = J->L->base - J->baseslot;  /* Walk ends once frame <= lim. */
+  int32_t depth = J->framedepth;  /* Index into J->frame, counted down. */
+  while (frame > lim) {
+    depth--;
+    lua_assert(depth >= 0);
+    lua_assert((SnapEntry)frame_ftsz(frame) == J->frame[depth]);
+    if (frame_iscont(frame)) {  /* Continuation: also compare the slot below. */
+      depth--;
+      lua_assert(depth >= 0);
+      lua_assert((SnapEntry)frame_ftsz(frame-1) == J->frame[depth]);
+    }
+    frame = frame_prev(frame);
+  }
+  lua_assert(depth == 0);  /* Every J->frame entry must have been matched. */
+}
+
 /* Sanity check the slots. */
 static void rec_check_slots(jit_State *J)
 {
   BCReg s, nslots = J->baseslot + J->maxslot;
+  int32_t depth = 0;  /* Must start at 0: counts frame/cont slots above slot #0. */
   lua_assert(J->baseslot >= 1 && J->baseslot < LJ_MAX_JSLOTS);
   lua_assert(nslots < LJ_MAX_JSLOTS);
   for (s = 0; s < nslots; s++) {
     TRef tr = J->slot[s];
+    if (s != 0 && (tr & (TREF_CONT|TREF_FRAME)))
+      depth++;
     if (tr) {
       IRRef ref = tref_ref(tr);
       lua_assert(ref >= J->cur.nk && ref < J->cur.nins);
       lua_assert(irt_t(IR(ref)->t) == tref_t(tr));
     }
   }
+  lua_assert(J->framedepth == depth);
+  rec_check_frames(J);
 }
 #endif
 
@@ -854,6 +879,7 @@ typedef struct RecordFFData {
   ptrdiff_t nres;	/* Number of returned results (defaults to 1). */
   ptrdiff_t cres;	/* Wanted number of call results. */
   uint32_t data;	/* Per-ffid auxiliary data (opcode, literal etc.). */
+  int metacall;		/* True if function was resolved via __call. */
 } RecordFFData;
 
 /* Type of handler to record a fast function. */
@@ -1020,9 +1046,14 @@ static void recff_tostring(jit_State *J, TRef *res, RecordFFData *rd)
     ix.tab = tr;
     copyTV(J->L, &ix.tabv, &rd->argv[0]);
     if (rec_mm_lookup(J, &ix, MM_tostring)) {  /* Has __tostring metamethod? */
+      if (rd->metacall)  /* Must not use kludge. */
+	recff_err_nyi(J, rd);
       res[0] = ix.mobj;
-      copyTV(J->L, rd->argv - 1, &ix.mobjv);
-      if (!rec_call(J, (BCReg)(res - J->base), 1, 1))  /* Pending call? */
+      copyTV(J->L, rd->argv - 1, &ix.mobjv);  /* Kludge. */
+      J->framedepth--;
+      if (rec_call(J, (BCReg)(res - J->base), 1, 1))
+	J->framedepth++;
+      else
 	rd->cres = CALLRES_PENDING;
       /* Otherwise res[0] already contains the result. */
     } else if (tref_isnumber(tr)) {
@@ -1067,6 +1098,8 @@ static void recff_pcall(jit_State *J, TRef *res, RecordFFData *rd)
 {
   if (rd->nargs >= 1) {
     BCReg parg = (BCReg)(arg - J->base);
+    J->pc = (const BCIns *)(sizeof(TValue) - 4 +
+			    (hook_active(J2G(J)) ? FRAME_PCALLH : FRAME_PCALL));
     if (rec_call(J, parg, CALLRES_MULTI, rd->nargs - 1)) {  /* Resolved call. */
       res[0] = TREF_TRUE;  /* Prepend true result. No need to move results. */
       rd->nres = (ptrdiff_t)J->maxslot - (ptrdiff_t)parg + 1;
@@ -1108,6 +1141,8 @@ static void recff_xpcall(jit_State *J, TRef *res, RecordFFData *rd)
     copyTV(J->L, &rd->argv[0], &argv1);
     copyTV(J->L, &rd->argv[1], &argv0);
     oargv = savestack(J->L, rd->argv);
+    J->pc = (const BCIns *)(2*sizeof(TValue) - 4 +
+			    (hook_active(J2G(J)) ? FRAME_PCALLH : FRAME_PCALL));
     /* Need to protect rec_call because the recorder may throw. */
     rx.parg = parg;
     rx.nargs = rd->nargs - 2;
@@ -1549,7 +1584,7 @@ static void rec_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
   } else if (frame_iscont(frame)) {  /* Return to continuation frame. */
     ASMFunction cont = frame_contf(frame);
     BCReg cbase = (BCReg)frame_delta(frame);
-    if (J->framedepth-- <= 0)
+    if ((J->framedepth -= 2) <= 0)
       lj_trace_err(J, LJ_TRERR_NYIRETL);
     J->baseslot -= (BCReg)cbase;
     J->base -= cbase;
@@ -1602,6 +1637,7 @@ static int rec_call(jit_State *J, BCReg func, ptrdiff_t cres, ptrdiff_t nargs)
   if (tref_isfunc(res[0])) {  /* Regular function call. */
     rd.fn = funcV(tv);
     rd.argv = tv+1;
+    rd.metacall = 0;
   } else {  /* Otherwise resolve __call metamethod for called object. */
     RecordIndex ix;
     ptrdiff_t i;
@@ -1615,13 +1651,21 @@ static int rec_call(jit_State *J, BCReg func, ptrdiff_t cres, ptrdiff_t nargs)
     res[0] = ix.mobj;
     rd.fn = funcV(&ix.mobjv);
     rd.argv = tv;  /* The called object is the 1st arg. */
+    rd.metacall = 1;
   }
 
   /* Specialize to the runtime value of the called function. */
   trfunc = lj_ir_kfunc(J, rd.fn);
   emitir(IRTG(IR_EQ, IRT_FUNC), res[0], trfunc);
   res[0] = trfunc | TREF_FRAME;
-  J->framedepth++;
+
+  /* Add frame links. */
+  J->frame[J->framedepth++] = SNAP_MKPC(J->pc+1);
+  if (cres == CALLRES_CONT)  /* Continuations need an extra frame stack slot. */
+    J->frame[J->framedepth++] = SNAP_MKFTSZ((func+1)*sizeof(TValue)+FRAME_CONT);
+    /* NYI: func is wrong if any fast function ever sets up a continuation. */
+  if (J->framedepth > LJ_MAX_JFRAME)
+    lj_trace_err(J, LJ_TRERR_STACKOV);
 
   if (isluafunc(rd.fn)) {  /* Record call to Lua function. */
     GCproto *pt = funcproto(rd.fn);
@@ -1659,6 +1703,7 @@ static int rec_call(jit_State *J, BCReg func, ptrdiff_t cres, ptrdiff_t nargs)
     return 0;  /* No result yet. */
   } else {  /* Record call to C function or fast function. */
     uint32_t m = 0;
+    BCReg oldmaxslot = J->maxslot;
     res[1+nargs] = 0;
     rd.nargs = nargs;
     if (rd.fn->c.ffid < sizeof(recff_idmap)/sizeof(recff_idmap[0]))
@@ -1682,10 +1727,12 @@ static int rec_call(jit_State *J, BCReg func, ptrdiff_t cres, ptrdiff_t nargs)
       rec_ret(J, func, rd.nres);
     } else if (cres == CALLRES_CONT) {
       /* Note: immediately resolved continuations must not change J->maxslot. */
+      J->maxslot = oldmaxslot;
+      J->framedepth--;
       res[rd.nres] = TREF_NIL;  /* Turn 0 results into nil result. */
     } else {
-      J->framedepth++;
       lua_assert(cres == CALLRES_PENDING);
+      J->framedepth++;
       return 0;  /* Pending call, no result yet. */
     }
     return 1;  /* Result resolved immediately. */
@@ -2213,13 +2260,13 @@ static void rec_setup_side(jit_State *J, Trace *T)
     }
   setslot:
     J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME));  /* Same as TREF_* flags. */
-    if ((sn & SNAP_FRAME) && s != 0) {
+    if ((sn & SNAP_FRAME) && s != 0)
       J->baseslot = s+1;
-      J->framedepth++;
-    }
   }
   J->base = J->slot + J->baseslot;
   J->maxslot = snap->nslots - J->baseslot;
+  J->framedepth = snap->depth;  /* Copy frames from snapshot. */
+  memcpy(J->frame, &map[nent+1], sizeof(SnapEntry)*(size_t)snap->depth);
   lj_snap_add(J);
 }
 

+ 15 - 38
src/lj_snap.c

@@ -68,49 +68,26 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
   return n;
 }
 
-/* Add frame links at the end of the snapshot. */
-static MSize snapshot_framelinks(jit_State *J, SnapEntry *map)
-{
-  cTValue *frame = J->L->base - 1;
-  cTValue *lim = J->L->base - J->baseslot;
-  MSize f = 0;
-  map[f++] = SNAP_MKPC(J->pc);  /* The current PC is always the first entry. */
-  while (frame > lim) {  /* Backwards traversal of all frames above base. */
-    if (frame_islua(frame)) {
-      map[f++] = SNAP_MKPC(frame_pc(frame));
-      frame = frame_prevl(frame);
-    } else if (frame_ispcall(frame)) {
-      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
-      frame = frame_prevd(frame);
-    } else if (frame_iscont(frame)) {
-      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
-      map[f++] = SNAP_MKPC(frame_contpc(frame));
-      frame = frame_prevd(frame);
-    } else {
-      lua_assert(0);
-    }
-  }
-  return f;
-}
-
 /* Take a snapshot of the current stack. */
 static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
 {
   BCReg nslots = J->baseslot + J->maxslot;
-  MSize nent, nframelinks;
+  MSize nent;
   SnapEntry *p;
-  /* Conservative estimate. Continuation frames need 2 slots. */
-  lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth*2+1);
+  /* Conservative estimate: all slots + one PC entry + J->framedepth links. */
+  lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1);
   p = &J->cur.snapmap[nsnapmap];
   nent = snapshot_slots(J, p, nslots);
-  nframelinks = snapshot_framelinks(J, p + nent);
-  J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + nframelinks);
   snap->mapofs = (uint16_t)nsnapmap;
   snap->ref = (IRRef1)J->cur.nins;
   snap->nent = (uint8_t)nent;
-  snap->nframelinks = (uint8_t)nframelinks;
+  snap->depth = (uint8_t)J->framedepth;  /* Count of frame links copied below. */
   snap->nslots = (uint8_t)nslots;
   snap->count = 0;
+  J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth);  /* Slots + PC + links. */
+  /* Add frame links at the end of the snapshot. */
+  p[nent] = SNAP_MKPC(J->pc);  /* The current PC is always the first entry. */
+  memcpy(&p[nent+1], J->frame, sizeof(SnapEntry)*(size_t)J->framedepth);
 }
 
 /* Add or merge a snapshot. */
@@ -141,14 +118,14 @@ void lj_snap_shrink(jit_State *J)
   lua_assert(nslots < snap->nslots);
   snap->nslots = (uint8_t)nslots;
   if (nent > 0 && snap_slot(map[nent-1]) >= nslots) {
-    MSize s, delta, nframelinks = snap->nframelinks;
+    MSize s, delta, depth = snap->depth;
     for (nent--; nent > 0 && snap_slot(map[nent-1]) >= nslots; nent--)
       ;
     delta = snap->nent - nent;
     snap->nent = (uint8_t)nent;
-    J->cur.nsnapmap = (uint16_t)(snap->mapofs + nent + nframelinks);
+    J->cur.nsnapmap = (uint16_t)(snap->mapofs + nent + 1 + depth);
     map += nent;
-    for (s = 0; s < nframelinks; s++)  /* Move frame links down. */
+    for (s = 0; s <= depth; s++)  /* Move PC + frame links down. */
       map[s] = map[s+delta];
   }
 }
@@ -210,7 +187,7 @@ void lj_snap_restore(jit_State *J, void *exptr)
   SnapShot *snap = &T->snap[snapno];
   MSize n, nent = snap->nent;
   SnapEntry *map = &T->snapmap[snap->mapofs];
-  SnapEntry *flinks = map + nent + snap->nframelinks;
+  SnapEntry *flinks = map + nent;
   int32_t ftsz0;
   BCReg nslots = snap->nslots;
   TValue *frame;
@@ -224,6 +201,7 @@ void lj_snap_restore(jit_State *J, void *exptr)
   }
 
   /* Fill stack slots with data from the registers and spill slots. */
+  J->pc = snap_pc(*flinks++);
   frame = L->base-1;
   ftsz0 = frame_ftsz(frame);  /* Preserve link to previous frame in slot #0. */
   for (n = 0; n < nent; n++) {
@@ -236,7 +214,7 @@ void lj_snap_restore(jit_State *J, void *exptr)
       lj_ir_kvalue(L, o, ir);
       if ((sn & (SNAP_CONT|SNAP_FRAME))) {
 	/* Overwrite tag with frame link. */
-	o->fr.tp.ftsz = s != 0 ? (int32_t)*--flinks : ftsz0;
+	o->fr.tp.ftsz = s != 0 ? (int32_t)*flinks++ : ftsz0;
 	if ((sn & SNAP_FRAME)) {
 	  GCfunc *fn = ir_kfunc(ir);
 	  if (isluafunc(fn)) {
@@ -291,8 +269,7 @@ void lj_snap_restore(jit_State *J, void *exptr)
     }
   }
   L->top = curr_topL(L);
-  J->pc = snap_pc(*--flinks);
-  lua_assert(map + nent == flinks);
+  lua_assert(map + nent + 1 + snap->depth == flinks);
 }
 
 #undef IR