
Re-enable trace stitching.

Thanks to Vyacheslav Egorov.
Mike Pall · 10 years ago · commit a3a6866d4c
13 changed files with 60 additions and 84 deletions
  1. doc/changes.html (+1 -1)
  2. src/lj_ffrecord.c (+4 -35)
  3. src/lj_gc.c (+1 -1)
  4. src/lj_ir.c (+30 -14)
  5. src/lj_iropt.h (+1 -0)
  6. src/lj_jit.h (+1 -0)
  7. src/lj_snap.c (+2 -2)
  8. src/lj_trace.c (+6 -1)
  9. src/lj_traceerr.h (+0 -2)
  10. src/vm_arm.dasc (+5 -8)
  11. src/vm_mips.dasc (+3 -7)
  12. src/vm_ppc.dasc (+2 -6)
  13. src/vm_x86.dasc (+4 -7)
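
Trace stitching lets the recorder end a trace just before a call it cannot compile (an NYI fast function, or a classic C function via recff_c below) and start a new trace right after the call returns, instead of aborting the whole recording. A minimal Lua sketch of code that benefits, assuming a build with this commit applied; os.clock() is only a stand-in for any non-compiled C function, not something taken from the commit:

-- Minimal sketch (assumption: LuaJIT built with this commit).
local sum = 0
for i = 1, 1e6 do
  sum = sum + i
  os.clock()  -- not compiled; the trace ends here and a new one is stitched
end
print(sum)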

+ 1 - 1
doc/changes.html

@@ -90,7 +90,7 @@ Please take a look at the commit history for more details.
 </ul></li>
 <li>Improvements to the JIT compiler:
 <ul>
-<li>Add trace stitching (disabled for now).</li>
+<li>Add trace stitching.</li>
 <li>Compile various builtins: <tt>string.char()</tt>, <tt>string.reverse()</tt>, <tt>string.lower()</tt>, <tt>string.upper()</tt>, <tt>string.rep()</tt>, <tt>string.format()</tt>, <tt>table.concat()</tt>, <tt>bit.tohex()</tt>, <tt>getfenv(0)</tt>, <tt>debug.getmetatable()</tt>.</li>
 <li>Compile <tt>string.find()</tt> for fixed string searches (no patterns).</li>
 <li>Compile <tt>BC_TSETM</tt>, e.g. <tt>{1,2,3,f()}</tt>.</li>

+ 4 - 35
src/lj_ffrecord.c

@@ -96,18 +96,10 @@ static ptrdiff_t results_wanted(jit_State *J)
     return -1;
 }
 
-#ifdef LUAJIT_TRACE_STITCHING
-/* This feature is disabled for now due to a design mistake. Sorry.
-**
-** It causes unpredictable behavior and crashes when a full trace flush
-** happens with a stitching continuation still in the stack somewhere.
-*/
-
 /* Trace stitching: add continuation below frame to start a new trace. */
 static void recff_stitch(jit_State *J)
 {
   ASMFunction cont = lj_cont_stitch;
-  TraceNo traceno = J->cur.traceno;
   lua_State *L = J->L;
   TValue *base = L->base;
   const BCIns *pc = frame_pc(base-1);
@@ -120,7 +112,7 @@ static void recff_stitch(jit_State *J)
   setframe_ftsz(base+1, ((char *)(base+1) - (char *)pframe) + FRAME_CONT);
   setcont(base, cont);
   setframe_pc(base, pc);
-  if (LJ_DUALNUM) setintV(base-1, traceno); else base[-1].u64 = traceno;
+  setnilV(base-1);  /* Incorrect, but rec_check_slots() won't run anymore. */
   L->base += 2;
   L->top += 2;
 
@@ -132,7 +124,9 @@ static void recff_stitch(jit_State *J)
   trcont = lj_ir_kptr(J, (void *)cont);
 #endif
   J->base[0] = trcont | TREF_CONT;
-  J->base[-1] = LJ_DUALNUM ? lj_ir_kint(J,traceno) : lj_ir_knum_u64(J,traceno);
+  J->ktracep = lj_ir_k64_reserve(J);
+  lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE);
+  J->base[-1] = emitir(IRT(IR_XLOAD, IRT_P64), lj_ir_kptr(J, &J->ktracep->gcr), 0);
   J->base += 2;
   J->baseslot += 2;
   J->framedepth++;
@@ -181,31 +175,6 @@ static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
 
 /* Must stop the trace for classic C functions with arbitrary side-effects. */
 #define recff_c		recff_nyi
-#else
-/* Fallback handler for fast functions that are not recorded (yet). */
-static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
-{
-  setfuncV(J->L, &J->errinfo, J->fn);
-  lj_trace_err_info(J, LJ_TRERR_NYIFF);
-  UNUSED(rd);
-}
-
-/* Throw error for unsupported variant of fast function. */
-LJ_NORET static void recff_nyiu(jit_State *J, RecordFFData *rd)
-{
-  setfuncV(J->L, &J->errinfo, J->fn);
-  lj_trace_err_info(J, LJ_TRERR_NYIFFU);
-  UNUSED(rd);
-}
-
-/* Must abort the trace for classic C functions with arbitrary side-effects. */
-static void LJ_FASTCALL recff_c(jit_State *J, RecordFFData *rd)
-{
-  setfuncV(J->L, &J->errinfo, J->fn);
-  lj_trace_err_info(J, LJ_TRERR_NYICF);
-  UNUSED(rd);
-}
-#endif
 
 /* Emit BUFHDR for the global temporary buffer. */
 static TRef recff_bufhdr(jit_State *J)

+ 1 - 1
src/lj_gc.c

@@ -69,7 +69,7 @@ static void gc_mark(global_State *g, GCobj *o)
       gray2black(o);  /* Closed upvalues are never gray. */
   } else if (gct != ~LJ_TSTR && gct != ~LJ_TCDATA) {
     lua_assert(gct == ~LJ_TFUNC || gct == ~LJ_TTAB ||
-	       gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO);
+	       gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO || gct == ~LJ_TTRACE);
     setgcrefr(o->gch.gclist, g->gc.gray);
     setgcref(g->gc.gray, o);
   }

+ 30 - 14
src/lj_ir.c

@@ -209,24 +209,13 @@ void lj_ir_k64_freeall(jit_State *J)
     lj_mem_free(J2G(J), k, sizeof(K64Array));
     k = next;
   }
+  setmref(J->k64, NULL);
 }
 
-/* Find 64 bit constant in chained array or add it. */
-cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64)
+/* Get new 64 bit constant slot. */
+static TValue *ir_k64_add(jit_State *J, K64Array *kp, uint64_t u64)
 {
-  K64Array *k, *kp = NULL;
   TValue *ntv;
-  MSize idx;
-  /* Search for the constant in the whole chain of arrays. */
-  for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) {
-    kp = k;  /* Remember previous element in list. */
-    for (idx = 0; idx < k->numk; idx++) {  /* Search one array. */
-      TValue *tv = &k->k[idx];
-      if (tv->u64 == u64)  /* Needed for +-0/NaN/absmask. */
-	return tv;
-    }
-  }
-  /* Constant was not found, need to add it. */
   if (!(kp && kp->numk < LJ_MIN_K64SZ)) {  /* Allocate a new array. */
     K64Array *kn = lj_mem_newt(J->L, sizeof(K64Array), K64Array);
     setmref(kn->next, NULL);
@@ -242,6 +231,33 @@ cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64)
   return ntv;
 }
 
+/* Find 64 bit constant in chained array or add it. */
+cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64)
+{
+  K64Array *k, *kp = NULL;
+  MSize idx;
+  /* Search for the constant in the whole chain of arrays. */
+  for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) {
+    kp = k;  /* Remember previous element in list. */
+    for (idx = 0; idx < k->numk; idx++) {  /* Search one array. */
+      TValue *tv = &k->k[idx];
+      if (tv->u64 == u64)  /* Needed for +-0/NaN/absmask. */
+	return tv;
+    }
+  }
+  /* Otherwise add a new constant. */
+  return ir_k64_add(J, kp, u64);
+}
+
+TValue *lj_ir_k64_reserve(jit_State *J)
+{
+  K64Array *k, *kp = NULL;
+  lj_ir_k64_find(J, 0);  /* Intern dummy 0 to protect the reserved slot. */
+  /* Find last K64Array, if any. */
+  for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) kp = k;
+  return ir_k64_add(J, kp, 0);  /* Set to 0. Final value is set later. */
+}
+
 /* Intern 64 bit constant, given by its address. */
 TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv)
 {

+ 1 - 0
src/lj_iropt.h

@@ -40,6 +40,7 @@ static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J)
 LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k);
 LJ_FUNC void lj_ir_k64_freeall(jit_State *J);
 LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv);
+LJ_FUNC TValue *lj_ir_k64_reserve(jit_State *J);
 LJ_FUNC cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64);
 LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64);
 LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n);

+ 1 - 0
src/lj_jit.h

@@ -381,6 +381,7 @@ typedef struct jit_State {
   GCRef *trace;		/* Array of traces. */
   TraceNo freetrace;	/* Start of scan for next free trace. */
   MSize sizetrace;	/* Size of trace array. */
+  TValue *ktracep;	/* Pointer to K64Array slot with GCtrace pointer. */
 
   IRRef1 chain[IR__MAX];  /* IR instruction skip-list chain anchors. */
   TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA];  /* Stack slot map. */

+ 2 - 2
src/lj_snap.c

@@ -631,8 +631,8 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
     } else if (irt_isnum(t)) {
       setnumV(o, ex->fpr[r-RID_MIN_FPR]);
 #endif
-    } else if (LJ_64 && irt_islightud(t)) {
-      /* 64 bit lightuserdata which may escape already has the tag bits. */
+    } else if (LJ_64 && irt_is64(t)) {
+      /* 64 bit values that already have the tag bits. */
       o->u64 = ex->gpr[r-RID_MIN_GPR];
     } else if (irt_ispri(t)) {
       setpriV(o, irt_toitype(t));

+ 6 - 1
src/lj_trace.c

@@ -274,7 +274,7 @@ int lj_trace_flushall(lua_State *L)
       if (T->root == 0)
	trace_flushroot(J, T);
       lj_gdbjit_deltrace(J, T);
-      T->traceno = 0;
+      T->traceno = T->link = 0;  /* Blacklist the link for cont_stitch. */
       setgcrefnull(J->trace[i]);
     }
   }
@@ -284,6 +284,7 @@ int lj_trace_flushall(lua_State *L)
   memset(J->penalty, 0, sizeof(J->penalty));
   /* Free the whole machine code and invalidate all exit stub groups. */
   lj_mcode_free(J);
+  lj_ir_k64_freeall(J);
   memset(J->exitstubgroup, 0, sizeof(J->exitstubgroup));
   lj_vmevent_send(L, TRACE,
     setstrV(L, L->top++, lj_str_newlit(L, "flush"));
@@ -402,6 +403,7 @@ static void trace_start(jit_State *J)
   J->postproc = LJ_POST_NONE;
   lj_resetsplit(J);
   J->retryrec = 0;
+  J->ktracep = NULL;
   setgcref(J->cur.startpt, obj2gco(J->pt));
 
   L = J->L;
@@ -477,6 +479,9 @@ static void trace_stop(jit_State *J)
   lj_mcode_commit(J, J->cur.mcode);
   J->postproc = LJ_POST_NONE;
   trace_save(J, T);
+  if (J->ktracep) {  /* Patch K64Array slot with the final GCtrace pointer. */
+    setgcV(J->L, J->ktracep, obj2gco(T), LJ_TTRACE);
+  }
 
   L = J->L;
   lj_vmevent_send(L, TRACE,
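
Taken together (an interpretation of the hunks above, not text from the commit): recff_stitch now stores the previous trace as a tagged GCtrace pointer loaded from a reserved K64Array slot, trace_stop() patches that slot through J->ktracep, gc_mark() learns to mark LJ_TTRACE objects so the stitched-to trace stays alive, and lj_trace_flushall() clears T->link so cont_stitch sees link == traceno and falls back to the interpreter instead of jumping into freed machine code. Schematically, in Lua, the scenario the comment removed from lj_ffrecord.c warned about; treating this exact snippet as a reproducer is an assumption:

-- Hypothetical sketch of the old failure mode: a full trace flush while a
-- stitching continuation is still live on the stack.
for i = 1, 1e5 do
  os.clock()        -- stitch point: leaves a stitching continuation
  if i == 5e4 then
    jit.flush()     -- full flush: machine code freed, T->link cleared to 0
  end
end
-- With this commit, cont_stitch finds link == traceno (both 0) and returns
-- to the interpreter; previously this could follow a stale trace reference.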

+ 0 - 2
src/lj_traceerr.h

@@ -25,8 +25,6 @@ TREDEF(BADTYPE,	"bad argument type")
 TREDEF(CJITOFF,	"JIT compilation disabled for function")
 TREDEF(CUNROLL,	"call unroll limit reached")
 TREDEF(DOWNREC,	"down-recursion, restarting")
-TREDEF(NYICF,	"NYI: C function %s")
-TREDEF(NYIFF,	"NYI: FastFunc %s")
 TREDEF(NYIFFU,	"NYI: unsupported variant of FastFunc %s")
 TREDEF(NYIRETL,	"NYI: return to lower frame")
 

+ 5 - 8
src/vm_arm.dasc

@@ -2086,7 +2086,7 @@ static void build_subroutines(BuildCtx *ctx)
  |  // RA = resultptr, CARG4 = meta base
  |   ldr RB, SAVE_MULTRES
  |  ldr INS, [PC, #-4]
-  |    ldr CARG3, [CARG4, #-24]		// Save previous trace number.
+  |    ldr TRACE:CARG3, [CARG4, #-24]	// Save previous trace.
  |   subs RB, RB, #8
  |  decode_RA8 RC, INS			// Call base.
  |   beq >2
@@ -2101,23 +2101,20 @@ static void build_subroutines(BuildCtx *ctx)
  |   decode_RA8 RA, INS
  |   decode_RB8 RB, INS
  |   add RA, RA, RB
-  |  ldr CARG1, [DISPATCH, #DISPATCH_J(trace)]
  |3:
  |   cmp RA, RC
  |  mvn CARG2, #~LJ_TNIL
  |   bhi >9				// More results wanted?
  |
-  |  ldr TRACE:RA, [CARG1, CARG3, lsl #2]
-  |  cmp TRACE:RA, #0
-  |  beq ->cont_nop
-  |  ldrh RC, TRACE:RA->link
-  |  cmp RC, CARG3
+  |  ldrh RA, TRACE:CARG3->traceno
+  |  ldrh RC, TRACE:CARG3->link
+  |  cmp RC, RA
  |  beq ->cont_nop			// Blacklisted.
  |  cmp RC, #0
  |  bne =>BC_JLOOP			// Jump to stitched trace.
  |
  |  // Stitch a new trace to the previous trace.
-  |  str CARG3, [DISPATCH, #DISPATCH_J(exitno)]
+  |  str RA, [DISPATCH, #DISPATCH_J(exitno)]
  |  str L, [DISPATCH, #DISPATCH_J(L)]
  |  str BASE, L->base
  |  sub CARG1, DISPATCH, #-GG_DISP2J

+ 3 - 7
src/vm_mips.dasc

@@ -2015,7 +2015,7 @@ static void build_subroutines(BuildCtx *ctx)
  |.if JIT
  |  // RA = resultptr, RB = meta base
  |  lw INS, -4(PC)
-  |    lw TMP3, -24+LO(RB)		// Save previous trace number.
+  |    lw TMP2, -24+LO(RB)		// Save previous trace.
  |  decode_RA8a RC, INS
  |   addiu AT, MULTRES, -8
  |  decode_RA8b RC
@@ -2034,17 +2034,13 @@ static void build_subroutines(BuildCtx *ctx)
  |   decode_RA8b RA
  |    decode_RB8b RB
  |   addu RA, RA, RB
-  |  lw TMP1, DISPATCH_J(trace)(DISPATCH)
  |   addu RA, BASE, RA
  |3:
  |   sltu AT, RC, RA
  |   bnez AT, >9			// More results wanted?
-  |. sll TMP2, TMP3, 2
+  |.   nop
  |
-  |  addu TMP2, TMP1, TMP2
-  |  lw TRACE:TMP2, 0(TMP2)
-  |  beqz TRACE:TMP2, ->cont_nop
-  |.  nop
+  |  lhu TMP3, TRACE:TMP2->traceno
  |  lhu RD, TRACE:TMP2->link
  |  beq RD, TMP3, ->cont_nop		// Blacklisted.
  |.  load_got lj_dispatch_stitch

+ 2 - 6
src/vm_ppc.dasc

@@ -2525,7 +2525,7 @@ static void build_subroutines(BuildCtx *ctx)
  |.if JIT
  |  // RA = resultptr, RB = meta base
  |  lwz INS, -4(PC)
-  |    lwz TMP3, -20(RB)		// Save previous trace number.
+  |    lwz TRACE:TMP2, -20(RB)		// Save previous trace.
  |   addic. TMP1, MULTRES, -8
  |  decode_RA8 RC, INS			// Call base.
  |   beq >2
@@ -2540,15 +2540,11 @@ static void build_subroutines(BuildCtx *ctx)
  |   decode_RA8 RA, INS
  |   decode_RB8 RB, INS
  |   add RA, RA, RB
-  |  lwz TMP1, DISPATCH_J(trace)(DISPATCH)
  |3:
  |   cmplw RA, RC
  |   bgt >9				// More results wanted?
  |
-  |  slwi TMP2, TMP3, 2
-  |  lwzx TRACE:TMP2, TMP1, TMP2
-  |  cmpwi TRACE:TMP2, 0
-  |  beq ->cont_nop
+  |  lhz TMP3, TRACE:TMP2->traceno
  |  lhz RD, TRACE:TMP2->link
  |  cmpw RD, TMP3
  |   cmpwi cr1, RD, 0

+ 4 - 7
src/vm_x86.dasc

@@ -2667,8 +2667,8 @@ static void build_subroutines(BuildCtx *ctx)
  |->cont_stitch:			// Trace stitching.
  |.if JIT
  |  // BASE = base, RC = result, RB = mbase
-  |  mov RA, [RB-24]			// Save previous trace number.
-  |  mov TMP1, RA
+  |  mov TRACE:RA, [RB-24]		// Save previous trace.
+  |  mov TMP1, TRACE:RA
  |  mov TMP3, DISPATCH			// Need one more register.
  |  mov DISPATCH, MULTRES
  |  movzx RA, PC_RA
@@ -2699,11 +2699,8 @@ static void build_subroutines(BuildCtx *ctx)
  |  ja >9				// More results wanted?
  |
  |  mov DISPATCH, TMP3
-  |  mov RB, TMP1			// Get previous trace number.
-  |  mov RA, [DISPATCH+DISPATCH_J(trace)]
-  |  mov TRACE:RD, [RA+RB*4]
-  |  test TRACE:RD, TRACE:RD
-  |  jz ->cont_nop
+  |  mov TRACE:RD, TMP1			// Get previous trace.
+  |  movzx RB, word TRACE:RD->traceno
  |  movzx RD, word TRACE:RD->link
  |  cmp RD, RB
  |  je ->cont_nop			// Blacklisted.