Browse Source

Turn traces into true GC objects (GCtrace).

Mike Pall 15 years ago
parent
commit
721b73fecb
24 changed files with 193 additions and 201 deletions
  1. 7 6
      doc/api.html
  2. 2 2
      src/buildvm_x64.h
  3. 2 2
      src/buildvm_x64win.h
  4. 1 5
      src/buildvm_x86.dasc
  5. 1 1
      src/buildvm_x86.h
  6. 1 1
      src/lib_base.c
  7. 9 10
      src/lib_jit.c
  8. 8 8
      src/lj_asm.c
  9. 2 2
      src/lj_asm.h
  10. 2 1
      src/lj_dispatch.c
  11. 0 1
      src/lj_func.c
  12. 45 23
      src/lj_gc.c
  13. 1 1
      src/lj_gc.h
  14. 5 4
      src/lj_gdbjit.c
  15. 2 2
      src/lj_gdbjit.h
  16. 1 1
      src/lj_ir.h
  17. 22 14
      src/lj_jit.h
  18. 1 1
      src/lj_obj.c
  19. 1 1
      src/lj_obj.h
  20. 6 7
      src/lj_record.c
  21. 4 4
      src/lj_snap.c
  22. 1 1
      src/lj_snap.h
  23. 67 100
      src/lj_trace.c
  24. 2 3
      src/lj_trace.h

+ 7 - 6
doc/api.html

@@ -138,11 +138,11 @@ of a module to turn off JIT compilation for the whole module for
 debugging purposes.
 </p>
 
-<h3 id="jit_flush_tr"><tt>status = jit.flush(tr)</tt></h3>
+<h3 id="jit_flush_tr"><tt>jit.flush(tr)</tt></h3>
 <p>
-Tries to flush the code for the specified trace and all of its
-side traces from the cache. Returns <tt>true</tt> on success.
-Returns <tt>false</tt> if there are still links to this trace.
+Flushes the specified root trace and all of its side traces from the cache.
+The code for the trace will be retained as long as there are any other
+traces which link to it.
 </p>
 
 <h3 id="jit_status"><tt>status, ... = jit.status()</tt></h3>
@@ -262,8 +262,9 @@ applies recursively to all sub-functions of the function with
 <h3 id="mode_engine"><tt>luaJIT_setmode(L, trace,<br>
 &nbsp;&nbsp;LUAJIT_MODE_TRACE|LUAJIT_MODE_FLUSH)</tt></h3>
 <p>
-Tries to flush the code for the specified trace and all of its
-side traces from the cache.
+Flushes the specified root trace and all of its side traces from the cache.
+The code for the trace will be retained as long as there are any other
+traces which link to it.
 </p>
 
 <h3 id="mode_engine"><tt>luaJIT_setmode(L, idx, LUAJIT_MODE_WRAPCFUNC|flag)</tt></h3>

+ 2 - 2
src/buildvm_x64.h

@@ -674,7 +674,7 @@ static const unsigned char build_actionlist[14075] = {
   255,248,3,102,15,46,193,252,233,244,1,255,141,12,202,139,105,4,129,252,253,
   239,15,132,244,247,255,137,105,252,252,139,41,137,105,252,248,252,233,245,
   255,141,156,253,131,233,139,1,137,105,252,252,137,65,252,248,255,65,139,142,
-  233,139,4,193,72,139,128,233,139,108,36,24,65,137,150,233,65,137,174,233,
+  233,139,4,129,72,139,128,233,139,108,36,24,65,137,150,233,65,137,174,233,
   76,137,36,36,76,137,108,36,8,72,131,252,236,16,252,255,224,255,141,156,253,
   131,233,139,3,15,182,204,15,182,232,131,195,4,193,232,16,65,252,255,36,252,
   238,255,137,221,209,252,237,129,229,239,102,65,131,172,253,46,233,1,15,132,
@@ -1048,7 +1048,7 @@ static const char *const extnames[] = {
 #define DtA(_V) (int)(ptrdiff_t)&(((GCupval *)0)_V)
 #define DtB(_V) (int)(ptrdiff_t)&(((Node *)0)_V)
 #define DtC(_V) (int)(ptrdiff_t)&(((int *)0)_V)
-#define DtD(_V) (int)(ptrdiff_t)&(((Trace *)0)_V)
+#define DtD(_V) (int)(ptrdiff_t)&(((GCtrace *)0)_V)
 #define DtE(_V) (int)(ptrdiff_t)&(((ExitInfo *)0)_V)
 #define DISPATCH_GL(field)	(GG_DISP2G + (int)offsetof(global_State, field))
 #define DISPATCH_J(field)	(GG_DISP2J + (int)offsetof(jit_State, field))

+ 2 - 2
src/buildvm_x64win.h

@@ -671,7 +671,7 @@ static const unsigned char build_actionlist[14023] = {
   180,253,134,233,255,248,3,102,15,46,193,252,233,244,1,255,141,12,202,139,
   105,4,129,252,253,239,15,132,244,247,255,137,105,252,252,139,41,137,105,252,
   248,252,233,245,255,141,180,253,134,233,139,1,137,105,252,252,137,65,252,
-  248,255,139,139,233,139,4,193,72,139,128,233,139,108,36,96,137,147,233,137,
+  248,255,139,139,233,139,4,129,72,139,128,233,139,108,36,96,137,147,233,137,
   171,233,76,137,100,36,80,76,137,108,36,32,76,137,116,36,24,76,137,124,36,
   16,72,137,225,72,129,252,236,239,102,15,127,49,102,15,127,185,233,102,68,
   15,127,129,233,102,68,15,127,137,233,102,68,15,127,145,233,102,68,15,127,
@@ -1048,7 +1048,7 @@ static const char *const extnames[] = {
 #define DtA(_V) (int)(ptrdiff_t)&(((GCupval *)0)_V)
 #define DtB(_V) (int)(ptrdiff_t)&(((Node *)0)_V)
 #define DtC(_V) (int)(ptrdiff_t)&(((int *)0)_V)
-#define DtD(_V) (int)(ptrdiff_t)&(((Trace *)0)_V)
+#define DtD(_V) (int)(ptrdiff_t)&(((GCtrace *)0)_V)
 #define DtE(_V) (int)(ptrdiff_t)&(((ExitInfo *)0)_V)
 #define DISPATCH_GL(field)	(GG_DISP2G + (int)offsetof(global_State, field))
 #define DISPATCH_J(field)	(GG_DISP2J + (int)offsetof(jit_State, field))

+ 1 - 5
src/buildvm_x86.dasc

@@ -107,7 +107,7 @@
 |.type UPVAL,		GCupval
 |.type NODE,		Node
 |.type NARGS,		int
-|.type TRACE,		Trace
+|.type TRACE,		GCtrace
 |.type EXITINFO,	ExitInfo
 |
 |// Stack layout while in interpreter. Must match with lj_frame.h.
@@ -4746,11 +4746,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
 #if LJ_HASJIT
     |  ins_AD	// RA = base (ignored), RD = traceno
     |  mov RA, [DISPATCH+DISPATCH_J(trace)]
-    |.if X64
-    |  mov TRACE:RD, [RA+RD*8]
-    |.else
     |  mov TRACE:RD, [RA+RD*4]
-    |.endif
     |  mov RDa, TRACE:RD->mcode
     |  mov L:RB, SAVE_L
     |  mov [DISPATCH+DISPATCH_GL(jit_base)], BASE

+ 1 - 1
src/buildvm_x86.h

@@ -1102,7 +1102,7 @@ static const char *const extnames[] = {
 #define DtA(_V) (int)(ptrdiff_t)&(((GCupval *)0)_V)
 #define DtB(_V) (int)(ptrdiff_t)&(((Node *)0)_V)
 #define DtC(_V) (int)(ptrdiff_t)&(((int *)0)_V)
-#define DtD(_V) (int)(ptrdiff_t)&(((Trace *)0)_V)
+#define DtD(_V) (int)(ptrdiff_t)&(((GCtrace *)0)_V)
 #define DtE(_V) (int)(ptrdiff_t)&(((ExitInfo *)0)_V)
 #define DISPATCH_GL(field)	(GG_DISP2G + (int)offsetof(global_State, field))
 #define DISPATCH_J(field)	(GG_DISP2J + (int)offsetof(jit_State, field))

+ 1 - 1
src/lib_base.c

@@ -54,7 +54,7 @@ LJLIB_PUSH("upval")
 LJLIB_PUSH("thread")
 LJLIB_PUSH("proto")
 LJLIB_PUSH("function")
-LJLIB_PUSH("")  /* Unused. */
+LJLIB_PUSH("trace")
 LJLIB_PUSH("table")
 LJLIB_PUSH(top-8)  /* userdata */
 LJLIB_PUSH("number")

+ 9 - 10
src/lib_jit.c

@@ -72,9 +72,8 @@ LJLIB_CF(jit_flush)
 #if LJ_HASJIT
   if (L->base < L->top && (tvisnum(L->base) || tvisstr(L->base))) {
     int traceno = lj_lib_checkint(L, 1);
-    setboolV(L->top-1,
-	     luaJIT_setmode(L, traceno, LUAJIT_MODE_FLUSH|LUAJIT_MODE_TRACE));
-    return 1;
+    luaJIT_setmode(L, traceno, LUAJIT_MODE_FLUSH|LUAJIT_MODE_TRACE);
+    return 0;
   }
 #endif
   return setjitmode(L, LUAJIT_MODE_FLUSH);
@@ -260,19 +259,19 @@ LJLIB_CF(jit_util_funcuvname)
 #if LJ_HASJIT
 
 /* Check trace argument. Must not throw for non-existent trace numbers.
 ** Reads the trace number from Lua argument 1 and returns the trace stored
 ** in that slot of J->trace[], or NULL if the number is out of range.
 */
-static Trace *jit_checktrace(lua_State *L)
+static GCtrace *jit_checktrace(lua_State *L)
 {
   TraceNo tr = (TraceNo)lj_lib_checkint(L, 1);
   jit_State *J = L2J(L);
   if (tr > 0 && tr < J->sizetrace)  /* Same range that traceref() asserts. */
-    return J->trace[tr];
+    return traceref(J, tr);
   return NULL;  /* Out of range. Callers test the result before use. */
 }
 
 /* local info = jit.util.traceinfo(tr) */
 LJLIB_CF(jit_util_traceinfo)
 {
-  Trace *T = jit_checktrace(L);
+  GCtrace *T = jit_checktrace(L);
   if (T) {
     GCtab *t;
     lua_createtable(L, 0, 4);  /* Increment hash size if fields are added. */
@@ -290,7 +289,7 @@ LJLIB_CF(jit_util_traceinfo)
 /* local m, ot, op1, op2, prev = jit.util.traceir(tr, idx) */
 LJLIB_CF(jit_util_traceir)
 {
-  Trace *T = jit_checktrace(L);
+  GCtrace *T = jit_checktrace(L);
   IRRef ref = (IRRef)lj_lib_checkint(L, 2) + REF_BIAS;
   if (T && ref >= REF_BIAS && ref < T->nins) {
     IRIns *ir = &T->ir[ref];
@@ -308,7 +307,7 @@ LJLIB_CF(jit_util_traceir)
 /* local k, t [, slot] = jit.util.tracek(tr, idx) */
 LJLIB_CF(jit_util_tracek)
 {
-  Trace *T = jit_checktrace(L);
+  GCtrace *T = jit_checktrace(L);
   IRRef ref = (IRRef)lj_lib_checkint(L, 2) + REF_BIAS;
   if (T && ref >= T->nk && ref < REF_BIAS) {
     IRIns *ir = &T->ir[ref];
@@ -330,7 +329,7 @@ LJLIB_CF(jit_util_tracek)
 /* local snap = jit.util.tracesnap(tr, sn) */
 LJLIB_CF(jit_util_tracesnap)
 {
-  Trace *T = jit_checktrace(L);
+  GCtrace *T = jit_checktrace(L);
   SnapNo sn = (SnapNo)lj_lib_checkint(L, 2);
   if (T && sn < T->nsnap) {
     SnapShot *snap = &T->snap[sn];
@@ -352,7 +351,7 @@ LJLIB_CF(jit_util_tracesnap)
 /* local mcode, addr, loop = jit.util.tracemc(tr) */
 LJLIB_CF(jit_util_tracemc)
 {
-  Trace *T = jit_checktrace(L);
+  GCtrace *T = jit_checktrace(L);
   if (T && T->mcode != NULL) {
     setstrV(L, L->top-1, lj_str_new(L, (const char *)T->mcode, T->szmcode));
     setnumV(L->top++, cast_num((intptr_t)T->mcode));

+ 8 - 8
src/lj_asm.c

@@ -67,8 +67,8 @@ typedef struct ASMState {
   BCReg topslot;	/* Number of slots for stack check (unless 0). */
   MSize gcsteps;	/* Accumulated number of GC steps (per section). */
 
-  Trace *T;		/* Trace to assemble. */
-  Trace *parent;	/* Parent trace (or NULL). */
+  GCtrace *T;		/* Trace to assemble. */
+  GCtrace *parent;	/* Parent trace (or NULL). */
 
   MCode *mcbot;		/* Bottom of reserved MCode. */
   MCode *mctop;		/* Top of generated MCode. */
@@ -3228,7 +3228,7 @@ static void asm_tail_link(ASMState *as)
     const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]);
     int32_t mres;
     if (bc_op(*pc) == BC_JLOOP) {  /* NYI: find a better way to do this. */
-      BCIns *retpc = &as->J->trace[bc_d(*pc)]->startins;
+      BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
       if (bc_isret(bc_op(*retpc)))
 	pc = retpc;
     }
@@ -3294,7 +3294,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
   }
   /* Patch exit branch. */
   target = lnk == TRACE_INTERP ? (MCode *)lj_vm_exit_interp :
-				 as->J->trace[lnk]->mcode;
+				 traceref(as->J, lnk)->mcode;
   *(int32_t *)(p-4) = jmprel(p, target);
   p[-5] = XI_JMP;
   /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */
@@ -3437,7 +3437,7 @@ static void asm_trace(ASMState *as)
 /* -- Trace setup --------------------------------------------------------- */
 
 /* Clear reg/sp for all instructions and add register hints. */
-static void asm_setup_regsp(ASMState *as, Trace *T)
+static void asm_setup_regsp(ASMState *as, GCtrace *T)
 {
   IRRef i, nins;
   int inloop;
@@ -3577,7 +3577,7 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
 #endif
 
 /* Assemble a trace. */
-void lj_asm_trace(jit_State *J, Trace *T)
+void lj_asm_trace(jit_State *J, GCtrace *T)
 {
   ASMState as_;
   ASMState *as = &as_;
@@ -3591,7 +3591,7 @@ void lj_asm_trace(jit_State *J, Trace *T)
   as->realign = NULL;
   as->loopinv = 0;
   if (J->parent) {
-    as->parent = J->trace[J->parent];
+    as->parent = traceref(J, J->parent);
     lj_snap_regspmap(as->parentmap, as->parent, J->exitno);
   } else {
     as->parent = NULL;
@@ -3667,7 +3667,7 @@ void lj_asm_trace(jit_State *J, Trace *T)
 }
 
 /* Patch exit jumps of existing machine code to a new target. */
-void lj_asm_patchexit(jit_State *J, Trace *T, ExitNo exitno, MCode *target)
+void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
 {
   MCode *p = T->mcode;
   MCode *mcarea = lj_mcode_patch(J, p, 0);

+ 2 - 2
src/lj_asm.h

@@ -9,8 +9,8 @@
 #include "lj_jit.h"
 
 #if LJ_HASJIT
-LJ_FUNC void lj_asm_trace(jit_State *J, Trace *T);
-LJ_FUNC void lj_asm_patchexit(jit_State *J, Trace *T, ExitNo exitno,
+LJ_FUNC void lj_asm_trace(jit_State *J, GCtrace *T);
+LJ_FUNC void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno,
 			      MCode *target);
 #endif
 

+ 2 - 1
src/lj_dispatch.c

@@ -239,7 +239,8 @@ int luaJIT_setmode(lua_State *L, int idx, int mode)
   case LUAJIT_MODE_TRACE:
     if (!(mode & LUAJIT_MODE_FLUSH))
       return 0;  /* Failed. */
-    return lj_trace_flush(G2J(g), idx);
+    lj_trace_flush(G2J(g), idx);
+    break;
 #else
   case LUAJIT_MODE_ENGINE:
   case LUAJIT_MODE_FUNC:

+ 0 - 1
src/lj_func.c

@@ -19,7 +19,6 @@
 
 void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt)
 {
-  lj_trace_freeproto(g, pt);
   lj_mem_free(g, pt, pt->sizept);
 }
 

+ 45 - 23
src/lj_gc.c

@@ -214,8 +214,21 @@ static void gc_traverse_func(global_State *g, GCfunc *fn)
 }
 
 #if LJ_HASJIT
+/* Mark a trace. Trace number 0 and the current trace (G2J(g)->curtrace)
+** are ignored. A trace object that is still white is turned gray and
+** pushed onto the g->gc.gray list; it is not traversed here.
+** NOTE(review): the current trace is presumably skipped because it still
+** lives in J->cur and is handled by gc_traverse_curtrace() -- confirm.
+*/
+static void gc_marktrace(global_State *g, TraceNo traceno)
+{
+  if (traceno && traceno != G2J(g)->curtrace) {
+    GCobj *o = obj2gco(traceref(G2J(g), traceno));
+    if (iswhite(o)) {
+      white2gray(o);
+      setgcrefr(o->gch.gclist, g->gc.gray);  /* Chain via the gclist field. */
+      setgcref(g->gc.gray, o);  /* The trace becomes the new list head. */
+    }
+  }
+}
+
 /* Traverse a trace. */
-static void gc_traverse_trace(global_State *g, Trace *T)
+static void gc_traverse_trace(global_State *g, GCtrace *T)
 {
   IRRef ref;
   for (ref = T->nk; ref < REF_TRUE; ref++) {
@@ -223,31 +236,23 @@ static void gc_traverse_trace(global_State *g, Trace *T)
     if (ir->o == IR_KGC)
       gc_markobj(g, ir_kgc(ir));
   }
+  gc_marktrace(g, T->link);
+  gc_marktrace(g, T->nextroot);
+  gc_marktrace(g, T->nextside);
+  gc_markobj(g, gcref(T->startpt));
 }
 
 /* The current trace is a GC root while not anchored in the prototype (yet). */
-#define gc_mark_curtrace(g) \
+#define gc_traverse_curtrace(g) \
   { if (G2J(g)->curtrace != 0) gc_traverse_trace(g, &G2J(g)->cur); }
 #else
-#define gc_mark_curtrace(g)	UNUSED(g)
+#define gc_traverse_curtrace(g)	UNUSED(g)
 #endif
 
 /* Traverse a prototype. */
 static void gc_traverse_proto(global_State *g, GCproto *pt)
 {
   ptrdiff_t i;
-#if LJ_HASJIT
-  jit_State *J = G2J(g);
-  TraceNo root, side;
-  /* Mark all root traces and attached side traces. */
-  for (root = pt->trace; root != 0; root = J->trace[root]->nextroot) {
-    for (side = J->trace[root]->nextside; side != 0;
-	 side = J->trace[side]->nextside)
-      gc_traverse_trace(g, J->trace[side]);
-    gc_traverse_trace(g, J->trace[root]);
-  }
-#endif
-  /* GC during prototype creation could cause NULL fields. */
   gc_mark_str(proto_chunkname(pt));
   for (i = -(ptrdiff_t)pt->sizekgc; i < 0; i++)  /* Mark collectable consts. */
     gc_markobj(g, proto_kgc(pt, i));
@@ -255,6 +260,9 @@ static void gc_traverse_proto(global_State *g, GCproto *pt)
     gc_mark_str(proto_uvname(pt, i));
   for (i = 0; i < (ptrdiff_t)pt->sizevarinfo; i++)  /* Mark names of locals. */
     gc_mark_str(gco2str(gcref(proto_varinfo(pt)[i].name)));
+#if LJ_HASJIT
+  gc_marktrace(g, pt->trace);
+#endif
 }
 
 /* Traverse the frame structure of a stack. */
@@ -311,13 +319,23 @@ static size_t propagatemark(global_State *g)
     GCproto *pt = gco2pt(o);
     gc_traverse_proto(g, pt);
     return pt->sizept;
-  } else {
+  } else if (LJ_LIKELY(o->gch.gct == ~LJ_TTHREAD)) {
     lua_State *th = gco2th(o);
     setgcrefr(th->gclist, g->gc.grayagain);
     setgcref(g->gc.grayagain, o);
     black2gray(o);  /* Threads are never black. */
     gc_traverse_thread(g, th);
     return sizeof(lua_State) + sizeof(TValue) * th->stacksize;
+  } else {
+#if LJ_HASJIT
+    GCtrace *T = gco2trace(o);
+    gc_traverse_trace(g, T);
+    return ((sizeof(GCtrace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) +
+	   T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(SnapEntry);
+#else
+    lua_assert(0);
+    return 0;
+#endif
   }
 }
 
@@ -351,7 +369,11 @@ static const GCFreeFunc gc_freefunc[] = {
   (GCFreeFunc)lj_state_free,
   (GCFreeFunc)lj_func_freeproto,
   (GCFreeFunc)lj_func_free,
+#if LJ_HASJIT
+  (GCFreeFunc)lj_trace_free,
+#else
   (GCFreeFunc)0,
+#endif
   (GCFreeFunc)lj_tab_free,
   (GCFreeFunc)lj_udata_free
 };
@@ -502,7 +524,7 @@ static void atomic(global_State *g, lua_State *L)
   setgcrefnull(g->gc.weak);
   lua_assert(!iswhite(obj2gco(mainthread(g))));
   gc_markobj(g, L);  /* Mark running thread. */
-  gc_mark_curtrace(g);  /* Mark current trace. */
+  gc_traverse_curtrace(g);  /* Traverse current trace. */
   gc_mark_gcroot(g);  /* Mark GC roots (again). */
   gc_propagate_gray(g);  /* Propagate all of the above. */
 
@@ -681,7 +703,7 @@ void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v)
   lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause);
   lua_assert(o->gch.gct != ~LJ_TTAB);
   /* Preserve invariant during propagation. Otherwise it doesn't matter. */
-  if (g->gc.state == GCSpropagate)
+  if (g->gc.state == GCSpropagate || g->gc.state == GCSatomic)
     gc_mark(g, v);  /* Move frontier forward. */
   else
     makewhite(g, o);  /* Make it white to avoid the following barrier. */
@@ -692,7 +714,7 @@ void LJ_FASTCALL lj_gc_barrieruv(global_State *g, TValue *tv)
 {
 #define TV2MARKED(x) \
   (*((uint8_t *)(x) - offsetof(GCupval, tv) + offsetof(GCupval, marked)))
-  if (g->gc.state == GCSpropagate)
+  if (g->gc.state == GCSpropagate || g->gc.state == GCSatomic)
     gc_mark(g, gcV(tv));
   else
     TV2MARKED(tv) = (TV2MARKED(tv) & cast_byte(~LJ_GC_COLORS)) | curwhite(g);
@@ -710,7 +732,7 @@ void lj_gc_closeuv(global_State *g, GCupval *uv)
   setgcrefr(o->gch.nextgc, g->gc.root);
   setgcref(g->gc.root, o);
   if (isgray(o)) {  /* A closed upvalue is never gray, so fix this. */
-    if (g->gc.state == GCSpropagate) {
+    if (g->gc.state == GCSpropagate || g->gc.state == GCSatomic) {
       gray2black(o);  /* Make it black and preserve invariant. */
       if (tviswhite(&uv->tv))
 	lj_gc_barrierf(g, o, gcV(&uv->tv));
@@ -723,10 +745,10 @@ void lj_gc_closeuv(global_State *g, GCupval *uv)
 
 #if LJ_HASJIT
 /* Mark a trace if it's saved during the propagation phase. */
-void lj_gc_barriertrace(global_State *g, void *T)
+void lj_gc_barriertrace(global_State *g, uint32_t traceno)
 {
-  if (g->gc.state == GCSpropagate)
-    gc_traverse_trace(g, (Trace *)T);
+  if (g->gc.state == GCSpropagate || g->gc.state == GCSatomic)  /* Atomic phase, too. */
+    gc_marktrace(g, traceno);  /* Only queues the trace on the gray list. */
 }
 #endif
 

+ 1 - 1
src/lj_gc.h

@@ -65,7 +65,7 @@ LJ_FUNC void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v);
 LJ_FUNCA void LJ_FASTCALL lj_gc_barrieruv(global_State *g, TValue *tv);
 LJ_FUNC void lj_gc_closeuv(global_State *g, GCupval *uv);
 #if LJ_HASJIT
-LJ_FUNC void lj_gc_barriertrace(global_State *g, void *T);
+LJ_FUNC void lj_gc_barriertrace(global_State *g, uint32_t traceno);
 #endif
 
 /* Barrier for stores to table objects. TValue and GCobj variant. */

+ 5 - 4
src/lj_gdbjit.c

@@ -378,7 +378,7 @@ static const ELFheader elfhdr_template = {
 typedef struct GDBJITctx {
   uint8_t *p;		/* Pointer to next address in obj.space. */
   uint8_t *startp;	/* Pointer to start address in obj.space. */
-  Trace *T;		/* Generate symbols for this trace. */
+  GCtrace *T;		/* Generate symbols for this trace. */
   uintptr_t mcaddr;	/* Machine code address. */
   MSize szmcode;	/* Size of machine code. */
   MSize spadjp;		/* Stack adjustment for parent trace or interpreter. */
@@ -698,7 +698,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
 }
 
 /* Add debug info for newly compiled trace and notify GDB. */
-void lj_gdbjit_addtrace(jit_State *J, Trace *T, TraceNo traceno)
+void lj_gdbjit_addtrace(jit_State *J, GCtrace *T, TraceNo traceno)
 {
   GDBJITctx ctx;
   lua_State *L = J->L;
@@ -709,7 +709,8 @@ void lj_gdbjit_addtrace(jit_State *J, Trace *T, TraceNo traceno)
   ctx.T = T;
   ctx.mcaddr = (uintptr_t)T->mcode;
   ctx.szmcode = T->szmcode;
-  ctx.spadjp = CFRAME_SIZE_JIT + (MSize)(parent?J->trace[parent]->spadjust:0);
+  ctx.spadjp = CFRAME_SIZE_JIT +
+	       (MSize)(parent ? traceref(J, parent)->spadjust : 0);
   ctx.spadj = CFRAME_SIZE_JIT + T->spadjust;
   if (startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc)
     ctx.lineno = proto_line(pt, proto_bcpos(pt, startpc));
@@ -727,7 +728,7 @@ void lj_gdbjit_addtrace(jit_State *J, Trace *T, TraceNo traceno)
 }
 
 /* Delete debug info for trace and notify GDB. */
-void lj_gdbjit_deltrace(jit_State *J, Trace *T)
+void lj_gdbjit_deltrace(jit_State *J, GCtrace *T)
 {
   GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry;
   if (eo) {

+ 2 - 2
src/lj_gdbjit.h

@@ -11,8 +11,8 @@
 
 #if LJ_HASJIT && defined(LUAJIT_USE_GDBJIT)
 
-LJ_FUNC void lj_gdbjit_addtrace(jit_State *J, Trace *T, TraceNo traceno);
-LJ_FUNC void lj_gdbjit_deltrace(jit_State *J, Trace *T);
+LJ_FUNC void lj_gdbjit_addtrace(jit_State *J, GCtrace *T, TraceNo traceno);
+LJ_FUNC void lj_gdbjit_deltrace(jit_State *J, GCtrace *T);
 
 #else
 #define lj_gdbjit_addtrace(J, T, tn)	UNUSED(T)

+ 1 - 1
src/lj_ir.h

@@ -317,7 +317,7 @@ typedef enum {
   IRT_THREAD,
   IRT_PROTO,
   IRT_FUNC,
-  IRT_9,		/* Never used in the IR. */
+  IRT_9,		/* Unused (map of LJ_TTRACE). */
   IRT_TAB,
   IRT_UDATA,
   /* ... until here. */

+ 22 - 14
src/lj_jit.h

@@ -152,34 +152,42 @@ typedef uint16_t TraceNo1;	/* Stored trace number. */
 
 #define TRACE_INTERP	0	/* Fallback to interpreter. */
 
-/* Trace anchor. */
-typedef struct Trace {
-  IRIns *ir;		/* IR instructions/constants. Biased with REF_BIAS. */
+/* Trace object. A GC object tagged with ~LJ_TTRACE (see gco2trace below).
+** trace_save() in lj_trace.c allocates it with the IR, snapshot and
+** snapshot map data appended after the (8-byte rounded) struct.
+*/
+typedef struct GCtrace {
+  GCHeader;		/* Provides nextgc and gct (set in trace_save). */
+  uint8_t topslot;	/* Top stack slot already checked to be allocated. */
+  uint8_t unused1;
   IRRef nins;		/* Next IR instruction. Biased with REF_BIAS. */
+  GCRef gclist;		/* Gray list link. Offset must match GChead (asserted below). */
+  IRIns *ir;		/* IR instructions/constants. Biased with REF_BIAS. */
   IRRef nk;		/* Lowest IR constant. Biased with REF_BIAS. */
-  SnapShot *snap;	/* Snapshot array. */
-  SnapEntry *snapmap;	/* Snapshot map. */
   uint16_t nsnap;	/* Number of snapshots. */
   uint16_t nsnapmap;	/* Number of snapshot map elements. */
+  SnapShot *snap;	/* Snapshot array. */
+  SnapEntry *snapmap;	/* Snapshot map. */
   GCRef startpt;	/* Starting prototype. */
   BCIns startins;	/* Original bytecode of starting instruction. */
   MCode *mcode;		/* Start of machine code. */
   MSize szmcode;	/* Size of machine code. */
   MSize mcloop;		/* Offset of loop start in machine code. */
+  uint16_t nchild;	/* Number of child traces (root trace only). */
+  uint16_t spadjust;	/* Stack pointer adjustment (offset in bytes). */
+  TraceNo1 traceno;	/* Trace number. Index of this trace in J->trace[]. */
   TraceNo1 link;	/* Linked trace (or self for loops). */
   TraceNo1 root;	/* Root trace of side trace (or 0 for root traces). */
   TraceNo1 nextroot;	/* Next root trace for same prototype. */
   TraceNo1 nextside;	/* Next side trace of same root trace. */
-  uint16_t nchild;	/* Number of child traces (root trace only). */
-  uint16_t spadjust;	/* Stack pointer adjustment (offset in bytes). */
-  uint8_t topslot;	/* Top stack slot already checked to be allocated. */
-  uint8_t unused1;
-  uint8_t unused2;
-  uint8_t unused3;
+  uint16_t unused2;
 #ifdef LUAJIT_USE_GDBJIT
   void *gdbjit_entry;	/* GDB JIT entry. */
 #endif
-} Trace;
+} GCtrace;
+
+#define gco2trace(o)	check_exp((o)->gch.gct == ~LJ_TTRACE, (GCtrace *)(o))
+#define traceref(J, n) \
+  check_exp((n)>0 && (MSize)(n)<J->sizetrace, (GCtrace *)gcref(J->trace[(n)]))
+
+LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(GCtrace, gclist));
 
 /* Round-robin penalty cache for bytecodes leading to aborted traces. */
 typedef struct HotPenalty {
@@ -233,7 +241,7 @@ typedef struct FoldState {
 
 /* JIT compiler state. */
 typedef struct jit_State {
-  Trace cur;		/* Current trace. */
+  GCtrace cur;		/* Current trace. */
 
   lua_State *L;		/* Current Lua state. */
   const BCIns *pc;	/* Current PC. */
@@ -277,7 +285,7 @@ typedef struct jit_State {
   MSize sizesnapmap;	/* Size of temp. snapshot map buffer. */
 
   TraceNo curtrace;	/* Current trace number (if not 0). Kept in J->cur. */
-  Trace **trace;	/* Array of traces. */
+  GCRef *trace;		/* Array of traces. */
   TraceNo freetrace;	/* Start of scan for next free trace. */
   MSize sizetrace;	/* Size of trace array. */
 

+ 1 - 1
src/lj_obj.c

@@ -16,7 +16,7 @@ LJ_DATADEF const char *const lj_obj_typename[] = {  /* ORDER LUA_T */
 
 LJ_DATADEF const char *const lj_obj_itypename[] = {  /* ORDER LJ_T */
   "nil", "boolean", "boolean", "userdata", "string", "upval", "thread",
-  "proto", "function", "" /* Unused */, "table", "userdata", "number"
+  "proto", "function", "trace", "table", "userdata", "number"
 };
 
 /* Compare two objects without calling metamethods. */

+ 1 - 1
src/lj_obj.h

@@ -195,7 +195,7 @@ typedef const TValue cTValue;
 #define LJ_TTHREAD		(-7)
 #define LJ_TPROTO		(-8)
 #define LJ_TFUNC		(-9)
-/* Unused			(-10) */
+#define LJ_TTRACE		(-10)
 #define LJ_TTAB			(-11)
 #define LJ_TUDATA		(-12)
 /* This is just the canonical number type used in some places. */

+ 6 - 7
src/lj_record.c

@@ -265,7 +265,7 @@ static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t)
       if (op == BC_KSHORT || op == BC_KNUM) {  /* Found const. initializer. */
 	/* Now try to verify there's no forward jump across it. */
 	const BCIns *kpc = pc;
-	for ( ; pc > startpc; pc--)
+	for (; pc > startpc; pc--)
 	  if (bc_op(*pc) == BC_JMP) {
 	    const BCIns *target = pc+bc_j(*pc)+1;
 	    if (target > kpc && target <= endpc)
@@ -2237,10 +2237,10 @@ void lj_record_ins(jit_State *J)
     break;
 
   case BC_JFORL:
-    rec_loop_jit(J, rc, rec_for(J, pc+bc_j(J->trace[rc]->startins), 1));
+    rec_loop_jit(J, rc, rec_for(J, pc+bc_j(traceref(J, rc)->startins), 1));
     break;
   case BC_JITERL:
-    rec_loop_jit(J, rc, rec_iterl(J, J->trace[rc]->startins));
+    rec_loop_jit(J, rc, rec_iterl(J, traceref(J, rc)->startins));
     break;
   case BC_JLOOP:
     rec_loop_jit(J, rc, rec_loop(J, ra));
@@ -2412,7 +2412,7 @@ static const BCIns *rec_setup_root(jit_State *J)
 }
 
 /* Setup recording for a side trace. */
-static void rec_setup_side(jit_State *J, Trace *T)
+static void rec_setup_side(jit_State *J, GCtrace *T)
 {
   SnapShot *snap = &T->snap[J->exitno];
   SnapEntry *map = &T->snapmap[snap->mapofs];
@@ -2500,10 +2500,9 @@ void lj_record_setup(jit_State *J)
   }
   J->cur.nk = REF_TRUE;
 
-  setgcref(J->cur.startpt, obj2gco(J->pt));
   J->startpc = J->pc;
   if (J->parent) {  /* Side trace. */
-    Trace *T = J->trace[J->parent];
+    GCtrace *T = traceref(J, J->parent);
     TraceNo root = T->root ? T->root : J->parent;
     J->cur.root = (uint16_t)root;
     J->cur.startins = BCINS_AD(BC_JMP, 0, 0);
@@ -2521,7 +2520,7 @@ void lj_record_setup(jit_State *J)
     }
     rec_setup_side(J, T);
   sidecheck:
-    if (J->trace[J->cur.root]->nchild >= J->param[JIT_P_maxside] ||
+    if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] ||
 	T->snap[J->exitno].count >= J->param[JIT_P_hotexit] +
 				    J->param[JIT_P_tryside])
       rec_stop(J, TRACE_INTERP);

+ 4 - 4
src/lj_snap.c

@@ -168,7 +168,7 @@ void lj_snap_shrink(jit_State *J)
 ** There are very few renames (often none), so the filter has
 ** very few bits set. This makes it suitable for negative filtering.
 */
-static BloomFilter snap_renamefilter(Trace *T, SnapNo lim)
+static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim)
 {
   BloomFilter rfilt = 0;
   IRIns *ir;
@@ -179,7 +179,7 @@ static BloomFilter snap_renamefilter(Trace *T, SnapNo lim)
 }
 
 /* Process matching renames to find the original RegSP. */
-static RegSP snap_renameref(Trace *T, SnapNo lim, IRRef ref, RegSP rs)
+static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
 {
   IRIns *ir;
   for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
@@ -191,7 +191,7 @@ static RegSP snap_renameref(Trace *T, SnapNo lim, IRRef ref, RegSP rs)
 /* Convert a snapshot into a linear slot -> RegSP map.
 ** Note: unused slots are not initialized!
 */
-void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno)
+void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno)
 {
   SnapShot *snap = &T->snap[snapno];
   MSize n, nent = snap->nent;
@@ -215,7 +215,7 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
 {
   ExitState *ex = (ExitState *)exptr;
   SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */
-  Trace *T = J->trace[J->parent];
+  GCtrace *T = traceref(J, J->parent);
   SnapShot *snap = &T->snap[snapno];
   MSize n, nent = snap->nent;
   SnapEntry *map = &T->snapmap[snap->mapofs];

+ 1 - 1
src/lj_snap.h

@@ -12,7 +12,7 @@
 #if LJ_HASJIT
 LJ_FUNC void lj_snap_add(jit_State *J);
 LJ_FUNC void lj_snap_shrink(jit_State *J);
-LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno);
+LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno);
 LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr);
 LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need);
 LJ_FUNC void lj_snap_grow_map_(jit_State *J, MSize need);

+ 67 - 100
src/lj_trace.c

@@ -52,7 +52,7 @@ void lj_trace_err_info(jit_State *J, TraceError e)
 /* The current trace is first assembled in J->cur. The variable length
 ** arrays point to shared, growable buffers (J->irbuf etc.). The trace is
 ** kept in this state until a new trace needs to be created. Then the current
-** trace and its data structures are copied to a new (compact) Trace object.
+** trace and its data structures are copied to a new (compact) GCtrace object.
 */
 
 /* Find a free trace number. */
@@ -62,7 +62,7 @@ static TraceNo trace_findfree(jit_State *J)
   if (J->freetrace == 0)
     J->freetrace = 1;
   for (; J->freetrace < J->sizetrace; J->freetrace++)
-    if (J->trace[J->freetrace] == NULL)
+    if (traceref(J, J->freetrace) == NULL)
       return J->freetrace++;
   /* Need to grow trace array. */
   lim = (MSize)J->param[JIT_P_maxtrace] + 1;
@@ -70,9 +70,9 @@ static TraceNo trace_findfree(jit_State *J)
   osz = J->sizetrace;
   if (osz >= lim)
     return 0;  /* Too many traces. */
-  lj_mem_growvec(J->L, J->trace, J->sizetrace, lim, Trace *);
-  while (osz < J->sizetrace)
-    J->trace[osz++] = NULL;
+  lj_mem_growvec(J->L, J->trace, J->sizetrace, lim, GCRef);
+  for (; osz < J->sizetrace; osz++)
+    setgcrefnull(J->trace[osz]);
   return J->freetrace;
 }
 
@@ -82,64 +82,40 @@ static TraceNo trace_findfree(jit_State *J)
   p += T->szfield*sizeof(tp);
 
 /* Save a trace by copying and compacting it.
 ** The copy is one allocation: the GCtrace struct (rounded up to 8 bytes),
 ** then the IR from nk to nins, then the snapshots and the snapshot map.
 ** Returns the new trace object, already linked into the g->gc.root chain.
 */
-static Trace *trace_save(jit_State *J, Trace *T)
+static GCtrace *trace_save(jit_State *J, GCtrace *T)
 {
-  size_t sztr = ((sizeof(Trace)+7)&~7);
+  size_t sztr = ((sizeof(GCtrace)+7)&~7);  /* Struct size rounded up to 8. */
   size_t szins = (T->nins-T->nk)*sizeof(IRIns);
   size_t sz = sztr + szins +
 	      T->nsnap*sizeof(SnapShot) +
 	      T->nsnapmap*sizeof(SnapEntry);
-  Trace *T2 = lj_mem_newt(J->L, (MSize)sz, Trace);
+  GCtrace *T2 = lj_mem_newt(J->L, (MSize)sz, GCtrace);
   char *p = (char *)T2 + sztr;  /* Start of the variable-length part. */
-  memcpy(T2, T, sizeof(Trace));
+  memcpy(T2, T, sizeof(GCtrace));  /* Copies the header, too. Fixed up below. */
+  setgcrefr(T2->nextgc, J2G(J)->gc.root);  /* Link in at the head of gc.root. */
+  setgcrefp(J2G(J)->gc.root, T2);
+  newwhite(J2G(J), T2);
+  T2->gct = ~LJ_TTRACE;  /* Type tag checked by gco2trace(). */
   T2->ir = (IRIns *)p - T->nk;  /* Biased pointer: T2->ir[nk] is at p. */
   memcpy(p, T->ir+T->nk, szins);
   p += szins;
   TRACE_COPYELEM(snap, nsnap, SnapShot)
   TRACE_COPYELEM(snapmap, nsnapmap, SnapEntry)
-  lj_gc_barriertrace(J2G(J), T);
   return T2;
 }
 
-/* Free a trace. */
-static void trace_free(jit_State *J, TraceNo traceno)
-{
-  lua_assert(traceno != 0);
-  if (traceno < J->freetrace)
-    J->freetrace = traceno;
-  lj_gdbjit_deltrace(J, J->trace[traceno]);
-  if (traceno == J->curtrace) {
-    lua_assert(J->trace[traceno] == &J->cur);
-    J->trace[traceno] = NULL;
-    J->curtrace = 0;
-  } else {
-    Trace *T = J->trace[traceno];
-    lua_assert(T != NULL && T != &J->cur);
-    J->trace[traceno] = NULL;
-    lj_mem_free(J2G(J), T,
-      ((sizeof(Trace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) +
-      T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(SnapEntry));
-  }
-}
-
-/* Free all traces associated with a prototype. No unpatching needed. */
-void lj_trace_freeproto(global_State *g, GCproto *pt)
+void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T)  /* Free a trace object. Registered in gc_freefunc[] (lj_gc.c). */
 {
   jit_State *J = G2J(g);
-  TraceNo traceno;
-  /* Free all root traces. */
-  for (traceno = pt->trace; traceno != 0; ) {
-    TraceNo side, nextroot = J->trace[traceno]->nextroot;
-    /* Free all side traces. */
-    for (side = J->trace[traceno]->nextside; side != 0; ) {
-      TraceNo next = J->trace[side]->nextside;
-      trace_free(J, side);
-      side = next;
-    }
-    /* Now free the trace itself. */
-    trace_free(J, traceno);
-    traceno = nextroot;
+  if (T->traceno) {  /* Non-zero trace number: release its J->trace[] slot. */
+    lj_gdbjit_deltrace(J, T);
+    if (T->traceno < J->freetrace)  /* Restart the free-slot scan at the lower number. */
+      J->freetrace = T->traceno;
+    setgcrefnull(J->trace[T->traceno]);
   }
+  lj_mem_free(g, T,  /* Size uses the same formula as the allocation in trace_save(). */
+    ((sizeof(GCtrace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) +
+    T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(SnapEntry));
 }
 
 /* Re-enable compiling a prototype by unpatching any modified bytecode. */
@@ -160,7 +136,7 @@ void lj_trace_reenableproto(GCproto *pt)
 }
 
 /* Unpatch the bytecode modified by a root trace. */
-static void trace_unpatch(jit_State *J, Trace *T)
+static void trace_unpatch(jit_State *J, GCtrace *T)
 {
   BCOp op = bc_op(T->startins);
   MSize pcofs = T->snap[0].mapofs + T->snap[0].nent;
@@ -171,21 +147,21 @@ static void trace_unpatch(jit_State *J, Trace *T)
     lua_assert(bc_op(*pc) == BC_JFORI);
     setbc_op(pc, BC_FORI);  /* Unpatch JFORI, too. */
     pc += bc_j(*pc);
-    lua_assert(bc_op(*pc) == BC_JFORL && J->trace[bc_d(*pc)] == T);
+    lua_assert(bc_op(*pc) == BC_JFORL && traceref(J, bc_d(*pc)) == T);
     *pc = T->startins;
     break;
   case BC_LOOP:
-    lua_assert(bc_op(*pc) == BC_JLOOP && J->trace[bc_d(*pc)] == T);
+    lua_assert(bc_op(*pc) == BC_JLOOP && traceref(J, bc_d(*pc)) == T);
     *pc = T->startins;
     break;
   case BC_ITERL:
     lua_assert(bc_op(*pc) == BC_JMP);
     pc += bc_j(*pc)+2;
-    lua_assert(bc_op(*pc) == BC_JITERL && J->trace[bc_d(*pc)] == T);
+    lua_assert(bc_op(*pc) == BC_JITERL && traceref(J, bc_d(*pc)) == T);
     *pc = T->startins;
     break;
   case BC_FUNCF:
-    lua_assert(bc_op(*pc) == BC_JFUNCF && J->trace[bc_d(*pc)] == T);
+    lua_assert(bc_op(*pc) == BC_JFUNCF && traceref(J, bc_d(*pc)) == T);
     *pc = T->startins;
     break;
   case BC_JMP:  /* No need to unpatch branches in parent traces (yet). */
@@ -195,58 +171,41 @@ static void trace_unpatch(jit_State *J, Trace *T)
   }
 }
 
-/* Free a root trace and any attached side traces. */
-static void trace_freeroot(jit_State *J, Trace *T, TraceNo traceno)
+/* Flush a root trace. */
+static void trace_flushroot(jit_State *J, GCtrace *T)
 {
   GCproto *pt = &gcref(T->startpt)->pt;
-  TraceNo side;
   lua_assert(T->root == 0 && pt != NULL);
   /* First unpatch any modified bytecode. */
   trace_unpatch(J, T);
   /* Unlink root trace from chain anchored in prototype. */
-  if (pt->trace == traceno) {  /* Trace is first in chain. Easy. */
+  if (pt->trace == T->traceno) {  /* Trace is first in chain. Easy. */
     pt->trace = T->nextroot;
   } else {  /* Otherwise search in chain of root traces. */
-    Trace *T2 = J->trace[pt->trace];
-    while (T2->nextroot != traceno) {
+    GCtrace *T2 = traceref(J, pt->trace);
+    while (T2->nextroot != T->traceno) {
       lua_assert(T2->nextroot != 0);
-      T2 = J->trace[T2->nextroot];
+      T2 = traceref(J, T2->nextroot);
     }
     T2->nextroot = T->nextroot;  /* Unlink from chain. */
   }
-  /* Free all side traces. */
-  for (side = T->nextside; side != 0; ) {
-    TraceNo next = J->trace[side]->nextside;
-    trace_free(J, side);
-    side = next;
-  }
-  /* Now free the trace itself. */
-  trace_free(J, traceno);
 }
 
-/* Flush a root trace + side traces, if there are no links to it. */
-int lj_trace_flush(jit_State *J, TraceNo traceno)
+/* Flush a trace. Only root traces are considered. */
+void lj_trace_flush(jit_State *J, TraceNo traceno)
 {
   if (traceno > 0 && traceno < J->sizetrace) {
-    Trace *T = J->trace[traceno];
-    if (T && T->root == 0) {
-      ptrdiff_t i;
-      for (i = (ptrdiff_t)J->sizetrace-1; i > 0; i--)
-	if (i != (ptrdiff_t)traceno && J->trace[i] &&
-	    J->trace[i]->root != traceno && J->trace[i]->link == traceno)
-	  return 0;  /* Failed: existing link to trace. */
-      trace_freeroot(J, T, traceno);
-      return 1;  /* Ok. */
-    }
+    GCtrace *T = traceref(J, traceno);
+    if (T && T->root == 0)
+      trace_flushroot(J, T);
   }
-  return 0;  /* Failed. */
 }
 
 /* Flush all traces associated with a prototype. */
 void lj_trace_flushproto(global_State *g, GCproto *pt)
 {
   while (pt->trace != 0)
-    trace_freeroot(G2J(g), G2J(g)->trace[pt->trace], pt->trace);
+    trace_flushroot(G2J(g), traceref(G2J(g), pt->trace));
 }
 
 /* Flush all traces. */
@@ -257,14 +216,16 @@ int lj_trace_flushall(lua_State *L)
   if ((J2G(J)->hookmask & HOOK_GC))
     return 1;
   for (i = (ptrdiff_t)J->sizetrace-1; i > 0; i--) {
-    Trace *T = J->trace[i];
-    if (T && T->root == 0)
-      trace_freeroot(J, T, (TraceNo)i);
+    GCtrace *T = traceref(J, i);
+    if (T) {
+      if (T->root == 0)
+	trace_flushroot(J, T);
+      lj_gdbjit_deltrace(J, T);
+      T->traceno = 0;
+      setgcrefnull(J->trace[i]);
+    }
   }
-#ifdef LUA_USE_ASSERT
-  for (i = 0; i < (ptrdiff_t)J->sizetrace; i++)
-    lua_assert(J->trace[i] == NULL);
-#endif
+  J->curtrace = 0;
   J->freetrace = 0;
   /* Free the whole machine code and invalidate all exit stub groups. */
   lj_mcode_free(J);
@@ -293,11 +254,13 @@ void lj_trace_initstate(global_State *g)
 void lj_trace_freestate(global_State *g)
 {
   jit_State *J = G2J(g);
+  if (J->curtrace)
+    lj_gdbjit_deltrace(J, &J->cur);
 #ifdef LUA_USE_ASSERT
   {  /* This assumes all traces have already been freed. */
     ptrdiff_t i;
-    for (i = 0; i < (ptrdiff_t)J->sizetrace; i++)
-      lua_assert(J->trace[i] == NULL);
+    for (i = 1; i < (ptrdiff_t)J->sizetrace; i++)
+      lua_assert(i == (ptrdiff_t)J->curtrace || traceref(J, i) == NULL);
   }
 #endif
   lj_mcode_free(J);
@@ -305,7 +268,7 @@ void lj_trace_freestate(global_State *g)
   lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry);
   lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot);
   lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns);
-  lj_mem_freevec(g, J->trace, J->sizetrace, Trace *);
+  lj_mem_freevec(g, J->trace, J->sizetrace, GCRef);
 }
 
 /* -- Penalties and blacklisting ------------------------------------------ */
@@ -349,9 +312,11 @@ static void trace_start(jit_State *J)
 {
   lua_State *L;
 
-  if (J->curtrace != 0 && J->trace[J->curtrace] == &J->cur) {
-    J->trace[J->curtrace] = trace_save(J, &J->cur);  /* Save current trace. */
+  if (J->curtrace != 0 && traceref(J, J->curtrace) == &J->cur) {
+    TraceNo tr = J->curtrace;  /* Save current trace. */
+    setgcrefp(J->trace[tr], trace_save(J, &J->cur));
     J->curtrace = 0;
+    lj_gc_barriertrace(J2G(J), tr);
   }
 
   if ((J->pt->flags & PROTO_NO_JIT)) {  /* JIT disabled for this proto? */
@@ -374,10 +339,11 @@ static void trace_start(jit_State *J)
     J->state = LJ_TRACE_IDLE;  /* Silently ignored. */
     return;
   }
-  J->trace[J->curtrace] = &J->cur;
+  setgcrefp(J->trace[J->curtrace], &J->cur);
 
   /* Setup enough of the current trace to be able to send the vmevent. */
-  memset(&J->cur, 0, sizeof(Trace));
+  memset(&J->cur, 0, sizeof(GCtrace));
+  J->cur.traceno = J->curtrace;
   J->cur.nins = J->cur.nk = REF_BASE;
   J->cur.ir = J->irbuf;
   J->cur.snap = J->snapbuf;
@@ -385,6 +351,7 @@ static void trace_start(jit_State *J)
   J->mergesnap = 0;
   J->needsnap = 0;
   J->guardemit.irt = 0;
+  setgcref(J->cur.startpt, obj2gco(J->pt));
 
   L = J->L;
   lj_vmevent_send(L, TRACE,
@@ -431,12 +398,12 @@ static void trace_stop(jit_State *J)
   case BC_JMP:
     /* Patch exit branch in parent to side trace entry. */
     lua_assert(J->parent != 0 && J->cur.root != 0);
-    lj_asm_patchexit(J, J->trace[J->parent], J->exitno, J->cur.mcode);
+    lj_asm_patchexit(J, traceref(J, J->parent), J->exitno, J->cur.mcode);
     /* Avoid compiling a side trace twice (stack resizing uses parent exit). */
-    J->trace[J->parent]->snap[J->exitno].count = SNAPCOUNT_DONE;
+    traceref(J, J->parent)->snap[J->exitno].count = SNAPCOUNT_DONE;
     /* Add to side trace chain in root trace. */
     {
-      Trace *root = J->trace[J->cur.root];
+      GCtrace *root = traceref(J, J->cur.root);
       root->nchild++;
       J->cur.nextside = root->nextside;
       root->nextside = (TraceNo1)J->curtrace;
@@ -510,7 +477,7 @@ static int trace_abort(jit_State *J)
       copyTV(L, L->top++, &J->errinfo);
     );
     /* Drop aborted trace after the vmevent (which may still access it). */
-    J->trace[J->curtrace] = NULL;
+    setgcrefnull(J->trace[J->curtrace]);
     if (J->curtrace < J->freetrace)
       J->freetrace = J->curtrace;
     J->curtrace = 0;
@@ -631,7 +598,7 @@ void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc)
 /* Check for a hot side exit. If yes, start recording a side trace. */
 static void trace_hotside(jit_State *J, const BCIns *pc)
 {
-  SnapShot *snap = &J->trace[J->parent]->snap[J->exitno];
+  SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno];
   if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT)) &&
       snap->count != SNAPCOUNT_DONE &&
       ++snap->count >= J->param[JIT_P_hotexit]) {
@@ -699,7 +666,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
   else
     trace_hotside(J, pc);
   if (bc_op(*pc) == BC_JLOOP) {
-    BCIns *retpc = &J->trace[bc_d(*pc)]->startins;
+    BCIns *retpc = &traceref(J, bc_d(*pc))->startins;
     if (bc_isret(bc_op(*retpc))) {
       if (J->state == LJ_TRACE_RECORD) {
 	J->patchins = *pc;

+ 2 - 3
src/lj_trace.h

@@ -23,10 +23,10 @@ LJ_FUNC_NORET void lj_trace_err(jit_State *J, TraceError e);
 LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e);
 
 /* Trace management. */
-LJ_FUNC void lj_trace_freeproto(global_State *g, GCproto *pt);
+LJ_FUNC void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T);
 LJ_FUNC void lj_trace_reenableproto(GCproto *pt);
 LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt);
-LJ_FUNC int lj_trace_flush(jit_State *J, TraceNo traceno);
+LJ_FUNC void lj_trace_flush(jit_State *J, TraceNo traceno);
 LJ_FUNC int lj_trace_flushall(lua_State *L);
 LJ_FUNC void lj_trace_initstate(global_State *g);
 LJ_FUNC void lj_trace_freestate(global_State *g);
@@ -45,7 +45,6 @@ LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr);
 #define lj_trace_flushall(L)	(UNUSED(L), 0)
 #define lj_trace_initstate(g)	UNUSED(g)
 #define lj_trace_freestate(g)	UNUSED(g)
-#define lj_trace_freeproto(g, pt)  (UNUSED(g), UNUSED(pt), (void)0)
 #define lj_trace_abort(g)	UNUSED(g)
 #define lj_trace_end(J)		UNUSED(J)