Browse Source

Save currently executing lua_State in g->cur_L.

g->cur_L is only a good approximation, due to deficiencies in the design of
the Lua/C API: it indicates _some_ valid state that is or was executing.
Also reorder the L->cframe stores to achieve a synchronously consistent state.
Mike Pall 12 years ago
parent
commit
517500ba48
15 changed files with 89 additions and 67 deletions
  1. 1 1
      src/lj_asm.c
  2. 4 4
      src/lj_asm_arm.h
  3. 1 1
      src/lj_asm_mips.h
  4. 1 1
      src/lj_asm_ppc.h
  5. 1 1
      src/lj_asm_x86.h
  6. 1 1
      src/lj_ccallback.c
  7. 1 0
      src/lj_dispatch.c
  8. 5 5
      src/lj_err.c
  9. 1 1
      src/lj_gc.c
  10. 1 1
      src/lj_obj.h
  11. 5 2
      src/lj_state.c
  12. 15 11
      src/vm_arm.dasc
  13. 19 15
      src/vm_mips.dasc
  14. 16 12
      src/vm_ppc.dasc
  15. 17 11
      src/vm_x86.dasc

+ 1 - 1
src/lj_asm.c

@@ -342,7 +342,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
     emit_getgl(as, r, jit_base);
   } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
     lua_assert(irt_isnil(ir->t));  /* REF_NIL stores ASMREF_L register. */
-    emit_getgl(as, r, jit_L);
+    emit_getgl(as, r, cur_L);
 #if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, r, ir_kint64(ir)->u64);

+ 4 - 4
src/lj_asm_arm.h

@@ -1944,7 +1944,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
 	   (int32_t)offsetof(lua_State, maxstack));
   if (irp) {  /* Must not spill arbitrary registers in head of side trace. */
-    int32_t i = i32ptr(&J2G(as->J)->jit_L);
+    int32_t i = i32ptr(&J2G(as->J)->cur_L);
     if (ra_hasspill(irp->s))
       emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
     emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
@@ -1952,7 +1952,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
       emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0);  /* Save temp. register. */
     emit_loadi(as, RID_TMP, (i & ~4095));
   } else {
-    emit_getgl(as, RID_TMP, jit_L);
+    emit_getgl(as, RID_TMP, cur_L);
   }
 }
 
@@ -2061,13 +2061,13 @@ static void asm_loop_fixup(ASMState *as)
 
 /* -- Head of trace ------------------------------------------------------- */
 
-/* Reload L register from g->jit_L. */
+/* Reload L register from g->cur_L. */
 static void asm_head_lreg(ASMState *as)
 {
   IRIns *ir = IR(ASMREF_L);
   if (ra_used(ir)) {
     Reg r = ra_dest(as, ir, RSET_GPR);
-    emit_getgl(as, r, jit_L);
+    emit_getgl(as, r, cur_L);
     ra_evictk(as);
   }
 }

+ 1 - 1
src/lj_asm_mips.h

@@ -1586,7 +1586,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack));
   if (pbase == RID_TMP)
     emit_getgl(as, RID_TMP, jit_base);
-  emit_getgl(as, tmp, jit_L);
+  emit_getgl(as, tmp, cur_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
     emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0);
 }

+ 1 - 1
src/lj_asm_ppc.h

@@ -1759,7 +1759,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
   if (pbase == RID_TMP)
     emit_getgl(as, RID_TMP, jit_base);
-  emit_getgl(as, tmp, jit_L);
+  emit_getgl(as, tmp, cur_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
     emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
 }

+ 1 - 1
src/lj_asm_x86.h

@@ -2369,7 +2369,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
     emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
 	      ptr2addr(&J2G(as->J)->jit_base));
   emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
-  emit_getgl(as, r, jit_L);
+  emit_getgl(as, r, cur_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
     emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
 }

+ 1 - 1
src/lj_ccallback.c

@@ -562,9 +562,9 @@ void LJ_FASTCALL lj_ccallback_leave(CTState *cts, TValue *o)
   }
   callback_conv_result(cts, L, o);
   /* Finally drop C frame and continuation frame. */
-  L->cframe = cframe_prev(L->cframe);
   L->top -= 2;
   L->base = obase;
+  L->cframe = cframe_prev(L->cframe);
   cts->cb.slot = 0;  /* Blacklist C function that called the callback. */
 }
 

+ 1 - 0
src/lj_dispatch.c

@@ -357,6 +357,7 @@ static void callhook(lua_State *L, int event, BCLine line)
     hook_enter(g);
     hookf(L, &ar);
     lua_assert(hook_active(g));
+    setgcref(g->cur_L, obj2gco(L));
     hook_leave(g);
   }
 }

+ 5 - 5
src/lj_err.c

@@ -99,8 +99,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
       TValue *top = restorestack(L, -nres);
       if (frame < top) {  /* Frame reached? */
 	if (errcode) {
-	  L->cframe = cframe_prev(cf);
 	  L->base = frame+1;
+	  L->cframe = cframe_prev(cf);
 	  unwindstack(L, top);
 	}
 	return cf;
@@ -119,8 +119,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
 #endif
 #if LJ_UNWIND_EXT
       if (errcode) {
-	L->cframe = cframe_prev(cf);
 	L->base = frame_prevd(frame) + 1;
+	L->cframe = cframe_prev(cf);
 	unwindstack(L, frame);
       } else if (cf != stopcf) {
 	cf = cframe_prev(cf);
@@ -144,8 +144,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
 	return cf;
       }
       if (errcode) {
-	L->cframe = cframe_prev(cf);
 	L->base = frame_prevd(frame) + 1;
+	L->cframe = cframe_prev(cf);
 	unwindstack(L, frame);
       }
       return cf;
@@ -166,8 +166,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
 	}
 	if (frame_typep(frame) == FRAME_PCALL)
 	  hook_leave(G(L));
-	L->cframe = cf;
 	L->base = frame_prevd(frame) + 1;
+	L->cframe = cf;
 	unwindstack(L, L->base);
       }
       return (void *)((intptr_t)cf | CFRAME_UNWIND_FF);
@@ -175,8 +175,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
   }
   /* No C frame. */
   if (errcode) {
-    L->cframe = NULL;
     L->base = tvref(L->stack)+1;
+    L->cframe = NULL;
     unwindstack(L, L->base);
     if (G(L)->panic)
       G(L)->panic(L);

+ 1 - 1
src/lj_gc.c

@@ -696,7 +696,7 @@ void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L)
 /* Perform multiple GC steps. Called from JIT-compiled code. */
 int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps)
 {
-  lua_State *L = gco2th(gcref(g->jit_L));
+  lua_State *L = gco2th(gcref(g->cur_L));
   L->base = tvref(G(L)->jit_base);
   L->top = curr_topL(L);
   while (steps-- > 0 && lj_gc_step(L) == 0)

+ 1 - 1
src/lj_obj.h

@@ -536,7 +536,7 @@ typedef struct global_State {
   lua_CFunction panic;	/* Called as a last resort for errors. */
   BCIns bc_cfunc_int;	/* Bytecode for internal C function calls. */
   BCIns bc_cfunc_ext;	/* Bytecode for external C function calls. */
-  GCRef jit_L;		/* Current JIT code lua_State. */
+  GCRef cur_L;		/* Currently executing lua_State. */
   MRef jit_base;	/* Current JIT code L->base or NULL. */
   MRef ctype_state;	/* Pointer to C type state. */
   GCRef gcroot[GCROOT_MAX];  /* GC roots. */

+ 5 - 2
src/lj_state.c

@@ -91,7 +91,7 @@ void lj_state_shrinkstack(lua_State *L, MSize used)
   if (4*used < L->stacksize &&
       2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize &&
       /* Don't shrink stack of live trace. */
-      (tvref(G(L)->jit_base) == NULL || obj2gco(L) != gcref(G(L)->jit_L)))
+      (tvref(G(L)->jit_base) == NULL || obj2gco(L) != gcref(G(L)->cur_L)))
     resizestack(L, L->stacksize >> 1);
 }
 
@@ -237,6 +237,7 @@ LUA_API void lua_close(lua_State *L)
 {
   global_State *g = G(L);
   int i;
+  setgcrefnull(g->cur_L);
   L = mainthread(g);  /* Only the main thread can be closed. */
   lj_func_closeuv(L, tvref(L->stack));
   lj_gc_separateudata(g, 1);  /* Separate udata which have GC metamethods. */
@@ -248,8 +249,8 @@ LUA_API void lua_close(lua_State *L)
   for (i = 0;;) {
     hook_enter(g);
     L->status = 0;
-    L->cframe = NULL;
     L->base = L->top = tvref(L->stack) + 1;
+    L->cframe = NULL;
     if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == 0) {
       if (++i >= 10) break;
       lj_gc_separateudata(g, 1);  /* Separate udata again. */
@@ -281,6 +282,8 @@ lua_State *lj_state_new(lua_State *L)
 void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L)
 {
   lua_assert(L != mainthread(g));
+  if (obj2gco(L) == gcref(g->cur_L))
+    setgcrefnull(g->cur_L);
   lj_func_closeuv(L, tvref(L->stack));
   lua_assert(gcref(L->openupval) == NULL);
   lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);

+ 15 - 11
src/vm_arm.dasc

@@ -419,13 +419,14 @@ static void build_subroutines(BuildCtx *ctx)
   |    add CARG2, sp, #CFRAME_RESUME
   |  ldrb CARG1, L->status
   |   str CARG3, SAVE_ERRF
-  |    str CARG2, L->cframe
+  |   str L, SAVE_PC			// Any value outside of bytecode is ok.
   |   str CARG3, SAVE_CFRAME
   |  cmp CARG1, #0
-  |   str L, SAVE_PC			// Any value outside of bytecode is ok.
+  |    str CARG2, L->cframe
   |  beq >3
   |
   |  // Resume after yield (like a return).
+  |  str L, [DISPATCH, #DISPATCH_GL(cur_L)]
   |  mov RA, BASE
   |   ldr BASE, L->base
   |   ldr CARG1, L->top
@@ -459,14 +460,15 @@ static void build_subroutines(BuildCtx *ctx)
   |   str CARG3, SAVE_NRES
   |    mov L, CARG1
   |   str CARG1, SAVE_L
-  |    mov BASE, CARG2
-  |  str sp, L->cframe			// Add our C frame to cframe chain.
   |    ldr DISPATCH, L->glref		// Setup pointer to dispatch table.
+  |     mov BASE, CARG2
   |   str CARG1, SAVE_PC		// Any value outside of bytecode is ok.
   |  str RC, SAVE_CFRAME
   |    add DISPATCH, DISPATCH, #GG_G2DISP
+  |  str sp, L->cframe			// Add our C frame to cframe chain.
   |
   |3:  // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
+  |  str L, [DISPATCH, #DISPATCH_GL(cur_L)]
   |  ldr RB, L->base			// RB = old base (for vmeta_call).
   |   ldr CARG1, L->top
   |    mov MASKR8, #255
@@ -492,20 +494,21 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov L, CARG1
   |   ldr RA, L:CARG1->stack
   |  str CARG1, SAVE_L
+  |    ldr DISPATCH, L->glref		// Setup pointer to dispatch table.
   |   ldr RB, L->top
   |  str CARG1, SAVE_PC			// Any value outside of bytecode is ok.
   |  ldr RC, L->cframe
+  |    add DISPATCH, DISPATCH, #GG_G2DISP
   |   sub RA, RA, RB			// Compute -savestack(L, L->top).
-  |  str sp, L->cframe			// Add our C frame to cframe chain.
   |  mov RB, #0
   |   str RA, SAVE_NRES			// Neg. delta means cframe w/o frame.
   |  str RB, SAVE_ERRF			// No error function.
   |  str RC, SAVE_CFRAME
+  |  str sp, L->cframe			// Add our C frame to cframe chain.
+  |    str L, [DISPATCH, #DISPATCH_GL(cur_L)]
   |  blx CARG4			// (lua_State *L, lua_CFunction func, void *ud)
-  |   ldr DISPATCH, L->glref		// Setup pointer to dispatch table.
   |  movs BASE, CRET1
-  |    mov PC, #FRAME_CP
-  |   add DISPATCH, DISPATCH, #GG_G2DISP
+  |   mov PC, #FRAME_CP
   |  bne <3				// Else continue with the call.
   |  b ->vm_leave_cp			// No base? Just remove C frame.
   |
@@ -1262,9 +1265,10 @@ static void build_subroutines(BuildCtx *ctx)
   |  ldr CARG3, L:RA->base
   |    mv_vmstate CARG2, INTERP
   |  ldr CARG4, L:RA->top
-  |    st_vmstate CARG2
   |   cmp CRET1, #LUA_YIELD
   |  ldr BASE, L->base
+  |    str L, [DISPATCH, #DISPATCH_GL(cur_L)]
+  |    st_vmstate CARG2
   |   bhi >8
   |  subs RC, CARG4, CARG3
   |   ldr CARG1, L->maxstack
@@ -2102,7 +2106,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  add CARG1, CARG1, CARG2, asr #6
   |   ldr CARG2, [lr, #4]	// Load exit stub group offset.
   |   sub CARG1, CARG1, lr
-  |  ldr L, [DISPATCH, #DISPATCH_GL(jit_L)]
+  |  ldr L, [DISPATCH, #DISPATCH_GL(cur_L)]
   |   add CARG1, CARG2, CARG1, lsr #2	// Compute exit number.
   |    ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
   |   str CARG1, [DISPATCH, #DISPATCH_J(exitno)]
@@ -4285,7 +4289,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |   st_vmstate CARG2
     |  ldr RA, TRACE:RC->mcode
     |   str BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
-    |   str L, [DISPATCH, #DISPATCH_GL(jit_L)]
     |   str L, [DISPATCH, #DISPATCH_GL(tmpbuf.L)]
     |  bx RA
     |.endif
@@ -4404,6 +4407,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  ldr BASE, L->base
     |    mv_vmstate CARG3, INTERP
     |   ldr CRET2, L->top
+    |    str L, [DISPATCH, #DISPATCH_GL(cur_L)]
     |   lsl RC, CRET1, #3
     |    st_vmstate CARG3
     |  ldr PC, [BASE, FRAME_PC]

+ 19 - 15
src/vm_mips.dasc

@@ -487,12 +487,13 @@ static void build_subroutines(BuildCtx *ctx)
   |    addiu DISPATCH, DISPATCH, GG_G2DISP
   |   sw r0, SAVE_NRES
   |   sw r0, SAVE_ERRF
-  |  sw TMP0, L->cframe
+  |   sw CARG1, SAVE_PC		// Any value outside of bytecode is ok.
   |   sw r0, SAVE_CFRAME
   |    beqz TMP1, >3
-  |.  sw CARG1, SAVE_PC		// Any value outside of bytecode is ok.
+  |. sw TMP0, L->cframe
   |
   |  // Resume after yield (like a return).
+  |  sw L, DISPATCH_GL(cur_L)(DISPATCH)
   |  move RA, BASE
   |   lw BASE, L->base
   |   lw TMP1, L->top
@@ -526,17 +527,18 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |1:  // Entry point for vm_pcall above (PC = ftype).
   |  lw TMP1, L:CARG1->cframe
-  |   sw CARG3, SAVE_NRES
   |    move L, CARG1
-  |   sw CARG1, SAVE_L
-  |    move BASE, CARG2
-  |  sw sp, L->cframe			// Add our C frame to cframe chain.
+  |   sw CARG3, SAVE_NRES
   |    lw DISPATCH, L->glref		// Setup pointer to dispatch table.
+  |   sw CARG1, SAVE_L
+  |     move BASE, CARG2
+  |    addiu DISPATCH, DISPATCH, GG_G2DISP
   |   sw CARG1, SAVE_PC		// Any value outside of bytecode is ok.
   |  sw TMP1, SAVE_CFRAME
-  |    addiu DISPATCH, DISPATCH, GG_G2DISP
+  |  sw sp, L->cframe			// Add our C frame to cframe chain.
   |
   |3:  // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
+  |  sw L, DISPATCH_GL(cur_L)(DISPATCH)
   |  lw TMP2, L->base			// TMP2 = old base (used in vmeta_call).
   |     lui TMP3, 0x59c0		// TOBIT = 2^52 + 2^51 (float).
   |   lw TMP1, L->top
@@ -567,20 +569,21 @@ static void build_subroutines(BuildCtx *ctx)
   |   lw TMP0, L:CARG1->stack
   |  sw CARG1, SAVE_L
   |   lw TMP1, L->top
+  |     lw DISPATCH, L->glref		// Setup pointer to dispatch table.
   |  sw CARG1, SAVE_PC			// Any value outside of bytecode is ok.
   |   subu TMP0, TMP0, TMP1		// Compute -savestack(L, L->top).
   |    lw TMP1, L->cframe
-  |    sw sp, L->cframe			// Add our C frame to cframe chain.
+  |     addiu DISPATCH, DISPATCH, GG_G2DISP
   |   sw TMP0, SAVE_NRES		// Neg. delta means cframe w/o frame.
   |  sw r0, SAVE_ERRF			// No error function.
-  |  move CFUNCADDR, CARG4
+  |    sw TMP1, SAVE_CFRAME
+  |    sw sp, L->cframe			// Add our C frame to cframe chain.
+  |     sw L, DISPATCH_GL(cur_L)(DISPATCH)
   |  jalr CARG4			// (lua_State *L, lua_CFunction func, void *ud)
-  |.   sw TMP1, SAVE_CFRAME
+  |.  move CFUNCADDR, CARG4
   |  move BASE, CRET1
-  |   lw DISPATCH, L->glref		// Setup pointer to dispatch table.
-  |    li PC, FRAME_CP
   |  bnez CRET1, <3			// Else continue with the call.
-  |.  addiu DISPATCH, DISPATCH, GG_G2DISP
+  |.  li PC, FRAME_CP
   |  b ->vm_leave_cp			// No base? Just remove C frame.
   |.  nop
   |
@@ -1364,6 +1367,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  lw TMP3, L:RA->top
   |    li_vmstate INTERP
   |  lw BASE, L->base
+  |    sw L, DISPATCH_GL(cur_L)(DISPATCH)
   |    st_vmstate
   |   beqz AT, >8
   |. subu RD, TMP3, TMP2
@@ -2045,7 +2049,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  lw TMP1, 0(TMP2)			// Load exit number.
   |  st_vmstate
   |   sw TMP2, 16+32*8+29*4(sp)		// Store sp in RID_SP.
-  |  lw L, DISPATCH_GL(jit_L)(DISPATCH)
+  |  lw L, DISPATCH_GL(cur_L)(DISPATCH)
   |   lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
   |  load_got lj_trace_exit
   |  sw L, DISPATCH_J(L)(DISPATCH)
@@ -3980,7 +3984,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |   sw AT, DISPATCH_GL(vmstate)(DISPATCH)
     |  lw TRACE:TMP2, 0(TMP1)
     |   sw BASE, DISPATCH_GL(jit_base)(DISPATCH)
-    |   sw L, DISPATCH_GL(jit_L)(DISPATCH)
     |  lw TMP2, TRACE:TMP2->mcode
     |   sw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
     |  jr TMP2
@@ -4108,6 +4111,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |    li_vmstate INTERP
     |  lw PC, FRAME_PC(BASE)		// Fetch PC of caller.
     |   subu RA, TMP1, RD		// RA = L->top - nresults*8
+    |    sw L, DISPATCH_GL(cur_L)(DISPATCH)
     |  b ->vm_returnc
     |.   st_vmstate
     break;

+ 16 - 12
src/vm_ppc.dasc

@@ -662,12 +662,13 @@ static void build_subroutines(BuildCtx *ctx)
   |   stw CARG3, SAVE_NRES
   |    cmplwi TMP1, 0
   |   stw CARG3, SAVE_ERRF
-  |  stp TMP0, L->cframe
   |   stp CARG3, SAVE_CFRAME
   |   stw CARG1, SAVE_PC		// Any value outside of bytecode is ok.
+  |  stp TMP0, L->cframe
   |    beq >3
   |
   |  // Resume after yield (like a return).
+  |  stw L, DISPATCH_GL(cur_L)(DISPATCH)
   |  mr RA, BASE
   |   lp BASE, L->base
   |     li TISNUM, LJ_TISNUM		// Setup type comparison constants.
@@ -707,17 +708,18 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |1:  // Entry point for vm_pcall above (PC = ftype).
   |  lp TMP1, L:CARG1->cframe
-  |   stw CARG3, SAVE_NRES
   |    mr L, CARG1
-  |   stw CARG1, SAVE_L
-  |    mr BASE, CARG2
-  |  stp sp, L->cframe			// Add our C frame to cframe chain.
+  |   stw CARG3, SAVE_NRES
   |    lwz DISPATCH, L->glref		// Setup pointer to dispatch table.
+  |   stw CARG1, SAVE_L
+  |     mr BASE, CARG2
+  |    addi DISPATCH, DISPATCH, GG_G2DISP
   |   stw CARG1, SAVE_PC		// Any value outside of bytecode is ok.
   |  stp TMP1, SAVE_CFRAME
-  |    addi DISPATCH, DISPATCH, GG_G2DISP
+  |  stp sp, L->cframe			// Add our C frame to cframe chain.
   |
   |3:  // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
+  |  stw L, DISPATCH_GL(cur_L)(DISPATCH)
   |  lp TMP2, L->base			// TMP2 = old base (used in vmeta_call).
   |     li TISNUM, LJ_TISNUM		// Setup type comparison constants.
   |   lp TMP1, L->top
@@ -754,15 +756,18 @@ static void build_subroutines(BuildCtx *ctx)
   |   lwz TMP0, L:CARG1->stack
   |  stw CARG1, SAVE_L
   |   lp TMP1, L->top
+  |     lwz DISPATCH, L->glref		// Setup pointer to dispatch table.
   |  stw CARG1, SAVE_PC			// Any value outside of bytecode is ok.
   |   sub TMP0, TMP0, TMP1		// Compute -savestack(L, L->top).
   |    lp TMP1, L->cframe
-  |    stp sp, L->cframe		// Add our C frame to cframe chain.
+  |     addi DISPATCH, DISPATCH, GG_G2DISP
   |  .toc lp CARG4, 0(CARG4)
   |  li TMP2, 0
   |   stw TMP0, SAVE_NRES		// Neg. delta means cframe w/o frame.
   |  stw TMP2, SAVE_ERRF		// No error function.
   |    stp TMP1, SAVE_CFRAME
+  |    stp sp, L->cframe		// Add our C frame to cframe chain.
+  |     stw L, DISPATCH_GL(cur_L)(DISPATCH)
   |  mtctr CARG4
   |  bctrl			// (lua_State *L, lua_CFunction func, void *ud)
   |.if PPE
@@ -771,9 +776,7 @@ static void build_subroutines(BuildCtx *ctx)
   |.else
   |  mr. BASE, CRET1
   |.endif
-  |   lwz DISPATCH, L->glref		// Setup pointer to dispatch table.
-  |    li PC, FRAME_CP
-  |   addi DISPATCH, DISPATCH, GG_G2DISP
+  |   li PC, FRAME_CP
   |  bne <3				// Else continue with the call.
   |  b ->vm_leave_cp			// No base? Just remove C frame.
   |
@@ -1629,6 +1632,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  lp TMP3, L:SAVE0->top
   |    li_vmstate INTERP
   |  lp BASE, L->base
+  |    stw L, DISPATCH_GL(cur_L)(DISPATCH)
   |    st_vmstate
   |   bgt >8
   |  sub RD, TMP3, TMP2
@@ -2535,7 +2539,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  savex_ 20,21,22,23
   |   lhz CARG4, 2(CARG3)		// Load trace number.
   |  savex_ 24,25,26,27
-  |  lwz L, DISPATCH_GL(jit_L)(DISPATCH)
+  |  lwz L, DISPATCH_GL(cur_L)(DISPATCH)
   |  savex_ 28,29,30,31
   |   sub CARG3, TMP0, CARG3		// Compute exit number.
   |  lp BASE, DISPATCH_GL(jit_base)(DISPATCH)
@@ -4852,7 +4856,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  lp TMP2, TRACE:TMP2->mcode
     |   stw BASE, DISPATCH_GL(jit_base)(DISPATCH)
     |  mtctr TMP2
-    |   stw L, DISPATCH_GL(jit_L)(DISPATCH)
     |   addi JGL, DISPATCH, GG_DISP2G+32768
     |   stw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
     |  bctr
@@ -4989,6 +4992,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  lp TMP1, L->top
     |    li_vmstate INTERP
     |  lwz PC, FRAME_PC(BASE)		// Fetch PC of caller.
+    |    stw L, DISPATCH_GL(cur_L)(DISPATCH)
     |   sub RA, TMP1, RD		// RA = L->top - nresults*8
     |    st_vmstate
     |  b ->vm_returnc

+ 17 - 11
src/vm_x86.dasc

@@ -630,17 +630,18 @@ static void build_subroutines(BuildCtx *ctx)
   |  lea KBASEa, [esp+CFRAME_RESUME]
   |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
   |  add DISPATCH, GG_G2DISP
-  |  mov L:RB->cframe, KBASEa
   |  mov SAVE_PC, RD			// Any value outside of bytecode is ok.
   |  mov SAVE_CFRAME, RDa
   |.if X64
   |  mov SAVE_NRES, RD
   |  mov SAVE_ERRF, RD
   |.endif
+  |  mov L:RB->cframe, KBASEa
   |  cmp byte L:RB->status, RDL
-  |  je >3				// Initial resume (like a call).
+  |  je >2				// Initial resume (like a call).
   |
   |  // Resume after yield (like a return).
+  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
   |  set_vmstate INTERP
   |  mov byte L:RB->status, RDL
   |  mov BASE, L:RB->base
@@ -680,20 +681,19 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov RA, INARG_BASE			// Caveat: overlaps SAVE_CFRAME!
   |.endif
   |
+  |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
   |  mov KBASEa, L:RB->cframe		// Add our C frame to cframe chain.
   |  mov SAVE_CFRAME, KBASEa
   |  mov SAVE_PC, L:RB			// Any value outside of bytecode is ok.
+  |  add DISPATCH, GG_G2DISP
   |.if X64
   |  mov L:RB->cframe, rsp
   |.else
   |  mov L:RB->cframe, esp
   |.endif
   |
-  |2:  // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype).
-  |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
-  |  add DISPATCH, GG_G2DISP
-  |
-  |3:  // Entry point for vm_resume above (RA = base, RB = L, PC = ftype).
+  |2:  // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
+  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
   |  set_vmstate INTERP
   |  mov BASE, L:RB->base		// BASE = old base (used in vmeta_call).
   |  add PC, RA
@@ -731,14 +731,17 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  mov KBASE, L:RB->stack		// Compute -savestack(L, L->top).
   |  sub KBASE, L:RB->top
+  |   mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
   |  mov SAVE_ERRF, 0			// No error function.
   |  mov SAVE_NRES, KBASE		// Neg. delta means cframe w/o frame.
+  |   add DISPATCH, GG_G2DISP
   |  // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
   |
   |.if X64
   |  mov KBASEa, L:RB->cframe		// Add our C frame to cframe chain.
   |  mov SAVE_CFRAME, KBASEa
   |  mov L:RB->cframe, rsp
+  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
   |
   |  call CARG4			// (lua_State *L, lua_CFunction func, void *ud)
   |.else
@@ -749,6 +752,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov KBASE, L:RB->cframe		// Add our C frame to cframe chain.
   |  mov SAVE_CFRAME, KBASE
   |  mov L:RB->cframe, esp
+  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
   |
   |  call BASE			// (lua_State *L, lua_CFunction func, void *ud)
   |.endif
@@ -1840,7 +1844,6 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov ARG3, RA
   |.endif
   |  call ->vm_resume			// (lua_State *L, TValue *base, 0, 0)
-  |  set_vmstate INTERP
   |
   |  mov L:RB, SAVE_L
   |.if X64
@@ -1849,6 +1852,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov L:PC, ARG1			// The callee doesn't modify SAVE_L.
   |.endif
   |  mov BASE, L:RB->base
+  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+  |  set_vmstate INTERP
+  |
   |  cmp eax, LUA_YIELD
   |  ja >8
   |4:
@@ -2705,7 +2711,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
   |.endif
   |  // Caveat: RB is ebp.
-  |  mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)]
+  |  mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
   |  mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
   |  mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
   |  mov L:RB->base, BASE
@@ -5382,7 +5388,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  mov RDa, TRACE:RD->mcode
     |  mov L:RB, SAVE_L
     |  mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
-    |  mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB
     |  mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
     |  // Save additional callee-save registers only used in compiled code.
     |.if X64WIN
@@ -5550,9 +5555,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
       |  // (lua_State *L, lua_CFunction f)
       |  call aword [DISPATCH+DISPATCH_GL(wrapf)]
     }
-    |  set_vmstate INTERP
     |  // nresults returned in eax (RD).
     |  mov BASE, L:RB->base
+    |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+    |  set_vmstate INTERP
     |  lea RA, [BASE+RD*8]
     |  neg RA
     |  add RA, L:RB->top		// RA = (L->top-(L->base+nresults))*8