Browse Source

Switch to pre-initialized stacks. Drop frame clearing in interpreter.

Mike Pall 15 years ago
parent
commit
ab90b8fc2b
7 changed files with 1701 additions and 1694 deletions
  1. 487 483
      src/buildvm_x64.h
  2. 426 421
      src/buildvm_x64win.h
  3. 32 28
      src/buildvm_x86.dasc
  4. 724 724
      src/buildvm_x86.h
  5. 12 21
      src/lj_gc.c
  6. 3 1
      src/lj_gc.h
  7. 17 16
      src/lj_state.c

File diff suppressed because it is too large
+ 487 - 483
src/buildvm_x64.h


File diff suppressed because it is too large
+ 426 - 421
src/buildvm_x64win.h


+ 32 - 28
src/buildvm_x86.dasc

@@ -414,23 +414,25 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  mov KBASE, [PC+PC2PROTO(k)]
   |  mov L:RB, SAVE_L
   |  lea RA, [BASE+RA*8]		// Top of frame.
-  |  lea RC, [BASE+NARGS:RC*8-4]	// Points to tag of 1st free slot.
   |  cmp RA, L:RB->maxstack
   |  ja ->gate_lf_growstack
-  |9:  // Entry point from vararg setup below.
-  |  mov RB, LJ_TNIL
-  |1:  // Clear free slots until top of frame.
-  |  mov [RC], RB
-  |  mov [RC+8], RB
-  |  add RC, 16
-  |  cmp RC, RA
-  |  jb <1
+  |  movzx RA, byte [PC+PC2PROTO(numparams)]
+  |  cmp NARGS:RC, RA			// Check for missing parameters.
+  |  jbe >3
+  |2:
 #if LJ_HASJIT
   |  // NYI: Disabled, until the tracer supports recursion/upcalls/leaves.
   |  // hotcall RB
 #endif
   |  ins_next
   |
+  |3:  // Clear missing parameters.
+  |  mov dword [BASE+NARGS:RC*8-4], LJ_TNIL
+  |  add NARGS:RC, 1
+  |  cmp NARGS:RC, RA			// More missing parameters to clear?
+  |  jbe <3
+  |  jmp <2
+  |
   |->gate_lv:				// Call gate for vararg Lua functions.
   |  // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = old base), PC = return
   |  // DISPATCH initialized
@@ -443,29 +445,38 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  movzx RB, byte [PC+PC2PROTO(framesize)]
   |  lea KBASE, [BASE+RB*8]
   |  mov L:RB, SAVE_L
-  |  lea RC, [BASE+4]
   |  cmp KBASE, L:RB->maxstack
   |  ja ->gate_lv_growstack		// Need to grow stack.
+  |  mov RC, BASE
   |  movzx RB, byte [PC+PC2PROTO(numparams)]
   |  test RB, RB
   |  jz >2
-  |1:  // Copy fixarg slots up.
+  |1:  // Copy fixarg slots up to new frame.
   |  add RA, 8
   |  cmp RA, BASE
-  |  jnb >2
+  |  jnb >3				// Fewer args than parameters?
   |  mov KBASE, [RA-8]
-  |  mov [RC-4], KBASE
-  |  mov KBASE, [RA-4]
   |  mov [RC], KBASE
+  |  mov KBASE, [RA-4]
+  |  mov [RC+4], KBASE
   |  add RC, 8
   |  mov dword [RA-4], LJ_TNIL		// Clear old fixarg slot (help the GC).
   |  sub RB, 1
   |  jnz <1
   |2:
-  |  movzx RA, byte [PC+PC2PROTO(framesize)]
   |  mov KBASE, [PC+PC2PROTO(k)]
-  |  lea RA, [BASE+RA*8]
-  |  jmp <9
+#if LJ_HASJIT
+  |  // NYI: Disabled, until the tracer supports recursion/upcalls/leaves.
+  |  // hotcall RB
+#endif
+  |  ins_next
+  |
+  |3:  // Clear missing parameters.
+  |  mov dword [RC+4], LJ_TNIL
+  |  add RC, 8
+  |  sub RB, 1
+  |  jnz <3
+  |  jmp <2
   |
   |->gate_cwrap:			// Call gate for wrapped C functions.
   |  // RA = new base, RB = CFUNC, RC = nargs+1, (BASE = old base), PC = return
@@ -663,19 +674,12 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  jmp >1
   |
   |->gate_lv_growstack:			// Grow stack for vararg Lua function.
-  //XXX
-  |  sub RC, 8
-  |  mov BASE, RA
-  |  mov RA, KBASE
-  |  lea PC, [PROTO:RB+sizeof(GCproto)]
-  |  mov L:RB, SAVE_L
+  |  mov BASE, RA			// Drop vararg frame again.
   |
   |->gate_lf_growstack:			// Grow stack for fixarg Lua function.
-  |  // BASE = new base, RA = requested top, RC = top (offset +4 bytes)
-  |  // RB = L, PC = first PC of called function (or anything if C function)
-  |  sub RC, 4				// Adjust top.
-  |  sub RA, BASE
-  |  shr RA, 3				// n = pt->framesize - L->top
+  |  // BASE = new base, RC = nargs+1, RB = L, PC = first PC
+  |  lea RC, [BASE+NARGS:RC*8-8]
+  |  movzx RA, byte [PC+PC2PROTO(framesize)]
   |  add PC, 4				// Must point after first instruction.
   |  mov L:RB->base, BASE
   |  mov L:RB->top, RC

File diff suppressed because it is too large
+ 724 - 724
src/buildvm_x86.h


+ 12 - 21
src/lj_gc.c

@@ -261,7 +261,7 @@ static void gc_traverse_proto(global_State *g, GCproto *pt)
 }
 
 /* Traverse the frame structure of a stack. */
-static TValue *gc_traverse_frames(global_State *g, lua_State *th)
+static MSize gc_traverse_frames(global_State *g, lua_State *th)
 {
   TValue *frame, *top = th->top-1;
   /* Note: extra vararg frame not skipped, marks function twice (harmless). */
@@ -274,32 +274,22 @@ static TValue *gc_traverse_frames(global_State *g, lua_State *th)
   }
   top++;  /* Correct bias of -1 (frame == base-1). */
   if (top > th->maxstack) top = th->maxstack;
-  return top;
+  return (MSize)(top - th->stack);  /* Return minimum needed stack size. */
 }
 
 /* Traverse a thread object. */
 static void gc_traverse_thread(global_State *g, lua_State *th)
 {
-  TValue *o, *lim;
-  gc_markobj(g, tabref(th->env));
-  for (o = th->stack+1; o < th->top; o++)
+  TValue *o, *top = th->top;
+  for (o = th->stack+1; o < top; o++)
     gc_marktv(g, o);
-  lim = gc_traverse_frames(g, th);
-  /* Extra cleanup required to avoid this marking problem:
-  **
-  ** [aa[bb.X|   X created.
-  ** [aa[cc|     GC called from (small) inner frame, X destroyed.
-  ** [aa....X.|  GC called again in (larger) outer frame, X resurrected (ouch).
-  **
-  ** During GC in step 2 the stack must be cleaned up to the max. frame extent:
-  **
-  **       ***|  Slots cleaned
-  **    [cc|      from top of last frame
-  ** [aa......|   to max. frame extent.
-  */
-  for (; o <= lim; o++)
-    setnilV(o);
-  lj_state_shrinkstack(th, (MSize)(lim - th->stack));
+  if (g->gc.state == GCSatomic) {
+    top = th->stack + th->stacksize;
+    for (; o < top; o++)  /* Clear unmarked slots. */
+      setnilV(o);
+  }
+  gc_markobj(g, tabref(th->env));
+  lj_state_shrinkstack(th, gc_traverse_frames(g, th));
 }
 
 /* Propagate one gray object. Traverse it and turn it black. */
@@ -524,6 +514,7 @@ static void atomic(global_State *g, lua_State *L)
 {
   size_t udsize;
 
+  g->gc.state = GCSatomic;
   gc_mark_uv(g);  /* Need to remark open upvalues (the thread may be dead). */
   gc_propagate_gray(g);  /* Propagate any left-overs. */
 

+ 3 - 1
src/lj_gc.h

@@ -9,7 +9,9 @@
 #include "lj_obj.h"
 
 /* Garbage collector states. Order matters. */
-enum { GCSpause, GCSpropagate, GCSsweepstring, GCSsweep, GCSfinalize };
+enum {
+  GCSpause, GCSpropagate, GCSatomic, GCSsweepstring, GCSsweep, GCSfinalize
+};
 
 /* Bitmasks for marked field of GCobj. */
 #define LJ_GC_WHITE0	0x01

+ 17 - 16
src/lj_state.c

@@ -49,14 +49,18 @@
 /* Resize stack slots and adjust pointers in state. */
 static void resizestack(lua_State *L, MSize n)
 {
-  TValue *oldst = L->stack;
+  TValue *st, *oldst = L->stack;
   ptrdiff_t delta;
+  MSize oldsize = L->stacksize;
   MSize realsize = n + 1 + LJ_STACK_EXTRA;
   GCobj *up;
   lua_assert((MSize)(L->maxstack-L->stack) == L->stacksize-LJ_STACK_EXTRA-1);
   lj_mem_reallocvec(L, L->stack, L->stacksize, realsize, TValue);
-  delta = (char *)L->stack - (char *)oldst;
-  L->maxstack = L->stack + n;
+  st = L->stack;
+  delta = (char *)st - (char *)oldst;
+  L->maxstack = st + n;
+  while (oldsize < realsize)  /* Clear new slots. */
+    setnilV(st + oldsize++);
   L->stacksize = realsize;
   L->base = (TValue *)((char *)L->base + delta);
   L->top = (TValue *)((char *)L->top + delta);
@@ -90,13 +94,8 @@ void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need)
   if (L->stacksize > LJ_STACK_MAXEX)  /* overflow while handling overflow? */
     lj_err_throw(L, LUA_ERRERR);
   resizestack(L, L->stacksize + (need > L->stacksize ? need : L->stacksize));
-  if (L->stacksize > LJ_STACK_MAXEX) {
-    if (curr_funcisL(L)) {  /* Clear slots of incomplete Lua frame. */
-      TValue *top = curr_topL(L);
-      while (--top >= L->top) setnilV(top);
-    }
-    lj_err_msg(L, LJ_ERR_STKOV);  /* ... to allow L->top = curr_topL(L). */
-  }
+  if (L->stacksize > LJ_STACK_MAXEX)
+    lj_err_msg(L, LJ_ERR_STKOV);
 }
 
 void LJ_FASTCALL lj_state_growstack1(lua_State *L)
@@ -107,13 +106,15 @@ void LJ_FASTCALL lj_state_growstack1(lua_State *L)
 /* Allocate basic stack for new state. */
 static void stack_init(lua_State *L1, lua_State *L)
 {
-  L1->stack = lj_mem_newvec(L, LJ_STACK_START + LJ_STACK_EXTRA, TValue);
+  TValue *st, *stend;
+  L1->stack = st = lj_mem_newvec(L, LJ_STACK_START + LJ_STACK_EXTRA, TValue);
   L1->stacksize = LJ_STACK_START + LJ_STACK_EXTRA;
-  L1->top = L1->stack;
-  L1->maxstack = L1->stack+(L1->stacksize - LJ_STACK_EXTRA)-1;
-  setthreadV(L1, L1->top, L1);  /* needed for curr_funcisL() on empty stack */
-  setnilV(L1->top);  /* but clear its type */
-  L1->base = ++L1->top;
+  stend = st + L1->stacksize;
+  L1->maxstack = stend - LJ_STACK_EXTRA - 1;
+  L1->base = L1->top = st+1;
+  setthreadV(L1, st, L1);  /* Needed for curr_funcisL() on empty stack. */
+  while (st < stend)  /* Clear new slots. */
+    setnilV(st++);
 }
 
 /* -- State handling ------------------------------------------------------ */

Some files were not shown because too many files changed in this diff