Răsfoiți Sursa

change in string table: string table is now independent of GC lists; all
strings live in 'normal' GC lists

Roberto Ierusalimschy 12 ani în urmă
părinte
comite
ae800656c9
7 a modificat fișierele cu 125 adăugiri și 114 ștergeri
  1. 14 32
      lgc.c
  2. 5 6
      lgc.h
  3. 2 3
      lstate.c
  4. 5 7
      lstate.h
  5. 88 52
      lstring.c
  6. 2 1
      lstring.h
  7. 9 13
      ltests.c

+ 14 - 32
lgc.c

@@ -1,5 +1,5 @@
 /*
-** $Id: lgc.c,v 2.147 2013/08/19 14:18:43 roberto Exp roberto $
+** $Id: lgc.c,v 2.148 2013/08/20 17:46:34 roberto Exp roberto $
 ** Garbage Collector
 ** See Copyright Notice in lua.h
 */
@@ -667,7 +667,7 @@ static void freeobj (lua_State *L, GCObject *o) {
     case LUA_TTHREAD: luaE_freethread(L, gco2th(o)); break;
     case LUA_TUSERDATA: luaM_freemem(L, o, sizeudata(gco2u(o))); break;
     case LUA_TSHRSTR:
-      G(L)->strt.nuse--;
+      luaS_remove(L, rawgco2ts(o));  /* remove it from hash table */
       /* go through */
     case LUA_TLNGSTR: {
       luaM_freemem(L, o, sizestring(gco2ts(o)));
@@ -749,14 +749,10 @@ static GCObject **sweeptolive (lua_State *L, GCObject **p, int *n) {
 ** =======================================================
 */
 
-static void checkSizes (lua_State *L) {
+static void checkBuffer (lua_State *L) {
   global_State *g = G(L);
-  if (g->gckind != KGC_EMERGENCY) {  /* do not change sizes in emergency */
-    int hs = g->strt.size / 2;  /* half the size of the string table */
-    if (g->strt.nuse < cast(lu_int32, hs))  /* using less than that half? */
-      luaS_resize(L, hs);  /* halve its size */
+  if (g->gckind != KGC_EMERGENCY)
     luaZ_freebuffer(L, &g->buff);  /* free concatenation buffer */
-  }
 }
 
 
@@ -855,7 +851,7 @@ void luaC_checkfinalizer (lua_State *L, GCObject *o, Table *mt) {
     }
     /* search for pointer pointing to 'o' */
     for (p = &g->allgc; *p != o; p = &gch(*p)->next) { /* empty */ }
-    *p = ho->next;  /* remove 'o' from root list */
+    *p = ho->next;  /* remove 'o' from 'allgc' list */
     ho->next = g->finobj;  /* link it in list 'finobj' */
     g->finobj = o;
     l_setbit(ho->marked, FINALIZEDBIT);  /* mark it as such */
@@ -889,25 +885,23 @@ static void setpause (global_State *g, l_mem estimate) {
 }
 
 
-#define sweepphases  \
-	(bitmask(GCSsweepstring) | bitmask(GCSsweepudata) | bitmask(GCSsweep))
+#define sweepphases	(bitmask(GCSsweepudata) | bitmask(GCSsweep))
 
 
 /*
-** enter first sweep phase (strings) and prepare pointers for other
-** sweep phases.  The calls to 'sweeptolive' make pointers point to an
-** object inside the list (instead of to the header), so that the real
-** sweep do not need to skip objects created between "now" and the start
-** of the real sweep.
+** enter first sweep phase and prepare pointers for other sweep phases.
+** The calls to 'sweeptolive' make pointers point to an object inside
+** the list (instead of to the header), so that the real sweep do not
+** need to skip objects created between "now" and the start of the real
+** sweep.
 ** Returns how many objects it swept.
 */
 static int entersweep (lua_State *L) {
   global_State *g = G(L);
   int n = 0;
-  g->gcstate = GCSsweepstring;
+  g->gcstate = GCSsweepudata;
   lua_assert(g->sweepgc == NULL && g->sweepfin == NULL);
-  /* prepare to sweep strings, finalizable objects, and regular objects */
-  g->sweepstrgc = 0;
+  /* prepare to sweep finalizable objects and regular objects */
   g->sweepfin = sweeptolive(L, &g->finobj, &n);
   g->sweepgc = sweeptolive(L, &g->allgc, &n);
   return n;
@@ -926,7 +920,6 @@ static void callallpendingfinalizers (lua_State *L, int propagateerrors) {
 
 void luaC_freeallobjects (lua_State *L) {
   global_State *g = G(L);
-  int i;
   separatetobefnz(L, 1);  /* separate all objects with finalizers */
   lua_assert(g->finobj == NULL);
   callallpendingfinalizers(L, 0);
@@ -934,8 +927,6 @@ void luaC_freeallobjects (lua_State *L) {
   g->gckind = KGC_NORMAL;
   sweepwholelist(L, &g->finobj);  /* finalizers can create objs. in 'finobj' */
   sweepwholelist(L, &g->allgc);
-  for (i = 0; i < g->strt.size; i++)  /* free all string lists */
-    sweepwholelist(L, &g->strt.hash[i]);
   lua_assert(g->strt.nuse == 0);
 }
 
@@ -1009,15 +1000,6 @@ static lu_mem singlestep (lua_State *L) {
       sw = entersweep(L);
       return work + sw * GCSWEEPCOST;
     }
-    case GCSsweepstring: {
-      int i;
-      for (i = 0; i < GCSWEEPMAX && g->sweepstrgc + i < g->strt.size; i++)
-        sweepwholelist(L, &g->strt.hash[g->sweepstrgc + i]);
-      g->sweepstrgc += i;
-      if (g->sweepstrgc >= g->strt.size)  /* no more strings to sweep? */
-        g->gcstate = GCSsweepudata;
-      return i * GCSWEEPCOST;
-    }
     case GCSsweepudata: {
       if (g->sweepfin) {
         g->sweepfin = sweeplist(L, g->sweepfin, GCSWEEPMAX);
@@ -1037,7 +1019,7 @@ static lu_mem singlestep (lua_State *L) {
         /* sweep main thread */
         GCObject *mt = obj2gco(g->mainthread);
         sweeplist(L, &mt, 1);
-        checkSizes(L);
+        checkBuffer(L);
         g->gcstate = GCSpause;  /* finish collection */
         return GCSWEEPCOST;
       }

+ 5 - 6
lgc.h

@@ -1,5 +1,5 @@
 /*
-** $Id: lgc.h,v 2.62 2013/08/19 14:18:43 roberto Exp roberto $
+** $Id: lgc.h,v 2.63 2013/08/20 17:46:34 roberto Exp roberto $
 ** Garbage Collector
 ** See Copyright Notice in lua.h
 */
@@ -38,14 +38,13 @@
 */
 #define GCSpropagate	0
 #define GCSatomic	1
-#define GCSsweepstring	2
-#define GCSsweepudata	3
-#define GCSsweep	4
-#define GCSpause	5
+#define GCSsweepudata	2
+#define GCSsweep	3
+#define GCSpause	4
 
 
 #define issweepphase(g)  \
-	(GCSsweepstring <= (g)->gcstate && (g)->gcstate <= GCSsweep)
+	(GCSsweepudata <= (g)->gcstate && (g)->gcstate <= GCSsweep)
 
 
 /*

+ 2 - 3
lstate.c

@@ -1,5 +1,5 @@
 /*
-** $Id: lstate.c,v 2.101 2013/08/07 12:18:11 roberto Exp roberto $
+** $Id: lstate.c,v 2.102 2013/08/16 18:55:49 roberto Exp roberto $
 ** Global State
 ** See Copyright Notice in lua.h
 */
@@ -280,8 +280,7 @@ LUA_API lua_State *lua_newstate (lua_Alloc f, void *ud) {
   g->seed = makeseed(L);
   g->gcrunning = 0;  /* no GC while building state */
   g->GCestimate = 0;
-  g->strt.size = 0;
-  g->strt.nuse = 0;
+  g->strt.size = g->strt.nuse = g->strt.empty = 0;
   g->strt.hash = NULL;
   setnilvalue(&g->l_registry);
   luaZ_initbuffer(L, &g->buff);

+ 5 - 7
lstate.h

@@ -1,5 +1,5 @@
 /*
-** $Id: lstate.h,v 2.84 2013/08/07 12:18:11 roberto Exp roberto $
+** $Id: lstate.h,v 2.85 2013/08/20 17:46:34 roberto Exp roberto $
 ** Global State
 ** See Copyright Notice in lua.h
 */
@@ -24,8 +24,6 @@
 ** at the end of the 'allgc' list, after the 'l_registry' (which is
 ** the first object to be added to the list).
 **
-** Short strings are kept in several lists headed by the array g->strt.hash.
-**
 ** Open upvalues are not subject to independent garbage collection. They
 ** are collected together with their respective threads. (They are
 ** always gray, so they must be remarked in the atomic step. Usually
@@ -57,9 +55,10 @@ struct lua_longjmp;  /* defined in ldo.c */
 
 
 typedef struct stringtable {
-  GCObject **hash;
-  lu_int32 nuse;  /* number of elements */
-  int size;
+  TString **hash;
+  unsigned int nuse;  /* number of elements */
+  unsigned int empty;  /* number of available empty slots */
+  unsigned int size;
 } stringtable;
 
 
@@ -123,7 +122,6 @@ typedef struct global_State {
   lu_byte gcstate;  /* state of garbage collector */
   lu_byte gckind;  /* kind of GC running */
   lu_byte gcrunning;  /* true if GC is running */
-  int sweepstrgc;  /* position of sweep in `strt' */
   GCObject *allgc;  /* list of all collectable objects */
   GCObject *finobj;  /* list of collectable objects with finalizers */
   GCObject **sweepgc;  /* current position of sweep in list 'allgc' */

+ 88 - 52
lstring.c

@@ -1,5 +1,5 @@
 /*
-** $Id: lstring.c,v 2.27 2013/06/19 14:27:00 roberto Exp roberto $
+** $Id: lstring.c,v 2.28 2013/08/05 16:58:28 roberto Exp roberto $
 ** String table (keeps all strings handled by Lua)
 ** See Copyright Notice in lua.h
 */
@@ -12,12 +12,21 @@
 
 #include "lua.h"
 
+#include "ldebug.h"
 #include "lmem.h"
 #include "lobject.h"
 #include "lstate.h"
 #include "lstring.h"
 
 
+/* mark for vacant places in hash table */
+#define VACANTK		cast(TString *, cast(size_t, -1))
+
+
+/* second hash (for double hash) */
+#define h2(h1,hash,size)	lmod(h1 + ((hash % 31) | 1), size)
+
+
 /*
 ** Lua will use at most ~(2^LUAI_HASHLIMIT) bytes from a string to
 ** compute its hash
@@ -64,30 +73,27 @@ unsigned int luaS_hash (const char *str, size_t l, unsigned int seed) {
 void luaS_resize (lua_State *L, int newsize) {
   int i;
   stringtable *tb = &G(L)->strt;
-  /* cannot resize while GC is traversing strings */
-  luaC_runtilstate(L, ~bitmask(GCSsweepstring));
-  if (newsize > tb->size) {
-    luaM_reallocvector(L, tb->hash, tb->size, newsize, GCObject *);
-    for (i = tb->size; i < newsize; i++) tb->hash[i] = NULL;
-  }
+  TString **oldhash = tb->hash;
+  int oldsize = tb->size;
+  tb->hash = luaM_newvector(L, newsize, TString *);
+  tb->size = newsize;
+  /* keep load factor below 75% */
+  tb->empty = newsize/2 + newsize/4 - tb->nuse;
+  for (i = 0; i < newsize; i++) tb->hash[i] = NULL;
+  tb->nuse = 0;
   /* rehash */
-  for (i=0; i<tb->size; i++) {
-    GCObject *p = tb->hash[i];
-    tb->hash[i] = NULL;
-    while (p) {  /* for each node in the list */
-      GCObject *next = gch(p)->next;  /* save next */
-      unsigned int h = lmod(gco2ts(p)->hash, newsize);  /* new position */
-      gch(p)->next = tb->hash[h];  /* chain it */
-      tb->hash[h] = p;
-      p = next;
+  for (i = 0; i < oldsize; i++) {
+    TString *ts = oldhash[i];
+    if (ts != NULL && ts != VACANTK) {
+      unsigned int hash = ts->tsv.hash;
+      int h1 = lmod(hash, tb->size);
+      while (tb->hash[h1] != NULL)
+        h1 = h2(h1, hash, tb->size);
+      tb->hash[h1] = ts;
+      tb->nuse++;
     }
   }
-  if (newsize < tb->size) {
-    /* shrinking slice must be empty */
-    lua_assert(tb->hash[newsize] == NULL && tb->hash[tb->size - 1] == NULL);
-    luaM_reallocvector(L, tb->hash, tb->size, newsize, GCObject *);
-  }
-  tb->size = newsize;
+  luaM_freearray(L, oldhash, oldsize);
 }
 
 
@@ -95,11 +101,11 @@ void luaS_resize (lua_State *L, int newsize) {
 ** creates a new string object
 */
 static TString *createstrobj (lua_State *L, const char *str, size_t l,
-                              int tag, unsigned int h, GCObject **list) {
+                              int tag, unsigned int h) {
   TString *ts;
   size_t totalsize;  /* total size of TString object */
   totalsize = sizeof(TString) + ((l + 1) * sizeof(char));
-  ts = &luaC_newobj(L, tag, totalsize, list, 0)->ts;
+  ts = &luaC_newobj(L, tag, totalsize, NULL, 0)->ts;
   ts->tsv.len = l;
   ts->tsv.hash = h;
   ts->tsv.extra = 0;
@@ -109,20 +115,33 @@ static TString *createstrobj (lua_State *L, const char *str, size_t l,
 }
 
 
-/*
-** creates a new short string, inserting it into string table
-*/
-static TString *newshrstr (lua_State *L, const char *str, size_t l,
-                                       unsigned int h) {
-  GCObject **list;  /* (pointer to) list where it will be inserted */
+static void rehash (lua_State *L, stringtable *tb) {
+  int newsize;
+  if (tb->nuse < tb->size / 2) {  /* using less than half the size? */
+    if (tb->nuse < tb->size / 4)  /* using less than half of that? */
+      newsize = tb->size / 2;  /* shrink table */
+    else
+      newsize = tb->size;  /* keep size (but reorganize table) */
+  }
+  else {  /* table must grow */
+    if (tb->size >= MAX_INT/2)
+      luaG_runerror(L, "string-table overflow: too many strings");
+    newsize = tb->size * 2;
+  }
+  luaS_resize(L, newsize);
+}
+
+
+LUAI_FUNC void luaS_remove (lua_State *L, TString *ts) {
   stringtable *tb = &G(L)->strt;
-  TString *s;
-  if (tb->nuse >= cast(lu_int32, tb->size) && tb->size <= MAX_INT/2)
-    luaS_resize(L, tb->size*2);  /* too crowded */
-  list = &tb->hash[lmod(h, tb->size)];
-  s = createstrobj(L, str, l, LUA_TSHRSTR, h, list);
-  tb->nuse++;
-  return s;
+  unsigned int hash = ts->tsv.hash;
+  int h1 = lmod(hash, tb->size);
+  while (tb->hash[h1] != ts) {
+    lua_assert(tb->hash[h1] != NULL);
+    h1 = h2(h1, hash, tb->size);
+  }
+  tb->hash[h1] = VACANTK;
+  tb->nuse--;
 }
 
 
@@ -130,22 +149,39 @@ static TString *newshrstr (lua_State *L, const char *str, size_t l,
 ** checks whether short string exists and reuses it or creates a new one
 */
 static TString *internshrstr (lua_State *L, const char *str, size_t l) {
-  GCObject *o;
-  global_State *g = G(L);
-  unsigned int h = luaS_hash(str, l, g->seed);
-  for (o = g->strt.hash[lmod(h, g->strt.size)];
-       o != NULL;
-       o = gch(o)->next) {
-    TString *ts = rawgco2ts(o);
-    if (h == ts->tsv.hash &&
-        l == ts->tsv.len &&
-        (memcmp(str, getstr(ts), l * sizeof(char)) == 0)) {
-      if (isdead(G(L), o))  /* string is dead (but was not collected yet)? */
-        changewhite(o);  /* resurrect it */
-      return ts;
+  TString *ts;
+  unsigned int hash = luaS_hash(str, l, G(L)->seed);
+  stringtable *tb = &G(L)->strt;
+  int vacant = -1;
+  int h1;
+  if (tb->empty <= 0)
+    rehash(L, tb);
+  h1 = lmod(hash, tb->size);  /* previous call can changed 'size' */
+  while ((ts = tb->hash[h1]) != NULL) {  /* search the string in hash table */
+    if (ts == VACANTK) {
+      if (vacant < 0) vacant = h1;  /* keep track of a vacant place */
+    }
+    else if (l == ts->tsv.len &&
+            (memcmp(str, getstr(ts), l * sizeof(char)) == 0)) {
+      /* found! */
+      if (isdead(G(L), obj2gco(ts)))  /* dead (but was not collected yet)? */
+        changewhite(obj2gco(ts));  /* resurrect it */
+      if (vacant >= 0) {  /* is there a better place for this string? */
+        tb->hash[vacant] = ts;  /* move it up the line */
+        tb->hash[h1] = VACANTK;
+      }
+      return ts;  /* found */
     }
+    h1 = h2(h1, hash, tb->size);
   }
-  return newshrstr(L, str, l, h);  /* not found; create a new string */
+  ts = createstrobj(L, str, l, LUA_TSHRSTR, hash);
+  tb->nuse++;
+  if (vacant < 0)  /* found no vacant place? */
+    tb->empty--;  /* will have to use the empty place */
+  else
+    h1 = vacant;  /* insert string into vacant place */
+  tb->hash[h1] = ts;
+  return ts;
 }
 
 
@@ -158,7 +194,7 @@ TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
   else {
     if (l + 1 > (MAX_SIZE - sizeof(TString))/sizeof(char))
       luaM_toobig(L);
-    return createstrobj(L, str, l, LUA_TLNGSTR, G(L)->seed, NULL);
+    return createstrobj(L, str, l, LUA_TLNGSTR, G(L)->seed);
   }
 }
 

+ 2 - 1
lstring.h

@@ -1,5 +1,5 @@
 /*
-** $Id: lstring.h,v 1.49 2012/02/01 21:57:15 roberto Exp roberto $
+** $Id: lstring.h,v 1.50 2013/08/16 18:55:49 roberto Exp roberto $
 ** String table (keep all strings handled by Lua)
 ** See Copyright Notice in lua.h
 */
@@ -38,6 +38,7 @@ LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l, unsigned int seed);
 LUAI_FUNC int luaS_eqlngstr (TString *a, TString *b);
 LUAI_FUNC int luaS_eqstr (TString *a, TString *b);
 LUAI_FUNC void luaS_resize (lua_State *L, int newsize);
+LUAI_FUNC void luaS_remove (lua_State *L, TString *ts);
 LUAI_FUNC Udata *luaS_newudata (lua_State *L, size_t s, Table *e);
 LUAI_FUNC TString *luaS_newlstr (lua_State *L, const char *str, size_t l);
 LUAI_FUNC TString *luaS_new (lua_State *L, const char *str);

+ 9 - 13
ltests.c

@@ -1,5 +1,5 @@
 /*
-** $Id: ltests.c,v 2.145 2013/08/19 14:16:33 roberto Exp roberto $
+** $Id: ltests.c,v 2.146 2013/08/20 17:46:34 roberto Exp roberto $
 ** Internal Module for Debugging of the Lua Implementation
 ** See Copyright Notice in lua.h
 */
@@ -661,7 +661,7 @@ static int gc_local (lua_State *L) {
 
 static int gc_state (lua_State *L) {
   static const char *statenames[] = {"propagate", "atomic",
-    "sweepstring", "sweepudata", "sweep", "pause", ""};
+    "sweepudata", "sweep", "pause", ""};
   int option = luaL_checkoption(L, 1, "", statenames);
   if (option == GCSpause + 1) {
     lua_pushstring(L, statenames[G(L)->gcstate]);
@@ -742,22 +742,18 @@ static int table_query (lua_State *L) {
 static int string_query (lua_State *L) {
   stringtable *tb = &G(L)->strt;
   int s = luaL_optint(L, 2, 0) - 1;
-  if (s==-1) {
+  if (s < 0) {
     lua_pushinteger(L ,tb->nuse);
     lua_pushinteger(L ,tb->size);
     return 2;
   }
-  else if (s < tb->size) {
-    GCObject *ts;
-    int n = 0;
-    for (ts = tb->hash[s]; ts; ts = gch(ts)->next) {
-      setsvalue2s(L, L->top, rawgco2ts(ts));
-      api_incr_top(L);
-      n++;
-    }
-    return n;
+  else if ((unsigned int)s < tb->size) {
+    TString *ts = tb->hash[s];
+    setsvalue2s(L, L->top, ts);
+    api_incr_top(L);
+    return 1;
   }
-  return 0;
+  else return 0;
 }