Browse Source

back to open hashing for the string table (but with a different
'hnext' field, so strings are still collected like all other
objects)

Roberto Ierusalimschy 12 years ago
parent
commit
d3bbb34c24
5 changed files with 61 additions and 92 deletions
  1. 3 2
      lobject.h
  2. 2 2
      lstate.c
  3. 1 2
      lstate.h
  4. 43 78
      lstring.c
  5. 12 8
      ltests.c

+ 3 - 2
lobject.h

@@ -1,5 +1,5 @@
 /*
-** $Id: lobject.h,v 2.80 2013/08/18 16:12:18 roberto Exp roberto $
+** $Id: lobject.h,v 2.81 2013/08/27 18:53:35 roberto Exp roberto $
 ** Type definitions for Lua objects
 ** See Copyright Notice in lua.h
 */
@@ -310,8 +310,9 @@ typedef union TString {
   struct {
     CommonHeader;
     lu_byte extra;  /* reserved words for short strings; "has hash" for longs */
-    unsigned int hash;
     size_t len;  /* number of characters in string */
+    union TString *hnext;  /* linked list for hash table */
+    unsigned int hash;
   } tsv;
 } TString;
 

+ 2 - 2
lstate.c

@@ -1,5 +1,5 @@
 /*
-** $Id: lstate.c,v 2.109 2013/08/30 19:14:26 roberto Exp roberto $
+** $Id: lstate.c,v 2.110 2013/09/03 15:37:10 roberto Exp roberto $
 ** Global State
 ** See Copyright Notice in lua.h
 */
@@ -289,7 +289,7 @@ LUA_API lua_State *lua_newstate (lua_Alloc f, void *ud) {
   g->gcrunning = 0;  /* no GC while building state */
   g->GCestimate = 0;
   g->GCthreshold = 10000;
-  g->strt.size = g->strt.nuse = g->strt.empty = 0;
+  g->strt.size = g->strt.nuse = 0;
   g->strt.hash = NULL;
   setnilvalue(&g->l_registry);
   luaZ_initbuffer(L, &g->buff);

+ 1 - 2
lstate.h

@@ -1,5 +1,5 @@
 /*
-** $Id: lstate.h,v 2.92 2013/08/30 19:14:26 roberto Exp roberto $
+** $Id: lstate.h,v 2.93 2013/09/03 15:37:10 roberto Exp roberto $
 ** Global State
 ** See Copyright Notice in lua.h
 */
@@ -53,7 +53,6 @@ struct lua_longjmp;  /* defined in ldo.c */
 typedef struct stringtable {
   TString **hash;
   int nuse;  /* number of elements */
-  int empty;  /* number of available empty slots */
   int size;
 } stringtable;
 

+ 43 - 78
lstring.c

@@ -1,5 +1,5 @@
 /*
-** $Id: lstring.c,v 2.32 2013/08/27 20:04:00 roberto Exp roberto $
+** $Id: lstring.c,v 2.33 2013/08/28 18:30:26 roberto Exp roberto $
 ** String table (keeps all strings handled by Lua)
 ** See Copyright Notice in lua.h
 */
@@ -20,13 +20,6 @@
 #include "lstring.h"
 
 
-/* mark for vacant places in hash table */
-#define VACANTK		cast(TString *, cast(size_t, -1))
-
-
-/* second hash (for double hash) */
-#define h2(h1,hash,size)	lmod(h1 + ((hash % 61) | 1), size)
-
 
 /*
 ** Lua will use at most ~(2^LUAI_HASHLIMIT) bytes from a string to
@@ -74,30 +67,32 @@ unsigned int luaS_hash (const char *str, size_t l, unsigned int seed) {
 void luaS_resize (lua_State *L, int newsize) {
   int i;
   stringtable *tb = &G(L)->strt;
-  TString **oldhash = tb->hash;
-  int oldsize = tb->size;
-  tb->hash = luaM_newvector(L, newsize, TString *);
-  tb->size = newsize;
-  /* keep load factor below 75% */
-  tb->empty = newsize/2 + newsize/4 - tb->nuse;
-  for (i = 0; i < newsize; i++) tb->hash[i] = NULL;
-  tb->nuse = 0;
-  /* rehash */
-  for (i = 0; i < oldsize; i++) {
-    TString *ts = oldhash[i];
-    if (ts != NULL && ts != VACANTK) {
-      unsigned int hash = ts->tsv.hash;
-      int h1 = lmod(hash, tb->size);
-      while (tb->hash[h1] != NULL)
-        h1 = h2(h1, hash, tb->size);
-      tb->hash[h1] = ts;
-      tb->nuse++;
+  if (newsize > tb->size) {  /* grow table if needed */
+    luaM_reallocvector(L, tb->hash, tb->size, newsize, TString *);
+    for (i = tb->size; i < newsize; i++)
+      tb->hash[i] = NULL;
+  }
+  for (i = 0; i < tb->size; i++) {  /* rehash */
+    TString *p = tb->hash[i];
+    tb->hash[i] = NULL;
+    while (p) {  /* for each node in the list */
+      TString *hnext = p->tsv.hnext;  /* save next */
+      unsigned int h = lmod(p->tsv.hash, newsize);  /* new position */
+      p->tsv.hnext = tb->hash[h];  /* chain it */
+      tb->hash[h] = p;
+      p = hnext;
     }
   }
-  luaM_freearray(L, oldhash, oldsize);
+  if (newsize < tb->size) {  /* shrink table if needed */
+    /* vanishing slice should be empty */
+    lua_assert(tb->hash[newsize] == NULL && tb->hash[tb->size - 1] == NULL);
+    luaM_reallocvector(L, tb->hash, tb->size, newsize, TString *);
+  }
+  tb->size = newsize;
 }
 
 
+
 /*
 ** creates a new string object
 */
@@ -116,32 +111,15 @@ static TString *createstrobj (lua_State *L, const char *str, size_t l,
 }
 
 
-static void rehash (lua_State *L, stringtable *tb) {
-  int size = tb->size;
-  if (tb->nuse < size / 2) {  /* using less than half the size? */
-    if (tb->nuse < size / 4)  /* using less than half of that? */
-      size /= 2;  /* shrink table */
-    /* else keep size (but reorganize table) */
-  }
-  else {  /* table must grow */
-    if (size >= MAX_INT/2)  /* avoid arith. overflow */
-      luaD_throw(L, LUA_ERRMEM);  /* regular errors need new strings... */
-    size *= 2;
-  }
-  luaS_resize(L, size);
-}
-
-
 LUAI_FUNC void luaS_remove (lua_State *L, TString *ts) {
   stringtable *tb = &G(L)->strt;
-  unsigned int hash = ts->tsv.hash;
-  int h1 = lmod(hash, tb->size);
-  while (tb->hash[h1] != ts) {
-    lua_assert(tb->hash[h1] != NULL);
-    h1 = h2(h1, hash, tb->size);
-  }
-  tb->hash[h1] = VACANTK;
+  TString **p = &tb->hash[lmod(ts->tsv.hash, tb->size)];
+  while (*p != ts)  /* find previous element */
+    p = &(*p)->tsv.hnext;
+  *p = (*p)->tsv.hnext;  /* remove element from its list */
   tb->nuse--;
+  if (tb->nuse < tb->size/4)
+    luaS_resize(L, tb->size/2);
 }
 
 
@@ -150,39 +128,26 @@ LUAI_FUNC void luaS_remove (lua_State *L, TString *ts) {
 */
 static TString *internshrstr (lua_State *L, const char *str, size_t l) {
   TString *ts;
-  unsigned int hash = luaS_hash(str, l, G(L)->seed);
-  stringtable *tb = &G(L)->strt;
-  int vacant = -1;
-  int h1;
-  h1 = lmod(hash, tb->size);  /* previous call can changed 'size' */
-  while ((ts = tb->hash[h1]) != NULL) {  /* search the string in hash table */
-    if (ts == VACANTK) {
-      if (vacant < 0) vacant = h1;  /* keep track of first vacant place */
-    }
-    else if (l == ts->tsv.len &&
-            (memcmp(str, getstr(ts), l * sizeof(char)) == 0)) {
+  global_State *g = G(L);
+  unsigned int h = luaS_hash(str, l, g->seed);
+  TString **list = &g->strt.hash[lmod(h, g->strt.size)];
+  for (ts = *list; ts != NULL; ts = ts->tsv.hnext) {
+    if (l == ts->tsv.len &&
+        (memcmp(str, getstr(ts), l * sizeof(char)) == 0)) {
       /* found! */
-      if (isdead(G(L), obj2gco(ts)))  /* dead (but was not collected yet)? */
+      if (isdead(g, obj2gco(ts)))  /* dead (but not collected yet)? */
         changewhite(obj2gco(ts));  /* resurrect it */
-      if (vacant >= 0) {  /* is there a better place for this string? */
-        tb->hash[vacant] = ts;  /* move it up the line */
-        tb->hash[h1] = VACANTK;
-      }
-      return ts;  /* found */
+      return ts;
     }
-    h1 = h2(h1, hash, tb->size);
   }
-  if (tb->empty <= 0) {  /* no more empty spaces? */
-    rehash(L, tb);
-    return internshrstr(L, str, l);  /* recompute insertion with new size */
+  if (g->strt.nuse >= g->strt.size && g->strt.size <= MAX_INT/2) {
+    luaS_resize(L, g->strt.size * 2);
+    list = &g->strt.hash[lmod(h, g->strt.size)];  /* recompute with new size */
   }
-  ts = createstrobj(L, str, l, LUA_TSHRSTR, hash);
-  tb->nuse++;
-  if (vacant < 0)  /* found no vacant place? */
-    tb->empty--;  /* will have to use the empty place */
-  else
-    h1 = vacant;  /* use vacant place */
-  tb->hash[h1] = ts;
+  ts = createstrobj(L, str, l, LUA_TSHRSTR, h);
+  ts->tsv.hnext = *list;
+  *list = ts;
+  g->strt.nuse++;
   return ts;
 }
 

+ 12 - 8
ltests.c

@@ -1,5 +1,5 @@
 /*
-** $Id: ltests.c,v 2.153 2013/09/03 15:37:10 roberto Exp roberto $
+** $Id: ltests.c,v 2.154 2013/09/04 15:34:24 roberto Exp roberto $
 ** Internal Module for Debugging of the Lua Implementation
 ** See Copyright Notice in lua.h
 */
@@ -730,17 +730,21 @@ static int table_query (lua_State *L) {
 
 static int string_query (lua_State *L) {
   stringtable *tb = &G(L)->strt;
-  int s = luaL_optint(L, 2, 0) - 1;
-  if (s < 0) {
-    lua_pushinteger(L ,tb->nuse);
+  int s = luaL_optint(L, 1, 0) - 1;
+  if (s == -1) {
     lua_pushinteger(L ,tb->size);
+    lua_pushinteger(L ,tb->nuse);
     return 2;
   }
   else if (s < tb->size) {
-    TString *ts = tb->hash[s];
-    setsvalue2s(L, L->top, ts);
-    api_incr_top(L);
-    return 1;
+    TString *ts;
+    int n = 0;
+    for (ts = tb->hash[s]; ts != NULL; ts = ts->tsv.hnext) {
+      setsvalue2s(L, L->top, ts);
+      api_incr_top(L);
+      n++;
+    }
+    return n;
   }
   else return 0;
 }