Jelajahi Sumber

new implementation for string hashing, with chaining.

Roberto Ierusalimschy 26 tahun lalu
induk
melakukan
c5fee7615e
7 mengubah file dengan 117 tambahan dan 127 penghapusan
  1. 5 5
      lapi.c
  2. 14 7
      lbuiltin.c
  3. 22 16
      lgc.c
  4. 4 4
      llex.c
  5. 12 9
      lobject.h
  6. 50 82
      lstring.c
  7. 10 4
      lstring.h

+ 5 - 5
lapi.c

@@ -1,5 +1,5 @@
 /*
-** $Id: lapi.c,v 1.51 1999/10/04 17:51:04 roberto Exp roberto $
+** $Id: lapi.c,v 1.52 1999/10/07 19:04:30 roberto Exp roberto $
 ** Lua API
 ** See Copyright Notice in lua.h
 */
@@ -393,11 +393,11 @@ TaggedString *luaA_nextvar (TaggedString *g) {
     g = L->rootglobal;  /* first variable */
   else {
     /* check whether name is in global var list */
-    luaL_arg_check(g != g->next, 1, "variable name expected");
-    g = g->next;  /* get next */
+    luaL_arg_check(g != g->nextglobal, 1, "variable name expected");
+    g = g->nextglobal;  /* get next */
   }
   while (g && g->u.s.globalval.ttype == LUA_T_NIL)  /* skip globals with nil */
-    g = g->next;
+    g = g->nextglobal;
   if (g) {
     ttype(L->stack.top) = LUA_T_STRING; tsvalue(L->stack.top) = g;
     incr_top;
@@ -579,7 +579,7 @@ const char *lua_getobjname (lua_Object o, const char **name) {
   /* try to find a name for given function */
   TaggedString *g;
   set_normalized(L->stack.top, Address(o)); /* to be accessed by "checkfunc" */
-  for (g=L->rootglobal; g; g=g->next) {
+  for (g=L->rootglobal; g; g=g->nextglobal) {
     if (checkfunc(&g->u.s.globalval)) {
       *name = g->str;
       return "global";

+ 14 - 7
lbuiltin.c

@@ -1,5 +1,5 @@
 /*
-** $Id: lbuiltin.c,v 1.64 1999/10/04 17:51:04 roberto Exp roberto $
+** $Id: lbuiltin.c,v 1.65 1999/10/07 19:04:30 roberto Exp roberto $
 ** Built-in functions
 ** See Copyright Notice in lua.h
 */
@@ -447,7 +447,7 @@ static void luaB_foreachvar (void) {
   TObject f;  /* see comment in 'foreachi' */
   f = *luaA_Address(luaL_functionarg(1));
   luaD_checkstack(4);  /* for extra var name, f, var name, and globalval */
-  for (s = L->rootglobal; s; s = s->next) {
+  for (s = L->rootglobal; s; s = s->nextglobal) {
     if (s->u.s.globalval.ttype != LUA_T_NIL) {
       pushtagstring(s);  /* keep (extra) s on stack to avoid GC */
       *(L->stack.top++) = f;
@@ -606,6 +606,12 @@ static void mem_query (void) {
 }
 
 
+static void hash_query (void) {
+  const TObject *o = luaA_Address(luaL_nonnullarg(1));
+  lua_pushnumber(luaH_hashindex(o));
+}
+
+
 static void query_strings (void) {
   int h = luaL_check_int(1) - 1;
   int s = luaL_opt_int(2, 0) - 1;
@@ -617,10 +623,10 @@ static void query_strings (void) {
   }
   else {
     TaggedString *ts = L->string_root[h].hash[s];
-    if (ts == NULL) lua_pushstring("<NIL>");
-    else if (ts == &luaS_EMPTY) lua_pushstring("<EMPTY>");
-    else if (ts->constindex == -1) lua_pushstring("<USERDATA>");
-    else lua_pushstring(ts->str);
+    for (ts = L->string_root[h].hash[s]; ts; ts = ts->nexthash) {
+      if (ts->constindex == -1) lua_pushstring("<USERDATA>");
+      else lua_pushstring(ts->str);
+    }
   }
 }
 
@@ -707,9 +713,10 @@ static void testC (void) {
 static const struct luaL_reg builtin_funcs[] = {
 #ifdef DEBUG
   {"extra", extra_services},
+  {"hash", hash_query},
+  {"querystr", query_strings},
   {"testC", testC},
   {"totalmem", mem_query},
-  {"querystr", query_strings},
 #endif
   {"_ALERT", luaB_alert},
   {"_ERRORMESSAGE", error_message},

+ 22 - 16
lgc.c

@@ -1,5 +1,5 @@
 /*
-** $Id: lgc.c,v 1.26 1999/09/27 18:00:25 roberto Exp roberto $
+** $Id: lgc.c,v 1.27 1999/10/04 17:51:04 roberto Exp roberto $
 ** Garbage Collector
 ** See Copyright Notice in lua.h
 */
@@ -64,7 +64,7 @@ static void hashmark (Hash *h) {
 
 static void globalmark (void) {
   TaggedString *g;
-  for (g=L->rootglobal; g; g=g->next) {
+  for (g=L->rootglobal; g; g=g->nextglobal) {
     LUA_ASSERT(g->constindex >= 0, "userdata in global list");
     if (g->u.s.globalval.ttype != LUA_T_NIL) {
       markobject(&g->u.s.globalval);
@@ -161,8 +161,8 @@ static void clear_global_list (void) {
   TaggedString **p = &L->rootglobal;
   TaggedString *next;
   while ((next = *p) != NULL) {
-    if (next->marked) p = &next->next;
-    else *p = next->next;
+    if (next->marked) p = &next->nextglobal;
+    else *p = next->nextglobal;
   }
 }
 
@@ -177,22 +177,28 @@ static void collectstring (int limit) {
   int i;
   ttype(&o) = LUA_T_USERDATA;
   clear_global_list();
-  for (i=0; i<NUM_HASHS; i++) {
+  for (i=0; i<NUM_HASHS; i++) {  /* for each hash table */
     stringtable *tb = &L->string_root[i];
     int j;
-    for (j=0; j<tb->size; j++) {
-      TaggedString *t = tb->hash[j];
-      if (t == NULL) continue;
-      if (t->marked < limit) {
-        if (t->constindex == -1) {  /* is userdata? */
-          tsvalue(&o) = t;
-          luaD_gcIM(&o);
+    for (j=0; j<tb->size; j++) {  /* for each list */
+      TaggedString **p = &tb->hash[j];
+      TaggedString *next;
+      while ((next = *p) != NULL) {
+       if (next->marked >= limit) {
+         if (next->marked < FIXMARK)  /* does not change FIXMARKs */
+           next->marked = 0;
+         p = &next->nexthash;
+       } 
+       else {  /* collect */
+          if (next->constindex == -1) {  /* is userdata? */
+            tsvalue(&o) = next;
+            luaD_gcIM(&o);
+          }
+          *p = next->nexthash;
+          luaS_free(next);
+          tb->nuse--;
         }
-        luaS_free(t);
-        tb->hash[j] = &luaS_EMPTY;
       }
-      else if (t->marked == 1)
-        t->marked = 0;
     }
   }
 }

+ 4 - 4
llex.c

@@ -1,5 +1,5 @@
 /*
-** $Id: llex.c,v 1.39 1999/09/06 13:55:09 roberto Exp roberto $
+** $Id: llex.c,v 1.40 1999/10/04 17:51:04 roberto Exp roberto $
 ** Lexical Analyzer
 ** See Copyright Notice in lua.h
 */
@@ -37,7 +37,7 @@ void luaX_init (void) {
   int i;
   for (i=0; i<(sizeof(reserved)/sizeof(reserved[0])); i++) {
     TaggedString *ts = luaS_new(reserved[i]);
-    ts->marked = FIRST_RESERVED+i;  /* reserved word  (always > 255) */
+    ts->marked = RESERVEDMARK+i;  /* reserved word */
   }
 }
 
@@ -426,8 +426,8 @@ int luaX_lex (LexState *LS) {
           } while (isalnum(LS->current) || LS->current == '_');
           save('\0');
           ts = luaS_new(L->Mbuffer+L->Mbuffbase);
-          if (ts->marked >= FIRST_RESERVED)
-            return ts->marked;  /* reserved word */
+          if (ts->marked >= RESERVEDMARK)  /* reserved word? */
+            return ts->marked-RESERVEDMARK+FIRST_RESERVED;
           LS->seminfo.ts = ts;
           return NAME;
         }

+ 12 - 9
lobject.h

@@ -1,5 +1,5 @@
 /*
-** $Id: lobject.h,v 1.30 1999/09/06 20:34:18 roberto Exp roberto $
+** $Id: lobject.h,v 1.31 1999/10/04 17:51:04 roberto Exp roberto $
 ** Type definitions for Lua objects
 ** See Copyright Notice in lua.h
 */
@@ -14,13 +14,15 @@
 
 
 #ifdef DEBUG
-#include "lauxlib.h"
-#define LUA_INTERNALERROR(s)	\
-	luaL_verror("INTERNAL ERROR - %s [%s:%d]",(s),__FILE__,__LINE__)
-#define LUA_ASSERT(c,s) { if (!(c)) LUA_INTERNALERROR(s); }
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+#include <assert.h>
+#define LUA_INTERNALERROR(s)	assert(0)
+#define LUA_ASSERT(c,s)		assert(c)
 #else
-#define LUA_INTERNALERROR(s)  /* empty */
-#define LUA_ASSERT(c,s)  /* empty */
+#define LUA_INTERNALERROR(s)	/* empty */
+#define LUA_ASSERT(c,s)		/* empty */
 #endif
 
 
@@ -90,8 +92,8 @@ typedef struct TObject {
 */
 
 typedef struct TaggedString {
-  struct TaggedString *next;
-  int marked;
+  struct TaggedString *nexthash;  /* chain hash table */
+  struct TaggedString *nextglobal;  /* chain global variables */
   unsigned long hash;
   int constindex;  /* hint to reuse constants (= -1 if this is a userdata) */
   union {
@@ -104,6 +106,7 @@ typedef struct TaggedString {
       void *v;  /* if this is a userdata, here is its value */
     } d;
   } u;
+  unsigned char marked;
   char str[1];   /* \0 byte already reserved */
 } TaggedString;
 

+ 50 - 82
lstring.c

@@ -1,5 +1,5 @@
 /*
-** $Id: lstring.c,v 1.21 1999/09/28 12:27:06 roberto Exp roberto $
+** $Id: lstring.c,v 1.22 1999/10/04 17:51:04 roberto Exp roberto $
 ** String table (keeps all strings handled by Lua)
 ** See Copyright Notice in lua.h
 */
@@ -19,11 +19,6 @@
 
 
 
-TaggedString luaS_EMPTY = {NULL, MAX_INT, 0L, 0,
-                           {{{LUA_T_NIL, {NULL}}, 0L}}, {0}};
-
-
-
 /*
 ** to avoid hash tables with size = 0 (cannot hash with size=0), all
 ** hash tables are initialized with this `array'. Elements are never
@@ -48,6 +43,7 @@ void luaS_init (void) {
 void luaS_freeall (void) {
   int i;
   for (i=0; i<NUM_HASHS; i++) {
+    LUA_ASSERT(L->string_root[i].nuse==0, "non-empty string table");
     if (L->string_root[i].hash != init_hash)
       luaM_free(L->string_root[i].hash);
   }
@@ -56,43 +52,27 @@ void luaS_freeall (void) {
 
 
 static unsigned long hash_s (const char *s, long l) {
-  unsigned long h = 0;  /* seed */
+  unsigned long h = l;  /* seed */
   while (l--)
       h = h ^ ((h<<5)+(h>>2)+(unsigned char)*(s++));
   return h;
 }
 
 
-static int newsize (const stringtable *tb) {
-  int realuse = 0;
-  int i;
-  /* count how many entries are really in use */
-  for (i=0; i<tb->size; i++) {
-    if (tb->hash[i] != NULL && tb->hash[i] != &luaS_EMPTY)
-      realuse++;
-  }
-  return luaO_redimension(realuse*2);
-}
-
-
 static void grow (stringtable *tb) {
-  int ns = newsize(tb);
+  int ns = luaO_redimension(tb->size*2);  /* new size */
   TaggedString **newhash = luaM_newvector(ns, TaggedString *);
   int i;
-  for (i=0; i<ns; i++)
-    newhash[i] = NULL;
+  for (i=0; i<ns; i++) newhash[i] = NULL;
   /* rehash */
-  tb->nuse = 0;
   for (i=0; i<tb->size; i++) {
-    if (tb->hash[i] != NULL && tb->hash[i] != &luaS_EMPTY) {
-      unsigned long h = tb->hash[i]->hash;
-      int h1 = h%ns;
-      while (newhash[h1]) {
-        h1 += (h&(ns-2)) + 1;  /* double hashing */
-        if (h1 >= ns) h1 -= ns;
-      }
-      newhash[h1] = tb->hash[i];
-      tb->nuse++;
+    TaggedString *p = tb->hash[i];
+    while (p) {  /* for each node in the list */
+      TaggedString *next = p->nexthash;  /* save next */
+      int h = p->hash%ns;  /* new position */
+      p->nexthash = newhash[h];  /* chain it in new position */
+      newhash[h] = p;
+      p = next;
     }
   }
   luaM_free(tb->hash);
@@ -101,91 +81,79 @@ static void grow (stringtable *tb) {
 }
 
 
+static TaggedString *newone (long l, unsigned long h) {
+  TaggedString *ts = (TaggedString *)luaM_malloc(
+                                       sizeof(TaggedString)+l*sizeof(char));
+  ts->marked = 0;
+  ts->nexthash = NULL;
+  ts->nextglobal = ts;  /* signal it is not in global list */
+  ts->hash = h;
+  return ts;
+}
+
+
 static TaggedString *newone_s (const char *str, long l, unsigned long h) {
-  TaggedString *ts = (TaggedString *)luaM_malloc(sizeof(TaggedString)+l);
+  TaggedString *ts = newone(l, h);
   memcpy(ts->str, str, l);
   ts->str[l] = 0;  /* ending 0 */
   ts->u.s.globalval.ttype = LUA_T_NIL;  /* initialize global value */
   ts->u.s.len = l;
   ts->constindex = 0;
   L->nblocks += gcsizestring(l);
-  ts->marked = 0;
-  ts->next = ts;  /* signal it is in no list */
-  ts->hash = h;
   return ts;
 }
 
+
 static TaggedString *newone_u (void *buff, int tag, unsigned long h) {
-  TaggedString *ts = luaM_new(TaggedString);
+  TaggedString *ts = newone(0, h);
   ts->u.d.v = buff;
   ts->u.d.tag = (tag == LUA_ANYTAG) ? 0 : tag;
   ts->constindex = -1;  /* tag -> this is a userdata */
   L->nblocks++;
-  ts->marked = 0;
-  ts->next = ts;  /* signal it is in no list */
-  ts->hash = h;
   return ts;
 }
 
 
-static void newentry (stringtable *tb, TaggedString *ts, int h1) {
+static void newentry (stringtable *tb, TaggedString *ts, int h) {
   tb->nuse++;
-  if ((long)tb->nuse*3 < (long)tb->size*2)  /* still have room? */
-    tb->hash[h1] = ts;
-  else {  /* must grow */
+  if (tb->nuse >= tb->size) {  /* no more room? */
     if (tb->hash == init_hash) {  /* cannot change init_hash */
-      LUA_ASSERT(h1==0, "`init_hash' has size 1");
+      LUA_ASSERT(h==0, "`init_hash' has size 1");
       tb->hash = luaM_newvector(1, TaggedString *);  /* so, `clone' it */
+      tb->hash[0] = NULL;
     }
-    tb->hash[h1] = ts;
     grow(tb);
+    h = ts->hash%tb->size;  /* new hash position */
   }
+  ts->nexthash = tb->hash[h];  /* chain new entry */
+  tb->hash[h] = ts;
 }
 
 
-static TaggedString *insert_s (const char *str, long l, stringtable *tb) {
+static TaggedString *insert_s (const char *str, long l,
+                               stringtable *tb, unsigned long h) {
+  int h1 = h%tb->size;
   TaggedString *ts;
-  unsigned long h = hash_s(str, l);
-  int size = tb->size;
-  int j = -1;  /* last empty place found (or -1) */
-  int h1 = h%size;
-  while ((ts = tb->hash[h1]) != NULL) {
-    if (ts == &luaS_EMPTY)
-      j = h1;
-    else if (ts->u.s.len == l && (memcmp(str, ts->str, l) == 0))
+  for (ts = tb->hash[h1]; ts; ts = ts->nexthash)
+    if (ts->u.s.len == l && (memcmp(str, ts->str, l) == 0))
       return ts;
-    h1 += (h&(size-2)) + 1;  /* double hashing */
-    if (h1 >= size) h1 -= size;
-  }
   /* not found */
   ts = newone_s(str, l, h);  /* create new entry */
-  if (j != -1)  /* is there an EMPTY space? */
-    tb->hash[j] = ts;  /* use it for new element */
-  else
-    newentry(tb, ts, h1);  /* no EMPTY places; must use a virgin one */
+  newentry(tb, ts, h1);  /* insert it on table */
   return ts;
 }
 
 
 static TaggedString *insert_u (void *buff, int tag, stringtable *tb) {
-  TaggedString *ts;
   unsigned long h = (unsigned long)buff;
-  int size = tb->size;
-  int j = -1;
-  int h1 = h%size;
-  while ((ts = tb->hash[h1]) != NULL) {
-    if (ts == &luaS_EMPTY)
-      j = h1;
-    else if ((tag == ts->u.d.tag || tag == LUA_ANYTAG) && buff == ts->u.d.v)
+  int h1 = h%tb->size;
+  TaggedString *ts;
+  for (ts = tb->hash[h1]; ts; ts = ts->nexthash)
+    if ((tag == ts->u.d.tag || tag == LUA_ANYTAG) && buff == ts->u.d.v)
       return ts;
-    h1 += (h&(size-2)) + 1;
-    if (h1 >= size) h1 -= size;
-  }
+  /* not found */
   ts = newone_u(buff, tag, h);
-  if (j != -1)
-    tb->hash[j] = ts;
-  else
-    newentry(tb, ts, h1);
+  newentry(tb, ts, h1);
   return ts;
 }
 
@@ -196,8 +164,8 @@ TaggedString *luaS_createudata (void *udata, int tag) {
 }
 
 TaggedString *luaS_newlstr (const char *str, long l) {
-  int t = (l==0) ? 0 : ((int)((unsigned char)str[0]+l))%NUM_HASHSTR;
-  return insert_s(str, l, &L->string_root[t]);
+  unsigned long h = hash_s(str, l);
+  return insert_s(str, l, &L->string_root[h%NUM_HASHSTR], h);
 }
 
 TaggedString *luaS_new (const char *str) {
@@ -206,7 +174,7 @@ TaggedString *luaS_new (const char *str) {
 
 TaggedString *luaS_newfixedstring (const char *str) {
   TaggedString *ts = luaS_new(str);
-  if (ts->marked == 0) ts->marked = 2;  /* avoid GC */
+  if (ts->marked == 0) ts->marked = FIXMARK;  /* avoid GC */
   return ts;
 }
 
@@ -219,8 +187,8 @@ void luaS_free (TaggedString *t) {
 
 void luaS_rawsetglobal (TaggedString *ts, const TObject *newval) {
   ts->u.s.globalval = *newval;
-  if (ts->next == ts) {  /* is not in list? */
-    ts->next = L->rootglobal;
+  if (ts->nextglobal == ts) {  /* is not in list? */
+    ts->nextglobal = L->rootglobal;
     L->rootglobal = ts;
   }
 }

+ 10 - 4
lstring.h

@@ -1,5 +1,5 @@
 /*
-** $Id: lstring.h,v 1.8 1999/08/16 20:52:00 roberto Exp roberto $
+** $Id: lstring.h,v 1.9 1999/10/04 17:51:04 roberto Exp roberto $
 ** String table (keep all strings handled by Lua)
 ** See Copyright Notice in lua.h
 */
@@ -11,12 +11,18 @@
 #include "lobject.h"
 
 
-#define NUM_HASHSTR     31
-#define NUM_HASHUDATA   31
+#define NUM_HASHSTR     31  /* a prime not in array `dimensions' */
+#define NUM_HASHUDATA   31  /* idem */
 #define NUM_HASHS  (NUM_HASHSTR+NUM_HASHUDATA)
 
 
-extern TaggedString luaS_EMPTY;
+/*
+** any taggedstring with mark>=FIXMARK is never collected.
+** Marks>=RESERVEDMARK are used to identify reserved words.
+*/
+#define FIXMARK		2
+#define RESERVEDMARK	3
+
 
 void luaS_init (void);
 TaggedString *luaS_createudata (void *udata, int tag);