Browse Source

Short strings always use all bytes in the hash

Collisions in short strings occur just by their existence, when
internalizing them. (Collisions in long strings are caused/controlled
by the program, when adding them as keys to the same table.)
Roberto Ierusalimschy 5 năm trước cách đây
mục cha
commit
7288528a1e
3 tập tin đã thay đổi với 10 bổ sung và 7 xóa
  1. 1 1
      lstate.c
  2. 7 5
      lstring.c
  3. 2 1
      lstring.h

+ 1 - 1
lstate.c

@@ -76,7 +76,7 @@ static unsigned int luai_makeseed (lua_State *L) {
   addbuff(buff, p, &h);  /* local variable */
   addbuff(buff, p, &lua_newstate);  /* public function */
   lua_assert(p == sizeof(buff));
-  return luaS_hash(buff, p, h);
+  return luaS_hash(buff, p, h, 1);
 }
 
 #endif

+ 7 - 5
lstring.c

@@ -23,7 +23,7 @@
 
 
 /*
-** Lua will use at most ~(2^LUAI_HASHLIMIT) bytes from a string to
+** Lua will use at most ~(2^LUAI_HASHLIMIT) bytes from a long string to
 ** compute its hash
 */
 #if !defined(LUAI_HASHLIMIT)
@@ -50,9 +50,9 @@ int luaS_eqlngstr (TString *a, TString *b) {
 }
 
 
-unsigned int luaS_hash (const char *str, size_t l, unsigned int seed) {
+unsigned int luaS_hash (const char *str, size_t l, unsigned int seed,
+                        size_t step) {
   unsigned int h = seed ^ cast_uint(l);
-  size_t step = (l >> LUAI_HASHLIMIT) + 1;
   for (; l >= step; l -= step)
     h ^= ((h<<5) + (h>>2) + cast_byte(str[l - 1]));
   return h;
@@ -62,7 +62,9 @@ unsigned int luaS_hash (const char *str, size_t l, unsigned int seed) {
 unsigned int luaS_hashlongstr (TString *ts) {
   lua_assert(ts->tt == LUA_VLNGSTR);
   if (ts->extra == 0) {  /* no hash? */
-    ts->hash = luaS_hash(getstr(ts), ts->u.lnglen, ts->hash);
+    size_t len = ts->u.lnglen;
+    size_t step = (len >> LUAI_HASHLIMIT) + 1;
+    ts->hash = luaS_hash(getstr(ts), len, ts->hash, step);
     ts->extra = 1;  /* now it has its hash */
   }
   return ts->hash;
@@ -199,7 +201,7 @@ static TString *internshrstr (lua_State *L, const char *str, size_t l) {
   TString *ts;
   global_State *g = G(L);
   stringtable *tb = &g->strt;
-  unsigned int h = luaS_hash(str, l, g->seed);
+  unsigned int h = luaS_hash(str, l, g->seed, 1);
   TString **list = &tb->hash[lmod(h, tb->size)];
   lua_assert(str != NULL);  /* otherwise 'memcmp'/'memcpy' are undefined */
   for (ts = *list; ts != NULL; ts = ts->u.hnext) {

+ 2 - 1
lstring.h

@@ -37,7 +37,8 @@
 #define eqshrstr(a,b)	check_exp((a)->tt == LUA_VSHRSTR, (a) == (b))
 
 
-LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l, unsigned int seed);
+LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l,
+                                  unsigned int seed, size_t step);
 LUAI_FUNC unsigned int luaS_hashlongstr (TString *ts);
 LUAI_FUNC int luaS_eqlngstr (TString *a, TString *b);
 LUAI_FUNC void luaS_resize (lua_State *L, int newsize);