浏览代码

Hash always uses all characters in a long string

Hashes for long strings are computed only when they are used as keys
in a table, which is not a very common case. And, in that case, it is too
easy to force collisions by changing only the characters that are not part
of the hash.
Roberto Ierusalimschy 4 年之前
父节点
当前提交
9a89fb1c9d
共有 4 个文件被更改,包括 6 次插入20 次删除
  1. 1 1
      lstate.c
  2. 4 16
      lstring.c
  3. 1 2
      lstring.h
  4. 0 1
      ltests.c

+ 1 - 1
lstate.c

@@ -76,7 +76,7 @@ static unsigned int luai_makeseed (lua_State *L) {
   addbuff(buff, p, &h);  /* local variable */
   addbuff(buff, p, &lua_newstate);  /* public function */
   lua_assert(p == sizeof(buff));
-  return luaS_hash(buff, p, h, 1);
+  return luaS_hash(buff, p, h);
 }
 
 #endif

+ 4 - 16
lstring.c

@@ -22,16 +22,6 @@
 #include "lstring.h"
 
 
-/*
-** Lua will use at most ~(2^LUAI_HASHLIMIT) bytes from a long string to
-** compute its hash
-*/
-#if !defined(LUAI_HASHLIMIT)
-#define LUAI_HASHLIMIT		5
-#endif
-
-
-
 /*
 ** Maximum size for string table.
 */
@@ -50,10 +40,9 @@ int luaS_eqlngstr (TString *a, TString *b) {
 }
 
 
-unsigned int luaS_hash (const char *str, size_t l, unsigned int seed,
-                        size_t step) {
+unsigned int luaS_hash (const char *str, size_t l, unsigned int seed) {
   unsigned int h = seed ^ cast_uint(l);
-  for (; l >= step; l -= step)
+  for (; l > 0; l--)
     h ^= ((h<<5) + (h>>2) + cast_byte(str[l - 1]));
   return h;
 }
@@ -63,8 +52,7 @@ unsigned int luaS_hashlongstr (TString *ts) {
   lua_assert(ts->tt == LUA_VLNGSTR);
   if (ts->extra == 0) {  /* no hash? */
     size_t len = ts->u.lnglen;
-    size_t step = (len >> LUAI_HASHLIMIT) + 1;
-    ts->hash = luaS_hash(getstr(ts), len, ts->hash, step);
+    ts->hash = luaS_hash(getstr(ts), len, ts->hash);
     ts->extra = 1;  /* now it has its hash */
   }
   return ts->hash;
@@ -201,7 +189,7 @@ static TString *internshrstr (lua_State *L, const char *str, size_t l) {
   TString *ts;
   global_State *g = G(L);
   stringtable *tb = &g->strt;
-  unsigned int h = luaS_hash(str, l, g->seed, 1);
+  unsigned int h = luaS_hash(str, l, g->seed);
   TString **list = &tb->hash[lmod(h, tb->size)];
   lua_assert(str != NULL);  /* otherwise 'memcmp'/'memcpy' are undefined */
   for (ts = *list; ts != NULL; ts = ts->u.hnext) {

+ 1 - 2
lstring.h

@@ -41,8 +41,7 @@
 #define eqshrstr(a,b)	check_exp((a)->tt == LUA_VSHRSTR, (a) == (b))
 
 
-LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l,
-                                  unsigned int seed, size_t step);
+LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l, unsigned int seed);
 LUAI_FUNC unsigned int luaS_hashlongstr (TString *ts);
 LUAI_FUNC int luaS_eqlngstr (TString *a, TString *b);
 LUAI_FUNC void luaS_resize (lua_State *L, int newsize);

+ 0 - 1
ltests.c

@@ -523,7 +523,6 @@ static lu_mem checkgraylist (global_State *g, GCObject *o) {
   ((void)g);  /* better to keep it available if we need to print an object */
   while (o) {
     lua_assert(!!isgray(o) ^ (getage(o) == G_TOUCHED2));
-    //lua_assert(isgray(o) || getage(o) == G_TOUCHED2);
     lua_assert(!testbit(o->marked, TESTBIT));
     if (keepinvariant(g))
       l_setbit(o->marked, TESTBIT);  /* mark that object is in a gray list */