Browse Source

stb_ds: major string hash fix, minor other changes
- arena and strdup string hashes were badly broken due to not setting up default slot correctly
- tweak use of seed in 4-byte and 8-byte hash functions to hopefully be slightly stronger
- a few internal #ifdefs for performance tuning

Sean Barrett 6 years ago
parent
commit
b8960f32b8
1 changed files with 29 additions and 19 deletions
  1. 29 19
      stb_ds.h

+ 29 - 19
stb_ds.h

@@ -274,10 +274,10 @@ NOTES - HASH MAP
 
   * For compilers other than GCC and clang (e.g. Visual Studio), for hmput/hmget/hmdel
     and variants, the key must be an lvalue (so the macro can take the address of it).
-    For GCC and clang, extensions are used that eliminate this requirement if you're
-    using C99 and later or using C++.
+    Extensions are used that eliminate this requirement if you're using C99 and later
+    in GCC or clang, or if you're using C++ in GCC.
 
-  * To test for presence of a key in a hashmap, just do 'hmget(foo,key) >= 0'.
+  * To test for presence of a key in a hashmap, just do 'hmgeti(foo,key) >= 0'.
 
   * The iteration order of your data in the hashmap is determined solely by the
     order of insertions and deletions. In particular, if you never delete, new
@@ -417,7 +417,7 @@ extern void * stbds_shmode_func(size_t elemsize, int mode);
   #if __clang__
   #define STBDS_ADDRESSOF(typevar, value)     ((__typeof__(typevar)[1]){value}) // literal array decays to pointer to value
   #else
-  #define STBDS_ADDRESSOF(typevar, value)     ((typeof(typevar)[]){value}) // literal array decays to pointer to value
+  #define STBDS_ADDRESSOF(typevar, value)     ((typeof(typevar)[1]){value}) // literal array decays to pointer to value
   #endif
 #else
 #define STBDS_ADDRESSOF(typevar, value)     &(value)
@@ -648,10 +648,15 @@ void *stbds_arrgrowf(void *a, size_t elemsize, size_t addlen, size_t min_cap)
 // stbds_hm hash table implementation
 //
 
-#define STBDS_CACHE_LINE_SIZE   64
+#ifdef STBDS_INTERNAL_SMALL_BUCKET
+#define STBDS_BUCKET_LENGTH      4
+#else
 #define STBDS_BUCKET_LENGTH      8
-#define STBDS_BUCKET_SHIFT       3
+#endif
+
+#define STBDS_BUCKET_SHIFT      (STBDS_BUCKET_LENGTH == 8 ? 3 : 2)
 #define STBDS_BUCKET_MASK       (STBDS_BUCKET_LENGTH-1)
+#define STBDS_CACHE_LINE_SIZE   64
 
 #define STBDS_ALIGN_FWD(n,a)   (((n) + (a) - 1) & ~((a)-1))
 
@@ -698,13 +703,12 @@ void stbds_rand_seed(size_t seed)
 
 static size_t stbds_probe_position(size_t hash, size_t slot_count, size_t slot_log2)
 {
-  #if 1
-  size_t pos = (hash >> (STBDS_SIZE_T_BITS-slot_log2));
-  STBDS_ASSERT(pos < slot_count);
-  return pos;
-  #else
-  return hash & (slot_count-1);
+  size_t pos;
+  pos = hash & (slot_count-1);
+  #ifdef STBDS_INTERNAL_BUCKET_START
+  pos &= ~STBDS_BUCKET_MASK;
   #endif
+  return pos;
 }
 
 static size_t stbds_log2(size_t slot_count)
@@ -812,7 +816,6 @@ static stbds_hash_index *stbds_make_hash_index(size_t slot_count, stbds_hash_ind
           for (;;) {
             size_t limit,z;
             stbds_hash_bucket *bucket;
-            pos &= (t->slot_count-1);
             bucket = &t->storage[pos >> STBDS_BUCKET_SHIFT];
             STBDS_STATS(++stbds_rehash_probes);
 
@@ -835,6 +838,7 @@ static stbds_hash_index *stbds_make_hash_index(size_t slot_count, stbds_hash_ind
 
             pos += step;                  // quadratic probing
             step += STBDS_BUCKET_LENGTH;
+            pos &= (t->slot_count-1);
           }
         }
        done:
@@ -939,7 +943,7 @@ static size_t stbds_siphash_bytes(void *p, size_t len, size_t seed)
 #ifdef STBDS_SIPHASH_2_4
   return v0^v1^v2^v3;
 #else
-  return v1^v2^v3; // slightly stronger since v0^v3 in above cancels out final round operation
+  return v1^v2^v3; // slightly stronger since v0^v3 in above cancels out final round operation? I tweeted at the authors of SipHash about this but they didn't reply
 #endif
 }
 
@@ -954,10 +958,11 @@ size_t stbds_hash_bytes(void *p, size_t len, size_t seed)
     unsigned int hash = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24);
     #if 0
     // HASH32-A  Bob Jenkin's hash function w/o large constants
-    hash ^= seed ^ len;
+    hash ^= seed;
     hash -= (hash<<6);
     hash ^= (hash>>17);
     hash -= (hash<<9);
+    hash ^= seed;
     hash ^= (hash<<4);
     hash -= (hash<<3);
     hash ^= (hash<<10);
@@ -966,22 +971,24 @@ size_t stbds_hash_bytes(void *p, size_t len, size_t seed)
     // HASH32-BB  Bob Jenkin's presumably-accidental version of Thomas Wang hash with rotates turned into shifts.
     // Note that converting these back to rotates makes it run a lot slower, presumably due to collisions, so I'm
     // not really sure what's going on.
-    hash ^= seed ^ len;
+    hash ^= seed;
     hash = (hash ^ 61) ^ (hash >> 16);
     hash = hash + (hash << 3);
     hash = hash ^ (hash >> 4);
     hash = hash * 0x27d4eb2d;
+    hash ^= seed;
     hash = hash ^ (hash >> 15);
     #else  // HASH32-C   -  Murmur3
+    hash ^= seed;
     hash *= 0xcc9e2d51;
     hash = (hash << 17) | (hash >> 15);
     hash *= 0x1b873593;
     hash ^= seed;
     hash = (hash << 19) | (hash >> 13);
     hash = hash*5 + 0xe6546b64;
-    hash ^= len;
     hash ^= hash >> 16;
     hash *= 0x85ebca6b;
+    hash ^= seed;
     hash ^= hash >> 13;
     hash *= 0xc2b2ae35;
     hash ^= hash >> 16;
@@ -1006,16 +1013,17 @@ size_t stbds_hash_bytes(void *p, size_t len, size_t seed)
   } else if (len == 8 && sizeof(size_t) == 8) {
     size_t hash = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24);
     hash |= (size_t) (d[4] | (d[5] << 8) | (d[6] << 16) | (d[7] << 24)) << 16 << 16; // avoid warning if size_t == 4
-    hash ^= seed ^ len;
+    hash ^= seed;
     hash = (~hash) + (hash << 21);
     hash ^= STBDS_ROTATE_RIGHT(hash,24);
     hash *= 265;
     hash ^= STBDS_ROTATE_RIGHT(hash,14);
+    hash ^= seed;
     hash *= 21;
     hash ^= STBDS_ROTATE_RIGHT(hash,28);
     hash += (hash << 31);
     hash = (~hash) + (hash << 18);
-    return hash^seed;
+    return hash;
   } else {
     return stbds_siphash_bytes(p,len,seed);
   }
@@ -1272,6 +1280,8 @@ void * stbds_shmode_func(size_t elemsize, int mode)
 {
   void *a = stbds_arrgrowf(0, elemsize, 0, 1);
   stbds_hash_index *h;
+  memset(a, 0, elemsize);
+  stbds_header(a)->length = 1;
   stbds_header(a)->hash_table = h = (stbds_hash_index *) stbds_make_hash_index(STBDS_BUCKET_LENGTH, NULL);
   h->string.mode = mode;
   return STBDS_ARR_TO_HASH(a,elemsize);