12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184 |
- /*
- * simplehash.h
- *
- * When included this file generates a "templated" (by way of macros)
- * open-addressing hash table implementation specialized to user-defined
- * types.
- *
- * It's probably not worthwhile to generate such a specialized implementation
- * for hash tables that aren't performance or space sensitive.
- *
- * Compared to dynahash, simplehash has the following benefits:
- *
- * - Due to the "templated" code generation has known structure sizes and no
- * indirect function calls (which show up substantially in dynahash
- * profiles). These features considerably increase speed for small
- * entries.
- * - Open addressing has better CPU cache behavior than dynahash's chained
- * hashtables.
- * - The generated interface is type-safe and easier to use than dynahash,
- * though at the cost of more complex setup.
- * - Allocates memory in a MemoryContext or another allocator with a
- * malloc/free style interface (which isn't easily usable in a shared
- * memory context)
- * - Does not require the overhead of a separate memory context.
- *
- * Usage notes:
- *
- * To generate a hash-table and associated functions for a use case several
- * macros have to be #define'ed before this file is included. Including
- * the file #undef's all those, so a new hash table can be generated
- * afterwards.
- * The relevant parameters are:
- * - SH_PREFIX - prefix for all symbol names generated. A prefix of 'foo'
- * will result in hash table type 'foo_hash' and functions like
- * 'foo_insert'/'foo_lookup' and so forth.
- * - SH_ELEMENT_TYPE - type of the contained elements
- * - SH_KEY_TYPE - type of the hashtable's key
- * - SH_DECLARE - if defined function prototypes and type declarations are
- * generated
- * - SH_DEFINE - if defined function definitions are generated
- * - SH_SCOPE - in which scope (e.g. extern, static inline) do function
- * declarations reside
- * - SH_RAW_ALLOCATOR - if defined, memory contexts are not used; instead,
- * use this to allocate bytes. The allocator must zero the returned space.
- * - SH_USE_NONDEFAULT_ALLOCATOR - if defined no element allocator functions
- * are defined, so you can supply your own
- * The following parameters are only relevant when SH_DEFINE is defined:
- * - SH_KEY - name of the element in SH_ELEMENT_TYPE containing the hash key
- * - SH_EQUAL(table, a, b) - compare two table keys
- * - SH_HASH_KEY(table, key) - generate hash for the key
- * - SH_STORE_HASH - if defined the hash is stored in the elements
- * - SH_GET_HASH(tb, a) - return the field to store the hash in
- *
- * The element type is required to contain a "status" member that can store
- * the range of values defined in the SH_STATUS enum.
- *
- * While SH_STORE_HASH (and subsequently SH_GET_HASH) are optional, because
- * the hash table implementation needs to compare hashes to move elements
- * (particularly when growing the hash), it's preferable, if possible, to
- * store the element's hash in the element's data type. If the hash is so
- * stored, the hash table will also compare hashes before calling SH_EQUAL
- * when comparing two keys.
- *
- * For convenience the hash table create functions accept a void pointer
- * that will be stored in the hash table type's member private_data. This
- * allows callbacks to reference caller provided data.
- *
- * For examples of usage look at tidbitmap.c (file local definition) and
- * execnodes.h/execGrouping.c (exposed declaration, file local
- * implementation).
- *
- * Hash table design:
- *
- * The hash table design chosen is a variant of linear open-addressing. The
- * reason for doing so is that linear addressing is CPU cache & pipeline
- * friendly. The biggest disadvantage of simple linear addressing schemes
- * are highly variable lookup times due to clustering, and deletions
- * leaving a lot of tombstones around. To address these issues a variant
- * of "robin hood" hashing is employed. Robin hood hashing optimizes
- * chaining lengths by moving elements close to their optimal bucket
- * ("rich" elements), out of the way if a to-be-inserted element is further
- * away from its optimal position (i.e. it's "poor"). While that can make
- * insertions slower, the average lookup performance is a lot better, and
- * higher fill factors can be used in a still performant manner. To avoid
- * tombstones - which normally solve the issue that a deleted node's
- * presence is relevant to determine whether a lookup needs to continue
- * looking or is done - buckets following a deleted element are shifted
- * backwards, unless they're empty or already at their optimal position.
- *
- * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * src/include/lib/simplehash.h
- */
- #include "port/pg_bitutils.h"
- /* helpers */
- #define SH_MAKE_PREFIX(a) CppConcat(a,_)
- #define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX),name)
- #define SH_MAKE_NAME_(a,b) CppConcat(a,b)
- /* name macros for: */
- /* type declarations */
- #define SH_TYPE SH_MAKE_NAME(hash)
- #define SH_STATUS SH_MAKE_NAME(status)
- #define SH_STATUS_EMPTY SH_MAKE_NAME(SH_EMPTY)
- #define SH_STATUS_IN_USE SH_MAKE_NAME(SH_IN_USE)
- #define SH_ITERATOR SH_MAKE_NAME(iterator)
- /* function declarations */
- #define SH_CREATE SH_MAKE_NAME(create)
- #define SH_DESTROY SH_MAKE_NAME(destroy)
- #define SH_RESET SH_MAKE_NAME(reset)
- #define SH_INSERT SH_MAKE_NAME(insert)
- #define SH_INSERT_HASH SH_MAKE_NAME(insert_hash)
- #define SH_DELETE_ITEM SH_MAKE_NAME(delete_item)
- #define SH_DELETE SH_MAKE_NAME(delete)
- #define SH_LOOKUP SH_MAKE_NAME(lookup)
- #define SH_LOOKUP_HASH SH_MAKE_NAME(lookup_hash)
- #define SH_GROW SH_MAKE_NAME(grow)
- #define SH_START_ITERATE SH_MAKE_NAME(start_iterate)
- #define SH_START_ITERATE_AT SH_MAKE_NAME(start_iterate_at)
- #define SH_ITERATE SH_MAKE_NAME(iterate)
- #define SH_ALLOCATE SH_MAKE_NAME(allocate)
- #define SH_FREE SH_MAKE_NAME(free)
- #define SH_STAT SH_MAKE_NAME(stat)
- /* internal helper functions (no externally visible prototypes) */
- #define SH_COMPUTE_PARAMETERS SH_MAKE_NAME(compute_parameters)
- #define SH_NEXT SH_MAKE_NAME(next)
- #define SH_PREV SH_MAKE_NAME(prev)
- #define SH_DISTANCE_FROM_OPTIMAL SH_MAKE_NAME(distance)
- #define SH_INITIAL_BUCKET SH_MAKE_NAME(initial_bucket)
- #define SH_ENTRY_HASH SH_MAKE_NAME(entry_hash)
- #define SH_INSERT_HASH_INTERNAL SH_MAKE_NAME(insert_hash_internal)
- #define SH_LOOKUP_HASH_INTERNAL SH_MAKE_NAME(lookup_hash_internal)
- /* generate forward declarations necessary to use the hash table */
- #ifdef SH_DECLARE
- /* type definitions */
- typedef struct SH_TYPE
- {
- /*
- * Size of data / bucket array, 64 bits to handle UINT32_MAX sized hash
- * tables. Note that the maximum number of elements is lower
- * (SH_MAX_FILLFACTOR)
- */
- uint64 size;
- /* how many elements have valid contents */
- uint32 members;
- /* mask for bucket and size calculations, based on size */
- uint32 sizemask;
- /* boundary after which to grow hashtable */
- uint32 grow_threshold;
- /* hash buckets */
- SH_ELEMENT_TYPE *data;
- #ifndef SH_RAW_ALLOCATOR
- /* memory context to use for allocations */
- MemoryContext ctx;
- #endif
- /* user defined data, useful for callbacks */
- void *private_data;
- } SH_TYPE;
- typedef enum SH_STATUS
- {
- SH_STATUS_EMPTY = 0x00,
- SH_STATUS_IN_USE = 0x01
- } SH_STATUS;
- typedef struct SH_ITERATOR
- {
- uint32 cur; /* current element */
- uint32 end;
- bool done; /* iterator exhausted? */
- } SH_ITERATOR;
- /* externally visible function prototypes */
- #ifdef SH_RAW_ALLOCATOR
- /* <prefix>_hash <prefix>_create(uint32 nelements, void *private_data) */
- SH_SCOPE SH_TYPE *SH_CREATE(uint32 nelements, void *private_data);
- #else
- /*
- * <prefix>_hash <prefix>_create(MemoryContext ctx, uint32 nelements,
- * void *private_data)
- */
- SH_SCOPE SH_TYPE *SH_CREATE(MemoryContext ctx, uint32 nelements,
- void *private_data);
- #endif
- /* void <prefix>_destroy(<prefix>_hash *tb) */
- SH_SCOPE void SH_DESTROY(SH_TYPE * tb);
- /* void <prefix>_reset(<prefix>_hash *tb) */
- SH_SCOPE void SH_RESET(SH_TYPE * tb);
- /* void <prefix>_grow(<prefix>_hash *tb, uint64 newsize) */
- SH_SCOPE void SH_GROW(SH_TYPE * tb, uint64 newsize);
- /* <element> *<prefix>_insert(<prefix>_hash *tb, <key> key, bool *found) */
- SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found);
- /*
- * <element> *<prefix>_insert_hash(<prefix>_hash *tb, <key> key, uint32 hash,
- * bool *found)
- */
- SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
- uint32 hash, bool *found);
- /* <element> *<prefix>_lookup(<prefix>_hash *tb, <key> key) */
- SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key);
- /* <element> *<prefix>_lookup_hash(<prefix>_hash *tb, <key> key, uint32 hash) */
- SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
- uint32 hash);
- /* void <prefix>_delete_item(<prefix>_hash *tb, <element> *entry) */
- SH_SCOPE void SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry);
- /* bool <prefix>_delete(<prefix>_hash *tb, <key> key) */
- SH_SCOPE bool SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key);
- /* void <prefix>_start_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
- SH_SCOPE void SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
- /*
- * void <prefix>_start_iterate_at(<prefix>_hash *tb, <prefix>_iterator *iter,
- * uint32 at)
- */
- SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at);
- /* <element> *<prefix>_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
- SH_SCOPE SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
- /* void <prefix>_stat(<prefix>_hash *tb */
- SH_SCOPE void SH_STAT(SH_TYPE * tb);
- #endif /* SH_DECLARE */
- /* generate implementation of the hash table */
- #ifdef SH_DEFINE
- #ifndef SH_RAW_ALLOCATOR
- #include "utils/memutils.h"
- #endif
- /* max data array size,we allow up to PG_UINT32_MAX buckets, including 0 */
- #define SH_MAX_SIZE (((uint64) PG_UINT32_MAX) + 1)
- /* normal fillfactor, unless already close to maximum */
- #ifndef SH_FILLFACTOR
- #define SH_FILLFACTOR (0.9)
- #endif
- /* increase fillfactor if we otherwise would error out */
- #define SH_MAX_FILLFACTOR (0.98)
- /* grow if actual and optimal location bigger than */
- #ifndef SH_GROW_MAX_DIB
- #define SH_GROW_MAX_DIB 25
- #endif
- /* grow if more than elements to move when inserting */
- #ifndef SH_GROW_MAX_MOVE
- #define SH_GROW_MAX_MOVE 150
- #endif
- #ifndef SH_GROW_MIN_FILLFACTOR
- /* but do not grow due to SH_GROW_MAX_* if below */
- #define SH_GROW_MIN_FILLFACTOR 0.1
- #endif
- #ifdef SH_STORE_HASH
- #define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey))
- #else
- #define SH_COMPARE_KEYS(tb, ahash, akey, b) (SH_EQUAL(tb, b->SH_KEY, akey))
- #endif
- /*
- * Wrap the following definitions in include guards, to avoid multiple
- * definition errors if this header is included more than once. The rest of
- * the file deliberately has no include guards, because it can be included
- * with different parameters to define functions and types with non-colliding
- * names.
- */
- #ifndef SIMPLEHASH_H
- #define SIMPLEHASH_H
- #ifdef FRONTEND
- #define sh_error(...) pg_fatal(__VA_ARGS__)
- #define sh_log(...) pg_log_info(__VA_ARGS__)
- #else
- #define sh_error(...) elog(ERROR, __VA_ARGS__)
- #define sh_log(...) elog(LOG, __VA_ARGS__)
- #endif
- #endif
- /*
- * Compute sizing parameters for hashtable. Called when creating and growing
- * the hashtable.
- */
- static inline void
- SH_COMPUTE_PARAMETERS(SH_TYPE * tb, uint64 newsize)
- {
- uint64 size;
- /* supporting zero sized hashes would complicate matters */
- size = Max(newsize, 2);
- /* round up size to the next power of 2, that's how bucketing works */
- size = pg_nextpower2_64(size);
- Assert(size <= SH_MAX_SIZE);
- /*
- * Verify that allocation of ->data is possible on this platform, without
- * overflowing Size.
- */
- if (unlikely((((uint64) sizeof(SH_ELEMENT_TYPE)) * size) >= SIZE_MAX / 2))
- sh_error("hash table too large");
- /* now set size */
- tb->size = size;
- tb->sizemask = (uint32) (size - 1);
- /*
- * Compute the next threshold at which we need to grow the hash table
- * again.
- */
- if (tb->size == SH_MAX_SIZE)
- tb->grow_threshold = ((double) tb->size) * SH_MAX_FILLFACTOR;
- else
- tb->grow_threshold = ((double) tb->size) * SH_FILLFACTOR;
- }
- /* return the optimal bucket for the hash */
- static inline uint32
- SH_INITIAL_BUCKET(SH_TYPE * tb, uint32 hash)
- {
- return hash & tb->sizemask;
- }
- /* return next bucket after the current, handling wraparound */
- static inline uint32
- SH_NEXT(SH_TYPE * tb, uint32 curelem, uint32 startelem)
- {
- curelem = (curelem + 1) & tb->sizemask;
- Assert(curelem != startelem);
- return curelem;
- }
- /* return bucket before the current, handling wraparound */
- static inline uint32
- SH_PREV(SH_TYPE * tb, uint32 curelem, uint32 startelem)
- {
- curelem = (curelem - 1) & tb->sizemask;
- Assert(curelem != startelem);
- return curelem;
- }
- /* return distance between bucket and its optimal position */
- static inline uint32
- SH_DISTANCE_FROM_OPTIMAL(SH_TYPE * tb, uint32 optimal, uint32 bucket)
- {
- if (optimal <= bucket)
- return bucket - optimal;
- else
- return (tb->size + bucket) - optimal;
- }
- static inline uint32
- SH_ENTRY_HASH(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
- {
- #ifdef SH_STORE_HASH
- return SH_GET_HASH(tb, entry);
- #else
- return SH_HASH_KEY(tb, entry->SH_KEY);
- #endif
- }
- /* default memory allocator function */
- static inline void *SH_ALLOCATE(SH_TYPE * type, Size size);
- static inline void SH_FREE(SH_TYPE * type, void *pointer);
- #ifndef SH_USE_NONDEFAULT_ALLOCATOR
- /* default memory allocator function */
- static inline void *
- SH_ALLOCATE(SH_TYPE * type, Size size)
- {
- #ifdef SH_RAW_ALLOCATOR
- return SH_RAW_ALLOCATOR(size);
- #else
- return MemoryContextAllocExtended(type->ctx, size,
- MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO);
- #endif
- }
- /* default memory free function */
- static inline void
- SH_FREE(SH_TYPE * type, void *pointer)
- {
- pfree(pointer);
- }
- #endif
- /*
- * Create a hash table with enough space for `nelements` distinct members.
- * Memory for the hash table is allocated from the passed-in context. If
- * desired, the array of elements can be allocated using a passed-in allocator;
- * this could be useful in order to place the array of elements in a shared
- * memory, or in a context that will outlive the rest of the hash table.
- * Memory other than for the array of elements will still be allocated from
- * the passed-in context.
- */
- #ifdef SH_RAW_ALLOCATOR
- SH_SCOPE SH_TYPE *
- SH_CREATE(uint32 nelements, void *private_data)
- #else
- SH_SCOPE SH_TYPE *
- SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data)
- #endif
- {
- SH_TYPE *tb;
- uint64 size;
- #ifdef SH_RAW_ALLOCATOR
- tb = (SH_TYPE *) SH_RAW_ALLOCATOR(sizeof(SH_TYPE));
- #else
- tb = (SH_TYPE *) MemoryContextAllocZero(ctx, sizeof(SH_TYPE));
- tb->ctx = ctx;
- #endif
- tb->private_data = private_data;
- /* increase nelements by fillfactor, want to store nelements elements */
- size = Min((double) SH_MAX_SIZE, ((double) nelements) / SH_FILLFACTOR);
- SH_COMPUTE_PARAMETERS(tb, size);
- tb->data = (SH_ELEMENT_TYPE *) SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
- return tb;
- }
- /* destroy a previously created hash table */
- SH_SCOPE void
- SH_DESTROY(SH_TYPE * tb)
- {
- SH_FREE(tb, tb->data);
- pfree(tb);
- }
- /* reset the contents of a previously created hash table */
- SH_SCOPE void
- SH_RESET(SH_TYPE * tb)
- {
- memset(tb->data, 0, sizeof(SH_ELEMENT_TYPE) * tb->size);
- tb->members = 0;
- }
- /*
- * Grow a hash table to at least `newsize` buckets.
- *
- * Usually this will automatically be called by insertions/deletions, when
- * necessary. But resizing to the exact input size can be advantageous
- * performance-wise, when known at some point.
- */
- SH_SCOPE void
- SH_GROW(SH_TYPE * tb, uint64 newsize)
- {
- uint64 oldsize = tb->size;
- SH_ELEMENT_TYPE *olddata = tb->data;
- SH_ELEMENT_TYPE *newdata;
- uint32 i;
- uint32 startelem = 0;
- uint32 copyelem;
- Assert(oldsize == pg_nextpower2_64(oldsize));
- Assert(oldsize != SH_MAX_SIZE);
- Assert(oldsize < newsize);
- /* compute parameters for new table */
- SH_COMPUTE_PARAMETERS(tb, newsize);
- tb->data = (SH_ELEMENT_TYPE *) SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
- newdata = tb->data;
- /*
- * Copy entries from the old data to newdata. We theoretically could use
- * SH_INSERT here, to avoid code duplication, but that's more general than
- * we need. We neither want tb->members increased, nor do we need to do
- * deal with deleted elements, nor do we need to compare keys. So a
- * special-cased implementation is lot faster. As resizing can be time
- * consuming and frequent, that's worthwhile to optimize.
- *
- * To be able to simply move entries over, we have to start not at the
- * first bucket (i.e olddata[0]), but find the first bucket that's either
- * empty, or is occupied by an entry at its optimal position. Such a
- * bucket has to exist in any table with a load factor under 1, as not all
- * buckets are occupied, i.e. there always has to be an empty bucket. By
- * starting at such a bucket we can move the entries to the larger table,
- * without having to deal with conflicts.
- */
- /* search for the first element in the hash that's not wrapped around */
- for (i = 0; i < oldsize; i++)
- {
- SH_ELEMENT_TYPE *oldentry = &olddata[i];
- uint32 hash;
- uint32 optimal;
- if (oldentry->status != SH_STATUS_IN_USE)
- {
- startelem = i;
- break;
- }
- hash = SH_ENTRY_HASH(tb, oldentry);
- optimal = SH_INITIAL_BUCKET(tb, hash);
- if (optimal == i)
- {
- startelem = i;
- break;
- }
- }
- /* and copy all elements in the old table */
- copyelem = startelem;
- for (i = 0; i < oldsize; i++)
- {
- SH_ELEMENT_TYPE *oldentry = &olddata[copyelem];
- if (oldentry->status == SH_STATUS_IN_USE)
- {
- uint32 hash;
- uint32 startelem;
- uint32 curelem;
- SH_ELEMENT_TYPE *newentry;
- hash = SH_ENTRY_HASH(tb, oldentry);
- startelem = SH_INITIAL_BUCKET(tb, hash);
- curelem = startelem;
- /* find empty element to put data into */
- while (true)
- {
- newentry = &newdata[curelem];
- if (newentry->status == SH_STATUS_EMPTY)
- {
- break;
- }
- curelem = SH_NEXT(tb, curelem, startelem);
- }
- /* copy entry to new slot */
- memcpy(newentry, oldentry, sizeof(SH_ELEMENT_TYPE));
- }
- /* can't use SH_NEXT here, would use new size */
- copyelem++;
- if (copyelem >= oldsize)
- {
- copyelem = 0;
- }
- }
- SH_FREE(tb, olddata);
- }
- /*
- * This is a separate static inline function, so it can be reliably be inlined
- * into its wrapper functions even if SH_SCOPE is extern.
- */
- static inline SH_ELEMENT_TYPE *
- SH_INSERT_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
- {
- uint32 startelem;
- uint32 curelem;
- SH_ELEMENT_TYPE *data;
- uint32 insertdist;
- restart:
- insertdist = 0;
- /*
- * We do the grow check even if the key is actually present, to avoid
- * doing the check inside the loop. This also lets us avoid having to
- * re-find our position in the hashtable after resizing.
- *
- * Note that this also reached when resizing the table due to
- * SH_GROW_MAX_DIB / SH_GROW_MAX_MOVE.
- */
- if (unlikely(tb->members >= tb->grow_threshold))
- {
- if (unlikely(tb->size == SH_MAX_SIZE))
- sh_error("hash table size exceeded");
- /*
- * When optimizing, it can be very useful to print these out.
- */
- /* SH_STAT(tb); */
- SH_GROW(tb, tb->size * 2);
- /* SH_STAT(tb); */
- }
- /* perform insert, start bucket search at optimal location */
- data = tb->data;
- startelem = SH_INITIAL_BUCKET(tb, hash);
- curelem = startelem;
- while (true)
- {
- uint32 curdist;
- uint32 curhash;
- uint32 curoptimal;
- SH_ELEMENT_TYPE *entry = &data[curelem];
- /* any empty bucket can directly be used */
- if (entry->status == SH_STATUS_EMPTY)
- {
- tb->members++;
- entry->SH_KEY = key;
- #ifdef SH_STORE_HASH
- SH_GET_HASH(tb, entry) = hash;
- #endif
- entry->status = SH_STATUS_IN_USE;
- *found = false;
- return entry;
- }
- /*
- * If the bucket is not empty, we either found a match (in which case
- * we're done), or we have to decide whether to skip over or move the
- * colliding entry. When the colliding element's distance to its
- * optimal position is smaller than the to-be-inserted entry's, we
- * shift the colliding entry (and its followers) forward by one.
- */
- if (SH_COMPARE_KEYS(tb, hash, key, entry))
- {
- Assert(entry->status == SH_STATUS_IN_USE);
- *found = true;
- return entry;
- }
- curhash = SH_ENTRY_HASH(tb, entry);
- curoptimal = SH_INITIAL_BUCKET(tb, curhash);
- curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem);
- if (insertdist > curdist)
- {
- SH_ELEMENT_TYPE *lastentry = entry;
- uint32 emptyelem = curelem;
- uint32 moveelem;
- int32 emptydist = 0;
- /* find next empty bucket */
- while (true)
- {
- SH_ELEMENT_TYPE *emptyentry;
- emptyelem = SH_NEXT(tb, emptyelem, startelem);
- emptyentry = &data[emptyelem];
- if (emptyentry->status == SH_STATUS_EMPTY)
- {
- lastentry = emptyentry;
- break;
- }
- /*
- * To avoid negative consequences from overly imbalanced
- * hashtables, grow the hashtable if collisions would require
- * us to move a lot of entries. The most likely cause of such
- * imbalance is filling a (currently) small table, from a
- * currently big one, in hash-table order. Don't grow if the
- * hashtable would be too empty, to prevent quick space
- * explosion for some weird edge cases.
- */
- if (unlikely(++emptydist > SH_GROW_MAX_MOVE) &&
- ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
- {
- tb->grow_threshold = 0;
- goto restart;
- }
- }
- /* shift forward, starting at last occupied element */
- /*
- * TODO: This could be optimized to be one memcpy in many cases,
- * excepting wrapping around at the end of ->data. Hasn't shown up
- * in profiles so far though.
- */
- moveelem = emptyelem;
- while (moveelem != curelem)
- {
- SH_ELEMENT_TYPE *moveentry;
- moveelem = SH_PREV(tb, moveelem, startelem);
- moveentry = &data[moveelem];
- memcpy(lastentry, moveentry, sizeof(SH_ELEMENT_TYPE));
- lastentry = moveentry;
- }
- /* and fill the now empty spot */
- tb->members++;
- entry->SH_KEY = key;
- #ifdef SH_STORE_HASH
- SH_GET_HASH(tb, entry) = hash;
- #endif
- entry->status = SH_STATUS_IN_USE;
- *found = false;
- return entry;
- }
- curelem = SH_NEXT(tb, curelem, startelem);
- insertdist++;
- /*
- * To avoid negative consequences from overly imbalanced hashtables,
- * grow the hashtable if collisions lead to large runs. The most
- * likely cause of such imbalance is filling a (currently) small
- * table, from a currently big one, in hash-table order. Don't grow
- * if the hashtable would be too empty, to prevent quick space
- * explosion for some weird edge cases.
- */
- if (unlikely(insertdist > SH_GROW_MAX_DIB) &&
- ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
- {
- tb->grow_threshold = 0;
- goto restart;
- }
- }
- }
- /*
- * Insert the key key into the hash-table, set *found to true if the key
- * already exists, false otherwise. Returns the hash-table entry in either
- * case.
- */
- SH_SCOPE SH_ELEMENT_TYPE *
- SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found)
- {
- uint32 hash = SH_HASH_KEY(tb, key);
- return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
- }
- /*
- * Insert the key key into the hash-table using an already-calculated
- * hash. Set *found to true if the key already exists, false
- * otherwise. Returns the hash-table entry in either case.
- */
- SH_SCOPE SH_ELEMENT_TYPE *
- SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
- {
- return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
- }
- /*
- * This is a separate static inline function, so it can be reliably be inlined
- * into its wrapper functions even if SH_SCOPE is extern.
- */
- static inline SH_ELEMENT_TYPE *
- SH_LOOKUP_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash)
- {
- const uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
- uint32 curelem = startelem;
- while (true)
- {
- SH_ELEMENT_TYPE *entry = &tb->data[curelem];
- if (entry->status == SH_STATUS_EMPTY)
- {
- return NULL;
- }
- Assert(entry->status == SH_STATUS_IN_USE);
- if (SH_COMPARE_KEYS(tb, hash, key, entry))
- return entry;
- /*
- * TODO: we could stop search based on distance. If the current
- * buckets's distance-from-optimal is smaller than what we've skipped
- * already, the entry doesn't exist. Probably only do so if
- * SH_STORE_HASH is defined, to avoid re-computing hashes?
- */
- curelem = SH_NEXT(tb, curelem, startelem);
- }
- }
- /*
- * Lookup up entry in hash table. Returns NULL if key not present.
- */
- SH_SCOPE SH_ELEMENT_TYPE *
- SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key)
- {
- uint32 hash = SH_HASH_KEY(tb, key);
- return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
- }
- /*
- * Lookup up entry in hash table using an already-calculated hash.
- *
- * Returns NULL if key not present.
- */
- SH_SCOPE SH_ELEMENT_TYPE *
- SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash)
- {
- return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
- }
- /*
- * Delete entry from hash table by key. Returns whether to-be-deleted key was
- * present.
- */
- SH_SCOPE bool
- SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key)
- {
- uint32 hash = SH_HASH_KEY(tb, key);
- uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
- uint32 curelem = startelem;
- while (true)
- {
- SH_ELEMENT_TYPE *entry = &tb->data[curelem];
- if (entry->status == SH_STATUS_EMPTY)
- return false;
- if (entry->status == SH_STATUS_IN_USE &&
- SH_COMPARE_KEYS(tb, hash, key, entry))
- {
- SH_ELEMENT_TYPE *lastentry = entry;
- tb->members--;
- /*
- * Backward shift following elements till either an empty element
- * or an element at its optimal position is encountered.
- *
- * While that sounds expensive, the average chain length is short,
- * and deletions would otherwise require tombstones.
- */
- while (true)
- {
- SH_ELEMENT_TYPE *curentry;
- uint32 curhash;
- uint32 curoptimal;
- curelem = SH_NEXT(tb, curelem, startelem);
- curentry = &tb->data[curelem];
- if (curentry->status != SH_STATUS_IN_USE)
- {
- lastentry->status = SH_STATUS_EMPTY;
- break;
- }
- curhash = SH_ENTRY_HASH(tb, curentry);
- curoptimal = SH_INITIAL_BUCKET(tb, curhash);
- /* current is at optimal position, done */
- if (curoptimal == curelem)
- {
- lastentry->status = SH_STATUS_EMPTY;
- break;
- }
- /* shift */
- memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
- lastentry = curentry;
- }
- return true;
- }
- /* TODO: return false; if distance too big */
- curelem = SH_NEXT(tb, curelem, startelem);
- }
- }
- /*
- * Delete entry from hash table by entry pointer
- */
- SH_SCOPE void
- SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
- {
- SH_ELEMENT_TYPE *lastentry = entry;
- uint32 hash = SH_ENTRY_HASH(tb, entry);
- uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
- uint32 curelem;
- /* Calculate the index of 'entry' */
- curelem = entry - &tb->data[0];
- tb->members--;
- /*
- * Backward shift following elements till either an empty element or an
- * element at its optimal position is encountered.
- *
- * While that sounds expensive, the average chain length is short, and
- * deletions would otherwise require tombstones.
- */
- while (true)
- {
- SH_ELEMENT_TYPE *curentry;
- uint32 curhash;
- uint32 curoptimal;
- curelem = SH_NEXT(tb, curelem, startelem);
- curentry = &tb->data[curelem];
- if (curentry->status != SH_STATUS_IN_USE)
- {
- lastentry->status = SH_STATUS_EMPTY;
- break;
- }
- curhash = SH_ENTRY_HASH(tb, curentry);
- curoptimal = SH_INITIAL_BUCKET(tb, curhash);
- /* current is at optimal position, done */
- if (curoptimal == curelem)
- {
- lastentry->status = SH_STATUS_EMPTY;
- break;
- }
- /* shift */
- memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
- lastentry = curentry;
- }
- }
- /*
- * Initialize iterator.
- */
- SH_SCOPE void
- SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
- {
- uint64 startelem = PG_UINT64_MAX;
- /*
- * Search for the first empty element. As deletions during iterations are
- * supported, we want to start/end at an element that cannot be affected
- * by elements being shifted.
- */
- for (uint32 i = 0; i < tb->size; i++)
- {
- SH_ELEMENT_TYPE *entry = &tb->data[i];
- if (entry->status != SH_STATUS_IN_USE)
- {
- startelem = i;
- break;
- }
- }
- /* we should have found an empty element */
- Assert(startelem < SH_MAX_SIZE);
- /*
- * Iterate backwards, that allows the current element to be deleted, even
- * if there are backward shifts
- */
- iter->cur = startelem;
- iter->end = iter->cur;
- iter->done = false;
- }
- /*
- * Initialize iterator to a specific bucket. That's really only useful for
- * cases where callers are partially iterating over the hashspace, and that
- * iteration deletes and inserts elements based on visited entries. Doing that
- * repeatedly could lead to an unbalanced keyspace when always starting at the
- * same position.
- */
- SH_SCOPE void
- SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at)
- {
- /*
- * Iterate backwards, that allows the current element to be deleted, even
- * if there are backward shifts.
- */
- iter->cur = at & tb->sizemask; /* ensure at is within a valid range */
- iter->end = iter->cur;
- iter->done = false;
- }
- /*
- * Iterate over all entries in the hash-table. Return the next occupied entry,
- * or NULL if done.
- *
- * During iteration the current entry in the hash table may be deleted,
- * without leading to elements being skipped or returned twice. Additionally
- * the rest of the table may be modified (i.e. there can be insertions or
- * deletions), but if so, there's neither a guarantee that all nodes are
- * visited at least once, nor a guarantee that a node is visited at most once.
- */
- SH_SCOPE SH_ELEMENT_TYPE *
- SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
- {
- while (!iter->done)
- {
- SH_ELEMENT_TYPE *elem;
- elem = &tb->data[iter->cur];
- /* next element in backward direction */
- iter->cur = (iter->cur - 1) & tb->sizemask;
- if ((iter->cur & tb->sizemask) == (iter->end & tb->sizemask))
- iter->done = true;
- if (elem->status == SH_STATUS_IN_USE)
- {
- return elem;
- }
- }
- return NULL;
- }
- /*
- * Report some statistics about the state of the hashtable. For
- * debugging/profiling purposes only.
- */
- SH_SCOPE void
- SH_STAT(SH_TYPE * tb)
- {
- uint32 max_chain_length = 0;
- uint32 total_chain_length = 0;
- double avg_chain_length;
- double fillfactor;
- uint32 i;
- uint32 *collisions = (uint32 *) palloc0(tb->size * sizeof(uint32));
- uint32 total_collisions = 0;
- uint32 max_collisions = 0;
- double avg_collisions;
- for (i = 0; i < tb->size; i++)
- {
- uint32 hash;
- uint32 optimal;
- uint32 dist;
- SH_ELEMENT_TYPE *elem;
- elem = &tb->data[i];
- if (elem->status != SH_STATUS_IN_USE)
- continue;
- hash = SH_ENTRY_HASH(tb, elem);
- optimal = SH_INITIAL_BUCKET(tb, hash);
- dist = SH_DISTANCE_FROM_OPTIMAL(tb, optimal, i);
- if (dist > max_chain_length)
- max_chain_length = dist;
- total_chain_length += dist;
- collisions[optimal]++;
- }
- for (i = 0; i < tb->size; i++)
- {
- uint32 curcoll = collisions[i];
- if (curcoll == 0)
- continue;
- /* single contained element is not a collision */
- curcoll--;
- total_collisions += curcoll;
- if (curcoll > max_collisions)
- max_collisions = curcoll;
- }
- if (tb->members > 0)
- {
- fillfactor = tb->members / ((double) tb->size);
- avg_chain_length = ((double) total_chain_length) / tb->members;
- avg_collisions = ((double) total_collisions) / tb->members;
- }
- else
- {
- fillfactor = 0;
- avg_chain_length = 0;
- avg_collisions = 0;
- }
- sh_log("size: " UINT64_FORMAT ", members: %u, filled: %f, total chain: %u, max chain: %u, avg chain: %f, total_collisions: %u, max_collisions: %u, avg_collisions: %f",
- tb->size, tb->members, fillfactor, total_chain_length, max_chain_length, avg_chain_length,
- total_collisions, max_collisions, avg_collisions);
- }
- #endif /* SH_DEFINE */
- /* undefine external parameters, so next hash table can be defined */
- #undef SH_PREFIX
- #undef SH_KEY_TYPE
- #undef SH_KEY
- #undef SH_ELEMENT_TYPE
- #undef SH_HASH_KEY
- #undef SH_SCOPE
- #undef SH_DECLARE
- #undef SH_DEFINE
- #undef SH_GET_HASH
- #undef SH_STORE_HASH
- #undef SH_USE_NONDEFAULT_ALLOCATOR
- #undef SH_EQUAL
- /* undefine locally declared macros */
- #undef SH_MAKE_PREFIX
- #undef SH_MAKE_NAME
- #undef SH_MAKE_NAME_
- #undef SH_FILLFACTOR
- #undef SH_MAX_FILLFACTOR
- #undef SH_GROW_MAX_DIB
- #undef SH_GROW_MAX_MOVE
- #undef SH_GROW_MIN_FILLFACTOR
- #undef SH_MAX_SIZE
- /* types */
- #undef SH_TYPE
- #undef SH_STATUS
- #undef SH_STATUS_EMPTY
- #undef SH_STATUS_IN_USE
- #undef SH_ITERATOR
- /* external function names */
- #undef SH_CREATE
- #undef SH_DESTROY
- #undef SH_RESET
- #undef SH_INSERT
- #undef SH_INSERT_HASH
- #undef SH_DELETE_ITEM
- #undef SH_DELETE
- #undef SH_LOOKUP
- #undef SH_LOOKUP_HASH
- #undef SH_GROW
- #undef SH_START_ITERATE
- #undef SH_START_ITERATE_AT
- #undef SH_ITERATE
- #undef SH_ALLOCATE
- #undef SH_FREE
- #undef SH_STAT
- /* internal function names */
- #undef SH_COMPUTE_PARAMETERS
- #undef SH_COMPARE_KEYS
- #undef SH_INITIAL_BUCKET
- #undef SH_NEXT
- #undef SH_PREV
- #undef SH_DISTANCE_FROM_OPTIMAL
- #undef SH_ENTRY_HASH
- #undef SH_INSERT_HASH_INTERNAL
- #undef SH_LOOKUP_HASH_INTERNAL
|