MultiHashSet.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530
  1. #pragma once
  2. #include "../Common.h"
  3. #include "Array.h"
  4. NS_BF_BEGIN;
  5. struct MultiHashSetFuncs
  6. {
  7. void* Allocate(intptr size, intptr align)
  8. {
  9. return malloc(size);
  10. }
  11. void* AllocateZero(intptr size, intptr align)
  12. {
  13. void* mem = malloc(size);
  14. memset(mem, 0, size);
  15. return mem;
  16. }
  17. void Deallocate(void* ptr)
  18. {
  19. free(ptr);
  20. }
  21. bool DeallocateAll()
  22. {
  23. return false;
  24. }
  25. };
  26. template <typename T, typename TFuncs = AllocatorCLib >
  27. class MultiHashSet : public TFuncs
  28. {
  29. public:
  30. struct Entry
  31. {
  32. T mValue;
  33. int mNext;
  34. int mHashCode;
  35. };
  36. struct EntryRef
  37. {
  38. public:
  39. MultiHashSet* mSet;
  40. int mIndex;
  41. public:
  42. EntryRef()
  43. {
  44. mSet = NULL;
  45. mIndex = -1;
  46. }
  47. EntryRef(MultiHashSet* set, int index)
  48. {
  49. mSet = set;
  50. mIndex = index;
  51. }
  52. Entry* operator*()
  53. {
  54. return &this->mSet->mEntries[this->mIndex];
  55. }
  56. Entry* operator->()
  57. {
  58. return &this->mSet->mEntries[this->mIndex];
  59. }
  60. operator bool() const
  61. {
  62. return this->mIndex != -1;
  63. }
  64. };
  65. struct Iterator
  66. {
  67. public:
  68. MultiHashSet* mSet;
  69. int mCurEntry;
  70. int mCurBucket;
  71. public:
  72. Iterator(MultiHashSet* set)
  73. {
  74. this->mSet = set;
  75. this->mCurBucket = 0;
  76. this->mCurEntry = -1;
  77. }
  78. Iterator& operator++()
  79. {
  80. if (this->mCurEntry != -1)
  81. {
  82. this->mCurEntry = this->mSet->mEntries[this->mCurEntry].mNext;
  83. if (this->mCurEntry != -1)
  84. return *this;
  85. this->mCurBucket++;
  86. }
  87. if (mSet->mHashHeads == NULL)
  88. {
  89. this->mCurBucket = this->mSet->mHashSize;
  90. return *this; // At end
  91. }
  92. while (this->mCurBucket < mSet->mHashSize)
  93. {
  94. this->mCurEntry = this->mSet->mHashHeads[mCurBucket];
  95. if (this->mCurEntry != -1)
  96. return *this;
  97. this->mCurBucket++;
  98. }
  99. return *this; // At end
  100. }
  101. T operator*()
  102. {
  103. return this->mSet->mEntries[this->mCurEntry].mValue;
  104. }
  105. T operator->()
  106. {
  107. return this->mSet->mEntries[this->mCurEntry].mValue;
  108. }
  109. bool operator!=(const Iterator& itr) const
  110. {
  111. return ((itr.mCurEntry != this->mCurEntry) || (itr.mCurBucket != this->mCurBucket));
  112. }
  113. operator bool() const
  114. {
  115. return this->mCurEntry != -1;
  116. }
  117. void MoveToNextHashMatch()
  118. {
  119. int wantHash = this->mSet->mEntries[this->mCurEntry].mHashCode;
  120. do
  121. {
  122. this->mCurEntry = this->mSet->mEntries[this->mCurEntry].mNext;
  123. }
  124. while ((this->mCurEntry != -1) && (this->mSet->mEntries[this->mCurEntry].mHashCode != wantHash));
  125. if (this->mCurEntry == -1)
  126. this->mCurBucket = mSet->mHashSize;
  127. }
  128. };
  129. protected:
  130. int GetPrimeish(int min)
  131. {
  132. // This is a minimal effort to help address-aligned dataa
  133. return (min | 1);
  134. }
  135. int ExpandSize(int oldSize)
  136. {
  137. int newSize = 2 * oldSize;
  138. // Allow the hashtables to grow to maximum possible size (~2G elements) before encoutering capacity overflow.
  139. // Note that this check works even when mAllocSize overflowed thanks to the (uint) cast
  140. /*if ((uint)newSize > MaxPrimeArrayLength && MaxPrimeArrayLength > oldSize)
  141. {
  142. Contract.Assert( MaxPrimeArrayLength == GetPrime(MaxPrimeArrayLength), "Invalid MaxPrimeArrayLength");
  143. return MaxPrimeArrayLength;
  144. }*/
  145. return GetPrimeish(newSize);
  146. }
  147. void ResizeEntries()
  148. {
  149. ResizeEntries(ExpandSize(mCount));
  150. }
  151. void ResizeEntries(int newSize)
  152. {
  153. BF_ASSERT(newSize >= mAllocSize);
  154. Entry* newEntries = TFuncs::template allocate<Entry>(newSize);
  155. for (int i = 0; i < mCount; i++)
  156. {
  157. auto& newEntry = newEntries[i];
  158. auto& oldEntry = mEntries[i];
  159. newEntry.mHashCode = oldEntry.mHashCode;
  160. newEntry.mNext = oldEntry.mNext;
  161. new (&newEntry.mValue) T(std::move(*(T*)&oldEntry.mValue));
  162. }
  163. for (int i = mCount; i < newSize; i++)
  164. {
  165. newEntries[i].mHashCode = -1;
  166. }
  167. TFuncs::deallocate(mEntries);
  168. mEntries = newEntries;
  169. mAllocSize = (int)newSize;
  170. }
  171. void FreeIdx(int entryIdx)
  172. {
  173. this->mEntries[entryIdx].mNext = this->mFreeList;
  174. this->mFreeList = entryIdx;
  175. this->mFreeCount++;
  176. }
  177. int AllocEntry()
  178. {
  179. int index;
  180. if (this->mFreeCount > 0)
  181. {
  182. index = this->mFreeList;
  183. this->mFreeList = this->mEntries[index].mNext;
  184. this->mFreeCount--;
  185. }
  186. else
  187. {
  188. if (this->mCount == this->mAllocSize)
  189. ResizeEntries();
  190. index = mCount;
  191. this->mCount++;
  192. }
  193. return index;
  194. }
  195. public:
  196. int* mHashHeads;
  197. int mAllocSize;
  198. Entry* mEntries;
  199. int mFreeList;
  200. int mFreeCount;
  201. static const int cDefaultHashSize = 17;
  202. int mHashSize;
  203. int mCount;
  204. MultiHashSet()
  205. {
  206. this->mHashHeads = NULL;
  207. this->mHashSize = cDefaultHashSize;
  208. this->mEntries = NULL;
  209. this->mAllocSize = 0;
  210. this->mCount = 0;
  211. this->mFreeList = -1;
  212. this->mFreeCount = 0;
  213. }
  214. ~MultiHashSet()
  215. {
  216. this->Clear();
  217. }
  218. void EnsureFreeCount(int wantFreeCount)
  219. {
  220. int freeCount = mFreeCount + (mAllocSize - mCount);
  221. if (freeCount >= wantFreeCount)
  222. return;
  223. ResizeEntries(BF_MAX(ExpandSize(mCount), mAllocSize + wantFreeCount - freeCount));
  224. }
  225. int GetCount() const
  226. {
  227. return mCount - mFreeCount;
  228. }
  229. int size() const
  230. {
  231. return mCount - mFreeCount;
  232. }
  233. EntryRef AddRaw(int hash)
  234. {
  235. if (this->mHashHeads == NULL)
  236. {
  237. this->mHashHeads = TFuncs::template allocate<int>(mHashSize);
  238. memset(this->mHashHeads, -1, sizeof(int) * mHashSize);
  239. }
  240. int index = AllocEntry();
  241. int hashIdx = (hash & 0x7FFFFFFF) % this->mHashSize;
  242. int headEntry = this->mHashHeads[hashIdx];
  243. Entry* newEntry = &mEntries[index];
  244. newEntry->mValue = T();
  245. newEntry->mNext = headEntry;
  246. newEntry->mHashCode = hash;
  247. mHashHeads[hashIdx] = index;
  248. return EntryRef(this, index);
  249. }
  250. void Add(T value)
  251. {
  252. if (this->mHashHeads == NULL)
  253. {
  254. this->mHashHeads = TFuncs::template allocate<int>(mHashSize);
  255. memset(this->mHashHeads, -1, sizeof(int) * mHashSize);
  256. }
  257. int index = AllocEntry();
  258. int hash = TFuncs::GetHash(value);
  259. int hashIdx = (hash & 0x7FFFFFFF) % this->mHashSize;
  260. int headEntry = this->mHashHeads[hashIdx];
  261. Entry* newEntry = &mEntries[index];
  262. newEntry->mValue = value;
  263. newEntry->mNext = headEntry;
  264. newEntry->mHashCode = hash;
  265. mHashHeads[hashIdx] = index;
  266. }
  267. void AddAfter(T value, Entry* afterEntry)
  268. {
  269. int hash = TFuncs::GetHash(value);
  270. int hashIdx = (hash & 0x7FFFFFFF) % this->mHashSize;
  271. BF_ASSERT(hash == afterEntry->mHashCode);
  272. int index = AllocEntry();
  273. Entry* newEntry = &mEntries[index];
  274. newEntry->mValue = value;
  275. newEntry->mNext = afterEntry->mNext;
  276. newEntry->mHashCode = hash;
  277. afterEntry->mNext = index;
  278. }
  279. void Rehash(int newHashSize)
  280. {
  281. auto newHashHeads = TFuncs::template allocate<int>(newHashSize);
  282. memset(newHashHeads, -1, sizeof(int) * newHashSize);
  283. if (mHashHeads != NULL)
  284. {
  285. SizedArray<int, 1024> entryList;
  286. for (int hashIdx = 0; hashIdx < mHashSize; hashIdx++)
  287. {
  288. int checkEntryIdx = mHashHeads[hashIdx];
  289. if (checkEntryIdx != -1)
  290. {
  291. // We want to keep elements with equal hashes in their insert order so we need to
  292. // iterate through the linked list in reverse
  293. entryList.Clear();
  294. while (checkEntryIdx != -1)
  295. {
  296. entryList.Add(checkEntryIdx);
  297. checkEntryIdx = mEntries[checkEntryIdx].mNext;
  298. }
  299. for (int i = (int)entryList.mSize - 1; i >= 0; i--)
  300. {
  301. int checkEntryIdx = entryList[i];
  302. auto checkEntry = &mEntries[checkEntryIdx];
  303. int newHashIdx = (checkEntry->mHashCode & 0x7FFFFFFF) % newHashSize;
  304. checkEntry->mNext = newHashHeads[newHashIdx];
  305. newHashHeads[newHashIdx] = checkEntryIdx;
  306. }
  307. }
  308. }
  309. TFuncs::deallocate(mHashHeads);
  310. }
  311. mHashHeads = newHashHeads;
  312. mHashSize = newHashSize;
  313. }
  314. void CheckRehash()
  315. {
  316. // Make the lookup load reasonable
  317. if (mHashSize < mCount)
  318. {
  319. this->Rehash(BF_MAX(mCount, (int)(mHashSize * 1.5f)) | 1);
  320. }
  321. }
  322. template <typename TKey>
  323. bool TryGet(const TKey& key, T* val)
  324. {
  325. if (mHashHeads == NULL)
  326. return false;
  327. this->CheckRehash();
  328. int hash = TFuncs::GetHash(key);
  329. int hashIdx = (hash & 0x7FFFFFFF) % this->mHashSize;
  330. int checkEntryIdx = this->mHashHeads[hashIdx];
  331. while (checkEntryIdx != -1)
  332. {
  333. Entry* checkEntry = &mEntries[checkEntryIdx];
  334. if ((checkEntry->mHashCode == hash) && (TFuncs::Matches(key, checkEntry->mValue)))
  335. {
  336. if (val != NULL)
  337. *val = checkEntry->mValue;
  338. return true;
  339. }
  340. checkEntryIdx = checkEntry->mNext;
  341. }
  342. return false;
  343. }
  344. template <typename TKey>
  345. Iterator TryGet(const TKey& key)
  346. {
  347. if (mHashHeads == NULL)
  348. return end();
  349. this->CheckRehash();
  350. int hash = TFuncs::GetHash(key);
  351. int hashIdx = (hash & 0x7FFFFFFF) % this->mHashSize;
  352. int checkEntryIdx = this->mHashHeads[hashIdx];
  353. while (checkEntryIdx != -1)
  354. {
  355. auto checkEntry = &this->mEntries[checkEntryIdx];
  356. if ((checkEntry->mHashCode == hash) && (TFuncs::Matches(key, checkEntry->mValue)))
  357. {
  358. Iterator itr(this);
  359. itr.mCurEntry = checkEntryIdx;
  360. itr.mCurBucket = hashIdx;
  361. return itr;
  362. }
  363. checkEntryIdx = checkEntry->mNext;
  364. }
  365. return end();
  366. }
  367. template <typename TKey>
  368. bool Remove(const TKey& key)
  369. {
  370. if (mHashHeads == NULL)
  371. return false;
  372. this->CheckRehash();
  373. int hash = TFuncs::GetHash(key);
  374. int hashIdx = (hash & 0x7FFFFFFF) % this->mHashSize;
  375. int* srcCheckEntryPtr = &this->mHashHeads[hashIdx];
  376. int checkEntryIdx = *srcCheckEntryPtr;
  377. while (checkEntryIdx != -1)
  378. {
  379. auto checkEntry = &mEntries[checkEntryIdx];
  380. if ((checkEntry->mHashCode == hash) && (TFuncs::Matches(key, checkEntry->mValue)))
  381. {
  382. *srcCheckEntryPtr = checkEntry->mNext;
  383. FreeIdx(checkEntryIdx);
  384. return true;
  385. }
  386. srcCheckEntryPtr = &checkEntry->mNext;
  387. checkEntryIdx = checkEntry->mNext;
  388. }
  389. return false;
  390. }
  391. Iterator Erase(const Iterator& itr)
  392. {
  393. Iterator next = itr;
  394. ++next;
  395. bool found = false;
  396. auto entryIdx = itr.mCurEntry;
  397. auto entry = &mEntries[entryIdx];
  398. int hashIdx = (entry->mHashCode & 0x7FFFFFFF) % this->mHashSize;
  399. int* srcCheckEntryPtr = &this->mHashHeads[hashIdx];
  400. int checkEntryIdx = *srcCheckEntryPtr;
  401. while (checkEntryIdx != -1)
  402. {
  403. auto checkEntry = &mEntries[checkEntryIdx];
  404. if (checkEntryIdx == itr.mCurEntry)
  405. {
  406. *srcCheckEntryPtr = checkEntry->mNext;
  407. found = true;
  408. }
  409. srcCheckEntryPtr = &checkEntry->mNext;
  410. checkEntryIdx = checkEntry->mNext;
  411. }
  412. BF_ASSERT(found);
  413. FreeIdx(entryIdx);
  414. return next;
  415. }
  416. void Clear()
  417. {
  418. if (!TFuncs::deallocateAll())
  419. {
  420. auto itr = begin();
  421. auto endItr = end();
  422. while (itr != endItr)
  423. {
  424. auto entry = itr.mCurEntry;
  425. ++itr;
  426. FreeIdx(entry);
  427. }
  428. TFuncs::deallocate(this->mHashHeads);
  429. TFuncs::deallocate(this->mEntries);
  430. }
  431. this->mHashSize = cDefaultHashSize;
  432. this->mHashHeads = NULL;
  433. this->mEntries = NULL;
  434. this->mCount = 0;
  435. }
  436. Iterator begin()
  437. {
  438. return ++Iterator(this);
  439. }
  440. Iterator end()
  441. {
  442. Iterator itr(this);
  443. itr.mCurBucket = this->mHashSize;
  444. return itr;
  445. }
  446. };
  447. NS_BF_END;