MultiHashSet.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528
  1. #pragma once
  2. #include "../Common.h"
  3. #include "Array.h"
  4. NS_BF_BEGIN;
  5. struct MultiHashSetFuncs
  6. {
  7. void* Allocate(intptr size, intptr align)
  8. {
  9. return malloc(size);
  10. }
  11. void* AllocateZero(intptr size, intptr align)
  12. {
  13. void* mem = malloc(size);
  14. memset(mem, 0, size);
  15. return mem;
  16. }
  17. void Deallocate(void* ptr)
  18. {
  19. free(ptr);
  20. }
  21. bool DeallocateAll()
  22. {
  23. return false;
  24. }
  25. };
  26. template <typename T, typename TFuncs = AllocatorCLib >
  27. class MultiHashSet : public TFuncs
  28. {
  29. public:
  30. struct Entry
  31. {
  32. T mValue;
  33. int mNext;
  34. int mHashCode;
  35. };
  36. struct EntryRef
  37. {
  38. public:
  39. MultiHashSet* mSet;
  40. int mIndex;
  41. public:
  42. EntryRef()
  43. {
  44. mSet = NULL;
  45. mIndex = -1;
  46. }
  47. EntryRef(MultiHashSet* set, int index)
  48. {
  49. mSet = set;
  50. mIndex = index;
  51. }
  52. Entry* operator*()
  53. {
  54. return &this->mSet->mEntries[this->mIndex];
  55. }
  56. Entry* operator->()
  57. {
  58. return &this->mSet->mEntries[this->mIndex];
  59. }
  60. operator bool() const
  61. {
  62. return this->mIndex != -1;
  63. }
  64. };
  65. struct Iterator
  66. {
  67. public:
  68. MultiHashSet* mSet;
  69. int mCurEntry;
  70. int mCurBucket;
  71. public:
  72. Iterator(MultiHashSet* set)
  73. {
  74. this->mSet = set;
  75. this->mCurBucket = 0;
  76. this->mCurEntry = -1;
  77. }
  78. Iterator& operator++()
  79. {
  80. if (this->mCurEntry != -1)
  81. {
  82. this->mCurEntry = this->mSet->mEntries[this->mCurEntry].mNext;
  83. if (this->mCurEntry != -1)
  84. return *this;
  85. this->mCurBucket++;
  86. }
  87. if (mSet->mHashHeads == NULL)
  88. {
  89. this->mCurBucket = this->mSet->mHashSize;
  90. return *this; // At end
  91. }
  92. while (this->mCurBucket < mSet->mHashSize)
  93. {
  94. this->mCurEntry = this->mSet->mHashHeads[mCurBucket];
  95. if (this->mCurEntry != -1)
  96. return *this;
  97. this->mCurBucket++;
  98. }
  99. return *this; // At end
  100. }
  101. T operator*()
  102. {
  103. return this->mSet->mEntries[this->mCurEntry].mValue;
  104. }
  105. T operator->()
  106. {
  107. return this->mSet->mEntries[this->mCurEntry].mValue;
  108. }
  109. bool operator!=(const Iterator& itr) const
  110. {
  111. return ((itr.mCurEntry != this->mCurEntry) || (itr.mCurBucket != this->mCurBucket));
  112. }
  113. operator bool() const
  114. {
  115. return this->mCurEntry != -1;
  116. }
  117. void MoveToNextHashMatch()
  118. {
  119. int wantHash = this->mSet->mEntries[this->mCurEntry].mHashCode;
  120. do
  121. {
  122. this->mCurEntry = this->mSet->mEntries[this->mCurEntry].mNext;
  123. }
  124. while ((this->mCurEntry != -1) && (this->mSet->mEntries[this->mCurEntry].mHashCode != wantHash));
  125. }
  126. };
  127. protected:
  128. int GetPrimeish(int min)
  129. {
  130. // This is a minimal effort to help address-aligned dataa
  131. return (min | 1);
  132. }
  133. int ExpandSize(int oldSize)
  134. {
  135. int newSize = 2 * oldSize;
  136. // Allow the hashtables to grow to maximum possible size (~2G elements) before encoutering capacity overflow.
  137. // Note that this check works even when mAllocSize overflowed thanks to the (uint) cast
  138. /*if ((uint)newSize > MaxPrimeArrayLength && MaxPrimeArrayLength > oldSize)
  139. {
  140. Contract.Assert( MaxPrimeArrayLength == GetPrime(MaxPrimeArrayLength), "Invalid MaxPrimeArrayLength");
  141. return MaxPrimeArrayLength;
  142. }*/
  143. return GetPrimeish(newSize);
  144. }
  145. void ResizeEntries()
  146. {
  147. ResizeEntries(ExpandSize(mCount));
  148. }
  149. void ResizeEntries(int newSize)
  150. {
  151. BF_ASSERT(newSize >= mAllocSize);
  152. Entry* newEntries = TFuncs::allocate<Entry>(newSize);
  153. for (int i = 0; i < mCount; i++)
  154. {
  155. auto& newEntry = newEntries[i];
  156. auto& oldEntry = mEntries[i];
  157. newEntry.mHashCode = oldEntry.mHashCode;
  158. newEntry.mNext = oldEntry.mNext;
  159. new (&newEntry.mValue) T(std::move(*(T*)&oldEntry.mValue));
  160. }
  161. for (int i = mCount; i < newSize; i++)
  162. {
  163. newEntries[i].mHashCode = -1;
  164. }
  165. TFuncs::deallocate(mEntries);
  166. mEntries = newEntries;
  167. mAllocSize = (int)newSize;
  168. }
  169. void FreeIdx(int entryIdx)
  170. {
  171. this->mEntries[entryIdx].mNext = this->mFreeList;
  172. this->mFreeList = entryIdx;
  173. this->mFreeCount++;
  174. }
  175. int AllocEntry()
  176. {
  177. int index;
  178. if (this->mFreeCount > 0)
  179. {
  180. index = this->mFreeList;
  181. this->mFreeList = this->mEntries[index].mNext;
  182. this->mFreeCount--;
  183. }
  184. else
  185. {
  186. if (this->mCount == this->mAllocSize)
  187. ResizeEntries();
  188. index = mCount;
  189. this->mCount++;
  190. }
  191. return index;
  192. }
  193. public:
  194. int* mHashHeads;
  195. int mAllocSize;
  196. Entry* mEntries;
  197. int mFreeList;
  198. int mFreeCount;
  199. static const int cDefaultHashSize = 17;
  200. int mHashSize;
  201. int mCount;
  202. MultiHashSet()
  203. {
  204. this->mHashHeads = NULL;
  205. this->mHashSize = cDefaultHashSize;
  206. this->mEntries = NULL;
  207. this->mAllocSize = 0;
  208. this->mCount = 0;
  209. this->mFreeList = -1;
  210. this->mFreeCount = 0;
  211. }
  212. ~MultiHashSet()
  213. {
  214. this->Clear();
  215. }
  216. void EnsureFreeCount(int wantFreeCount)
  217. {
  218. int freeCount = mFreeCount + (mAllocSize - mCount);
  219. if (freeCount >= wantFreeCount)
  220. return;
  221. ResizeEntries(BF_MAX(ExpandSize(mCount), mAllocSize + wantFreeCount - freeCount));
  222. }
  223. int GetCount() const
  224. {
  225. return mCount - mFreeCount;
  226. }
  227. int size() const
  228. {
  229. return mCount - mFreeCount;
  230. }
  231. EntryRef AddRaw(int hash)
  232. {
  233. if (this->mHashHeads == NULL)
  234. {
  235. this->mHashHeads = TFuncs::allocate<int>(mHashSize);
  236. memset(this->mHashHeads, -1, sizeof(int) * mHashSize);
  237. }
  238. int index = AllocEntry();
  239. int hashIdx = (hash & 0x7FFFFFFF) % this->mHashSize;
  240. int headEntry = this->mHashHeads[hashIdx];
  241. Entry* newEntry = &mEntries[index];
  242. newEntry->mValue = T();
  243. newEntry->mNext = headEntry;
  244. newEntry->mHashCode = hash;
  245. mHashHeads[hashIdx] = index;
  246. return EntryRef(this, index);
  247. }
  248. void Add(T value)
  249. {
  250. if (this->mHashHeads == NULL)
  251. {
  252. this->mHashHeads = TFuncs::allocate<int>(mHashSize);
  253. memset(this->mHashHeads, -1, sizeof(int) * mHashSize);
  254. }
  255. int index = AllocEntry();
  256. int hash = TFuncs::GetHash(value);
  257. int hashIdx = (hash & 0x7FFFFFFF) % this->mHashSize;
  258. int headEntry = this->mHashHeads[hashIdx];
  259. Entry* newEntry = &mEntries[index];
  260. newEntry->mValue = value;
  261. newEntry->mNext = headEntry;
  262. newEntry->mHashCode = hash;
  263. mHashHeads[hashIdx] = index;
  264. }
  265. void AddAfter(T value, Entry* afterEntry)
  266. {
  267. int hash = TFuncs::GetHash(value);
  268. int hashIdx = (hash & 0x7FFFFFFF) % this->mHashSize;
  269. BF_ASSERT(hash == afterEntry->mHashCode);
  270. int index = AllocEntry();
  271. Entry* newEntry = &mEntries[index];
  272. newEntry->mValue = value;
  273. newEntry->mNext = afterEntry->mNext;
  274. newEntry->mHashCode = hash;
  275. afterEntry->mNext = index;
  276. }
  277. void Rehash(int newHashSize)
  278. {
  279. auto newHashHeads = TFuncs::allocate<int>(newHashSize);
  280. memset(newHashHeads, -1, sizeof(int) * newHashSize);
  281. if (mHashHeads != NULL)
  282. {
  283. SizedArray<int, 1024> entryList;
  284. for (int hashIdx = 0; hashIdx < mHashSize; hashIdx++)
  285. {
  286. int checkEntryIdx = mHashHeads[hashIdx];
  287. if (checkEntryIdx != -1)
  288. {
  289. // We want to keep elements with equal hashes in their insert order so we need to
  290. // iterate through the linked list in reverse
  291. entryList.Clear();
  292. while (checkEntryIdx != -1)
  293. {
  294. entryList.Add(checkEntryIdx);
  295. checkEntryIdx = mEntries[checkEntryIdx].mNext;
  296. }
  297. for (int i = (int)entryList.mSize - 1; i >= 0; i--)
  298. {
  299. int checkEntryIdx = entryList[i];
  300. auto checkEntry = &mEntries[checkEntryIdx];
  301. int newHashIdx = (checkEntry->mHashCode & 0x7FFFFFFF) % newHashSize;
  302. checkEntry->mNext = newHashHeads[newHashIdx];
  303. newHashHeads[newHashIdx] = checkEntryIdx;
  304. }
  305. }
  306. }
  307. TFuncs::deallocate(mHashHeads);
  308. }
  309. mHashHeads = newHashHeads;
  310. mHashSize = newHashSize;
  311. }
  312. void CheckRehash()
  313. {
  314. // Make the lookup load reasonable
  315. if (mHashSize < mCount)
  316. {
  317. this->Rehash(BF_MAX(mCount, (int)(mHashSize * 1.5f)) | 1);
  318. }
  319. }
  320. template <typename TKey>
  321. bool TryGet(const TKey& key, T* val)
  322. {
  323. if (mHashHeads == NULL)
  324. return false;
  325. this->CheckRehash();
  326. int hash = TFuncs::GetHash(key);
  327. int hashIdx = (hash & 0x7FFFFFFF) % this->mHashSize;
  328. int checkEntryIdx = this->mHashHeads[hashIdx];
  329. while (checkEntryIdx != -1)
  330. {
  331. Entry* checkEntry = &mEntries[checkEntryIdx];
  332. if ((checkEntry->mHashCode == hash) && (TFuncs::Matches(key, checkEntry->mValue)))
  333. {
  334. if (val != NULL)
  335. *val = checkEntry->mValue;
  336. return true;
  337. }
  338. checkEntryIdx = checkEntry->mNext;
  339. }
  340. return false;
  341. }
  342. template <typename TKey>
  343. Iterator TryGet(const TKey& key)
  344. {
  345. if (mHashHeads == NULL)
  346. return end();
  347. this->CheckRehash();
  348. int hash = TFuncs::GetHash(key);
  349. int hashIdx = (hash & 0x7FFFFFFF) % this->mHashSize;
  350. int checkEntryIdx = this->mHashHeads[hashIdx];
  351. while (checkEntryIdx != -1)
  352. {
  353. auto checkEntry = &this->mEntries[checkEntryIdx];
  354. if ((checkEntry->mHashCode == hash) && (TFuncs::Matches(key, checkEntry->mValue)))
  355. {
  356. Iterator itr(this);
  357. itr.mCurEntry = checkEntryIdx;
  358. itr.mCurBucket = hashIdx;
  359. return itr;
  360. }
  361. checkEntryIdx = checkEntry->mNext;
  362. }
  363. return end();
  364. }
  365. template <typename TKey>
  366. bool Remove(const TKey& key)
  367. {
  368. if (mHashHeads == NULL)
  369. return false;
  370. this->CheckRehash();
  371. int hash = TFuncs::GetHash(key);
  372. int hashIdx = (hash & 0x7FFFFFFF) % this->mHashSize;
  373. int* srcCheckEntryPtr = &this->mHashHeads[hashIdx];
  374. int checkEntryIdx = *srcCheckEntryPtr;
  375. while (checkEntryIdx != -1)
  376. {
  377. auto checkEntry = &mEntries[checkEntryIdx];
  378. if ((checkEntry->mHashCode == hash) && (TFuncs::Matches(key, checkEntry->mValue)))
  379. {
  380. *srcCheckEntryPtr = checkEntry->mNext;
  381. FreeIdx(checkEntryIdx);
  382. return true;
  383. }
  384. srcCheckEntryPtr = &checkEntry->mNext;
  385. checkEntryIdx = checkEntry->mNext;
  386. }
  387. return false;
  388. }
  389. Iterator Erase(const Iterator& itr)
  390. {
  391. Iterator next = itr;
  392. ++next;
  393. bool found = false;
  394. auto entryIdx = itr.mCurEntry;
  395. auto entry = &mEntries[entryIdx];
  396. int hashIdx = (entry->mHashCode & 0x7FFFFFFF) % this->mHashSize;
  397. int* srcCheckEntryPtr = &this->mHashHeads[hashIdx];
  398. int checkEntryIdx = *srcCheckEntryPtr;
  399. while (checkEntryIdx != -1)
  400. {
  401. auto checkEntry = &mEntries[checkEntryIdx];
  402. if (checkEntryIdx == itr.mCurEntry)
  403. {
  404. *srcCheckEntryPtr = checkEntry->mNext;
  405. found = true;
  406. }
  407. srcCheckEntryPtr = &checkEntry->mNext;
  408. checkEntryIdx = checkEntry->mNext;
  409. }
  410. BF_ASSERT(found);
  411. FreeIdx(entryIdx);
  412. return next;
  413. }
  414. void Clear()
  415. {
  416. if (!TFuncs::deallocateAll())
  417. {
  418. auto itr = begin();
  419. auto endItr = end();
  420. while (itr != endItr)
  421. {
  422. auto entry = itr.mCurEntry;
  423. ++itr;
  424. FreeIdx(entry);
  425. }
  426. TFuncs::deallocate(this->mHashHeads);
  427. TFuncs::deallocate(this->mEntries);
  428. }
  429. this->mHashSize = cDefaultHashSize;
  430. this->mHashHeads = NULL;
  431. this->mEntries = NULL;
  432. this->mCount = 0;
  433. }
  434. Iterator begin()
  435. {
  436. return ++Iterator(this);
  437. }
  438. Iterator end()
  439. {
  440. Iterator itr(this);
  441. itr.mCurBucket = this->mHashSize;
  442. return itr;
  443. }
  444. };
  445. NS_BF_END;