HashSet.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656
  1. #pragma once
  2. #include "Array.h"
  3. #include <unordered_map>
  4. NS_BF_BEGIN;
  5. template <typename TKey, typename TAlloc = AllocatorCLib >
  6. class HashSet : public TAlloc
  7. {
  8. public:
  9. typedef int int_cosize;
  10. typedef TKey key_type;
  11. public:
  12. struct Entry
  13. {
  14. typename std::aligned_storage<sizeof(TKey), alignof(TKey)>::type mKey;
  15. int_cosize mNext; // Index of next entry, -1 if last
  16. int32 mHashCode; // Lower 31 bits of hash code, -1 if unused
  17. };
  18. public:
  19. int_cosize* mBuckets;
  20. Entry* mEntries;
  21. int_cosize mAllocSize;
  22. int_cosize mCount;
  23. int_cosize mFreeList;
  24. int_cosize mFreeCount;
  25. public:
  26. struct iterator
  27. {
  28. public:
  29. typedef std::bidirectional_iterator_tag iterator_category;
  30. public:
  31. HashSet* mDictionary;
  32. int_cosize mIdx;
  33. int_cosize mCurrentIdx;
  34. protected:
  35. void MoveNext()
  36. {
  37. while ((uintptr)mIdx < (uintptr)mDictionary->mCount)
  38. {
  39. if (mDictionary->mEntries[mIdx].mHashCode >= 0)
  40. {
  41. mCurrentIdx = mIdx;
  42. mIdx++;
  43. return;
  44. }
  45. mIdx++;
  46. }
  47. mIdx = mDictionary->mCount + 1;
  48. mCurrentIdx = -1;
  49. }
  50. public:
  51. iterator()
  52. {
  53. mDictionary = NULL;
  54. mIdx = 0;
  55. mCurrentIdx = -1;
  56. }
  57. iterator(HashSet* dict, int idx)
  58. {
  59. mDictionary = dict;
  60. mIdx = idx;
  61. mCurrentIdx = -1;
  62. }
  63. iterator& operator++()
  64. {
  65. MoveNext();
  66. return *this;
  67. }
  68. iterator operator++(int)
  69. {
  70. auto prevVal = *this;
  71. MoveNext();
  72. return prevVal;
  73. }
  74. bool operator!=(const iterator& itr) const
  75. {
  76. return (itr.mDictionary != mDictionary) || (itr.mIdx != mIdx);
  77. }
  78. bool operator==(const iterator& itr) const
  79. {
  80. return (itr.mDictionary == mDictionary) && (itr.mIdx == mIdx);
  81. }
  82. TKey& operator*()
  83. {
  84. return *(TKey*)&mDictionary->mEntries[mCurrentIdx].mKey;
  85. }
  86. TKey* operator->()
  87. {
  88. return (TKey*)&mDictionary->mEntries[mCurrentIdx].mKey;
  89. }
  90. };
  91. struct HashSelector
  92. {
  93. HashSet* mDictionary;
  94. int32 mHashCode;
  95. HashSelector(HashSet* dictionary, int32 hashCode)
  96. {
  97. mDictionary = dictionary;
  98. mHashCode = hashCode;
  99. }
  100. struct iterator
  101. {
  102. public:
  103. typedef std::bidirectional_iterator_tag iterator_category;
  104. public:
  105. HashSelector* mHashSelector;
  106. int_cosize mIdx;
  107. int_cosize mCurrentIdx;
  108. protected:
  109. void MoveNext()
  110. {
  111. auto dictionary = mHashSelector->mDictionary;
  112. while (mIdx >= 0)
  113. {
  114. if (dictionary->mEntries[mIdx].mHashCode == mHashSelector->mHashCode)
  115. {
  116. mCurrentIdx = mIdx;
  117. mIdx = dictionary->mEntries[mIdx].mNext;
  118. return;
  119. }
  120. else
  121. {
  122. mIdx = dictionary->mEntries[mIdx].mNext;
  123. }
  124. }
  125. mIdx = -1;
  126. mCurrentIdx = -1;
  127. }
  128. public:
  129. iterator()
  130. {
  131. mHashSelector = NULL;
  132. mIdx = 0;
  133. mCurrentIdx = -1;
  134. }
  135. iterator(HashSelector* hashSelector, int idx)
  136. {
  137. mHashSelector = hashSelector;
  138. mIdx = idx;
  139. mCurrentIdx = -1;
  140. }
  141. iterator& operator++()
  142. {
  143. MoveNext();
  144. return *this;
  145. }
  146. iterator operator++(int)
  147. {
  148. auto prevVal = *this;
  149. MoveNext();
  150. return prevVal;
  151. }
  152. /*iterator& operator--()
  153. {
  154. mPtr--;
  155. return *this;
  156. }
  157. iterator operator--(int)
  158. {
  159. auto prevVal = *this;
  160. mPtr--;
  161. return prevVal;
  162. }*/
  163. bool operator!=(const iterator& itr) const
  164. {
  165. return (itr.mHashSelector != mHashSelector) || (itr.mCurrentIdx != mCurrentIdx );
  166. }
  167. bool operator==(const iterator& itr) const
  168. {
  169. return (itr.mHashSelector == mHashSelector) && (itr.mCurrentIdx == mCurrentIdx );
  170. }
  171. TKey& operator*()
  172. {
  173. return *(TKey*)&mHashSelector->mDictionary->mEntries[mCurrentIdx].mKey;
  174. }
  175. TKey* operator->()
  176. {
  177. return (TKey*)&mHashSelector->mDictionary->mEntries[mCurrentIdx].mKey;
  178. }
  179. };
  180. iterator begin()
  181. {
  182. int_cosize hashCode = (int_cosize)(mHashCode & 0x7FFFFFFF);
  183. iterator itr(this, mDictionary->mBuckets[hashCode % mDictionary->mAllocSize]);
  184. ++itr;
  185. return itr;
  186. }
  187. iterator end()
  188. {
  189. return iterator(this, -1);
  190. }
  191. };
  192. private:
  193. int_cosize GetPrimeish(int_cosize min)
  194. {
  195. // This is a minimal effort to help address-aligned dataa
  196. return (min | 1);
  197. }
  198. int_cosize ExpandSize(int_cosize oldSize)
  199. {
  200. int_cosize newSize = 2 * oldSize;
  201. // Allow the hashtables to grow to maximum possible size (~2G elements) before encoutering capacity overflow.
  202. // Note that this check works even when mAllocSize overflowed thanks to the (uint) cast
  203. /*if ((uint)newSize > MaxPrimeArrayLength && MaxPrimeArrayLength > oldSize)
  204. {
  205. Contract.Assert( MaxPrimeArrayLength == GetPrime(MaxPrimeArrayLength), "Invalid MaxPrimeArrayLength");
  206. return MaxPrimeArrayLength;
  207. }*/
  208. return GetPrimeish(newSize);
  209. }
  210. void Resize()
  211. {
  212. Resize(ExpandSize(mCount), false);
  213. }
  214. void Resize(intptr newSize, bool forceNewHashCodes)
  215. {
  216. BF_ASSERT(newSize >= mAllocSize);
  217. Entry* newEntries;
  218. int_cosize* newBuckets;
  219. AllocData(newSize, newEntries, newBuckets);
  220. for (int_cosize i = 0; i < newSize; i++)
  221. newBuckets[i] = -1;
  222. for (int i = 0; i < mCount; i++)
  223. {
  224. auto& newEntry = newEntries[i];
  225. auto& oldEntry = mEntries[i];
  226. newEntry.mHashCode = oldEntry.mHashCode;
  227. newEntry.mNext = oldEntry.mNext;
  228. new (&newEntry.mKey) TKey(std::move(*(TKey*)&oldEntry.mKey));
  229. }
  230. for (int i = mCount; i < newSize; i++)
  231. {
  232. newEntries[i].mHashCode = -1;
  233. }
  234. if (forceNewHashCodes)
  235. {
  236. for (int_cosize i = 0; i < mCount; i++)
  237. {
  238. if (newEntries[i].mHashCode != -1)
  239. {
  240. newEntries[i].mHashCode = (int_cosize)BeefHash<TKey>()(*(TKey*)&newEntries[i].mKey) & 0x7FFFFFFF;
  241. }
  242. }
  243. }
  244. for (int_cosize i = 0; i < mCount; i++)
  245. {
  246. if (newEntries[i].mHashCode >= 0)
  247. {
  248. int_cosize bucket = (int_cosize)(newEntries[i].mHashCode % newSize);
  249. newEntries[i].mNext = newBuckets[bucket];
  250. newBuckets[bucket] = i;
  251. }
  252. }
  253. DeleteData();
  254. mBuckets = newBuckets;
  255. mEntries = newEntries;
  256. mAllocSize = (int_cosize)newSize;
  257. }
  258. int FindEntry(const TKey& key)
  259. {
  260. if (mBuckets != NULL)
  261. {
  262. int_cosize hashCode = (int_cosize)BeefHash<TKey>()(key) & 0x7FFFFFFF;
  263. for (int_cosize i = mBuckets[hashCode % mAllocSize]; i >= 0; i = mEntries[i].mNext)
  264. {
  265. if (mEntries[i].mHashCode == hashCode && (*(TKey*)&mEntries[i].mKey == key)) return i;
  266. }
  267. }
  268. return -1;
  269. }
  270. template <typename TOther>
  271. int FindEntryWith(const TOther& key)
  272. {
  273. if (mBuckets != NULL)
  274. {
  275. int_cosize hashCode = (int_cosize)BeefHash<TOther>()(key) & 0x7FFFFFFF;
  276. for (int_cosize i = mBuckets[hashCode % mAllocSize]; i >= 0; i = mEntries[i].mNext)
  277. {
  278. if (mEntries[i].mHashCode == hashCode && (*(TKey*)&mEntries[i].mKey == key)) return i;
  279. }
  280. }
  281. return -1;
  282. }
  283. void Initialize(intptr capacity)
  284. {
  285. int_cosize size = GetPrimeish((int_cosize)capacity);
  286. AllocData(size, mEntries, mBuckets);
  287. mAllocSize = size;
  288. for (int_cosize i = 0; i < (int_cosize)mAllocSize; i++) mBuckets[i] = -1;
  289. mFreeList = -1;
  290. }
  291. bool Insert(const TKey& key, bool add, TKey** keyPtr)
  292. {
  293. if (mBuckets == NULL) Initialize(0);
  294. int_cosize hashCode = (int_cosize)BeefHash<TKey>()(key) & 0x7FFFFFFF;
  295. int_cosize targetBucket = hashCode % (int_cosize)mAllocSize;
  296. for (int_cosize i = mBuckets[targetBucket]; i >= 0; i = mEntries[i].mNext)
  297. {
  298. if ((mEntries[i].mHashCode == hashCode) && (*(TKey*)&mEntries[i].mKey == key))
  299. {
  300. if (add)
  301. {
  302. BF_FATAL("Duplicate key");
  303. //ThrowUnimplemented();
  304. //ThrowHelper.ThrowArgumentException(ExceptionResource.Argument_AddingDuplicate);
  305. }
  306. //entries[i].value = value;
  307. //mVersion++;
  308. if (keyPtr != NULL)
  309. *keyPtr = (TKey*)&mEntries[i].mKey;
  310. return false;
  311. }
  312. }
  313. int_cosize index;
  314. if (mFreeCount > 0)
  315. {
  316. index = mFreeList;
  317. mFreeList = mEntries[index].mNext;
  318. mFreeCount--;
  319. }
  320. else
  321. {
  322. if (mCount == mAllocSize)
  323. {
  324. Resize();
  325. targetBucket = hashCode % (int_cosize)mAllocSize;
  326. }
  327. index = mCount;
  328. mCount++;
  329. }
  330. mEntries[index].mHashCode = hashCode;
  331. mEntries[index].mNext = mBuckets[targetBucket];
  332. new (&mEntries[index].mKey) TKey(key);
  333. mBuckets[targetBucket] = index;
  334. //mVersion++;
  335. if (keyPtr != NULL)
  336. *keyPtr = (TKey*)&mEntries[index].mKey;
  337. return true;
  338. }
  339. void RemoveIdx(int_cosize bucket, int_cosize i, int_cosize last)
  340. {
  341. if (last < 0)
  342. {
  343. mBuckets[bucket] = mEntries[i].mNext;
  344. }
  345. else
  346. {
  347. mEntries[last].mNext = mEntries[i].mNext;
  348. }
  349. mEntries[i].mHashCode = -1;
  350. mEntries[i].mNext = mFreeList;
  351. ((TKey*)&mEntries[i].mKey)->~TKey();
  352. mFreeList = i;
  353. mFreeCount++;
  354. }
  355. public:
  356. HashSet()
  357. {
  358. mBuckets = NULL;
  359. mEntries = NULL;
  360. mAllocSize = 0;
  361. mCount = 0;
  362. mFreeList = 0;
  363. mFreeCount = 0;
  364. }
  365. HashSet(const HashSet& val)
  366. {
  367. mAllocSize = val.mAllocSize;
  368. mCount = val.mCount;
  369. if (mAllocSize == 0)
  370. {
  371. mBuckets = NULL;
  372. mEntries = NULL;
  373. }
  374. else
  375. {
  376. AllocData(mAllocSize, mEntries, mBuckets);
  377. for (int_cosize i = 0; i < mAllocSize; i++)
  378. mBuckets[i] = val.mBuckets[i];
  379. for (int i = 0; i < mCount; i++)
  380. {
  381. auto& newEntry = mEntries[i];
  382. auto& oldEntry = val.mEntries[i];
  383. newEntry.mHashCode = oldEntry.mHashCode;
  384. newEntry.mNext = oldEntry.mNext;
  385. new (&newEntry.mKey) TKey(*(TKey*)&oldEntry.mKey);
  386. }
  387. for (int i = mCount; i < mAllocSize; i++)
  388. {
  389. mEntries[i].mHashCode = -1;
  390. }
  391. }
  392. mFreeCount = val.mFreeCount;
  393. mFreeList = val.mFreeList;
  394. }
  395. HashSet(HashSet&& val)
  396. {
  397. mAllocSize = val.mAllocSize;
  398. mCount = val.mCount;
  399. mBuckets = val.mBuckets;
  400. mEntries = val.mEntries;
  401. mFreeCount = val.mFreeCount;
  402. mFreeList = val.mFreeList;
  403. }
  404. void AllocData(intptr size, Entry*& outEntries, int_cosize*& outBuckets)
  405. {
  406. uint8* data = (uint8*)this->rawAllocate(size * (sizeof(Entry) + sizeof(int_cosize)));
  407. outEntries = (Entry*)data;
  408. outBuckets = (int_cosize*)(data + size * sizeof(Entry));
  409. }
  410. void DeleteData()
  411. {
  412. this->rawDeallocate(mEntries);
  413. }
  414. ~HashSet()
  415. {
  416. if (!std::is_pod<TKey>::value)
  417. {
  418. for (int_cosize i = 0; i < mCount; i++)
  419. {
  420. if (mEntries[i].mHashCode != -1)
  421. ((TKey*)&mEntries[i].mKey)->~TKey();
  422. }
  423. }
  424. DeleteData();
  425. }
  426. HashSet& operator=(const HashSet& rhs)
  427. {
  428. Clear();
  429. for (auto& val : rhs)
  430. Add(val);
  431. return *this;
  432. }
  433. intptr GetCount() const
  434. {
  435. return mCount - mFreeCount;
  436. }
  437. intptr size() const
  438. {
  439. return mCount - mFreeCount;
  440. }
  441. bool IsEmpty() const
  442. {
  443. return mCount - mFreeCount == 0;
  444. }
  445. void Reserve(intptr size)
  446. {
  447. if (size > mAllocSize)
  448. Resize(size, false);
  449. }
  450. bool Add(const TKey& key)
  451. {
  452. if (!Insert(key, false, NULL))
  453. return false;
  454. return true;
  455. }
  456. bool TryAdd(const TKey& key, TKey** keyPtr)
  457. {
  458. if (!Insert(key, false, keyPtr))
  459. return false;
  460. return true;
  461. }
  462. bool TryGet(const TKey& key, TKey** keyPtr)
  463. {
  464. int idx = FindEntry(key);
  465. if (idx == -1)
  466. return false;
  467. *keyPtr = (TKey*)&mEntries[idx].mKey;
  468. return true;
  469. }
  470. template <typename TOther>
  471. bool TryGetWith(const TOther& key, TKey** keyPtr)
  472. {
  473. int idx = FindEntryWith(key);
  474. if (idx == -1)
  475. return false;
  476. *keyPtr = (TKey*)&mEntries[idx].mKey;
  477. return true;
  478. }
  479. bool Remove(const TKey& key)
  480. {
  481. if (mBuckets != NULL)
  482. {
  483. int_cosize hashCode = (int_cosize)BeefHash<TKey>()(key) & 0x7FFFFFFF;
  484. int_cosize bucket = hashCode % (int_cosize)mAllocSize;
  485. int_cosize last = -1;
  486. for (int_cosize i = mBuckets[bucket]; i >= 0; last = i, i = mEntries[i].mNext)
  487. {
  488. if ((mEntries[i].mHashCode == hashCode) && (*(TKey*)&mEntries[i].mKey == key))
  489. {
  490. RemoveIdx(bucket, i, last);
  491. return true;
  492. }
  493. }
  494. }
  495. return false;
  496. }
  497. iterator Remove(const iterator& itr)
  498. {
  499. iterator nextItr = itr;
  500. ++nextItr;
  501. auto& entry = mEntries[itr.mCurrentIdx];
  502. BF_ASSERT(entry.mHashCode >= 0);
  503. // We have to iterate through to mCurrentIdx so we can get 'last'
  504. int_cosize hashCode = entry.mHashCode;
  505. int_cosize bucket = hashCode % (int_cosize)mAllocSize;
  506. int_cosize last = -1;
  507. for (int_cosize i = mBuckets[bucket]; i >= 0; last = i, i = mEntries[i].mNext)
  508. {
  509. if ((mEntries[i].mHashCode == hashCode) && (i == itr.mCurrentIdx))
  510. {
  511. RemoveIdx(bucket, i, last);
  512. return nextItr;
  513. }
  514. }
  515. BF_FATAL("not found");
  516. return nextItr;
  517. }
  518. void Clear()
  519. {
  520. if (mCount > 0)
  521. {
  522. for (int_cosize i = 0; i < mAllocSize; i++) mBuckets[i] = -1;
  523. if (!std::is_pod<TKey>::value)
  524. {
  525. for (int_cosize i = 0; i < mCount; i++)
  526. {
  527. if (mEntries[i].mHashCode != -1)
  528. ((TKey*)&mEntries[i].mKey)->~TKey();
  529. }
  530. }
  531. for (int_cosize i = 0; i < mCount; i++)
  532. {
  533. mEntries[i].mHashCode = -1;
  534. }
  535. mFreeList = -1;
  536. mCount = 0;
  537. mFreeCount = 0;
  538. }
  539. }
  540. bool Contains(const TKey& key)
  541. {
  542. return FindEntry(key) >= 0;
  543. }
  544. template <typename TOther>
  545. bool ContainsWith(const TOther& key)
  546. {
  547. return FindEntryWith(key) != -1;
  548. }
  549. // Note: there's no const_iterator
  550. iterator begin() const
  551. {
  552. iterator itr((HashSet*)this, 0);
  553. ++itr;
  554. return itr;
  555. }
  556. iterator end() const
  557. {
  558. return iterator((HashSet*)this, mCount + 1);
  559. }
  560. HashSelector SelectHashes(size_t hashCode)
  561. {
  562. return HashSelector(this, (int32)hashCode);
  563. }
  564. };
  565. NS_BF_END;