string_set.cpp 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. struct StringSetEntry {
  2. u32 hash;
  3. MapIndex next;
  4. String value;
  5. operator String const() const noexcept {
  6. return this->value;
  7. }
  8. operator String const &() const noexcept {
  9. return this->value;
  10. }
  11. };
  12. struct StringSet {
  13. Slice<MapIndex> hashes;
  14. Array<StringSetEntry> entries;
  15. };
  16. gb_internal void string_set_init (StringSet *s, isize capacity = 16);
  17. gb_internal void string_set_destroy(StringSet *s);
  18. gb_internal void string_set_add (StringSet *s, String const &str);
  19. gb_internal bool string_set_update (StringSet *s, String const &str); // returns true if it previously existed
  20. gb_internal bool string_set_exists (StringSet *s, String const &str);
  21. gb_internal void string_set_remove (StringSet *s, String const &str);
  22. gb_internal void string_set_clear (StringSet *s);
  23. gb_internal void string_set_grow (StringSet *s);
  24. gb_internal void string_set_rehash (StringSet *s, isize new_count);
  25. gb_internal gbAllocator string_set_allocator(void) {
  26. return heap_allocator();
  27. }
  28. gb_internal gb_inline void string_set_init(StringSet *s, isize capacity) {
  29. capacity = next_pow2_isize(gb_max(16, capacity));
  30. slice_init(&s->hashes, string_set_allocator(), capacity);
  31. array_init(&s->entries, string_set_allocator(), 0, capacity);
  32. for (isize i = 0; i < capacity; i++) {
  33. s->hashes.data[i] = MAP_SENTINEL;
  34. }
  35. }
  36. gb_internal gb_inline void string_set_destroy(StringSet *s) {
  37. if (s->entries.allocator.proc == nullptr) {
  38. s->entries.allocator = string_set_allocator();
  39. }
  40. slice_free(&s->hashes, s->entries.allocator);
  41. array_free(&s->entries);
  42. }
  43. gb_internal MapIndex string_set__add_entry(StringSet *s, StringHashKey const &key) {
  44. StringSetEntry e = {};
  45. e.hash = key.hash;
  46. e.next = MAP_SENTINEL;
  47. e.value = key.string;
  48. array_add(&s->entries, e);
  49. return cast(MapIndex)(s->entries.count-1);
  50. }
  51. gb_internal MapFindResult string_set__find(StringSet *s, StringHashKey const &key) {
  52. MapFindResult fr = {MAP_SENTINEL, MAP_SENTINEL, MAP_SENTINEL};
  53. if (s->hashes.count > 0) {
  54. fr.hash_index = cast(MapIndex)(((u64)key.hash) % s->hashes.count);
  55. fr.entry_index = s->hashes[fr.hash_index];
  56. while (fr.entry_index != MAP_SENTINEL) {
  57. auto const &entry = s->entries[fr.entry_index];
  58. if (entry.hash == key.hash && entry.value == key.string) {
  59. return fr;
  60. }
  61. fr.entry_prev = fr.entry_index;
  62. fr.entry_index = s->entries[fr.entry_index].next;
  63. }
  64. }
  65. return fr;
  66. }
  67. gb_internal MapFindResult string_set__find_from_entry(StringSet *s, StringSetEntry *e) {
  68. MapFindResult fr = {MAP_SENTINEL, MAP_SENTINEL, MAP_SENTINEL};
  69. if (s->hashes.count > 0) {
  70. fr.hash_index = cast(MapIndex)(e->hash % s->hashes.count);
  71. fr.entry_index = s->hashes[fr.hash_index];
  72. while (fr.entry_index != MAP_SENTINEL) {
  73. if (&s->entries[fr.entry_index] == e) {
  74. return fr;
  75. }
  76. fr.entry_prev = fr.entry_index;
  77. fr.entry_index = s->entries[fr.entry_index].next;
  78. }
  79. }
  80. return fr;
  81. }
  82. gb_internal b32 string_set__full(StringSet *s) {
  83. return 0.75f * s->hashes.count <= s->entries.count;
  84. }
  85. gb_internal gb_inline void string_set_grow(StringSet *s) {
  86. isize new_count = gb_max(s->hashes.count<<1, 16);
  87. string_set_rehash(s, new_count);
  88. }
  89. gb_internal void string_set_reset_entries(StringSet *s) {
  90. for (isize i = 0; i < s->hashes.count; i++) {
  91. s->hashes.data[i] = MAP_SENTINEL;
  92. }
  93. for (isize i = 0; i < s->entries.count; i++) {
  94. MapFindResult fr;
  95. StringSetEntry *e = &s->entries.data[i];
  96. e->next = MAP_SENTINEL;
  97. fr = string_set__find_from_entry(s, e);
  98. if (fr.entry_prev == MAP_SENTINEL) {
  99. s->hashes[fr.hash_index] = cast(MapIndex)i;
  100. } else {
  101. s->entries[fr.entry_prev].next = cast(MapIndex)i;
  102. }
  103. }
  104. }
  105. gb_internal void string_set_reserve(StringSet *s, isize cap) {
  106. if (s->entries.allocator.proc == nullptr) {
  107. s->entries.allocator = string_set_allocator();
  108. }
  109. array_reserve(&s->entries, cap);
  110. if (s->entries.count*2 < s->hashes.count) {
  111. return;
  112. }
  113. slice_resize(&s->hashes, s->entries.allocator, cap*2);
  114. string_set_reset_entries(s);
  115. }
  116. gb_internal void string_set_rehash(StringSet *s, isize new_count) {
  117. string_set_reserve(s, new_count);
  118. }
  119. gb_inline bool string_set_exists(StringSet *s, String const &str) {
  120. StringHashKey key = string_hash_string(str);
  121. isize index = string_set__find(s, key).entry_index;
  122. return index != MAP_SENTINEL;
  123. }
  124. gb_internal void string_set_add(StringSet *s, String const &str) {
  125. MapIndex index;
  126. MapFindResult fr;
  127. StringHashKey key = string_hash_string(str);
  128. if (s->hashes.count == 0) {
  129. string_set_grow(s);
  130. }
  131. fr = string_set__find(s, key);
  132. if (fr.entry_index != MAP_SENTINEL) {
  133. index = fr.entry_index;
  134. } else {
  135. index = string_set__add_entry(s, key);
  136. if (fr.entry_prev != MAP_SENTINEL) {
  137. s->entries[fr.entry_prev].next = index;
  138. } else {
  139. s->hashes[fr.hash_index] = index;
  140. }
  141. }
  142. s->entries[index].value = str;
  143. if (string_set__full(s)) {
  144. string_set_grow(s);
  145. }
  146. }
  147. gb_internal bool string_set_update(StringSet *s, String const &str) {
  148. bool exists = false;
  149. MapIndex index;
  150. MapFindResult fr;
  151. StringHashKey key = string_hash_string(str);
  152. if (s->hashes.count == 0) {
  153. string_set_grow(s);
  154. }
  155. fr = string_set__find(s, key);
  156. if (fr.entry_index != MAP_SENTINEL) {
  157. index = fr.entry_index;
  158. exists = true;
  159. } else {
  160. index = string_set__add_entry(s, key);
  161. if (fr.entry_prev != MAP_SENTINEL) {
  162. s->entries[fr.entry_prev].next = index;
  163. } else {
  164. s->hashes[fr.hash_index] = index;
  165. }
  166. }
  167. s->entries[index].value = str;
  168. if (string_set__full(s)) {
  169. string_set_grow(s);
  170. }
  171. return exists;
  172. }
  173. gb_internal void string_set__erase(StringSet *s, MapFindResult fr) {
  174. MapFindResult last;
  175. if (fr.entry_prev == MAP_SENTINEL) {
  176. s->hashes[fr.hash_index] = s->entries[fr.entry_index].next;
  177. } else {
  178. s->entries[fr.entry_prev].next = s->entries[fr.entry_index].next;
  179. }
  180. if (fr.entry_index == s->entries.count-1) {
  181. array_pop(&s->entries);
  182. return;
  183. }
  184. auto *entry = &s->entries[fr.entry_index];
  185. *entry = s->entries[s->entries.count-1];
  186. StringHashKey key;
  187. key.hash = entry->hash;
  188. key.string = entry->value;
  189. last = string_set__find(s, key);
  190. if (last.entry_prev != MAP_SENTINEL) {
  191. s->entries[last.entry_prev].next = fr.entry_index;
  192. } else {
  193. s->hashes[last.hash_index] = fr.entry_index;
  194. }
  195. }
  196. gb_internal void string_set_remove(StringSet *s, String const &str) {
  197. StringHashKey key = string_hash_string(str);
  198. MapFindResult fr = string_set__find(s, key);
  199. if (fr.entry_index != MAP_SENTINEL) {
  200. string_set__erase(s, fr);
  201. }
  202. }
  203. gb_internal gb_inline void string_set_clear(StringSet *s) {
  204. array_clear(&s->entries);
  205. for_array(i, s->hashes) {
  206. s->hashes.data[i] = MAP_SENTINEL;
  207. }
  208. }
  209. gb_internal StringSetEntry *begin(StringSet &m) noexcept {
  210. return m.entries.data;
  211. }
  212. gb_internal StringSetEntry const *begin(StringSet const &m) noexcept {
  213. return m.entries.data;
  214. }
  215. gb_internal StringSetEntry *end(StringSet &m) noexcept {
  216. return m.entries.data + m.entries.count;
  217. }
  218. gb_internal StringSetEntry const *end(StringSet const &m) noexcept {
  219. return m.entries.data + m.entries.count;
  220. }