IntHash.hx 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. /*
  2. * Copyright (c) 2005, The haXe Project Contributors
  3. * All rights reserved.
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions are met:
  6. *
  7. * - Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * - Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. *
  13. * THIS SOFTWARE IS PROVIDED BY THE HAXE PROJECT CONTRIBUTORS "AS IS" AND ANY
  14. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  15. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  16. * DISCLAIMED. IN NO EVENT SHALL THE HAXE PROJECT CONTRIBUTORS BE LIABLE FOR
  17. * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  18. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  19. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  20. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  21. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  22. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
  23. * DAMAGE.
  24. */
  25. import java.NativeArray;
  26. /*
  27. * This IntHash implementation is based on khash (https://github.com/attractivechaos/klib/blob/master/khash.h)
  28. * Copyright goes to Attractive Chaos <[email protected]> and his contributors
  29. *
  30. * Thanks also to Jonas Malaco Filho for his Haxe-written IntHash code inspired by Python tables.
  31. * (https://jonasmalaco.com/fossil/test/jonas-haxe/artifact/887b53126e237d6c68951111d594033403889304)
  32. */
  33. @:core_api class IntHash<T>
  34. {
  35. private static inline var HASH_UPPER = 0.7;
  36. private var flags:NativeArray<Int>;
  37. private var _keys:NativeArray<Int>;
  38. private var vals:NativeArray<T>;
  39. private var nBuckets:Int;
  40. private var size:Int;
  41. private var nOccupied:Int;
  42. private var upperBound:Int;
  43. private var cachedKey:Int;
  44. private var cachedIndex:Int;
  45. public function new() : Void
  46. {
  47. cachedIndex = -1;
  48. }
  49. public function set( key : Int, value : T ) : Void
  50. {
  51. var x:Int;
  52. if (nOccupied >= upperBound)
  53. {
  54. if (nBuckets > (size << 1))
  55. resize(nBuckets - 1); //clear "deleted" elements
  56. else
  57. resize(nBuckets + 1);
  58. }
  59. var flags = flags, _keys = _keys;
  60. {
  61. var mask = nBuckets - 1;
  62. var site = x = nBuckets;
  63. var k = hash(key);
  64. var i = k & mask;
  65. //for speed up
  66. if (flagIsEmpty(flags, i)) {
  67. x = i;
  68. } else {
  69. var inc = getInc(k, mask);
  70. var last = i;
  71. while (! (isEither(flags, i) || _keys[i] == key) )
  72. {
  73. i = (i + inc) & mask;
  74. #if DEBUG_HASHTBL
  75. if (i == last)
  76. {
  77. throw "assert";
  78. }
  79. #end
  80. }
  81. x = i;
  82. }
  83. }
  84. if (flagIsEmpty(flags, x))
  85. {
  86. _keys[x] = key;
  87. vals[x] = value;
  88. setIsBothFalse(flags, x);
  89. size++;
  90. nOccupied++;
  91. } else if (flagIsDel(flags, x)) {
  92. _keys[x] = key;
  93. vals[x] = value;
  94. setIsBothFalse(flags, x);
  95. size++;
  96. } else {
  97. assert(_keys[x] == key);
  98. vals[x] = value;
  99. }
  100. }
  101. @:final private function lookup( key : Int ) : Int
  102. {
  103. if (nBuckets != 0)
  104. {
  105. var flags = flags, _keys = _keys;
  106. var mask = nBuckets - 1, k = hash(key);
  107. var i = k & mask;
  108. var inc = getInc(k, mask); /* inc == 1 for linear probing */
  109. var last = i;
  110. while (!flagIsEmpty(flags, i) && (flagIsDel(flags, i) || _keys[i] != key))
  111. {
  112. i = (i + inc) & mask;
  113. if (i == last)
  114. return -1;
  115. }
  116. return isEither(flags, i) ? -1 : i;
  117. }
  118. return -1;
  119. }
  120. public function get( key : Int ) : Null<T>
  121. {
  122. var idx = -1;
  123. if (cachedKey == key && ( (idx = cachedIndex) != -1 ))
  124. {
  125. return vals[idx];
  126. }
  127. idx = lookup(key);
  128. if (idx != -1)
  129. {
  130. cachedKey = key;
  131. cachedIndex = idx;
  132. return vals[idx];
  133. }
  134. return null;
  135. }
  136. private function getDefault( key : Int, def : T ) : T
  137. {
  138. var idx = -1;
  139. if (cachedKey == key && ( (idx = cachedIndex) != -1 ))
  140. {
  141. return vals[idx];
  142. }
  143. idx = lookup(key);
  144. if (idx != -1)
  145. {
  146. cachedKey = key;
  147. cachedIndex = idx;
  148. return vals[idx];
  149. }
  150. return def;
  151. }
  152. public function exists( key : Int ) : Bool
  153. {
  154. var idx = -1;
  155. if (cachedKey == key && ( (idx = cachedIndex) != -1 ))
  156. {
  157. return true;
  158. }
  159. idx = lookup(key);
  160. if (idx != -1)
  161. {
  162. cachedKey = key;
  163. cachedIndex = idx;
  164. return true;
  165. }
  166. return false;
  167. }
  168. public function remove( key : Int ) : Bool
  169. {
  170. var idx = -1;
  171. if (! (cachedKey == key && ( (idx = cachedIndex) != -1 )))
  172. {
  173. idx = lookup(key);
  174. }
  175. if (idx == -1)
  176. {
  177. return false;
  178. } else {
  179. if (cachedKey == key)
  180. cachedIndex = -1;
  181. if (!isEither(flags, idx))
  182. {
  183. setIsDelTrue(flags, idx);
  184. --size;
  185. vals[idx] = null;
  186. _keys[idx] = 0;
  187. }
  188. return true;
  189. }
  190. }
  191. @:final private function resize(newNBuckets:Int) : Void
  192. {
  193. //This function uses 0.25*n_bucktes bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets.
  194. var newFlags = null;
  195. var j = 1;
  196. {
  197. newNBuckets = roundUp(newNBuckets);
  198. if (newNBuckets < 4) newNBuckets = 4;
  199. if (size >= (newNBuckets * HASH_UPPER + 0.5)) /* requested size is too small */
  200. {
  201. j = 0;
  202. } else { /* hash table size to be changed (shrink or expand); rehash */
  203. var nfSize = flagsSize(newNBuckets);
  204. newFlags = new NativeArray( nfSize );
  205. for (i in 0...nfSize)
  206. newFlags[i] = 0xaaaaaaaa;
  207. if (nBuckets < newNBuckets) //expand
  208. {
  209. var k = new NativeArray(newNBuckets);
  210. if (_keys != null)
  211. arrayCopy(_keys, 0, k, 0, nBuckets);
  212. _keys = k;
  213. var v = new NativeArray(newNBuckets);
  214. if (vals != null)
  215. arrayCopy(vals, 0, v, 0, nBuckets);
  216. vals = v;
  217. } //otherwise shrink
  218. }
  219. }
  220. if (j != 0)
  221. { //rehashing is required
  222. //resetting cache
  223. cachedKey = 0;
  224. cachedIndex = -1;
  225. j = -1;
  226. var nBuckets = nBuckets, _keys = _keys, vals = vals, flags = flags;
  227. var newMask = newNBuckets - 1;
  228. while (++j < nBuckets)
  229. {
  230. if (!isEither(flags, j))
  231. {
  232. var key = _keys[j];
  233. var val = vals[j];
  234. setIsDelTrue(flags, j);
  235. while (true) /* kick-out process; sort of like in Cuckoo hashing */
  236. {
  237. var k = hash(key);
  238. var inc = getInc(k, newMask);
  239. var i = k & newMask;
  240. while (!flagIsEmpty(newFlags, i))
  241. i = (i + inc) & newMask;
  242. setIsEmptyFalse(newFlags, i);
  243. if (i < nBuckets && !isEither(flags, i)) /* kick out the existing element */
  244. {
  245. {
  246. var tmp = _keys[i];
  247. _keys[i] = key;
  248. key = tmp;
  249. }
  250. {
  251. var tmp = vals[i];
  252. vals[i] = val;
  253. val = tmp;
  254. }
  255. setIsDelTrue(flags, i); /* mark it as deleted in the old hash table */
  256. } else { /* write the element and jump out of the loop */
  257. _keys[i] = key;
  258. vals[i] = val;
  259. break;
  260. }
  261. }
  262. }
  263. }
  264. if (nBuckets > newNBuckets) /* shrink the hash table */
  265. {
  266. {
  267. var k = new NativeArray(newNBuckets);
  268. arrayCopy(_keys, 0, k, 0, newNBuckets);
  269. this._keys = k;
  270. }
  271. {
  272. var v = new NativeArray(newNBuckets);
  273. arrayCopy(vals, 0, v, 0, newNBuckets);
  274. this.vals = v;
  275. }
  276. }
  277. this.flags = newFlags;
  278. this.nBuckets = newNBuckets;
  279. this.nOccupied = size;
  280. this.upperBound = Std.int(newNBuckets * HASH_UPPER + .5);
  281. }
  282. }
  283. /**
  284. Returns an iterator of all keys in the hashtable.
  285. Implementation detail: Do not set() any new value while iterating, as it may cause a resize, which will break iteration
  286. **/
  287. public function keys() : Iterator<Int>
  288. {
  289. var i = 0;
  290. var len = nBuckets;
  291. return {
  292. hasNext: function() {
  293. for (j in i...len)
  294. {
  295. if (!isEither(flags, j))
  296. {
  297. i = j;
  298. return true;
  299. }
  300. }
  301. return false;
  302. },
  303. next: function() {
  304. var ret = _keys[i];
  305. cachedIndex = i;
  306. cachedKey = ret;
  307. i = i + 1;
  308. return ret;
  309. }
  310. };
  311. }
  312. /**
  313. Returns an iterator of all values in the hashtable.
  314. Implementation detail: Do not set() any new value while iterating, as it may cause a resize, which will break iteration
  315. **/
  316. public function iterator() : Iterator<T>
  317. {
  318. var i = 0;
  319. var len = nBuckets;
  320. return {
  321. hasNext: function() {
  322. for (j in i...len)
  323. {
  324. if (!isEither(flags, j))
  325. {
  326. i = j;
  327. return true;
  328. }
  329. }
  330. return false;
  331. },
  332. next: function() {
  333. var ret = vals[i];
  334. i = i + 1;
  335. return ret;
  336. }
  337. };
  338. }
  339. /**
  340. Returns an displayable representation of the hashtable content.
  341. **/
  342. public function toString() : String {
  343. var s = new StringBuf();
  344. s.add("{");
  345. var it = keys();
  346. for( i in it ) {
  347. s.add(i);
  348. s.add(" => ");
  349. s.add(Std.string(get(i)));
  350. if( it.hasNext() )
  351. s.add(", ");
  352. }
  353. s.add("}");
  354. return s.toString();
  355. }
  356. private static inline function assert(x:Bool):Void
  357. {
  358. #if debug
  359. if (!x) throw "assert failed";
  360. #end
  361. }
  362. private static inline function defaultK():Int return 0
  363. private static inline function arrayCopy(sourceArray:Dynamic, sourceIndex:Int, destinationArray:Dynamic, destinationIndex:Int, length:Int):Void
  364. java.lang.System.arraycopy(sourceArray, sourceIndex, destinationArray, destinationIndex, length)
  365. private static inline function getInc(k:Int, mask:Int):Int
  366. return (((k) >> 3 ^ (k) << 3) | 1) & (mask)
  367. private static inline function hash(i:Int):Int
  368. return i
  369. private static inline function flagIsEmpty(flag:NativeArray<Int>, i:Int):Bool
  370. return ( (flag[i >> 4] >>> ((i & 0xf) << 1)) & 2 ) != 0
  371. private static inline function flagIsDel(flag:NativeArray<Int>, i:Int):Bool
  372. return ((flag[i >> 4] >>> ((i & 0xf) << 1)) & 1) != 0
  373. private static inline function isEither(flag:NativeArray<Int>, i:Int):Bool
  374. return ((flag[i >> 4] >>> ((i & 0xf) << 1)) & 3) != 0
  375. private static inline function setIsDelFalse(flag:NativeArray<Int>, i:Int):Void
  376. flag[i >> 4] &= ~(1 << ((i & 0xf) << 1))
  377. private static inline function setIsEmptyFalse(flag:NativeArray<Int>, i:Int):Void
  378. flag[i >> 4] &= ~(2 << ((i & 0xf) << 1))
  379. private static inline function setIsBothFalse(flag:NativeArray<Int>, i:Int):Void
  380. flag[i >> 4] &= ~(3 << ((i & 0xf) << 1))
  381. private static inline function setIsDelTrue(flag:NativeArray<Int>, i:Int):Void
  382. flag[i >> 4] |= 1 << ((i & 0xf) << 1)
  383. private static inline function roundUp(x:Int):Int
  384. {
  385. --x;
  386. x |= (x) >>> 1;
  387. x |= (x) >>> 2;
  388. x |= (x) >>> 4;
  389. x |= (x) >>> 8;
  390. x |= (x) >>> 16;
  391. return ++x;
  392. }
  393. private static inline function flagsSize(m:Int):Int
  394. return ((m) < 16? 1 : (m) >> 4)
  395. }