Hash.hx 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489
  1. import java.NativeArray;
  2. /*
  3. * Copyright (c) 2005, The haXe Project Contributors
  4. * All rights reserved.
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * - Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * - Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE HAXE PROJECT CONTRIBUTORS "AS IS" AND ANY
  15. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  16. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  17. * DISCLAIMED. IN NO EVENT SHALL THE HAXE PROJECT CONTRIBUTORS BE LIABLE FOR
  18. * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  20. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  21. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
  24. * DAMAGE.
  25. */
  26. @:core_api class Hash<T>
  27. {
  28. @:extern private static inline var HASH_UPPER = 0.77;
  29. @:extern private static inline var FLAG_EMPTY = 0;
  30. @:extern private static inline var FLAG_DEL = 1;
  31. /**
  32. * This is the most important structure here and the reason why it's so fast.
  33. * It's an array of all the hashes contained in the table. These hashes cannot be 0 nor 1,
  34. * which stand for "empty" and "deleted" states.
  35. *
  36. * The lookup algorithm will keep looking until a 0 or the key wanted is found;
  37. * The insertion algorithm will do the same but will also break when FLAG_DEL is found;
  38. */
  39. private var hashes:NativeArray<HashType>;
  40. private var _keys:NativeArray<String>;
  41. private var vals:NativeArray<T>;
  42. private var nBuckets:Int;
  43. private var size:Int;
  44. private var nOccupied:Int;
  45. private var upperBound:Int;
  46. private var cachedKey:String;
  47. private var cachedIndex:Int;
  48. #if DEBUG_HASHTBL
  49. private var totalProbes:Int;
  50. private var probeTimes:Int;
  51. private var sameHash:Int;
  52. private var maxProbe:Int;
  53. #end
  54. public function new() : Void
  55. {
  56. cachedIndex = -1;
  57. }
  58. public function set( key : String, value : T ) : Void
  59. {
  60. var x:Int, k:Int;
  61. if (nOccupied >= upperBound)
  62. {
  63. if (nBuckets > (size << 1))
  64. resize(nBuckets - 1); //clear "deleted" elements
  65. else
  66. resize(nBuckets + 2);
  67. }
  68. var hashes = hashes, keys = _keys, hashes = hashes;
  69. {
  70. var mask = (nBuckets == 0) ? 0 : nBuckets - 1;
  71. var site = x = nBuckets;
  72. k = hash(key);
  73. var i = k & mask, nProbes = 0;
  74. //for speed up
  75. if (isEither(hashes[i])) {
  76. x = i;
  77. } else {
  78. //var inc = getInc(k, mask);
  79. var last = i, flag;
  80. while(! (isEither(flag = hashes[i]) || (flag == k && _keys[i] == key)) )
  81. {
  82. i = (i + ++nProbes) & mask;
  83. #if DEBUG_HASHTBL
  84. probeTimes++;
  85. if (i == last)
  86. throw "assert";
  87. #end
  88. }
  89. x = i;
  90. }
  91. #if DEBUG_HASHTBL
  92. if (nProbes > maxProbe)
  93. maxProbe = nProbes;
  94. totalProbes++;
  95. #end
  96. }
  97. var flag = hashes[x];
  98. if (isEmpty(flag))
  99. {
  100. keys[x] = key;
  101. vals[x] = value;
  102. hashes[x] = k;
  103. size++;
  104. nOccupied++;
  105. } else if (isDel(flag)) {
  106. keys[x] = key;
  107. vals[x] = value;
  108. hashes[x] = k;
  109. size++;
  110. } else {
  111. assert(_keys[x] == key);
  112. vals[x] = value;
  113. }
  114. cachedIndex = x;
  115. cachedKey = key;
  116. }
  117. @:final private function lookup( key : String ) : Int
  118. {
  119. if (nBuckets != 0)
  120. {
  121. var hashes = hashes, keys = _keys;
  122. var mask = nBuckets - 1, hash = hash(key), k = hash, nProbes = 0;
  123. var i = k & mask;
  124. var last = i, flag;
  125. //var inc = getInc(k, mask);
  126. while (!isEmpty(flag = hashes[i]) && (isDel(flag) || flag != k || keys[i] != key))
  127. {
  128. i = (i + ++nProbes) & mask;
  129. #if DEBUG_HASHTBL
  130. probeTimes++;
  131. if (i == last)
  132. throw "assert";
  133. #end
  134. }
  135. #if DEBUG_HASHTBL
  136. if (nProbes > maxProbe)
  137. maxProbe = nProbes;
  138. totalProbes++;
  139. #end
  140. return isEither(flag) ? -1 : i;
  141. }
  142. return -1;
  143. }
  144. @:final @:private function resize(newNBuckets:Int) : Void
  145. {
  146. //This function uses 0.25*n_bucktes bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets.
  147. var newHash = null;
  148. var j = 1;
  149. {
  150. newNBuckets = roundUp(newNBuckets);
  151. if (newNBuckets < 4) newNBuckets = 4;
  152. if (size >= (newNBuckets * HASH_UPPER + 0.5)) /* requested size is too small */
  153. {
  154. j = 0;
  155. } else { /* hash table size to be changed (shrink or expand); rehash */
  156. var nfSize = newNBuckets;
  157. newHash = new NativeArray( nfSize );
  158. if (nBuckets < newNBuckets) //expand
  159. {
  160. var k = new NativeArray(newNBuckets);
  161. if (_keys != null)
  162. arrayCopy(_keys, 0, k, 0, nBuckets);
  163. _keys = k;
  164. var v = new NativeArray(newNBuckets);
  165. if (vals != null)
  166. arrayCopy(vals, 0, v, 0, nBuckets);
  167. vals = v;
  168. } //otherwise shrink
  169. }
  170. }
  171. if (j != 0)
  172. { //rehashing is required
  173. //resetting cache
  174. cachedKey = null;
  175. cachedIndex = -1;
  176. j = -1;
  177. var nBuckets = nBuckets, _keys = _keys, vals = vals, hashes = hashes;
  178. var newMask = newNBuckets - 1;
  179. while (++j < nBuckets)
  180. {
  181. var k;
  182. if (!isEither(k = hashes[j]))
  183. {
  184. var key = _keys[j];
  185. var val = vals[j];
  186. hashes[j] = FLAG_DEL;
  187. while (true) /* kick-out process; sort of like in Cuckoo hashing */
  188. {
  189. var nProbes = 0;
  190. //var inc = getInc(k, newMask);
  191. var i = k & newMask;
  192. while (!isEmpty(newHash[i]))
  193. i = (i + ++nProbes) & newMask;
  194. newHash[i] = k;
  195. if (i < nBuckets && !isEither(k = hashes[i])) /* kick out the existing element */
  196. {
  197. {
  198. var tmp = _keys[i];
  199. _keys[i] = key;
  200. key = tmp;
  201. }
  202. {
  203. var tmp = vals[i];
  204. vals[i] = val;
  205. val = tmp;
  206. }
  207. hashes[i] = FLAG_DEL; /* mark it as deleted in the old hash table */
  208. } else { /* write the element and jump out of the loop */
  209. _keys[i] = key;
  210. vals[i] = val;
  211. break;
  212. }
  213. }
  214. }
  215. }
  216. if (nBuckets > newNBuckets) /* shrink the hash table */
  217. {
  218. {
  219. var k = new NativeArray(newNBuckets);
  220. arrayCopy(_keys, 0, k, 0, newNBuckets);
  221. this._keys = k;
  222. }
  223. {
  224. var v = new NativeArray(newNBuckets);
  225. arrayCopy(vals, 0, v, 0, newNBuckets);
  226. this.vals = v;
  227. }
  228. }
  229. this.hashes = newHash;
  230. this.nBuckets = newNBuckets;
  231. this.nOccupied = size;
  232. this.upperBound = Std.int(newNBuckets * HASH_UPPER + .5);
  233. }
  234. }
  235. public function get( key : String ) : Null<T>
  236. {
  237. var idx = -1;
  238. if (cachedKey == key && ( (idx = cachedIndex) != -1 ))
  239. {
  240. return vals[idx];
  241. }
  242. idx = lookup(key);
  243. if (idx != -1)
  244. {
  245. cachedKey = key;
  246. cachedIndex = idx;
  247. return vals[idx];
  248. }
  249. return null;
  250. }
  251. private function getDefault( key : String, def : T ) : T
  252. {
  253. var idx = -1;
  254. if (cachedKey == key && ( (idx = cachedIndex) != -1 ))
  255. {
  256. return vals[idx];
  257. }
  258. idx = lookup(key);
  259. if (idx != -1)
  260. {
  261. cachedKey = key;
  262. cachedIndex = idx;
  263. return vals[idx];
  264. }
  265. return def;
  266. }
  267. public function exists( key : String ) : Bool
  268. {
  269. var idx = -1;
  270. if (cachedKey == key && ( (idx = cachedIndex) != -1 ))
  271. {
  272. return true;
  273. }
  274. idx = lookup(key);
  275. if (idx != -1)
  276. {
  277. cachedKey = key;
  278. cachedIndex = idx;
  279. return true;
  280. }
  281. return false;
  282. }
  283. public function remove( key : String ) : Bool
  284. {
  285. var idx = -1;
  286. if (! (cachedKey == key && ( (idx = cachedIndex) != -1 )))
  287. {
  288. idx = lookup(key);
  289. }
  290. if (idx == -1)
  291. {
  292. return false;
  293. } else {
  294. if (cachedKey == key)
  295. cachedIndex = -1;
  296. hashes[idx] = FLAG_EMPTY;
  297. _keys[idx] = null;
  298. vals[idx] = null;
  299. --size;
  300. return true;
  301. }
  302. }
  303. /**
  304. Returns an iterator of all keys in the hashtable.
  305. Implementation detail: Do not set() any new value while iterating, as it may cause a resize, which will break iteration
  306. **/
  307. public function keys() : Iterator<String>
  308. {
  309. var i = 0;
  310. var len = nBuckets;
  311. return {
  312. hasNext: function() {
  313. for (j in i...len)
  314. {
  315. if (!isEither(hashes[j]))
  316. {
  317. i = j;
  318. return true;
  319. }
  320. }
  321. return false;
  322. },
  323. next: function() {
  324. var ret = _keys[i];
  325. cachedIndex = i;
  326. cachedKey = ret;
  327. i = i + 1;
  328. return ret;
  329. }
  330. };
  331. }
  332. /**
  333. Returns an iterator of all values in the hashtable.
  334. Implementation detail: Do not set() any new value while iterating, as it may cause a resize, which will break iteration
  335. **/
  336. public function iterator() : Iterator<T>
  337. {
  338. var i = 0;
  339. var len = nBuckets;
  340. return {
  341. hasNext: function() {
  342. for (j in i...len)
  343. {
  344. if (!isEither(hashes[j]))
  345. {
  346. i = j;
  347. return true;
  348. }
  349. }
  350. return false;
  351. },
  352. next: function() {
  353. var ret = vals[i];
  354. i = i + 1;
  355. return ret;
  356. }
  357. };
  358. }
  359. /**
  360. Returns an displayable representation of the hashtable content.
  361. **/
  362. public function toString() : String {
  363. var s = new StringBuf();
  364. s.add("{");
  365. var it = keys();
  366. for( i in it ) {
  367. s.add(i);
  368. s.add(" => ");
  369. s.add(Std.string(get(i)));
  370. if( it.hasNext() )
  371. s.add(", ");
  372. }
  373. s.add("}");
  374. return s.toString();
  375. }
  376. @:extern private static inline function roundUp(x:Int):Int
  377. {
  378. --x;
  379. x |= (x) >>> 1;
  380. x |= (x) >>> 2;
  381. x |= (x) >>> 4;
  382. x |= (x) >>> 8;
  383. x |= (x) >>> 16;
  384. return ++x;
  385. }
  386. @:extern private static inline function getInc(k:Int, mask:Int):Int //return 1 for linear probing
  387. return (((k) >> 3 ^ (k) << 3) | 1) & (mask)
  388. @:extern private static inline function isEither(v:HashType):Bool
  389. return (v & 0xFFFFFFFE) == 0
  390. @:extern private static inline function isEmpty(v:HashType):Bool
  391. return v == FLAG_EMPTY
  392. @:extern private static inline function isDel(v:HashType):Bool
  393. return v == FLAG_DEL
  394. //guarantee: Whatever this function is, it will never return 0 nor 1
  395. @:extern private static inline function hash(s:String):HashType
  396. {
  397. var k:Int = untyped s.hashCode();
  398. //k *= 357913941;
  399. //k ^= k << 24;
  400. //k += ~357913941;
  401. //k ^= k >> 31;
  402. //k ^= k << 31;
  403. k = (k+0x7ed55d16) + (k<<12);
  404. k = (k^0xc761c23c) ^ (k>>19);
  405. k = (k+0x165667b1) + (k<<5);
  406. k = (k+0xd3a2646c) ^ (k<<9);
  407. k = (k+0xfd7046c5) + (k<<3);
  408. k = (k^0xb55a4f09) ^ (k>>16);
  409. var ret = k;
  410. if (isEither(ret))
  411. {
  412. if (ret == 0)
  413. ret = 2;
  414. else
  415. ret = 0xFFFFFFFF;
  416. }
  417. return ret;
  418. }
  419. @:extern private static inline function arrayCopy(sourceArray:Dynamic, sourceIndex:Int, destinationArray:Dynamic, destinationIndex:Int, length:Int):Void
  420. java.lang.System.arraycopy(sourceArray, sourceIndex, destinationArray, destinationIndex, length)
  421. @:extern private static inline function assert(x:Bool):Void
  422. {
  423. #if DEBUG_HASHTBL
  424. if (!x) throw "assert failed";
  425. #end
  426. }
  427. }
  428. private typedef HashType = Int;