Hash.hx 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510
  1. /*
  2. * Copyright (C)2005-2012 Haxe Foundation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  20. * DEALINGS IN THE SOFTWARE.
  21. */
  22. import cs.NativeArray;
  23. /*
  24. * Copyright (c) 2005, The haXe Project Contributors
  25. * All rights reserved.
  26. * Redistribution and use in source and binary forms, with or without
  27. * modification, are permitted provided that the following conditions are met:
  28. *
  29. * - Redistributions of source code must retain the above copyright
  30. * notice, this list of conditions and the following disclaimer.
  31. * - Redistributions in binary form must reproduce the above copyright
  32. * notice, this list of conditions and the following disclaimer in the
  33. * documentation and/or other materials provided with the distribution.
  34. *
  35. * THIS SOFTWARE IS PROVIDED BY THE HAXE PROJECT CONTRIBUTORS "AS IS" AND ANY
  36. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  37. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  38. * DISCLAIMED. IN NO EVENT SHALL THE HAXE PROJECT CONTRIBUTORS BE LIABLE FOR
  39. * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  40. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  41. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  42. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  43. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  44. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
  45. * DAMAGE.
  46. */
  47. @:coreApi class Hash<T>
  48. {
  49. @:extern private static inline var HASH_UPPER = 0.77;
  50. @:extern private static inline var FLAG_EMPTY = 0;
  51. @:extern private static inline var FLAG_DEL = 1;
  52. /**
  53. * This is the most important structure here and the reason why it's so fast.
  54. * It's an array of all the hashes contained in the table. These hashes cannot be 0 nor 1,
  55. * which stand for "empty" and "deleted" states.
  56. *
  57. * The lookup algorithm will keep looking until a 0 or the key wanted is found;
  58. * The insertion algorithm will do the same but will also break when FLAG_DEL is found;
  59. */
  60. private var hashes:NativeArray<HashType>;
  61. private var _keys:NativeArray<String>;
  62. private var vals:NativeArray<T>;
  63. private var nBuckets:Int;
  64. private var size:Int;
  65. private var nOccupied:Int;
  66. private var upperBound:Int;
  67. private var cachedKey:String;
  68. private var cachedIndex:Int;
  69. #if DEBUG_HASHTBL
  70. private var totalProbes:Int;
  71. private var probeTimes:Int;
  72. private var sameHash:Int;
  73. private var maxProbe:Int;
  74. #end
  75. public function new() : Void
  76. {
  77. cachedIndex = -1;
  78. }
  79. public function set( key : String, value : T ) : Void
  80. {
  81. var x:Int, k:Int;
  82. if (nOccupied >= upperBound)
  83. {
  84. if (nBuckets > (size << 1))
  85. resize(nBuckets - 1); //clear "deleted" elements
  86. else
  87. resize(nBuckets + 2);
  88. }
  89. var hashes = hashes, keys = _keys, hashes = hashes;
  90. {
  91. var mask = (nBuckets == 0) ? 0 : nBuckets - 1;
  92. var site = x = nBuckets;
  93. k = hash(key);
  94. var i = k & mask, nProbes = 0;
  95. //for speed up
  96. if (isEither(hashes[i])) {
  97. x = i;
  98. } else {
  99. //var inc = getInc(k, mask);
  100. var last = i, flag;
  101. while(! (isEither(flag = hashes[i]) || (flag == k && _keys[i] == key)) )
  102. {
  103. i = (i + ++nProbes) & mask;
  104. #if DEBUG_HASHTBL
  105. probeTimes++;
  106. if (i == last)
  107. throw "assert";
  108. #end
  109. }
  110. x = i;
  111. }
  112. #if DEBUG_HASHTBL
  113. if (nProbes > maxProbe)
  114. maxProbe = nProbes;
  115. totalProbes++;
  116. #end
  117. }
  118. var flag = hashes[x];
  119. if (isEmpty(flag))
  120. {
  121. keys[x] = key;
  122. vals[x] = value;
  123. hashes[x] = k;
  124. size++;
  125. nOccupied++;
  126. } else if (isDel(flag)) {
  127. keys[x] = key;
  128. vals[x] = value;
  129. hashes[x] = k;
  130. size++;
  131. } else {
  132. assert(_keys[x] == key);
  133. vals[x] = value;
  134. }
  135. cachedIndex = x;
  136. cachedKey = key;
  137. }
  138. @:final private function lookup( key : String ) : Int
  139. {
  140. if (nBuckets != 0)
  141. {
  142. var hashes = hashes, keys = _keys;
  143. var mask = nBuckets - 1, hash = hash(key), k = hash, nProbes = 0;
  144. var i = k & mask;
  145. var last = i, flag;
  146. //var inc = getInc(k, mask);
  147. while (!isEmpty(flag = hashes[i]) && (isDel(flag) || flag != k || keys[i] != key))
  148. {
  149. i = (i + ++nProbes) & mask;
  150. #if DEBUG_HASHTBL
  151. probeTimes++;
  152. if (i == last)
  153. throw "assert";
  154. #end
  155. }
  156. #if DEBUG_HASHTBL
  157. if (nProbes > maxProbe)
  158. maxProbe = nProbes;
  159. totalProbes++;
  160. #end
  161. return isEither(flag) ? -1 : i;
  162. }
  163. return -1;
  164. }
  165. @:final @:private function resize(newNBuckets:Int) : Void
  166. {
  167. //This function uses 0.25*n_bucktes bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets.
  168. var newHash = null;
  169. var j = 1;
  170. {
  171. newNBuckets = roundUp(newNBuckets);
  172. if (newNBuckets < 4) newNBuckets = 4;
  173. if (size >= (newNBuckets * HASH_UPPER + 0.5)) /* requested size is too small */
  174. {
  175. j = 0;
  176. } else { /* hash table size to be changed (shrink or expand); rehash */
  177. var nfSize = newNBuckets;
  178. newHash = new NativeArray( nfSize );
  179. if (nBuckets < newNBuckets) //expand
  180. {
  181. var k = new NativeArray(newNBuckets);
  182. if (_keys != null)
  183. arrayCopy(_keys, 0, k, 0, nBuckets);
  184. _keys = k;
  185. var v = new NativeArray(newNBuckets);
  186. if (vals != null)
  187. arrayCopy(vals, 0, v, 0, nBuckets);
  188. vals = v;
  189. } //otherwise shrink
  190. }
  191. }
  192. if (j != 0)
  193. { //rehashing is required
  194. //resetting cache
  195. cachedKey = null;
  196. cachedIndex = -1;
  197. j = -1;
  198. var nBuckets = nBuckets, _keys = _keys, vals = vals, hashes = hashes;
  199. var newMask = newNBuckets - 1;
  200. while (++j < nBuckets)
  201. {
  202. var k;
  203. if (!isEither(k = hashes[j]))
  204. {
  205. var key = _keys[j];
  206. var val = vals[j];
  207. hashes[j] = FLAG_DEL;
  208. while (true) /* kick-out process; sort of like in Cuckoo hashing */
  209. {
  210. var nProbes = 0;
  211. //var inc = getInc(k, newMask);
  212. var i = k & newMask;
  213. while (!isEmpty(newHash[i]))
  214. i = (i + ++nProbes) & newMask;
  215. newHash[i] = k;
  216. if (i < nBuckets && !isEither(k = hashes[i])) /* kick out the existing element */
  217. {
  218. {
  219. var tmp = _keys[i];
  220. _keys[i] = key;
  221. key = tmp;
  222. }
  223. {
  224. var tmp = vals[i];
  225. vals[i] = val;
  226. val = tmp;
  227. }
  228. hashes[i] = FLAG_DEL; /* mark it as deleted in the old hash table */
  229. } else { /* write the element and jump out of the loop */
  230. _keys[i] = key;
  231. vals[i] = val;
  232. break;
  233. }
  234. }
  235. }
  236. }
  237. if (nBuckets > newNBuckets) /* shrink the hash table */
  238. {
  239. {
  240. var k = new NativeArray(newNBuckets);
  241. arrayCopy(_keys, 0, k, 0, newNBuckets);
  242. this._keys = k;
  243. }
  244. {
  245. var v = new NativeArray(newNBuckets);
  246. arrayCopy(vals, 0, v, 0, newNBuckets);
  247. this.vals = v;
  248. }
  249. }
  250. this.hashes = newHash;
  251. this.nBuckets = newNBuckets;
  252. this.nOccupied = size;
  253. this.upperBound = Std.int(newNBuckets * HASH_UPPER + .5);
  254. }
  255. }
  256. public function get( key : String ) : Null<T>
  257. {
  258. var idx = -1;
  259. if (cachedKey == key && ( (idx = cachedIndex) != -1 ))
  260. {
  261. return vals[idx];
  262. }
  263. idx = lookup(key);
  264. if (idx != -1)
  265. {
  266. cachedKey = key;
  267. cachedIndex = idx;
  268. return vals[idx];
  269. }
  270. return null;
  271. }
  272. private function getDefault( key : String, def : T ) : T
  273. {
  274. var idx = -1;
  275. if (cachedKey == key && ( (idx = cachedIndex) != -1 ))
  276. {
  277. return vals[idx];
  278. }
  279. idx = lookup(key);
  280. if (idx != -1)
  281. {
  282. cachedKey = key;
  283. cachedIndex = idx;
  284. return vals[idx];
  285. }
  286. return def;
  287. }
  288. public function exists( key : String ) : Bool
  289. {
  290. var idx = -1;
  291. if (cachedKey == key && ( (idx = cachedIndex) != -1 ))
  292. {
  293. return true;
  294. }
  295. idx = lookup(key);
  296. if (idx != -1)
  297. {
  298. cachedKey = key;
  299. cachedIndex = idx;
  300. return true;
  301. }
  302. return false;
  303. }
  304. public function remove( key : String ) : Bool
  305. {
  306. var idx = -1;
  307. if (! (cachedKey == key && ( (idx = cachedIndex) != -1 )))
  308. {
  309. idx = lookup(key);
  310. }
  311. if (idx == -1)
  312. {
  313. return false;
  314. } else {
  315. if (cachedKey == key)
  316. cachedIndex = -1;
  317. hashes[idx] = FLAG_EMPTY;
  318. _keys[idx] = null;
  319. vals[idx] = null;
  320. --size;
  321. return true;
  322. }
  323. }
  324. /**
  325. Returns an iterator of all keys in the hashtable.
  326. Implementation detail: Do not set() any new value while iterating, as it may cause a resize, which will break iteration
  327. **/
  328. public function keys() : Iterator<String>
  329. {
  330. var i = 0;
  331. var len = nBuckets;
  332. return {
  333. hasNext: function() {
  334. for (j in i...len)
  335. {
  336. if (!isEither(hashes[j]))
  337. {
  338. i = j;
  339. return true;
  340. }
  341. }
  342. return false;
  343. },
  344. next: function() {
  345. var ret = _keys[i];
  346. cachedIndex = i;
  347. cachedKey = ret;
  348. i = i + 1;
  349. return ret;
  350. }
  351. };
  352. }
  353. /**
  354. Returns an iterator of all values in the hashtable.
  355. Implementation detail: Do not set() any new value while iterating, as it may cause a resize, which will break iteration
  356. **/
  357. public function iterator() : Iterator<T>
  358. {
  359. var i = 0;
  360. var len = nBuckets;
  361. return {
  362. hasNext: function() {
  363. for (j in i...len)
  364. {
  365. if (!isEither(hashes[j]))
  366. {
  367. i = j;
  368. return true;
  369. }
  370. }
  371. return false;
  372. },
  373. next: function() {
  374. var ret = vals[i];
  375. i = i + 1;
  376. return ret;
  377. }
  378. };
  379. }
  380. /**
  381. Returns an displayable representation of the hashtable content.
  382. **/
  383. public function toString() : String {
  384. var s = new StringBuf();
  385. s.add("{");
  386. var it = keys();
  387. for( i in it ) {
  388. s.add(i);
  389. s.add(" => ");
  390. s.add(Std.string(get(i)));
  391. if( it.hasNext() )
  392. s.add(", ");
  393. }
  394. s.add("}");
  395. return s.toString();
  396. }
  397. @:extern private static inline function roundUp(x:Int):Int
  398. {
  399. --x;
  400. x |= (x) >>> 1;
  401. x |= (x) >>> 2;
  402. x |= (x) >>> 4;
  403. x |= (x) >>> 8;
  404. x |= (x) >>> 16;
  405. return ++x;
  406. }
  407. @:extern private static inline function getInc(k:Int, mask:Int):Int //return 1 for linear probing
  408. return (((k) >> 3 ^ (k) << 3) | 1) & (mask)
  409. @:extern private static inline function isEither(v:HashType):Bool
  410. return (v & 0xFFFFFFFE) == 0
  411. @:extern private static inline function isEmpty(v:HashType):Bool
  412. return v == FLAG_EMPTY
  413. @:extern private static inline function isDel(v:HashType):Bool
  414. return v == FLAG_DEL
  415. //guarantee: Whatever this function is, it will never return 0 nor 1
  416. @:extern private static inline function hash(s:String):HashType
  417. {
  418. var k:Int = untyped s.GetHashCode();
  419. //k *= 357913941;
  420. //k ^= k << 24;
  421. //k += ~357913941;
  422. //k ^= k >> 31;
  423. //k ^= k << 31;
  424. k = (k+0x7ed55d16) + (k<<12);
  425. k = (k^0xc761c23c) ^ (k>>19);
  426. k = (k+0x165667b1) + (k<<5);
  427. k = (k+0xd3a2646c) ^ (k<<9);
  428. k = (k+0xfd7046c5) + (k<<3);
  429. k = (k^0xb55a4f09) ^ (k>>16);
  430. var ret = k;
  431. if (isEither(ret))
  432. {
  433. if (ret == 0)
  434. ret = 2;
  435. else
  436. ret = 0xFFFFFFFF;
  437. }
  438. return ret;
  439. }
  440. @:extern private static inline function arrayCopy(sourceArray:cs.system.Array, sourceIndex:Int, destinationArray:cs.system.Array, destinationIndex:Int, length:Int):Void
  441. cs.system.Array.Copy(sourceArray, sourceIndex, destinationArray, destinationIndex, length)
  442. @:extern private static inline function assert(x:Bool):Void
  443. {
  444. #if DEBUG_HASHTBL
  445. if (!x) throw "assert failed";
  446. #end
  447. }
  448. }
  449. private typedef HashType = Int;