StringMap.hx 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486
  1. /*
  2. * Copyright (C)2005-2012 Haxe Foundation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  20. * DEALINGS IN THE SOFTWARE.
  21. */
  22. import cs.NativeArray;
  23. class StringMap<T>
  24. {
  25. @:extern private static inline var HASH_UPPER = 0.77;
  26. @:extern private static inline var FLAG_EMPTY = 0;
  27. @:extern private static inline var FLAG_DEL = 1;
  28. /**
  29. * This is the most important structure here and the reason why it's so fast.
  30. * It's an array of all the hashes contained in the table. These hashes cannot be 0 nor 1,
  31. * which stand for "empty" and "deleted" states.
  32. *
  33. * The lookup algorithm will keep looking until a 0 or the key wanted is found;
  34. * The insertion algorithm will do the same but will also break when FLAG_DEL is found;
  35. */
  36. private var hashes:NativeArray<HashType>;
  37. private var _keys:NativeArray<String>;
  38. private var vals:NativeArray<T>;
  39. private var nBuckets:Int;
  40. private var size:Int;
  41. private var nOccupied:Int;
  42. private var upperBound:Int;
  43. private var cachedKey:String;
  44. private var cachedIndex:Int;
  45. #if DEBUG_HASHTBL
  46. private var totalProbes:Int;
  47. private var probeTimes:Int;
  48. private var sameHash:Int;
  49. private var maxProbe:Int;
  50. #end
  51. public function new() : Void
  52. {
  53. cachedIndex = -1;
  54. }
  55. public function set( key : String, value : T ) : Void
  56. {
  57. var x:Int, k:Int;
  58. if (nOccupied >= upperBound)
  59. {
  60. if (nBuckets > (size << 1))
  61. resize(nBuckets - 1); //clear "deleted" elements
  62. else
  63. resize(nBuckets + 2);
  64. }
  65. var hashes = hashes, keys = _keys, hashes = hashes;
  66. {
  67. var mask = (nBuckets == 0) ? 0 : nBuckets - 1;
  68. var site = x = nBuckets;
  69. k = hash(key);
  70. var i = k & mask, nProbes = 0;
  71. //for speed up
  72. if (isEither(hashes[i])) {
  73. x = i;
  74. } else {
  75. //var inc = getInc(k, mask);
  76. var last = i, flag;
  77. while(! (isEither(flag = hashes[i]) || (flag == k && _keys[i] == key)) )
  78. {
  79. i = (i + ++nProbes) & mask;
  80. #if DEBUG_HASHTBL
  81. probeTimes++;
  82. if (i == last)
  83. throw "assert";
  84. #end
  85. }
  86. x = i;
  87. }
  88. #if DEBUG_HASHTBL
  89. if (nProbes > maxProbe)
  90. maxProbe = nProbes;
  91. totalProbes++;
  92. #end
  93. }
  94. var flag = hashes[x];
  95. if (isEmpty(flag))
  96. {
  97. keys[x] = key;
  98. vals[x] = value;
  99. hashes[x] = k;
  100. size++;
  101. nOccupied++;
  102. } else if (isDel(flag)) {
  103. keys[x] = key;
  104. vals[x] = value;
  105. hashes[x] = k;
  106. size++;
  107. } else {
  108. assert(_keys[x] == key);
  109. vals[x] = value;
  110. }
  111. cachedIndex = x;
  112. cachedKey = key;
  113. }
  114. @:final private function lookup( key : String ) : Int
  115. {
  116. if (nBuckets != 0)
  117. {
  118. var hashes = hashes, keys = _keys;
  119. var mask = nBuckets - 1, hash = hash(key), k = hash, nProbes = 0;
  120. var i = k & mask;
  121. var last = i, flag;
  122. //var inc = getInc(k, mask);
  123. while (!isEmpty(flag = hashes[i]) && (isDel(flag) || flag != k || keys[i] != key))
  124. {
  125. i = (i + ++nProbes) & mask;
  126. #if DEBUG_HASHTBL
  127. probeTimes++;
  128. if (i == last)
  129. throw "assert";
  130. #end
  131. }
  132. #if DEBUG_HASHTBL
  133. if (nProbes > maxProbe)
  134. maxProbe = nProbes;
  135. totalProbes++;
  136. #end
  137. return isEither(flag) ? -1 : i;
  138. }
  139. return -1;
  140. }
  141. @:final @:private function resize(newNBuckets:Int) : Void
  142. {
  143. //This function uses 0.25*n_bucktes bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets.
  144. var newHash = null;
  145. var j = 1;
  146. {
  147. newNBuckets = roundUp(newNBuckets);
  148. if (newNBuckets < 4) newNBuckets = 4;
  149. if (size >= (newNBuckets * HASH_UPPER + 0.5)) /* requested size is too small */
  150. {
  151. j = 0;
  152. } else { /* hash table size to be changed (shrink or expand); rehash */
  153. var nfSize = newNBuckets;
  154. newHash = new NativeArray( nfSize );
  155. if (nBuckets < newNBuckets) //expand
  156. {
  157. var k = new NativeArray(newNBuckets);
  158. if (_keys != null)
  159. arrayCopy(_keys, 0, k, 0, nBuckets);
  160. _keys = k;
  161. var v = new NativeArray(newNBuckets);
  162. if (vals != null)
  163. arrayCopy(vals, 0, v, 0, nBuckets);
  164. vals = v;
  165. } //otherwise shrink
  166. }
  167. }
  168. if (j != 0)
  169. { //rehashing is required
  170. //resetting cache
  171. cachedKey = null;
  172. cachedIndex = -1;
  173. j = -1;
  174. var nBuckets = nBuckets, _keys = _keys, vals = vals, hashes = hashes;
  175. var newMask = newNBuckets - 1;
  176. while (++j < nBuckets)
  177. {
  178. var k;
  179. if (!isEither(k = hashes[j]))
  180. {
  181. var key = _keys[j];
  182. var val = vals[j];
  183. hashes[j] = FLAG_DEL;
  184. while (true) /* kick-out process; sort of like in Cuckoo hashing */
  185. {
  186. var nProbes = 0;
  187. //var inc = getInc(k, newMask);
  188. var i = k & newMask;
  189. while (!isEmpty(newHash[i]))
  190. i = (i + ++nProbes) & newMask;
  191. newHash[i] = k;
  192. if (i < nBuckets && !isEither(k = hashes[i])) /* kick out the existing element */
  193. {
  194. {
  195. var tmp = _keys[i];
  196. _keys[i] = key;
  197. key = tmp;
  198. }
  199. {
  200. var tmp = vals[i];
  201. vals[i] = val;
  202. val = tmp;
  203. }
  204. hashes[i] = FLAG_DEL; /* mark it as deleted in the old hash table */
  205. } else { /* write the element and jump out of the loop */
  206. _keys[i] = key;
  207. vals[i] = val;
  208. break;
  209. }
  210. }
  211. }
  212. }
  213. if (nBuckets > newNBuckets) /* shrink the hash table */
  214. {
  215. {
  216. var k = new NativeArray(newNBuckets);
  217. arrayCopy(_keys, 0, k, 0, newNBuckets);
  218. this._keys = k;
  219. }
  220. {
  221. var v = new NativeArray(newNBuckets);
  222. arrayCopy(vals, 0, v, 0, newNBuckets);
  223. this.vals = v;
  224. }
  225. }
  226. this.hashes = newHash;
  227. this.nBuckets = newNBuckets;
  228. this.nOccupied = size;
  229. this.upperBound = Std.int(newNBuckets * HASH_UPPER + .5);
  230. }
  231. }
  232. public function get( key : String ) : Null<T>
  233. {
  234. var idx = -1;
  235. if (cachedKey == key && ( (idx = cachedIndex) != -1 ))
  236. {
  237. return vals[idx];
  238. }
  239. idx = lookup(key);
  240. if (idx != -1)
  241. {
  242. cachedKey = key;
  243. cachedIndex = idx;
  244. return vals[idx];
  245. }
  246. return null;
  247. }
  248. private function getDefault( key : String, def : T ) : T
  249. {
  250. var idx = -1;
  251. if (cachedKey == key && ( (idx = cachedIndex) != -1 ))
  252. {
  253. return vals[idx];
  254. }
  255. idx = lookup(key);
  256. if (idx != -1)
  257. {
  258. cachedKey = key;
  259. cachedIndex = idx;
  260. return vals[idx];
  261. }
  262. return def;
  263. }
  264. public function exists( key : String ) : Bool
  265. {
  266. var idx = -1;
  267. if (cachedKey == key && ( (idx = cachedIndex) != -1 ))
  268. {
  269. return true;
  270. }
  271. idx = lookup(key);
  272. if (idx != -1)
  273. {
  274. cachedKey = key;
  275. cachedIndex = idx;
  276. return true;
  277. }
  278. return false;
  279. }
  280. public function remove( key : String ) : Bool
  281. {
  282. var idx = -1;
  283. if (! (cachedKey == key && ( (idx = cachedIndex) != -1 )))
  284. {
  285. idx = lookup(key);
  286. }
  287. if (idx == -1)
  288. {
  289. return false;
  290. } else {
  291. if (cachedKey == key)
  292. cachedIndex = -1;
  293. hashes[idx] = FLAG_EMPTY;
  294. _keys[idx] = null;
  295. vals[idx] = null;
  296. --size;
  297. return true;
  298. }
  299. }
  300. /**
  301. Returns an iterator of all keys in the hashtable.
  302. Implementation detail: Do not set() any new value while iterating, as it may cause a resize, which will break iteration
  303. **/
  304. public function keys() : Iterator<String>
  305. {
  306. var i = 0;
  307. var len = nBuckets;
  308. return {
  309. hasNext: function() {
  310. for (j in i...len)
  311. {
  312. if (!isEither(hashes[j]))
  313. {
  314. i = j;
  315. return true;
  316. }
  317. }
  318. return false;
  319. },
  320. next: function() {
  321. var ret = _keys[i];
  322. cachedIndex = i;
  323. cachedKey = ret;
  324. i = i + 1;
  325. return ret;
  326. }
  327. };
  328. }
  329. /**
  330. Returns an iterator of all values in the hashtable.
  331. Implementation detail: Do not set() any new value while iterating, as it may cause a resize, which will break iteration
  332. **/
  333. public function iterator() : Iterator<T>
  334. {
  335. var i = 0;
  336. var len = nBuckets;
  337. return {
  338. hasNext: function() {
  339. for (j in i...len)
  340. {
  341. if (!isEither(hashes[j]))
  342. {
  343. i = j;
  344. return true;
  345. }
  346. }
  347. return false;
  348. },
  349. next: function() {
  350. var ret = vals[i];
  351. i = i + 1;
  352. return ret;
  353. }
  354. };
  355. }
  356. /**
  357. Returns an displayable representation of the hashtable content.
  358. **/
  359. public function toString() : String {
  360. var s = new StringBuf();
  361. s.add("{");
  362. var it = keys();
  363. for( i in it ) {
  364. s.add(i);
  365. s.add(" => ");
  366. s.add(Std.string(get(i)));
  367. if( it.hasNext() )
  368. s.add(", ");
  369. }
  370. s.add("}");
  371. return s.toString();
  372. }
  373. @:extern private static inline function roundUp(x:Int):Int
  374. {
  375. --x;
  376. x |= (x) >>> 1;
  377. x |= (x) >>> 2;
  378. x |= (x) >>> 4;
  379. x |= (x) >>> 8;
  380. x |= (x) >>> 16;
  381. return ++x;
  382. }
  383. @:extern private static inline function getInc(k:Int, mask:Int):Int //return 1 for linear probing
  384. return (((k) >> 3 ^ (k) << 3) | 1) & (mask)
  385. @:extern private static inline function isEither(v:HashType):Bool
  386. return (v & 0xFFFFFFFE) == 0
  387. @:extern private static inline function isEmpty(v:HashType):Bool
  388. return v == FLAG_EMPTY
  389. @:extern private static inline function isDel(v:HashType):Bool
  390. return v == FLAG_DEL
  391. //guarantee: Whatever this function is, it will never return 0 nor 1
  392. @:extern private static inline function hash(s:String):HashType
  393. {
  394. var k:Int = untyped s.GetHashCode();
  395. //k *= 357913941;
  396. //k ^= k << 24;
  397. //k += ~357913941;
  398. //k ^= k >> 31;
  399. //k ^= k << 31;
  400. k = (k+0x7ed55d16) + (k<<12);
  401. k = (k^0xc761c23c) ^ (k>>19);
  402. k = (k+0x165667b1) + (k<<5);
  403. k = (k+0xd3a2646c) ^ (k<<9);
  404. k = (k+0xfd7046c5) + (k<<3);
  405. k = (k^0xb55a4f09) ^ (k>>16);
  406. var ret = k;
  407. if (isEither(ret))
  408. {
  409. if (ret == 0)
  410. ret = 2;
  411. else
  412. ret = 0xFFFFFFFF;
  413. }
  414. return ret;
  415. }
  416. @:extern private static inline function arrayCopy(sourceArray:cs.system.Array, sourceIndex:Int, destinationArray:cs.system.Array, destinationIndex:Int, length:Int):Void
  417. cs.system.Array.Copy(sourceArray, sourceIndex, destinationArray, destinationIndex, length)
  418. @:extern private static inline function assert(x:Bool):Void
  419. {
  420. #if DEBUG_HASHTBL
  421. if (!x) throw "assert failed";
  422. #end
  423. }
  424. }
  425. private typedef HashType = Int;