Browse Source

A simple and fast Hashtable, tested but not yet integrated with anything.

Adam Ierymenko 10 years ago
parent
commit
3947807b1f
2 changed files with 367 additions and 1 deletions
  1. 252 0
      node/Hashtable.hpp
  2. 115 1
      selftest.cpp

+ 252 - 0
node/Hashtable.hpp

@@ -0,0 +1,252 @@
+/*
+ * ZeroTier One - Network Virtualization Everywhere
+ * Copyright (C) 2011-2015  ZeroTier, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * --
+ *
+ * ZeroTier may be used and distributed under the terms of the GPLv3, which
+ * are available at: http://www.gnu.org/licenses/gpl-3.0.html
+ *
+ */
+
+#ifndef ZT_HASHTABLE_HPP
+#define ZT_HASHTABLE_HPP
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <stdexcept>
+
+namespace ZeroTier {
+
+/**
+ * A minimal hash table implementation for the ZeroTier core
+ *
+ * This is not a drop-in replacement for STL containers, and has several
+ * limitations (see set(); also, no copy constructor or assignment operator is
+ * declared). It's designed to be small and fast for use in the ZeroTier core.
+ *
+ * Keys are hashed by _hc(): uint64_t keys directly, any other type via its hashCode() method.
+ * NOTE(review): pairs as keys (XORing both hash codes) have no _hc() overload in this class.
+ */
+template<typename K,typename V>
+class Hashtable
+{
+private:
+	struct _Bucket // one stored entry; also a node in its slot's singly linked chain
+	{
+		_Bucket(const K &k,const V &v) : // copies key and value; 'next' is left for the caller to set
+			k(k),
+			v(v) {}
+		_Bucket *next; // following entry in the same slot, or NULL at the end of the chain
+		K k;
+		V v;
+	};
+
+public:
+	/**
+	 * A simple forward iterator (different from STL)
+	 *
+	 * It's safe to erase the key most recently returned by next(), but not others.
+	 * Don't use set() since that may rehash and invalidate the iterator. Note that
+	 * erasing a key destroys the targets of the pointers returned for it by next().
+	 */
+	class Iterator
+	{
+	public:
+		/**
+		 * @param ht Hash table to iterate over (held by pointer; iteration starts at slot 0)
+		 */
+		Iterator(Hashtable &ht) :
+			_idx(0),
+			_ht(&ht),
+			_b(ht._t[0]) // head of slot 0's chain, which may be NULL
+		{
+		}
+
+		/**
+		 * @param kptr Pointer to set to point to next key
+		 * @param vptr Pointer to set to point to next value
+		 * @return True if kptr and vptr are set, false if no more entries
+		 */
+		inline bool next(K *&kptr,V *&vptr)
+		{
+			for(;;) {
+				if (_b) {
+					kptr = &(_b->k);
+					vptr = &(_b->v);
+					_b = _b->next; // advance before returning, so the caller may erase the returned entry
+					return true;
+				}
+				++_idx; // current chain exhausted: move on to the next slot
+				if (_idx >= _ht->_bc) // walked past the last slot
+					return false;
+				_b = _ht->_t[_idx];
+			}
+		}
+
+	private:
+		unsigned long _idx; // index of the slot whose chain is currently being walked
+		Hashtable *_ht; // table being iterated
+		Hashtable::_Bucket *_b; // next bucket to hand out, or NULL when the current chain is done
+	};
+	friend class Hashtable::Iterator;
+
+	/**
+	 * @param bc Initial capacity in buckets (default: 128, must be nonzero: indices are taken modulo this)
+	 */
+	Hashtable(unsigned long bc = 128) :
+		_t(reinterpret_cast<_Bucket **>(::malloc(sizeof(_Bucket *) * bc))),
+		_bc(bc),
+		_s(0)
+	{
+		if (!_t)
+			throw std::bad_alloc(); // NOTE(review): std::bad_alloc is declared in <new>, which this header does not include
+		for(unsigned long i=0;i<bc;++i)
+			_t[i] = (_Bucket *)0; // malloc() does not zero memory, so mark every slot empty
+	}
+
+	~Hashtable()
+	{
+		clear(); // delete every bucket first
+		::free(_t); // then release the slot array itself
+	}
+
+	/**
+	 * Erase all entries (the bucket count is left unchanged)
+	 */
+	inline void clear()
+	{
+		if (_s) { // nothing to walk when the table is already empty
+			for(unsigned long i=0;i<_bc;++i) {
+				_Bucket *b = _t[i];
+				while (b) {
+					_Bucket *const nb = b->next; // read the link before the node is deleted
+					delete b;
+					b = nb;
+				}
+				_t[i] = (_Bucket *)0;
+			}
+			_s = 0;
+		}
+	}
+
+	/**
+	 * @param k Key
+	 * @return Pointer to value or NULL if not found (first match in the key's chain)
+	 */
+	inline V *get(const K &k)
+	{
+		_Bucket *b = _t[_hc(k) % _bc];
+		while (b) {
+			if (b->k == k)
+				return &(b->v);
+			b = b->next;
+		}
+		return (V *)0;
+	}
+	inline const V *get(const K &k) const { return const_cast<Hashtable *>(this)->get(k); } // const overload; forwards to the lookup above
+
+	/**
+	 * @param k Key
+	 * @return True if value was present (only the first matching bucket in the chain is removed)
+	 */
+	inline bool erase(const K &k)
+	{
+		const unsigned long bidx = _hc(k) % _bc;
+		_Bucket *lastb = (_Bucket *)0; // predecessor of b in the chain, NULL while b is the head
+		_Bucket *b = _t[bidx];
+		while (b) {
+			if (b->k == k) {
+				if (lastb)
+					lastb->next = b->next; // unlink from the middle or end of the chain
+				else _t[bidx] = b->next; // unlink the chain head
+				delete b;
+				--_s;
+				return true;
+			}
+			lastb = b;
+			b = b->next;
+		}
+		return false;
+	}
+
+	/**
+	 * @param k Key (not looked up first: setting a key twice adds a second bucket ahead of the old one)
+	 * @param v Value (copied into a newly allocated bucket)
+	 */
+	inline void set(const K &k,const V &v)
+	{
+		if (_s >= _bc) { // at least one entry per slot on average: try to double the table
+			const unsigned long nc = _bc * 2;
+			_Bucket **nt = reinterpret_cast<_Bucket **>(::malloc(sizeof(_Bucket *) * nc));
+			if (nt) { // on allocation failure the old table is kept and the insert below still runs
+				for(unsigned long i=0;i<nc;++i)
+					nt[i] = (_Bucket *)0;
+				for(unsigned long i=0;i<_bc;++i) {
+					_Bucket *b = _t[i];
+					while (b) {
+						_Bucket *const nb = b->next;
+						const unsigned long nidx = _hc(b->k) % nc;
+						b->next = nt[nidx]; // relink the existing node at the head of its new chain
+						nt[nidx] = b;
+						b = nb;
+					}
+				}
+				::free(_t);
+				_t = nt;
+				_bc = nc;
+			}
+		}
+		const unsigned long bidx = _hc(k) % _bc;
+		_Bucket *const b = new _Bucket(k,v);
+		b->next = _t[bidx]; // prepend to the slot's chain
+		_t[bidx] = b;
+		++_s;
+	}
+
+	/**
+	 * @return Number of entries
+	 */
+	inline unsigned long size() const throw() { return _s; }
+
+	/**
+	 * @return True if table is empty
+	 */
+	inline bool empty() const throw() { return (_s == 0); }
+
+private:
+	template<typename O> // generic keys: the key type must provide a hashCode() method
+	static inline unsigned long _hc(const O &obj)
+	{
+		return obj.hashCode();
+	}
+	static inline unsigned long _hc(const uint64_t i) // uint64_t keys: XOR the two halves, then multiply
+	{
+		// NOTE: this is fine for network IDs, but might be bad for other kinds
+		// of IDs if they are not evenly or randomly distributed.
+		return (unsigned long)((i ^ (i >> 32)) * 2654435761ULL); // truncated where unsigned long is narrower than 64 bits
+	}
+
+	_Bucket **_t; // slot array of _bc chain heads, allocated with malloc() and released with free()
+	unsigned long _bc; // number of slots in _t
+	unsigned long _s; // number of stored entries
+};
+
+} // namespace ZeroTier
+
+#endif

+ 115 - 1
selftest.cpp

@@ -36,6 +36,7 @@
 #include <vector>
 #include <vector>
 
 
 #include "node/Constants.hpp"
 #include "node/Constants.hpp"
+#include "node/Hashtable.hpp"
 #include "node/RuntimeEnvironment.hpp"
 #include "node/RuntimeEnvironment.hpp"
 #include "node/InetAddress.hpp"
 #include "node/InetAddress.hpp"
 #include "node/Utils.hpp"
 #include "node/Utils.hpp"
@@ -578,6 +579,119 @@ static int testPacket()
 
 
 static int testOther()
 static int testOther()
 {
 {
+	std::cout << "[other] Testing Hashtable... "; std::cout.flush();
+	{
+		Hashtable<uint64_t,std::string> ht(128);
+		std::map<uint64_t,std::string> ref; // assume std::map works correctly :)
+		for(int x=0;x<2;++x) {
+			for(int i=0;i<25000;++i) {
+				uint64_t k = rand();
+				while ((k == 0)||(ref.count(k) > 0))
+					++k;
+				std::string v;
+				for(int j=0;j<(int)(k % 64);++j)
+					v.push_back("0123456789"[rand() % 10]);
+				ht.set(k,v);
+				ref[k] = v;
+			}
+			if (ht.size() != ref.size()) {
+				std::cout << "FAILED! (size mismatch)" << std::endl;
+				return -1;
+			}
+			for(std::map<uint64_t,std::string>::iterator i(ref.begin());i!=ref.end();++i) {
+				std::string *v = ht.get(i->first);
+				if (!v) {
+					std::cout << "FAILED! (key not found)" << std::endl;
+					return -1;
+				}
+				if (*v != i->second) {
+					std::cout << "FAILED! (key not equal)" << std::endl;
+					return -1;
+				}
+			}
+			{
+				uint64_t *k;
+				std::string *v;
+				Hashtable<uint64_t,std::string>::Iterator i(ht);
+				unsigned long ic = 0;
+				while (i.next(k,v)) {
+					if (ref[*k] != *v) {
+						std::cout << "FAILED! (iterate)" << std::endl;
+						return -1;
+					}
+					++ic;
+				}
+				if (ic != ht.size()) {
+					std::cout << "FAILED! (iterate coverage)" << std::endl;
+					return -1;
+				}
+			}
+			for(std::map<uint64_t,std::string>::iterator i(ref.begin());i!=ref.end();) {
+				if (!ht.get(i->first)) {
+					std::cout << "FAILED! (erase, check if exists)" << std::endl;
+					return -1;
+				}
+				ht.erase(i->first);
+				if (ht.get(i->first)) {
+					std::cout << "FAILED! (erase, check if erased)" << std::endl;
+					return -1;
+				}
+				ref.erase(i++);
+				if (ht.size() != ref.size()) {
+					std::cout << "FAILED! (erase, size)" << std::endl;
+					return -1;
+				}
+			}
+			if (!ht.empty()) {
+				std::cout << "FAILED! (erase, empty)" << std::endl;
+				return -1;
+			}
+			for(int i=0;i<10000;++i) {
+				uint64_t k = rand();
+				while ((k == 0)||(ref.count(k) > 0))
+					++k;
+				std::string v;
+				for(int j=0;j<(int)(k % 64);++j)
+					v.push_back("0123456789"[rand() % 10]);
+				ht.set(k,v);
+				ref[k] = v;
+			}
+			if (ht.size() != ref.size()) {
+				std::cout << "FAILED! (second populate)" << std::endl;
+				return -1;
+			}
+			ht.clear();
+			ref.clear();
+			if (ht.size() != ref.size()) {
+				std::cout << "FAILED! (clear)" << std::endl;
+				return -1;
+			}
+			for(int i=0;i<10000;++i) {
+				uint64_t k = rand();
+				while ((k == 0)||(ref.count(k) > 0))
+					++k;
+				std::string v;
+				for(int j=0;j<(int)(k % 64);++j)
+					v.push_back("0123456789"[rand() % 10]);
+				ht.set(k,v);
+				ref[k] = v;
+			}
+			{
+				Hashtable<uint64_t,std::string>::Iterator i(ht);
+				uint64_t *k;
+				std::string *v;
+				while (i.next(k,v))
+					ht.erase(*k);
+			}
+			ref.clear();
+			if (ht.size() != ref.size()) {
+				std::cout << "FAILED! (clear by iterate, " << ht.size() << ")" << std::endl;
+				return -1;
+			}
+		}
+	}
+	std::cout << "PASS" << std::endl;
+
 	std::cout << "[other] Testing hex encode/decode... "; std::cout.flush();
 	std::cout << "[other] Testing hex encode/decode... "; std::cout.flush();
 	for(unsigned int k=0;k<1000;++k) {
 	for(unsigned int k=0;k<1000;++k) {
 		unsigned int flen = (rand() % 8194) + 1;
 		unsigned int flen = (rand() % 8194) + 1;
@@ -909,9 +1023,9 @@ int main(int argc,char **argv)
 	srand((unsigned int)time(0));
 	srand((unsigned int)time(0));
 
 
 	r |= testSqliteNetworkController();
 	r |= testSqliteNetworkController();
+	r |= testOther();
 	r |= testCrypto();
 	r |= testCrypto();
 	r |= testPacket();
 	r |= testPacket();
-	r |= testOther();
 	r |= testIdentity();
 	r |= testIdentity();
 	r |= testCertificate();
 	r |= testCertificate();
 	r |= testPhy();
 	r |= testPhy();