ソースを参照

central controller metrics & request path updates (#2012)

* internal db metrics

* use shared mutexes for read/write locks

* remove this lock. only used for a metric

* more metrics

* remove exploratory metrics

place controller request benchmarks behind ifdef
Grant Limberg 2 年 前
コミット
17f6b3a10b

+ 28 - 21
controller/DB.cpp

@@ -108,16 +108,17 @@ DB::~DB() {}
 bool DB::get(const uint64_t networkId,nlohmann::json &network)
 {
 	waitForReady();
+	Metrics::db_get_network++;
 	std::shared_ptr<_Network> nw;
 	{
-		std::lock_guard<std::mutex> l(_networks_l);
+		std::shared_lock<std::shared_mutex> l(_networks_l);
 		auto nwi = _networks.find(networkId);
 		if (nwi == _networks.end())
 			return false;
 		nw = nwi->second;
 	}
 	{
-		std::lock_guard<std::mutex> l2(nw->lock);
+		std::shared_lock<std::shared_mutex> l2(nw->lock);
 		network = nw->config;
 	}
 	return true;
@@ -126,16 +127,17 @@ bool DB::get(const uint64_t networkId,nlohmann::json &network)
 bool DB::get(const uint64_t networkId,nlohmann::json &network,const uint64_t memberId,nlohmann::json &member)
 {
 	waitForReady();
+	Metrics::db_get_network_and_member++;
 	std::shared_ptr<_Network> nw;
 	{
-		std::lock_guard<std::mutex> l(_networks_l);
+		std::shared_lock<std::shared_mutex> l(_networks_l);
 		auto nwi = _networks.find(networkId);
 		if (nwi == _networks.end())
 			return false;
 		nw = nwi->second;
 	}
 	{
-		std::lock_guard<std::mutex> l2(nw->lock);
+		std::shared_lock<std::shared_mutex> l2(nw->lock);
 		network = nw->config;
 		auto m = nw->members.find(memberId);
 		if (m == nw->members.end())
@@ -148,16 +150,17 @@ bool DB::get(const uint64_t networkId,nlohmann::json &network,const uint64_t mem
 bool DB::get(const uint64_t networkId,nlohmann::json &network,const uint64_t memberId,nlohmann::json &member,NetworkSummaryInfo &info)
 {
 	waitForReady();
+	Metrics::db_get_network_and_member_and_summary++;
 	std::shared_ptr<_Network> nw;
 	{
-		std::lock_guard<std::mutex> l(_networks_l);
+		std::shared_lock<std::shared_mutex> l(_networks_l);
 		auto nwi = _networks.find(networkId);
 		if (nwi == _networks.end())
 			return false;
 		nw = nwi->second;
 	}
 	{
-		std::lock_guard<std::mutex> l2(nw->lock);
+		std::shared_lock<std::shared_mutex> l2(nw->lock);
 		network = nw->config;
 		_fillSummaryInfo(nw,info);
 		auto m = nw->members.find(memberId);
@@ -171,16 +174,17 @@ bool DB::get(const uint64_t networkId,nlohmann::json &network,const uint64_t mem
 bool DB::get(const uint64_t networkId,nlohmann::json &network,std::vector<nlohmann::json> &members)
 {
 	waitForReady();
+	Metrics::db_get_member_list++;
 	std::shared_ptr<_Network> nw;
 	{
-		std::lock_guard<std::mutex> l(_networks_l);
+		std::shared_lock<std::shared_mutex> l(_networks_l);
 		auto nwi = _networks.find(networkId);
 		if (nwi == _networks.end())
 			return false;
 		nw = nwi->second;
 	}
 	{
-		std::lock_guard<std::mutex> l2(nw->lock);
+		std::shared_lock<std::shared_mutex> l2(nw->lock);
 		network = nw->config;
 		for(auto m=nw->members.begin();m!=nw->members.end();++m) {
 			members.push_back(m->second);
@@ -192,13 +196,15 @@ bool DB::get(const uint64_t networkId,nlohmann::json &network,std::vector<nlohma
 void DB::networks(std::set<uint64_t> &networks)
 {
 	waitForReady();
-	std::lock_guard<std::mutex> l(_networks_l);
+	Metrics::db_get_member_list++;
+	std::shared_lock<std::shared_mutex> l(_networks_l);
 	for(auto n=_networks.begin();n!=_networks.end();++n)
 		networks.insert(n->first);
 }
 
 void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool notifyListeners)
 {
+	Metrics::db_member_change++;
 	uint64_t memberId = 0;
 	uint64_t networkId = 0;
 	bool isAuth = false;
@@ -210,14 +216,14 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
 		networkId = OSUtils::jsonIntHex(old["nwid"],0ULL);
 		if ((memberId)&&(networkId)) {
 			{
-				std::lock_guard<std::mutex> l(_networks_l);
+				std::unique_lock<std::shared_mutex> l(_networks_l);
 				auto nw2 = _networks.find(networkId);
 				if (nw2 != _networks.end()) {
 					nw = nw2->second;
 				}
 			}
 			if (nw) {
-				std::lock_guard<std::mutex> l(nw->lock);
+				std::unique_lock<std::shared_mutex> l(nw->lock);
 				if (OSUtils::jsonBool(old["activeBridge"],false)) {
 					nw->activeBridgeMembers.erase(memberId);
 				}
@@ -247,7 +253,7 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
 			networkId = OSUtils::jsonIntHex(memberConfig["nwid"],0ULL);
 			if ((!memberId)||(!networkId))
 				return;
-			std::lock_guard<std::mutex> l(_networks_l);
+			std::unique_lock<std::shared_mutex> l(_networks_l);
 			std::shared_ptr<_Network> &nw2 = _networks[networkId];
 			if (!nw2)
 				nw2.reset(new _Network);
@@ -255,7 +261,7 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
 		}
 
 		{
-			std::lock_guard<std::mutex> l(nw->lock);
+			std::unique_lock<std::shared_mutex> l(nw->lock);
 
 			nw->members[memberId] = memberConfig;
 
@@ -288,18 +294,18 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
 		}
 
 		if (notifyListeners) {
-			std::lock_guard<std::mutex> ll(_changeListeners_l);
+			std::unique_lock<std::shared_mutex> ll(_changeListeners_l);
 			for(auto i=_changeListeners.begin();i!=_changeListeners.end();++i) {
 				(*i)->onNetworkMemberUpdate(this,networkId,memberId,memberConfig);
 			}
 		}
 	} else if (memberId) {
 		if (nw) {
-			std::lock_guard<std::mutex> l(nw->lock);
+			std::unique_lock<std::shared_mutex> l(nw->lock);
 			nw->members.erase(memberId);
 		}
 		if (networkId) {
-			std::lock_guard<std::mutex> l(_networks_l);
+			std::unique_lock<std::shared_mutex> l(_networks_l);
 			auto er = _networkByMember.equal_range(memberId);
 			for(auto i=er.first;i!=er.second;++i) {
 				if (i->second == networkId) {
@@ -329,7 +335,7 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
 	}
 
 	if ((notifyListeners)&&((wasAuth)&&(!isAuth)&&(networkId)&&(memberId))) {
-		std::lock_guard<std::mutex> ll(_changeListeners_l);
+		std::unique_lock<std::shared_mutex> ll(_changeListeners_l);
 		for(auto i=_changeListeners.begin();i!=_changeListeners.end();++i) {
 			(*i)->onNetworkMemberDeauthorize(this,networkId,memberId);
 		}
@@ -338,6 +344,7 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
 
 void DB::_networkChanged(nlohmann::json &old,nlohmann::json &networkConfig,bool notifyListeners)
 {
+	Metrics::db_network_change++;
 	if (notifyListeners) {
 		if (old.is_object() && old.contains("id") && networkConfig.is_object() && networkConfig.contains("id")) {
 			Metrics::network_changes++;
@@ -354,18 +361,18 @@ void DB::_networkChanged(nlohmann::json &old,nlohmann::json &networkConfig,bool
 		if (networkId) {
 			std::shared_ptr<_Network> nw;
 			{
-				std::lock_guard<std::mutex> l(_networks_l);
+				std::unique_lock<std::shared_mutex> l(_networks_l);
 				std::shared_ptr<_Network> &nw2 = _networks[networkId];
 				if (!nw2)
 					nw2.reset(new _Network);
 				nw = nw2;
 			}
 			{
-				std::lock_guard<std::mutex> l2(nw->lock);
+				std::unique_lock<std::shared_mutex> l2(nw->lock);
 				nw->config = networkConfig;
 			}
 			if (notifyListeners) {
-				std::lock_guard<std::mutex> ll(_changeListeners_l);
+				std::unique_lock<std::shared_mutex> ll(_changeListeners_l);
 				for(auto i=_changeListeners.begin();i!=_changeListeners.end();++i) {
 					(*i)->onNetworkUpdate(this,networkId,networkConfig);
 				}
@@ -375,7 +382,7 @@ void DB::_networkChanged(nlohmann::json &old,nlohmann::json &networkConfig,bool
 		const std::string ids = old["id"];
 		const uint64_t networkId = Utils::hexStrToU64(ids.c_str());
 		if (networkId) {
-			std::lock_guard<std::mutex> l(_networks_l);
+			std::unique_lock<std::shared_mutex> l(_networks_l);
 			_networks.erase(networkId);
 		}
 	}

+ 7 - 7
controller/DB.hpp

@@ -29,7 +29,7 @@
 #include <unordered_set>
 #include <vector>
 #include <atomic>
-#include <mutex>
+#include <shared_mutex>
 #include <set>
 #include <map>
 
@@ -109,7 +109,7 @@ public:
 
 	inline bool hasNetwork(const uint64_t networkId) const
 	{
-		std::lock_guard<std::mutex> l(_networks_l);
+		std::shared_lock<std::shared_mutex> l(_networks_l);
 		return (_networks.find(networkId) != _networks.end());
 	}
 
@@ -124,7 +124,7 @@ public:
 	inline void each(F f)
 	{
 		nlohmann::json nullJson;
-		std::lock_guard<std::mutex> lck(_networks_l);
+		std::unique_lock<std::shared_mutex> lck(_networks_l);
 		for(auto nw=_networks.begin();nw!=_networks.end();++nw) {
 			f(nw->first,nw->second->config,0,nullJson); // first provide network with 0 for member ID
 			for(auto m=nw->second->members.begin();m!=nw->second->members.end();++m) {
@@ -142,7 +142,7 @@ public:
 
 	inline void addListener(DB::ChangeListener *const listener)
 	{
-		std::lock_guard<std::mutex> l(_changeListeners_l);
+		std::unique_lock<std::shared_mutex> l(_changeListeners_l);
 		_changeListeners.push_back(listener);
 	}
 
@@ -178,7 +178,7 @@ protected:
 		std::unordered_set<uint64_t> authorizedMembers;
 		std::unordered_set<InetAddress,InetAddress::Hasher> allocatedIps;
 		int64_t mostRecentDeauthTime;
-		std::mutex lock;
+		std::shared_mutex lock;
 	};
 
 	virtual void _memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool notifyListeners);
@@ -188,8 +188,8 @@ protected:
 	std::vector<DB::ChangeListener *> _changeListeners;
 	std::unordered_map< uint64_t,std::shared_ptr<_Network> > _networks;
 	std::unordered_multimap< uint64_t,uint64_t > _networkByMember;
-	mutable std::mutex _changeListeners_l;
-	mutable std::mutex _networks_l;
+	mutable std::shared_mutex _changeListeners_l;
+	mutable std::shared_mutex _networks_l;
 };
 
 } // namespace ZeroTier

+ 16 - 16
controller/DBMirrorSet.cpp

@@ -32,7 +32,7 @@ DBMirrorSet::DBMirrorSet(DB::ChangeListener *listener)
 
 			std::vector< std::shared_ptr<DB> > dbs;
 			{
-				std::lock_guard<std::mutex> l(_dbs_l);
+				std::unique_lock<std::shared_mutex> l(_dbs_l);
 				if (_dbs.size() <= 1)
 					continue; // no need to do this if there's only one DB, so skip the iteration
 				dbs = _dbs;
@@ -79,7 +79,7 @@ DBMirrorSet::~DBMirrorSet()
 
 bool DBMirrorSet::hasNetwork(const uint64_t networkId) const
 {
-	std::lock_guard<std::mutex> l(_dbs_l);
+	std::shared_lock<std::shared_mutex> l(_dbs_l);
 	for(auto d=_dbs.begin();d!=_dbs.end();++d) {
 		if ((*d)->hasNetwork(networkId))
 			return true;
@@ -89,7 +89,7 @@ bool DBMirrorSet::hasNetwork(const uint64_t networkId) const
 
 bool DBMirrorSet::get(const uint64_t networkId,nlohmann::json &network)
 {
-	std::lock_guard<std::mutex> l(_dbs_l);
+	std::shared_lock<std::shared_mutex> l(_dbs_l);
 	for(auto d=_dbs.begin();d!=_dbs.end();++d) {
 		if ((*d)->get(networkId,network)) {
 			return true;
@@ -100,7 +100,7 @@ bool DBMirrorSet::get(const uint64_t networkId,nlohmann::json &network)
 
 bool DBMirrorSet::get(const uint64_t networkId,nlohmann::json &network,const uint64_t memberId,nlohmann::json &member)
 {
-	std::lock_guard<std::mutex> l(_dbs_l);
+	std::shared_lock<std::shared_mutex> l(_dbs_l);
 	for(auto d=_dbs.begin();d!=_dbs.end();++d) {
 		if ((*d)->get(networkId,network,memberId,member))
 			return true;
@@ -110,7 +110,7 @@ bool DBMirrorSet::get(const uint64_t networkId,nlohmann::json &network,const uin
 
 bool DBMirrorSet::get(const uint64_t networkId,nlohmann::json &network,const uint64_t memberId,nlohmann::json &member,DB::NetworkSummaryInfo &info)
 {
-	std::lock_guard<std::mutex> l(_dbs_l);
+	std::shared_lock<std::shared_mutex> l(_dbs_l);
 	for(auto d=_dbs.begin();d!=_dbs.end();++d) {
 		if ((*d)->get(networkId,network,memberId,member,info))
 			return true;
@@ -120,7 +120,7 @@ bool DBMirrorSet::get(const uint64_t networkId,nlohmann::json &network,const uin
 
 bool DBMirrorSet::get(const uint64_t networkId,nlohmann::json &network,std::vector<nlohmann::json> &members)
 {
-	std::lock_guard<std::mutex> l(_dbs_l);
+	std::shared_lock<std::shared_mutex> l(_dbs_l);
 	for(auto d=_dbs.begin();d!=_dbs.end();++d) {
 		if ((*d)->get(networkId,network,members))
 			return true;
@@ -130,7 +130,7 @@ bool DBMirrorSet::get(const uint64_t networkId,nlohmann::json &network,std::vect
 
 AuthInfo DBMirrorSet::getSSOAuthInfo(const nlohmann::json &member, const std::string &redirectURL) 
 {
-	std::lock_guard<std::mutex> l(_dbs_l);
+	std::shared_lock<std::shared_mutex> l(_dbs_l);
 	for(auto d=_dbs.begin();d!=_dbs.end();++d) { 
 		AuthInfo info = (*d)->getSSOAuthInfo(member, redirectURL);
 		if (info.enabled) {
@@ -142,7 +142,7 @@ AuthInfo DBMirrorSet::getSSOAuthInfo(const nlohmann::json &member, const std::st
 
 void DBMirrorSet::networks(std::set<uint64_t> &networks)
 {
-	std::lock_guard<std::mutex> l(_dbs_l);
+	std::shared_lock<std::shared_mutex> l(_dbs_l);
 	for(auto d=_dbs.begin();d!=_dbs.end();++d) {
 		(*d)->networks(networks);
 	}
@@ -151,7 +151,7 @@ void DBMirrorSet::networks(std::set<uint64_t> &networks)
 bool DBMirrorSet::waitForReady()
 {
 	bool r = false;
-	std::lock_guard<std::mutex> l(_dbs_l);
+	std::shared_lock<std::shared_mutex> l(_dbs_l);
 	for(auto d=_dbs.begin();d!=_dbs.end();++d) {
 		r |= (*d)->waitForReady();
 	}
@@ -160,7 +160,7 @@ bool DBMirrorSet::waitForReady()
 
 bool DBMirrorSet::isReady()
 {
-	std::lock_guard<std::mutex> l(_dbs_l);
+	std::shared_lock<std::shared_mutex> l(_dbs_l);
 	for(auto d=_dbs.begin();d!=_dbs.end();++d) {
 		if (!(*d)->isReady())
 			return false;
@@ -172,7 +172,7 @@ bool DBMirrorSet::save(nlohmann::json &record,bool notifyListeners)
 {
 	std::vector< std::shared_ptr<DB> > dbs;
 	{
-		std::lock_guard<std::mutex> l(_dbs_l);
+		std::unique_lock<std::shared_mutex> l(_dbs_l);
 		dbs = _dbs;
 	}
 	if (notifyListeners) {
@@ -192,7 +192,7 @@ bool DBMirrorSet::save(nlohmann::json &record,bool notifyListeners)
 
 void DBMirrorSet::eraseNetwork(const uint64_t networkId)
 {
-	std::lock_guard<std::mutex> l(_dbs_l);
+	std::unique_lock<std::shared_mutex> l(_dbs_l);
 	for(auto d=_dbs.begin();d!=_dbs.end();++d) {
 		(*d)->eraseNetwork(networkId);
 	}
@@ -200,7 +200,7 @@ void DBMirrorSet::eraseNetwork(const uint64_t networkId)
 
 void DBMirrorSet::eraseMember(const uint64_t networkId,const uint64_t memberId)
 {
-	std::lock_guard<std::mutex> l(_dbs_l);
+	std::unique_lock<std::shared_mutex> l(_dbs_l);
 	for(auto d=_dbs.begin();d!=_dbs.end();++d) {
 		(*d)->eraseMember(networkId,memberId);
 	}
@@ -208,7 +208,7 @@ void DBMirrorSet::eraseMember(const uint64_t networkId,const uint64_t memberId)
 
 void DBMirrorSet::nodeIsOnline(const uint64_t networkId,const uint64_t memberId,const InetAddress &physicalAddress)
 {
-	std::lock_guard<std::mutex> l(_dbs_l);
+	std::shared_lock<std::shared_mutex> l(_dbs_l);
 	for(auto d=_dbs.begin();d!=_dbs.end();++d) {
 		(*d)->nodeIsOnline(networkId,memberId,physicalAddress);
 	}
@@ -217,7 +217,7 @@ void DBMirrorSet::nodeIsOnline(const uint64_t networkId,const uint64_t memberId,
 void DBMirrorSet::onNetworkUpdate(const void *db,uint64_t networkId,const nlohmann::json &network)
 {
 	nlohmann::json record(network);
-	std::lock_guard<std::mutex> l(_dbs_l);
+	std::unique_lock<std::shared_mutex> l(_dbs_l);
 	for(auto d=_dbs.begin();d!=_dbs.end();++d) {
 		if (d->get() != db) {
 			(*d)->save(record,false);
@@ -229,7 +229,7 @@ void DBMirrorSet::onNetworkUpdate(const void *db,uint64_t networkId,const nlohma
 void DBMirrorSet::onNetworkMemberUpdate(const void *db,uint64_t networkId,uint64_t memberId,const nlohmann::json &member)
 {
 	nlohmann::json record(member);
-	std::lock_guard<std::mutex> l(_dbs_l);
+	std::unique_lock<std::shared_mutex> l(_dbs_l);
 	for(auto d=_dbs.begin();d!=_dbs.end();++d) {
 		if (d->get() != db) {
 			(*d)->save(record,false);

+ 3 - 3
controller/DBMirrorSet.hpp

@@ -18,7 +18,7 @@
 
 #include <vector>
 #include <memory>
-#include <mutex>
+#include <shared_mutex>
 #include <set>
 #include <thread>
 
@@ -56,7 +56,7 @@ public:
 	inline void addDB(const std::shared_ptr<DB> &db)
 	{
 		db->addListener(this);
-		std::lock_guard<std::mutex> l(_dbs_l);
+		std::unique_lock<std::shared_mutex> l(_dbs_l);
 		_dbs.push_back(db);
 	}
 
@@ -65,7 +65,7 @@ private:
 	std::atomic_bool _running;
 	std::thread _syncCheckerThread;
 	std::vector< std::shared_ptr< DB > > _dbs;
-	mutable std::mutex _dbs_l;
+	mutable std::shared_mutex _dbs_l;
 };
 
 } // namespace ZeroTier

+ 170 - 23
controller/EmbeddedNetworkController.cpp

@@ -460,17 +460,41 @@ static bool _parseRule(json &r,ZT_VirtualNetworkRule &rule)
 
 } // anonymous namespace
 
-EmbeddedNetworkController::EmbeddedNetworkController(Node *node,const char *ztPath,const char *dbPath, int listenPort, RedisConfig *rc) :
-	_startTime(OSUtils::now()),
-	_listenPort(listenPort),
-	_node(node),
-	_ztPath(ztPath),
-	_path(dbPath),
-	_sender((NetworkController::Sender *)0),
-	_db(this),
-	_ssoExpiryRunning(true),
-	_ssoExpiry(std::thread(&EmbeddedNetworkController::_ssoExpiryThread, this)),
-	_rc(rc)
+EmbeddedNetworkController::EmbeddedNetworkController(Node *node,const char *ztPath,const char *dbPath, int listenPort, RedisConfig *rc) 
+	: _startTime(OSUtils::now())
+	, _listenPort(listenPort)
+	, _node(node)
+	, _ztPath(ztPath)
+	, _path(dbPath)
+	, _sender((NetworkController::Sender *)0)
+	, _db(this)
+	, _ssoExpiryRunning(true)
+	, _ssoExpiry(std::thread(&EmbeddedNetworkController::_ssoExpiryThread, this))
+	, _rc(rc)
+#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK	
+	, _member_status_lookup{"nc_member_status_lookup",""}
+	, _member_status_lookup_count{"nc_member_status_lookup_count",""}
+	, _node_is_online{"nc_node_is_online",""}
+	, _node_is_online_count{"nc_node_is_online_count",""}
+	, _get_and_init_member{"nc_get_and_init_member",""}
+	, _get_and_init_member_count{"nc_get_and_init_member_count",""}
+	, _have_identity{"nc_have_identity",""}
+	, _have_identity_count{"nc_have_identity_count",""}
+	, _determine_auth{"nc_determine_auth",""}
+	, _determine_auth_count{"nc_determine_auth_count",""}
+	, _sso_check{"nc_sso_check",""}
+	, _sso_check_count{"nc_sso_check_count",""}
+	, _auth_check{"nc_auth_check",""}
+	, _auth_check_count{"nc_auth_check_count",""}
+	, _json_schlep{"nc_json_schlep",""}
+	, _json_schlep_count{"nc_json_schlep_count",""}
+	, _issue_certificate{"nc_issue_certificate", ""}
+	, _issue_certificate_count{"nc_issue_certificate_count",""}
+	, _save_member{"nc_save_member",""}
+	, _save_member_count{"nc_save_member_count",""}
+	, _send_netconf{"nc_send_netconf2",""}
+	, _send_netconf_count{"nc_send_netconf2_count",""}
+#endif
 {
 }
 
@@ -549,6 +573,18 @@ void EmbeddedNetworkController::request(
 	if (((!_signingId)||(!_signingId.hasPrivate()))||(_signingId.address().toInt() != (nwid >> 24))||(!_sender))
 		return;
 	_startThreads();
+
+	const int64_t now = OSUtils::now();
+
+	if (requestPacketId) {
+		std::lock_guard<std::mutex> l(_memberStatus_l);
+		_MemberStatus &ms = _memberStatus[_MemberStatusKey(nwid,identity.address().toInt())];
+		if ((now - ms.lastRequestTime) <= ZT_NETCONF_MIN_REQUEST_PERIOD) {
+			return;
+		}
+		ms.lastRequestTime = now;
+	}
+
 	_RQEntry *qe = new _RQEntry;
 	qe->nwid = nwid;
 	qe->requestPacketId = requestPacketId;
@@ -1176,36 +1212,66 @@ void EmbeddedNetworkController::_request(
 	const Identity &identity,
 	const Dictionary<ZT_NETWORKCONFIG_METADATA_DICT_CAPACITY> &metaData)
 {
+	Metrics::network_config_request++;
+	auto tid = std::this_thread::get_id();
+	std::stringstream ss; ss << tid;
+	std::string threadID = ss.str();
+#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+	auto b1 = _member_status_lookup.Add({{"thread", threadID}});
+	auto c1 = _member_status_lookup_count.Add({{"thread", threadID}});
+	c1++;
+	b1.start();
+#endif
+
 	char nwids[24];
 	DB::NetworkSummaryInfo ns;
 	json network,member;
 
-	if (((!_signingId)||(!_signingId.hasPrivate()))||(_signingId.address().toInt() != (nwid >> 24))||(!_sender))
+	if (((!_signingId)||(!_signingId.hasPrivate()))||(_signingId.address().toInt() != (nwid >> 24))||(!_sender)) {
 		return;
+	}
 
 	const int64_t now = OSUtils::now();
 
-	if (requestPacketId) {
-		std::lock_guard<std::mutex> l(_memberStatus_l);
-		_MemberStatus &ms = _memberStatus[_MemberStatusKey(nwid,identity.address().toInt())];
-		if ((now - ms.lastRequestTime) <= ZT_NETCONF_MIN_REQUEST_PERIOD)
-			return;
-		ms.lastRequestTime = now;
-	}
-
+#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+	b1.stop();
+	auto b2 = _node_is_online.Add({{"thread",threadID}});
+	auto c2 = _node_is_online_count.Add({{"thread",threadID}});
+	c2++;
+	b2.start();
+#endif
 	_db.nodeIsOnline(nwid,identity.address().toInt(),fromAddr);
+#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+	b2.stop();
 
+	auto b3 = _get_and_init_member.Add({{"thread", threadID}});
+	auto c3 = _get_and_init_member_count.Add({{"thread",threadID}});
+	c3++;
+	b3.start();
+#endif
 	Utils::hex(nwid,nwids);
 	_db.get(nwid,network,identity.address().toInt(),member,ns);
 	if ((!network.is_object())||(network.empty())) {
 		_sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_OBJECT_NOT_FOUND, nullptr, 0);
+#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+		b3.stop();
+#endif
 		return;
 	}
 	const bool newMember = ((!member.is_object())||(member.empty()));
 	DB::initMember(member);
 	_MemberStatusKey msk(nwid,identity.address().toInt());
+#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+	b3.stop();
+#endif
 
 	{
+#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+		auto b4 = _have_identity.Add({{"thread",threadID}});
+		auto c4 = _have_identity_count.Add({{"thread",threadID}});
+		c4++;
+		b4.start();
+#endif
 		const std::string haveIdStr(OSUtils::jsonString(member["identity"],""));
 		if (haveIdStr.length() > 0) {
 			// If we already know this member's identity perform a full compare. This prevents
@@ -1214,10 +1280,16 @@ void EmbeddedNetworkController::_request(
 			try {
 				if (Identity(haveIdStr.c_str()) != identity) {
 					_sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_ACCESS_DENIED, nullptr, 0);
+					#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+					b4.stop();
+					#endif
 					return;
 				}
 			} catch ( ... ) {
 				_sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_ACCESS_DENIED, nullptr, 0);
+				#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+				b4.stop();
+				#endif
 				return;
 			}
 		} else {
@@ -1225,6 +1297,9 @@ void EmbeddedNetworkController::_request(
 			char idtmp[1024];
 			member["identity"] = identity.toString(false,idtmp);
 		}
+#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+		b4.stop();
+#endif
 	}
 
 	// These are always the same, but make sure they are set
@@ -1237,6 +1312,12 @@ void EmbeddedNetworkController::_request(
 	}
 
 	// Determine whether and how member is authorized
+#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+	auto b5 = _determine_auth.Add({{"thread",threadID}});
+	auto c5 = _determine_auth_count.Add({{"thread",threadID}});
+	c5++;
+	b5.start();
+#endif
 	bool authorized = false;
 	bool autoAuthorized = false;
 	json autoAuthCredentialType,autoAuthCredential;
@@ -1273,10 +1354,19 @@ void EmbeddedNetworkController::_request(
 		member["lastAuthorizedCredentialType"] = autoAuthCredentialType;
 		member["lastAuthorizedCredential"] = autoAuthCredential;
 	}
+#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+	b5.stop();
+#endif
 
 	// Should we check SSO Stuff?
 	// If network is configured with SSO, and the member is not marked exempt: yes
 	// Otherwise no, we use standard auth logic.
+#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+	auto b6 = _sso_check.Add({{"thread",threadID}});
+	auto c6 = _sso_check_count.Add({{"thread",threadID}});
+	c6++;
+	b6.start();
+#endif
 	AuthInfo info;
 	int64_t authenticationExpiryTime = -1;
 	bool networkSSOEnabled = OSUtils::jsonBool(network["ssoEnabled"], false);
@@ -1303,10 +1393,20 @@ void EmbeddedNetworkController::_request(
 			}
 			DB::cleanMember(member);
 			_db.save(member,true);
+			#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+			b6.stop();
+			#endif
 			return;
 		}
 	}
+#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+	b6.stop();
 
+	auto b7 = _auth_check.Add({{"thread",threadID}});
+	auto c7 = _auth_check_count.Add({{"thread",threadID}});
+	c7++;
+	b7.start();
+#endif
 	if (authorized) {
 		// Update version info and meta-data if authorized and if this is a genuine request
 		if (requestPacketId) {
@@ -1342,8 +1442,14 @@ void EmbeddedNetworkController::_request(
 		DB::cleanMember(member);
 		_db.save(member,true);
 		_sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_ACCESS_DENIED, nullptr, 0);
+		#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+		b7.stop();
+		#endif
 		return;
 	}
+#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+	b7.stop();
+#endif
 
 	// -------------------------------------------------------------------------
 	// If we made it this far, they are authorized (and authenticated).
@@ -1351,6 +1457,12 @@ void EmbeddedNetworkController::_request(
 
 	// Default timeout: 15 minutes. Maximum: two hours. Can be specified by an optional field in the network config
 	// if something longer than 15 minutes is desired. Minimum is 5 minutes since shorter than that would be flaky.
+#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+	auto b8 = _json_schlep.Add({{"thread",threadID}});
+	auto c8 = _json_schlep_count.Add({{"thread", threadID}});
+	c8++;
+	b8.start();
+#endif
 	int64_t credentialtmd = ZT_NETWORKCONFIG_DEFAULT_CREDENTIAL_TIME_DFL_MAX_DELTA;
 	if (network.contains("certificateTimeoutWindowSize")) {
 		credentialtmd = (int64_t)network["certificateTimeoutWindowSize"];
@@ -1418,8 +1530,9 @@ void EmbeddedNetworkController::_request(
 		nc->remoteTraceLevel = (Trace::Level)OSUtils::jsonInt(network["remoteTraceLevel"],0ULL);
 	}
 
-	for(std::vector<Address>::const_iterator ab(ns.activeBridges.begin());ab!=ns.activeBridges.end();++ab)
+	for(std::vector<Address>::const_iterator ab(ns.activeBridges.begin());ab!=ns.activeBridges.end();++ab) {
 		nc->addSpecialist(*ab,ZT_NETWORKCONFIG_SPECIALIST_TYPE_ACTIVE_BRIDGE);
+	}
 
 	json &v4AssignMode = network["v4AssignMode"];
 	json &v6AssignMode = network["v6AssignMode"];
@@ -1739,12 +1852,22 @@ void EmbeddedNetworkController::_request(
 	} else {
 		dns = json::object();
 	}
+#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+	b8.stop();
+#endif
 
 	// Issue a certificate of ownership for all static IPs
+#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+	auto b9 = _issue_certificate.Add({{"thread",threadID}});
+	auto c9 = _issue_certificate_count.Add({{"thread",threadID}});
+	c9++;
+	b9.start();
+#endif
 	if (nc->staticIpCount) {
 		nc->certificatesOfOwnership[0] = CertificateOfOwnership(nwid,now,identity.address(),1);
-		for(unsigned int i=0;i<nc->staticIpCount;++i)
+		for(unsigned int i=0;i<nc->staticIpCount;++i) {
 			nc->certificatesOfOwnership[0].addThing(nc->staticIps[i]);
+		}
 		nc->certificatesOfOwnership[0].sign(_signingId);
 		nc->certificateOfOwnershipCount = 1;
 	}
@@ -1754,22 +1877,45 @@ void EmbeddedNetworkController::_request(
 		nc->com = com;
 	} else {
 		_sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_INTERNAL_SERVER_ERROR, nullptr, 0);
+		#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+		b9.stop();
+		#endif
 		return;
 	}
+#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+	b9.stop();
 
+	auto b10 = _save_member.Add({{"thread",threadID}});
+	auto c10 = _save_member_count.Add({{"thread",threadID}});
+	c10++;
+	b10.start();
+#endif
 	DB::cleanMember(member);
 	_db.save(member,true);
+#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+	b10.stop();
+
+	auto b11 = _send_netconf.Add({{"thread",threadID}});
+	auto c11 = _send_netconf_count.Add({{"thread",threadID}});
+	c11++;
+	b11.start();
+#endif
 	_sender->ncSendConfig(nwid,requestPacketId,identity.address(),*(nc.get()),metaData.getUI(ZT_NETWORKCONFIG_REQUEST_METADATA_KEY_VERSION,0) < 6);
+#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+	b11.stop();
+#endif
 }
 
 void EmbeddedNetworkController::_startThreads()
 {
 	std::lock_guard<std::mutex> l(_threads_l);
-	if (!_threads.empty())
+	if (!_threads.empty()) {
 		return;
+	}
 	const long hwc = std::max((long)std::thread::hardware_concurrency(),(long)1);
 	for(long t=0;t<hwc;++t) {
 		_threads.emplace_back([this]() {
+			Metrics::network_config_request_threads++;
 			for(;;) {
 				_RQEntry *qe = (_RQEntry *)0;
 				Metrics::network_config_request_queue_size = _queue.size();
@@ -1790,6 +1936,7 @@ void EmbeddedNetworkController::_startThreads()
 					}
 				}
 			}
+			Metrics::network_config_request_threads--;
 		});
 	}
 }

+ 25 - 0
controller/EmbeddedNetworkController.hpp

@@ -150,6 +150,31 @@ private:
 
 	RedisConfig *_rc;
 	std::string _ssoRedirectURL;
+
+#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
+	prometheus::simpleapi::benchmark_family_t _member_status_lookup;
+	prometheus::simpleapi::counter_family_t   _member_status_lookup_count;
+	prometheus::simpleapi::benchmark_family_t _node_is_online;
+	prometheus::simpleapi::counter_family_t   _node_is_online_count;
+	prometheus::simpleapi::benchmark_family_t _get_and_init_member;
+	prometheus::simpleapi::counter_family_t   _get_and_init_member_count;
+	prometheus::simpleapi::benchmark_family_t _have_identity;
+	prometheus::simpleapi::counter_family_t   _have_identity_count;
+	prometheus::simpleapi::benchmark_family_t _determine_auth;
+	prometheus::simpleapi::counter_family_t   _determine_auth_count;
+	prometheus::simpleapi::benchmark_family_t _sso_check;
+	prometheus::simpleapi::counter_family_t   _sso_check_count;
+	prometheus::simpleapi::benchmark_family_t _auth_check;
+	prometheus::simpleapi::counter_family_t   _auth_check_count;
+	prometheus::simpleapi::benchmark_family_t _json_schlep;
+	prometheus::simpleapi::counter_family_t   _json_schlep_count;
+	prometheus::simpleapi::benchmark_family_t _issue_certificate;
+	prometheus::simpleapi::counter_family_t   _issue_certificate_count;
+	prometheus::simpleapi::benchmark_family_t _save_member;
+	prometheus::simpleapi::counter_family_t   _save_member_count;
+	prometheus::simpleapi::benchmark_family_t _send_netconf;
+	prometheus::simpleapi::counter_family_t   _send_netconf_count;
+#endif
 };
 
 } // namespace ZeroTier

+ 20 - 9
controller/PostgreSQL.cpp

@@ -374,6 +374,7 @@ void PostgreSQL::nodeIsOnline(const uint64_t networkId, const uint64_t memberId,
 
 AuthInfo PostgreSQL::getSSOAuthInfo(const nlohmann::json &member, const std::string &redirectURL)
 {
+	Metrics::db_get_sso_info++;
 	// NONCE is just a random character string.  no semantic meaning
 	// state = HMAC SHA384 of Nonce based on shared sso key
 	// 
@@ -388,9 +389,16 @@ AuthInfo PostgreSQL::getSSOAuthInfo(const nlohmann::json &member, const std::str
 	char authenticationURL[4096] = {0};
 	AuthInfo info;
 	info.enabled = true;
+
+	//if (memberId == "a10dccea52" && networkId == "8056c2e21c24673d") {
+	//	fprintf(stderr, "invalid authinfo for grant's machine\n");
+	//	info.version=1;
+	//	return info;
+	//}
 	// fprintf(stderr, "PostgreSQL::updateMemberOnLoad: %s-%s\n", networkId.c_str(), memberId.c_str());
+	std::shared_ptr<PostgresConnection> c;
 	try {
-		auto c = _pool->borrow();
+		c = _pool->borrow();
 		pqxx::work w(*c->c);
 
 		char nonceBytes[16] = {0};
@@ -462,11 +470,11 @@ AuthInfo PostgreSQL::getSSOAuthInfo(const nlohmann::json &member, const std::str
 			uint64_t sso_version = 0;
 
 			if (r.size() == 1) {
-				client_id = r.at(0)[0].as<std::string>();
-				authorization_endpoint = r.at(0)[1].as<std::string>();
-				issuer = r.at(0)[2].as<std::string>();
-				provider = r.at(0)[3].as<std::string>();
-				sso_version = r.at(0)[4].as<uint64_t>();
+				client_id = r.at(0)[0].as<std::optional<std::string>>().value_or("");
+				authorization_endpoint = r.at(0)[1].as<std::optional<std::string>>().value_or("");
+				issuer = r.at(0)[2].as<std::optional<std::string>>().value_or("");
+				provider = r.at(0)[3].as<std::optional<std::string>>().value_or("");
+				sso_version = r.at(0)[4].as<std::optional<uint64_t>>().value_or(1);
 			} else if (r.size() > 1) {
 				fprintf(stderr, "ERROR: More than one auth endpoint for an organization?!?!? NetworkID: %s\n", networkId.c_str());
 			} else {
@@ -519,6 +527,9 @@ AuthInfo PostgreSQL::getSSOAuthInfo(const nlohmann::json &member, const std::str
 
 		_pool->unborrow(c);
 	} catch (std::exception &e) {
+		if (c) {
+			_pool->unborrow(c);
+		}
 		fprintf(stderr, "ERROR: Error updating member on load for network %s: %s\n", networkId.c_str(), e.what());
 	}
 
@@ -776,7 +787,7 @@ void PostgreSQL::initializeMembers()
 
 		if (_redisMemberStatus) {
 			fprintf(stderr, "Initialize Redis for members...\n");
-			std::lock_guard<std::mutex> l(_networks_l);
+			std::unique_lock<std::shared_mutex> l(_networks_l);
 			std::unordered_set<std::string> deletes;
 			for ( auto it : _networks) {
 				uint64_t nwid_i = it.first;
@@ -1270,6 +1281,7 @@ void PostgreSQL::commitThread()
 			continue;
 		}
 		
+		Metrics::pgsql_commit_ticks++;
 		try {
 			nlohmann::json &config = (qitem.first);
 			const std::string objtype = config["objtype"];
@@ -1596,7 +1608,6 @@ void PostgreSQL::commitThread()
 		}
 		_pool->unborrow(c);
 		c.reset();
-		std::this_thread::sleep_for(std::chrono::milliseconds(100));
 	}
 
 	fprintf(stderr, "%s commitThread finished\n", _myAddressStr.c_str());
@@ -1812,7 +1823,7 @@ uint64_t PostgreSQL::_doRedisUpdate(sw::redis::Transaction &tx, std::string &con
 						sw::redis::RightBoundedInterval<double>(expireOld,
 																sw::redis::BoundType::LEFT_OPEN));
 	{
-		std::lock_guard<std::mutex> l(_networks_l);
+		std::shared_lock<std::shared_mutex> l(_networks_l);
 		for (const auto &it : _networks) {
 			uint64_t nwid_i = it.first;
 			char nwidTmp[64];

+ 24 - 0
node/Metrics.cpp

@@ -215,6 +215,25 @@ namespace ZeroTier {
         prometheus::simpleapi::counter_metric_t sso_member_deauth
         { "controller_sso_timeouts", "number of sso timeouts" };
 
+        prometheus::simpleapi::counter_metric_t network_config_request
+        { "controller_network_config_request", "count of config requests handled" };
+        prometheus::simpleapi::gauge_metric_t network_config_request_threads
+        { "controller_network_config_request_threads", "number of active network config handling threads" };
+        prometheus::simpleapi::counter_metric_t db_get_network
+        { "controller_db_get_network", "counter" };
+        prometheus::simpleapi::counter_metric_t db_get_network_and_member
+        { "controller_db_get_network_and_member", "counter" };
+        prometheus::simpleapi::counter_metric_t db_get_network_and_member_and_summary
+        { "controller_db_get_networK_and_member_summary", "counter" };
+        prometheus::simpleapi::counter_metric_t db_get_member_list
+        { "controller_db_get_member_list", "counter" };
+        prometheus::simpleapi::counter_metric_t db_get_network_list
+        { "controller_db_get_network_list", "counter" };
+        prometheus::simpleapi::counter_metric_t db_member_change
+        { "controller_db_member_change", "counter" };
+        prometheus::simpleapi::counter_metric_t db_network_change
+        { "controller_db_network_change", "counter" };
+
 #ifdef ZT_CONTROLLER_USE_LIBPQ
         // Central Controller Metrics
         prometheus::simpleapi::counter_metric_t pgsql_mem_notification
@@ -223,6 +242,11 @@ namespace ZeroTier {
         { "controller_pgsql_network_notifications_received", "number of network change notifications received via pgsql NOTIFY" };
         prometheus::simpleapi::counter_metric_t pgsql_node_checkin
         { "controller_pgsql_node_checkin_count", "number of node check-ins (pgsql)" };
+        prometheus::simpleapi::counter_metric_t pgsql_commit_ticks
+        { "controller_pgsql_commit_ticks", "number of commit ticks run (pgsql)" };
+        prometheus::simpleapi::counter_metric_t db_get_sso_info
+        { "controller_db_get_sso_info", "counter" };
+
         prometheus::simpleapi::counter_metric_t redis_mem_notification
         { "controller_redis_member_notifications_received", "number of member change notifications received via redis" };
         prometheus::simpleapi::counter_metric_t redis_net_notification

+ 14 - 0
node/Metrics.hpp

@@ -126,12 +126,26 @@ namespace ZeroTier {
         extern prometheus::simpleapi::gauge_metric_t network_config_request_queue_size;
         extern prometheus::simpleapi::counter_metric_t sso_expiration_checks;
         extern prometheus::simpleapi::counter_metric_t sso_member_deauth;
+        extern prometheus::simpleapi::counter_metric_t network_config_request;
+        extern prometheus::simpleapi::gauge_metric_t network_config_request_threads;
+
+        extern prometheus::simpleapi::counter_metric_t db_get_network;
+        extern prometheus::simpleapi::counter_metric_t db_get_network_and_member;
+        extern prometheus::simpleapi::counter_metric_t db_get_network_and_member_and_summary;
+        extern prometheus::simpleapi::counter_metric_t db_get_member_list;
+        extern prometheus::simpleapi::counter_metric_t db_get_network_list;
+        extern prometheus::simpleapi::counter_metric_t db_member_change;
+        extern prometheus::simpleapi::counter_metric_t db_network_change;
+
 
 #ifdef ZT_CONTROLLER_USE_LIBPQ
         // Central Controller Metrics
         extern prometheus::simpleapi::counter_metric_t pgsql_mem_notification;
         extern prometheus::simpleapi::counter_metric_t pgsql_net_notification;
         extern prometheus::simpleapi::counter_metric_t pgsql_node_checkin;
+        extern prometheus::simpleapi::counter_metric_t pgsql_commit_ticks;
+        extern prometheus::simpleapi::counter_metric_t db_get_sso_info;
+        
         extern prometheus::simpleapi::counter_metric_t redis_mem_notification;
         extern prometheus::simpleapi::counter_metric_t redis_net_notification;
         extern prometheus::simpleapi::counter_metric_t redis_node_checkin;

+ 0 - 1
osdep/BlockingQueue.hpp

@@ -117,7 +117,6 @@ public:
 	}
 
 	inline size_t size() const {
-		std::unique_lock<std::mutex> lock(m);
 		return q.size();
 	}