Browse Source

Fix a deadlock in federation/upstream code.

Author: Adam Ierymenko (8 years ago)
Commit: cbaef66e82
2 changed files with 66 additions and 65 deletions:
  1. node/Node.cpp (+27 −21)
  2. node/Topology.cpp (+39 −44)

node/Node.cpp (+27 −21)

@@ -174,12 +174,14 @@ ZT_ResultCode Node::processVirtualNetworkFrame(
 	} else return ZT_RESULT_ERROR_NETWORK_NOT_FOUND;
 }
 
+// Closure used to ping upstream and active/online peers
 class _PingPeersThatNeedPing
 {
 public:
-	_PingPeersThatNeedPing(const RuntimeEnvironment *renv,uint64_t now) :
+	_PingPeersThatNeedPing(const RuntimeEnvironment *renv,const std::vector<Address> &upstreams,uint64_t now) :
 		lastReceiveFromUpstream(0),
 		RR(renv),
+		_upstreams(upstreams),
 		_now(now),
 		_world(RR->topology->world())
 	{
@@ -189,29 +191,25 @@ public:
 
 	inline void operator()(Topology &t,const SharedPtr<Peer> &p)
 	{
-		bool upstream = false;
-		InetAddress stableEndpoint4,stableEndpoint6;
-
-		// If this is a world root, pick (if possible) both an IPv4 and an IPv6 stable endpoint to use if link isn't currently alive.
-		for(std::vector<World::Root>::const_iterator r(_world.roots().begin());r!=_world.roots().end();++r) {
-			if (r->identity == p->identity()) {
-				upstream = true;
-				for(unsigned long k=0,ptr=(unsigned long)RR->node->prng();k<(unsigned long)r->stableEndpoints.size();++k) {
-					const InetAddress &addr = r->stableEndpoints[ptr++ % r->stableEndpoints.size()];
-					if (!stableEndpoint4) {
-						if (addr.ss_family == AF_INET)
-							stableEndpoint4 = addr;
-					}
-					if (!stableEndpoint6) {
-						if (addr.ss_family == AF_INET6)
-							stableEndpoint6 = addr;
+		if (std::find(_upstreams.begin(),_upstreams.end(),p->address()) != _upstreams.end()) {
+			InetAddress stableEndpoint4,stableEndpoint6;
+			for(std::vector<World::Root>::const_iterator r(_world.roots().begin());r!=_world.roots().end();++r) {
+				if (r->identity == p->identity()) {
+					for(unsigned long k=0,ptr=(unsigned long)RR->node->prng();k<(unsigned long)r->stableEndpoints.size();++k) {
+						const InetAddress &addr = r->stableEndpoints[ptr++ % r->stableEndpoints.size()];
+						if (!stableEndpoint4) {
+							if (addr.ss_family == AF_INET)
+								stableEndpoint4 = addr;
+						}
+						if (!stableEndpoint6) {
+							if (addr.ss_family == AF_INET6)
+								stableEndpoint6 = addr;
+						}
 					}
+					break;
 				}
-				break;
 			}
-		}
 
-		if (upstream) {
 			// We keep connections to upstream peers alive forever.
 			bool needToContactIndirect = true;
 			if (p->doPingAndKeepalive(_now,AF_INET)) {
@@ -246,6 +244,7 @@ public:
 
 private:
 	const RuntimeEnvironment *RR;
+	const std::vector<Address> &_upstreams;
 	uint64_t _now;
 	World _world;
};
@@ -274,8 +273,15 @@ ZT_ResultCode Node::processBackgroundTasks(uint64_t now,volatile uint64_t *nextB
 			for(std::vector< SharedPtr<Network> >::const_iterator n(needConfig.begin());n!=needConfig.end();++n)
 				(*n)->requestConfiguration();
 
+			// Run WHOIS on upstreams we don't know about
+			const std::vector<Address> upstreams(RR->topology->upstreamAddresses());
+			for(std::vector<Address>::const_iterator a(upstreams.begin());a!=upstreams.end();++a) {
+				if (!RR->topology->getPeer(*a))
+					RR->sw->requestWhois(*a);
+			}
+
 			// Do pings and keepalives
-			_PingPeersThatNeedPing pfunc(RR,now);
+			_PingPeersThatNeedPing pfunc(RR,upstreams,now);
 			RR->topology->eachPeer<_PingPeersThatNeedPing &>(pfunc);
 
 			// Update online status, post status change as event

node/Topology.cpp (+39 −44)

@@ -191,32 +191,23 @@ SharedPtr<Peer> Topology::getUpstreamPeer(const Address *avoid,unsigned int avoi
 
 		for(std::vector<Address>::const_iterator a(_upstreamAddresses.begin());a!=_upstreamAddresses.end();++a) {
 			const SharedPtr<Peer> *p = _peers.get(*a);
-
-			if (!p) {
-				const Identity id(_getIdentity(*a));
-				if (id) {
-					p = &(_peers.set(*a,SharedPtr<Peer>(new Peer(RR,RR->identity,id))));
-				} else {
-					RR->sw->requestWhois(*a);
+			if (p) {
+				bool avoiding = false;
+				for(unsigned int i=0;i<avoidCount;++i) {
+					if (avoid[i] == (*p)->address()) {
+						avoiding = true;
+						break;
+					}
 				}
-				continue; // always skip since even if we loaded it, it's not going to be ready
-			}
-
-			bool avoiding = false;
-			for(unsigned int i=0;i<avoidCount;++i) {
-				if (avoid[i] == (*p)->address()) {
-					avoiding = true;
-					break;
+				const unsigned int q = (*p)->relayQuality(now);
+				if (q <= bestQualityOverall) {
+					bestQualityOverall = q;
+					bestOverall = &(*p);
+				}
+				if ((!avoiding)&&(q <= bestQualityNotAvoid)) {
+					bestQualityNotAvoid = q;
+					bestNotAvoid = &(*p);
 				}
-			}
-			const unsigned int q = (*p)->relayQuality(now);
-			if (q <= bestQualityOverall) {
-				bestQualityOverall = q;
-				bestOverall = &(*p);
-			}
-			if ((!avoiding)&&(q <= bestQualityNotAvoid)) {
-				bestQualityNotAvoid = q;
-				bestNotAvoid = &(*p);
 			}
 		}
 
 
@@ -245,31 +236,35 @@ bool Topology::isUpstream(const Identity &id) const
 
 void Topology::setUpstream(const Address &a,bool upstream)
 {
-	Mutex::Lock _l(_lock);
-	if (std::find(_rootAddresses.begin(),_rootAddresses.end(),a) == _rootAddresses.end()) {
-		if (upstream) {
-			if (std::find(_upstreamAddresses.begin(),_upstreamAddresses.end(),a) == _upstreamAddresses.end()) {
-				_upstreamAddresses.push_back(a);
-
-				const SharedPtr<Peer> *p = _peers.get(a);
-				if (!p) {
-					const Identity id(_getIdentity(a));
-					if (id) {
-						_peers.set(a,SharedPtr<Peer>(new Peer(RR,RR->identity,id)));
-					} else {
-						RR->sw->requestWhois(a);
+	bool needWhois = false;
+	{
+		Mutex::Lock _l(_lock);
+		if (std::find(_rootAddresses.begin(),_rootAddresses.end(),a) == _rootAddresses.end()) {
+			if (upstream) {
+				if (std::find(_upstreamAddresses.begin(),_upstreamAddresses.end(),a) == _upstreamAddresses.end()) {
+					_upstreamAddresses.push_back(a);
+					const SharedPtr<Peer> *p = _peers.get(a);
+					if (!p) {
+						const Identity id(_getIdentity(a));
+						if (id) {
+							_peers.set(a,SharedPtr<Peer>(new Peer(RR,RR->identity,id)));
+						} else {
+							needWhois = true; // need to do this later due to _lock
+						}
 					}
 				}
+			} else {
+				std::vector<Address> ua;
+				for(std::vector<Address>::iterator i(_upstreamAddresses.begin());i!=_upstreamAddresses.end();++i) {
+					if (a != *i)
+						ua.push_back(*i);
+				}
+				_upstreamAddresses.swap(ua);
 			}
-		} else {
-			std::vector<Address> ua;
-			for(std::vector<Address>::iterator i(_upstreamAddresses.begin());i!=_upstreamAddresses.end();++i) {
-				if (a != *i)
-					ua.push_back(*i);
-			}
-			_upstreamAddresses.swap(ua);
 		}
 	}
+	if (needWhois)
+		RR->sw->requestWhois(a);
 }
 
 bool Topology::worldUpdateIfValid(const World &newWorld)