Procházet zdrojové kódy

Yet more cleanup to TCP logic, this time adding a master switch and adding UDP preference in send().

Adam Ierymenko před 11 roky
rodič
revize
8fb442d81a
6 změnil soubory, kde provedl 110 přidání a 76 odebrání
  1. 1 1
      node/Constants.hpp
  2. 19 3
      node/Node.cpp
  3. 41 61
      node/Peer.cpp
  4. 13 7
      node/Peer.hpp
  5. 4 0
      node/RuntimeEnvironment.hpp
  6. 32 4
      node/Topology.hpp

+ 1 - 1
node/Constants.hpp

@@ -313,7 +313,7 @@ error_no_byte_order_defined;
 #define ZT_PEER_PATH_ACTIVITY_TIMEOUT ((ZT_PEER_DIRECT_PING_DELAY * 2) + ZT_PING_CHECK_DELAY)
 
 /**
- * Close TCP tunnels if unused for this long
+ * Close TCP tunnels if unused for this long (used in SocketManager)
  */
 #define ZT_TCP_TUNNEL_ACTIVITY_TIMEOUT ZT_PEER_PATH_ACTIVITY_TIMEOUT
 

+ 19 - 3
node/Node.cpp

@@ -591,13 +591,29 @@ Node::ReasonForTermination Node::run()
 				LOG("resynchronize forced by user, syncing with network");
 			}
 
-			if (resynchronize)
+			if (resynchronize) {
+				_r->tcpTunnelingEnabled = false; // turn off TCP tunneling master switch at first
 				_r->timeOfLastResynchronize = now;
+			}
 
-			/* Ping supernodes separately, and do so more aggressively if we haven't
-			 * heard anything from anyone since our last resynchronize / startup. */
+			/* Supernodes are pinged separately and more aggressively. The
+			 * ZT_STARTUP_AGGRO parameter sets a limit on how rapidly they are
+			 * tried, while PingSupernodesThatNeedPing contains the logic for
+			 * determining if they need PING. */
 			if ((now - lastSupernodePingCheck) >= ZT_STARTUP_AGGRO) {
 				lastSupernodePingCheck = now;
+
+				uint64_t lastReceiveFromAnySupernode = 0; // function object result parameter
+				_r->topology->eachSupernodePeer(Topology::FindMostRecentDirectReceiveTimestamp(lastReceiveFromAnySupernode));
+
+				// Turn on TCP tunneling master switch if we haven't heard anything since before
+				// the last resynchronize and we've been trying long enough.
+				uint64_t tlr = _r->timeOfLastResynchronize;
+				if ((lastReceiveFromAnySupernode < tlr)&&((now - tlr) >= ZT_TCP_TUNNEL_FAILOVER_TIMEOUT)) {
+					TRACE("network still unreachable after %u ms, TCP TUNNELING ENABLED",(unsigned int)ZT_TCP_TUNNEL_FAILOVER_TIMEOUT);
+					_r->tcpTunnelingEnabled = true;
+				}
+
 				_r->topology->eachSupernodePeer(Topology::PingSupernodesThatNeedPing(_r,now));
 			}
 

+ 41 - 61
node/Peer.cpp

@@ -120,35 +120,44 @@ void Peer::receive(
 bool Peer::send(const RuntimeEnvironment *_r,const void *data,unsigned int len,uint64_t now)
 {
 	Mutex::Lock _l(_lock);
-	bool useTcpOut = _isTcpFailoverTime(_r,now);
 
-	std::vector<Path>::iterator p(_paths.begin());
-	if (!useTcpOut) {
-		// If we don't want to initiate TCP, seek past TCP paths if they are at the front
-		// to find the first UDP path as our default.
-		while ((p != _paths.end())&&(p->type() == Path::PATH_TYPE_TCP_OUT))
-			++p;
-	}
-	if (p == _paths.end())
-		return false;
-
-	// Treat first path as default and look for a better one based on time of
-	// last packet received.
-	std::vector<Path>::iterator bestPath = p;
-	uint64_t bestPathLastReceived = p->lastReceived();
-	while (++p != _paths.end()) {
+	Path *bestNormalPath = (Path *)0;
+	Path *bestTcpOutPath = (Path *)0;
+	uint64_t bestNormalPathLastReceived = 0;
+	uint64_t bestTcpOutPathLastReceived = 0;
+	for(std::vector<Path>::iterator p(_paths.begin());p!=_paths.end();++p) {
 		uint64_t lr = p->lastReceived();
-		if ( (lr > bestPathLastReceived) && ((useTcpOut)||(p->type() != Path::PATH_TYPE_TCP_OUT)) ) {
-			bestPathLastReceived = lr;
-			bestPath = p;
+		if (p->type() == Path::PATH_TYPE_TCP_OUT) { // TCP_OUT paths initiate TCP connections
+			if (lr >= bestTcpOutPathLastReceived) {
+				bestTcpOutPathLastReceived = lr;
+				bestTcpOutPath = &(*p);
+			}
+		} else { // paths other than TCP_OUT are considered "normal"
+			if (lr >= bestNormalPathLastReceived) {
+				bestNormalPathLastReceived = lr;
+				bestNormalPath = &(*p);
+			}
 		}
 	}
 
+	Path *bestPath = (Path *)0;
+	if (!_r->tcpTunnelingEnabled) { // TCP tunneling master switch is off, use normal path
+		bestPath = bestNormalPath;
+	} else if (bestNormalPath) { // we have a normal path, so use if it looks active
+		if ((bestNormalPathLastReceived > _r->timeOfLastResynchronize)&&((now - bestNormalPathLastReceived) < ZT_PEER_PATH_ACTIVITY_TIMEOUT))
+			bestPath = bestNormalPath;
+		else bestPath = bestTcpOutPath;
+	} else { // no normal path available
+		bestPath = bestTcpOutPath;
+	}
+
+	if (!bestPath)
+		return false;
+
 	if (_r->sm->send(bestPath->address(),bestPath->tcp(),bestPath->type() == Path::PATH_TYPE_TCP_OUT,data,len)) {
 		bestPath->sent(now);
 		return true;
 	}
-
 	return false;
 }
 
@@ -170,7 +179,18 @@ bool Peer::sendPing(const RuntimeEnvironment *_r,uint64_t now)
 	bool sent = false;
 	SharedPtr<Peer> self(this);
 	Mutex::Lock _l(_lock);
-	bool useTcpOut = _isTcpFailoverTime(_r,now);
+
+	uint64_t lastUdpPingSent = 0;
+	uint64_t lastUdpReceive = 0;
+	bool haveUdp = false;
+	for(std::vector<Path>::const_iterator p(_paths.begin());p!=_paths.end();++p) {
+		if (p->type() == Path::PATH_TYPE_UDP) {
+			lastUdpPingSent = std::max(lastUdpPingSent,p->lastPing());
+			lastUdpReceive = std::max(lastUdpReceive,p->lastReceived());
+			haveUdp = true;
+		}
+	}
+	bool useTcpOut = ( (!haveUdp) || ( (_r->tcpTunnelingEnabled) && (lastUdpPingSent > lastUdpReceive) && ((now - lastUdpReceive) >= ZT_TCP_TUNNEL_FAILOVER_TIMEOUT) ) );
 
 	TRACE("PING %s (useTcpOut==%d)",_id.address().toString().c_str(),(int)useTcpOut);
 
@@ -199,46 +219,6 @@ void Peer::clean(uint64_t now)
 	_paths.resize(o);
 }
 
-bool Peer::_isTcpFailoverTime(const RuntimeEnvironment *_r,uint64_t now) const
-	throw()
-{
-	// assumes _lock is locked
-	uint64_t lastResync = _r->timeOfLastResynchronize;
-	if ((now - lastResync) >= ZT_TCP_TUNNEL_FAILOVER_TIMEOUT) {
-		if ((now - _r->timeOfLastPacketReceived) >= ZT_TCP_TUNNEL_FAILOVER_TIMEOUT)
-			return true;
-
-		uint64_t lastUdpPingSent = 0;
-		uint64_t lastUdpReceive = 0;
-		bool haveUdp = false;
-
-		for(std::vector<Path>::const_iterator p(_paths.begin());p!=_paths.end();++p) {
-			if (p->type() == Path::PATH_TYPE_UDP) {
-				lastUdpPingSent = std::max(lastUdpPingSent,p->lastPing());
-				lastUdpReceive = std::max(lastUdpReceive,p->lastReceived());
-				haveUdp = true;
-			}
-		}
-
-		return ( (!haveUdp) || ( (lastUdpPingSent > lastResync) && ((now - lastUdpReceive) >= ZT_TCP_TUNNEL_FAILOVER_TIMEOUT) ) );
-	}
-	return false;
-}
-
-bool Peer::pingUnanswered(const RuntimeEnvironment *_r,uint64_t now)
-{
-	uint64_t lp = 0;
-	uint64_t lr = 0;
-	{
-		Mutex::Lock _l(_lock);
-		for(std::vector<Path>::const_iterator p(_paths.begin());p!=_paths.end();++p) {
-			lp = std::max(p->lastPing(),lp);
-			lr = std::max(p->lastReceived(),lr);
-		}
-	}
-	return ( (lp > _r->timeOfLastResynchronize) && ((lr < lp)&&((lp - lr) >= ZT_PING_UNANSWERED_AFTER)) );
-}
-
 void Peer::getBestActiveUdpPathAddresses(uint64_t now,InetAddress &v4,InetAddress &v6) const
 {
 	uint64_t bestV4 = 0,bestV6 = 0;

+ 13 - 7
node/Peer.hpp

@@ -218,11 +218,20 @@ public:
 	}
 
 	/**
-	 * @param _r Runtime environment
-	 * @param now Current time
-	 * @return True if the last ping is unanswered
+	 * Get max timestamp of last ping and max timestamp of last receive in a single pass
+	 *
+	 * @param lp Last ping result parameter (init to 0 before calling)
+	 * @param lr Last receive result parameter (init to 0 before calling)
 	 */
-	bool pingUnanswered(const RuntimeEnvironment *_r,uint64_t now);
+	inline void lastPingAndDirectReceive(uint64_t &lp,uint64_t &lr)
+		throw()
+	{
+		Mutex::Lock _l(_lock);
+		for(std::vector<Path>::const_iterator p(_paths.begin());p!=_paths.end();++p) {
+			lp = std::max(lp,p->lastPing());
+			lr = std::max(lr,p->lastReceived());
+		}
+	}
 
 	/**
 	 * @return Time of most recent unicast frame received
@@ -449,9 +458,6 @@ public:
 	}
 
 private:
-	bool _isTcpFailoverTime(const RuntimeEnvironment *_r,uint64_t now) const
-		throw();
-
 	unsigned char _key[ZT_PEER_SECRET_KEY_LENGTH];
 	Identity _id;
 

+ 4 - 0
node/RuntimeEnvironment.hpp

@@ -64,6 +64,7 @@ class RuntimeEnvironment
 public:
 	RuntimeEnvironment() :
 		shutdownInProgress(false),
+		tcpTunnelingEnabled(false),
 		timeOfLastResynchronize(0),
 		timeOfLastPacketReceived(0),
 		log((Logger *)0),
@@ -90,6 +91,9 @@ public:
 	// Indicates that we are shutting down -- this is hacky, want to factor out
 	volatile bool shutdownInProgress;
 
+	// Are we in outgoing TCP failover mode?
+	volatile bool tcpTunnelingEnabled;
+
 	// Time network environment (e.g. fingerprint) last changed -- used to determine online-ness
 	volatile uint64_t timeOfLastResynchronize;
 

+ 32 - 4
node/Topology.hpp

@@ -216,7 +216,13 @@ public:
 	};
 
 	/**
-	 * Pings all peers that need a ping sent, excluding supernodes (which are pinged separately)
+	 * Pings all peers that need a ping sent, excluding supernodes
+	 *
+	 * Ordinary peers are pinged if we haven't heard from them recently. Receive
+	 * time is used rather than send time, since OK is returned on success and
+	 * we want to keep trying if a packet is lost. Ordinary peers are subject to
+	 * a frame inactivity timeout: we give up if we haven't actually transferred
+	 * any data to them recently, and eventually Topology purges them from memory.
 	 */
 	class PingPeersThatNeedPing
 	{
@@ -235,7 +241,7 @@ public:
 			 * than time of last send in order to only count full round trips. */
 			if ( (!_supernodeAddresses.count(p->address())) &&
 			     ((_now - p->lastFrame()) < ZT_PEER_PATH_ACTIVITY_TIMEOUT) &&
-			     ((_now - p->lastDirectReceive()) > ZT_PEER_DIRECT_PING_DELAY) ) {
+			     ((_now - p->lastDirectReceive()) >= ZT_PEER_DIRECT_PING_DELAY) ) {
 				p->sendPing(_r,_now);
 			}
 		}
@@ -247,7 +253,13 @@ public:
 	};
 
 	/**
-	 * Ping peers that need ping according to supernode rules (slightly more aggressive)
+	 * Ping peers that need ping according to supernode rules
+	 *
+	 * Supernodes are pinged aggressively if a ping is unanswered, and they are
+	 * not subject to the activity timeout. In other words: we assume they are
+	 * always there and always try to reach them.
+	 *
+	 * The ultimate rate limit for this is controlled up in the Node main loop.
 	 */
 	class PingSupernodesThatNeedPing
 	{
@@ -261,7 +273,11 @@ public:
 			/* For supernodes we always ping even if no frames have been seen, and
 			 * we ping aggressively if pings are unanswered. The limit to this
 			 * frequency is set in the main loop to no more than ZT_STARTUP_AGGRO. */
-			if ( (p->pingUnanswered(_r,_now)) || ((_now - p->lastDirectReceive()) > ZT_PEER_DIRECT_PING_DELAY) || (p->lastDirectReceive() < _r->timeOfLastResynchronize) )
+
+			uint64_t lp = 0;
+			uint64_t lr = 0;
+			p->lastPingAndDirectReceive(lp,lr);
+			if ( (lr < _r->timeOfLastResynchronize) || ((lr < lp)&&((lp - lr) >= ZT_PING_UNANSWERED_AFTER)) || ((_now - lr) >= ZT_PEER_DIRECT_PING_DELAY) )
 				p->sendPing(_r,_now);
 		}
 
@@ -270,6 +286,18 @@ public:
 		const RuntimeEnvironment *_r;
 	};
 
+	/**
+	 * Computes most recent timestamp of direct packet receive over a list of peers
+	 */
+	class FindMostRecentDirectReceiveTimestamp
+	{
+	public:
+		FindMostRecentDirectReceiveTimestamp(uint64_t &ts) throw() : _ts(ts) {}
+		inline void operator()(Topology &t,const SharedPtr<Peer> &p) throw() { _ts = std::max(p->lastDirectReceive(),_ts); }
+	private:
+		uint64_t &_ts;
+	};
+
 	/**
 	 * Function object to forget direct links to active peers and then ping them indirectly
 	 */