Browse Source

Such ping logic. So edge case.

Adam Ierymenko 11 years ago
parent
commit
c96d3ebf8c
5 changed files with 82 additions and 27 deletions
  1. 9 4
      node/Constants.hpp
  2. 4 8
      node/Node.cpp
  3. 1 1
      node/Peer.cpp
  4. 20 0
      node/Peer.hpp
  5. 48 14
      node/Topology.hpp

+ 9 - 4
node/Constants.hpp

@@ -263,11 +263,11 @@ error_no_byte_order_defined;
 #define ZT_NETWORK_FINGERPRINT_CHECK_DELAY 5000
 #define ZT_NETWORK_FINGERPRINT_CHECK_DELAY 5000
 
 
 /**
 /**
- * Delay between pings (actually HELLOs) to direct links
+ * Delay between pings of direct links in the ordinary case
  */
  */
 #define ZT_PEER_DIRECT_PING_DELAY 120000
 #define ZT_PEER_DIRECT_PING_DELAY 120000
 
 
-/**
+ /**
  * Delay in ms between firewall opener packets to direct links
  * Delay in ms between firewall opener packets to direct links
  *
  *
  * This should be lower than the UDP conversation entry timeout in most
  * This should be lower than the UDP conversation entry timeout in most
@@ -297,12 +297,12 @@ error_no_byte_order_defined;
  * 
  * 
  * A link that hasn't spoken in this long is simply considered inactive.
  * A link that hasn't spoken in this long is simply considered inactive.
  */
  */
-#define ZT_PEER_PATH_ACTIVITY_TIMEOUT ((ZT_PEER_DIRECT_PING_DELAY * 2) + 1000)
+#define ZT_PEER_PATH_ACTIVITY_TIMEOUT ((ZT_PEER_DIRECT_PING_DELAY * 2) + 10000)
 
 
 /**
 /**
  * Close TCP tunnels if unused for this long
  * Close TCP tunnels if unused for this long
  */
  */
-#define ZT_TCP_TUNNEL_ACTIVITY_TIMEOUT ZT_PEER_PATH_ACTIVITY_TIMEOUT
+#define ZT_TCP_TUNNEL_ACTIVITY_TIMEOUT (ZT_PEER_PATH_ACTIVITY_TIMEOUT + 10000)
 
 
 /**
 /**
  * Try TCP tunnels if nothing received for this long
  * Try TCP tunnels if nothing received for this long
@@ -314,6 +314,11 @@ error_no_byte_order_defined;
  */
  */
 #define ZT_STARTUP_AGGRO 5000
 #define ZT_STARTUP_AGGRO 5000
 
 
+/**
+ * Time after which a sent ping is considered unanswered
+ */
+#define ZT_PING_UNANSWERED_AFTER 2500
+
 /**
 /**
  * Stop relaying via peers that have not responded to direct sends in this long
  * Stop relaying via peers that have not responded to direct sends in this long
  */
  */

+ 4 - 8
node/Node.cpp

@@ -536,10 +536,10 @@ Node::ReasonForTermination Node::run()
 
 
 		uint64_t lastNetworkAutoconfCheck = Utils::now() - 5000ULL; // check autoconf again after 5s for startup
 		uint64_t lastNetworkAutoconfCheck = Utils::now() - 5000ULL; // check autoconf again after 5s for startup
 		uint64_t lastPingCheck = 0;
 		uint64_t lastPingCheck = 0;
-		uint64_t lastSupernodePing = 0;
 		uint64_t lastClean = Utils::now(); // don't need to do this immediately
 		uint64_t lastClean = Utils::now(); // don't need to do this immediately
 		uint64_t lastNetworkFingerprintCheck = 0;
 		uint64_t lastNetworkFingerprintCheck = 0;
 		uint64_t lastMulticastCheck = 0;
 		uint64_t lastMulticastCheck = 0;
+		uint64_t lastSupernodePingCheck = 0;
 		long lastDelayDelta = 0;
 		long lastDelayDelta = 0;
 
 
 		uint64_t networkConfigurationFingerprint = 0;
 		uint64_t networkConfigurationFingerprint = 0;
@@ -592,13 +592,9 @@ Node::ReasonForTermination Node::run()
 
 
 			/* Ping supernodes separately, and do so more aggressively if we haven't
 			/* Ping supernodes separately, and do so more aggressively if we haven't
 			 * heard anything from anyone since our last resynchronize / startup. */
 			 * heard anything from anyone since our last resynchronize / startup. */
-			if ( ((now - lastSupernodePing) >= ZT_PEER_DIRECT_PING_DELAY) ||
-			     ((_r->timeOfLastResynchronize > _r->timeOfLastPacketReceived) && ((now - lastSupernodePing) >= ZT_STARTUP_AGGRO)) ) {
-				lastSupernodePing = now;
-				std::vector< SharedPtr<Peer> > sns(_r->topology->supernodePeers());
-				TRACE("pinging %d supernodes",(int)sns.size());
-				for(std::vector< SharedPtr<Peer> >::const_iterator p(sns.begin());p!=sns.end();++p)
-					(*p)->sendPing(_r,now);
+			if ((now - lastSupernodePingCheck) >= ZT_STARTUP_AGGRO) {
+				lastSupernodePingCheck = now;
+				_r->topology->eachSupernodePeer(Topology::PingSupernodesThatNeedPing(_r,now));
 			}
 			}
 
 
 			if (resynchronize) {
 			if (resynchronize) {

+ 1 - 1
node/Peer.cpp

@@ -216,7 +216,7 @@ bool Peer::isTcpFailoverTime(const RuntimeEnvironment *_r,uint64_t now) const
 void Peer::clean(uint64_t now)
 void Peer::clean(uint64_t now)
 {
 {
 	Mutex::Lock _l(_lock);
 	Mutex::Lock _l(_lock);
-	unsigned long i = 0,o = 0,l = _paths.size();
+	unsigned long i = 0,o = 0,l = (unsigned long)_paths.size();
 	while (i != l) {
 	while (i != l) {
 		if (_paths[i].active(now))
 		if (_paths[i].active(now))
 			_paths[o++] = _paths[i];
 			_paths[o++] = _paths[i];

+ 20 - 0
node/Peer.hpp

@@ -226,6 +226,26 @@ public:
 		return x;
 		return x;
 	}
 	}
 
 
+	/**
+	 * @param _r Runtime environment
+	 * @param now Current time
+	 * @return True if the last ping is unanswered
+	 */
+	inline bool pingUnanswered(const RuntimeEnvironment *_r,uint64_t now)
+		throw()
+	{
+		uint64_t lp = 0;
+		uint64_t lr = 0;
+		{
+			Mutex::Lock _l(_lock);
+			for(std::vector<Path>::const_iterator p(_paths.begin());p!=_paths.end();++p) {
+				lp = std::max(p->lastPing(),lp);
+				lr = std::max(p->lastReceived(),lr);
+			}
+		}
+		return ( (lp > _r->timeOfLastResynchronize) && ((lr < lp)&&((lp - lr) >= ZT_PING_UNANSWERED_AFTER)) );
+	}
+
 	/**
 	/**
 	 * @return Time of most recent unicast frame received
 	 * @return Time of most recent unicast frame received
 	 */
 	 */

+ 48 - 14
node/Topology.hpp

@@ -180,6 +180,20 @@ public:
 			f(*this,p->second);
 			f(*this,p->second);
 	}
 	}
 
 
+	/**
+	 * Apply a function or function object to all supernode peers
+	 *
+	 * @param f Function to apply
+	 * @tparam F Function or function object type
+	 */
+	template<typename F>
+	inline void eachSupernodePeer(F f)
+	{
+		Mutex::Lock _l(_supernodes_m);
+		for(std::vector< SharedPtr<Peer> >::const_iterator p(_supernodePeers.begin());p!=_supernodePeers.end();++p)
+			f(*this,*p);
+	}
+
 	/**
 	/**
 	 * Function object to collect peers that need a firewall opener sent
 	 * Function object to collect peers that need a firewall opener sent
 	 */
 	 */
@@ -214,20 +228,16 @@ public:
 
 
 		inline void operator()(Topology &t,const SharedPtr<Peer> &p)
 		inline void operator()(Topology &t,const SharedPtr<Peer> &p)
 		{
 		{
-			if ( 
-			     /* 1: we have not heard anything directly in ZT_PEER_DIRECT_PING_DELAY ms */
-			     ((_now - p->lastDirectReceive()) >= ZT_PEER_DIRECT_PING_DELAY) &&
-			     /* 2: */
-			     (
-			       /* 2a: peer has direct path, and has sent us something recently */
-			       (
-			         (p->hasDirectPath())&&
-			         ((_now - p->lastFrame()) < ZT_PEER_PATH_ACTIVITY_TIMEOUT)
-			       ) &&
-			       /* 2b: peer is not a supernode */
-					   (!_supernodeAddresses.count(p->address()))
-			     )
-			   ) { p->sendPing(_r,_now); }
+			/* For ordinary nodes we ping if they've sent us a frame recently,
+			 * otherwise they are stale and we let the link die.
+			 *
+			 * Note that we measure ping time from time of last receive rather
+			 * than time of last send in order to only count full round trips. */
+			if ( (!_supernodeAddresses.count(p->address())) &&
+			     ((_now - p->lastFrame()) < ZT_PEER_PATH_ACTIVITY_TIMEOUT) &&
+				 ((_now - p->lastDirectReceive()) > ZT_PEER_DIRECT_PING_DELAY) ) {
+				p->sendPing(_r,_now);
+			}
 		}
 		}
 
 
 	private:
 	private:
@@ -236,6 +246,30 @@ public:
 		const RuntimeEnvironment *_r;
 		const RuntimeEnvironment *_r;
 	};
 	};
 
 
+	/**
+	 * Ping peers that need ping according to supernode rules (slightly more aggressive)
+	 */
+	class PingSupernodesThatNeedPing
+	{
+	public:
+		PingSupernodesThatNeedPing(const RuntimeEnvironment *renv,uint64_t now) throw() :
+			_now(now),
+			_r(renv) {}
+
+		inline void operator()(Topology &t,const SharedPtr<Peer> &p)
+		{
+			/* For supernodes we always ping even if no frames have been seen, and
+			 * we ping aggressively if pings are unanswered. The main loop runs this
+			 * check at most once per ZT_STARTUP_AGGRO interval, which caps how often
+			 * a supernode can be pinged. */
+			if ( (p->pingUnanswered(_r,_now)) || ((_now - p->lastDirectReceive()) > ZT_PEER_DIRECT_PING_DELAY) )
+				p->sendPing(_r,_now);
+		}
+
+	private:
+		uint64_t _now;
+		const RuntimeEnvironment *_r;
+	};
+
 	/**
 	/**
 	 * Function object to forget direct links to active peers and then ping them indirectly
 	 * Function object to forget direct links to active peers and then ping them indirectly
 	 *
 	 *