
Tweak some more timings for better reliability.

Adam Ierymenko 9 years ago
parent
commit
7fbe2f7adf

+ 1 - 1
node/Cluster.hpp

@@ -55,7 +55,7 @@
 /**
  * How often should we announce that we have a peer?
  */
-#define ZT_CLUSTER_HAVE_PEER_ANNOUNCE_PERIOD (ZT_PEER_DIRECT_PING_DELAY / 2)
+#define ZT_CLUSTER_HAVE_PEER_ANNOUNCE_PERIOD ZT_PEER_DIRECT_PING_DELAY
 
 /**
  * Desired period between doPeriodicTasks() in milliseconds

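This halves the announcement rate: the HAVE_PEER announce period rises from half of ZT_PEER_DIRECT_PING_DELAY (30 s) to the full delay (60 s), so cluster members gossip about which peers they hold half as often. A minimal self-contained sketch of how a period constant like this typically gates announcements (all names below are hypothetical, not the actual Cluster API):

    #include <cstdint>
    #include <map>

    #define ZT_CLUSTER_HAVE_PEER_ANNOUNCE_PERIOD 60000 // now == ZT_PEER_DIRECT_PING_DELAY

    // peer address -> time of last HAVE_PEER announcement (hypothetical state)
    static std::map<uint64_t,uint64_t> lastHavePeerAnnounce;

    // Returns true at most once per announce period for a given peer.
    static bool shouldAnnounceHavePeer(uint64_t peerAddr,uint64_t now)
    {
        uint64_t &last = lastHavePeerAnnounce[peerAddr];
        if ((now - last) < ZT_CLUSTER_HAVE_PEER_ANNOUNCE_PERIOD)
            return false;
        last = now;
        return true;
    }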
+ 5 - 5
node/Constants.hpp

@@ -267,14 +267,14 @@
 #define ZT_PEER_DIRECT_PING_DELAY 60000
 
 /**
- * Delay between requests for updated network autoconf information
+ * Timeout for overall peer activity (measured from last receive)
  */
-#define ZT_NETWORK_AUTOCONF_DELAY 60000
+#define ZT_PEER_ACTIVITY_TIMEOUT ((ZT_PEER_DIRECT_PING_DELAY * 4) + ZT_PING_CHECK_INVERVAL)
 
 /**
- * Timeout for overall peer activity (measured from last receive)
+ * Delay between requests for updated network autoconf information
  */
-#define ZT_PEER_ACTIVITY_TIMEOUT ((ZT_PEER_DIRECT_PING_DELAY * 3) + (ZT_PING_CHECK_INVERVAL * 2))
+#define ZT_NETWORK_AUTOCONF_DELAY 60000
 
 /**
  * Minimum interval between attempts by relays to unite peers
@@ -283,7 +283,7 @@
  * a RENDEZVOUS message no more than this often. This instructs the peers
  * to attempt NAT-t and gives each the other's corresponding IP:port pair.
  */
-#define ZT_MIN_UNITE_INTERVAL 60000
+#define ZT_MIN_UNITE_INTERVAL 30000
 
 /**
  * Delay between initial direct NAT-t packet and more aggressive techniques

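Two things happen in this hunk: the two definitions swap places so each comment sits above the macro it describes, and the activity-timeout formula changes from three ping delays plus two check intervals to four ping delays plus one. Assuming ZT_PING_CHECK_INVERVAL is 62000 ms (its value in the ZeroTier tree around this time; the "INVERVAL" spelling is the real identifier), the timeout barely moves, as this runnable check shows:

    #include <cstdio>

    #define ZT_PEER_DIRECT_PING_DELAY 60000
    #define ZT_PING_CHECK_INVERVAL 62000 // assumed value

    int main()
    {
        const long oldTimeout = (ZT_PEER_DIRECT_PING_DELAY * 3) + (ZT_PING_CHECK_INVERVAL * 2); // 304000 ms
        const long newTimeout = (ZT_PEER_DIRECT_PING_DELAY * 4) + ZT_PING_CHECK_INVERVAL;       // 302000 ms
        std::printf("old: %ld ms, new: %ld ms\n",oldTimeout,newTimeout);
        return 0;
    }

The ZT_MIN_UNITE_INTERVAL halving (60 s to 30 s) is the more consequential change: a relay may now send RENDEZVOUS to a given peer pair twice as often, which pairs with the Switch.cpp cleanup below.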
+ 1 - 1
node/Node.cpp

@@ -263,7 +263,7 @@ public:
 			}
 
 			lastReceiveFromUpstream = std::max(p->lastReceive(),lastReceiveFromUpstream);
-		} else if (p->alive(_now)) {
+		} else if (p->activelyTransferringFrames(_now)) {
 			// Normal nodes get their preferred link kept alive if the node has generated frame traffic recently
 			p->doPingAndKeepalive(RR,_now,0);
 		}
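The rename is behavior-preserving: the predicate tests whether a data frame arrived within ZT_PEER_ACTIVITY_TIMEOUT, i.e. recent frame traffic, not reachability, so "alive" overstated what it checked. With the clearer name, the policy reads as intended: upstream (root) peers are always pinged, while ordinary peers only get their preferred link kept alive while they are actually moving frames.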

+ 2 - 2
node/Peer.hpp

@@ -231,9 +231,9 @@ public:
 	inline uint64_t lastAnnouncedTo() const throw() { return _lastAnnouncedTo; }
 
 	/**
-	 * @return True if peer has received an actual data frame within ZT_PEER_ACTIVITY_TIMEOUT milliseconds
+	 * @return True if this peer is actively sending real network frames
 	 */
-	inline uint64_t alive(uint64_t now) const throw() { return ((now - lastFrame()) < ZT_PEER_ACTIVITY_TIMEOUT); }
+	inline uint64_t activelyTransferringFrames(uint64_t now) const throw() { return ((now - lastFrame()) < ZT_PEER_ACTIVITY_TIMEOUT); }
 
 	/**
 	 * @return Current latency or 0 if unknown (max: 65535)

+ 1 - 1
node/SelfAwareness.cpp

@@ -128,7 +128,7 @@ void SelfAwareness::iam(const Address &reporter,const InetAddress &reporterPhysi
 		// links to be re-established if possible, possibly using a root server or some
 		// other relay.
 		for(std::vector< SharedPtr<Peer> >::const_iterator p(rset.peersReset.begin());p!=rset.peersReset.end();++p) {
-			if ((*p)->alive(now)) {
+			if ((*p)->activelyTransferringFrames(now)) {
 				Packet outp((*p)->address(),RR->identity.address(),Packet::VERB_NOP);
 				RR->sw->send(outp,true,0);
 			}
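Same rename, same rationale. Sending a VERB_NOP forces the Switch to route a packet to each reset peer; with no direct path surviving the external-surface change, the packet is relayed, and the relay (typically a root) can then issue RENDEZVOUS to re-establish a direct link. Gating this on recent frame traffic keeps idle peers from triggering reconnection work.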

+ 3 - 3
node/Switch.cpp

@@ -442,8 +442,8 @@ unsigned long Switch::doTimerTasks(uint64_t now)
 		Mutex::Lock _l(_contactQueue_m);
 		for(std::list<ContactQueueEntry>::iterator qi(_contactQueue.begin());qi!=_contactQueue.end();) {
 			if (now >= qi->fireAtTime) {
-				if ((!qi->peer->alive(now))||(qi->peer->hasActiveDirectPath(now))) {
-					// Cancel attempt if we've already connected or peer is no longer "alive"
+				if (qi->peer->hasActiveDirectPath(now)) {
+					// Cancel if connection has succeeded
 					_contactQueue.erase(qi++);
 					continue;
 				} else {
@@ -539,7 +539,7 @@ unsigned long Switch::doTimerTasks(uint64_t now)
 		_LastUniteKey *k = (_LastUniteKey *)0;
 		uint64_t *v = (uint64_t *)0;
 		while (i.next(k,v)) {
-			if ((now - *v) >= (ZT_MIN_UNITE_INTERVAL * 16))
+			if ((now - *v) >= (ZT_MIN_UNITE_INTERVAL * 8))
 				_lastUniteAttempt.erase(*k);
 		}
 	}
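Two simplifications here. First, a queued contact attempt is no longer abandoned just because the peer's frame traffic lapsed mid-attempt; only a successfully established direct path cancels it. Second, combined with the ZT_MIN_UNITE_INTERVAL change above, the garbage-collection horizon for stale last-unite-attempt records drops from 16 * 60000 ms (16 minutes) to 8 * 30000 ms (4 minutes), keeping the table proportionally small at the faster unite rate.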

+ 6 - 3
node/Topology.hpp

@@ -81,6 +81,11 @@ public:
 	/**
 	 * Get a peer only if it is presently in memory (no disk cache)
 	 *
+	 * This also does not update the lastUsed() time for peers, which means
+	 * that it won't prevent them from falling out of RAM. This is currently
+	 * used in the Cluster code to update peer info without forcing all peers
+	 * across the entire cluster to remain in memory cache.
+	 *
 	 * @param zta ZeroTier address
 	 * @param now Current time
 	 */
@@ -88,10 +93,8 @@ public:
 	{
 		Mutex::Lock _l(_lock);
 		const SharedPtr<Peer> *const ap = _peers.get(zta);
-		if (ap) {
-			(*ap)->use(now);
+		if (ap)
 			return *ap;
-		}
 		return SharedPtr<Peer>();
 	}
 
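Dropping the use(now) call means lookups through this accessor no longer refresh a peer's lastUsed() timestamp, exactly as the new doc comment says: a cluster-wide status sweep can touch every peer without pinning them all in the RAM cache. A usage sketch, assuming the accessor is named getPeerNoCache() (its signature is not visible in this hunk):

    // In cluster handling code: refresh a peer's replicated state
    // without promoting it in the in-memory cache.
    SharedPtr<Peer> peer(RR->topology->getPeerNoCache(zta,now)); // assumed name
    if (peer) {
        // ... apply the update; since lastUsed() was not touched, the
        // peer can still age out of memory on the next cache sweep.
    }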

+ 2 - 2
tests/http/big-test-start.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 
 # Edit as needed -- note that >1000 per host is likely problematic due to Linux kernel limits
-NUM_CONTAINERS=25
+NUM_CONTAINERS=50
 CONTAINER_IMAGE=zerotier/http-test
 
 #
@@ -25,6 +25,6 @@ export PATH=/bin:/usr/bin:/usr/local/bin:/usr/sbin:/sbin
 #	docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE
 #done
 
-pssh -h big-test-hosts -i -t 0 -p 256 "for ((n=0;n<$NUM_CONTAINERS;n++)); do docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE; sleep 0.25; done"
+pssh -h big-test-hosts -o big-test-out -t 0 -p 256 "for ((n=0;n<$NUM_CONTAINERS;n++)); do docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE; sleep 0.25; done"
 
 exit 0
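With twice as many containers per host, pssh's -i (print each host's output inline) is swapped for -o big-test-out, which writes each host's stdout to a separate file under the big-test-out directory; at -p 256 parallelism that keeps the launcher's own output manageable and preserves per-host logs.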