瀏覽代碼

Remove bonds for peers that have fully expired. Remove notion of bond health

Joseph Henry 3 年之前
父節點
當前提交
8148c658cf
共有 8 個文件被更改,包括 57 次插入 和 88 次删除
  1. 0 5
      include/ZeroTierOne.h
  2. 39 38
      node/Bond.cpp
  3. 9 11
      node/Bond.hpp
  4. 0 1
      node/Node.cpp
  5. 1 1
      node/Peer.cpp
  6. 4 1
      node/Peer.hpp
  7. 4 29
      one.cpp
  8. 0 2
      service/OneService.cpp

+ 0 - 5
include/ZeroTierOne.h

@@ -1404,11 +1404,6 @@ typedef struct
 	 */
 	int bondingPolicy;
 
-	/**
-	 * The health status of the bond to this peer
-	 */
-	bool isHealthy;
-
 	/**
 	 * The number of links that comprise the bond to this peer that are considered alive
 	 */

+ 39 - 38
node/Bond.cpp

@@ -90,7 +90,7 @@ SharedPtr<Bond> Bond::getBondByPeerId(int64_t identity)
 	return _bonds.count(identity) ? _bonds[identity] : SharedPtr<Bond>();
 }
 
-SharedPtr<Bond> Bond::createTransportTriggeredBond(const RuntimeEnvironment* renv, const SharedPtr<Peer>& peer)
+SharedPtr<Bond> Bond::createBond(const RuntimeEnvironment* renv, const SharedPtr<Peer>& peer)
 {
 	Mutex::Lock _l(_bonds_m);
 	int64_t identity = peer->identity().address().toInt();
@@ -145,6 +145,12 @@ SharedPtr<Bond> Bond::createTransportTriggeredBond(const RuntimeEnvironment* ren
 	return SharedPtr<Bond>();
 }
 
+void Bond::destroyBond(uint64_t peerId)
+{
+	Mutex::Lock _l(_bonds_m);
+	_bonds.erase(peerId);
+}
+
 SharedPtr<Link> Bond::getLinkBySocket(const std::string& policyAlias, uint64_t localSocket)
 {
 	Mutex::Lock _l(_links_m);
@@ -816,6 +822,17 @@ void Bond::curateBond(int64_t now, bool rebuildBond)
 		if (! _paths[i].p) {
 			continue;
 		}
+
+		/**
+		 * Remove expired links from bond
+		 */
+		if ((now - _paths[i].p->_lastIn) > (ZT_PEER_EXPIRED_PATH_TRIAL_PERIOD)) {
+			log("link %s has expired, removing from bond", pathToStr(_paths[i].p).c_str());
+			_paths[i] = NominatedPath();
+			_paths[i].p = SharedPtr<Path>();
+			continue;
+		}
+
 		tmpNumTotalLinks++;
 		if (_paths[i].eligible) {
 			tmpNumAliveLinks++;
@@ -876,42 +893,18 @@ void Bond::curateBond(int64_t now, bool rebuildBond)
 	}
 
 	/**
-	 * Determine health status to report to user
+	 * Trigger status report if number of links change
 	 */
 	_numAliveLinks = tmpNumAliveLinks;
 	_numTotalLinks = tmpNumTotalLinks;
-	bool tmpHealthStatus = true;
-
-	if (_policy == ZT_BOND_POLICY_BROADCAST) {
-		if (_numAliveLinks < 1) {
-			// Considered healthy if we're able to send frames at all
-			tmpHealthStatus = false;
-		}
-	}
-	if ((_policy == ZT_BOND_POLICY_BALANCE_RR) || (_policy == ZT_BOND_POLICY_BALANCE_XOR) || (_policy == ZT_BOND_POLICY_BALANCE_AWARE || (_policy == ZT_BOND_POLICY_ACTIVE_BACKUP))) {
-		if (_numAliveLinks < _numTotalLinks) {
-			tmpHealthStatus = false;
-		}
-	}
-	if (tmpHealthStatus != _isHealthy) {
-		std::string healthStatusStr;
-		if (tmpHealthStatus == true) {
-			healthStatusStr = "HEALTHY";
-		}
-		else {
-			healthStatusStr = "DEGRADED";
-		}
-		log("bond is %s (%d/%d links)", healthStatusStr.c_str(), _numAliveLinks, _numTotalLinks);
+	if ((_numAliveLinks != tmpNumAliveLinks) || (_numTotalLinks != tmpNumTotalLinks)) {
 		dumpInfo(now, true);
 	}
 
-	_isHealthy = tmpHealthStatus;
-
 	/**
 	 * Curate the set of paths that are part of the bond proper. Select a set of paths
 	 * per logical link according to eligibility and user-specified constraints.
 	 */
-
 	if ((_policy == ZT_BOND_POLICY_BALANCE_RR) || (_policy == ZT_BOND_POLICY_BALANCE_XOR) || (_policy == ZT_BOND_POLICY_BALANCE_AWARE)) {
 		if (! _numBondedPaths) {
 			rebuildBond = true;
@@ -1009,14 +1002,14 @@ void Bond::estimatePathQuality(int64_t now)
 	uint32_t totUserSpecifiedLinkSpeed = 0;
 	if (_numBondedPaths) {	 // Compute relative user-specified speeds of links
 		for (unsigned int i = 0; i < _numBondedPaths; ++i) {
-			SharedPtr<Link> link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket());
 			if (_paths[i].p && _paths[i].allowed()) {
+				SharedPtr<Link> link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket());
 				totUserSpecifiedLinkSpeed += link->speed();
 			}
 		}
 		for (unsigned int i = 0; i < _numBondedPaths; ++i) {
-			SharedPtr<Link> link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket());
 			if (_paths[i].p && _paths[i].allowed()) {
+				SharedPtr<Link> link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket());
 				link->setRelativeSpeed((uint8_t)round(((float)link->speed() / (float)totUserSpecifiedLinkSpeed) * 255));
 			}
 		}
@@ -1213,6 +1206,11 @@ void Bond::processActiveBackupTasks(void* tPtr, int64_t now)
 	int nonPreferredPathIdx;
 	bool bFoundPrimaryLink = false;
 
+	if (_abPathIdx != ZT_MAX_PEER_NETWORK_PATHS && !_paths[_abPathIdx].p) {
+		_abPathIdx = ZT_MAX_PEER_NETWORK_PATHS;
+		log("main active-backup path has been removed");
+	}
+
 	/**
 	 * Generate periodic status report
 	 */
@@ -1242,7 +1240,6 @@ void Bond::processActiveBackupTasks(void* tPtr, int64_t now)
 		 * simply find the next eligible path.
 		 */
 		if (! userHasSpecifiedLinks()) {
-			debug("no user-specified links");
 			for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) {
 				if (_paths[i].p && _paths[i].eligible) {
 					SharedPtr<Link> link = RR->bc->getLinkBySocket(_policyAlias, _paths[i].p->localSocket());
@@ -1575,7 +1572,6 @@ void Bond::setBondParameters(int policy, SharedPtr<Bond> templateBond, bool useT
 
 	// Bond status
 
-	_isHealthy = false;
 	_numAliveLinks = 0;
 	_numTotalLinks = 0;
 	_numBondedPaths = 0;
@@ -1685,11 +1681,14 @@ SharedPtr<Link> Bond::getLink(const SharedPtr<Path>& path)
 std::string Bond::pathToStr(const SharedPtr<Path>& path)
 {
 #ifdef ZT_TRACE
-	char pathStr[64] = { 0 };
-	char fullPathStr[384] = { 0 };
-	path->address().toString(pathStr);
-	snprintf(fullPathStr, 384, "%.16llx-%s/%s", (unsigned long long)(path->localSocket()), getLink(path)->ifname().c_str(), pathStr);
-	return std::string(fullPathStr);
+	if (path) {
+		char pathStr[64] = { 0 };
+		char fullPathStr[384] = { 0 };
+		path->address().toString(pathStr);
+		snprintf(fullPathStr, 384, "%.16llx-%s/%s", (unsigned long long)(path->localSocket()), getLink(path)->ifname().c_str(), pathStr);
+		return std::string(fullPathStr);
+	}
+	return "";
 #else
 	return "";
 #endif
@@ -1728,7 +1727,7 @@ void Bond::dumpInfo(int64_t now, bool force)
 	_lastSummaryDump = now;
 	float overhead = (_overheadBytes / (timeSinceLastDump / 1000.0f) / 1000.0f);
 	_overheadBytes = 0;
-	log("bond: bp=%d, fi=%d, mi=%d, ud=%d, dd=%d, flows=%lu, leaf=%d, overhead=%f KB/s",
+	log("bond: bp=%d, fi=%d, mi=%d, ud=%d, dd=%d, flows=%lu, leaf=%d, overhead=%f KB/s, links=(%d/%d)",
 		_policy,
 		_failoverInterval,
 		_monitorInterval,
@@ -1736,7 +1735,9 @@ void Bond::dumpInfo(int64_t now, bool force)
 		_downDelay,
 		(unsigned long)_flows.size(),
 		_isLeaf,
-		overhead);
+		overhead,
+		_numAliveLinks,
+		_numTotalLinks);
 	for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) {
 		if (_paths[i].p) {
 			dumpPathStatus(now, i);

+ 9 - 11
node/Bond.hpp

@@ -436,7 +436,14 @@ class Bond {
 	 * @param peer Remote peer that this bond services
 	 * @return A pointer to the newly created Bond
 	 */
-	static SharedPtr<Bond> createTransportTriggeredBond(const RuntimeEnvironment* renv, const SharedPtr<Peer>& peer);
+	static SharedPtr<Bond> createBond(const RuntimeEnvironment* renv, const SharedPtr<Peer>& peer);
+
+	/**
+	 * Remove a bond from the bond controller.
+	 *
+	 * @param peerId Remote peer that this bond services
+	 */
+	static void destroyBond(uint64_t peerId);
 
 	/**
 	 * Periodically perform maintenance tasks for the bonding layer.
@@ -1020,14 +1027,6 @@ class Bond {
 		return _policy;
 	}
 
-	/**
-	 * @return the health status of the bond
-	 */
-	inline bool isHealthy()
-	{
-		return _isHealthy;
-	}
-
 	/**
 	 * @return the number of links comprising this bond which are considered alive
 	 */
@@ -1344,7 +1343,7 @@ class Bond {
 		int packetsIn;
 		int packetsOut;
 
-		AtomicCounter __refCount;
+		//AtomicCounter __refCount;
 
 		SharedPtr<Path> p;
 		void set(uint64_t now, const SharedPtr<Path>& path)
@@ -1490,7 +1489,6 @@ class Bond {
 	/**
 	 * Link state reporting
 	 */
-	bool _isHealthy;
 	uint8_t _numAliveLinks;
 	uint8_t _numTotalLinks;
 

+ 0 - 1
node/Node.cpp

@@ -509,7 +509,6 @@ ZT_PeerList *Node::peers() const
 		if (pi->second->bond()) {
 			p->isBonded = pi->second->bond();
 			p->bondingPolicy = pi->second->bond()->policy();
-			p->isHealthy = pi->second->bond()->isHealthy();
 			p->numAliveLinks = pi->second->bond()->getNumAliveLinks();
 			p->numTotalLinks = pi->second->bond()->getNumTotalLinks();
 		}

+ 1 - 1
node/Peer.cpp

@@ -503,7 +503,7 @@ void Peer::performMultipathStateCheck(void *tPtr, int64_t now)
 	_localMultipathSupported = ((numAlivePaths >= 1) && (RR->bc->inUse()) && (ZT_PROTO_VERSION > 9));
 	if (_localMultipathSupported && !_bond) {
 		if (RR->bc) {
-			_bond = RR->bc->createTransportTriggeredBond(RR, this);
+			_bond = RR->bc->createBond(RR, this);
 			/**
 			 * Allow new bond to retroactively learn all paths known to this peer
 			 */

+ 4 - 1
node/Peer.hpp

@@ -53,7 +53,10 @@ private:
 	Peer() {} // disabled to prevent bugs -- should not be constructed uninitialized
 
 public:
-	~Peer() { Utils::burn(_key,sizeof(_key)); }
+	~Peer() {
+		Utils::burn(_key,sizeof(_key));
+		RR->bc->destroyBond(_id.address().toInt());
+	}
 
 	/**
 	 * Construct a new peer

+ 4 - 29
one.cpp

@@ -523,31 +523,23 @@ static int cli(int argc,char **argv)
 					printf("%s" ZT_EOL_S,OSUtils::jsonDump(j).c_str());
 				} else {
 					bool bFoundBond = false;
-					printf("    <peer>                        <bondtype>    <status>    <links>" ZT_EOL_S);
+					printf("    <peer>                        <bondtype>     <links>" ZT_EOL_S);
 					if (j.is_array()) {
 						for(unsigned long k=0;k<j.size();++k) {
 							nlohmann::json &p = j[k];
 							bool isBonded = p["isBonded"];
 							if (isBonded) {
 								int8_t bondingPolicy = p["bondingPolicy"];
-								bool isHealthy = p["isHealthy"];
 								int8_t numAliveLinks = p["numAliveLinks"];
 								int8_t numTotalLinks = p["numTotalLinks"];
 								bFoundBond = true;
-								std::string healthStr;
-								if (isHealthy) {
-									healthStr = "HEALTHY";
-								} else {
-									healthStr = "DEGRADED";
-								}
 								std::string policyStr = "none";
 								if (bondingPolicy >= ZT_BOND_POLICY_NONE && bondingPolicy <= ZT_BOND_POLICY_BALANCE_AWARE) {
 									policyStr = Bond::getPolicyStrByCode(bondingPolicy);
 								}
-								printf("%10s  %32s    %8s        %d/%d" ZT_EOL_S,
+								printf("%10s  %32s         %d/%d" ZT_EOL_S,
 									OSUtils::jsonString(p ["address"],"-").c_str(),
 									policyStr.c_str(),
-									healthStr.c_str(),
 									numAliveLinks,
 									numTotalLinks);
 							}
@@ -617,12 +609,6 @@ static int cli(int argc,char **argv)
 					if (json) {
 						printf("%s" ZT_EOL_S,OSUtils::jsonDump(j).c_str());
 					} else {
-						std::string healthStr;
-						if (OSUtils::jsonInt(j["isHealthy"],0)) {
-							healthStr = "Healthy";
-						} else {
-							healthStr = "Degraded";
-						}
 						int numAliveLinks = OSUtils::jsonInt(j["numAliveLinks"],0);
 						int numTotalLinks = OSUtils::jsonInt(j["numTotalLinks"],0);
 						printf("Peer               : %s\n", arg1.c_str());
@@ -630,7 +616,6 @@ static int cli(int argc,char **argv)
 						//if (bondingPolicy == ZT_BOND_POLICY_ACTIVE_BACKUP) {
 						printf("Link Select Method : %d\n", (int)OSUtils::jsonInt(j["linkSelectMethod"],0));
 						//}
-						printf("Status             : %s\n", healthStr.c_str());
 						printf("Links              : %d/%d\n", numAliveLinks, numTotalLinks);
 						printf("Failover Interval  : %d (ms)\n", (int)OSUtils::jsonInt(j["failoverInterval"],0));
 						printf("Up Delay           : %d (ms)\n", (int)OSUtils::jsonInt(j["upDelay"],0));
@@ -705,33 +690,23 @@ static int cli(int argc,char **argv)
 				printf("%s" ZT_EOL_S,OSUtils::jsonDump(j).c_str());
 			} else {
 				bool bFoundBond = false;
-				printf("    <peer>                        <bondtype>    <status>    <links>" ZT_EOL_S);
+				printf("    <peer>                        <bondtype>     <links>" ZT_EOL_S);
 				if (j.is_array()) {
 					for(unsigned long k=0;k<j.size();++k) {
 						nlohmann::json &p = j[k];
 						bool isBonded = p["isBonded"];
 						if (isBonded) {
 							int8_t bondingPolicy = p["bondingPolicy"];
-							bool isHealthy = p["isHealthy"];
 							int8_t numAliveLinks = p["numAliveLinks"];
 							int8_t numTotalLinks = p["numTotalLinks"];
-
 							bFoundBond = true;
-							std::string healthStr;
-							if (isHealthy) {
-								healthStr = "Healthy";
-							} else {
-								healthStr = "Degraded";
-							}
 							std::string policyStr = "none";
 							if (bondingPolicy >= ZT_BOND_POLICY_NONE && bondingPolicy <= ZT_BOND_POLICY_BALANCE_AWARE) {
 								policyStr = Bond::getPolicyStrByCode(bondingPolicy);
 							}
-
-							printf("%10s  %32s    %8s        %d/%d" ZT_EOL_S,
+							printf("%10s  %32s         %d/%d" ZT_EOL_S,
 								OSUtils::jsonString(p["address"],"-").c_str(),
 								policyStr.c_str(),
-								healthStr.c_str(),
 								numAliveLinks,
 								numTotalLinks);
 						}

+ 0 - 2
service/OneService.cpp

@@ -510,7 +510,6 @@ static void _peerToJson(nlohmann::json &pj,const ZT_Peer *peer)
 	pj["isBonded"] = peer->isBonded;
 	if (peer->isBonded) {
 		pj["bondingPolicy"] = peer->bondingPolicy;
-		pj["isHealthy"] = peer->isHealthy;
 		pj["numAliveLinks"] = peer->numAliveLinks;
 		pj["numTotalLinks"] = peer->numTotalLinks;
 	}
@@ -542,7 +541,6 @@ static void _bondToJson(nlohmann::json &pj, SharedPtr<Bond> &bond)
 		return;
 	}
 
-	pj["isHealthy"] = bond->isHealthy();
 	pj["numAliveLinks"] = bond->getNumAliveLinks();
 	pj["numTotalLinks"] = bond->getNumTotalLinks();
 	pj["failoverInterval"] = bond->getFailoverInterval();