Pārlūkot izejas kodu

Add basic bond health status reporting (listbonds)

Joseph Henry 5 gadi atpakaļ
vecāks
revīzija
9f4985b11a
6 mainītis faili ar 198 papildinājumiem un 5 dzēšanām
  1. 28 5
      include/ZeroTierOne.h
  2. 51 0
      node/Bond.cpp
  3. 19 0
      node/Bond.hpp
  4. 3 0
      node/Node.cpp
  5. 63 0
      one.cpp
  6. 34 0
      service/OneService.cpp

+ 28 - 5
include/ZeroTierOne.h

@@ -1475,17 +1475,40 @@ typedef struct
 	enum ZT_PeerRole role;
 
 	/**
-	 * Number of paths (size of paths[])
+	 * Whether a multi-link bond has formed
 	 */
-	unsigned int pathCount;
+	bool isBonded;
 
 	/**
-	 * Whether multiple paths to this peer are bonded
+	 * The bonding policy used to bond to this peer
 	 */
-	bool isBonded;
-
 	int bondingPolicy;
 
+	/**
+	 * The health status of the bond to this peer
+	 */
+	bool isHealthy;
+
+	/**
+	 * The number of links that comprise the bond to this peer that are considered alive
+	 */
+	int numAliveLinks;
+
+	/**
+	 * The number of links that comprise the bond to this peer
+	 */
+	int numTotalLinks;
+
+	/**
+	 * The user-specified bond template name
+	 */
+	char customBondName[32];
+
+	/**
+	 * Number of paths (size of paths[])
+	 */
+	unsigned int pathCount;
+
 	/**
 	 * Known network paths to peer
 	 */

+ 51 - 0
node/Bond.cpp

@@ -730,6 +730,9 @@ void Bond::curateBond(const int64_t now, bool rebuildBond)
 {
 	//fprintf(stderr, "%lu curateBond (rebuildBond=%d), _numBondedPaths=%d\n", ((now - RR->bc->getBondStartTime())), rebuildBond, _numBondedPaths);
 	char pathStr[128];
+
+	uint8_t tmpNumAliveLinks = 0;
+	uint8_t tmpNumTotalLinks = 0;
 	/**
 	 * Update path states
 	 */
@@ -737,6 +740,10 @@ void Bond::curateBond(const int64_t now, bool rebuildBond)
 		if (!_paths[i]) {
 			continue;
 		}
+		tmpNumTotalLinks++;
+		if (_paths[i]->alive(now, true)) {
+			tmpNumAliveLinks++;
+		}
 		bool currEligibility = _paths[i]->eligible(now,_ackSendInterval);
 		//_paths[i]->address().toString(pathStr);
 		//fprintf(stderr, "\n\n%ld path eligibility (for %s, %s):\n", (RR->node->now() - RR->bc->getBondStartTime()), getLink(_paths[i])->ifname().c_str(), pathStr);
@@ -764,6 +771,46 @@ void Bond::curateBond(const int64_t now, bool rebuildBond)
 		}
 		_paths[i]->_lastEligibilityState = currEligibility;
 	}
+	_numAliveLinks = tmpNumAliveLinks;
+	_numTotalLinks = tmpNumTotalLinks;
+
+	/* Determine health status to report to user */
+
+	bool tmpHealthStatus = true;
+
+	if (_bondingPolicy == ZT_BONDING_POLICY_ACTIVE_BACKUP) {
+		if (_numAliveLinks < 2) {
+			// Considered healthy if there is at least one failover link
+			tmpHealthStatus = false;
+		}
+	}
+	if (_bondingPolicy == ZT_BONDING_POLICY_BROADCAST) {
+		if (_numAliveLinks < 1) {
+			// Considered healthy if we're able to send frames at all
+			tmpHealthStatus = false;
+		}
+	}
+	if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_RR) {
+		if (_numAliveLinks < _numTotalLinks) {
+			// Considered healthy if all known paths are alive; this should be refined to account for user bond config settings
+			tmpHealthStatus = false;
+		}
+	}
+	if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR) {
+		if (_numAliveLinks < _numTotalLinks) {
+			// Considered healthy if all known paths are alive; this should be refined to account for user bond config settings
+			tmpHealthStatus = false;
+		}
+	}
+	if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) {
+		if (_numAliveLinks < _numTotalLinks) {
+			// Considered healthy if all known paths are alive; this should be refined to account for user bond config settings
+			tmpHealthStatus = false;
+		}
+	}
+
+	_isHealthy = tmpHealthStatus;
+
 	/**
 	 * Curate the set of paths that are part of the bond proper. Selects a single path
 	 * per logical link according to eligibility and user-specified constraints.
@@ -1509,6 +1556,10 @@ void Bond::setReasonableDefaults(int policy, SharedPtr<Bond> templateBond, bool
 	_lastCheckUserPreferences = 0;
 	_lastBackgroundTaskCheck = 0;
 
+	_isHealthy = false;
+	_numAliveLinks = 0;
+	_numTotalLinks = 0;
+
 	_downDelay = 0;
 	_upDelay = 0;
 	_allowFlowHashing=false;

+ 19 - 0
node/Bond.hpp

@@ -485,6 +485,21 @@ public:
 	 */
 	inline uint8_t getPolicy() { return _bondingPolicy; }
 
+	/**
+	 * @return the health status of the bond
+	 */
+	inline bool isHealthy() { return _isHealthy; }
+
+	/**
+	 * @return the number of links comprising this bond which are considered alive
+	 */
+	inline uint8_t getNumAliveLinks() { return _numAliveLinks; };
+
+	/**
+	 * @return the number of links comprising this bond
+	 */
+	inline uint8_t getNumTotalLinks() { return _numTotalLinks; }
+
 	/**
 	 *
 	 * @param allowFlowHashing
@@ -626,6 +641,10 @@ private:
 	uint16_t _maxAcceptablePacketDelayVariance;
 	uint8_t _minAcceptableAllocation;
 
+	bool _isHealthy;
+	uint8_t _numAliveLinks;
+	uint8_t _numTotalLinks;
+
 	/**
 	 * Default initial punishment inflicted on misbehaving paths. Punishment slowly
 	 * drains linearly. For each eligibility change the remaining punishment is doubled.

+ 3 - 0
node/Node.cpp

@@ -513,6 +513,9 @@ ZT_PeerList *Node::peers() const
 		if (pi->second->bond()) {
 			p->isBonded = pi->second->bond();
 			p->bondingPolicy = pi->second->bond()->getPolicy();
+			p->isHealthy = pi->second->bond()->isHealthy();
+			p->numAliveLinks = pi->second->bond()->getNumAliveLinks();
+			p->numTotalLinks = pi->second->bond()->getNumTotalLinks();
 		}
 	}
 

+ 63 - 0
one.cpp

@@ -72,6 +72,8 @@
 #include "osdep/Http.hpp"
 #include "osdep/Thread.hpp"
 
+#include "node/BondController.hpp"
+
 #include "service/OneService.hpp"
 
 #include "ext/json/json.hpp"
@@ -467,6 +469,67 @@ static int cli(int argc,char **argv)
 			printf("%u %s %s" ZT_EOL_S,scode,command.c_str(),responseBody.c_str());
 			return 1;
 		}
+	} else if (command == "listbonds") {
+		const unsigned int scode = Http::GET(1024 * 1024 * 16,60000,(const struct sockaddr *)&addr,"/bonds",requestHeaders,responseHeaders,responseBody);
+
+		if (scode == 0) {
+			printf("Error connecting to the ZeroTier service: %s\n\nPlease check that the service is running and that TCP port 9993 can be contacted via 127.0.0.1." ZT_EOL_S, responseBody.c_str());
+			return 1;
+		}
+
+		nlohmann::json j;
+		try {
+			j = OSUtils::jsonParse(responseBody);
+		} catch (std::exception &exc) {
+			printf("%u %s invalid JSON response (%s)" ZT_EOL_S,scode,command.c_str(),exc.what());
+			return 1;
+		} catch ( ... ) {
+			printf("%u %s invalid JSON response (unknown exception)" ZT_EOL_S,scode,command.c_str());
+			return 1;
+		}
+
+		if (scode == 200) {
+			if (json) {
+				printf("%s" ZT_EOL_S,OSUtils::jsonDump(j).c_str());
+			} else {
+				printf("    <peer>                        <bondtype>    <status>    <links>" ZT_EOL_S);
+				if (j.is_array()) {
+					for(unsigned long k=0;k<j.size();++k) {
+						nlohmann::json &p = j[k];
+
+						bool isBonded = p["isBonded"];
+						int8_t bondingPolicy = p["bondingPolicy"];
+						bool isHealthy = p["isHealthy"];
+						int8_t numAliveLinks = p["numAliveLinks"];
+						int8_t numTotalLinks = p["numTotalLinks"];
+
+						if (isBonded) {
+							std::string healthStr;
+							if (isHealthy) {
+								healthStr = "HEALTHY";
+							} else {
+								healthStr = "DEGRADED";
+							}
+							std::string policyStr = "none";
+							if (bondingPolicy >= ZT_BONDING_POLICY_NONE && bondingPolicy <= ZT_BONDING_POLICY_BALANCE_AWARE) {
+								policyStr = BondController::getPolicyStrByCode(bondingPolicy);
+							}
+
+							printf("%10s  %32s    %8s        %d/%d" ZT_EOL_S,
+								OSUtils::jsonString(p ["address"],"-").c_str(),
+								policyStr.c_str(),
+								healthStr.c_str(),
+								numAliveLinks,
+								numTotalLinks);
+						}
+					}
+				}
+			}
+			return 0;
+		} else {
+			printf("%u %s %s" ZT_EOL_S,scode,command.c_str(),responseBody.c_str());
+			return 1;
+		}
 	} else if (command == "listnetworks") {
 		const unsigned int scode = Http::GET(1024 * 1024 * 16,60000,(const struct sockaddr *)&addr,"/network",requestHeaders,responseHeaders,responseBody);
 

+ 34 - 0
service/OneService.cpp

@@ -253,6 +253,11 @@ static void _peerToJson(nlohmann::json &pj,const ZT_Peer *peer)
 	pj["version"] = tmp;
 	pj["latency"] = peer->latency;
 	pj["role"] = prole;
+	pj["isBonded"] = peer->isBonded;
+	pj["bondingPolicy"] = peer->bondingPolicy;
+	pj["isHealthy"] = peer->isHealthy;
+	pj["numAliveLinks"] = peer->numAliveLinks;
+	pj["numTotalLinks"] = peer->numTotalLinks;
 
 	nlohmann::json pa = nlohmann::json::array();
 	for(unsigned int i=0;i<peer->pathCount;++i) {
@@ -1348,6 +1353,35 @@ public:
 						} else scode = 404;
 						_node->freeQueryResult((void *)pl);
 					} else scode = 500;
+				} else if (ps[0] == "bonds") {
+					ZT_PeerList *pl = _node->peers();
+					if (pl) {
+						if (ps.size() == 1) {
+							// Return [array] of all peers
+
+							res = nlohmann::json::array();
+							for(unsigned long i=0;i<pl->peerCount;++i) {
+								nlohmann::json pj;
+								_peerToJson(pj,&(pl->peers[i]));
+								res.push_back(pj);
+							}
+
+							scode = 200;
+						} else if (ps.size() == 2) {
+							// Return a single peer by ID or 404 if not found
+
+							uint64_t wantp = Utils::hexStrToU64(ps[1].c_str());
+							for(unsigned long i=0;i<pl->peerCount;++i) {
+								if (pl->peers[i].address == wantp) {
+									_peerToJson(res,&(pl->peers[i]));
+									scode = 200;
+									break;
+								}
+							}
+
+						} else scode = 404;
+						_node->freeQueryResult((void *)pl);
+					} else scode = 500;
 				} else {
 					if (_controller) {
 						scode = _controller->handleControlPlaneHttpGET(std::vector<std::string>(ps.begin()+1,ps.end()),urlArgs,headers,body,responseBody,responseContentType);