
Partial implementation of ZT_MULTIPATH_ACTIVE_BACKUP

Joseph Henry, 6 years ago
Commit b0a91c0187
3 changed files with 83 additions and 12 deletions
  1. node/Constants.hpp  +18 -0
  2. node/Peer.cpp       +63 -12
  3. node/Peer.hpp       +2 -0

node/Constants.hpp  +18 -0

@@ -357,16 +357,29 @@
 /**
  * How much each factor contributes to the "stability" score of a path
  */
+
+#if 0
+#define ZT_PATH_CONTRIB_PDV                    (1.5 / 3.0)
+#define ZT_PATH_CONTRIB_LATENCY                (0.0 / 3.0)
+#define ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE (1.5 / 3.0)
+#else
 #define ZT_PATH_CONTRIB_PDV                    (1.0 / 3.0)
 #define ZT_PATH_CONTRIB_LATENCY                (1.0 / 3.0)
 #define ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE (1.0 / 3.0)
+#endif
 
 /**
  * How much each factor contributes to the "quality" score of a path
  */
+#if 0
+#define ZT_PATH_CONTRIB_STABILITY  (2.00 / 3.0)
+#define ZT_PATH_CONTRIB_THROUGHPUT (0.50 / 3.0)
+#define ZT_PATH_CONTRIB_SCOPE      (0.50 / 3.0)
+#else
 #define ZT_PATH_CONTRIB_STABILITY  (0.75 / 3.0)
 #define ZT_PATH_CONTRIB_THROUGHPUT (1.50 / 3.0)
 #define ZT_PATH_CONTRIB_SCOPE      (0.75 / 3.0)
+#endif
 
 /**
  * How often a QoS packet is sent
@@ -475,6 +488,11 @@
  */
 #define ZT_MULTIPATH_PEER_PING_PERIOD (ZT_PEER_PING_PERIOD / 10)
 
+/**
+ * How long before we consider a path to be dead in rapid fail-over scenarios
+ */
+#define ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD 1000
+
 /**
  * Paths are considered expired if they have not sent us a real packet in this long
  */
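For context, the quality weights above combine into a per-path relative quality score in Peer::computeAggregateAllocation() (first hunk below). A minimal sketch of that combination, assuming normalized relative inputs; the helper function itself is illustrative and not part of this commit:

#include <math.h>

// Sketch only: combines the ZT_PATH_CONTRIB_* quality weights into one
// relative quality score, mirroring the hunk in node/Peer.cpp below.
static float exampleRelativeQuality(float relStability, float relThroughput,
	float relScope, float ageContrib)
{
	float relQuality = (relStability * (float)ZT_PATH_CONTRIB_STABILITY)
		+ (fmaxf(1.0f, relThroughput) * (float)ZT_PATH_CONTRIB_THROUGHPUT)
		+ (relScope * (float)ZT_PATH_CONTRIB_SCOPE);
	relQuality *= ageContrib; // Age-based weighting factor computed by the caller
	// Clamp values: negligible scores drop to 0, near-perfect ones saturate to 1
	relQuality = relQuality > (1.00f / 100.0f) ? relQuality : 0.0f;
	relQuality = relQuality < (99.0f / 100.0f) ? relQuality : 1.0f;
	return relQuality;
}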

node/Peer.cpp  +63 -12

@@ -347,7 +347,7 @@ void Peer::computeAggregateAllocation(int64_t now)
 				+ (fmaxf(1.0f, relThroughput[i]) * (float)ZT_PATH_CONTRIB_THROUGHPUT)
 				+ relScope * (float)ZT_PATH_CONTRIB_SCOPE;
 			relQuality *= age_contrib;
-			// Arbitrary cutoffs
+			// Clamp values
 			relQuality = relQuality > (1.00f / 100.0f) ? relQuality : 0.0f;
 			relQuality = relQuality < (99.0f / 100.0f) ? relQuality : 1.0f;
 			totalRelativeQuality += relQuality;
@@ -357,7 +357,6 @@ void Peer::computeAggregateAllocation(int64_t now)
 	// Convert set of relative performances into an allocation set
 	for(uint16_t i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
 		if (_paths[i].p) {
-
 			if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RANDOM) {
 				_paths[i].p->updateComponentAllocationOfAggregateLink(((float)_pathChoiceHist.countValue(i) / (float)_pathChoiceHist.count()) * 255);
 			}
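In the ZT_MULTIPATH_BALANCE_RANDOM branch above, a path's share of the aggregate link is simply the fraction of recent random choices that landed on it, scaled to the 0-255 units expected by updateComponentAllocationOfAggregateLink(). A standalone sketch, with a plain std::vector standing in for the internal _pathChoiceHist ring buffer:

#include <stdint.h>
#include <vector>

// Sketch only: fraction of recorded random choices that picked pathIdx,
// scaled to the 0-255 allocation units used above.
static uint8_t exampleAllocation(const std::vector<unsigned int> &choices, unsigned int pathIdx)
{
	if (choices.empty())
		return 0;
	unsigned int hits = 0;
	for (size_t i=0; i<choices.size(); i++) {
		if (choices[i] == pathIdx)
			hits++;
	}
	return (uint8_t)(((float)hits / (float)choices.size()) * 255.0f);
}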
@@ -420,10 +419,10 @@ int Peer::aggregateLinkLogicalPathCount()
 	return pathCount;
 }
 
-std::vector<SharedPtr<Path>> Peer::getAllPaths(int64_t now)
+std::vector<SharedPtr<Path> > Peer::getAllPaths(int64_t now)
 {
 	Mutex::Lock _l(_virtual_paths_m); // FIXME: TX can now lock RX
-	std::vector<SharedPtr<Path>> paths;
+	std::vector<SharedPtr<Path> > paths;
 	for (int i=0; i<_virtualPaths.size(); i++) {
 		if (_virtualPaths[i]->p) {
 			paths.push_back(_virtualPaths[i]->p);
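(The added space in `SharedPtr<Path> >` above is a pre-C++11 portability fix: without it, the consecutive closing angle brackets in `std::vector<SharedPtr<Path>>` are tokenized as the right-shift operator `>>` by older compilers.)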
@@ -436,6 +435,8 @@ SharedPtr<Path> Peer::getAppropriatePath(int64_t now, bool includeExpired, int64
 {
 	Mutex::Lock _l(_paths_m);
 	SharedPtr<Path> selectedPath;
+	char curPathStr[128];
+	char newPathStr[128];
 	unsigned int bestPath = ZT_MAX_PEER_NETWORK_PATHS;
 
 	/**
@@ -511,14 +512,66 @@ SharedPtr<Path> Peer::getAppropriatePath(int64_t now, bool includeExpired, int64
 	 * All traffic is sent on all paths.
 	 */
 	if (RR->node->getMultipathMode() == ZT_MULTIPATH_BROADCAST) {
-		// Not handled here. Handled in Switch.cpp
+		// Not handled here. Handled in Switch::_trySend()
 	}
 
 	/**
 	 * Only one link is active. Fail-over is immediate.
 	 */
 	if (RR->node->getMultipathMode() == ZT_MULTIPATH_ACTIVE_BACKUP) {
-		// fprintf(stderr, "ZT_MULTIPATH_ACTIVE_BACKUP\n");
+		bool bFoundHotPath = false;
+		if (!_activeBackupPath) {
+			/* Select the first path that appears to still be active.
+			 * This will eventually be user-configurable */
+			for (int i=0; i<ZT_MAX_PEER_NETWORK_PATHS; i++) {
+				if (_paths[i].p) {
+					if (_activeBackupPath.ptr() == _paths[i].p.ptr()) {
+						continue;
+					}
+					_activeBackupPath = _paths[i].p;
+					if ((now - _paths[i].p->lastIn()) < ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) {
+						bFoundHotPath = true;
+						_activeBackupPath = _paths[i].p;
+						_activeBackupPath->address().toString(curPathStr);
+						fprintf(stderr, "selected %s as the primary active-backup path to %llx\n",
+							curPathStr, this->_id.address().toInt());
+					}
+				}
+			}
+			if (!_activeBackupPath) {
+				return SharedPtr<Path>();
+			}
+			if (!bFoundHotPath) {
+				_activeBackupPath->address().toString(curPathStr);
+				fprintf(stderr, "no hot paths available to use as active-backup primary to %llx, selected %s anyway\n",
+					this->_id.address().toInt(), curPathStr);
+			}
+		}
+		else {
+			if ((now - _activeBackupPath->lastIn()) > ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) {
+				_activeBackupPath->address().toString(curPathStr);
+				/* Fail-over to the first path that appears to still be active.
+				 * This will eventually be user-configurable */
+				for (int i=0; i<ZT_MAX_PEER_NETWORK_PATHS; i++) {
+					if (_paths[i].p) {
+						if (_activeBackupPath.ptr() == _paths[i].p.ptr()) {
+							continue;
+						}
+						if ((now - _paths[i].p->lastIn()) < ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) {
+							bFoundHotPath = true;
+							_activeBackupPath->address().toString(curPathStr); // Record path string for later debug trace
+							_activeBackupPath = _paths[i].p;
+							_activeBackupPath->address().toString(newPathStr);
+						}
+					}
+				}
+				if (bFoundHotPath) {
+					fprintf(stderr, "primary active-backup path %s to %llx appears to be dead, switched to path %s\n",
+						curPathStr, this->_id.address().toInt(), newPathStr);
+				}
+			}
+		}
+		return _activeBackupPath;
 	}
 
 	/**
@@ -553,27 +606,25 @@ SharedPtr<Path> Peer::getAppropriatePath(int64_t now, bool includeExpired, int64
 	 */
 	if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_XOR_FLOW) {
 		// fprintf(stderr, "ZT_MULTIPATH_BALANCE_XOR_FLOW (%llx) \n", flowId);
-		char pathStr[128];
 		struct Flow *currFlow = NULL;
 		if (_flows.count(flowId)) {
 			currFlow = _flows[flowId];
 			if (!currFlow->assignedPath) {
 				int idx = abs((int)(currFlow->flowId % (_virtualPaths.size()-1)));
 				currFlow->assignedPath = _virtualPaths[idx];
-				_virtualPaths[idx]->p->address().toString(pathStr);
+				_virtualPaths[idx]->p->address().toString(curPathStr);
 				fprintf(stderr, "assigning flow %llx between this node and peer %llx to path %s at index %d\n",
-					currFlow->flowId, this->_id.address().toInt(), pathStr, idx);
+					currFlow->flowId, this->_id.address().toInt(), curPathStr, idx);
 			}
 			else {
 				if (!currFlow->assignedPath->p->alive(now)) {
-					char newPathStr[128];
-					currFlow->assignedPath->p->address().toString(pathStr);
+					currFlow->assignedPath->p->address().toString(curPathStr);
 					// Re-assign
 					int idx = abs((int)(currFlow->flowId % (_virtualPaths.size()-1)));
 					currFlow->assignedPath = _virtualPaths[idx];
 					_virtualPaths[idx]->p->address().toString(newPathStr);
 					fprintf(stderr, "path %s assigned to flow %llx between this node and %llx appears to be dead, reassigning to path %s\n",
-						pathStr, currFlow->flowId, this->_id.address().toInt(), newPathStr);
+						curPathStr, currFlow->flowId, this->_id.address().toInt(), newPathStr);
 				}
 			}
 			return currFlow->assignedPath->p;
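The flow assignment in the hunk above reduces to a modulo of the flow ID over the virtual path count. A standalone sketch of that mapping (the helper is illustrative, not part of the commit); note that `% (size - 1)` can never select the last path and is undefined when only one virtual path exists, consistent with the commit's "partial implementation" framing:

#include <stdint.h>
#include <stdlib.h>

// Sketch only: maps a flow ID to a virtual path index as in the
// ZT_MULTIPATH_BALANCE_XOR_FLOW branch above.
static int exampleFlowPathIndex(int64_t flowId, size_t numPaths)
{
	if (numPaths < 2)
		return 0; // Guard added for this sketch; the committed code has no such check
	return abs((int)(flowId % (int64_t)(numPaths - 1)));
}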

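Putting the pieces together: with the `_activeBackupPath` member added to Peer.hpp below, active-backup mode picks one "hot" path (one that has received a packet within ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD, i.e. one second) and keeps using it until it goes quiet for longer than that period, at which point the same scan runs again to fail over. A condensed sketch of the scan as a hypothetical Peer helper; unlike the committed loops it stops at the first hot path, which matches the stated intent ("select the first path that appears to still be active"):

// Sketch only: first-hot-path scan described by the commit's comments.
// Hypothetical member, not part of the commit; the committed loops keep
// scanning and so can end on a later path than the first hot one.
SharedPtr<Path> Peer::firstHotPath(int64_t now)
{
	SharedPtr<Path> fallback;
	for (int i=0; i<ZT_MAX_PEER_NETWORK_PATHS; i++) {
		if (!_paths[i].p)
			continue;
		fallback = _paths[i].p; // Last non-null path, used if nothing is hot
		if ((now - _paths[i].p->lastIn()) < ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD)
			return _paths[i].p; // Hot: heard from within the last second
	}
	return fallback; // May be cold, or a NULL SharedPtr if no paths exist
}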
node/Peer.hpp  +2 -0

@@ -725,6 +725,8 @@ private:
 	std::map<int64_t, struct Flow *> _flows;
 
 	int16_t _roundRobinPathAssignmentIdx;
+
+	SharedPtr<Path> _activeBackupPath;
 };
 
 } // namespace ZeroTier