Browse Source

added wipe failover cases and ceased node update on metrics update

0xdcarns 2 years ago
parent
commit
eb75a6829c
6 changed files with 67 additions and 29 deletions
  1. 6 0
      controllers/node.go
  2. 14 13
      ee/initialize.go
  3. 27 0
      ee/logic/failover.go
  4. 4 4
      logic/gateway.go
  5. 3 0
      logic/server.go
  6. 13 12
      mq/handlers.go

+ 6 - 0
controllers/node.go

@@ -886,6 +886,12 @@ func updateNode(w http.ResponseWriter, r *http.Request) {
 		}
 	}
 
+	if ifaceDelta && servercfg.Is_EE {
+		if err = logic.EnterpriseResetAllPeersFailovers.(func(string, string) error)(node.ID, node.Network); err != nil {
+			logger.Log(0, "failed to reset failover lists during node update for node", node.Name, node.Network)
+		}
+	}
+
 	err = logic.UpdateNode(&node, &newNode)
 	if err != nil {
 		logger.Log(0, r.Header.Get("user"),

+ 14 - 13
ee/initialize.go

@@ -30,8 +30,9 @@ func InitEE() {
 		AddLicenseHooks()
 	})
 	logic.EnterpriseFailoverFunc = eelogic.SetFailover
-	// logic.EnterpriseResetFailoverFunc = eelogic.ResetFailover
-	// resetFailover()
+	logic.EnterpriseResetFailoverFunc = eelogic.ResetFailover
+	logic.EnterpriseResetAllPeersFailovers = eelogic.WipeAffectedFailoversOnly
+	resetFailover()
 }
 
 func setControllerLimits() {
@@ -42,17 +43,17 @@ func setControllerLimits() {
 	servercfg.Is_EE = true
 }
 
-// func resetFailover() {
-// 	nets, err := logic.GetNetworks()
-// 	if err == nil {
-// 		for _, net := range nets {
-// 			err = logic.EnterpriseResetFailoverFunc.(func(string) error)(net.NetID)
-// 			if err != nil {
-// 				logger.Log(0, "failed to reset failover on network", net.NetID, ":", err.Error())
-// 			}
-// 		}
-// 	}
-// }
+// resetFailover - resets failover state on every known network; InitEE calls
+// it right after registering the failover hooks.
+// If the networks cannot be listed nothing is reset and no error is reported;
+// a failure on a single network is logged and the remaining networks are
+// still processed.
+func resetFailover() {
+	networks, err := logic.GetNetworks()
+	if err != nil {
+		return
+	}
+	for _, network := range networks {
+		// ResetFailover is provided by the eelogic package (the same function
+		// InitEE registers as logic.EnterpriseResetFailoverFunc), so the call
+		// has to be package-qualified.
+		if err := eelogic.ResetFailover(network.NetID); err != nil {
+			logger.Log(0, "failed to reset failover on network", network.NetID, ":", err.Error())
+		}
+	}
+}
 
 func retrieveEELogo() string {
 	return `              

+ 27 - 0
ee/logic/failover.go

@@ -92,3 +92,30 @@ func WipeFailover(nodeid string) error {
 	}
 	return nil
 }
+
+// WipeAffectedFailoversOnly - wipes failover data for the given node (ID) and
+// for every other node on the network that has a non-empty entry for the given
+// node in its failover list.
+//
+// Returns the error from listing the network's nodes, or otherwise the first
+// error hit while wiping; a failed wipe does not stop the remaining nodes from
+// being processed. Nodes whose metrics cannot be retrieved are skipped.
+func WipeAffectedFailoversOnly(nodeid, network string) error {
+	networkNodes, err := logic.GetNetworkNodes(network)
+	if err != nil {
+		// surface the failure so the caller's error check can log it
+		return err
+	}
+
+	// remember only the first wipe failure; keep going so one bad node does
+	// not leave the rest of the network with stale failover data
+	var firstErr error
+	wipe := func(id string) {
+		if wipeErr := WipeFailover(id); wipeErr != nil && firstErr == nil {
+			firstErr = wipeErr
+		}
+	}
+
+	for i := range networkNodes {
+		peerID := networkNodes[i].ID
+		if peerID == nodeid {
+			// the node itself is always wiped
+			wipe(nodeid)
+			continue
+		}
+		peerMetrics, metricsErr := logic.GetMetrics(peerID)
+		if metricsErr != nil || peerMetrics == nil {
+			continue
+		}
+		// indexing a nil map yields the zero value, so no separate nil check
+		// on FailoverPeers is needed
+		if len(peerMetrics.FailoverPeers[nodeid]) > 0 {
+			wipe(peerID)
+		}
+	}
+	return firstErr
+}

+ 4 - 4
logic/gateway.go

@@ -276,10 +276,10 @@ func DeleteIngressGateway(networkName string, nodeid string) (models.Node, error
 		}
 	}
 
-	// err = EnterpriseResetFailoverFunc.(func(string) error)(node.Network)
-	// if err != nil {
-	// 	logger.Log(0, "failed to reset failover on network", node.Network, ":", err.Error())
-	// }
+	err = EnterpriseResetFailoverFunc.(func(string) error)(node.Network)
+	if err != nil {
+		logger.Log(0, "failed to reset failover on network", node.Network, ":", err.Error())
+	}
 
 	data, err := json.Marshal(&node)
 	if err != nil {

+ 3 - 0
logic/server.go

@@ -27,6 +27,9 @@ var EnterpriseFailoverFunc interface{}
 // EnterpriseResetFailoverFunc - interface to control reset failover funcs
 var EnterpriseResetFailoverFunc interface{}
 
+// EnterpriseResetAllPeersFailovers - hook for the EE function that wipes failover data for a node and for
+// every node that has it in its failover list; callers assert it to func(nodeID, network string) error
+var EnterpriseResetAllPeersFailovers interface{}
+
 // == Join, Checkin, and Leave for Server ==
 
 // KUBERNETES_LISTEN_PORT - starting port for Kubernetes in order to use NodePort range

+ 13 - 12
mq/handlers.go

@@ -122,7 +122,7 @@ func UpdateMetrics(client mqtt.Client, msg mqtt.Message) {
 				return
 			}
 
-			updateNodeMetrics(&currentNode, &newMetrics)
+			shouldUpdate := updateNodeMetrics(&currentNode, &newMetrics)
 
 			if err = logic.UpdateMetrics(id, &newMetrics); err != nil {
 				logger.Log(1, "faield to update node metrics", id, currentNode.Name, err.Error())
@@ -139,13 +139,12 @@ func UpdateMetrics(client mqtt.Client, msg mqtt.Message) {
 				err := logic.EnterpriseFailoverFunc.(func(*models.Node) error)(&currentNode)
 				if err != nil {
 					logger.Log(0, "failed to failover for node", currentNode.Name, "on network", currentNode.Network, "-", err.Error())
-				} else {
-					if err := NodeUpdate(&currentNode); err != nil {
-						logger.Log(1, "error publishing node update to node", currentNode.Name, err.Error())
-					}
-					if err := PublishPeerUpdate(&currentNode, true); err != nil {
-						logger.Log(1, "error publishing peer update after auto relay for node", currentNode.Name, err.Error())
-					}
+				}
+			}
+
+			if shouldUpdate {
+				if err = PublishPeerUpdate(&currentNode, true); err != nil {
+					logger.Log(0, "failed to publish update after failover peer change for node", currentNode.Name, currentNode.Network)
 				}
 			}
 
@@ -208,14 +207,14 @@ func updateNodePeers(currentNode *models.Node) {
 	}
 }
 
-func updateNodeMetrics(currentNode *models.Node, newMetrics *models.Metrics) {
+func updateNodeMetrics(currentNode *models.Node, newMetrics *models.Metrics) bool {
 	if newMetrics.FailoverPeers == nil {
 		newMetrics.FailoverPeers = make(map[string]string)
 	}
 	oldMetrics, err := logic.GetMetrics(currentNode.ID)
 	if err != nil {
 		logger.Log(1, "error finding old metrics for node", currentNode.ID, currentNode.Name)
-		return
+		return false
 	}
 	if oldMetrics.FailoverPeers == nil {
 		oldMetrics.FailoverPeers = make(map[string]string)
@@ -256,7 +255,7 @@ func updateNodeMetrics(currentNode *models.Node, newMetrics *models.Metrics) {
 	nodes, err := logic.GetNetworkNodes(currentNode.Network)
 	if err != nil {
 		logger.Log(0, "failed to retrieve nodes while updating metrics")
-		return
+		return false
 	}
 	for _, node := range nodes {
 		if !newMetrics.Connectivity[node.ID].Connected &&
@@ -265,14 +264,16 @@ func updateNodeMetrics(currentNode *models.Node, newMetrics *models.Metrics) {
 			newMetrics.FailoverPeers[node.ID] = node.FailoverNode
 		}
 	}
-
+	shouldUpdate := false
 	for k, v := range oldMetrics.FailoverPeers {
 		if len(v) > 0 && len(newMetrics.FailoverPeers[k]) == 0 {
 			newMetrics.FailoverPeers[k] = v
+			shouldUpdate = true
 		}
 	}
 
 	for k := range oldMetrics.Connectivity { // cleanup any left over data, self healing
 		delete(newMetrics.Connectivity, k)
 	}
+	return shouldUpdate
 }