Prechádzať zdrojové kódy

NET-2054: Auto Removal of Offline Nodes, fix enrollment key relay function (#3458)

* check host ports on join

* if 443 not available fallback to 51821

* if 443 not available fallback to 51821

* add config for auto delete of offline nodes

* autocleanup offline nodes

* delete offline nodes on startup

* fix relay via join token
Abhishek K 3 mesiacov pred
rodič
commit
a1304b43d8

+ 33 - 29
auth/host_session.go

@@ -3,6 +3,8 @@ package auth
 import (
 	"encoding/json"
 	"fmt"
+	"log/slog"
+	"strings"
 	"time"
 
 	"github.com/google/uuid"
@@ -14,7 +16,6 @@ import (
 	"github.com/gravitl/netmaker/models"
 	"github.com/gravitl/netmaker/mq"
 	"github.com/gravitl/netmaker/servercfg"
-	"golang.org/x/exp/slog"
 )
 
 // SessionHandler - called by the HTTP router when user
@@ -242,37 +243,40 @@ func CheckNetRegAndHostUpdate(networks []string, h *models.Host, relayNodeId uui
 		network := networks[i]
 		if ok, _ := logic.NetworkExists(network); ok {
 			newNode, err := logic.UpdateHostNetwork(h, network, true)
-			if err != nil {
-				logger.Log(0, "failed to add host to network:", h.ID.String(), h.Name, network, err.Error())
-				continue
-			}
-			if len(tags) > 0 {
-				newNode.Tags = make(map[models.TagID]struct{})
-				for _, tagI := range tags {
-					newNode.Tags[tagI] = struct{}{}
-				}
-				logic.UpsertNode(newNode)
-			}
-
-			if relayNodeId != uuid.Nil && !newNode.IsRelayed {
-				// check if relay node exists and acting as relay
-				relaynode, err := logic.GetNodeByID(relayNodeId.String())
-				if err == nil && relaynode.IsRelay && relaynode.Network == newNode.Network {
-					slog.Info(fmt.Sprintf("adding relayed node %s to relay %s on network %s", newNode.ID.String(), relayNodeId.String(), network))
-					newNode.IsRelayed = true
-					newNode.RelayedBy = relayNodeId.String()
-					updatedRelayNode := relaynode
-					updatedRelayNode.RelayedNodes = append(updatedRelayNode.RelayedNodes, newNode.ID.String())
-					logic.UpdateRelayed(&relaynode, &updatedRelayNode)
-					if err := logic.UpsertNode(&updatedRelayNode); err != nil {
-						slog.Error("failed to update node", "nodeid", relayNodeId.String())
+			if err == nil || strings.Contains(err.Error(), "host already part of network") {
+				if len(tags) > 0 {
+					newNode.Tags = make(map[models.TagID]struct{})
+					for _, tagI := range tags {
+						newNode.Tags[tagI] = struct{}{}
 					}
-					if err := logic.UpsertNode(newNode); err != nil {
-						slog.Error("failed to update node", "nodeid", relayNodeId.String())
+					logic.UpsertNode(newNode)
+				}
+				if relayNodeId != uuid.Nil && !newNode.IsRelayed {
+					// check if relay node exists and acting as relay
+					relaynode, err := logic.GetNodeByID(relayNodeId.String())
+					if err == nil && relaynode.IsGw && relaynode.Network == newNode.Network {
+						slog.Error(fmt.Sprintf("adding relayed node %s to relay %s on network %s", newNode.ID.String(), relayNodeId.String(), network))
+						newNode.IsRelayed = true
+						newNode.RelayedBy = relayNodeId.String()
+						updatedRelayNode := relaynode
+						updatedRelayNode.RelayedNodes = append(updatedRelayNode.RelayedNodes, newNode.ID.String())
+						logic.UpdateRelayed(&relaynode, &updatedRelayNode)
+						if err := logic.UpsertNode(&updatedRelayNode); err != nil {
+							slog.Error("failed to update node", "nodeid", relayNodeId.String())
+						}
+						if err := logic.UpsertNode(newNode); err != nil {
+							slog.Error("failed to update node", "nodeid", relayNodeId.String())
+						}
+					} else {
+						slog.Error("failed to relay node. maybe specified relay node is actually not a relay? Or the relayed node is not in the same network with relay?", "err", err)
 					}
-				} else {
-					slog.Error("failed to relay node. maybe specified relay node is actually not a relay? Or the relayed node is not in the same network with relay?", "err", err)
 				}
+				if strings.Contains(err.Error(), "host already part of network") {
+					continue
+				}
+			} else {
+				logger.Log(0, "failed to add host to network:", h.ID.String(), h.Name, network, err.Error())
+				continue
 			}
 			logger.Log(1, "added new node", newNode.ID.String(), "to host", h.Name)
 			hostactions.AddAction(models.HostUpdate{

+ 8 - 8
controllers/enrollmentkeys.go

@@ -380,14 +380,14 @@ func handleHostRegister(w http.ResponseWriter, r *http.Request) {
 	} else {
 		// need to revise the list of networks from key
 		// based on the ones host currently has
-		networksToAdd := []string{}
-		currentNets := logic.GetHostNetworks(newHost.ID.String())
-		for _, newNet := range enrollmentKey.Networks {
-			if !logic.StringSliceContains(currentNets, newNet) {
-				networksToAdd = append(networksToAdd, newNet)
-			}
-		}
-		enrollmentKey.Networks = networksToAdd
+		// networksToAdd := []string{}
+		// currentNets := logic.GetHostNetworks(newHost.ID.String())
+		// for _, newNet := range enrollmentKey.Networks {
+		// 	if !logic.StringSliceContains(currentNets, newNet) {
+		// 		networksToAdd = append(networksToAdd, newNet)
+		// 	}
+		// }
+		// enrollmentKey.Networks = networksToAdd
 		currHost, err := logic.GetHost(newHost.ID.String())
 		if err != nil {
 			slog.Error("failed registration", "hostID", newHost.ID.String(), "hostName", newHost.Name, "error", err.Error())

+ 1 - 1
logic/hosts.go

@@ -393,7 +393,7 @@ func UpdateHostNetwork(h *models.Host, network string, add bool) (*models.Node,
 			if !add {
 				return &node, nil
 			} else {
-				return nil, errors.New("host already part of network " + network)
+				return &node, errors.New("host already part of network " + network)
 			}
 		}
 	}

+ 4 - 1
logic/peers.go

@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"net"
 	"net/netip"
+	"time"
 
 	"github.com/google/uuid"
 	"github.com/gravitl/netmaker/database"
@@ -190,9 +191,11 @@ func GetPeerUpdateForHost(network string, host *models.Host, allNodes []models.N
 		if err != nil {
 			continue
 		}
-		if !node.Connected || node.PendingDelete || node.Action == models.NODE_DELETE {
+
+		if !node.Connected || node.PendingDelete || node.Action == models.NODE_DELETE || time.Since(node.LastCheckIn) > time.Hour {
 			continue
 		}
+
 		GetNodeEgressInfo(&node)
 		hostPeerUpdate = SetDefaultGw(node, hostPeerUpdate)
 		if !hostPeerUpdate.IsInternetGw {

+ 41 - 2
logic/zombie.go

@@ -7,6 +7,7 @@ import (
 	"github.com/google/uuid"
 	"github.com/gravitl/netmaker/logger"
 	"github.com/gravitl/netmaker/models"
+	"github.com/gravitl/netmaker/servercfg"
 )
 
 const (
@@ -77,7 +78,7 @@ func checkForZombieHosts(h *models.Host) {
 func ManageZombies(ctx context.Context, peerUpdate chan *models.Node) {
 	logger.Log(2, "Zombie management started")
 	go InitializeZombies()
-	go checkPendingRemovalNodes()
+	go checkPendingRemovalNodes(peerUpdate)
 	// Zombie Nodes Cleanup Four Times a Day
 	ticker := time.NewTicker(time.Hour * ZOMBIE_TIMEOUT)
 
@@ -135,15 +136,53 @@ func ManageZombies(ctx context.Context, peerUpdate chan *models.Node) {
 					}
 				}
 			}
+			if servercfg.IsAutoCleanUpEnabled() {
+				nodes, _ := GetAllNodes()
+				for _, node := range nodes {
+					if time.Since(node.LastCheckIn) > time.Minute*ZOMBIE_DELETE_TIME {
+						if err := DeleteNode(&node, true); err != nil {
+							continue
+						}
+						node.PendingDelete = true
+						node.Action = models.NODE_DELETE
+						peerUpdate <- &node
+						host, err := GetHost(node.HostID.String())
+						if err == nil && len(host.Nodes) == 0 {
+							RemoveHostByID(host.ID.String())
+						}
+
+					}
+				}
+			}
+
 		}
 	}
 }
-func checkPendingRemovalNodes() {
+func checkPendingRemovalNodes(peerUpdate chan *models.Node) {
 	nodes, _ := GetAllNodes()
 	for _, node := range nodes {
+		node := node
 		pendingDelete := node.PendingDelete || node.Action == models.NODE_DELETE
 		if pendingDelete {
 			DeleteNode(&node, true)
+			peerUpdate <- &node
+			continue
+		}
+		if servercfg.IsAutoCleanUpEnabled() {
+			if time.Since(node.LastCheckIn) > time.Minute*ZOMBIE_DELETE_TIME {
+				if err := DeleteNode(&node, true); err != nil {
+					continue
+				}
+				node.PendingDelete = true
+				node.Action = models.NODE_DELETE
+				peerUpdate <- &node
+				host, err := GetHost(node.HostID.String())
+				if err == nil && len(host.Nodes) == 0 {
+					RemoveHostByID(host.ID.String())
+				}
+
+			}
+
 		}
 	}
 }

+ 1 - 1
main.go

@@ -189,7 +189,7 @@ func runMessageQueue(wg *sync.WaitGroup, ctx context.Context) {
 	defer mq.CloseClient()
 	go mq.Keepalive(ctx)
 	go func() {
-		peerUpdate := make(chan *models.Node)
+		peerUpdate := make(chan *models.Node, 100)
 		go logic.ManageZombies(ctx, peerUpdate)
 		go logic.DeleteExpiredNodes(ctx, peerUpdate)
 		for nodeUpdate := range peerUpdate {

+ 3 - 0
scripts/netmaker.default.env

@@ -102,3 +102,6 @@ STUN=true
 METRICS_PORT=51821
 # Metrics Collection interval in minutes
 PUBLISH_METRIC_INTERVAL=15
+# auto delete offline nodes
+AUTO_DELETE_OFFLINE_NODES=false
+

+ 4 - 0
servercfg/serverconf.go

@@ -875,3 +875,7 @@ func GetAllowedEmailDomains() string {
 func GetNmBaseDomain() string {
 	return os.Getenv("NM_DOMAIN")
 }
+
+func IsAutoCleanUpEnabled() bool {
+	return os.Getenv("AUTO_DELETE_OFFLINE_NODES") == "true"
+}