failover.go 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251
  1. package logic
  2. import (
  3. "context"
  4. "errors"
  5. "net"
  6. "sync"
  7. "github.com/google/uuid"
  8. "github.com/gravitl/netmaker/db"
  9. "github.com/gravitl/netmaker/logger"
  10. "github.com/gravitl/netmaker/logic"
  11. "github.com/gravitl/netmaker/models"
  12. "github.com/gravitl/netmaker/schema"
  13. "golang.org/x/exp/slog"
  14. )
  15. var failOverCtxMutex = &sync.RWMutex{}
  16. var failOverCacheMutex = &sync.RWMutex{}
  17. var failOverCache = make(map[models.NetworkID]string)
  18. func InitFailOverCache() {
  19. failOverCacheMutex.Lock()
  20. defer failOverCacheMutex.Unlock()
  21. networks, err := logic.GetNetworks()
  22. if err != nil {
  23. return
  24. }
  25. allNodes, err := logic.GetAllNodes()
  26. if err != nil {
  27. return
  28. }
  29. for _, network := range networks {
  30. networkNodes := logic.GetNetworkNodesMemory(allNodes, network.NetID)
  31. for _, node := range networkNodes {
  32. if node.IsFailOver {
  33. failOverCache[models.NetworkID(network.NetID)] = node.ID.String()
  34. break
  35. }
  36. }
  37. }
  38. }
  39. func CheckFailOverCtx(failOverNode, victimNode, peerNode models.Node) error {
  40. failOverCtxMutex.RLock()
  41. defer failOverCtxMutex.RUnlock()
  42. if peerNode.FailOverPeers == nil {
  43. return nil
  44. }
  45. if victimNode.FailOverPeers == nil {
  46. return nil
  47. }
  48. _, peerHasFailovered := peerNode.FailOverPeers[victimNode.ID.String()]
  49. _, victimHasFailovered := victimNode.FailOverPeers[peerNode.ID.String()]
  50. if peerHasFailovered && victimHasFailovered &&
  51. victimNode.FailedOverBy == failOverNode.ID && peerNode.FailedOverBy == failOverNode.ID {
  52. return errors.New("failover ctx is already set")
  53. }
  54. return nil
  55. }
  56. func SetFailOverCtx(failOverNode, victimNode, peerNode models.Node) error {
  57. failOverCtxMutex.Lock()
  58. defer failOverCtxMutex.Unlock()
  59. if peerNode.FailOverPeers == nil {
  60. peerNode.FailOverPeers = make(map[string]struct{})
  61. }
  62. if victimNode.FailOverPeers == nil {
  63. victimNode.FailOverPeers = make(map[string]struct{})
  64. }
  65. _, peerHasFailovered := peerNode.FailOverPeers[victimNode.ID.String()]
  66. _, victimHasFailovered := victimNode.FailOverPeers[peerNode.ID.String()]
  67. if peerHasFailovered && victimHasFailovered &&
  68. victimNode.FailedOverBy == failOverNode.ID && peerNode.FailedOverBy == failOverNode.ID {
  69. return errors.New("failover ctx is already set")
  70. }
  71. peerNode.FailOverPeers[victimNode.ID.String()] = struct{}{}
  72. victimNode.FailOverPeers[peerNode.ID.String()] = struct{}{}
  73. victimNode.FailedOverBy = failOverNode.ID
  74. peerNode.FailedOverBy = failOverNode.ID
  75. if err := logic.UpsertNode(&victimNode); err != nil {
  76. return err
  77. }
  78. if err := logic.UpsertNode(&peerNode); err != nil {
  79. return err
  80. }
  81. return nil
  82. }
  83. // GetFailOverNode - gets the host acting as failOver
  84. func GetFailOverNode(network string, allNodes []models.Node) (models.Node, error) {
  85. nodes := logic.GetNetworkNodesMemory(allNodes, network)
  86. for _, node := range nodes {
  87. if node.IsFailOver {
  88. return node, nil
  89. }
  90. }
  91. return models.Node{}, errors.New("auto relay not found")
  92. }
  93. func RemoveFailOverFromCache(network string) {
  94. failOverCacheMutex.Lock()
  95. defer failOverCacheMutex.Unlock()
  96. delete(failOverCache, models.NetworkID(network))
  97. }
  98. func SetFailOverInCache(node models.Node) {
  99. failOverCacheMutex.Lock()
  100. defer failOverCacheMutex.Unlock()
  101. failOverCache[models.NetworkID(node.Network)] = node.ID.String()
  102. }
  103. // FailOverExists - checks if failOver exists already in the network
  104. func FailOverExists(network string) (failOverNode models.Node, exists bool) {
  105. failOverCacheMutex.RLock()
  106. defer failOverCacheMutex.RUnlock()
  107. if nodeID, ok := failOverCache[models.NetworkID(network)]; ok {
  108. failOverNode, err := logic.GetNodeByID(nodeID)
  109. if err == nil {
  110. return failOverNode, true
  111. }
  112. }
  113. return
  114. }
  115. // ResetFailedOverPeer - removes failed over node from network peers
  116. func ResetFailedOverPeer(failedOveredNode *models.Node) error {
  117. nodes, err := logic.GetNetworkNodes(failedOveredNode.Network)
  118. if err != nil {
  119. return err
  120. }
  121. failedOveredNode.FailedOverBy = uuid.Nil
  122. failedOveredNode.FailOverPeers = make(map[string]struct{})
  123. err = logic.UpsertNode(failedOveredNode)
  124. if err != nil {
  125. return err
  126. }
  127. for _, node := range nodes {
  128. if node.FailOverPeers == nil || node.ID == failedOveredNode.ID {
  129. continue
  130. }
  131. delete(node.FailOverPeers, failedOveredNode.ID.String())
  132. logic.UpsertNode(&node)
  133. }
  134. return nil
  135. }
  136. // ResetFailOver - reset failovered peers
  137. func ResetFailOver(failOverNode *models.Node) error {
  138. // Unset FailedOverPeers
  139. nodes, err := logic.GetNetworkNodes(failOverNode.Network)
  140. if err != nil {
  141. return err
  142. }
  143. for _, node := range nodes {
  144. if node.FailedOverBy == failOverNode.ID {
  145. node.FailedOverBy = uuid.Nil
  146. node.FailOverPeers = make(map[string]struct{})
  147. logic.UpsertNode(&node)
  148. }
  149. }
  150. return nil
  151. }
  152. // GetFailOverPeerIps - adds the failedOvered peerIps by the peer
  153. func GetFailOverPeerIps(peer, node *models.Node) []net.IPNet {
  154. allowedips := []net.IPNet{}
  155. eli, _ := (&schema.Egress{Network: node.Network}).ListByNetwork(db.WithContext(context.TODO()))
  156. acls, _ := logic.ListAclsByNetwork(models.NetworkID(node.Network))
  157. for failOverpeerID := range node.FailOverPeers {
  158. failOverpeer, err := logic.GetNodeByID(failOverpeerID)
  159. if err == nil && failOverpeer.FailedOverBy == peer.ID {
  160. logic.GetNodeEgressInfo(&failOverpeer, eli, acls)
  161. if failOverpeer.Address.IP != nil {
  162. allowed := net.IPNet{
  163. IP: failOverpeer.Address.IP,
  164. Mask: net.CIDRMask(32, 32),
  165. }
  166. allowedips = append(allowedips, allowed)
  167. }
  168. if failOverpeer.Address6.IP != nil {
  169. allowed := net.IPNet{
  170. IP: failOverpeer.Address6.IP,
  171. Mask: net.CIDRMask(128, 128),
  172. }
  173. allowedips = append(allowedips, allowed)
  174. }
  175. if failOverpeer.EgressDetails.IsEgressGateway {
  176. allowedips = append(allowedips, logic.GetEgressIPs(&failOverpeer)...)
  177. }
  178. if failOverpeer.IsRelay {
  179. for _, id := range failOverpeer.RelayedNodes {
  180. rNode, _ := logic.GetNodeByID(id)
  181. logic.GetNodeEgressInfo(&rNode, eli, acls)
  182. if rNode.Address.IP != nil {
  183. allowed := net.IPNet{
  184. IP: rNode.Address.IP,
  185. Mask: net.CIDRMask(32, 32),
  186. }
  187. allowedips = append(allowedips, allowed)
  188. }
  189. if rNode.Address6.IP != nil {
  190. allowed := net.IPNet{
  191. IP: rNode.Address6.IP,
  192. Mask: net.CIDRMask(128, 128),
  193. }
  194. allowedips = append(allowedips, allowed)
  195. }
  196. if rNode.EgressDetails.IsEgressGateway {
  197. allowedips = append(allowedips, logic.GetEgressIPs(&rNode)...)
  198. }
  199. }
  200. }
  201. // handle ingress gateway peers
  202. if failOverpeer.IsIngressGateway {
  203. extPeers, _, _, err := logic.GetExtPeers(&failOverpeer, node)
  204. if err != nil {
  205. logger.Log(2, "could not retrieve ext peers for ", peer.ID.String(), err.Error())
  206. }
  207. for _, extPeer := range extPeers {
  208. allowedips = append(allowedips, extPeer.AllowedIPs...)
  209. }
  210. }
  211. }
  212. }
  213. return allowedips
  214. }
  215. func CreateFailOver(node models.Node) error {
  216. if _, exists := FailOverExists(node.Network); exists {
  217. return errors.New("failover already exists in the network")
  218. }
  219. host, err := logic.GetHost(node.HostID.String())
  220. if err != nil {
  221. return err
  222. }
  223. if host.OS != models.OS_Types.Linux {
  224. return errors.New("only linux nodes are allowed to be set as failover")
  225. }
  226. if node.IsRelayed {
  227. return errors.New("relayed node cannot be set as failover")
  228. }
  229. node.IsFailOver = true
  230. err = logic.UpsertNode(&node)
  231. if err != nil {
  232. slog.Error("failed to upsert node", "node", node.ID.String(), "error", err)
  233. return err
  234. }
  235. SetFailOverInCache(node)
  236. return nil
  237. }