failover.go 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245
  1. package logic
  2. import (
  3. "errors"
  4. "net"
  5. "sync"
  6. "github.com/google/uuid"
  7. "github.com/gravitl/netmaker/logger"
  8. "github.com/gravitl/netmaker/logic"
  9. "github.com/gravitl/netmaker/models"
  10. "golang.org/x/exp/slog"
  11. )
  12. var failOverCtxMutex = &sync.RWMutex{}
  13. var failOverCacheMutex = &sync.RWMutex{}
  14. var failOverCache = make(map[models.NetworkID]string)
  15. func InitFailOverCache() {
  16. failOverCacheMutex.Lock()
  17. defer failOverCacheMutex.Unlock()
  18. networks, err := logic.GetNetworks()
  19. if err != nil {
  20. return
  21. }
  22. allNodes, err := logic.GetAllNodes()
  23. if err != nil {
  24. return
  25. }
  26. for _, network := range networks {
  27. networkNodes := logic.GetNetworkNodesMemory(allNodes, network.NetID)
  28. for _, node := range networkNodes {
  29. if node.IsFailOver {
  30. failOverCache[models.NetworkID(network.NetID)] = node.ID.String()
  31. break
  32. }
  33. }
  34. }
  35. }
  36. func CheckFailOverCtx(failOverNode, victimNode, peerNode models.Node) error {
  37. failOverCtxMutex.RLock()
  38. defer failOverCtxMutex.RUnlock()
  39. if peerNode.FailOverPeers == nil {
  40. return nil
  41. }
  42. if victimNode.FailOverPeers == nil {
  43. return nil
  44. }
  45. _, peerHasFailovered := peerNode.FailOverPeers[victimNode.ID.String()]
  46. _, victimHasFailovered := victimNode.FailOverPeers[peerNode.ID.String()]
  47. if peerHasFailovered && victimHasFailovered &&
  48. victimNode.FailedOverBy == failOverNode.ID && peerNode.FailedOverBy == failOverNode.ID {
  49. return errors.New("failover ctx is already set")
  50. }
  51. return nil
  52. }
  53. func SetFailOverCtx(failOverNode, victimNode, peerNode models.Node) error {
  54. failOverCtxMutex.Lock()
  55. defer failOverCtxMutex.Unlock()
  56. if peerNode.FailOverPeers == nil {
  57. peerNode.FailOverPeers = make(map[string]struct{})
  58. }
  59. if victimNode.FailOverPeers == nil {
  60. victimNode.FailOverPeers = make(map[string]struct{})
  61. }
  62. _, peerHasFailovered := peerNode.FailOverPeers[victimNode.ID.String()]
  63. _, victimHasFailovered := victimNode.FailOverPeers[peerNode.ID.String()]
  64. if peerHasFailovered && victimHasFailovered &&
  65. victimNode.FailedOverBy == failOverNode.ID && peerNode.FailedOverBy == failOverNode.ID {
  66. return errors.New("failover ctx is already set")
  67. }
  68. peerNode.FailOverPeers[victimNode.ID.String()] = struct{}{}
  69. victimNode.FailOverPeers[peerNode.ID.String()] = struct{}{}
  70. victimNode.FailedOverBy = failOverNode.ID
  71. peerNode.FailedOverBy = failOverNode.ID
  72. if err := logic.UpsertNode(&victimNode); err != nil {
  73. return err
  74. }
  75. if err := logic.UpsertNode(&peerNode); err != nil {
  76. return err
  77. }
  78. return nil
  79. }
  80. // GetFailOverNode - gets the host acting as failOver
  81. func GetFailOverNode(network string, allNodes []models.Node) (models.Node, error) {
  82. nodes := logic.GetNetworkNodesMemory(allNodes, network)
  83. for _, node := range nodes {
  84. if node.IsFailOver {
  85. return node, nil
  86. }
  87. }
  88. return models.Node{}, errors.New("auto relay not found")
  89. }
  90. func RemoveFailOverFromCache(network string) {
  91. failOverCacheMutex.Lock()
  92. defer failOverCacheMutex.Unlock()
  93. delete(failOverCache, models.NetworkID(network))
  94. }
  95. func SetFailOverInCache(node models.Node) {
  96. failOverCacheMutex.Lock()
  97. defer failOverCacheMutex.Unlock()
  98. failOverCache[models.NetworkID(node.Network)] = node.ID.String()
  99. }
  100. // FailOverExists - checks if failOver exists already in the network
  101. func FailOverExists(network string) (failOverNode models.Node, exists bool) {
  102. failOverCacheMutex.RLock()
  103. defer failOverCacheMutex.RUnlock()
  104. if nodeID, ok := failOverCache[models.NetworkID(network)]; ok {
  105. failOverNode, err := logic.GetNodeByID(nodeID)
  106. if err == nil {
  107. return failOverNode, true
  108. }
  109. }
  110. return
  111. }
  112. // ResetFailedOverPeer - removes failed over node from network peers
  113. func ResetFailedOverPeer(failedOveredNode *models.Node) error {
  114. nodes, err := logic.GetNetworkNodes(failedOveredNode.Network)
  115. if err != nil {
  116. return err
  117. }
  118. failedOveredNode.FailedOverBy = uuid.Nil
  119. failedOveredNode.FailOverPeers = make(map[string]struct{})
  120. err = logic.UpsertNode(failedOveredNode)
  121. if err != nil {
  122. return err
  123. }
  124. for _, node := range nodes {
  125. if node.FailOverPeers == nil || node.ID == failedOveredNode.ID {
  126. continue
  127. }
  128. delete(node.FailOverPeers, failedOveredNode.ID.String())
  129. logic.UpsertNode(&node)
  130. }
  131. return nil
  132. }
  133. // ResetFailOver - reset failovered peers
  134. func ResetFailOver(failOverNode *models.Node) error {
  135. // Unset FailedOverPeers
  136. nodes, err := logic.GetNetworkNodes(failOverNode.Network)
  137. if err != nil {
  138. return err
  139. }
  140. for _, node := range nodes {
  141. if node.FailedOverBy == failOverNode.ID {
  142. node.FailedOverBy = uuid.Nil
  143. node.FailOverPeers = make(map[string]struct{})
  144. logic.UpsertNode(&node)
  145. }
  146. }
  147. return nil
  148. }
  149. // GetFailOverPeerIps - adds the failedOvered peerIps by the peer
  150. func GetFailOverPeerIps(peer, node *models.Node) []net.IPNet {
  151. allowedips := []net.IPNet{}
  152. for failOverpeerID := range node.FailOverPeers {
  153. failOverpeer, err := logic.GetNodeByID(failOverpeerID)
  154. if err == nil && failOverpeer.FailedOverBy == peer.ID {
  155. logic.GetNodeEgressInfo(&failOverpeer)
  156. if failOverpeer.Address.IP != nil {
  157. allowed := net.IPNet{
  158. IP: failOverpeer.Address.IP,
  159. Mask: net.CIDRMask(32, 32),
  160. }
  161. allowedips = append(allowedips, allowed)
  162. }
  163. if failOverpeer.Address6.IP != nil {
  164. allowed := net.IPNet{
  165. IP: failOverpeer.Address6.IP,
  166. Mask: net.CIDRMask(128, 128),
  167. }
  168. allowedips = append(allowedips, allowed)
  169. }
  170. if failOverpeer.EgressDetails.IsEgressGateway {
  171. allowedips = append(allowedips, logic.GetEgressIPs(&failOverpeer)...)
  172. }
  173. if failOverpeer.IsRelay {
  174. for _, id := range failOverpeer.RelayedNodes {
  175. rNode, _ := logic.GetNodeByID(id)
  176. if rNode.Address.IP != nil {
  177. allowed := net.IPNet{
  178. IP: rNode.Address.IP,
  179. Mask: net.CIDRMask(32, 32),
  180. }
  181. allowedips = append(allowedips, allowed)
  182. }
  183. if rNode.Address6.IP != nil {
  184. allowed := net.IPNet{
  185. IP: rNode.Address6.IP,
  186. Mask: net.CIDRMask(128, 128),
  187. }
  188. allowedips = append(allowedips, allowed)
  189. }
  190. if rNode.EgressDetails.IsEgressGateway {
  191. allowedips = append(allowedips, logic.GetEgressIPs(&rNode)...)
  192. }
  193. }
  194. }
  195. // handle ingress gateway peers
  196. if failOverpeer.IsIngressGateway {
  197. extPeers, _, _, err := logic.GetExtPeers(&failOverpeer, node)
  198. if err != nil {
  199. logger.Log(2, "could not retrieve ext peers for ", peer.ID.String(), err.Error())
  200. }
  201. for _, extPeer := range extPeers {
  202. allowedips = append(allowedips, extPeer.AllowedIPs...)
  203. }
  204. }
  205. }
  206. }
  207. return allowedips
  208. }
  209. func CreateFailOver(node models.Node) error {
  210. if _, exists := FailOverExists(node.Network); exists {
  211. return errors.New("failover already exists in the network")
  212. }
  213. host, err := logic.GetHost(node.HostID.String())
  214. if err != nil {
  215. return err
  216. }
  217. if host.OS != models.OS_Types.Linux {
  218. return errors.New("only linux nodes are allowed to be set as failover")
  219. }
  220. if node.IsRelayed {
  221. return errors.New("relayed node cannot be set as failover")
  222. }
  223. node.IsFailOver = true
  224. err = logic.UpsertNode(&node)
  225. if err != nil {
  226. slog.Error("failed to upsert node", "node", node.ID.String(), "error", err)
  227. return err
  228. }
  229. SetFailOverInCache(node)
  230. return nil
  231. }