failover.go 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. package logic
  2. import (
  3. "context"
  4. "errors"
  5. "net"
  6. "sync"
  7. "github.com/google/uuid"
  8. "github.com/gravitl/netmaker/db"
  9. "github.com/gravitl/netmaker/logger"
  10. "github.com/gravitl/netmaker/logic"
  11. "github.com/gravitl/netmaker/models"
  12. "github.com/gravitl/netmaker/schema"
  13. "golang.org/x/exp/slog"
  14. )
  15. var failOverCtxMutex = &sync.RWMutex{}
  16. var failOverCacheMutex = &sync.RWMutex{}
  17. var failOverCache = make(map[models.NetworkID]string)
  18. func InitFailOverCache() {
  19. failOverCacheMutex.Lock()
  20. defer failOverCacheMutex.Unlock()
  21. networks, err := logic.GetNetworks()
  22. if err != nil {
  23. return
  24. }
  25. allNodes, err := logic.GetAllNodes()
  26. if err != nil {
  27. return
  28. }
  29. for _, network := range networks {
  30. networkNodes := logic.GetNetworkNodesMemory(allNodes, network.NetID)
  31. for _, node := range networkNodes {
  32. if node.IsFailOver {
  33. failOverCache[models.NetworkID(network.NetID)] = node.ID.String()
  34. break
  35. }
  36. }
  37. }
  38. }
  39. func CheckFailOverCtx(failOverNode, victimNode, peerNode models.Node) error {
  40. failOverCtxMutex.RLock()
  41. defer failOverCtxMutex.RUnlock()
  42. if peerNode.FailOverPeers == nil {
  43. return nil
  44. }
  45. if victimNode.FailOverPeers == nil {
  46. return nil
  47. }
  48. if peerNode.Mutex != nil {
  49. peerNode.Mutex.Lock()
  50. }
  51. _, peerHasFailovered := peerNode.FailOverPeers[victimNode.ID.String()]
  52. if peerNode.Mutex != nil {
  53. peerNode.Mutex.Unlock()
  54. }
  55. if victimNode.Mutex != nil {
  56. victimNode.Mutex.Lock()
  57. }
  58. _, victimHasFailovered := victimNode.FailOverPeers[peerNode.ID.String()]
  59. if victimNode.Mutex != nil {
  60. victimNode.Mutex.Unlock()
  61. }
  62. if peerHasFailovered && victimHasFailovered &&
  63. victimNode.FailedOverBy == failOverNode.ID && peerNode.FailedOverBy == failOverNode.ID {
  64. return errors.New("failover ctx is already set")
  65. }
  66. return nil
  67. }
  68. func SetFailOverCtx(failOverNode, victimNode, peerNode models.Node) error {
  69. failOverCtxMutex.Lock()
  70. defer failOverCtxMutex.Unlock()
  71. if peerNode.FailOverPeers == nil {
  72. peerNode.FailOverPeers = make(map[string]struct{})
  73. }
  74. if victimNode.FailOverPeers == nil {
  75. victimNode.FailOverPeers = make(map[string]struct{})
  76. }
  77. if peerNode.Mutex != nil {
  78. peerNode.Mutex.Lock()
  79. }
  80. _, peerHasFailovered := peerNode.FailOverPeers[victimNode.ID.String()]
  81. if peerNode.Mutex != nil {
  82. peerNode.Mutex.Unlock()
  83. }
  84. if victimNode.Mutex != nil {
  85. victimNode.Mutex.Lock()
  86. }
  87. _, victimHasFailovered := victimNode.FailOverPeers[peerNode.ID.String()]
  88. if victimNode.Mutex != nil {
  89. victimNode.Mutex.Unlock()
  90. }
  91. if peerHasFailovered && victimHasFailovered &&
  92. victimNode.FailedOverBy == failOverNode.ID && peerNode.FailedOverBy == failOverNode.ID {
  93. return errors.New("failover ctx is already set")
  94. }
  95. if peerNode.Mutex != nil {
  96. peerNode.Mutex.Lock()
  97. }
  98. peerNode.FailOverPeers[victimNode.ID.String()] = struct{}{}
  99. if peerNode.Mutex != nil {
  100. peerNode.Mutex.Unlock()
  101. }
  102. if victimNode.Mutex != nil {
  103. victimNode.Mutex.Lock()
  104. }
  105. victimNode.FailOverPeers[peerNode.ID.String()] = struct{}{}
  106. if victimNode.Mutex != nil {
  107. victimNode.Mutex.Unlock()
  108. }
  109. victimNode.FailedOverBy = failOverNode.ID
  110. peerNode.FailedOverBy = failOverNode.ID
  111. if err := logic.UpsertNode(&victimNode); err != nil {
  112. return err
  113. }
  114. if err := logic.UpsertNode(&peerNode); err != nil {
  115. return err
  116. }
  117. return nil
  118. }
  119. // GetFailOverNode - gets the host acting as failOver
  120. func GetFailOverNode(network string, allNodes []models.Node) (models.Node, error) {
  121. nodes := logic.GetNetworkNodesMemory(allNodes, network)
  122. for _, node := range nodes {
  123. if node.IsFailOver {
  124. return node, nil
  125. }
  126. }
  127. return models.Node{}, errors.New("auto relay not found")
  128. }
  129. func RemoveFailOverFromCache(network string) {
  130. failOverCacheMutex.Lock()
  131. defer failOverCacheMutex.Unlock()
  132. delete(failOverCache, models.NetworkID(network))
  133. }
  134. func SetFailOverInCache(node models.Node) {
  135. failOverCacheMutex.Lock()
  136. defer failOverCacheMutex.Unlock()
  137. failOverCache[models.NetworkID(node.Network)] = node.ID.String()
  138. }
  139. // FailOverExists - checks if failOver exists already in the network
  140. func FailOverExists(network string) (failOverNode models.Node, exists bool) {
  141. failOverCacheMutex.RLock()
  142. defer failOverCacheMutex.RUnlock()
  143. if nodeID, ok := failOverCache[models.NetworkID(network)]; ok {
  144. failOverNode, err := logic.GetNodeByID(nodeID)
  145. if err == nil {
  146. return failOverNode, true
  147. }
  148. }
  149. return
  150. }
  151. // ResetFailedOverPeer - removes failed over node from network peers
  152. func ResetFailedOverPeer(failedOveredNode *models.Node) error {
  153. nodes, err := logic.GetNetworkNodes(failedOveredNode.Network)
  154. if err != nil {
  155. return err
  156. }
  157. failedOveredNode.FailedOverBy = uuid.Nil
  158. failedOveredNode.FailOverPeers = make(map[string]struct{})
  159. err = logic.UpsertNode(failedOveredNode)
  160. if err != nil {
  161. return err
  162. }
  163. for _, node := range nodes {
  164. if node.FailOverPeers == nil || node.ID == failedOveredNode.ID {
  165. continue
  166. }
  167. delete(node.FailOverPeers, failedOveredNode.ID.String())
  168. logic.UpsertNode(&node)
  169. }
  170. return nil
  171. }
  172. // ResetFailOver - reset failovered peers
  173. func ResetFailOver(failOverNode *models.Node) error {
  174. // Unset FailedOverPeers
  175. nodes, err := logic.GetNetworkNodes(failOverNode.Network)
  176. if err != nil {
  177. return err
  178. }
  179. for _, node := range nodes {
  180. if node.FailedOverBy == failOverNode.ID {
  181. node.FailedOverBy = uuid.Nil
  182. node.FailOverPeers = make(map[string]struct{})
  183. logic.UpsertNode(&node)
  184. }
  185. }
  186. return nil
  187. }
  188. // GetFailOverPeerIps - adds the failedOvered peerIps by the peer
  189. func GetFailOverPeerIps(peer, node *models.Node) []net.IPNet {
  190. allowedips := []net.IPNet{}
  191. eli, _ := (&schema.Egress{Network: node.Network}).ListByNetwork(db.WithContext(context.TODO()))
  192. acls, _ := logic.ListAclsByNetwork(models.NetworkID(node.Network))
  193. for failOverpeerID := range node.FailOverPeers {
  194. failOverpeer, err := logic.GetNodeByID(failOverpeerID)
  195. if err == nil && failOverpeer.FailedOverBy == peer.ID {
  196. logic.GetNodeEgressInfo(&failOverpeer, eli, acls)
  197. if failOverpeer.Address.IP != nil {
  198. allowed := net.IPNet{
  199. IP: failOverpeer.Address.IP,
  200. Mask: net.CIDRMask(32, 32),
  201. }
  202. allowedips = append(allowedips, allowed)
  203. }
  204. if failOverpeer.Address6.IP != nil {
  205. allowed := net.IPNet{
  206. IP: failOverpeer.Address6.IP,
  207. Mask: net.CIDRMask(128, 128),
  208. }
  209. allowedips = append(allowedips, allowed)
  210. }
  211. if failOverpeer.EgressDetails.IsEgressGateway {
  212. allowedips = append(allowedips, logic.GetEgressIPs(&failOverpeer)...)
  213. }
  214. if failOverpeer.IsRelay {
  215. for _, id := range failOverpeer.RelayedNodes {
  216. rNode, _ := logic.GetNodeByID(id)
  217. logic.GetNodeEgressInfo(&rNode, eli, acls)
  218. if rNode.Address.IP != nil {
  219. allowed := net.IPNet{
  220. IP: rNode.Address.IP,
  221. Mask: net.CIDRMask(32, 32),
  222. }
  223. allowedips = append(allowedips, allowed)
  224. }
  225. if rNode.Address6.IP != nil {
  226. allowed := net.IPNet{
  227. IP: rNode.Address6.IP,
  228. Mask: net.CIDRMask(128, 128),
  229. }
  230. allowedips = append(allowedips, allowed)
  231. }
  232. if rNode.EgressDetails.IsEgressGateway {
  233. allowedips = append(allowedips, logic.GetEgressIPs(&rNode)...)
  234. }
  235. }
  236. }
  237. // handle ingress gateway peers
  238. if failOverpeer.IsIngressGateway {
  239. extPeers, _, _, err := logic.GetExtPeers(&failOverpeer, node)
  240. if err != nil {
  241. logger.Log(2, "could not retrieve ext peers for ", peer.ID.String(), err.Error())
  242. }
  243. for _, extPeer := range extPeers {
  244. allowedips = append(allowedips, extPeer.AllowedIPs...)
  245. }
  246. }
  247. }
  248. }
  249. return allowedips
  250. }
  251. func CreateFailOver(node models.Node) error {
  252. if _, exists := FailOverExists(node.Network); exists {
  253. return errors.New("failover already exists in the network")
  254. }
  255. host, err := logic.GetHost(node.HostID.String())
  256. if err != nil {
  257. return err
  258. }
  259. if host.OS != models.OS_Types.Linux {
  260. return errors.New("only linux nodes are allowed to be set as failover")
  261. }
  262. if node.IsRelayed {
  263. return errors.New("relayed node cannot be set as failover")
  264. }
  265. node.IsFailOver = true
  266. err = logic.UpsertNode(&node)
  267. if err != nil {
  268. slog.Error("failed to upsert node", "node", node.ID.String(), "error", err)
  269. return err
  270. }
  271. SetFailOverInCache(node)
  272. return nil
  273. }