failover.go 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. package logic
import (
	"errors"
	"fmt"
	"net"
	"strings"
	"sync"
	"time"

	"github.com/google/uuid"
	"github.com/gravitl/netmaker/database"
	"github.com/gravitl/netmaker/logger"
	"github.com/gravitl/netmaker/logic"
	"github.com/gravitl/netmaker/models"
	"golang.org/x/exp/slog"
)
  15. var failOverCache = make(map[string]string)
  16. var failOverCacheMutex = &sync.RWMutex{}
  17. var failOverCtxMutex = &sync.RWMutex{}
  18. func AddFailOverHook() {
  19. logic.HookManagerCh <- models.HookDetails{
  20. Hook: FailOverCleanUpHook,
  21. Interval: time.Minute * 5,
  22. }
  23. }
  24. func SetFailOverCtx(failOverNode, victimNode, peerNode models.Node) error {
  25. failOverCtxMutex.Lock()
  26. defer failOverCtxMutex.Unlock()
  27. if peerNode.FailOverPeers == nil {
  28. peerNode.FailOverPeers = make(map[string]struct{})
  29. }
  30. if victimNode.FailOverPeers == nil {
  31. victimNode.FailOverPeers = make(map[string]struct{})
  32. }
  33. peerNode.FailOverPeers[victimNode.ID.String()] = struct{}{}
  34. victimNode.FailOverPeers[peerNode.ID.String()] = struct{}{}
  35. victimNode.FailedOverBy = failOverNode.ID
  36. peerNode.FailedOverBy = failOverNode.ID
  37. if err := logic.UpsertNode(&failOverNode); err != nil {
  38. return err
  39. }
  40. if err := logic.UpsertNode(&victimNode); err != nil {
  41. return err
  42. }
  43. if err := logic.UpsertNode(&peerNode); err != nil {
  44. return err
  45. }
  46. return nil
  47. }
  48. // FailOverExists - checks if failOver exists already in the network
  49. func FailOverExists(network string) (failOverNode models.Node, exists bool) {
  50. return GetFailOverFromCache(network)
  51. }
  52. func GetFailOverFromCache(network string) (node models.Node, exixts bool) {
  53. failOverCacheMutex.RLock()
  54. defer failOverCacheMutex.RUnlock()
  55. if nodeid, ok := failOverCache[network]; ok {
  56. failOverNode, err := logic.GetNodeByID(nodeid)
  57. if err != nil {
  58. delete(failOverCache, network)
  59. return models.Node{}, false
  60. }
  61. return failOverNode, true
  62. }
  63. return
  64. }
  65. func DeleteFailOverFromCache(network string) {
  66. failOverCacheMutex.Lock()
  67. defer failOverCacheMutex.Unlock()
  68. delete(failOverCache, network)
  69. }
  70. func StoreFailoverInCache(network, nodeId string) {
  71. failOverCacheMutex.Lock()
  72. defer failOverCacheMutex.Unlock()
  73. failOverCache[network] = nodeId
  74. }
  75. // ResetFailedOverPeer - removes failed over node from network peers
  76. func ResetFailedOverPeer(failedOveredNode *models.Node) error {
  77. nodes, err := logic.GetNetworkNodes(failedOveredNode.Network)
  78. if err != nil {
  79. return err
  80. }
  81. failedOveredNode.FailedOverBy = uuid.Nil
  82. failedOveredNode.FailOverPeers = make(map[string]struct{})
  83. err = logic.UpsertNode(failedOveredNode)
  84. if err != nil {
  85. return err
  86. }
  87. for _, node := range nodes {
  88. if node.FailOverPeers == nil || node.ID == failedOveredNode.ID {
  89. continue
  90. }
  91. delete(node.FailOverPeers, failedOveredNode.ID.String())
  92. logic.UpsertNode(&node)
  93. }
  94. return nil
  95. }
  96. // ResetFailOver - reset failovered peers
  97. func ResetFailOver(failOverNode *models.Node) error {
  98. // Unset FailedOverPeers
  99. nodes, err := logic.GetNetworkNodes(failOverNode.Network)
  100. if err != nil {
  101. return err
  102. }
  103. for _, node := range nodes {
  104. if node.FailedOverBy == failOverNode.ID {
  105. node.FailedOverBy = uuid.Nil
  106. node.FailOverPeers = make(map[string]struct{})
  107. logic.UpsertNode(&node)
  108. }
  109. }
  110. return nil
  111. }
  112. // GetFailOverPeerIps - adds the failedOvered peerIps by the peer
  113. func GetFailOverPeerIps(peer, node *models.Node) []net.IPNet {
  114. allowedips := []net.IPNet{}
  115. for failOverpeerID := range node.FailOverPeers {
  116. failOverpeer, err := logic.GetNodeByID(failOverpeerID)
  117. if err == nil && failOverpeer.FailedOverBy == peer.ID {
  118. if failOverpeer.Address.IP != nil {
  119. allowed := net.IPNet{
  120. IP: failOverpeer.Address.IP,
  121. Mask: net.CIDRMask(32, 32),
  122. }
  123. allowedips = append(allowedips, allowed)
  124. }
  125. if failOverpeer.Address6.IP != nil {
  126. allowed := net.IPNet{
  127. IP: failOverpeer.Address6.IP,
  128. Mask: net.CIDRMask(128, 128),
  129. }
  130. allowedips = append(allowedips, allowed)
  131. }
  132. if failOverpeer.IsEgressGateway {
  133. allowedips = append(allowedips, logic.GetEgressIPs(&failOverpeer)...)
  134. }
  135. if failOverpeer.IsRelay {
  136. for _, id := range failOverpeer.RelayedNodes {
  137. rNode, _ := logic.GetNodeByID(id)
  138. if rNode.Address.IP != nil {
  139. allowed := net.IPNet{
  140. IP: rNode.Address.IP,
  141. Mask: net.CIDRMask(32, 32),
  142. }
  143. allowedips = append(allowedips, allowed)
  144. }
  145. if rNode.Address6.IP != nil {
  146. allowed := net.IPNet{
  147. IP: rNode.Address6.IP,
  148. Mask: net.CIDRMask(128, 128),
  149. }
  150. allowedips = append(allowedips, allowed)
  151. }
  152. if rNode.IsEgressGateway {
  153. allowedips = append(allowedips, logic.GetEgressIPs(&rNode)...)
  154. }
  155. }
  156. }
  157. // handle ingress gateway peers
  158. if failOverpeer.IsIngressGateway {
  159. extPeers, _, _, err := logic.GetExtPeers(&failOverpeer, node)
  160. if err != nil {
  161. logger.Log(2, "could not retrieve ext peers for ", peer.ID.String(), err.Error())
  162. }
  163. for _, extPeer := range extPeers {
  164. allowedips = append(allowedips, extPeer.AllowedIPs...)
  165. }
  166. }
  167. }
  168. }
  169. return allowedips
  170. }
  171. func CreateFailOver(node models.Node) error {
  172. if _, exists := FailOverExists(node.Network); exists {
  173. return errors.New("failover already exists in the network")
  174. }
  175. host, err := logic.GetHost(node.HostID.String())
  176. if err != nil {
  177. return err
  178. }
  179. if host.OS != models.OS_Types.Linux {
  180. return errors.New("only linux nodes are allowed to be set as failover")
  181. }
  182. if node.IsRelayed {
  183. return errors.New("relayed node cannot be set as failover")
  184. }
  185. node.IsFailOver = true
  186. err = logic.UpsertNode(&node)
  187. if err != nil {
  188. slog.Error("failed to upsert node", "node", node.ID.String(), "error", err)
  189. return err
  190. }
  191. StoreFailoverInCache(node.Network, node.ID.String())
  192. return nil
  193. }
  194. // DoesFailoverAckExists - checks if ack with this id exists
  195. func DoesFailoverAckExists(id string) bool {
  196. failOverCtxMutex.Lock()
  197. defer failOverCtxMutex.Unlock()
  198. _, err := database.FetchRecord(database.PEER_ACK_TABLE, id)
  199. return err == nil
  200. }
  201. // RegisterFailOverAck - registers failover ack signal
  202. func RegisterFailOverAck(nodeid, peerid string) error {
  203. failOverCtxMutex.Lock()
  204. defer failOverCtxMutex.Unlock()
  205. return database.Insert(fmt.Sprintf("%s-%s", nodeid, peerid), time.Now().String(), database.PEER_ACK_TABLE)
  206. }
  207. // DeRegisterFailOverAck - removes the peer and node acks from DB
  208. func DeRegisterFailOverAck(nodeid, peerid string) {
  209. failOverCtxMutex.Lock()
  210. defer failOverCtxMutex.Unlock()
  211. database.DeleteRecord(fmt.Sprintf("%s-%s", nodeid, peerid), database.PEER_ACK_TABLE)
  212. database.DeleteRecord(fmt.Sprintf("%s-%s", peerid, nodeid), database.PEER_ACK_TABLE)
  213. }
  214. func FailOverCleanUpHook() error {
  215. failOverCtxMutex.Lock()
  216. defer failOverCtxMutex.Unlock()
  217. data, err := database.FetchRecords(database.PEER_ACK_TABLE)
  218. if err != nil {
  219. return err
  220. }
  221. for key, value := range data {
  222. parsedTime, err := time.Parse("2006-01-02 15:04:05", value)
  223. if err != nil {
  224. continue
  225. }
  226. if time.Since(parsedTime) > time.Minute*5 {
  227. database.DeleteRecord(key, database.PEER_ACK_TABLE)
  228. }
  229. }
  230. return nil
  231. }