zombie.go 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. package logic
  2. import (
  3. "context"
  4. "time"
  5. "github.com/google/uuid"
  6. "github.com/gravitl/netmaker/logger"
  7. "github.com/gravitl/netmaker/models"
  8. "github.com/gravitl/netmaker/servercfg"
  9. )
  10. const (
  11. // ZOMBIE_TIMEOUT - timeout in hours for checking zombie status
  12. ZOMBIE_TIMEOUT = 6
  13. // ZOMBIE_DELETE_TIME - timeout in minutes for zombie node deletion
  14. ZOMBIE_DELETE_TIME = 10
  15. )
  16. var (
  17. zombies []uuid.UUID
  18. hostZombies []uuid.UUID
  19. newZombie chan uuid.UUID = make(chan (uuid.UUID), 10)
  20. newHostZombie chan uuid.UUID = make(chan (uuid.UUID), 10)
  21. )
  22. // CheckZombies - checks if new node has same hostid as existing node
  23. // if so, existing node is added to zombie node quarantine list
  24. // also cleans up nodes past their expiration date
  25. func CheckZombies(newnode *models.Node) {
  26. nodes, err := GetNetworkNodes(newnode.Network)
  27. if err != nil {
  28. logger.Log(1, "Failed to retrieve network nodes", newnode.Network, err.Error())
  29. return
  30. }
  31. for _, node := range nodes {
  32. if node.ID == newnode.ID {
  33. //skip self
  34. continue
  35. }
  36. if node.HostID == newnode.HostID {
  37. logger.Log(0, "adding ", node.ID.String(), " to zombie list")
  38. newZombie <- node.ID
  39. }
  40. }
  41. }
  42. // checkForZombieHosts - checks if new host has the same macAddress as an existing host
  43. // if true, existing host is added to host zombie collection
  44. func checkForZombieHosts(h *models.Host) {
  45. hosts, err := GetAllHosts()
  46. if err != nil {
  47. logger.Log(3, "error retrieving all hosts", err.Error())
  48. }
  49. for _, existing := range hosts {
  50. if existing.ID == h.ID {
  51. //probably an unnecessary check as new host should not be in database yet, but just in case
  52. //skip self
  53. continue
  54. }
  55. if existing.MacAddress.String() == h.MacAddress.String() {
  56. //add to hostZombies
  57. newHostZombie <- existing.ID
  58. //add all nodes belonging to host to zombile list
  59. for _, node := range existing.Nodes {
  60. id, err := uuid.Parse(node)
  61. if err != nil {
  62. logger.Log(3, "error parsing uuid from host.Nodes", err.Error())
  63. continue
  64. }
  65. newHostZombie <- id
  66. }
  67. }
  68. }
  69. }
  70. // ManageZombies - goroutine which adds/removes/deletes nodes from the zombie node quarantine list
  71. func ManageZombies(ctx context.Context, peerUpdate chan *models.Node) {
  72. logger.Log(2, "Zombie management started")
  73. go InitializeZombies()
  74. go checkPendingRemovalNodes(peerUpdate)
  75. // Zombie Nodes Cleanup Four Times a Day
  76. ticker := time.NewTicker(time.Hour * ZOMBIE_TIMEOUT)
  77. for {
  78. select {
  79. case <-ctx.Done():
  80. ticker.Stop()
  81. close(peerUpdate)
  82. return
  83. case id := <-newZombie:
  84. zombies = append(zombies, id)
  85. case id := <-newHostZombie:
  86. hostZombies = append(hostZombies, id)
  87. case <-ticker.C: // run this check 4 times a day
  88. logger.Log(3, "checking for zombie nodes")
  89. if len(zombies) > 0 {
  90. for i := len(zombies) - 1; i >= 0; i-- {
  91. node, err := GetNodeByID(zombies[i].String())
  92. if err != nil {
  93. logger.Log(1, "error retrieving zombie node", zombies[i].String(), err.Error())
  94. logger.Log(1, "deleting ", node.ID.String(), " from zombie list")
  95. zombies = append(zombies[:i], zombies[i+1:]...)
  96. continue
  97. }
  98. if time.Since(node.LastCheckIn) > time.Minute*ZOMBIE_DELETE_TIME {
  99. if err := DeleteNode(&node, true); err != nil {
  100. logger.Log(1, "error deleting zombie node", zombies[i].String(), err.Error())
  101. continue
  102. }
  103. node.PendingDelete = true
  104. node.Action = models.NODE_DELETE
  105. peerUpdate <- &node
  106. logger.Log(1, "deleting zombie node", node.ID.String())
  107. zombies = append(zombies[:i], zombies[i+1:]...)
  108. }
  109. }
  110. }
  111. if len(hostZombies) > 0 {
  112. logger.Log(3, "checking host zombies")
  113. for i := len(hostZombies) - 1; i >= 0; i-- {
  114. host, err := GetHost(hostZombies[i].String())
  115. if err != nil {
  116. logger.Log(1, "error retrieving zombie host", err.Error())
  117. if host != nil {
  118. logger.Log(1, "deleting ", host.ID.String(), " from zombie list")
  119. }
  120. hostZombies = append(hostZombies[:i], hostZombies[i+1:]...)
  121. continue
  122. }
  123. if len(host.Nodes) == 0 {
  124. if err := RemoveHost(host, true); err != nil {
  125. logger.Log(0, "error deleting zombie host", host.ID.String(), err.Error())
  126. }
  127. hostZombies = append(hostZombies[:i], hostZombies[i+1:]...)
  128. }
  129. }
  130. }
  131. if servercfg.IsAutoCleanUpEnabled() {
  132. nodes, _ := GetAllNodes()
  133. for _, node := range nodes {
  134. if !node.Connected {
  135. continue
  136. }
  137. if time.Since(node.LastCheckIn) > time.Hour*2 {
  138. if err := DeleteNode(&node, true); err != nil {
  139. continue
  140. }
  141. node.PendingDelete = true
  142. node.Action = models.NODE_DELETE
  143. peerUpdate <- &node
  144. host, err := GetHost(node.HostID.String())
  145. if err == nil && len(host.Nodes) == 0 {
  146. RemoveHostByID(host.ID.String())
  147. }
  148. }
  149. }
  150. }
  151. }
  152. }
  153. }
  154. func checkPendingRemovalNodes(peerUpdate chan *models.Node) {
  155. nodes, _ := GetAllNodes()
  156. for _, node := range nodes {
  157. node := node
  158. pendingDelete := node.PendingDelete || node.Action == models.NODE_DELETE
  159. if pendingDelete {
  160. DeleteNode(&node, true)
  161. peerUpdate <- &node
  162. continue
  163. }
  164. if servercfg.IsAutoCleanUpEnabled() && node.Connected {
  165. if time.Since(node.LastCheckIn) > time.Hour*2 {
  166. if err := DeleteNode(&node, true); err != nil {
  167. continue
  168. }
  169. node.PendingDelete = true
  170. node.Action = models.NODE_DELETE
  171. peerUpdate <- &node
  172. host, err := GetHost(node.HostID.String())
  173. if err == nil && len(host.Nodes) == 0 {
  174. RemoveHostByID(host.ID.String())
  175. }
  176. }
  177. }
  178. }
  179. }
  180. // InitializeZombies - populates the zombie quarantine list (should be called from initialization)
  181. func InitializeZombies() {
  182. nodes, err := GetAllNodes()
  183. if err != nil {
  184. logger.Log(1, "failed to retrieve nodes", err.Error())
  185. return
  186. }
  187. for _, node := range nodes {
  188. othernodes, err := GetNetworkNodes(node.Network)
  189. if err != nil {
  190. logger.Log(1, "failled to retrieve nodes for network", node.Network, err.Error())
  191. continue
  192. }
  193. for _, othernode := range othernodes {
  194. if node.ID == othernode.ID {
  195. continue
  196. }
  197. if node.HostID == othernode.HostID {
  198. if node.LastCheckIn.After(othernode.LastCheckIn) {
  199. newZombie <- othernode.ID
  200. logger.Log(1, "adding", othernode.ID.String(), "to zombie list")
  201. } else {
  202. newZombie <- node.ID
  203. logger.Log(1, "adding", node.ID.String(), "to zombie list")
  204. }
  205. }
  206. }
  207. }
  208. }