publishers.go 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. package mq
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "fmt"
  6. "sort"
  7. "time"
  8. "github.com/gravitl/netmaker/logger"
  9. "github.com/gravitl/netmaker/logic"
  10. "github.com/gravitl/netmaker/logic/metrics"
  11. "github.com/gravitl/netmaker/models"
  12. "github.com/gravitl/netmaker/servercfg"
  13. "github.com/gravitl/netmaker/serverctl"
  14. )
  15. // PublishPeerUpdate --- deterines and publishes a peer update to all the peers of a node
  16. func PublishPeerUpdate(newNode *models.Node, publishToSelf bool) error {
  17. if !servercfg.IsMessageQueueBackend() {
  18. return nil
  19. }
  20. networkNodes, err := logic.GetNetworkNodes(newNode.Network)
  21. if err != nil {
  22. logger.Log(1, "err getting Network Nodes", err.Error())
  23. return err
  24. }
  25. sort.Slice(networkNodes, func(i, j int) bool {
  26. return networkNodes[i].PublicKey < networkNodes[j].PublicKey
  27. })
  28. for _, node := range networkNodes {
  29. if node.IsServer == "yes" {
  30. continue
  31. }
  32. if !publishToSelf && newNode.ID == node.ID {
  33. //skip self
  34. continue
  35. }
  36. err = PublishSinglePeerUpdate(&node)
  37. if err != nil {
  38. logger.Log(1, "failed to publish peer update to node", node.Name, "on network", node.Network, ":", err.Error())
  39. }
  40. }
  41. return err
  42. }
  43. // PublishSinglePeerUpdate --- determines and publishes a peer update to one node
  44. func PublishSinglePeerUpdate(node *models.Node) error {
  45. peerUpdate, err := logic.GetPeerUpdate(node)
  46. if err != nil {
  47. return err
  48. }
  49. data, err := json.Marshal(&peerUpdate)
  50. if err != nil {
  51. return err
  52. }
  53. return publish(node, fmt.Sprintf("peers/%s/%s", node.Network, node.ID), data)
  54. }
  55. // PublishPeerUpdate --- publishes a peer update to all the peers of a node
  56. func PublishExtPeerUpdate(node *models.Node) error {
  57. var err error
  58. if logic.IsLocalServer(node) {
  59. if err = logic.ServerUpdate(node, false); err != nil {
  60. logger.Log(1, "server node:", node.ID, "failed to update peers with ext clients")
  61. return err
  62. } else {
  63. return nil
  64. }
  65. }
  66. if !servercfg.IsMessageQueueBackend() {
  67. return nil
  68. }
  69. peerUpdate, err := logic.GetPeerUpdate(node)
  70. if err != nil {
  71. return err
  72. }
  73. data, err := json.Marshal(&peerUpdate)
  74. if err != nil {
  75. return err
  76. }
  77. if err = publish(node, fmt.Sprintf("peers/%s/%s", node.Network, node.ID), data); err != nil {
  78. return err
  79. }
  80. go PublishPeerUpdate(node, false)
  81. return nil
  82. }
  83. // NodeUpdate -- publishes a node update
  84. func NodeUpdate(node *models.Node) error {
  85. if !servercfg.IsMessageQueueBackend() || node.IsServer == "yes" {
  86. return nil
  87. }
  88. logger.Log(3, "publishing node update to "+node.Name)
  89. if len(node.NetworkSettings.AccessKeys) > 0 {
  90. node.NetworkSettings.AccessKeys = []models.AccessKey{} // not to be sent (don't need to spread access keys around the network; we need to know how to reach other nodes, not become them)
  91. }
  92. data, err := json.Marshal(node)
  93. if err != nil {
  94. logger.Log(2, "error marshalling node update ", err.Error())
  95. return err
  96. }
  97. if err = publish(node, fmt.Sprintf("update/%s/%s", node.Network, node.ID), data); err != nil {
  98. logger.Log(2, "error publishing node update to peer ", node.ID, err.Error())
  99. return err
  100. }
  101. return nil
  102. }
  103. // sendPeers - retrieve networks, send peer ports to all peers
  104. func sendPeers() {
  105. networks, err := logic.GetNetworks()
  106. if err != nil {
  107. logger.Log(1, "error retrieving networks for keepalive", err.Error())
  108. }
  109. var force bool
  110. peer_force_send++
  111. if peer_force_send == 5 {
  112. // run iptables update to ensure gateways work correctly and mq is forwarded if containerized
  113. if servercfg.ManageIPTables() != "off" {
  114. serverctl.InitIPTables(false)
  115. }
  116. servercfg.SetHost()
  117. force = true
  118. peer_force_send = 0
  119. err := logic.TimerCheckpoint() // run telemetry & log dumps if 24 hours has passed..
  120. if err != nil {
  121. logger.Log(3, "error occurred on timer,", err.Error())
  122. }
  123. collectServerMetrics(networks[:])
  124. }
  125. for _, network := range networks {
  126. serverNode, errN := logic.GetNetworkServerLocal(network.NetID)
  127. if errN == nil {
  128. serverNode.SetLastCheckIn()
  129. if err := logic.UpdateNode(&serverNode, &serverNode); err != nil {
  130. logger.Log(0, "failed checkin for server node", serverNode.Name, "on network", network.NetID, err.Error())
  131. }
  132. }
  133. isLeader := logic.IsLeader(&serverNode)
  134. if errN == nil && isLeader {
  135. if network.DefaultUDPHolePunch == "yes" {
  136. if logic.ShouldPublishPeerPorts(&serverNode) || force {
  137. if force {
  138. logger.Log(2, "sending scheduled peer update (5 min)")
  139. }
  140. err = PublishPeerUpdate(&serverNode, false)
  141. if err != nil {
  142. logger.Log(1, "error publishing udp port updates for network", network.NetID)
  143. logger.Log(1, errN.Error())
  144. }
  145. }
  146. }
  147. } else {
  148. if isLeader {
  149. logger.Log(1, "unable to retrieve leader for network ", network.NetID)
  150. }
  151. logger.Log(2, "server checkin complete for server", serverNode.Name, "on network", network.NetID)
  152. serverctl.SyncServerNetwork(network.NetID)
  153. if errN != nil {
  154. logger.Log(1, errN.Error())
  155. }
  156. }
  157. }
  158. }
  159. // ServerStartNotify - notifies all non server nodes to pull changes after a restart
  160. func ServerStartNotify() error {
  161. nodes, err := logic.GetAllNodes()
  162. if err != nil {
  163. return err
  164. }
  165. for i := range nodes {
  166. nodes[i].Action = models.NODE_FORCE_UPDATE
  167. if err = NodeUpdate(&nodes[i]); err != nil {
  168. logger.Log(1, "error when notifying node", nodes[i].Name, " - ", nodes[i].ID, "of a server startup")
  169. }
  170. }
  171. return nil
  172. }
  173. // function to collect and store metrics for server nodes
  174. func collectServerMetrics(networks []models.Network) {
  175. if !servercfg.Is_EE {
  176. return
  177. }
  178. if len(networks) > 0 {
  179. for i := range networks {
  180. currentNetworkNodes, err := logic.GetNetworkNodes(networks[i].NetID)
  181. if err != nil {
  182. continue
  183. }
  184. currentServerNodes := logic.GetServerNodes(networks[i].NetID)
  185. if len(currentServerNodes) > 0 {
  186. for i := range currentServerNodes {
  187. if logic.IsLocalServer(&currentServerNodes[i]) {
  188. serverMetrics := logic.CollectServerMetrics(currentServerNodes[i].ID, currentNetworkNodes)
  189. if serverMetrics != nil {
  190. serverMetrics.NodeName = currentServerNodes[i].Name
  191. serverMetrics.NodeID = currentServerNodes[i].ID
  192. serverMetrics.IsServer = "yes"
  193. serverMetrics.Network = currentServerNodes[i].Network
  194. if err = metrics.GetExchangedBytesForNode(&currentServerNodes[i], serverMetrics); err != nil {
  195. logger.Log(1, fmt.Sprintf("failed to update exchanged bytes info for server: %s, err: %v",
  196. currentServerNodes[i].Name, err))
  197. }
  198. updateNodeMetrics(&currentServerNodes[i], serverMetrics)
  199. if err = logic.UpdateMetrics(currentServerNodes[i].ID, serverMetrics); err != nil {
  200. logger.Log(1, "failed to update metrics for server node", currentServerNodes[i].ID)
  201. }
  202. if servercfg.IsMetricsExporter() {
  203. logger.Log(2, "-------------> SERVER METRICS: ", fmt.Sprintf("%+v", serverMetrics))
  204. if err := pushMetricsToExporter(*serverMetrics); err != nil {
  205. logger.Log(2, "failed to push server metrics to exporter: ", err.Error())
  206. }
  207. }
  208. }
  209. }
  210. }
  211. }
  212. }
  213. }
  214. }
  215. func pushMetricsToExporter(metrics models.Metrics) error {
  216. logger.Log(2, "----> Pushing metrics to exporter")
  217. data, err := json.Marshal(metrics)
  218. if err != nil {
  219. return errors.New("failed to marshal metrics: " + err.Error())
  220. }
  221. if token := mqclient.Publish("metrics_exporter", 2, true, data); !token.WaitTimeout(MQ_TIMEOUT*time.Second) || token.Error() != nil {
  222. var err error
  223. if token.Error() == nil {
  224. err = errors.New("connection timeout")
  225. } else {
  226. err = token.Error()
  227. }
  228. return err
  229. }
  230. return nil
  231. }