handlers.go 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. package mq
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "time"
  6. mqtt "github.com/eclipse/paho.mqtt.golang"
  7. "github.com/gravitl/netmaker/database"
  8. "github.com/gravitl/netmaker/logger"
  9. "github.com/gravitl/netmaker/logic"
  10. "github.com/gravitl/netmaker/models"
  11. "github.com/gravitl/netmaker/netclient/ncutils"
  12. "github.com/gravitl/netmaker/servercfg"
  13. )
  14. // DefaultHandler default message queue handler -- NOT USED
  15. func DefaultHandler(client mqtt.Client, msg mqtt.Message) {
  16. logger.Log(0, "MQTT Message: Topic: ", string(msg.Topic()), " Message: ", string(msg.Payload()))
  17. }
  18. // Ping message Handler -- handles ping topic from client nodes
  19. func Ping(client mqtt.Client, msg mqtt.Message) {
  20. go func() {
  21. id, err := getID(msg.Topic())
  22. if err != nil {
  23. logger.Log(0, "error getting node.ID sent on ping topic ")
  24. return
  25. }
  26. node, err := logic.GetNodeByID(id)
  27. if err != nil {
  28. logger.Log(0, "mq-ping error getting node: ", err.Error())
  29. record, err := database.FetchRecord(database.NODES_TABLE_NAME, id)
  30. if err != nil {
  31. logger.Log(0, "error reading database ", err.Error())
  32. return
  33. }
  34. logger.Log(0, "record from database")
  35. logger.Log(0, record)
  36. return
  37. }
  38. decrypted, decryptErr := decryptMsg(&node, msg.Payload())
  39. if decryptErr != nil {
  40. logger.Log(0, "error decrypting when updating node ", node.ID, decryptErr.Error())
  41. return
  42. }
  43. var checkin models.NodeCheckin
  44. if err := json.Unmarshal(decrypted, &checkin); err != nil {
  45. logger.Log(1, "error unmarshaling payload ", err.Error())
  46. return
  47. }
  48. node.SetLastCheckIn()
  49. node.Version = checkin.Version
  50. node.Connected = checkin.Connected
  51. if err := logic.UpdateNode(&node, &node); err != nil {
  52. logger.Log(0, "error updating node", node.Name, node.ID, " on checkin", err.Error())
  53. return
  54. }
  55. logger.Log(3, "ping processed for node", node.Name, node.ID)
  56. // --TODO --set client version once feature is implemented.
  57. //node.SetClientVersion(msg.Payload())
  58. }()
  59. }
  60. // UpdateNode message Handler -- handles updates from client nodes
  61. func UpdateNode(client mqtt.Client, msg mqtt.Message) {
  62. go func() {
  63. id, err := getID(msg.Topic())
  64. if err != nil {
  65. logger.Log(1, "error getting node.ID sent on ", msg.Topic(), err.Error())
  66. return
  67. }
  68. currentNode, err := logic.GetNodeByID(id)
  69. if err != nil {
  70. logger.Log(1, "error getting node ", id, err.Error())
  71. return
  72. }
  73. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  74. if decryptErr != nil {
  75. logger.Log(1, "failed to decrypt message for node ", id, decryptErr.Error())
  76. return
  77. }
  78. var newNode models.Node
  79. if err := json.Unmarshal(decrypted, &newNode); err != nil {
  80. logger.Log(1, "error unmarshaling payload ", err.Error())
  81. return
  82. }
  83. ifaceDelta := logic.IfaceDelta(&currentNode, &newNode)
  84. if servercfg.Is_EE && ifaceDelta {
  85. if err = logic.EnterpriseResetAllPeersFailovers(currentNode.ID, currentNode.Network); err != nil {
  86. logger.Log(1, "failed to reset failover list during node update", currentNode.Name, currentNode.Network)
  87. }
  88. }
  89. newNode.SetLastCheckIn()
  90. if err := logic.UpdateNode(&currentNode, &newNode); err != nil {
  91. logger.Log(1, "error saving node", err.Error())
  92. return
  93. }
  94. updateNodePeers(&currentNode)
  95. logger.Log(1, "updated node", id, newNode.Name)
  96. }()
  97. }
  98. // UpdateMetrics message Handler -- handles updates from client nodes for metrics
  99. func UpdateMetrics(client mqtt.Client, msg mqtt.Message) {
  100. if servercfg.Is_EE {
  101. go func() {
  102. id, err := getID(msg.Topic())
  103. if err != nil {
  104. logger.Log(1, "error getting node.ID sent on ", msg.Topic(), err.Error())
  105. return
  106. }
  107. currentNode, err := logic.GetNodeByID(id)
  108. if err != nil {
  109. logger.Log(1, "error getting node ", id, err.Error())
  110. return
  111. }
  112. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  113. if decryptErr != nil {
  114. logger.Log(1, "failed to decrypt message for node ", id, decryptErr.Error())
  115. return
  116. }
  117. var newMetrics models.Metrics
  118. if err := json.Unmarshal(decrypted, &newMetrics); err != nil {
  119. logger.Log(1, "error unmarshaling payload ", err.Error())
  120. return
  121. }
  122. shouldUpdate := updateNodeMetrics(&currentNode, &newMetrics)
  123. if err = logic.UpdateMetrics(id, &newMetrics); err != nil {
  124. logger.Log(1, "faield to update node metrics", id, currentNode.Name, err.Error())
  125. return
  126. }
  127. if servercfg.IsMetricsExporter() {
  128. if err := pushMetricsToExporter(newMetrics); err != nil {
  129. logger.Log(2, fmt.Sprintf("failed to push node: [%s] metrics to exporter, err: %v",
  130. currentNode.Name, err))
  131. }
  132. }
  133. if newMetrics.Connectivity != nil {
  134. err := logic.EnterpriseFailoverFunc(&currentNode)
  135. if err != nil {
  136. logger.Log(0, "failed to failover for node", currentNode.Name, "on network", currentNode.Network, "-", err.Error())
  137. }
  138. }
  139. if shouldUpdate {
  140. logger.Log(2, "updating peers after node", currentNode.Name, currentNode.Network, "detected connectivity issues")
  141. if err = PublishSinglePeerUpdate(&currentNode); err != nil {
  142. logger.Log(0, "failed to publish update after failover peer change for node", currentNode.Name, currentNode.Network)
  143. }
  144. }
  145. logger.Log(1, "updated node metrics", id, currentNode.Name)
  146. }()
  147. }
  148. }
  149. // ClientPeerUpdate message handler -- handles updating peers after signal from client nodes
  150. func ClientPeerUpdate(client mqtt.Client, msg mqtt.Message) {
  151. go func() {
  152. id, err := getID(msg.Topic())
  153. if err != nil {
  154. logger.Log(1, "error getting node.ID sent on ", msg.Topic(), err.Error())
  155. return
  156. }
  157. currentNode, err := logic.GetNodeByID(id)
  158. if err != nil {
  159. logger.Log(1, "error getting node ", id, err.Error())
  160. return
  161. }
  162. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  163. if decryptErr != nil {
  164. logger.Log(1, "failed to decrypt message during client peer update for node ", id, decryptErr.Error())
  165. return
  166. }
  167. switch decrypted[0] {
  168. case ncutils.ACK:
  169. currentServerNode, err := logic.GetNetworkServerLocal(currentNode.Network)
  170. if err != nil {
  171. return
  172. }
  173. if err := logic.ServerUpdate(&currentServerNode, false); err != nil {
  174. logger.Log(1, "server node:", currentServerNode.ID, "failed update")
  175. return
  176. }
  177. case ncutils.DONE:
  178. updateNodePeers(&currentNode)
  179. }
  180. logger.Log(1, "sent peer updates after signal received from", id, currentNode.Name)
  181. }()
  182. }
  183. func updateNodePeers(currentNode *models.Node) {
  184. currentServerNode, err := logic.GetNetworkServerLocal(currentNode.Network)
  185. if err != nil {
  186. logger.Log(1, "failed to get server node failed update\n", err.Error())
  187. return
  188. }
  189. if err := logic.ServerUpdate(&currentServerNode, false); err != nil {
  190. logger.Log(1, "server node:", currentServerNode.ID, "failed update")
  191. return
  192. }
  193. if logic.IsLeader(&currentServerNode) {
  194. if err := PublishPeerUpdate(currentNode, false); err != nil {
  195. logger.Log(1, "error publishing peer update ", err.Error())
  196. return
  197. }
  198. }
  199. }
  200. func updateNodeMetrics(currentNode *models.Node, newMetrics *models.Metrics) bool {
  201. if newMetrics.FailoverPeers == nil {
  202. newMetrics.FailoverPeers = make(map[string]string)
  203. }
  204. oldMetrics, err := logic.GetMetrics(currentNode.ID)
  205. if err != nil {
  206. logger.Log(1, "error finding old metrics for node", currentNode.ID, currentNode.Name)
  207. return false
  208. }
  209. if oldMetrics.FailoverPeers == nil {
  210. oldMetrics.FailoverPeers = make(map[string]string)
  211. }
  212. var attachedClients []models.ExtClient
  213. if currentNode.IsIngressGateway == "yes" {
  214. clients, err := logic.GetExtClientsByID(currentNode.ID, currentNode.Network)
  215. if err == nil {
  216. attachedClients = clients
  217. }
  218. }
  219. if len(attachedClients) > 0 {
  220. // associate ext clients with IDs
  221. for i := range attachedClients {
  222. extMetric := newMetrics.Connectivity[attachedClients[i].PublicKey]
  223. extMetric.NodeName = attachedClients[i].ClientID
  224. extMetric.IsServer = "no"
  225. delete(newMetrics.Connectivity, attachedClients[i].PublicKey)
  226. newMetrics.Connectivity[attachedClients[i].ClientID] = extMetric
  227. }
  228. }
  229. // run through metrics for each peer
  230. for k := range newMetrics.Connectivity {
  231. currMetric := newMetrics.Connectivity[k]
  232. oldMetric := oldMetrics.Connectivity[k]
  233. currMetric.TotalTime += oldMetric.TotalTime
  234. currMetric.Uptime += oldMetric.Uptime // get the total uptime for this connection
  235. if currMetric.Uptime == 0 || currMetric.TotalTime == 0 {
  236. currMetric.PercentUp = 0
  237. } else {
  238. currMetric.PercentUp = 100.0 * (float64(currMetric.Uptime) / float64(currMetric.TotalTime))
  239. }
  240. totalUpMinutes := currMetric.Uptime * ncutils.CheckInInterval
  241. currMetric.ActualUptime = time.Duration(totalUpMinutes) * time.Minute
  242. delete(oldMetrics.Connectivity, k) // remove from old data
  243. newMetrics.Connectivity[k] = currMetric
  244. }
  245. // add nodes that need failover
  246. nodes, err := logic.GetNetworkNodes(currentNode.Network)
  247. if err != nil {
  248. logger.Log(0, "failed to retrieve nodes while updating metrics")
  249. return false
  250. }
  251. for _, node := range nodes {
  252. if !newMetrics.Connectivity[node.ID].Connected &&
  253. len(newMetrics.Connectivity[node.ID].NodeName) > 0 &&
  254. node.Connected == "yes" &&
  255. len(node.FailoverNode) > 0 &&
  256. node.Failover != "yes" {
  257. newMetrics.FailoverPeers[node.ID] = node.FailoverNode
  258. }
  259. }
  260. shouldUpdate := len(oldMetrics.FailoverPeers) == 0 && len(newMetrics.FailoverPeers) > 0
  261. for k, v := range oldMetrics.FailoverPeers {
  262. if len(newMetrics.FailoverPeers[k]) > 0 && len(v) == 0 {
  263. shouldUpdate = true
  264. }
  265. if len(v) > 0 && len(newMetrics.FailoverPeers[k]) == 0 {
  266. newMetrics.FailoverPeers[k] = v
  267. }
  268. }
  269. for k := range oldMetrics.Connectivity { // cleanup any left over data, self healing
  270. delete(newMetrics.Connectivity, k)
  271. }
  272. return shouldUpdate
  273. }