handlers.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373
  1. package mq
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "time"
  6. mqtt "github.com/eclipse/paho.mqtt.golang"
  7. "github.com/gravitl/netmaker/database"
  8. "github.com/gravitl/netmaker/logger"
  9. "github.com/gravitl/netmaker/logic"
  10. "github.com/gravitl/netmaker/logic/hostactions"
  11. "github.com/gravitl/netmaker/models"
  12. "github.com/gravitl/netmaker/netclient/ncutils"
  13. "github.com/gravitl/netmaker/servercfg"
  14. )
  15. // DefaultHandler default message queue handler -- NOT USED
  16. func DefaultHandler(client mqtt.Client, msg mqtt.Message) {
  17. logger.Log(0, "MQTT Message: Topic: ", string(msg.Topic()), " Message: ", string(msg.Payload()))
  18. }
  19. // Ping message Handler -- handles ping topic from client nodes
  20. func Ping(client mqtt.Client, msg mqtt.Message) {
  21. go func() {
  22. id, err := getID(msg.Topic())
  23. if err != nil {
  24. logger.Log(0, "error getting node.ID sent on ping topic ")
  25. return
  26. }
  27. node, err := logic.GetNodeByID(id)
  28. if err != nil {
  29. logger.Log(3, "mq-ping error getting node: ", err.Error())
  30. record, err := database.FetchRecord(database.NODES_TABLE_NAME, id)
  31. if err != nil {
  32. logger.Log(3, "error reading database ", err.Error())
  33. return
  34. }
  35. logger.Log(3, "record from database")
  36. logger.Log(3, record)
  37. return
  38. }
  39. decrypted, decryptErr := decryptMsg(&node, msg.Payload())
  40. if decryptErr != nil {
  41. logger.Log(0, "error decrypting when updating node ", node.ID.String(), decryptErr.Error())
  42. return
  43. }
  44. var checkin models.NodeCheckin
  45. if err := json.Unmarshal(decrypted, &checkin); err != nil {
  46. logger.Log(1, "error unmarshaling payload ", err.Error())
  47. return
  48. }
  49. host, err := logic.GetHost(node.HostID.String())
  50. if err != nil {
  51. logger.Log(0, "error retrieving host for node ", node.ID.String(), err.Error())
  52. return
  53. }
  54. node.SetLastCheckIn()
  55. host.Version = checkin.Version
  56. node.Connected = checkin.Connected
  57. host.Interfaces = checkin.Ifaces
  58. for i := range host.Interfaces {
  59. host.Interfaces[i].AddressString = host.Interfaces[i].Address.String()
  60. }
  61. if err := logic.UpdateNode(&node, &node); err != nil {
  62. logger.Log(0, "error updating node", node.ID.String(), " on checkin", err.Error())
  63. return
  64. }
  65. logger.Log(3, "ping processed for node", node.ID.String())
  66. // --TODO --set client version once feature is implemented.
  67. //node.SetClientVersion(msg.Payload())
  68. }()
  69. }
  70. // UpdateNode message Handler -- handles updates from client nodes
  71. func UpdateNode(client mqtt.Client, msg mqtt.Message) {
  72. go func() {
  73. id, err := getID(msg.Topic())
  74. if err != nil {
  75. logger.Log(1, "error getting node.ID sent on ", msg.Topic(), err.Error())
  76. return
  77. }
  78. currentNode, err := logic.GetNodeByID(id)
  79. if err != nil {
  80. logger.Log(1, "error getting node ", id, err.Error())
  81. return
  82. }
  83. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  84. if decryptErr != nil {
  85. logger.Log(1, "failed to decrypt message for node ", id, decryptErr.Error())
  86. return
  87. }
  88. var newNode models.Node
  89. if err := json.Unmarshal(decrypted, &newNode); err != nil {
  90. logger.Log(1, "error unmarshaling payload ", err.Error())
  91. return
  92. }
  93. ifaceDelta := logic.IfaceDelta(&currentNode, &newNode)
  94. if servercfg.Is_EE && ifaceDelta {
  95. if err = logic.EnterpriseResetAllPeersFailovers(currentNode.ID, currentNode.Network); err != nil {
  96. logger.Log(1, "failed to reset failover list during node update", currentNode.ID.String(), currentNode.Network)
  97. }
  98. }
  99. newNode.SetLastCheckIn()
  100. if err := logic.UpdateNode(&currentNode, &newNode); err != nil {
  101. logger.Log(1, "error saving node", err.Error())
  102. return
  103. }
  104. if ifaceDelta { // reduce number of unneeded updates, by only sending on iface changes
  105. if err = PublishPeerUpdate(); err != nil {
  106. logger.Log(0, "error updating peers when node", currentNode.ID.String(), "informed the server of an interface change", err.Error())
  107. }
  108. }
  109. logger.Log(1, "updated node", id, newNode.ID.String())
  110. }()
  111. }
  112. // UpdateHost message Handler -- handles host updates from clients
  113. func UpdateHost(client mqtt.Client, msg mqtt.Message) {
  114. go func(msg mqtt.Message) {
  115. id, err := getID(msg.Topic())
  116. if err != nil {
  117. logger.Log(1, "error getting host.ID sent on ", msg.Topic(), err.Error())
  118. return
  119. }
  120. currentHost, err := logic.GetHost(id)
  121. if err != nil {
  122. logger.Log(1, "error getting host ", id, err.Error())
  123. return
  124. }
  125. decrypted, decryptErr := decryptMsgWithHost(currentHost, msg.Payload())
  126. if decryptErr != nil {
  127. logger.Log(1, "failed to decrypt message for host ", id, decryptErr.Error())
  128. return
  129. }
  130. var hostUpdate models.HostUpdate
  131. if err := json.Unmarshal(decrypted, &hostUpdate); err != nil {
  132. logger.Log(1, "error unmarshaling payload ", err.Error())
  133. return
  134. }
  135. logger.Log(3, fmt.Sprintf("recieved host update: %s\n", hostUpdate.Host.ID.String()))
  136. var sendPeerUpdate bool
  137. switch hostUpdate.Action {
  138. case models.Acknowledgement:
  139. hu := hostactions.GetAction(currentHost.ID.String())
  140. if hu != nil {
  141. if err = HostUpdate(hu); err != nil {
  142. logger.Log(0, "failed to send new node to host", hostUpdate.Host.Name, currentHost.ID.String(), err.Error())
  143. return
  144. }
  145. sendPeerUpdate = true
  146. }
  147. case models.UpdateHost:
  148. sendPeerUpdate = logic.UpdateHostFromClient(&hostUpdate.Host, currentHost)
  149. err := logic.UpsertHost(currentHost)
  150. if err != nil {
  151. logger.Log(0, "failed to update host: ", currentHost.ID.String(), err.Error())
  152. return
  153. }
  154. case models.DeleteHost:
  155. if err := logic.DisassociateAllNodesFromHost(currentHost.ID.String()); err != nil {
  156. logger.Log(0, "failed to delete all nodes of host: ", currentHost.ID.String(), err.Error())
  157. return
  158. }
  159. if err := logic.RemoveHostByID(currentHost.ID.String()); err != nil {
  160. logger.Log(0, "failed to delete host: ", currentHost.ID.String(), err.Error())
  161. return
  162. }
  163. sendPeerUpdate = true
  164. }
  165. if sendPeerUpdate {
  166. err := PublishPeerUpdate()
  167. if err != nil {
  168. logger.Log(0, "failed to pulish peer update: ", err.Error())
  169. }
  170. }
  171. // if servercfg.Is_EE && ifaceDelta {
  172. // if err = logic.EnterpriseResetAllPeersFailovers(currentHost.ID.String(), currentHost.Network); err != nil {
  173. // logger.Log(1, "failed to reset failover list during node update", currentHost.ID.String(), currentHost.Network)
  174. // }
  175. // }
  176. }(msg)
  177. }
  178. // UpdateMetrics message Handler -- handles updates from client nodes for metrics
  179. func UpdateMetrics(client mqtt.Client, msg mqtt.Message) {
  180. if servercfg.Is_EE {
  181. go func() {
  182. id, err := getID(msg.Topic())
  183. if err != nil {
  184. logger.Log(1, "error getting node.ID sent on ", msg.Topic(), err.Error())
  185. return
  186. }
  187. currentNode, err := logic.GetNodeByID(id)
  188. if err != nil {
  189. logger.Log(1, "error getting node ", id, err.Error())
  190. return
  191. }
  192. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  193. if decryptErr != nil {
  194. logger.Log(1, "failed to decrypt message for node ", id, decryptErr.Error())
  195. return
  196. }
  197. var newMetrics models.Metrics
  198. if err := json.Unmarshal(decrypted, &newMetrics); err != nil {
  199. logger.Log(1, "error unmarshaling payload ", err.Error())
  200. return
  201. }
  202. shouldUpdate := updateNodeMetrics(&currentNode, &newMetrics)
  203. if err = logic.UpdateMetrics(id, &newMetrics); err != nil {
  204. logger.Log(1, "faield to update node metrics", id, err.Error())
  205. return
  206. }
  207. if servercfg.IsMetricsExporter() {
  208. if err := pushMetricsToExporter(newMetrics); err != nil {
  209. logger.Log(2, fmt.Sprintf("failed to push node: [%s] metrics to exporter, err: %v",
  210. currentNode.ID, err))
  211. }
  212. }
  213. if newMetrics.Connectivity != nil {
  214. err := logic.EnterpriseFailoverFunc(&currentNode)
  215. if err != nil {
  216. logger.Log(0, "failed to failover for node", currentNode.ID.String(), "on network", currentNode.Network, "-", err.Error())
  217. }
  218. }
  219. if shouldUpdate {
  220. logger.Log(2, "updating peers after node", currentNode.ID.String(), currentNode.Network, "detected connectivity issues")
  221. host, err := logic.GetHost(currentNode.HostID.String())
  222. if err == nil {
  223. if err = PublishSingleHostPeerUpdate(host, nil); err != nil {
  224. logger.Log(0, "failed to publish update after failover peer change for node", currentNode.ID.String(), currentNode.Network)
  225. }
  226. }
  227. }
  228. logger.Log(1, "updated node metrics", id)
  229. }()
  230. }
  231. }
  232. // ClientPeerUpdate message handler -- handles updating peers after signal from client nodes
  233. func ClientPeerUpdate(client mqtt.Client, msg mqtt.Message) {
  234. go func() {
  235. id, err := getID(msg.Topic())
  236. if err != nil {
  237. logger.Log(1, "error getting node.ID sent on ", msg.Topic(), err.Error())
  238. return
  239. }
  240. currentNode, err := logic.GetNodeByID(id)
  241. if err != nil {
  242. logger.Log(1, "error getting node ", id, err.Error())
  243. return
  244. }
  245. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  246. if decryptErr != nil {
  247. logger.Log(1, "failed to decrypt message during client peer update for node ", id, decryptErr.Error())
  248. return
  249. }
  250. switch decrypted[0] {
  251. case ncutils.ACK:
  252. //do we still need this
  253. case ncutils.DONE:
  254. updateNodePeers(&currentNode)
  255. }
  256. logger.Log(1, "sent peer updates after signal received from", id)
  257. }()
  258. }
  259. func updateNodePeers(currentNode *models.Node) {
  260. if err := PublishPeerUpdate(); err != nil {
  261. logger.Log(1, "error publishing peer update ", err.Error())
  262. return
  263. }
  264. }
  265. func updateNodeMetrics(currentNode *models.Node, newMetrics *models.Metrics) bool {
  266. if newMetrics.FailoverPeers == nil {
  267. newMetrics.FailoverPeers = make(map[string]string)
  268. }
  269. oldMetrics, err := logic.GetMetrics(currentNode.ID.String())
  270. if err != nil {
  271. logger.Log(1, "error finding old metrics for node", currentNode.ID.String())
  272. return false
  273. }
  274. if oldMetrics.FailoverPeers == nil {
  275. oldMetrics.FailoverPeers = make(map[string]string)
  276. }
  277. var attachedClients []models.ExtClient
  278. if currentNode.IsIngressGateway {
  279. clients, err := logic.GetExtClientsByID(currentNode.ID.String(), currentNode.Network)
  280. if err == nil {
  281. attachedClients = clients
  282. }
  283. }
  284. if len(attachedClients) > 0 {
  285. // associate ext clients with IDs
  286. for i := range attachedClients {
  287. extMetric := newMetrics.Connectivity[attachedClients[i].PublicKey]
  288. if len(extMetric.NodeName) == 0 &&
  289. len(newMetrics.Connectivity[attachedClients[i].ClientID].NodeName) > 0 { // cover server clients
  290. extMetric = newMetrics.Connectivity[attachedClients[i].ClientID]
  291. if extMetric.TotalReceived > 0 && extMetric.TotalSent > 0 {
  292. extMetric.Connected = true
  293. }
  294. }
  295. extMetric.NodeName = attachedClients[i].ClientID
  296. delete(newMetrics.Connectivity, attachedClients[i].PublicKey)
  297. newMetrics.Connectivity[attachedClients[i].ClientID] = extMetric
  298. }
  299. }
  300. // run through metrics for each peer
  301. for k := range newMetrics.Connectivity {
  302. currMetric := newMetrics.Connectivity[k]
  303. oldMetric := oldMetrics.Connectivity[k]
  304. currMetric.TotalTime += oldMetric.TotalTime
  305. currMetric.Uptime += oldMetric.Uptime // get the total uptime for this connection
  306. if currMetric.Uptime == 0 || currMetric.TotalTime == 0 {
  307. currMetric.PercentUp = 0
  308. } else {
  309. currMetric.PercentUp = 100.0 * (float64(currMetric.Uptime) / float64(currMetric.TotalTime))
  310. }
  311. totalUpMinutes := currMetric.Uptime * ncutils.CheckInInterval
  312. currMetric.ActualUptime = time.Duration(totalUpMinutes) * time.Minute
  313. delete(oldMetrics.Connectivity, k) // remove from old data
  314. newMetrics.Connectivity[k] = currMetric
  315. }
  316. // add nodes that need failover
  317. nodes, err := logic.GetNetworkNodes(currentNode.Network)
  318. if err != nil {
  319. logger.Log(0, "failed to retrieve nodes while updating metrics")
  320. return false
  321. }
  322. for _, node := range nodes {
  323. if !newMetrics.Connectivity[node.ID.String()].Connected &&
  324. len(newMetrics.Connectivity[node.ID.String()].NodeName) > 0 &&
  325. node.Connected &&
  326. len(node.FailoverNode) > 0 &&
  327. !node.Failover {
  328. newMetrics.FailoverPeers[node.ID.String()] = node.FailoverNode.String()
  329. }
  330. }
  331. shouldUpdate := len(oldMetrics.FailoverPeers) == 0 && len(newMetrics.FailoverPeers) > 0
  332. for k, v := range oldMetrics.FailoverPeers {
  333. if len(newMetrics.FailoverPeers[k]) > 0 && len(v) == 0 {
  334. shouldUpdate = true
  335. }
  336. if len(v) > 0 && len(newMetrics.FailoverPeers[k]) == 0 {
  337. newMetrics.FailoverPeers[k] = v
  338. }
  339. }
  340. for k := range oldMetrics.Connectivity { // cleanup any left over data, self healing
  341. delete(newMetrics.Connectivity, k)
  342. }
  343. return shouldUpdate
  344. }