handlers.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405
  1. package mq
  2. import (
  3. "context"
  4. "encoding/json"
  5. "fmt"
  6. "math"
  7. "time"
  8. mqtt "github.com/eclipse/paho.mqtt.golang"
  9. "github.com/google/uuid"
  10. "github.com/gravitl/netmaker/database"
  11. "github.com/gravitl/netmaker/logger"
  12. "github.com/gravitl/netmaker/logic"
  13. "github.com/gravitl/netmaker/logic/hostactions"
  14. "github.com/gravitl/netmaker/models"
  15. "github.com/gravitl/netmaker/netclient/ncutils"
  16. "github.com/gravitl/netmaker/servercfg"
  17. )
  18. // DefaultHandler default message queue handler -- NOT USED
  19. func DefaultHandler(client mqtt.Client, msg mqtt.Message) {
  20. logger.Log(0, "MQTT Message: Topic: ", string(msg.Topic()), " Message: ", string(msg.Payload()))
  21. }
  22. // UpdateNode message Handler -- handles updates from client nodes
  23. func UpdateNode(client mqtt.Client, msg mqtt.Message) {
  24. id, err := getID(msg.Topic())
  25. if err != nil {
  26. logger.Log(1, "error getting node.ID sent on ", msg.Topic(), err.Error())
  27. return
  28. }
  29. currentNode, err := logic.GetNodeByID(id)
  30. if err != nil {
  31. logger.Log(1, "error getting node ", id, err.Error())
  32. return
  33. }
  34. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  35. if decryptErr != nil {
  36. logger.Log(1, "failed to decrypt message for node ", id, decryptErr.Error())
  37. return
  38. }
  39. var newNode models.Node
  40. if err := json.Unmarshal(decrypted, &newNode); err != nil {
  41. logger.Log(1, "error unmarshaling payload ", err.Error())
  42. return
  43. }
  44. ifaceDelta := logic.IfaceDelta(&currentNode, &newNode)
  45. if servercfg.Is_EE && ifaceDelta {
  46. if err = logic.EnterpriseResetAllPeersFailovers(currentNode.ID, currentNode.Network); err != nil {
  47. logger.Log(1, "failed to reset failover list during node update", currentNode.ID.String(), currentNode.Network)
  48. }
  49. }
  50. newNode.SetLastCheckIn()
  51. if err := logic.UpdateNode(&currentNode, &newNode); err != nil {
  52. logger.Log(1, "error saving node", err.Error())
  53. return
  54. }
  55. if ifaceDelta { // reduce number of unneeded updates, by only sending on iface changes
  56. if err = PublishPeerUpdate(); err != nil {
  57. logger.Log(0, "error updating peers when node", currentNode.ID.String(), "informed the server of an interface change", err.Error())
  58. }
  59. }
  60. logger.Log(1, "updated node", id, newNode.ID.String())
  61. }
  62. // UpdateHost message Handler -- handles host updates from clients
  63. func UpdateHost(client mqtt.Client, msg mqtt.Message) {
  64. id, err := getID(msg.Topic())
  65. if err != nil {
  66. logger.Log(1, "error getting host.ID sent on ", msg.Topic(), err.Error())
  67. return
  68. }
  69. currentHost, err := logic.GetHost(id)
  70. if err != nil {
  71. logger.Log(1, "error getting host ", id, err.Error())
  72. return
  73. }
  74. decrypted, decryptErr := decryptMsgWithHost(currentHost, msg.Payload())
  75. if decryptErr != nil {
  76. logger.Log(1, "failed to decrypt message for host ", id, decryptErr.Error())
  77. return
  78. }
  79. var hostUpdate models.HostUpdate
  80. if err := json.Unmarshal(decrypted, &hostUpdate); err != nil {
  81. logger.Log(1, "error unmarshaling payload ", err.Error())
  82. return
  83. }
  84. logger.Log(3, fmt.Sprintf("recieved host update: %s\n", hostUpdate.Host.ID.String()))
  85. var sendPeerUpdate bool
  86. switch hostUpdate.Action {
  87. case models.CheckIn:
  88. sendPeerUpdate = handleHostCheckin(&hostUpdate.Host, currentHost)
  89. case models.Acknowledgement:
  90. hu := hostactions.GetAction(currentHost.ID.String())
  91. if hu != nil {
  92. if err = HostUpdate(hu); err != nil {
  93. logger.Log(0, "failed to send new node to host", hostUpdate.Host.Name, currentHost.ID.String(), err.Error())
  94. return
  95. } else {
  96. if err = PublishSingleHostPeerUpdate(context.Background(), currentHost, nil, nil); err != nil {
  97. logger.Log(0, "failed peers publish after join acknowledged", hostUpdate.Host.Name, currentHost.ID.String(), err.Error())
  98. return
  99. }
  100. if err = handleNewNodeDNS(&hu.Host, &hu.Node); err != nil {
  101. logger.Log(0, "failed to send dns update after node,", hu.Node.ID.String(), ", added to host", hu.Host.Name, err.Error())
  102. return
  103. }
  104. }
  105. }
  106. case models.UpdateHost:
  107. sendPeerUpdate = logic.UpdateHostFromClient(&hostUpdate.Host, currentHost)
  108. err := logic.UpsertHost(currentHost)
  109. if err != nil {
  110. logger.Log(0, "failed to update host: ", currentHost.ID.String(), err.Error())
  111. return
  112. }
  113. case models.DeleteHost:
  114. if servercfg.GetBrokerType() == servercfg.EmqxBrokerType {
  115. // delete EMQX credentials for host
  116. if err := DeleteEmqxUser(currentHost.ID.String()); err != nil {
  117. logger.Log(0, "failed to remove host credentials from EMQX: ", currentHost.ID.String(), err.Error())
  118. return
  119. }
  120. }
  121. if err := logic.DisassociateAllNodesFromHost(currentHost.ID.String()); err != nil {
  122. logger.Log(0, "failed to delete all nodes of host: ", currentHost.ID.String(), err.Error())
  123. return
  124. }
  125. if err := logic.RemoveHostByID(currentHost.ID.String()); err != nil {
  126. logger.Log(0, "failed to delete host: ", currentHost.ID.String(), err.Error())
  127. return
  128. }
  129. sendPeerUpdate = true
  130. }
  131. if sendPeerUpdate {
  132. err := PublishPeerUpdate()
  133. if err != nil {
  134. logger.Log(0, "failed to pulish peer update: ", err.Error())
  135. }
  136. }
  137. // if servercfg.Is_EE && ifaceDelta {
  138. // if err = logic.EnterpriseResetAllPeersFailovers(currentHost.ID.String(), currentHost.Network); err != nil {
  139. // logger.Log(1, "failed to reset failover list during node update", currentHost.ID.String(), currentHost.Network)
  140. // }
  141. // }
  142. }
  143. // UpdateMetrics message Handler -- handles updates from client nodes for metrics
  144. func UpdateMetrics(client mqtt.Client, msg mqtt.Message) {
  145. if servercfg.Is_EE {
  146. id, err := getID(msg.Topic())
  147. if err != nil {
  148. logger.Log(1, "error getting node.ID sent on ", msg.Topic(), err.Error())
  149. return
  150. }
  151. currentNode, err := logic.GetNodeByID(id)
  152. if err != nil {
  153. logger.Log(1, "error getting node ", id, err.Error())
  154. return
  155. }
  156. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  157. if decryptErr != nil {
  158. logger.Log(1, "failed to decrypt message for node ", id, decryptErr.Error())
  159. return
  160. }
  161. var newMetrics models.Metrics
  162. if err := json.Unmarshal(decrypted, &newMetrics); err != nil {
  163. logger.Log(1, "error unmarshaling payload ", err.Error())
  164. return
  165. }
  166. shouldUpdate := updateNodeMetrics(&currentNode, &newMetrics)
  167. if err = logic.UpdateMetrics(id, &newMetrics); err != nil {
  168. logger.Log(1, "faield to update node metrics", id, err.Error())
  169. return
  170. }
  171. if servercfg.IsMetricsExporter() {
  172. if err := pushMetricsToExporter(newMetrics); err != nil {
  173. logger.Log(2, fmt.Sprintf("failed to push node: [%s] metrics to exporter, err: %v",
  174. currentNode.ID, err))
  175. }
  176. }
  177. if newMetrics.Connectivity != nil {
  178. err := logic.EnterpriseFailoverFunc(&currentNode)
  179. if err != nil {
  180. logger.Log(0, "failed to failover for node", currentNode.ID.String(), "on network", currentNode.Network, "-", err.Error())
  181. }
  182. }
  183. if shouldUpdate {
  184. logger.Log(2, "updating peers after node", currentNode.ID.String(), currentNode.Network, "detected connectivity issues")
  185. host, err := logic.GetHost(currentNode.HostID.String())
  186. if err == nil {
  187. if err = PublishSingleHostPeerUpdate(context.Background(), host, nil, nil); err != nil {
  188. logger.Log(0, "failed to publish update after failover peer change for node", currentNode.ID.String(), currentNode.Network)
  189. }
  190. }
  191. }
  192. logger.Log(1, "updated node metrics", id)
  193. }
  194. }
  195. // ClientPeerUpdate message handler -- handles updating peers after signal from client nodes
  196. func ClientPeerUpdate(client mqtt.Client, msg mqtt.Message) {
  197. id, err := getID(msg.Topic())
  198. if err != nil {
  199. logger.Log(1, "error getting node.ID sent on ", msg.Topic(), err.Error())
  200. return
  201. }
  202. currentNode, err := logic.GetNodeByID(id)
  203. if err != nil {
  204. logger.Log(1, "error getting node ", id, err.Error())
  205. return
  206. }
  207. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  208. if decryptErr != nil {
  209. logger.Log(1, "failed to decrypt message during client peer update for node ", id, decryptErr.Error())
  210. return
  211. }
  212. switch decrypted[0] {
  213. case ncutils.ACK:
  214. // do we still need this
  215. case ncutils.DONE:
  216. if err = PublishPeerUpdate(); err != nil {
  217. logger.Log(1, "error publishing peer update for node", currentNode.ID.String(), err.Error())
  218. return
  219. }
  220. }
  221. logger.Log(1, "sent peer updates after signal received from", id)
  222. }
  223. func updateNodeMetrics(currentNode *models.Node, newMetrics *models.Metrics) bool {
  224. if newMetrics.FailoverPeers == nil {
  225. newMetrics.FailoverPeers = make(map[string]string)
  226. }
  227. oldMetrics, err := logic.GetMetrics(currentNode.ID.String())
  228. if err != nil {
  229. logger.Log(1, "error finding old metrics for node", currentNode.ID.String())
  230. return false
  231. }
  232. if oldMetrics.FailoverPeers == nil {
  233. oldMetrics.FailoverPeers = make(map[string]string)
  234. }
  235. var attachedClients []models.ExtClient
  236. if currentNode.IsIngressGateway {
  237. clients, err := logic.GetExtClientsByID(currentNode.ID.String(), currentNode.Network)
  238. if err == nil {
  239. attachedClients = clients
  240. }
  241. }
  242. if len(attachedClients) > 0 {
  243. // associate ext clients with IDs
  244. for i := range attachedClients {
  245. extMetric := newMetrics.Connectivity[attachedClients[i].PublicKey]
  246. if len(extMetric.NodeName) == 0 &&
  247. len(newMetrics.Connectivity[attachedClients[i].ClientID].NodeName) > 0 { // cover server clients
  248. extMetric = newMetrics.Connectivity[attachedClients[i].ClientID]
  249. if extMetric.TotalReceived > 0 && extMetric.TotalSent > 0 {
  250. extMetric.Connected = true
  251. }
  252. }
  253. extMetric.NodeName = attachedClients[i].ClientID
  254. delete(newMetrics.Connectivity, attachedClients[i].PublicKey)
  255. newMetrics.Connectivity[attachedClients[i].ClientID] = extMetric
  256. }
  257. }
  258. // run through metrics for each peer
  259. for k := range newMetrics.Connectivity {
  260. currMetric := newMetrics.Connectivity[k]
  261. oldMetric := oldMetrics.Connectivity[k]
  262. currMetric.TotalTime += oldMetric.TotalTime
  263. currMetric.Uptime += oldMetric.Uptime // get the total uptime for this connection
  264. if currMetric.CollectedByProxy {
  265. currMetric.TotalReceived += oldMetric.TotalReceived
  266. currMetric.TotalSent += oldMetric.TotalSent
  267. } else {
  268. if currMetric.TotalReceived < oldMetric.TotalReceived {
  269. currMetric.TotalReceived += oldMetric.TotalReceived
  270. } else {
  271. currMetric.TotalReceived += int64(math.Abs(float64(currMetric.TotalReceived) - float64(oldMetric.TotalReceived)))
  272. }
  273. if currMetric.TotalSent < oldMetric.TotalSent {
  274. currMetric.TotalSent += oldMetric.TotalSent
  275. } else {
  276. currMetric.TotalSent += int64(math.Abs(float64(currMetric.TotalSent) - float64(oldMetric.TotalSent)))
  277. }
  278. }
  279. if currMetric.Uptime == 0 || currMetric.TotalTime == 0 {
  280. currMetric.PercentUp = 0
  281. } else {
  282. currMetric.PercentUp = 100.0 * (float64(currMetric.Uptime) / float64(currMetric.TotalTime))
  283. }
  284. totalUpMinutes := currMetric.Uptime * ncutils.CheckInInterval
  285. currMetric.ActualUptime = time.Duration(totalUpMinutes) * time.Minute
  286. delete(oldMetrics.Connectivity, k) // remove from old data
  287. newMetrics.Connectivity[k] = currMetric
  288. }
  289. // add nodes that need failover
  290. nodes, err := logic.GetNetworkNodes(currentNode.Network)
  291. if err != nil {
  292. logger.Log(0, "failed to retrieve nodes while updating metrics")
  293. return false
  294. }
  295. for _, node := range nodes {
  296. if !newMetrics.Connectivity[node.ID.String()].Connected &&
  297. len(newMetrics.Connectivity[node.ID.String()].NodeName) > 0 &&
  298. node.Connected &&
  299. len(node.FailoverNode) > 0 &&
  300. !node.Failover {
  301. newMetrics.FailoverPeers[node.ID.String()] = node.FailoverNode.String()
  302. }
  303. }
  304. shouldUpdate := len(oldMetrics.FailoverPeers) == 0 && len(newMetrics.FailoverPeers) > 0
  305. for k, v := range oldMetrics.FailoverPeers {
  306. if len(newMetrics.FailoverPeers[k]) > 0 && len(v) == 0 {
  307. shouldUpdate = true
  308. }
  309. if len(v) > 0 && len(newMetrics.FailoverPeers[k]) == 0 {
  310. newMetrics.FailoverPeers[k] = v
  311. }
  312. }
  313. for k := range oldMetrics.Connectivity { // cleanup any left over data, self healing
  314. delete(newMetrics.Connectivity, k)
  315. }
  316. return shouldUpdate
  317. }
  318. func handleNewNodeDNS(host *models.Host, node *models.Node) error {
  319. dns := models.DNSUpdate{
  320. Action: models.DNSInsert,
  321. Name: host.Name + "." + node.Network,
  322. }
  323. if node.Address.IP != nil {
  324. dns.Address = node.Address.IP.String()
  325. if err := PublishDNSUpdate(node.Network, dns); err != nil {
  326. return err
  327. }
  328. } else if node.Address6.IP != nil {
  329. dns.Address = node.Address6.IP.String()
  330. if err := PublishDNSUpdate(node.Network, dns); err != nil {
  331. return err
  332. }
  333. }
  334. if err := PublishAllDNS(node); err != nil {
  335. return err
  336. }
  337. return nil
  338. }
  339. func handleHostCheckin(h, currentHost *models.Host) bool {
  340. if h == nil {
  341. return false
  342. }
  343. for i := range currentHost.Nodes {
  344. currNodeID := currentHost.Nodes[i]
  345. node, err := logic.GetNodeByID(currNodeID)
  346. if err != nil {
  347. if database.IsEmptyRecord(err) {
  348. fakeNode := models.Node{}
  349. fakeNode.ID, _ = uuid.Parse(currNodeID)
  350. fakeNode.Action = models.NODE_DELETE
  351. fakeNode.PendingDelete = true
  352. if err := NodeUpdate(&fakeNode); err != nil {
  353. logger.Log(0, "failed to inform host", currentHost.Name, currentHost.ID.String(), "to remove node", currNodeID, err.Error())
  354. }
  355. }
  356. continue
  357. }
  358. if err := logic.UpdateNodeCheckin(&node); err != nil {
  359. logger.Log(0, "error updating node", node.ID.String(), " on checkin", err.Error())
  360. }
  361. }
  362. for i := range h.Interfaces {
  363. h.Interfaces[i].AddressString = h.Interfaces[i].Address.String()
  364. }
  365. ifaceDelta := len(h.Interfaces) != len(currentHost.Interfaces) || !h.EndpointIP.Equal(currentHost.EndpointIP)
  366. currentHost.EndpointIP = h.EndpointIP
  367. currentHost.Interfaces = h.Interfaces
  368. currentHost.DefaultInterface = h.DefaultInterface
  369. if err := logic.UpsertHost(currentHost); err != nil {
  370. logger.Log(0, "failed to update host after check-in", h.Name, h.ID.String(), err.Error())
  371. return false
  372. }
  373. logger.Log(0, "ping processed for host", h.Name, h.ID.String())
  374. return ifaceDelta
  375. }