handlers.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449
  1. package mq
  2. import (
  3. "context"
  4. "encoding/json"
  5. "fmt"
  6. "math"
  7. "time"
  8. mqtt "github.com/eclipse/paho.mqtt.golang"
  9. "github.com/google/uuid"
  10. "github.com/gravitl/netmaker/database"
  11. "github.com/gravitl/netmaker/logger"
  12. "github.com/gravitl/netmaker/logic"
  13. "github.com/gravitl/netmaker/logic/hostactions"
  14. "github.com/gravitl/netmaker/models"
  15. "github.com/gravitl/netmaker/netclient/ncutils"
  16. "github.com/gravitl/netmaker/servercfg"
  17. )
  18. // DefaultHandler default message queue handler -- NOT USED
  19. func DefaultHandler(client mqtt.Client, msg mqtt.Message) {
  20. logger.Log(0, "MQTT Message: Topic: ", string(msg.Topic()), " Message: ", string(msg.Payload()))
  21. }
  22. // Ping message Handler -- handles ping topic from client nodes
  23. func Ping(client mqtt.Client, msg mqtt.Message) {
  24. id, err := getID(msg.Topic())
  25. if err != nil {
  26. logger.Log(0, "error getting node.ID sent on ping topic ")
  27. return
  28. }
  29. node, err := logic.GetNodeByID(id)
  30. if err != nil {
  31. logger.Log(3, "mq-ping error getting node: ", err.Error())
  32. node, err := logic.GetNodeByID(id)
  33. if err != nil {
  34. logger.Log(3, "mq-ping error getting node: ", err.Error())
  35. if database.IsEmptyRecord(err) {
  36. h := logic.GetHostByNodeID(id) // check if a host is still associated
  37. if h != nil { // inform host that node should be removed
  38. fakeNode := models.Node{}
  39. fakeNode.ID, _ = uuid.Parse(id)
  40. fakeNode.Action = models.NODE_DELETE
  41. fakeNode.PendingDelete = true
  42. if err := NodeUpdate(&fakeNode); err != nil {
  43. logger.Log(0, "failed to inform host", h.Name, h.ID.String(), "to remove node", id, err.Error())
  44. }
  45. }
  46. }
  47. return
  48. }
  49. decrypted, decryptErr := decryptMsg(&node, msg.Payload())
  50. if decryptErr != nil {
  51. logger.Log(0, "error decrypting when updating node ", node.ID.String(), decryptErr.Error())
  52. return
  53. }
  54. var checkin models.NodeCheckin
  55. if err := json.Unmarshal(decrypted, &checkin); err != nil {
  56. logger.Log(1, "error unmarshaling payload ", err.Error())
  57. return
  58. }
  59. host, err := logic.GetHost(node.HostID.String())
  60. if err != nil {
  61. logger.Log(0, "error retrieving host for node ", node.ID.String(), err.Error())
  62. return
  63. }
  64. node.SetLastCheckIn()
  65. host.Version = checkin.Version
  66. node.Connected = checkin.Connected
  67. host.Interfaces = checkin.Ifaces
  68. for i := range host.Interfaces {
  69. host.Interfaces[i].AddressString = host.Interfaces[i].Address.String()
  70. }
  71. if err := logic.UpdateNode(&node, &node); err != nil {
  72. logger.Log(0, "error updating node", node.ID.String(), " on checkin", err.Error())
  73. return
  74. }
  75. return
  76. }
  77. decrypted, decryptErr := decryptMsg(&node, msg.Payload())
  78. if decryptErr != nil {
  79. logger.Log(0, "error decrypting when updating node ", node.ID.String(), decryptErr.Error())
  80. return
  81. }
  82. var checkin models.NodeCheckin
  83. if err := json.Unmarshal(decrypted, &checkin); err != nil {
  84. logger.Log(1, "error unmarshaling payload ", err.Error())
  85. return
  86. }
  87. host, err := logic.GetHost(node.HostID.String())
  88. if err != nil {
  89. logger.Log(0, "error retrieving host for node ", node.ID.String(), err.Error())
  90. return
  91. }
  92. node.SetLastCheckIn()
  93. host.Version = checkin.Version
  94. node.Connected = checkin.Connected
  95. host.Interfaces = checkin.Ifaces
  96. for i := range host.Interfaces {
  97. host.Interfaces[i].AddressString = host.Interfaces[i].Address.String()
  98. }
  99. if err := logic.UpdateNode(&node, &node); err != nil {
  100. logger.Log(0, "error updating node", node.ID.String(), " on checkin", err.Error())
  101. return
  102. }
  103. logger.Log(3, "ping processed for node", node.ID.String())
  104. // --TODO --set client version once feature is implemented.
  105. //node.SetClientVersion(msg.Payload())
  106. }
  107. // UpdateNode message Handler -- handles updates from client nodes
  108. func UpdateNode(client mqtt.Client, msg mqtt.Message) {
  109. id, err := getID(msg.Topic())
  110. if err != nil {
  111. logger.Log(1, "error getting node.ID sent on ", msg.Topic(), err.Error())
  112. return
  113. }
  114. currentNode, err := logic.GetNodeByID(id)
  115. if err != nil {
  116. logger.Log(1, "error getting node ", id, err.Error())
  117. return
  118. }
  119. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  120. if decryptErr != nil {
  121. logger.Log(1, "failed to decrypt message for node ", id, decryptErr.Error())
  122. return
  123. }
  124. var newNode models.Node
  125. if err := json.Unmarshal(decrypted, &newNode); err != nil {
  126. logger.Log(1, "error unmarshaling payload ", err.Error())
  127. return
  128. }
  129. ifaceDelta := logic.IfaceDelta(&currentNode, &newNode)
  130. if servercfg.Is_EE && ifaceDelta {
  131. if err = logic.EnterpriseResetAllPeersFailovers(currentNode.ID, currentNode.Network); err != nil {
  132. logger.Log(1, "failed to reset failover list during node update", currentNode.ID.String(), currentNode.Network)
  133. }
  134. }
  135. newNode.SetLastCheckIn()
  136. if err := logic.UpdateNode(&currentNode, &newNode); err != nil {
  137. logger.Log(1, "error saving node", err.Error())
  138. return
  139. }
  140. if ifaceDelta { // reduce number of unneeded updates, by only sending on iface changes
  141. if err = PublishPeerUpdate(); err != nil {
  142. logger.Log(0, "error updating peers when node", currentNode.ID.String(), "informed the server of an interface change", err.Error())
  143. }
  144. }
  145. logger.Log(1, "updated node", id, newNode.ID.String())
  146. }
  147. // UpdateHost message Handler -- handles host updates from clients
  148. func UpdateHost(client mqtt.Client, msg mqtt.Message) {
  149. id, err := getID(msg.Topic())
  150. if err != nil {
  151. logger.Log(1, "error getting host.ID sent on ", msg.Topic(), err.Error())
  152. return
  153. }
  154. currentHost, err := logic.GetHost(id)
  155. if err != nil {
  156. logger.Log(1, "error getting host ", id, err.Error())
  157. return
  158. }
  159. decrypted, decryptErr := decryptMsgWithHost(currentHost, msg.Payload())
  160. if decryptErr != nil {
  161. logger.Log(1, "failed to decrypt message for host ", id, decryptErr.Error())
  162. return
  163. }
  164. var hostUpdate models.HostUpdate
  165. if err := json.Unmarshal(decrypted, &hostUpdate); err != nil {
  166. logger.Log(1, "error unmarshaling payload ", err.Error())
  167. return
  168. }
  169. logger.Log(3, fmt.Sprintf("recieved host update: %s\n", hostUpdate.Host.ID.String()))
  170. var sendPeerUpdate bool
  171. switch hostUpdate.Action {
  172. case models.Acknowledgement:
  173. hu := hostactions.GetAction(currentHost.ID.String())
  174. if hu != nil {
  175. if err = HostUpdate(hu); err != nil {
  176. logger.Log(0, "failed to send new node to host", hostUpdate.Host.Name, currentHost.ID.String(), err.Error())
  177. return
  178. } else {
  179. if err = PublishSingleHostPeerUpdate(context.Background(), currentHost, nil); err != nil {
  180. logger.Log(0, "failed peers publish after join acknowledged", hostUpdate.Host.Name, currentHost.ID.String(), err.Error())
  181. return
  182. }
  183. if err = handleNewNodeDNS(&hu.Host, &hu.Node); err != nil {
  184. logger.Log(0, "failed to send dns update after node,", hu.Node.ID.String(), ", added to host", hu.Host.Name, err.Error())
  185. return
  186. }
  187. }
  188. }
  189. case models.UpdateHost:
  190. sendPeerUpdate = logic.UpdateHostFromClient(&hostUpdate.Host, currentHost)
  191. err := logic.UpsertHost(currentHost)
  192. if err != nil {
  193. logger.Log(0, "failed to update host: ", currentHost.ID.String(), err.Error())
  194. return
  195. }
  196. case models.DeleteHost:
  197. if servercfg.GetBrokerType() == servercfg.EmqxBrokerType {
  198. // delete EMQX credentials for host
  199. if err := DeleteEmqxUser(currentHost.ID.String()); err != nil {
  200. logger.Log(0, "failed to remove host credentials from EMQX: ", currentHost.ID.String(), err.Error())
  201. return
  202. }
  203. }
  204. if err := logic.DisassociateAllNodesFromHost(currentHost.ID.String()); err != nil {
  205. logger.Log(0, "failed to delete all nodes of host: ", currentHost.ID.String(), err.Error())
  206. return
  207. }
  208. if err := logic.RemoveHostByID(currentHost.ID.String()); err != nil {
  209. logger.Log(0, "failed to delete host: ", currentHost.ID.String(), err.Error())
  210. return
  211. }
  212. sendPeerUpdate = true
  213. }
  214. if sendPeerUpdate {
  215. err := PublishPeerUpdate()
  216. if err != nil {
  217. logger.Log(0, "failed to pulish peer update: ", err.Error())
  218. }
  219. }
  220. // if servercfg.Is_EE && ifaceDelta {
  221. // if err = logic.EnterpriseResetAllPeersFailovers(currentHost.ID.String(), currentHost.Network); err != nil {
  222. // logger.Log(1, "failed to reset failover list during node update", currentHost.ID.String(), currentHost.Network)
  223. // }
  224. // }
  225. }
  226. // UpdateMetrics message Handler -- handles updates from client nodes for metrics
  227. func UpdateMetrics(client mqtt.Client, msg mqtt.Message) {
  228. if servercfg.Is_EE {
  229. id, err := getID(msg.Topic())
  230. if err != nil {
  231. logger.Log(1, "error getting node.ID sent on ", msg.Topic(), err.Error())
  232. return
  233. }
  234. currentNode, err := logic.GetNodeByID(id)
  235. if err != nil {
  236. logger.Log(1, "error getting node ", id, err.Error())
  237. return
  238. }
  239. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  240. if decryptErr != nil {
  241. logger.Log(1, "failed to decrypt message for node ", id, decryptErr.Error())
  242. return
  243. }
  244. var newMetrics models.Metrics
  245. if err := json.Unmarshal(decrypted, &newMetrics); err != nil {
  246. logger.Log(1, "error unmarshaling payload ", err.Error())
  247. return
  248. }
  249. shouldUpdate := updateNodeMetrics(&currentNode, &newMetrics)
  250. if err = logic.UpdateMetrics(id, &newMetrics); err != nil {
  251. logger.Log(1, "faield to update node metrics", id, err.Error())
  252. return
  253. }
  254. if servercfg.IsMetricsExporter() {
  255. if err := pushMetricsToExporter(newMetrics); err != nil {
  256. logger.Log(2, fmt.Sprintf("failed to push node: [%s] metrics to exporter, err: %v",
  257. currentNode.ID, err))
  258. }
  259. }
  260. if newMetrics.Connectivity != nil {
  261. err := logic.EnterpriseFailoverFunc(&currentNode)
  262. if err != nil {
  263. logger.Log(0, "failed to failover for node", currentNode.ID.String(), "on network", currentNode.Network, "-", err.Error())
  264. }
  265. }
  266. if shouldUpdate {
  267. logger.Log(2, "updating peers after node", currentNode.ID.String(), currentNode.Network, "detected connectivity issues")
  268. host, err := logic.GetHost(currentNode.HostID.String())
  269. if err == nil {
  270. if err = PublishSingleHostPeerUpdate(context.Background(), host, nil); err != nil {
  271. logger.Log(0, "failed to publish update after failover peer change for node", currentNode.ID.String(), currentNode.Network)
  272. }
  273. }
  274. }
  275. logger.Log(1, "updated node metrics", id)
  276. }
  277. }
  278. // ClientPeerUpdate message handler -- handles updating peers after signal from client nodes
  279. func ClientPeerUpdate(client mqtt.Client, msg mqtt.Message) {
  280. id, err := getID(msg.Topic())
  281. if err != nil {
  282. logger.Log(1, "error getting node.ID sent on ", msg.Topic(), err.Error())
  283. return
  284. }
  285. currentNode, err := logic.GetNodeByID(id)
  286. if err != nil {
  287. logger.Log(1, "error getting node ", id, err.Error())
  288. return
  289. }
  290. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  291. if decryptErr != nil {
  292. logger.Log(1, "failed to decrypt message during client peer update for node ", id, decryptErr.Error())
  293. return
  294. }
  295. switch decrypted[0] {
  296. case ncutils.ACK:
  297. // do we still need this
  298. case ncutils.DONE:
  299. if err = PublishPeerUpdate(); err != nil {
  300. logger.Log(1, "error publishing peer update for node", currentNode.ID.String(), err.Error())
  301. return
  302. }
  303. }
  304. logger.Log(1, "sent peer updates after signal received from", id)
  305. }
  306. func updateNodeMetrics(currentNode *models.Node, newMetrics *models.Metrics) bool {
  307. if newMetrics.FailoverPeers == nil {
  308. newMetrics.FailoverPeers = make(map[string]string)
  309. }
  310. oldMetrics, err := logic.GetMetrics(currentNode.ID.String())
  311. if err != nil {
  312. logger.Log(1, "error finding old metrics for node", currentNode.ID.String())
  313. return false
  314. }
  315. if oldMetrics.FailoverPeers == nil {
  316. oldMetrics.FailoverPeers = make(map[string]string)
  317. }
  318. var attachedClients []models.ExtClient
  319. if currentNode.IsIngressGateway {
  320. clients, err := logic.GetExtClientsByID(currentNode.ID.String(), currentNode.Network)
  321. if err == nil {
  322. attachedClients = clients
  323. }
  324. }
  325. if len(attachedClients) > 0 {
  326. // associate ext clients with IDs
  327. for i := range attachedClients {
  328. extMetric := newMetrics.Connectivity[attachedClients[i].PublicKey]
  329. if len(extMetric.NodeName) == 0 &&
  330. len(newMetrics.Connectivity[attachedClients[i].ClientID].NodeName) > 0 { // cover server clients
  331. extMetric = newMetrics.Connectivity[attachedClients[i].ClientID]
  332. if extMetric.TotalReceived > 0 && extMetric.TotalSent > 0 {
  333. extMetric.Connected = true
  334. }
  335. }
  336. extMetric.NodeName = attachedClients[i].ClientID
  337. delete(newMetrics.Connectivity, attachedClients[i].PublicKey)
  338. newMetrics.Connectivity[attachedClients[i].ClientID] = extMetric
  339. }
  340. }
  341. // run through metrics for each peer
  342. for k := range newMetrics.Connectivity {
  343. currMetric := newMetrics.Connectivity[k]
  344. oldMetric := oldMetrics.Connectivity[k]
  345. currMetric.TotalTime += oldMetric.TotalTime
  346. currMetric.Uptime += oldMetric.Uptime // get the total uptime for this connection
  347. if currMetric.CollectedByProxy {
  348. currMetric.TotalReceived += oldMetric.TotalReceived
  349. currMetric.TotalSent += oldMetric.TotalSent
  350. } else {
  351. if currMetric.TotalReceived < oldMetric.TotalReceived {
  352. currMetric.TotalReceived += oldMetric.TotalReceived
  353. } else {
  354. currMetric.TotalReceived += int64(math.Abs(float64(currMetric.TotalReceived) - float64(oldMetric.TotalReceived)))
  355. }
  356. if currMetric.TotalSent < oldMetric.TotalSent {
  357. currMetric.TotalSent += oldMetric.TotalSent
  358. } else {
  359. currMetric.TotalSent += int64(math.Abs(float64(currMetric.TotalSent) - float64(oldMetric.TotalSent)))
  360. }
  361. }
  362. if currMetric.Uptime == 0 || currMetric.TotalTime == 0 {
  363. currMetric.PercentUp = 0
  364. } else {
  365. currMetric.PercentUp = 100.0 * (float64(currMetric.Uptime) / float64(currMetric.TotalTime))
  366. }
  367. totalUpMinutes := currMetric.Uptime * ncutils.CheckInInterval
  368. currMetric.ActualUptime = time.Duration(totalUpMinutes) * time.Minute
  369. delete(oldMetrics.Connectivity, k) // remove from old data
  370. newMetrics.Connectivity[k] = currMetric
  371. }
  372. // add nodes that need failover
  373. nodes, err := logic.GetNetworkNodes(currentNode.Network)
  374. if err != nil {
  375. logger.Log(0, "failed to retrieve nodes while updating metrics")
  376. return false
  377. }
  378. for _, node := range nodes {
  379. if !newMetrics.Connectivity[node.ID.String()].Connected &&
  380. len(newMetrics.Connectivity[node.ID.String()].NodeName) > 0 &&
  381. node.Connected &&
  382. len(node.FailoverNode) > 0 &&
  383. !node.Failover {
  384. newMetrics.FailoverPeers[node.ID.String()] = node.FailoverNode.String()
  385. }
  386. }
  387. shouldUpdate := len(oldMetrics.FailoverPeers) == 0 && len(newMetrics.FailoverPeers) > 0
  388. for k, v := range oldMetrics.FailoverPeers {
  389. if len(newMetrics.FailoverPeers[k]) > 0 && len(v) == 0 {
  390. shouldUpdate = true
  391. }
  392. if len(v) > 0 && len(newMetrics.FailoverPeers[k]) == 0 {
  393. newMetrics.FailoverPeers[k] = v
  394. }
  395. }
  396. for k := range oldMetrics.Connectivity { // cleanup any left over data, self healing
  397. delete(newMetrics.Connectivity, k)
  398. }
  399. return shouldUpdate
  400. }
  401. func handleNewNodeDNS(host *models.Host, node *models.Node) error {
  402. dns := models.DNSUpdate{
  403. Action: models.DNSInsert,
  404. Name: host.Name + "." + node.Network,
  405. }
  406. if node.Address.IP != nil {
  407. dns.Address = node.Address.IP.String()
  408. if err := PublishDNSUpdate(node.Network, dns); err != nil {
  409. return err
  410. }
  411. } else if node.Address6.IP != nil {
  412. dns.Address = node.Address6.IP.String()
  413. if err := PublishDNSUpdate(node.Network, dns); err != nil {
  414. return err
  415. }
  416. }
  417. if err := PublishAllDNS(node); err != nil {
  418. return err
  419. }
  420. return nil
  421. }