handlers.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406
  1. package mq
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "time"
  6. mqtt "github.com/eclipse/paho.mqtt.golang"
  7. "github.com/google/uuid"
  8. "github.com/gravitl/netmaker/database"
  9. "github.com/gravitl/netmaker/logger"
  10. "github.com/gravitl/netmaker/logic"
  11. "github.com/gravitl/netmaker/logic/hostactions"
  12. "github.com/gravitl/netmaker/models"
  13. "github.com/gravitl/netmaker/netclient/ncutils"
  14. "github.com/gravitl/netmaker/servercfg"
  15. )
  16. // DefaultHandler default message queue handler -- NOT USED
  17. func DefaultHandler(client mqtt.Client, msg mqtt.Message) {
  18. logger.Log(0, "MQTT Message: Topic: ", string(msg.Topic()), " Message: ", string(msg.Payload()))
  19. }
  20. // Ping message Handler -- handles ping topic from client nodes
  21. func Ping(client mqtt.Client, msg mqtt.Message) {
  22. id, err := getID(msg.Topic())
  23. if err != nil {
  24. logger.Log(0, "error getting node.ID sent on ping topic ")
  25. return
  26. }
  27. node, err := logic.GetNodeByID(id)
  28. if err != nil {
  29. logger.Log(3, "mq-ping error getting node: ", err.Error())
  30. node, err := logic.GetNodeByID(id)
  31. if err != nil {
  32. logger.Log(3, "mq-ping error getting node: ", err.Error())
  33. if database.IsEmptyRecord(err) {
  34. h := logic.GetHostByNodeID(id) // check if a host is still associated
  35. if h != nil { // inform host that node should be removed
  36. fakeNode := models.Node{}
  37. fakeNode.ID, _ = uuid.Parse(id)
  38. fakeNode.Action = models.NODE_DELETE
  39. fakeNode.PendingDelete = true
  40. if err := NodeUpdate(&fakeNode); err != nil {
  41. logger.Log(0, "failed to inform host", h.Name, h.ID.String(), "to remove node", id, err.Error())
  42. }
  43. }
  44. }
  45. return
  46. }
  47. decrypted, decryptErr := decryptMsg(&node, msg.Payload())
  48. if decryptErr != nil {
  49. logger.Log(0, "error decrypting when updating node ", node.ID.String(), decryptErr.Error())
  50. return
  51. }
  52. var checkin models.NodeCheckin
  53. if err := json.Unmarshal(decrypted, &checkin); err != nil {
  54. logger.Log(1, "error unmarshaling payload ", err.Error())
  55. return
  56. }
  57. host, err := logic.GetHost(node.HostID.String())
  58. if err != nil {
  59. logger.Log(0, "error retrieving host for node ", node.ID.String(), err.Error())
  60. return
  61. }
  62. node.SetLastCheckIn()
  63. host.Version = checkin.Version
  64. node.Connected = checkin.Connected
  65. host.Interfaces = checkin.Ifaces
  66. for i := range host.Interfaces {
  67. host.Interfaces[i].AddressString = host.Interfaces[i].Address.String()
  68. }
  69. if err := logic.UpdateNode(&node, &node); err != nil {
  70. logger.Log(0, "error updating node", node.ID.String(), " on checkin", err.Error())
  71. return
  72. }
  73. return
  74. }
  75. decrypted, decryptErr := decryptMsg(&node, msg.Payload())
  76. if decryptErr != nil {
  77. logger.Log(0, "error decrypting when updating node ", node.ID.String(), decryptErr.Error())
  78. return
  79. }
  80. var checkin models.NodeCheckin
  81. if err := json.Unmarshal(decrypted, &checkin); err != nil {
  82. logger.Log(1, "error unmarshaling payload ", err.Error())
  83. return
  84. }
  85. host, err := logic.GetHost(node.HostID.String())
  86. if err != nil {
  87. logger.Log(0, "error retrieving host for node ", node.ID.String(), err.Error())
  88. return
  89. }
  90. node.SetLastCheckIn()
  91. host.Version = checkin.Version
  92. node.Connected = checkin.Connected
  93. host.Interfaces = checkin.Ifaces
  94. for i := range host.Interfaces {
  95. host.Interfaces[i].AddressString = host.Interfaces[i].Address.String()
  96. }
  97. if err := logic.UpdateNode(&node, &node); err != nil {
  98. logger.Log(0, "error updating node", node.ID.String(), " on checkin", err.Error())
  99. return
  100. }
  101. logger.Log(3, "ping processed for node", node.ID.String())
  102. // --TODO --set client version once feature is implemented.
  103. //node.SetClientVersion(msg.Payload())
  104. }
  105. // UpdateNode message Handler -- handles updates from client nodes
  106. func UpdateNode(client mqtt.Client, msg mqtt.Message) {
  107. id, err := getID(msg.Topic())
  108. if err != nil {
  109. logger.Log(1, "error getting node.ID sent on ", msg.Topic(), err.Error())
  110. return
  111. }
  112. currentNode, err := logic.GetNodeByID(id)
  113. if err != nil {
  114. logger.Log(1, "error getting node ", id, err.Error())
  115. return
  116. }
  117. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  118. if decryptErr != nil {
  119. logger.Log(1, "failed to decrypt message for node ", id, decryptErr.Error())
  120. return
  121. }
  122. var newNode models.Node
  123. if err := json.Unmarshal(decrypted, &newNode); err != nil {
  124. logger.Log(1, "error unmarshaling payload ", err.Error())
  125. return
  126. }
  127. ifaceDelta := logic.IfaceDelta(&currentNode, &newNode)
  128. if servercfg.Is_EE && ifaceDelta {
  129. if err = logic.EnterpriseResetAllPeersFailovers(currentNode.ID, currentNode.Network); err != nil {
  130. logger.Log(1, "failed to reset failover list during node update", currentNode.ID.String(), currentNode.Network)
  131. }
  132. }
  133. newNode.SetLastCheckIn()
  134. if err := logic.UpdateNode(&currentNode, &newNode); err != nil {
  135. logger.Log(1, "error saving node", err.Error())
  136. return
  137. }
  138. if ifaceDelta { // reduce number of unneeded updates, by only sending on iface changes
  139. if err = PublishPeerUpdate(); err != nil {
  140. logger.Log(0, "error updating peers when node", currentNode.ID.String(), "informed the server of an interface change", err.Error())
  141. }
  142. }
  143. logger.Log(1, "updated node", id, newNode.ID.String())
  144. }
  145. // UpdateHost message Handler -- handles host updates from clients
  146. func UpdateHost(client mqtt.Client, msg mqtt.Message) {
  147. id, err := getID(msg.Topic())
  148. if err != nil {
  149. logger.Log(1, "error getting host.ID sent on ", msg.Topic(), err.Error())
  150. return
  151. }
  152. currentHost, err := logic.GetHost(id)
  153. if err != nil {
  154. logger.Log(1, "error getting host ", id, err.Error())
  155. return
  156. }
  157. decrypted, decryptErr := decryptMsgWithHost(currentHost, msg.Payload())
  158. if decryptErr != nil {
  159. logger.Log(1, "failed to decrypt message for host ", id, decryptErr.Error())
  160. return
  161. }
  162. var hostUpdate models.HostUpdate
  163. if err := json.Unmarshal(decrypted, &hostUpdate); err != nil {
  164. logger.Log(1, "error unmarshaling payload ", err.Error())
  165. return
  166. }
  167. logger.Log(3, fmt.Sprintf("recieved host update: %s\n", hostUpdate.Host.ID.String()))
  168. var sendPeerUpdate bool
  169. switch hostUpdate.Action {
  170. case models.Acknowledgement:
  171. hu := hostactions.GetAction(currentHost.ID.String())
  172. if hu != nil {
  173. if err = HostUpdate(hu); err != nil {
  174. logger.Log(0, "failed to send new node to host", hostUpdate.Host.Name, currentHost.ID.String(), err.Error())
  175. return
  176. } else {
  177. if err = PublishSingleHostPeerUpdate(currentHost, nil); err != nil {
  178. logger.Log(0, "failed peers publish after join acknowledged", hostUpdate.Host.Name, currentHost.ID.String(), err.Error())
  179. return
  180. }
  181. }
  182. }
  183. case models.UpdateHost:
  184. sendPeerUpdate = logic.UpdateHostFromClient(&hostUpdate.Host, currentHost)
  185. err := logic.UpsertHost(currentHost)
  186. if err != nil {
  187. logger.Log(0, "failed to update host: ", currentHost.ID.String(), err.Error())
  188. return
  189. }
  190. case models.DeleteHost:
  191. if servercfg.GetBrokerType() == servercfg.EmqxBrokerType {
  192. // delete EMQX credentials for host
  193. if err := DeleteEmqxUser(currentHost.ID.String()); err != nil {
  194. logger.Log(0, "failed to remove host credentials from EMQX: ", currentHost.ID.String(), err.Error())
  195. return
  196. }
  197. }
  198. if err := logic.DisassociateAllNodesFromHost(currentHost.ID.String()); err != nil {
  199. logger.Log(0, "failed to delete all nodes of host: ", currentHost.ID.String(), err.Error())
  200. return
  201. }
  202. if err := logic.RemoveHostByID(currentHost.ID.String()); err != nil {
  203. logger.Log(0, "failed to delete host: ", currentHost.ID.String(), err.Error())
  204. return
  205. }
  206. sendPeerUpdate = true
  207. }
  208. if sendPeerUpdate {
  209. err := PublishPeerUpdate()
  210. if err != nil {
  211. logger.Log(0, "failed to pulish peer update: ", err.Error())
  212. }
  213. }
  214. // if servercfg.Is_EE && ifaceDelta {
  215. // if err = logic.EnterpriseResetAllPeersFailovers(currentHost.ID.String(), currentHost.Network); err != nil {
  216. // logger.Log(1, "failed to reset failover list during node update", currentHost.ID.String(), currentHost.Network)
  217. // }
  218. // }
  219. }
  220. // UpdateMetrics message Handler -- handles updates from client nodes for metrics
  221. func UpdateMetrics(client mqtt.Client, msg mqtt.Message) {
  222. if servercfg.Is_EE {
  223. id, err := getID(msg.Topic())
  224. if err != nil {
  225. logger.Log(1, "error getting node.ID sent on ", msg.Topic(), err.Error())
  226. return
  227. }
  228. currentNode, err := logic.GetNodeByID(id)
  229. if err != nil {
  230. logger.Log(1, "error getting node ", id, err.Error())
  231. return
  232. }
  233. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  234. if decryptErr != nil {
  235. logger.Log(1, "failed to decrypt message for node ", id, decryptErr.Error())
  236. return
  237. }
  238. var newMetrics models.Metrics
  239. if err := json.Unmarshal(decrypted, &newMetrics); err != nil {
  240. logger.Log(1, "error unmarshaling payload ", err.Error())
  241. return
  242. }
  243. shouldUpdate := updateNodeMetrics(&currentNode, &newMetrics)
  244. if err = logic.UpdateMetrics(id, &newMetrics); err != nil {
  245. logger.Log(1, "faield to update node metrics", id, err.Error())
  246. return
  247. }
  248. if servercfg.IsMetricsExporter() {
  249. if err := pushMetricsToExporter(newMetrics); err != nil {
  250. logger.Log(2, fmt.Sprintf("failed to push node: [%s] metrics to exporter, err: %v",
  251. currentNode.ID, err))
  252. }
  253. }
  254. if newMetrics.Connectivity != nil {
  255. err := logic.EnterpriseFailoverFunc(&currentNode)
  256. if err != nil {
  257. logger.Log(0, "failed to failover for node", currentNode.ID.String(), "on network", currentNode.Network, "-", err.Error())
  258. }
  259. }
  260. if shouldUpdate {
  261. logger.Log(2, "updating peers after node", currentNode.ID.String(), currentNode.Network, "detected connectivity issues")
  262. host, err := logic.GetHost(currentNode.HostID.String())
  263. if err == nil {
  264. if err = PublishSingleHostPeerUpdate(host, nil); err != nil {
  265. logger.Log(0, "failed to publish update after failover peer change for node", currentNode.ID.String(), currentNode.Network)
  266. }
  267. }
  268. }
  269. logger.Log(1, "updated node metrics", id)
  270. }
  271. }
  272. // ClientPeerUpdate message handler -- handles updating peers after signal from client nodes
  273. func ClientPeerUpdate(client mqtt.Client, msg mqtt.Message) {
  274. id, err := getID(msg.Topic())
  275. if err != nil {
  276. logger.Log(1, "error getting node.ID sent on ", msg.Topic(), err.Error())
  277. return
  278. }
  279. currentNode, err := logic.GetNodeByID(id)
  280. if err != nil {
  281. logger.Log(1, "error getting node ", id, err.Error())
  282. return
  283. }
  284. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  285. if decryptErr != nil {
  286. logger.Log(1, "failed to decrypt message during client peer update for node ", id, decryptErr.Error())
  287. return
  288. }
  289. switch decrypted[0] {
  290. case ncutils.ACK:
  291. // do we still need this
  292. case ncutils.DONE:
  293. if err = PublishPeerUpdate(); err != nil {
  294. logger.Log(1, "error publishing peer update for node", currentNode.ID.String(), err.Error())
  295. return
  296. }
  297. }
  298. logger.Log(1, "sent peer updates after signal received from", id)
  299. }
  300. func updateNodeMetrics(currentNode *models.Node, newMetrics *models.Metrics) bool {
  301. if newMetrics.FailoverPeers == nil {
  302. newMetrics.FailoverPeers = make(map[string]string)
  303. }
  304. oldMetrics, err := logic.GetMetrics(currentNode.ID.String())
  305. if err != nil {
  306. logger.Log(1, "error finding old metrics for node", currentNode.ID.String())
  307. return false
  308. }
  309. if oldMetrics.FailoverPeers == nil {
  310. oldMetrics.FailoverPeers = make(map[string]string)
  311. }
  312. var attachedClients []models.ExtClient
  313. if currentNode.IsIngressGateway {
  314. clients, err := logic.GetExtClientsByID(currentNode.ID.String(), currentNode.Network)
  315. if err == nil {
  316. attachedClients = clients
  317. }
  318. }
  319. if len(attachedClients) > 0 {
  320. // associate ext clients with IDs
  321. for i := range attachedClients {
  322. extMetric := newMetrics.Connectivity[attachedClients[i].PublicKey]
  323. if len(extMetric.NodeName) == 0 &&
  324. len(newMetrics.Connectivity[attachedClients[i].ClientID].NodeName) > 0 { // cover server clients
  325. extMetric = newMetrics.Connectivity[attachedClients[i].ClientID]
  326. if extMetric.TotalReceived > 0 && extMetric.TotalSent > 0 {
  327. extMetric.Connected = true
  328. }
  329. }
  330. extMetric.NodeName = attachedClients[i].ClientID
  331. delete(newMetrics.Connectivity, attachedClients[i].PublicKey)
  332. newMetrics.Connectivity[attachedClients[i].ClientID] = extMetric
  333. }
  334. }
  335. // run through metrics for each peer
  336. for k := range newMetrics.Connectivity {
  337. currMetric := newMetrics.Connectivity[k]
  338. oldMetric := oldMetrics.Connectivity[k]
  339. currMetric.TotalTime += oldMetric.TotalTime
  340. currMetric.Uptime += oldMetric.Uptime // get the total uptime for this connection
  341. if currMetric.Uptime == 0 || currMetric.TotalTime == 0 {
  342. currMetric.PercentUp = 0
  343. } else {
  344. currMetric.PercentUp = 100.0 * (float64(currMetric.Uptime) / float64(currMetric.TotalTime))
  345. }
  346. totalUpMinutes := currMetric.Uptime * ncutils.CheckInInterval
  347. currMetric.ActualUptime = time.Duration(totalUpMinutes) * time.Minute
  348. delete(oldMetrics.Connectivity, k) // remove from old data
  349. newMetrics.Connectivity[k] = currMetric
  350. }
  351. // add nodes that need failover
  352. nodes, err := logic.GetNetworkNodes(currentNode.Network)
  353. if err != nil {
  354. logger.Log(0, "failed to retrieve nodes while updating metrics")
  355. return false
  356. }
  357. for _, node := range nodes {
  358. if !newMetrics.Connectivity[node.ID.String()].Connected &&
  359. len(newMetrics.Connectivity[node.ID.String()].NodeName) > 0 &&
  360. node.Connected &&
  361. len(node.FailoverNode) > 0 &&
  362. !node.Failover {
  363. newMetrics.FailoverPeers[node.ID.String()] = node.FailoverNode.String()
  364. }
  365. }
  366. shouldUpdate := len(oldMetrics.FailoverPeers) == 0 && len(newMetrics.FailoverPeers) > 0
  367. for k, v := range oldMetrics.FailoverPeers {
  368. if len(newMetrics.FailoverPeers[k]) > 0 && len(v) == 0 {
  369. shouldUpdate = true
  370. }
  371. if len(v) > 0 && len(newMetrics.FailoverPeers[k]) == 0 {
  372. newMetrics.FailoverPeers[k] = v
  373. }
  374. }
  375. for k := range oldMetrics.Connectivity { // cleanup any left over data, self healing
  376. delete(newMetrics.Connectivity, k)
  377. }
  378. return shouldUpdate
  379. }