handlers.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. package mq
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "time"
  6. mqtt "github.com/eclipse/paho.mqtt.golang"
  7. "github.com/google/uuid"
  8. "github.com/gravitl/netmaker/database"
  9. "github.com/gravitl/netmaker/logger"
  10. "github.com/gravitl/netmaker/logic"
  11. "github.com/gravitl/netmaker/logic/hostactions"
  12. "github.com/gravitl/netmaker/models"
  13. "github.com/gravitl/netmaker/netclient/ncutils"
  14. "github.com/gravitl/netmaker/servercfg"
  15. )
  16. // DefaultHandler default message queue handler -- NOT USED
  17. func DefaultHandler(client mqtt.Client, msg mqtt.Message) {
  18. logger.Log(0, "MQTT Message: Topic: ", string(msg.Topic()), " Message: ", string(msg.Payload()))
  19. }
  20. // Ping message Handler -- handles ping topic from client nodes
  21. func Ping(client mqtt.Client, msg mqtt.Message) {
  22. go func() {
  23. id, err := getID(msg.Topic())
  24. if err != nil {
  25. logger.Log(0, "error getting node.ID sent on ping topic ")
  26. return
  27. }
  28. node, err := logic.GetNodeByID(id)
  29. if err != nil {
  30. logger.Log(3, "mq-ping error getting node: ", err.Error())
  31. if database.IsEmptyRecord(err) {
  32. h := logic.GetHostByNodeID(id) // check if a host is still associated
  33. if h != nil { // inform host that node should be removed
  34. fakeNode := models.Node{}
  35. fakeNode.ID, _ = uuid.Parse(id)
  36. fakeNode.Action = models.NODE_DELETE
  37. fakeNode.PendingDelete = true
  38. if err := NodeUpdate(&fakeNode); err != nil {
  39. logger.Log(0, "failed to inform host", h.Name, h.ID.String(), "to remove node", id, err.Error())
  40. }
  41. }
  42. }
  43. return
  44. }
  45. decrypted, decryptErr := decryptMsg(&node, msg.Payload())
  46. if decryptErr != nil {
  47. logger.Log(0, "error decrypting when updating node ", node.ID.String(), decryptErr.Error())
  48. return
  49. }
  50. var checkin models.NodeCheckin
  51. if err := json.Unmarshal(decrypted, &checkin); err != nil {
  52. logger.Log(1, "error unmarshaling payload ", err.Error())
  53. return
  54. }
  55. host, err := logic.GetHost(node.HostID.String())
  56. if err != nil {
  57. logger.Log(0, "error retrieving host for node ", node.ID.String(), err.Error())
  58. return
  59. }
  60. node.SetLastCheckIn()
  61. host.Version = checkin.Version
  62. node.Connected = checkin.Connected
  63. host.Interfaces = checkin.Ifaces
  64. for i := range host.Interfaces {
  65. host.Interfaces[i].AddressString = host.Interfaces[i].Address.String()
  66. }
  67. if err := logic.UpdateNode(&node, &node); err != nil {
  68. logger.Log(0, "error updating node", node.ID.String(), " on checkin", err.Error())
  69. return
  70. }
  71. logger.Log(3, "ping processed for node", node.ID.String())
  72. // --TODO --set client version once feature is implemented.
  73. //node.SetClientVersion(msg.Payload())
  74. }()
  75. }
  76. // UpdateNode message Handler -- handles updates from client nodes
  77. func UpdateNode(client mqtt.Client, msg mqtt.Message) {
  78. go func() {
  79. id, err := getID(msg.Topic())
  80. if err != nil {
  81. logger.Log(1, "error getting node.ID sent on ", msg.Topic(), err.Error())
  82. return
  83. }
  84. currentNode, err := logic.GetNodeByID(id)
  85. if err != nil {
  86. logger.Log(1, "error getting node ", id, err.Error())
  87. return
  88. }
  89. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  90. if decryptErr != nil {
  91. logger.Log(1, "failed to decrypt message for node ", id, decryptErr.Error())
  92. return
  93. }
  94. var newNode models.Node
  95. if err := json.Unmarshal(decrypted, &newNode); err != nil {
  96. logger.Log(1, "error unmarshaling payload ", err.Error())
  97. return
  98. }
  99. ifaceDelta := logic.IfaceDelta(&currentNode, &newNode)
  100. if servercfg.Is_EE && ifaceDelta {
  101. if err = logic.EnterpriseResetAllPeersFailovers(currentNode.ID, currentNode.Network); err != nil {
  102. logger.Log(1, "failed to reset failover list during node update", currentNode.ID.String(), currentNode.Network)
  103. }
  104. }
  105. newNode.SetLastCheckIn()
  106. if err := logic.UpdateNode(&currentNode, &newNode); err != nil {
  107. logger.Log(1, "error saving node", err.Error())
  108. return
  109. }
  110. if ifaceDelta { // reduce number of unneeded updates, by only sending on iface changes
  111. if err = PublishPeerUpdate(); err != nil {
  112. logger.Log(0, "error updating peers when node", currentNode.ID.String(), "informed the server of an interface change", err.Error())
  113. }
  114. }
  115. logger.Log(1, "updated node", id, newNode.ID.String())
  116. }()
  117. }
  118. // UpdateHost message Handler -- handles host updates from clients
  119. func UpdateHost(client mqtt.Client, msg mqtt.Message) {
  120. go func(msg mqtt.Message) {
  121. id, err := getID(msg.Topic())
  122. if err != nil {
  123. logger.Log(1, "error getting host.ID sent on ", msg.Topic(), err.Error())
  124. return
  125. }
  126. currentHost, err := logic.GetHost(id)
  127. if err != nil {
  128. logger.Log(1, "error getting host ", id, err.Error())
  129. return
  130. }
  131. decrypted, decryptErr := decryptMsgWithHost(currentHost, msg.Payload())
  132. if decryptErr != nil {
  133. logger.Log(1, "failed to decrypt message for host ", id, decryptErr.Error())
  134. return
  135. }
  136. var hostUpdate models.HostUpdate
  137. if err := json.Unmarshal(decrypted, &hostUpdate); err != nil {
  138. logger.Log(1, "error unmarshaling payload ", err.Error())
  139. return
  140. }
  141. logger.Log(3, fmt.Sprintf("recieved host update: %s\n", hostUpdate.Host.ID.String()))
  142. var sendPeerUpdate bool
  143. switch hostUpdate.Action {
  144. case models.Acknowledgement:
  145. hu := hostactions.GetAction(currentHost.ID.String())
  146. if hu != nil {
  147. if err = HostUpdate(hu); err != nil {
  148. logger.Log(0, "failed to send new node to host", hostUpdate.Host.Name, currentHost.ID.String(), err.Error())
  149. return
  150. } else {
  151. if err = PublishSingleHostPeerUpdate(currentHost, nil); err != nil {
  152. logger.Log(0, "failed peers publish after join acknowledged", hostUpdate.Host.Name, currentHost.ID.String(), err.Error())
  153. return
  154. }
  155. }
  156. }
  157. case models.UpdateHost:
  158. sendPeerUpdate = logic.UpdateHostFromClient(&hostUpdate.Host, currentHost)
  159. err := logic.UpsertHost(currentHost)
  160. if err != nil {
  161. logger.Log(0, "failed to update host: ", currentHost.ID.String(), err.Error())
  162. return
  163. }
  164. case models.DeleteHost:
  165. if servercfg.GetBrokerType() == servercfg.EmqxBrokerType {
  166. // delete EMQX credentials for host
  167. if err := DeleteEmqxUser(currentHost.ID.String()); err != nil {
  168. logger.Log(0, "failed to remove host credentials from EMQX: ", currentHost.ID.String(), err.Error())
  169. return
  170. }
  171. }
  172. if err := logic.DisassociateAllNodesFromHost(currentHost.ID.String()); err != nil {
  173. logger.Log(0, "failed to delete all nodes of host: ", currentHost.ID.String(), err.Error())
  174. return
  175. }
  176. if err := logic.RemoveHostByID(currentHost.ID.String()); err != nil {
  177. logger.Log(0, "failed to delete host: ", currentHost.ID.String(), err.Error())
  178. return
  179. }
  180. sendPeerUpdate = true
  181. }
  182. if sendPeerUpdate {
  183. err := PublishPeerUpdate()
  184. if err != nil {
  185. logger.Log(0, "failed to pulish peer update: ", err.Error())
  186. }
  187. }
  188. // if servercfg.Is_EE && ifaceDelta {
  189. // if err = logic.EnterpriseResetAllPeersFailovers(currentHost.ID.String(), currentHost.Network); err != nil {
  190. // logger.Log(1, "failed to reset failover list during node update", currentHost.ID.String(), currentHost.Network)
  191. // }
  192. // }
  193. }(msg)
  194. }
  195. // UpdateMetrics message Handler -- handles updates from client nodes for metrics
  196. func UpdateMetrics(client mqtt.Client, msg mqtt.Message) {
  197. if servercfg.Is_EE {
  198. go func() {
  199. id, err := getID(msg.Topic())
  200. if err != nil {
  201. logger.Log(1, "error getting node.ID sent on ", msg.Topic(), err.Error())
  202. return
  203. }
  204. currentNode, err := logic.GetNodeByID(id)
  205. if err != nil {
  206. logger.Log(1, "error getting node ", id, err.Error())
  207. return
  208. }
  209. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  210. if decryptErr != nil {
  211. logger.Log(1, "failed to decrypt message for node ", id, decryptErr.Error())
  212. return
  213. }
  214. var newMetrics models.Metrics
  215. if err := json.Unmarshal(decrypted, &newMetrics); err != nil {
  216. logger.Log(1, "error unmarshaling payload ", err.Error())
  217. return
  218. }
  219. shouldUpdate := updateNodeMetrics(&currentNode, &newMetrics)
  220. if err = logic.UpdateMetrics(id, &newMetrics); err != nil {
  221. logger.Log(1, "faield to update node metrics", id, err.Error())
  222. return
  223. }
  224. if servercfg.IsMetricsExporter() {
  225. if err := pushMetricsToExporter(newMetrics); err != nil {
  226. logger.Log(2, fmt.Sprintf("failed to push node: [%s] metrics to exporter, err: %v",
  227. currentNode.ID, err))
  228. }
  229. }
  230. if newMetrics.Connectivity != nil {
  231. err := logic.EnterpriseFailoverFunc(&currentNode)
  232. if err != nil {
  233. logger.Log(0, "failed to failover for node", currentNode.ID.String(), "on network", currentNode.Network, "-", err.Error())
  234. }
  235. }
  236. if shouldUpdate {
  237. logger.Log(2, "updating peers after node", currentNode.ID.String(), currentNode.Network, "detected connectivity issues")
  238. host, err := logic.GetHost(currentNode.HostID.String())
  239. if err == nil {
  240. if err = PublishSingleHostPeerUpdate(host, nil); err != nil {
  241. logger.Log(0, "failed to publish update after failover peer change for node", currentNode.ID.String(), currentNode.Network)
  242. }
  243. }
  244. }
  245. logger.Log(1, "updated node metrics", id)
  246. }()
  247. }
  248. }
  249. // ClientPeerUpdate message handler -- handles updating peers after signal from client nodes
  250. func ClientPeerUpdate(client mqtt.Client, msg mqtt.Message) {
  251. go func() {
  252. id, err := getID(msg.Topic())
  253. if err != nil {
  254. logger.Log(1, "error getting node.ID sent on ", msg.Topic(), err.Error())
  255. return
  256. }
  257. currentNode, err := logic.GetNodeByID(id)
  258. if err != nil {
  259. logger.Log(1, "error getting node ", id, err.Error())
  260. return
  261. }
  262. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  263. if decryptErr != nil {
  264. logger.Log(1, "failed to decrypt message during client peer update for node ", id, decryptErr.Error())
  265. return
  266. }
  267. switch decrypted[0] {
  268. case ncutils.ACK:
  269. //do we still need this
  270. case ncutils.DONE:
  271. updateNodePeers(&currentNode)
  272. }
  273. logger.Log(1, "sent peer updates after signal received from", id)
  274. }()
  275. }
  276. func updateNodePeers(currentNode *models.Node) {
  277. if err := PublishPeerUpdate(); err != nil {
  278. logger.Log(1, "error publishing peer update ", err.Error())
  279. return
  280. }
  281. }
  282. func updateNodeMetrics(currentNode *models.Node, newMetrics *models.Metrics) bool {
  283. if newMetrics.FailoverPeers == nil {
  284. newMetrics.FailoverPeers = make(map[string]string)
  285. }
  286. oldMetrics, err := logic.GetMetrics(currentNode.ID.String())
  287. if err != nil {
  288. logger.Log(1, "error finding old metrics for node", currentNode.ID.String())
  289. return false
  290. }
  291. if oldMetrics.FailoverPeers == nil {
  292. oldMetrics.FailoverPeers = make(map[string]string)
  293. }
  294. var attachedClients []models.ExtClient
  295. if currentNode.IsIngressGateway {
  296. clients, err := logic.GetExtClientsByID(currentNode.ID.String(), currentNode.Network)
  297. if err == nil {
  298. attachedClients = clients
  299. }
  300. }
  301. if len(attachedClients) > 0 {
  302. // associate ext clients with IDs
  303. for i := range attachedClients {
  304. extMetric := newMetrics.Connectivity[attachedClients[i].PublicKey]
  305. if len(extMetric.NodeName) == 0 &&
  306. len(newMetrics.Connectivity[attachedClients[i].ClientID].NodeName) > 0 { // cover server clients
  307. extMetric = newMetrics.Connectivity[attachedClients[i].ClientID]
  308. if extMetric.TotalReceived > 0 && extMetric.TotalSent > 0 {
  309. extMetric.Connected = true
  310. }
  311. }
  312. extMetric.NodeName = attachedClients[i].ClientID
  313. delete(newMetrics.Connectivity, attachedClients[i].PublicKey)
  314. newMetrics.Connectivity[attachedClients[i].ClientID] = extMetric
  315. }
  316. }
  317. // run through metrics for each peer
  318. for k := range newMetrics.Connectivity {
  319. currMetric := newMetrics.Connectivity[k]
  320. oldMetric := oldMetrics.Connectivity[k]
  321. currMetric.TotalTime += oldMetric.TotalTime
  322. currMetric.Uptime += oldMetric.Uptime // get the total uptime for this connection
  323. if currMetric.Uptime == 0 || currMetric.TotalTime == 0 {
  324. currMetric.PercentUp = 0
  325. } else {
  326. currMetric.PercentUp = 100.0 * (float64(currMetric.Uptime) / float64(currMetric.TotalTime))
  327. }
  328. totalUpMinutes := currMetric.Uptime * ncutils.CheckInInterval
  329. currMetric.ActualUptime = time.Duration(totalUpMinutes) * time.Minute
  330. delete(oldMetrics.Connectivity, k) // remove from old data
  331. newMetrics.Connectivity[k] = currMetric
  332. }
  333. // add nodes that need failover
  334. nodes, err := logic.GetNetworkNodes(currentNode.Network)
  335. if err != nil {
  336. logger.Log(0, "failed to retrieve nodes while updating metrics")
  337. return false
  338. }
  339. for _, node := range nodes {
  340. if !newMetrics.Connectivity[node.ID.String()].Connected &&
  341. len(newMetrics.Connectivity[node.ID.String()].NodeName) > 0 &&
  342. node.Connected &&
  343. len(node.FailoverNode) > 0 &&
  344. !node.Failover {
  345. newMetrics.FailoverPeers[node.ID.String()] = node.FailoverNode.String()
  346. }
  347. }
  348. shouldUpdate := len(oldMetrics.FailoverPeers) == 0 && len(newMetrics.FailoverPeers) > 0
  349. for k, v := range oldMetrics.FailoverPeers {
  350. if len(newMetrics.FailoverPeers[k]) > 0 && len(v) == 0 {
  351. shouldUpdate = true
  352. }
  353. if len(v) > 0 && len(newMetrics.FailoverPeers[k]) == 0 {
  354. newMetrics.FailoverPeers[k] = v
  355. }
  356. }
  357. for k := range oldMetrics.Connectivity { // cleanup any left over data, self healing
  358. delete(newMetrics.Connectivity, k)
  359. }
  360. return shouldUpdate
  361. }