handlers.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462
  1. package mq
  2. import (
  3. "context"
  4. "encoding/json"
  5. "fmt"
  6. "math"
  7. "time"
  8. mqtt "github.com/eclipse/paho.mqtt.golang"
  9. "github.com/google/uuid"
  10. "github.com/gravitl/netmaker/database"
  11. "github.com/gravitl/netmaker/logger"
  12. "github.com/gravitl/netmaker/logic"
  13. "github.com/gravitl/netmaker/logic/hostactions"
  14. "github.com/gravitl/netmaker/models"
  15. "github.com/gravitl/netmaker/netclient/ncutils"
  16. "github.com/gravitl/netmaker/servercfg"
  17. "golang.zx2c4.com/wireguard/wgctrl/wgtypes"
  18. )
  19. // DefaultHandler default message queue handler -- NOT USED
  20. func DefaultHandler(client mqtt.Client, msg mqtt.Message) {
  21. logger.Log(0, "MQTT Message: Topic: ", string(msg.Topic()), " Message: ", string(msg.Payload()))
  22. }
  23. // UpdateNode message Handler -- handles updates from client nodes
  24. func UpdateNode(client mqtt.Client, msg mqtt.Message) {
  25. id, err := getID(msg.Topic())
  26. if err != nil {
  27. logger.Log(1, "error getting node.ID sent on ", msg.Topic(), err.Error())
  28. return
  29. }
  30. currentNode, err := logic.GetNodeByID(id)
  31. if err != nil {
  32. logger.Log(1, "error getting node ", id, err.Error())
  33. return
  34. }
  35. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  36. if decryptErr != nil {
  37. logger.Log(1, "failed to decrypt message for node ", id, decryptErr.Error())
  38. return
  39. }
  40. var newNode models.Node
  41. if err := json.Unmarshal(decrypted, &newNode); err != nil {
  42. logger.Log(1, "error unmarshaling payload ", err.Error())
  43. return
  44. }
  45. ifaceDelta := logic.IfaceDelta(&currentNode, &newNode)
  46. if servercfg.Is_EE && ifaceDelta {
  47. if err = logic.EnterpriseResetAllPeersFailovers(currentNode.ID, currentNode.Network); err != nil {
  48. logger.Log(1, "failed to reset failover list during node update", currentNode.ID.String(), currentNode.Network)
  49. }
  50. }
  51. newNode.SetLastCheckIn()
  52. if err := logic.UpdateNode(&currentNode, &newNode); err != nil {
  53. logger.Log(1, "error saving node", err.Error())
  54. return
  55. }
  56. if ifaceDelta { // reduce number of unneeded updates, by only sending on iface changes
  57. h, err := logic.GetHost(newNode.HostID.String())
  58. if err != nil {
  59. return
  60. }
  61. if err = BroadcastAddOrUpdatePeer(h, &newNode, true); err != nil {
  62. logger.Log(0, "error updating peers when node", currentNode.ID.String(), "informed the server of an interface change", err.Error())
  63. }
  64. if nodes, err := logic.GetNetworkNodes(newNode.Network); err == nil {
  65. FlushNetworkPeersToHost(h, &newNode, nodes)
  66. }
  67. }
  68. logger.Log(1, "updated node", id, newNode.ID.String())
  69. }
  70. // UpdateHost message Handler -- handles host updates from clients
  71. func UpdateHost(client mqtt.Client, msg mqtt.Message) {
  72. id, err := getID(msg.Topic())
  73. if err != nil {
  74. logger.Log(1, "error getting host.ID sent on ", msg.Topic(), err.Error())
  75. return
  76. }
  77. currentHost, err := logic.GetHost(id)
  78. if err != nil {
  79. logger.Log(1, "error getting host ", id, err.Error())
  80. return
  81. }
  82. decrypted, decryptErr := decryptMsgWithHost(currentHost, msg.Payload())
  83. if decryptErr != nil {
  84. logger.Log(1, "failed to decrypt message for host ", id, decryptErr.Error())
  85. return
  86. }
  87. var hostUpdate models.HostUpdate
  88. if err := json.Unmarshal(decrypted, &hostUpdate); err != nil {
  89. logger.Log(1, "error unmarshaling payload ", err.Error())
  90. return
  91. }
  92. logger.Log(3, fmt.Sprintf("recieved host update: %s\n", hostUpdate.Host.ID.String()))
  93. var sendPeerUpdate bool
  94. switch hostUpdate.Action {
  95. case models.CheckIn:
  96. sendPeerUpdate = handleHostCheckin(&hostUpdate.Host, currentHost)
  97. case models.Acknowledgement:
  98. hu := hostactions.GetAction(currentHost.ID.String())
  99. if hu != nil {
  100. if err = HostUpdate(hu); err != nil {
  101. logger.Log(0, "failed to send new node to host", hostUpdate.Host.Name, currentHost.ID.String(), err.Error())
  102. return
  103. } else {
  104. if servercfg.GetBrokerType() == servercfg.EmqxBrokerType {
  105. if err = AppendNodeUpdateACL(hu.Host.ID.String(), hu.Node.Network, hu.Node.ID.String(), servercfg.GetServer()); err != nil {
  106. logger.Log(0, "failed to add ACLs for EMQX node", err.Error())
  107. return
  108. }
  109. }
  110. // flush peers to host
  111. nodes, err := logic.GetNetworkNodes(hu.Node.Network)
  112. if err != nil {
  113. return
  114. }
  115. err = FlushNetworkPeersToHost(&hu.Host, &hu.Node, nodes)
  116. if err != nil {
  117. logger.Log(0, "failed to flush peers to host: ", err.Error())
  118. }
  119. if err = handleNewNodeDNS(&hu.Host, &hu.Node); err != nil {
  120. logger.Log(0, "failed to send dns update after node,", hu.Node.ID.String(), ", added to host", hu.Host.Name, err.Error())
  121. return
  122. }
  123. }
  124. }
  125. case models.UpdateHost:
  126. if hostUpdate.Host.PublicKey != currentHost.PublicKey {
  127. //remove old peer entry
  128. peerUpdate := models.HostPeerUpdate{
  129. ServerVersion: servercfg.GetVersion(),
  130. Peers: []wgtypes.PeerConfig{
  131. {
  132. PublicKey: currentHost.PublicKey,
  133. Remove: true,
  134. },
  135. },
  136. }
  137. data, err := json.Marshal(&peerUpdate)
  138. if err != nil {
  139. logger.Log(2, "json error", err.Error())
  140. }
  141. hosts := logic.GetRelatedHosts(hostUpdate.Host.ID.String())
  142. server := servercfg.GetServer()
  143. for _, host := range hosts {
  144. publish(&host, fmt.Sprintf("peers/host/%s/%s", host.ID.String(), server), data)
  145. }
  146. }
  147. sendPeerUpdate = logic.UpdateHostFromClient(&hostUpdate.Host, currentHost)
  148. err := logic.UpsertHost(currentHost)
  149. if err != nil {
  150. logger.Log(0, "failed to update host: ", currentHost.ID.String(), err.Error())
  151. return
  152. }
  153. case models.DeleteHost:
  154. if servercfg.GetBrokerType() == servercfg.EmqxBrokerType {
  155. // delete EMQX credentials for host
  156. if err := DeleteEmqxUser(currentHost.ID.String()); err != nil {
  157. logger.Log(0, "failed to remove host credentials from EMQX: ", currentHost.ID.String(), err.Error())
  158. return
  159. }
  160. }
  161. if err := logic.DisassociateAllNodesFromHost(currentHost.ID.String()); err != nil {
  162. logger.Log(0, "failed to delete all nodes of host: ", currentHost.ID.String(), err.Error())
  163. return
  164. }
  165. if err := logic.RemoveHostByID(currentHost.ID.String()); err != nil {
  166. logger.Log(0, "failed to delete host: ", currentHost.ID.String(), err.Error())
  167. return
  168. }
  169. sendPeerUpdate = true
  170. case models.RegisterWithTurn:
  171. if servercfg.IsUsingTurn() {
  172. err = logic.RegisterHostWithTurn(hostUpdate.Host.ID.String(), hostUpdate.Host.HostPass)
  173. if err != nil {
  174. logger.Log(0, "failed to register host with turn server: ", err.Error())
  175. return
  176. }
  177. }
  178. }
  179. if sendPeerUpdate {
  180. err := PublishPeerUpdate()
  181. if err != nil {
  182. logger.Log(0, "failed to pulish peer update: ", err.Error())
  183. }
  184. }
  185. // if servercfg.Is_EE && ifaceDelta {
  186. // if err = logic.EnterpriseResetAllPeersFailovers(currentHost.ID.String(), currentHost.Network); err != nil {
  187. // logger.Log(1, "failed to reset failover list during node update", currentHost.ID.String(), currentHost.Network)
  188. // }
  189. // }
  190. }
  191. // UpdateMetrics message Handler -- handles updates from client nodes for metrics
  192. func UpdateMetrics(client mqtt.Client, msg mqtt.Message) {
  193. if servercfg.Is_EE {
  194. id, err := getID(msg.Topic())
  195. if err != nil {
  196. logger.Log(1, "error getting node.ID sent on ", msg.Topic(), err.Error())
  197. return
  198. }
  199. currentNode, err := logic.GetNodeByID(id)
  200. if err != nil {
  201. logger.Log(1, "error getting node ", id, err.Error())
  202. return
  203. }
  204. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  205. if decryptErr != nil {
  206. logger.Log(1, "failed to decrypt message for node ", id, decryptErr.Error())
  207. return
  208. }
  209. var newMetrics models.Metrics
  210. if err := json.Unmarshal(decrypted, &newMetrics); err != nil {
  211. logger.Log(1, "error unmarshaling payload ", err.Error())
  212. return
  213. }
  214. shouldUpdate := updateNodeMetrics(&currentNode, &newMetrics)
  215. if err = logic.UpdateMetrics(id, &newMetrics); err != nil {
  216. logger.Log(1, "faield to update node metrics", id, err.Error())
  217. return
  218. }
  219. if servercfg.IsMetricsExporter() {
  220. if err := pushMetricsToExporter(newMetrics); err != nil {
  221. logger.Log(2, fmt.Sprintf("failed to push node: [%s] metrics to exporter, err: %v",
  222. currentNode.ID, err))
  223. }
  224. }
  225. if newMetrics.Connectivity != nil {
  226. err := logic.EnterpriseFailoverFunc(&currentNode)
  227. if err != nil {
  228. logger.Log(0, "failed to failover for node", currentNode.ID.String(), "on network", currentNode.Network, "-", err.Error())
  229. }
  230. }
  231. if shouldUpdate {
  232. logger.Log(2, "updating peers after node", currentNode.ID.String(), currentNode.Network, "detected connectivity issues")
  233. host, err := logic.GetHost(currentNode.HostID.String())
  234. if err == nil {
  235. if err = PublishSingleHostPeerUpdate(context.Background(), host, nil, nil); err != nil {
  236. logger.Log(0, "failed to publish update after failover peer change for node", currentNode.ID.String(), currentNode.Network)
  237. }
  238. }
  239. }
  240. logger.Log(1, "updated node metrics", id)
  241. }
  242. }
  243. // ClientPeerUpdate message handler -- handles updating peers after signal from client nodes
  244. func ClientPeerUpdate(client mqtt.Client, msg mqtt.Message) {
  245. id, err := getID(msg.Topic())
  246. if err != nil {
  247. logger.Log(1, "error getting node.ID sent on ", msg.Topic(), err.Error())
  248. return
  249. }
  250. currentNode, err := logic.GetNodeByID(id)
  251. if err != nil {
  252. logger.Log(1, "error getting node ", id, err.Error())
  253. return
  254. }
  255. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  256. if decryptErr != nil {
  257. logger.Log(1, "failed to decrypt message during client peer update for node ", id, decryptErr.Error())
  258. return
  259. }
  260. switch decrypted[0] {
  261. case ncutils.ACK:
  262. // do we still need this
  263. case ncutils.DONE:
  264. if err = PublishPeerUpdate(); err != nil {
  265. logger.Log(1, "error publishing peer update for node", currentNode.ID.String(), err.Error())
  266. return
  267. }
  268. }
  269. logger.Log(1, "sent peer updates after signal received from", id)
  270. }
  271. func updateNodeMetrics(currentNode *models.Node, newMetrics *models.Metrics) bool {
  272. if newMetrics.FailoverPeers == nil {
  273. newMetrics.FailoverPeers = make(map[string]string)
  274. }
  275. oldMetrics, err := logic.GetMetrics(currentNode.ID.String())
  276. if err != nil {
  277. logger.Log(1, "error finding old metrics for node", currentNode.ID.String())
  278. return false
  279. }
  280. if oldMetrics.FailoverPeers == nil {
  281. oldMetrics.FailoverPeers = make(map[string]string)
  282. }
  283. var attachedClients []models.ExtClient
  284. if currentNode.IsIngressGateway {
  285. clients, err := logic.GetExtClientsByID(currentNode.ID.String(), currentNode.Network)
  286. if err == nil {
  287. attachedClients = clients
  288. }
  289. }
  290. if len(attachedClients) > 0 {
  291. // associate ext clients with IDs
  292. for i := range attachedClients {
  293. extMetric := newMetrics.Connectivity[attachedClients[i].PublicKey]
  294. if len(extMetric.NodeName) == 0 &&
  295. len(newMetrics.Connectivity[attachedClients[i].ClientID].NodeName) > 0 { // cover server clients
  296. extMetric = newMetrics.Connectivity[attachedClients[i].ClientID]
  297. if extMetric.TotalReceived > 0 && extMetric.TotalSent > 0 {
  298. extMetric.Connected = true
  299. }
  300. }
  301. extMetric.NodeName = attachedClients[i].ClientID
  302. delete(newMetrics.Connectivity, attachedClients[i].PublicKey)
  303. newMetrics.Connectivity[attachedClients[i].ClientID] = extMetric
  304. }
  305. }
  306. // run through metrics for each peer
  307. for k := range newMetrics.Connectivity {
  308. currMetric := newMetrics.Connectivity[k]
  309. oldMetric := oldMetrics.Connectivity[k]
  310. currMetric.TotalTime += oldMetric.TotalTime
  311. currMetric.Uptime += oldMetric.Uptime // get the total uptime for this connection
  312. if currMetric.CollectedByProxy {
  313. currMetric.TotalReceived += oldMetric.TotalReceived
  314. currMetric.TotalSent += oldMetric.TotalSent
  315. } else {
  316. if currMetric.TotalReceived < oldMetric.TotalReceived {
  317. currMetric.TotalReceived += oldMetric.TotalReceived
  318. } else {
  319. currMetric.TotalReceived += int64(math.Abs(float64(currMetric.TotalReceived) - float64(oldMetric.TotalReceived)))
  320. }
  321. if currMetric.TotalSent < oldMetric.TotalSent {
  322. currMetric.TotalSent += oldMetric.TotalSent
  323. } else {
  324. currMetric.TotalSent += int64(math.Abs(float64(currMetric.TotalSent) - float64(oldMetric.TotalSent)))
  325. }
  326. }
  327. if currMetric.Uptime == 0 || currMetric.TotalTime == 0 {
  328. currMetric.PercentUp = 0
  329. } else {
  330. currMetric.PercentUp = 100.0 * (float64(currMetric.Uptime) / float64(currMetric.TotalTime))
  331. }
  332. totalUpMinutes := currMetric.Uptime * ncutils.CheckInInterval
  333. currMetric.ActualUptime = time.Duration(totalUpMinutes) * time.Minute
  334. delete(oldMetrics.Connectivity, k) // remove from old data
  335. newMetrics.Connectivity[k] = currMetric
  336. }
  337. // add nodes that need failover
  338. nodes, err := logic.GetNetworkNodes(currentNode.Network)
  339. if err != nil {
  340. logger.Log(0, "failed to retrieve nodes while updating metrics")
  341. return false
  342. }
  343. for _, node := range nodes {
  344. if !newMetrics.Connectivity[node.ID.String()].Connected &&
  345. len(newMetrics.Connectivity[node.ID.String()].NodeName) > 0 &&
  346. node.Connected &&
  347. len(node.FailoverNode) > 0 &&
  348. !node.Failover {
  349. newMetrics.FailoverPeers[node.ID.String()] = node.FailoverNode.String()
  350. }
  351. }
  352. shouldUpdate := len(oldMetrics.FailoverPeers) == 0 && len(newMetrics.FailoverPeers) > 0
  353. for k, v := range oldMetrics.FailoverPeers {
  354. if len(newMetrics.FailoverPeers[k]) > 0 && len(v) == 0 {
  355. shouldUpdate = true
  356. }
  357. if len(v) > 0 && len(newMetrics.FailoverPeers[k]) == 0 {
  358. newMetrics.FailoverPeers[k] = v
  359. }
  360. }
  361. for k := range oldMetrics.Connectivity { // cleanup any left over data, self healing
  362. delete(newMetrics.Connectivity, k)
  363. }
  364. return shouldUpdate
  365. }
  366. func handleNewNodeDNS(host *models.Host, node *models.Node) error {
  367. dns := models.DNSUpdate{
  368. Action: models.DNSInsert,
  369. Name: host.Name + "." + node.Network,
  370. }
  371. if node.Address.IP != nil {
  372. dns.Address = node.Address.IP.String()
  373. if err := PublishDNSUpdate(node.Network, dns); err != nil {
  374. return err
  375. }
  376. } else if node.Address6.IP != nil {
  377. dns.Address = node.Address6.IP.String()
  378. if err := PublishDNSUpdate(node.Network, dns); err != nil {
  379. return err
  380. }
  381. }
  382. if err := PublishAllDNS(node); err != nil {
  383. return err
  384. }
  385. return nil
  386. }
  387. func handleHostCheckin(h, currentHost *models.Host) bool {
  388. if h == nil {
  389. return false
  390. }
  391. for i := range currentHost.Nodes {
  392. currNodeID := currentHost.Nodes[i]
  393. node, err := logic.GetNodeByID(currNodeID)
  394. if err != nil {
  395. if database.IsEmptyRecord(err) {
  396. fakeNode := models.Node{}
  397. fakeNode.ID, _ = uuid.Parse(currNodeID)
  398. fakeNode.Action = models.NODE_DELETE
  399. fakeNode.PendingDelete = true
  400. if err := NodeUpdate(&fakeNode); err != nil {
  401. logger.Log(0, "failed to inform host", currentHost.Name, currentHost.ID.String(), "to remove node", currNodeID, err.Error())
  402. }
  403. }
  404. continue
  405. }
  406. if err := logic.UpdateNodeCheckin(&node); err != nil {
  407. logger.Log(0, "error updating node", node.ID.String(), " on checkin", err.Error())
  408. }
  409. }
  410. for i := range h.Interfaces {
  411. h.Interfaces[i].AddressString = h.Interfaces[i].Address.String()
  412. }
  413. ifaceDelta := len(h.Interfaces) != len(currentHost.Interfaces) ||
  414. !h.EndpointIP.Equal(currentHost.EndpointIP) ||
  415. (len(h.NatType) > 0 && h.NatType != currentHost.NatType) ||
  416. h.DefaultInterface != currentHost.DefaultInterface
  417. if ifaceDelta { // only save if something changes
  418. currentHost.EndpointIP = h.EndpointIP
  419. currentHost.Interfaces = h.Interfaces
  420. currentHost.DefaultInterface = h.DefaultInterface
  421. currentHost.NatType = h.NatType
  422. if err := logic.UpsertHost(currentHost); err != nil {
  423. logger.Log(0, "failed to update host after check-in", h.Name, h.ID.String(), err.Error())
  424. return false
  425. }
  426. logger.Log(1, "updated host after check-in", currentHost.Name, currentHost.ID.String())
  427. }
  428. logger.Log(2, "check-in processed for host", h.Name, h.ID.String())
  429. return ifaceDelta
  430. }