handlers.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475
  1. package mq
  2. import (
  3. "context"
  4. "encoding/json"
  5. "fmt"
  6. "math"
  7. "time"
  8. mqtt "github.com/eclipse/paho.mqtt.golang"
  9. "github.com/google/uuid"
  10. "github.com/gravitl/netmaker/database"
  11. "github.com/gravitl/netmaker/logger"
  12. "github.com/gravitl/netmaker/logic"
  13. "github.com/gravitl/netmaker/logic/hostactions"
  14. "github.com/gravitl/netmaker/models"
  15. "github.com/gravitl/netmaker/netclient/ncutils"
  16. "github.com/gravitl/netmaker/servercfg"
  17. "golang.org/x/exp/slog"
  18. "golang.zx2c4.com/wireguard/wgctrl/wgtypes"
  19. )
  20. // DefaultHandler default message queue handler -- NOT USED
  21. func DefaultHandler(client mqtt.Client, msg mqtt.Message) {
  22. slog.Info("mqtt default handler", "topic", msg.Topic(), "message", msg.Payload())
  23. }
  24. // UpdateNode message Handler -- handles updates from client nodes
  25. func UpdateNode(client mqtt.Client, msg mqtt.Message) {
  26. id, err := getID(msg.Topic())
  27. if err != nil {
  28. slog.Error("error getting node.ID ", "topic", msg.Topic(), "error", err)
  29. return
  30. }
  31. currentNode, err := logic.GetNodeByID(id)
  32. if err != nil {
  33. slog.Error("error getting node", "id", id, "error", err)
  34. return
  35. }
  36. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  37. if decryptErr != nil {
  38. slog.Error("failed to decrypt message for node", "id", id, "error", decryptErr)
  39. return
  40. }
  41. var newNode models.Node
  42. if err := json.Unmarshal(decrypted, &newNode); err != nil {
  43. slog.Error("error unmarshaling payload", "error", err)
  44. return
  45. }
  46. ifaceDelta := logic.IfaceDelta(&currentNode, &newNode)
  47. if servercfg.Is_EE && ifaceDelta {
  48. if err = logic.EnterpriseResetAllPeersFailovers(currentNode.ID, currentNode.Network); err != nil {
  49. slog.Warn("failed to reset failover list during node update", "nodeid", currentNode.ID, "network", currentNode.Network)
  50. }
  51. }
  52. newNode.SetLastCheckIn()
  53. if err := logic.UpdateNode(&currentNode, &newNode); err != nil {
  54. slog.Error("error saving node", "id", id, "error", err)
  55. return
  56. }
  57. if ifaceDelta { // reduce number of unneeded updates, by only sending on iface changes
  58. h, err := logic.GetHost(newNode.HostID.String())
  59. if err != nil {
  60. return
  61. }
  62. peers, err := logic.GetNetworkClients(newNode.Network)
  63. if err != nil {
  64. slog.Warn("error getting network clients: ", "error", err)
  65. }
  66. for _, client := range peers {
  67. PubPeerUpdateToHost(&client.Host)
  68. }
  69. if nodes, err := logic.GetNetworkNodes(newNode.Network); err == nil {
  70. FlushNetworkPeersToHost(h, &newNode, nodes)
  71. }
  72. }
  73. slog.Info("updated node", "id", id, "newnodeid", newNode.ID)
  74. }
  75. // UpdateHost message Handler -- handles host updates from clients
  76. func UpdateHost(client mqtt.Client, msg mqtt.Message) {
  77. id, err := getID(msg.Topic())
  78. if err != nil {
  79. slog.Error("error getting host.ID sent on ", "topic", msg.Topic(), "error", err)
  80. return
  81. }
  82. currentHost, err := logic.GetHost(id)
  83. if err != nil {
  84. slog.Error("error getting host", "id", id, "error", err)
  85. return
  86. }
  87. decrypted, decryptErr := decryptMsgWithHost(currentHost, msg.Payload())
  88. if decryptErr != nil {
  89. slog.Error("failed to decrypt message for host", "id", id, "error", decryptErr)
  90. return
  91. }
  92. var hostUpdate models.HostUpdate
  93. if err := json.Unmarshal(decrypted, &hostUpdate); err != nil {
  94. slog.Error("error unmarshaling payload", "error", err)
  95. return
  96. }
  97. slog.Info("recieved host update", "name", hostUpdate.Host.Name, "id", hostUpdate.Host.ID)
  98. var sendPeerUpdate bool
  99. switch hostUpdate.Action {
  100. case models.CheckIn:
  101. sendPeerUpdate = handleHostCheckin(&hostUpdate.Host, currentHost)
  102. case models.Acknowledgement:
  103. hu := hostactions.GetAction(currentHost.ID.String())
  104. if hu != nil {
  105. if err = HostUpdate(hu); err != nil {
  106. slog.Error("failed to send new node to host", "name", hostUpdate.Host.Name, "id", currentHost.ID, "error", err)
  107. return
  108. } else {
  109. if servercfg.GetBrokerType() == servercfg.EmqxBrokerType {
  110. if err = AppendNodeUpdateACL(hu.Host.ID.String(), hu.Node.Network, hu.Node.ID.String(), servercfg.GetServer()); err != nil {
  111. slog.Error("failed to add ACLs for EMQX node", "error", err)
  112. return
  113. }
  114. }
  115. // flush peers to host
  116. nodes, err := logic.GetNetworkNodes(hu.Node.Network)
  117. if err != nil {
  118. return
  119. }
  120. err = FlushNetworkPeersToHost(&hu.Host, &hu.Node, nodes)
  121. if err != nil {
  122. logger.Log(0, "failed to flush peers to host: ", err.Error())
  123. }
  124. if err = handleNewNodeDNS(&hu.Host, &hu.Node); err != nil {
  125. slog.Error("failed to send dns update after node added to host", "name", hostUpdate.Host.Name, "id", currentHost.ID, "error", err)
  126. return
  127. }
  128. }
  129. }
  130. case models.UpdateHost:
  131. if hostUpdate.Host.PublicKey != currentHost.PublicKey {
  132. //remove old peer entry
  133. peerUpdate := models.HostPeerUpdate{
  134. ServerVersion: servercfg.GetVersion(),
  135. Peers: []wgtypes.PeerConfig{
  136. {
  137. PublicKey: currentHost.PublicKey,
  138. Remove: true,
  139. },
  140. },
  141. }
  142. data, err := json.Marshal(&peerUpdate)
  143. if err != nil {
  144. slog.Error("failed to marshal peer update", "error", err)
  145. }
  146. hosts := logic.GetRelatedHosts(hostUpdate.Host.ID.String())
  147. server := servercfg.GetServer()
  148. for _, host := range hosts {
  149. publish(&host, fmt.Sprintf("peers/host/%s/%s", host.ID.String(), server), data)
  150. }
  151. }
  152. sendPeerUpdate = logic.UpdateHostFromClient(&hostUpdate.Host, currentHost)
  153. err := logic.UpsertHost(currentHost)
  154. if err != nil {
  155. slog.Error("failed to update host", "id", currentHost.ID, "error", err)
  156. return
  157. }
  158. case models.DeleteHost:
  159. if servercfg.GetBrokerType() == servercfg.EmqxBrokerType {
  160. // delete EMQX credentials for host
  161. if err := DeleteEmqxUser(currentHost.ID.String()); err != nil {
  162. slog.Error("failed to remove host credentials from EMQX", "id", currentHost.ID, "error", err)
  163. return
  164. }
  165. }
  166. if err := logic.DisassociateAllNodesFromHost(currentHost.ID.String()); err != nil {
  167. slog.Error("failed to delete all nodes of host", "id", currentHost.ID, "error", err)
  168. return
  169. }
  170. if err := logic.RemoveHostByID(currentHost.ID.String()); err != nil {
  171. slog.Error("failed to delete host", "id", currentHost.ID, "error", err)
  172. return
  173. }
  174. sendPeerUpdate = true
  175. case models.RegisterWithTurn:
  176. if servercfg.IsUsingTurn() {
  177. err = logic.RegisterHostWithTurn(hostUpdate.Host.ID.String(), hostUpdate.Host.HostPass)
  178. if err != nil {
  179. slog.Error("failed to register host with turn server", "id", currentHost.ID, "error", err)
  180. return
  181. }
  182. }
  183. }
  184. if sendPeerUpdate {
  185. err := PublishPeerUpdate()
  186. if err != nil {
  187. slog.Error("failed to publish peer update", "error", err)
  188. }
  189. }
  190. // if servercfg.Is_EE && ifaceDelta {
  191. // if err = logic.EnterpriseResetAllPeersFailovers(currentHost.ID.String(), currentHost.Network); err != nil {
  192. // logger.Log(1, "failed to reset failover list during node update", currentHost.ID.String(), currentHost.Network)
  193. // }
  194. // }
  195. }
  196. // UpdateMetrics message Handler -- handles updates from client nodes for metrics
  197. func UpdateMetrics(client mqtt.Client, msg mqtt.Message) {
  198. if servercfg.Is_EE {
  199. id, err := getID(msg.Topic())
  200. if err != nil {
  201. slog.Error("error getting ID sent on ", "topic", msg.Topic(), "error", err)
  202. return
  203. }
  204. currentNode, err := logic.GetNodeByID(id)
  205. if err != nil {
  206. slog.Error("error getting node", "id", id, "error", err)
  207. return
  208. }
  209. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  210. if decryptErr != nil {
  211. slog.Error("failed to decrypt message for node", "id", id, "error", decryptErr)
  212. return
  213. }
  214. var newMetrics models.Metrics
  215. if err := json.Unmarshal(decrypted, &newMetrics); err != nil {
  216. slog.Error("error unmarshaling payload", "error", err)
  217. return
  218. }
  219. shouldUpdate := updateNodeMetrics(&currentNode, &newMetrics)
  220. if err = logic.UpdateMetrics(id, &newMetrics); err != nil {
  221. slog.Error("failed to update node metrics", "id", id, "error", err)
  222. return
  223. }
  224. if servercfg.IsMetricsExporter() {
  225. if err := pushMetricsToExporter(newMetrics); err != nil {
  226. slog.Error("failed to push node metrics to exporter", "id", currentNode.ID, "error", err)
  227. }
  228. }
  229. if newMetrics.Connectivity != nil {
  230. err := logic.EnterpriseFailoverFunc(&currentNode)
  231. if err != nil {
  232. slog.Error("failed to failover for node", "id", currentNode.ID, "network", currentNode.Network, "error", err)
  233. }
  234. }
  235. if shouldUpdate {
  236. slog.Info("updating peers after node detected connectivity issues", "id", currentNode.ID, "network", currentNode.Network)
  237. host, err := logic.GetHost(currentNode.HostID.String())
  238. if err == nil {
  239. if err = PublishSingleHostPeerUpdate(context.Background(), host, nil, nil); err != nil {
  240. slog.Warn("failed to publish update after failover peer change for node", "id", currentNode.ID, "network", currentNode.Network, "error", err)
  241. }
  242. }
  243. }
  244. slog.Info("updated node metrics", "id", id)
  245. }
  246. }
  247. // ClientPeerUpdate message handler -- handles updating peers after signal from client nodes
  248. func ClientPeerUpdate(client mqtt.Client, msg mqtt.Message) {
  249. id, err := getID(msg.Topic())
  250. if err != nil {
  251. slog.Error("error getting node.ID sent on ", "topic", msg.Topic(), "error", err)
  252. return
  253. }
  254. currentNode, err := logic.GetNodeByID(id)
  255. if err != nil {
  256. slog.Error("error getting node", "id", id, "error", err)
  257. return
  258. }
  259. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  260. if decryptErr != nil {
  261. slog.Error("failed to decrypt message for node", "id", id, "error", decryptErr)
  262. return
  263. }
  264. switch decrypted[0] {
  265. case ncutils.ACK:
  266. // do we still need this
  267. case ncutils.DONE:
  268. if err = PublishPeerUpdate(); err != nil {
  269. slog.Error("error publishing peer update for node", "id", currentNode.ID, "error", err)
  270. return
  271. }
  272. }
  273. slog.Info("sent peer updates after signal received from", "id", id)
  274. }
  275. func updateNodeMetrics(currentNode *models.Node, newMetrics *models.Metrics) bool {
  276. if newMetrics.FailoverPeers == nil {
  277. newMetrics.FailoverPeers = make(map[string]string)
  278. }
  279. oldMetrics, err := logic.GetMetrics(currentNode.ID.String())
  280. if err != nil {
  281. slog.Error("error finding old metrics for node", "id", currentNode.ID, "error", err)
  282. return false
  283. }
  284. if oldMetrics.FailoverPeers == nil {
  285. oldMetrics.FailoverPeers = make(map[string]string)
  286. }
  287. var attachedClients []models.ExtClient
  288. if currentNode.IsIngressGateway {
  289. clients, err := logic.GetExtClientsByID(currentNode.ID.String(), currentNode.Network)
  290. if err == nil {
  291. attachedClients = clients
  292. }
  293. }
  294. if len(attachedClients) > 0 {
  295. // associate ext clients with IDs
  296. for i := range attachedClients {
  297. extMetric := newMetrics.Connectivity[attachedClients[i].PublicKey]
  298. if len(extMetric.NodeName) == 0 &&
  299. len(newMetrics.Connectivity[attachedClients[i].ClientID].NodeName) > 0 { // cover server clients
  300. extMetric = newMetrics.Connectivity[attachedClients[i].ClientID]
  301. if extMetric.TotalReceived > 0 && extMetric.TotalSent > 0 {
  302. extMetric.Connected = true
  303. }
  304. }
  305. extMetric.NodeName = attachedClients[i].ClientID
  306. delete(newMetrics.Connectivity, attachedClients[i].PublicKey)
  307. newMetrics.Connectivity[attachedClients[i].ClientID] = extMetric
  308. }
  309. }
  310. // run through metrics for each peer
  311. for k := range newMetrics.Connectivity {
  312. currMetric := newMetrics.Connectivity[k]
  313. oldMetric := oldMetrics.Connectivity[k]
  314. currMetric.TotalTime += oldMetric.TotalTime
  315. currMetric.Uptime += oldMetric.Uptime // get the total uptime for this connection
  316. if currMetric.CollectedByProxy {
  317. currMetric.TotalReceived += oldMetric.TotalReceived
  318. currMetric.TotalSent += oldMetric.TotalSent
  319. } else {
  320. if currMetric.TotalReceived < oldMetric.TotalReceived {
  321. currMetric.TotalReceived += oldMetric.TotalReceived
  322. } else {
  323. currMetric.TotalReceived += int64(math.Abs(float64(currMetric.TotalReceived) - float64(oldMetric.TotalReceived)))
  324. }
  325. if currMetric.TotalSent < oldMetric.TotalSent {
  326. currMetric.TotalSent += oldMetric.TotalSent
  327. } else {
  328. currMetric.TotalSent += int64(math.Abs(float64(currMetric.TotalSent) - float64(oldMetric.TotalSent)))
  329. }
  330. }
  331. if currMetric.Uptime == 0 || currMetric.TotalTime == 0 {
  332. currMetric.PercentUp = 0
  333. } else {
  334. currMetric.PercentUp = 100.0 * (float64(currMetric.Uptime) / float64(currMetric.TotalTime))
  335. }
  336. totalUpMinutes := currMetric.Uptime * ncutils.CheckInInterval
  337. currMetric.ActualUptime = time.Duration(totalUpMinutes) * time.Minute
  338. delete(oldMetrics.Connectivity, k) // remove from old data
  339. newMetrics.Connectivity[k] = currMetric
  340. }
  341. // add nodes that need failover
  342. nodes, err := logic.GetNetworkNodes(currentNode.Network)
  343. if err != nil {
  344. slog.Error("failed to retrieve nodes while updating metrics", "error", err)
  345. return false
  346. }
  347. for _, node := range nodes {
  348. if !newMetrics.Connectivity[node.ID.String()].Connected &&
  349. len(newMetrics.Connectivity[node.ID.String()].NodeName) > 0 &&
  350. node.Connected &&
  351. len(node.FailoverNode) > 0 &&
  352. !node.Failover {
  353. newMetrics.FailoverPeers[node.ID.String()] = node.FailoverNode.String()
  354. }
  355. }
  356. shouldUpdate := len(oldMetrics.FailoverPeers) == 0 && len(newMetrics.FailoverPeers) > 0
  357. for k, v := range oldMetrics.FailoverPeers {
  358. if len(newMetrics.FailoverPeers[k]) > 0 && len(v) == 0 {
  359. shouldUpdate = true
  360. }
  361. if len(v) > 0 && len(newMetrics.FailoverPeers[k]) == 0 {
  362. newMetrics.FailoverPeers[k] = v
  363. }
  364. }
  365. for k := range oldMetrics.Connectivity { // cleanup any left over data, self healing
  366. delete(newMetrics.Connectivity, k)
  367. }
  368. return shouldUpdate
  369. }
  370. func handleNewNodeDNS(host *models.Host, node *models.Node) error {
  371. dns := models.DNSUpdate{
  372. Action: models.DNSInsert,
  373. Name: host.Name + "." + node.Network,
  374. }
  375. if node.Address.IP != nil {
  376. dns.Address = node.Address.IP.String()
  377. if err := PublishDNSUpdate(node.Network, dns); err != nil {
  378. return err
  379. }
  380. } else if node.Address6.IP != nil {
  381. dns.Address = node.Address6.IP.String()
  382. if err := PublishDNSUpdate(node.Network, dns); err != nil {
  383. return err
  384. }
  385. }
  386. if err := PublishAllDNS(node); err != nil {
  387. return err
  388. }
  389. return nil
  390. }
  391. func handleHostCheckin(h, currentHost *models.Host) bool {
  392. if h == nil {
  393. return false
  394. }
  395. for i := range currentHost.Nodes {
  396. currNodeID := currentHost.Nodes[i]
  397. node, err := logic.GetNodeByID(currNodeID)
  398. if err != nil {
  399. if database.IsEmptyRecord(err) {
  400. fakeNode := models.Node{}
  401. fakeNode.ID, _ = uuid.Parse(currNodeID)
  402. fakeNode.Action = models.NODE_DELETE
  403. fakeNode.PendingDelete = true
  404. if err := NodeUpdate(&fakeNode); err != nil {
  405. slog.Warn("failed to inform host to remove node", "host", currentHost.Name, "hostid", currentHost.ID, "nodeid", currNodeID, "error", err)
  406. }
  407. }
  408. continue
  409. }
  410. if err := logic.UpdateNodeCheckin(&node); err != nil {
  411. slog.Warn("failed to update node on checkin", "nodeid", node.ID, "error", err)
  412. }
  413. }
  414. for i := range h.Interfaces {
  415. h.Interfaces[i].AddressString = h.Interfaces[i].Address.String()
  416. }
  417. /// version or firewall in use change does not require a peerUpdate
  418. if h.Version != currentHost.Version || h.FirewallInUse != currentHost.FirewallInUse {
  419. currentHost.FirewallInUse = h.FirewallInUse
  420. currentHost.Version = h.Version
  421. if err := logic.UpsertHost(currentHost); err != nil {
  422. slog.Error("failed to update host after check-in", "name", h.Name, "id", h.ID, "error", err)
  423. return false
  424. }
  425. }
  426. ifaceDelta := len(h.Interfaces) != len(currentHost.Interfaces) ||
  427. !h.EndpointIP.Equal(currentHost.EndpointIP) ||
  428. (len(h.NatType) > 0 && h.NatType != currentHost.NatType) ||
  429. h.DefaultInterface != currentHost.DefaultInterface ||
  430. h.EndpointDetection != servercfg.EndpointDetectionEnabled()
  431. if ifaceDelta { // only save if something changes
  432. currentHost.EndpointIP = h.EndpointIP
  433. currentHost.Interfaces = h.Interfaces
  434. currentHost.DefaultInterface = h.DefaultInterface
  435. currentHost.NatType = h.NatType
  436. if err := logic.UpsertHost(currentHost); err != nil {
  437. slog.Error("failed to update host after check-in", "name", h.Name, "id", h.ID, "error", err)
  438. return false
  439. }
  440. slog.Info("updated host after check-in", "name", currentHost.Name, "id", currentHost.ID)
  441. }
  442. slog.Info("check-in processed for host", "name", h.Name, "id", h.ID)
  443. return ifaceDelta
  444. }