handlers.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  1. package mq
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "math"
  6. "time"
  7. mqtt "github.com/eclipse/paho.mqtt.golang"
  8. "github.com/google/uuid"
  9. "github.com/gravitl/netmaker/database"
  10. "github.com/gravitl/netmaker/logic"
  11. "github.com/gravitl/netmaker/logic/hostactions"
  12. "github.com/gravitl/netmaker/models"
  13. "github.com/gravitl/netmaker/netclient/ncutils"
  14. "github.com/gravitl/netmaker/servercfg"
  15. "golang.org/x/exp/slog"
  16. "golang.zx2c4.com/wireguard/wgctrl/wgtypes"
  17. )
  18. // DefaultHandler default message queue handler -- NOT USED
  19. func DefaultHandler(client mqtt.Client, msg mqtt.Message) {
  20. slog.Info("mqtt default handler", "topic", msg.Topic(), "message", msg.Payload())
  21. }
  22. // UpdateNode message Handler -- handles updates from client nodes
  23. func UpdateNode(client mqtt.Client, msg mqtt.Message) {
  24. id, err := getID(msg.Topic())
  25. if err != nil {
  26. slog.Error("error getting node.ID ", "topic", msg.Topic(), "error", err)
  27. return
  28. }
  29. currentNode, err := logic.GetNodeByID(id)
  30. if err != nil {
  31. slog.Error("error getting node", "id", id, "error", err)
  32. return
  33. }
  34. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  35. if decryptErr != nil {
  36. slog.Error("failed to decrypt message for node", "id", id, "error", decryptErr)
  37. return
  38. }
  39. var newNode models.Node
  40. if err := json.Unmarshal(decrypted, &newNode); err != nil {
  41. slog.Error("error unmarshaling payload", "error", err)
  42. return
  43. }
  44. ifaceDelta := logic.IfaceDelta(&currentNode, &newNode)
  45. if servercfg.Is_EE && ifaceDelta {
  46. if err = logic.EnterpriseResetAllPeersFailovers(currentNode.ID, currentNode.Network); err != nil {
  47. slog.Warn("failed to reset failover list during node update", "nodeid", currentNode.ID, "network", currentNode.Network)
  48. }
  49. }
  50. newNode.SetLastCheckIn()
  51. if err := logic.UpdateNode(&currentNode, &newNode); err != nil {
  52. slog.Error("error saving node", "id", id, "error", err)
  53. return
  54. }
  55. if ifaceDelta { // reduce number of unneeded updates, by only sending on iface changes
  56. if err = PublishPeerUpdate(); err != nil {
  57. slog.Warn("error updating peers when node informed the server of an interface change", "nodeid", currentNode.ID, "error", err)
  58. }
  59. }
  60. slog.Info("updated node", "id", id, "newnodeid", newNode.ID)
  61. }
  62. // UpdateHost message Handler -- handles host updates from clients
  63. func UpdateHost(client mqtt.Client, msg mqtt.Message) {
  64. id, err := getID(msg.Topic())
  65. if err != nil {
  66. slog.Error("error getting host.ID sent on ", "topic", msg.Topic(), "error", err)
  67. return
  68. }
  69. currentHost, err := logic.GetHost(id)
  70. if err != nil {
  71. slog.Error("error getting host", "id", id, "error", err)
  72. return
  73. }
  74. decrypted, decryptErr := decryptMsgWithHost(currentHost, msg.Payload())
  75. if decryptErr != nil {
  76. slog.Error("failed to decrypt message for host", "id", id, "error", decryptErr)
  77. return
  78. }
  79. var hostUpdate models.HostUpdate
  80. if err := json.Unmarshal(decrypted, &hostUpdate); err != nil {
  81. slog.Error("error unmarshaling payload", "error", err)
  82. return
  83. }
  84. slog.Info("recieved host update", "name", hostUpdate.Host.Name, "id", hostUpdate.Host.ID)
  85. var sendPeerUpdate bool
  86. switch hostUpdate.Action {
  87. case models.CheckIn:
  88. sendPeerUpdate = handleHostCheckin(&hostUpdate.Host, currentHost)
  89. case models.Acknowledgement:
  90. hu := hostactions.GetAction(currentHost.ID.String())
  91. if hu != nil {
  92. if err = HostUpdate(hu); err != nil {
  93. slog.Error("failed to send new node to host", "name", hostUpdate.Host.Name, "id", currentHost.ID, "error", err)
  94. return
  95. } else {
  96. if servercfg.GetBrokerType() == servercfg.EmqxBrokerType {
  97. if err = AppendNodeUpdateACL(hu.Host.ID.String(), hu.Node.Network, hu.Node.ID.String(), servercfg.GetServer()); err != nil {
  98. slog.Error("failed to add ACLs for EMQX node", "error", err)
  99. return
  100. }
  101. }
  102. nodes, err := logic.GetAllNodes()
  103. if err != nil {
  104. return
  105. }
  106. if err = PublishSingleHostPeerUpdate(currentHost, nodes, nil, nil); err != nil {
  107. slog.Error("failed peers publish after join acknowledged", "name", hostUpdate.Host.Name, "id", currentHost.ID, "error", err)
  108. return
  109. }
  110. if err = handleNewNodeDNS(&hu.Host, &hu.Node); err != nil {
  111. slog.Error("failed to send dns update after node added to host", "name", hostUpdate.Host.Name, "id", currentHost.ID, "error", err)
  112. return
  113. }
  114. }
  115. }
  116. case models.UpdateHost:
  117. if hostUpdate.Host.PublicKey != currentHost.PublicKey {
  118. //remove old peer entry
  119. peerUpdate := models.HostPeerUpdate{
  120. ServerVersion: servercfg.GetVersion(),
  121. Peers: []wgtypes.PeerConfig{
  122. {
  123. PublicKey: currentHost.PublicKey,
  124. Remove: true,
  125. },
  126. },
  127. }
  128. data, err := json.Marshal(&peerUpdate)
  129. if err != nil {
  130. slog.Error("failed to marshal peer update", "error", err)
  131. }
  132. hosts := logic.GetRelatedHosts(hostUpdate.Host.ID.String())
  133. server := servercfg.GetServer()
  134. for _, host := range hosts {
  135. publish(&host, fmt.Sprintf("peers/host/%s/%s", host.ID.String(), server), data)
  136. }
  137. }
  138. sendPeerUpdate = logic.UpdateHostFromClient(&hostUpdate.Host, currentHost)
  139. err := logic.UpsertHost(currentHost)
  140. if err != nil {
  141. slog.Error("failed to update host", "id", currentHost.ID, "error", err)
  142. return
  143. }
  144. case models.DeleteHost:
  145. if servercfg.GetBrokerType() == servercfg.EmqxBrokerType {
  146. // delete EMQX credentials for host
  147. if err := DeleteEmqxUser(currentHost.ID.String()); err != nil {
  148. slog.Error("failed to remove host credentials from EMQX", "id", currentHost.ID, "error", err)
  149. return
  150. }
  151. }
  152. if err := logic.DisassociateAllNodesFromHost(currentHost.ID.String()); err != nil {
  153. slog.Error("failed to delete all nodes of host", "id", currentHost.ID, "error", err)
  154. return
  155. }
  156. if err := logic.RemoveHostByID(currentHost.ID.String()); err != nil {
  157. slog.Error("failed to delete host", "id", currentHost.ID, "error", err)
  158. return
  159. }
  160. sendPeerUpdate = true
  161. case models.RegisterWithTurn:
  162. if servercfg.IsUsingTurn() {
  163. err = logic.RegisterHostWithTurn(hostUpdate.Host.ID.String(), hostUpdate.Host.HostPass)
  164. if err != nil {
  165. slog.Error("failed to register host with turn server", "id", currentHost.ID, "error", err)
  166. return
  167. }
  168. }
  169. }
  170. if sendPeerUpdate {
  171. err := PublishPeerUpdate()
  172. if err != nil {
  173. slog.Error("failed to publish peer update", "error", err)
  174. }
  175. }
  176. // if servercfg.Is_EE && ifaceDelta {
  177. // if err = logic.EnterpriseResetAllPeersFailovers(currentHost.ID.String(), currentHost.Network); err != nil {
  178. // logger.Log(1, "failed to reset failover list during node update", currentHost.ID.String(), currentHost.Network)
  179. // }
  180. // }
  181. }
  182. // UpdateMetrics message Handler -- handles updates from client nodes for metrics
  183. func UpdateMetrics(client mqtt.Client, msg mqtt.Message) {
  184. if servercfg.Is_EE {
  185. id, err := getID(msg.Topic())
  186. if err != nil {
  187. slog.Error("error getting ID sent on ", "topic", msg.Topic(), "error", err)
  188. return
  189. }
  190. currentNode, err := logic.GetNodeByID(id)
  191. if err != nil {
  192. slog.Error("error getting node", "id", id, "error", err)
  193. return
  194. }
  195. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  196. if decryptErr != nil {
  197. slog.Error("failed to decrypt message for node", "id", id, "error", decryptErr)
  198. return
  199. }
  200. var newMetrics models.Metrics
  201. if err := json.Unmarshal(decrypted, &newMetrics); err != nil {
  202. slog.Error("error unmarshaling payload", "error", err)
  203. return
  204. }
  205. shouldUpdate := updateNodeMetrics(&currentNode, &newMetrics)
  206. if err = logic.UpdateMetrics(id, &newMetrics); err != nil {
  207. slog.Error("failed to update node metrics", "id", id, "error", err)
  208. return
  209. }
  210. if servercfg.IsMetricsExporter() {
  211. if err := pushMetricsToExporter(newMetrics); err != nil {
  212. slog.Error("failed to push node metrics to exporter", "id", currentNode.ID, "error", err)
  213. }
  214. }
  215. if newMetrics.Connectivity != nil {
  216. err := logic.EnterpriseFailoverFunc(&currentNode)
  217. if err != nil {
  218. slog.Error("failed to failover for node", "id", currentNode.ID, "network", currentNode.Network, "error", err)
  219. }
  220. }
  221. if shouldUpdate {
  222. slog.Info("updating peers after node detected connectivity issues", "id", currentNode.ID, "network", currentNode.Network)
  223. host, err := logic.GetHost(currentNode.HostID.String())
  224. if err == nil {
  225. nodes, err := logic.GetAllNodes()
  226. if err != nil {
  227. return
  228. }
  229. if err = PublishSingleHostPeerUpdate(host, nodes, nil, nil); err != nil {
  230. slog.Warn("failed to publish update after failover peer change for node", "id", currentNode.ID, "network", currentNode.Network, "error", err)
  231. }
  232. }
  233. }
  234. slog.Debug("updated node metrics", "id", id)
  235. }
  236. }
  237. // ClientPeerUpdate message handler -- handles updating peers after signal from client nodes
  238. func ClientPeerUpdate(client mqtt.Client, msg mqtt.Message) {
  239. id, err := getID(msg.Topic())
  240. if err != nil {
  241. slog.Error("error getting node.ID sent on ", "topic", msg.Topic(), "error", err)
  242. return
  243. }
  244. currentNode, err := logic.GetNodeByID(id)
  245. if err != nil {
  246. slog.Error("error getting node", "id", id, "error", err)
  247. return
  248. }
  249. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  250. if decryptErr != nil {
  251. slog.Error("failed to decrypt message for node", "id", id, "error", decryptErr)
  252. return
  253. }
  254. switch decrypted[0] {
  255. case ncutils.ACK:
  256. // do we still need this
  257. case ncutils.DONE:
  258. if err = PublishPeerUpdate(); err != nil {
  259. slog.Error("error publishing peer update for node", "id", currentNode.ID, "error", err)
  260. return
  261. }
  262. }
  263. slog.Info("sent peer updates after signal received from", "id", id)
  264. }
  265. func updateNodeMetrics(currentNode *models.Node, newMetrics *models.Metrics) bool {
  266. if newMetrics.FailoverPeers == nil {
  267. newMetrics.FailoverPeers = make(map[string]string)
  268. }
  269. oldMetrics, err := logic.GetMetrics(currentNode.ID.String())
  270. if err != nil {
  271. slog.Error("error finding old metrics for node", "id", currentNode.ID, "error", err)
  272. return false
  273. }
  274. if oldMetrics.FailoverPeers == nil {
  275. oldMetrics.FailoverPeers = make(map[string]string)
  276. }
  277. var attachedClients []models.ExtClient
  278. if currentNode.IsIngressGateway {
  279. clients, err := logic.GetExtClientsByID(currentNode.ID.String(), currentNode.Network)
  280. if err == nil {
  281. attachedClients = clients
  282. }
  283. }
  284. if len(attachedClients) > 0 {
  285. // associate ext clients with IDs
  286. for i := range attachedClients {
  287. extMetric := newMetrics.Connectivity[attachedClients[i].PublicKey]
  288. if len(extMetric.NodeName) == 0 &&
  289. len(newMetrics.Connectivity[attachedClients[i].ClientID].NodeName) > 0 { // cover server clients
  290. extMetric = newMetrics.Connectivity[attachedClients[i].ClientID]
  291. if extMetric.TotalReceived > 0 && extMetric.TotalSent > 0 {
  292. extMetric.Connected = true
  293. }
  294. }
  295. extMetric.NodeName = attachedClients[i].ClientID
  296. delete(newMetrics.Connectivity, attachedClients[i].PublicKey)
  297. newMetrics.Connectivity[attachedClients[i].ClientID] = extMetric
  298. }
  299. }
  300. // run through metrics for each peer
  301. for k := range newMetrics.Connectivity {
  302. currMetric := newMetrics.Connectivity[k]
  303. oldMetric := oldMetrics.Connectivity[k]
  304. currMetric.TotalTime += oldMetric.TotalTime
  305. currMetric.Uptime += oldMetric.Uptime // get the total uptime for this connection
  306. if currMetric.TotalReceived < oldMetric.TotalReceived {
  307. currMetric.TotalReceived += oldMetric.TotalReceived
  308. } else {
  309. currMetric.TotalReceived += int64(math.Abs(float64(currMetric.TotalReceived) - float64(oldMetric.TotalReceived)))
  310. }
  311. if currMetric.TotalSent < oldMetric.TotalSent {
  312. currMetric.TotalSent += oldMetric.TotalSent
  313. } else {
  314. currMetric.TotalSent += int64(math.Abs(float64(currMetric.TotalSent) - float64(oldMetric.TotalSent)))
  315. }
  316. if currMetric.Uptime == 0 || currMetric.TotalTime == 0 {
  317. currMetric.PercentUp = 0
  318. } else {
  319. currMetric.PercentUp = 100.0 * (float64(currMetric.Uptime) / float64(currMetric.TotalTime))
  320. }
  321. totalUpMinutes := currMetric.Uptime * ncutils.CheckInInterval
  322. currMetric.ActualUptime = time.Duration(totalUpMinutes) * time.Minute
  323. delete(oldMetrics.Connectivity, k) // remove from old data
  324. newMetrics.Connectivity[k] = currMetric
  325. }
  326. // add nodes that need failover
  327. nodes, err := logic.GetNetworkNodes(currentNode.Network)
  328. if err != nil {
  329. slog.Error("failed to retrieve nodes while updating metrics", "error", err)
  330. return false
  331. }
  332. for _, node := range nodes {
  333. if !newMetrics.Connectivity[node.ID.String()].Connected &&
  334. len(newMetrics.Connectivity[node.ID.String()].NodeName) > 0 &&
  335. node.Connected &&
  336. len(node.FailoverNode) > 0 &&
  337. !node.Failover {
  338. newMetrics.FailoverPeers[node.ID.String()] = node.FailoverNode.String()
  339. }
  340. }
  341. shouldUpdate := len(oldMetrics.FailoverPeers) == 0 && len(newMetrics.FailoverPeers) > 0
  342. for k, v := range oldMetrics.FailoverPeers {
  343. if len(newMetrics.FailoverPeers[k]) > 0 && len(v) == 0 {
  344. shouldUpdate = true
  345. }
  346. if len(v) > 0 && len(newMetrics.FailoverPeers[k]) == 0 {
  347. newMetrics.FailoverPeers[k] = v
  348. }
  349. }
  350. for k := range oldMetrics.Connectivity { // cleanup any left over data, self healing
  351. delete(newMetrics.Connectivity, k)
  352. }
  353. return shouldUpdate
  354. }
  355. func handleNewNodeDNS(host *models.Host, node *models.Node) error {
  356. dns := models.DNSUpdate{
  357. Action: models.DNSInsert,
  358. Name: host.Name + "." + node.Network,
  359. }
  360. if node.Address.IP != nil {
  361. dns.Address = node.Address.IP.String()
  362. if err := PublishDNSUpdate(node.Network, dns); err != nil {
  363. return err
  364. }
  365. } else if node.Address6.IP != nil {
  366. dns.Address = node.Address6.IP.String()
  367. if err := PublishDNSUpdate(node.Network, dns); err != nil {
  368. return err
  369. }
  370. }
  371. if err := PublishAllDNS(node); err != nil {
  372. return err
  373. }
  374. return nil
  375. }
  376. func handleHostCheckin(h, currentHost *models.Host) bool {
  377. if h == nil {
  378. return false
  379. }
  380. for i := range currentHost.Nodes {
  381. currNodeID := currentHost.Nodes[i]
  382. node, err := logic.GetNodeByID(currNodeID)
  383. if err != nil {
  384. if database.IsEmptyRecord(err) {
  385. fakeNode := models.Node{}
  386. fakeNode.ID, _ = uuid.Parse(currNodeID)
  387. fakeNode.Action = models.NODE_DELETE
  388. fakeNode.PendingDelete = true
  389. if err := NodeUpdate(&fakeNode); err != nil {
  390. slog.Warn("failed to inform host to remove node", "host", currentHost.Name, "hostid", currentHost.ID, "nodeid", currNodeID, "error", err)
  391. }
  392. }
  393. continue
  394. }
  395. if err := logic.UpdateNodeCheckin(&node); err != nil {
  396. slog.Warn("failed to update node on checkin", "nodeid", node.ID, "error", err)
  397. }
  398. }
  399. for i := range h.Interfaces {
  400. h.Interfaces[i].AddressString = h.Interfaces[i].Address.String()
  401. }
  402. /// version or firewall in use change does not require a peerUpdate
  403. if h.Version != currentHost.Version || h.FirewallInUse != currentHost.FirewallInUse {
  404. currentHost.FirewallInUse = h.FirewallInUse
  405. currentHost.Version = h.Version
  406. if err := logic.UpsertHost(currentHost); err != nil {
  407. slog.Error("failed to update host after check-in", "name", h.Name, "id", h.ID, "error", err)
  408. return false
  409. }
  410. }
  411. ifaceDelta := len(h.Interfaces) != len(currentHost.Interfaces) ||
  412. !h.EndpointIP.Equal(currentHost.EndpointIP) ||
  413. (len(h.NatType) > 0 && h.NatType != currentHost.NatType) ||
  414. h.DefaultInterface != currentHost.DefaultInterface ||
  415. (h.ListenPort != 0 && h.ListenPort != currentHost.ListenPort) || (h.WgPublicListenPort != 0 && h.WgPublicListenPort != currentHost.WgPublicListenPort)
  416. if ifaceDelta { // only save if something changes
  417. currentHost.EndpointIP = h.EndpointIP
  418. currentHost.Interfaces = h.Interfaces
  419. currentHost.DefaultInterface = h.DefaultInterface
  420. currentHost.NatType = h.NatType
  421. if h.ListenPort != 0 {
  422. currentHost.ListenPort = h.ListenPort
  423. }
  424. if h.WgPublicListenPort != 0 {
  425. currentHost.WgPublicListenPort = h.WgPublicListenPort
  426. }
  427. if err := logic.UpsertHost(currentHost); err != nil {
  428. slog.Error("failed to update host after check-in", "name", h.Name, "id", h.ID, "error", err)
  429. return false
  430. }
  431. slog.Info("updated host after check-in", "name", currentHost.Name, "id", currentHost.ID)
  432. }
  433. slog.Info("check-in processed for host", "name", h.Name, "id", h.ID)
  434. return ifaceDelta
  435. }