handlers.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. package mq
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "math"
  6. "time"
  7. mqtt "github.com/eclipse/paho.mqtt.golang"
  8. "github.com/google/uuid"
  9. "github.com/gravitl/netmaker/database"
  10. "github.com/gravitl/netmaker/logger"
  11. "github.com/gravitl/netmaker/logic"
  12. "github.com/gravitl/netmaker/logic/hostactions"
  13. "github.com/gravitl/netmaker/models"
  14. "github.com/gravitl/netmaker/netclient/ncutils"
  15. "github.com/gravitl/netmaker/servercfg"
  16. "golang.org/x/exp/slog"
  17. "golang.zx2c4.com/wireguard/wgctrl/wgtypes"
  18. )
  19. // DefaultHandler default message queue handler -- NOT USED
  20. func DefaultHandler(client mqtt.Client, msg mqtt.Message) {
  21. slog.Info("mqtt default handler", "topic", msg.Topic(), "message", msg.Payload())
  22. }
  23. // UpdateNode message Handler -- handles updates from client nodes
  24. func UpdateNode(client mqtt.Client, msg mqtt.Message) {
  25. id, err := getID(msg.Topic())
  26. if err != nil {
  27. slog.Error("error getting node.ID ", "topic", msg.Topic(), "error", err)
  28. return
  29. }
  30. currentNode, err := logic.GetNodeByID(id)
  31. if err != nil {
  32. slog.Error("error getting node", "id", id, "error", err)
  33. return
  34. }
  35. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  36. if decryptErr != nil {
  37. slog.Error("failed to decrypt message for node", "id", id, "error", decryptErr)
  38. return
  39. }
  40. var newNode models.Node
  41. if err := json.Unmarshal(decrypted, &newNode); err != nil {
  42. slog.Error("error unmarshaling payload", "error", err)
  43. return
  44. }
  45. ifaceDelta := logic.IfaceDelta(&currentNode, &newNode)
  46. if servercfg.Is_EE && ifaceDelta {
  47. if err = logic.EnterpriseResetAllPeersFailovers(currentNode.ID, currentNode.Network); err != nil {
  48. slog.Warn("failed to reset failover list during node update", "nodeid", currentNode.ID, "network", currentNode.Network)
  49. }
  50. }
  51. newNode.SetLastCheckIn()
  52. if err := logic.UpdateNode(&currentNode, &newNode); err != nil {
  53. slog.Error("error saving node", "id", id, "error", err)
  54. return
  55. }
  56. if ifaceDelta { // reduce number of unneeded updates, by only sending on iface changes
  57. h, err := logic.GetHost(newNode.HostID.String())
  58. if err != nil {
  59. return
  60. }
  61. if err = BroadcastAddOrUpdateNetworkPeer(models.Client{Host: *h, Node: newNode}, true); err != nil {
  62. logger.Log(0, "error updating peers when node", currentNode.ID.String(), "informed the server of an interface change", err.Error())
  63. }
  64. if clients, err := logic.GetNetworkClients(newNode.Network); err == nil {
  65. FlushNetworkPeersToHost(models.Client{Host: *h, Node: newNode}, clients)
  66. }
  67. }
  68. slog.Info("updated node", "id", id, "newnodeid", newNode.ID)
  69. }
  70. // UpdateHost message Handler -- handles host updates from clients
  71. func UpdateHost(client mqtt.Client, msg mqtt.Message) {
  72. id, err := getID(msg.Topic())
  73. if err != nil {
  74. slog.Error("error getting host.ID sent on ", "topic", msg.Topic(), "error", err)
  75. return
  76. }
  77. currentHost, err := logic.GetHost(id)
  78. if err != nil {
  79. slog.Error("error getting host", "id", id, "error", err)
  80. return
  81. }
  82. decrypted, decryptErr := decryptMsgWithHost(currentHost, msg.Payload())
  83. if decryptErr != nil {
  84. slog.Error("failed to decrypt message for host", "id", id, "error", decryptErr)
  85. return
  86. }
  87. var hostUpdate models.HostUpdate
  88. if err := json.Unmarshal(decrypted, &hostUpdate); err != nil {
  89. slog.Error("error unmarshaling payload", "error", err)
  90. return
  91. }
  92. slog.Info("recieved host update", "name", hostUpdate.Host.Name, "id", hostUpdate.Host.ID)
  93. var sendPeerUpdate bool
  94. var removeHost bool
  95. switch hostUpdate.Action {
  96. case models.CheckIn:
  97. sendPeerUpdate = handleHostCheckin(&hostUpdate.Host, currentHost)
  98. case models.Acknowledgement:
  99. hu := hostactions.GetAction(currentHost.ID.String())
  100. if hu != nil {
  101. if err = HostUpdate(hu); err != nil {
  102. slog.Error("failed to send new node to host", "name", hostUpdate.Host.Name, "id", currentHost.ID, "error", err)
  103. return
  104. } else {
  105. if servercfg.GetBrokerType() == servercfg.EmqxBrokerType {
  106. if err = AppendNodeUpdateACL(hu.Host.ID.String(), hu.Node.Network, hu.Node.ID.String(), servercfg.GetServer()); err != nil {
  107. slog.Error("failed to add ACLs for EMQX node", "error", err)
  108. return
  109. }
  110. }
  111. // flush peers to host
  112. clients, err := logic.GetNetworkClients(hu.Node.Network)
  113. if err != nil {
  114. return
  115. }
  116. err = FlushNetworkPeersToHost(models.Client{Host: hu.Host, Node: hu.Node}, clients)
  117. if err != nil {
  118. logger.Log(0, "failed to flush peers to host: ", err.Error())
  119. }
  120. if err = handleNewNodeDNS(&hu.Host, &hu.Node); err != nil {
  121. slog.Error("failed to send dns update after node added to host", "name", hostUpdate.Host.Name, "id", currentHost.ID, "error", err)
  122. return
  123. }
  124. }
  125. }
  126. case models.UpdateHost:
  127. if hostUpdate.Host.PublicKey != currentHost.PublicKey {
  128. //remove old peer entry
  129. peerUpdate := models.HostPeerUpdate{
  130. ServerVersion: servercfg.GetVersion(),
  131. Peers: []wgtypes.PeerConfig{
  132. {
  133. PublicKey: currentHost.PublicKey,
  134. Remove: true,
  135. },
  136. },
  137. }
  138. data, err := json.Marshal(&peerUpdate)
  139. if err != nil {
  140. slog.Error("failed to marshal peer update", "error", err)
  141. }
  142. hosts := logic.GetRelatedHosts(hostUpdate.Host.ID.String())
  143. server := servercfg.GetServer()
  144. for _, host := range hosts {
  145. publish(&host, fmt.Sprintf("peers/host/%s/%s", host.ID.String(), server), data)
  146. }
  147. }
  148. sendPeerUpdate = logic.UpdateHostFromClient(&hostUpdate.Host, currentHost)
  149. err := logic.UpsertHost(currentHost)
  150. if err != nil {
  151. slog.Error("failed to update host", "id", currentHost.ID, "error", err)
  152. return
  153. }
  154. case models.DeleteHost:
  155. if servercfg.GetBrokerType() == servercfg.EmqxBrokerType {
  156. // delete EMQX credentials for host
  157. if err := DeleteEmqxUser(currentHost.ID.String()); err != nil {
  158. slog.Error("failed to remove host credentials from EMQX", "id", currentHost.ID, "error", err)
  159. return
  160. }
  161. }
  162. if err := logic.DisassociateAllNodesFromHost(currentHost.ID.String()); err != nil {
  163. slog.Error("failed to delete all nodes of host", "id", currentHost.ID, "error", err)
  164. return
  165. }
  166. if err := logic.RemoveHostByID(currentHost.ID.String()); err != nil {
  167. slog.Error("failed to delete host", "id", currentHost.ID, "error", err)
  168. return
  169. }
  170. removeHost = true
  171. sendPeerUpdate = true
  172. case models.RegisterWithTurn:
  173. if servercfg.IsUsingTurn() {
  174. err = logic.RegisterHostWithTurn(hostUpdate.Host.ID.String(), hostUpdate.Host.HostPass)
  175. if err != nil {
  176. slog.Error("failed to register host with turn server", "id", currentHost.ID, "error", err)
  177. return
  178. }
  179. }
  180. }
  181. if sendPeerUpdate {
  182. go BroadcastHostUpdate(currentHost, removeHost)
  183. }
  184. // if servercfg.Is_EE && ifaceDelta {
  185. // if err = logic.EnterpriseResetAllPeersFailovers(currentHost.ID.String(), currentHost.Network); err != nil {
  186. // logger.Log(1, "failed to reset failover list during node update", currentHost.ID.String(), currentHost.Network)
  187. // }
  188. // }
  189. }
  190. // UpdateMetrics message Handler -- handles updates from client nodes for metrics
  191. func UpdateMetrics(client mqtt.Client, msg mqtt.Message) {
  192. if servercfg.Is_EE {
  193. id, err := getID(msg.Topic())
  194. if err != nil {
  195. slog.Error("error getting ID sent on ", "topic", msg.Topic(), "error", err)
  196. return
  197. }
  198. currentNode, err := logic.GetNodeByID(id)
  199. if err != nil {
  200. slog.Error("error getting node", "id", id, "error", err)
  201. return
  202. }
  203. decrypted, decryptErr := decryptMsg(&currentNode, msg.Payload())
  204. if decryptErr != nil {
  205. slog.Error("failed to decrypt message for node", "id", id, "error", decryptErr)
  206. return
  207. }
  208. var newMetrics models.Metrics
  209. if err := json.Unmarshal(decrypted, &newMetrics); err != nil {
  210. slog.Error("error unmarshaling payload", "error", err)
  211. return
  212. }
  213. _ = updateNodeMetrics(&currentNode, &newMetrics)
  214. if err = logic.UpdateMetrics(id, &newMetrics); err != nil {
  215. slog.Error("failed to update node metrics", "id", id, "error", err)
  216. return
  217. }
  218. if servercfg.IsMetricsExporter() {
  219. if err := pushMetricsToExporter(newMetrics); err != nil {
  220. slog.Error("failed to push node metrics to exporter", "id", currentNode.ID, "error", err)
  221. }
  222. }
  223. if newMetrics.Connectivity != nil {
  224. err := logic.EnterpriseFailoverFunc(&currentNode)
  225. if err != nil {
  226. slog.Error("failed to failover for node", "id", currentNode.ID, "network", currentNode.Network, "error", err)
  227. }
  228. }
  229. slog.Info("updated node metrics", "id", id)
  230. }
  231. }
  232. func updateNodeMetrics(currentNode *models.Node, newMetrics *models.Metrics) bool {
  233. if newMetrics.FailoverPeers == nil {
  234. newMetrics.FailoverPeers = make(map[string]string)
  235. }
  236. oldMetrics, err := logic.GetMetrics(currentNode.ID.String())
  237. if err != nil {
  238. slog.Error("error finding old metrics for node", "id", currentNode.ID, "error", err)
  239. return false
  240. }
  241. if oldMetrics.FailoverPeers == nil {
  242. oldMetrics.FailoverPeers = make(map[string]string)
  243. }
  244. var attachedClients []models.ExtClient
  245. if currentNode.IsIngressGateway {
  246. clients, err := logic.GetExtClientsByID(currentNode.ID.String(), currentNode.Network)
  247. if err == nil {
  248. attachedClients = clients
  249. }
  250. }
  251. if len(attachedClients) > 0 {
  252. // associate ext clients with IDs
  253. for i := range attachedClients {
  254. extMetric := newMetrics.Connectivity[attachedClients[i].PublicKey]
  255. if len(extMetric.NodeName) == 0 &&
  256. len(newMetrics.Connectivity[attachedClients[i].ClientID].NodeName) > 0 { // cover server clients
  257. extMetric = newMetrics.Connectivity[attachedClients[i].ClientID]
  258. if extMetric.TotalReceived > 0 && extMetric.TotalSent > 0 {
  259. extMetric.Connected = true
  260. }
  261. }
  262. extMetric.NodeName = attachedClients[i].ClientID
  263. delete(newMetrics.Connectivity, attachedClients[i].PublicKey)
  264. newMetrics.Connectivity[attachedClients[i].ClientID] = extMetric
  265. }
  266. }
  267. // run through metrics for each peer
  268. for k := range newMetrics.Connectivity {
  269. currMetric := newMetrics.Connectivity[k]
  270. oldMetric := oldMetrics.Connectivity[k]
  271. currMetric.TotalTime += oldMetric.TotalTime
  272. currMetric.Uptime += oldMetric.Uptime // get the total uptime for this connection
  273. if currMetric.TotalReceived < oldMetric.TotalReceived {
  274. currMetric.TotalReceived += oldMetric.TotalReceived
  275. } else {
  276. currMetric.TotalReceived += int64(math.Abs(float64(currMetric.TotalReceived) - float64(oldMetric.TotalReceived)))
  277. }
  278. if currMetric.TotalSent < oldMetric.TotalSent {
  279. currMetric.TotalSent += oldMetric.TotalSent
  280. } else {
  281. currMetric.TotalSent += int64(math.Abs(float64(currMetric.TotalSent) - float64(oldMetric.TotalSent)))
  282. }
  283. if currMetric.Uptime == 0 || currMetric.TotalTime == 0 {
  284. currMetric.PercentUp = 0
  285. } else {
  286. currMetric.PercentUp = 100.0 * (float64(currMetric.Uptime) / float64(currMetric.TotalTime))
  287. }
  288. totalUpMinutes := currMetric.Uptime * ncutils.CheckInInterval
  289. currMetric.ActualUptime = time.Duration(totalUpMinutes) * time.Minute
  290. delete(oldMetrics.Connectivity, k) // remove from old data
  291. newMetrics.Connectivity[k] = currMetric
  292. }
  293. // add nodes that need failover
  294. nodes, err := logic.GetNetworkNodes(currentNode.Network)
  295. if err != nil {
  296. slog.Error("failed to retrieve nodes while updating metrics", "error", err)
  297. return false
  298. }
  299. for _, node := range nodes {
  300. if !newMetrics.Connectivity[node.ID.String()].Connected &&
  301. len(newMetrics.Connectivity[node.ID.String()].NodeName) > 0 &&
  302. node.Connected &&
  303. len(node.FailoverNode) > 0 &&
  304. !node.Failover {
  305. newMetrics.FailoverPeers[node.ID.String()] = node.FailoverNode.String()
  306. }
  307. }
  308. shouldUpdate := len(oldMetrics.FailoverPeers) == 0 && len(newMetrics.FailoverPeers) > 0
  309. for k, v := range oldMetrics.FailoverPeers {
  310. if len(newMetrics.FailoverPeers[k]) > 0 && len(v) == 0 {
  311. shouldUpdate = true
  312. }
  313. if len(v) > 0 && len(newMetrics.FailoverPeers[k]) == 0 {
  314. newMetrics.FailoverPeers[k] = v
  315. }
  316. }
  317. for k := range oldMetrics.Connectivity { // cleanup any left over data, self healing
  318. delete(newMetrics.Connectivity, k)
  319. }
  320. return shouldUpdate
  321. }
  322. func handleNewNodeDNS(host *models.Host, node *models.Node) error {
  323. dns := models.DNSUpdate{
  324. Action: models.DNSInsert,
  325. Name: host.Name + "." + node.Network,
  326. }
  327. if node.Address.IP != nil {
  328. dns.Address = node.Address.IP.String()
  329. if err := PublishDNSUpdate(node.Network, dns); err != nil {
  330. return err
  331. }
  332. } else if node.Address6.IP != nil {
  333. dns.Address = node.Address6.IP.String()
  334. if err := PublishDNSUpdate(node.Network, dns); err != nil {
  335. return err
  336. }
  337. }
  338. if err := PublishAllDNS(node); err != nil {
  339. return err
  340. }
  341. return nil
  342. }
  343. func handleHostCheckin(h, currentHost *models.Host) bool {
  344. if h == nil {
  345. return false
  346. }
  347. for i := range currentHost.Nodes {
  348. currNodeID := currentHost.Nodes[i]
  349. node, err := logic.GetNodeByID(currNodeID)
  350. if err != nil {
  351. if database.IsEmptyRecord(err) {
  352. fakeNode := models.Node{}
  353. fakeNode.ID, _ = uuid.Parse(currNodeID)
  354. fakeNode.Action = models.NODE_DELETE
  355. fakeNode.PendingDelete = true
  356. if err := NodeUpdate(&fakeNode); err != nil {
  357. slog.Warn("failed to inform host to remove node", "host", currentHost.Name, "hostid", currentHost.ID, "nodeid", currNodeID, "error", err)
  358. }
  359. }
  360. continue
  361. }
  362. if err := logic.UpdateNodeCheckin(&node); err != nil {
  363. slog.Warn("failed to update node on checkin", "nodeid", node.ID, "error", err)
  364. }
  365. }
  366. for i := range h.Interfaces {
  367. h.Interfaces[i].AddressString = h.Interfaces[i].Address.String()
  368. }
  369. /// version or firewall in use change does not require a peerUpdate
  370. if h.Version != currentHost.Version || h.FirewallInUse != currentHost.FirewallInUse {
  371. currentHost.FirewallInUse = h.FirewallInUse
  372. currentHost.Version = h.Version
  373. if err := logic.UpsertHost(currentHost); err != nil {
  374. slog.Error("failed to update host after check-in", "name", h.Name, "id", h.ID, "error", err)
  375. return false
  376. }
  377. }
  378. ifaceDelta := len(h.Interfaces) != len(currentHost.Interfaces) ||
  379. !h.EndpointIP.Equal(currentHost.EndpointIP) ||
  380. (len(h.NatType) > 0 && h.NatType != currentHost.NatType) ||
  381. h.DefaultInterface != currentHost.DefaultInterface
  382. if ifaceDelta { // only save if something changes
  383. currentHost.EndpointIP = h.EndpointIP
  384. currentHost.Interfaces = h.Interfaces
  385. currentHost.DefaultInterface = h.DefaultInterface
  386. currentHost.NatType = h.NatType
  387. if err := logic.UpsertHost(currentHost); err != nil {
  388. slog.Error("failed to update host after check-in", "name", h.Name, "id", h.ID, "error", err)
  389. return false
  390. }
  391. slog.Info("updated host after check-in", "name", currentHost.Name, "id", currentHost.ID)
  392. }
  393. slog.Info("check-in processed for host", "name", h.Name, "id", h.ID)
  394. return ifaceDelta
  395. }