mqpublish.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. package functions
  2. import (
  3. "context"
  4. "encoding/json"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "net"
  9. "net/http"
  10. "os"
  11. "strconv"
  12. "sync"
  13. "time"
  14. "github.com/cloverstd/tcping/ping"
  15. "github.com/gravitl/netmaker/logger"
  16. "github.com/gravitl/netmaker/logic/metrics"
  17. "github.com/gravitl/netmaker/models"
  18. "github.com/gravitl/netmaker/netclient/auth"
  19. "github.com/gravitl/netmaker/netclient/config"
  20. "github.com/gravitl/netmaker/netclient/ncutils"
  21. "github.com/gravitl/netmaker/tls"
  22. )
  // metricsCache keeps, per node ID, the JSON-encoded metrics that could not be
  // published to the broker; publishMetrics merges into it on failure and clears
  // the entry after a successful publish.
  var metricsCache = &sync.Map{}
  24. // Checkin -- go routine that checks for public or local ip changes, publishes changes
  25. //
  26. // if there are no updates, simply "pings" the server as a checkin
  27. func Checkin(ctx context.Context, wg *sync.WaitGroup) {
  28. logger.Log(2, "starting checkin goroutine")
  29. defer wg.Done()
  30. currentRun := 0
  31. checkin(currentRun)
  32. ticker := time.NewTicker(time.Second * 60)
  33. defer ticker.Stop()
  34. for {
  35. select {
  36. case <-ctx.Done():
  37. logger.Log(0, "checkin routine closed")
  38. return
  39. //delay should be configuraable -> use cfg.Node.NetworkSettings.DefaultCheckInInterval ??
  40. case <-ticker.C:
  41. currentRun++
  42. checkin(currentRun)
  43. if currentRun >= 5 {
  44. currentRun = 0
  45. }
  46. }
  47. }
  48. }
  49. func checkin(currentRun int) {
  50. networks, _ := ncutils.GetSystemNetworks()
  51. logger.Log(3, "checkin with server(s) for all networks")
  52. for _, network := range networks {
  53. var nodeCfg config.ClientConfig
  54. nodeCfg.Network = network
  55. nodeCfg.ReadConfig()
  56. // check for nftables present if on Linux
  57. if ncutils.IsLinux() {
  58. if ncutils.IsNFTablesPresent() {
  59. nodeCfg.Node.FirewallInUse = models.FIREWALL_NFTABLES
  60. } else {
  61. nodeCfg.Node.FirewallInUse = models.FIREWALL_IPTABLES
  62. }
  63. } else {
  64. // defaults to iptables for now, may need another default for non-Linux OSes
  65. nodeCfg.Node.FirewallInUse = models.FIREWALL_IPTABLES
  66. }
  67. if nodeCfg.Node.Connected == "yes" {
  68. if nodeCfg.Node.IsStatic != "yes" {
  69. extIP, err := ncutils.GetPublicIP(nodeCfg.Server.API)
  70. if err != nil {
  71. logger.Log(1, "error encountered checking public ip addresses: ", err.Error())
  72. }
  73. if nodeCfg.Node.Endpoint != extIP && extIP != "" {
  74. logger.Log(1, "network:", nodeCfg.Node.Network, "endpoint has changed from ", nodeCfg.Node.Endpoint, " to ", extIP)
  75. nodeCfg.Node.Endpoint = extIP
  76. if err := PublishNodeUpdate(&nodeCfg); err != nil {
  77. logger.Log(0, "network:", nodeCfg.Node.Network, "could not publish endpoint change")
  78. }
  79. }
  80. intIP, err := getPrivateAddr()
  81. if err != nil {
  82. logger.Log(1, "network:", nodeCfg.Node.Network, "error encountered checking private ip addresses: ", err.Error())
  83. }
  84. if nodeCfg.Node.LocalAddress != intIP && intIP != "" {
  85. logger.Log(1, "network:", nodeCfg.Node.Network, "local Address has changed from ", nodeCfg.Node.LocalAddress, " to ", intIP)
  86. nodeCfg.Node.LocalAddress = intIP
  87. if err := PublishNodeUpdate(&nodeCfg); err != nil {
  88. logger.Log(0, "Network: ", nodeCfg.Node.Network, " could not publish local address change")
  89. }
  90. }
  91. _ = UpdateLocalListenPort(&nodeCfg)
  92. } else if nodeCfg.Node.IsLocal == "yes" && nodeCfg.Node.LocalRange != "" {
  93. localIP, err := ncutils.GetLocalIP(nodeCfg.Node.LocalRange)
  94. if err != nil {
  95. logger.Log(1, "network:", nodeCfg.Node.Network, "error encountered checking local ip addresses: ", err.Error())
  96. }
  97. if nodeCfg.Node.Endpoint != localIP && localIP != "" {
  98. logger.Log(1, "network:", nodeCfg.Node.Network, "endpoint has changed from "+nodeCfg.Node.Endpoint+" to ", localIP)
  99. nodeCfg.Node.Endpoint = localIP
  100. if err := PublishNodeUpdate(&nodeCfg); err != nil {
  101. logger.Log(0, "network:", nodeCfg.Node.Network, "could not publish localip change")
  102. }
  103. }
  104. }
  105. }
  106. //check version
  107. if nodeCfg.Node.Version != ncutils.Version {
  108. nodeCfg.Node.Version = ncutils.Version
  109. config.Write(&nodeCfg, nodeCfg.Network)
  110. }
  111. Hello(&nodeCfg)
  112. checkCertExpiry(&nodeCfg)
  113. if currentRun >= 5 {
  114. logger.Log(0, "collecting metrics for node", nodeCfg.Node.Name)
  115. publishMetrics(&nodeCfg)
  116. }
  117. }
  118. }
  119. // PublishNodeUpdates -- saves node and pushes changes to broker
  120. func PublishNodeUpdate(nodeCfg *config.ClientConfig) error {
  121. if err := config.Write(nodeCfg, nodeCfg.Network); err != nil {
  122. return err
  123. }
  124. data, err := json.Marshal(nodeCfg.Node)
  125. if err != nil {
  126. return err
  127. }
  128. if err = publish(nodeCfg, fmt.Sprintf("update/%s", nodeCfg.Node.ID), data, 1); err != nil {
  129. return err
  130. }
  131. logger.Log(0, "network:", nodeCfg.Node.Network, "sent a node update to server for node", nodeCfg.Node.Name, ", ", nodeCfg.Node.ID)
  132. return nil
  133. }
  134. // Hello -- ping the broker to let server know node it's alive and well
  135. func Hello(nodeCfg *config.ClientConfig) {
  136. var checkin models.NodeCheckin
  137. checkin.Version = ncutils.Version
  138. checkin.Connected = nodeCfg.Node.Connected
  139. data, err := json.Marshal(checkin)
  140. if err != nil {
  141. logger.Log(0, "unable to marshal checkin data", err.Error())
  142. return
  143. }
  144. if err := publish(nodeCfg, fmt.Sprintf("ping/%s", nodeCfg.Node.ID), data, 0); err != nil {
  145. logger.Log(0, fmt.Sprintf("Network: %s error publishing ping, %v", nodeCfg.Node.Network, err))
  146. logger.Log(0, "running pull on "+nodeCfg.Node.Network+" to reconnect")
  147. _, err := Pull(nodeCfg.Node.Network, true)
  148. if err != nil {
  149. logger.Log(0, "could not run pull on "+nodeCfg.Node.Network+", error: "+err.Error())
  150. }
  151. } else {
  152. logger.Log(3, "checkin for", nodeCfg.Network, "complete")
  153. }
  154. }
  155. // publishMetrics - publishes the metrics of a given nodecfg
  156. func publishMetrics(nodeCfg *config.ClientConfig) {
  157. token, err := Authenticate(nodeCfg)
  158. if err != nil {
  159. logger.Log(1, "failed to authenticate when publishing metrics", err.Error())
  160. return
  161. }
  162. url := fmt.Sprintf("https://%s/api/nodes/%s/%s", nodeCfg.Server.API, nodeCfg.Network, nodeCfg.Node.ID)
  163. response, err := API("", http.MethodGet, url, token)
  164. if err != nil {
  165. logger.Log(1, "failed to read from server during metrics publish", err.Error())
  166. return
  167. }
  168. if response.StatusCode != http.StatusOK {
  169. bytes, err := io.ReadAll(response.Body)
  170. if err != nil {
  171. fmt.Println(err)
  172. }
  173. logger.Log(0, fmt.Sprintf("%s %s", string(bytes), err.Error()))
  174. return
  175. }
  176. defer response.Body.Close()
  177. var nodeGET models.NodeGet
  178. if err := json.NewDecoder(response.Body).Decode(&nodeGET); err != nil {
  179. logger.Log(0, "failed to decode node when running metrics update", err.Error())
  180. return
  181. }
  182. metrics, err := metrics.Collect(nodeCfg.Node.Interface, nodeGET.PeerIDs)
  183. if err != nil {
  184. logger.Log(0, "failed metric collection for node", nodeCfg.Node.Name, err.Error())
  185. }
  186. metrics.Network = nodeCfg.Node.Network
  187. metrics.NodeName = nodeCfg.Node.Name
  188. metrics.NodeID = nodeCfg.Node.ID
  189. metrics.IsServer = "no"
  190. data, err := json.Marshal(metrics)
  191. if err != nil {
  192. logger.Log(0, "something went wrong when marshalling metrics data for node", nodeCfg.Node.Name, err.Error())
  193. }
  194. if err = publish(nodeCfg, fmt.Sprintf("metrics/%s", nodeCfg.Node.ID), data, 1); err != nil {
  195. logger.Log(0, "error occurred during publishing of metrics on node", nodeCfg.Node.Name, err.Error())
  196. logger.Log(0, "aggregating metrics locally until broker connection re-established")
  197. val, ok := metricsCache.Load(nodeCfg.Node.ID)
  198. if !ok {
  199. metricsCache.Store(nodeCfg.Node.ID, data)
  200. } else {
  201. var oldMetrics models.Metrics
  202. err = json.Unmarshal(val.([]byte), &oldMetrics)
  203. if err == nil {
  204. for k := range oldMetrics.Connectivity {
  205. currentMetric := metrics.Connectivity[k]
  206. if currentMetric.Latency == 0 {
  207. currentMetric.Latency = oldMetrics.Connectivity[k].Latency
  208. }
  209. currentMetric.Uptime += oldMetrics.Connectivity[k].Uptime
  210. currentMetric.TotalTime += oldMetrics.Connectivity[k].TotalTime
  211. metrics.Connectivity[k] = currentMetric
  212. }
  213. newData, err := json.Marshal(metrics)
  214. if err == nil {
  215. metricsCache.Store(nodeCfg.Node.ID, newData)
  216. }
  217. }
  218. }
  219. } else {
  220. metricsCache.Delete(nodeCfg.Node.ID)
  221. logger.Log(0, "published metrics for node", nodeCfg.Node.Name)
  222. }
  223. }
  224. // node cfg is required in order to fetch the traffic keys of that node for encryption
  225. func publish(nodeCfg *config.ClientConfig, dest string, msg []byte, qos byte) error {
  226. // setup the keys
  227. trafficPrivKey, err := auth.RetrieveTrafficKey(nodeCfg.Node.Network)
  228. if err != nil {
  229. return err
  230. }
  231. serverPubKey, err := ncutils.ConvertBytesToKey(nodeCfg.Node.TrafficKeys.Server)
  232. if err != nil {
  233. return err
  234. }
  235. encrypted, err := ncutils.Chunk(msg, serverPubKey, trafficPrivKey)
  236. if err != nil {
  237. return err
  238. }
  239. if mqclient == nil {
  240. return errors.New("unable to publish ... no mqclient")
  241. }
  242. if token := mqclient.Publish(dest, qos, false, encrypted); !token.WaitTimeout(30*time.Second) || token.Error() != nil {
  243. logger.Log(0, "could not connect to broker at "+nodeCfg.Server.Server+":"+nodeCfg.Server.MQPort)
  244. var err error
  245. if token.Error() == nil {
  246. err = errors.New("connection timeout")
  247. } else {
  248. err = token.Error()
  249. }
  250. if err != nil {
  251. return err
  252. }
  253. }
  254. return nil
  255. }
  256. func checkCertExpiry(cfg *config.ClientConfig) error {
  257. cert, err := tls.ReadCertFromFile(ncutils.GetNetclientServerPath(cfg.Server.Server) + ncutils.GetSeparator() + "client.pem")
  258. //if cert doesn't exist or will expire within 10 days
  259. if errors.Is(err, os.ErrNotExist) || cert.NotAfter.Before(time.Now().Add(time.Hour*24*10)) {
  260. key, err := tls.ReadKeyFromFile(ncutils.GetNetclientPath() + ncutils.GetSeparator() + "client.key")
  261. if err != nil {
  262. return err
  263. }
  264. return RegisterWithServer(key, cfg)
  265. }
  266. if err != nil {
  267. return err
  268. }
  269. return nil
  270. }
  271. func checkBroker(broker string, port string) error {
  272. if broker == "" {
  273. return errors.New("error: broker address is blank")
  274. }
  275. if port == "" {
  276. return errors.New("error: broker port is blank")
  277. }
  278. _, err := net.LookupIP(broker)
  279. if err != nil {
  280. return errors.New("nslookup failed for broker ... check dns records")
  281. }
  282. pinger := ping.NewTCPing()
  283. intPort, err := strconv.Atoi(port)
  284. if err != nil {
  285. logger.Log(1, "error converting port to int: "+err.Error())
  286. }
  287. pinger.SetTarget(&ping.Target{
  288. Protocol: ping.TCP,
  289. Host: broker,
  290. Port: intPort,
  291. Counter: 3,
  292. Interval: 1 * time.Second,
  293. Timeout: 2 * time.Second,
  294. })
  295. pingerDone := pinger.Start()
  296. <-pingerDone
  297. if pinger.Result().SuccessCounter == 0 {
  298. return errors.New("unable to connect to broker port ... check netmaker server and firewalls")
  299. }
  300. return nil
  301. }