metrics.go 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
  1. package logic
  import (
  	"encoding/json"
  	"net/http"
  	"net/url"
  	"sync"
  	"time"

  	mqtt "github.com/eclipse/paho.mqtt.golang"
  	"github.com/gravitl/netmaker/database"
  	"github.com/gravitl/netmaker/logic"
  	"github.com/gravitl/netmaker/models"
  	"github.com/gravitl/netmaker/mq"
  	"github.com/gravitl/netmaker/netclient/ncutils"
  	"github.com/gravitl/netmaker/servercfg"
  	"golang.org/x/exp/slog"
  )
  16. var (
  17. metricsCacheMutex = &sync.RWMutex{}
  18. metricsCacheMap = make(map[string]models.Metrics)
  19. )
  20. func getMetricsFromCache(key string) (metrics models.Metrics, ok bool) {
  21. metricsCacheMutex.RLock()
  22. metrics, ok = metricsCacheMap[key]
  23. metricsCacheMutex.RUnlock()
  24. return
  25. }
  26. func storeMetricsInCache(key string, metrics models.Metrics) {
  27. metricsCacheMutex.Lock()
  28. metricsCacheMap[key] = metrics
  29. metricsCacheMutex.Unlock()
  30. }
  31. func deleteNetworkFromCache(key string) {
  32. metricsCacheMutex.Lock()
  33. delete(metricsCacheMap, key)
  34. metricsCacheMutex.Unlock()
  35. }
  36. func LoadNodeMetricsToCache() error {
  37. slog.Info("loading metrics to cache")
  38. if metricsCacheMap == nil {
  39. metricsCacheMap = map[string]models.Metrics{}
  40. }
  41. collection, err := database.FetchRecords(database.METRICS_TABLE_NAME)
  42. if err != nil {
  43. return err
  44. }
  45. for key, value := range collection {
  46. var metrics models.Metrics
  47. if err := json.Unmarshal([]byte(value), &metrics); err != nil {
  48. slog.Error("parse metric record error", "error", err.Error())
  49. continue
  50. }
  51. if servercfg.CacheEnabled() {
  52. storeMetricsInCache(key, metrics)
  53. }
  54. }
  55. slog.Info("metrics loading done")
  56. return nil
  57. }
  58. // GetMetrics - gets the metrics
  59. func GetMetrics(nodeid string) (*models.Metrics, error) {
  60. var metrics models.Metrics
  61. if servercfg.CacheEnabled() {
  62. if metrics, ok := getMetricsFromCache(nodeid); ok {
  63. return &metrics, nil
  64. }
  65. }
  66. record, err := database.FetchRecord(database.METRICS_TABLE_NAME, nodeid)
  67. if err != nil {
  68. if database.IsEmptyRecord(err) {
  69. return &metrics, nil
  70. }
  71. return &metrics, err
  72. }
  73. err = json.Unmarshal([]byte(record), &metrics)
  74. if err != nil {
  75. return &metrics, err
  76. }
  77. if servercfg.CacheEnabled() {
  78. storeMetricsInCache(nodeid, metrics)
  79. }
  80. return &metrics, nil
  81. }
  82. // UpdateMetrics - updates the metrics of a given client
  83. func UpdateMetrics(nodeid string, metrics *models.Metrics) error {
  84. data, err := json.Marshal(metrics)
  85. if err != nil {
  86. return err
  87. }
  88. err = database.Insert(nodeid, string(data), database.METRICS_TABLE_NAME)
  89. if err != nil {
  90. return err
  91. }
  92. if servercfg.CacheEnabled() {
  93. storeMetricsInCache(nodeid, *metrics)
  94. }
  95. return nil
  96. }
  97. // DeleteMetrics - deletes metrics of a given node
  98. func DeleteMetrics(nodeid string) error {
  99. err := database.DeleteRecord(database.METRICS_TABLE_NAME, nodeid)
  100. if err != nil {
  101. return err
  102. }
  103. if servercfg.CacheEnabled() {
  104. deleteNetworkFromCache(nodeid)
  105. }
  106. return nil
  107. }
  108. // MQUpdateMetricsFallBack - called when mq fallback thread is triggered on client
  109. func MQUpdateMetricsFallBack(nodeid string, newMetrics models.Metrics) {
  110. currentNode, err := logic.GetNodeByID(nodeid)
  111. if err != nil {
  112. slog.Error("error getting node", "id", nodeid, "error", err)
  113. return
  114. }
  115. updateNodeMetrics(&currentNode, &newMetrics)
  116. if err = logic.UpdateMetrics(nodeid, &newMetrics); err != nil {
  117. slog.Error("failed to update node metrics", "id", nodeid, "error", err)
  118. return
  119. }
  120. if servercfg.IsMetricsExporter() {
  121. if err := mq.PushMetricsToExporter(newMetrics); err != nil {
  122. slog.Error("failed to push node metrics to exporter", "id", currentNode.ID, "error", err)
  123. }
  124. }
  125. slog.Debug("updated node metrics", "id", nodeid)
  126. }
  127. func MQUpdateMetrics(client mqtt.Client, msg mqtt.Message) {
  128. id, err := mq.GetID(msg.Topic())
  129. if err != nil {
  130. slog.Error("error getting ID sent on ", "topic", msg.Topic(), "error", err)
  131. return
  132. }
  133. currentNode, err := logic.GetNodeByID(id)
  134. if err != nil {
  135. slog.Error("error getting node", "id", id, "error", err)
  136. return
  137. }
  138. decrypted, decryptErr := mq.DecryptMsg(&currentNode, msg.Payload())
  139. if decryptErr != nil {
  140. slog.Error("failed to decrypt message for node", "id", id, "error", decryptErr)
  141. return
  142. }
  143. var newMetrics models.Metrics
  144. if err := json.Unmarshal(decrypted, &newMetrics); err != nil {
  145. slog.Error("error unmarshaling payload", "error", err)
  146. return
  147. }
  148. updateNodeMetrics(&currentNode, &newMetrics)
  149. if err = logic.UpdateMetrics(id, &newMetrics); err != nil {
  150. slog.Error("failed to update node metrics", "id", id, "error", err)
  151. return
  152. }
  153. if servercfg.IsMetricsExporter() {
  154. if err := mq.PushMetricsToExporter(newMetrics); err != nil {
  155. slog.Error("failed to push node metrics to exporter", "id", currentNode.ID, "error", err)
  156. }
  157. }
  158. slog.Debug("updated node metrics", "id", id)
  159. }
  160. func updateNodeMetrics(currentNode *models.Node, newMetrics *models.Metrics) {
  161. oldMetrics, err := logic.GetMetrics(currentNode.ID.String())
  162. if err != nil {
  163. slog.Error("error finding old metrics for node", "id", currentNode.ID, "error", err)
  164. return
  165. }
  166. var attachedClients []models.ExtClient
  167. if currentNode.IsIngressGateway {
  168. clients, err := logic.GetExtClientsByID(currentNode.ID.String(), currentNode.Network)
  169. if err == nil {
  170. attachedClients = clients
  171. }
  172. }
  173. if newMetrics.Connectivity == nil {
  174. newMetrics.Connectivity = make(map[string]models.Metric)
  175. }
  176. for i := range attachedClients {
  177. slog.Debug("[metrics] processing attached client", "client", attachedClients[i].ClientID, "public key", attachedClients[i].PublicKey)
  178. clientMetric := newMetrics.Connectivity[attachedClients[i].PublicKey]
  179. clientMetric.NodeName = attachedClients[i].ClientID
  180. newMetrics.Connectivity[attachedClients[i].ClientID] = clientMetric
  181. delete(newMetrics.Connectivity, attachedClients[i].PublicKey)
  182. slog.Debug("[metrics] attached client metric", "metric", clientMetric)
  183. }
  184. // run through metrics for each peer
  185. for k := range newMetrics.Connectivity {
  186. currMetric := newMetrics.Connectivity[k]
  187. oldMetric := oldMetrics.Connectivity[k]
  188. currMetric.TotalTime += oldMetric.TotalTime
  189. currMetric.Uptime += oldMetric.Uptime // get the total uptime for this connection
  190. totalRecv := currMetric.TotalReceived
  191. totalSent := currMetric.TotalSent
  192. if currMetric.TotalReceived < oldMetric.TotalReceived && currMetric.TotalReceived < oldMetric.LastTotalReceived {
  193. currMetric.TotalReceived += oldMetric.TotalReceived
  194. } else {
  195. currMetric.TotalReceived = currMetric.TotalReceived - oldMetric.LastTotalReceived + oldMetric.TotalReceived
  196. }
  197. if currMetric.TotalSent < oldMetric.TotalSent && currMetric.TotalSent < oldMetric.LastTotalSent {
  198. currMetric.TotalSent += oldMetric.TotalSent
  199. } else {
  200. currMetric.TotalSent = currMetric.TotalSent - oldMetric.LastTotalSent + oldMetric.TotalSent
  201. }
  202. if currMetric.Uptime == 0 || currMetric.TotalTime == 0 {
  203. currMetric.PercentUp = 0
  204. } else {
  205. currMetric.PercentUp = 100.0 * (float64(currMetric.Uptime) / float64(currMetric.TotalTime))
  206. }
  207. totalUpMinutes := currMetric.Uptime * ncutils.CheckInInterval
  208. currMetric.ActualUptime = time.Duration(totalUpMinutes) * time.Minute
  209. delete(oldMetrics.Connectivity, k) // remove from old data
  210. currMetric.LastTotalReceived = totalRecv
  211. currMetric.LastTotalSent = totalSent
  212. newMetrics.Connectivity[k] = currMetric
  213. }
  214. slog.Debug("[metrics] node metrics data", "node ID", currentNode.ID, "metrics", newMetrics)
  215. }
  // GetHostLocInfo queries ipinfo.io for the location of ip and returns the
  // "loc" field of the JSON response, formatted as "lat,lon".
  //
  // Parameters:
  //   - ip: address to look up; when empty, the request goes to
  //     https://ipinfo.io/json with no address in the path.
  //   - token: optional API token, sent as the "token" query parameter.
  //
  // Both values are escaped before being placed in the URL. The empty string
  // is returned when the request fails, the response status is not 200 OK, or
  // the body cannot be decoded. The request times out after 3 seconds.
  func GetHostLocInfo(ip, token string) string {
  	endpoint := "https://ipinfo.io/"
  	if ip != "" {
  		// Escape as a single path segment so a stray '/' or '?' in ip cannot
  		// alter the request path; plain IPv4/IPv6 literals pass through as is.
  		endpoint += url.PathEscape(ip) + "/"
  	}
  	endpoint += "json"
  	if token != "" {
  		endpoint += "?token=" + url.QueryEscape(token)
  	}

  	client := http.Client{Timeout: 3 * time.Second}
  	resp, err := client.Get(endpoint)
  	if err != nil {
  		return ""
  	}
  	defer resp.Body.Close()

  	// Error responses (rate limiting, bad token, ...) carry no usable location.
  	if resp.StatusCode != http.StatusOK {
  		return ""
  	}

  	var data struct {
  		Loc string `json:"loc"` // Format: "lat,lon"
  	}
  	if err := json.NewDecoder(resp.Body).Decode(&data); err != nil {
  		return ""
  	}
  	return data.Loc
  }