metrics.go 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. package logic
  2. import (
  3. "encoding/json"
  4. "net/http"
  5. "sync"
  6. "time"
  7. mqtt "github.com/eclipse/paho.mqtt.golang"
  8. "github.com/gravitl/netmaker/database"
  9. "github.com/gravitl/netmaker/logic"
  10. "github.com/gravitl/netmaker/models"
  11. "github.com/gravitl/netmaker/mq"
  12. "github.com/gravitl/netmaker/netclient/ncutils"
  13. "github.com/gravitl/netmaker/servercfg"
  14. "golang.org/x/exp/slog"
  15. )
  16. var (
  17. metricsCacheMutex = &sync.RWMutex{}
  18. metricsCacheMap = make(map[string]models.Metrics)
  19. )
  20. func getMetricsFromCache(key string) (metrics models.Metrics, ok bool) {
  21. metricsCacheMutex.RLock()
  22. metrics, ok = metricsCacheMap[key]
  23. metricsCacheMutex.RUnlock()
  24. return
  25. }
  26. func storeMetricsInCache(key string, metrics models.Metrics) {
  27. metricsCacheMutex.Lock()
  28. metricsCacheMap[key] = metrics
  29. metricsCacheMutex.Unlock()
  30. }
  31. func deleteNetworkFromCache(key string) {
  32. metricsCacheMutex.Lock()
  33. delete(metricsCacheMap, key)
  34. metricsCacheMutex.Unlock()
  35. }
  36. func LoadNodeMetricsToCache() error {
  37. slog.Info("loading metrics to cache")
  38. if metricsCacheMap == nil {
  39. metricsCacheMap = map[string]models.Metrics{}
  40. }
  41. collection, err := database.FetchRecords(database.METRICS_TABLE_NAME)
  42. if err != nil {
  43. return err
  44. }
  45. for key, value := range collection {
  46. var metrics models.Metrics
  47. if err := json.Unmarshal([]byte(value), &metrics); err != nil {
  48. slog.Error("parse metric record error", "error", err.Error())
  49. continue
  50. }
  51. if servercfg.CacheEnabled() {
  52. storeMetricsInCache(key, metrics)
  53. }
  54. }
  55. slog.Info("metrics loading done")
  56. return nil
  57. }
  58. // GetMetrics - gets the metrics
  59. func GetMetrics(nodeid string) (*models.Metrics, error) {
  60. var metrics models.Metrics
  61. if servercfg.CacheEnabled() {
  62. if metrics, ok := getMetricsFromCache(nodeid); ok {
  63. return &metrics, nil
  64. }
  65. }
  66. record, err := database.FetchRecord(database.METRICS_TABLE_NAME, nodeid)
  67. if err != nil {
  68. if database.IsEmptyRecord(err) {
  69. return &metrics, nil
  70. }
  71. return &metrics, err
  72. }
  73. err = json.Unmarshal([]byte(record), &metrics)
  74. if err != nil {
  75. return &metrics, err
  76. }
  77. if servercfg.CacheEnabled() {
  78. storeMetricsInCache(nodeid, metrics)
  79. }
  80. return &metrics, nil
  81. }
  82. // UpdateMetrics - updates the metrics of a given client
  83. func UpdateMetrics(nodeid string, metrics *models.Metrics) error {
  84. metrics.UpdatedAt = time.Now()
  85. data, err := json.Marshal(metrics)
  86. if err != nil {
  87. return err
  88. }
  89. err = database.Insert(nodeid, string(data), database.METRICS_TABLE_NAME)
  90. if err != nil {
  91. return err
  92. }
  93. if servercfg.CacheEnabled() {
  94. storeMetricsInCache(nodeid, *metrics)
  95. }
  96. return nil
  97. }
  98. // DeleteMetrics - deletes metrics of a given node
  99. func DeleteMetrics(nodeid string) error {
  100. err := database.DeleteRecord(database.METRICS_TABLE_NAME, nodeid)
  101. if err != nil {
  102. return err
  103. }
  104. if servercfg.CacheEnabled() {
  105. deleteNetworkFromCache(nodeid)
  106. }
  107. return nil
  108. }
  109. // MQUpdateMetricsFallBack - called when mq fallback thread is triggered on client
  110. func MQUpdateMetricsFallBack(nodeid string, newMetrics models.Metrics) {
  111. currentNode, err := logic.GetNodeByID(nodeid)
  112. if err != nil {
  113. slog.Error("error getting node", "id", nodeid, "error", err)
  114. return
  115. }
  116. updateNodeMetrics(&currentNode, &newMetrics)
  117. if err = logic.UpdateMetrics(nodeid, &newMetrics); err != nil {
  118. slog.Error("failed to update node metrics", "id", nodeid, "error", err)
  119. return
  120. }
  121. if servercfg.IsMetricsExporter() {
  122. if err := mq.PushMetricsToExporter(newMetrics); err != nil {
  123. slog.Error("failed to push node metrics to exporter", "id", currentNode.ID, "error", err)
  124. }
  125. }
  126. slog.Debug("updated node metrics", "id", nodeid)
  127. }
  128. func MQUpdateMetrics(client mqtt.Client, msg mqtt.Message) {
  129. id, err := mq.GetID(msg.Topic())
  130. if err != nil {
  131. slog.Error("error getting ID sent on ", "topic", msg.Topic(), "error", err)
  132. return
  133. }
  134. currentNode, err := logic.GetNodeByID(id)
  135. if err != nil {
  136. slog.Error("error getting node", "id", id, "error", err)
  137. return
  138. }
  139. decrypted, decryptErr := mq.DecryptMsg(&currentNode, msg.Payload())
  140. if decryptErr != nil {
  141. slog.Error("failed to decrypt message for node", "id", id, "error", decryptErr)
  142. return
  143. }
  144. var newMetrics models.Metrics
  145. if err := json.Unmarshal(decrypted, &newMetrics); err != nil {
  146. slog.Error("error unmarshaling payload", "error", err)
  147. return
  148. }
  149. updateNodeMetrics(&currentNode, &newMetrics)
  150. if err = logic.UpdateMetrics(id, &newMetrics); err != nil {
  151. slog.Error("failed to update node metrics", "id", id, "error", err)
  152. return
  153. }
  154. if servercfg.IsMetricsExporter() {
  155. if err := mq.PushMetricsToExporter(newMetrics); err != nil {
  156. slog.Error("failed to push node metrics to exporter", "id", currentNode.ID, "error", err)
  157. }
  158. }
  159. slog.Debug("updated node metrics", "id", id)
  160. }
  161. func updateNodeMetrics(currentNode *models.Node, newMetrics *models.Metrics) {
  162. oldMetrics, err := logic.GetMetrics(currentNode.ID.String())
  163. if err != nil {
  164. slog.Error("error finding old metrics for node", "id", currentNode.ID, "error", err)
  165. return
  166. }
  167. var attachedClients []models.ExtClient
  168. if currentNode.IsIngressGateway {
  169. clients, err := logic.GetExtClientsByID(currentNode.ID.String(), currentNode.Network)
  170. if err == nil {
  171. attachedClients = clients
  172. }
  173. }
  174. if newMetrics.Connectivity == nil {
  175. newMetrics.Connectivity = make(map[string]models.Metric)
  176. }
  177. for i := range attachedClients {
  178. slog.Debug("[metrics] processing attached client", "client", attachedClients[i].ClientID, "public key", attachedClients[i].PublicKey)
  179. clientMetric := newMetrics.Connectivity[attachedClients[i].PublicKey]
  180. clientMetric.NodeName = attachedClients[i].ClientID
  181. newMetrics.Connectivity[attachedClients[i].ClientID] = clientMetric
  182. delete(newMetrics.Connectivity, attachedClients[i].PublicKey)
  183. slog.Debug("[metrics] attached client metric", "metric", clientMetric)
  184. }
  185. // run through metrics for each peer
  186. for k := range newMetrics.Connectivity {
  187. currMetric := newMetrics.Connectivity[k]
  188. oldMetric := oldMetrics.Connectivity[k]
  189. currMetric.TotalTime += oldMetric.TotalTime
  190. currMetric.Uptime += oldMetric.Uptime // get the total uptime for this connection
  191. totalRecv := currMetric.TotalReceived
  192. totalSent := currMetric.TotalSent
  193. if currMetric.TotalReceived < oldMetric.TotalReceived && currMetric.TotalReceived < oldMetric.LastTotalReceived {
  194. currMetric.TotalReceived += oldMetric.TotalReceived
  195. } else {
  196. currMetric.TotalReceived = currMetric.TotalReceived - oldMetric.LastTotalReceived + oldMetric.TotalReceived
  197. }
  198. if currMetric.TotalSent < oldMetric.TotalSent && currMetric.TotalSent < oldMetric.LastTotalSent {
  199. currMetric.TotalSent += oldMetric.TotalSent
  200. } else {
  201. currMetric.TotalSent = currMetric.TotalSent - oldMetric.LastTotalSent + oldMetric.TotalSent
  202. }
  203. if currMetric.Uptime == 0 || currMetric.TotalTime == 0 {
  204. currMetric.PercentUp = 0
  205. } else {
  206. currMetric.PercentUp = 100.0 * (float64(currMetric.Uptime) / float64(currMetric.TotalTime))
  207. }
  208. totalUpMinutes := currMetric.Uptime * ncutils.CheckInInterval
  209. currMetric.ActualUptime = time.Duration(totalUpMinutes) * time.Minute
  210. delete(oldMetrics.Connectivity, k) // remove from old data
  211. currMetric.LastTotalReceived = totalRecv
  212. currMetric.LastTotalSent = totalSent
  213. newMetrics.Connectivity[k] = currMetric
  214. }
  215. slog.Debug("[metrics] node metrics data", "node ID", currentNode.ID, "metrics", newMetrics)
  216. }
  // GetHostLocInfo - queries the ipinfo.io JSON API and returns the "loc" field
  // of the response (format "lat,lon").
  //
  // ip is placed in the request path; when it is empty the address segment is
  // omitted (presumably ipinfo.io then reports on the caller's own address -
  // confirm). A non-empty token is sent as the "token" query parameter.
  //
  // Returns "" when the request fails or times out (3 seconds), the response
  // status is not 200 OK, or the body cannot be decoded.
  func GetHostLocInfo(ip, token string) string {
  	client := http.Client{Timeout: 3 * time.Second}
  	resp, err := client.Get(hostLocInfoURL(ip, token))
  	if err != nil {
  		return ""
  	}
  	defer resp.Body.Close()
  	// do not try to read a location out of an error response
  	if resp.StatusCode != http.StatusOK {
  		return ""
  	}
  	var data struct {
  		Loc string `json:"loc"` // Format: "lat,lon"
  	}
  	if err := json.NewDecoder(resp.Body).Decode(&data); err != nil {
  		return ""
  	}
  	return data.Loc
  }

  // hostLocInfoURL - builds the ipinfo.io request URL for ip and token.
  // An empty ip yields ".../json" with a single slash; an empty token adds
  // no query string.
  func hostLocInfoURL(ip, token string) string {
  	endpoint := "https://ipinfo.io/"
  	if ip != "" {
  		endpoint += ip + "/"
  	}
  	endpoint += "json"
  	if token != "" {
  		endpoint += "?token=" + token
  	}
  	return endpoint
  }