metrics.go 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. package logic
  2. import (
  3. "encoding/json"
  4. "sync"
  5. "time"
  6. mqtt "github.com/eclipse/paho.mqtt.golang"
  7. "github.com/gravitl/netmaker/database"
  8. "github.com/gravitl/netmaker/logic"
  9. "github.com/gravitl/netmaker/models"
  10. "github.com/gravitl/netmaker/mq"
  11. "github.com/gravitl/netmaker/netclient/ncutils"
  12. "github.com/gravitl/netmaker/servercfg"
  13. "golang.org/x/exp/slog"
  14. )
  15. var (
  16. metricsCacheMutex = &sync.RWMutex{}
  17. metricsCacheMap = make(map[string]models.Metrics)
  18. )
  19. func getMetricsFromCache(key string) (metrics models.Metrics, ok bool) {
  20. metricsCacheMutex.RLock()
  21. metrics, ok = metricsCacheMap[key]
  22. metricsCacheMutex.RUnlock()
  23. return
  24. }
  25. func storeMetricsInCache(key string, metrics models.Metrics) {
  26. metricsCacheMutex.Lock()
  27. metricsCacheMap[key] = metrics
  28. metricsCacheMutex.Unlock()
  29. }
  30. func deleteNetworkFromCache(key string) {
  31. metricsCacheMutex.Lock()
  32. delete(metricsCacheMap, key)
  33. metricsCacheMutex.Unlock()
  34. }
  35. func LoadNodeMetricsToCache() error {
  36. slog.Info("loading metrics to cache")
  37. if metricsCacheMap == nil {
  38. metricsCacheMap = map[string]models.Metrics{}
  39. }
  40. collection, err := database.FetchRecords(database.METRICS_TABLE_NAME)
  41. if err != nil {
  42. return err
  43. }
  44. for key, value := range collection {
  45. var metrics models.Metrics
  46. if err := json.Unmarshal([]byte(value), &metrics); err != nil {
  47. slog.Error("parse metric record error", "error", err.Error())
  48. continue
  49. }
  50. if servercfg.CacheEnabled() {
  51. storeMetricsInCache(key, metrics)
  52. }
  53. }
  54. slog.Info("metrics loading done")
  55. return nil
  56. }
  57. // GetMetrics - gets the metrics
  58. func GetMetrics(nodeid string) (*models.Metrics, error) {
  59. var metrics models.Metrics
  60. if servercfg.CacheEnabled() {
  61. if metrics, ok := getMetricsFromCache(nodeid); ok {
  62. return &metrics, nil
  63. }
  64. }
  65. record, err := database.FetchRecord(database.METRICS_TABLE_NAME, nodeid)
  66. if err != nil {
  67. if database.IsEmptyRecord(err) {
  68. return &metrics, nil
  69. }
  70. return &metrics, err
  71. }
  72. err = json.Unmarshal([]byte(record), &metrics)
  73. if err != nil {
  74. return &metrics, err
  75. }
  76. if servercfg.CacheEnabled() {
  77. storeMetricsInCache(nodeid, metrics)
  78. }
  79. return &metrics, nil
  80. }
  81. // UpdateMetrics - updates the metrics of a given client
  82. func UpdateMetrics(nodeid string, metrics *models.Metrics) error {
  83. data, err := json.Marshal(metrics)
  84. if err != nil {
  85. return err
  86. }
  87. err = database.Insert(nodeid, string(data), database.METRICS_TABLE_NAME)
  88. if err != nil {
  89. return err
  90. }
  91. if servercfg.CacheEnabled() {
  92. storeMetricsInCache(nodeid, *metrics)
  93. }
  94. return nil
  95. }
  96. // DeleteMetrics - deletes metrics of a given node
  97. func DeleteMetrics(nodeid string) error {
  98. err := database.DeleteRecord(database.METRICS_TABLE_NAME, nodeid)
  99. if err != nil {
  100. return err
  101. }
  102. if servercfg.CacheEnabled() {
  103. deleteNetworkFromCache(nodeid)
  104. }
  105. return nil
  106. }
  107. // MQUpdateMetricsFallBack - called when mq fallback thread is triggered on client
  108. func MQUpdateMetricsFallBack(nodeid string, newMetrics models.Metrics) {
  109. currentNode, err := logic.GetNodeByID(nodeid)
  110. if err != nil {
  111. slog.Error("error getting node", "id", nodeid, "error", err)
  112. return
  113. }
  114. updateNodeMetrics(&currentNode, &newMetrics)
  115. if err = logic.UpdateMetrics(nodeid, &newMetrics); err != nil {
  116. slog.Error("failed to update node metrics", "id", nodeid, "error", err)
  117. return
  118. }
  119. if servercfg.IsMetricsExporter() {
  120. if err := mq.PushMetricsToExporter(newMetrics); err != nil {
  121. slog.Error("failed to push node metrics to exporter", "id", currentNode.ID, "error", err)
  122. }
  123. }
  124. slog.Debug("updated node metrics", "id", nodeid)
  125. }
  126. func MQUpdateMetrics(client mqtt.Client, msg mqtt.Message) {
  127. id, err := mq.GetID(msg.Topic())
  128. if err != nil {
  129. slog.Error("error getting ID sent on ", "topic", msg.Topic(), "error", err)
  130. return
  131. }
  132. currentNode, err := logic.GetNodeByID(id)
  133. if err != nil {
  134. slog.Error("error getting node", "id", id, "error", err)
  135. return
  136. }
  137. decrypted, decryptErr := mq.DecryptMsg(&currentNode, msg.Payload())
  138. if decryptErr != nil {
  139. slog.Error("failed to decrypt message for node", "id", id, "error", decryptErr)
  140. return
  141. }
  142. var newMetrics models.Metrics
  143. if err := json.Unmarshal(decrypted, &newMetrics); err != nil {
  144. slog.Error("error unmarshaling payload", "error", err)
  145. return
  146. }
  147. updateNodeMetrics(&currentNode, &newMetrics)
  148. if err = logic.UpdateMetrics(id, &newMetrics); err != nil {
  149. slog.Error("failed to update node metrics", "id", id, "error", err)
  150. return
  151. }
  152. if servercfg.IsMetricsExporter() {
  153. if err := mq.PushMetricsToExporter(newMetrics); err != nil {
  154. slog.Error("failed to push node metrics to exporter", "id", currentNode.ID, "error", err)
  155. }
  156. }
  157. slog.Debug("updated node metrics", "id", id)
  158. }
  159. func updateNodeMetrics(currentNode *models.Node, newMetrics *models.Metrics) {
  160. oldMetrics, err := logic.GetMetrics(currentNode.ID.String())
  161. if err != nil {
  162. slog.Error("error finding old metrics for node", "id", currentNode.ID, "error", err)
  163. return
  164. }
  165. var attachedClients []models.ExtClient
  166. if currentNode.IsIngressGateway {
  167. clients, err := logic.GetExtClientsByID(currentNode.ID.String(), currentNode.Network)
  168. if err == nil {
  169. attachedClients = clients
  170. }
  171. }
  172. if newMetrics.Connectivity == nil {
  173. newMetrics.Connectivity = make(map[string]models.Metric)
  174. }
  175. for i := range attachedClients {
  176. slog.Debug("[metrics] processing attached client", "client", attachedClients[i].ClientID, "public key", attachedClients[i].PublicKey)
  177. clientMetric := newMetrics.Connectivity[attachedClients[i].PublicKey]
  178. clientMetric.NodeName = attachedClients[i].ClientID
  179. newMetrics.Connectivity[attachedClients[i].ClientID] = clientMetric
  180. delete(newMetrics.Connectivity, attachedClients[i].PublicKey)
  181. slog.Debug("[metrics] attached client metric", "metric", clientMetric)
  182. }
  183. // run through metrics for each peer
  184. for k := range newMetrics.Connectivity {
  185. currMetric := newMetrics.Connectivity[k]
  186. oldMetric := oldMetrics.Connectivity[k]
  187. currMetric.TotalTime += oldMetric.TotalTime
  188. currMetric.Uptime += oldMetric.Uptime // get the total uptime for this connection
  189. totalRecv := currMetric.TotalReceived
  190. totalSent := currMetric.TotalSent
  191. if currMetric.TotalReceived < oldMetric.TotalReceived && currMetric.TotalReceived < oldMetric.LastTotalReceived {
  192. currMetric.TotalReceived += oldMetric.TotalReceived
  193. } else {
  194. currMetric.TotalReceived = currMetric.TotalReceived - oldMetric.LastTotalReceived + oldMetric.TotalReceived
  195. }
  196. if currMetric.TotalSent < oldMetric.TotalSent && currMetric.TotalSent < oldMetric.LastTotalSent {
  197. currMetric.TotalSent += oldMetric.TotalSent
  198. } else {
  199. currMetric.TotalSent = currMetric.TotalSent - oldMetric.LastTotalSent + oldMetric.TotalSent
  200. }
  201. if currMetric.Uptime == 0 || currMetric.TotalTime == 0 {
  202. currMetric.PercentUp = 0
  203. } else {
  204. currMetric.PercentUp = 100.0 * (float64(currMetric.Uptime) / float64(currMetric.TotalTime))
  205. }
  206. totalUpMinutes := currMetric.Uptime * ncutils.CheckInInterval
  207. currMetric.ActualUptime = time.Duration(totalUpMinutes) * time.Minute
  208. delete(oldMetrics.Connectivity, k) // remove from old data
  209. currMetric.LastTotalReceived = totalRecv
  210. currMetric.LastTotalSent = totalSent
  211. newMetrics.Connectivity[k] = currMetric
  212. }
  213. slog.Debug("[metrics] node metrics data", "node ID", currentNode.ID, "metrics", newMetrics)
  214. }