failover.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482
  1. package controllers
  2. import (
  3. "context"
  4. "encoding/json"
  5. "errors"
  6. "fmt"
  7. "net/http"
  8. "github.com/google/uuid"
  9. "github.com/gorilla/mux"
  10. controller "github.com/gravitl/netmaker/controllers"
  11. "github.com/gravitl/netmaker/db"
  12. "github.com/gravitl/netmaker/logger"
  13. "github.com/gravitl/netmaker/logic"
  14. "github.com/gravitl/netmaker/models"
  15. "github.com/gravitl/netmaker/mq"
  16. proLogic "github.com/gravitl/netmaker/pro/logic"
  17. "github.com/gravitl/netmaker/schema"
  18. "golang.org/x/exp/slog"
  19. )
  20. // FailOverHandlers - handlers for FailOver
  21. func FailOverHandlers(r *mux.Router) {
  22. r.HandleFunc("/api/v1/node/{nodeid}/failover", controller.Authorize(true, false, "host", http.HandlerFunc(getfailOver))).
  23. Methods(http.MethodGet)
  24. r.HandleFunc("/api/v1/node/{nodeid}/failover", logic.SecurityCheck(true, http.HandlerFunc(createfailOver))).
  25. Methods(http.MethodPost)
  26. r.HandleFunc("/api/v1/node/{nodeid}/failover", logic.SecurityCheck(true, http.HandlerFunc(deletefailOver))).
  27. Methods(http.MethodDelete)
  28. r.HandleFunc("/api/v1/node/{network}/failover/reset", logic.SecurityCheck(true, http.HandlerFunc(resetFailOver))).
  29. Methods(http.MethodPost)
  30. r.HandleFunc("/api/v1/node/{nodeid}/failover_me", controller.Authorize(true, false, "host", http.HandlerFunc(failOverME))).
  31. Methods(http.MethodPost)
  32. r.HandleFunc("/api/v1/node/{nodeid}/failover_check", controller.Authorize(true, false, "host", http.HandlerFunc(checkfailOverCtx))).
  33. Methods(http.MethodGet)
  34. }
  35. // @Summary Get failover node
  36. // @Router /api/v1/node/{nodeid}/failover [get]
  37. // @Tags PRO
  38. // @Param nodeid path string true "Node ID"
  39. // @Success 200 {object} models.Node
  40. // @Failure 400 {object} models.ErrorResponse
  41. // @Failure 404 {object} models.ErrorResponse
  42. func getfailOver(w http.ResponseWriter, r *http.Request) {
  43. var params = mux.Vars(r)
  44. nodeid := params["nodeid"]
  45. // confirm host exists
  46. node, err := logic.GetNodeByID(nodeid)
  47. if err != nil {
  48. logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
  49. return
  50. }
  51. failOverNode, exists := proLogic.FailOverExists(node.Network)
  52. if !exists {
  53. logic.ReturnErrorResponse(
  54. w,
  55. r,
  56. logic.FormatError(errors.New("failover node not found"), "notfound"),
  57. )
  58. return
  59. }
  60. w.Header().Set("Content-Type", "application/json")
  61. logic.ReturnSuccessResponseWithJson(w, r, failOverNode, "get failover node successfully")
  62. }
  63. // @Summary Create failover node
  64. // @Router /api/v1/node/{nodeid}/failover [post]
  65. // @Tags PRO
  66. // @Param nodeid path string true "Node ID"
  67. // @Success 200 {object} models.Node
  68. // @Failure 400 {object} models.ErrorResponse
  69. // @Failure 500 {object} models.ErrorResponse
  70. func createfailOver(w http.ResponseWriter, r *http.Request) {
  71. var params = mux.Vars(r)
  72. nodeid := params["nodeid"]
  73. // confirm host exists
  74. node, err := logic.GetNodeByID(nodeid)
  75. if err != nil {
  76. slog.Error("failed to get node:", "error", err.Error())
  77. logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
  78. return
  79. }
  80. err = proLogic.CreateFailOver(node)
  81. if err != nil {
  82. logic.ReturnErrorResponse(w, r, logic.FormatError(err, "internal"))
  83. return
  84. }
  85. go mq.PublishPeerUpdate(false)
  86. w.Header().Set("Content-Type", "application/json")
  87. logic.ReturnSuccessResponseWithJson(w, r, node, "created failover successfully")
  88. }
  89. // @Summary Reset failover for a network
  90. // @Router /api/v1/node/{network}/failover/reset [post]
  91. // @Tags PRO
  92. // @Param network path string true "Network ID"
  93. // @Success 200 {object} models.SuccessResponse
  94. // @Failure 500 {object} models.ErrorResponse
  95. func resetFailOver(w http.ResponseWriter, r *http.Request) {
  96. var params = mux.Vars(r)
  97. net := params["network"]
  98. nodes, err := logic.GetNetworkNodes(net)
  99. if err != nil {
  100. logic.ReturnErrorResponse(w, r, logic.FormatError(err, "internal"))
  101. return
  102. }
  103. for _, node := range nodes {
  104. if node.FailedOverBy != uuid.Nil {
  105. node.FailedOverBy = uuid.Nil
  106. if node.Mutex != nil {
  107. node.Mutex.Lock()
  108. }
  109. node.FailOverPeers = make(map[string]struct{})
  110. if node.Mutex != nil {
  111. node.Mutex.Unlock()
  112. }
  113. logic.UpsertNode(&node)
  114. }
  115. }
  116. go mq.PublishPeerUpdate(false)
  117. w.Header().Set("Content-Type", "application/json")
  118. logic.ReturnSuccessResponse(w, r, "failover has been reset successfully")
  119. }
  120. // @Summary Delete failover node
  121. // @Router /api/v1/node/{nodeid}/failover [delete]
  122. // @Tags PRO
  123. // @Param nodeid path string true "Node ID"
  124. // @Success 200 {object} models.Node
  125. // @Failure 400 {object} models.ErrorResponse
  126. // @Failure 500 {object} models.ErrorResponse
  127. func deletefailOver(w http.ResponseWriter, r *http.Request) {
  128. var params = mux.Vars(r)
  129. nodeid := params["nodeid"]
  130. // confirm host exists
  131. node, err := logic.GetNodeByID(nodeid)
  132. if err != nil {
  133. slog.Error("failed to get node:", "error", err.Error())
  134. logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
  135. return
  136. }
  137. node.IsFailOver = false
  138. // Reset FailOvered Peers
  139. err = logic.UpsertNode(&node)
  140. if err != nil {
  141. slog.Error("failed to upsert node", "node", node.ID.String(), "error", err)
  142. logic.ReturnErrorResponse(w, r, logic.FormatError(err, "internal"))
  143. return
  144. }
  145. proLogic.RemoveFailOverFromCache(node.Network)
  146. go func() {
  147. proLogic.ResetFailOver(&node)
  148. mq.PublishPeerUpdate(false)
  149. }()
  150. w.Header().Set("Content-Type", "application/json")
  151. logic.ReturnSuccessResponseWithJson(w, r, node, "deleted failover successfully")
  152. }
  153. // @Summary Failover me
  154. // @Router /api/v1/node/{nodeid}/failover_me [post]
  155. // @Tags PRO
  156. // @Param nodeid path string true "Node ID"
  157. // @Accept json
  158. // @Param body body models.FailOverMeReq true "Failover request"
  159. // @Success 200 {object} models.SuccessResponse
  160. // @Failure 400 {object} models.ErrorResponse
  161. // @Failure 500 {object} models.ErrorResponse
  162. func failOverME(w http.ResponseWriter, r *http.Request) {
  163. var params = mux.Vars(r)
  164. nodeid := params["nodeid"]
  165. // confirm host exists
  166. node, err := logic.GetNodeByID(nodeid)
  167. if err != nil {
  168. logger.Log(0, r.Header.Get("user"), "failed to get node:", err.Error())
  169. logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
  170. return
  171. }
  172. host, err := logic.GetHost(node.HostID.String())
  173. if err != nil {
  174. logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
  175. return
  176. }
  177. failOverNode, exists := proLogic.FailOverExists(node.Network)
  178. if !exists {
  179. logic.ReturnErrorResponse(
  180. w,
  181. r,
  182. logic.FormatError(
  183. fmt.Errorf("req-from: %s, failover node doesn't exist in the network", host.Name),
  184. "badrequest",
  185. ),
  186. )
  187. return
  188. }
  189. var failOverReq models.FailOverMeReq
  190. err = json.NewDecoder(r.Body).Decode(&failOverReq)
  191. if err != nil {
  192. logger.Log(0, r.Header.Get("user"), "error decoding request body: ", err.Error())
  193. logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
  194. return
  195. }
  196. var sendPeerUpdate bool
  197. peerNode, err := logic.GetNodeByID(failOverReq.NodeID)
  198. if err != nil {
  199. slog.Error("peer not found: ", "nodeid", failOverReq.NodeID, "error", err)
  200. logic.ReturnErrorResponse(
  201. w,
  202. r,
  203. logic.FormatError(errors.New("peer not found"), "badrequest"),
  204. )
  205. return
  206. }
  207. eli, _ := (&schema.Egress{Network: node.Network}).ListByNetwork(db.WithContext(context.TODO()))
  208. acls, _ := logic.ListAclsByNetwork(models.NetworkID(node.Network))
  209. logic.GetNodeEgressInfo(&node, eli, acls)
  210. logic.GetNodeEgressInfo(&peerNode, eli, acls)
  211. logic.GetNodeEgressInfo(&failOverNode, eli, acls)
  212. if peerNode.IsFailOver {
  213. logic.ReturnErrorResponse(
  214. w,
  215. r,
  216. logic.FormatError(errors.New("peer is acting as failover"), "badrequest"),
  217. )
  218. return
  219. }
  220. if node.IsFailOver {
  221. logic.ReturnErrorResponse(
  222. w,
  223. r,
  224. logic.FormatError(errors.New("node is acting as failover"), "badrequest"),
  225. )
  226. return
  227. }
  228. if peerNode.IsFailOver {
  229. logic.ReturnErrorResponse(
  230. w,
  231. r,
  232. logic.FormatError(errors.New("peer is acting as failover"), "badrequest"),
  233. )
  234. return
  235. }
  236. if node.IsRelayed && node.RelayedBy == peerNode.ID.String() {
  237. logic.ReturnErrorResponse(
  238. w,
  239. r,
  240. logic.FormatError(errors.New("node is relayed by peer node"), "badrequest"),
  241. )
  242. return
  243. }
  244. if node.IsRelay && peerNode.RelayedBy == node.ID.String() {
  245. logic.ReturnErrorResponse(
  246. w,
  247. r,
  248. logic.FormatError(errors.New("node acting as relay for the peer node"), "badrequest"),
  249. )
  250. return
  251. }
  252. if (node.InternetGwID != "" && failOverNode.IsInternetGateway && node.InternetGwID != failOverNode.ID.String()) ||
  253. (peerNode.InternetGwID != "" && failOverNode.IsInternetGateway && peerNode.InternetGwID != failOverNode.ID.String()) {
  254. logic.ReturnErrorResponse(
  255. w,
  256. r,
  257. logic.FormatError(
  258. errors.New("node using a internet gw by the peer node"),
  259. "badrequest",
  260. ),
  261. )
  262. return
  263. }
  264. if node.IsInternetGateway && peerNode.InternetGwID == node.ID.String() {
  265. logic.ReturnErrorResponse(
  266. w,
  267. r,
  268. logic.FormatError(
  269. errors.New("node acting as internet gw for the peer node"),
  270. "badrequest",
  271. ),
  272. )
  273. return
  274. }
  275. if node.InternetGwID != "" && node.InternetGwID == peerNode.ID.String() {
  276. logic.ReturnErrorResponse(
  277. w,
  278. r,
  279. logic.FormatError(
  280. errors.New("node using a internet gw by the peer node"),
  281. "badrequest",
  282. ),
  283. )
  284. return
  285. }
  286. err = proLogic.SetFailOverCtx(failOverNode, node, peerNode)
  287. if err != nil {
  288. slog.Debug("failed to create failover", "id", node.ID.String(),
  289. "network", node.Network, "error", err)
  290. logic.ReturnErrorResponse(
  291. w,
  292. r,
  293. logic.FormatError(fmt.Errorf("failed to create failover: %v", err), "internal"),
  294. )
  295. return
  296. }
  297. slog.Info(
  298. "[auto-relay] created relay on node",
  299. "node",
  300. node.ID.String(),
  301. "network",
  302. node.Network,
  303. )
  304. sendPeerUpdate = true
  305. if sendPeerUpdate {
  306. go mq.PublishPeerUpdate(false)
  307. }
  308. w.Header().Set("Content-Type", "application/json")
  309. logic.ReturnSuccessResponse(w, r, "relayed successfully")
  310. }
  311. // @Summary checkfailOverCtx
  312. // @Router /api/v1/node/{nodeid}/failover_check [get]
  313. // @Tags PRO
  314. // @Param nodeid path string true "Node ID"
  315. // @Accept json
  316. // @Param body body models.FailOverMeReq true "Failover request"
  317. // @Success 200 {object} models.SuccessResponse
  318. // @Failure 400 {object} models.ErrorResponse
  319. // @Failure 500 {object} models.ErrorResponse
  320. func checkfailOverCtx(w http.ResponseWriter, r *http.Request) {
  321. var params = mux.Vars(r)
  322. nodeid := params["nodeid"]
  323. // confirm host exists
  324. node, err := logic.GetNodeByID(nodeid)
  325. if err != nil {
  326. logger.Log(0, r.Header.Get("user"), "failed to get node:", err.Error())
  327. logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
  328. return
  329. }
  330. host, err := logic.GetHost(node.HostID.String())
  331. if err != nil {
  332. logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
  333. return
  334. }
  335. failOverNode, exists := proLogic.FailOverExists(node.Network)
  336. if !exists {
  337. logic.ReturnErrorResponse(
  338. w,
  339. r,
  340. logic.FormatError(
  341. fmt.Errorf("req-from: %s, failover node doesn't exist in the network", host.Name),
  342. "badrequest",
  343. ),
  344. )
  345. return
  346. }
  347. var failOverReq models.FailOverMeReq
  348. err = json.NewDecoder(r.Body).Decode(&failOverReq)
  349. if err != nil {
  350. logger.Log(0, r.Header.Get("user"), "error decoding request body: ", err.Error())
  351. logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
  352. return
  353. }
  354. peerNode, err := logic.GetNodeByID(failOverReq.NodeID)
  355. if err != nil {
  356. slog.Error("peer not found: ", "nodeid", failOverReq.NodeID, "error", err)
  357. logic.ReturnErrorResponse(
  358. w,
  359. r,
  360. logic.FormatError(errors.New("peer not found"), "badrequest"),
  361. )
  362. return
  363. }
  364. eli, _ := (&schema.Egress{Network: node.Network}).ListByNetwork(db.WithContext(context.TODO()))
  365. acls, _ := logic.ListAclsByNetwork(models.NetworkID(node.Network))
  366. logic.GetNodeEgressInfo(&node, eli, acls)
  367. logic.GetNodeEgressInfo(&peerNode, eli, acls)
  368. logic.GetNodeEgressInfo(&failOverNode, eli, acls)
  369. if peerNode.IsFailOver {
  370. logic.ReturnErrorResponse(
  371. w,
  372. r,
  373. logic.FormatError(errors.New("peer is acting as failover"), "badrequest"),
  374. )
  375. return
  376. }
  377. if node.IsFailOver {
  378. logic.ReturnErrorResponse(
  379. w,
  380. r,
  381. logic.FormatError(errors.New("node is acting as failover"), "badrequest"),
  382. )
  383. return
  384. }
  385. if peerNode.IsFailOver {
  386. logic.ReturnErrorResponse(
  387. w,
  388. r,
  389. logic.FormatError(errors.New("peer is acting as failover"), "badrequest"),
  390. )
  391. return
  392. }
  393. if node.IsRelayed && node.RelayedBy == peerNode.ID.String() {
  394. logic.ReturnErrorResponse(
  395. w,
  396. r,
  397. logic.FormatError(errors.New("node is relayed by peer node"), "badrequest"),
  398. )
  399. return
  400. }
  401. if node.IsRelay && peerNode.RelayedBy == node.ID.String() {
  402. logic.ReturnErrorResponse(
  403. w,
  404. r,
  405. logic.FormatError(errors.New("node acting as relay for the peer node"), "badrequest"),
  406. )
  407. return
  408. }
  409. if (node.InternetGwID != "" && failOverNode.IsInternetGateway && node.InternetGwID != failOverNode.ID.String()) ||
  410. (peerNode.InternetGwID != "" && failOverNode.IsInternetGateway && peerNode.InternetGwID != failOverNode.ID.String()) {
  411. logic.ReturnErrorResponse(
  412. w,
  413. r,
  414. logic.FormatError(
  415. errors.New("node using a internet gw by the peer node"),
  416. "badrequest",
  417. ),
  418. )
  419. return
  420. }
  421. if node.IsInternetGateway && peerNode.InternetGwID == node.ID.String() {
  422. logic.ReturnErrorResponse(
  423. w,
  424. r,
  425. logic.FormatError(
  426. errors.New("node acting as internet gw for the peer node"),
  427. "badrequest",
  428. ),
  429. )
  430. return
  431. }
  432. if node.InternetGwID != "" && node.InternetGwID == peerNode.ID.String() {
  433. logic.ReturnErrorResponse(
  434. w,
  435. r,
  436. logic.FormatError(
  437. errors.New("node using a internet gw by the peer node"),
  438. "badrequest",
  439. ),
  440. )
  441. return
  442. }
  443. if ok := logic.IsPeerAllowed(node, peerNode, true); !ok {
  444. logic.ReturnErrorResponse(
  445. w,
  446. r,
  447. logic.FormatError(
  448. errors.New("peers are not allowed to communicate"),
  449. "badrequest",
  450. ),
  451. )
  452. return
  453. }
  454. err = proLogic.CheckFailOverCtx(failOverNode, node, peerNode)
  455. if err != nil {
  456. slog.Error("failover ctx cannot be set ", "error", err)
  457. logic.ReturnErrorResponse(
  458. w,
  459. r,
  460. logic.FormatError(fmt.Errorf("failover ctx cannot be set: %v", err), "internal"),
  461. )
  462. return
  463. }
  464. w.Header().Set("Content-Type", "application/json")
  465. logic.ReturnSuccessResponse(w, r, "failover can be set")
  466. }