瀏覽代碼

:gear: Allow customizing healthcheck intervals

This can be helpful to reduce network stress by using longer delays between healthchecks.
Ettore Di Giacinto 3 年之前
父節點
當前提交
605278afad
共有 5 個文件被更改,包括 44 次插入16 次删除
  1. 24 2
      cmd/main.go
  2. 7 4
      pkg/services/alive.go
  3. 6 6
      pkg/services/alive_test.go
  4. 1 1
      pkg/services/dns_test.go
  5. 6 3
      pkg/vpn/dhcp.go

+ 24 - 2
cmd/main.go

@@ -96,6 +96,24 @@ func MainFlags() []cli.Flag {
 			EnvVar: "DNSCACHESIZE",
 			Value:  200,
 		},
+		&cli.IntFlag{
+			Name:   "aliveness-healthcheck-interval",
+			Usage:  "Healthcheck interval",
+			EnvVar: "HEALTHCHECKINTERVAL",
+			Value:  120,
+		},
+		&cli.IntFlag{
+			Name:   "aliveness-healthcheck-scrub-interval",
+			Usage:  "Healthcheck scrub interval",
+			EnvVar: "HEALTHCHECKSCRUBINTERVAL",
+			Value:  600,
+		},
+		&cli.IntFlag{
+			Name:   "aliveness-healthcheck-max-interval",
+			Usage:  "Healthcheck max interval. Threshold after a node is determined offline",
+			EnvVar: "HEALTHCHECKMAXINTERVAL",
+			Value:  900,
+		},
 		&cli.StringSliceFlag{
 			Name:   "dns-forward-server",
 			Usage:  "List of DNS forward server, e.g. 8.8.8.8:53, 192.168.1.1:53 ...",
@@ -130,14 +148,18 @@ func Main() func(c *cli.Context) error {
 		}
 		o, vpnOpts, ll := cliToOpts(c)
 
-		o = append(o, services.Alive(30*time.Second, 10*time.Minute)...)
+		o = append(o,
+			services.Alive(
+				time.Duration(c.Int("aliveness-healthcheck-interval"))*time.Second,
+				time.Duration(c.Int("aliveness-healthcheck-scrub-interval"))*time.Second,
+				time.Duration(c.Int("aliveness-healthcheck-max-interval"))*time.Second)...)
 		if c.Bool("dhcp") {
 			// Adds DHCP server
 			address, _, err := net.ParseCIDR(c.String("address"))
 			if err != nil {
 				return err
 			}
-			nodeOpts, vO := vpn.DHCP(ll, 10*time.Second, c.String("lease-dir"), address.String())
+			nodeOpts, vO := vpn.DHCP(ll, 15*time.Minute, c.String("lease-dir"), address.String())
 			o = append(o, nodeOpts...)
 			vpnOpts = append(vpnOpts, vO...)
 		}

+ 7 - 4
pkg/services/alive.go

@@ -26,7 +26,9 @@ import (
 	"github.com/mudler/edgevpn/pkg/blockchain"
 )
 
-func Alive(announcetime, scrubTime time.Duration) []node.Option {
+// Alive announces the node every announcetime, with a periodic scrubTime for healthchecks.
+// maxtime is the time used to determine when a node is unreachable (after maxtime, it is considered unreachable).
+func Alive(announcetime, scrubTime, maxtime time.Duration) []node.Option {
 	return []node.Option{
 		node.WithNetworkService(
 			func(ctx context.Context, c node.Config, n *node.Node, b *blockchain.Ledger) error {
@@ -42,7 +44,7 @@ func Alive(announcetime, scrubTime time.Duration) []node.Option {
 						})
 
 						// Keep-alive scrub
-						nodes := AvailableNodes(b)
+						nodes := AvailableNodes(b, maxtime)
 						if len(nodes) == 0 {
 							return
 						}
@@ -65,12 +67,13 @@ func Alive(announcetime, scrubTime time.Duration) []node.Option {
 	}
 }
 
-func AvailableNodes(b *blockchain.Ledger) (active []string) {
+// AvailableNodes returns the available nodes which sent a healthcheck in the last maxTime
+func AvailableNodes(b *blockchain.Ledger, maxTime time.Duration) (active []string) {
 	for u, t := range b.LastBlock().Storage[protocol.HealthCheckKey] {
 		var s string
 		t.Unmarshal(&s)
 		parsed, _ := time.Parse(time.RFC3339, s)
-		if parsed.Add(15 * time.Minute).After(time.Now()) {
+		if parsed.Add(maxTime).After(time.Now()) {
 			active = append(active, u)
 		}
 	}

+ 6 - 6
pkg/services/alive_test.go

@@ -36,7 +36,7 @@ var _ = Describe("Alive service", func() {
 	l := node.Logger(logg)
 
 	opts := append(
-		Alive(5*time.Second, 100*time.Second),
+		Alive(5*time.Second, 100*time.Second, 15*time.Minute),
 		node.FromBase64(true, true, token),
 		l)
 
@@ -63,7 +63,7 @@ var _ = Describe("Alive service", func() {
 				if err != nil {
 					return []string{}
 				}
-				return AvailableNodes(ll)
+				return AvailableNodes(ll, 15*time.Minute)
 			}, 100*time.Second, 1*time.Second).Should(matches)
 
 			Expect(ll.LastBlock().Index).ToNot(Equal(index))
@@ -73,7 +73,7 @@ var _ = Describe("Alive service", func() {
 	Context("Aliveness Scrub", func() {
 		BeforeEach(func() {
 			opts = append(
-				Alive(2*time.Second, 4*time.Second),
+				Alive(2*time.Second, 4*time.Second, 15*time.Minute),
 				node.FromBase64(true, true, token),
 				l)
 		})
@@ -100,7 +100,7 @@ var _ = Describe("Alive service", func() {
 				if err != nil {
 					return []string{}
 				}
-				return AvailableNodes(ll)
+				return AvailableNodes(ll, 15*time.Minute)
 			}, 100*time.Second, 1*time.Second).Should(matches)
 
 			Expect(ll.LastBlock().Index).ToNot(Equal(index))
@@ -111,7 +111,7 @@ var _ = Describe("Alive service", func() {
 				if err != nil {
 					return []string{}
 				}
-				return AvailableNodes(ll)
+				return AvailableNodes(ll, 15*time.Minute)
 			}, 30*time.Second, 1*time.Second).Should(BeEmpty())
 
 			Expect(ll.LastBlock().Index).ToNot(Equal(index))
@@ -122,7 +122,7 @@ var _ = Describe("Alive service", func() {
 				if err != nil {
 					return []string{}
 				}
-				return AvailableNodes(ll)
+				return AvailableNodes(ll, 15*time.Minute)
 			}, 10*time.Second, 1*time.Second).Should(matches)
 			Expect(ll.LastBlock().Index).ToNot(Equal(index))
 

+ 1 - 1
pkg/services/dns_test.go

@@ -39,7 +39,7 @@ var _ = Describe("DNS service", func() {
 	l := node.Logger(logg)
 
 	e2 := node.New(
-		append(Alive(15*time.Second, 90*time.Minute),
+		append(Alive(15*time.Second, 90*time.Minute, 15*time.Minute),
 			node.FromBase64(true, true, token), node.WithStore(&blockchain.MemoryStore{}), l)...)
 
 	Context("DNS service", func() {

+ 6 - 3
pkg/vpn/dhcp.go

@@ -46,7 +46,10 @@ func checkDHCPLease(c node.Config, leasedir string) string {
 	return ""
 }
 
-func DHCP(l log.StandardLogger, announcetime time.Duration, leasedir string, address string) ([]node.Option, []Option) {
+// DHCP returns a DHCP network service. It requires the Alive service in order to determine available nodes.
+// Available nodes are used to determine which ones need an IP; nodes that have not sent a healthcheck
+// within maxTime are treated as offline and are not considered.
+func DHCP(l log.StandardLogger, maxTime time.Duration, leasedir string, address string) ([]node.Option, []Option) {
 	ip := make(chan string, 1)
 	return []node.Option{
 			func(cfg *node.Config) error {
@@ -73,7 +76,7 @@ func DHCP(l log.StandardLogger, announcetime time.Duration, leasedir string, add
 						time.Sleep(5 * time.Second)
 
 						// This network service is blocking and calls in before VPN, hence it needs to registered before VPN
-						nodes := services.AvailableNodes(b)
+						nodes := services.AvailableNodes(b, maxTime)
 
 						currentIPs := map[string]string{}
 						ips := []string{}
@@ -104,7 +107,7 @@ func DHCP(l log.StandardLogger, announcetime time.Duration, leasedir string, add
 
 						if n.Host().ID().String() != lead {
 							l.Debug("Not leader, sleeping")
-							time.Sleep(announcetime)
+							time.Sleep(5 * time.Second)
 							continue
 						}