浏览代码

synctrace WIP

Wade Simmons 1 年之前
父节点
当前提交
8f1dc12618
共有 9 个文件被更改,包括 61 次插入32 次删除
  1. 2 1
      Makefile
  2. 7 7
      connection_manager.go
  3. 3 2
      connection_state.go
  4. 5 0
      go.mod
  5. 12 0
      go.sum
  6. 2 0
      handshake_ix.go
  7. 6 3
      handshake_manager.go
  8. 11 10
      hostmap.go
  9. 13 9
      lighthouse.go

+ 2 - 1
Makefile

@@ -61,7 +61,7 @@ ALL = $(ALL_LINUX) \
 	windows-arm64
 
 e2e:
-	$(TEST_ENV) go test -tags=e2e_testing -count=1 $(TEST_FLAGS) ./e2e
+	$(TEST_ENV) go test -tags=synctrace,e2e_testing -count=1 $(TEST_FLAGS) ./e2e
 
 e2ev: TEST_FLAGS = -v
 e2ev: e2e
@@ -206,6 +206,7 @@ ifeq ($(words $(MAKECMDGOALS)),1)
 	@$(MAKE) service ${.DEFAULT_GOAL} --no-print-directory
 endif
 
+bin-docker: BUILD_ARGS = -tags=synctrace
 bin-docker: bin build/linux-amd64/nebula build/linux-amd64/nebula-cert
 
 smoke-docker: bin-docker

+ 7 - 7
connection_manager.go

@@ -3,7 +3,6 @@ package nebula
 import (
 	"bytes"
 	"context"
-	"sync"
 	"time"
 
 	"github.com/rcrowley/go-metrics"
@@ -12,6 +11,7 @@ import (
 	"github.com/slackhq/nebula/header"
 	"github.com/slackhq/nebula/iputil"
 	"github.com/slackhq/nebula/udp"
+	"github.com/wadey/synctrace"
 )
 
 type trafficDecision int
@@ -28,14 +28,14 @@ const (
 
 type connectionManager struct {
 	in     map[uint32]struct{}
-	inLock *sync.RWMutex
+	inLock synctrace.RWMutex
 
 	out     map[uint32]struct{}
-	outLock *sync.RWMutex
+	outLock synctrace.RWMutex
 
 	// relayUsed holds which relay localIndexs are in use
 	relayUsed     map[uint32]struct{}
-	relayUsedLock *sync.RWMutex
+	relayUsedLock synctrace.RWMutex
 
 	hostMap                 *HostMap
 	trafficTimer            *LockingTimerWheel[uint32]
@@ -60,11 +60,11 @@ func newConnectionManager(ctx context.Context, l *logrus.Logger, intf *Interface
 	nc := &connectionManager{
 		hostMap:                 intf.hostMap,
 		in:                      make(map[uint32]struct{}),
-		inLock:                  &sync.RWMutex{},
+		inLock:                  synctrace.NewRWMutex("connection-manager-in"),
 		out:                     make(map[uint32]struct{}),
-		outLock:                 &sync.RWMutex{},
+		outLock:                 synctrace.NewRWMutex("connection-manager-out"),
 		relayUsed:               make(map[uint32]struct{}),
-		relayUsedLock:           &sync.RWMutex{},
+		relayUsedLock:           synctrace.NewRWMutex("connection-manager-relay-used"),
 		trafficTimer:            NewLockingTimerWheel[uint32](time.Millisecond*500, max),
 		intf:                    intf,
 		pendingDeletion:         make(map[uint32]struct{}),

+ 3 - 2
connection_state.go

@@ -3,13 +3,13 @@ package nebula
 import (
 	"crypto/rand"
 	"encoding/json"
-	"sync"
 	"sync/atomic"
 
 	"github.com/flynn/noise"
 	"github.com/sirupsen/logrus"
 	"github.com/slackhq/nebula/cert"
 	"github.com/slackhq/nebula/noiseutil"
+	"github.com/wadey/synctrace"
 )
 
 const ReplayWindow = 1024
@@ -23,7 +23,7 @@ type ConnectionState struct {
 	initiator      bool
 	messageCounter atomic.Uint64
 	window         *Bits
-	writeLock      sync.Mutex
+	writeLock      synctrace.Mutex
 }
 
 func NewConnectionState(l *logrus.Logger, cipher string, certState *CertState, initiator bool, pattern noise.HandshakePattern, psk []byte, pskStage int) *ConnectionState {
@@ -71,6 +71,7 @@ func NewConnectionState(l *logrus.Logger, cipher string, certState *CertState, i
 		initiator: initiator,
 		window:    b,
 		myCert:    certState.Certificate,
+		writeLock: synctrace.NewMutex("connection-state"),
 	}
 
 	return ci

+ 5 - 0
go.mod

@@ -22,6 +22,7 @@ require (
 	github.com/songgao/water v0.0.0-20200317203138-2b4b6d7c09d8
 	github.com/stretchr/testify v1.9.0
 	github.com/vishvananda/netlink v1.2.1-beta.2
+	github.com/wadey/synctrace v0.0.0-20240529131858-09630e8cbb1b
 	golang.org/x/crypto v0.23.0
 	golang.org/x/exp v0.0.0-20230725093048-515e97ebf090
 	golang.org/x/net v0.25.0
@@ -40,11 +41,15 @@ require (
 	github.com/beorn7/perks v1.0.1 // indirect
 	github.com/cespare/xxhash/v2 v2.2.0 // indirect
 	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/emirpasic/gods v1.18.1 // indirect
 	github.com/google/btree v1.1.2 // indirect
+	github.com/google/uuid v1.3.0 // indirect
+	github.com/heimdalr/dag v1.4.0 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/prometheus/client_model v0.5.0 // indirect
 	github.com/prometheus/common v0.48.0 // indirect
 	github.com/prometheus/procfs v0.12.0 // indirect
+	github.com/timandy/routine v1.1.1 // indirect
 	github.com/vishvananda/netns v0.0.4 // indirect
 	golang.org/x/mod v0.16.0 // indirect
 	golang.org/x/time v0.5.0 // indirect

+ 12 - 0
go.sum

@@ -22,6 +22,8 @@ github.com/cyberdelia/go-metrics-graphite v0.0.0-20161219230853-39f87cc3b432/go.
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
+github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
 github.com/flynn/noise v1.1.0 h1:KjPQoQCEFdZDiP03phOvGi11+SVVhBG2wOWAorLsstg=
 github.com/flynn/noise v1.1.0/go.mod h1:xbMo+0i6+IGbYdJhF31t2eR1BIU0CYc12+BNAKwUTag=
 github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
@@ -31,6 +33,8 @@ github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9
 github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
 github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A=
 github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
+github.com/go-test/deep v1.1.0 h1:WOcxcdHcvdgThNXjw0t76K42FXTU7HpNQWHpA2HHNlg=
+github.com/go-test/deep v1.1.0/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE=
 github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
 github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
 github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
@@ -56,6 +60,10 @@ github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN
 github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/google/gopacket v1.1.19 h1:ves8RnFZPGiFnTS0uPQStjwru6uO6h+nlr9j6fL7kF8=
 github.com/google/gopacket v1.1.19/go.mod h1:iJ8V8n6KS+z2U1A8pUwu8bW5SyEMkXJB8Yo/Vo+TKTo=
+github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
+github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/heimdalr/dag v1.4.0 h1:zG3JA4RDVLc55k3AXAgfwa+EgBNZ0TkfOO3C29Ucpmg=
+github.com/heimdalr/dag v1.4.0/go.mod h1:OCh6ghKmU0hPjtwMqWBoNxPmtRioKd1xSu7Zs4sbIqM=
 github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
 github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
 github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
@@ -135,11 +143,15 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P
 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
 github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/timandy/routine v1.1.1 h1:6/Z7qLFZj3GrzuRksBFzIG8YGUh8CLhjnnMePBQTrEI=
+github.com/timandy/routine v1.1.1/go.mod h1:OZHPOKSvqL/ZvqXFkNZyit0xIVelERptYXdAHH00adQ=
 github.com/vishvananda/netlink v1.2.1-beta.2 h1:Llsql0lnQEbHj0I1OuKyp8otXp0r3q0mPkuhwHfStVs=
 github.com/vishvananda/netlink v1.2.1-beta.2/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho=
 github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
 github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8=
 github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
+github.com/wadey/synctrace v0.0.0-20240529131858-09630e8cbb1b h1:lVzvVABIrn3RJW6uAnYx22rVmG8h8cYk3AsoK5/ata8=
+github.com/wadey/synctrace v0.0.0-20240529131858-09630e8cbb1b/go.mod h1:ezigq3+pMngwXVAhh16Mi5IiYIJZSkNvF+HhLacwZho=
 github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=

+ 2 - 0
handshake_ix.go

@@ -8,6 +8,7 @@ import (
 	"github.com/slackhq/nebula/header"
 	"github.com/slackhq/nebula/iputil"
 	"github.com/slackhq/nebula/udp"
+	"github.com/wadey/synctrace"
 )
 
 // NOISE IX Handshakes
@@ -139,6 +140,7 @@ func ixHandshakeStage1(f *Interface, addr *udp.Addr, via *ViaSender, packet []by
 		HandshakePacket:   make(map[uint8][]byte, 0),
 		lastHandshakeTime: hs.Details.Time,
 		relayState: RelayState{
+			RWMutex:       synctrace.NewRWMutex("relay-state"),
 			relays:        map[iputil.VpnIp]struct{}{},
 			relayForByIp:  map[iputil.VpnIp]*Relay{},
 			relayForByIdx: map[uint32]*Relay{},

+ 6 - 3
handshake_manager.go

@@ -7,7 +7,6 @@ import (
 	"encoding/binary"
 	"errors"
 	"net"
-	"sync"
 	"time"
 
 	"github.com/rcrowley/go-metrics"
@@ -15,6 +14,7 @@ import (
 	"github.com/slackhq/nebula/header"
 	"github.com/slackhq/nebula/iputil"
 	"github.com/slackhq/nebula/udp"
+	"github.com/wadey/synctrace"
 )
 
 const (
@@ -44,7 +44,7 @@ type HandshakeConfig struct {
 
 type HandshakeManager struct {
 	// Mutex for interacting with the vpnIps and indexes maps
-	sync.RWMutex
+	synctrace.RWMutex
 
 	vpnIps  map[iputil.VpnIp]*HandshakeHostInfo
 	indexes map[uint32]*HandshakeHostInfo
@@ -65,7 +65,7 @@ type HandshakeManager struct {
 }
 
 type HandshakeHostInfo struct {
-	sync.Mutex
+	synctrace.Mutex
 
 	startTime   time.Time       // Time that we first started trying with this handshake
 	ready       bool            // Is the handshake ready
@@ -103,6 +103,7 @@ func (hh *HandshakeHostInfo) cachePacket(l *logrus.Logger, t header.MessageType,
 
 func NewHandshakeManager(l *logrus.Logger, mainHostMap *HostMap, lightHouse *LightHouse, outside udp.Conn, config HandshakeConfig) *HandshakeManager {
 	return &HandshakeManager{
+		RWMutex:                synctrace.NewRWMutex("handshake-manager"),
 		vpnIps:                 map[iputil.VpnIp]*HandshakeHostInfo{},
 		indexes:                map[uint32]*HandshakeHostInfo{},
 		mainHostMap:            mainHostMap,
@@ -388,6 +389,7 @@ func (hm *HandshakeManager) StartHandshake(vpnIp iputil.VpnIp, cacheCb func(*Han
 		vpnIp:           vpnIp,
 		HandshakePacket: make(map[uint8][]byte, 0),
 		relayState: RelayState{
+			RWMutex:       synctrace.NewRWMutex("relay-state"),
 			relays:        map[iputil.VpnIp]struct{}{},
 			relayForByIp:  map[iputil.VpnIp]*Relay{},
 			relayForByIdx: map[uint32]*Relay{},
@@ -395,6 +397,7 @@ func (hm *HandshakeManager) StartHandshake(vpnIp iputil.VpnIp, cacheCb func(*Han
 	}
 
 	hh := &HandshakeHostInfo{
+		Mutex:     synctrace.NewMutex("handshake-hostinfo"),
 		hostinfo:  hostinfo,
 		startTime: time.Now(),
 	}

+ 11 - 10
hostmap.go

@@ -3,7 +3,6 @@ package nebula
 import (
 	"errors"
 	"net"
-	"sync"
 	"sync/atomic"
 	"time"
 
@@ -15,6 +14,7 @@ import (
 	"github.com/slackhq/nebula/header"
 	"github.com/slackhq/nebula/iputil"
 	"github.com/slackhq/nebula/udp"
+	"github.com/wadey/synctrace"
 )
 
 // const ProbeLen = 100
@@ -53,21 +53,21 @@ type Relay struct {
 }
 
 type HostMap struct {
-	sync.RWMutex    //Because we concurrently read and write to our maps
-	Indexes         map[uint32]*HostInfo
-	Relays          map[uint32]*HostInfo // Maps a Relay IDX to a Relay HostInfo object
-	RemoteIndexes   map[uint32]*HostInfo
-	Hosts           map[iputil.VpnIp]*HostInfo
-	preferredRanges atomic.Pointer[[]*net.IPNet]
-	vpnCIDR         *net.IPNet
-	l               *logrus.Logger
+	synctrace.RWMutex //Because we concurrently read and write to our maps
+	Indexes           map[uint32]*HostInfo
+	Relays            map[uint32]*HostInfo // Maps a Relay IDX to a Relay HostInfo object
+	RemoteIndexes     map[uint32]*HostInfo
+	Hosts             map[iputil.VpnIp]*HostInfo
+	preferredRanges   atomic.Pointer[[]*net.IPNet]
+	vpnCIDR           *net.IPNet
+	l                 *logrus.Logger
 }
 
 // For synchronization, treat the pointed-to Relay struct as immutable. To edit the Relay
 // struct, make a copy of an existing value, edit the fileds in the copy, and
 // then store a pointer to the new copy in both realyForBy* maps.
 type RelayState struct {
-	sync.RWMutex
+	synctrace.RWMutex
 
 	relays        map[iputil.VpnIp]struct{} // Set of VpnIp's of Hosts to use as relays to access this peer
 	relayForByIp  map[iputil.VpnIp]*Relay   // Maps VpnIps of peers for which this HostInfo is a relay to some Relay info
@@ -271,6 +271,7 @@ func NewHostMapFromConfig(l *logrus.Logger, vpnCIDR *net.IPNet, c *config.C) *Ho
 
 func newHostMap(l *logrus.Logger, vpnCIDR *net.IPNet) *HostMap {
 	return &HostMap{
+		RWMutex:       synctrace.NewRWMutex("hostmap"),
 		Indexes:       map[uint32]*HostInfo{},
 		Relays:        map[uint32]*HostInfo{},
 		RemoteIndexes: map[uint32]*HostInfo{},

+ 13 - 9
lighthouse.go

@@ -7,7 +7,6 @@ import (
 	"fmt"
 	"net"
 	"net/netip"
-	"sync"
 	"sync/atomic"
 	"time"
 
@@ -19,6 +18,7 @@ import (
 	"github.com/slackhq/nebula/iputil"
 	"github.com/slackhq/nebula/udp"
 	"github.com/slackhq/nebula/util"
+	"github.com/wadey/synctrace"
 )
 
 //TODO: if a lighthouse doesn't have an answer, clients AGGRESSIVELY REQUERY.. why? handshake manager and/or getOrHandshake?
@@ -33,14 +33,14 @@ type netIpAndPort struct {
 
 type LightHouse struct {
 	//TODO: We need a timer wheel to kick out vpnIps that haven't reported in a long time
-	sync.RWMutex //Because we concurrently read and write to our maps
-	ctx          context.Context
-	amLighthouse bool
-	myVpnIp      iputil.VpnIp
-	myVpnZeros   iputil.VpnIp
-	myVpnNet     *net.IPNet
-	punchConn    udp.Conn
-	punchy       *Punchy
+	synctrace.RWMutex //Because we concurrently read and write to our maps
+	ctx               context.Context
+	amLighthouse      bool
+	myVpnIp           iputil.VpnIp
+	myVpnZeros        iputil.VpnIp
+	myVpnNet          *net.IPNet
+	punchConn         udp.Conn
+	punchy            *Punchy
 
 	// Local cache of answers from light houses
 	// map of vpn Ip to answers
@@ -103,6 +103,7 @@ func NewLightHouseFromConfig(ctx context.Context, l *logrus.Logger, c *config.C,
 
 	ones, _ := myVpnNet.Mask.Size()
 	h := LightHouse{
+		RWMutex:      synctrace.NewRWMutex("lighthouse"),
 		ctx:          ctx,
 		amLighthouse: amLighthouse,
 		myVpnIp:      iputil.Ip2VpnIp(myVpnNet.IP),
@@ -468,6 +469,7 @@ func (lh *LightHouse) QueryServer(ip iputil.VpnIp) {
 		return
 	}
 
+	synctrace.ChanDebugSend("lighthouse-querychan")
 	lh.queryChan <- ip
 }
 
@@ -750,9 +752,11 @@ func (lh *LightHouse) startQueryWorker() {
 		nb := make([]byte, 12, 12)
 		out := make([]byte, mtu)
 
+		synctrace.ChanDebugRecvLock("lighthouse-querychan")
 		for {
 			select {
 			case <-lh.ctx.Done():
+				synctrace.ChanDebugRecvUnlock("lighthouse-querychan")
 				return
 			case ip := <-lh.queryChan:
 				lh.innerQueryServer(ip, nb, out)