handshake_manager.go 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752
  1. package nebula
  2. import (
  3. "bytes"
  4. "context"
  5. "crypto/rand"
  6. "encoding/binary"
  7. "errors"
  8. "net/netip"
  9. "slices"
  10. "sync"
  11. "time"
  12. "github.com/rcrowley/go-metrics"
  13. "github.com/sirupsen/logrus"
  14. "github.com/slackhq/nebula/cert"
  15. "github.com/slackhq/nebula/header"
  16. "github.com/slackhq/nebula/udp"
  17. )
  // Defaults applied to a HandshakeConfig when nothing else is configured
  // (see defaultHandshakeConfig).
  const (
  	// DefaultHandshakeTryInterval is the base interval between handshake
  	// attempts and the tick period of the handshake timer.
  	DefaultHandshakeTryInterval = 100 * time.Millisecond

  	// DefaultHandshakeRetries is the number of attempts made before a pending
  	// handshake is considered timed out.
  	DefaultHandshakeRetries = 10

  	// DefaultHandshakeTriggerBuffer is the capacity of the channel used to
  	// trigger an outbound handshake ahead of the timer.
  	DefaultHandshakeTriggerBuffer = 64

  	// DefaultUseRelays controls whether relays are asked to carry a handshake.
  	DefaultUseRelays = true
  )
  24. var (
  25. defaultHandshakeConfig = HandshakeConfig{
  26. tryInterval: DefaultHandshakeTryInterval,
  27. retries: DefaultHandshakeRetries,
  28. triggerBuffer: DefaultHandshakeTriggerBuffer,
  29. useRelays: DefaultUseRelays,
  30. }
  31. )
  32. type HandshakeConfig struct {
  33. tryInterval time.Duration
  34. retries int64
  35. triggerBuffer int
  36. useRelays bool
  37. messageMetrics *MessageMetrics
  38. }
  39. type HandshakeManager struct {
  40. // Mutex for interacting with the vpnIps and indexes maps
  41. sync.RWMutex
  42. vpnIps map[netip.Addr]*HandshakeHostInfo
  43. indexes map[uint32]*HandshakeHostInfo
  44. mainHostMap *HostMap
  45. lightHouse *LightHouse
  46. outside udp.Conn
  47. config HandshakeConfig
  48. OutboundHandshakeTimer *LockingTimerWheel[netip.Addr]
  49. messageMetrics *MessageMetrics
  50. metricInitiated metrics.Counter
  51. metricTimedOut metrics.Counter
  52. f *Interface
  53. l *logrus.Logger
  54. multiPort MultiPortConfig
  55. udpRaw *udp.RawConn
  56. // can be used to trigger outbound handshake for the given vpnIp
  57. trigger chan netip.Addr
  58. }
  59. type HandshakeHostInfo struct {
  60. sync.Mutex
  61. startTime time.Time // Time that we first started trying with this handshake
  62. ready bool // Is the handshake ready
  63. counter int64 // How many attempts have we made so far
  64. lastRemotes []netip.AddrPort // Remotes that we sent to during the previous attempt
  65. packetStore []*cachedPacket // A set of packets to be transmitted once the handshake completes
  66. hostinfo *HostInfo
  67. }
  68. func (hh *HandshakeHostInfo) cachePacket(l *logrus.Logger, t header.MessageType, st header.MessageSubType, packet []byte, f packetCallback, m *cachedPacketMetrics) {
  69. if len(hh.packetStore) < 100 {
  70. tempPacket := make([]byte, len(packet))
  71. copy(tempPacket, packet)
  72. hh.packetStore = append(hh.packetStore, &cachedPacket{t, st, f, tempPacket})
  73. if l.Level >= logrus.DebugLevel {
  74. hh.hostinfo.logger(l).
  75. WithField("length", len(hh.packetStore)).
  76. WithField("stored", true).
  77. Debugf("Packet store")
  78. }
  79. } else {
  80. m.dropped.Inc(1)
  81. if l.Level >= logrus.DebugLevel {
  82. hh.hostinfo.logger(l).
  83. WithField("length", len(hh.packetStore)).
  84. WithField("stored", false).
  85. Debugf("Packet store")
  86. }
  87. }
  88. }
  89. func NewHandshakeManager(l *logrus.Logger, mainHostMap *HostMap, lightHouse *LightHouse, outside udp.Conn, config HandshakeConfig) *HandshakeManager {
  90. return &HandshakeManager{
  91. vpnIps: map[netip.Addr]*HandshakeHostInfo{},
  92. indexes: map[uint32]*HandshakeHostInfo{},
  93. mainHostMap: mainHostMap,
  94. lightHouse: lightHouse,
  95. outside: outside,
  96. config: config,
  97. trigger: make(chan netip.Addr, config.triggerBuffer),
  98. OutboundHandshakeTimer: NewLockingTimerWheel[netip.Addr](config.tryInterval, hsTimeout(config.retries, config.tryInterval)),
  99. messageMetrics: config.messageMetrics,
  100. metricInitiated: metrics.GetOrRegisterCounter("handshake_manager.initiated", nil),
  101. metricTimedOut: metrics.GetOrRegisterCounter("handshake_manager.timed_out", nil),
  102. l: l,
  103. }
  104. }
  105. func (hm *HandshakeManager) Run(ctx context.Context) {
  106. clockSource := time.NewTicker(hm.config.tryInterval)
  107. defer clockSource.Stop()
  108. for {
  109. select {
  110. case <-ctx.Done():
  111. return
  112. case vpnIP := <-hm.trigger:
  113. hm.handleOutbound(vpnIP, true)
  114. case now := <-clockSource.C:
  115. hm.NextOutboundHandshakeTimerTick(now)
  116. }
  117. }
  118. }
  119. func (hm *HandshakeManager) HandleIncoming(addr netip.AddrPort, via *ViaSender, packet []byte, h *header.H) {
  120. // First remote allow list check before we know the vpnIp
  121. if addr.IsValid() {
  122. if !hm.lightHouse.GetRemoteAllowList().AllowUnknownVpnAddr(addr.Addr()) {
  123. hm.l.WithField("udpAddr", addr).Debug("lighthouse.remote_allow_list denied incoming handshake")
  124. return
  125. }
  126. }
  127. switch h.Subtype {
  128. case header.HandshakeIXPSK0:
  129. switch h.MessageCounter {
  130. case 1:
  131. ixHandshakeStage1(hm.f, addr, via, packet, h)
  132. case 2:
  133. newHostinfo := hm.queryIndex(h.RemoteIndex)
  134. tearDown := ixHandshakeStage2(hm.f, addr, via, newHostinfo, packet, h)
  135. if tearDown && newHostinfo != nil {
  136. hm.DeleteHostInfo(newHostinfo.hostinfo)
  137. }
  138. }
  139. }
  140. }
  141. func (hm *HandshakeManager) NextOutboundHandshakeTimerTick(now time.Time) {
  142. hm.OutboundHandshakeTimer.Advance(now)
  143. for {
  144. vpnIp, has := hm.OutboundHandshakeTimer.Purge()
  145. if !has {
  146. break
  147. }
  148. hm.handleOutbound(vpnIp, false)
  149. }
  150. }
  151. func (hm *HandshakeManager) handleOutbound(vpnIp netip.Addr, lighthouseTriggered bool) {
  152. hh := hm.queryVpnIp(vpnIp)
  153. if hh == nil {
  154. return
  155. }
  156. hh.Lock()
  157. defer hh.Unlock()
  158. hostinfo := hh.hostinfo
  159. // If we are out of time, clean up
  160. if hh.counter >= hm.config.retries {
  161. hh.hostinfo.logger(hm.l).WithField("udpAddrs", hh.hostinfo.remotes.CopyAddrs(hm.mainHostMap.GetPreferredRanges())).
  162. WithField("initiatorIndex", hh.hostinfo.localIndexId).
  163. WithField("remoteIndex", hh.hostinfo.remoteIndexId).
  164. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  165. WithField("durationNs", time.Since(hh.startTime).Nanoseconds()).
  166. Info("Handshake timed out")
  167. hm.metricTimedOut.Inc(1)
  168. hm.DeleteHostInfo(hostinfo)
  169. return
  170. }
  171. // Increment the counter to increase our delay, linear backoff
  172. hh.counter++
  173. // Check if we have a handshake packet to transmit yet
  174. if !hh.ready {
  175. if !ixHandshakeStage0(hm.f, hh) {
  176. hm.OutboundHandshakeTimer.Add(vpnIp, hm.config.tryInterval*time.Duration(hh.counter))
  177. return
  178. }
  179. }
  180. // Get a remotes object if we don't already have one.
  181. // This is mainly to protect us as this should never be the case
  182. // NB ^ This comment doesn't jive. It's how the thing gets initialized.
  183. // It's the common path. Should it update every time, in case a future LH query/queries give us more info?
  184. if hostinfo.remotes == nil {
  185. hostinfo.remotes = hm.lightHouse.QueryCache([]netip.Addr{vpnIp})
  186. }
  187. remotes := hostinfo.remotes.CopyAddrs(hm.mainHostMap.GetPreferredRanges())
  188. remotesHaveChanged := !slices.Equal(remotes, hh.lastRemotes)
  189. // We only care about a lighthouse trigger if we have new remotes to send to.
  190. // This is a very specific optimization for a fast lighthouse reply.
  191. if lighthouseTriggered && !remotesHaveChanged {
  192. // If we didn't return here a lighthouse could cause us to aggressively send handshakes
  193. return
  194. }
  195. hh.lastRemotes = remotes
  196. // This will generate a load of queries for hosts with only 1 ip
  197. // (such as ones registered to the lighthouse with only a private IP)
  198. // So we only do it one time after attempting 5 handshakes already.
  199. if len(remotes) <= 1 && hh.counter == 5 {
  200. // If we only have 1 remote it is highly likely our query raced with the other host registered within the lighthouse
  201. // Our vpnIp here has a tunnel with a lighthouse but has yet to send a host update packet there so we only know about
  202. // the learned public ip for them. Query again to short circuit the promotion counter
  203. hm.lightHouse.QueryServer(vpnIp)
  204. }
  205. // Send the handshake to all known ips, stage 2 takes care of assigning the hostinfo.remote based on the first to reply
  206. var sentTo []netip.AddrPort
  207. var sentMultiport bool
  208. hostinfo.remotes.ForEach(hm.mainHostMap.GetPreferredRanges(), func(addr netip.AddrPort, _ bool) {
  209. hm.messageMetrics.Tx(header.Handshake, header.MessageSubType(hostinfo.HandshakePacket[0][1]), 1)
  210. err := hm.outside.WriteTo(hostinfo.HandshakePacket[0], addr)
  211. if err != nil {
  212. hostinfo.logger(hm.l).WithField("udpAddr", addr).
  213. WithField("initiatorIndex", hostinfo.localIndexId).
  214. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  215. WithError(err).Error("Failed to send handshake message")
  216. } else {
  217. sentTo = append(sentTo, addr)
  218. }
  219. // Attempt a multiport handshake if we are past the TxHandshakeDelay attempts
  220. if hm.multiPort.TxHandshake && hm.udpRaw != nil && hh.counter >= hm.multiPort.TxHandshakeDelay {
  221. sentMultiport = true
  222. // We need to re-allocate with 8 bytes at the start of SOCK_RAW
  223. raw := hostinfo.HandshakePacket[0x80]
  224. if raw == nil {
  225. raw = make([]byte, len(hostinfo.HandshakePacket[0])+udp.RawOverhead)
  226. copy(raw[udp.RawOverhead:], hostinfo.HandshakePacket[0])
  227. hostinfo.HandshakePacket[0x80] = raw
  228. }
  229. hm.messageMetrics.Tx(header.Handshake, header.MessageSubType(hostinfo.HandshakePacket[0][1]), 1)
  230. err = hm.udpRaw.WriteTo(raw, udp.RandomSendPort.UDPSendPort(hm.multiPort.TxPorts), addr)
  231. if err != nil {
  232. hostinfo.logger(hm.l).WithField("udpAddr", addr).
  233. WithField("initiatorIndex", hostinfo.localIndexId).
  234. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  235. WithError(err).Error("Failed to send handshake message")
  236. }
  237. }
  238. })
  239. // Don't be too noisy or confusing if we fail to send a handshake - if we don't get through we'll eventually log a timeout,
  240. // so only log when the list of remotes has changed
  241. if remotesHaveChanged {
  242. hostinfo.logger(hm.l).WithField("udpAddrs", sentTo).
  243. WithField("initiatorIndex", hostinfo.localIndexId).
  244. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  245. WithField("multiportHandshake", sentMultiport).
  246. Info("Handshake message sent")
  247. } else if hm.l.Level >= logrus.DebugLevel {
  248. hostinfo.logger(hm.l).WithField("udpAddrs", sentTo).
  249. WithField("initiatorIndex", hostinfo.localIndexId).
  250. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  251. Debug("Handshake message sent")
  252. }
  253. if hm.config.useRelays && len(hostinfo.remotes.relays) > 0 {
  254. hostinfo.logger(hm.l).WithField("relays", hostinfo.remotes.relays).Info("Attempt to relay through hosts")
  255. // Send a RelayRequest to all known Relay IP's
  256. for _, relay := range hostinfo.remotes.relays {
  257. // Don't relay to myself
  258. if relay == vpnIp {
  259. continue
  260. }
  261. // Don't relay through the host I'm trying to connect to
  262. if hm.f.myVpnAddrsTable.Contains(relay) {
  263. continue
  264. }
  265. relayHostInfo := hm.mainHostMap.QueryVpnAddr(relay)
  266. if relayHostInfo == nil || !relayHostInfo.remote.IsValid() {
  267. hostinfo.logger(hm.l).WithField("relay", relay.String()).Info("Establish tunnel to relay target")
  268. hm.f.Handshake(relay)
  269. continue
  270. }
  271. // Check the relay HostInfo to see if we already established a relay through
  272. existingRelay, ok := relayHostInfo.relayState.QueryRelayForByIp(vpnIp)
  273. if !ok {
  274. // No relays exist or requested yet.
  275. if relayHostInfo.remote.IsValid() {
  276. idx, err := AddRelay(hm.l, relayHostInfo, hm.mainHostMap, vpnIp, nil, TerminalType, Requested)
  277. if err != nil {
  278. hostinfo.logger(hm.l).WithField("relay", relay.String()).WithError(err).Info("Failed to add relay to hostmap")
  279. }
  280. m := NebulaControl{
  281. Type: NebulaControl_CreateRelayRequest,
  282. InitiatorRelayIndex: idx,
  283. }
  284. switch relayHostInfo.GetCert().Certificate.Version() {
  285. case cert.Version1:
  286. if !hm.f.myVpnAddrs[0].Is4() {
  287. hostinfo.logger(hm.l).Error("can not establish v1 relay with a v6 network because the relay is not running a current nebula version")
  288. continue
  289. }
  290. if !vpnIp.Is4() {
  291. hostinfo.logger(hm.l).Error("can not establish v1 relay with a v6 remote network because the relay is not running a current nebula version")
  292. continue
  293. }
  294. b := hm.f.myVpnAddrs[0].As4()
  295. m.OldRelayFromAddr = binary.BigEndian.Uint32(b[:])
  296. b = vpnIp.As4()
  297. m.OldRelayToAddr = binary.BigEndian.Uint32(b[:])
  298. case cert.Version2:
  299. m.RelayFromAddr = netAddrToProtoAddr(hm.f.myVpnAddrs[0])
  300. m.RelayToAddr = netAddrToProtoAddr(vpnIp)
  301. default:
  302. hostinfo.logger(hm.l).Error("Unknown certificate version found while creating relay")
  303. continue
  304. }
  305. msg, err := m.Marshal()
  306. if err != nil {
  307. hostinfo.logger(hm.l).
  308. WithError(err).
  309. Error("Failed to marshal Control message to create relay")
  310. } else {
  311. hm.f.SendMessageToHostInfo(header.Control, 0, relayHostInfo, msg, make([]byte, 12), make([]byte, mtu))
  312. hm.l.WithFields(logrus.Fields{
  313. "relayFrom": hm.f.myVpnAddrs[0],
  314. "relayTo": vpnIp,
  315. "initiatorRelayIndex": idx,
  316. "relay": relay}).
  317. Info("send CreateRelayRequest")
  318. }
  319. }
  320. continue
  321. }
  322. switch existingRelay.State {
  323. case Established:
  324. hostinfo.logger(hm.l).WithField("relay", relay.String()).Info("Send handshake via relay")
  325. hm.f.SendVia(relayHostInfo, existingRelay, hostinfo.HandshakePacket[0], make([]byte, 12), make([]byte, mtu), false)
  326. case Disestablished:
  327. // Mark this relay as 'requested'
  328. relayHostInfo.relayState.UpdateRelayForByIpState(vpnIp, Requested)
  329. fallthrough
  330. case Requested:
  331. hostinfo.logger(hm.l).WithField("relay", relay.String()).Info("Re-send CreateRelay request")
  332. // Re-send the CreateRelay request, in case the previous one was lost.
  333. m := NebulaControl{
  334. Type: NebulaControl_CreateRelayRequest,
  335. InitiatorRelayIndex: existingRelay.LocalIndex,
  336. }
  337. switch relayHostInfo.GetCert().Certificate.Version() {
  338. case cert.Version1:
  339. if !hm.f.myVpnAddrs[0].Is4() {
  340. hostinfo.logger(hm.l).Error("can not establish v1 relay with a v6 network because the relay is not running a current nebula version")
  341. continue
  342. }
  343. if !vpnIp.Is4() {
  344. hostinfo.logger(hm.l).Error("can not establish v1 relay with a v6 remote network because the relay is not running a current nebula version")
  345. continue
  346. }
  347. b := hm.f.myVpnAddrs[0].As4()
  348. m.OldRelayFromAddr = binary.BigEndian.Uint32(b[:])
  349. b = vpnIp.As4()
  350. m.OldRelayToAddr = binary.BigEndian.Uint32(b[:])
  351. case cert.Version2:
  352. m.RelayFromAddr = netAddrToProtoAddr(hm.f.myVpnAddrs[0])
  353. m.RelayToAddr = netAddrToProtoAddr(vpnIp)
  354. default:
  355. hostinfo.logger(hm.l).Error("Unknown certificate version found while creating relay")
  356. continue
  357. }
  358. msg, err := m.Marshal()
  359. if err != nil {
  360. hostinfo.logger(hm.l).
  361. WithError(err).
  362. Error("Failed to marshal Control message to create relay")
  363. } else {
  364. // This must send over the hostinfo, not over hm.Hosts[ip]
  365. hm.f.SendMessageToHostInfo(header.Control, 0, relayHostInfo, msg, make([]byte, 12), make([]byte, mtu))
  366. hm.l.WithFields(logrus.Fields{
  367. "relayFrom": hm.f.myVpnAddrs[0],
  368. "relayTo": vpnIp,
  369. "initiatorRelayIndex": existingRelay.LocalIndex,
  370. "relay": relay}).
  371. Info("send CreateRelayRequest")
  372. }
  373. case PeerRequested:
  374. // PeerRequested only occurs in Forwarding relays, not Terminal relays, and this is a Terminal relay case.
  375. fallthrough
  376. default:
  377. hostinfo.logger(hm.l).
  378. WithField("vpnIp", vpnIp).
  379. WithField("state", existingRelay.State).
  380. WithField("relay", relay).
  381. Errorf("Relay unexpected state")
  382. }
  383. }
  384. }
  385. // If a lighthouse triggered this attempt then we are still in the timer wheel and do not need to re-add
  386. if !lighthouseTriggered {
  387. hm.OutboundHandshakeTimer.Add(vpnIp, hm.config.tryInterval*time.Duration(hh.counter))
  388. }
  389. }
  390. // GetOrHandshake will try to find a hostinfo with a fully formed tunnel or start a new handshake if one is not present
  391. // The 2nd argument will be true if the hostinfo is ready to transmit traffic
  392. func (hm *HandshakeManager) GetOrHandshake(vpnIp netip.Addr, cacheCb func(*HandshakeHostInfo)) (*HostInfo, bool) {
  393. hm.mainHostMap.RLock()
  394. h, ok := hm.mainHostMap.Hosts[vpnIp]
  395. hm.mainHostMap.RUnlock()
  396. if ok {
  397. // Do not attempt promotion if you are a lighthouse
  398. if !hm.lightHouse.amLighthouse {
  399. h.TryPromoteBest(hm.mainHostMap.GetPreferredRanges(), hm.f)
  400. }
  401. return h, true
  402. }
  403. return hm.StartHandshake(vpnIp, cacheCb), false
  404. }
  405. // StartHandshake will ensure a handshake is currently being attempted for the provided vpn ip
  406. func (hm *HandshakeManager) StartHandshake(vpnAddr netip.Addr, cacheCb func(*HandshakeHostInfo)) *HostInfo {
  407. hm.Lock()
  408. if hh, ok := hm.vpnIps[vpnAddr]; ok {
  409. // We are already trying to handshake with this vpn ip
  410. if cacheCb != nil {
  411. cacheCb(hh)
  412. }
  413. hm.Unlock()
  414. return hh.hostinfo
  415. }
  416. hostinfo := &HostInfo{
  417. vpnAddrs: []netip.Addr{vpnAddr},
  418. HandshakePacket: make(map[uint8][]byte, 0),
  419. relayState: RelayState{
  420. relays: nil,
  421. relayForByAddr: map[netip.Addr]*Relay{},
  422. relayForByIdx: map[uint32]*Relay{},
  423. },
  424. }
  425. hh := &HandshakeHostInfo{
  426. hostinfo: hostinfo,
  427. startTime: time.Now(),
  428. }
  429. hm.vpnIps[vpnAddr] = hh
  430. hm.metricInitiated.Inc(1)
  431. hm.OutboundHandshakeTimer.Add(vpnAddr, hm.config.tryInterval)
  432. if cacheCb != nil {
  433. cacheCb(hh)
  434. }
  435. // If this is a static host, we don't need to wait for the HostQueryReply
  436. // We can trigger the handshake right now
  437. _, doTrigger := hm.lightHouse.GetStaticHostList()[vpnAddr]
  438. if !doTrigger {
  439. // Add any calculated remotes, and trigger early handshake if one found
  440. doTrigger = hm.lightHouse.addCalculatedRemotes(vpnAddr)
  441. }
  442. if doTrigger {
  443. select {
  444. case hm.trigger <- vpnAddr:
  445. default:
  446. }
  447. }
  448. hm.Unlock()
  449. hm.lightHouse.QueryServer(vpnAddr)
  450. return hostinfo
  451. }
  // Sentinel errors returned by CheckAndComplete.
  var (
  	// ErrExistingHostInfo reports that the current hostmap entry was kept over
  	// the new handshake.
  	ErrExistingHostInfo = errors.New("existing hostinfo")

  	// ErrAlreadySeen reports that an entry has already seen the exact same
  	// handshake packet.
  	ErrAlreadySeen = errors.New("already seen")

  	// ErrLocalIndexCollision reports that the local index id is already in use
  	// in the main or pending hostmap.
  	ErrLocalIndexCollision = errors.New("local index collision")
  )
  457. // CheckAndComplete checks for any conflicts in the main and pending hostmap
  458. // before adding hostinfo to main. If err is nil, it was added. Otherwise err will be:
  459. //
  460. // ErrAlreadySeen if we already have an entry in the hostmap that has seen the
  461. // exact same handshake packet
  462. //
  463. // ErrExistingHostInfo if we already have an entry in the hostmap for this
  464. // VpnIp and the new handshake was older than the one we currently have
  465. //
  466. // ErrLocalIndexCollision if we already have an entry in the main or pending
  467. // hostmap for the hostinfo.localIndexId.
  468. func (hm *HandshakeManager) CheckAndComplete(hostinfo *HostInfo, handshakePacket uint8, f *Interface) (*HostInfo, error) {
  469. hm.mainHostMap.Lock()
  470. defer hm.mainHostMap.Unlock()
  471. hm.Lock()
  472. defer hm.Unlock()
  473. // Check if we already have a tunnel with this vpn ip
  474. existingHostInfo, found := hm.mainHostMap.Hosts[hostinfo.vpnAddrs[0]]
  475. if found && existingHostInfo != nil {
  476. testHostInfo := existingHostInfo
  477. for testHostInfo != nil {
  478. // Is it just a delayed handshake packet?
  479. if bytes.Equal(hostinfo.HandshakePacket[handshakePacket], testHostInfo.HandshakePacket[handshakePacket]) {
  480. return testHostInfo, ErrAlreadySeen
  481. }
  482. testHostInfo = testHostInfo.next
  483. }
  484. // Is this a newer handshake?
  485. if existingHostInfo.lastHandshakeTime >= hostinfo.lastHandshakeTime && !existingHostInfo.ConnectionState.initiator {
  486. return existingHostInfo, ErrExistingHostInfo
  487. }
  488. existingHostInfo.logger(hm.l).Info("Taking new handshake")
  489. }
  490. existingIndex, found := hm.mainHostMap.Indexes[hostinfo.localIndexId]
  491. if found {
  492. // We have a collision, but for a different hostinfo
  493. return existingIndex, ErrLocalIndexCollision
  494. }
  495. existingPendingIndex, found := hm.indexes[hostinfo.localIndexId]
  496. if found && existingPendingIndex.hostinfo != hostinfo {
  497. // We have a collision, but for a different hostinfo
  498. return existingPendingIndex.hostinfo, ErrLocalIndexCollision
  499. }
  500. existingRemoteIndex, found := hm.mainHostMap.RemoteIndexes[hostinfo.remoteIndexId]
  501. if found && existingRemoteIndex != nil && existingRemoteIndex.vpnAddrs[0] != hostinfo.vpnAddrs[0] {
  502. // We have a collision, but this can happen since we can't control
  503. // the remote ID. Just log about the situation as a note.
  504. hostinfo.logger(hm.l).
  505. WithField("remoteIndex", hostinfo.remoteIndexId).WithField("collision", existingRemoteIndex.vpnAddrs).
  506. Info("New host shadows existing host remoteIndex")
  507. }
  508. hm.mainHostMap.unlockedAddHostInfo(hostinfo, f)
  509. return existingHostInfo, nil
  510. }
  511. // Complete is a simpler version of CheckAndComplete when we already know we
  512. // won't have a localIndexId collision because we already have an entry in the
  513. // pendingHostMap. An existing hostinfo is returned if there was one.
  514. func (hm *HandshakeManager) Complete(hostinfo *HostInfo, f *Interface) {
  515. hm.mainHostMap.Lock()
  516. defer hm.mainHostMap.Unlock()
  517. hm.Lock()
  518. defer hm.Unlock()
  519. existingRemoteIndex, found := hm.mainHostMap.RemoteIndexes[hostinfo.remoteIndexId]
  520. if found && existingRemoteIndex != nil {
  521. // We have a collision, but this can happen since we can't control
  522. // the remote ID. Just log about the situation as a note.
  523. hostinfo.logger(hm.l).
  524. WithField("remoteIndex", hostinfo.remoteIndexId).WithField("collision", existingRemoteIndex.vpnAddrs).
  525. Info("New host shadows existing host remoteIndex")
  526. }
  527. // We need to remove from the pending hostmap first to avoid undoing work when after to the main hostmap.
  528. hm.unlockedDeleteHostInfo(hostinfo)
  529. hm.mainHostMap.unlockedAddHostInfo(hostinfo, f)
  530. }
  531. // allocateIndex generates a unique localIndexId for this HostInfo
  532. // and adds it to the pendingHostMap. Will error if we are unable to generate
  533. // a unique localIndexId
  534. func (hm *HandshakeManager) allocateIndex(hh *HandshakeHostInfo) error {
  535. hm.mainHostMap.RLock()
  536. defer hm.mainHostMap.RUnlock()
  537. hm.Lock()
  538. defer hm.Unlock()
  539. for i := 0; i < 32; i++ {
  540. index, err := generateIndex(hm.l)
  541. if err != nil {
  542. return err
  543. }
  544. _, inPending := hm.indexes[index]
  545. _, inMain := hm.mainHostMap.Indexes[index]
  546. if !inMain && !inPending {
  547. hh.hostinfo.localIndexId = index
  548. hm.indexes[index] = hh
  549. return nil
  550. }
  551. }
  552. return errors.New("failed to generate unique localIndexId")
  553. }
  554. func (hm *HandshakeManager) DeleteHostInfo(hostinfo *HostInfo) {
  555. hm.Lock()
  556. defer hm.Unlock()
  557. hm.unlockedDeleteHostInfo(hostinfo)
  558. }
  559. func (hm *HandshakeManager) unlockedDeleteHostInfo(hostinfo *HostInfo) {
  560. for _, addr := range hostinfo.vpnAddrs {
  561. delete(hm.vpnIps, addr)
  562. }
  563. if len(hm.vpnIps) == 0 {
  564. hm.vpnIps = map[netip.Addr]*HandshakeHostInfo{}
  565. }
  566. delete(hm.indexes, hostinfo.localIndexId)
  567. if len(hm.indexes) == 0 {
  568. hm.indexes = map[uint32]*HandshakeHostInfo{}
  569. }
  570. if hm.l.Level >= logrus.DebugLevel {
  571. hm.l.WithField("hostMap", m{"mapTotalSize": len(hm.vpnIps),
  572. "vpnAddrs": hostinfo.vpnAddrs, "indexNumber": hostinfo.localIndexId, "remoteIndexNumber": hostinfo.remoteIndexId}).
  573. Debug("Pending hostmap hostInfo deleted")
  574. }
  575. }
  576. func (hm *HandshakeManager) QueryVpnAddr(vpnIp netip.Addr) *HostInfo {
  577. hh := hm.queryVpnIp(vpnIp)
  578. if hh != nil {
  579. return hh.hostinfo
  580. }
  581. return nil
  582. }
  583. func (hm *HandshakeManager) queryVpnIp(vpnIp netip.Addr) *HandshakeHostInfo {
  584. hm.RLock()
  585. defer hm.RUnlock()
  586. return hm.vpnIps[vpnIp]
  587. }
  588. func (hm *HandshakeManager) QueryIndex(index uint32) *HostInfo {
  589. hh := hm.queryIndex(index)
  590. if hh != nil {
  591. return hh.hostinfo
  592. }
  593. return nil
  594. }
  595. func (hm *HandshakeManager) queryIndex(index uint32) *HandshakeHostInfo {
  596. hm.RLock()
  597. defer hm.RUnlock()
  598. return hm.indexes[index]
  599. }
  600. func (hm *HandshakeManager) GetPreferredRanges() []netip.Prefix {
  601. return hm.mainHostMap.GetPreferredRanges()
  602. }
  603. func (hm *HandshakeManager) ForEachVpnAddr(f controlEach) {
  604. hm.RLock()
  605. defer hm.RUnlock()
  606. for _, v := range hm.vpnIps {
  607. f(v.hostinfo)
  608. }
  609. }
  610. func (hm *HandshakeManager) ForEachIndex(f controlEach) {
  611. hm.RLock()
  612. defer hm.RUnlock()
  613. for _, v := range hm.indexes {
  614. f(v.hostinfo)
  615. }
  616. }
  617. func (hm *HandshakeManager) EmitStats() {
  618. hm.RLock()
  619. hostLen := len(hm.vpnIps)
  620. indexLen := len(hm.indexes)
  621. hm.RUnlock()
  622. metrics.GetOrRegisterGauge("hostmap.pending.hosts", nil).Update(int64(hostLen))
  623. metrics.GetOrRegisterGauge("hostmap.pending.indexes", nil).Update(int64(indexLen))
  624. hm.mainHostMap.EmitStats()
  625. }
  626. // Utility functions below
  627. func generateIndex(l *logrus.Logger) (uint32, error) {
  628. b := make([]byte, 4)
  629. // Let zero mean we don't know the ID, so don't generate zero
  630. var index uint32
  631. for index == 0 {
  632. _, err := rand.Read(b)
  633. if err != nil {
  634. l.Errorln(err)
  635. return 0, err
  636. }
  637. index = binary.BigEndian.Uint32(b)
  638. }
  639. if l.Level >= logrus.DebugLevel {
  640. l.WithField("index", index).
  641. Debug("Generated index")
  642. }
  643. return index, nil
  644. }
  // hsTimeout returns the total time spanned by tries handshake attempts under
  // linear backoff, where attempt k waits k*interval:
  //
  //	interval * (1 + 2 + ... + tries) = interval * tries*(tries+1)/2
  //
  // The division is applied to tries*(tries+1), which is always even, so the
  // result is exact for odd values of tries as well. Dividing tries by two first
  // would truncate and under-estimate the span (yielding zero for a single try).
  func hsTimeout(tries int64, interval time.Duration) time.Duration {
  	steps := tries * (tries + 1) / 2
  	return time.Duration(steps) * interval
  }