2
0

hostmap.go 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819
  1. package nebula
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "net"
  7. "sync"
  8. "sync/atomic"
  9. "time"
  10. "github.com/rcrowley/go-metrics"
  11. "github.com/sirupsen/logrus"
  12. "github.com/slackhq/nebula/cert"
  13. "github.com/slackhq/nebula/cidr"
  14. "github.com/slackhq/nebula/header"
  15. "github.com/slackhq/nebula/iputil"
  16. "github.com/slackhq/nebula/udp"
  17. )
  18. // const ProbeLen = 100
  19. const PromoteEvery = 1000
  20. const ReQueryEvery = 5000
  21. const MaxRemotes = 10
  22. // How long we should prevent roaming back to the previous IP.
  23. // This helps prevent flapping due to packets already in flight
  24. const RoamingSuppressSeconds = 2
  25. const (
  26. Requested = iota
  27. Established
  28. )
  29. const (
  30. Unknowntype = iota
  31. ForwardingType
  32. TerminalType
  33. )
  34. type Relay struct {
  35. Type int
  36. State int
  37. LocalIndex uint32
  38. RemoteIndex uint32
  39. PeerIp iputil.VpnIp
  40. }
  41. type HostMap struct {
  42. sync.RWMutex //Because we concurrently read and write to our maps
  43. name string
  44. Indexes map[uint32]*HostInfo
  45. Relays map[uint32]*HostInfo // Maps a Relay IDX to a Relay HostInfo object
  46. RemoteIndexes map[uint32]*HostInfo
  47. Hosts map[iputil.VpnIp]*HostInfo
  48. preferredRanges []*net.IPNet
  49. vpnCIDR *net.IPNet
  50. metricsEnabled bool
  51. l *logrus.Logger
  52. }
  53. type RelayState struct {
  54. sync.RWMutex
  55. relays map[iputil.VpnIp]struct{} // Set of VpnIp's of Hosts to use as relays to access this peer
  56. relayForByIp map[iputil.VpnIp]*Relay // Maps VpnIps of peers for which this HostInfo is a relay to some Relay info
  57. relayForByIdx map[uint32]*Relay // Maps a local index to some Relay info
  58. }
  59. func (rs *RelayState) DeleteRelay(ip iputil.VpnIp) {
  60. rs.Lock()
  61. defer rs.Unlock()
  62. delete(rs.relays, ip)
  63. }
  64. func (rs *RelayState) GetRelayForByIp(ip iputil.VpnIp) (*Relay, bool) {
  65. rs.RLock()
  66. defer rs.RUnlock()
  67. r, ok := rs.relayForByIp[ip]
  68. return r, ok
  69. }
  70. func (rs *RelayState) InsertRelayTo(ip iputil.VpnIp) {
  71. rs.Lock()
  72. defer rs.Unlock()
  73. rs.relays[ip] = struct{}{}
  74. }
  75. func (rs *RelayState) CopyRelayIps() []iputil.VpnIp {
  76. rs.RLock()
  77. defer rs.RUnlock()
  78. ret := make([]iputil.VpnIp, 0, len(rs.relays))
  79. for ip := range rs.relays {
  80. ret = append(ret, ip)
  81. }
  82. return ret
  83. }
  84. func (rs *RelayState) CopyRelayForIps() []iputil.VpnIp {
  85. rs.RLock()
  86. defer rs.RUnlock()
  87. currentRelays := make([]iputil.VpnIp, 0, len(rs.relayForByIp))
  88. for relayIp := range rs.relayForByIp {
  89. currentRelays = append(currentRelays, relayIp)
  90. }
  91. return currentRelays
  92. }
  93. func (rs *RelayState) CopyRelayForIdxs() []uint32 {
  94. rs.RLock()
  95. defer rs.RUnlock()
  96. ret := make([]uint32, 0, len(rs.relayForByIdx))
  97. for i := range rs.relayForByIdx {
  98. ret = append(ret, i)
  99. }
  100. return ret
  101. }
  102. func (rs *RelayState) RemoveRelay(localIdx uint32) (iputil.VpnIp, bool) {
  103. rs.Lock()
  104. defer rs.Unlock()
  105. relay, ok := rs.relayForByIdx[localIdx]
  106. if !ok {
  107. return iputil.VpnIp(0), false
  108. }
  109. delete(rs.relayForByIdx, localIdx)
  110. delete(rs.relayForByIp, relay.PeerIp)
  111. return relay.PeerIp, true
  112. }
  113. func (rs *RelayState) QueryRelayForByIp(vpnIp iputil.VpnIp) (*Relay, bool) {
  114. rs.RLock()
  115. defer rs.RUnlock()
  116. r, ok := rs.relayForByIp[vpnIp]
  117. return r, ok
  118. }
  119. func (rs *RelayState) QueryRelayForByIdx(idx uint32) (*Relay, bool) {
  120. rs.RLock()
  121. defer rs.RUnlock()
  122. r, ok := rs.relayForByIdx[idx]
  123. return r, ok
  124. }
  125. func (rs *RelayState) InsertRelay(ip iputil.VpnIp, idx uint32, r *Relay) {
  126. rs.Lock()
  127. defer rs.Unlock()
  128. rs.relayForByIp[ip] = r
  129. rs.relayForByIdx[idx] = r
  130. }
  131. type HostInfo struct {
  132. sync.RWMutex
  133. remote *udp.Addr
  134. remotes *RemoteList
  135. promoteCounter atomic.Uint32
  136. ConnectionState *ConnectionState
  137. handshakeStart time.Time //todo: this an entry in the handshake manager
  138. HandshakeReady bool //todo: being in the manager means you are ready
  139. HandshakeCounter int //todo: another handshake manager entry
  140. HandshakeComplete bool //todo: this should go away in favor of ConnectionState.ready
  141. HandshakePacket map[uint8][]byte //todo: this is other handshake manager entry
  142. packetStore []*cachedPacket //todo: this is other handshake manager entry
  143. remoteIndexId uint32
  144. localIndexId uint32
  145. vpnIp iputil.VpnIp
  146. recvError int
  147. remoteCidr *cidr.Tree4
  148. relayState RelayState
  149. // lastRebindCount is the other side of Interface.rebindCount, if these values don't match then we need to ask LH
  150. // for a punch from the remote end of this tunnel. The goal being to prime their conntrack for our traffic just like
  151. // with a handshake
  152. lastRebindCount int8
  153. // lastHandshakeTime records the time the remote side told us about at the stage when the handshake was completed locally
  154. // Stage 1 packet will contain it if I am a responder, stage 2 packet if I am an initiator
  155. // This is used to avoid an attack where a handshake packet is replayed after some time
  156. lastHandshakeTime uint64
  157. lastRoam time.Time
  158. lastRoamRemote *udp.Addr
  159. }
  160. type ViaSender struct {
  161. relayHI *HostInfo // relayHI is the host info object of the relay
  162. remoteIdx uint32 // remoteIdx is the index included in the header of the received packet
  163. relay *Relay // relay contains the rest of the relay information, including the PeerIP of the host trying to communicate with us.
  164. }
  165. type cachedPacket struct {
  166. messageType header.MessageType
  167. messageSubType header.MessageSubType
  168. callback packetCallback
  169. packet []byte
  170. }
  171. type packetCallback func(t header.MessageType, st header.MessageSubType, h *HostInfo, p, nb, out []byte)
  172. type cachedPacketMetrics struct {
  173. sent metrics.Counter
  174. dropped metrics.Counter
  175. }
  176. func NewHostMap(l *logrus.Logger, name string, vpnCIDR *net.IPNet, preferredRanges []*net.IPNet) *HostMap {
  177. h := map[iputil.VpnIp]*HostInfo{}
  178. i := map[uint32]*HostInfo{}
  179. r := map[uint32]*HostInfo{}
  180. relays := map[uint32]*HostInfo{}
  181. m := HostMap{
  182. name: name,
  183. Indexes: i,
  184. Relays: relays,
  185. RemoteIndexes: r,
  186. Hosts: h,
  187. preferredRanges: preferredRanges,
  188. vpnCIDR: vpnCIDR,
  189. l: l,
  190. }
  191. return &m
  192. }
  193. // UpdateStats takes a name and reports host and index counts to the stats collection system
  194. func (hm *HostMap) EmitStats(name string) {
  195. hm.RLock()
  196. hostLen := len(hm.Hosts)
  197. indexLen := len(hm.Indexes)
  198. remoteIndexLen := len(hm.RemoteIndexes)
  199. relaysLen := len(hm.Relays)
  200. hm.RUnlock()
  201. metrics.GetOrRegisterGauge("hostmap."+name+".hosts", nil).Update(int64(hostLen))
  202. metrics.GetOrRegisterGauge("hostmap."+name+".indexes", nil).Update(int64(indexLen))
  203. metrics.GetOrRegisterGauge("hostmap."+name+".remoteIndexes", nil).Update(int64(remoteIndexLen))
  204. metrics.GetOrRegisterGauge("hostmap."+name+".relayIndexes", nil).Update(int64(relaysLen))
  205. }
  206. func (hm *HostMap) RemoveRelay(localIdx uint32) {
  207. hm.Lock()
  208. hiRelay, ok := hm.Relays[localIdx]
  209. if !ok {
  210. hm.Unlock()
  211. return
  212. }
  213. delete(hm.Relays, localIdx)
  214. hm.Unlock()
  215. ip, ok := hiRelay.relayState.RemoveRelay(localIdx)
  216. if !ok {
  217. return
  218. }
  219. hiPeer, err := hm.QueryVpnIp(ip)
  220. if err != nil {
  221. return
  222. }
  223. var otherPeerIdx uint32
  224. hiPeer.relayState.DeleteRelay(hiRelay.vpnIp)
  225. relay, ok := hiPeer.relayState.GetRelayForByIp(hiRelay.vpnIp)
  226. if ok {
  227. otherPeerIdx = relay.LocalIndex
  228. }
  229. // I am a relaying host. I need to remove the other relay, too.
  230. hm.RemoveRelay(otherPeerIdx)
  231. }
  232. func (hm *HostMap) GetIndexByVpnIp(vpnIp iputil.VpnIp) (uint32, error) {
  233. hm.RLock()
  234. if i, ok := hm.Hosts[vpnIp]; ok {
  235. index := i.localIndexId
  236. hm.RUnlock()
  237. return index, nil
  238. }
  239. hm.RUnlock()
  240. return 0, errors.New("vpn IP not found")
  241. }
  242. func (hm *HostMap) Add(ip iputil.VpnIp, hostinfo *HostInfo) {
  243. hm.Lock()
  244. hm.Hosts[ip] = hostinfo
  245. hm.Unlock()
  246. }
  247. func (hm *HostMap) AddVpnIp(vpnIp iputil.VpnIp, init func(hostinfo *HostInfo)) (hostinfo *HostInfo, created bool) {
  248. hm.RLock()
  249. if h, ok := hm.Hosts[vpnIp]; !ok {
  250. hm.RUnlock()
  251. h = &HostInfo{
  252. vpnIp: vpnIp,
  253. HandshakePacket: make(map[uint8][]byte, 0),
  254. relayState: RelayState{
  255. relays: map[iputil.VpnIp]struct{}{},
  256. relayForByIp: map[iputil.VpnIp]*Relay{},
  257. relayForByIdx: map[uint32]*Relay{},
  258. },
  259. }
  260. if init != nil {
  261. init(h)
  262. }
  263. hm.Lock()
  264. hm.Hosts[vpnIp] = h
  265. hm.Unlock()
  266. return h, true
  267. } else {
  268. hm.RUnlock()
  269. return h, false
  270. }
  271. }
  272. func (hm *HostMap) DeleteVpnIp(vpnIp iputil.VpnIp) {
  273. hm.Lock()
  274. delete(hm.Hosts, vpnIp)
  275. if len(hm.Hosts) == 0 {
  276. hm.Hosts = map[iputil.VpnIp]*HostInfo{}
  277. }
  278. hm.Unlock()
  279. if hm.l.Level >= logrus.DebugLevel {
  280. hm.l.WithField("hostMap", m{"mapName": hm.name, "vpnIp": vpnIp, "mapTotalSize": len(hm.Hosts)}).
  281. Debug("Hostmap vpnIp deleted")
  282. }
  283. }
  284. // Only used by pendingHostMap when the remote index is not initially known
  285. func (hm *HostMap) addRemoteIndexHostInfo(index uint32, h *HostInfo) {
  286. hm.Lock()
  287. h.remoteIndexId = index
  288. hm.RemoteIndexes[index] = h
  289. hm.Unlock()
  290. if hm.l.Level > logrus.DebugLevel {
  291. hm.l.WithField("hostMap", m{"mapName": hm.name, "indexNumber": index, "mapTotalSize": len(hm.Indexes),
  292. "hostinfo": m{"existing": true, "localIndexId": h.localIndexId, "hostId": h.vpnIp}}).
  293. Debug("Hostmap remoteIndex added")
  294. }
  295. }
  296. func (hm *HostMap) AddVpnIpHostInfo(vpnIp iputil.VpnIp, h *HostInfo) {
  297. hm.Lock()
  298. h.vpnIp = vpnIp
  299. hm.Hosts[vpnIp] = h
  300. hm.Indexes[h.localIndexId] = h
  301. hm.RemoteIndexes[h.remoteIndexId] = h
  302. hm.Unlock()
  303. if hm.l.Level > logrus.DebugLevel {
  304. hm.l.WithField("hostMap", m{"mapName": hm.name, "vpnIp": vpnIp, "mapTotalSize": len(hm.Hosts),
  305. "hostinfo": m{"existing": true, "localIndexId": h.localIndexId, "vpnIp": h.vpnIp}}).
  306. Debug("Hostmap vpnIp added")
  307. }
  308. }
  309. // This is only called in pendingHostmap, to cleanup an inbound handshake
  310. func (hm *HostMap) DeleteIndex(index uint32) {
  311. hm.Lock()
  312. hostinfo, ok := hm.Indexes[index]
  313. if ok {
  314. delete(hm.Indexes, index)
  315. delete(hm.RemoteIndexes, hostinfo.remoteIndexId)
  316. // Check if we have an entry under hostId that matches the same hostinfo
  317. // instance. Clean it up as well if we do.
  318. hostinfo2, ok := hm.Hosts[hostinfo.vpnIp]
  319. if ok && hostinfo2 == hostinfo {
  320. delete(hm.Hosts, hostinfo.vpnIp)
  321. }
  322. }
  323. hm.Unlock()
  324. if hm.l.Level >= logrus.DebugLevel {
  325. hm.l.WithField("hostMap", m{"mapName": hm.name, "indexNumber": index, "mapTotalSize": len(hm.Indexes)}).
  326. Debug("Hostmap index deleted")
  327. }
  328. }
  329. // This is used to cleanup on recv_error
  330. func (hm *HostMap) DeleteReverseIndex(index uint32) {
  331. hm.Lock()
  332. hostinfo, ok := hm.RemoteIndexes[index]
  333. if ok {
  334. delete(hm.Indexes, hostinfo.localIndexId)
  335. delete(hm.RemoteIndexes, index)
  336. // Check if we have an entry under hostId that matches the same hostinfo
  337. // instance. Clean it up as well if we do (they might not match in pendingHostmap)
  338. var hostinfo2 *HostInfo
  339. hostinfo2, ok = hm.Hosts[hostinfo.vpnIp]
  340. if ok && hostinfo2 == hostinfo {
  341. delete(hm.Hosts, hostinfo.vpnIp)
  342. }
  343. }
  344. hm.Unlock()
  345. if hm.l.Level >= logrus.DebugLevel {
  346. hm.l.WithField("hostMap", m{"mapName": hm.name, "indexNumber": index, "mapTotalSize": len(hm.Indexes)}).
  347. Debug("Hostmap remote index deleted")
  348. }
  349. }
  350. func (hm *HostMap) DeleteHostInfo(hostinfo *HostInfo) {
  351. // Delete the host itself, ensuring it's not modified anymore
  352. hm.Lock()
  353. hm.unlockedDeleteHostInfo(hostinfo)
  354. hm.Unlock()
  355. // And tear down all the relays going through this host
  356. for _, localIdx := range hostinfo.relayState.CopyRelayForIdxs() {
  357. hm.RemoveRelay(localIdx)
  358. }
  359. // And tear down the relays this deleted hostInfo was using to be reached
  360. teardownRelayIdx := []uint32{}
  361. for _, relayIp := range hostinfo.relayState.CopyRelayIps() {
  362. relayHostInfo, err := hm.QueryVpnIp(relayIp)
  363. if err != nil {
  364. hm.l.WithError(err).WithField("relay", relayIp).Info("Missing relay host in hostmap")
  365. } else {
  366. if r, ok := relayHostInfo.relayState.QueryRelayForByIp(hostinfo.vpnIp); ok {
  367. teardownRelayIdx = append(teardownRelayIdx, r.LocalIndex)
  368. }
  369. }
  370. }
  371. for _, localIdx := range teardownRelayIdx {
  372. hm.RemoveRelay(localIdx)
  373. }
  374. }
  375. func (hm *HostMap) DeleteRelayIdx(localIdx uint32) {
  376. hm.Lock()
  377. defer hm.Unlock()
  378. delete(hm.RemoteIndexes, localIdx)
  379. }
  380. func (hm *HostMap) unlockedDeleteHostInfo(hostinfo *HostInfo) {
  381. // Check if this same hostId is in the hostmap with a different instance.
  382. // This could happen if we have an entry in the pending hostmap with different
  383. // index values than the one in the main hostmap.
  384. hostinfo2, ok := hm.Hosts[hostinfo.vpnIp]
  385. if ok && hostinfo2 != hostinfo {
  386. delete(hm.Hosts, hostinfo2.vpnIp)
  387. delete(hm.Indexes, hostinfo2.localIndexId)
  388. delete(hm.RemoteIndexes, hostinfo2.remoteIndexId)
  389. }
  390. delete(hm.Hosts, hostinfo.vpnIp)
  391. if len(hm.Hosts) == 0 {
  392. hm.Hosts = map[iputil.VpnIp]*HostInfo{}
  393. }
  394. delete(hm.Indexes, hostinfo.localIndexId)
  395. if len(hm.Indexes) == 0 {
  396. hm.Indexes = map[uint32]*HostInfo{}
  397. }
  398. delete(hm.RemoteIndexes, hostinfo.remoteIndexId)
  399. if len(hm.RemoteIndexes) == 0 {
  400. hm.RemoteIndexes = map[uint32]*HostInfo{}
  401. }
  402. if hm.l.Level >= logrus.DebugLevel {
  403. hm.l.WithField("hostMap", m{"mapName": hm.name, "mapTotalSize": len(hm.Hosts),
  404. "vpnIp": hostinfo.vpnIp, "indexNumber": hostinfo.localIndexId, "remoteIndexNumber": hostinfo.remoteIndexId}).
  405. Debug("Hostmap hostInfo deleted")
  406. }
  407. }
  408. func (hm *HostMap) QueryIndex(index uint32) (*HostInfo, error) {
  409. //TODO: we probably just want to return bool instead of error, or at least a static error
  410. hm.RLock()
  411. if h, ok := hm.Indexes[index]; ok {
  412. hm.RUnlock()
  413. return h, nil
  414. } else {
  415. hm.RUnlock()
  416. return nil, errors.New("unable to find index")
  417. }
  418. }
  419. func (hm *HostMap) QueryRelayIndex(index uint32) (*HostInfo, error) {
  420. //TODO: we probably just want to return bool instead of error, or at least a static error
  421. hm.RLock()
  422. if h, ok := hm.Relays[index]; ok {
  423. hm.RUnlock()
  424. return h, nil
  425. } else {
  426. hm.RUnlock()
  427. return nil, errors.New("unable to find index")
  428. }
  429. }
  430. func (hm *HostMap) QueryReverseIndex(index uint32) (*HostInfo, error) {
  431. hm.RLock()
  432. if h, ok := hm.RemoteIndexes[index]; ok {
  433. hm.RUnlock()
  434. return h, nil
  435. } else {
  436. hm.RUnlock()
  437. return nil, fmt.Errorf("unable to find reverse index or connectionstate nil in %s hostmap", hm.name)
  438. }
  439. }
  440. func (hm *HostMap) QueryVpnIp(vpnIp iputil.VpnIp) (*HostInfo, error) {
  441. return hm.queryVpnIp(vpnIp, nil)
  442. }
  443. // PromoteBestQueryVpnIp will attempt to lazily switch to the best remote every
  444. // `PromoteEvery` calls to this function for a given host.
  445. func (hm *HostMap) PromoteBestQueryVpnIp(vpnIp iputil.VpnIp, ifce *Interface) (*HostInfo, error) {
  446. return hm.queryVpnIp(vpnIp, ifce)
  447. }
  448. func (hm *HostMap) queryVpnIp(vpnIp iputil.VpnIp, promoteIfce *Interface) (*HostInfo, error) {
  449. hm.RLock()
  450. if h, ok := hm.Hosts[vpnIp]; ok {
  451. hm.RUnlock()
  452. // Do not attempt promotion if you are a lighthouse
  453. if promoteIfce != nil && !promoteIfce.lightHouse.amLighthouse {
  454. h.TryPromoteBest(hm.preferredRanges, promoteIfce)
  455. }
  456. return h, nil
  457. }
  458. hm.RUnlock()
  459. return nil, errors.New("unable to find host")
  460. }
  461. // We already have the hm Lock when this is called, so make sure to not call
  462. // any other methods that might try to grab it again
  463. func (hm *HostMap) addHostInfo(hostinfo *HostInfo, f *Interface) {
  464. if f.serveDns {
  465. remoteCert := hostinfo.ConnectionState.peerCert
  466. dnsR.Add(remoteCert.Details.Name+".", remoteCert.Details.Ips[0].IP.String())
  467. }
  468. hm.Hosts[hostinfo.vpnIp] = hostinfo
  469. hm.Indexes[hostinfo.localIndexId] = hostinfo
  470. hm.RemoteIndexes[hostinfo.remoteIndexId] = hostinfo
  471. if hm.l.Level >= logrus.DebugLevel {
  472. hm.l.WithField("hostMap", m{"mapName": hm.name, "vpnIp": hostinfo.vpnIp, "mapTotalSize": len(hm.Hosts),
  473. "hostinfo": m{"existing": true, "localIndexId": hostinfo.localIndexId, "hostId": hostinfo.vpnIp}}).
  474. Debug("Hostmap vpnIp added")
  475. }
  476. }
  477. // punchList assembles a list of all non nil RemoteList pointer entries in this hostmap
  478. // The caller can then do the its work outside of the read lock
  479. func (hm *HostMap) punchList(rl []*RemoteList) []*RemoteList {
  480. hm.RLock()
  481. defer hm.RUnlock()
  482. for _, v := range hm.Hosts {
  483. if v.remotes != nil {
  484. rl = append(rl, v.remotes)
  485. }
  486. }
  487. return rl
  488. }
  489. // Punchy iterates through the result of punchList() to assemble all known addresses and sends a hole punch packet to them
  490. func (hm *HostMap) Punchy(ctx context.Context, conn *udp.Conn) {
  491. var metricsTxPunchy metrics.Counter
  492. if hm.metricsEnabled {
  493. metricsTxPunchy = metrics.GetOrRegisterCounter("messages.tx.punchy", nil)
  494. } else {
  495. metricsTxPunchy = metrics.NilCounter{}
  496. }
  497. var remotes []*RemoteList
  498. b := []byte{1}
  499. clockSource := time.NewTicker(time.Second * 10)
  500. defer clockSource.Stop()
  501. for {
  502. remotes = hm.punchList(remotes[:0])
  503. for _, rl := range remotes {
  504. //TODO: CopyAddrs generates garbage but ForEach locks for the work here, figure out which way is better
  505. for _, addr := range rl.CopyAddrs(hm.preferredRanges) {
  506. metricsTxPunchy.Inc(1)
  507. conn.WriteTo(b, addr)
  508. }
  509. }
  510. select {
  511. case <-ctx.Done():
  512. return
  513. case <-clockSource.C:
  514. continue
  515. }
  516. }
  517. }
  518. // TryPromoteBest handles re-querying lighthouses and probing for better paths
  519. // NOTE: It is an error to call this if you are a lighthouse since they should not roam clients!
  520. func (i *HostInfo) TryPromoteBest(preferredRanges []*net.IPNet, ifce *Interface) {
  521. c := i.promoteCounter.Add(1)
  522. if c%PromoteEvery == 0 {
  523. // The lock here is currently protecting i.remote access
  524. i.RLock()
  525. remote := i.remote
  526. i.RUnlock()
  527. // return early if we are already on a preferred remote
  528. if remote != nil {
  529. rIP := remote.IP
  530. for _, l := range preferredRanges {
  531. if l.Contains(rIP) {
  532. return
  533. }
  534. }
  535. }
  536. i.remotes.ForEach(preferredRanges, func(addr *udp.Addr, preferred bool) {
  537. if remote != nil && (addr == nil || !preferred) {
  538. return
  539. }
  540. // Try to send a test packet to that host, this should
  541. // cause it to detect a roaming event and switch remotes
  542. ifce.sendTo(header.Test, header.TestRequest, i.ConnectionState, i, addr, []byte(""), make([]byte, 12, 12), make([]byte, mtu))
  543. })
  544. }
  545. // Re query our lighthouses for new remotes occasionally
  546. if c%ReQueryEvery == 0 && ifce.lightHouse != nil {
  547. ifce.lightHouse.QueryServer(i.vpnIp, ifce)
  548. }
  549. }
  550. func (i *HostInfo) cachePacket(l *logrus.Logger, t header.MessageType, st header.MessageSubType, packet []byte, f packetCallback, m *cachedPacketMetrics) {
  551. //TODO: return the error so we can log with more context
  552. if len(i.packetStore) < 100 {
  553. tempPacket := make([]byte, len(packet))
  554. copy(tempPacket, packet)
  555. //l.WithField("trace", string(debug.Stack())).Error("Caching packet", tempPacket)
  556. i.packetStore = append(i.packetStore, &cachedPacket{t, st, f, tempPacket})
  557. if l.Level >= logrus.DebugLevel {
  558. i.logger(l).
  559. WithField("length", len(i.packetStore)).
  560. WithField("stored", true).
  561. Debugf("Packet store")
  562. }
  563. } else if l.Level >= logrus.DebugLevel {
  564. m.dropped.Inc(1)
  565. i.logger(l).
  566. WithField("length", len(i.packetStore)).
  567. WithField("stored", false).
  568. Debugf("Packet store")
  569. }
  570. }
  571. // handshakeComplete will set the connection as ready to communicate, as well as flush any stored packets
  572. func (i *HostInfo) handshakeComplete(l *logrus.Logger, m *cachedPacketMetrics) {
  573. //TODO: I'm not certain the distinction between handshake complete and ConnectionState being ready matters because:
  574. //TODO: HandshakeComplete means send stored packets and ConnectionState.ready means we are ready to send
  575. //TODO: if the transition from HandhsakeComplete to ConnectionState.ready happens all within this function they are identical
  576. i.ConnectionState.queueLock.Lock()
  577. i.HandshakeComplete = true
  578. //TODO: this should be managed by the handshake state machine to set it based on how many handshake were seen.
  579. // Clamping it to 2 gets us out of the woods for now
  580. i.ConnectionState.messageCounter.Store(2)
  581. if l.Level >= logrus.DebugLevel {
  582. i.logger(l).Debugf("Sending %d stored packets", len(i.packetStore))
  583. }
  584. if len(i.packetStore) > 0 {
  585. nb := make([]byte, 12, 12)
  586. out := make([]byte, mtu)
  587. for _, cp := range i.packetStore {
  588. cp.callback(cp.messageType, cp.messageSubType, i, cp.packet, nb, out)
  589. }
  590. m.sent.Inc(int64(len(i.packetStore)))
  591. }
  592. i.remotes.ResetBlockedRemotes()
  593. i.packetStore = make([]*cachedPacket, 0)
  594. i.ConnectionState.ready = true
  595. i.ConnectionState.queueLock.Unlock()
  596. i.ConnectionState.certState = nil
  597. }
  598. func (i *HostInfo) GetCert() *cert.NebulaCertificate {
  599. if i.ConnectionState != nil {
  600. return i.ConnectionState.peerCert
  601. }
  602. return nil
  603. }
  604. func (i *HostInfo) SetRemote(remote *udp.Addr) {
  605. // We copy here because we likely got this remote from a source that reuses the object
  606. if !i.remote.Equals(remote) {
  607. i.remote = remote.Copy()
  608. i.remotes.LearnRemote(i.vpnIp, remote.Copy())
  609. }
  610. }
  611. // SetRemoteIfPreferred returns true if the remote was changed. The lastRoam
  612. // time on the HostInfo will also be updated.
  613. func (i *HostInfo) SetRemoteIfPreferred(hm *HostMap, newRemote *udp.Addr) bool {
  614. if newRemote == nil {
  615. // relays have nil udp Addrs
  616. return false
  617. }
  618. currentRemote := i.remote
  619. if currentRemote == nil {
  620. i.SetRemote(newRemote)
  621. return true
  622. }
  623. // NOTE: We do this loop here instead of calling `isPreferred` in
  624. // remote_list.go so that we only have to loop over preferredRanges once.
  625. newIsPreferred := false
  626. for _, l := range hm.preferredRanges {
  627. // return early if we are already on a preferred remote
  628. if l.Contains(currentRemote.IP) {
  629. return false
  630. }
  631. if l.Contains(newRemote.IP) {
  632. newIsPreferred = true
  633. }
  634. }
  635. if newIsPreferred {
  636. // Consider this a roaming event
  637. i.lastRoam = time.Now()
  638. i.lastRoamRemote = currentRemote.Copy()
  639. i.SetRemote(newRemote)
  640. return true
  641. }
  642. return false
  643. }
  644. func (i *HostInfo) RecvErrorExceeded() bool {
  645. if i.recvError < 3 {
  646. i.recvError += 1
  647. return false
  648. }
  649. return true
  650. }
  651. func (i *HostInfo) CreateRemoteCIDR(c *cert.NebulaCertificate) {
  652. if len(c.Details.Ips) == 1 && len(c.Details.Subnets) == 0 {
  653. // Simple case, no CIDRTree needed
  654. return
  655. }
  656. remoteCidr := cidr.NewTree4()
  657. for _, ip := range c.Details.Ips {
  658. remoteCidr.AddCIDR(&net.IPNet{IP: ip.IP, Mask: net.IPMask{255, 255, 255, 255}}, struct{}{})
  659. }
  660. for _, n := range c.Details.Subnets {
  661. remoteCidr.AddCIDR(n, struct{}{})
  662. }
  663. i.remoteCidr = remoteCidr
  664. }
  665. func (i *HostInfo) logger(l *logrus.Logger) *logrus.Entry {
  666. if i == nil {
  667. return logrus.NewEntry(l)
  668. }
  669. li := l.WithField("vpnIp", i.vpnIp)
  670. if connState := i.ConnectionState; connState != nil {
  671. if peerCert := connState.peerCert; peerCert != nil {
  672. li = li.WithField("certName", peerCert.Details.Name)
  673. }
  674. }
  675. return li
  676. }
  677. // Utility functions
  678. func localIps(l *logrus.Logger, allowList *LocalAllowList) *[]net.IP {
  679. //FIXME: This function is pretty garbage
  680. var ips []net.IP
  681. ifaces, _ := net.Interfaces()
  682. for _, i := range ifaces {
  683. allow := allowList.AllowName(i.Name)
  684. if l.Level >= logrus.TraceLevel {
  685. l.WithField("interfaceName", i.Name).WithField("allow", allow).Trace("localAllowList.AllowName")
  686. }
  687. if !allow {
  688. continue
  689. }
  690. addrs, _ := i.Addrs()
  691. for _, addr := range addrs {
  692. var ip net.IP
  693. switch v := addr.(type) {
  694. case *net.IPNet:
  695. //continue
  696. ip = v.IP
  697. case *net.IPAddr:
  698. ip = v.IP
  699. }
  700. //TODO: Filtering out link local for now, this is probably the most correct thing
  701. //TODO: Would be nice to filter out SLAAC MAC based ips as well
  702. if ip.IsLoopback() == false && !ip.IsLinkLocalUnicast() {
  703. allow := allowList.Allow(ip)
  704. if l.Level >= logrus.TraceLevel {
  705. l.WithField("localIp", ip).WithField("allow", allow).Trace("localAllowList.Allow")
  706. }
  707. if !allow {
  708. continue
  709. }
  710. ips = append(ips, ip)
  711. }
  712. }
  713. }
  714. return &ips
  715. }