hostmap.go 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820
  1. package nebula
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "net"
  7. "sync"
  8. "sync/atomic"
  9. "time"
  10. "github.com/rcrowley/go-metrics"
  11. "github.com/sirupsen/logrus"
  12. "github.com/slackhq/nebula/cert"
  13. "github.com/slackhq/nebula/cidr"
  14. "github.com/slackhq/nebula/header"
  15. "github.com/slackhq/nebula/iputil"
  16. "github.com/slackhq/nebula/udp"
  17. )
  //const ProbeLen = 100

  // Tuning values for remote promotion and lighthouse re-querying.
  const (
  	// PromoteEvery is how many TryPromoteBest calls pass between attempts to
  	// probe for a better remote.
  	PromoteEvery = 1000
  	// ReQueryEvery is how many TryPromoteBest calls pass between lighthouse
  	// re-queries for a host.
  	ReQueryEvery = 5000
  	MaxRemotes   = 10

  	// RoamingSuppressSeconds is how long we should prevent roaming back to the
  	// previous IP. This helps prevent flapping due to packets already in flight.
  	RoamingSuppressSeconds = 2
  )

  // Relay states (presumably the values stored in Relay.State — confirm against callers).
  const (
  	Requested = iota
  	Established
  )

  // Relay types (presumably the values stored in Relay.Type — confirm against callers).
  const (
  	Unknowntype = iota
  	ForwardingType
  	TerminalType
  )
  34. type Relay struct {
  35. Type int
  36. State int
  37. LocalIndex uint32
  38. RemoteIndex uint32
  39. PeerIp iputil.VpnIp
  40. }
  // HostMap holds the HostInfo objects known to this node, indexed several
  // ways. The embedded RWMutex guards the four maps.
  type HostMap struct {
  	sync.RWMutex    //Because we concurrently read and write to our maps
  	name            string // map name, included in log fields and error messages
  	Indexes         map[uint32]*HostInfo // keyed by HostInfo.localIndexId
  	Relays          map[uint32]*HostInfo // Maps a Relay IDX to a Relay HostInfo object
  	RemoteIndexes   map[uint32]*HostInfo // keyed by HostInfo.remoteIndexId
  	Hosts           map[iputil.VpnIp]*HostInfo // keyed by VPN IP
  	preferredRanges []*net.IPNet // networks whose remotes are preferred when promoting/roaming
  	vpnCIDR         *net.IPNet
  	metricsEnabled  bool // when true, Punchy counts sent punches in "messages.tx.punchy"
  	l               *logrus.Logger
  }
  // RelayState tracks relay information for one HostInfo: which hosts can be
  // used as relays to reach it, and which peers it is acting as a relay for.
  // The embedded RWMutex guards all three maps.
  type RelayState struct {
  	sync.RWMutex

  	relays        map[iputil.VpnIp]struct{} // Set of VpnIp's of Hosts to use as relays to access this peer
  	relayForByIp  map[iputil.VpnIp]*Relay   // Maps VpnIps of peers for which this HostInfo is a relay to some Relay info
  	relayForByIdx map[uint32]*Relay         // Maps a local index to some Relay info
  }
  59. func (rs *RelayState) DeleteRelay(ip iputil.VpnIp) {
  60. rs.Lock()
  61. defer rs.Unlock()
  62. delete(rs.relays, ip)
  63. }
  64. func (rs *RelayState) GetRelayForByIp(ip iputil.VpnIp) (*Relay, bool) {
  65. rs.RLock()
  66. defer rs.RUnlock()
  67. r, ok := rs.relayForByIp[ip]
  68. return r, ok
  69. }
  70. func (rs *RelayState) InsertRelayTo(ip iputil.VpnIp) {
  71. rs.Lock()
  72. defer rs.Unlock()
  73. rs.relays[ip] = struct{}{}
  74. }
  75. func (rs *RelayState) CopyRelayIps() []iputil.VpnIp {
  76. rs.RLock()
  77. defer rs.RUnlock()
  78. ret := make([]iputil.VpnIp, 0, len(rs.relays))
  79. for ip := range rs.relays {
  80. ret = append(ret, ip)
  81. }
  82. return ret
  83. }
  84. func (rs *RelayState) CopyRelayForIps() []iputil.VpnIp {
  85. rs.RLock()
  86. defer rs.RUnlock()
  87. currentRelays := make([]iputil.VpnIp, 0, len(rs.relayForByIp))
  88. for relayIp := range rs.relayForByIp {
  89. currentRelays = append(currentRelays, relayIp)
  90. }
  91. return currentRelays
  92. }
  93. func (rs *RelayState) CopyRelayForIdxs() []uint32 {
  94. rs.RLock()
  95. defer rs.RUnlock()
  96. ret := make([]uint32, 0, len(rs.relayForByIdx))
  97. for i := range rs.relayForByIdx {
  98. ret = append(ret, i)
  99. }
  100. return ret
  101. }
  102. func (rs *RelayState) RemoveRelay(localIdx uint32) (iputil.VpnIp, bool) {
  103. rs.Lock()
  104. defer rs.Unlock()
  105. relay, ok := rs.relayForByIdx[localIdx]
  106. if !ok {
  107. return iputil.VpnIp(0), false
  108. }
  109. delete(rs.relayForByIdx, localIdx)
  110. delete(rs.relayForByIp, relay.PeerIp)
  111. return relay.PeerIp, true
  112. }
  113. func (rs *RelayState) QueryRelayForByIp(vpnIp iputil.VpnIp) (*Relay, bool) {
  114. rs.RLock()
  115. defer rs.RUnlock()
  116. r, ok := rs.relayForByIp[vpnIp]
  117. return r, ok
  118. }
  119. func (rs *RelayState) QueryRelayForByIdx(idx uint32) (*Relay, bool) {
  120. rs.RLock()
  121. defer rs.RUnlock()
  122. r, ok := rs.relayForByIdx[idx]
  123. return r, ok
  124. }
  125. func (rs *RelayState) InsertRelay(ip iputil.VpnIp, idx uint32, r *Relay) {
  126. rs.Lock()
  127. defer rs.Unlock()
  128. rs.relayForByIp[ip] = r
  129. rs.relayForByIdx[idx] = r
  130. }
  // HostInfo is the per-peer state kept in a HostMap: the peer's current
  // remote address, handshake bookkeeping, tunnel indexes and relay state.
  type HostInfo struct {
  	sync.RWMutex

  	remote            *udp.Addr // address currently used for this peer; TryPromoteBest reads it under the RWMutex
  	remotes           *RemoteList
  	promoteCounter    uint32 // incremented atomically on every TryPromoteBest call
  	ConnectionState   *ConnectionState
  	handshakeStart    time.Time        //todo: this an entry in the handshake manager
  	HandshakeReady    bool             //todo: being in the manager means you are ready
  	HandshakeCounter  int              //todo: another handshake manager entry
  	HandshakeComplete bool             //todo: this should go away in favor of ConnectionState.ready
  	HandshakePacket   map[uint8][]byte //todo: this is other handshake manager entry
  	packetStore       []*cachedPacket  //todo: this is other handshake manager entry
  	remoteIndexId     uint32 // key of this host in HostMap.RemoteIndexes
  	localIndexId      uint32 // key of this host in HostMap.Indexes
  	vpnIp             iputil.VpnIp // key of this host in HostMap.Hosts
  	recvError         int // counted up by RecvErrorExceeded
  	remoteCidr        *cidr.Tree4 // built by CreateRemoteCIDR; stays nil for a cert with one IP and no subnets
  	relayState        RelayState

  	// lastRebindCount is the other side of Interface.rebindCount, if these values don't match then we need to ask LH
  	// for a punch from the remote end of this tunnel. The goal being to prime their conntrack for our traffic just like
  	// with a handshake
  	lastRebindCount int8

  	// lastHandshakeTime records the time the remote side told us about at the stage when the handshake was completed locally
  	// Stage 1 packet will contain it if I am a responder, stage 2 packet if I am an initiator
  	// This is used to avoid an attack where a handshake packet is replayed after some time
  	lastHandshakeTime uint64

  	// lastRoam and lastRoamRemote are set by SetRemoteIfPreferred when it
  	// switches remotes: the time of the switch and a copy of the old remote.
  	lastRoam       time.Time
  	lastRoamRemote *udp.Addr
  }
  // ViaSender groups the relay-related details of a packet that arrived
  // through a relay rather than directly from its sender.
  type ViaSender struct {
  	relayHI   *HostInfo // relayHI is the host info object of the relay
  	remoteIdx uint32    // remoteIdx is the index included in the header of the received packet
  	relay     *Relay    // relay contains the rest of the relay information, including the PeerIP of the host trying to communicate with us.
  }
  // cachedPacket is a packet held in HostInfo.packetStore until the handshake
  // completes, together with what is needed to send it later.
  // Field order matters: cachePacket builds this type with an unkeyed literal.
  type cachedPacket struct {
  	messageType    header.MessageType
  	messageSubType header.MessageSubType
  	callback       packetCallback // invoked by handshakeComplete to send the packet
  	packet         []byte         // private copy of the payload made by cachePacket
  }
  // packetCallback is the function used to flush a cached packet once a
  // handshake completes. handshakeComplete calls it with the packet's message
  // type and subtype, the HostInfo, the stored payload p, a 12-byte buffer nb
  // and an mtu-sized buffer out.
  type packetCallback func(t header.MessageType, st header.MessageSubType, h *HostInfo, p, nb, out []byte)
  172. type cachedPacketMetrics struct {
  173. sent metrics.Counter
  174. dropped metrics.Counter
  175. }
  176. func NewHostMap(l *logrus.Logger, name string, vpnCIDR *net.IPNet, preferredRanges []*net.IPNet) *HostMap {
  177. h := map[iputil.VpnIp]*HostInfo{}
  178. i := map[uint32]*HostInfo{}
  179. r := map[uint32]*HostInfo{}
  180. relays := map[uint32]*HostInfo{}
  181. m := HostMap{
  182. name: name,
  183. Indexes: i,
  184. Relays: relays,
  185. RemoteIndexes: r,
  186. Hosts: h,
  187. preferredRanges: preferredRanges,
  188. vpnCIDR: vpnCIDR,
  189. l: l,
  190. }
  191. return &m
  192. }
  193. // UpdateStats takes a name and reports host and index counts to the stats collection system
  194. func (hm *HostMap) EmitStats(name string) {
  195. hm.RLock()
  196. hostLen := len(hm.Hosts)
  197. indexLen := len(hm.Indexes)
  198. remoteIndexLen := len(hm.RemoteIndexes)
  199. relaysLen := len(hm.Relays)
  200. hm.RUnlock()
  201. metrics.GetOrRegisterGauge("hostmap."+name+".hosts", nil).Update(int64(hostLen))
  202. metrics.GetOrRegisterGauge("hostmap."+name+".indexes", nil).Update(int64(indexLen))
  203. metrics.GetOrRegisterGauge("hostmap."+name+".remoteIndexes", nil).Update(int64(remoteIndexLen))
  204. metrics.GetOrRegisterGauge("hostmap."+name+".relayIndexes", nil).Update(int64(relaysLen))
  205. }
  206. func (hm *HostMap) RemoveRelay(localIdx uint32) {
  207. hm.Lock()
  208. hiRelay, ok := hm.Relays[localIdx]
  209. if !ok {
  210. hm.Unlock()
  211. return
  212. }
  213. delete(hm.Relays, localIdx)
  214. hm.Unlock()
  215. ip, ok := hiRelay.relayState.RemoveRelay(localIdx)
  216. if !ok {
  217. return
  218. }
  219. hiPeer, err := hm.QueryVpnIp(ip)
  220. if err != nil {
  221. return
  222. }
  223. var otherPeerIdx uint32
  224. hiPeer.relayState.DeleteRelay(hiRelay.vpnIp)
  225. relay, ok := hiPeer.relayState.GetRelayForByIp(hiRelay.vpnIp)
  226. if ok {
  227. otherPeerIdx = relay.LocalIndex
  228. }
  229. // I am a relaying host. I need to remove the other relay, too.
  230. hm.RemoveRelay(otherPeerIdx)
  231. }
  232. func (hm *HostMap) GetIndexByVpnIp(vpnIp iputil.VpnIp) (uint32, error) {
  233. hm.RLock()
  234. if i, ok := hm.Hosts[vpnIp]; ok {
  235. index := i.localIndexId
  236. hm.RUnlock()
  237. return index, nil
  238. }
  239. hm.RUnlock()
  240. return 0, errors.New("vpn IP not found")
  241. }
  242. func (hm *HostMap) Add(ip iputil.VpnIp, hostinfo *HostInfo) {
  243. hm.Lock()
  244. hm.Hosts[ip] = hostinfo
  245. hm.Unlock()
  246. }
  247. func (hm *HostMap) AddVpnIp(vpnIp iputil.VpnIp, init func(hostinfo *HostInfo)) (hostinfo *HostInfo, created bool) {
  248. hm.RLock()
  249. if h, ok := hm.Hosts[vpnIp]; !ok {
  250. hm.RUnlock()
  251. h = &HostInfo{
  252. promoteCounter: 0,
  253. vpnIp: vpnIp,
  254. HandshakePacket: make(map[uint8][]byte, 0),
  255. relayState: RelayState{
  256. relays: map[iputil.VpnIp]struct{}{},
  257. relayForByIp: map[iputil.VpnIp]*Relay{},
  258. relayForByIdx: map[uint32]*Relay{},
  259. },
  260. }
  261. if init != nil {
  262. init(h)
  263. }
  264. hm.Lock()
  265. hm.Hosts[vpnIp] = h
  266. hm.Unlock()
  267. return h, true
  268. } else {
  269. hm.RUnlock()
  270. return h, false
  271. }
  272. }
  273. func (hm *HostMap) DeleteVpnIp(vpnIp iputil.VpnIp) {
  274. hm.Lock()
  275. delete(hm.Hosts, vpnIp)
  276. if len(hm.Hosts) == 0 {
  277. hm.Hosts = map[iputil.VpnIp]*HostInfo{}
  278. }
  279. hm.Unlock()
  280. if hm.l.Level >= logrus.DebugLevel {
  281. hm.l.WithField("hostMap", m{"mapName": hm.name, "vpnIp": vpnIp, "mapTotalSize": len(hm.Hosts)}).
  282. Debug("Hostmap vpnIp deleted")
  283. }
  284. }
  285. // Only used by pendingHostMap when the remote index is not initially known
  286. func (hm *HostMap) addRemoteIndexHostInfo(index uint32, h *HostInfo) {
  287. hm.Lock()
  288. h.remoteIndexId = index
  289. hm.RemoteIndexes[index] = h
  290. hm.Unlock()
  291. if hm.l.Level > logrus.DebugLevel {
  292. hm.l.WithField("hostMap", m{"mapName": hm.name, "indexNumber": index, "mapTotalSize": len(hm.Indexes),
  293. "hostinfo": m{"existing": true, "localIndexId": h.localIndexId, "hostId": h.vpnIp}}).
  294. Debug("Hostmap remoteIndex added")
  295. }
  296. }
  297. func (hm *HostMap) AddVpnIpHostInfo(vpnIp iputil.VpnIp, h *HostInfo) {
  298. hm.Lock()
  299. h.vpnIp = vpnIp
  300. hm.Hosts[vpnIp] = h
  301. hm.Indexes[h.localIndexId] = h
  302. hm.RemoteIndexes[h.remoteIndexId] = h
  303. hm.Unlock()
  304. if hm.l.Level > logrus.DebugLevel {
  305. hm.l.WithField("hostMap", m{"mapName": hm.name, "vpnIp": vpnIp, "mapTotalSize": len(hm.Hosts),
  306. "hostinfo": m{"existing": true, "localIndexId": h.localIndexId, "vpnIp": h.vpnIp}}).
  307. Debug("Hostmap vpnIp added")
  308. }
  309. }
  310. // This is only called in pendingHostmap, to cleanup an inbound handshake
  311. func (hm *HostMap) DeleteIndex(index uint32) {
  312. hm.Lock()
  313. hostinfo, ok := hm.Indexes[index]
  314. if ok {
  315. delete(hm.Indexes, index)
  316. delete(hm.RemoteIndexes, hostinfo.remoteIndexId)
  317. // Check if we have an entry under hostId that matches the same hostinfo
  318. // instance. Clean it up as well if we do.
  319. hostinfo2, ok := hm.Hosts[hostinfo.vpnIp]
  320. if ok && hostinfo2 == hostinfo {
  321. delete(hm.Hosts, hostinfo.vpnIp)
  322. }
  323. }
  324. hm.Unlock()
  325. if hm.l.Level >= logrus.DebugLevel {
  326. hm.l.WithField("hostMap", m{"mapName": hm.name, "indexNumber": index, "mapTotalSize": len(hm.Indexes)}).
  327. Debug("Hostmap index deleted")
  328. }
  329. }
  330. // This is used to cleanup on recv_error
  331. func (hm *HostMap) DeleteReverseIndex(index uint32) {
  332. hm.Lock()
  333. hostinfo, ok := hm.RemoteIndexes[index]
  334. if ok {
  335. delete(hm.Indexes, hostinfo.localIndexId)
  336. delete(hm.RemoteIndexes, index)
  337. // Check if we have an entry under hostId that matches the same hostinfo
  338. // instance. Clean it up as well if we do (they might not match in pendingHostmap)
  339. var hostinfo2 *HostInfo
  340. hostinfo2, ok = hm.Hosts[hostinfo.vpnIp]
  341. if ok && hostinfo2 == hostinfo {
  342. delete(hm.Hosts, hostinfo.vpnIp)
  343. }
  344. }
  345. hm.Unlock()
  346. if hm.l.Level >= logrus.DebugLevel {
  347. hm.l.WithField("hostMap", m{"mapName": hm.name, "indexNumber": index, "mapTotalSize": len(hm.Indexes)}).
  348. Debug("Hostmap remote index deleted")
  349. }
  350. }
  351. func (hm *HostMap) DeleteHostInfo(hostinfo *HostInfo) {
  352. // Delete the host itself, ensuring it's not modified anymore
  353. hm.Lock()
  354. hm.unlockedDeleteHostInfo(hostinfo)
  355. hm.Unlock()
  356. // And tear down all the relays going through this host
  357. for _, localIdx := range hostinfo.relayState.CopyRelayForIdxs() {
  358. hm.RemoveRelay(localIdx)
  359. }
  360. // And tear down the relays this deleted hostInfo was using to be reached
  361. teardownRelayIdx := []uint32{}
  362. for _, relayIp := range hostinfo.relayState.CopyRelayIps() {
  363. relayHostInfo, err := hm.QueryVpnIp(relayIp)
  364. if err != nil {
  365. hm.l.WithError(err).WithField("relay", relayIp).Info("Missing relay host in hostmap")
  366. } else {
  367. if r, ok := relayHostInfo.relayState.QueryRelayForByIp(hostinfo.vpnIp); ok {
  368. teardownRelayIdx = append(teardownRelayIdx, r.LocalIndex)
  369. }
  370. }
  371. }
  372. for _, localIdx := range teardownRelayIdx {
  373. hm.RemoveRelay(localIdx)
  374. }
  375. }
  376. func (hm *HostMap) DeleteRelayIdx(localIdx uint32) {
  377. hm.Lock()
  378. defer hm.Unlock()
  379. delete(hm.RemoteIndexes, localIdx)
  380. }
  381. func (hm *HostMap) unlockedDeleteHostInfo(hostinfo *HostInfo) {
  382. // Check if this same hostId is in the hostmap with a different instance.
  383. // This could happen if we have an entry in the pending hostmap with different
  384. // index values than the one in the main hostmap.
  385. hostinfo2, ok := hm.Hosts[hostinfo.vpnIp]
  386. if ok && hostinfo2 != hostinfo {
  387. delete(hm.Hosts, hostinfo2.vpnIp)
  388. delete(hm.Indexes, hostinfo2.localIndexId)
  389. delete(hm.RemoteIndexes, hostinfo2.remoteIndexId)
  390. }
  391. delete(hm.Hosts, hostinfo.vpnIp)
  392. if len(hm.Hosts) == 0 {
  393. hm.Hosts = map[iputil.VpnIp]*HostInfo{}
  394. }
  395. delete(hm.Indexes, hostinfo.localIndexId)
  396. if len(hm.Indexes) == 0 {
  397. hm.Indexes = map[uint32]*HostInfo{}
  398. }
  399. delete(hm.RemoteIndexes, hostinfo.remoteIndexId)
  400. if len(hm.RemoteIndexes) == 0 {
  401. hm.RemoteIndexes = map[uint32]*HostInfo{}
  402. }
  403. if hm.l.Level >= logrus.DebugLevel {
  404. hm.l.WithField("hostMap", m{"mapName": hm.name, "mapTotalSize": len(hm.Hosts),
  405. "vpnIp": hostinfo.vpnIp, "indexNumber": hostinfo.localIndexId, "remoteIndexNumber": hostinfo.remoteIndexId}).
  406. Debug("Hostmap hostInfo deleted")
  407. }
  408. }
  409. func (hm *HostMap) QueryIndex(index uint32) (*HostInfo, error) {
  410. //TODO: we probably just want to return bool instead of error, or at least a static error
  411. hm.RLock()
  412. if h, ok := hm.Indexes[index]; ok {
  413. hm.RUnlock()
  414. return h, nil
  415. } else {
  416. hm.RUnlock()
  417. return nil, errors.New("unable to find index")
  418. }
  419. }
  420. func (hm *HostMap) QueryRelayIndex(index uint32) (*HostInfo, error) {
  421. //TODO: we probably just want to return bool instead of error, or at least a static error
  422. hm.RLock()
  423. if h, ok := hm.Relays[index]; ok {
  424. hm.RUnlock()
  425. return h, nil
  426. } else {
  427. hm.RUnlock()
  428. return nil, errors.New("unable to find index")
  429. }
  430. }
  431. func (hm *HostMap) QueryReverseIndex(index uint32) (*HostInfo, error) {
  432. hm.RLock()
  433. if h, ok := hm.RemoteIndexes[index]; ok {
  434. hm.RUnlock()
  435. return h, nil
  436. } else {
  437. hm.RUnlock()
  438. return nil, fmt.Errorf("unable to find reverse index or connectionstate nil in %s hostmap", hm.name)
  439. }
  440. }
  // QueryVpnIp returns the host stored under vpnIp, or an error when there is
  // none. It passes a nil interface to queryVpnIp, so no remote promotion is
  // attempted.
  func (hm *HostMap) QueryVpnIp(vpnIp iputil.VpnIp) (*HostInfo, error) {
  	return hm.queryVpnIp(vpnIp, nil)
  }
  // PromoteBestQueryVpnIp will attempt to lazily switch to the best remote every
  // `PromoteEvery` calls to this function for a given host.
  // It returns the host stored under vpnIp, or an error when there is none.
  func (hm *HostMap) PromoteBestQueryVpnIp(vpnIp iputil.VpnIp, ifce *Interface) (*HostInfo, error) {
  	return hm.queryVpnIp(vpnIp, ifce)
  }
  449. func (hm *HostMap) queryVpnIp(vpnIp iputil.VpnIp, promoteIfce *Interface) (*HostInfo, error) {
  450. hm.RLock()
  451. if h, ok := hm.Hosts[vpnIp]; ok {
  452. hm.RUnlock()
  453. // Do not attempt promotion if you are a lighthouse
  454. if promoteIfce != nil && !promoteIfce.lightHouse.amLighthouse {
  455. h.TryPromoteBest(hm.preferredRanges, promoteIfce)
  456. }
  457. return h, nil
  458. }
  459. hm.RUnlock()
  460. return nil, errors.New("unable to find host")
  461. }
  462. // We already have the hm Lock when this is called, so make sure to not call
  463. // any other methods that might try to grab it again
  464. func (hm *HostMap) addHostInfo(hostinfo *HostInfo, f *Interface) {
  465. if f.serveDns {
  466. remoteCert := hostinfo.ConnectionState.peerCert
  467. dnsR.Add(remoteCert.Details.Name+".", remoteCert.Details.Ips[0].IP.String())
  468. }
  469. hm.Hosts[hostinfo.vpnIp] = hostinfo
  470. hm.Indexes[hostinfo.localIndexId] = hostinfo
  471. hm.RemoteIndexes[hostinfo.remoteIndexId] = hostinfo
  472. if hm.l.Level >= logrus.DebugLevel {
  473. hm.l.WithField("hostMap", m{"mapName": hm.name, "vpnIp": hostinfo.vpnIp, "mapTotalSize": len(hm.Hosts),
  474. "hostinfo": m{"existing": true, "localIndexId": hostinfo.localIndexId, "hostId": hostinfo.vpnIp}}).
  475. Debug("Hostmap vpnIp added")
  476. }
  477. }
  478. // punchList assembles a list of all non nil RemoteList pointer entries in this hostmap
  479. // The caller can then do the its work outside of the read lock
  480. func (hm *HostMap) punchList(rl []*RemoteList) []*RemoteList {
  481. hm.RLock()
  482. defer hm.RUnlock()
  483. for _, v := range hm.Hosts {
  484. if v.remotes != nil {
  485. rl = append(rl, v.remotes)
  486. }
  487. }
  488. return rl
  489. }
  490. // Punchy iterates through the result of punchList() to assemble all known addresses and sends a hole punch packet to them
  491. func (hm *HostMap) Punchy(ctx context.Context, conn *udp.Conn) {
  492. var metricsTxPunchy metrics.Counter
  493. if hm.metricsEnabled {
  494. metricsTxPunchy = metrics.GetOrRegisterCounter("messages.tx.punchy", nil)
  495. } else {
  496. metricsTxPunchy = metrics.NilCounter{}
  497. }
  498. var remotes []*RemoteList
  499. b := []byte{1}
  500. clockSource := time.NewTicker(time.Second * 10)
  501. defer clockSource.Stop()
  502. for {
  503. remotes = hm.punchList(remotes[:0])
  504. for _, rl := range remotes {
  505. //TODO: CopyAddrs generates garbage but ForEach locks for the work here, figure out which way is better
  506. for _, addr := range rl.CopyAddrs(hm.preferredRanges) {
  507. metricsTxPunchy.Inc(1)
  508. conn.WriteTo(b, addr)
  509. }
  510. }
  511. select {
  512. case <-ctx.Done():
  513. return
  514. case <-clockSource.C:
  515. continue
  516. }
  517. }
  518. }
  519. // TryPromoteBest handles re-querying lighthouses and probing for better paths
  520. // NOTE: It is an error to call this if you are a lighthouse since they should not roam clients!
  521. func (i *HostInfo) TryPromoteBest(preferredRanges []*net.IPNet, ifce *Interface) {
  522. c := atomic.AddUint32(&i.promoteCounter, 1)
  523. if c%PromoteEvery == 0 {
  524. // The lock here is currently protecting i.remote access
  525. i.RLock()
  526. remote := i.remote
  527. i.RUnlock()
  528. // return early if we are already on a preferred remote
  529. if remote != nil {
  530. rIP := remote.IP
  531. for _, l := range preferredRanges {
  532. if l.Contains(rIP) {
  533. return
  534. }
  535. }
  536. }
  537. i.remotes.ForEach(preferredRanges, func(addr *udp.Addr, preferred bool) {
  538. if remote != nil && (addr == nil || !preferred) {
  539. return
  540. }
  541. // Try to send a test packet to that host, this should
  542. // cause it to detect a roaming event and switch remotes
  543. ifce.sendTo(header.Test, header.TestRequest, i.ConnectionState, i, addr, []byte(""), make([]byte, 12, 12), make([]byte, mtu))
  544. })
  545. }
  546. // Re query our lighthouses for new remotes occasionally
  547. if c%ReQueryEvery == 0 && ifce.lightHouse != nil {
  548. ifce.lightHouse.QueryServer(i.vpnIp, ifce)
  549. }
  550. }
  551. func (i *HostInfo) cachePacket(l *logrus.Logger, t header.MessageType, st header.MessageSubType, packet []byte, f packetCallback, m *cachedPacketMetrics) {
  552. //TODO: return the error so we can log with more context
  553. if len(i.packetStore) < 100 {
  554. tempPacket := make([]byte, len(packet))
  555. copy(tempPacket, packet)
  556. //l.WithField("trace", string(debug.Stack())).Error("Caching packet", tempPacket)
  557. i.packetStore = append(i.packetStore, &cachedPacket{t, st, f, tempPacket})
  558. if l.Level >= logrus.DebugLevel {
  559. i.logger(l).
  560. WithField("length", len(i.packetStore)).
  561. WithField("stored", true).
  562. Debugf("Packet store")
  563. }
  564. } else if l.Level >= logrus.DebugLevel {
  565. m.dropped.Inc(1)
  566. i.logger(l).
  567. WithField("length", len(i.packetStore)).
  568. WithField("stored", false).
  569. Debugf("Packet store")
  570. }
  571. }
  572. // handshakeComplete will set the connection as ready to communicate, as well as flush any stored packets
  573. func (i *HostInfo) handshakeComplete(l *logrus.Logger, m *cachedPacketMetrics) {
  574. //TODO: I'm not certain the distinction between handshake complete and ConnectionState being ready matters because:
  575. //TODO: HandshakeComplete means send stored packets and ConnectionState.ready means we are ready to send
  576. //TODO: if the transition from HandhsakeComplete to ConnectionState.ready happens all within this function they are identical
  577. i.ConnectionState.queueLock.Lock()
  578. i.HandshakeComplete = true
  579. //TODO: this should be managed by the handshake state machine to set it based on how many handshake were seen.
  580. // Clamping it to 2 gets us out of the woods for now
  581. atomic.StoreUint64(&i.ConnectionState.atomicMessageCounter, 2)
  582. if l.Level >= logrus.DebugLevel {
  583. i.logger(l).Debugf("Sending %d stored packets", len(i.packetStore))
  584. }
  585. if len(i.packetStore) > 0 {
  586. nb := make([]byte, 12, 12)
  587. out := make([]byte, mtu)
  588. for _, cp := range i.packetStore {
  589. cp.callback(cp.messageType, cp.messageSubType, i, cp.packet, nb, out)
  590. }
  591. m.sent.Inc(int64(len(i.packetStore)))
  592. }
  593. i.remotes.ResetBlockedRemotes()
  594. i.packetStore = make([]*cachedPacket, 0)
  595. i.ConnectionState.ready = true
  596. i.ConnectionState.queueLock.Unlock()
  597. i.ConnectionState.certState = nil
  598. }
  599. func (i *HostInfo) GetCert() *cert.NebulaCertificate {
  600. if i.ConnectionState != nil {
  601. return i.ConnectionState.peerCert
  602. }
  603. return nil
  604. }
  605. func (i *HostInfo) SetRemote(remote *udp.Addr) {
  606. // We copy here because we likely got this remote from a source that reuses the object
  607. if !i.remote.Equals(remote) {
  608. i.remote = remote.Copy()
  609. i.remotes.LearnRemote(i.vpnIp, remote.Copy())
  610. }
  611. }
  612. // SetRemoteIfPreferred returns true if the remote was changed. The lastRoam
  613. // time on the HostInfo will also be updated.
  614. func (i *HostInfo) SetRemoteIfPreferred(hm *HostMap, newRemote *udp.Addr) bool {
  615. if newRemote == nil {
  616. // relays have nil udp Addrs
  617. return false
  618. }
  619. currentRemote := i.remote
  620. if currentRemote == nil {
  621. i.SetRemote(newRemote)
  622. return true
  623. }
  624. // NOTE: We do this loop here instead of calling `isPreferred` in
  625. // remote_list.go so that we only have to loop over preferredRanges once.
  626. newIsPreferred := false
  627. for _, l := range hm.preferredRanges {
  628. // return early if we are already on a preferred remote
  629. if l.Contains(currentRemote.IP) {
  630. return false
  631. }
  632. if l.Contains(newRemote.IP) {
  633. newIsPreferred = true
  634. }
  635. }
  636. if newIsPreferred {
  637. // Consider this a roaming event
  638. i.lastRoam = time.Now()
  639. i.lastRoamRemote = currentRemote.Copy()
  640. i.SetRemote(newRemote)
  641. return true
  642. }
  643. return false
  644. }
  645. func (i *HostInfo) RecvErrorExceeded() bool {
  646. if i.recvError < 3 {
  647. i.recvError += 1
  648. return false
  649. }
  650. return true
  651. }
  652. func (i *HostInfo) CreateRemoteCIDR(c *cert.NebulaCertificate) {
  653. if len(c.Details.Ips) == 1 && len(c.Details.Subnets) == 0 {
  654. // Simple case, no CIDRTree needed
  655. return
  656. }
  657. remoteCidr := cidr.NewTree4()
  658. for _, ip := range c.Details.Ips {
  659. remoteCidr.AddCIDR(&net.IPNet{IP: ip.IP, Mask: net.IPMask{255, 255, 255, 255}}, struct{}{})
  660. }
  661. for _, n := range c.Details.Subnets {
  662. remoteCidr.AddCIDR(n, struct{}{})
  663. }
  664. i.remoteCidr = remoteCidr
  665. }
  666. func (i *HostInfo) logger(l *logrus.Logger) *logrus.Entry {
  667. if i == nil {
  668. return logrus.NewEntry(l)
  669. }
  670. li := l.WithField("vpnIp", i.vpnIp)
  671. if connState := i.ConnectionState; connState != nil {
  672. if peerCert := connState.peerCert; peerCert != nil {
  673. li = li.WithField("certName", peerCert.Details.Name)
  674. }
  675. }
  676. return li
  677. }
  678. // Utility functions
  679. func localIps(l *logrus.Logger, allowList *LocalAllowList) *[]net.IP {
  680. //FIXME: This function is pretty garbage
  681. var ips []net.IP
  682. ifaces, _ := net.Interfaces()
  683. for _, i := range ifaces {
  684. allow := allowList.AllowName(i.Name)
  685. if l.Level >= logrus.TraceLevel {
  686. l.WithField("interfaceName", i.Name).WithField("allow", allow).Trace("localAllowList.AllowName")
  687. }
  688. if !allow {
  689. continue
  690. }
  691. addrs, _ := i.Addrs()
  692. for _, addr := range addrs {
  693. var ip net.IP
  694. switch v := addr.(type) {
  695. case *net.IPNet:
  696. //continue
  697. ip = v.IP
  698. case *net.IPAddr:
  699. ip = v.IP
  700. }
  701. //TODO: Filtering out link local for now, this is probably the most correct thing
  702. //TODO: Would be nice to filter out SLAAC MAC based ips as well
  703. if ip.IsLoopback() == false && !ip.IsLinkLocalUnicast() {
  704. allow := allowList.Allow(ip)
  705. if l.Level >= logrus.TraceLevel {
  706. l.WithField("localIp", ip).WithField("allow", allow).Trace("localAllowList.Allow")
  707. }
  708. if !allow {
  709. continue
  710. }
  711. ips = append(ips, ip)
  712. }
  713. }
  714. }
  715. return &ips
  716. }