lighthouse.go 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129
  1. package nebula
  2. import (
  3. "context"
  4. "encoding/binary"
  5. "errors"
  6. "fmt"
  7. "net"
  8. "net/netip"
  9. "sync"
  10. "sync/atomic"
  11. "time"
  12. "github.com/rcrowley/go-metrics"
  13. "github.com/sirupsen/logrus"
  14. "github.com/slackhq/nebula/cidr"
  15. "github.com/slackhq/nebula/config"
  16. "github.com/slackhq/nebula/header"
  17. "github.com/slackhq/nebula/iputil"
  18. "github.com/slackhq/nebula/udp"
  19. "github.com/slackhq/nebula/util"
  20. )
// TODO: if a lighthouse doesn't have an answer, clients aggressively re-query. Why? Is it the handshake manager and/or getOrHandshake?
// TODO: nodes are roaming lighthouses, this is bad. How are they learning?
  23. var ErrHostNotKnown = errors.New("host not known")
  24. type netIpAndPort struct {
  25. ip net.IP
  26. port uint16
  27. }
  28. type LightHouse struct {
  29. //TODO: We need a timer wheel to kick out vpnIps that haven't reported in a long time
  30. sync.RWMutex //Because we concurrently read and write to our maps
  31. ctx context.Context
  32. amLighthouse bool
  33. myVpnIp iputil.VpnIp
  34. myVpnZeros iputil.VpnIp
  35. myVpnNet *net.IPNet
  36. punchConn udp.Conn
  37. punchy *Punchy
  38. // Local cache of answers from light houses
  39. // map of vpn Ip to answers
  40. addrMap map[iputil.VpnIp]*RemoteList
  41. // filters remote addresses allowed for each host
  42. // - When we are a lighthouse, this filters what addresses we store and
  43. // respond with.
  44. // - When we are not a lighthouse, this filters which addresses we accept
  45. // from lighthouses.
  46. remoteAllowList atomic.Pointer[RemoteAllowList]
  47. // filters local addresses that we advertise to lighthouses
  48. localAllowList atomic.Pointer[LocalAllowList]
  49. // used to trigger the HandshakeManager when we receive HostQueryReply
  50. handshakeTrigger chan<- iputil.VpnIp
  51. // staticList exists to avoid having a bool in each addrMap entry
  52. // since static should be rare
  53. staticList atomic.Pointer[map[iputil.VpnIp]struct{}]
  54. lighthouses atomic.Pointer[map[iputil.VpnIp]struct{}]
  55. interval atomic.Int64
  56. updateCancel context.CancelFunc
  57. ifce EncWriter
  58. nebulaPort uint32 // 32 bits because protobuf does not have a uint16
  59. advertiseAddrs atomic.Pointer[[]netIpAndPort]
  60. // IP's of relays that can be used by peers to access me
  61. relaysForMe atomic.Pointer[[]iputil.VpnIp]
  62. calculatedRemotes atomic.Pointer[cidr.Tree4[[]*calculatedRemote]] // Maps VpnIp to []*calculatedRemote
  63. metrics *MessageMetrics
  64. metricHolepunchTx metrics.Counter
  65. l *logrus.Logger
  66. }
  67. // NewLightHouseFromConfig will build a Lighthouse struct from the values provided in the config object
  68. // addrMap should be nil unless this is during a config reload
  69. func NewLightHouseFromConfig(ctx context.Context, l *logrus.Logger, c *config.C, myVpnNet *net.IPNet, pc udp.Conn, p *Punchy) (*LightHouse, error) {
  70. amLighthouse := c.GetBool("lighthouse.am_lighthouse", false)
  71. nebulaPort := uint32(c.GetInt("listen.port", 0))
  72. if amLighthouse && nebulaPort == 0 {
  73. return nil, util.NewContextualError("lighthouse.am_lighthouse enabled on node but no port number is set in config", nil, nil)
  74. }
  75. // If port is dynamic, discover it
  76. if nebulaPort == 0 && pc != nil {
  77. uPort, err := pc.LocalAddr()
  78. if err != nil {
  79. return nil, util.NewContextualError("Failed to get listening port", nil, err)
  80. }
  81. nebulaPort = uint32(uPort.Port)
  82. }
  83. ones, _ := myVpnNet.Mask.Size()
  84. h := LightHouse{
  85. ctx: ctx,
  86. amLighthouse: amLighthouse,
  87. myVpnIp: iputil.Ip2VpnIp(myVpnNet.IP),
  88. myVpnZeros: iputil.VpnIp(32 - ones),
  89. myVpnNet: myVpnNet,
  90. addrMap: make(map[iputil.VpnIp]*RemoteList),
  91. nebulaPort: nebulaPort,
  92. punchConn: pc,
  93. punchy: p,
  94. l: l,
  95. }
  96. lighthouses := make(map[iputil.VpnIp]struct{})
  97. h.lighthouses.Store(&lighthouses)
  98. staticList := make(map[iputil.VpnIp]struct{})
  99. h.staticList.Store(&staticList)
  100. if c.GetBool("stats.lighthouse_metrics", false) {
  101. h.metrics = newLighthouseMetrics()
  102. h.metricHolepunchTx = metrics.GetOrRegisterCounter("messages.tx.holepunch", nil)
  103. } else {
  104. h.metricHolepunchTx = metrics.NilCounter{}
  105. }
  106. err := h.reload(c, true)
  107. if err != nil {
  108. return nil, err
  109. }
  110. c.RegisterReloadCallback(func(c *config.C) {
  111. err := h.reload(c, false)
  112. switch v := err.(type) {
  113. case *util.ContextualError:
  114. v.Log(l)
  115. case error:
  116. l.WithError(err).Error("failed to reload lighthouse")
  117. }
  118. })
  119. return &h, nil
  120. }
  121. func (lh *LightHouse) GetStaticHostList() map[iputil.VpnIp]struct{} {
  122. return *lh.staticList.Load()
  123. }
  124. func (lh *LightHouse) GetLighthouses() map[iputil.VpnIp]struct{} {
  125. return *lh.lighthouses.Load()
  126. }
  127. func (lh *LightHouse) GetRemoteAllowList() *RemoteAllowList {
  128. return lh.remoteAllowList.Load()
  129. }
  130. func (lh *LightHouse) GetLocalAllowList() *LocalAllowList {
  131. return lh.localAllowList.Load()
  132. }
  133. func (lh *LightHouse) GetAdvertiseAddrs() []netIpAndPort {
  134. return *lh.advertiseAddrs.Load()
  135. }
  136. func (lh *LightHouse) GetRelaysForMe() []iputil.VpnIp {
  137. return *lh.relaysForMe.Load()
  138. }
  139. func (lh *LightHouse) getCalculatedRemotes() *cidr.Tree4[[]*calculatedRemote] {
  140. return lh.calculatedRemotes.Load()
  141. }
  142. func (lh *LightHouse) GetUpdateInterval() int64 {
  143. return lh.interval.Load()
  144. }
  145. func (lh *LightHouse) reload(c *config.C, initial bool) error {
  146. if initial || c.HasChanged("lighthouse.advertise_addrs") {
  147. rawAdvAddrs := c.GetStringSlice("lighthouse.advertise_addrs", []string{})
  148. advAddrs := make([]netIpAndPort, 0)
  149. for i, rawAddr := range rawAdvAddrs {
  150. fIp, fPort, err := udp.ParseIPAndPort(rawAddr)
  151. if err != nil {
  152. return util.NewContextualError("Unable to parse lighthouse.advertise_addrs entry", m{"addr": rawAddr, "entry": i + 1}, err)
  153. }
  154. if fPort == 0 {
  155. fPort = uint16(lh.nebulaPort)
  156. }
  157. if ip4 := fIp.To4(); ip4 != nil && lh.myVpnNet.Contains(fIp) {
  158. lh.l.WithField("addr", rawAddr).WithField("entry", i+1).
  159. Warn("Ignoring lighthouse.advertise_addrs report because it is within the nebula network range")
  160. continue
  161. }
  162. advAddrs = append(advAddrs, netIpAndPort{ip: fIp, port: fPort})
  163. }
  164. lh.advertiseAddrs.Store(&advAddrs)
  165. if !initial {
  166. lh.l.Info("lighthouse.advertise_addrs has changed")
  167. }
  168. }
  169. if initial || c.HasChanged("lighthouse.interval") {
  170. lh.interval.Store(int64(c.GetInt("lighthouse.interval", 10)))
  171. if !initial {
  172. lh.l.Infof("lighthouse.interval changed to %v", lh.interval.Load())
  173. if lh.updateCancel != nil {
  174. // May not always have a running routine
  175. lh.updateCancel()
  176. }
  177. lh.StartUpdateWorker()
  178. }
  179. }
  180. if initial || c.HasChanged("lighthouse.remote_allow_list") || c.HasChanged("lighthouse.remote_allow_ranges") {
  181. ral, err := NewRemoteAllowListFromConfig(c, "lighthouse.remote_allow_list", "lighthouse.remote_allow_ranges")
  182. if err != nil {
  183. return util.NewContextualError("Invalid lighthouse.remote_allow_list", nil, err)
  184. }
  185. lh.remoteAllowList.Store(ral)
  186. if !initial {
  187. //TODO: a diff will be annoyingly difficult
  188. lh.l.Info("lighthouse.remote_allow_list and/or lighthouse.remote_allow_ranges has changed")
  189. }
  190. }
  191. if initial || c.HasChanged("lighthouse.local_allow_list") {
  192. lal, err := NewLocalAllowListFromConfig(c, "lighthouse.local_allow_list")
  193. if err != nil {
  194. return util.NewContextualError("Invalid lighthouse.local_allow_list", nil, err)
  195. }
  196. lh.localAllowList.Store(lal)
  197. if !initial {
  198. //TODO: a diff will be annoyingly difficult
  199. lh.l.Info("lighthouse.local_allow_list has changed")
  200. }
  201. }
  202. if initial || c.HasChanged("lighthouse.calculated_remotes") {
  203. cr, err := NewCalculatedRemotesFromConfig(c, "lighthouse.calculated_remotes")
  204. if err != nil {
  205. return util.NewContextualError("Invalid lighthouse.calculated_remotes", nil, err)
  206. }
  207. lh.calculatedRemotes.Store(cr)
  208. if !initial {
  209. //TODO: a diff will be annoyingly difficult
  210. lh.l.Info("lighthouse.calculated_remotes has changed")
  211. }
  212. }
  213. //NOTE: many things will get much simpler when we combine static_host_map and lighthouse.hosts in config
  214. if initial || c.HasChanged("static_host_map") || c.HasChanged("static_map.cadence") || c.HasChanged("static_map.network") || c.HasChanged("static_map.lookup_timeout") {
  215. // Clean up. Entries still in the static_host_map will be re-built.
  216. // Entries no longer present must have their (possible) background DNS goroutines stopped.
  217. if existingStaticList := lh.staticList.Load(); existingStaticList != nil {
  218. lh.RLock()
  219. for staticVpnIp := range *existingStaticList {
  220. if am, ok := lh.addrMap[staticVpnIp]; ok && am != nil {
  221. am.hr.Cancel()
  222. }
  223. }
  224. lh.RUnlock()
  225. }
  226. // Build a new list based on current config.
  227. staticList := make(map[iputil.VpnIp]struct{})
  228. err := lh.loadStaticMap(c, lh.myVpnNet, staticList)
  229. if err != nil {
  230. return err
  231. }
  232. lh.staticList.Store(&staticList)
  233. if !initial {
  234. //TODO: we should remove any remote list entries for static hosts that were removed/modified?
  235. if c.HasChanged("static_host_map") {
  236. lh.l.Info("static_host_map has changed")
  237. }
  238. if c.HasChanged("static_map.cadence") {
  239. lh.l.Info("static_map.cadence has changed")
  240. }
  241. if c.HasChanged("static_map.network") {
  242. lh.l.Info("static_map.network has changed")
  243. }
  244. if c.HasChanged("static_map.lookup_timeout") {
  245. lh.l.Info("static_map.lookup_timeout has changed")
  246. }
  247. }
  248. }
  249. if initial || c.HasChanged("lighthouse.hosts") {
  250. lhMap := make(map[iputil.VpnIp]struct{})
  251. err := lh.parseLighthouses(c, lh.myVpnNet, lhMap)
  252. if err != nil {
  253. return err
  254. }
  255. lh.lighthouses.Store(&lhMap)
  256. if !initial {
  257. //NOTE: we are not tearing down existing lighthouse connections because they might be used for non lighthouse traffic
  258. lh.l.Info("lighthouse.hosts has changed")
  259. }
  260. }
  261. if initial || c.HasChanged("relay.relays") {
  262. switch c.GetBool("relay.am_relay", false) {
  263. case true:
  264. // Relays aren't allowed to specify other relays
  265. if len(c.GetStringSlice("relay.relays", nil)) > 0 {
  266. lh.l.Info("Ignoring relays from config because am_relay is true")
  267. }
  268. relaysForMe := []iputil.VpnIp{}
  269. lh.relaysForMe.Store(&relaysForMe)
  270. case false:
  271. relaysForMe := []iputil.VpnIp{}
  272. for _, v := range c.GetStringSlice("relay.relays", nil) {
  273. lh.l.WithField("relay", v).Info("Read relay from config")
  274. configRIP := net.ParseIP(v)
  275. if configRIP != nil {
  276. relaysForMe = append(relaysForMe, iputil.Ip2VpnIp(configRIP))
  277. }
  278. }
  279. lh.relaysForMe.Store(&relaysForMe)
  280. }
  281. }
  282. return nil
  283. }
  284. func (lh *LightHouse) parseLighthouses(c *config.C, tunCidr *net.IPNet, lhMap map[iputil.VpnIp]struct{}) error {
  285. lhs := c.GetStringSlice("lighthouse.hosts", []string{})
  286. if lh.amLighthouse && len(lhs) != 0 {
  287. lh.l.Warn("lighthouse.am_lighthouse enabled on node but upstream lighthouses exist in config")
  288. }
  289. for i, host := range lhs {
  290. ip := net.ParseIP(host)
  291. if ip == nil {
  292. return util.NewContextualError("Unable to parse lighthouse host entry", m{"host": host, "entry": i + 1}, nil)
  293. }
  294. if !tunCidr.Contains(ip) {
  295. return util.NewContextualError("lighthouse host is not in our subnet, invalid", m{"vpnIp": ip, "network": tunCidr.String()}, nil)
  296. }
  297. lhMap[iputil.Ip2VpnIp(ip)] = struct{}{}
  298. }
  299. if !lh.amLighthouse && len(lhMap) == 0 {
  300. lh.l.Warn("No lighthouse.hosts configured, this host will only be able to initiate tunnels with static_host_map entries")
  301. }
  302. staticList := lh.GetStaticHostList()
  303. for lhIP, _ := range lhMap {
  304. if _, ok := staticList[lhIP]; !ok {
  305. return fmt.Errorf("lighthouse %s does not have a static_host_map entry", lhIP)
  306. }
  307. }
  308. return nil
  309. }
  310. func getStaticMapCadence(c *config.C) (time.Duration, error) {
  311. cadence := c.GetString("static_map.cadence", "30s")
  312. d, err := time.ParseDuration(cadence)
  313. if err != nil {
  314. return 0, err
  315. }
  316. return d, nil
  317. }
  318. func getStaticMapLookupTimeout(c *config.C) (time.Duration, error) {
  319. lookupTimeout := c.GetString("static_map.lookup_timeout", "250ms")
  320. d, err := time.ParseDuration(lookupTimeout)
  321. if err != nil {
  322. return 0, err
  323. }
  324. return d, nil
  325. }
  326. func getStaticMapNetwork(c *config.C) (string, error) {
  327. network := c.GetString("static_map.network", "ip4")
  328. if network != "ip" && network != "ip4" && network != "ip6" {
  329. return "", fmt.Errorf("static_map.network must be one of ip, ip4, or ip6")
  330. }
  331. return network, nil
  332. }
  333. func (lh *LightHouse) loadStaticMap(c *config.C, tunCidr *net.IPNet, staticList map[iputil.VpnIp]struct{}) error {
  334. d, err := getStaticMapCadence(c)
  335. if err != nil {
  336. return err
  337. }
  338. network, err := getStaticMapNetwork(c)
  339. if err != nil {
  340. return err
  341. }
  342. lookup_timeout, err := getStaticMapLookupTimeout(c)
  343. if err != nil {
  344. return err
  345. }
  346. shm := c.GetMap("static_host_map", map[interface{}]interface{}{})
  347. i := 0
  348. for k, v := range shm {
  349. rip := net.ParseIP(fmt.Sprintf("%v", k))
  350. if rip == nil {
  351. return util.NewContextualError("Unable to parse static_host_map entry", m{"host": k, "entry": i + 1}, nil)
  352. }
  353. if !tunCidr.Contains(rip) {
  354. return util.NewContextualError("static_host_map key is not in our subnet, invalid", m{"vpnIp": rip, "network": tunCidr.String(), "entry": i + 1}, nil)
  355. }
  356. vpnIp := iputil.Ip2VpnIp(rip)
  357. vals, ok := v.([]interface{})
  358. if !ok {
  359. vals = []interface{}{v}
  360. }
  361. remoteAddrs := []string{}
  362. for _, v := range vals {
  363. remoteAddrs = append(remoteAddrs, fmt.Sprintf("%v", v))
  364. }
  365. err := lh.addStaticRemotes(i, d, network, lookup_timeout, vpnIp, remoteAddrs, staticList)
  366. if err != nil {
  367. return err
  368. }
  369. i++
  370. }
  371. return nil
  372. }
  373. func (lh *LightHouse) Query(ip iputil.VpnIp, f EncWriter) *RemoteList {
  374. if !lh.IsLighthouseIP(ip) {
  375. lh.QueryServer(ip, f)
  376. }
  377. lh.RLock()
  378. if v, ok := lh.addrMap[ip]; ok {
  379. lh.RUnlock()
  380. return v
  381. }
  382. lh.RUnlock()
  383. return nil
  384. }
  385. // This is asynchronous so no reply should be expected
  386. func (lh *LightHouse) QueryServer(ip iputil.VpnIp, f EncWriter) {
  387. if lh.amLighthouse {
  388. return
  389. }
  390. if lh.IsLighthouseIP(ip) {
  391. return
  392. }
  393. // Send a query to the lighthouses and hope for the best next time
  394. query, err := NewLhQueryByInt(ip).Marshal()
  395. if err != nil {
  396. lh.l.WithError(err).WithField("vpnIp", ip).Error("Failed to marshal lighthouse query payload")
  397. return
  398. }
  399. lighthouses := lh.GetLighthouses()
  400. lh.metricTx(NebulaMeta_HostQuery, int64(len(lighthouses)))
  401. nb := make([]byte, 12, 12)
  402. out := make([]byte, mtu)
  403. for n := range lighthouses {
  404. f.SendMessageToVpnIp(header.LightHouse, 0, n, query, nb, out)
  405. }
  406. }
  407. func (lh *LightHouse) QueryCache(ip iputil.VpnIp) *RemoteList {
  408. lh.RLock()
  409. if v, ok := lh.addrMap[ip]; ok {
  410. lh.RUnlock()
  411. return v
  412. }
  413. lh.RUnlock()
  414. lh.Lock()
  415. defer lh.Unlock()
  416. // Add an entry if we don't already have one
  417. return lh.unlockedGetRemoteList(ip)
  418. }
  419. // queryAndPrepMessage is a lock helper on RemoteList, assisting the caller to build a lighthouse message containing
  420. // details from the remote list. It looks for a hit in the addrMap and a hit in the RemoteList under the owner vpnIp
  421. // If one is found then f() is called with proper locking, f() must return result of n.MarshalTo()
  422. func (lh *LightHouse) queryAndPrepMessage(vpnIp iputil.VpnIp, f func(*cache) (int, error)) (bool, int, error) {
  423. lh.RLock()
  424. // Do we have an entry in the main cache?
  425. if v, ok := lh.addrMap[vpnIp]; ok {
  426. // Swap lh lock for remote list lock
  427. v.RLock()
  428. defer v.RUnlock()
  429. lh.RUnlock()
  430. // vpnIp should also be the owner here since we are a lighthouse.
  431. c := v.cache[vpnIp]
  432. // Make sure we have
  433. if c != nil {
  434. n, err := f(c)
  435. return true, n, err
  436. }
  437. return false, 0, nil
  438. }
  439. lh.RUnlock()
  440. return false, 0, nil
  441. }
  442. func (lh *LightHouse) DeleteVpnIp(vpnIp iputil.VpnIp) {
  443. // First we check the static mapping
  444. // and do nothing if it is there
  445. if _, ok := lh.GetStaticHostList()[vpnIp]; ok {
  446. return
  447. }
  448. lh.Lock()
  449. //l.Debugln(lh.addrMap)
  450. delete(lh.addrMap, vpnIp)
  451. if lh.l.Level >= logrus.DebugLevel {
  452. lh.l.Debugf("deleting %s from lighthouse.", vpnIp)
  453. }
  454. lh.Unlock()
  455. }
  456. // AddStaticRemote adds a static host entry for vpnIp as ourselves as the owner
  457. // We are the owner because we don't want a lighthouse server to advertise for static hosts it was configured with
  458. // And we don't want a lighthouse query reply to interfere with our learned cache if we are a client
  459. // NOTE: this function should not interact with any hot path objects, like lh.staticList, the caller should handle it
  460. func (lh *LightHouse) addStaticRemotes(i int, d time.Duration, network string, timeout time.Duration, vpnIp iputil.VpnIp, toAddrs []string, staticList map[iputil.VpnIp]struct{}) error {
  461. lh.Lock()
  462. am := lh.unlockedGetRemoteList(vpnIp)
  463. am.Lock()
  464. defer am.Unlock()
  465. ctx := lh.ctx
  466. lh.Unlock()
  467. hr, err := NewHostnameResults(ctx, lh.l, d, network, timeout, toAddrs, func() {
  468. // This callback runs whenever the DNS hostname resolver finds a different set of IP's
  469. // in its resolution for hostnames.
  470. am.Lock()
  471. defer am.Unlock()
  472. am.shouldRebuild = true
  473. })
  474. if err != nil {
  475. return util.NewContextualError("Static host address could not be parsed", m{"vpnIp": vpnIp, "entry": i + 1}, err)
  476. }
  477. am.unlockedSetHostnamesResults(hr)
  478. for _, addrPort := range hr.GetIPs() {
  479. switch {
  480. case addrPort.Addr().Is4():
  481. to := NewIp4AndPortFromNetIP(addrPort.Addr(), addrPort.Port())
  482. if !lh.unlockedShouldAddV4(vpnIp, to) {
  483. continue
  484. }
  485. am.unlockedPrependV4(lh.myVpnIp, to)
  486. case addrPort.Addr().Is6():
  487. to := NewIp6AndPortFromNetIP(addrPort.Addr(), addrPort.Port())
  488. if !lh.unlockedShouldAddV6(vpnIp, to) {
  489. continue
  490. }
  491. am.unlockedPrependV6(lh.myVpnIp, to)
  492. }
  493. }
  494. // Mark it as static in the caller provided map
  495. staticList[vpnIp] = struct{}{}
  496. return nil
  497. }
  498. // addCalculatedRemotes adds any calculated remotes based on the
  499. // lighthouse.calculated_remotes configuration. It returns true if any
  500. // calculated remotes were added
  501. func (lh *LightHouse) addCalculatedRemotes(vpnIp iputil.VpnIp) bool {
  502. tree := lh.getCalculatedRemotes()
  503. if tree == nil {
  504. return false
  505. }
  506. ok, calculatedRemotes := tree.MostSpecificContains(vpnIp)
  507. if !ok {
  508. return false
  509. }
  510. var calculated []*Ip4AndPort
  511. for _, cr := range calculatedRemotes {
  512. c := cr.Apply(vpnIp)
  513. if c != nil {
  514. calculated = append(calculated, c)
  515. }
  516. }
  517. lh.Lock()
  518. am := lh.unlockedGetRemoteList(vpnIp)
  519. am.Lock()
  520. defer am.Unlock()
  521. lh.Unlock()
  522. am.unlockedSetV4(lh.myVpnIp, vpnIp, calculated, lh.unlockedShouldAddV4)
  523. return len(calculated) > 0
  524. }
  525. // unlockedGetRemoteList assumes you have the lh lock
  526. func (lh *LightHouse) unlockedGetRemoteList(vpnIp iputil.VpnIp) *RemoteList {
  527. am, ok := lh.addrMap[vpnIp]
  528. if !ok {
  529. am = NewRemoteList(func(a netip.Addr) bool { return lh.shouldAdd(vpnIp, a) })
  530. lh.addrMap[vpnIp] = am
  531. }
  532. return am
  533. }
  534. func (lh *LightHouse) shouldAdd(vpnIp iputil.VpnIp, to netip.Addr) bool {
  535. switch {
  536. case to.Is4():
  537. ipBytes := to.As4()
  538. ip := iputil.Ip2VpnIp(ipBytes[:])
  539. allow := lh.GetRemoteAllowList().AllowIpV4(vpnIp, ip)
  540. if lh.l.Level >= logrus.TraceLevel {
  541. lh.l.WithField("remoteIp", vpnIp).WithField("allow", allow).Trace("remoteAllowList.Allow")
  542. }
  543. if !allow || ipMaskContains(lh.myVpnIp, lh.myVpnZeros, ip) {
  544. return false
  545. }
  546. case to.Is6():
  547. ipBytes := to.As16()
  548. hi := binary.BigEndian.Uint64(ipBytes[:8])
  549. lo := binary.BigEndian.Uint64(ipBytes[8:])
  550. allow := lh.GetRemoteAllowList().AllowIpV6(vpnIp, hi, lo)
  551. if lh.l.Level >= logrus.TraceLevel {
  552. lh.l.WithField("remoteIp", to).WithField("allow", allow).Trace("remoteAllowList.Allow")
  553. }
  554. // We don't check our vpn network here because nebula does not support ipv6 on the inside
  555. if !allow {
  556. return false
  557. }
  558. }
  559. return true
  560. }
  561. // unlockedShouldAddV4 checks if to is allowed by our allow list
  562. func (lh *LightHouse) unlockedShouldAddV4(vpnIp iputil.VpnIp, to *Ip4AndPort) bool {
  563. allow := lh.GetRemoteAllowList().AllowIpV4(vpnIp, iputil.VpnIp(to.Ip))
  564. if lh.l.Level >= logrus.TraceLevel {
  565. lh.l.WithField("remoteIp", vpnIp).WithField("allow", allow).Trace("remoteAllowList.Allow")
  566. }
  567. if !allow || ipMaskContains(lh.myVpnIp, lh.myVpnZeros, iputil.VpnIp(to.Ip)) {
  568. return false
  569. }
  570. return true
  571. }
  572. // unlockedShouldAddV6 checks if to is allowed by our allow list
  573. func (lh *LightHouse) unlockedShouldAddV6(vpnIp iputil.VpnIp, to *Ip6AndPort) bool {
  574. allow := lh.GetRemoteAllowList().AllowIpV6(vpnIp, to.Hi, to.Lo)
  575. if lh.l.Level >= logrus.TraceLevel {
  576. lh.l.WithField("remoteIp", lhIp6ToIp(to)).WithField("allow", allow).Trace("remoteAllowList.Allow")
  577. }
  578. // We don't check our vpn network here because nebula does not support ipv6 on the inside
  579. if !allow {
  580. return false
  581. }
  582. return true
  583. }
  584. func lhIp6ToIp(v *Ip6AndPort) net.IP {
  585. ip := make(net.IP, 16)
  586. binary.BigEndian.PutUint64(ip[:8], v.Hi)
  587. binary.BigEndian.PutUint64(ip[8:], v.Lo)
  588. return ip
  589. }
  590. func (lh *LightHouse) IsLighthouseIP(vpnIp iputil.VpnIp) bool {
  591. if _, ok := lh.GetLighthouses()[vpnIp]; ok {
  592. return true
  593. }
  594. return false
  595. }
  596. func NewLhQueryByInt(VpnIp iputil.VpnIp) *NebulaMeta {
  597. return &NebulaMeta{
  598. Type: NebulaMeta_HostQuery,
  599. Details: &NebulaMetaDetails{
  600. VpnIp: uint32(VpnIp),
  601. },
  602. }
  603. }
  604. func NewIp4AndPort(ip net.IP, port uint32) *Ip4AndPort {
  605. ipp := Ip4AndPort{Port: port}
  606. ipp.Ip = uint32(iputil.Ip2VpnIp(ip))
  607. return &ipp
  608. }
  609. func NewIp4AndPortFromNetIP(ip netip.Addr, port uint16) *Ip4AndPort {
  610. v4Addr := ip.As4()
  611. return &Ip4AndPort{
  612. Ip: binary.BigEndian.Uint32(v4Addr[:]),
  613. Port: uint32(port),
  614. }
  615. }
  616. func NewIp6AndPort(ip net.IP, port uint32) *Ip6AndPort {
  617. return &Ip6AndPort{
  618. Hi: binary.BigEndian.Uint64(ip[:8]),
  619. Lo: binary.BigEndian.Uint64(ip[8:]),
  620. Port: port,
  621. }
  622. }
  623. func NewIp6AndPortFromNetIP(ip netip.Addr, port uint16) *Ip6AndPort {
  624. ip6Addr := ip.As16()
  625. return &Ip6AndPort{
  626. Hi: binary.BigEndian.Uint64(ip6Addr[:8]),
  627. Lo: binary.BigEndian.Uint64(ip6Addr[8:]),
  628. Port: uint32(port),
  629. }
  630. }
  631. func NewUDPAddrFromLH4(ipp *Ip4AndPort) *udp.Addr {
  632. ip := ipp.Ip
  633. return udp.NewAddr(
  634. net.IPv4(byte(ip&0xff000000>>24), byte(ip&0x00ff0000>>16), byte(ip&0x0000ff00>>8), byte(ip&0x000000ff)),
  635. uint16(ipp.Port),
  636. )
  637. }
  638. func NewUDPAddrFromLH6(ipp *Ip6AndPort) *udp.Addr {
  639. return udp.NewAddr(lhIp6ToIp(ipp), uint16(ipp.Port))
  640. }
  641. func (lh *LightHouse) StartUpdateWorker() {
  642. interval := lh.GetUpdateInterval()
  643. if lh.amLighthouse || interval == 0 {
  644. return
  645. }
  646. clockSource := time.NewTicker(time.Second * time.Duration(interval))
  647. updateCtx, cancel := context.WithCancel(lh.ctx)
  648. lh.updateCancel = cancel
  649. go func() {
  650. defer clockSource.Stop()
  651. for {
  652. lh.SendUpdate()
  653. select {
  654. case <-updateCtx.Done():
  655. return
  656. case <-clockSource.C:
  657. continue
  658. }
  659. }
  660. }()
  661. }
  662. func (lh *LightHouse) SendUpdate() {
  663. var v4 []*Ip4AndPort
  664. var v6 []*Ip6AndPort
  665. for _, e := range lh.GetAdvertiseAddrs() {
  666. if ip := e.ip.To4(); ip != nil {
  667. v4 = append(v4, NewIp4AndPort(e.ip, uint32(e.port)))
  668. } else {
  669. v6 = append(v6, NewIp6AndPort(e.ip, uint32(e.port)))
  670. }
  671. }
  672. lal := lh.GetLocalAllowList()
  673. for _, e := range *localIps(lh.l, lal) {
  674. if ip4 := e.To4(); ip4 != nil && ipMaskContains(lh.myVpnIp, lh.myVpnZeros, iputil.Ip2VpnIp(ip4)) {
  675. continue
  676. }
  677. // Only add IPs that aren't my VPN/tun IP
  678. if ip := e.To4(); ip != nil {
  679. v4 = append(v4, NewIp4AndPort(e, lh.nebulaPort))
  680. } else {
  681. v6 = append(v6, NewIp6AndPort(e, lh.nebulaPort))
  682. }
  683. }
  684. var relays []uint32
  685. for _, r := range lh.GetRelaysForMe() {
  686. relays = append(relays, (uint32)(r))
  687. }
  688. m := &NebulaMeta{
  689. Type: NebulaMeta_HostUpdateNotification,
  690. Details: &NebulaMetaDetails{
  691. VpnIp: uint32(lh.myVpnIp),
  692. Ip4AndPorts: v4,
  693. Ip6AndPorts: v6,
  694. RelayVpnIp: relays,
  695. },
  696. }
  697. lighthouses := lh.GetLighthouses()
  698. lh.metricTx(NebulaMeta_HostUpdateNotification, int64(len(lighthouses)))
  699. nb := make([]byte, 12, 12)
  700. out := make([]byte, mtu)
  701. mm, err := m.Marshal()
  702. if err != nil {
  703. lh.l.WithError(err).Error("Error while marshaling for lighthouse update")
  704. return
  705. }
  706. for vpnIp := range lighthouses {
  707. lh.ifce.SendMessageToVpnIp(header.LightHouse, 0, vpnIp, mm, nb, out)
  708. }
  709. }
  710. type LightHouseHandler struct {
  711. lh *LightHouse
  712. nb []byte
  713. out []byte
  714. pb []byte
  715. meta *NebulaMeta
  716. l *logrus.Logger
  717. }
  718. func (lh *LightHouse) NewRequestHandler() *LightHouseHandler {
  719. lhh := &LightHouseHandler{
  720. lh: lh,
  721. nb: make([]byte, 12, 12),
  722. out: make([]byte, mtu),
  723. l: lh.l,
  724. pb: make([]byte, mtu),
  725. meta: &NebulaMeta{
  726. Details: &NebulaMetaDetails{},
  727. },
  728. }
  729. return lhh
  730. }
  731. func (lh *LightHouse) metricRx(t NebulaMeta_MessageType, i int64) {
  732. lh.metrics.Rx(header.MessageType(t), 0, i)
  733. }
  734. func (lh *LightHouse) metricTx(t NebulaMeta_MessageType, i int64) {
  735. lh.metrics.Tx(header.MessageType(t), 0, i)
  736. }
  737. // This method is similar to Reset(), but it re-uses the pointer structs
  738. // so that we don't have to re-allocate them
  739. func (lhh *LightHouseHandler) resetMeta() *NebulaMeta {
  740. details := lhh.meta.Details
  741. lhh.meta.Reset()
  742. // Keep the array memory around
  743. details.Ip4AndPorts = details.Ip4AndPorts[:0]
  744. details.Ip6AndPorts = details.Ip6AndPorts[:0]
  745. details.RelayVpnIp = details.RelayVpnIp[:0]
  746. lhh.meta.Details = details
  747. return lhh.meta
  748. }
  749. func lhHandleRequest(lhh *LightHouseHandler, f *Interface) udp.LightHouseHandlerFunc {
  750. return func(rAddr *udp.Addr, vpnIp iputil.VpnIp, p []byte) {
  751. lhh.HandleRequest(rAddr, vpnIp, p, f)
  752. }
  753. }
  754. func (lhh *LightHouseHandler) HandleRequest(rAddr *udp.Addr, vpnIp iputil.VpnIp, p []byte, w EncWriter) {
  755. n := lhh.resetMeta()
  756. err := n.Unmarshal(p)
  757. if err != nil {
  758. lhh.l.WithError(err).WithField("vpnIp", vpnIp).WithField("udpAddr", rAddr).
  759. Error("Failed to unmarshal lighthouse packet")
  760. //TODO: send recv_error?
  761. return
  762. }
  763. if n.Details == nil {
  764. lhh.l.WithField("vpnIp", vpnIp).WithField("udpAddr", rAddr).
  765. Error("Invalid lighthouse update")
  766. //TODO: send recv_error?
  767. return
  768. }
  769. lhh.lh.metricRx(n.Type, 1)
  770. switch n.Type {
  771. case NebulaMeta_HostQuery:
  772. lhh.handleHostQuery(n, vpnIp, rAddr, w)
  773. case NebulaMeta_HostQueryReply:
  774. lhh.handleHostQueryReply(n, vpnIp)
  775. case NebulaMeta_HostUpdateNotification:
  776. lhh.handleHostUpdateNotification(n, vpnIp, w)
  777. case NebulaMeta_HostMovedNotification:
  778. case NebulaMeta_HostPunchNotification:
  779. lhh.handleHostPunchNotification(n, vpnIp, w)
  780. case NebulaMeta_HostUpdateNotificationAck:
  781. // noop
  782. }
  783. }
  784. func (lhh *LightHouseHandler) handleHostQuery(n *NebulaMeta, vpnIp iputil.VpnIp, addr *udp.Addr, w EncWriter) {
  785. // Exit if we don't answer queries
  786. if !lhh.lh.amLighthouse {
  787. if lhh.l.Level >= logrus.DebugLevel {
  788. lhh.l.Debugln("I don't answer queries, but received from: ", addr)
  789. }
  790. return
  791. }
  792. //TODO: we can DRY this further
  793. reqVpnIp := n.Details.VpnIp
  794. //TODO: Maybe instead of marshalling into n we marshal into a new `r` to not nuke our current request data
  795. found, ln, err := lhh.lh.queryAndPrepMessage(iputil.VpnIp(n.Details.VpnIp), func(c *cache) (int, error) {
  796. n = lhh.resetMeta()
  797. n.Type = NebulaMeta_HostQueryReply
  798. n.Details.VpnIp = reqVpnIp
  799. lhh.coalesceAnswers(c, n)
  800. return n.MarshalTo(lhh.pb)
  801. })
  802. if !found {
  803. return
  804. }
  805. if err != nil {
  806. lhh.l.WithError(err).WithField("vpnIp", vpnIp).Error("Failed to marshal lighthouse host query reply")
  807. return
  808. }
  809. lhh.lh.metricTx(NebulaMeta_HostQueryReply, 1)
  810. w.SendMessageToVpnIp(header.LightHouse, 0, vpnIp, lhh.pb[:ln], lhh.nb, lhh.out[:0])
  811. // This signals the other side to punch some zero byte udp packets
  812. found, ln, err = lhh.lh.queryAndPrepMessage(vpnIp, func(c *cache) (int, error) {
  813. n = lhh.resetMeta()
  814. n.Type = NebulaMeta_HostPunchNotification
  815. n.Details.VpnIp = uint32(vpnIp)
  816. lhh.coalesceAnswers(c, n)
  817. return n.MarshalTo(lhh.pb)
  818. })
  819. if !found {
  820. return
  821. }
  822. if err != nil {
  823. lhh.l.WithError(err).WithField("vpnIp", vpnIp).Error("Failed to marshal lighthouse host was queried for")
  824. return
  825. }
  826. lhh.lh.metricTx(NebulaMeta_HostPunchNotification, 1)
  827. w.SendMessageToVpnIp(header.LightHouse, 0, iputil.VpnIp(reqVpnIp), lhh.pb[:ln], lhh.nb, lhh.out[:0])
  828. }
  829. func (lhh *LightHouseHandler) coalesceAnswers(c *cache, n *NebulaMeta) {
  830. if c.v4 != nil {
  831. if c.v4.learned != nil {
  832. n.Details.Ip4AndPorts = append(n.Details.Ip4AndPorts, c.v4.learned)
  833. }
  834. if c.v4.reported != nil && len(c.v4.reported) > 0 {
  835. n.Details.Ip4AndPorts = append(n.Details.Ip4AndPorts, c.v4.reported...)
  836. }
  837. }
  838. if c.v6 != nil {
  839. if c.v6.learned != nil {
  840. n.Details.Ip6AndPorts = append(n.Details.Ip6AndPorts, c.v6.learned)
  841. }
  842. if c.v6.reported != nil && len(c.v6.reported) > 0 {
  843. n.Details.Ip6AndPorts = append(n.Details.Ip6AndPorts, c.v6.reported...)
  844. }
  845. }
  846. if c.relay != nil {
  847. n.Details.RelayVpnIp = append(n.Details.RelayVpnIp, c.relay.relay...)
  848. }
  849. }
  850. func (lhh *LightHouseHandler) handleHostQueryReply(n *NebulaMeta, vpnIp iputil.VpnIp) {
  851. if !lhh.lh.IsLighthouseIP(vpnIp) {
  852. return
  853. }
  854. lhh.lh.Lock()
  855. am := lhh.lh.unlockedGetRemoteList(iputil.VpnIp(n.Details.VpnIp))
  856. am.Lock()
  857. lhh.lh.Unlock()
  858. certVpnIp := iputil.VpnIp(n.Details.VpnIp)
  859. am.unlockedSetV4(vpnIp, certVpnIp, n.Details.Ip4AndPorts, lhh.lh.unlockedShouldAddV4)
  860. am.unlockedSetV6(vpnIp, certVpnIp, n.Details.Ip6AndPorts, lhh.lh.unlockedShouldAddV6)
  861. am.unlockedSetRelay(vpnIp, certVpnIp, n.Details.RelayVpnIp)
  862. am.Unlock()
  863. // Non-blocking attempt to trigger, skip if it would block
  864. select {
  865. case lhh.lh.handshakeTrigger <- iputil.VpnIp(n.Details.VpnIp):
  866. default:
  867. }
  868. }
  869. func (lhh *LightHouseHandler) handleHostUpdateNotification(n *NebulaMeta, vpnIp iputil.VpnIp, w EncWriter) {
  870. if !lhh.lh.amLighthouse {
  871. if lhh.l.Level >= logrus.DebugLevel {
  872. lhh.l.Debugln("I am not a lighthouse, do not take host updates: ", vpnIp)
  873. }
  874. return
  875. }
  876. //Simple check that the host sent this not someone else
  877. if n.Details.VpnIp != uint32(vpnIp) {
  878. if lhh.l.Level >= logrus.DebugLevel {
  879. lhh.l.WithField("vpnIp", vpnIp).WithField("answer", iputil.VpnIp(n.Details.VpnIp)).Debugln("Host sent invalid update")
  880. }
  881. return
  882. }
  883. lhh.lh.Lock()
  884. am := lhh.lh.unlockedGetRemoteList(vpnIp)
  885. am.Lock()
  886. lhh.lh.Unlock()
  887. certVpnIp := iputil.VpnIp(n.Details.VpnIp)
  888. am.unlockedSetV4(vpnIp, certVpnIp, n.Details.Ip4AndPorts, lhh.lh.unlockedShouldAddV4)
  889. am.unlockedSetV6(vpnIp, certVpnIp, n.Details.Ip6AndPorts, lhh.lh.unlockedShouldAddV6)
  890. am.unlockedSetRelay(vpnIp, certVpnIp, n.Details.RelayVpnIp)
  891. am.Unlock()
  892. n = lhh.resetMeta()
  893. n.Type = NebulaMeta_HostUpdateNotificationAck
  894. n.Details.VpnIp = uint32(vpnIp)
  895. ln, err := n.MarshalTo(lhh.pb)
  896. if err != nil {
  897. lhh.l.WithError(err).WithField("vpnIp", vpnIp).Error("Failed to marshal lighthouse host update ack")
  898. return
  899. }
  900. lhh.lh.metricTx(NebulaMeta_HostUpdateNotificationAck, 1)
  901. w.SendMessageToVpnIp(header.LightHouse, 0, vpnIp, lhh.pb[:ln], lhh.nb, lhh.out[:0])
  902. }
  903. func (lhh *LightHouseHandler) handleHostPunchNotification(n *NebulaMeta, vpnIp iputil.VpnIp, w EncWriter) {
  904. if !lhh.lh.IsLighthouseIP(vpnIp) {
  905. return
  906. }
  907. empty := []byte{0}
  908. punch := func(vpnPeer *udp.Addr) {
  909. if vpnPeer == nil {
  910. return
  911. }
  912. go func() {
  913. time.Sleep(lhh.lh.punchy.GetDelay())
  914. lhh.lh.metricHolepunchTx.Inc(1)
  915. lhh.lh.punchConn.WriteTo(empty, vpnPeer)
  916. }()
  917. if lhh.l.Level >= logrus.DebugLevel {
  918. //TODO: lacking the ip we are actually punching on, old: l.Debugf("Punching %s on %d for %s", IntIp(a.Ip), a.Port, IntIp(n.Details.VpnIp))
  919. lhh.l.Debugf("Punching on %d for %s", vpnPeer.Port, iputil.VpnIp(n.Details.VpnIp))
  920. }
  921. }
  922. for _, a := range n.Details.Ip4AndPorts {
  923. punch(NewUDPAddrFromLH4(a))
  924. }
  925. for _, a := range n.Details.Ip6AndPorts {
  926. punch(NewUDPAddrFromLH6(a))
  927. }
  928. // This sends a nebula test packet to the host trying to contact us. In the case
  929. // of a double nat or other difficult scenario, this may help establish
  930. // a tunnel.
  931. if lhh.lh.punchy.GetRespond() {
  932. queryVpnIp := iputil.VpnIp(n.Details.VpnIp)
  933. go func() {
  934. time.Sleep(lhh.lh.punchy.GetRespondDelay())
  935. if lhh.l.Level >= logrus.DebugLevel {
  936. lhh.l.Debugf("Sending a nebula test packet to vpn ip %s", queryVpnIp)
  937. }
  938. //NOTE: we have to allocate a new output buffer here since we are spawning a new goroutine
  939. // for each punchBack packet. We should move this into a timerwheel or a single goroutine
  940. // managed by a channel.
  941. w.SendMessageToVpnIp(header.Test, header.TestRequest, queryVpnIp, []byte(""), make([]byte, 12, 12), make([]byte, mtu))
  942. }()
  943. }
  944. }
  945. // ipMaskContains checks if testIp is contained by ip after applying a cidr
  946. // zeros is 32 - bits from net.IPMask.Size()
  947. func ipMaskContains(ip iputil.VpnIp, zeros iputil.VpnIp, testIp iputil.VpnIp) bool {
  948. return (testIp^ip)>>zeros == 0
  949. }