interface.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437
  1. package nebula
  2. import (
  3. "context"
  4. "encoding/binary"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "net"
  9. "net/netip"
  10. "os"
  11. "runtime"
  12. "sync/atomic"
  13. "time"
  14. "github.com/rcrowley/go-metrics"
  15. "github.com/sirupsen/logrus"
  16. "github.com/slackhq/nebula/config"
  17. "github.com/slackhq/nebula/firewall"
  18. "github.com/slackhq/nebula/header"
  19. "github.com/slackhq/nebula/overlay"
  20. "github.com/slackhq/nebula/udp"
  21. )
// mtu is the size, in bytes, of the packet and output buffers that each
// listenIn goroutine allocates.
const mtu = 9001
// InterfaceConfig bundles the dependencies and initial settings that
// NewInterface copies into a new Interface.
type InterfaceConfig struct {
	HostMap *HostMap
	// Outside, Inside, pki and Firewall are required: NewInterface returns an
	// error when any of them is nil.
	Outside          udp.Conn
	Inside           overlay.Device
	pki              *PKI
	Cipher           string
	Firewall         *Firewall
	ServeDns         bool
	HandshakeManager *HandshakeManager
	lightHouse       *LightHouse
	// checkInterval and pendingDeletionInterval are handed to
	// newConnectionManager by NewInterface.
	checkInterval           time.Duration
	pendingDeletionInterval time.Duration
	DropLocalBroadcast      bool
	DropMulticast           bool
	// routines sizes the Interface's writers and readers slices and is the
	// number of listenOut/listenIn goroutine pairs started by run.
	routines       int
	MessageMetrics *MessageMetrics
	version        string
	relayManager   *relayManager
	// punchy is handed to newConnectionManager by NewInterface.
	punchy *Punchy
	// tryPromoteEvery, reQueryEvery and reQueryWait are the initial values
	// stored into the Interface's atomics of the same names.
	tryPromoteEvery       uint32
	reQueryEvery          uint32
	reQueryWait           time.Duration
	ConntrackCacheTimeout time.Duration
	l                     *logrus.Logger
}
// Interface holds the state shared by the goroutines that read from the
// outside udp connections (listenOut) and from the inside device (listenIn).
// It is built by NewInterface.
type Interface struct {
	hostMap           *HostMap
	outside           udp.Conn
	inside            overlay.Device
	pki               *PKI
	cipher            string
	firewall          *Firewall
	connectionManager *connectionManager
	handshakeManager  *HandshakeManager
	serveDns          bool
	// createTime is set to time.Now() in NewInterface.
	createTime time.Time
	lightHouse *LightHouse
	// myBroadcastAddr is computed in NewInterface only when myVpnNet is an
	// IPv4 prefix; otherwise it stays the zero netip.Addr.
	myBroadcastAddr netip.Addr
	// myVpnNet is the first network listed in the certificate.
	myVpnNet           netip.Prefix
	dropLocalBroadcast bool
	dropMulticast      bool
	routines           int
	// disconnectInvalid mirrors the pki.disconnect_invalid config value.
	disconnectInvalid atomic.Bool
	// closed is set by Close so listenIn can tell an expected os.ErrClosed
	// read error from a fatal one.
	closed       atomic.Bool
	relayManager *relayManager

	// tryPromoteEvery, reQueryEvery and reQueryWait are updated by reloadMisc.
	// reQueryWait stores a time.Duration converted to int64.
	tryPromoteEvery atomic.Uint32
	reQueryEvery    atomic.Uint32
	reQueryWait     atomic.Int64

	// sendRecvErrorConfig is set from listen.send_recv_error by
	// reloadSendRecvError.
	sendRecvErrorConfig sendRecvErrorConfig

	// rebindCount is used to decide if an active tunnel should trigger a punch notification through a lighthouse
	rebindCount int8
	version     string

	conntrackCacheTimeout time.Duration

	// writers and readers are allocated with length routines in NewInterface.
	// readers is filled in by activate; listenOut uses writers[i] for i > 0
	// and outside for i == 0.
	// NOTE(review): writers is not populated anywhere in this section of the
	// file — presumably done by the caller; confirm.
	writers []udp.Conn
	readers []io.ReadWriteCloser

	metricHandshakes    metrics.Histogram
	messageMetrics      *MessageMetrics
	cachedPacketMetrics *cachedPacketMetrics

	l *logrus.Logger
}
// EncWriter groups the send and handshake operations listed below.
// NOTE(review): no implementation is visible in this section of the file;
// presumably *Interface satisfies it — confirm.
type EncWriter interface {
	// SendVia takes the host to send through (via), the relay to use and three
	// byte slices (ad, nb, out).
	// NOTE(review): the meaning of nocopy is not visible here — confirm
	// against the implementation.
	SendVia(via *HostInfo,
		relay *Relay,
		ad,
		nb,
		out []byte,
		nocopy bool,
	)
	// SendMessageToVpnIp addresses the message by vpn address.
	SendMessageToVpnIp(t header.MessageType, st header.MessageSubType, vpnIp netip.Addr, p, nb, out []byte)
	// SendMessageToHostInfo addresses the message by an already resolved HostInfo.
	SendMessageToHostInfo(t header.MessageType, st header.MessageSubType, hostinfo *HostInfo, p, nb, out []byte)
	// Handshake takes the vpn address of the peer.
	Handshake(vpnIp netip.Addr)
}
  95. type sendRecvErrorConfig uint8
  96. const (
  97. sendRecvErrorAlways sendRecvErrorConfig = iota
  98. sendRecvErrorNever
  99. sendRecvErrorPrivate
  100. )
  101. func (s sendRecvErrorConfig) ShouldSendRecvError(ip netip.AddrPort) bool {
  102. switch s {
  103. case sendRecvErrorPrivate:
  104. return ip.Addr().IsPrivate()
  105. case sendRecvErrorAlways:
  106. return true
  107. case sendRecvErrorNever:
  108. return false
  109. default:
  110. panic(fmt.Errorf("invalid sendRecvErrorConfig value: %d", s))
  111. }
  112. }
  113. func (s sendRecvErrorConfig) String() string {
  114. switch s {
  115. case sendRecvErrorAlways:
  116. return "always"
  117. case sendRecvErrorNever:
  118. return "never"
  119. case sendRecvErrorPrivate:
  120. return "private"
  121. default:
  122. return fmt.Sprintf("invalid(%d)", s)
  123. }
  124. }
  125. func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
  126. if c.Outside == nil {
  127. return nil, errors.New("no outside connection")
  128. }
  129. if c.Inside == nil {
  130. return nil, errors.New("no inside interface (tun)")
  131. }
  132. if c.pki == nil {
  133. return nil, errors.New("no certificate state")
  134. }
  135. if c.Firewall == nil {
  136. return nil, errors.New("no firewall rules")
  137. }
  138. certificate := c.pki.GetCertState().Certificate
  139. ifce := &Interface{
  140. pki: c.pki,
  141. hostMap: c.HostMap,
  142. outside: c.Outside,
  143. inside: c.Inside,
  144. cipher: c.Cipher,
  145. firewall: c.Firewall,
  146. serveDns: c.ServeDns,
  147. handshakeManager: c.HandshakeManager,
  148. createTime: time.Now(),
  149. lightHouse: c.lightHouse,
  150. dropLocalBroadcast: c.DropLocalBroadcast,
  151. dropMulticast: c.DropMulticast,
  152. routines: c.routines,
  153. version: c.version,
  154. writers: make([]udp.Conn, c.routines),
  155. readers: make([]io.ReadWriteCloser, c.routines),
  156. myVpnNet: certificate.Networks()[0],
  157. relayManager: c.relayManager,
  158. conntrackCacheTimeout: c.ConntrackCacheTimeout,
  159. metricHandshakes: metrics.GetOrRegisterHistogram("handshakes", nil, metrics.NewExpDecaySample(1028, 0.015)),
  160. messageMetrics: c.MessageMetrics,
  161. cachedPacketMetrics: &cachedPacketMetrics{
  162. sent: metrics.GetOrRegisterCounter("hostinfo.cached_packets.sent", nil),
  163. dropped: metrics.GetOrRegisterCounter("hostinfo.cached_packets.dropped", nil),
  164. },
  165. l: c.l,
  166. }
  167. if ifce.myVpnNet.Addr().Is4() {
  168. maskedAddr := certificate.Networks()[0].Masked()
  169. addr := maskedAddr.Addr().As4()
  170. mask := net.CIDRMask(maskedAddr.Bits(), maskedAddr.Addr().BitLen())
  171. binary.BigEndian.PutUint32(addr[:], binary.BigEndian.Uint32(addr[:])|^binary.BigEndian.Uint32(mask))
  172. ifce.myBroadcastAddr = netip.AddrFrom4(addr)
  173. }
  174. ifce.tryPromoteEvery.Store(c.tryPromoteEvery)
  175. ifce.reQueryEvery.Store(c.reQueryEvery)
  176. ifce.reQueryWait.Store(int64(c.reQueryWait))
  177. ifce.connectionManager = newConnectionManager(ctx, c.l, ifce, c.checkInterval, c.pendingDeletionInterval, c.punchy)
  178. return ifce, nil
  179. }
  180. // activate creates the interface on the host. After the interface is created, any
  181. // other services that want to bind listeners to its IP may do so successfully. However,
  182. // the interface isn't going to process anything until run() is called.
  183. func (f *Interface) activate() {
  184. // actually turn on tun dev
  185. addr, err := f.outside.LocalAddr()
  186. if err != nil {
  187. f.l.WithError(err).Error("Failed to get udp listen address")
  188. }
  189. f.l.WithField("interface", f.inside.Name()).WithField("network", f.inside.Cidr().String()).
  190. WithField("build", f.version).WithField("udpAddr", addr).
  191. WithField("boringcrypto", boringEnabled()).
  192. Info("Nebula interface is active")
  193. metrics.GetOrRegisterGauge("routines", nil).Update(int64(f.routines))
  194. // Prepare n tun queues
  195. var reader io.ReadWriteCloser = f.inside
  196. for i := 0; i < f.routines; i++ {
  197. if i > 0 {
  198. reader, err = f.inside.NewMultiQueueReader()
  199. if err != nil {
  200. f.l.Fatal(err)
  201. }
  202. }
  203. f.readers[i] = reader
  204. }
  205. if err := f.inside.Activate(); err != nil {
  206. f.inside.Close()
  207. f.l.Fatal(err)
  208. }
  209. }
  210. func (f *Interface) run() {
  211. // Launch n queues to read packets from udp
  212. for i := 0; i < f.routines; i++ {
  213. go f.listenOut(i)
  214. }
  215. // Launch n queues to read packets from tun dev
  216. for i := 0; i < f.routines; i++ {
  217. go f.listenIn(f.readers[i], i)
  218. }
  219. }
  220. func (f *Interface) listenOut(i int) {
  221. runtime.LockOSThread()
  222. var li udp.Conn
  223. // TODO clean this up with a coherent interface for each outside connection
  224. if i > 0 {
  225. li = f.writers[i]
  226. } else {
  227. li = f.outside
  228. }
  229. lhh := f.lightHouse.NewRequestHandler()
  230. conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
  231. li.ListenOut(readOutsidePackets(f), lhHandleRequest(lhh, f), conntrackCache, i)
  232. }
  233. func (f *Interface) listenIn(reader io.ReadWriteCloser, i int) {
  234. runtime.LockOSThread()
  235. packet := make([]byte, mtu)
  236. out := make([]byte, mtu)
  237. fwPacket := &firewall.Packet{}
  238. nb := make([]byte, 12, 12)
  239. conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
  240. for {
  241. n, err := reader.Read(packet)
  242. if err != nil {
  243. if errors.Is(err, os.ErrClosed) && f.closed.Load() {
  244. return
  245. }
  246. f.l.WithError(err).Error("Error while reading outbound packet")
  247. // This only seems to happen when something fatal happens to the fd, so exit.
  248. os.Exit(2)
  249. }
  250. f.consumeInsidePacket(packet[:n], fwPacket, nb, out, i, conntrackCache.Get(f.l))
  251. }
  252. }
  253. func (f *Interface) RegisterConfigChangeCallbacks(c *config.C) {
  254. c.RegisterReloadCallback(f.reloadFirewall)
  255. c.RegisterReloadCallback(f.reloadSendRecvError)
  256. c.RegisterReloadCallback(f.reloadDisconnectInvalid)
  257. c.RegisterReloadCallback(f.reloadMisc)
  258. for _, udpConn := range f.writers {
  259. c.RegisterReloadCallback(udpConn.ReloadConfig)
  260. }
  261. }
  262. func (f *Interface) reloadDisconnectInvalid(c *config.C) {
  263. initial := c.InitialLoad()
  264. if initial || c.HasChanged("pki.disconnect_invalid") {
  265. f.disconnectInvalid.Store(c.GetBool("pki.disconnect_invalid", true))
  266. if !initial {
  267. f.l.Infof("pki.disconnect_invalid changed to %v", f.disconnectInvalid.Load())
  268. }
  269. }
  270. }
  271. func (f *Interface) reloadFirewall(c *config.C) {
  272. //TODO: need to trigger/detect if the certificate changed too
  273. if c.HasChanged("firewall") == false {
  274. f.l.Debug("No firewall config change detected")
  275. return
  276. }
  277. fw, err := NewFirewallFromConfig(f.l, f.pki.GetCertState().Certificate, c)
  278. if err != nil {
  279. f.l.WithError(err).Error("Error while creating firewall during reload")
  280. return
  281. }
  282. oldFw := f.firewall
  283. conntrack := oldFw.Conntrack
  284. conntrack.Lock()
  285. defer conntrack.Unlock()
  286. fw.rulesVersion = oldFw.rulesVersion + 1
  287. // If rulesVersion is back to zero, we have wrapped all the way around. Be
  288. // safe and just reset conntrack in this case.
  289. if fw.rulesVersion == 0 {
  290. f.l.WithField("firewallHashes", fw.GetRuleHashes()).
  291. WithField("oldFirewallHashes", oldFw.GetRuleHashes()).
  292. WithField("rulesVersion", fw.rulesVersion).
  293. Warn("firewall rulesVersion has overflowed, resetting conntrack")
  294. } else {
  295. fw.Conntrack = conntrack
  296. }
  297. f.firewall = fw
  298. oldFw.Destroy()
  299. f.l.WithField("firewallHashes", fw.GetRuleHashes()).
  300. WithField("oldFirewallHashes", oldFw.GetRuleHashes()).
  301. WithField("rulesVersion", fw.rulesVersion).
  302. Info("New firewall has been installed")
  303. }
  304. func (f *Interface) reloadSendRecvError(c *config.C) {
  305. if c.InitialLoad() || c.HasChanged("listen.send_recv_error") {
  306. stringValue := c.GetString("listen.send_recv_error", "always")
  307. switch stringValue {
  308. case "always":
  309. f.sendRecvErrorConfig = sendRecvErrorAlways
  310. case "never":
  311. f.sendRecvErrorConfig = sendRecvErrorNever
  312. case "private":
  313. f.sendRecvErrorConfig = sendRecvErrorPrivate
  314. default:
  315. if c.GetBool("listen.send_recv_error", true) {
  316. f.sendRecvErrorConfig = sendRecvErrorAlways
  317. } else {
  318. f.sendRecvErrorConfig = sendRecvErrorNever
  319. }
  320. }
  321. f.l.WithField("sendRecvError", f.sendRecvErrorConfig.String()).
  322. Info("Loaded send_recv_error config")
  323. }
  324. }
  325. func (f *Interface) reloadMisc(c *config.C) {
  326. if c.HasChanged("counters.try_promote") {
  327. n := c.GetUint32("counters.try_promote", defaultPromoteEvery)
  328. f.tryPromoteEvery.Store(n)
  329. f.l.Info("counters.try_promote has changed")
  330. }
  331. if c.HasChanged("counters.requery_every_packets") {
  332. n := c.GetUint32("counters.requery_every_packets", defaultReQueryEvery)
  333. f.reQueryEvery.Store(n)
  334. f.l.Info("counters.requery_every_packets has changed")
  335. }
  336. if c.HasChanged("timers.requery_wait_duration") {
  337. n := c.GetDuration("timers.requery_wait_duration", defaultReQueryWait)
  338. f.reQueryWait.Store(int64(n))
  339. f.l.Info("timers.requery_wait_duration has changed")
  340. }
  341. }
  342. func (f *Interface) emitStats(ctx context.Context, i time.Duration) {
  343. ticker := time.NewTicker(i)
  344. defer ticker.Stop()
  345. udpStats := udp.NewUDPStatsEmitter(f.writers)
  346. certExpirationGauge := metrics.GetOrRegisterGauge("certificate.ttl_seconds", nil)
  347. for {
  348. select {
  349. case <-ctx.Done():
  350. return
  351. case <-ticker.C:
  352. f.firewall.EmitStats()
  353. f.handshakeManager.EmitStats()
  354. udpStats()
  355. certExpirationGauge.Update(int64(f.pki.GetCertState().Certificate.NotAfter().Sub(time.Now()) / time.Second))
  356. }
  357. }
  358. }
  359. func (f *Interface) Close() error {
  360. f.closed.Store(true)
  361. for _, u := range f.writers {
  362. err := u.Close()
  363. if err != nil {
  364. f.l.WithError(err).Error("Error while closing udp socket")
  365. }
  366. }
  367. // Release the tun device
  368. return f.inside.Close()
  369. }