2
0

interface.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429
  1. package nebula
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "net"
  8. "os"
  9. "runtime"
  10. "sync/atomic"
  11. "time"
  12. "github.com/rcrowley/go-metrics"
  13. "github.com/sirupsen/logrus"
  14. "github.com/slackhq/nebula/config"
  15. "github.com/slackhq/nebula/firewall"
  16. "github.com/slackhq/nebula/header"
  17. "github.com/slackhq/nebula/iputil"
  18. "github.com/slackhq/nebula/overlay"
  19. "github.com/slackhq/nebula/udp"
  20. )
  // mtu is the size, in bytes, of the packet and output scratch buffers that
  // listenIn allocates for each reader routine.
  const mtu = 9001
  22. type InterfaceConfig struct {
  23. HostMap *HostMap
  24. Outside udp.Conn
  25. Inside overlay.Device
  26. pki *PKI
  27. Cipher string
  28. Firewall *Firewall
  29. ServeDns bool
  30. HandshakeManager *HandshakeManager
  31. lightHouse *LightHouse
  32. checkInterval time.Duration
  33. pendingDeletionInterval time.Duration
  34. DropLocalBroadcast bool
  35. DropMulticast bool
  36. routines int
  37. MessageMetrics *MessageMetrics
  38. version string
  39. relayManager *relayManager
  40. punchy *Punchy
  41. tryPromoteEvery uint32
  42. reQueryEvery uint32
  43. reQueryWait time.Duration
  44. ConntrackCacheTimeout time.Duration
  45. l *logrus.Logger
  46. }
  47. type Interface struct {
  48. hostMap *HostMap
  49. outside udp.Conn
  50. inside overlay.Device
  51. pki *PKI
  52. cipher string
  53. firewall *Firewall
  54. connectionManager *connectionManager
  55. handshakeManager *HandshakeManager
  56. serveDns bool
  57. createTime time.Time
  58. lightHouse *LightHouse
  59. localBroadcast iputil.VpnIp
  60. myVpnIp iputil.VpnIp
  61. dropLocalBroadcast bool
  62. dropMulticast bool
  63. routines int
  64. disconnectInvalid atomic.Bool
  65. closed atomic.Bool
  66. relayManager *relayManager
  67. tryPromoteEvery atomic.Uint32
  68. reQueryEvery atomic.Uint32
  69. reQueryWait atomic.Int64
  70. sendRecvErrorConfig sendRecvErrorConfig
  71. // rebindCount is used to decide if an active tunnel should trigger a punch notification through a lighthouse
  72. rebindCount int8
  73. version string
  74. conntrackCacheTimeout time.Duration
  75. writers []udp.Conn
  76. readers []io.ReadWriteCloser
  77. metricHandshakes metrics.Histogram
  78. messageMetrics *MessageMetrics
  79. cachedPacketMetrics *cachedPacketMetrics
  80. l *logrus.Logger
  81. }
  82. type EncWriter interface {
  83. SendVia(via *HostInfo,
  84. relay *Relay,
  85. ad,
  86. nb,
  87. out []byte,
  88. nocopy bool,
  89. )
  90. SendMessageToVpnIp(t header.MessageType, st header.MessageSubType, vpnIp iputil.VpnIp, p, nb, out []byte)
  91. SendMessageToHostInfo(t header.MessageType, st header.MessageSubType, hostinfo *HostInfo, p, nb, out []byte)
  92. Handshake(vpnIp iputil.VpnIp)
  93. }
  // sendRecvErrorConfig is the policy deciding whether a recv_error may be sent
  // to a given remote address.
  type sendRecvErrorConfig uint8

  const (
  	// sendRecvErrorAlways allows sending to any address. It is the zero value.
  	sendRecvErrorAlways sendRecvErrorConfig = iota
  	// sendRecvErrorNever disallows sending to every address.
  	sendRecvErrorNever
  	// sendRecvErrorPrivate allows sending only to private addresses.
  	sendRecvErrorPrivate
  )

  // ShouldSendRecvError reports whether the policy allows sending to ip.
  // It panics if s is not one of the declared policy values.
  func (s sendRecvErrorConfig) ShouldSendRecvError(ip net.IP) bool {
  	if s == sendRecvErrorAlways {
  		return true
  	}
  	if s == sendRecvErrorNever {
  		return false
  	}
  	if s == sendRecvErrorPrivate {
  		return ip.IsPrivate()
  	}
  	panic(fmt.Errorf("invalid sendRecvErrorConfig value: %d", s))
  }

  // String returns the config-file spelling of the policy ("always", "never" or
  // "private"), or "invalid(N)" for any other value.
  func (s sendRecvErrorConfig) String() string {
  	names := [...]string{
  		sendRecvErrorAlways:  "always",
  		sendRecvErrorNever:   "never",
  		sendRecvErrorPrivate: "private",
  	}
  	if int(s) < len(names) {
  		return names[s]
  	}
  	return fmt.Sprintf("invalid(%d)", s)
  }
  124. func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
  125. if c.Outside == nil {
  126. return nil, errors.New("no outside connection")
  127. }
  128. if c.Inside == nil {
  129. return nil, errors.New("no inside interface (tun)")
  130. }
  131. if c.pki == nil {
  132. return nil, errors.New("no certificate state")
  133. }
  134. if c.Firewall == nil {
  135. return nil, errors.New("no firewall rules")
  136. }
  137. certificate := c.pki.GetCertState().Certificate
  138. myVpnIp := iputil.Ip2VpnIp(certificate.Details.Ips[0].IP)
  139. ifce := &Interface{
  140. pki: c.pki,
  141. hostMap: c.HostMap,
  142. outside: c.Outside,
  143. inside: c.Inside,
  144. cipher: c.Cipher,
  145. firewall: c.Firewall,
  146. serveDns: c.ServeDns,
  147. handshakeManager: c.HandshakeManager,
  148. createTime: time.Now(),
  149. lightHouse: c.lightHouse,
  150. localBroadcast: myVpnIp | ^iputil.Ip2VpnIp(certificate.Details.Ips[0].Mask),
  151. dropLocalBroadcast: c.DropLocalBroadcast,
  152. dropMulticast: c.DropMulticast,
  153. routines: c.routines,
  154. version: c.version,
  155. writers: make([]udp.Conn, c.routines),
  156. readers: make([]io.ReadWriteCloser, c.routines),
  157. myVpnIp: myVpnIp,
  158. relayManager: c.relayManager,
  159. conntrackCacheTimeout: c.ConntrackCacheTimeout,
  160. metricHandshakes: metrics.GetOrRegisterHistogram("handshakes", nil, metrics.NewExpDecaySample(1028, 0.015)),
  161. messageMetrics: c.MessageMetrics,
  162. cachedPacketMetrics: &cachedPacketMetrics{
  163. sent: metrics.GetOrRegisterCounter("hostinfo.cached_packets.sent", nil),
  164. dropped: metrics.GetOrRegisterCounter("hostinfo.cached_packets.dropped", nil),
  165. },
  166. l: c.l,
  167. }
  168. ifce.tryPromoteEvery.Store(c.tryPromoteEvery)
  169. ifce.reQueryEvery.Store(c.reQueryEvery)
  170. ifce.reQueryWait.Store(int64(c.reQueryWait))
  171. ifce.connectionManager = newConnectionManager(ctx, c.l, ifce, c.checkInterval, c.pendingDeletionInterval, c.punchy)
  172. return ifce, nil
  173. }
  174. // activate creates the interface on the host. After the interface is created, any
  175. // other services that want to bind listeners to its IP may do so successfully. However,
  176. // the interface isn't going to process anything until run() is called.
  177. func (f *Interface) activate() {
  178. // actually turn on tun dev
  179. addr, err := f.outside.LocalAddr()
  180. if err != nil {
  181. f.l.WithError(err).Error("Failed to get udp listen address")
  182. }
  183. f.l.WithField("interface", f.inside.Name()).WithField("network", f.inside.Cidr().String()).
  184. WithField("build", f.version).WithField("udpAddr", addr).
  185. WithField("boringcrypto", boringEnabled()).
  186. Info("Nebula interface is active")
  187. metrics.GetOrRegisterGauge("routines", nil).Update(int64(f.routines))
  188. // Prepare n tun queues
  189. var reader io.ReadWriteCloser = f.inside
  190. for i := 0; i < f.routines; i++ {
  191. if i > 0 {
  192. reader, err = f.inside.NewMultiQueueReader()
  193. if err != nil {
  194. f.l.Fatal(err)
  195. }
  196. }
  197. f.readers[i] = reader
  198. }
  199. if err := f.inside.Activate(); err != nil {
  200. f.inside.Close()
  201. f.l.Fatal(err)
  202. }
  203. }
  204. func (f *Interface) run() {
  205. // Launch n queues to read packets from udp
  206. for i := 0; i < f.routines; i++ {
  207. go f.listenOut(i)
  208. }
  209. // Launch n queues to read packets from tun dev
  210. for i := 0; i < f.routines; i++ {
  211. go f.listenIn(f.readers[i], i)
  212. }
  213. }
  214. func (f *Interface) listenOut(i int) {
  215. runtime.LockOSThread()
  216. var li udp.Conn
  217. // TODO clean this up with a coherent interface for each outside connection
  218. if i > 0 {
  219. li = f.writers[i]
  220. } else {
  221. li = f.outside
  222. }
  223. lhh := f.lightHouse.NewRequestHandler()
  224. conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
  225. li.ListenOut(readOutsidePackets(f), lhHandleRequest(lhh, f), conntrackCache, i)
  226. }
  227. func (f *Interface) listenIn(reader io.ReadWriteCloser, i int) {
  228. runtime.LockOSThread()
  229. packet := make([]byte, mtu)
  230. out := make([]byte, mtu)
  231. fwPacket := &firewall.Packet{}
  232. nb := make([]byte, 12, 12)
  233. conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
  234. for {
  235. n, err := reader.Read(packet)
  236. if err != nil {
  237. if errors.Is(err, os.ErrClosed) && f.closed.Load() {
  238. return
  239. }
  240. f.l.WithError(err).Error("Error while reading outbound packet")
  241. // This only seems to happen when something fatal happens to the fd, so exit.
  242. os.Exit(2)
  243. }
  244. f.consumeInsidePacket(packet[:n], fwPacket, nb, out, i, conntrackCache.Get(f.l))
  245. }
  246. }
  247. func (f *Interface) RegisterConfigChangeCallbacks(c *config.C) {
  248. c.RegisterReloadCallback(f.reloadFirewall)
  249. c.RegisterReloadCallback(f.reloadSendRecvError)
  250. c.RegisterReloadCallback(f.reloadDisconnectInvalid)
  251. c.RegisterReloadCallback(f.reloadMisc)
  252. for _, udpConn := range f.writers {
  253. c.RegisterReloadCallback(udpConn.ReloadConfig)
  254. }
  255. }
  256. func (f *Interface) reloadDisconnectInvalid(c *config.C) {
  257. initial := c.InitialLoad()
  258. if initial || c.HasChanged("pki.disconnect_invalid") {
  259. f.disconnectInvalid.Store(c.GetBool("pki.disconnect_invalid", true))
  260. if !initial {
  261. f.l.Infof("pki.disconnect_invalid changed to %v", f.disconnectInvalid.Load())
  262. }
  263. }
  264. }
  265. func (f *Interface) reloadFirewall(c *config.C) {
  266. //TODO: need to trigger/detect if the certificate changed too
  267. if c.HasChanged("firewall") == false {
  268. f.l.Debug("No firewall config change detected")
  269. return
  270. }
  271. fw, err := NewFirewallFromConfig(f.l, f.pki.GetCertState().Certificate, c)
  272. if err != nil {
  273. f.l.WithError(err).Error("Error while creating firewall during reload")
  274. return
  275. }
  276. oldFw := f.firewall
  277. conntrack := oldFw.Conntrack
  278. conntrack.Lock()
  279. defer conntrack.Unlock()
  280. fw.rulesVersion = oldFw.rulesVersion + 1
  281. // If rulesVersion is back to zero, we have wrapped all the way around. Be
  282. // safe and just reset conntrack in this case.
  283. if fw.rulesVersion == 0 {
  284. f.l.WithField("firewallHashes", fw.GetRuleHashes()).
  285. WithField("oldFirewallHashes", oldFw.GetRuleHashes()).
  286. WithField("rulesVersion", fw.rulesVersion).
  287. Warn("firewall rulesVersion has overflowed, resetting conntrack")
  288. } else {
  289. fw.Conntrack = conntrack
  290. }
  291. f.firewall = fw
  292. oldFw.Destroy()
  293. f.l.WithField("firewallHashes", fw.GetRuleHashes()).
  294. WithField("oldFirewallHashes", oldFw.GetRuleHashes()).
  295. WithField("rulesVersion", fw.rulesVersion).
  296. Info("New firewall has been installed")
  297. }
  298. func (f *Interface) reloadSendRecvError(c *config.C) {
  299. if c.InitialLoad() || c.HasChanged("listen.send_recv_error") {
  300. stringValue := c.GetString("listen.send_recv_error", "always")
  301. switch stringValue {
  302. case "always":
  303. f.sendRecvErrorConfig = sendRecvErrorAlways
  304. case "never":
  305. f.sendRecvErrorConfig = sendRecvErrorNever
  306. case "private":
  307. f.sendRecvErrorConfig = sendRecvErrorPrivate
  308. default:
  309. if c.GetBool("listen.send_recv_error", true) {
  310. f.sendRecvErrorConfig = sendRecvErrorAlways
  311. } else {
  312. f.sendRecvErrorConfig = sendRecvErrorNever
  313. }
  314. }
  315. f.l.WithField("sendRecvError", f.sendRecvErrorConfig.String()).
  316. Info("Loaded send_recv_error config")
  317. }
  318. }
  319. func (f *Interface) reloadMisc(c *config.C) {
  320. if c.HasChanged("counters.try_promote") {
  321. n := c.GetUint32("counters.try_promote", defaultPromoteEvery)
  322. f.tryPromoteEvery.Store(n)
  323. f.l.Info("counters.try_promote has changed")
  324. }
  325. if c.HasChanged("counters.requery_every_packets") {
  326. n := c.GetUint32("counters.requery_every_packets", defaultReQueryEvery)
  327. f.reQueryEvery.Store(n)
  328. f.l.Info("counters.requery_every_packets has changed")
  329. }
  330. if c.HasChanged("timers.requery_wait_duration") {
  331. n := c.GetDuration("timers.requery_wait_duration", defaultReQueryWait)
  332. f.reQueryWait.Store(int64(n))
  333. f.l.Info("timers.requery_wait_duration has changed")
  334. }
  335. }
  336. func (f *Interface) emitStats(ctx context.Context, i time.Duration) {
  337. ticker := time.NewTicker(i)
  338. defer ticker.Stop()
  339. udpStats := udp.NewUDPStatsEmitter(f.writers)
  340. certExpirationGauge := metrics.GetOrRegisterGauge("certificate.ttl_seconds", nil)
  341. for {
  342. select {
  343. case <-ctx.Done():
  344. return
  345. case <-ticker.C:
  346. f.firewall.EmitStats()
  347. f.handshakeManager.EmitStats()
  348. udpStats()
  349. certExpirationGauge.Update(int64(f.pki.GetCertState().Certificate.Details.NotAfter.Sub(time.Now()) / time.Second))
  350. }
  351. }
  352. }
  353. func (f *Interface) Close() error {
  354. f.closed.Store(true)
  355. for _, u := range f.writers {
  356. err := u.Close()
  357. if err != nil {
  358. f.l.WithError(err).Error("Error while closing udp socket")
  359. }
  360. }
  361. // Release the tun device
  362. return f.inside.Close()
  363. }