
Relay (#678)

Co-authored-by: Wade Simmons <[email protected]>
brad-defined 3 years ago
commit 1a7c575011

+ 8 - 0
.github/workflows/smoke.yml

@@ -45,4 +45,12 @@ jobs:
       working-directory: ./.github/workflows/smoke
       run: ./smoke.sh
 
+    - name: setup relay docker image
+      working-directory: ./.github/workflows/smoke
+      run: ./build-relay.sh
+
+    - name: run smoke relay
+      working-directory: ./.github/workflows/smoke
+      run: ./smoke-relay.sh
+
     timeout-minutes: 10

+ 44 - 0
.github/workflows/smoke/build-relay.sh

@@ -0,0 +1,44 @@
+#!/bin/sh
+
+set -e -x
+
+rm -rf ./build
+mkdir ./build
+
+(
+    cd build
+
+    cp ../../../../build/linux-amd64/nebula .
+    cp ../../../../build/linux-amd64/nebula-cert .
+
+    HOST="lighthouse1" AM_LIGHTHOUSE=true ../genconfig.sh >lighthouse1.yml <<EOF
+relay:
+  am_relay: true
+EOF
+
+    export LIGHTHOUSES="192.168.100.1 172.17.0.2:4242"
+    export REMOTE_ALLOW_LIST='{"172.17.0.4/32": false, "172.17.0.5/32": false}'
+
+    HOST="host2" ../genconfig.sh >host2.yml <<EOF
+relay:
+  relays:
+    - 192.168.100.1
+EOF
+
+    export REMOTE_ALLOW_LIST='{"172.17.0.3/32": false}'
+
+    HOST="host3" ../genconfig.sh >host3.yml
+
+    HOST="host4" ../genconfig.sh >host4.yml <<EOF
+relay:
+  use_relays: false
+EOF
+
+    ../../../../nebula-cert ca -name "Smoke Test"
+    ../../../../nebula-cert sign -name "lighthouse1" -groups "lighthouse,lighthouse1" -ip "192.168.100.1/24"
+    ../../../../nebula-cert sign -name "host2" -groups "host,host2" -ip "192.168.100.2/24"
+    ../../../../nebula-cert sign -name "host3" -groups "host,host3" -ip "192.168.100.3/24"
+    ../../../../nebula-cert sign -name "host4" -groups "host,host4" -ip "192.168.100.4/24"
+)
+
+sudo docker build -t nebula:smoke-relay .

+ 3 - 0
.github/workflows/smoke/genconfig.sh

@@ -40,6 +40,7 @@ pki:
 lighthouse:
   am_lighthouse: ${AM_LIGHTHOUSE:-false}
   hosts: $(lighthouse_hosts)
+  remote_allow_list: ${REMOTE_ALLOW_LIST}
 
 listen:
   host: 0.0.0.0
@@ -51,4 +52,6 @@ tun:
 firewall:
   outbound: ${OUTBOUND:-$FIREWALL_ALL}
   inbound: ${INBOUND:-$FIREWALL_ALL}
+
+$(test -t 0 || cat)
 EOF
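
The "$(test -t 0 || cat)" addition is what lets build-relay.sh extend each generated config: anything piped to genconfig.sh on stdin (the per-host heredocs above) is appended verbatim to the emitted YAML, while an interactive stdin appends nothing. A rough Go restatement of the idea, for illustration only; golang.org/x/term is assumed just for this sketch:

    package main

    import (
    	"io"
    	"os"

    	"golang.org/x/term"
    )

    func main() {
    	// Mirror "$(test -t 0 || cat)": append stdin to the generated config
    	// only when stdin is a pipe or redirected file, not a terminal.
    	if !term.IsTerminal(int(os.Stdin.Fd())) {
    		io.Copy(os.Stdout, os.Stdin) // forward the heredoc verbatim
    	}
    }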

+ 85 - 0
.github/workflows/smoke/smoke-relay.sh

@@ -0,0 +1,85 @@
+#!/bin/bash
+
+set -e -x
+
+set -o pipefail
+
+mkdir -p logs
+
+cleanup() {
+    echo
+    echo " *** cleanup"
+    echo
+
+    set +e
+    if [ "$(jobs -r)" ]
+    then
+        sudo docker kill lighthouse1 host2 host3 host4
+    fi
+}
+
+trap cleanup EXIT
+
+sudo docker run --name lighthouse1 --rm nebula:smoke-relay -config lighthouse1.yml -test
+sudo docker run --name host2 --rm nebula:smoke-relay -config host2.yml -test
+sudo docker run --name host3 --rm nebula:smoke-relay -config host3.yml -test
+sudo docker run --name host4 --rm nebula:smoke-relay -config host4.yml -test
+
+sudo docker run --name lighthouse1 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm nebula:smoke-relay -config lighthouse1.yml 2>&1 | tee logs/lighthouse1 | sed -u 's/^/  [lighthouse1]  /' &
+sleep 1
+sudo docker run --name host2 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm nebula:smoke-relay -config host2.yml 2>&1 | tee logs/host2 | sed -u 's/^/  [host2]  /' &
+sleep 1
+sudo docker run --name host3 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm nebula:smoke-relay -config host3.yml 2>&1 | tee logs/host3 | sed -u 's/^/  [host3]  /' &
+sleep 1
+sudo docker run --name host4 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm nebula:smoke-relay -config host4.yml 2>&1 | tee logs/host4 | sed -u 's/^/  [host4]  /' &
+sleep 1
+
+set +x
+echo
+echo " *** Testing ping from lighthouse1"
+echo
+set -x
+sudo docker exec lighthouse1 ping -c1 192.168.100.2
+sudo docker exec lighthouse1 ping -c1 192.168.100.3
+sudo docker exec lighthouse1 ping -c1 192.168.100.4
+
+set +x
+echo
+echo " *** Testing ping from host2"
+echo
+set -x
+sudo docker exec host2 ping -c1 192.168.100.1
+# Should fail because no relay configured in this direction
+! sudo docker exec host2 ping -c1 192.168.100.3 -w5 || exit 1
+! sudo docker exec host2 ping -c1 192.168.100.4 -w5 || exit 1
+
+set +x
+echo
+echo " *** Testing ping from host3"
+echo
+set -x
+sudo docker exec host3 ping -c1 192.168.100.1
+sudo docker exec host3 ping -c1 192.168.100.2
+sudo docker exec host3 ping -c1 192.168.100.4
+
+set +x
+echo
+echo " *** Testing ping from host4"
+echo
+set -x
+sudo docker exec host4 ping -c1 192.168.100.1
+# Should fail because relays not allowed
+! sudo docker exec host4 ping -c1 192.168.100.2 -w5 || exit 1
+sudo docker exec host4 ping -c1 192.168.100.3
+
+sudo docker exec host4 sh -c 'kill 1'
+sudo docker exec host3 sh -c 'kill 1'
+sudo docker exec host2 sh -c 'kill 1'
+sudo docker exec lighthouse1 sh -c 'kill 1'
+sleep 1
+
+if [ "$(jobs -r)" ]
+then
+    echo "nebula still running after SIGTERM sent" >&2
+    exit 1
+fi
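
For orientation, this is the reachability matrix the pings above assert, as implied by the configs generated in build-relay.sh (rows are the ping source):

                  lighthouse1   host2   host3   host4
    lighthouse1        -          ok      ok      ok
    host2              ok         -      fail    fail   (peers advertise no relay back)
    host3              ok         ok      -       ok
    host4              ok        fail     ok      -     (use_relays: false)

host2 can only be reached through the relay, so host4 (which refuses relays) fails against it while host3 succeeds; host3 and host4 advertise no relays, so host2's blocked direct routes leave it no path to them.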

+ 14 - 4
.github/workflows/smoke/smoke.sh

@@ -7,6 +7,10 @@ set -o pipefail
 mkdir -p logs
 
 cleanup() {
+    echo
+    echo " *** cleanup"
+    echo
+
     set +e
     if [ "$(jobs -r)" ]
     then
@@ -21,13 +25,13 @@ sudo docker run --name host2 --rm nebula:smoke -config host2.yml -test
 sudo docker run --name host3 --rm nebula:smoke -config host3.yml -test
 sudo docker run --name host4 --rm nebula:smoke -config host4.yml -test
 
-sudo docker run --name lighthouse1 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm nebula:smoke -config lighthouse1.yml 2>&1 | tee logs/lighthouse1 &
+sudo docker run --name lighthouse1 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm nebula:smoke -config lighthouse1.yml 2>&1 | tee logs/lighthouse1 | sed -u 's/^/  [lighthouse1]  /' &
 sleep 1
-sudo docker run --name host2 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm nebula:smoke -config host2.yml 2>&1 | tee logs/host2 &
+sudo docker run --name host2 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm nebula:smoke -config host2.yml 2>&1 | tee logs/host2 | sed -u 's/^/  [host2]  /' &
 sleep 1
-sudo docker run --name host3 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm nebula:smoke -config host3.yml 2>&1 | tee logs/host3 &
+sudo docker run --name host3 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm nebula:smoke -config host3.yml 2>&1 | tee logs/host3 | sed -u 's/^/  [host3]  /' &
 sleep 1
-sudo docker run --name host4 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm nebula:smoke -config host4.yml 2>&1 | tee logs/host4 &
+sudo docker run --name host4 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm nebula:smoke -config host4.yml 2>&1 | tee logs/host4 | sed -u 's/^/  [host4]  /' &
 sleep 1
 
 set +x
@@ -81,3 +85,9 @@ sudo docker exec host3 sh -c 'kill 1'
 sudo docker exec host2 sh -c 'kill 1'
 sudo docker exec lighthouse1 sh -c 'kill 1'
 sleep 1
+
+if [ "$(jobs -r)" ]
+then
+    echo "nebula still running after SIGTERM sent" >&2
+    exit 1
+fi

+ 4 - 0
Makefile

@@ -168,6 +168,10 @@ smoke-docker: bin-docker
 	cd .github/workflows/smoke/ && ./build.sh
 	cd .github/workflows/smoke/ && ./smoke.sh
 
+smoke-relay-docker: bin-docker
+	cd .github/workflows/smoke/ && ./build-relay.sh
+	cd .github/workflows/smoke/ && ./smoke-relay.sh
+
 smoke-docker-race: BUILD_ARGS = -race
 smoke-docker-race: smoke-docker
 

+ 2 - 2
cert.go

@@ -157,14 +157,14 @@ func loadCAFromConfig(l *logrus.Logger, c *config.C) (*cert.NebulaCAPool, error)
 	}
 
 	for _, fp := range c.GetStringSlice("pki.blocklist", []string{}) {
-		l.WithField("fingerprint", fp).Infof("Blocklisting cert")
+		l.WithField("fingerprint", fp).Info("Blocklisting cert")
 		CAs.BlocklistFingerprint(fp)
 	}
 
 	// Support deprecated config for at least one minor release to allow for migrations
 	//TODO: remove in 2022 or later
 	for _, fp := range c.GetStringSlice("pki.blacklist", []string{}) {
-		l.WithField("fingerprint", fp).Infof("Blocklisting cert")
+		l.WithField("fingerprint", fp).Info("Blocklisting cert")
 		l.Warn("pki.blacklist is deprecated and will not be supported in a future release. Please migrate your config to use pki.blocklist")
 		CAs.BlocklistFingerprint(fp)
 	}

+ 1 - 1
connection_manager.go

@@ -301,7 +301,7 @@ func (n *connectionManager) handleInvalidCertificate(now time.Time, vpnIp iputil
 
 	// Inform the remote and close the tunnel locally
 	n.intf.sendCloseTunnel(hostinfo)
-	n.intf.closeTunnel(hostinfo, false)
+	n.intf.closeTunnel(hostinfo)
 
 	n.ClearIP(vpnIp)
 	n.ClearPendingDeletion(vpnIp)

+ 54 - 26
control.go

@@ -28,14 +28,16 @@ type Control struct {
 }
 
 type ControlHostInfo struct {
-	VpnIp          net.IP                  `json:"vpnIp"`
-	LocalIndex     uint32                  `json:"localIndex"`
-	RemoteIndex    uint32                  `json:"remoteIndex"`
-	RemoteAddrs    []*udp.Addr             `json:"remoteAddrs"`
-	CachedPackets  int                     `json:"cachedPackets"`
-	Cert           *cert.NebulaCertificate `json:"cert"`
-	MessageCounter uint64                  `json:"messageCounter"`
-	CurrentRemote  *udp.Addr               `json:"currentRemote"`
+	VpnIp                  net.IP                  `json:"vpnIp"`
+	LocalIndex             uint32                  `json:"localIndex"`
+	RemoteIndex            uint32                  `json:"remoteIndex"`
+	RemoteAddrs            []*udp.Addr             `json:"remoteAddrs"`
+	CachedPackets          int                     `json:"cachedPackets"`
+	Cert                   *cert.NebulaCertificate `json:"cert"`
+	MessageCounter         uint64                  `json:"messageCounter"`
+	CurrentRemote          *udp.Addr               `json:"currentRemote"`
+	CurrentRelaysToMe      []iputil.VpnIp          `json:"currentRelaysToMe"`
+	CurrentRelaysThroughMe []iputil.VpnIp          `json:"currentRelaysThroughMe"`
 }
 
 // Start actually runs nebula, this is a nonblocking call. To block use Control.ShutdownBlock()
@@ -60,12 +62,14 @@ func (c *Control) Start() {
 
 // Stop signals nebula to shutdown, returns after the shutdown is complete
 func (c *Control) Stop() {
-	//TODO: stop tun and udp routines, the lock on hostMap effectively does that though
+	// Stop the handshakeManager (and other services) to prevent new tunnels from
+	// being created while we're shutting them all down.
+	c.cancel()
+
 	c.CloseAllTunnels(false)
 	if err := c.f.Close(); err != nil {
 		c.l.WithError(err).Error("Close interface failed")
 	}
-	c.cancel()
 	c.l.Info("Goodbye")
 }
 
@@ -144,14 +148,13 @@ func (c *Control) CloseTunnel(vpnIp iputil.VpnIp, localOnly bool) bool {
 			0,
 			hostInfo.ConnectionState,
 			hostInfo,
-			hostInfo.remote,
 			[]byte{},
 			make([]byte, 12, 12),
 			make([]byte, mtu),
 		)
 	}
 
-	c.f.closeTunnel(hostInfo, false)
+	c.f.closeTunnel(hostInfo)
 	return true
 }
 
@@ -159,35 +162,60 @@ func (c *Control) CloseTunnel(vpnIp iputil.VpnIp, localOnly bool) bool {
 // the int returned is a count of tunnels closed
 func (c *Control) CloseAllTunnels(excludeLighthouses bool) (closed int) {
 	//TODO: this is probably better as a function in ConnectionManager or HostMap directly
-	c.f.hostMap.Lock()
 	lighthouses := c.f.lightHouse.GetLighthouses()
-	for _, h := range c.f.hostMap.Hosts {
+
+	shutdown := func(h *HostInfo) {
 		if excludeLighthouses {
 			if _, ok := lighthouses[h.vpnIp]; ok {
-				continue
+				return
 			}
 		}
+		c.f.send(header.CloseTunnel, 0, h.ConnectionState, h, []byte{}, make([]byte, 12, 12), make([]byte, mtu))
+		c.f.closeTunnel(h)
 
-		if h.ConnectionState.ready {
-			c.f.send(header.CloseTunnel, 0, h.ConnectionState, h, h.remote, []byte{}, make([]byte, 12, 12), make([]byte, mtu))
-			c.f.closeTunnel(h, true)
+		c.l.WithField("vpnIp", h.vpnIp).WithField("udpAddr", h.remote).
+			Debug("Sending close tunnel message")
+		closed++
+	}
 
-			c.l.WithField("vpnIp", h.vpnIp).WithField("udpAddr", h.remote).
-				Debug("Sending close tunnel message")
-			closed++
+	// Learn which hosts are being used as relays, so we can shut them down last.
+	relayingHosts := map[iputil.VpnIp]*HostInfo{}
+	// Grab the hostMap lock to access the Relays map
+	c.f.hostMap.Lock()
+	for _, relayingHost := range c.f.hostMap.Relays {
+		relayingHosts[relayingHost.vpnIp] = relayingHost
+	}
+	c.f.hostMap.Unlock()
+
+	hostInfos := []*HostInfo{}
+	// Grab the hostMap lock to access the Hosts map
+	c.f.hostMap.Lock()
+	for _, relayHost := range c.f.hostMap.Hosts {
+		if _, ok := relayingHosts[relayHost.vpnIp]; !ok {
+			hostInfos = append(hostInfos, relayHost)
 		}
 	}
 	c.f.hostMap.Unlock()
+
+	for _, h := range hostInfos {
+		shutdown(h)
+	}
+	for _, h := range relayingHosts {
+		shutdown(h)
+	}
 	return
 }
 
 func copyHostInfo(h *HostInfo, preferredRanges []*net.IPNet) ControlHostInfo {
+
 	chi := ControlHostInfo{
-		VpnIp:         h.vpnIp.ToIP(),
-		LocalIndex:    h.localIndexId,
-		RemoteIndex:   h.remoteIndexId,
-		RemoteAddrs:   h.remotes.CopyAddrs(preferredRanges),
-		CachedPackets: len(h.packetStore),
+		VpnIp:                  h.vpnIp.ToIP(),
+		LocalIndex:             h.localIndexId,
+		RemoteIndex:            h.remoteIndexId,
+		RemoteAddrs:            h.remotes.CopyAddrs(preferredRanges),
+		CachedPackets:          len(h.packetStore),
+		CurrentRelaysToMe:      h.relayState.CopyRelayIps(),
+		CurrentRelaysThroughMe: h.relayState.CopyRelayForIps(),
 	}
 
 	if h.ConnectionState != nil {
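
Two things to note in control.go: CloseAllTunnels now closes ordinary tunnels before the hosts that are relaying for us, so CloseTunnel messages for relayed peers can still transit a live relay tunnel, and ControlHostInfo reports relay state in both directions. A hedged sketch of reading the new fields; the surrounding control-plane wiring (the control variable and vpnIp) is assumed:

    chi := control.GetHostInfoByVpnIp(vpnIp, false)
    for _, relay := range chi.CurrentRelaysToMe {
    	// Nebula IPs this host can be reached through.
    	_ = relay
    }
    for _, target := range chi.CurrentRelaysThroughMe {
    	// Peers this host is currently relaying traffic for.
    	_ = target
    }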

+ 21 - 9
control_test.go

@@ -59,6 +59,11 @@ func TestControl_GetHostInfoByVpnIp(t *testing.T) {
 		remoteIndexId: 200,
 		localIndexId:  201,
 		vpnIp:         iputil.Ip2VpnIp(ipNet.IP),
+		relayState: RelayState{
+			relays:        map[iputil.VpnIp]struct{}{},
+			relayForByIp:  map[iputil.VpnIp]*Relay{},
+			relayForByIdx: map[uint32]*Relay{},
+		},
 	})
 
 	hm.Add(iputil.Ip2VpnIp(ipNet2.IP), &HostInfo{
@@ -70,6 +75,11 @@ func TestControl_GetHostInfoByVpnIp(t *testing.T) {
 		remoteIndexId: 200,
 		localIndexId:  201,
 		vpnIp:         iputil.Ip2VpnIp(ipNet2.IP),
+		relayState: RelayState{
+			relays:        map[iputil.VpnIp]struct{}{},
+			relayForByIp:  map[iputil.VpnIp]*Relay{},
+			relayForByIdx: map[uint32]*Relay{},
+		},
 	})
 
 	c := Control{
@@ -82,18 +92,20 @@ func TestControl_GetHostInfoByVpnIp(t *testing.T) {
 	thi := c.GetHostInfoByVpnIp(iputil.Ip2VpnIp(ipNet.IP), false)
 
 	expectedInfo := ControlHostInfo{
-		VpnIp:          net.IPv4(1, 2, 3, 4).To4(),
-		LocalIndex:     201,
-		RemoteIndex:    200,
-		RemoteAddrs:    []*udp.Addr{remote2, remote1},
-		CachedPackets:  0,
-		Cert:           crt.Copy(),
-		MessageCounter: 0,
-		CurrentRemote:  udp.NewAddr(net.ParseIP("0.0.0.100"), 4444),
+		VpnIp:                  net.IPv4(1, 2, 3, 4).To4(),
+		LocalIndex:             201,
+		RemoteIndex:            200,
+		RemoteAddrs:            []*udp.Addr{remote2, remote1},
+		CachedPackets:          0,
+		Cert:                   crt.Copy(),
+		MessageCounter:         0,
+		CurrentRemote:          udp.NewAddr(net.ParseIP("0.0.0.100"), 4444),
+		CurrentRelaysToMe:      []iputil.VpnIp{},
+		CurrentRelaysThroughMe: []iputil.VpnIp{},
 	}
 
 	// Make sure we don't have any unexpected fields
-	assertFields(t, []string{"VpnIp", "LocalIndex", "RemoteIndex", "RemoteAddrs", "CachedPackets", "Cert", "MessageCounter", "CurrentRemote"}, thi)
+	assertFields(t, []string{"VpnIp", "LocalIndex", "RemoteIndex", "RemoteAddrs", "CachedPackets", "Cert", "MessageCounter", "CurrentRemote", "CurrentRelaysToMe", "CurrentRelaysThroughMe"}, thi)
 	test.AssertDeepCopyEqual(t, &expectedInfo, thi)
 
 	// Make sure we don't panic if the host info doesn't have a cert yet

+ 1 - 1
dns_server.go

@@ -135,7 +135,7 @@ func getDnsServerAddr(c *config.C) string {
 func startDns(l *logrus.Logger, c *config.C) {
 	dnsAddr = getDnsServerAddr(c)
 	dnsServer = &dns.Server{Addr: dnsAddr, Net: "udp"}
-	l.WithField("dnsListener", dnsAddr).Infof("Starting DNS responder")
+	l.WithField("dnsListener", dnsAddr).Info("Starting DNS responder")
 	err := dnsServer.ListenAndServe()
 	defer dnsServer.Shutdown()
 	if err != nil {

+ 14 - 0
examples/config.yml

@@ -153,6 +153,20 @@ punchy:
       #keys:
         #- "ssh public key string"
 
+# EXPERIMENTAL: relay support for networks that can't establish direct connections.
+relay:
+  # Relays are a list of Nebula IPs that peers can use to relay packets to me.
+  # IPs in this list must have am_relay set to true in their configs, otherwise
+  # they will reject relay requests.
+  #relays:
+    #- 192.168.100.1
+    #- <other Nebula VPN IPs of hosts to use as relays to access me>
+  # Set am_relay to true to permit other hosts to list my IP in their relays config. Default false.
+  am_relay: false
+  # Set use_relays to false to prevent this instance from attempting to establish connections through relays.
+  # Default true.
+  use_relays: true
+
 # Configure the private interface. Note: addr is baked into the nebula certificate
 tun:
   # When tun is disabled, a lighthouse can be started without a local tun interface (and therefore without root)

+ 8 - 6
handshake.go

@@ -5,21 +5,23 @@ import (
 	"github.com/slackhq/nebula/udp"
 )
 
-func HandleIncomingHandshake(f *Interface, addr *udp.Addr, packet []byte, h *header.H, hostinfo *HostInfo) {
+func HandleIncomingHandshake(f *Interface, addr *udp.Addr, via interface{}, packet []byte, h *header.H, hostinfo *HostInfo) {
 	// First remote allow list check before we know the vpnIp
-	if !f.lightHouse.GetRemoteAllowList().AllowUnknownVpnIp(addr.IP) {
-		f.l.WithField("udpAddr", addr).Debug("lighthouse.remote_allow_list denied incoming handshake")
-		return
+	if addr != nil {
+		if !f.lightHouse.GetRemoteAllowList().AllowUnknownVpnIp(addr.IP) {
+			f.l.WithField("udpAddr", addr).Debug("lighthouse.remote_allow_list denied incoming handshake")
+			return
+		}
 	}
 
 	switch h.Subtype {
 	case header.HandshakeIXPSK0:
 		switch h.MessageCounter {
 		case 1:
-			ixHandshakeStage1(f, addr, packet, h)
+			ixHandshakeStage1(f, addr, via, packet, h)
 		case 2:
 			newHostinfo, _ := f.handshakeManager.QueryIndex(h.RemoteIndex)
-			tearDown := ixHandshakeStage2(f, addr, newHostinfo, packet, h)
+			tearDown := ixHandshakeStage2(f, addr, via, newHostinfo, packet, h)
 			if tearDown && newHostinfo != nil {
 				f.handshakeManager.DeleteHostInfo(newHostinfo)
 			}
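
After this change exactly one of addr (the direct UDP path) and via (the relayed path) is expected to be non-nil. The relayed call site lives in the outside-path packet handling, which is not part of this excerpt; a hedged sketch of the two call shapes, with placeholder values:

    // Direct path: the handshake arrived on the UDP socket.
    HandleIncomingHandshake(f, addr, nil, packet, h, hostinfo)

    // Relayed path: the handshake was unwrapped from a MessageRelay frame, so
    // there is no udp.Addr for the true sender; the relay's HostInfo and our
    // bookkeeping entry travel in a *ViaSender (defined in hostmap.go below).
    HandleIncomingHandshake(f, nil, &ViaSender{
    	relayHI:   relayHostInfo,
    	remoteIdx: outerHeader.RemoteIndex,
    	relay:     relayEntry,
    }, packet, h, hostinfo)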

+ 72 - 26
handshake_ix.go

@@ -69,7 +69,7 @@ func ixHandshakeStage0(f *Interface, vpnIp iputil.VpnIp, hostinfo *HostInfo) {
 	hostinfo.handshakeStart = time.Now()
 }
 
-func ixHandshakeStage1(f *Interface, addr *udp.Addr, packet []byte, h *header.H) {
+func ixHandshakeStage1(f *Interface, addr *udp.Addr, via interface{}, packet []byte, h *header.H) {
 	ci := f.newConnectionState(f.l, false, noise.HandshakeIX, []byte{}, 0)
 	// Mark packet 1 as seen so it doesn't show up as missed
 	ci.window.Update(f.l, 1)
@@ -113,9 +113,11 @@ func ixHandshakeStage1(f *Interface, addr *udp.Addr, packet []byte, h *header.H)
 		return
 	}
 
-	if !f.lightHouse.GetRemoteAllowList().Allow(vpnIp, addr.IP) {
-		f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).Debug("lighthouse.remote_allow_list denied incoming handshake")
-		return
+	if addr != nil {
+		if !f.lightHouse.GetRemoteAllowList().Allow(vpnIp, addr.IP) {
+			f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).Debug("lighthouse.remote_allow_list denied incoming handshake")
+			return
+		}
 	}
 
 	myIndex, err := generateIndex(f.l)
@@ -135,6 +137,11 @@ func ixHandshakeStage1(f *Interface, addr *udp.Addr, packet []byte, h *header.H)
 		vpnIp:             vpnIp,
 		HandshakePacket:   make(map[uint8][]byte, 0),
 		lastHandshakeTime: hs.Details.Time,
+		relayState: RelayState{
+			relays:        map[iputil.VpnIp]struct{}{},
+			relayForByIp:  map[iputil.VpnIp]*Relay{},
+			relayForByIdx: map[uint32]*Relay{},
+		},
 	}
 
 	hostinfo.Lock()
@@ -223,17 +230,31 @@ func ixHandshakeStage1(f *Interface, addr *udp.Addr, packet []byte, h *header.H)
 
 			msg = existing.HandshakePacket[2]
 			f.messageMetrics.Tx(header.Handshake, header.MessageSubType(msg[1]), 1)
-			err := f.outside.WriteTo(msg, addr)
-			if err != nil {
-				f.l.WithField("vpnIp", existing.vpnIp).WithField("udpAddr", addr).
-					WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).WithField("cached", true).
-					WithError(err).Error("Failed to send handshake message")
+			if addr != nil {
+				err := f.outside.WriteTo(msg, addr)
+				if err != nil {
+					f.l.WithField("vpnIp", existing.vpnIp).WithField("udpAddr", addr).
+						WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).WithField("cached", true).
+						WithError(err).Error("Failed to send handshake message")
+				} else {
+					f.l.WithField("vpnIp", existing.vpnIp).WithField("udpAddr", addr).
+						WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).WithField("cached", true).
+						Info("Handshake message sent")
+				}
+				return
 			} else {
-				f.l.WithField("vpnIp", existing.vpnIp).WithField("udpAddr", addr).
+				via2 := via.(*ViaSender)
+				if via2 == nil {
+					f.l.Error("Handshake send failed: both addr and via are nil.")
+					return
+				}
+				hostinfo.relayState.InsertRelayTo(via2.relayHI.vpnIp)
+				f.SendVia(via2.relayHI, via2.relay, msg, make([]byte, 12), make([]byte, mtu), false)
+				f.l.WithField("vpnIp", existing.vpnIp).WithField("relay", via2.relayHI.vpnIp).
 					WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).WithField("cached", true).
 					Info("Handshake message sent")
+				return
 			}
-			return
 		case ErrExistingHostInfo:
 			// This means there was an existing tunnel and this handshake was older than the one we are currently based on
 			f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
@@ -286,17 +307,35 @@ func ixHandshakeStage1(f *Interface, addr *udp.Addr, packet []byte, h *header.H)
 
 	// Do the send
 	f.messageMetrics.Tx(header.Handshake, header.MessageSubType(msg[1]), 1)
-	err = f.outside.WriteTo(msg, addr)
-	if err != nil {
-		f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
-			WithField("certName", certName).
-			WithField("fingerprint", fingerprint).
-			WithField("issuer", issuer).
-			WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
-			WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
-			WithError(err).Error("Failed to send handshake")
+	if addr != nil {
+		err = f.outside.WriteTo(msg, addr)
+		if err != nil {
+			f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
+				WithField("certName", certName).
+				WithField("fingerprint", fingerprint).
+				WithField("issuer", issuer).
+				WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
+				WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
+				WithError(err).Error("Failed to send handshake")
+		} else {
+			f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
+				WithField("certName", certName).
+				WithField("fingerprint", fingerprint).
+				WithField("issuer", issuer).
+				WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
+				WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
+				WithField("sentCachedPackets", len(hostinfo.packetStore)).
+				Info("Handshake message sent")
+		}
 	} else {
-		f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
+		via2 := via.(*ViaSender)
+		if via2 == nil {
+			f.l.Error("Handshake send failed: both addr and via are nil.")
+			return
+		}
+		hostinfo.relayState.InsertRelayTo(via2.relayHI.vpnIp)
+		f.SendVia(via2.relayHI, via2.relay, msg, make([]byte, 12), make([]byte, mtu), false)
+		f.l.WithField("vpnIp", vpnIp).WithField("relay", via2.relayHI.vpnIp).
 			WithField("certName", certName).
 			WithField("fingerprint", fingerprint).
 			WithField("issuer", issuer).
@@ -311,7 +350,7 @@ func ixHandshakeStage1(f *Interface, addr *udp.Addr, packet []byte, h *header.H)
 	return
 }
 
-func ixHandshakeStage2(f *Interface, addr *udp.Addr, hostinfo *HostInfo, packet []byte, h *header.H) bool {
+func ixHandshakeStage2(f *Interface, addr *udp.Addr, via interface{}, hostinfo *HostInfo, packet []byte, h *header.H) bool {
 	if hostinfo == nil {
 		// Nothing here to tear down, got a bogus stage 2 packet
 		return true
@@ -320,9 +359,11 @@ func ixHandshakeStage2(f *Interface, addr *udp.Addr, hostinfo *HostInfo, packet
 	hostinfo.Lock()
 	defer hostinfo.Unlock()
 
-	if !f.lightHouse.GetRemoteAllowList().Allow(hostinfo.vpnIp, addr.IP) {
-		f.l.WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).Debug("lighthouse.remote_allow_list denied incoming handshake")
-		return false
+	if addr != nil {
+		if !f.lightHouse.GetRemoteAllowList().Allow(hostinfo.vpnIp, addr.IP) {
+			f.l.WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).Debug("lighthouse.remote_allow_list denied incoming handshake")
+			return false
+		}
 	}
 
 	ci := hostinfo.ConnectionState
@@ -450,7 +491,12 @@ func ixHandshakeStage2(f *Interface, addr *udp.Addr, hostinfo *HostInfo, packet
 	ci.eKey = NewNebulaCipherState(eKey)
 
 	// Make sure the current udpAddr being used is set for responding
-	hostinfo.SetRemote(addr)
+	if addr != nil {
+		hostinfo.SetRemote(addr)
+	} else {
+		via2 := via.(*ViaSender)
+		hostinfo.relayState.InsertRelayTo(via2.relayHI.vpnIp)
+	}
 
 	// Build up the radix for the firewall if we have subnets in the cert
 	hostinfo.CreateRemoteCIDR(remoteCert)

+ 82 - 1
handshake_manager.go

@@ -20,6 +20,7 @@ const (
 	DefaultHandshakeTryInterval   = time.Millisecond * 100
 	DefaultHandshakeRetries       = 10
 	DefaultHandshakeTriggerBuffer = 64
+	DefaultUseRelays              = true
 )
 
 var (
@@ -27,6 +28,7 @@ var (
 		tryInterval:   DefaultHandshakeTryInterval,
 		retries:       DefaultHandshakeRetries,
 		triggerBuffer: DefaultHandshakeTriggerBuffer,
+		useRelays:     DefaultUseRelays,
 	}
 )
 
@@ -34,6 +36,7 @@ type HandshakeConfig struct {
 	tryInterval   time.Duration
 	retries       int
 	triggerBuffer int
+	useRelays     bool
 
 	messageMetrics *MessageMetrics
 }
@@ -79,7 +82,6 @@ func (c *HandshakeManager) Run(ctx context.Context, f udp.EncWriter) {
 		case <-ctx.Done():
 			return
 		case vpnIP := <-c.trigger:
-			c.l.WithField("vpnIp", vpnIP).Debug("HandshakeManager: triggered")
 			c.handleOutbound(vpnIP, f, true)
 		case now := <-clockSource.C:
 			c.NextOutboundHandshakeTimerTick(now, f)
@@ -145,6 +147,8 @@ func (c *HandshakeManager) handleOutbound(vpnIp iputil.VpnIp, f udp.EncWriter, l
 
 	// Get a remotes object if we don't already have one.
 	// This is mainly to protect us as this should never be the case
+	// NB ^ This comment doesn't jibe: it's how the thing gets initialized.
+	// It's the common path. Should it update every time, in case a future LH query/queries give us more info?
 	if hostinfo.remotes == nil {
 		hostinfo.remotes = c.lightHouse.QueryCache(vpnIp)
 	}
@@ -181,6 +185,77 @@ func (c *HandshakeManager) handleOutbound(vpnIp iputil.VpnIp, f udp.EncWriter, l
 			Info("Handshake message sent")
 	}
 
+	if c.config.useRelays && len(hostinfo.remotes.relays) > 0 {
+		hostinfo.logger(c.l).WithField("relayIps", hostinfo.remotes.relays).Info("Attempt to relay through hosts")
+		// Send a RelayRequest to all known relay IPs
+		for _, relay := range hostinfo.remotes.relays {
+			// Don't relay to myself, and don't relay through the host I'm trying to connect to
+			if *relay == vpnIp || *relay == c.lightHouse.myVpnIp {
+				continue
+			}
+			relayHostInfo, err := c.mainHostMap.QueryVpnIp(*relay)
+			if err != nil || relayHostInfo.remote == nil {
+				hostinfo.logger(c.l).WithError(err).WithField("relay", relay.String()).Info("Establish tunnel to relay target.")
+				f.Handshake(*relay)
+				continue
+			}
+			// Check the relay HostInfo to see if we already established a relay through it
+			if existingRelay, ok := relayHostInfo.relayState.QueryRelayForByIp(vpnIp); ok {
+				switch existingRelay.State {
+				case Established:
+					hostinfo.logger(c.l).WithField("relay", relay.String()).Info("Send handshake via relay")
+					f.SendVia(relayHostInfo, existingRelay, hostinfo.HandshakePacket[0], make([]byte, 12), make([]byte, mtu), false)
+				case Requested:
+					hostinfo.logger(c.l).WithField("relay", relay.String()).Info("Re-send CreateRelay request")
+					// Re-send the CreateRelay request, in case the previous one was lost.
+					m := NebulaControl{
+						Type:                NebulaControl_CreateRelayRequest,
+						InitiatorRelayIndex: existingRelay.LocalIndex,
+						RelayFromIp:         uint32(c.lightHouse.myVpnIp),
+						RelayToIp:           uint32(vpnIp),
+					}
+					msg, err := m.Marshal()
+					if err != nil {
+						hostinfo.logger(c.l).
+							WithError(err).
+							Error("Failed to marshal Control message to create relay")
+					} else {
+						f.SendMessageToVpnIp(header.Control, 0, *relay, msg, make([]byte, 12), make([]byte, mtu))
+					}
+				default:
+					hostinfo.logger(c.l).
+						WithField("vpnIp", vpnIp).
+						WithField("state", existingRelay.State).
+						WithField("relayVpnIp", relayHostInfo.vpnIp).
+						Errorf("Relay unexpected state")
+				}
+			} else {
+				// No relays exist or requested yet.
+				if relayHostInfo.remote != nil {
+					idx, err := AddRelay(c.l, relayHostInfo, c.mainHostMap, vpnIp, nil, TerminalType, Requested)
+					if err != nil {
+						hostinfo.logger(c.l).WithField("relay", relay.String()).WithError(err).Info("Failed to add relay to hostmap")
+					}
+
+					m := NebulaControl{
+						Type:                NebulaControl_CreateRelayRequest,
+						InitiatorRelayIndex: idx,
+						RelayFromIp:         uint32(c.lightHouse.myVpnIp),
+						RelayToIp:           uint32(vpnIp),
+					}
+					msg, err := m.Marshal()
+					if err != nil {
+						hostinfo.logger(c.l).
+							WithError(err).
+							Error("Failed to marshal Control message to create relay")
+					} else {
+						f.SendMessageToVpnIp(header.Control, 0, *relay, msg, make([]byte, 12), make([]byte, mtu))
+					}
+				}
+			}
+		}
+	}
+
 	// Increment the counter to increase our delay, linear backoff
 	hostinfo.HandshakeCounter++
 
@@ -284,6 +359,9 @@ func (c *HandshakeManager) CheckAndComplete(hostinfo *HostInfo, handshakePacket
 		delete(c.mainHostMap.Hosts, existingHostInfo.vpnIp)
 		delete(c.mainHostMap.Indexes, existingHostInfo.localIndexId)
 		delete(c.mainHostMap.RemoteIndexes, existingHostInfo.remoteIndexId)
+		for _, relayIdx := range existingHostInfo.relayState.CopyRelayForIdxs() {
+			delete(c.mainHostMap.Relays, relayIdx)
+		}
 	}
 
 	c.mainHostMap.addHostInfo(hostinfo, f)
@@ -305,6 +383,9 @@ func (c *HandshakeManager) Complete(hostinfo *HostInfo, f *Interface) {
 		delete(c.mainHostMap.Hosts, existingHostInfo.vpnIp)
 		delete(c.mainHostMap.Indexes, existingHostInfo.localIndexId)
 		delete(c.mainHostMap.RemoteIndexes, existingHostInfo.remoteIndexId)
+		for _, relayIdx := range existingHostInfo.relayState.CopyRelayForIdxs() {
+			delete(c.mainHostMap.Relays, relayIdx)
+		}
 	}
 
 	existingRemoteIndex, found := c.mainHostMap.RemoteIndexes[hostinfo.remoteIndexId]
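
Condensing the branches above, the decision made per candidate relay on every outbound handshake attempt is roughly the following; this is a restatement for orientation, not code from the commit:

    func relayStep(existing *Relay, haveRelayTunnel bool) string {
    	switch {
    	case !haveRelayTunnel:
    		return "handshake with the relay itself first"
    	case existing == nil:
    		return "AddRelay(..., Requested), then send CreateRelayRequest"
    	case existing.State == Requested:
    		return "re-send CreateRelayRequest in case the last one was lost"
    	case existing.State == Established:
    		return "SendVia(relayHostInfo, existing, HandshakePacket[0], ...)"
    	default:
    		return "log: Relay unexpected state"
    	}
    }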

+ 7 - 0
handshake_manager_test.go

@@ -24,6 +24,7 @@ func Test_NewHandshakeManagerVpnIp(t *testing.T) {
 	lh := &LightHouse{
 		atomicStaticList:  make(map[iputil.VpnIp]struct{}),
 		atomicLighthouses: make(map[iputil.VpnIp]struct{}),
+		addrMap:           make(map[iputil.VpnIp]*RemoteList),
 	}
 
 	blah := NewHandshakeManager(l, tuncidr, preferredRanges, mainHM, lh, &udp.Conn{}, defaultHandshakeConfig)
@@ -131,3 +132,9 @@ type mockEncWriter struct {
 func (mw *mockEncWriter) SendMessageToVpnIp(t header.MessageType, st header.MessageSubType, vpnIp iputil.VpnIp, p, nb, out []byte) {
 	return
 }
+
+func (mw *mockEncWriter) SendVia(via interface{}, relay interface{}, ad, nb, out []byte, nocopy bool) {
+	return
+}
+
+func (mw *mockEncWriter) Handshake(vpnIP iputil.VpnIp) {}

+ 7 - 0
header/header.go

@@ -36,6 +36,7 @@ const (
 	LightHouse  MessageType = 3
 	Test        MessageType = 4
 	CloseTunnel MessageType = 5
+	Control     MessageType = 6
 )
 
 var typeMap = map[MessageType]string{
@@ -47,6 +48,11 @@ var typeMap = map[MessageType]string{
 	CloseTunnel: "closeTunnel",
 }
 
+const (
+	MessageNone  MessageSubType = 0
+	MessageRelay MessageSubType = 1
+)
+
 const (
 	TestRequest MessageSubType = 0
 	TestReply   MessageSubType = 1
@@ -75,6 +81,7 @@ var subTypeMap = map[MessageType]*map[MessageSubType]string{
 	Handshake: {
 		HandshakeIXPSK0: "ix_psk0",
 	},
+	Control: &subTypeNoneMap,
 }
 
 type H struct {
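
Both new constants flow through the existing header encoder. A hedged sketch based on the Encode call that appears later in this diff (in inside.go); the index and counter values are placeholders:

    // Outer header of a relayed data frame: type Message, subtype MessageRelay,
    // addressed by the relay's index rather than the target host's.
    out := header.Encode(make([]byte, header.Len), header.Version,
    	header.Message, header.MessageRelay, relayRemoteIndex, messageCounter)

    // CreateRelayRequest/Response control messages travel as header.Control
    // with subtype 0 ("none"), matching the subTypeMap entry above.
    ctl := header.Encode(make([]byte, header.Len), header.Version,
    	header.Control, 0, remoteIndex, messageCounter)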

+ 1 - 0
header/header_test.go

@@ -93,6 +93,7 @@ func TestTypeMap(t *testing.T) {
 		Handshake: {
 			HandshakeIXPSK0: "ix_psk0",
 		},
+		Control: &subTypeNoneMap,
 	}, subTypeMap)
 }
 

+ 207 - 9
hostmap.go

@@ -27,10 +27,30 @@ const MaxRemotes = 10
 // This helps prevent flapping due to packets already in flight
 const RoamingSuppressSeconds = 2
 
+const (
+	Requested = iota
+	Established
+)
+
+const (
+	Unknowntype = iota
+	ForwardingType
+	TerminalType
+)
+
+type Relay struct {
+	Type        int
+	State       int
+	LocalIndex  uint32
+	RemoteIndex uint32
+	PeerIp      iputil.VpnIp
+}
+
 type HostMap struct {
 	sync.RWMutex    //Because we concurrently read and write to our maps
 	name            string
 	Indexes         map[uint32]*HostInfo
+	Relays          map[uint32]*HostInfo // Maps a Relay IDX to a Relay HostInfo object
 	RemoteIndexes   map[uint32]*HostInfo
 	Hosts           map[iputil.VpnIp]*HostInfo
 	preferredRanges []*net.IPNet
@@ -39,6 +59,95 @@ type HostMap struct {
 	l               *logrus.Logger
 }
 
+type RelayState struct {
+	sync.RWMutex
+
+	relays        map[iputil.VpnIp]struct{} // Set of VpnIps of hosts to use as relays to access this peer
+	relayForByIp  map[iputil.VpnIp]*Relay   // Maps VpnIps of peers for which this HostInfo is a relay to some Relay info
+	relayForByIdx map[uint32]*Relay         // Maps a local index to some Relay info
+}
+
+func (rs *RelayState) DeleteRelay(ip iputil.VpnIp) {
+	rs.Lock()
+	defer rs.Unlock()
+	delete(rs.relays, ip)
+}
+
+func (rs *RelayState) GetRelayForByIp(ip iputil.VpnIp) (*Relay, bool) {
+	rs.RLock()
+	defer rs.RUnlock()
+	r, ok := rs.relayForByIp[ip]
+	return r, ok
+}
+
+func (rs *RelayState) InsertRelayTo(ip iputil.VpnIp) {
+	rs.Lock()
+	defer rs.Unlock()
+	rs.relays[ip] = struct{}{}
+}
+
+func (rs *RelayState) CopyRelayIps() []iputil.VpnIp {
+	rs.RLock()
+	defer rs.RUnlock()
+	ret := make([]iputil.VpnIp, 0, len(rs.relays))
+	for ip := range rs.relays {
+		ret = append(ret, ip)
+	}
+	return ret
+}
+
+func (rs *RelayState) CopyRelayForIps() []iputil.VpnIp {
+	rs.RLock()
+	defer rs.RUnlock()
+	currentRelays := make([]iputil.VpnIp, 0, len(rs.relayForByIp))
+	for relayIp := range rs.relayForByIp {
+		currentRelays = append(currentRelays, relayIp)
+	}
+	return currentRelays
+}
+
+func (rs *RelayState) CopyRelayForIdxs() []uint32 {
+	rs.RLock()
+	defer rs.RUnlock()
+	ret := make([]uint32, 0, len(rs.relayForByIdx))
+	for i := range rs.relayForByIdx {
+		ret = append(ret, i)
+	}
+	return ret
+}
+
+func (rs *RelayState) RemoveRelay(localIdx uint32) (iputil.VpnIp, bool) {
+	rs.Lock()
+	defer rs.Unlock()
+	relay, ok := rs.relayForByIdx[localIdx]
+	if !ok {
+		return iputil.VpnIp(0), false
+	}
+	delete(rs.relayForByIdx, localIdx)
+	delete(rs.relayForByIp, relay.PeerIp)
+	return relay.PeerIp, true
+}
+
+func (rs *RelayState) QueryRelayForByIp(vpnIp iputil.VpnIp) (*Relay, bool) {
+	rs.RLock()
+	defer rs.RUnlock()
+	r, ok := rs.relayForByIp[vpnIp]
+	return r, ok
+}
+
+func (rs *RelayState) QueryRelayForByIdx(idx uint32) (*Relay, bool) {
+	rs.RLock()
+	defer rs.RUnlock()
+	r, ok := rs.relayForByIdx[idx]
+	return r, ok
+}
+func (rs *RelayState) InsertRelay(ip iputil.VpnIp, idx uint32, r *Relay) {
+	rs.Lock()
+	defer rs.Unlock()
+	rs.relayForByIp[ip] = r
+	rs.relayForByIdx[idx] = r
+}
+
 type HostInfo struct {
 	sync.RWMutex
 
@@ -57,6 +166,7 @@ type HostInfo struct {
 	vpnIp             iputil.VpnIp
 	recvError         int
 	remoteCidr        *cidr.Tree4
+	relayState        RelayState
 
 	// lastRebindCount is the other side of Interface.rebindCount, if these values don't match then we need to ask LH
 	// for a punch from the remote end of this tunnel. The goal being to prime their conntrack for our traffic just like
@@ -72,6 +182,12 @@ type HostInfo struct {
 	lastRoamRemote *udp.Addr
 }
 
+type ViaSender struct {
+	relayHI   *HostInfo // relayHI is the host info object of the relay
+	remoteIdx uint32    // remoteIdx is the index included in the header of the received packet
+	relay     *Relay    // relay contains the rest of the relay information, including the PeerIP of the host trying to communicate with us.
+}
+
 type cachedPacket struct {
 	messageType    header.MessageType
 	messageSubType header.MessageSubType
@@ -90,9 +206,11 @@ func NewHostMap(l *logrus.Logger, name string, vpnCIDR *net.IPNet, preferredRang
 	h := map[iputil.VpnIp]*HostInfo{}
 	i := map[uint32]*HostInfo{}
 	r := map[uint32]*HostInfo{}
+	relays := map[uint32]*HostInfo{}
 	m := HostMap{
 		name:            name,
 		Indexes:         i,
+		Relays:          relays,
 		RemoteIndexes:   r,
 		Hosts:           h,
 		preferredRanges: preferredRanges,
@@ -108,11 +226,40 @@ func (hm *HostMap) EmitStats(name string) {
 	hostLen := len(hm.Hosts)
 	indexLen := len(hm.Indexes)
 	remoteIndexLen := len(hm.RemoteIndexes)
+	relaysLen := len(hm.Relays)
 	hm.RUnlock()
 
 	metrics.GetOrRegisterGauge("hostmap."+name+".hosts", nil).Update(int64(hostLen))
 	metrics.GetOrRegisterGauge("hostmap."+name+".indexes", nil).Update(int64(indexLen))
 	metrics.GetOrRegisterGauge("hostmap."+name+".remoteIndexes", nil).Update(int64(remoteIndexLen))
+	metrics.GetOrRegisterGauge("hostmap."+name+".relayIndexes", nil).Update(int64(relaysLen))
+}
+
+func (hm *HostMap) RemoveRelay(localIdx uint32) {
+	hm.Lock()
+	hiRelay, ok := hm.Relays[localIdx]
+	if !ok {
+		hm.Unlock()
+		return
+	}
+	delete(hm.Relays, localIdx)
+	hm.Unlock()
+	ip, ok := hiRelay.relayState.RemoveRelay(localIdx)
+	if !ok {
+		return
+	}
+	hiPeer, err := hm.QueryVpnIp(ip)
+	if err != nil {
+		return
+	}
+	var otherPeerIdx uint32
+	hiPeer.relayState.DeleteRelay(hiRelay.vpnIp)
+	relay, ok := hiPeer.relayState.GetRelayForByIp(hiRelay.vpnIp)
+	if ok {
+		otherPeerIdx = relay.LocalIndex
+	}
+	// I am a relaying host. I need to remove the other relay, too.
+	hm.RemoveRelay(otherPeerIdx)
 }
 
 func (hm *HostMap) GetIndexByVpnIp(vpnIp iputil.VpnIp) (uint32, error) {
@@ -140,6 +287,11 @@ func (hm *HostMap) AddVpnIp(vpnIp iputil.VpnIp, init func(hostinfo *HostInfo)) (
 			promoteCounter:  0,
 			vpnIp:           vpnIp,
 			HandshakePacket: make(map[uint8][]byte, 0),
+			relayState: RelayState{
+				relays:        map[iputil.VpnIp]struct{}{},
+				relayForByIp:  map[iputil.VpnIp]*Relay{},
+				relayForByIdx: map[uint32]*Relay{},
+			},
 		}
 		if init != nil {
 			init(h)
@@ -245,9 +397,37 @@ func (hm *HostMap) DeleteReverseIndex(index uint32) {
 }
 
 func (hm *HostMap) DeleteHostInfo(hostinfo *HostInfo) {
+	// Delete the host itself, ensuring it's not modified anymore
 	hm.Lock()
-	defer hm.Unlock()
 	hm.unlockedDeleteHostInfo(hostinfo)
+	hm.Unlock()
+
+	// And tear down all the relays going through this host
+	for _, localIdx := range hostinfo.relayState.CopyRelayForIdxs() {
+		hm.RemoveRelay(localIdx)
+	}
+
+	// And tear down the relays this deleted hostInfo was using to be reached
+	teardownRelayIdx := []uint32{}
+	for _, relayIp := range hostinfo.relayState.CopyRelayIps() {
+		relayHostInfo, err := hm.QueryVpnIp(relayIp)
+		if err != nil {
+			hm.l.WithError(err).WithField("relay", relayIp).Info("Missing relay host in hostmap")
+		} else {
+			if r, ok := relayHostInfo.relayState.QueryRelayForByIp(hostinfo.vpnIp); ok {
+				teardownRelayIdx = append(teardownRelayIdx, r.LocalIndex)
+			}
+		}
+	}
+	for _, localIdx := range teardownRelayIdx {
+		hm.RemoveRelay(localIdx)
+	}
+}
+
+func (hm *HostMap) DeleteRelayIdx(localIdx uint32) {
+	hm.Lock()
+	defer hm.Unlock()
+	delete(hm.RemoteIndexes, localIdx)
 }
 
 func (hm *HostMap) unlockedDeleteHostInfo(hostinfo *HostInfo) {
@@ -282,7 +462,7 @@ func (hm *HostMap) unlockedDeleteHostInfo(hostinfo *HostInfo) {
 }
 
 func (hm *HostMap) QueryIndex(index uint32) (*HostInfo, error) {
-	//TODO: we probably just want ot return bool instead of error, or at least a static error
+	//TODO: we probably just want to return bool instead of error, or at least a static error
 	hm.RLock()
 	if h, ok := hm.Indexes[index]; ok {
 		hm.RUnlock()
@@ -292,6 +472,17 @@ func (hm *HostMap) QueryIndex(index uint32) (*HostInfo, error) {
 		return nil, errors.New("unable to find index")
 	}
 }
+func (hm *HostMap) QueryRelayIndex(index uint32) (*HostInfo, error) {
+	//TODO: we probably just want to return bool instead of error, or at least a static error
+	hm.RLock()
+	if h, ok := hm.Relays[index]; ok {
+		hm.RUnlock()
+		return h, nil
+	} else {
+		hm.RUnlock()
+		return nil, errors.New("unable to find index")
+	}
+}
 
 func (hm *HostMap) QueryReverseIndex(index uint32) (*HostInfo, error) {
 	hm.RLock()
@@ -404,24 +595,27 @@ func (i *HostInfo) TryPromoteBest(preferredRanges []*net.IPNet, ifce *Interface)
 	if c%PromoteEvery == 0 {
 		// The lock here is currently protecting i.remote access
 		i.RLock()
-		defer i.RUnlock()
+		remote := i.remote
+		i.RUnlock()
 
 		// return early if we are already on a preferred remote
-		rIP := i.remote.IP
-		for _, l := range preferredRanges {
-			if l.Contains(rIP) {
-				return
+		if remote != nil {
+			rIP := remote.IP
+			for _, l := range preferredRanges {
+				if l.Contains(rIP) {
+					return
+				}
 			}
 		}
 
 		i.remotes.ForEach(preferredRanges, func(addr *udp.Addr, preferred bool) {
-			if addr == nil || !preferred {
+			if remote != nil && (addr == nil || !preferred) {
 				return
 			}
 
 			// Try to send a test packet to that host, this should
 			// cause it to detect a roaming event and switch remotes
-			ifce.send(header.Test, header.TestRequest, i.ConnectionState, i, addr, []byte(""), make([]byte, 12, 12), make([]byte, mtu))
+			ifce.sendTo(header.Test, header.TestRequest, i.ConnectionState, i, addr, []byte(""), make([]byte, 12, 12), make([]byte, mtu))
 		})
 	}
 
@@ -504,6 +698,10 @@ func (i *HostInfo) SetRemote(remote *udp.Addr) {
 // SetRemoteIfPreferred returns true if the remote was changed. The lastRoam
 // time on the HostInfo will also be updated.
 func (i *HostInfo) SetRemoteIfPreferred(hm *HostMap, newRemote *udp.Addr) bool {
+	if newRemote == nil {
+		// relays have nil udp Addrs
+		return false
+	}
 	currentRemote := i.remote
 	if currentRemote == nil {
 		i.SetRemote(newRemote)
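
A hedged sketch of how the RelayState accessors compose: each relayed pair is one *Relay value registered under two keys, the peer's VPN IP (used when sending) and a local index (used when a relayed frame arrives). All names here are placeholders:

    r := &Relay{
    	Type:       ForwardingType,
    	State:      Requested,
    	LocalIndex: localIdx, // assumed to come from the usual index generator
    	PeerIp:     peerVpnIp,
    }
    hi.relayState.InsertRelay(peerVpnIp, localIdx, r)

    bySend, _ := hi.relayState.QueryRelayForByIp(peerVpnIp) // outbound lookup
    byRecv, _ := hi.relayState.QueryRelayForByIdx(localIdx) // inbound lookup
    // bySend == byRecv == r; hm.Relays[localIdx] additionally maps the index
    // back to this HostInfo so inbound frames can locate it at all.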

+ 113 - 9
inside.go

@@ -58,7 +58,7 @@ func (f *Interface) consumeInsidePacket(packet []byte, fwPacket *firewall.Packet
 
 	dropReason := f.firewall.Drop(packet, *fwPacket, false, hostinfo, f.caPool, localCache)
 	if dropReason == nil {
-		f.sendNoMetrics(header.Message, 0, ci, hostinfo, hostinfo.remote, packet, nb, out, q)
+		f.sendNoMetrics(header.Message, 0, ci, hostinfo, nil, packet, nb, out, q)
 
 	} else if f.l.Level >= logrus.DebugLevel {
 		hostinfo.logger(f.l).
@@ -68,6 +68,10 @@ func (f *Interface) consumeInsidePacket(packet []byte, fwPacket *firewall.Packet
 	}
 }
 
+func (f *Interface) Handshake(vpnIp iputil.VpnIp) {
+	f.getOrHandshake(vpnIp)
+}
+
 // getOrHandshake returns nil if the vpnIp is not routable
 func (f *Interface) getOrHandshake(vpnIp iputil.VpnIp) *HostInfo {
 	//TODO: we can find contains without converting back to bytes
@@ -146,7 +150,7 @@ func (f *Interface) sendMessageNow(t header.MessageType, st header.MessageSubTyp
 		return
 	}
 
-	f.sendNoMetrics(header.Message, st, hostInfo.ConnectionState, hostInfo, hostInfo.remote, p, nb, out, 0)
+	f.sendNoMetrics(header.Message, st, hostInfo.ConnectionState, hostInfo, nil, p, nb, out, 0)
 }
 
 // SendMessageToVpnIp handles real ip:port lookup and sends to the current best known address for vpnIp
@@ -177,21 +181,93 @@ func (f *Interface) SendMessageToVpnIp(t header.MessageType, st header.MessageSu
 }
 
 func (f *Interface) sendMessageToVpnIp(t header.MessageType, st header.MessageSubType, hostInfo *HostInfo, p, nb, out []byte) {
-	f.send(t, st, hostInfo.ConnectionState, hostInfo, hostInfo.remote, p, nb, out)
+	f.send(t, st, hostInfo.ConnectionState, hostInfo, p, nb, out)
+}
+
+func (f *Interface) send(t header.MessageType, st header.MessageSubType, ci *ConnectionState, hostinfo *HostInfo, p, nb, out []byte) {
+	f.messageMetrics.Tx(t, st, 1)
+	f.sendNoMetrics(t, st, ci, hostinfo, nil, p, nb, out, 0)
 }
 
-func (f *Interface) send(t header.MessageType, st header.MessageSubType, ci *ConnectionState, hostinfo *HostInfo, remote *udp.Addr, p, nb, out []byte) {
+func (f *Interface) sendTo(t header.MessageType, st header.MessageSubType, ci *ConnectionState, hostinfo *HostInfo, remote *udp.Addr, p, nb, out []byte) {
 	f.messageMetrics.Tx(t, st, 1)
 	f.sendNoMetrics(t, st, ci, hostinfo, remote, p, nb, out, 0)
 }
 
+// SendVia sends a payload through a Relay tunnel. No authentication or encryption is done
+// to the payload for the ultimate target host, making this a useful method for sending
+// handshake messages to peers through relay tunnels.
+// via is the HostInfo through which the message is relayed.
+// ad is the plaintext data to authenticate, but not encrypt
+// nb is a buffer used to store the nonce value, re-used for performance reasons.
+// out is a buffer used to store the result of the Encrypt operation
+// nocopy indicates that ad is already stored at the end of out and need not be copied in.
+func (f *Interface) SendVia(viaIfc interface{},
+	relayIfc interface{},
+	ad,
+	nb,
+	out []byte,
+	nocopy bool,
+) {
+	via := viaIfc.(*HostInfo)
+	relay := relayIfc.(*Relay)
+	c := atomic.AddUint64(&via.ConnectionState.atomicMessageCounter, 1)
+
+	out = header.Encode(out, header.Version, header.Message, header.MessageRelay, relay.RemoteIndex, c)
+	f.connectionManager.Out(via.vpnIp)
+
+	// Authenticate the header and payload, but do not encrypt for this message type.
+	// The payload consists of the inner, unencrypted Nebula header, as well as the end-to-end encrypted payload.
+	if len(out)+len(ad)+via.ConnectionState.eKey.Overhead() > cap(out) {
+		via.logger(f.l).
+			WithField("outCap", cap(out)).
+			WithField("payloadLen", len(ad)).
+			WithField("headerLen", len(out)).
+			WithField("cipherOverhead", via.ConnectionState.eKey.Overhead()).
+			Error("SendVia out buffer not large enough for relay")
+		return
+	}
+
+	// The header bytes are written to the 'out' slice; Grow the slice to hold the header and associated data payload.
+	offset := len(out)
+	out = out[:offset+len(ad)]
+
+	// In one call path, the associated data _is_ already stored in out. In other call paths, the associated data must
+	// be copied into 'out'.
+	if !nocopy {
+		copy(out[offset:], ad)
+	}
+
+	var err error
+	out, err = via.ConnectionState.eKey.EncryptDanger(out, out, nil, c, nb)
+	if err != nil {
+		via.logger(f.l).WithError(err).Info("Failed to EncryptDanger in sendVia")
+		return
+	}
+	err = f.writers[0].WriteTo(out, via.remote)
+	if err != nil {
+		via.logger(f.l).WithError(err).Info("Failed to WriteTo in sendVia")
+	}
+}
+
 func (f *Interface) sendNoMetrics(t header.MessageType, st header.MessageSubType, ci *ConnectionState, hostinfo *HostInfo, remote *udp.Addr, p, nb, out []byte, q int) {
 	if ci.eKey == nil {
 		//TODO: log warning
 		return
 	}
+	useRelay := remote == nil && hostinfo.remote == nil
+	fullOut := out
+
+	if useRelay {
+		if len(out) < header.Len {
+			// out always has a capacity of mtu, but not always a length greater than the header.Len.
+			// Grow it to make sure the next operation works.
+			out = out[:header.Len]
+		}
+		// Save a header's worth of data at the front of the 'out' buffer.
+		out = out[header.Len:]
+	}
 
-	var err error
 	//TODO: enable if we do more than 1 tun queue
 	//ci.writeLock.Lock()
 	c := atomic.AddUint64(&ci.atomicMessageCounter, 1)
@@ -212,6 +288,7 @@ func (f *Interface) sendNoMetrics(t header.MessageType, st header.MessageSubType
 		}
 	}
 
+	var err error
 	out, err = ci.eKey.EncryptDanger(out, out, p, c, nb)
 	//TODO: see above note on lock
 	//ci.writeLock.Unlock()
@@ -223,10 +300,37 @@ func (f *Interface) sendNoMetrics(t header.MessageType, st header.MessageSubType
 		return
 	}
 
-	err = f.writers[q].WriteTo(out, remote)
-	if err != nil {
-		hostinfo.logger(f.l).WithError(err).
-			WithField("udpAddr", remote).Error("Failed to write outgoing packet")
+	if remote != nil {
+		err = f.writers[q].WriteTo(out, remote)
+		if err != nil {
+			hostinfo.logger(f.l).WithError(err).
+				WithField("udpAddr", remote).Error("Failed to write outgoing packet")
+		}
+	} else if hostinfo.remote != nil {
+		err = f.writers[q].WriteTo(out, hostinfo.remote)
+		if err != nil {
+			hostinfo.logger(f.l).WithError(err).
+				WithField("udpAddr", hostinfo.remote).Error("Failed to write outgoing packet")
+		}
+	} else {
+		// Try to send via a relay
+		for _, relayIP := range hostinfo.relayState.CopyRelayIps() {
+			relayHostInfo, err := f.hostMap.QueryVpnIp(relayIP)
+			if err != nil {
+				hostinfo.logger(f.l).WithField("relayIp", relayIP).WithError(err).Info("sendNoMetrics failed to find HostInfo")
+				continue
+			}
+			relay, ok := relayHostInfo.relayState.QueryRelayForByIp(hostinfo.vpnIp)
+			if !ok {
+				hostinfo.logger(f.l).
+					WithField("relayIp", relayHostInfo.vpnIp).
+					WithField("relayTarget", hostinfo.vpnIp).
+					Info("sendNoMetrics relay missing object for target")
+				continue
+			}
+			f.SendVia(relayHostInfo, relay, out, nb, fullOut[:header.Len+len(out)], true)
+			break
+		}
 	}
 	return
 }
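
Two details above are easy to miss: when no direct remote exists, sendNoMetrics reserves header.Len bytes of headroom before encrypting, and SendVia authenticates the relayed payload with the relay tunnel's key without re-encrypting it (the whole frame is passed as associated data). A sketch of the frame and the headroom trick, for illustration:

    // Wire layout of a relayed data frame (illustrative):
    //   [ outer header: Message/MessageRelay, RemoteIndex = relay's index ]
    //   [ inner Nebula header + end-to-end encrypted body                 ]
    //   [ AEAD tag from the relay tunnel's key over all of the above      ]

    fullOut := make([]byte, mtu) // scratch buffer sized to the MTU
    out := fullOut[header.Len:]  // leave room for the outer header
    // ...encrypt the inner packet into out with the end-to-end key...
    // SendVia(relayHostInfo, relay, out, nb, fullOut[:header.Len+len(out)], true)
    // then writes the outer header into the reserved space and, because nocopy
    // is true, skips copying the payload a second time.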

+ 3 - 0
interface.go

@@ -40,6 +40,7 @@ type InterfaceConfig struct {
 	version                 string
 	caPool                  *cert.NebulaCAPool
 	disconnectInvalid       bool
+	relayManager            *relayManager
 
 	ConntrackCacheTimeout time.Duration
 	l                     *logrus.Logger
@@ -65,6 +66,7 @@ type Interface struct {
 	caPool             *cert.NebulaCAPool
 	disconnectInvalid  bool
 	closed             int32
+	relayManager       *relayManager
 
 	// rebindCount is used to decide if an active tunnel should trigger a punch notification through a lighthouse
 	rebindCount int8
@@ -118,6 +120,7 @@ func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
 		caPool:             c.caPool,
 		disconnectInvalid:  c.disconnectInvalid,
 		myVpnIp:            myVpnIp,
+		relayManager:       c.relayManager,
 
 		conntrackCacheTimeout: c.ConntrackCacheTimeout,
 

+ 44 - 1
lighthouse.go

@@ -70,6 +70,9 @@ type LightHouse struct {
 
 	atomicAdvertiseAddrs []netIpAndPort
 
+	// IPs of relays that can be used by peers to access me
+	atomicRelaysForMe []iputil.VpnIp
+
 	metrics           *MessageMetrics
 	metricHolepunchTx metrics.Counter
 	l                 *logrus.Logger
@@ -153,6 +156,10 @@ func (lh *LightHouse) GetAdvertiseAddrs() []netIpAndPort {
 	return *(*[]netIpAndPort)(atomic.LoadPointer((*unsafe.Pointer)(unsafe.Pointer(&lh.atomicAdvertiseAddrs))))
 }
 
+func (lh *LightHouse) GetRelaysForMe() []iputil.VpnIp {
+	return *(*[]iputil.VpnIp)(atomic.LoadPointer((*unsafe.Pointer)(unsafe.Pointer(&lh.atomicRelaysForMe))))
+}
+
 func (lh *LightHouse) GetUpdateInterval() int64 {
 	return atomic.LoadInt64(&lh.atomicInterval)
 }
@@ -259,6 +266,29 @@ func (lh *LightHouse) reload(c *config.C, initial bool) error {
 		}
 	}
 
+	if initial || c.HasChanged("relay.relays") {
+		switch c.GetBool("relay.am_relay", false) {
+		case true:
+			// Relays aren't allowed to specify other relays
+			if len(c.GetStringSlice("relay.relays", nil)) > 0 {
+				lh.l.Info("Ignoring relays from config because am_relay is true")
+			}
+			relaysForMe := []iputil.VpnIp{}
+			atomic.StorePointer((*unsafe.Pointer)(unsafe.Pointer(&lh.atomicRelaysForMe)), unsafe.Pointer(&relaysForMe))
+		case false:
+			relaysForMe := []iputil.VpnIp{}
+			for _, v := range c.GetStringSlice("relay.relays", nil) {
+				lh.l.WithField("RelayIP", v).Info("Read relay from config")
+
+				configRIP := net.ParseIP(v)
+				if configRIP != nil {
+					relaysForMe = append(relaysForMe, iputil.Ip2VpnIp(configRIP))
+				}
+			}
+			atomic.StorePointer((*unsafe.Pointer)(unsafe.Pointer(&lh.atomicRelaysForMe)), unsafe.Pointer(&relaysForMe))
+		}
+	}
+
 	return nil
 }
 
@@ -427,7 +457,7 @@ func (lh *LightHouse) DeleteVpnIp(vpnIp iputil.VpnIp) {
 	lh.Unlock()
 }
 
-// addStaticRemote adds a static host entry for vpnIp as ourselves as the owner
+// AddStaticRemote adds a static host entry for vpnIp with ourselves as the owner
 // We are the owner because we don't want a lighthouse server to advertise for static hosts it was configured with
 // And we don't want a lighthouse query reply to interfere with our learned cache if we are a client
 //NOTE: this function should not interact with any hot path objects, like lh.staticList, the caller should handle it
@@ -597,12 +627,18 @@ func (lh *LightHouse) SendUpdate(f udp.EncWriter) {
 		}
 	}
 
+	var relays []uint32
+	for _, r := range lh.GetRelaysForMe() {
+		relays = append(relays, (uint32)(r))
+	}
+
 	m := &NebulaMeta{
 		Type: NebulaMeta_HostUpdateNotification,
 		Details: &NebulaMetaDetails{
 			VpnIp:       uint32(lh.myVpnIp),
 			Ip4AndPorts: v4,
 			Ip6AndPorts: v6,
+			RelayVpnIp:  relays,
 		},
 	}
 
@@ -664,6 +700,7 @@ func (lhh *LightHouseHandler) resetMeta() *NebulaMeta {
 	// Keep the array memory around
 	details.Ip4AndPorts = details.Ip4AndPorts[:0]
 	details.Ip6AndPorts = details.Ip6AndPorts[:0]
+	details.RelayVpnIp = details.RelayVpnIp[:0]
 	lhh.meta.Details = details
 
 	return lhh.meta
@@ -780,6 +817,10 @@ func (lhh *LightHouseHandler) coalesceAnswers(c *cache, n *NebulaMeta) {
 			n.Details.Ip6AndPorts = append(n.Details.Ip6AndPorts, c.v6.reported...)
 		}
 	}
+
+	if c.relay != nil {
+		n.Details.RelayVpnIp = append(n.Details.RelayVpnIp, c.relay.relay...)
+	}
 }
 
 func (lhh *LightHouseHandler) handleHostQueryReply(n *NebulaMeta, vpnIp iputil.VpnIp) {
@@ -795,6 +836,7 @@ func (lhh *LightHouseHandler) handleHostQueryReply(n *NebulaMeta, vpnIp iputil.V
 	certVpnIp := iputil.VpnIp(n.Details.VpnIp)
 	am.unlockedSetV4(vpnIp, certVpnIp, n.Details.Ip4AndPorts, lhh.lh.unlockedShouldAddV4)
 	am.unlockedSetV6(vpnIp, certVpnIp, n.Details.Ip6AndPorts, lhh.lh.unlockedShouldAddV6)
+	am.unlockedSetRelay(vpnIp, certVpnIp, n.Details.RelayVpnIp)
 	am.Unlock()
 
 	// Non-blocking attempt to trigger, skip if it would block
@@ -828,6 +870,7 @@ func (lhh *LightHouseHandler) handleHostUpdateNotification(n *NebulaMeta, vpnIp
 	certVpnIp := iputil.VpnIp(n.Details.VpnIp)
 	am.unlockedSetV4(vpnIp, certVpnIp, n.Details.Ip4AndPorts, lhh.lh.unlockedShouldAddV4)
 	am.unlockedSetV6(vpnIp, certVpnIp, n.Details.Ip6AndPorts, lhh.lh.unlockedShouldAddV6)
+	am.unlockedSetRelay(vpnIp, certVpnIp, n.Details.RelayVpnIp)
 	am.Unlock()
 }
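
relaysForMe uses the same lock-free copy-on-write convention as the other atomic fields on LightHouse: reload builds a fresh slice and atomically swaps a pointer to it, and readers take a consistent snapshot without locking. A minimal standalone restatement of the pattern; the real code operates on lh.atomicRelaysForMe:

    package main

    import (
    	"fmt"
    	"sync/atomic"
    	"unsafe"
    )

    var atomicRelays []uint32 // stand-in for lh.atomicRelaysForMe

    func storeRelays(r []uint32) {
    	// Publish a freshly built slice; readers that already loaded the old
    	// snapshot keep using it safely.
    	atomic.StorePointer((*unsafe.Pointer)(unsafe.Pointer(&atomicRelays)), unsafe.Pointer(&r))
    }

    func loadRelays() []uint32 {
    	return *(*[]uint32)(atomic.LoadPointer((*unsafe.Pointer)(unsafe.Pointer(&atomicRelays))))
    }

    func main() {
    	storeRelays([]uint32{0xC0A86401}) // 192.168.100.1 packed into a uint32
    	fmt.Println(loadRelays())
    }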
 

+ 5 - 0
lighthouse_test.go

@@ -372,6 +372,11 @@ type testEncWriter struct {
 	metaFilter *NebulaMeta_MessageType
 }
 
+func (tw *testEncWriter) SendVia(via interface{}, relay interface{}, ad, nb, out []byte, nocopy bool) {
+}
+func (tw *testEncWriter) Handshake(vpnIp iputil.VpnIp) {
+}
+
 func (tw *testEncWriter) SendMessageToVpnIp(t header.MessageType, st header.MessageSubType, vpnIp iputil.VpnIp, p, _, _ []byte) {
 	msg := &NebulaMeta{}
 	err := msg.Unmarshal(p)

+ 4 - 0
main.go

@@ -230,10 +230,13 @@ func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logg
 		messageMetrics = newMessageMetricsOnlyRecvError()
 	}
 
+	useRelays := c.GetBool("relay.use_relays", DefaultUseRelays) && !c.GetBool("relay.am_relay", false)
+
 	handshakeConfig := HandshakeConfig{
 		tryInterval:   c.GetDuration("handshakes.try_interval", DefaultHandshakeTryInterval),
 		retries:       c.GetInt("handshakes.retries", DefaultHandshakeRetries),
 		triggerBuffer: c.GetInt("handshakes.trigger_buffer", DefaultHandshakeTriggerBuffer),
+		useRelays:     useRelays,
 
 		messageMetrics: messageMetrics,
 	}
@@ -275,6 +278,7 @@ func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logg
 		version:                 buildVersion,
 		caPool:                  caPool,
 		disconnectInvalid:       c.GetBool("pki.disconnect_invalid", false),
+		relayManager:            NewRelayManager(ctx, l, hostMap, c),
 
 		ConntrackCacheTimeout: conntrackCacheTimeout,
 		l:                     l,

+ 476 - 37
nebula.pb.go

@@ -96,6 +96,34 @@ func (NebulaPing_MessageType) EnumDescriptor() ([]byte, []int) {
 	return fileDescriptor_2d65afa7693df5ef, []int{4, 0}
 }
 
+type NebulaControl_MessageType int32
+
+const (
+	NebulaControl_None                NebulaControl_MessageType = 0
+	NebulaControl_CreateRelayRequest  NebulaControl_MessageType = 1
+	NebulaControl_CreateRelayResponse NebulaControl_MessageType = 2
+)
+
+var NebulaControl_MessageType_name = map[int32]string{
+	0: "None",
+	1: "CreateRelayRequest",
+	2: "CreateRelayResponse",
+}
+
+var NebulaControl_MessageType_value = map[string]int32{
+	"None":                0,
+	"CreateRelayRequest":  1,
+	"CreateRelayResponse": 2,
+}
+
+func (x NebulaControl_MessageType) String() string {
+	return proto.EnumName(NebulaControl_MessageType_name, int32(x))
+}
+
+func (NebulaControl_MessageType) EnumDescriptor() ([]byte, []int) {
+	return fileDescriptor_2d65afa7693df5ef, []int{7, 0}
+}
+
 type NebulaMeta struct {
 	Type    NebulaMeta_MessageType `protobuf:"varint,1,opt,name=Type,proto3,enum=nebula.NebulaMeta_MessageType" json:"Type,omitempty"`
 	Details *NebulaMetaDetails     `protobuf:"bytes,2,opt,name=Details,proto3" json:"Details,omitempty"`
@@ -152,6 +180,7 @@ type NebulaMetaDetails struct {
 	VpnIp       uint32        `protobuf:"varint,1,opt,name=VpnIp,proto3" json:"VpnIp,omitempty"`
 	Ip4AndPorts []*Ip4AndPort `protobuf:"bytes,2,rep,name=Ip4AndPorts,proto3" json:"Ip4AndPorts,omitempty"`
 	Ip6AndPorts []*Ip6AndPort `protobuf:"bytes,4,rep,name=Ip6AndPorts,proto3" json:"Ip6AndPorts,omitempty"`
+	RelayVpnIp  []uint32      `protobuf:"varint,5,rep,packed,name=RelayVpnIp,proto3" json:"RelayVpnIp,omitempty"`
 	Counter     uint32        `protobuf:"varint,3,opt,name=counter,proto3" json:"counter,omitempty"`
 }
 
@@ -209,6 +238,13 @@ func (m *NebulaMetaDetails) GetIp6AndPorts() []*Ip6AndPort {
 	return nil
 }
 
+func (m *NebulaMetaDetails) GetRelayVpnIp() []uint32 {
+	if m != nil {
+		return m.RelayVpnIp
+	}
+	return nil
+}
+
 func (m *NebulaMetaDetails) GetCounter() uint32 {
 	if m != nil {
 		return m.Counter
@@ -508,9 +544,86 @@ func (m *NebulaHandshakeDetails) GetTime() uint64 {
 	return 0
 }
 
+type NebulaControl struct {
+	Type                NebulaControl_MessageType `protobuf:"varint,1,opt,name=Type,proto3,enum=nebula.NebulaControl_MessageType" json:"Type,omitempty"`
+	InitiatorRelayIndex uint32                    `protobuf:"varint,2,opt,name=InitiatorRelayIndex,proto3" json:"InitiatorRelayIndex,omitempty"`
+	ResponderRelayIndex uint32                    `protobuf:"varint,3,opt,name=ResponderRelayIndex,proto3" json:"ResponderRelayIndex,omitempty"`
+	RelayToIp           uint32                    `protobuf:"varint,4,opt,name=RelayToIp,proto3" json:"RelayToIp,omitempty"`
+	RelayFromIp         uint32                    `protobuf:"varint,5,opt,name=RelayFromIp,proto3" json:"RelayFromIp,omitempty"`
+}
+
+func (m *NebulaControl) Reset()         { *m = NebulaControl{} }
+func (m *NebulaControl) String() string { return proto.CompactTextString(m) }
+func (*NebulaControl) ProtoMessage()    {}
+func (*NebulaControl) Descriptor() ([]byte, []int) {
+	return fileDescriptor_2d65afa7693df5ef, []int{7}
+}
+func (m *NebulaControl) XXX_Unmarshal(b []byte) error {
+	return m.Unmarshal(b)
+}
+func (m *NebulaControl) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+	if deterministic {
+		return xxx_messageInfo_NebulaControl.Marshal(b, m, deterministic)
+	} else {
+		b = b[:cap(b)]
+		n, err := m.MarshalToSizedBuffer(b)
+		if err != nil {
+			return nil, err
+		}
+		return b[:n], nil
+	}
+}
+func (m *NebulaControl) XXX_Merge(src proto.Message) {
+	xxx_messageInfo_NebulaControl.Merge(m, src)
+}
+func (m *NebulaControl) XXX_Size() int {
+	return m.Size()
+}
+func (m *NebulaControl) XXX_DiscardUnknown() {
+	xxx_messageInfo_NebulaControl.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_NebulaControl proto.InternalMessageInfo
+
+func (m *NebulaControl) GetType() NebulaControl_MessageType {
+	if m != nil {
+		return m.Type
+	}
+	return NebulaControl_None
+}
+
+func (m *NebulaControl) GetInitiatorRelayIndex() uint32 {
+	if m != nil {
+		return m.InitiatorRelayIndex
+	}
+	return 0
+}
+
+func (m *NebulaControl) GetResponderRelayIndex() uint32 {
+	if m != nil {
+		return m.ResponderRelayIndex
+	}
+	return 0
+}
+
+func (m *NebulaControl) GetRelayToIp() uint32 {
+	if m != nil {
+		return m.RelayToIp
+	}
+	return 0
+}
+
+func (m *NebulaControl) GetRelayFromIp() uint32 {
+	if m != nil {
+		return m.RelayFromIp
+	}
+	return 0
+}
+
 func init() {
 	proto.RegisterEnum("nebula.NebulaMeta_MessageType", NebulaMeta_MessageType_name, NebulaMeta_MessageType_value)
 	proto.RegisterEnum("nebula.NebulaPing_MessageType", NebulaPing_MessageType_name, NebulaPing_MessageType_value)
+	proto.RegisterEnum("nebula.NebulaControl_MessageType", NebulaControl_MessageType_name, NebulaControl_MessageType_value)
 	proto.RegisterType((*NebulaMeta)(nil), "nebula.NebulaMeta")
 	proto.RegisterType((*NebulaMetaDetails)(nil), "nebula.NebulaMetaDetails")
 	proto.RegisterType((*Ip4AndPort)(nil), "nebula.Ip4AndPort")
@@ -518,48 +631,56 @@ func init() {
 	proto.RegisterType((*NebulaPing)(nil), "nebula.NebulaPing")
 	proto.RegisterType((*NebulaHandshake)(nil), "nebula.NebulaHandshake")
 	proto.RegisterType((*NebulaHandshakeDetails)(nil), "nebula.NebulaHandshakeDetails")
+	proto.RegisterType((*NebulaControl)(nil), "nebula.NebulaControl")
 }
 
 func init() { proto.RegisterFile("nebula.proto", fileDescriptor_2d65afa7693df5ef) }
 
 var fileDescriptor_2d65afa7693df5ef = []byte{
-	// 570 bytes of a gzipped FileDescriptorProto
-	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x7c, 0x54, 0x41, 0x6f, 0xda, 0x4c,
-	0x10, 0x65, 0x8d, 0x21, 0xc9, 0x90, 0x10, 0x7f, 0xfb, 0xb5, 0x08, 0x7a, 0xb0, 0x22, 0x1f, 0x2a,
-	0x4e, 0xa4, 0x82, 0x08, 0xf5, 0xd8, 0x96, 0x1e, 0x40, 0x0a, 0x88, 0x5a, 0x69, 0x2b, 0xf5, 0x52,
-	0x2d, 0xf6, 0x16, 0xaf, 0x80, 0x5d, 0xd7, 0x5e, 0xaa, 0xf0, 0x2f, 0xfa, 0x33, 0x7a, 0xeb, 0xdf,
-	0xe8, 0xa1, 0x87, 0x1c, 0x7a, 0xe8, 0xb1, 0x82, 0x3f, 0x52, 0xed, 0xda, 0xd8, 0x04, 0xa2, 0xde,
-	0xe6, 0xcd, 0xbc, 0x37, 0x3b, 0x3c, 0x3f, 0x01, 0xa7, 0x9c, 0x4e, 0x96, 0x73, 0xd2, 0x0a, 0x23,
-	0x21, 0x05, 0x2e, 0x27, 0xc8, 0xf9, 0x69, 0x00, 0x8c, 0x74, 0x39, 0xa4, 0x92, 0xe0, 0x36, 0x98,
-	0x37, 0xab, 0x90, 0xd6, 0xd1, 0x05, 0x6a, 0x56, 0xdb, 0x76, 0x2b, 0xd5, 0xe4, 0x8c, 0xd6, 0x90,
-	0xc6, 0x31, 0x99, 0x52, 0xc5, 0x72, 0x35, 0x17, 0x77, 0xe0, 0xe8, 0x35, 0x95, 0x84, 0xcd, 0xe3,
-	0xba, 0x71, 0x81, 0x9a, 0x95, 0x76, 0xe3, 0x50, 0x96, 0x12, 0xdc, 0x2d, 0xd3, 0xf9, 0x85, 0xa0,
-	0xb2, 0xb3, 0x0a, 0x1f, 0x83, 0x39, 0x12, 0x9c, 0x5a, 0x05, 0x7c, 0x06, 0x27, 0x7d, 0x11, 0xcb,
-	0x37, 0x4b, 0x1a, 0xad, 0x2c, 0x84, 0x31, 0x54, 0x33, 0xe8, 0xd2, 0x70, 0xbe, 0xb2, 0x0c, 0xfc,
-	0x04, 0x6a, 0xaa, 0xf7, 0x36, 0xf4, 0x89, 0xa4, 0x23, 0x21, 0xd9, 0x27, 0xe6, 0x11, 0xc9, 0x04,
-	0xb7, 0x8a, 0xb8, 0x01, 0x8f, 0xd5, 0x6c, 0x28, 0xbe, 0x50, 0xff, 0xde, 0xc8, 0xdc, 0x8e, 0xc6,
-	0x4b, 0xee, 0x05, 0xf7, 0x46, 0x25, 0x5c, 0x05, 0x50, 0xa3, 0xf7, 0x81, 0x20, 0x0b, 0x66, 0x95,
-	0xf1, 0xff, 0x70, 0x9e, 0xe3, 0xe4, 0xd9, 0x23, 0x75, 0xd9, 0x98, 0xc8, 0xa0, 0x17, 0x50, 0x6f,
-	0x66, 0x1d, 0xab, 0xcb, 0x32, 0x98, 0x50, 0x4e, 0x9c, 0xef, 0x08, 0xfe, 0x3b, 0xf8, 0xd5, 0xf8,
-	0x11, 0x94, 0xde, 0x85, 0x7c, 0x10, 0x6a, 0x5b, 0xcf, 0xdc, 0x04, 0xe0, 0x2b, 0xa8, 0x0c, 0xc2,
-	0xab, 0x97, 0xdc, 0x1f, 0x8b, 0x48, 0x2a, 0xef, 0x8a, 0xcd, 0x4a, 0x1b, 0x6f, 0xbd, 0xcb, 0x47,
-	0xee, 0x2e, 0x2d, 0x51, 0x75, 0x33, 0x95, 0xb9, 0xaf, 0xea, 0xee, 0xa8, 0x32, 0x1a, 0xae, 0xc3,
-	0x91, 0x27, 0x96, 0x5c, 0xd2, 0xa8, 0x5e, 0xd4, 0x37, 0x6c, 0xa1, 0xf3, 0x0c, 0x20, 0x5f, 0x8f,
-	0xab, 0x60, 0x64, 0x67, 0x1a, 0x83, 0x10, 0x63, 0x30, 0x55, 0x5f, 0x7f, 0xd8, 0x33, 0x57, 0xd7,
-	0xce, 0x0b, 0xa5, 0xe8, 0xee, 0x28, 0xfa, 0x4c, 0x2b, 0x4c, 0xd7, 0xe8, 0x33, 0x85, 0xaf, 0x85,
-	0xe6, 0x9b, 0xae, 0x71, 0x2d, 0xb2, 0x0d, 0xc5, 0x9d, 0x0d, 0xb7, 0xdb, 0xcc, 0x8d, 0x19, 0x9f,
-	0xfe, 0x3b, 0x73, 0x8a, 0xf1, 0x40, 0xe6, 0x30, 0x98, 0x37, 0x6c, 0x41, 0xd3, 0x77, 0x74, 0xed,
-	0x38, 0x07, 0x89, 0x52, 0x62, 0xab, 0x80, 0x4f, 0xa0, 0x94, 0x7c, 0x1f, 0xe4, 0x7c, 0x84, 0xf3,
-	0x64, 0x6f, 0x9f, 0x70, 0x3f, 0x0e, 0xc8, 0x8c, 0xe2, 0xe7, 0x79, 0x7c, 0x91, 0x8e, 0xef, 0xde,
-	0x05, 0x19, 0x73, 0x3f, 0xc3, 0xea, 0x88, 0xfe, 0x82, 0x78, 0xfa, 0x88, 0x53, 0x57, 0xd7, 0xce,
-	0x37, 0x04, 0xb5, 0x87, 0x75, 0x8a, 0xde, 0xa3, 0x91, 0xd4, 0xaf, 0x9c, 0xba, 0xba, 0xc6, 0x4f,
-	0xa1, 0x3a, 0xe0, 0x4c, 0x32, 0x22, 0x45, 0x34, 0xe0, 0x3e, 0xbd, 0x4d, 0x9d, 0xde, 0xeb, 0x2a,
-	0x9e, 0x4b, 0xe3, 0x50, 0x70, 0x9f, 0xa6, 0xbc, 0xc4, 0xcf, 0xbd, 0x2e, 0xae, 0x41, 0xb9, 0x27,
-	0xc4, 0x8c, 0xd1, 0xba, 0xa9, 0x9d, 0x49, 0x51, 0xe6, 0x57, 0x29, 0xf7, 0xeb, 0x55, 0xe7, 0xc7,
-	0xda, 0x46, 0x77, 0x6b, 0x1b, 0xfd, 0x59, 0xdb, 0xe8, 0xeb, 0xc6, 0x2e, 0xdc, 0x6d, 0xec, 0xc2,
-	0xef, 0x8d, 0x5d, 0xf8, 0xd0, 0x98, 0x32, 0x19, 0x2c, 0x27, 0x2d, 0x4f, 0x2c, 0x2e, 0xe3, 0x39,
-	0xf1, 0x66, 0xc1, 0xe7, 0xcb, 0xc4, 0x93, 0x49, 0x59, 0xff, 0x7d, 0x74, 0xfe, 0x06, 0x00, 0x00,
-	0xff, 0xff, 0x20, 0x00, 0x2b, 0x46, 0x4e, 0x04, 0x00, 0x00,
+	// 685 bytes of a gzipped FileDescriptorProto
+	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x7c, 0x54, 0xc1, 0x6e, 0xd3, 0x4a,
+	0x14, 0x8d, 0x1d, 0x27, 0x6d, 0x6e, 0x9a, 0xd4, 0xef, 0xf6, 0x11, 0x52, 0x84, 0xac, 0xe0, 0x05,
+	0xca, 0x2a, 0xad, 0xd2, 0x52, 0xb1, 0x04, 0x82, 0x50, 0x22, 0xb5, 0x55, 0x18, 0x15, 0x90, 0xd8,
+	0xa0, 0x69, 0x32, 0xd4, 0x56, 0x92, 0x19, 0xd7, 0x9e, 0xa0, 0xe6, 0x2f, 0xf8, 0x0c, 0x3e, 0x80,
+	0x8f, 0x60, 0x81, 0x44, 0x17, 0x2c, 0x58, 0xa2, 0xf6, 0x47, 0xd0, 0x8c, 0x1d, 0xc7, 0x49, 0x03,
+	0xbb, 0x39, 0xf7, 0x9e, 0x33, 0x73, 0xe6, 0xcc, 0xd5, 0xc0, 0x16, 0x67, 0xe7, 0xd3, 0x31, 0x6d,
+	0x05, 0xa1, 0x90, 0x02, 0x8b, 0x31, 0x72, 0xbf, 0x9b, 0x00, 0xa7, 0x7a, 0x79, 0xc2, 0x24, 0xc5,
+	0x36, 0x58, 0x67, 0xb3, 0x80, 0xd5, 0x8d, 0x86, 0xd1, 0xac, 0xb6, 0x9d, 0x56, 0xa2, 0x59, 0x30,
+	0x5a, 0x27, 0x2c, 0x8a, 0xe8, 0x05, 0x53, 0x2c, 0xa2, 0xb9, 0x78, 0x00, 0x1b, 0x2f, 0x99, 0xa4,
+	0xfe, 0x38, 0xaa, 0x9b, 0x0d, 0xa3, 0x59, 0x6e, 0xef, 0xde, 0x95, 0x25, 0x04, 0x32, 0x67, 0xba,
+	0x3f, 0x0d, 0x28, 0x67, 0xb6, 0xc2, 0x4d, 0xb0, 0x4e, 0x05, 0x67, 0x76, 0x0e, 0x2b, 0x50, 0xea,
+	0x8a, 0x48, 0xbe, 0x9e, 0xb2, 0x70, 0x66, 0x1b, 0x88, 0x50, 0x4d, 0x21, 0x61, 0xc1, 0x78, 0x66,
+	0x9b, 0xf8, 0x00, 0x6a, 0xaa, 0xf6, 0x26, 0x18, 0x52, 0xc9, 0x4e, 0x85, 0xf4, 0x3f, 0xfa, 0x03,
+	0x2a, 0x7d, 0xc1, 0xed, 0x3c, 0xee, 0xc2, 0x3d, 0xd5, 0x3b, 0x11, 0x9f, 0xd8, 0x70, 0xa9, 0x65,
+	0xcd, 0x5b, 0xfd, 0x29, 0x1f, 0x78, 0x4b, 0xad, 0x02, 0x56, 0x01, 0x54, 0xeb, 0x9d, 0x27, 0xe8,
+	0xc4, 0xb7, 0x8b, 0xb8, 0x03, 0xdb, 0x0b, 0x1c, 0x1f, 0xbb, 0xa1, 0x9c, 0xf5, 0xa9, 0xf4, 0x3a,
+	0x1e, 0x1b, 0x8c, 0xec, 0x4d, 0xe5, 0x2c, 0x85, 0x31, 0xa5, 0xe4, 0xfe, 0x30, 0xe0, 0xbf, 0x3b,
+	0xb7, 0xc6, 0xff, 0xa1, 0xf0, 0x36, 0xe0, 0xbd, 0x40, 0xc7, 0x5a, 0x21, 0x31, 0xc0, 0x43, 0x28,
+	0xf7, 0x82, 0xc3, 0xe7, 0x7c, 0xd8, 0x17, 0xa1, 0x54, 0xd9, 0xe5, 0x9b, 0xe5, 0x36, 0xce, 0xb3,
+	0x5b, 0xb4, 0x48, 0x96, 0x16, 0xab, 0x8e, 0x52, 0x95, 0xb5, 0xaa, 0x3a, 0xca, 0xa8, 0x52, 0x1a,
+	0x3a, 0x00, 0x84, 0x8d, 0xe9, 0x2c, 0xb6, 0x51, 0x68, 0xe4, 0x9b, 0x15, 0x92, 0xa9, 0x60, 0x1d,
+	0x36, 0x06, 0x62, 0xca, 0x25, 0x0b, 0xeb, 0x79, 0xed, 0x71, 0x0e, 0xdd, 0x7d, 0x80, 0xc5, 0xf1,
+	0x58, 0x05, 0x33, 0xbd, 0x86, 0xd9, 0x0b, 0x10, 0xc1, 0x52, 0x75, 0xfd, 0xf0, 0x15, 0xa2, 0xd7,
+	0xee, 0x33, 0xa5, 0x38, 0xca, 0x28, 0xba, 0xbe, 0x56, 0x58, 0xc4, 0xec, 0xfa, 0x0a, 0x1f, 0x0b,
+	0xcd, 0xb7, 0x88, 0x79, 0x2c, 0xd2, 0x1d, 0xf2, 0x99, 0x1d, 0xae, 0xe6, 0x33, 0xd9, 0xf7, 0xf9,
+	0xc5, 0xbf, 0x67, 0x52, 0x31, 0xd6, 0xcc, 0x24, 0x82, 0x75, 0xe6, 0x4f, 0x58, 0x72, 0x8e, 0x5e,
+	0xbb, 0xee, 0x9d, 0x89, 0x53, 0x62, 0x3b, 0x87, 0x25, 0x28, 0xc4, 0xef, 0x67, 0xb8, 0x1f, 0x60,
+	0x3b, 0xde, 0xb7, 0x4b, 0xf9, 0x30, 0xf2, 0xe8, 0x88, 0xe1, 0xd3, 0xc5, 0x78, 0x1b, 0x7a, 0xbc,
+	0x57, 0x1c, 0xa4, 0xcc, 0xd5, 0x19, 0x57, 0x26, 0xba, 0x13, 0x3a, 0xd0, 0x26, 0xb6, 0x88, 0x5e,
+	0xbb, 0x5f, 0x0c, 0xa8, 0xad, 0xd7, 0x29, 0x7a, 0x87, 0x85, 0x52, 0x9f, 0xb2, 0x45, 0xf4, 0x1a,
+	0x1f, 0x43, 0xb5, 0xc7, 0x7d, 0xe9, 0x53, 0x29, 0xc2, 0x1e, 0x1f, 0xb2, 0xab, 0x24, 0xe9, 0x95,
+	0xaa, 0xe2, 0x11, 0x16, 0x05, 0x82, 0x0f, 0x59, 0xc2, 0x8b, 0xf3, 0x5c, 0xa9, 0x62, 0x0d, 0x8a,
+	0x1d, 0x21, 0x46, 0x3e, 0xab, 0x5b, 0x3a, 0x99, 0x04, 0xa5, 0x79, 0x15, 0x32, 0x79, 0x7d, 0x35,
+	0xa1, 0x12, 0x5b, 0xed, 0x08, 0x2e, 0x43, 0x31, 0xc6, 0x27, 0x4b, 0x2f, 0xf1, 0x68, 0x39, 0x87,
+	0x84, 0xb4, 0xe6, 0x31, 0xf6, 0x61, 0x27, 0xb5, 0xab, 0x67, 0x2e, 0x7b, 0x93, 0x75, 0x2d, 0xa5,
+	0x48, 0x8d, 0x67, 0x14, 0xf1, 0x9d, 0xd6, 0xb5, 0xf0, 0x21, 0x94, 0x34, 0x3a, 0x13, 0xbd, 0x40,
+	0xdf, 0xad, 0x42, 0x16, 0x05, 0x6c, 0x40, 0x59, 0x83, 0x57, 0xa1, 0x98, 0xe8, 0xf9, 0x57, 0xfd,
+	0x6c, 0xc9, 0xed, 0xfe, 0xed, 0x3b, 0xaa, 0x01, 0x76, 0x42, 0x46, 0x25, 0xd3, 0x6c, 0xc2, 0x2e,
+	0xa7, 0x2c, 0x92, 0xb6, 0x81, 0xf7, 0x61, 0x67, 0xa9, 0xae, 0x2c, 0x45, 0xcc, 0x36, 0x5f, 0x1c,
+	0x7c, 0xbb, 0x71, 0x8c, 0xeb, 0x1b, 0xc7, 0xf8, 0x7d, 0xe3, 0x18, 0x9f, 0x6f, 0x9d, 0xdc, 0xf5,
+	0xad, 0x93, 0xfb, 0x75, 0xeb, 0xe4, 0xde, 0xef, 0x5e, 0xf8, 0xd2, 0x9b, 0x9e, 0xb7, 0x06, 0x62,
+	0xb2, 0x17, 0x8d, 0xe9, 0x60, 0xe4, 0x5d, 0xee, 0xc5, 0x11, 0x9e, 0x17, 0xf5, 0xaf, 0x7c, 0xf0,
+	0x27, 0x00, 0x00, 0xff, 0xff, 0xd3, 0x87, 0x83, 0x6c, 0xa5, 0x05, 0x00, 0x00,
 }
 
 func (m *NebulaMeta) Marshal() (dAtA []byte, err error) {
@@ -622,6 +743,24 @@ func (m *NebulaMetaDetails) MarshalToSizedBuffer(dAtA []byte) (int, error) {
 	_ = i
 	var l int
 	_ = l
+	if len(m.RelayVpnIp) > 0 {
+		dAtA3 := make([]byte, len(m.RelayVpnIp)*10)
+		var j2 int
+		for _, num := range m.RelayVpnIp {
+			for num >= 1<<7 {
+				dAtA3[j2] = uint8(uint64(num)&0x7f | 0x80)
+				num >>= 7
+				j2++
+			}
+			dAtA3[j2] = uint8(num)
+			j2++
+		}
+		i -= j2
+		copy(dAtA[i:], dAtA3[:j2])
+		i = encodeVarintNebula(dAtA, i, uint64(j2))
+		i--
+		dAtA[i] = 0x2a
+	}
 	if len(m.Ip6AndPorts) > 0 {
 		for iNdEx := len(m.Ip6AndPorts) - 1; iNdEx >= 0; iNdEx-- {
 			{
@@ -859,6 +998,54 @@ func (m *NebulaHandshakeDetails) MarshalToSizedBuffer(dAtA []byte) (int, error)
 	return len(dAtA) - i, nil
 }
 
+func (m *NebulaControl) Marshal() (dAtA []byte, err error) {
+	size := m.Size()
+	dAtA = make([]byte, size)
+	n, err := m.MarshalToSizedBuffer(dAtA[:size])
+	if err != nil {
+		return nil, err
+	}
+	return dAtA[:n], nil
+}
+
+func (m *NebulaControl) MarshalTo(dAtA []byte) (int, error) {
+	size := m.Size()
+	return m.MarshalToSizedBuffer(dAtA[:size])
+}
+
+func (m *NebulaControl) MarshalToSizedBuffer(dAtA []byte) (int, error) {
+	i := len(dAtA)
+	_ = i
+	var l int
+	_ = l
+	if m.RelayFromIp != 0 {
+		i = encodeVarintNebula(dAtA, i, uint64(m.RelayFromIp))
+		i--
+		dAtA[i] = 0x28
+	}
+	if m.RelayToIp != 0 {
+		i = encodeVarintNebula(dAtA, i, uint64(m.RelayToIp))
+		i--
+		dAtA[i] = 0x20
+	}
+	if m.ResponderRelayIndex != 0 {
+		i = encodeVarintNebula(dAtA, i, uint64(m.ResponderRelayIndex))
+		i--
+		dAtA[i] = 0x18
+	}
+	if m.InitiatorRelayIndex != 0 {
+		i = encodeVarintNebula(dAtA, i, uint64(m.InitiatorRelayIndex))
+		i--
+		dAtA[i] = 0x10
+	}
+	if m.Type != 0 {
+		i = encodeVarintNebula(dAtA, i, uint64(m.Type))
+		i--
+		dAtA[i] = 0x8
+	}
+	return len(dAtA) - i, nil
+}
+
 func encodeVarintNebula(dAtA []byte, offset int, v uint64) int {
 	offset -= sovNebula(v)
 	base := offset
@@ -910,6 +1097,13 @@ func (m *NebulaMetaDetails) Size() (n int) {
 			n += 1 + l + sovNebula(uint64(l))
 		}
 	}
+	if len(m.RelayVpnIp) > 0 {
+		l = 0
+		for _, e := range m.RelayVpnIp {
+			l += sovNebula(uint64(e))
+		}
+		n += 1 + sovNebula(uint64(l)) + l
+	}
 	return n
 }
 
@@ -1003,6 +1197,30 @@ func (m *NebulaHandshakeDetails) Size() (n int) {
 	return n
 }
 
+func (m *NebulaControl) Size() (n int) {
+	if m == nil {
+		return 0
+	}
+	var l int
+	_ = l
+	if m.Type != 0 {
+		n += 1 + sovNebula(uint64(m.Type))
+	}
+	if m.InitiatorRelayIndex != 0 {
+		n += 1 + sovNebula(uint64(m.InitiatorRelayIndex))
+	}
+	if m.ResponderRelayIndex != 0 {
+		n += 1 + sovNebula(uint64(m.ResponderRelayIndex))
+	}
+	if m.RelayToIp != 0 {
+		n += 1 + sovNebula(uint64(m.RelayToIp))
+	}
+	if m.RelayFromIp != 0 {
+		n += 1 + sovNebula(uint64(m.RelayFromIp))
+	}
+	return n
+}
+
 func sovNebula(x uint64) (n int) {
 	return (math_bits.Len64(x|1) + 6) / 7
 }
@@ -1249,6 +1467,82 @@ func (m *NebulaMetaDetails) Unmarshal(dAtA []byte) error {
 				return err
 			}
 			iNdEx = postIndex
+		case 5:
+			if wireType == 0 {
+				var v uint32
+				for shift := uint(0); ; shift += 7 {
+					if shift >= 64 {
+						return ErrIntOverflowNebula
+					}
+					if iNdEx >= l {
+						return io.ErrUnexpectedEOF
+					}
+					b := dAtA[iNdEx]
+					iNdEx++
+					v |= uint32(b&0x7F) << shift
+					if b < 0x80 {
+						break
+					}
+				}
+				m.RelayVpnIp = append(m.RelayVpnIp, v)
+			} else if wireType == 2 {
+				var packedLen int
+				for shift := uint(0); ; shift += 7 {
+					if shift >= 64 {
+						return ErrIntOverflowNebula
+					}
+					if iNdEx >= l {
+						return io.ErrUnexpectedEOF
+					}
+					b := dAtA[iNdEx]
+					iNdEx++
+					packedLen |= int(b&0x7F) << shift
+					if b < 0x80 {
+						break
+					}
+				}
+				if packedLen < 0 {
+					return ErrInvalidLengthNebula
+				}
+				postIndex := iNdEx + packedLen
+				if postIndex < 0 {
+					return ErrInvalidLengthNebula
+				}
+				if postIndex > l {
+					return io.ErrUnexpectedEOF
+				}
+				var elementCount int
+				var count int
+				for _, integer := range dAtA[iNdEx:postIndex] {
+					if integer < 128 {
+						count++
+					}
+				}
+				elementCount = count
+				if elementCount != 0 && len(m.RelayVpnIp) == 0 {
+					m.RelayVpnIp = make([]uint32, 0, elementCount)
+				}
+				for iNdEx < postIndex {
+					var v uint32
+					for shift := uint(0); ; shift += 7 {
+						if shift >= 64 {
+							return ErrIntOverflowNebula
+						}
+						if iNdEx >= l {
+							return io.ErrUnexpectedEOF
+						}
+						b := dAtA[iNdEx]
+						iNdEx++
+						v |= uint32(b&0x7F) << shift
+						if b < 0x80 {
+							break
+						}
+					}
+					m.RelayVpnIp = append(m.RelayVpnIp, v)
+				}
+			} else {
+				return fmt.Errorf("proto: wrong wireType = %d for field RelayVpnIp", wireType)
+			}
 		default:
 			iNdEx = preIndex
 			skippy, err := skipNebula(dAtA[iNdEx:])
@@ -1833,6 +2127,151 @@ func (m *NebulaHandshakeDetails) Unmarshal(dAtA []byte) error {
 	}
 	return nil
 }
+func (m *NebulaControl) Unmarshal(dAtA []byte) error {
+	l := len(dAtA)
+	iNdEx := 0
+	for iNdEx < l {
+		preIndex := iNdEx
+		var wire uint64
+		for shift := uint(0); ; shift += 7 {
+			if shift >= 64 {
+				return ErrIntOverflowNebula
+			}
+			if iNdEx >= l {
+				return io.ErrUnexpectedEOF
+			}
+			b := dAtA[iNdEx]
+			iNdEx++
+			wire |= uint64(b&0x7F) << shift
+			if b < 0x80 {
+				break
+			}
+		}
+		fieldNum := int32(wire >> 3)
+		wireType := int(wire & 0x7)
+		if wireType == 4 {
+			return fmt.Errorf("proto: NebulaControl: wiretype end group for non-group")
+		}
+		if fieldNum <= 0 {
+			return fmt.Errorf("proto: NebulaControl: illegal tag %d (wire type %d)", fieldNum, wire)
+		}
+		switch fieldNum {
+		case 1:
+			if wireType != 0 {
+				return fmt.Errorf("proto: wrong wireType = %d for field Type", wireType)
+			}
+			m.Type = 0
+			for shift := uint(0); ; shift += 7 {
+				if shift >= 64 {
+					return ErrIntOverflowNebula
+				}
+				if iNdEx >= l {
+					return io.ErrUnexpectedEOF
+				}
+				b := dAtA[iNdEx]
+				iNdEx++
+				m.Type |= NebulaControl_MessageType(b&0x7F) << shift
+				if b < 0x80 {
+					break
+				}
+			}
+		case 2:
+			if wireType != 0 {
+				return fmt.Errorf("proto: wrong wireType = %d for field InitiatorRelayIndex", wireType)
+			}
+			m.InitiatorRelayIndex = 0
+			for shift := uint(0); ; shift += 7 {
+				if shift >= 64 {
+					return ErrIntOverflowNebula
+				}
+				if iNdEx >= l {
+					return io.ErrUnexpectedEOF
+				}
+				b := dAtA[iNdEx]
+				iNdEx++
+				m.InitiatorRelayIndex |= uint32(b&0x7F) << shift
+				if b < 0x80 {
+					break
+				}
+			}
+		case 3:
+			if wireType != 0 {
+				return fmt.Errorf("proto: wrong wireType = %d for field ResponderRelayIndex", wireType)
+			}
+			m.ResponderRelayIndex = 0
+			for shift := uint(0); ; shift += 7 {
+				if shift >= 64 {
+					return ErrIntOverflowNebula
+				}
+				if iNdEx >= l {
+					return io.ErrUnexpectedEOF
+				}
+				b := dAtA[iNdEx]
+				iNdEx++
+				m.ResponderRelayIndex |= uint32(b&0x7F) << shift
+				if b < 0x80 {
+					break
+				}
+			}
+		case 4:
+			if wireType != 0 {
+				return fmt.Errorf("proto: wrong wireType = %d for field RelayToIp", wireType)
+			}
+			m.RelayToIp = 0
+			for shift := uint(0); ; shift += 7 {
+				if shift >= 64 {
+					return ErrIntOverflowNebula
+				}
+				if iNdEx >= l {
+					return io.ErrUnexpectedEOF
+				}
+				b := dAtA[iNdEx]
+				iNdEx++
+				m.RelayToIp |= uint32(b&0x7F) << shift
+				if b < 0x80 {
+					break
+				}
+			}
+		case 5:
+			if wireType != 0 {
+				return fmt.Errorf("proto: wrong wireType = %d for field RelayFromIp", wireType)
+			}
+			m.RelayFromIp = 0
+			for shift := uint(0); ; shift += 7 {
+				if shift >= 64 {
+					return ErrIntOverflowNebula
+				}
+				if iNdEx >= l {
+					return io.ErrUnexpectedEOF
+				}
+				b := dAtA[iNdEx]
+				iNdEx++
+				m.RelayFromIp |= uint32(b&0x7F) << shift
+				if b < 0x80 {
+					break
+				}
+			}
+		default:
+			iNdEx = preIndex
+			skippy, err := skipNebula(dAtA[iNdEx:])
+			if err != nil {
+				return err
+			}
+			if (skippy < 0) || (iNdEx+skippy) < 0 {
+				return ErrInvalidLengthNebula
+			}
+			if (iNdEx + skippy) > l {
+				return io.ErrUnexpectedEOF
+			}
+			iNdEx += skippy
+		}
+	}
+
+	if iNdEx > l {
+		return io.ErrUnexpectedEOF
+	}
+	return nil
+}
 func skipNebula(dAtA []byte) (n int, err error) {
 	l := len(dAtA)
 	iNdEx := 0

+ 14 - 1
nebula.proto

@@ -15,7 +15,6 @@ message NebulaMeta {
     HostWhoamiReply = 7;
     PathCheck = 8;
     PathCheckReply = 9;
-
   }
 
   MessageType Type = 1;
@@ -26,6 +25,7 @@ message NebulaMetaDetails {
   uint32 VpnIp = 1;
   repeated Ip4AndPort Ip4AndPorts = 2;
   repeated Ip6AndPort Ip6AndPorts = 4;
+  repeated uint32 RelayVpnIp = 5;
   uint32 counter = 3;
 }
 
@@ -63,3 +63,16 @@ message NebulaHandshakeDetails {
   uint64 Time = 5;
 }
 
+message NebulaControl {
+  enum MessageType {
+    None = 0;
+    CreateRelayRequest = 1;
+    CreateRelayResponse = 2;
+  }
+  MessageType Type = 1;
+
+  uint32 InitiatorRelayIndex = 2;
+  uint32 ResponderRelayIndex = 3;
+  uint32 RelayToIp = 4;
+  uint32 RelayFromIp = 5;
+}
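RelayVpnIp is a packed repeated varint field: field number 5 with wire type 2 gives tag byte (5<<3)|2 = 0x2a, which is exactly the constant the generated marshal code above emits. A standalone sketch of that encoding, assuming the payload stays under 128 bytes so the length fits in a single varint byte (the generated code handles the general case):

// encodeRelayVpnIp mirrors the generated packed encoding for field 5:
// tag 0x2a, then a varint payload length, then each uint32 as a varint.
func encodeRelayVpnIp(ips []uint32) []byte {
	var payload []byte
	for _, v := range ips {
		for v >= 0x80 {
			payload = append(payload, byte(v&0x7f)|0x80) // low 7 bits, continuation bit set
			v >>= 7
		}
		payload = append(payload, byte(v)) // final byte, continuation bit clear
	}
	// Assumes len(payload) < 128; the generated code emits a full varint length.
	return append([]byte{0x2a, byte(len(payload))}, payload...)
}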

+ 15 - 0
noise.go

@@ -25,6 +25,14 @@ func NewNebulaCipherState(s *noise.CipherState) *NebulaCipherState {
 
 }
 
+// EncryptDanger encrypts and authenticates a given payload.
+//
+// - out is a destination slice to hold the output of the EncryptDanger operation.
+// - ad is additional data, which will be authenticated and appended to out, but not encrypted.
+// - plaintext is encrypted, authenticated and appended to out.
+// - n is a nonce value which must never be re-used with this key.
+// - nb is a buffer used for temporary storage in the implementation of this call, which should
+//   be re-used by callers to minimize garbage collection.
 func (s *NebulaCipherState) EncryptDanger(out, ad, plaintext []byte, n uint64, nb []byte) ([]byte, error) {
 	if s != nil {
 		// TODO: Is this okay now that we have made messageCounter atomic?
@@ -58,3 +66,10 @@ func (s *NebulaCipherState) DecryptDanger(out, ad, ciphertext []byte, n uint64,
 		return []byte{}, nil
 	}
 }
+
+func (s *NebulaCipherState) Overhead() int {
+	if s != nil {
+		return s.c.(cipher.AEAD).Overhead()
+	}
+	return 0
+}
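A usage sketch for the documented call, under assumed names (cs is a *NebulaCipherState, counter a per-key nonce that is never reused, adBytes and plaintext the inputs): the returned slice reuses out's backing array when capacity allows, and nb is scratch space the caller keeps between calls.

out := make([]byte, 0, mtu)
nb := make([]byte, 12) // reusable scratch buffer, per the doc comment above
ciphertext, err := cs.EncryptDanger(out, adBytes, plaintext, counter, nb)
if err != nil {
	return err
}
// ciphertext now holds ad || encrypted payload || AEAD tag (cs.Overhead() bytes).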

+ 114 - 21
outside.go

@@ -21,7 +21,7 @@ const (
 	minFwPacketLen = 4
 )
 
-func (f *Interface) readOutsidePackets(addr *udp.Addr, out []byte, packet []byte, h *header.H, fwPacket *firewall.Packet, lhf udp.LightHouseHandlerFunc, nb []byte, q int, localCache firewall.ConntrackCache) {
+func (f *Interface) readOutsidePackets(addr *udp.Addr, via interface{}, out []byte, packet []byte, h *header.H, fwPacket *firewall.Packet, lhf udp.LightHouseHandlerFunc, nb []byte, q int, localCache firewall.ConntrackCache) {
 	err := h.Parse(packet)
 	if err != nil {
 		// TODO: best if we return this and let caller log
@@ -35,23 +35,96 @@ func (f *Interface) readOutsidePackets(addr *udp.Addr, out []byte, packet []byte
 
 	//l.Error("in packet ", header, packet[HeaderLen:])
 
+	var hostinfo *HostInfo
 	// verify if we've seen this index before, otherwise respond to the handshake initiation
-	hostinfo, err := f.hostMap.QueryIndex(h.RemoteIndex)
+	if h.Type == header.Message && h.Subtype == header.MessageRelay {
+		hostinfo, _ = f.hostMap.QueryRelayIndex(h.RemoteIndex)
+	} else {
+		hostinfo, _ = f.hostMap.QueryIndex(h.RemoteIndex)
+	}
 
 	var ci *ConnectionState
-	if err == nil {
+	if hostinfo != nil {
 		ci = hostinfo.ConnectionState
 	}
 
 	switch h.Type {
 	case header.Message:
+		// TODO handleEncrypted sends directly to addr on error. Handle this in the tunneling case.
 		if !f.handleEncrypted(ci, addr, h) {
 			return
 		}
 
-		f.decryptToTun(hostinfo, h.MessageCounter, out, packet, fwPacket, nb, q, localCache)
+		switch h.Subtype {
+		case header.MessageNone:
+			f.decryptToTun(hostinfo, h.MessageCounter, out, packet, fwPacket, nb, q, localCache)
+		case header.MessageRelay:
+			// The entire body is sent as AD, not encrypted.
+			// The packet consists of a 16-byte parsed Nebula header, an Associated Data-protected payload, and a trailing 16-byte AEAD authentication tag.
+			// The packet is guaranteed to be at least 16 bytes at this point, because it got past the h.Parse() call above. If it's
+			// otherwise malformed (meaning there is no trailing 16-byte AEAD value), this will at worst produce a 0-length slice,
+			// which will gracefully fail in the DecryptDanger call.
+			signedPayload := packet[:len(packet)-hostinfo.ConnectionState.dKey.Overhead()]
+			signatureValue := packet[len(packet)-hostinfo.ConnectionState.dKey.Overhead():]
+			out, err = hostinfo.ConnectionState.dKey.DecryptDanger(out, signedPayload, signatureValue, h.MessageCounter, nb)
+			if err != nil {
+				return
+			}
+			// Successfully authenticated the payload. Strip the Relay header.
+			signedPayload = signedPayload[header.Len:]
+			// Pull the Roaming parts up here, and return in all call paths.
+			f.handleHostRoaming(hostinfo, addr)
+			f.connectionManager.In(hostinfo.vpnIp)
+
+			relay, ok := hostinfo.relayState.QueryRelayForByIdx(h.RemoteIndex)
+			if !ok {
+				// The only way this happens is if hostmap has an index to the correct HostInfo, but the HostInfo is missing
+				// its internal mapping. This shouldn't happen!
+				hostinfo.logger(f.l).WithField("hostinfo", hostinfo.vpnIp).WithField("remoteIndex", h.RemoteIndex).Errorf("HostInfo missing remote index")
+				// Delete my local index from the hostmap
+				f.hostMap.DeleteRelayIdx(h.RemoteIndex)
+				// When the peer doesn't receive any return traffic, its connection_manager will eventually clean up
+				// the broken relay when it cleans up the associated HostInfo object.
+				return
+			}
 
-		// Fallthrough to the bottom to record incoming traffic
+			switch relay.Type {
+			case TerminalType:
+				// If I am the target of this relay, process the unwrapped packet
+				// From this recursive point, all these variables are 'burned'. We shouldn't rely on them again.
+				f.readOutsidePackets(nil, &ViaSender{relayHI: hostinfo, remoteIdx: relay.RemoteIndex, relay: relay}, out[:0], signedPayload, h, fwPacket, lhf, nb, q, localCache)
+				return
+			case ForwardingType:
+				// Find the target HostInfo relay object
+				targetHI, err := f.hostMap.QueryVpnIp(relay.PeerIp)
+				if err != nil {
+					hostinfo.logger(f.l).WithField("peerIp", relay.PeerIp).WithError(err).Info("Failed to find target host info by ip")
+					return
+				}
+				// find the target Relay info object
+				targetRelay, ok := targetHI.relayState.QueryRelayForByIp(hostinfo.vpnIp)
+				if !ok {
+					hostinfo.logger(f.l).WithField("peerIp", relay.PeerIp).Info("Failed to find relay in hostinfo")
+					return
+				}
+
+				// If that relay is Established, forward the payload through it
+				if targetRelay.State == Established {
+					switch targetRelay.Type {
+					case ForwardingType:
+						// Forward this packet through the relay tunnel
+						// Find the target HostInfo
+						f.SendVia(targetHI, targetRelay, signedPayload, nb, out, false)
+						return
+					case TerminalType:
+						hostinfo.logger(f.l).Error("Unexpected Relay Type of Terminal")
+					}
+				} else {
+					hostinfo.logger(f.l).WithField("targetRelayState", targetRelay.State).Info("Unexpected target relay state")
+					return
+				}
+			}
+		}
 
 	case header.LightHouse:
 		f.messageMetrics.Rx(h.Type, h.Subtype, 1)
@@ -95,7 +168,7 @@ func (f *Interface) readOutsidePackets(addr *udp.Addr, out []byte, packet []byte
 			// This testRequest might be from TryPromoteBest, so we should roam
 			// to the new IP address before responding
 			f.handleHostRoaming(hostinfo, addr)
-			f.send(header.Test, header.TestReply, ci, hostinfo, hostinfo.remote, d, nb, out)
+			f.send(header.Test, header.TestReply, ci, hostinfo, d, nb, out)
 		}
 
 		// Fallthrough to the bottom to record incoming traffic
@@ -105,7 +178,7 @@ func (f *Interface) readOutsidePackets(addr *udp.Addr, out []byte, packet []byte
 
 	case header.Handshake:
 		f.messageMetrics.Rx(h.Type, h.Subtype, 1)
-		HandleIncomingHandshake(f, addr, packet, h, hostinfo)
+		HandleIncomingHandshake(f, addr, via, packet, h, hostinfo)
 		return
 
 	case header.RecvError:
@@ -122,9 +195,30 @@ func (f *Interface) readOutsidePackets(addr *udp.Addr, out []byte, packet []byte
 		hostinfo.logger(f.l).WithField("udpAddr", addr).
 			Info("Close tunnel received, tearing down.")
 
-		f.closeTunnel(hostinfo, false)
+		f.closeTunnel(hostinfo)
 		return
 
+	case header.Control:
+		if !f.handleEncrypted(ci, addr, h) {
+			return
+		}
+
+		d, err := f.decrypt(hostinfo, h.MessageCounter, out, packet, h, nb)
+		if err != nil {
+			hostinfo.logger(f.l).WithError(err).WithField("udpAddr", addr).
+				WithField("packet", packet).
+				Error("Failed to decrypt Control packet")
+			return
+		}
+		m := &NebulaControl{}
+		err = m.Unmarshal(d)
+		if err != nil {
+			hostinfo.logger(f.l).WithError(err).Error("Failed to unmarshal control message")
+			break
+		}
+
+		f.relayManager.HandleControlMsg(hostinfo, m, f)
+
 	default:
 		f.messageMetrics.Rx(h.Type, h.Subtype, 1)
 		hostinfo.logger(f.l).Debugf("Unexpected packet received from %s", addr)
@@ -137,26 +231,22 @@ func (f *Interface) readOutsidePackets(addr *udp.Addr, out []byte, packet []byte
 }
 
 // closeTunnel closes a tunnel locally, it does not send a closeTunnel packet to the remote
-func (f *Interface) closeTunnel(hostInfo *HostInfo, hasHostMapLock bool) {
+func (f *Interface) closeTunnel(hostInfo *HostInfo) {
 	//TODO: this would be better as a single function in ConnectionManager that handled locks appropriately
 	f.connectionManager.ClearIP(hostInfo.vpnIp)
 	f.connectionManager.ClearPendingDeletion(hostInfo.vpnIp)
 	f.lightHouse.DeleteVpnIp(hostInfo.vpnIp)
 
-	if hasHostMapLock {
-		f.hostMap.unlockedDeleteHostInfo(hostInfo)
-	} else {
-		f.hostMap.DeleteHostInfo(hostInfo)
-	}
+	f.hostMap.DeleteHostInfo(hostInfo)
 }
 
 // sendCloseTunnel is a helper function to send a proper close tunnel packet to a remote
 func (f *Interface) sendCloseTunnel(h *HostInfo) {
-	f.send(header.CloseTunnel, 0, h.ConnectionState, h, h.remote, []byte{}, make([]byte, 12, 12), make([]byte, mtu))
+	f.send(header.CloseTunnel, 0, h.ConnectionState, h, []byte{}, make([]byte, 12, 12), make([]byte, mtu))
 }
 
 func (f *Interface) handleHostRoaming(hostinfo *HostInfo, addr *udp.Addr) {
-	if !hostinfo.remote.Equals(addr) {
+	if addr != nil && !hostinfo.remote.Equals(addr) {
 		if !f.lightHouse.GetRemoteAllowList().Allow(hostinfo.vpnIp, addr.IP) {
 			hostinfo.logger(f.l).WithField("newAddr", addr).Debug("lighthouse.remote_allow_list denied roaming")
 			return
@@ -172,8 +262,7 @@ func (f *Interface) handleHostRoaming(hostinfo *HostInfo, addr *udp.Addr) {
 		hostinfo.logger(f.l).WithField("udpAddr", hostinfo.remote).WithField("newAddr", addr).
 			Info("Host roamed to new udp ip/port.")
 		hostinfo.lastRoam = time.Now()
-		remoteCopy := *hostinfo.remote
-		hostinfo.lastRoamRemote = &remoteCopy
+		hostinfo.lastRoamRemote = hostinfo.remote
 		hostinfo.SetRemote(addr)
 	}
 
@@ -183,8 +272,12 @@ func (f *Interface) handleEncrypted(ci *ConnectionState, addr *udp.Addr, h *head
 	// If connectionstate exists and the replay protector allows, process packet
 	// Else, send recv errors for 300 seconds after a restart to allow fast reconnection.
 	if ci == nil || !ci.window.Check(f.l, h.MessageCounter) {
-		f.sendRecvError(addr, h.RemoteIndex)
-		return false
+		if addr != nil {
+			f.sendRecvError(addr, h.RemoteIndex)
+		}
+		return false
 	}
 
 	return true
@@ -349,7 +442,7 @@ func (f *Interface) handleRecvError(addr *udp.Addr, h *header.H) {
 		return
 	}
 
-	f.closeTunnel(hostinfo, false)
+	f.closeTunnel(hostinfo)
 	// We also delete it from pending hostmap to allow for
 	// fast reconnect.
 	f.handshakeManager.DeleteHostInfo(hostinfo)

+ 315 - 0
relay_manager.go

@@ -0,0 +1,315 @@
+package nebula
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"sync/atomic"
+
+	"github.com/sirupsen/logrus"
+	"github.com/slackhq/nebula/config"
+	"github.com/slackhq/nebula/header"
+	"github.com/slackhq/nebula/iputil"
+)
+
+type relayManager struct {
+	l             *logrus.Logger
+	hostmap       *HostMap
+	atomicAmRelay int32
+}
+
+func NewRelayManager(ctx context.Context, l *logrus.Logger, hostmap *HostMap, c *config.C) *relayManager {
+	rm := &relayManager{
+		l:       l,
+		hostmap: hostmap,
+	}
+	rm.reload(c, true)
+	c.RegisterReloadCallback(func(c *config.C) {
+		err := rm.reload(c, false)
+		if err != nil {
+			l.WithError(err).Error("Failed to reload relay_manager")
+		}
+	})
+	return rm
+}
+
+func (rm *relayManager) reload(c *config.C, initial bool) error {
+	if initial || c.HasChanged("relay.am_relay") {
+		rm.setAmRelay(c.GetBool("relay.am_relay", false))
+	}
+	return nil
+}
+
+func (rm *relayManager) GetAmRelay() bool {
+	return atomic.LoadInt32(&rm.atomicAmRelay) == 1
+}
+
+func (rm *relayManager) setAmRelay(v bool) {
+	var val int32
+	switch v {
+	case true:
+		val = 1
+	case false:
+		val = 0
+	}
+	atomic.StoreInt32(&rm.atomicAmRelay, val)
+}
+
+// AddRelay finds an available relay index on the hostmap, and associates the relay info with it.
+// relayHostInfo is the Nebula peer which can be used as a relay to access the target vpnIp.
+func AddRelay(l *logrus.Logger, relayHostInfo *HostInfo, hm *HostMap, vpnIp iputil.VpnIp, remoteIdx *uint32, relayType int, state int) (uint32, error) {
+	hm.Lock()
+	defer hm.Unlock()
+	for i := 0; i < 32; i++ {
+		index, err := generateIndex(l)
+		if err != nil {
+			return 0, err
+		}
+
+		_, inRelays := hm.Relays[index]
+		if !inRelays {
+			hm.Relays[index] = relayHostInfo
+			newRelay := Relay{
+				Type:       relayType,
+				State:      state,
+				LocalIndex: index,
+				PeerIp:     vpnIp,
+			}
+
+			if remoteIdx != nil {
+				newRelay.RemoteIndex = *remoteIdx
+			}
+			relayHostInfo.relayState.InsertRelay(vpnIp, index, &newRelay)
+
+			return index, nil
+		}
+	}
+
+	return 0, errors.New("failed to generate unique localIndexId")
+}
+
+// EstablishRelay updates a Requested Relay to become an Established Relay, which can pass traffic.
+func (rm *relayManager) EstablishRelay(relayHostInfo *HostInfo, m *NebulaControl) (*Relay, error) {
+	relay, ok := relayHostInfo.relayState.QueryRelayForByIdx(m.InitiatorRelayIndex)
+	if !ok {
+		rm.l.WithFields(logrus.Fields{"relayHostInfo": relayHostInfo.vpnIp,
+			"initiatorRelayIndex": m.InitiatorRelayIndex,
+			"relayFrom":           m.RelayFromIp,
+			"relayTo":             m.RelayToIp}).Info("relayManager EstablishRelay relayForByIdx not found")
+		return nil, fmt.Errorf("unknown relay")
+	}
+	// TODO: mutating relay here deserves some synchronization
+	relay.RemoteIndex = m.ResponderRelayIndex
+	relay.State = Established
+
+	return relay, nil
+}
+
+func (rm *relayManager) HandleControlMsg(h *HostInfo, m *NebulaControl, f *Interface) {
+
+	switch m.Type {
+	case NebulaControl_CreateRelayRequest:
+		rm.handleCreateRelayRequest(h, f, m)
+	case NebulaControl_CreateRelayResponse:
+		rm.handleCreateRelayResponse(h, f, m)
+	}
+
+}
+
+func (rm *relayManager) handleCreateRelayResponse(h *HostInfo, f *Interface, m *NebulaControl) {
+	rm.l.WithFields(logrus.Fields{
+		"relayFrom":    iputil.VpnIp(m.RelayFromIp),
+		"relayTarget":  iputil.VpnIp(m.RelayToIp),
+		"initiatorIdx": m.InitiatorRelayIndex,
+		"responderIdx": m.ResponderRelayIndex,
+		"hostInfo":     h.vpnIp}).
+		Info("handleCreateRelayResponse")
+	target := iputil.VpnIp(m.RelayToIp)
+
+	relay, err := rm.EstablishRelay(h, m)
+	if err != nil {
+		rm.l.WithError(err).WithField("target", target.String()).Error("Failed to update relay for target")
+		return
+	}
+	// Do I need to complete the relays now?
+	if relay.Type == TerminalType {
+		return
+	}
+	// I'm the middle man. Let the initiator know that I've established the relay they requested.
+	peerHostInfo, err := rm.hostmap.QueryVpnIp(relay.PeerIp)
+	if err != nil {
+		rm.l.WithError(err).WithField("relayPeerIp", relay.PeerIp).Error("Can't find a HostInfo for peer IP")
+		return
+	}
+	peerRelay, ok := peerHostInfo.relayState.QueryRelayForByIp(target)
+	if !ok {
+		rm.l.WithField("peerIp", peerHostInfo.vpnIp).WithField("target", target.String()).Error("peerRelay does not have Relay state for target IP", peerHostInfo.vpnIp.String(), target.String())
+		return
+	}
+	peerRelay.State = Established
+	resp := NebulaControl{
+		Type:                NebulaControl_CreateRelayResponse,
+		ResponderRelayIndex: peerRelay.LocalIndex,
+		InitiatorRelayIndex: peerRelay.RemoteIndex,
+		RelayFromIp:         uint32(peerHostInfo.vpnIp),
+		RelayToIp:           uint32(target),
+	}
+	msg, err := resp.Marshal()
+	if err != nil {
+		rm.l.
+			WithError(err).Error("relayManager Failed to marhsal Control CreateRelayResponse message to create relay")
+	} else {
+		f.SendMessageToVpnIp(header.Control, 0, peerHostInfo.vpnIp, msg, make([]byte, 12), make([]byte, mtu))
+	}
+}
+
+func (rm *relayManager) handleCreateRelayRequest(h *HostInfo, f *Interface, m *NebulaControl) {
+	rm.l.WithFields(logrus.Fields{
+		"relayFrom":    iputil.VpnIp(m.RelayFromIp),
+		"relayTarget":  iputil.VpnIp(m.RelayToIp),
+		"initiatorIdx": m.InitiatorRelayIndex,
+		"hostInfo":     h.vpnIp}).
+		Info("handleCreateRelayRequest")
+	from := iputil.VpnIp(m.RelayFromIp)
+	target := iputil.VpnIp(m.RelayToIp)
+	// Is the target of the relay me?
+	if target == f.myVpnIp {
+		existingRelay, ok := h.relayState.QueryRelayForByIp(from)
+		addRelay := !ok
+		if ok {
+			// Clean up existing relay, if this is a new request.
+			if existingRelay.RemoteIndex != m.InitiatorRelayIndex {
+				// We got a brand new Relay request, because its index is different than what we saw before.
+				// Clean up the existing Relay state, and get ready to record new Relay state.
+				rm.hostmap.RemoveRelay(existingRelay.LocalIndex)
+				addRelay = true
+			}
+		}
+		if addRelay {
+			_, err := AddRelay(rm.l, h, f.hostMap, from, &m.InitiatorRelayIndex, TerminalType, Established)
+			if err != nil {
+				return
+			}
+		}
+
+		relay, ok := h.relayState.QueryRelayForByIp(from)
+		if ok && m.InitiatorRelayIndex != relay.RemoteIndex {
+			// TODO: the initiator's relay index disagrees with our stored state; this case is not handled yet.
+		}
+
+		resp := NebulaControl{
+			Type:                NebulaControl_CreateRelayResponse,
+			ResponderRelayIndex: relay.LocalIndex,
+			InitiatorRelayIndex: relay.RemoteIndex,
+			RelayFromIp:         uint32(from),
+			RelayToIp:           uint32(target),
+		}
+		msg, err := resp.Marshal()
+		if err != nil {
+			rm.l.
+				WithError(err).Error("relayManager Failed to marshal Control CreateRelayResponse message to create relay")
+		} else {
+			f.SendMessageToVpnIp(header.Control, 0, h.vpnIp, msg, make([]byte, 12), make([]byte, mtu))
+		}
+		return
+	} else {
+		// the target is not me. Create a relay to the target, from me.
+		if !rm.GetAmRelay() {
+			return
+		}
+		peer, err := rm.hostmap.QueryVpnIp(target)
+		if err != nil {
+			// Try to establish a connection to this host. If we get a future relay request,
+			// we'll be ready!
+			f.getOrHandshake(target)
+			return
+		}
+		if peer.remote == nil {
+			// Only create relays to peers for whom I have a direct connection
+			return
+		}
+		sendCreateRequest := false
+		var index uint32
+		targetRelay, ok := peer.relayState.QueryRelayForByIp(from)
+		if ok {
+			index = targetRelay.LocalIndex
+			if targetRelay.State == Requested {
+				sendCreateRequest = true
+			}
+		} else {
+			// Allocate an index in the hostMap for this relay peer
+			index, err = AddRelay(rm.l, peer, f.hostMap, from, nil, ForwardingType, Requested)
+			if err != nil {
+				return
+			}
+			sendCreateRequest = true
+		}
+		if sendCreateRequest {
+			// Send a CreateRelayRequest to the peer.
+			req := NebulaControl{
+				Type:                NebulaControl_CreateRelayRequest,
+				InitiatorRelayIndex: index,
+				RelayFromIp:         uint32(h.vpnIp),
+				RelayToIp:           uint32(target),
+			}
+			msg, err := req.Marshal()
+			if err != nil {
+				rm.l.
+					WithError(err).Error("relayManager Failed to marshal Control message to create relay")
+			} else {
+				f.SendMessageToVpnIp(header.Control, 0, target, msg, make([]byte, 12), make([]byte, mtu))
+			}
+		}
+		// Also track the half-created Relay state just received
+		relay, ok := h.relayState.QueryRelayForByIp(target)
+		if !ok {
+			// Add the relay
+			state := Requested
+			if targetRelay != nil && targetRelay.State == Established {
+				state = Established
+			}
+			_, err := AddRelay(rm.l, h, f.hostMap, target, &m.InitiatorRelayIndex, ForwardingType, state)
+			if err != nil {
+				rm.l.
+					WithError(err).Error("relayManager Failed to allocate a local index for relay")
+				return
+			}
+		} else {
+			if relay.RemoteIndex != m.InitiatorRelayIndex {
+				// This is a stale Relay entry for the same tunnel targets.
+				// Clean up the existing stuff.
+				rm.RemoveRelay(relay.LocalIndex)
+				// Add the new relay
+				_, err := AddRelay(rm.l, h, f.hostMap, target, &m.InitiatorRelayIndex, ForwardingType, Requested)
+				if err != nil {
+					return
+				}
+				relay, _ = h.relayState.QueryRelayForByIp(target)
+			}
+			switch relay.State {
+			case Established:
+				resp := NebulaControl{
+					Type:                NebulaControl_CreateRelayResponse,
+					ResponderRelayIndex: relay.LocalIndex,
+					InitiatorRelayIndex: relay.RemoteIndex,
+					RelayFromIp:         uint32(h.vpnIp),
+					RelayToIp:           uint32(target),
+				}
+				msg, err := resp.Marshal()
+				if err != nil {
+					rm.l.
+						WithError(err).Error("relayManager Failed to marshal Control CreateRelayResponse message to create relay")
+				} else {
+					f.SendMessageToVpnIp(header.Control, 0, h.vpnIp, msg, make([]byte, 12), make([]byte, mtu))
+				}
+
+			case Requested:
+				// Keep waiting for the other relay to complete
+			}
+		}
+	}
+}
+
+func (rm *relayManager) RemoveRelay(localIdx uint32) {
+	rm.hostmap.RemoveRelay(localIdx)
+}
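The two handlers above implement a four-message exchange. If A wants to reach B through relay R: A sends CreateRelayRequest to R (that first step lives in the handshake path, outside this file); handleCreateRelayRequest on R allocates its own index and forwards a CreateRelayRequest to B; handleCreateRelayRequest on B (the target == f.myVpnIp branch) answers R with a CreateRelayResponse; and handleCreateRelayResponse on R marks both sides Established and sends the final CreateRelayResponse back to A. The initiator side reduces to roughly this sketch, with myIdx, myVpnIp, target, and relayVpnIp assumed to be in scope:

req := NebulaControl{
	Type:                NebulaControl_CreateRelayRequest,
	InitiatorRelayIndex: myIdx, // local index allocated via AddRelay
	RelayFromIp:         uint32(myVpnIp),
	RelayToIp:           uint32(target),
}
msg, err := req.Marshal()
if err == nil {
	f.SendMessageToVpnIp(header.Control, 0, relayVpnIp, msg, make([]byte, 12), make([]byte, mtu))
}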

+ 60 - 4
remote_list.go

@@ -26,6 +26,7 @@ type CacheMap map[string]*Cache
 type Cache struct {
 	Learned  []*udp.Addr `json:"learned,omitempty"`
 	Reported []*udp.Addr `json:"reported,omitempty"`
+	Relay    []*net.IP   `json:"relay"`
 }
 
 //TODO: Seems like we should plop static host entries in here too since they are protected by the lighthouse from deletion
@@ -33,8 +34,13 @@ type Cache struct {
 
 // cache is an internal struct that splits v4 and v6 addresses inside the cache map
 type cache struct {
-	v4 *cacheV4
-	v6 *cacheV6
+	v4    *cacheV4
+	v6    *cacheV6
+	relay *cacheRelay
+}
+
+type cacheRelay struct {
+	relay []uint32
 }
 
 // cacheV4 stores learned and reported ipv4 records under cache
@@ -58,6 +64,9 @@ type RemoteList struct {
 	// A deduplicated set of addresses. Any accessor should lock beforehand.
 	addrs []*udp.Addr
 
+	// A set of relay addresses. VpnIp addresses that the remote identified as relays.
+	relays []*iputil.VpnIp
+
 	// These are maps to store v4 and v6 addresses per lighthouse
 	// Map key is the vpnIp of the peer that told us about the cached entries underneath.
 	// For learned addresses, this is the vpnIp that sent the packet
@@ -74,8 +83,9 @@ type RemoteList struct {
 // NewRemoteList creates a new empty RemoteList
 func NewRemoteList() *RemoteList {
 	return &RemoteList{
-		addrs: make([]*udp.Addr, 0),
-		cache: make(map[iputil.VpnIp]*cache),
+		addrs:  make([]*udp.Addr, 0),
+		relays: make([]*iputil.VpnIp, 0),
+		cache:  make(map[iputil.VpnIp]*cache),
 	}
 }
 
@@ -144,6 +154,7 @@ func (r *RemoteList) CopyCache() *CacheMap {
 			c = &Cache{
 				Learned:  make([]*udp.Addr, 0),
 				Reported: make([]*udp.Addr, 0),
+				Relay:    make([]*net.IP, 0),
 			}
 			cm[vpnIp] = c
 		}
@@ -172,6 +183,13 @@ func (r *RemoteList) CopyCache() *CacheMap {
 				c.Reported = append(c.Reported, NewUDPAddrFromLH6(a))
 			}
 		}
+
+		if mc.relay != nil {
+			for _, a := range mc.relay.relay {
+				nip := iputil.VpnIp(a).ToIP()
+				c.Relay = append(c.Relay, &nip)
+			}
+		}
 	}
 
 	return &cm
@@ -179,6 +197,10 @@ func (r *RemoteList) CopyCache() *CacheMap {
 
 // BlockRemote locks and records the address as bad, it will be excluded from the deduplicated address list
 func (r *RemoteList) BlockRemote(bad *udp.Addr) {
+	if bad == nil {
+		// relays can have nil udp Addrs
+		return
+	}
 	r.Lock()
 	defer r.Unlock()
 
@@ -264,6 +286,17 @@ func (r *RemoteList) unlockedSetV4(ownerVpnIp iputil.VpnIp, vpnIp iputil.VpnIp,
 	}
 }
 
+func (r *RemoteList) unlockedSetRelay(ownerVpnIp iputil.VpnIp, vpnIp iputil.VpnIp, to []uint32) {
+	r.shouldRebuild = true
+	c := r.unlockedGetOrMakeRelay(ownerVpnIp)
+
+	// Reset the slice
+	c.relay = c.relay[:0]
+
+	// Copy the values (capped at MaxRemotes) rather than retaining the caller's slice
+	c.relay = append(c.relay, to[:minInt(len(to), MaxRemotes)]...)
+}
+
 // unlockedPrependV4 assumes you have the write lock and prepends the address in the reported list for this owner
 // This is only useful for establishing static hosts
 func (r *RemoteList) unlockedPrependV4(ownerVpnIp iputil.VpnIp, to *Ip4AndPort) {
@@ -314,6 +347,19 @@ func (r *RemoteList) unlockedPrependV6(ownerVpnIp iputil.VpnIp, to *Ip6AndPort)
 	}
 }
 
+func (r *RemoteList) unlockedGetOrMakeRelay(ownerVpnIp iputil.VpnIp) *cacheRelay {
+	am := r.cache[ownerVpnIp]
+	if am == nil {
+		am = &cache{}
+		r.cache[ownerVpnIp] = am
+	}
+	// Avoid occupying memory for relay if we never have any
+	if am.relay == nil {
+		am.relay = &cacheRelay{}
+	}
+	return am.relay
+}
+
 // unlockedGetOrMakeV4 assumes you have the write lock and builds the cache and owner entry. Only the v4 pointer is established.
 // The caller must dirty the learned address cache if required
 func (r *RemoteList) unlockedGetOrMakeV4(ownerVpnIp iputil.VpnIp) *cacheV4 {
@@ -348,6 +394,7 @@ func (r *RemoteList) unlockedGetOrMakeV6(ownerVpnIp iputil.VpnIp) *cacheV6 {
 // The result of this function can contain duplicates. unlockedSort handles cleaning it.
 func (r *RemoteList) unlockedCollect() {
 	addrs := r.addrs[:0]
+	relays := r.relays[:0]
 
 	for _, c := range r.cache {
 		if c.v4 != nil {
@@ -381,9 +428,18 @@ func (r *RemoteList) unlockedCollect() {
 				}
 			}
 		}
+
+		if c.relay != nil {
+			for _, v := range c.relay.relay {
+				ip := iputil.VpnIp(v)
+				relays = append(relays, &ip)
+			}
+		}
 	}
 
 	r.addrs = addrs
+	r.relays = relays
 }
 
 // unlockedSort assumes you have the write lock and performs the deduping and sorting of the address list

+ 113 - 2
ssh.go

@@ -293,6 +293,20 @@ func attachCommands(l *logrus.Logger, ssh *sshd.SSHServer, hostMap *HostMap, pen
 		},
 	})
 
+	ssh.RegisterCommand(&sshd.Command{
+		Name:             "print-relays",
+		ShortDescription: "Prints json details about all relay info",
+		Flags: func() (*flag.FlagSet, interface{}) {
+			fl := flag.NewFlagSet("", flag.ContinueOnError)
+			s := sshPrintTunnelFlags{}
+			fl.BoolVar(&s.Pretty, "pretty", false, "pretty prints json")
+			return fl, &s
+		},
+		Callback: func(fs interface{}, a []string, w sshd.StringWriter) error {
+			return sshPrintRelays(ifce, fs, a, w)
+		},
+	})
+
 	ssh.RegisterCommand(&sshd.Command{
 		Name:             "change-remote",
 		ShortDescription: "Changes the remote address used in the tunnel for the provided vpn ip",
@@ -519,14 +533,13 @@ func sshCloseTunnel(ifce *Interface, fs interface{}, a []string, w sshd.StringWr
 			0,
 			hostInfo.ConnectionState,
 			hostInfo,
-			hostInfo.remote,
 			[]byte{},
 			make([]byte, 12, 12),
 			make([]byte, mtu),
 		)
 	}
 
-	ifce.closeTunnel(hostInfo, false)
+	ifce.closeTunnel(hostInfo)
 	return w.WriteLine("Closed")
 }
 
@@ -730,6 +743,104 @@ func sshPrintCert(ifce *Interface, fs interface{}, a []string, w sshd.StringWrit
 	return w.WriteLine(cert.String())
 }
 
+func sshPrintRelays(ifce *Interface, fs interface{}, a []string, w sshd.StringWriter) error {
+	args, ok := fs.(*sshPrintTunnelFlags)
+	if !ok {
+		//TODO: error
+		w.WriteLine("sshPrintRelays failed to convert args type")
+		return nil
+	}
+
+	relays := map[uint32]*HostInfo{}
+	ifce.hostMap.Lock()
+	for k, v := range ifce.hostMap.Relays {
+		relays[k] = v
+	}
+	ifce.hostMap.Unlock()
+
+	type RelayFor struct {
+		Error          error
+		Type           string
+		State          string
+		PeerIp         iputil.VpnIp
+		LocalIndex     uint32
+		RemoteIndex    uint32
+		RelayedThrough []iputil.VpnIp
+	}
+
+	type RelayOutput struct {
+		NebulaIp    iputil.VpnIp
+		RelayForIps []RelayFor
+	}
+
+	type CmdOutput struct {
+		Relays []*RelayOutput
+	}
+
+	co := CmdOutput{}
+
+	enc := json.NewEncoder(w.GetWriter())
+
+	if args.Pretty {
+		enc.SetIndent("", "    ")
+	}
+
+	for k, v := range relays {
+		ro := RelayOutput{NebulaIp: v.vpnIp}
+		co.Relays = append(co.Relays, &ro)
+		relayHI, err := ifce.hostMap.QueryVpnIp(v.vpnIp)
+		if err != nil {
+			ro.RelayForIps = append(ro.RelayForIps, RelayFor{Error: err})
+			continue
+		}
+		for _, vpnIp := range relayHI.relayState.CopyRelayForIps() {
+			rf := RelayFor{Error: nil}
+			r, ok := relayHI.relayState.GetRelayForByIp(vpnIp)
+			if ok {
+				t := ""
+				switch r.Type {
+				case ForwardingType:
+					t = "forwarding"
+				case TerminalType:
+					t = "terminal"
+				default:
+					t = "unkown"
+				}
+
+				s := ""
+				switch r.State {
+				case Requested:
+					s = "requested"
+				case Established:
+					s = "established"
+				default:
+					s = "unknown"
+				}
+
+				rf.LocalIndex = r.LocalIndex
+				rf.RemoteIndex = r.RemoteIndex
+				rf.PeerIp = r.PeerIp
+				rf.Type = t
+				rf.State = s
+				if rf.LocalIndex != k {
+					rf.Error = fmt.Errorf("hostmap LocalIndex '%v' does not match RelayState LocalIndex", k)
+				}
+			}
+			relayedHI, err := ifce.hostMap.QueryVpnIp(vpnIp)
+			if err == nil {
+				rf.RelayedThrough = append(rf.RelayedThrough, relayedHI.relayState.CopyRelayIps()...)
+			}
+
+			ro.RelayForIps = append(ro.RelayForIps, rf)
+		}
+	}
+	err := enc.Encode(co)
+	if err != nil {
+		return err
+	}
+	return nil
+}
+
 func sshPrintTunnel(ifce *Interface, fs interface{}, a []string, w sshd.StringWriter) error {
 	args, ok := fs.(*sshPrintTunnelFlags)
 	if !ok {

+ 1 - 0
udp/conn.go

@@ -9,6 +9,7 @@ const MTU = 9001
 
 type EncReader func(
 	addr *Addr,
+	via interface{},
 	out []byte,
 	packet []byte,
 	header *header.H,

+ 10 - 2
udp/temp.go

@@ -5,10 +5,18 @@ import (
 	"github.com/slackhq/nebula/iputil"
 )
 
-//TODO: The items in this file belong in their own packages but doing that in a single PR is a nightmare
-
 type EncWriter interface {
+	SendVia(via interface{},
+		relay interface{},
+		ad,
+		nb,
+		out []byte,
+		nocopy bool,
+	)
 	SendMessageToVpnIp(t header.MessageType, st header.MessageSubType, vpnIp iputil.VpnIp, p, nb, out []byte)
+	Handshake(vpnIp iputil.VpnIp)
 }
 
+//TODO: The items in this file belong in their own packages but doing that in a single PR is a nightmare
+
 type LightHouseHandlerFunc func(rAddr *Addr, vpnIp iputil.VpnIp, p []byte, w EncWriter)

+ 1 - 1
udp/udp_generic.go

@@ -86,6 +86,6 @@ func (u *Conn) ListenOut(r EncReader, lhf LightHouseHandlerFunc, cache *firewall
 
 		udpAddr.IP = rua.IP
 		udpAddr.Port = uint16(rua.Port)
-		r(udpAddr, plaintext[:0], buffer[:n], h, fwPacket, lhf, nb, q, cache.Get(u.l))
+		r(udpAddr, nil, plaintext[:0], buffer[:n], h, fwPacket, lhf, nb, q, cache.Get(u.l))
 	}
 }

+ 1 - 1
udp/udp_linux.go

@@ -145,7 +145,7 @@ func (u *Conn) ListenOut(r EncReader, lhf LightHouseHandlerFunc, cache *firewall
 		for i := 0; i < n; i++ {
 			udpAddr.IP = names[i][8:24]
 			udpAddr.Port = binary.BigEndian.Uint16(names[i][2:4])
-			r(udpAddr, plaintext[:0], buffers[i][:msgs[i].Len], h, fwPacket, lhf, nb, q, cache.Get(u.l))
+			r(udpAddr, nil, plaintext[:0], buffers[i][:msgs[i].Len], h, fwPacket, lhf, nb, q, cache.Get(u.l))
 		}
 	}
 }

+ 1 - 1
udp/udp_tester.go

@@ -117,7 +117,7 @@ func (u *Conn) ListenOut(r EncReader, lhf LightHouseHandlerFunc, cache *firewall
 		p := <-u.RxPackets
 		ua.Port = p.FromPort
 		copy(ua.IP, p.FromIp.To16())
-		r(ua, plaintext[:0], p.Data, h, fwPacket, lhf, nb, q, cache.Get(u.l))
+		r(ua, nil, plaintext[:0], p.Data, h, fwPacket, lhf, nb, q, cache.Get(u.l))
 	}
 }