Bläddra i källkod

firewall: add option to send REJECT replies (#738)

* firewall: add option to send REJECT replies

This change allows you to configure the firewall to send REJECT packets
when a packet is denied.

    firewall:
      # Action to take when a packet is not allowed by the firewall rules.
      # Can be one of:
      #   `drop` (default): silently drop the packet.
      #   `reject`: send a reject reply.
      #     - For TCP, this will be a RST "Connection Reset" packet.
      #     - For other protocols, this will be an ICMP port unreachable packet.
      outbound_action: drop
      inbound_action: drop

These packets are only sent to established tunnels, and only on the
overlay network (currently IPv4 only).

    $ ping -c1 192.168.100.3
    PING 192.168.100.3 (192.168.100.3) 56(84) bytes of data.
    From 192.168.100.3 icmp_seq=2 Destination Port Unreachable

    --- 192.168.100.3 ping statistics ---
    2 packets transmitted, 0 received, +1 errors, 100% packet loss, time 31ms

    $ nc -nzv 192.168.100.3 22
    (UNKNOWN) [192.168.100.3] 22 (?) : Connection refused

This change also modifies the smoke test to capture tcpdump pcaps from
both the inside and outside to inspect what is going on over the wire.
It also now does TCP and UDP packet tests using the Nmap version of
ncat.

* calculate seq and ack the same way as the kernel

The logic is a bit confusing, so we copy it straight from how the kernel
does iptables `--reject-with tcp-reset`:

- https://github.com/torvalds/linux/blob/v5.19/net/ipv4/netfilter/nf_reject_ipv4.c#L193-L221

* cleanup
Wade Simmons 2 år sedan
förälder
incheckning
6e0ae4f9a3

+ 3 - 1
.github/workflows/smoke/Dockerfile

@@ -1,4 +1,6 @@
-FROM debian:buster
+FROM ubuntu:jammy
+
+RUN apt-get update && apt-get install -y iputils-ping ncat tcpdump
 
 ADD ./build /nebula
 

+ 2 - 0
.github/workflows/smoke/genconfig.sh

@@ -50,6 +50,8 @@ tun:
   dev: ${TUN_DEV:-nebula1}
 
 firewall:
+  inbound_action: reject
+  outbound_action: reject
   outbound: ${OUTBOUND:-$FIREWALL_ALL}
   inbound: ${INBOUND:-$FIREWALL_ALL}
 

+ 43 - 0
.github/workflows/smoke/smoke.sh

@@ -34,6 +34,21 @@ sleep 1
 sudo docker run --name host4 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm nebula:smoke -config host4.yml 2>&1 | tee logs/host4 | sed -u 's/^/  [host4]  /' &
 sleep 1
 
+# grab tcpdump pcaps for debugging
+sudo docker exec lighthouse1 tcpdump -i nebula1 -q -w - -U 2>logs/lighthouse1.inside.log >logs/lighthouse1.inside.pcap &
+sudo docker exec lighthouse1 tcpdump -i eth0 -q -w - -U 2>logs/lighthouse1.outside.log >logs/lighthouse1.outside.pcap &
+sudo docker exec host2 tcpdump -i nebula1 -q -w - -U 2>logs/host2.inside.log >logs/host2.inside.pcap &
+sudo docker exec host2 tcpdump -i eth0 -q -w - -U 2>logs/host2.outside.log >logs/host2.outside.pcap &
+sudo docker exec host3 tcpdump -i nebula1 -q -w - -U 2>logs/host3.inside.log >logs/host3.inside.pcap &
+sudo docker exec host3 tcpdump -i eth0 -q -w - -U 2>logs/host3.outside.log >logs/host3.outside.pcap &
+sudo docker exec host4 tcpdump -i nebula1 -q -w - -U 2>logs/host4.inside.log >logs/host4.inside.pcap &
+sudo docker exec host4 tcpdump -i eth0 -q -w - -U 2>logs/host4.outside.log >logs/host4.outside.pcap &
+
+sudo docker exec host2 ncat -nklv 0.0.0.0 2000 &
+sudo docker exec host3 ncat -nklv 0.0.0.0 2000 &
+sudo docker exec host2 ncat -e '/usr/bin/echo host2' -nkluv 0.0.0.0 3000 &
+sudo docker exec host3 ncat -e '/usr/bin/echo host3' -nkluv 0.0.0.0 3000 &
+
 set +x
 echo
 echo " *** Testing ping from lighthouse1"
@@ -51,6 +66,15 @@ sudo docker exec host2 ping -c1 192.168.100.1
 # Should fail because not allowed by host3 inbound firewall
 ! sudo docker exec host2 ping -c1 192.168.100.3 -w5 || exit 1
 
+set +x
+echo
+echo " *** Testing ncat from host2"
+echo
+set -x
+# Should fail because not allowed by host3 inbound firewall
+! sudo docker exec host2 ncat -nzv -w5 192.168.100.3 2000 || exit 1
+! sudo docker exec host2 ncat -nzuv -w5 192.168.100.3 3000 | grep -q host3 || exit 1
+
 set +x
 echo
 echo " *** Testing ping from host3"
@@ -59,6 +83,14 @@ set -x
 sudo docker exec host3 ping -c1 192.168.100.1
 sudo docker exec host3 ping -c1 192.168.100.2
 
+set +x
+echo
+echo " *** Testing ncat from host3"
+echo
+set -x
+sudo docker exec host3 ncat -nzv -w5 192.168.100.2 2000
+sudo docker exec host3 ncat -nzuv -w5 192.168.100.2 3000 | grep -q host2
+
 set +x
 echo
 echo " *** Testing ping from host4"
@@ -69,6 +101,17 @@ sudo docker exec host4 ping -c1 192.168.100.1
 ! sudo docker exec host4 ping -c1 192.168.100.2 -w5 || exit 1
 ! sudo docker exec host4 ping -c1 192.168.100.3 -w5 || exit 1
 
+set +x
+echo
+echo " *** Testing ncat from host4"
+echo
+set -x
+# Should fail because not allowed by host4 outbound firewall
+! sudo docker exec host4 ncat -nzv -w5 192.168.100.2 2000 || exit 1
+! sudo docker exec host4 ncat -nzv -w5 192.168.100.3 2000 || exit 1
+! sudo docker exec host4 ncat -nzuv -w5 192.168.100.2 3000 | grep -q host2 || exit 1
+! sudo docker exec host4 ncat -nzuv -w5 192.168.100.3 3000 | grep -q host3 || exit 1
+
 set +x
 echo
 echo " *** Testing conntrack"

+ 9 - 0
examples/config.yml

@@ -259,6 +259,15 @@ logging:
 
 # Nebula security group configuration
 firewall:
+  # Action to take when a packet is not allowed by the firewall rules.
+  # Can be one of:
+  #   `drop` (default): silently drop the packet.
+  #   `reject`: send a reject reply.
+  #     - For TCP, this will be a RST "Connection Reset" packet.
+  #     - For other protocols, this will be an ICMP port unreachable packet.
+  outbound_action: drop
+  inbound_action: drop
+
   conntrack:
     tcp_timeout: 12m
     udp_timeout: 3m

+ 25 - 0
firewall.go

@@ -47,6 +47,9 @@ type Firewall struct {
 	InRules  *FirewallTable
 	OutRules *FirewallTable
 
+	InSendReject  bool
+	OutSendReject bool
+
 	//TODO: we should have many more options for TCP, an option for ICMP, and mimic the kernel a bit better
 	// https://www.kernel.org/doc/Documentation/networking/nf_conntrack-sysctl.txt
 	TCPTimeout     time.Duration //linux: 5 days max
@@ -179,6 +182,28 @@ func NewFirewallFromConfig(l *logrus.Logger, nc *cert.NebulaCertificate, c *conf
 		//TODO: max_connections
 	)
 
+	// firewall.inbound_action: "reject" turns on InSendReject, "drop" (the
+	// default) leaves it off. Any other value is logged and treated as "drop".
+	inboundAction := c.GetString("firewall.inbound_action", "drop")
+	switch inboundAction {
+	case "reject":
+		fw.InSendReject = true
+	case "drop":
+		fw.InSendReject = false
+	default:
+		l.WithField("action", inboundAction).Warn("invalid firewall.inbound_action, defaulting to `drop`")
+		fw.InSendReject = false
+	}
+
+	// firewall.outbound_action: same handling, stored in OutSendReject.
+	outboundAction := c.GetString("firewall.outbound_action", "drop")
+	switch outboundAction {
+	case "reject":
+		fw.OutSendReject = true
+	case "drop":
+		fw.OutSendReject = false
+	default:
+		// Report the outbound value that was actually rejected, not the inbound one.
+		l.WithField("action", outboundAction).Warn("invalid firewall.outbound_action, defaulting to `drop`")
+		fw.OutSendReject = false
+	}
+
+
 	err := AddFirewallRulesFromConfig(l, false, c, fw)
 	if err != nil {
 		return nil, err

+ 34 - 5
inside.go

@@ -46,6 +46,7 @@ func (f *Interface) consumeInsidePacket(packet []byte, fwPacket *firewall.Packet
 
 	hostinfo := f.getOrHandshake(fwPacket.RemoteIP)
 	if hostinfo == nil {
+		f.rejectInside(packet, out, q)
 		if f.l.Level >= logrus.DebugLevel {
 			f.l.WithField("vpnIp", fwPacket.RemoteIP).
 				WithField("fwPacket", fwPacket).
@@ -71,12 +72,40 @@ func (f *Interface) consumeInsidePacket(packet []byte, fwPacket *firewall.Packet
 	if dropReason == nil {
 		f.sendNoMetrics(header.Message, 0, ci, hostinfo, nil, packet, nb, out, q)
 
-	} else if f.l.Level >= logrus.DebugLevel {
-		hostinfo.logger(f.l).
-			WithField("fwPacket", fwPacket).
-			WithField("reason", dropReason).
-			Debugln("dropping outbound packet")
+	} else {
+		f.rejectInside(packet, out, q)
+		if f.l.Level >= logrus.DebugLevel {
+			hostinfo.logger(f.l).
+				WithField("fwPacket", fwPacket).
+				WithField("reason", dropReason).
+				Debugln("dropping outbound packet")
+		}
+	}
+}
+
+// rejectInside builds a reject reply for packet and writes it to the tun
+// reader for queue q. It does nothing unless firewall.InSendReject is set.
+// out is scratch space that the reply is built in.
+func (f *Interface) rejectInside(packet []byte, out []byte, q int) {
+	if !f.firewall.InSendReject {
+		return
 	}
+
+	reply := iputil.CreateRejectPacket(packet, out)
+	if _, err := f.readers[q].Write(reply); err != nil {
+		f.l.WithError(err).Error("Failed to write to tun")
+	}
+}
+
+func (f *Interface) rejectOutside(packet []byte, ci *ConnectionState, hostinfo *HostInfo, nb, out []byte, q int) {
+	if !f.firewall.OutSendReject {
+		return
+	}
+
+	// Use some out buffer space to build the packet before encryption
+	// Need 40 bytes for the reject packet (20 byte ipv4 header, 20 byte tcp rst packet)
+	// Leave 100 bytes for the encrypted packet (60 byte Nebula header, 40 byte reject packet)
+	out = out[:140]
+	outPacket := iputil.CreateRejectPacket(packet, out[100:])
+	f.sendNoMetrics(header.Message, 0, ci, hostinfo, nil, outPacket, nb, out, q)
 }
 
 func (f *Interface) Handshake(vpnIp iputil.VpnIp) {

+ 211 - 0
iputil/packet.go

@@ -0,0 +1,211 @@
+package iputil
+
+import (
+	"encoding/binary"
+
+	"golang.org/x/net/ipv4"
+)
+
+// CreateRejectPacket builds a reject reply for the IPv4 packet in packet,
+// using out as the destination buffer, and returns the reply.
+//
+// TCP packets (protocol 6) get a TCP RST reply; every other protocol gets an
+// ICMP destination unreachable / port unreachable reply. It returns nil when
+// packet is too short to hold an IPv4 header.
+func CreateRejectPacket(packet []byte, out []byte) []byte {
+	// TODO ipv4 only, need to fix when inside supports ipv6
+	if len(packet) < ipv4.HeaderLen {
+		// Not enough bytes to read the protocol or the addresses.
+		return nil
+	}
+
+	switch packet[9] {
+	case 6: // tcp
+		return ipv4CreateRejectTCPPacket(packet, out)
+	default:
+		return ipv4CreateRejectICMPPacket(packet, out)
+	}
+}
+
+// ipv4CreateRejectICMPPacket builds an ICMP destination unreachable (port
+// unreachable) reply to the IPv4 packet in packet. The reply is written into
+// out and the resliced out is returned.
+//
+// The reply is a fresh 20 byte IPv4 header with source and destination
+// swapped, an 8 byte ICMP header, and a copy of the original IP header plus
+// up to the first 8 bytes that follow it. It returns nil when packet is
+// shorter than an IPv4 header or out does not have room for the reply.
+func ipv4CreateRejectICMPPacket(packet []byte, out []byte) []byte {
+	// The addresses are read from packet[12:20] below.
+	if len(packet) < ipv4.HeaderLen {
+		return nil
+	}
+
+	ihl := int(packet[0]&0x0f) << 2
+
+	// ICMP reply includes header and first 8 bytes of the packet
+	packetLen := len(packet)
+	if packetLen > ihl+8 {
+		packetLen = ihl + 8
+	}
+
+	outLen := ipv4.HeaderLen + 8 + packetLen
+	if cap(out) < outLen {
+		return nil
+	}
+
+	out = out[:outLen]
+
+	ipHdr := out[0:ipv4.HeaderLen]
+	ipHdr[0] = ipv4.Version<<4 | (ipv4.HeaderLen >> 2)    // version, ihl
+	ipHdr[1] = 0                                          // DSCP, ECN
+	binary.BigEndian.PutUint16(ipHdr[2:], uint16(outLen)) // Total Length
+
+	ipHdr[4] = 0  // id
+	ipHdr[5] = 0  //  .
+	ipHdr[6] = 0  // flags, fragment offset
+	ipHdr[7] = 0  //  .
+	ipHdr[8] = 64 // TTL
+	ipHdr[9] = 1  // protocol (icmp)
+	ipHdr[10] = 0 // checksum
+	ipHdr[11] = 0 //  .
+
+	// Swap dest / src IPs
+	copy(ipHdr[12:16], packet[16:20])
+	copy(ipHdr[16:20], packet[12:16])
+
+	// Calculate checksum (the checksum field was zeroed above)
+	binary.BigEndian.PutUint16(ipHdr[10:], tcpipChecksum(ipHdr, 0))
+
+	// ICMP Destination Unreachable
+	icmpOut := out[ipv4.HeaderLen:]
+	icmpOut[0] = 3 // type (Destination unreachable)
+	icmpOut[1] = 3 // code (Port unreachable error)
+	icmpOut[2] = 0 // checksum
+	icmpOut[3] = 0 //  .
+	icmpOut[4] = 0 // unused
+	icmpOut[5] = 0 //  .
+	icmpOut[6] = 0 //  .
+	icmpOut[7] = 0 //  .
+
+	// Copy original IP header and first 8 bytes as body
+	copy(icmpOut[8:], packet[:packetLen])
+
+	// Calculate checksum over the ICMP header and body
+	binary.BigEndian.PutUint16(icmpOut[2:], tcpipChecksum(icmpOut, 0))
+
+	return out
+}
+
+// ipv4CreateRejectTCPPacket builds a TCP RST reply to the IPv4/TCP packet in
+// packet. The reply is written into out and the resliced out is returned.
+//
+// The reply is 40 bytes: a fresh 20 byte IPv4 header and a 20 byte TCP header
+// with addresses and ports swapped. It returns nil when packet does not
+// contain a full 20 byte TCP header after its IP header, when the IP header
+// length field is below the minimum, or when out has no room for the reply.
+func ipv4CreateRejectTCPPacket(packet []byte, out []byte) []byte {
+	const tcpLen = 20
+
+	// Need a minimal IPv4 header before the header length can be read.
+	if len(packet) < ipv4.HeaderLen {
+		return nil
+	}
+
+	ihl := int(packet[0]&0x0f) << 2
+	outLen := ipv4.HeaderLen + tcpLen
+
+	// The ports, seq, ack, data offset and flags read below all live in the
+	// first 20 bytes of the TCP header; a truncated segment would otherwise
+	// index out of range.
+	if ihl < ipv4.HeaderLen || len(packet) < ihl+tcpLen || cap(out) < outLen {
+		return nil
+	}
+
+	out = out[:outLen]
+
+	ipHdr := out[0:ipv4.HeaderLen]
+	ipHdr[0] = ipv4.Version<<4 | (ipv4.HeaderLen >> 2)    // version, ihl
+	ipHdr[1] = 0                                          // DSCP, ECN
+	binary.BigEndian.PutUint16(ipHdr[2:], uint16(outLen)) // Total Length
+	ipHdr[4] = 0                                          // id
+	ipHdr[5] = 0                                          //  .
+	ipHdr[6] = 0                                          // flags, fragment offset
+	ipHdr[7] = 0                                          //  .
+	ipHdr[8] = 64                                         // TTL
+	ipHdr[9] = 6                                          // protocol (tcp)
+	ipHdr[10] = 0                                         // checksum
+	ipHdr[11] = 0                                         //  .
+
+	// Swap dest / src IPs
+	copy(ipHdr[12:16], packet[16:20])
+	copy(ipHdr[16:20], packet[12:16])
+
+	// Calculate checksum (the checksum field was zeroed above)
+	binary.BigEndian.PutUint16(ipHdr[10:], tcpipChecksum(ipHdr, 0))
+
+	// TCP RST
+	tcpIn := packet[ihl:]
+	var ackSeq, seq uint32
+	outFlags := byte(0b00000100) // RST
+
+	// Set seq and ackSeq based on how iptables/netfilter does it in Linux:
+	// - https://github.com/torvalds/linux/blob/v5.19/net/ipv4/netfilter/nf_reject_ipv4.c#L193-L221
+	inAck := tcpIn[13]&0b00010000 != 0
+	if inAck {
+		// The incoming ack number becomes our sequence number.
+		seq = binary.BigEndian.Uint32(tcpIn[8:])
+	} else {
+		inSyn := uint32((tcpIn[13] & 0b00000010) >> 1)
+		inFin := uint32(tcpIn[13] & 0b00000001)
+		// seq from the packet + syn + fin + tcp segment length
+		// (segment length = bytes after the IP header minus the TCP data offset)
+		ackSeq = binary.BigEndian.Uint32(tcpIn[4:]) + inSyn + inFin + uint32(len(tcpIn)) - uint32(tcpIn[12]>>4)<<2
+		outFlags |= 0b00010000 // ACK
+	}
+
+	tcpOut := out[ipv4.HeaderLen:]
+	// Swap dest / src ports
+	copy(tcpOut[0:2], tcpIn[2:4])
+	copy(tcpOut[2:4], tcpIn[0:2])
+	binary.BigEndian.PutUint32(tcpOut[4:], seq)
+	binary.BigEndian.PutUint32(tcpOut[8:], ackSeq)
+	tcpOut[12] = (tcpLen >> 2) << 4 // data offset,  reserved,  NS
+	tcpOut[13] = outFlags           // CWR, ECE, URG, ACK, PSH, RST, SYN, FIN
+	tcpOut[14] = 0                  // window size
+	tcpOut[15] = 0                  //  .
+	tcpOut[16] = 0                  // checksum
+	tcpOut[17] = 0                  //  .
+	tcpOut[18] = 0                  // URG Pointer
+	tcpOut[19] = 0                  //  .
+
+	// Calculate checksum, seeded with the IPv4 pseudo-header sum
+	csum := ipv4PseudoheaderChecksum(ipHdr[12:16], ipHdr[16:20], 6, tcpLen)
+	binary.BigEndian.PutUint16(tcpOut[16:], tcpipChecksum(tcpOut, csum))
+
+	return out
+}
+
+// CreateICMPEchoResponse builds an ICMP Echo Reply for the IPv4 ICMP Echo
+// Request in packet. The reply is a copy of packet, written into out, with
+// the IP addresses swapped, the ICMP type set to 0 and both checksums
+// recalculated. out must have capacity for len(packet) bytes.
+//
+// It returns nil when packet is not a simple echo request: it must be 28 to
+// 9001 bytes long, have a 20 byte IPv4 header (first byte 0x45), carry ICMP
+// (protocol 1) with type 8, and must not be a fragment.
+func CreateICMPEchoResponse(packet, out []byte) []byte {
+	// Return early if this is not a simple ICMP Echo Request
+	//TODO: make constants out of these
+	if !(len(packet) >= 28 && len(packet) <= 9001 && packet[0] == 0x45 && packet[9] == 0x01 && packet[20] == 0x08) {
+		return nil
+	}
+
+	// We don't support fragmented packets. Byte 6 carries the More Fragments
+	// flag (0x20) and the high five bits of the fragment offset (0x1F); byte 7
+	// carries the low eight bits of the offset.
+	if packet[7] != 0 || (packet[6]&0x3F != 0) {
+		return nil
+	}
+
+	out = out[:len(packet)]
+
+	copy(out, packet)
+
+	// Swap dest / src IPs and recalculate checksum
+	ipHdr := out[0:20]
+	copy(ipHdr[12:16], packet[16:20])
+	copy(ipHdr[16:20], packet[12:16])
+	ipHdr[10] = 0
+	ipHdr[11] = 0
+	binary.BigEndian.PutUint16(ipHdr[10:], tcpipChecksum(ipHdr, 0))
+
+	// Change type to ICMP Echo Reply and recalculate checksum
+	icmp := out[20:]
+	icmp[0] = 0
+	icmp[2] = 0
+	icmp[3] = 0
+	binary.BigEndian.PutUint16(icmp[2:], tcpipChecksum(icmp, 0))
+
+	return out
+}
+
+// tcpipChecksum computes the rfc1071 internet checksum of data. csum is a
+// running sum that has already been accumulated elsewhere (for example a
+// pseudo-header sum); pass 0 when there is none.
+//
+// data is summed as big-endian 16 bit words. A trailing odd byte is treated
+// as the high byte of a final word. The result is the one's complement of the
+// folded 16 bit sum.
+//
+// based on:
+// - https://github.com/google/gopacket/blob/v1.1.19/layers/tcpip.go#L50-L70
+func tcpipChecksum(data []byte, csum uint32) uint16 {
+	n := len(data)
+
+	// Sum whole 16 bit words. The bytes are combined by hand instead of going
+	// through binary.BigEndian.Uint16, which measured about 25% slower on the
+	// original author's test packet (1000 ns vs. 740 ns).
+	i := 0
+	for ; i+1 < n; i += 2 {
+		csum += uint32(data[i])<<8 + uint32(data[i+1])
+	}
+
+	// An odd length leaves one byte over; it is the high half of the last word.
+	if i < n {
+		csum += uint32(data[i]) << 8
+	}
+
+	// Fold the carries back in until the sum fits in 16 bits.
+	for csum>>16 != 0 {
+		csum = csum&0xffff + csum>>16
+	}
+	return ^uint16(csum)
+}
+
+// ipv4PseudoheaderChecksum returns the unfolded sum of an IPv4 pseudo-header
+// built from the 4 byte src and dst addresses, the protocol number and the
+// upper-layer length. The result is meant to be passed as the initial csum to
+// tcpipChecksum.
+//
+// based on:
+// - https://github.com/google/gopacket/blob/v1.1.19/layers/tcpip.go#L26-L35
+func ipv4PseudoheaderChecksum(src, dst []byte, proto, length uint32) (csum uint32) {
+	// Each address contributes two big-endian 16 bit words; gather the high
+	// bytes and the low bytes of those words separately.
+	hi := uint32(src[0]) + uint32(src[2]) + uint32(dst[0]) + uint32(dst[2])
+	lo := uint32(src[1]) + uint32(src[3]) + uint32(dst[1]) + uint32(dst[3])
+
+	// length is added as two 16 bit halves.
+	return hi<<8 + lo + proto + length&0xffff + length>>16
+}

+ 1 - 0
outside.go

@@ -399,6 +399,7 @@ func (f *Interface) decryptToTun(hostinfo *HostInfo, messageCounter uint64, out
 
 	dropReason := f.firewall.Drop(out, *fwPacket, true, hostinfo, f.caPool, localCache)
 	if dropReason != nil {
+		f.rejectOutside(out, hostinfo.ConnectionState, hostinfo, nb, out, q)
 		if f.l.Level >= logrus.DebugLevel {
 			hostinfo.logger(f.l).WithField("fwPacket", fwPacket).
 				WithField("reason", dropReason).

+ 4 - 47
overlay/tun_disabled.go

@@ -1,7 +1,6 @@
 package overlay
 
 import (
-	"encoding/binary"
 	"fmt"
 	"io"
 	"net"
@@ -75,38 +74,15 @@ func (t *disabledTun) Read(b []byte) (int, error) {
 }
 
 func (t *disabledTun) handleICMPEchoRequest(b []byte) bool {
-	// Return early if this is not a simple ICMP Echo Request
-	//TODO: make constants out of these
-	if !(len(b) >= 28 && len(b) <= 9001 && b[0] == 0x45 && b[9] == 0x01 && b[20] == 0x08) {
+	out := make([]byte, len(b))
+	out = iputil.CreateICMPEchoResponse(b, out)
+	if out == nil {
 		return false
 	}
 
-	// We don't support fragmented packets
-	if b[7] != 0 || (b[6]&0x2F != 0) {
-		return false
-	}
-
-	buf := make([]byte, len(b))
-	copy(buf, b)
-
-	// Swap dest / src IPs and recalculate checksum
-	ipv4 := buf[0:20]
-	copy(ipv4[12:16], b[16:20])
-	copy(ipv4[16:20], b[12:16])
-	ipv4[10] = 0
-	ipv4[11] = 0
-	binary.BigEndian.PutUint16(ipv4[10:], ipChecksum(ipv4))
-
-	// Change type to ICMP Echo Reply and recalculate checksum
-	icmp := buf[20:]
-	icmp[0] = 0
-	icmp[2] = 0
-	icmp[3] = 0
-	binary.BigEndian.PutUint16(icmp[2:], ipChecksum(icmp))
-
 	// attempt to write it, but don't block
 	select {
-	case t.read <- buf:
+	case t.read <- out:
 	default:
 		t.l.Debugf("tun_disabled: dropped ICMP Echo Reply response")
 	}
@@ -154,22 +130,3 @@ func (p prettyPacket) String() string {
 
 	return s.String()
 }
-
-func ipChecksum(b []byte) uint16 {
-	var c uint32
-	sz := len(b) - 1
-
-	for i := 0; i < sz; i += 2 {
-		c += uint32(b[i]) << 8
-		c += uint32(b[i+1])
-	}
-	if sz%2 == 0 {
-		c += uint32(b[sz]) << 8
-	}
-
-	for (c >> 16) > 0 {
-		c = (c & 0xffff) + (c >> 16)
-	}
-
-	return ^uint16(c)
-}