| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333 | /* * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * * Change Date: 2026-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. *//****/#ifndef ZT_N_SWITCH_HPP#define ZT_N_SWITCH_HPP#include "Constants.hpp"#include "Hashtable.hpp"#include "IncomingPacket.hpp"#include "InetAddress.hpp"#include "MAC.hpp"#include "Mutex.hpp"#include "Network.hpp"#include "Packet.hpp"#include "SharedPtr.hpp"#include "Topology.hpp"#include "Utils.hpp"#include <list>#include <map>#include <set>#include <vector>/* Ethernet frame types that might be relevant to us */#define ZT_ETHERTYPE_IPV4  0x0800#define ZT_ETHERTYPE_ARP   0x0806#define ZT_ETHERTYPE_RARP  0x8035#define ZT_ETHERTYPE_ATALK 0x809b#define ZT_ETHERTYPE_AARP  0x80f3#define ZT_ETHERTYPE_IPX_A 0x8137#define ZT_ETHERTYPE_IPX_B 0x8138#define ZT_ETHERTYPE_IPV6  0x86ddnamespace ZeroTier {class RuntimeEnvironment;class Peer;/** * Core of the distributed Ethernet switch and protocol implementation * * This class is perhaps a bit misnamed, but it's basically where everything * meets. Transport-layer ZT packets come in here, as do virtual network * packets from tap devices, and this sends them where they need to go and * wraps/unwraps accordingly. It also handles queues and timeouts and such. */class Switch {	struct ManagedQueue;	struct TXQueueEntry;	friend class SharedPtr<Peer>;	typedef struct {		TXQueueEntry* p;		bool ok_to_drop;	} dqr;  public:	Switch(const RuntimeEnvironment* renv);	/**	 * Called when a packet is received from the real network	 *	 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call	 * @param localSocket Local I/O socket as supplied by external code	 * @param fromAddr Internet IP address of origin	 * @param data Packet data	 * @param len Packet length	 */	void onRemotePacket(void* tPtr, const int64_t localSocket, const InetAddress& fromAddr, const void* data, unsigned int len);	/**	 * Returns whether our bonding or balancing policy is aware of flows.	 */	bool isFlowAware();	/**	 * Called when a packet comes from a local Ethernet tap	 *	 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call	 * @param network Which network's TAP did this packet come from?	 * @param from Originating MAC address	 * @param to Destination MAC address	 * @param etherType Ethernet packet type	 * @param vlanId VLAN ID or 0 if none	 * @param data Ethernet payload	 * @param len Frame length	 */	void onLocalEthernet(void* tPtr, const SharedPtr<Network>& network, const MAC& from, const MAC& to, unsigned int etherType, unsigned int vlanId, const void* data, unsigned int len);	/**	 * Determines the next drop schedule for packets in the TX queue	 *	 * @param t Current time	 * @param count Number of packets dropped this round	 */	uint64_t control_law(uint64_t t, int count);	/**	 * Selects a packet eligible for transmission from a TX queue. According to the control law, multiple packets	 * may be intentionally dropped before a packet is returned to the AQM scheduler.	 *	 * @param q The TX queue that is being dequeued from	 * @param now Current time	 */	dqr dodequeue(ManagedQueue* q, uint64_t now);	/**	 * Presents a packet to the AQM scheduler.	 *	 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call	 * @param network Network that the packet shall be sent over	 * @param packet Packet to be sent	 * @param encrypt Encrypt packet payload? (always true except for HELLO)	 * @param qosBucket Which bucket the rule-system determined this packet should fall into	 */	void aqm_enqueue(void* tPtr, const SharedPtr<Network>& network, Packet& packet, bool encrypt, int qosBucket, int32_t flowId = ZT_QOS_NO_FLOW);	/**	 * Performs a single AQM cycle and dequeues and transmits all eligible packets on all networks	 *	 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call	 */	void aqm_dequeue(void* tPtr);	/**	 * Calls the dequeue mechanism and adjust queue state variables	 *	 * @param q The TX queue that is being dequeued from	 * @param isNew Whether or not this queue is in the NEW list	 * @param now Current time	 */	Switch::TXQueueEntry* CoDelDequeue(ManagedQueue* q, bool isNew, uint64_t now);	/**	 * Removes QoS Queues and flow state variables for a specific network. These queues are created	 * automatically upon the transmission of the first packet from this peer to another peer on the	 * given network.	 *	 * The reason for existence of queues and flow state variables specific to each network is so that	 * each network's QoS rules function independently.	 *	 * @param nwid Network ID	 */	void removeNetworkQoSControlBlock(uint64_t nwid);	/**	 * Send a packet to a ZeroTier address (destination in packet)	 *	 * The packet must be fully composed with source and destination but not	 * yet encrypted. If the destination peer is known the packet	 * is sent immediately. Otherwise it is queued and a WHOIS is dispatched.	 *	 * The packet may be compressed. Compression isn't done here.	 *	 * Needless to say, the packet's source must be this node. Otherwise it	 * won't be encrypted right. (This is not used for relaying.)	 *	 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call	 * @param packet Packet to send (buffer may be modified)	 * @param encrypt Encrypt packet payload? (always true except for HELLO)	 */	void send(void* tPtr, Packet& packet, bool encrypt, int32_t flowId = ZT_QOS_NO_FLOW);	/**	 * Request WHOIS on a given address	 *	 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call	 * @param now Current time	 * @param addr Address to look up	 */	void requestWhois(void* tPtr, const int64_t now, const Address& addr);	/**	 * Run any processes that are waiting for this peer's identity	 *	 * Called when we learn of a peer's identity from HELLO, OK(WHOIS), etc.	 *	 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call	 * @param peer New peer	 */	void doAnythingWaitingForPeer(void* tPtr, const SharedPtr<Peer>& peer);	/**	 * Perform retries and other periodic timer tasks	 *	 * This can return a very long delay if there are no pending timer	 * tasks. The caller should cap this comparatively vs. other values.	 *	 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call	 * @param now Current time	 * @return Number of milliseconds until doTimerTasks() should be run again	 */	unsigned long doTimerTasks(void* tPtr, int64_t now);  private:	bool _shouldUnite(const int64_t now, const Address& source, const Address& destination);	bool _trySend(void* tPtr, Packet& packet, bool encrypt, int32_t flowId = ZT_QOS_NO_FLOW);	// packet is modified if return is true	void _sendViaSpecificPath(void* tPtr, SharedPtr<Peer> peer, SharedPtr<Path> viaPath, uint16_t userSpecifiedMtu, int64_t now, Packet& packet, bool encrypt, int32_t flowId, bool fragmentedAtVl2);	void _recordOutgoingPacketMetrics(const Packet& p);	const RuntimeEnvironment* const RR;	int64_t _lastBeaconResponse;	volatile int64_t _lastCheckedQueues;	// Time we last sent a WHOIS request for each address	Hashtable<Address, int64_t> _lastSentWhoisRequest;	Mutex _lastSentWhoisRequest_m;	// Packets waiting for WHOIS replies or other decode info or missing fragments	struct RXQueueEntry {		RXQueueEntry() : timestamp(0)		{		}		volatile int64_t timestamp;	  // 0 if entry is not in use		volatile uint64_t packetId;		IncomingPacket frag0;								   // head of packet		Packet::Fragment frags[ZT_MAX_PACKET_FRAGMENTS - 1];   // later fragments (if any)		unsigned int totalFragments;						   // 0 if only frag0 received, waiting for frags		uint32_t haveFragments;								   // bit mask, LSB to MSB		volatile bool complete;								   // if true, packet is complete		volatile int32_t flowId;		Mutex lock;	};	RXQueueEntry _rxQueue[ZT_RX_QUEUE_SIZE];	AtomicCounter _rxQueuePtr;	// Returns matching or next available RX queue entry	inline RXQueueEntry* _findRXQueueEntry(uint64_t packetId)	{		const unsigned int current = static_cast<unsigned int>(_rxQueuePtr.load());		for (unsigned int k = 1; k <= ZT_RX_QUEUE_SIZE; ++k) {			RXQueueEntry* rq = &(_rxQueue[(current - k) % ZT_RX_QUEUE_SIZE]);			if ((rq->packetId == packetId) && (rq->timestamp)) {				return rq;			}		}		++_rxQueuePtr;		return &(_rxQueue[static_cast<unsigned int>(current) % ZT_RX_QUEUE_SIZE]);	}	// Returns current entry in rx queue ring buffer and increments ring pointer	inline RXQueueEntry* _nextRXQueueEntry()	{		return &(_rxQueue[static_cast<unsigned int>((++_rxQueuePtr) - 1) % ZT_RX_QUEUE_SIZE]);	}	// ZeroTier-layer TX queue entry	struct TXQueueEntry {		TXQueueEntry()		{		}		TXQueueEntry(Address d, uint64_t ct, const Packet& p, bool enc, int32_t fid) : dest(d), creationTime(ct), packet(p), encrypt(enc), flowId(fid)		{		}		Address dest;		uint64_t creationTime;		Packet packet;	 // unencrypted/unMAC'd packet -- this is done at send time		bool encrypt;		int32_t flowId;	};	std::list<TXQueueEntry> _txQueue;	Mutex _txQueue_m;	Mutex _aqm_m;	// Tracks sending of VERB_RENDEZVOUS to relaying peers	struct _LastUniteKey {		_LastUniteKey() : x(0), y(0)		{		}		_LastUniteKey(const Address& a1, const Address& a2)		{			if (a1 > a2) {				x = a2.toInt();				y = a1.toInt();			}			else {				x = a1.toInt();				y = a2.toInt();			}		}		inline unsigned long hashCode() const		{			return ((unsigned long)x ^ (unsigned long)y);		}		inline bool operator==(const _LastUniteKey& k) const		{			return ((x == k.x) && (y == k.y));		}		uint64_t x, y;	};	Hashtable<_LastUniteKey, uint64_t> _lastUniteAttempt;	// key is always sorted in ascending order, for set-like behavior	Mutex _lastUniteAttempt_m;	// Queue with additional flow state variables	struct ManagedQueue {		ManagedQueue(int id) : id(id), byteCredit(ZT_AQM_QUANTUM), byteLength(0), dropping(false)		{		}		int id;		int byteCredit;		int byteLength;		uint64_t first_above_time;		uint32_t count;		uint64_t drop_next;		bool dropping;		uint64_t drop_next_time;		std::list<TXQueueEntry*> q;	};	// To implement fq_codel we need to maintain a queue of queues	struct NetworkQoSControlBlock {		int _currEnqueuedPackets;		std::vector<ManagedQueue*> newQueues;		std::vector<ManagedQueue*> oldQueues;		std::vector<ManagedQueue*> inactiveQueues;	};	std::map<uint64_t, NetworkQoSControlBlock*> _netQueueControlBlock;};}	// namespace ZeroTier#endif
 |