Switch.hpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. /*
  2. * Copyright (c)2013-2020 ZeroTier, Inc.
  3. *
  4. * Use of this software is governed by the Business Source License included
  5. * in the LICENSE.TXT file in the project's root directory.
  6. *
  7. * Change Date: 2026-01-01
  8. *
  9. * On the date above, in accordance with the Business Source License, use
  10. * of this software will be governed by version 2.0 of the Apache License.
  11. */
  12. /****/
  13. #ifndef ZT_N_SWITCH_HPP
  14. #define ZT_N_SWITCH_HPP
  15. #include <map>
  16. #include <set>
  17. #include <vector>
  18. #include <list>
  19. #include "Constants.hpp"
  20. #include "Mutex.hpp"
  21. #include "MAC.hpp"
  22. #include "Packet.hpp"
  23. #include "Utils.hpp"
  24. #include "InetAddress.hpp"
  25. #include "Topology.hpp"
  26. #include "Network.hpp"
  27. #include "SharedPtr.hpp"
  28. #include "IncomingPacket.hpp"
  29. #include "Hashtable.hpp"
  /*
   * EtherType values for the Ethernet frame types that might be relevant
   * to us. Values are the 16-bit type codes carried in the Ethernet header.
   */
  #define ZT_ETHERTYPE_IPV4   0x0800 /* Internet Protocol version 4 */
  #define ZT_ETHERTYPE_ARP    0x0806 /* Address Resolution Protocol */
  #define ZT_ETHERTYPE_RARP   0x8035 /* Reverse ARP */
  #define ZT_ETHERTYPE_ATALK  0x809b /* AppleTalk */
  #define ZT_ETHERTYPE_AARP   0x80f3 /* AppleTalk Address Resolution Protocol */
  #define ZT_ETHERTYPE_IPX_A  0x8137 /* Novell IPX */
  #define ZT_ETHERTYPE_IPX_B  0x8138 /* Novell IPX (second assigned value) */
  #define ZT_ETHERTYPE_IPV6   0x86dd /* Internet Protocol version 6 */
  39. namespace ZeroTier {
  40. class RuntimeEnvironment;
  41. class Peer;
  42. /**
  43. * Core of the distributed Ethernet switch and protocol implementation
  44. *
  45. * This class is perhaps a bit misnamed, but it's basically where everything
  46. * meets. Transport-layer ZT packets come in here, as do virtual network
  47. * packets from tap devices, and this sends them where they need to go and
  48. * wraps/unwraps accordingly. It also handles queues and timeouts and such.
  49. */
  50. class Switch
  51. {
  52. struct ManagedQueue;
  53. struct TXQueueEntry;
  54. friend class SharedPtr<Peer>;
  55. typedef struct {
  56. TXQueueEntry *p;
  57. bool ok_to_drop;
  58. } dqr;
  59. public:
  60. Switch(const RuntimeEnvironment *renv);
  61. /**
  62. * Called when a packet is received from the real network
  63. *
  64. * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
  65. * @param localSocket Local I/O socket as supplied by external code
  66. * @param fromAddr Internet IP address of origin
  67. * @param data Packet data
  68. * @param len Packet length
  69. */
  70. void onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddress &fromAddr,const void *data,unsigned int len);
  71. /**
  72. * Returns whether our bonding or balancing policy is aware of flows.
  73. */
  74. bool isFlowAware();
  75. /**
  76. * Called when a packet comes from a local Ethernet tap
  77. *
  78. * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
  79. * @param network Which network's TAP did this packet come from?
  80. * @param from Originating MAC address
  81. * @param to Destination MAC address
  82. * @param etherType Ethernet packet type
  83. * @param vlanId VLAN ID or 0 if none
  84. * @param data Ethernet payload
  85. * @param len Frame length
  86. */
  87. void onLocalEthernet(void *tPtr,const SharedPtr<Network> &network,const MAC &from,const MAC &to,unsigned int etherType,unsigned int vlanId,const void *data,unsigned int len);
  88. /**
  89. * Determines the next drop schedule for packets in the TX queue
  90. *
  91. * @param t Current time
  92. * @param count Number of packets dropped this round
  93. */
  94. uint64_t control_law(uint64_t t, int count);
  95. /**
  96. * Selects a packet eligible for transmission from a TX queue. According to the control law, multiple packets
  97. * may be intentionally dropped before a packet is returned to the AQM scheduler.
  98. *
  99. * @param q The TX queue that is being dequeued from
  100. * @param now Current time
  101. */
  102. dqr dodequeue(ManagedQueue *q, uint64_t now);
  103. /**
  104. * Presents a packet to the AQM scheduler.
  105. *
  106. * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
  107. * @param network Network that the packet shall be sent over
  108. * @param packet Packet to be sent
  109. * @param encrypt Encrypt packet payload? (always true except for HELLO)
  110. * @param qosBucket Which bucket the rule-system determined this packet should fall into
  111. */
  112. void aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &packet,bool encrypt,int qosBucket,int32_t flowId = ZT_QOS_NO_FLOW);
  113. /**
  114. * Performs a single AQM cycle and dequeues and transmits all eligible packets on all networks
  115. *
  116. * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
  117. */
  118. void aqm_dequeue(void *tPtr);
  119. /**
  120. * Calls the dequeue mechanism and adjust queue state variables
  121. *
  122. * @param q The TX queue that is being dequeued from
  123. * @param isNew Whether or not this queue is in the NEW list
  124. * @param now Current time
  125. */
  126. Switch::TXQueueEntry * CoDelDequeue(ManagedQueue *q, bool isNew, uint64_t now);
  127. /**
  128. * Removes QoS Queues and flow state variables for a specific network. These queues are created
  129. * automatically upon the transmission of the first packet from this peer to another peer on the
  130. * given network.
  131. *
  132. * The reason for existence of queues and flow state variables specific to each network is so that
  133. * each network's QoS rules function independently.
  134. *
  135. * @param nwid Network ID
  136. */
  137. void removeNetworkQoSControlBlock(uint64_t nwid);
  138. /**
  139. * Send a packet to a ZeroTier address (destination in packet)
  140. *
  141. * The packet must be fully composed with source and destination but not
  142. * yet encrypted. If the destination peer is known the packet
  143. * is sent immediately. Otherwise it is queued and a WHOIS is dispatched.
  144. *
  145. * The packet may be compressed. Compression isn't done here.
  146. *
  147. * Needless to say, the packet's source must be this node. Otherwise it
  148. * won't be encrypted right. (This is not used for relaying.)
  149. *
  150. * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
  151. * @param packet Packet to send (buffer may be modified)
  152. * @param encrypt Encrypt packet payload? (always true except for HELLO)
  153. */
  154. void send(void *tPtr,Packet &packet,bool encrypt,int32_t flowId = ZT_QOS_NO_FLOW);
  155. /**
  156. * Request WHOIS on a given address
  157. *
  158. * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
  159. * @param now Current time
  160. * @param addr Address to look up
  161. */
  162. void requestWhois(void *tPtr,const int64_t now,const Address &addr);
  163. /**
  164. * Run any processes that are waiting for this peer's identity
  165. *
  166. * Called when we learn of a peer's identity from HELLO, OK(WHOIS), etc.
  167. *
  168. * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
  169. * @param peer New peer
  170. */
  171. void doAnythingWaitingForPeer(void *tPtr,const SharedPtr<Peer> &peer);
  172. /**
  173. * Perform retries and other periodic timer tasks
  174. *
  175. * This can return a very long delay if there are no pending timer
  176. * tasks. The caller should cap this comparatively vs. other values.
  177. *
  178. * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
  179. * @param now Current time
  180. * @return Number of milliseconds until doTimerTasks() should be run again
  181. */
  182. unsigned long doTimerTasks(void *tPtr,int64_t now);
  183. private:
  184. bool _shouldUnite(const int64_t now,const Address &source,const Address &destination);
  185. bool _trySend(void *tPtr,Packet &packet,bool encrypt,int32_t flowId = ZT_QOS_NO_FLOW); // packet is modified if return is true
  186. void _sendViaSpecificPath(void *tPtr,SharedPtr<Peer> peer,SharedPtr<Path> viaPath,uint16_t userSpecifiedMtu, int64_t now,Packet &packet,bool encrypt,int32_t flowId);
  187. void _recordOutgoingPacketMetrics(const Packet &p);
  188. const RuntimeEnvironment *const RR;
  189. int64_t _lastBeaconResponse;
  190. volatile int64_t _lastCheckedQueues;
  191. // Time we last sent a WHOIS request for each address
  192. Hashtable< Address,int64_t > _lastSentWhoisRequest;
  193. Mutex _lastSentWhoisRequest_m;
  194. // Packets waiting for WHOIS replies or other decode info or missing fragments
  195. struct RXQueueEntry
  196. {
  197. RXQueueEntry() : timestamp(0) {}
  198. volatile int64_t timestamp; // 0 if entry is not in use
  199. volatile uint64_t packetId;
  200. IncomingPacket frag0; // head of packet
  201. Packet::Fragment frags[ZT_MAX_PACKET_FRAGMENTS - 1]; // later fragments (if any)
  202. unsigned int totalFragments; // 0 if only frag0 received, waiting for frags
  203. uint32_t haveFragments; // bit mask, LSB to MSB
  204. volatile bool complete; // if true, packet is complete
  205. volatile int32_t flowId;
  206. Mutex lock;
  207. };
  208. RXQueueEntry _rxQueue[ZT_RX_QUEUE_SIZE];
  209. AtomicCounter _rxQueuePtr;
  210. // Returns matching or next available RX queue entry
  211. inline RXQueueEntry *_findRXQueueEntry(uint64_t packetId)
  212. {
  213. const unsigned int current = static_cast<unsigned int>(_rxQueuePtr.load());
  214. for(unsigned int k=1;k<=ZT_RX_QUEUE_SIZE;++k) {
  215. RXQueueEntry *rq = &(_rxQueue[(current - k) % ZT_RX_QUEUE_SIZE]);
  216. if ((rq->packetId == packetId)&&(rq->timestamp)) {
  217. return rq;
  218. }
  219. }
  220. ++_rxQueuePtr;
  221. return &(_rxQueue[static_cast<unsigned int>(current) % ZT_RX_QUEUE_SIZE]);
  222. }
  223. // Returns current entry in rx queue ring buffer and increments ring pointer
  224. inline RXQueueEntry *_nextRXQueueEntry()
  225. {
  226. return &(_rxQueue[static_cast<unsigned int>((++_rxQueuePtr) - 1) % ZT_RX_QUEUE_SIZE]);
  227. }
  228. // ZeroTier-layer TX queue entry
  229. struct TXQueueEntry
  230. {
  231. TXQueueEntry() {}
  232. TXQueueEntry(Address d,uint64_t ct,const Packet &p,bool enc,int32_t fid) :
  233. dest(d),
  234. creationTime(ct),
  235. packet(p),
  236. encrypt(enc),
  237. flowId(fid) {}
  238. Address dest;
  239. uint64_t creationTime;
  240. Packet packet; // unencrypted/unMAC'd packet -- this is done at send time
  241. bool encrypt;
  242. int32_t flowId;
  243. };
  244. std::list< TXQueueEntry > _txQueue;
  245. Mutex _txQueue_m;
  246. Mutex _aqm_m;
  247. // Tracks sending of VERB_RENDEZVOUS to relaying peers
  248. struct _LastUniteKey
  249. {
  250. _LastUniteKey() : x(0),y(0) {}
  251. _LastUniteKey(const Address &a1,const Address &a2)
  252. {
  253. if (a1 > a2) {
  254. x = a2.toInt();
  255. y = a1.toInt();
  256. } else {
  257. x = a1.toInt();
  258. y = a2.toInt();
  259. }
  260. }
  261. inline unsigned long hashCode() const { return ((unsigned long)x ^ (unsigned long)y); }
  262. inline bool operator==(const _LastUniteKey &k) const { return ((x == k.x)&&(y == k.y)); }
  263. uint64_t x,y;
  264. };
  265. Hashtable< _LastUniteKey,uint64_t > _lastUniteAttempt; // key is always sorted in ascending order, for set-like behavior
  266. Mutex _lastUniteAttempt_m;
  267. // Queue with additional flow state variables
  268. struct ManagedQueue
  269. {
  270. ManagedQueue(int id) :
  271. id(id),
  272. byteCredit(ZT_AQM_QUANTUM),
  273. byteLength(0),
  274. dropping(false)
  275. {}
  276. int id;
  277. int byteCredit;
  278. int byteLength;
  279. uint64_t first_above_time;
  280. uint32_t count;
  281. uint64_t drop_next;
  282. bool dropping;
  283. uint64_t drop_next_time;
  284. std::list< TXQueueEntry *> q;
  285. };
  286. // To implement fq_codel we need to maintain a queue of queues
  287. struct NetworkQoSControlBlock
  288. {
  289. int _currEnqueuedPackets;
  290. std::vector<ManagedQueue *> newQueues;
  291. std::vector<ManagedQueue *> oldQueues;
  292. std::vector<ManagedQueue *> inactiveQueues;
  293. };
  294. std::map<uint64_t,NetworkQoSControlBlock*> _netQueueControlBlock;
  295. };
  296. } // namespace ZeroTier
  297. #endif