Switch.hpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328
  1. /* This Source Code Form is subject to the terms of the Mozilla Public
  2. * License, v. 2.0. If a copy of the MPL was not distributed with this
  3. * file, You can obtain one at https://mozilla.org/MPL/2.0/.
  4. *
  5. * (c) ZeroTier, Inc.
  6. * https://www.zerotier.com/
  7. */
  8. #ifndef ZT_N_SWITCH_HPP
  9. #define ZT_N_SWITCH_HPP
  10. #include "Constants.hpp"
  11. #include "Hashtable.hpp"
  12. #include "IncomingPacket.hpp"
  13. #include "InetAddress.hpp"
  14. #include "MAC.hpp"
  15. #include "Mutex.hpp"
  16. #include "Network.hpp"
  17. #include "Packet.hpp"
  18. #include "SharedPtr.hpp"
  19. #include "Topology.hpp"
  20. #include <list>
  21. #include <map>
  22. #include <vector>
  23. /* Ethernet frame types (EtherType field values) that might be relevant to us */
  24. #define ZT_ETHERTYPE_IPV4 0x0800 /* IPv4 */
  25. #define ZT_ETHERTYPE_ARP 0x0806 /* ARP */
  26. #define ZT_ETHERTYPE_RARP 0x8035 /* Reverse ARP */
  27. #define ZT_ETHERTYPE_ATALK 0x809b /* AppleTalk */
  28. #define ZT_ETHERTYPE_AARP 0x80f3 /* AppleTalk ARP */
  29. #define ZT_ETHERTYPE_IPX_A 0x8137 /* IPX (first of two values) */
  30. #define ZT_ETHERTYPE_IPX_B 0x8138 /* IPX (second of two values) */
  31. #define ZT_ETHERTYPE_IPV6 0x86dd /* IPv6 */
  32. namespace ZeroTier {
  33. class RuntimeEnvironment;
  34. class Peer;
  35. /**
  36. * Core of the distributed Ethernet switch and protocol implementation
  37. *
  38. * This class is perhaps a bit misnamed, but it's basically where everything
  39. * meets. Transport-layer ZT packets come in here, as do virtual network
  40. * packets from tap devices, and this sends them where they need to go and
  41. * wraps/unwraps accordingly. It also handles queues and timeouts and such.
  42. */
  43. class Switch {
  44. struct ManagedQueue;
  45. struct TXQueueEntry;
  46. friend class SharedPtr<Peer>;
  47. typedef struct {
  48. TXQueueEntry* p;
  49. bool ok_to_drop;
  50. } dqr;
  51. public:
  52. Switch(const RuntimeEnvironment* renv);
  53. /**
  54. * Called when a packet is received from the real network
  55. *
  56. * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
  57. * @param localSocket Local I/O socket as supplied by external code
  58. * @param fromAddr Internet IP address of origin
  59. * @param data Packet data
  60. * @param len Packet length
  61. */
  62. void onRemotePacket(void* tPtr, const int64_t localSocket, const InetAddress& fromAddr, const void* data, unsigned int len);
  63. /**
  64. * Returns whether our bonding or balancing policy is aware of flows.
  65. */
  66. bool isFlowAware();
  67. /**
  68. * Called when a packet comes from a local Ethernet tap
  69. *
  70. * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
  71. * @param network Which network's TAP did this packet come from?
  72. * @param from Originating MAC address
  73. * @param to Destination MAC address
  74. * @param etherType Ethernet packet type
  75. * @param vlanId VLAN ID or 0 if none
  76. * @param data Ethernet payload
  77. * @param len Frame length
  78. */
  79. void onLocalEthernet(void* tPtr, const SharedPtr<Network>& network, const MAC& from, const MAC& to, unsigned int etherType, unsigned int vlanId, const void* data, unsigned int len);
  80. /**
  81. * Determines the next drop schedule for packets in the TX queue
  82. *
  83. * @param t Current time
  84. * @param count Number of packets dropped this round
  85. */
  86. uint64_t control_law(uint64_t t, int count);
  87. /**
  88. * Selects a packet eligible for transmission from a TX queue. According to the control law, multiple packets
  89. * may be intentionally dropped before a packet is returned to the AQM scheduler.
  90. *
  91. * @param q The TX queue that is being dequeued from
  92. * @param now Current time
  93. */
  94. dqr dodequeue(ManagedQueue* q, uint64_t now);
  95. /**
  96. * Presents a packet to the AQM scheduler.
  97. *
  98. * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
  99. * @param network Network that the packet shall be sent over
  100. * @param packet Packet to be sent
  101. * @param encrypt Encrypt packet payload? (always true except for HELLO)
  102. * @param qosBucket Which bucket the rule-system determined this packet should fall into
  103. */
  104. void aqm_enqueue(void* tPtr, const SharedPtr<Network>& network, Packet& packet, const bool encrypt, const int qosBucket, const uint64_t nwid, const int32_t flowId /* = ZT_QOS_NO_FLOW*/);
  105. /**
  106. * Performs a single AQM cycle and dequeues and transmits all eligible packets on all networks
  107. *
  108. * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
  109. */
  110. void aqm_dequeue(void* tPtr);
  111. /**
  112. * Calls the dequeue mechanism and adjust queue state variables
  113. *
  114. * @param q The TX queue that is being dequeued from
  115. * @param isNew Whether or not this queue is in the NEW list
  116. * @param now Current time
  117. */
  118. Switch::TXQueueEntry* CoDelDequeue(ManagedQueue* q, bool isNew, uint64_t now);
  119. /**
  120. * Removes QoS Queues and flow state variables for a specific network. These queues are created
  121. * automatically upon the transmission of the first packet from this peer to another peer on the
  122. * given network.
  123. *
  124. * The reason for existence of queues and flow state variables specific to each network is so that
  125. * each network's QoS rules function independently.
  126. *
  127. * @param nwid Network ID
  128. */
  129. void removeNetworkQoSControlBlock(uint64_t nwid);
  130. /**
  131. * Send a packet to a ZeroTier address (destination in packet)
  132. *
  133. * The packet must be fully composed with source and destination but not
  134. * yet encrypted. If the destination peer is known the packet
  135. * is sent immediately. Otherwise it is queued and a WHOIS is dispatched.
  136. *
  137. * The packet may be compressed. Compression isn't done here.
  138. *
  139. * Needless to say, the packet's source must be this node. Otherwise it
  140. * won't be encrypted right. (This is not used for relaying.)
  141. *
  142. * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
  143. * @param packet Packet to send (buffer may be modified)
  144. * @param encrypt Encrypt packet payload? (always true except for HELLO)
  145. * @param nwid Network ID to which this packet is related or 0 if none
  146. */
  147. void send(void* tPtr, Packet& packet, const bool encrypt, const uint64_t nwid, const int32_t flowId /* = ZT_QOS_NO_FLOW*/);
  148. /**
  149. * Request WHOIS on a given address
  150. *
  151. * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
  152. * @param now Current time
  153. * @param addr Address to look up
  154. */
  155. void requestWhois(void* tPtr, const int64_t now, const Address& addr);
  156. /**
  157. * Run any processes that are waiting for this peer's identity
  158. *
  159. * Called when we learn of a peer's identity from HELLO, OK(WHOIS), etc.
  160. *
  161. * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
  162. * @param peer New peer
  163. */
  164. void doAnythingWaitingForPeer(void* tPtr, const SharedPtr<Peer>& peer);
  165. /**
  166. * Perform retries and other periodic timer tasks
  167. *
  168. * This can return a very long delay if there are no pending timer
  169. * tasks. The caller should cap this comparatively vs. other values.
  170. *
  171. * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
  172. * @param now Current time
  173. * @return Number of milliseconds until doTimerTasks() should be run again
  174. */
  175. unsigned long doTimerTasks(void* tPtr, int64_t now);
  176. private:
  177. bool _shouldUnite(const int64_t now, const Address& source, const Address& destination);
  178. bool _trySend(void* tPtr, Packet& packet, bool encrypt, const uint64_t nwid, const int32_t flowId /* = ZT_QOS_NO_FLOW*/);
  179. void _sendViaSpecificPath(void* tPtr, SharedPtr<Peer> peer, SharedPtr<Path> viaPath, uint16_t userSpecifiedMtu, int64_t now, Packet& packet, bool encrypt, int32_t flowId);
  180. void _recordOutgoingPacketMetrics(const Packet& p);
  181. const RuntimeEnvironment* const RR;
  182. int64_t _lastBeaconResponse;
  183. volatile int64_t _lastCheckedQueues;
  184. // Time we last sent a WHOIS request for each address
  185. Hashtable<Address, int64_t> _lastSentWhoisRequest;
  186. Mutex _lastSentWhoisRequest_m;
  187. // Packets waiting for WHOIS replies or other decode info or missing fragments
  188. struct RXQueueEntry {
  189. RXQueueEntry() : timestamp(0)
  190. {
  191. }
  192. volatile int64_t timestamp; // 0 if entry is not in use
  193. volatile uint64_t packetId;
  194. IncomingPacket frag0; // head of packet
  195. Packet::Fragment frags[ZT_MAX_PACKET_FRAGMENTS - 1]; // later fragments (if any)
  196. unsigned int totalFragments; // 0 if only frag0 received, waiting for frags
  197. uint32_t haveFragments; // bit mask, LSB to MSB
  198. volatile bool complete; // if true, packet is complete
  199. volatile int32_t flowId;
  200. Mutex lock;
  201. };
  202. RXQueueEntry _rxQueue[ZT_RX_QUEUE_SIZE];
  203. AtomicCounter _rxQueuePtr;
  204. // Returns matching or next available RX queue entry
  205. inline RXQueueEntry* _findRXQueueEntry(uint64_t packetId)
  206. {
  207. const unsigned int current = static_cast<unsigned int>(_rxQueuePtr.load());
  208. for (unsigned int k = 1; k <= ZT_RX_QUEUE_SIZE; ++k) {
  209. RXQueueEntry* rq = &(_rxQueue[(current - k) % ZT_RX_QUEUE_SIZE]);
  210. if ((rq->packetId == packetId) && (rq->timestamp)) {
  211. return rq;
  212. }
  213. }
  214. ++_rxQueuePtr;
  215. return &(_rxQueue[static_cast<unsigned int>(current) % ZT_RX_QUEUE_SIZE]);
  216. }
  217. // Returns current entry in rx queue ring buffer and increments ring pointer
  218. inline RXQueueEntry* _nextRXQueueEntry()
  219. {
  220. return &(_rxQueue[static_cast<unsigned int>((++_rxQueuePtr) - 1) % ZT_RX_QUEUE_SIZE]);
  221. }
  222. // ZeroTier-layer TX queue entry
  223. struct TXQueueEntry {
  224. TXQueueEntry()
  225. {
  226. }
  227. TXQueueEntry(Address d, uint64_t nwid, uint64_t ct, const Packet& p, bool enc, int32_t fid) : dest(d), nwid(nwid), creationTime(ct), packet(p), encrypt(enc), flowId(fid)
  228. {
  229. }
  230. Address dest;
  231. uint64_t nwid;
  232. uint64_t creationTime;
  233. Packet packet; // unencrypted/unMAC'd packet -- this is done at send time
  234. bool encrypt;
  235. int32_t flowId;
  236. };
  237. std::list<TXQueueEntry> _txQueue;
  238. Mutex _txQueue_m;
  239. Mutex _aqm_m;
  240. // Tracks sending of VERB_RENDEZVOUS to relaying peers
  241. struct _LastUniteKey {
  242. _LastUniteKey() : x(0), y(0)
  243. {
  244. }
  245. _LastUniteKey(const Address& a1, const Address& a2)
  246. {
  247. if (a1 > a2) {
  248. x = a2.toInt();
  249. y = a1.toInt();
  250. }
  251. else {
  252. x = a1.toInt();
  253. y = a2.toInt();
  254. }
  255. }
  256. inline unsigned long hashCode() const
  257. {
  258. return ((unsigned long)x ^ (unsigned long)y);
  259. }
  260. inline bool operator==(const _LastUniteKey& k) const
  261. {
  262. return ((x == k.x) && (y == k.y));
  263. }
  264. uint64_t x, y;
  265. };
  266. Hashtable<_LastUniteKey, uint64_t> _lastUniteAttempt; // key is always sorted in ascending order, for set-like behavior
  267. Mutex _lastUniteAttempt_m;
  268. // Queue with additional flow state variables
  269. struct ManagedQueue {
  270. ManagedQueue(int id) : id(id), byteCredit(ZT_AQM_QUANTUM), byteLength(0), dropping(false)
  271. {
  272. }
  273. int id;
  274. int byteCredit;
  275. int byteLength;
  276. uint64_t first_above_time;
  277. uint32_t count;
  278. uint64_t drop_next;
  279. bool dropping;
  280. uint64_t drop_next_time;
  281. std::list<TXQueueEntry*> q;
  282. };
  283. // To implement fq_codel we need to maintain a queue of queues
  284. struct NetworkQoSControlBlock {
  285. int _currEnqueuedPackets;
  286. std::vector<ManagedQueue*> newQueues;
  287. std::vector<ManagedQueue*> oldQueues;
  288. std::vector<ManagedQueue*> inactiveQueues;
  289. };
  290. std::map<uint64_t, NetworkQoSControlBlock*> _netQueueControlBlock;
  291. };
  292. } // namespace ZeroTier
  293. #endif