Multicaster.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451
  1. /*
  2. * Copyright (c)2019 ZeroTier, Inc.
  3. *
  4. * Use of this software is governed by the Business Source License included
  5. * in the LICENSE.TXT file in the project's root directory.
  6. *
  7. * Change Date: 2026-01-01
  8. *
  9. * On the date above, in accordance with the Business Source License, use
  10. * of this software will be governed by version 2.0 of the Apache License.
  11. */
  12. /****/
  13. #include "Multicaster.hpp"
  14. #include "CertificateOfMembership.hpp"
  15. #include "Constants.hpp"
  16. #include "Network.hpp"
  17. #include "Node.hpp"
  18. #include "Packet.hpp"
  19. #include "Peer.hpp"
  20. #include "RuntimeEnvironment.hpp"
  21. #include "Switch.hpp"
  22. #include "Topology.hpp"
  23. #include <algorithm>
  24. namespace ZeroTier {
  25. Multicaster::Multicaster(const RuntimeEnvironment* renv) : RR(renv), _groups(32)
  26. {
  27. }
  28. Multicaster::~Multicaster()
  29. {
  30. }
  31. void Multicaster::addMultiple(void* tPtr, int64_t now, uint64_t nwid, const MulticastGroup& mg, const void* addresses, unsigned int count, unsigned int totalKnown)
  32. {
  33. const unsigned char* p = (const unsigned char*)addresses;
  34. const unsigned char* e = p + (5 * count);
  35. Mutex::Lock _l(_groups_m);
  36. MulticastGroupStatus& gs = _groups[Multicaster::Key(nwid, mg)];
  37. while (p != e) {
  38. _add(tPtr, now, nwid, mg, gs, Address(p, 5));
  39. p += 5;
  40. }
  41. }
  42. void Multicaster::remove(uint64_t nwid, const MulticastGroup& mg, const Address& member)
  43. {
  44. Mutex::Lock _l(_groups_m);
  45. MulticastGroupStatus* s = _groups.get(Multicaster::Key(nwid, mg));
  46. if (s) {
  47. for (std::vector<MulticastGroupMember>::iterator m(s->members.begin()); m != s->members.end(); ++m) {
  48. if (m->address == member) {
  49. s->members.erase(m);
  50. break;
  51. }
  52. }
  53. }
  54. }
  55. unsigned int Multicaster::gather(const Address& queryingPeer, uint64_t nwid, const MulticastGroup& mg, Buffer<ZT_PROTO_MAX_PACKET_LENGTH>& appendTo, unsigned int limit) const
  56. {
  57. unsigned char* p;
  58. unsigned int added = 0, i, k, rptr, totalKnown = 0;
  59. uint64_t a, picked[(ZT_PROTO_MAX_PACKET_LENGTH / 5) + 2];
  60. if (! limit) {
  61. return 0;
  62. }
  63. else if (limit > 0xffff) {
  64. limit = 0xffff;
  65. }
  66. const unsigned int totalAt = appendTo.size();
  67. appendTo.addSize(4); // sizeof(uint32_t)
  68. const unsigned int addedAt = appendTo.size();
  69. appendTo.addSize(2); // sizeof(uint16_t)
  70. { // Return myself if I am a member of this group
  71. SharedPtr<Network> network(RR->node->network(nwid));
  72. if ((network) && (network->subscribedToMulticastGroup(mg, true))) {
  73. RR->identity.address().appendTo(appendTo);
  74. ++totalKnown;
  75. ++added;
  76. }
  77. }
  78. Mutex::Lock _l(_groups_m);
  79. const MulticastGroupStatus* s = _groups.get(Multicaster::Key(nwid, mg));
  80. if ((s) && (! s->members.empty())) {
  81. totalKnown += (unsigned int)s->members.size();
  82. // Members are returned in random order so that repeated gather queries
  83. // will return different subsets of a large multicast group.
  84. k = 0;
  85. while ((added < limit) && (k < s->members.size()) && ((appendTo.size() + ZT_ADDRESS_LENGTH) <= ZT_PROTO_MAX_PACKET_LENGTH)) {
  86. rptr = (unsigned int)RR->node->prng();
  87. restart_member_scan:
  88. a = s->members[rptr % (unsigned int)s->members.size()].address.toInt();
  89. for (i = 0; i < k; ++i) {
  90. if (picked[i] == a) {
  91. ++rptr;
  92. goto restart_member_scan;
  93. }
  94. }
  95. picked[k++] = a;
  96. if (queryingPeer.toInt() != a) { // do not return the peer that is making the request as a result
  97. p = (unsigned char*)appendTo.appendField(ZT_ADDRESS_LENGTH);
  98. *(p++) = (unsigned char)((a >> 32) & 0xff);
  99. *(p++) = (unsigned char)((a >> 24) & 0xff);
  100. *(p++) = (unsigned char)((a >> 16) & 0xff);
  101. *(p++) = (unsigned char)((a >> 8) & 0xff);
  102. *p = (unsigned char)(a & 0xff);
  103. ++added;
  104. }
  105. }
  106. }
  107. appendTo.setAt(totalAt, (uint32_t)totalKnown);
  108. appendTo.setAt(addedAt, (uint16_t)added);
  109. return added;
  110. }
  111. std::vector<Address> Multicaster::getMembers(uint64_t nwid, const MulticastGroup& mg, unsigned int limit) const
  112. {
  113. std::vector<Address> ls;
  114. Mutex::Lock _l(_groups_m);
  115. const MulticastGroupStatus* s = _groups.get(Multicaster::Key(nwid, mg));
  116. if (! s) {
  117. return ls;
  118. }
  119. for (std::vector<MulticastGroupMember>::const_reverse_iterator m(s->members.rbegin()); m != s->members.rend(); ++m) {
  120. ls.push_back(m->address);
  121. if (ls.size() >= limit) {
  122. break;
  123. }
  124. }
  125. return ls;
  126. }
  127. void Multicaster::send(void* tPtr, int64_t now, const SharedPtr<Network>& network, const Address& origin, const MulticastGroup& mg, const MAC& src, unsigned int etherType, const void* data, unsigned int len)
  128. {
  129. unsigned long idxbuf[4096];
  130. unsigned long* indexes = idxbuf;
  131. // If we're in hub-and-spoke designated multicast replication mode, see if we
  132. // have a multicast replicator active. If so, pick the best and send it
  133. // there. If we are a multicast replicator or if none are alive, fall back
  134. // to sender replication. Note that bridges do not do this since this would
  135. // break bridge route learning. This is sort of an edge case limitation of
  136. // the current protocol and could be fixed, but fixing it would add more
  137. // complexity than the fix is probably worth. Bridges are generally high
  138. // bandwidth nodes.
  139. if (! network->config().isActiveBridge(RR->identity.address())) {
  140. Address multicastReplicators[ZT_MAX_NETWORK_SPECIALISTS];
  141. const unsigned int multicastReplicatorCount = network->config().multicastReplicators(multicastReplicators);
  142. if (multicastReplicatorCount) {
  143. if (std::find(multicastReplicators, multicastReplicators + multicastReplicatorCount, RR->identity.address()) == (multicastReplicators + multicastReplicatorCount)) {
  144. SharedPtr<Peer> bestMulticastReplicator;
  145. SharedPtr<Path> bestMulticastReplicatorPath;
  146. unsigned int bestMulticastReplicatorLatency = 0xffff;
  147. for (unsigned int i = 0; i < multicastReplicatorCount; ++i) {
  148. const SharedPtr<Peer> p(RR->topology->getPeerNoCache(multicastReplicators[i]));
  149. if ((p) && (p->isAlive(now))) {
  150. const SharedPtr<Path> pp(p->getAppropriatePath(now, false));
  151. if ((pp) && (pp->latency() < bestMulticastReplicatorLatency)) {
  152. bestMulticastReplicatorLatency = pp->latency();
  153. bestMulticastReplicatorPath = pp;
  154. bestMulticastReplicator = p;
  155. }
  156. }
  157. }
  158. if (bestMulticastReplicator) {
  159. Packet outp(bestMulticastReplicator->address(), RR->identity.address(), Packet::VERB_MULTICAST_FRAME);
  160. outp.append((uint64_t)network->id());
  161. outp.append((uint8_t)0x0c); // includes source MAC | please replicate
  162. ((src) ? src : MAC(RR->identity.address(), network->id())).appendTo(outp);
  163. mg.mac().appendTo(outp);
  164. outp.append((uint32_t)mg.adi());
  165. outp.append((uint16_t)etherType);
  166. outp.append(data, len);
  167. if (! network->config().disableCompression()) {
  168. outp.compress();
  169. }
  170. outp.armor(bestMulticastReplicator->key(), true, false, bestMulticastReplicator->aesKeysIfSupported(), bestMulticastReplicator->identity());
  171. Metrics::pkt_multicast_frame_out++;
  172. bestMulticastReplicatorPath->send(RR, tPtr, outp.data(), outp.size(), now);
  173. return;
  174. }
  175. }
  176. }
  177. }
  178. try {
  179. Mutex::Lock _l(_groups_m);
  180. MulticastGroupStatus& gs = _groups[Multicaster::Key(network->id(), mg)];
  181. if (! gs.members.empty()) {
  182. // Allocate a memory buffer if group is monstrous
  183. if (gs.members.size() > (sizeof(idxbuf) / sizeof(unsigned long))) {
  184. indexes = new unsigned long[gs.members.size()];
  185. }
  186. // Generate a random permutation of member indexes
  187. for (unsigned long i = 0; i < gs.members.size(); ++i) {
  188. indexes[i] = i;
  189. }
  190. for (unsigned long i = (unsigned long)gs.members.size() - 1; i > 0; --i) {
  191. unsigned long j = (unsigned long)RR->node->prng() % (i + 1);
  192. unsigned long tmp = indexes[j];
  193. indexes[j] = indexes[i];
  194. indexes[i] = tmp;
  195. }
  196. }
  197. Address activeBridges[ZT_MAX_NETWORK_SPECIALISTS];
  198. const unsigned int activeBridgeCount = network->config().activeBridges(activeBridges);
  199. const unsigned int limit = network->config().multicastLimit;
  200. if (gs.members.size() >= limit) {
  201. // Skip queue if we already have enough members to complete the send operation
  202. OutboundMulticast out;
  203. out.init(
  204. RR,
  205. now,
  206. network->id(),
  207. network->config().disableCompression(),
  208. limit,
  209. 1, // we'll still gather a little from peers to keep multicast list fresh
  210. src,
  211. mg,
  212. etherType,
  213. data,
  214. len);
  215. unsigned int count = 0;
  216. for (unsigned int i = 0; i < activeBridgeCount; ++i) {
  217. if ((activeBridges[i] != RR->identity.address()) && (activeBridges[i] != origin)) {
  218. out.sendOnly(RR, tPtr, activeBridges[i]); // optimization: don't use dedup log if it's a one-pass send
  219. if (++count >= limit) {
  220. break;
  221. }
  222. }
  223. }
  224. unsigned long idx = 0;
  225. while ((count < limit) && (idx < gs.members.size())) {
  226. const Address ma(gs.members[indexes[idx++]].address);
  227. if ((std::find(activeBridges, activeBridges + activeBridgeCount, ma) == (activeBridges + activeBridgeCount)) && (ma != origin)) {
  228. out.sendOnly(RR, tPtr, ma); // optimization: don't use dedup log if it's a one-pass send
  229. ++count;
  230. }
  231. }
  232. }
  233. else {
  234. while (gs.txQueue.size() >= ZT_TX_QUEUE_SIZE) {
  235. gs.txQueue.pop_front();
  236. }
  237. const unsigned int gatherLimit = (limit - (unsigned int)gs.members.size()) + 1;
  238. int timerScale = RR->node->lowBandwidthModeEnabled() ? 3 : 1;
  239. if ((gs.members.empty()) || ((now - gs.lastExplicitGather) >= (ZT_MULTICAST_EXPLICIT_GATHER_DELAY * timerScale))) {
  240. gs.lastExplicitGather = now;
  241. Address explicitGatherPeers[16];
  242. unsigned int numExplicitGatherPeers = 0;
  243. SharedPtr<Peer> bestRoot(RR->topology->getUpstreamPeer());
  244. if (bestRoot) {
  245. explicitGatherPeers[numExplicitGatherPeers++] = bestRoot->address();
  246. }
  247. explicitGatherPeers[numExplicitGatherPeers++] = network->controller();
  248. Address ac[ZT_MAX_NETWORK_SPECIALISTS];
  249. const unsigned int accnt = network->config().alwaysContactAddresses(ac);
  250. unsigned int shuffled[ZT_MAX_NETWORK_SPECIALISTS];
  251. for (unsigned int i = 0; i < accnt; ++i) {
  252. shuffled[i] = i;
  253. }
  254. for (unsigned int i = 0, k = accnt >> 1; i < k; ++i) {
  255. const uint64_t x = RR->node->prng();
  256. const unsigned int x1 = shuffled[(unsigned int)x % accnt];
  257. const unsigned int x2 = shuffled[(unsigned int)(x >> 32) % accnt];
  258. const unsigned int tmp = shuffled[x1];
  259. shuffled[x1] = shuffled[x2];
  260. shuffled[x2] = tmp;
  261. }
  262. for (unsigned int i = 0; i < accnt; ++i) {
  263. explicitGatherPeers[numExplicitGatherPeers++] = ac[shuffled[i]];
  264. if (numExplicitGatherPeers == 16) {
  265. break;
  266. }
  267. }
  268. std::vector<Address> anchors(network->config().anchors());
  269. for (std::vector<Address>::const_iterator a(anchors.begin()); a != anchors.end(); ++a) {
  270. if (*a != RR->identity.address()) {
  271. explicitGatherPeers[numExplicitGatherPeers++] = *a;
  272. if (numExplicitGatherPeers == 16) {
  273. break;
  274. }
  275. }
  276. }
  277. for (unsigned int k = 0; k < numExplicitGatherPeers; ++k) {
  278. const CertificateOfMembership* com = (network) ? ((network->config().com) ? &(network->config().com) : (const CertificateOfMembership*)0) : (const CertificateOfMembership*)0;
  279. Packet outp(explicitGatherPeers[k], RR->identity.address(), Packet::VERB_MULTICAST_GATHER);
  280. outp.append(network->id());
  281. outp.append((uint8_t)((com) ? 0x01 : 0x00));
  282. mg.mac().appendTo(outp);
  283. outp.append((uint32_t)mg.adi());
  284. outp.append((uint32_t)gatherLimit);
  285. if (com) {
  286. com->serialize(outp);
  287. }
  288. RR->node->expectReplyTo(outp.packetId());
  289. RR->sw->send(tPtr, outp, true);
  290. Metrics::pkt_multicast_gather_out++;
  291. }
  292. }
  293. gs.txQueue.push_back(OutboundMulticast());
  294. OutboundMulticast& out = gs.txQueue.back();
  295. out.init(RR, now, network->id(), network->config().disableCompression(), limit, gatherLimit, src, mg, etherType, data, len);
  296. if (origin) {
  297. out.logAsSent(origin);
  298. }
  299. unsigned int count = 0;
  300. for (unsigned int i = 0; i < activeBridgeCount; ++i) {
  301. if (activeBridges[i] != RR->identity.address()) {
  302. out.sendAndLog(RR, tPtr, activeBridges[i]);
  303. if (++count >= limit) {
  304. break;
  305. }
  306. }
  307. }
  308. unsigned long idx = 0;
  309. while ((count < limit) && (idx < gs.members.size())) {
  310. Address ma(gs.members[indexes[idx++]].address);
  311. if (std::find(activeBridges, activeBridges + activeBridgeCount, ma) == (activeBridges + activeBridgeCount)) {
  312. out.sendAndLog(RR, tPtr, ma);
  313. ++count;
  314. }
  315. }
  316. }
  317. }
  318. catch (...) {
  319. } // this is a sanity check to catch any failures and make sure indexes[] still gets deleted
  320. // Free allocated memory buffer if any
  321. if (indexes != idxbuf) {
  322. delete[] indexes;
  323. }
  324. }
  325. void Multicaster::clean(int64_t now)
  326. {
  327. Mutex::Lock _l(_groups_m);
  328. Multicaster::Key* k = (Multicaster::Key*)0;
  329. MulticastGroupStatus* s = (MulticastGroupStatus*)0;
  330. Hashtable<Multicaster::Key, MulticastGroupStatus>::Iterator mm(_groups);
  331. while (mm.next(k, s)) {
  332. for (std::list<OutboundMulticast>::iterator tx(s->txQueue.begin()); tx != s->txQueue.end();) {
  333. if ((tx->expired(now)) || (tx->atLimit())) {
  334. s->txQueue.erase(tx++);
  335. }
  336. else {
  337. ++tx;
  338. }
  339. }
  340. unsigned long count = 0;
  341. {
  342. std::vector<MulticastGroupMember>::iterator reader(s->members.begin());
  343. std::vector<MulticastGroupMember>::iterator writer(reader);
  344. while (reader != s->members.end()) {
  345. if ((now - reader->timestamp) < ZT_MULTICAST_LIKE_EXPIRE) {
  346. *writer = *reader;
  347. ++writer;
  348. ++count;
  349. }
  350. ++reader;
  351. }
  352. }
  353. if (count) {
  354. s->members.resize(count);
  355. }
  356. else if (s->txQueue.empty()) {
  357. _groups.erase(*k);
  358. }
  359. else {
  360. s->members.clear();
  361. }
  362. }
  363. }
  364. void Multicaster::_add(void* tPtr, int64_t now, uint64_t nwid, const MulticastGroup& mg, MulticastGroupStatus& gs, const Address& member)
  365. {
  366. // assumes _groups_m is locked
  367. // Do not add self -- even if someone else returns it
  368. if (member == RR->identity.address()) {
  369. return;
  370. }
  371. std::vector<MulticastGroupMember>::iterator m(std::lower_bound(gs.members.begin(), gs.members.end(), member));
  372. if (m != gs.members.end()) {
  373. if (m->address == member) {
  374. m->timestamp = now;
  375. return;
  376. }
  377. gs.members.insert(m, MulticastGroupMember(member, now));
  378. }
  379. else {
  380. gs.members.push_back(MulticastGroupMember(member, now));
  381. }
  382. for (std::list<OutboundMulticast>::iterator tx(gs.txQueue.begin()); tx != gs.txQueue.end();) {
  383. if (tx->atLimit()) {
  384. gs.txQueue.erase(tx++);
  385. }
  386. else {
  387. tx->sendIfNew(RR, tPtr, member);
  388. if (tx->atLimit()) {
  389. gs.txQueue.erase(tx++);
  390. }
  391. else {
  392. ++tx;
  393. }
  394. }
  395. }
  396. }
  397. } // namespace ZeroTier