2
0

Multicaster.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431
  1. /* This Source Code Form is subject to the terms of the Mozilla Public
  2. * License, v. 2.0. If a copy of the MPL was not distributed with this
  3. * file, You can obtain one at https://mozilla.org/MPL/2.0/.
  4. *
  5. * (c) ZeroTier, Inc.
  6. * https://www.zerotier.com/
  7. */
  8. #include "Multicaster.hpp"
  9. #include "CertificateOfMembership.hpp"
  10. #include "Constants.hpp"
  11. #include "Network.hpp"
  12. #include "Node.hpp"
  13. #include "Packet.hpp"
  14. #include "Peer.hpp"
  15. #include "RuntimeEnvironment.hpp"
  16. #include "Switch.hpp"
  17. #include "Topology.hpp"
  18. #include <algorithm>
  19. namespace ZeroTier {
// Construct a Multicaster bound to the given runtime environment.
// _groups is the per-(network, group) membership table; 32 is its initial
// bucket count and it grows as groups are learned.
Multicaster::Multicaster(const RuntimeEnvironment* renv) : RR(renv), _groups(32)
{
}
// Destructor: nothing explicit to release -- _groups and the queued
// OutboundMulticast objects it holds clean up via their own destructors.
Multicaster::~Multicaster()
{
}
  26. void Multicaster::addMultiple(void* tPtr, int64_t now, uint64_t nwid, const MulticastGroup& mg, const void* addresses, unsigned int count, unsigned int totalKnown)
  27. {
  28. const unsigned char* p = (const unsigned char*)addresses;
  29. const unsigned char* e = p + (5 * count);
  30. Mutex::Lock _l(_groups_m);
  31. MulticastGroupStatus& gs = _groups[Multicaster::Key(nwid, mg)];
  32. while (p != e) {
  33. _add(tPtr, now, nwid, mg, gs, Address(p, 5));
  34. p += 5;
  35. }
  36. }
  37. void Multicaster::remove(uint64_t nwid, const MulticastGroup& mg, const Address& member)
  38. {
  39. Mutex::Lock _l(_groups_m);
  40. MulticastGroupStatus* s = _groups.get(Multicaster::Key(nwid, mg));
  41. if (s) {
  42. for (std::vector<MulticastGroupMember>::iterator m(s->members.begin()); m != s->members.end(); ++m) {
  43. if (m->address == member) {
  44. s->members.erase(m);
  45. break;
  46. }
  47. }
  48. }
  49. }
/**
 * Append gather results to a packet by choosing registered multicast recipients at random.
 *
 * Appended wire format: a 4-byte count of total known members, a 2-byte count
 * of addresses actually appended, then that many packed 5-byte ZeroTier
 * addresses. The querying peer itself is never included in the results.
 *
 * @param queryingPeer Peer making the gather request (excluded from results)
 * @param nwid Network ID
 * @param mg Multicast group
 * @param appendTo Packet buffer to append results to
 * @param limit Maximum number of addresses to append
 * @return Number of addresses appended
 */
unsigned int Multicaster::gather(const Address& queryingPeer, uint64_t nwid, const MulticastGroup& mg, Buffer<ZT_PROTO_MAX_PACKET_LENGTH>& appendTo, unsigned int limit) const
{
    unsigned char* p;
    unsigned int added = 0, i, k, rptr, totalKnown = 0;
    uint64_t a, picked[(ZT_PROTO_MAX_PACKET_LENGTH / 5) + 2];
    if (! limit) {
        return 0;
    }
    else if (limit > 0xffff) {
        limit = 0xffff; // the "added" field on the wire is only 16 bits
    }
    // Reserve space for the two counts; they are backfilled via setAt() once
    // the real totals are known.
    const unsigned int totalAt = appendTo.size();
    appendTo.addSize(4); // sizeof(uint32_t)
    const unsigned int addedAt = appendTo.size();
    appendTo.addSize(2); // sizeof(uint16_t)
    { // Return myself if I am a member of this group
        SharedPtr<Network> network(RR->node->network(nwid));
        if ((network) && (network->subscribedToMulticastGroup(mg, true))) {
            RR->identity.address().appendTo(appendTo);
            ++totalKnown;
            ++added;
        }
    }
    Mutex::Lock _l(_groups_m);
    const MulticastGroupStatus* s = _groups.get(Multicaster::Key(nwid, mg));
    if ((s) && (! s->members.empty())) {
        totalKnown += (unsigned int)s->members.size();
        // Members are returned in random order so that repeated gather queries
        // will return different subsets of a large multicast group.
        k = 0;
        while ((added < limit) && (k < s->members.size()) && ((appendTo.size() + ZT_ADDRESS_LENGTH) <= ZT_PROTO_MAX_PACKET_LENGTH)) {
            rptr = (unsigned int)RR->node->prng();
        restart_member_scan:
            a = s->members[rptr % (unsigned int)s->members.size()].address.toInt();
            // Linear-probe (by bumping rptr) past any member already picked
            // during this query; picked[] is sized to hold more addresses than
            // fit in one packet, so it cannot overflow.
            for (i = 0; i < k; ++i) {
                if (picked[i] == a) {
                    ++rptr;
                    goto restart_member_scan;
                }
            }
            picked[k++] = a;
            if (queryingPeer.toInt() != a) { // do not return the peer that is making the request as a result
                // Serialize the 40-bit address big-endian into 5 bytes.
                p = (unsigned char*)appendTo.appendField(ZT_ADDRESS_LENGTH);
                *(p++) = (unsigned char)((a >> 32) & 0xff);
                *(p++) = (unsigned char)((a >> 24) & 0xff);
                *(p++) = (unsigned char)((a >> 16) & 0xff);
                *(p++) = (unsigned char)((a >> 8) & 0xff);
                *p = (unsigned char)(a & 0xff);
                ++added;
            }
        }
    }
    // Backfill the counts reserved above.
    appendTo.setAt(totalAt, (uint32_t)totalKnown);
    appendTo.setAt(addedAt, (uint16_t)added);
    return added;
}
  106. std::vector<Address> Multicaster::getMembers(uint64_t nwid, const MulticastGroup& mg, unsigned int limit) const
  107. {
  108. std::vector<Address> ls;
  109. Mutex::Lock _l(_groups_m);
  110. const MulticastGroupStatus* s = _groups.get(Multicaster::Key(nwid, mg));
  111. if (! s) {
  112. return ls;
  113. }
  114. for (std::vector<MulticastGroupMember>::const_reverse_iterator m(s->members.rbegin()); m != s->members.rend(); ++m) {
  115. ls.push_back(m->address);
  116. if (ls.size() >= limit) {
  117. break;
  118. }
  119. }
  120. return ls;
  121. }
/**
 * Send a multicast frame.
 *
 * Replication strategy, in order of preference as visible below:
 *  1. If the network designates multicast replicators and we are neither one
 *     of them nor an active bridge, forward the frame to the lowest-latency
 *     live replicator and return.
 *  2. If we already know at least multicastLimit members, send immediately to
 *     a random subset of members plus active bridges (no queueing).
 *  3. Otherwise queue an OutboundMulticast, optionally soliciting more
 *     members via MULTICAST_GATHER; queued sends complete as members arrive.
 *
 * @param tPtr Thread pointer to pass through to external callbacks
 * @param now Current time
 * @param network Network this frame belongs to
 * @param origin Address the frame was originally received from (sends back to it are suppressed), or null
 * @param mg Destination multicast group
 * @param src Source MAC, or null to derive one from our identity and the network ID
 * @param etherType Ethernet frame type
 * @param data Frame payload
 * @param len Payload length in bytes
 */
void Multicaster::send(void* tPtr, int64_t now, const SharedPtr<Network>& network, const Address& origin, const MulticastGroup& mg, const MAC& src, unsigned int etherType, const void* data, unsigned int len)
{
    // Stack buffer for the random member-index permutation; a heap buffer is
    // substituted below only for groups larger than 4096 members.
    unsigned long idxbuf[4096];
    unsigned long* indexes = idxbuf;
    // If we're in hub-and-spoke designated multicast replication mode, see if we
    // have a multicast replicator active. If so, pick the best and send it
    // there. If we are a multicast replicator or if none are alive, fall back
    // to sender replication. Note that bridges do not do this since this would
    // break bridge route learning. This is sort of an edge case limitation of
    // the current protocol and could be fixed, but fixing it would add more
    // complexity than the fix is probably worth. Bridges are generally high
    // bandwidth nodes.
    if (! network->config().isActiveBridge(RR->identity.address())) {
        Address multicastReplicators[ZT_MAX_NETWORK_SPECIALISTS];
        const unsigned int multicastReplicatorCount = network->config().multicastReplicators(multicastReplicators);
        if (multicastReplicatorCount) {
            // Only delegate if we are not ourselves one of the replicators.
            if (std::find(multicastReplicators, multicastReplicators + multicastReplicatorCount, RR->identity.address()) == (multicastReplicators + multicastReplicatorCount)) {
                SharedPtr<Peer> bestMulticastReplicator;
                SharedPtr<Path> bestMulticastReplicatorPath;
                unsigned int bestMulticastReplicatorLatency = 0xffff;
                // Pick the alive replicator with the lowest path latency.
                for (unsigned int i = 0; i < multicastReplicatorCount; ++i) {
                    const SharedPtr<Peer> p(RR->topology->getPeerNoCache(multicastReplicators[i]));
                    if ((p) && (p->isAlive(now))) {
                        const SharedPtr<Path> pp(p->getAppropriatePath(now, false));
                        if ((pp) && (pp->latency() < bestMulticastReplicatorLatency)) {
                            bestMulticastReplicatorLatency = pp->latency();
                            bestMulticastReplicatorPath = pp;
                            bestMulticastReplicator = p;
                        }
                    }
                }
                if (bestMulticastReplicator) {
                    Packet outp(bestMulticastReplicator->address(), RR->identity.address(), Packet::VERB_MULTICAST_FRAME);
                    outp.append((uint64_t)network->id());
                    outp.append((uint8_t)0x0c); // includes source MAC | please replicate
                    ((src) ? src : MAC(RR->identity.address(), network->id())).appendTo(outp);
                    mg.mac().appendTo(outp);
                    outp.append((uint32_t)mg.adi());
                    outp.append((uint16_t)etherType);
                    outp.append(data, len);
                    outp.compress();
                    outp.armor(bestMulticastReplicator->key(), true, false, bestMulticastReplicator->aesKeysIfSupported(), bestMulticastReplicator->identity());
                    Metrics::pkt_multicast_frame_out++;
                    bestMulticastReplicatorPath->send(RR, tPtr, outp.data(), outp.size(), now);
                    return;
                }
            }
        }
    }
    try {
        Mutex::Lock _l(_groups_m);
        MulticastGroupStatus& gs = _groups[Multicaster::Key(network->id(), mg)];
        if (! gs.members.empty()) {
            // Allocate a memory buffer if group is monstrous
            if (gs.members.size() > (sizeof(idxbuf) / sizeof(unsigned long))) {
                indexes = new unsigned long[gs.members.size()];
            }
            // Generate a random permutation of member indexes
            for (unsigned long i = 0; i < gs.members.size(); ++i) {
                indexes[i] = i;
            }
            // Fisher-Yates shuffle so recipients are chosen uniformly.
            for (unsigned long i = (unsigned long)gs.members.size() - 1; i > 0; --i) {
                unsigned long j = (unsigned long)RR->node->prng() % (i + 1);
                unsigned long tmp = indexes[j];
                indexes[j] = indexes[i];
                indexes[i] = tmp;
            }
        }
        Address activeBridges[ZT_MAX_NETWORK_SPECIALISTS];
        const unsigned int activeBridgeCount = network->config().activeBridges(activeBridges);
        const unsigned int limit = network->config().multicastLimit;
        if (gs.members.size() >= limit) {
            // Skip queue if we already have enough members to complete the send operation
            OutboundMulticast out;
            out.init(
                RR,
                now,
                network->id(),
                false,
                limit,
                1, // we'll still gather a little from peers to keep multicast list fresh
                src,
                mg,
                etherType,
                data,
                len);
            unsigned int count = 0;
            // Active bridges always get the frame (except ourselves and the origin).
            for (unsigned int i = 0; i < activeBridgeCount; ++i) {
                if ((activeBridges[i] != RR->identity.address()) && (activeBridges[i] != origin)) {
                    out.sendOnly(RR, tPtr, activeBridges[i]); // optimization: don't use dedup log if it's a one-pass send
                }
            }
            // Then a random subset of regular members up to the multicast limit.
            unsigned long idx = 0;
            while ((count < limit) && (idx < gs.members.size())) {
                const Address ma(gs.members[indexes[idx++]].address);
                if ((std::find(activeBridges, activeBridges + activeBridgeCount, ma) == (activeBridges + activeBridgeCount)) && (ma != origin)) {
                    out.sendOnly(RR, tPtr, ma); // optimization: don't use dedup log if it's a one-pass send
                    ++count;
                }
            }
        }
        else {
            // Not enough known members: queue the send and (rate-limited)
            // solicit more members with MULTICAST_GATHER.
            while (gs.txQueue.size() >= ZT_TX_QUEUE_SIZE) {
                gs.txQueue.pop_front(); // drop oldest if the queue is full
            }
            const unsigned int gatherLimit = (limit - (unsigned int)gs.members.size()) + 1;
            int timerScale = RR->node->lowBandwidthModeEnabled() ? 3 : 1; // gather 3x less often in low-bandwidth mode
            if ((gs.members.empty()) || ((now - gs.lastExplicitGather) >= (ZT_MULTICAST_EXPLICIT_GATHER_DELAY * timerScale))) {
                gs.lastExplicitGather = now;
                // Gather targets: best upstream root (if any), the network
                // controller, and a shuffled subset of always-contact addresses,
                // capped at 16 total.
                Address explicitGatherPeers[16];
                unsigned int numExplicitGatherPeers = 0;
                SharedPtr<Peer> bestRoot(RR->topology->getUpstreamPeer(network->id()));
                if (bestRoot) {
                    explicitGatherPeers[numExplicitGatherPeers++] = bestRoot->address();
                }
                explicitGatherPeers[numExplicitGatherPeers++] = network->controller();
                Address ac[ZT_MAX_NETWORK_SPECIALISTS];
                const unsigned int accnt = network->config().alwaysContactAddresses(ac);
                unsigned int shuffled[ZT_MAX_NETWORK_SPECIALISTS];
                for (unsigned int i = 0; i < accnt; ++i) {
                    shuffled[i] = i;
                }
                // NOTE(review): this swap indexes shuffled[] by the *values* x1/x2
                // rather than by positions. Values are always < accnt so this is
                // in-bounds and safe, but it is not a uniform Fisher-Yates
                // shuffle -- confirm whether that bias is intentional.
                for (unsigned int i = 0, k = accnt >> 1; i < k; ++i) {
                    const uint64_t x = RR->node->prng();
                    const unsigned int x1 = shuffled[(unsigned int)x % accnt];
                    const unsigned int x2 = shuffled[(unsigned int)(x >> 32) % accnt];
                    const unsigned int tmp = shuffled[x1];
                    shuffled[x1] = shuffled[x2];
                    shuffled[x2] = tmp;
                }
                for (unsigned int i = 0; i < accnt; ++i) {
                    explicitGatherPeers[numExplicitGatherPeers++] = ac[shuffled[i]];
                    if (numExplicitGatherPeers == 16) {
                        break;
                    }
                }
                for (unsigned int k = 0; k < numExplicitGatherPeers; ++k) {
                    const CertificateOfMembership* com = (network) ? ((network->config().com) ? &(network->config().com) : (const CertificateOfMembership*)0) : (const CertificateOfMembership*)0;
                    Packet outp(explicitGatherPeers[k], RR->identity.address(), Packet::VERB_MULTICAST_GATHER);
                    outp.append(network->id());
                    outp.append((uint8_t)((com) ? 0x01 : 0x00)); // flag: COM attached
                    mg.mac().appendTo(outp);
                    outp.append((uint32_t)mg.adi());
                    outp.append((uint32_t)gatherLimit);
                    if (com) {
                        com->serialize(outp);
                    }
                    RR->node->expectReplyTo(outp.packetId());
                    RR->sw->send(tPtr, outp, true, network->id(), ZT_QOS_NO_FLOW);
                    Metrics::pkt_multicast_gather_out++;
                }
            }
            gs.txQueue.push_back(OutboundMulticast());
            OutboundMulticast& out = gs.txQueue.back();
            out.init(RR, now, network->id(), false, limit, gatherLimit, src, mg, etherType, data, len);
            if (origin) {
                out.logAsSent(origin); // never send back to the origin
            }
            unsigned int count = 0;
            for (unsigned int i = 0; i < activeBridgeCount; ++i) {
                if (activeBridges[i] != RR->identity.address()) {
                    out.sendAndLog(RR, tPtr, activeBridges[i]);
                    if (++count >= limit) {
                        break;
                    }
                }
            }
            unsigned long idx = 0;
            while ((count < limit) && (idx < gs.members.size())) {
                Address ma(gs.members[indexes[idx++]].address);
                if (std::find(activeBridges, activeBridges + activeBridgeCount, ma) == (activeBridges + activeBridgeCount)) {
                    out.sendAndLog(RR, tPtr, ma);
                    ++count;
                }
            }
        }
    }
    catch (...) {
    } // this is a sanity check to catch any failures and make sure indexes[] still gets deleted
    // Free allocated memory buffer if any
    if (indexes != idxbuf) {
        delete[] indexes;
    }
}
  306. void Multicaster::clean(int64_t now)
  307. {
  308. Mutex::Lock _l(_groups_m);
  309. Multicaster::Key* k = (Multicaster::Key*)0;
  310. MulticastGroupStatus* s = (MulticastGroupStatus*)0;
  311. Hashtable<Multicaster::Key, MulticastGroupStatus>::Iterator mm(_groups);
  312. while (mm.next(k, s)) {
  313. for (std::list<OutboundMulticast>::iterator tx(s->txQueue.begin()); tx != s->txQueue.end();) {
  314. if ((tx->expired(now)) || (tx->atLimit())) {
  315. s->txQueue.erase(tx++);
  316. }
  317. else {
  318. ++tx;
  319. }
  320. }
  321. unsigned long count = 0;
  322. {
  323. std::vector<MulticastGroupMember>::iterator reader(s->members.begin());
  324. std::vector<MulticastGroupMember>::iterator writer(reader);
  325. while (reader != s->members.end()) {
  326. if ((now - reader->timestamp) < ZT_MULTICAST_LIKE_EXPIRE) {
  327. *writer = *reader;
  328. ++writer;
  329. ++count;
  330. }
  331. ++reader;
  332. }
  333. }
  334. if (count) {
  335. s->members.resize(count);
  336. }
  337. else if (s->txQueue.empty()) {
  338. _groups.erase(*k);
  339. }
  340. else {
  341. s->members.clear();
  342. }
  343. }
  344. }
  345. void Multicaster::_add(void* tPtr, int64_t now, uint64_t nwid, const MulticastGroup& mg, MulticastGroupStatus& gs, const Address& member)
  346. {
  347. // assumes _groups_m is locked
  348. // Do not add self -- even if someone else returns it
  349. if (member == RR->identity.address()) {
  350. return;
  351. }
  352. std::vector<MulticastGroupMember>::iterator m(std::lower_bound(gs.members.begin(), gs.members.end(), member));
  353. if (m != gs.members.end()) {
  354. if (m->address == member) {
  355. m->timestamp = now;
  356. return;
  357. }
  358. gs.members.insert(m, MulticastGroupMember(member, now));
  359. }
  360. else {
  361. gs.members.push_back(MulticastGroupMember(member, now));
  362. }
  363. for (std::list<OutboundMulticast>::iterator tx(gs.txQueue.begin()); tx != gs.txQueue.end();) {
  364. if (tx->atLimit()) {
  365. gs.txQueue.erase(tx++);
  366. }
  367. else {
  368. tx->sendIfNew(RR, tPtr, member);
  369. if (tx->atLimit()) {
  370. gs.txQueue.erase(tx++);
  371. }
  372. else {
  373. ++tx;
  374. }
  375. }
  376. }
  377. }
  378. } // namespace ZeroTier