BSDEthernetTap.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511
  1. /* This Source Code Form is subject to the terms of the Mozilla Public
  2. * License, v. 2.0. If a copy of the MPL was not distributed with this
  3. * file, You can obtain one at https://mozilla.org/MPL/2.0/.
  4. *
  5. * (c) ZeroTier, Inc.
  6. * https://www.zerotier.com/
  7. */
  8. #include "BSDEthernetTap.hpp"
  9. #include "../node/Constants.hpp"
  10. #include "../node/Mutex.hpp"
  11. #include "../node/Utils.hpp"
  12. #include "OSUtils.hpp"
  13. #include <algorithm>
  14. #include <arpa/inet.h>
  15. #include <errno.h>
  16. #include <fcntl.h>
  17. #include <ifaddrs.h>
  18. #include <map>
  19. #include <net/if.h>
  20. #include <net/if_arp.h>
  21. #include <net/if_dl.h>
  22. #include <net/if_media.h>
  23. #include <net/route.h>
  24. #include <netinet/in.h>
  25. #include <pthread_np.h>
  26. #include <sched.h>
  27. #include <set>
  28. #include <signal.h>
  29. #include <stdint.h>
  30. #include <stdio.h>
  31. #include <stdlib.h>
  32. #include <string.h>
  33. #include <string>
  34. #include <sys/cdefs.h>
  35. #include <sys/ioctl.h>
  36. #include <sys/param.h>
  37. #include <sys/select.h>
  38. #include <sys/socket.h>
  39. #include <sys/stat.h>
  40. #include <sys/types.h>
  41. #include <sys/uio.h>
  42. #include <sys/wait.h>
  43. #include <unistd.h>
  44. #include <utility>
// Alphabet used to turn 5-bit slices of the network ID into device name characters
#define ZT_BASE32_CHARS "0123456789abcdefghijklmnopqrstuv"
// Size in bytes of the per-thread buffer used when reading frames from the tap
#define ZT_TAP_BUF_SIZE (1024 * 16)
// ff:ff:ff:ff:ff:ff with no ADI
static const ZeroTier::MulticastGroup _blindWildcardMulticastGroup(ZeroTier::MAC(0xff), 0);
  49. namespace ZeroTier {
  50. BSDEthernetTap::BSDEthernetTap(
  51. const char* homePath,
  52. unsigned int concurrency,
  53. bool pinning,
  54. const MAC& mac,
  55. unsigned int mtu,
  56. unsigned int metric,
  57. uint64_t nwid,
  58. const char* friendlyName,
  59. void (*handler)(void*, void*, uint64_t, const MAC&, const MAC&, unsigned int, unsigned int, const void*, unsigned int),
  60. void* arg)
  61. : _handler(handler)
  62. , _concurrency(concurrency)
  63. , _pinning(pinning)
  64. , _arg(arg)
  65. , _nwid(nwid)
  66. , _mtu(mtu)
  67. , _metric(metric)
  68. , _fd(0)
  69. , _enabled(true)
  70. , _lastIfAddrsUpdate(0)
  71. {
  72. static Mutex globalTapCreateLock;
  73. char devpath[64], ethaddr[64], mtustr[32], metstr[32], tmpdevname[32];
  74. Mutex::Lock _gl(globalTapCreateLock);
  75. #ifdef __FreeBSD__
  76. /* FreeBSD allows long interface names and interface renaming */
  77. _dev = "zt";
  78. _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 60) & 0x1f)]);
  79. _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 55) & 0x1f)]);
  80. _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 50) & 0x1f)]);
  81. _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 45) & 0x1f)]);
  82. _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 40) & 0x1f)]);
  83. _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 35) & 0x1f)]);
  84. _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 30) & 0x1f)]);
  85. _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 25) & 0x1f)]);
  86. _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 20) & 0x1f)]);
  87. _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 15) & 0x1f)]);
  88. _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 10) & 0x1f)]);
  89. _dev.push_back(ZT_BASE32_CHARS[(unsigned long)((nwid >> 5) & 0x1f)]);
  90. _dev.push_back(ZT_BASE32_CHARS[(unsigned long)(nwid & 0x1f)]);
  91. std::vector<std::string> devFiles(OSUtils::listDirectory("/dev"));
  92. for (int i = 9993; i < (9993 + 128); ++i) {
  93. OSUtils::ztsnprintf(tmpdevname, sizeof(tmpdevname), "tap%d", i);
  94. OSUtils::ztsnprintf(devpath, sizeof(devpath), "/dev/%s", tmpdevname);
  95. if (std::find(devFiles.begin(), devFiles.end(), std::string(tmpdevname)) == devFiles.end()) {
  96. long cpid = (long)vfork();
  97. if (cpid == 0) {
  98. #ifdef ZT_TRACE
  99. fprintf(stderr, "DEBUG: ifconfig %s create" ZT_EOL_S, tmpdevname);
  100. #endif
  101. ::execl("/sbin/ifconfig", "/sbin/ifconfig", tmpdevname, "create", (const char*)0);
  102. ::_exit(-1);
  103. }
  104. else if (cpid > 0) {
  105. int exitcode = -1;
  106. ::waitpid(cpid, &exitcode, 0);
  107. }
  108. else
  109. throw std::runtime_error("fork() failed");
  110. struct stat stattmp;
  111. if (! stat(devpath, &stattmp)) {
  112. cpid = (long)vfork();
  113. if (cpid == 0) {
  114. #ifdef ZT_TRACE
  115. fprintf(stderr, "DEBUG: ifconfig %s name %s" ZT_EOL_S, tmpdevname, _dev.c_str());
  116. #endif
  117. ::execl("/sbin/ifconfig", "/sbin/ifconfig", tmpdevname, "name", _dev.c_str(), (const char*)0);
  118. ::_exit(-1);
  119. }
  120. else if (cpid > 0) {
  121. int exitcode = -1;
  122. ::waitpid(cpid, &exitcode, 0);
  123. if (exitcode)
  124. throw std::runtime_error("ifconfig rename operation failed");
  125. }
  126. else
  127. throw std::runtime_error("fork() failed");
  128. _fd = ::open(devpath, O_RDWR);
  129. if (_fd > 0)
  130. break;
  131. else
  132. throw std::runtime_error("unable to open created tap device");
  133. }
  134. else {
  135. throw std::runtime_error("cannot find /dev node for newly created tap device");
  136. }
  137. }
  138. }
  139. #else
  140. /* Other BSDs like OpenBSD only have a limited number of tap devices that cannot be renamed */
  141. for (int i = 0; i < 64; ++i) {
  142. OSUtils::ztsnprintf(tmpdevname, sizeof(tmpdevname), "tap%d", i);
  143. OSUtils::ztsnprintf(devpath, sizeof(devpath), "/dev/%s", tmpdevname);
  144. _fd = ::open(devpath, O_RDWR);
  145. if (_fd > 0) {
  146. _dev = tmpdevname;
  147. break;
  148. }
  149. }
  150. #endif
  151. if (_fd <= 0)
  152. throw std::runtime_error("unable to open TAP device or no more devices available");
  153. if (fcntl(_fd, F_SETFL, fcntl(_fd, F_GETFL) & ~O_NONBLOCK) == -1) {
  154. ::close(_fd);
  155. throw std::runtime_error("unable to set flags on file descriptor for TAP device");
  156. }
  157. // Configure MAC address and MTU, bring interface up
  158. OSUtils::ztsnprintf(ethaddr, sizeof(ethaddr), "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x", (int)mac[0], (int)mac[1], (int)mac[2], (int)mac[3], (int)mac[4], (int)mac[5]);
  159. OSUtils::ztsnprintf(mtustr, sizeof(mtustr), "%u", _mtu);
  160. OSUtils::ztsnprintf(metstr, sizeof(metstr), "%u", _metric);
  161. long cpid = (long)vfork();
  162. if (cpid == 0) {
  163. #ifdef ZT_TRACE
  164. fprintf(stderr, "DEBUG: ifconfig %s lladdr %s mtu %s metric %s up" ZT_EOL_S, _dev.c_str(), ethaddr, mtustr, metstr);
  165. #endif
  166. ::execl("/sbin/ifconfig", "/sbin/ifconfig", _dev.c_str(), "lladdr", ethaddr, "mtu", mtustr, "metric", metstr, "up", (const char*)0);
  167. ::_exit(-1);
  168. }
  169. else if (cpid > 0) {
  170. int exitcode = -1;
  171. ::waitpid(cpid, &exitcode, 0);
  172. if (exitcode) {
  173. ::close(_fd);
  174. throw std::runtime_error("ifconfig failure setting link-layer address and activating tap interface");
  175. }
  176. }
  177. // Set close-on-exec so that devices cannot persist if we fork/exec for update
  178. fcntl(_fd, F_SETFD, fcntl(_fd, F_GETFD) | FD_CLOEXEC);
  179. ::pipe(_shutdownSignalPipe);
  180. _thread = Thread::start(this);
  181. }
  182. BSDEthernetTap::~BSDEthernetTap()
  183. {
  184. ::write(_shutdownSignalPipe[1], "\0", 1); // causes thread to exit
  185. ::close(_fd);
  186. ::close(_shutdownSignalPipe[0]);
  187. ::close(_shutdownSignalPipe[1]);
  188. long cpid = (long)vfork();
  189. if (cpid == 0) {
  190. #ifdef ZT_TRACE
  191. fprintf(stderr, "DEBUG: ifconfig %s destroy" ZT_EOL_S, _dev.c_str());
  192. #endif
  193. ::execl("/sbin/ifconfig", "/sbin/ifconfig", _dev.c_str(), "destroy", (const char*)0);
  194. ::_exit(-1);
  195. }
  196. else if (cpid > 0) {
  197. int exitcode = -1;
  198. ::waitpid(cpid, &exitcode, 0);
  199. }
  200. Thread::join(_thread);
  201. for (std::thread& t : _rxThreads) {
  202. t.join();
  203. }
  204. }
  205. void BSDEthernetTap::setEnabled(bool en)
  206. {
  207. _enabled = en;
  208. }
  209. bool BSDEthernetTap::enabled() const
  210. {
  211. return _enabled;
  212. }
  213. static bool ___removeIp(const std::string& _dev, const InetAddress& ip)
  214. {
  215. long cpid = (long)vfork();
  216. if (cpid == 0) {
  217. char ipbuf[64];
  218. #ifdef ZT_TRACE
  219. fprintf(stderr, "DEBUG: ifconfig %s inet %s -alias" ZT_EOL_S, _dev.c_str(), ip.toIpString(ipbuf));
  220. #endif
  221. execl("/sbin/ifconfig", "/sbin/ifconfig", _dev.c_str(), "inet", ip.toIpString(ipbuf), "-alias", (const char*)0);
  222. _exit(-1);
  223. }
  224. else if (cpid > 0) {
  225. int exitcode = -1;
  226. waitpid(cpid, &exitcode, 0);
  227. return (exitcode == 0);
  228. }
  229. return false; // never reached, make compiler shut up about return value
  230. }
  231. bool BSDEthernetTap::addIp(const InetAddress& ip)
  232. {
  233. if (! ip)
  234. return false;
  235. std::vector<InetAddress> allIps(ips());
  236. if (std::find(allIps.begin(), allIps.end(), ip) != allIps.end())
  237. return true; // IP/netmask already assigned
  238. // Remove and reconfigure if address is the same but netmask is different
  239. for (std::vector<InetAddress>::iterator i(allIps.begin()); i != allIps.end(); ++i) {
  240. if ((i->ipsEqual(ip)) && (i->netmaskBits() != ip.netmaskBits())) {
  241. if (___removeIp(_dev, *i))
  242. break;
  243. }
  244. }
  245. long cpid = (long)vfork();
  246. if (cpid == 0) {
  247. char tmp[128];
  248. #ifdef ZT_TRACE
  249. fprintf(stderr, "DEBUG: ifconfig %s %s %s alias" ZT_EOL_S, _dev.c_str(), ip.isV4() ? "inet" : "inet6", ip.toString(tmp));
  250. #endif
  251. ::execl("/sbin/ifconfig", "/sbin/ifconfig", _dev.c_str(), ip.isV4() ? "inet" : "inet6", ip.toString(tmp), "alias", (const char*)0);
  252. ::_exit(-1);
  253. }
  254. else if (cpid > 0) {
  255. int exitcode = -1;
  256. ::waitpid(cpid, &exitcode, 0);
  257. return (exitcode == 0);
  258. }
  259. return false;
  260. }
  261. bool BSDEthernetTap::removeIp(const InetAddress& ip)
  262. {
  263. if (! ip)
  264. return false;
  265. std::vector<InetAddress> allIps(ips());
  266. if (std::find(allIps.begin(), allIps.end(), ip) != allIps.end()) {
  267. if (___removeIp(_dev, ip))
  268. return true;
  269. }
  270. return false;
  271. }
  272. std::vector<InetAddress> BSDEthernetTap::ips() const
  273. {
  274. uint64_t now = OSUtils::now();
  275. if ((now - _lastIfAddrsUpdate) <= GETIFADDRS_CACHE_TIME) {
  276. return _ifaddrs;
  277. }
  278. _lastIfAddrsUpdate = now;
  279. struct ifaddrs* ifa = (struct ifaddrs*)0;
  280. if (getifaddrs(&ifa))
  281. return std::vector<InetAddress>();
  282. std::vector<InetAddress> r;
  283. struct ifaddrs* p = ifa;
  284. while (p) {
  285. if ((! strcmp(p->ifa_name, _dev.c_str())) && (p->ifa_addr) && (p->ifa_netmask) && (p->ifa_addr->sa_family == p->ifa_netmask->sa_family)) {
  286. switch (p->ifa_addr->sa_family) {
  287. case AF_INET: {
  288. struct sockaddr_in* sin = (struct sockaddr_in*)p->ifa_addr;
  289. struct sockaddr_in* nm = (struct sockaddr_in*)p->ifa_netmask;
  290. r.push_back(InetAddress(&(sin->sin_addr.s_addr), 4, Utils::countBits((uint32_t)nm->sin_addr.s_addr)));
  291. } break;
  292. case AF_INET6: {
  293. struct sockaddr_in6* sin = (struct sockaddr_in6*)p->ifa_addr;
  294. struct sockaddr_in6* nm = (struct sockaddr_in6*)p->ifa_netmask;
  295. uint32_t b[4];
  296. memcpy(b, nm->sin6_addr.s6_addr, sizeof(b));
  297. r.push_back(InetAddress(sin->sin6_addr.s6_addr, 16, Utils::countBits(b[0]) + Utils::countBits(b[1]) + Utils::countBits(b[2]) + Utils::countBits(b[3])));
  298. } break;
  299. }
  300. }
  301. p = p->ifa_next;
  302. }
  303. if (ifa)
  304. freeifaddrs(ifa);
  305. std::sort(r.begin(), r.end());
  306. std::unique(r.begin(), r.end());
  307. _ifaddrs = r;
  308. return r;
  309. }
  310. void BSDEthernetTap::put(const MAC& from, const MAC& to, unsigned int etherType, const void* data, unsigned int len)
  311. {
  312. char putBuf[ZT_MAX_MTU + 64];
  313. if ((_fd > 0) && (len <= _mtu) && (_enabled)) {
  314. to.copyTo(putBuf, 6);
  315. from.copyTo(putBuf + 6, 6);
  316. *((uint16_t*)(putBuf + 12)) = htons((uint16_t)etherType);
  317. memcpy(putBuf + 14, data, len);
  318. len += 14;
  319. ::write(_fd, putBuf, len);
  320. }
  321. }
  322. std::string BSDEthernetTap::deviceName() const
  323. {
  324. return _dev;
  325. }
  326. void BSDEthernetTap::setFriendlyName(const char* friendlyName)
  327. {
  328. }
  329. void BSDEthernetTap::scanMulticastGroups(std::vector<MulticastGroup>& added, std::vector<MulticastGroup>& removed)
  330. {
  331. std::vector<MulticastGroup> newGroups;
  332. #ifndef __OpenBSD__
  333. struct ifmaddrs* ifmap = (struct ifmaddrs*)0;
  334. if (! getifmaddrs(&ifmap)) {
  335. struct ifmaddrs* p = ifmap;
  336. while (p) {
  337. if (p->ifma_addr->sa_family == AF_LINK) {
  338. struct sockaddr_dl* in = (struct sockaddr_dl*)p->ifma_name;
  339. struct sockaddr_dl* la = (struct sockaddr_dl*)p->ifma_addr;
  340. if ((la->sdl_alen == 6) && (in->sdl_nlen <= _dev.length()) && (! memcmp(_dev.data(), in->sdl_data, in->sdl_nlen)))
  341. newGroups.push_back(MulticastGroup(MAC(la->sdl_data + la->sdl_nlen, 6), 0));
  342. }
  343. p = p->ifma_next;
  344. }
  345. freeifmaddrs(ifmap);
  346. }
  347. #endif // __OpenBSD__
  348. std::vector<InetAddress> allIps(ips());
  349. for (std::vector<InetAddress>::iterator ip(allIps.begin()); ip != allIps.end(); ++ip)
  350. newGroups.push_back(MulticastGroup::deriveMulticastGroupForAddressResolution(*ip));
  351. std::sort(newGroups.begin(), newGroups.end());
  352. std::unique(newGroups.begin(), newGroups.end());
  353. for (std::vector<MulticastGroup>::iterator m(newGroups.begin()); m != newGroups.end(); ++m) {
  354. if (! std::binary_search(_multicastGroups.begin(), _multicastGroups.end(), *m))
  355. added.push_back(*m);
  356. }
  357. for (std::vector<MulticastGroup>::iterator m(_multicastGroups.begin()); m != _multicastGroups.end(); ++m) {
  358. if (! std::binary_search(newGroups.begin(), newGroups.end(), *m))
  359. removed.push_back(*m);
  360. }
  361. _multicastGroups.swap(newGroups);
  362. }
  363. void BSDEthernetTap::setMtu(unsigned int mtu)
  364. {
  365. if (mtu != _mtu) {
  366. _mtu = mtu;
  367. long cpid = (long)vfork();
  368. if (cpid == 0) {
  369. char tmp[64];
  370. OSUtils::ztsnprintf(tmp, sizeof(tmp), "%u", mtu);
  371. #ifdef ZT_TRACE
  372. fprintf(stderr, "DEBUG: ifconfig %s mtu %s" ZT_EOL_S, _dev.c_str(), tmp);
  373. #endif
  374. execl("/sbin/ifconfig", "/sbin/ifconfig", _dev.c_str(), "mtu", tmp, (const char*)0);
  375. _exit(-1);
  376. }
  377. else if (cpid > 0) {
  378. int exitcode = -1;
  379. waitpid(cpid, &exitcode, 0);
  380. }
  381. }
  382. }
  383. void BSDEthernetTap::threadMain() throw()
  384. {
  385. // Wait for a moment after startup -- wait for Network to finish
  386. // constructing itself.
  387. Thread::sleep(500);
  388. #ifndef __OpenBSD__
  389. bool pinning = _pinning;
  390. for (unsigned int i = 0; i < _concurrency; ++i) {
  391. _rxThreads.push_back(std::thread([this, i, pinning] {
  392. if (pinning) {
  393. int pinCore = i % _concurrency;
  394. fprintf(stderr, "Pinning thread %d to core %d\n", i, pinCore);
  395. pthread_t self = pthread_self();
  396. cpu_set_t cpuset;
  397. CPU_ZERO(&cpuset);
  398. CPU_SET(pinCore, &cpuset);
  399. // int rc = sched_setaffinity(self, sizeof(cpu_set_t), &cpuset);
  400. int rc = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset);
  401. if (rc != 0) {
  402. fprintf(stderr, "Failed to pin thread %d to core %d: %s\n", i, pinCore, strerror(errno));
  403. exit(1);
  404. }
  405. }
  406. #endif // __OpenBSD__
  407. uint8_t b[ZT_TAP_BUF_SIZE];
  408. MAC to, from;
  409. fd_set readfds, nullfds;
  410. int n, nfds, r;
  411. FD_ZERO(&readfds);
  412. FD_ZERO(&nullfds);
  413. nfds = (int)std::max(_shutdownSignalPipe[0], _fd) + 1;
  414. r = 0;
  415. for (;;) {
  416. FD_SET(_shutdownSignalPipe[0], &readfds);
  417. FD_SET(_fd, &readfds);
  418. select(nfds, &readfds, &nullfds, &nullfds, (struct timeval*)0);
  419. if (FD_ISSET(_shutdownSignalPipe[0], &readfds)) // writes to shutdown pipe terminate thread
  420. break;
  421. if (FD_ISSET(_fd, &readfds)) {
  422. n = (int)::read(_fd, b + r, sizeof(b) - r);
  423. if (n < 0) {
  424. if ((errno != EINTR) && (errno != ETIMEDOUT))
  425. break;
  426. }
  427. else {
  428. // Some tap drivers like to send the ethernet frame and the
  429. // payload in two chunks, so handle that by accumulating
  430. // data until we have at least a frame.
  431. r += n;
  432. if (r > 14) {
  433. if (r > ((int)_mtu + 14)) // sanity check for weird TAP behavior on some platforms
  434. r = _mtu + 14;
  435. if (_enabled) {
  436. to.setTo(b, 6);
  437. from.setTo(b + 6, 6);
  438. unsigned int etherType = ntohs(((const uint16_t*)b)[6]);
  439. _handler(_arg, (void*)0, _nwid, from, to, etherType, 0, (const void*)(b + 14), r - 14);
  440. }
  441. r = 0;
  442. }
  443. }
  444. }
  445. }
  446. #ifndef __OpenBSD__
  447. }));
  448. }
  449. #endif // __OpenBSD__
  450. }
  451. } // namespace ZeroTier