MacEthernetTapAgent.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 *
 * (c) ZeroTier, Inc.
 * https://www.zerotier.com/
 */
/*
 * This creates a pair of feth devices with the lower numbered device
 * being the ZeroTier virtual interface and the other being the device
 * used to actually read and write packets. The latter gets no IP config
 * and is only used for I/O. The behavior of feth is similar to the
 * veth pairs that exist on Linux.
 *
 * The feth device has only existed since MacOS Sierra, but that's fairly
 * long ago in Mac terms.
 *
 * I/O with feth must be done using two different sockets. The BPF socket
 * is used to receive packets, while an AF_NDRV (low-level network driver
 * access) socket must be used to inject. AF_NDRV can't read IP frames
 * since BSD doesn't forward packets out the NDRV tap if they've already
 * been handled. BPF can inject packets, but its MTU for injected packets
 * is limited to 2048, so AF_NDRV packet injection is required to inject
 * ZeroTier's large MTU frames.
 *
 * All this stuff is basically undocumented. A lot of tracing through
 * the Darwin/XNU kernel source was required to figure out how to make
 * this actually work.
 *
 * We hope to develop a DriverKit-based driver in the near-mid future to
 * replace this weird hack, but it works for now through Big Sur in our
 * testing.
 *
 * See also:
 *
 * https://apple.stackexchange.com/questions/337715/fake-ethernet-interfaces-feth-if-fake-anyone-ever-seen-this
 * https://opensource.apple.com/source/xnu/xnu-4570.41.2/bsd/net/if_fake.c.auto.html
 *
 */
  40. #include <stdio.h>
  41. #include <stdlib.h>
  42. #include <string.h>
  43. #include <stdarg.h>
  44. #include <unistd.h>
  45. #include <signal.h>
  46. #include <fcntl.h>
  47. #include <errno.h>
  48. #include <sys/signal.h>
  49. #include <sys/types.h>
  50. #include <sys/stat.h>
  51. #include <sys/ioctl.h>
  52. #include <sys/wait.h>
  53. #include <sys/select.h>
  54. #include <sys/cdefs.h>
  55. #include <sys/uio.h>
  56. #include <sys/param.h>
  57. #include <sys/ioctl.h>
  58. #include <sys/socket.h>
  59. #include <sys/sysctl.h>
  60. #include <sys/resource.h>
  61. #include <netinet/in.h>
  62. #include <arpa/inet.h>
  63. #include <net/bpf.h>
  64. #include <net/route.h>
  65. #include <net/if.h>
  66. #include <net/if_arp.h>
  67. #include <net/if_dl.h>
  68. #include <net/if_media.h>
  69. #include <net/ndrv.h>
  70. #include <netinet/in_var.h>
  71. #include <netinet/icmp6.h>
  72. #include <netinet6/in6_var.h>
  73. #include <netinet6/nd6.h>
  74. #include <ifaddrs.h>
  75. #include "../version.h"
  76. #include "MacEthernetTapAgent.h"
  77. #ifndef SIOCAUTOCONF_START
  78. #define SIOCAUTOCONF_START _IOWR('i', 132, struct in6_ifreq) /* accept rtadvd on this interface */
  79. #endif
  80. #ifndef SIOCAUTOCONF_STOP
  81. #define SIOCAUTOCONF_STOP _IOWR('i', 133, struct in6_ifreq) /* stop accepting rtadv for this interface */
  82. #endif
  83. #define P_IFCONFIG "/sbin/ifconfig"
  84. static unsigned char s_pktReadBuf[131072] __attribute__ ((__aligned__(16)));
  85. static unsigned char s_stdinReadBuf[131072] __attribute__ ((__aligned__(16)));
  86. static char s_deviceName[IFNAMSIZ];
  87. static char s_peerDeviceName[IFNAMSIZ];
  88. static int s_bpffd = -1;
  89. static int s_ndrvfd = -1;
  90. static pid_t s_parentPid;
  91. static void configureIpv6Parameters(const char *ifname,int performNUD,int acceptRouterAdverts)
  92. {
  93. struct in6_ndireq nd;
  94. struct in6_ifreq ifr;
  95. int s = socket(AF_INET6,SOCK_DGRAM,0);
  96. if (s <= 0)
  97. return;
  98. memset(&nd,0,sizeof(nd));
  99. strncpy(nd.ifname,ifname,sizeof(nd.ifname));
  100. if (ioctl(s,SIOCGIFINFO_IN6,&nd)) {
  101. close(s);
  102. return;
  103. }
  104. unsigned long oldFlags = (unsigned long)nd.ndi.flags;
  105. if (performNUD)
  106. nd.ndi.flags |= ND6_IFF_PERFORMNUD;
  107. else nd.ndi.flags &= ~ND6_IFF_PERFORMNUD;
  108. if (oldFlags != (unsigned long)nd.ndi.flags) {
  109. if (ioctl(s,SIOCSIFINFO_FLAGS,&nd)) {
  110. close(s);
  111. return;
  112. }
  113. }
  114. memset(&ifr,0,sizeof(ifr));
  115. strncpy(ifr.ifr_name,ifname,sizeof(ifr.ifr_name));
  116. if (ioctl(s,acceptRouterAdverts ? SIOCAUTOCONF_START : SIOCAUTOCONF_STOP,&ifr)) {
  117. close(s);
  118. return;
  119. }
  120. close(s);
  121. }
  122. static int run(const char *path,...)
  123. {
  124. va_list ap;
  125. char *args[16];
  126. int argNo = 1;
  127. va_start(ap,path);
  128. args[0] = (char *)path;
  129. for(;argNo<15;++argNo) {
  130. args[argNo] = va_arg(ap,char *);
  131. if (!args[argNo]) {
  132. break;
  133. }
  134. }
  135. args[argNo++] = (char *)0;
  136. va_end(ap);
  137. pid_t pid = fork();
  138. if (pid < 0) {
  139. return -1;
  140. } else if (pid == 0) {
  141. dup2(STDERR_FILENO,STDOUT_FILENO);
  142. execv(args[0],args);
  143. _exit(-1);
  144. }
  145. int rv = 0;
  146. waitpid(pid,&rv,0);
  147. return rv;
  148. }
  149. static void die()
  150. {
  151. if (s_ndrvfd >= 0)
  152. close(s_ndrvfd);
  153. if (s_bpffd >= 0)
  154. close(s_bpffd);
  155. if (s_peerDeviceName[0])
  156. run("/sbin/ifconfig",s_peerDeviceName,"destroy",(char *)0);
  157. if (s_deviceName[0])
  158. run("/sbin/ifconfig",s_deviceName,"destroy",(char *)0);
  159. }
  160. static inline void close_inherited_fds()
  161. {
  162. struct rlimit lim;
  163. getrlimit(RLIMIT_NOFILE, &lim);
  164. for (int i=3,j=(int)lim.rlim_cur;i<j;++i)
  165. close(i);
  166. }
  167. int main(int argc,char **argv)
  168. {
  169. char buf[128];
  170. struct ifreq ifr;
  171. u_int fl;
  172. fd_set rfds,wfds,efds;
  173. struct iovec iov[2];
  174. s_deviceName[0] = 0;
  175. s_peerDeviceName[0] = 0;
  176. s_parentPid = getppid();
  177. atexit(&die);
  178. signal(SIGIO,SIG_IGN);
  179. signal(SIGCHLD,SIG_IGN);
  180. signal(SIGPIPE,SIG_IGN);
  181. signal(SIGUSR1,SIG_IGN);
  182. signal(SIGUSR2,SIG_IGN);
  183. signal(SIGALRM,SIG_IGN);
  184. signal(SIGQUIT,&exit);
  185. signal(SIGTERM,&exit);
  186. signal(SIGKILL,&exit);
  187. signal(SIGINT,&exit);
  188. signal(SIGPIPE,&exit);
  189. close_inherited_fds();
  190. if (getuid() != 0) {
  191. if (setuid(0) != 0) {
  192. fprintf(stderr,"E must be run as root or with root setuid bit on executable\n");
  193. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_INVALID_REQUEST;
  194. }
  195. }
  196. if (argc < 5) {
  197. fprintf(stderr,"E invalid or missing argument(s) (usage: MacEthernetTapAgent <0-4999> <mac> <mtu> <metric>)\n");
  198. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_INVALID_REQUEST;
  199. }
  200. const int deviceNo = atoi(argv[1]);
  201. if ((deviceNo < 0)||(deviceNo > 4999)) {
  202. fprintf(stderr,"E invalid or missing argument(s) (usage: MacEthernetTapAgent <0-4999> <mac> <mtu> <metric>)\n");
  203. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_INVALID_REQUEST;
  204. }
  205. const char *mac = argv[2];
  206. const char *mtu = argv[3];
  207. const char *metric = argv[4];
  208. s_ndrvfd = socket(AF_NDRV,SOCK_RAW,0);
  209. if (s_ndrvfd < 0) {
  210. fprintf(stderr,"E unable to open AF_NDRV socket\n");
  211. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  212. }
  213. snprintf(s_deviceName,sizeof(s_deviceName),"feth%d",deviceNo);
  214. snprintf(s_peerDeviceName,sizeof(s_peerDeviceName),"feth%d",deviceNo+5000);
  215. if (run(P_IFCONFIG,s_peerDeviceName,"create",(char *)0) != 0) {
  216. fprintf(stderr,"E unable to create %s\n",s_deviceName);
  217. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  218. }
  219. usleep(10);
  220. if (run(P_IFCONFIG,s_deviceName,"create",(char *)0) != 0) {
  221. fprintf(stderr,"E unable to create %s\n",s_deviceName);
  222. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  223. }
  224. run(P_IFCONFIG,s_deviceName,"lladdr",mac,(char *)0);
  225. usleep(10);
  226. run(P_IFCONFIG,s_peerDeviceName,"peer",s_deviceName,(char *)0);
  227. usleep(10);
  228. run(P_IFCONFIG,s_peerDeviceName,"mtu",mtu,"up",(char *)0);
  229. usleep(10);
  230. run(P_IFCONFIG,s_deviceName,"mtu",mtu,"metric",metric,"up",(char *)0);
  231. usleep(10);
  232. configureIpv6Parameters(s_deviceName,1,0);
  233. usleep(10);
  234. struct sockaddr_ndrv nd;
  235. nd.snd_len = sizeof(struct sockaddr_ndrv);
  236. nd.snd_family = AF_NDRV;
  237. memcpy(nd.snd_name,s_peerDeviceName,sizeof(nd.snd_name));
  238. if (bind(s_ndrvfd,(struct sockaddr *)&nd,sizeof(nd)) != 0) {
  239. fprintf(stderr,"E unable to bind AF_NDRV socket\n");
  240. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  241. }
  242. if (connect(s_ndrvfd,(struct sockaddr *)&nd,sizeof(nd)) != 0) {
  243. fprintf(stderr,"E unable to connect AF_NDRV socket\n");
  244. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  245. }
  246. /* Start at /dev/bpf1 since some simple bpf-using net utilities hard-code /dev/bpf0.
  247. * Things like libpcap are smart enough to search. */
  248. for(int bpfno=1;bpfno<5000;++bpfno) {
  249. char tmp[32];
  250. snprintf(tmp,sizeof(tmp),"/dev/bpf%d",bpfno);
  251. s_bpffd = open(tmp,O_RDWR);
  252. if (s_bpffd >= 0) {
  253. break;
  254. }
  255. }
  256. if (s_bpffd < 0) {
  257. fprintf(stderr,"E unable to open bpf device\n");
  258. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  259. }
  260. fl = sizeof(s_pktReadBuf);
  261. if (ioctl(s_bpffd,BIOCSBLEN,&fl) != 0) {
  262. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  263. }
  264. const size_t readPktSize = (size_t)fl;
  265. fl = 1;
  266. if (ioctl(s_bpffd,BIOCIMMEDIATE,&fl) != 0) {
  267. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  268. }
  269. fl = 0;
  270. if (ioctl(s_bpffd,BIOCSSEESENT,&fl) != 0) {
  271. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  272. }
  273. memset(&ifr,0,sizeof(ifr));
  274. memcpy(ifr.ifr_name,s_peerDeviceName,IFNAMSIZ);
  275. if (ioctl(s_bpffd,BIOCSETIF,&ifr) != 0) {
  276. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  277. }
  278. fl = 1;
  279. if (ioctl(s_bpffd,BIOCSHDRCMPLT,&fl) != 0) {
  280. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  281. }
  282. fl = 1;
  283. if (ioctl(s_bpffd,BIOCPROMISC,&fl) != 0) {
  284. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  285. }
  286. fprintf(stderr,"I %s %s %d.%d.%d.%d\n",s_deviceName,s_peerDeviceName,ZEROTIER_ONE_VERSION_MAJOR,ZEROTIER_ONE_VERSION_MINOR,ZEROTIER_ONE_VERSION_REVISION,ZEROTIER_ONE_VERSION_BUILD);
  287. FD_ZERO(&rfds);
  288. FD_ZERO(&wfds);
  289. FD_ZERO(&efds);
  290. long stdinReadPtr = 0;
  291. for(;;) {
  292. FD_SET(STDIN_FILENO,&rfds);
  293. FD_SET(s_bpffd,&rfds);
  294. if (select(s_bpffd+1,&rfds,&wfds,&efds,(struct timeval *)0) < 0) {
  295. if ((errno == EAGAIN)||(errno == EINTR)) {
  296. usleep(10);
  297. continue;
  298. }
  299. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_READ_ERROR;
  300. }
  301. if (FD_ISSET(s_bpffd,&rfds)) {
  302. long n = (long)read(s_bpffd,s_pktReadBuf,readPktSize);
  303. if (n > 0) {
  304. for(unsigned char *p=s_pktReadBuf,*eof=p+n;p<eof;) {
  305. struct bpf_hdr *h = (struct bpf_hdr *)p;
  306. if ((h->bh_caplen > 0)&&((p + h->bh_hdrlen + h->bh_caplen) <= eof)) {
  307. uint16_t len = (uint16_t)h->bh_caplen;
  308. iov[0].iov_base = &len;
  309. iov[0].iov_len = 2;
  310. iov[1].iov_base = p + h->bh_hdrlen;
  311. iov[1].iov_len = h->bh_caplen;
  312. writev(STDOUT_FILENO,iov,2);
  313. }
  314. p += BPF_WORDALIGN(h->bh_hdrlen + h->bh_caplen);
  315. }
  316. }
  317. }
  318. if (FD_ISSET(STDIN_FILENO,&rfds)) {
  319. long n = (long)read(STDIN_FILENO,s_stdinReadBuf + stdinReadPtr,sizeof(s_stdinReadBuf) - stdinReadPtr);
  320. if (n > 0) {
  321. stdinReadPtr += n;
  322. while (stdinReadPtr >= 2) {
  323. long len = *((uint16_t *)s_stdinReadBuf);
  324. if (stdinReadPtr >= (len + 2)) {
  325. if (len > 0) {
  326. unsigned char *msg = s_stdinReadBuf + 2;
  327. switch(msg[0]) {
  328. case ZT_MACETHERNETTAPAGENT_STDIN_CMD_PACKET:
  329. if (len > 1) {
  330. if (write(s_ndrvfd,msg+1,len-1) < 0) {
  331. fprintf(stderr,"E inject failed size==%ld errno==%d\n",len-1,errno);
  332. }
  333. }
  334. break;
  335. case ZT_MACETHERNETTAPAGENT_STDIN_CMD_IFCONFIG: {
  336. char *args[16];
  337. args[0] = P_IFCONFIG;
  338. args[1] = s_deviceName;
  339. int argNo = 2;
  340. for(int argPtr=0,k=1,l=(int)len;k<l;++k) {
  341. if (!msg[k]) {
  342. if (argPtr > 0) {
  343. argPtr = 0;
  344. ++argNo;
  345. if (argNo >= 15) {
  346. break;
  347. }
  348. }
  349. } else {
  350. if (argPtr == 0) {
  351. args[argNo] = (char *)(msg + k);
  352. }
  353. argPtr++;
  354. }
  355. }
  356. args[argNo] = (char *)0;
  357. if (argNo > 2) {
  358. pid_t pid = fork();
  359. if (pid < 0) {
  360. return -1;
  361. } else if (pid == 0) {
  362. dup2(STDERR_FILENO,STDOUT_FILENO);
  363. execv(args[0],args);
  364. _exit(-1);
  365. }
  366. int rv = 0;
  367. waitpid(pid,&rv,0);
  368. }
  369. } break;
  370. case ZT_MACETHERNETTAPAGENT_STDIN_CMD_EXIT:
  371. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_SUCCESS;
  372. default:
  373. fprintf(stderr,"E unrecognized message type over pipe from host process: %d (length: %d)\n",(int)msg[0],(int)len);
  374. break;
  375. }
  376. }
  377. if (stdinReadPtr > (len + 2)) {
  378. memmove(s_stdinReadBuf,s_stdinReadBuf + len + 2,stdinReadPtr -= (len + 2));
  379. } else {
  380. stdinReadPtr = 0;
  381. }
  382. } else {
  383. break;
  384. }
  385. }
  386. }
  387. }
  388. }
  389. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_SUCCESS;
  390. }