MacEthernetTapAgent.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. /*
  2. * Copyright (c)2019 ZeroTier, Inc.
  3. *
  4. * Use of this software is governed by the Business Source License included
  5. * in the LICENSE.TXT file in the project's root directory.
  6. *
  7. * Change Date: 2026-01-01
  8. *
  9. * On the date above, in accordance with the Business Source License, use
  10. * of this software will be governed by version 2.0 of the Apache License.
  11. */
  12. /****/
  13. /*
  14. * This creates a pair of feth devices with the lower numbered device
  15. * being the ZeroTier virtual interface and the other being the device
  16. * used to actually read and write packets. The latter gets no IP config
  17. * and is only used for I/O. The behavior of feth is similar to the
  18. * veth pairs that exist on Linux.
  19. *
  20. * The feth device has only existed since MacOS Sierra, but that's fairly
  21. * long ago in Mac terms.
  22. *
  23. * I/O with feth must be done using two different sockets. The BPF socket
  24. * is used to receive packets, while an AF_NDRV (low-level network driver
  25. * access) socket must be used to inject. AF_NDRV can't read IP frames
  26. * since BSD doesn't forward packets out the NDRV tap if they've already
  27. * been handled, and while BPF can inject, its MTU for injected packets
  28. * is limited to 2048. AF_NDRV packet injection is required to inject
  29. * ZeroTier's large MTU frames.
  30. *
  31. * All this stuff is basically undocumented. A lot of tracing through
  32. * the Darwin/XNU kernel source was required to figure out how to make
  33. * this actually work.
  34. *
  35. * We hope to develop a DriverKit-based driver in the near-mid future to
  36. * replace this weird hack, but it works for now through Big Sur in our
  37. * testing.
  38. *
  39. * See also:
  40. *
  41. * https://apple.stackexchange.com/questions/337715/fake-ethernet-interfaces-feth-if-fake-anyone-ever-seen-this
  42. * https://opensource.apple.com/source/xnu/xnu-4570.41.2/bsd/net/if_fake.c.auto.html
  43. *
  44. */
  45. #include <stdio.h>
  46. #include <stdlib.h>
  47. #include <string.h>
  48. #include <stdarg.h>
  49. #include <unistd.h>
  50. #include <signal.h>
  51. #include <fcntl.h>
  52. #include <errno.h>
  53. #include <sys/signal.h>
  54. #include <sys/types.h>
  55. #include <sys/stat.h>
  56. #include <sys/ioctl.h>
  57. #include <sys/wait.h>
  58. #include <sys/select.h>
  59. #include <sys/cdefs.h>
  60. #include <sys/uio.h>
  61. #include <sys/param.h>
  62. #include <sys/ioctl.h>
  63. #include <sys/socket.h>
  64. #include <sys/sysctl.h>
  65. #include <sys/resource.h>
  66. #include <netinet/in.h>
  67. #include <arpa/inet.h>
  68. #include <net/bpf.h>
  69. #include <net/route.h>
  70. #include <net/if.h>
  71. #include <net/if_arp.h>
  72. #include <net/if_dl.h>
  73. #include <net/if_media.h>
  74. #include <net/ndrv.h>
  75. #include <netinet/in_var.h>
  76. #include <netinet/icmp6.h>
  77. #include <netinet6/in6_var.h>
  78. #include <netinet6/nd6.h>
  79. #include <ifaddrs.h>
  80. #include "../version.h"
  81. #include "MacEthernetTapAgent.h"
  82. #ifndef SIOCAUTOCONF_START
  83. #define SIOCAUTOCONF_START _IOWR('i', 132, struct in6_ifreq) /* accept rtadvd on this interface */
  84. #endif
  85. #ifndef SIOCAUTOCONF_STOP
  86. #define SIOCAUTOCONF_STOP _IOWR('i', 133, struct in6_ifreq) /* stop accepting rtadv for this interface */
  87. #endif
  88. #define P_IFCONFIG "/sbin/ifconfig"
  89. static unsigned char s_pktReadBuf[131072] __attribute__ ((__aligned__(16)));
  90. static unsigned char s_stdinReadBuf[131072] __attribute__ ((__aligned__(16)));
  91. static char s_deviceName[IFNAMSIZ];
  92. static char s_peerDeviceName[IFNAMSIZ];
  93. static int s_bpffd = -1;
  94. static int s_ndrvfd = -1;
  95. static pid_t s_parentPid;
  96. static void configureIpv6Parameters(const char *ifname,int performNUD,int acceptRouterAdverts)
  97. {
  98. struct in6_ndireq nd;
  99. struct in6_ifreq ifr;
  100. int s = socket(AF_INET6,SOCK_DGRAM,0);
  101. if (s <= 0)
  102. return;
  103. memset(&nd,0,sizeof(nd));
  104. strncpy(nd.ifname,ifname,sizeof(nd.ifname));
  105. if (ioctl(s,SIOCGIFINFO_IN6,&nd)) {
  106. close(s);
  107. return;
  108. }
  109. unsigned long oldFlags = (unsigned long)nd.ndi.flags;
  110. if (performNUD)
  111. nd.ndi.flags |= ND6_IFF_PERFORMNUD;
  112. else nd.ndi.flags &= ~ND6_IFF_PERFORMNUD;
  113. if (oldFlags != (unsigned long)nd.ndi.flags) {
  114. if (ioctl(s,SIOCSIFINFO_FLAGS,&nd)) {
  115. close(s);
  116. return;
  117. }
  118. }
  119. memset(&ifr,0,sizeof(ifr));
  120. strncpy(ifr.ifr_name,ifname,sizeof(ifr.ifr_name));
  121. if (ioctl(s,acceptRouterAdverts ? SIOCAUTOCONF_START : SIOCAUTOCONF_STOP,&ifr)) {
  122. close(s);
  123. return;
  124. }
  125. close(s);
  126. }
  /**
   * Execute a program and wait for it to finish.
   *
   * The variadic arguments are the program's argv[1..] as char pointers and
   * must end with a null pointer. At most 14 of them are used; any further
   * ones are silently dropped. In the child, stdout is redirected onto stderr
   * before the exec.
   *
   * @param path Absolute path of the executable (also passed as argv[0])
   * @return -1 if fork() fails; otherwise the wait status stored by waitpid()
   *         (left at 0 if waitpid() stores nothing). A child whose exec fails
   *         terminates via _exit(-1).
   */
  static int run(const char *path,...)
  {
      char *argvBuf[16];
      int count = 0;
      va_list rest;

      argvBuf[count++] = (char *)path;

      /* Collect arguments until the terminating null pointer or the slot limit. */
      va_start(rest,path);
      while (count < 15) {
          char *next = va_arg(rest,char *);
          if (next == (char *)0)
              break;
          argvBuf[count++] = next;
      }
      va_end(rest);
      argvBuf[count] = (char *)0;

      const pid_t child = fork();
      if (child == 0) {
          /* Child: send the program's stdout to our stderr, then exec. */
          dup2(STDERR_FILENO,STDOUT_FILENO);
          execv(argvBuf[0],argvBuf);
          _exit(-1);
      }
      if (child < 0)
          return -1;

      int status = 0;
      waitpid(child,&status,0);
      return status;
  }
  154. static void die()
  155. {
  156. if (s_ndrvfd >= 0)
  157. close(s_ndrvfd);
  158. if (s_bpffd >= 0)
  159. close(s_bpffd);
  160. if (s_peerDeviceName[0])
  161. run("/sbin/ifconfig",s_peerDeviceName,"destroy",(char *)0);
  162. if (s_deviceName[0])
  163. run("/sbin/ifconfig",s_deviceName,"destroy",(char *)0);
  164. }
  /**
   * Close every descriptor above stderr that was inherited from the parent.
   *
   * The upper bound is the soft RLIMIT_NOFILE limit. If that limit cannot be
   * read, is unlimited, or does not fit in an int, a fixed fallback bound is
   * used instead of casting a meaningless value.
   */
  static inline void close_inherited_fds(void)
  {
      /* Fallback bound is a generous arbitrary cap, not a system constant. */
      enum { FALLBACK_FD_LIMIT = 10240, MAX_INT_FD = 0x7fffffff };

      struct rlimit lim;
      int limit = FALLBACK_FD_LIMIT;

      if ((getrlimit(RLIMIT_NOFILE,&lim) == 0)
          &&(lim.rlim_cur != RLIM_INFINITY)
          &&(lim.rlim_cur <= (rlim_t)MAX_INT_FD)) {
          limit = (int)lim.rlim_cur;
      }

      /* 0-2 (stdin/stdout/stderr) are kept. */
      for (int fd = 3; fd < limit; ++fd)
          close(fd);
  }
  172. int main(int argc,char **argv)
  173. {
  174. char buf[128];
  175. struct ifreq ifr;
  176. u_int fl;
  177. fd_set rfds,wfds,efds;
  178. struct iovec iov[2];
  179. s_deviceName[0] = 0;
  180. s_peerDeviceName[0] = 0;
  181. s_parentPid = getppid();
  182. atexit(&die);
  183. signal(SIGIO,SIG_IGN);
  184. signal(SIGCHLD,SIG_IGN);
  185. signal(SIGPIPE,SIG_IGN);
  186. signal(SIGUSR1,SIG_IGN);
  187. signal(SIGUSR2,SIG_IGN);
  188. signal(SIGALRM,SIG_IGN);
  189. signal(SIGQUIT,&exit);
  190. signal(SIGTERM,&exit);
  191. signal(SIGKILL,&exit);
  192. signal(SIGINT,&exit);
  193. signal(SIGPIPE,&exit);
  194. close_inherited_fds();
  195. if (getuid() != 0) {
  196. if (setuid(0) != 0) {
  197. fprintf(stderr,"E must be run as root or with root setuid bit on executable\n");
  198. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_INVALID_REQUEST;
  199. }
  200. }
  201. if (argc < 5) {
  202. fprintf(stderr,"E invalid or missing argument(s) (usage: MacEthernetTapAgent <0-4999> <mac> <mtu> <metric>)\n");
  203. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_INVALID_REQUEST;
  204. }
  205. const int deviceNo = atoi(argv[1]);
  206. if ((deviceNo < 0)||(deviceNo > 4999)) {
  207. fprintf(stderr,"E invalid or missing argument(s) (usage: MacEthernetTapAgent <0-4999> <mac> <mtu> <metric>)\n");
  208. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_INVALID_REQUEST;
  209. }
  210. const char *mac = argv[2];
  211. const char *mtu = argv[3];
  212. const char *metric = argv[4];
  213. s_ndrvfd = socket(AF_NDRV,SOCK_RAW,0);
  214. if (s_ndrvfd < 0) {
  215. fprintf(stderr,"E unable to open AF_NDRV socket\n");
  216. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  217. }
  218. snprintf(s_deviceName,sizeof(s_deviceName),"feth%d",deviceNo);
  219. snprintf(s_peerDeviceName,sizeof(s_peerDeviceName),"feth%d",deviceNo+5000);
  220. if (run(P_IFCONFIG,s_peerDeviceName,"create",(char *)0) != 0) {
  221. fprintf(stderr,"E unable to create %s\n",s_deviceName);
  222. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  223. }
  224. usleep(10);
  225. if (run(P_IFCONFIG,s_deviceName,"create",(char *)0) != 0) {
  226. fprintf(stderr,"E unable to create %s\n",s_deviceName);
  227. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  228. }
  229. run(P_IFCONFIG,s_deviceName,"lladdr",mac,(char *)0);
  230. usleep(10);
  231. run(P_IFCONFIG,s_peerDeviceName,"peer",s_deviceName,(char *)0);
  232. usleep(10);
  233. run(P_IFCONFIG,s_peerDeviceName,"mtu",mtu,"up",(char *)0);
  234. usleep(10);
  235. run(P_IFCONFIG,s_deviceName,"mtu",mtu,"metric",metric,"up",(char *)0);
  236. usleep(10);
  237. configureIpv6Parameters(s_deviceName,1,0);
  238. usleep(10);
  239. struct sockaddr_ndrv nd;
  240. nd.snd_len = sizeof(struct sockaddr_ndrv);
  241. nd.snd_family = AF_NDRV;
  242. memcpy(nd.snd_name,s_peerDeviceName,sizeof(nd.snd_name));
  243. if (bind(s_ndrvfd,(struct sockaddr *)&nd,sizeof(nd)) != 0) {
  244. fprintf(stderr,"E unable to bind AF_NDRV socket\n");
  245. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  246. }
  247. if (connect(s_ndrvfd,(struct sockaddr *)&nd,sizeof(nd)) != 0) {
  248. fprintf(stderr,"E unable to connect AF_NDRV socket\n");
  249. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  250. }
  251. /* Start at /dev/bpf1 since some simple bpf-using net utilities hard-code /dev/bpf0.
  252. * Things like libpcap are smart enough to search. */
  253. for(int bpfno=1;bpfno<5000;++bpfno) {
  254. char tmp[32];
  255. snprintf(tmp,sizeof(tmp),"/dev/bpf%d",bpfno);
  256. s_bpffd = open(tmp,O_RDWR);
  257. if (s_bpffd >= 0) {
  258. break;
  259. }
  260. }
  261. if (s_bpffd < 0) {
  262. fprintf(stderr,"E unable to open bpf device\n");
  263. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  264. }
  265. fl = sizeof(s_pktReadBuf);
  266. if (ioctl(s_bpffd,BIOCSBLEN,&fl) != 0) {
  267. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  268. }
  269. const size_t readPktSize = (size_t)fl;
  270. fl = 1;
  271. if (ioctl(s_bpffd,BIOCIMMEDIATE,&fl) != 0) {
  272. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  273. }
  274. fl = 0;
  275. if (ioctl(s_bpffd,BIOCSSEESENT,&fl) != 0) {
  276. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  277. }
  278. memset(&ifr,0,sizeof(ifr));
  279. memcpy(ifr.ifr_name,s_peerDeviceName,IFNAMSIZ);
  280. if (ioctl(s_bpffd,BIOCSETIF,&ifr) != 0) {
  281. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  282. }
  283. fl = 1;
  284. if (ioctl(s_bpffd,BIOCSHDRCMPLT,&fl) != 0) {
  285. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  286. }
  287. fl = 1;
  288. if (ioctl(s_bpffd,BIOCPROMISC,&fl) != 0) {
  289. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE;
  290. }
  291. fprintf(stderr,"I %s %s %d.%d.%d.%d\n",s_deviceName,s_peerDeviceName,ZEROTIER_ONE_VERSION_MAJOR,ZEROTIER_ONE_VERSION_MINOR,ZEROTIER_ONE_VERSION_REVISION,ZEROTIER_ONE_VERSION_BUILD);
  292. FD_ZERO(&rfds);
  293. FD_ZERO(&wfds);
  294. FD_ZERO(&efds);
  295. long stdinReadPtr = 0;
  296. for(;;) {
  297. FD_SET(STDIN_FILENO,&rfds);
  298. FD_SET(s_bpffd,&rfds);
  299. if (select(s_bpffd+1,&rfds,&wfds,&efds,(struct timeval *)0) < 0) {
  300. if ((errno == EAGAIN)||(errno == EINTR)) {
  301. usleep(10);
  302. continue;
  303. }
  304. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_READ_ERROR;
  305. }
  306. if (FD_ISSET(s_bpffd,&rfds)) {
  307. long n = (long)read(s_bpffd,s_pktReadBuf,readPktSize);
  308. if (n > 0) {
  309. for(unsigned char *p=s_pktReadBuf,*eof=p+n;p<eof;) {
  310. struct bpf_hdr *h = (struct bpf_hdr *)p;
  311. if ((h->bh_caplen > 0)&&((p + h->bh_hdrlen + h->bh_caplen) <= eof)) {
  312. uint16_t len = (uint16_t)h->bh_caplen;
  313. iov[0].iov_base = &len;
  314. iov[0].iov_len = 2;
  315. iov[1].iov_base = p + h->bh_hdrlen;
  316. iov[1].iov_len = h->bh_caplen;
  317. writev(STDOUT_FILENO,iov,2);
  318. }
  319. p += BPF_WORDALIGN(h->bh_hdrlen + h->bh_caplen);
  320. }
  321. }
  322. }
  323. if (FD_ISSET(STDIN_FILENO,&rfds)) {
  324. long n = (long)read(STDIN_FILENO,s_stdinReadBuf + stdinReadPtr,sizeof(s_stdinReadBuf) - stdinReadPtr);
  325. if (n > 0) {
  326. stdinReadPtr += n;
  327. while (stdinReadPtr >= 2) {
  328. long len = *((uint16_t *)s_stdinReadBuf);
  329. if (stdinReadPtr >= (len + 2)) {
  330. if (len > 0) {
  331. unsigned char *msg = s_stdinReadBuf + 2;
  332. switch(msg[0]) {
  333. case ZT_MACETHERNETTAPAGENT_STDIN_CMD_PACKET:
  334. if (len > 1) {
  335. if (write(s_ndrvfd,msg+1,len-1) < 0) {
  336. fprintf(stderr,"E inject failed size==%ld errno==%d\n",len-1,errno);
  337. }
  338. }
  339. break;
  340. case ZT_MACETHERNETTAPAGENT_STDIN_CMD_IFCONFIG: {
  341. char *args[16];
  342. args[0] = P_IFCONFIG;
  343. args[1] = s_deviceName;
  344. int argNo = 2;
  345. for(int argPtr=0,k=1,l=(int)len;k<l;++k) {
  346. if (!msg[k]) {
  347. if (argPtr > 0) {
  348. argPtr = 0;
  349. ++argNo;
  350. if (argNo >= 15) {
  351. break;
  352. }
  353. }
  354. } else {
  355. if (argPtr == 0) {
  356. args[argNo] = (char *)(msg + k);
  357. }
  358. argPtr++;
  359. }
  360. }
  361. args[argNo] = (char *)0;
  362. if (argNo > 2) {
  363. pid_t pid = fork();
  364. if (pid < 0) {
  365. return -1;
  366. } else if (pid == 0) {
  367. dup2(STDERR_FILENO,STDOUT_FILENO);
  368. execv(args[0],args);
  369. _exit(-1);
  370. }
  371. int rv = 0;
  372. waitpid(pid,&rv,0);
  373. }
  374. } break;
  375. case ZT_MACETHERNETTAPAGENT_STDIN_CMD_EXIT:
  376. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_SUCCESS;
  377. default:
  378. fprintf(stderr,"E unrecognized message type over pipe from host process: %d (length: %d)\n",(int)msg[0],(int)len);
  379. break;
  380. }
  381. }
  382. if (stdinReadPtr > (len + 2)) {
  383. memmove(s_stdinReadBuf,s_stdinReadBuf + len + 2,stdinReadPtr -= (len + 2));
  384. } else {
  385. stdinReadPtr = 0;
  386. }
  387. } else {
  388. break;
  389. }
  390. }
  391. }
  392. }
  393. }
  394. return ZT_MACETHERNETTAPAGENT_EXIT_CODE_SUCCESS;
  395. }