NativeSocketManager.cpp 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994
  1. /*
  2. * ZeroTier One - Global Peer to Peer Ethernet
  3. * Copyright (C) 2011-2014 ZeroTier Networks LLC
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. * --
  19. *
  20. * ZeroTier may be used and distributed under the terms of the GPLv3, which
  21. * are available at: http://www.gnu.org/licenses/gpl-3.0.html
  22. *
  23. * If you would like to embed ZeroTier into a commercial application or
  24. * redistribute it in a modified binary form, please contact ZeroTier Networks
  25. * LLC. Start here: http://www.zerotier.com/
  26. */
  27. /* Native SocketManager for Windows and Unix */
  28. #include <stdio.h>
  29. #include <string.h>
  30. #include <stdlib.h>
  31. #include <fcntl.h>
  32. #include <time.h>
  33. #include <sys/types.h>
  34. #include <algorithm>
  35. #include "../node/Constants.hpp"
  36. #include "NativeSocketManager.hpp"
  37. #ifndef __WINDOWS__
  38. #include <errno.h>
  39. #include <unistd.h>
  40. #include <sys/socket.h>
  41. #include <arpa/inet.h>
  42. #include <signal.h>
  43. #include <netinet/in.h>
  44. #include <netinet/tcp.h>
  45. #endif // !__WINDOWS__
  46. // Uncomment to turn off TCP Nagle
  47. //#define ZT_TCP_NODELAY
  48. // Allow us to use the same value on Windows and *nix
  49. #ifndef INVALID_SOCKET
  50. #define INVALID_SOCKET (-1)
  51. #endif
  52. #ifdef __WINDOWS__
  53. #define CLOSE_SOCKET(s) ::closesocket(s)
  54. #else
  55. #define CLOSE_SOCKET(s) ::close(s)
  56. #endif
  57. namespace ZeroTier {
  58. //////////////////////////////////////////////////////////////////////////////
  59. // Socket implementations
  60. //////////////////////////////////////////////////////////////////////////////
  61. class NativeSocket : public Socket
  62. {
  63. public:
  64. #ifdef __WINDOWS__
  65. NativeSocket(const Type &t,SOCKET s) : Socket(t),_sock(s) {}
  66. SOCKET _sock;
  67. #else
  68. NativeSocket(const Type &t,int s) : Socket(t),_sock(s) {}
  69. int _sock;
  70. #endif
  71. virtual bool notifyAvailableForRead(const SharedPtr<Socket> &self,NativeSocketManager *sm) = 0;
  72. virtual bool notifyAvailableForWrite(const SharedPtr<Socket> &self,NativeSocketManager *sm) = 0;
  73. };
  74. /**
  75. * Native UDP socket
  76. */
  77. class NativeUdpSocket : public NativeSocket
  78. {
  79. public:
  80. #ifdef __WINDOWS__
  81. NativeUdpSocket(Type t,SOCKET s) : NativeSocket(t,s) {}
  82. #else
  83. NativeUdpSocket(Type t,int s) : NativeSocket(t,s) {}
  84. #endif
  85. virtual ~NativeUdpSocket()
  86. {
  87. #ifdef __WINDOWS__
  88. ::closesocket(_sock);
  89. #else
  90. ::close(_sock);
  91. #endif
  92. }
  93. virtual bool send(const InetAddress &to,const void *msg,unsigned int msglen)
  94. {
  95. if (to.isV6()) {
  96. #ifdef __WINDOWS__
  97. return ((int)sendto(_sock,(const char *)msg,msglen,0,to.saddr(),to.saddrLen()) == (int)msglen);
  98. #else
  99. return ((int)sendto(_sock,msg,msglen,0,to.saddr(),to.saddrLen()) == (int)msglen);
  100. #endif
  101. } else {
  102. #ifdef __WINDOWS__
  103. return ((int)sendto(_sock,(const char *)msg,msglen,0,to.saddr(),to.saddrLen()) == (int)msglen);
  104. #else
  105. return ((int)sendto(_sock,msg,msglen,0,to.saddr(),to.saddrLen()) == (int)msglen);
  106. #endif
  107. }
  108. }
  109. virtual bool notifyAvailableForRead(const SharedPtr<Socket> &self,NativeSocketManager *sm)
  110. {
  111. Buffer<ZT_SOCKET_MAX_MESSAGE_LEN> buf;
  112. InetAddress from;
  113. socklen_t salen = from.saddrSpaceLen();
  114. int n = (int)recvfrom(_sock,(char *)(buf.data()),ZT_SOCKET_MAX_MESSAGE_LEN,0,from.saddr(),&salen);
  115. if (n > 0) {
  116. buf.setSize((unsigned int)n);
  117. sm->handleReceivedPacket(self,from,buf);
  118. }
  119. return true;
  120. }
  121. virtual bool notifyAvailableForWrite(const SharedPtr<Socket> &self,NativeSocketManager *sm)
  122. {
  123. return true;
  124. }
  125. };
  126. /**
  127. * A TCP socket encapsulating ZeroTier packets over a TCP stream connection
  128. *
  129. * This implements a simple packet encapsulation that is designed to look like
  130. * a TLS connection. It's not a TLS connection, but it sends TLS format record
  131. * headers. It could be extended in the future to implement a fake TLS
  132. * handshake.
  133. *
  134. * At the moment, each packet is just made to look like TLS application data:
  135. * <[1] TLS content type> - currently 0x17 for "application data"
  136. * <[1] TLS major version> - currently 0x03 for TLS 1.2
  137. * <[1] TLS minor version> - currently 0x03 for TLS 1.2
  138. * <[2] payload length> - 16-bit length of payload in bytes
  139. * <[...] payload> - Message payload
  140. *
  141. * The primary purpose of TCP sockets is to work over ports like HTTPS(443),
  142. * allowing users behind particularly fascist firewalls to at least reach
  143. * ZeroTier's supernodes. UDP is the preferred method of communication as
  144. * encapsulating L2 and L3 protocols over TCP is inherently inefficient
  145. * due to double-ACKs. So TCP is only used as a fallback.
  146. */
  147. class NativeTcpSocket : public NativeSocket
  148. {
  149. public:
  150. #ifdef __WINDOWS__
  151. NativeTcpSocket(NativeSocketManager *sm,SOCKET s,Socket::Type t,bool c,const InetAddress &r) :
  152. #else
  153. NativeTcpSocket(NativeSocketManager *sm,int s,Socket::Type t,bool c,const InetAddress &r) :
  154. #endif
  155. NativeSocket(t,s),
  156. _lastActivity(Utils::now()),
  157. _sm(sm),
  158. _inptr(0),
  159. _outptr(0),
  160. _connecting(c),
  161. _remote(r) {}
  162. virtual ~NativeTcpSocket()
  163. {
  164. #ifdef __WINDOWS__
  165. ::closesocket(_sock);
  166. #else
  167. ::close(_sock);
  168. #endif
  169. }
  170. virtual bool send(const InetAddress &to,const void *msg,unsigned int msglen)
  171. {
  172. if (msglen > ZT_SOCKET_MAX_MESSAGE_LEN)
  173. return false; // message too big
  174. if (!msglen)
  175. return true; // sanity check
  176. Mutex::Lock _l(_writeLock);
  177. bool writeInProgress = ((_outptr != 0)||(_connecting));
  178. if ((_outptr + 5 + msglen) > (unsigned int)sizeof(_outbuf))
  179. return false;
  180. _outbuf[_outptr++] = 0x17; // look like TLS data
  181. _outbuf[_outptr++] = 0x03;
  182. _outbuf[_outptr++] = 0x03; // look like TLS 1.2
  183. _outbuf[_outptr++] = (unsigned char)((msglen >> 8) & 0xff);
  184. _outbuf[_outptr++] = (unsigned char)(msglen & 0xff);
  185. for(unsigned int i=0;i<msglen;++i)
  186. _outbuf[_outptr++] = ((const unsigned char *)msg)[i];
  187. if (!writeInProgress) {
  188. // If no output was enqueued before this, try to send() it and then
  189. // start a queued write if any remains after that.
  190. int n = (int)::send(_sock,(const char *)_outbuf,_outptr,0);
  191. if (n > 0)
  192. memmove(_outbuf,_outbuf + (unsigned int)n,_outptr -= (unsigned int)n);
  193. if (_outptr) {
  194. _sm->_startNotifyWrite(this);
  195. _sm->whack();
  196. }
  197. } // else just leave in _outbuf[] to get written when stream is available for write
  198. return true;
  199. }
  200. virtual bool notifyAvailableForRead(const SharedPtr<Socket> &self,NativeSocketManager *sm)
  201. {
  202. unsigned char buf[65536];
  203. int n = (int)::recv(_sock,(char *)buf,sizeof(buf),0);
  204. if (n <= 0)
  205. return false; // read error, stream probably closed
  206. unsigned int p = _inptr,pl = 0;
  207. for(int k=0;k<n;++k) {
  208. _inbuf[p++] = buf[k];
  209. if (p >= (int)sizeof(_inbuf))
  210. return false; // read overrun, packet too large or invalid
  211. if ((!pl)&&(p >= 5)) {
  212. if (_inbuf[0] == 0x17) {
  213. // fake TLS data frame, next two bytes are TLS version and are ignored
  214. pl = (((unsigned int)_inbuf[3] << 8) | (unsigned int)_inbuf[4]) + 5;
  215. } else return false; // in the future we may support fake TLS handshakes
  216. }
  217. if ((pl)&&(p >= pl)) {
  218. Buffer<ZT_SOCKET_MAX_MESSAGE_LEN> data(_inbuf + 5,pl - 5);
  219. memmove(_inbuf,_inbuf + pl,p -= pl);
  220. try {
  221. sm->handleReceivedPacket(self,_remote,data);
  222. } catch ( ... ) {} // handlers should not throw
  223. pl = 0;
  224. }
  225. }
  226. _inptr = p;
  227. return true;
  228. }
  229. virtual bool notifyAvailableForWrite(const SharedPtr<Socket> &self,NativeSocketManager *sm)
  230. {
  231. Mutex::Lock _l(_writeLock);
  232. if (_connecting)
  233. _connecting = false;
  234. if (_outptr) {
  235. int n = (int)::send(_sock,(const char *)_outbuf,_outptr,0);
  236. #ifdef __WINDOWS__
  237. if (n == SOCKET_ERROR) {
  238. switch(WSAGetLastError()) {
  239. case WSAEINTR:
  240. case WSAEWOULDBLOCK:
  241. break;
  242. default:
  243. return false;
  244. }
  245. #else
  246. if (n <= 0) {
  247. switch(errno) {
  248. #ifdef EAGAIN
  249. case EAGAIN:
  250. #endif
  251. #if defined(EWOULDBLOCK) && ( !defined(EAGAIN) || (EWOULDBLOCK != EAGAIN) )
  252. case EWOULDBLOCK:
  253. #endif
  254. #ifdef EINTR
  255. case EINTR:
  256. #endif
  257. break;
  258. default:
  259. return false;
  260. }
  261. #endif
  262. } else memmove(_outbuf,_outbuf + (unsigned int)n,_outptr -= (unsigned int)n);
  263. }
  264. if (!_outptr)
  265. sm->_stopNotifyWrite(this);
  266. return true;
  267. }
  268. unsigned char _inbuf[ZT_SOCKET_MAX_MESSAGE_LEN];
  269. unsigned char _outbuf[ZT_SOCKET_MAX_MESSAGE_LEN * 4];
  270. uint64_t _lastActivity; // updated whenever data is received, checked directly by SocketManager for stale TCP cleanup
  271. NativeSocketManager *_sm;
  272. unsigned int _inptr;
  273. unsigned int _outptr;
  274. bool _connecting; // manipulated directly by SocketManager, true if connect() is in progress
  275. InetAddress _remote;
  276. Mutex _writeLock;
  277. };
  278. //////////////////////////////////////////////////////////////////////////////
  279. #ifdef __WINDOWS__
  280. // hack copied from StackOverflow, behaves a bit like pipe() on *nix systems
  281. static inline void winPipeHack(SOCKET fds[2])
  282. {
  283. struct sockaddr_in inaddr;
  284. struct sockaddr addr;
  285. SOCKET lst=::socket(AF_INET, SOCK_STREAM,IPPROTO_TCP);
  286. memset(&inaddr, 0, sizeof(inaddr));
  287. memset(&addr, 0, sizeof(addr));
  288. inaddr.sin_family = AF_INET;
  289. inaddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
  290. inaddr.sin_port = 0;
  291. int yes=1;
  292. setsockopt(lst,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes));
  293. bind(lst,(struct sockaddr *)&inaddr,sizeof(inaddr));
  294. listen(lst,1);
  295. int len=sizeof(inaddr);
  296. getsockname(lst, &addr,&len);
  297. fds[0]=::socket(AF_INET, SOCK_STREAM,0);
  298. connect(fds[0],&addr,len);
  299. fds[1]=accept(lst,0,0);
  300. closesocket(lst);
  301. }
  302. #endif
  303. NativeSocketManager::NativeSocketManager(
  304. int localUdpPort,
  305. int localTcpPort,
  306. void (*packetHandler)(const SharedPtr<Socket> &,void *,const InetAddress &,Buffer<ZT_SOCKET_MAX_MESSAGE_LEN> &),
  307. void *arg) :
  308. SocketManager(packetHandler,arg),
  309. _whackSendPipe(INVALID_SOCKET),
  310. _whackReceivePipe(INVALID_SOCKET),
  311. _tcpV4ListenSocket(INVALID_SOCKET),
  312. _tcpV6ListenSocket(INVALID_SOCKET),
  313. _nfds(0)
  314. {
  315. FD_ZERO(&_readfds);
  316. FD_ZERO(&_writefds);
  317. // Create a pipe or socket pair that can be used to interrupt select()
  318. #ifdef __WINDOWS__
  319. {
  320. SOCKET tmps[2] = { INVALID_SOCKET,INVALID_SOCKET };
  321. winPipeHack(tmps);
  322. _whackSendPipe = tmps[0];
  323. _whackReceivePipe = tmps[1];
  324. u_long iMode=1;
  325. ioctlsocket(tmps[1],FIONBIO,&iMode);
  326. }
  327. #else
  328. {
  329. int tmpfds[2];
  330. if (::pipe(tmpfds))
  331. throw std::runtime_error("pipe() failed");
  332. _whackSendPipe = tmpfds[1];
  333. _whackReceivePipe = tmpfds[0];
  334. fcntl(_whackReceivePipe,F_SETFL,O_NONBLOCK);
  335. }
  336. #endif
  337. FD_SET(_whackReceivePipe,&_readfds);
  338. if (localTcpPort > 0) {
  339. if (localTcpPort > 0xffff) {
  340. _closeSockets();
  341. throw std::runtime_error("invalid local TCP port number");
  342. }
  343. { // bind TCP IPv6
  344. _tcpV6ListenSocket = ::socket(AF_INET6,SOCK_STREAM,0);
  345. #ifdef __WINDOWS__
  346. if (_tcpV6ListenSocket != INVALID_SOCKET) {
  347. {
  348. BOOL f;
  349. f = TRUE; ::setsockopt(_tcpV6ListenSocket,IPPROTO_IPV6,IPV6_V6ONLY,(const char *)&f,sizeof(f));
  350. f = TRUE; ::setsockopt(_tcpV6ListenSocket,SOL_SOCKET,SO_REUSEADDR,(const char *)&f,sizeof(f));
  351. u_long iMode=1;
  352. ioctlsocket(_tcpV6ListenSocket,FIONBIO,&iMode);
  353. }
  354. #else
  355. if (_tcpV6ListenSocket > 0) {
  356. {
  357. int f;
  358. f = 1; ::setsockopt(_tcpV6ListenSocket,IPPROTO_IPV6,IPV6_V6ONLY,(void *)&f,sizeof(f));
  359. f = 1; ::setsockopt(_tcpV6ListenSocket,SOL_SOCKET,SO_REUSEADDR,(void *)&f,sizeof(f));
  360. fcntl(_tcpV6ListenSocket,F_SETFL,O_NONBLOCK);
  361. }
  362. #endif // __WINDOWS__ / not __WINDOWS__
  363. struct sockaddr_in6 sin6;
  364. memset(&sin6,0,sizeof(sin6));
  365. sin6.sin6_family = AF_INET6;
  366. sin6.sin6_port = htons(localTcpPort);
  367. memcpy(&(sin6.sin6_addr),&in6addr_any,sizeof(struct in6_addr));
  368. if (::bind(_tcpV6ListenSocket,(const struct sockaddr *)&sin6,sizeof(sin6))) {
  369. _closeSockets();
  370. throw std::runtime_error("unable to bind to local TCP port");
  371. }
  372. if (::listen(_tcpV6ListenSocket,16)) {
  373. _closeSockets();
  374. throw std::runtime_error("listen() failed");
  375. }
  376. FD_SET(_tcpV6ListenSocket,&_readfds);
  377. }
  378. }
  379. { // bind TCP IPv4
  380. _tcpV4ListenSocket = ::socket(AF_INET,SOCK_STREAM,0);
  381. #ifdef __WINDOWS__
  382. if (_tcpV4ListenSocket == INVALID_SOCKET) {
  383. #else
  384. if (_tcpV4ListenSocket <= 0) {
  385. #endif
  386. _closeSockets();
  387. throw std::runtime_error("unable to create IPv4 SOCK_STREAM socket");
  388. }
  389. #ifdef __WINDOWS__
  390. {
  391. BOOL f = TRUE; ::setsockopt(_tcpV4ListenSocket,SOL_SOCKET,SO_REUSEADDR,(const char *)&f,sizeof(f));
  392. u_long iMode=1;
  393. ioctlsocket(_tcpV4ListenSocket,FIONBIO,&iMode);
  394. }
  395. #else
  396. {
  397. int f = 1; ::setsockopt(_tcpV4ListenSocket,SOL_SOCKET,SO_REUSEADDR,(void *)&f,sizeof(f));
  398. fcntl(_tcpV4ListenSocket,F_SETFL,O_NONBLOCK);
  399. }
  400. #endif
  401. struct sockaddr_in sin4;
  402. memset(&sin4,0,sizeof(sin4));
  403. sin4.sin_family = AF_INET;
  404. sin4.sin_port = htons(localTcpPort);
  405. sin4.sin_addr.s_addr = INADDR_ANY;
  406. if (::bind(_tcpV4ListenSocket,(const struct sockaddr *)&sin4,sizeof(sin4))) {
  407. _closeSockets();
  408. throw std::runtime_error("unable to bind to local TCP port");
  409. }
  410. if (::listen(_tcpV4ListenSocket,16)) {
  411. _closeSockets();
  412. throw std::runtime_error("listen() failed");
  413. }
  414. FD_SET(_tcpV4ListenSocket,&_readfds);
  415. }
  416. }
  417. if (localUdpPort > 0) {
  418. if (localUdpPort > 0xffff) {
  419. _closeSockets();
  420. throw std::runtime_error("invalid local UDP port number");
  421. }
  422. { // bind UDP IPv6
  423. #ifdef __WINDOWS__
  424. SOCKET s = ::socket(AF_INET6,SOCK_DGRAM,0);
  425. if (s != INVALID_SOCKET) {
  426. #else
  427. int s = ::socket(AF_INET6,SOCK_DGRAM,0);
  428. if (s > 0) {
  429. #endif
  430. {
  431. int bs = 1048576;
  432. while (bs >= 65536) {
  433. int tmpbs = bs;
  434. if (setsockopt(s,SOL_SOCKET,SO_RCVBUF,(const char *)&tmpbs,sizeof(tmpbs)) == 0)
  435. break;
  436. bs -= 16384;
  437. }
  438. bs = 1048576;
  439. while (bs >= 65536) {
  440. int tmpbs = bs;
  441. if (setsockopt(s,SOL_SOCKET,SO_SNDBUF,(const char *)&tmpbs,sizeof(tmpbs)) == 0)
  442. break;
  443. bs -= 16384;
  444. }
  445. #ifdef __WINDOWS__
  446. BOOL f;
  447. f = TRUE; setsockopt(s,IPPROTO_IPV6,IPV6_V6ONLY,(const char *)&f,sizeof(f));
  448. f = FALSE; setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(const char *)&f,sizeof(f));
  449. f = FALSE; setsockopt(s,IPPROTO_IPV6,IPV6_DONTFRAG,(const char *)&f,sizeof(f));
  450. f = TRUE; setsockopt(s,SOL_SOCKET,SO_BROADCAST,(const char *)&f,sizeof(f));
  451. #else
  452. int f;
  453. f = 1; setsockopt(s,IPPROTO_IPV6,IPV6_V6ONLY,(void *)&f,sizeof(f));
  454. f = 0; setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(void *)&f,sizeof(f));
  455. f = 1; setsockopt(s,SOL_SOCKET,SO_BROADCAST,(void *)&f,sizeof(f));
  456. #ifdef IP_DONTFRAG
  457. f = 0; setsockopt(s,IPPROTO_IP,IP_DONTFRAG,&f,sizeof(f));
  458. #endif
  459. #ifdef IP_MTU_DISCOVER
  460. f = 0; setsockopt(s,IPPROTO_IP,IP_MTU_DISCOVER,&f,sizeof(f));
  461. #endif
  462. #ifdef IPV6_MTU_DISCOVER
  463. f = 0; setsockopt(s,IPPROTO_IPV6,IPV6_MTU_DISCOVER,&f,sizeof(f));
  464. #endif
  465. #endif
  466. }
  467. struct sockaddr_in6 sin6;
  468. memset(&sin6,0,sizeof(sin6));
  469. sin6.sin6_family = AF_INET6;
  470. sin6.sin6_port = htons(localUdpPort);
  471. memcpy(&(sin6.sin6_addr),&in6addr_any,sizeof(struct in6_addr));
  472. if (::bind(s,(const struct sockaddr *)&sin6,sizeof(sin6))) {
  473. CLOSE_SOCKET(s);
  474. _closeSockets();
  475. throw std::runtime_error("unable to bind to port");
  476. }
  477. _udpV6Socket = SharedPtr<Socket>(new NativeUdpSocket(Socket::ZT_SOCKET_TYPE_UDP_V6,s));
  478. #ifdef __WINDOWS__
  479. u_long iMode=1;
  480. ioctlsocket(s,FIONBIO,&iMode);
  481. #else
  482. fcntl(s,F_SETFL,O_NONBLOCK);
  483. #endif
  484. FD_SET(s,&_readfds);
  485. }
  486. }
  487. { // bind UDP IPv4
  488. #ifdef __WINDOWS__
  489. SOCKET s = ::socket(AF_INET,SOCK_DGRAM,0);
  490. if (s == INVALID_SOCKET) {
  491. _closeSockets();
  492. throw std::runtime_error("unable to create IPv4 SOCK_DGRAM socket");
  493. }
  494. #else
  495. int s = ::socket(AF_INET,SOCK_DGRAM,0);
  496. if (s <= 0) {
  497. _closeSockets();
  498. throw std::runtime_error("unable to create IPv4 SOCK_DGRAM socket");
  499. }
  500. #endif
  501. {
  502. int bs = 1048576;
  503. while (bs >= 65536) {
  504. int tmpbs = bs;
  505. if (setsockopt(s,SOL_SOCKET,SO_RCVBUF,(const char *)&tmpbs,sizeof(tmpbs)) == 0)
  506. break;
  507. bs -= 16384;
  508. }
  509. bs = 1048576;
  510. while (bs >= 65536) {
  511. int tmpbs = bs;
  512. if (setsockopt(s,SOL_SOCKET,SO_SNDBUF,(const char *)&tmpbs,sizeof(tmpbs)) == 0)
  513. break;
  514. bs -= 16384;
  515. }
  516. #ifdef __WINDOWS__
  517. BOOL f;
  518. f = FALSE; setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(const char *)&f,sizeof(f));
  519. f = FALSE; setsockopt(s,IPPROTO_IP,IP_DONTFRAGMENT,(const char *)&f,sizeof(f));
  520. f = TRUE; setsockopt(s,SOL_SOCKET,SO_BROADCAST,(const char *)&f,sizeof(f));
  521. #else
  522. int f;
  523. f = 0; setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(void *)&f,sizeof(f));
  524. f = 1; setsockopt(s,SOL_SOCKET,SO_BROADCAST,(void *)&f,sizeof(f));
  525. #ifdef IP_DONTFRAG
  526. f = 0; setsockopt(s,IPPROTO_IP,IP_DONTFRAG,&f,sizeof(f));
  527. #endif
  528. #ifdef IP_MTU_DISCOVER
  529. f = 0; setsockopt(s,IPPROTO_IP,IP_MTU_DISCOVER,&f,sizeof(f));
  530. #endif
  531. #endif
  532. }
  533. struct sockaddr_in sin4;
  534. memset(&sin4,0,sizeof(sin4));
  535. sin4.sin_family = AF_INET;
  536. sin4.sin_port = htons(localUdpPort);
  537. sin4.sin_addr.s_addr = INADDR_ANY;
  538. if (::bind(s,(const struct sockaddr *)&sin4,sizeof(sin4))) {
  539. CLOSE_SOCKET(s);
  540. _closeSockets();
  541. throw std::runtime_error("unable to bind to port");
  542. }
  543. _udpV4Socket = SharedPtr<Socket>(new NativeUdpSocket(Socket::ZT_SOCKET_TYPE_UDP_V4,s));
  544. #ifdef __WINDOWS__
  545. u_long iMode=1;
  546. ioctlsocket(s,FIONBIO,&iMode);
  547. #else
  548. fcntl(s,F_SETFL,O_NONBLOCK);
  549. #endif
  550. FD_SET(s,&_readfds);
  551. }
  552. }
  553. _updateNfds();
  554. }
  555. NativeSocketManager::~NativeSocketManager()
  556. {
  557. Mutex::Lock _l(_pollLock);
  558. _closeSockets();
  559. }
  560. bool NativeSocketManager::send(const InetAddress &to,bool tcp,bool autoConnectTcp,const void *msg,unsigned int msglen)
  561. {
  562. if (tcp) {
  563. SharedPtr<Socket> ts;
  564. {
  565. Mutex::Lock _l(_tcpSockets_m);
  566. std::map< InetAddress,SharedPtr<Socket> >::iterator opents(_tcpSockets.find(to));
  567. if (opents != _tcpSockets.end())
  568. ts = opents->second;
  569. }
  570. if (ts)
  571. return ts->send(to,msg,msglen);
  572. if (!autoConnectTcp)
  573. return false;
  574. #ifdef __WINDOWS__
  575. SOCKET s = ::socket(to.isV4() ? AF_INET : AF_INET6,SOCK_STREAM,0);
  576. if (s == INVALID_SOCKET)
  577. return false;
  578. { u_long iMode=1; ioctlsocket(s,FIONBIO,&iMode); }
  579. #ifdef ZT_TCP_NODELAY
  580. { BOOL f = TRUE; setsockopt(s,IPPROTO_TCP,TCP_NODELAY,(char *)&f,sizeof(f)); }
  581. #endif
  582. #else
  583. int s = ::socket(to.isV4() ? AF_INET : AF_INET6,SOCK_STREAM,0);
  584. if (s <= 0)
  585. return false;
  586. if (s >= FD_SETSIZE) {
  587. ::close(s);
  588. return false;
  589. }
  590. fcntl(s,F_SETFL,O_NONBLOCK);
  591. #ifdef ZT_TCP_NODELAY
  592. { int f = 1; setsockopt(s,IPPROTO_TCP,TCP_NODELAY,(char *)&f,sizeof(f)); }
  593. #endif
  594. #endif
  595. bool connecting = false;
  596. if (::connect(s,to.saddr(),to.saddrLen())) {
  597. #ifdef __WINDOWS__
  598. if (WSAGetLastError() != WSAEWOULDBLOCK) {
  599. #else
  600. if (errno != EINPROGRESS) {
  601. #endif
  602. CLOSE_SOCKET(s);
  603. return false;
  604. } else connecting = true;
  605. }
  606. ts = SharedPtr<Socket>(new NativeTcpSocket(this,s,Socket::ZT_SOCKET_TYPE_TCP_OUT,connecting,to));
  607. if (!ts->send(to,msg,msglen)) {
  608. _fdSetLock.lock();
  609. FD_CLR(s,&_readfds);
  610. FD_CLR(s,&_writefds);
  611. _fdSetLock.unlock();
  612. return false;
  613. }
  614. {
  615. Mutex::Lock _l(_tcpSockets_m);
  616. _tcpSockets[to] = ts;
  617. }
  618. _fdSetLock.lock();
  619. FD_SET(s,&_readfds);
  620. if (connecting)
  621. FD_SET(s,&_writefds);
  622. _fdSetLock.unlock();
  623. _updateNfds();
  624. whack();
  625. return true;
  626. } else if (to.isV4()) {
  627. if (_udpV4Socket)
  628. return _udpV4Socket->send(to,msg,msglen);
  629. } else if (to.isV6()) {
  630. if (_udpV6Socket)
  631. return _udpV6Socket->send(to,msg,msglen);
  632. }
  633. return false;
  634. }
  635. void NativeSocketManager::poll(unsigned long timeout)
  636. {
  637. fd_set rfds,wfds,efds;
  638. struct timeval tv;
  639. std::vector< SharedPtr<Socket> > ts;
  640. #ifdef __WINDOWS__
  641. SOCKET sockfd;
  642. #else
  643. int sockfd;
  644. #endif
  645. Mutex::Lock _l(_pollLock);
  646. _fdSetLock.lock();
  647. memcpy(&rfds,&_readfds,sizeof(rfds));
  648. memcpy(&wfds,&_writefds,sizeof(wfds));
  649. _fdSetLock.unlock();
  650. FD_ZERO(&efds);
  651. #ifdef __WINDOWS__
  652. // Windows signals failed connects in exceptfds
  653. {
  654. Mutex::Lock _l2(_tcpSockets_m);
  655. for(std::map< InetAddress,SharedPtr<Socket> >::iterator s(_tcpSockets.begin());s!=_tcpSockets.end();++s) {
  656. if (((TcpSocket *)s->second.ptr())->_connecting)
  657. FD_SET(s->second->_sock,&efds);
  658. }
  659. }
  660. #endif
  661. tv.tv_sec = (long)(timeout / 1000);
  662. tv.tv_usec = (long)((timeout % 1000) * 1000);
  663. select(_nfds + 1,&rfds,&wfds,&efds,(timeout > 0) ? &tv : (struct timeval *)0);
  664. if (FD_ISSET(_whackReceivePipe,&rfds)) {
  665. char tmp[16];
  666. #ifdef __WINDOWS__
  667. ::recv(_whackReceivePipe,tmp,16,0);
  668. #else
  669. ::read(_whackReceivePipe,tmp,16);
  670. #endif
  671. }
  672. if ((_tcpV4ListenSocket != INVALID_SOCKET)&&(FD_ISSET(_tcpV4ListenSocket,&rfds))) {
  673. struct sockaddr_in from;
  674. socklen_t fromlen = sizeof(from);
  675. sockfd = accept(_tcpV4ListenSocket,(struct sockaddr *)&from,&fromlen);
  676. #ifdef __WINDOWS__
  677. if (sockfd != INVALID_SOCKET) {
  678. #else
  679. if (sockfd > 0) {
  680. if (sockfd < FD_SETSIZE) {
  681. #endif
  682. InetAddress fromia((const struct sockaddr *)&from);
  683. Mutex::Lock _l2(_tcpSockets_m);
  684. try {
  685. _tcpSockets[fromia] = SharedPtr<Socket>(new NativeTcpSocket(this,sockfd,Socket::ZT_SOCKET_TYPE_TCP_IN,false,fromia));
  686. #ifdef __WINDOWS__
  687. { u_long iMode=1; ioctlsocket(sockfd,FIONBIO,&iMode); }
  688. #ifdef ZT_TCP_NODELAY
  689. { BOOL f = TRUE; setsockopt(sockfd,IPPROTO_TCP,TCP_NODELAY,(char *)&f,sizeof(f)); }
  690. #endif
  691. #else
  692. fcntl(sockfd,F_SETFL,O_NONBLOCK);
  693. #ifdef ZT_TCP_NODELAY
  694. { int f = 1; setsockopt(sockfd,IPPROTO_TCP,TCP_NODELAY,(char *)&f,sizeof(f)); }
  695. #endif
  696. #endif
  697. _fdSetLock.lock();
  698. FD_SET(sockfd,&_readfds);
  699. _fdSetLock.unlock();
  700. if ((int)sockfd > (int)_nfds)
  701. _nfds = (int)sockfd;
  702. } catch ( ... ) {
  703. CLOSE_SOCKET(sockfd);
  704. }
  705. #ifndef __WINDOWS__
  706. } else {
  707. CLOSE_SOCKET(sockfd);
  708. }
  709. #endif
  710. }
  711. }
  712. if ((_tcpV6ListenSocket != INVALID_SOCKET)&&(FD_ISSET(_tcpV6ListenSocket,&rfds))) {
  713. struct sockaddr_in6 from;
  714. socklen_t fromlen = sizeof(from);
  715. sockfd = accept(_tcpV6ListenSocket,(struct sockaddr *)&from,&fromlen);
  716. #ifdef __WINDOWS__
  717. if (sockfd != INVALID_SOCKET) {
  718. #else
  719. if (sockfd > 0) {
  720. if (sockfd < FD_SETSIZE) {
  721. #endif
  722. InetAddress fromia((const struct sockaddr *)&from);
  723. Mutex::Lock _l2(_tcpSockets_m);
  724. try {
  725. _tcpSockets[fromia] = SharedPtr<Socket>(new NativeTcpSocket(this,sockfd,Socket::ZT_SOCKET_TYPE_TCP_IN,false,fromia));
  726. #ifdef __WINDOWS__
  727. { u_long iMode=1; ioctlsocket(sockfd,FIONBIO,&iMode); }
  728. #ifdef ZT_TCP_NODELAY
  729. { BOOL f = TRUE; setsockopt(sockfd,IPPROTO_TCP,TCP_NODELAY,(char *)&f,sizeof(f)); }
  730. #endif
  731. #else
  732. fcntl(sockfd,F_SETFL,O_NONBLOCK);
  733. #ifdef ZT_TCP_NODELAY
  734. { int f = 1; setsockopt(sockfd,IPPROTO_TCP,TCP_NODELAY,(char *)&f,sizeof(f)); }
  735. #endif
  736. #endif
  737. _fdSetLock.lock();
  738. FD_SET(sockfd,&_readfds);
  739. _fdSetLock.unlock();
  740. if ((int)sockfd > (int)_nfds)
  741. _nfds = (int)sockfd;
  742. } catch ( ... ) {
  743. CLOSE_SOCKET(sockfd);
  744. }
  745. #ifndef __WINDOWS__
  746. } else {
  747. CLOSE_SOCKET(sockfd);
  748. }
  749. #endif
  750. }
  751. }
  752. {
  753. NativeUdpSocket *usock = (NativeUdpSocket *)_udpV4Socket.ptr();
  754. if ((usock)&&(FD_ISSET(usock->_sock,&rfds))) {
  755. usock->notifyAvailableForRead(_udpV4Socket,this);
  756. }
  757. usock = (NativeUdpSocket *)_udpV6Socket.ptr();
  758. if ((usock)&&(FD_ISSET(usock->_sock,&rfds))) {
  759. usock->notifyAvailableForRead(_udpV6Socket,this);
  760. }
  761. }
  762. bool closedSockets = false;
  763. { // grab copy of TCP sockets list because _tcpSockets[] might be changed in a handler
  764. Mutex::Lock _l2(_tcpSockets_m);
  765. if (!_tcpSockets.empty()) {
  766. ts.reserve(_tcpSockets.size());
  767. uint64_t now = Utils::now();
  768. for(std::map< InetAddress,SharedPtr<Socket> >::iterator s(_tcpSockets.begin());s!=_tcpSockets.end();) {
  769. NativeTcpSocket *tsock = (NativeTcpSocket *)s->second.ptr();
  770. #ifdef __WINDOWS__
  771. if ( ((now - tsock->_lastActivity) < ZT_TCP_TUNNEL_ACTIVITY_TIMEOUT) && (! ((tsock->_connecting)&&(FD_ISSET(tsock->_sock,&efds))) ) ) {
  772. #else
  773. if ((now - tsock->_lastActivity) < ZT_TCP_TUNNEL_ACTIVITY_TIMEOUT) {
  774. #endif
  775. ts.push_back(s->second);
  776. ++s;
  777. } else {
  778. _fdSetLock.lock();
  779. FD_CLR(tsock->_sock,&_readfds);
  780. FD_CLR(tsock->_sock,&_writefds);
  781. _fdSetLock.unlock();
  782. _tcpSockets.erase(s++);
  783. closedSockets = true;
  784. }
  785. }
  786. }
  787. }
  788. for(std::vector< SharedPtr<Socket> >::iterator s(ts.begin());s!=ts.end();++s) {
  789. NativeTcpSocket *tsock = (NativeTcpSocket *)s->ptr();
  790. if (FD_ISSET(tsock->_sock,&wfds)) {
  791. if (!tsock->notifyAvailableForWrite(*s,this)) {
  792. {
  793. Mutex::Lock _l2(_tcpSockets_m);
  794. _tcpSockets.erase(tsock->_remote);
  795. }
  796. _fdSetLock.lock();
  797. FD_CLR(tsock->_sock,&_readfds);
  798. FD_CLR(tsock->_sock,&_writefds);
  799. _fdSetLock.unlock();
  800. closedSockets = true;
  801. continue;
  802. }
  803. }
  804. if (FD_ISSET(tsock->_sock,&rfds)) {
  805. if (!tsock->notifyAvailableForRead(*s,this)) {
  806. {
  807. Mutex::Lock _l2(_tcpSockets_m);
  808. _tcpSockets.erase(tsock->_remote);
  809. }
  810. _fdSetLock.lock();
  811. FD_CLR(tsock->_sock,&_readfds);
  812. FD_CLR(tsock->_sock,&_writefds);
  813. _fdSetLock.unlock();
  814. closedSockets = true;
  815. continue;
  816. }
  817. }
  818. }
  819. if (closedSockets)
  820. _updateNfds();
  821. }
  822. void NativeSocketManager::whack()
  823. {
  824. _whackSendPipe_m.lock();
  825. #ifdef __WINDOWS__
  826. ::send(_whackSendPipe,(const char *)this,1,0);
  827. #else
  828. ::write(_whackSendPipe,(const void *)this,1); // data is arbitrary, just send a byte
  829. #endif
  830. _whackSendPipe_m.unlock();
  831. }
  832. void NativeSocketManager::closeTcpSockets()
  833. {
  834. {
  835. Mutex::Lock _l2(_tcpSockets_m);
  836. _fdSetLock.lock();
  837. for(std::map< InetAddress,SharedPtr<Socket> >::iterator s(_tcpSockets.begin());s!=_tcpSockets.end();++s) {
  838. FD_CLR(((NativeTcpSocket *)s->second.ptr())->_sock,&_readfds);
  839. FD_CLR(((NativeTcpSocket *)s->second.ptr())->_sock,&_writefds);
  840. }
  841. _fdSetLock.unlock();
  842. _tcpSockets.clear();
  843. }
  844. _updateNfds();
  845. }
  846. void NativeSocketManager::_startNotifyWrite(const NativeSocket *sock)
  847. {
  848. _fdSetLock.lock();
  849. FD_SET(sock->_sock,&_writefds);
  850. _fdSetLock.unlock();
  851. }
  852. void NativeSocketManager::_stopNotifyWrite(const NativeSocket *sock)
  853. {
  854. _fdSetLock.lock();
  855. FD_CLR(sock->_sock,&_writefds);
  856. _fdSetLock.unlock();
  857. }
  858. void NativeSocketManager::_closeSockets()
  859. {
  860. #ifdef __WINDOWS__
  861. if (_whackSendPipe != INVALID_SOCKET)
  862. ::closesocket(_whackSendPipe);
  863. if (_whackReceivePipe != INVALID_SOCKET)
  864. ::closesocket(_whackReceivePipe);
  865. if (_tcpV4ListenSocket != INVALID_SOCKET)
  866. ::closesocket(_tcpV4ListenSocket);
  867. if (_tcpV6ListenSocket != INVALID_SOCKET)
  868. ::closesocket(_tcpV6ListenSocket);
  869. #else
  870. if (_whackSendPipe > 0)
  871. ::close(_whackSendPipe);
  872. if (_whackReceivePipe > 0)
  873. ::close(_whackReceivePipe);
  874. if (_tcpV4ListenSocket > 0)
  875. ::close(_tcpV4ListenSocket);
  876. if (_tcpV4ListenSocket > 0)
  877. ::close(_tcpV6ListenSocket);
  878. #endif
  879. }
  880. void NativeSocketManager::_updateNfds()
  881. {
  882. #ifdef __WINDOWS__
  883. SOCKET nfds = _whackSendPipe;
  884. #else
  885. int nfds = _whackSendPipe;
  886. #endif
  887. if (_whackReceivePipe > nfds)
  888. nfds = _whackReceivePipe;
  889. if (_tcpV4ListenSocket > nfds)
  890. nfds = _tcpV4ListenSocket;
  891. if (_tcpV6ListenSocket > nfds)
  892. nfds = _tcpV6ListenSocket;
  893. if ((_udpV4Socket)&&(((NativeUdpSocket *)_udpV4Socket.ptr())->_sock > nfds))
  894. nfds = ((NativeUdpSocket *)_udpV4Socket.ptr())->_sock;
  895. if ((_udpV6Socket)&&(((NativeUdpSocket *)_udpV6Socket.ptr())->_sock > nfds))
  896. nfds = ((NativeUdpSocket *)_udpV6Socket.ptr())->_sock;
  897. Mutex::Lock _l(_tcpSockets_m);
  898. for(std::map< InetAddress,SharedPtr<Socket> >::const_iterator s(_tcpSockets.begin());s!=_tcpSockets.end();++s) {
  899. if (((NativeTcpSocket *)s->second.ptr())->_sock > nfds)
  900. nfds = ((NativeTcpSocket *)s->second.ptr())->_sock;
  901. }
  902. _nfds = (int)nfds;
  903. }
  904. } // namespace ZeroTier