/* io_wait.h */
  1. /*
  2. * $Id$
  3. *
  4. * Copyright (C) 2005 iptelorg GmbH
  5. *
  6. * Permission to use, copy, modify, and distribute this software for any
  7. * purpose with or without fee is hereby granted, provided that the above
  8. * copyright notice and this permission notice appear in all copies.
  9. *
  10. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  11. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  13. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17. */
  18. /*
  19. * tcp io wait common stuff used by tcp_main.c & tcp_read.c
  20. * All the functions are inline because of speed reasons and because they are
  21. * used only from 2 places.
  22. * You also have to define:
  23. * int handle_io(struct fd_map* fm, short events, int idx) (see below)
  24. * (this could be trivially replaced by a callback pointer entry attached
  25. * to the io_wait handler if more flexibility rather than performance
  26. * is needed)
  27. * fd_type - define to some enum of your choice and define also
  28. * FD_TYPE_DEFINED (if you don't do it fd_type will be defined
  29. * to int). 0 has a special not set/not init. meaning
  30. * (a lot of sanity checks and the sigio_rt code are based on
  31. * this assumption)
  32. * local_malloc (defaults to pkg_malloc)
  33. * local_free (defaults to pkg_free)
  34. *
  35. */
  36. /*
  37. * History:
  38. * --------
  39. * 2005-06-13 created by andrei
  40. * 2005-06-26 added kqueue (andrei)
  41. * 2005-07-01 added /dev/poll (andrei)
  42. * 2006-05-30 sigio 64 bit workaround enabled for kernels < 2.6.5 (andrei)
  43. * 2007-11-22 when handle_io() is called in a loop check & stop if the fd was
  44. * removed inside handle_io() (andrei)
  45. * 2007-11-29 support for write (POLLOUT); added io_watch_chg() (andrei)
  46. * 2008-02-04 POLLRDHUP & EPOLLRDHUP support (automatically enabled if POLLIN
  47. * is set) (andrei)
  48. */
  49. #ifndef _io_wait_h
  50. #define _io_wait_h
  51. #include <errno.h>
  52. #include <string.h>
  53. #ifdef HAVE_SIGIO_RT
  54. #define __USE_GNU /* or else F_SETSIG won't be included */
  55. #include <sys/types.h> /* recv */
  56. #include <sys/socket.h> /* recv */
  57. #include <signal.h> /* sigprocmask, sigwait a.s.o */
  58. #endif
  59. #define _GNU_SOURCE /* for POLLRDHUP on linux */
  60. #include <sys/poll.h>
  61. #include <fcntl.h>
  62. #ifdef HAVE_EPOLL
  63. #include <sys/epoll.h>
  64. #endif
  65. #ifdef HAVE_KQUEUE
  66. #include <sys/types.h> /* needed on freebsd */
  67. #include <sys/event.h>
  68. #include <sys/time.h>
  69. #endif
  70. #ifdef HAVE_DEVPOLL
  71. #include <sys/devpoll.h>
  72. #endif
  73. #ifdef HAVE_SELECT
  74. /* needed on openbsd for select*/
  75. #include <sys/time.h>
  76. #include <sys/types.h>
  77. #include <unistd.h>
  78. /* needed according to POSIX for select*/
  79. #include <sys/select.h>
  80. #endif
  81. #include "dprint.h"
  82. #include "poll_types.h" /* poll_types*/
  83. #ifdef HAVE_SIGIO_RT
  84. #include "pt.h" /* mypid() */
  85. #endif
  86. #include "compiler_opt.h"
  87. #ifdef HAVE_EPOLL
  88. /* fix defines for EPOLL */
  89. #if defined POLLRDHUP && ! defined EPOLLRDHUP
  90. #define EPOLLRDHUP POLLRDHUP /* should work on all linuxes */
  91. #endif /* POLLRDHUP && EPOLLRDHUP */
  92. #endif /* HAVE_EPOLL */
  93. extern int _os_ver; /* os version number, needed to select bugs workarrounds */
  94. #if 0
  95. enum fd_types; /* this should be defined from the including file,
  96. see tcp_main.c for an example,
  97. 0 has a special meaning: not used/empty*/
  98. #endif
  99. #ifndef FD_TYPE_DEFINED
  100. typedef int fd_type;
  101. #define FD_TYPE_DEFINED
  102. #endif
  103. /* maps a fd to some other structure; used in almost all cases
  104. * except epoll and maybe kqueue or /dev/poll */
  105. struct fd_map{
  106. int fd; /* fd no */
  107. fd_type type; /* "data" type */
  108. void* data; /* pointer to the corresponding structure */
  109. short events; /* events we are interested int */
  110. };
  111. #ifdef HAVE_KQUEUE
  112. #ifndef KQ_CHANGES_ARRAY_SIZE
  113. #define KQ_CHANGES_ARRAY_SIZE 256
  114. #ifdef __OS_netbsd
  115. #define KEV_UDATA_CAST (intptr_t)
  116. #else
  117. #define KEV_UDATA_CAST
  118. #endif
  119. #endif
  120. #endif
  121. /* handler structure */
  122. struct io_wait_handler{
  123. enum poll_types poll_method;
  124. int flags;
  125. struct fd_map* fd_hash;
  126. int fd_no; /* current index used in fd_array and the passed size for
  127. ep_array & kq_array*/
  128. int max_fd_no; /* maximum fd no, is also the size of fd_array,
  129. fd_hash and ep_array*/
  130. /* common stuff for POLL, SIGIO_RT and SELECT
  131. * since poll support is always compiled => this will always be compiled */
  132. struct pollfd* fd_array; /* used also by devpoll as devpoll array */
  133. int crt_fd_array_idx; /* crt idx for which handle_io is called
  134. (updated also by del -> internal optimization) */
  135. /* end of common stuff */
  136. #ifdef HAVE_EPOLL
  137. int epfd; /* epoll ctrl fd */
  138. struct epoll_event* ep_array;
  139. #endif
  140. #ifdef HAVE_SIGIO_RT
  141. sigset_t sset; /* signal mask for sigio & sigrtmin */
  142. int signo; /* real time signal used */
  143. #endif
  144. #ifdef HAVE_KQUEUE
  145. int kq_fd;
  146. struct kevent* kq_array; /* used for the eventlist*/
  147. struct kevent* kq_changes; /* used for the changelist */
  148. size_t kq_nchanges;
  149. size_t kq_changes_size; /* size of the changes array */
  150. #endif
  151. #ifdef HAVE_DEVPOLL
  152. int dpoll_fd;
  153. #endif
  154. #ifdef HAVE_SELECT
  155. fd_set master_rset; /* read set */
  156. fd_set master_wset; /* write set */
  157. int max_fd_select; /* maximum select used fd */
  158. #endif
  159. };
  160. typedef struct io_wait_handler io_wait_h;
  161. /* get the corresponding fd_map structure pointer */
  162. #define get_fd_map(h, fd) (&(h)->fd_hash[(fd)])
  163. /* remove a fd_map structure from the hash; the pointer must be returned
  164. * by get_fd_map or hash_fd_map*/
  165. #define unhash_fd_map(pfm) \
  166. do{ \
  167. (pfm)->type=0 /*F_NONE */; \
  168. (pfm)->fd=-1; \
  169. }while(0)
  170. /* add a fd_map structure to the fd hash */
  171. static inline struct fd_map* hash_fd_map( io_wait_h* h,
  172. int fd,
  173. short events,
  174. fd_type type,
  175. void* data)
  176. {
  177. h->fd_hash[fd].fd=fd;
  178. h->fd_hash[fd].events=events;
  179. h->fd_hash[fd].type=type;
  180. h->fd_hash[fd].data=data;
  181. return &h->fd_hash[fd];
  182. }
  183. #ifdef HANDLE_IO_INLINE
  184. /* generic handle io routine, this must be defined in the including file
  185. * (faster then registering a callback pointer)
  186. *
  187. * params: fm - pointer to a fd hash entry
  188. * events - combinations of POLLIN, POLLOUT, POLLERR & POLLHUP
  189. * idx - index in the fd_array (or -1 if not known)
  190. * return: -1 on error
  191. * 0 on EAGAIN or when by some other way it is known that no more
  192. * io events are queued on the fd (the receive buffer is empty).
  193. * Usefull to detect when there are no more io events queued for
  194. * sigio_rt, epoll_et, kqueue.
  195. * >0 on successfull read from the fd (when there might be more io
  196. * queued -- the receive buffer might still be non-empty)
  197. */
  198. inline static int handle_io(struct fd_map* fm, short events, int idx);
  199. #else
  200. int handle_io(struct fd_map* fm, short events, int idx);
  201. #endif
  202. #ifdef HAVE_KQUEUE
  203. /*
  204. * kqueue specific function: register a change
  205. * (adds a change to the kevent change array, and if full flushes it first)
  206. *
  207. * TODO: check if the event already exists in the change list or if it's
  208. * complementary to an event in the list (e.g. EVFILT_WRITE, EV_DELETE
  209. * and EVFILT_WRITE, EV_ADD for the same fd).
  210. * returns: -1 on error, 0 on success
  211. */
  212. static inline int kq_ev_change(io_wait_h* h, int fd, int filter, int flag,
  213. void* data)
  214. {
  215. int n;
  216. struct timespec tspec;
  217. if (h->kq_nchanges>=h->kq_changes_size){
  218. /* changes array full ! */
  219. LOG(L_WARN, "WARNING: kq_ev_change: kqueue changes array full"
  220. " trying to flush...\n");
  221. tspec.tv_sec=0;
  222. tspec.tv_nsec=0;
  223. again:
  224. n=kevent(h->kq_fd, h->kq_changes, h->kq_nchanges, 0, 0, &tspec);
  225. if (n==-1){
  226. if (errno==EINTR) goto again;
  227. LOG(L_ERR, "ERROR: io_watch_add: kevent flush changes "
  228. " failed: %s [%d]\n", strerror(errno), errno);
  229. return -1;
  230. }
  231. h->kq_nchanges=0; /* changes array is empty */
  232. }
  233. EV_SET(&h->kq_changes[h->kq_nchanges], fd, filter, flag, 0, 0,
  234. KEV_UDATA_CAST data);
  235. h->kq_nchanges++;
  236. return 0;
  237. }
  238. #endif
  239. /* generic io_watch_add function
  240. * Params:
  241. * h - pointer to initialized io_wait handle
  242. * fd - fd to watch
  243. * events - bitmap with the fd events for which the fd should be watched
  244. * (combination of POLLIN and POLLOUT)
  245. * type - fd type (non 0 value, returned in the call to handle_io)
  246. * data - pointer/private data returned in the handle_io call
  247. * returns 0 on success, -1 on error
  248. *
  249. * WARNING: handle_io() can be called immediately (from io_watch_add()) so
  250. * make sure that any dependent init. (e.g. data stuff) is made before
  251. * calling io_watch_add
  252. *
  253. * this version should be faster than pointers to poll_method specific
  254. * functions (it avoids functions calls, the overhead being only an extra
  255. * switch())*/
  256. inline static int io_watch_add( io_wait_h* h,
  257. int fd,
  258. short events,
  259. fd_type type,
  260. void* data)
  261. {
  262. /* helper macros */
  263. #define fd_array_setup(ev) \
  264. do{ \
  265. h->fd_array[h->fd_no].fd=fd; \
  266. h->fd_array[h->fd_no].events=(ev); /* useless for select */ \
  267. h->fd_array[h->fd_no].revents=0; /* useless for select */ \
  268. }while(0)
  269. #define set_fd_flags(f) \
  270. do{ \
  271. flags=fcntl(fd, F_GETFL); \
  272. if (flags==-1){ \
  273. LOG(L_ERR, "ERROR: io_watch_add: fnctl: GETFL failed:" \
  274. " %s [%d]\n", strerror(errno), errno); \
  275. goto error; \
  276. } \
  277. if (fcntl(fd, F_SETFL, flags|(f))==-1){ \
  278. LOG(L_ERR, "ERROR: io_watch_add: fnctl: SETFL" \
  279. " failed: %s [%d]\n", strerror(errno), errno); \
  280. goto error; \
  281. } \
  282. }while(0)
  283. struct fd_map* e;
  284. int flags;
  285. #ifdef HAVE_EPOLL
  286. struct epoll_event ep_event;
  287. #endif
  288. #ifdef HAVE_DEVPOLL
  289. struct pollfd pfd;
  290. #endif
  291. #if defined(HAVE_SIGIO_RT) || defined (HAVE_EPOLL)
  292. int n;
  293. #endif
  294. #if defined(HAVE_SIGIO_RT)
  295. int idx;
  296. int check_io;
  297. struct pollfd pf;
  298. check_io=0; /* set to 1 if we need to check for pre-existing queued
  299. io/data on the fd */
  300. idx=-1;
  301. #endif
  302. e=0;
  303. /* sanity checks */
  304. if (unlikely(fd==-1)){
  305. LOG(L_CRIT, "BUG: io_watch_add: fd is -1!\n");
  306. goto error;
  307. }
  308. if (unlikely((events&(POLLIN|POLLOUT))==0)){
  309. LOG(L_CRIT, "BUG: io_watch_add: invalid events: 0x%0x\n", events);
  310. goto error;
  311. }
  312. /* check if not too big */
  313. if (unlikely(h->fd_no>=h->max_fd_no)){
  314. LOG(L_CRIT, "ERROR: io_watch_add: maximum fd number exceeded:"
  315. " %d/%d\n", h->fd_no, h->max_fd_no);
  316. goto error;
  317. }
  318. DBG("DBG: io_watch_add(%p, %d, %d, %p), fd_no=%d\n",
  319. h, fd, type, data, h->fd_no);
  320. /* hash sanity check */
  321. e=get_fd_map(h, fd);
  322. if (unlikely(e && (e->type!=0 /*F_NONE*/))){
  323. LOG(L_ERR, "ERROR: io_watch_add: trying to overwrite entry %d"
  324. " watched for %x in the hash(%d, %d, %p) with (%d, %d, %p)\n",
  325. fd, events, e->fd, e->type, e->data, fd, type, data);
  326. e=0;
  327. goto error;
  328. }
  329. if (unlikely((e=hash_fd_map(h, fd, events, type, data))==0)){
  330. LOG(L_ERR, "ERROR: io_watch_add: failed to hash the fd %d\n", fd);
  331. goto error;
  332. }
  333. switch(h->poll_method){ /* faster then pointer to functions */
  334. case POLL_POLL:
  335. #ifdef POLLRDHUP
  336. /* listen to POLLRDHUP by default (if POLLIN) */
  337. events|=((int)!(events & POLLIN) - 1) & POLLRDHUP;
  338. #endif /* POLLRDHUP */
  339. fd_array_setup(events);
  340. set_fd_flags(O_NONBLOCK);
  341. break;
  342. #ifdef HAVE_SELECT
  343. case POLL_SELECT:
  344. fd_array_setup(events);
  345. if (likely(events & POLLIN))
  346. FD_SET(fd, &h->master_rset);
  347. if (unlikely(events & POLLOUT))
  348. FD_SET(fd, &h->master_wset);
  349. if (h->max_fd_select<fd) h->max_fd_select=fd;
  350. break;
  351. #endif
  352. #ifdef HAVE_SIGIO_RT
  353. case POLL_SIGIO_RT:
  354. fd_array_setup(events);
  355. /* re-set O_ASYNC might be needed, if not done from
  356. * io_watch_del (or if somebody wants to add a fd which has
  357. * already O_ASYNC/F_SETSIG set on a duplicate)
  358. */
  359. /* set async & signal */
  360. if (fcntl(fd, F_SETOWN, my_pid())==-1){
  361. LOG(L_ERR, "ERROR: io_watch_add: fnctl: SETOWN"
  362. " failed: %s [%d]\n", strerror(errno), errno);
  363. goto error;
  364. }
  365. if (fcntl(fd, F_SETSIG, h->signo)==-1){
  366. LOG(L_ERR, "ERROR: io_watch_add: fnctl: SETSIG"
  367. " failed: %s [%d]\n", strerror(errno), errno);
  368. goto error;
  369. }
  370. /* set both non-blocking and async */
  371. set_fd_flags(O_ASYNC| O_NONBLOCK);
  372. #ifdef EXTRA_DEBUG
  373. DBG("io_watch_add: sigio_rt on f %d, signal %d to pid %d\n",
  374. fd, h->signo, my_pid());
  375. #endif
  376. /* empty socket receive buffer, if buffer is already full
  377. * no more space to put packets
  378. * => no more signals are ever generated
  379. * also when moving fds, the freshly moved fd might have
  380. * already some bytes queued, we want to get them now
  381. * and not later -- andrei */
  382. idx=h->fd_no;
  383. check_io=1;
  384. break;
  385. #endif
  386. #ifdef HAVE_EPOLL
  387. case POLL_EPOLL_LT:
  388. ep_event.events=
  389. #ifdef POLLRDHUP
  390. /* listen for EPOLLRDHUP too */
  391. ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
  392. #else /* POLLRDHUP */
  393. (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
  394. #endif /* POLLRDHUP */
  395. (EPOLLOUT & ((int)!(events & POLLOUT)-1) );
  396. ep_event.data.ptr=e;
  397. again1:
  398. n=epoll_ctl(h->epfd, EPOLL_CTL_ADD, fd, &ep_event);
  399. if (unlikely(n==-1)){
  400. if (errno==EAGAIN) goto again1;
  401. LOG(L_ERR, "ERROR: io_watch_add: epoll_ctl failed: %s [%d]\n",
  402. strerror(errno), errno);
  403. goto error;
  404. }
  405. break;
  406. case POLL_EPOLL_ET:
  407. set_fd_flags(O_NONBLOCK);
  408. ep_event.events=
  409. #ifdef POLLRDHUP
  410. /* listen for EPOLLRDHUP too */
  411. ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
  412. #else /* POLLRDHUP */
  413. (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
  414. #endif /* POLLRDHUP */
  415. (EPOLLOUT & ((int)!(events & POLLOUT)-1) ) |
  416. EPOLLET;
  417. ep_event.data.ptr=e;
  418. again2:
  419. n=epoll_ctl(h->epfd, EPOLL_CTL_ADD, fd, &ep_event);
  420. if (unlikely(n==-1)){
  421. if (errno==EAGAIN) goto again2;
  422. LOG(L_ERR, "ERROR: io_watch_add: epoll_ctl failed: %s [%d]\n",
  423. strerror(errno), errno);
  424. goto error;
  425. }
  426. break;
  427. #endif
  428. #ifdef HAVE_KQUEUE
  429. case POLL_KQUEUE:
  430. if (likely( events & POLLIN)){
  431. if (unlikely(kq_ev_change(h, fd, EVFILT_READ, EV_ADD, e)==-1))
  432. goto error;
  433. }
  434. if (unlikely( events & POLLOUT)){
  435. if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE, EV_ADD, e)==-1))
  436. {
  437. if (likely(events & POLLIN)){
  438. kq_ev_change(h, fd, EVFILT_READ, EV_DELETE, 0);
  439. }
  440. }
  441. goto error;
  442. }
  443. break;
  444. #endif
  445. #ifdef HAVE_DEVPOLL
  446. case POLL_DEVPOLL:
  447. pfd.fd=fd;
  448. pfd.events=events;
  449. pfd.revents=0;
  450. again_devpoll:
  451. if (write(h->dpoll_fd, &pfd, sizeof(pfd))==-1){
  452. if (errno==EAGAIN) goto again_devpoll;
  453. LOG(L_ERR, "ERROR: io_watch_add: /dev/poll write failed:"
  454. "%s [%d]\n", strerror(errno), errno);
  455. goto error;
  456. }
  457. break;
  458. #endif
  459. default:
  460. LOG(L_CRIT, "BUG: io_watch_add: no support for poll method "
  461. " %s (%d)\n", poll_method_str[h->poll_method],
  462. h->poll_method);
  463. goto error;
  464. }
  465. h->fd_no++; /* "activate" changes, for epoll/kqueue/devpoll it
  466. has only informative value */
  467. #if defined(HAVE_SIGIO_RT)
  468. if (check_io){
  469. /* handle possible pre-existing events */
  470. pf.fd=fd;
  471. pf.events=events;
  472. check_io_again:
  473. n=0;
  474. while(e->type && ((n=poll(&pf, 1, 0))>0) &&
  475. (handle_io(e, pf.revents, idx)>0) &&
  476. (pf.revents & (e->events|POLLERR|POLLHUP)));
  477. if (unlikely(e->type && (n==-1))){
  478. if (errno==EINTR) goto check_io_again;
  479. LOG(L_ERR, "ERROR: io_watch_add: check_io poll: %s [%d]\n",
  480. strerror(errno), errno);
  481. }
  482. }
  483. #endif
  484. return 0;
  485. error:
  486. if (e) unhash_fd_map(e);
  487. return -1;
  488. #undef fd_array_setup
  489. #undef set_fd_flags
  490. }
  491. #define IO_FD_CLOSING 16
  492. /* parameters: h - handler
  493. * fd - file descriptor
  494. * index - index in the fd_array if known, -1 if not
  495. * (if index==-1 fd_array will be searched for the
  496. * corresponding fd* entry -- slower but unavoidable in
  497. * some cases). index is not used (no fd_array) for epoll,
  498. * /dev/poll and kqueue
  499. * flags - optimization flags, e.g. IO_FD_CLOSING, the fd was
  500. * or will shortly be closed, in some cases we can avoid
  501. * extra remove operations (e.g.: epoll, kqueue, sigio)
  502. * returns 0 if ok, -1 on error */
  503. inline static int io_watch_del(io_wait_h* h, int fd, int idx, int flags)
  504. {
  505. #define fix_fd_array \
  506. do{\
  507. if (unlikely(idx==-1)){ \
  508. /* fix idx if -1 and needed */ \
  509. for (idx=0; (idx<h->fd_no) && \
  510. (h->fd_array[idx].fd!=fd); idx++); \
  511. } \
  512. if (likely(idx<h->fd_no)){ \
  513. memmove(&h->fd_array[idx], &h->fd_array[idx+1], \
  514. (h->fd_no-(idx+1))*sizeof(*(h->fd_array))); \
  515. if ((idx<=h->crt_fd_array_idx) && (h->crt_fd_array_idx>=0)) \
  516. h->crt_fd_array_idx--; \
  517. } \
  518. }while(0)
  519. struct fd_map* e;
  520. int events;
  521. #ifdef HAVE_EPOLL
  522. int n;
  523. struct epoll_event ep_event;
  524. #endif
  525. #ifdef HAVE_DEVPOLL
  526. struct pollfd pfd;
  527. #endif
  528. #ifdef HAVE_SIGIO_RT
  529. int fd_flags;
  530. #endif
  531. if (unlikely((fd<0) || (fd>=h->max_fd_no))){
  532. LOG(L_CRIT, "BUG: io_watch_del: invalid fd %d, not in [0, %d) \n",
  533. fd, h->fd_no);
  534. goto error;
  535. }
  536. DBG("DBG: io_watch_del (%p, %d, %d, 0x%x) fd_no=%d called\n",
  537. h, fd, idx, flags, h->fd_no);
  538. e=get_fd_map(h, fd);
  539. /* more sanity checks */
  540. if (unlikely(e==0)){
  541. LOG(L_CRIT, "BUG: io_watch_del: no corresponding hash entry for %d\n",
  542. fd);
  543. goto error;
  544. }
  545. if (unlikely(e->type==0 /*F_NONE*/)){
  546. LOG(L_ERR, "ERROR: io_watch_del: trying to delete already erased"
  547. " entry %d in the hash(%d, %d, %p) flags %x)\n",
  548. fd, e->fd, e->type, e->data, flags);
  549. goto error;
  550. }
  551. events=e->events;
  552. unhash_fd_map(e);
  553. switch(h->poll_method){
  554. case POLL_POLL:
  555. fix_fd_array;
  556. break;
  557. #ifdef HAVE_SELECT
  558. case POLL_SELECT:
  559. if (likely(events & POLLIN))
  560. FD_CLR(fd, &h->master_rset);
  561. if (unlikely(events & POLLOUT))
  562. FD_CLR(fd, &h->master_wset);
  563. if (unlikely(h->max_fd_select && (h->max_fd_select==fd)))
  564. /* we don't know the prev. max, so we just decrement it */
  565. h->max_fd_select--;
  566. fix_fd_array;
  567. break;
  568. #endif
  569. #ifdef HAVE_SIGIO_RT
  570. case POLL_SIGIO_RT:
  571. fix_fd_array;
  572. /* the O_ASYNC flag must be reset all the time, the fd
  573. * can be changed only if O_ASYNC is reset (if not and
  574. * the fd is a duplicate, you will get signals from the dup. fd
  575. * and not from the original, even if the dup. fd was closed
  576. * and the signals re-set on the original) -- andrei
  577. */
  578. /*if (!(flags & IO_FD_CLOSING)){*/
  579. /* reset ASYNC */
  580. fd_flags=fcntl(fd, F_GETFL);
  581. if (unlikely(fd_flags==-1)){
  582. LOG(L_ERR, "ERROR: io_watch_del: fnctl: GETFL failed:"
  583. " %s [%d]\n", strerror(errno), errno);
  584. goto error;
  585. }
  586. if (unlikely(fcntl(fd, F_SETFL, fd_flags&(~O_ASYNC))==-1)){
  587. LOG(L_ERR, "ERROR: io_watch_del: fnctl: SETFL"
  588. " failed: %s [%d]\n", strerror(errno), errno);
  589. goto error;
  590. }
  591. break;
  592. #endif
  593. #ifdef HAVE_EPOLL
  594. case POLL_EPOLL_LT:
  595. case POLL_EPOLL_ET:
  596. /* epoll doesn't seem to automatically remove sockets,
  597. * if the socket is a duplicate/moved and the original
  598. * is still open. The fd is removed from the epoll set
  599. * only when the original (and all the copies?) is/are
  600. * closed. This is probably a bug in epoll. --andrei */
  601. #ifdef EPOLL_NO_CLOSE_BUG
  602. if (!(flags & IO_FD_CLOSING)){
  603. #endif
  604. again_epoll:
  605. n=epoll_ctl(h->epfd, EPOLL_CTL_DEL, fd, &ep_event);
  606. if (unlikely(n==-1)){
  607. if (errno==EAGAIN) goto again_epoll;
  608. LOG(L_ERR, "ERROR: io_watch_del: removing fd from epoll "
  609. "list failed: %s [%d]\n", strerror(errno), errno);
  610. goto error;
  611. }
  612. #ifdef EPOLL_NO_CLOSE_BUG
  613. }
  614. #endif
  615. break;
  616. #endif
  617. #ifdef HAVE_KQUEUE
  618. case POLL_KQUEUE:
  619. if (!(flags & IO_FD_CLOSING)){
  620. if (likely(events & POLLIN)){
  621. if (unlikely(kq_ev_change(h, fd, EVFILT_READ,
  622. EV_DELETE, 0) ==-1)){
  623. /* try to delete the write filter anyway */
  624. if (events & POLLOUT){
  625. kq_ev_change(h, fd, EVFILT_WRITE, EV_DELETE, 0);
  626. }
  627. goto error;
  628. }
  629. }
  630. if (unlikely(events & POLLOUT)){
  631. if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE,
  632. EV_DELETE, 0) ==-1))
  633. goto error;
  634. }
  635. }
  636. break;
  637. #endif
  638. #ifdef HAVE_DEVPOLL
  639. case POLL_DEVPOLL:
  640. /* for /dev/poll the closed fds _must_ be removed
  641. (they are not removed automatically on close()) */
  642. pfd.fd=fd;
  643. pfd.events=POLLREMOVE;
  644. pfd.revents=0;
  645. again_devpoll:
  646. if (write(h->dpoll_fd, &pfd, sizeof(pfd))==-1){
  647. if (errno==EINTR) goto again_devpoll;
  648. LOG(L_ERR, "ERROR: io_watch_del: removing fd from "
  649. "/dev/poll failed: %s [%d]\n",
  650. strerror(errno), errno);
  651. goto error;
  652. }
  653. break;
  654. #endif
  655. default:
  656. LOG(L_CRIT, "BUG: io_watch_del: no support for poll method "
  657. " %s (%d)\n", poll_method_str[h->poll_method],
  658. h->poll_method);
  659. goto error;
  660. }
  661. h->fd_no--;
  662. return 0;
  663. error:
  664. return -1;
  665. #undef fix_fd_array
  666. }
  667. /* parameters: h - handler
  668. * fd - file descriptor
  669. * events - new events to watch for
  670. * idx - index in the fd_array if known, -1 if not
  671. * (if index==-1 fd_array will be searched for the
  672. * corresponding fd* entry -- slower but unavoidable in
  673. * some cases). index is not used (no fd_array) for epoll,
  674. * /dev/poll and kqueue
  675. * returns 0 if ok, -1 on error */
  676. inline static int io_watch_chg(io_wait_h* h, int fd, short events, int idx )
  677. {
  678. #define fd_array_chg(ev) \
  679. do{\
  680. if (unlikely(idx==-1)){ \
  681. /* fix idx if -1 and needed */ \
  682. for (idx=0; (idx<h->fd_no) && \
  683. (h->fd_array[idx].fd!=fd); idx++); \
  684. } \
  685. if (likely(idx<h->fd_no)){ \
  686. h->fd_array[idx].events=(ev); \
  687. } \
  688. }while(0)
  689. struct fd_map* e;
  690. int add_events;
  691. int del_events;
  692. #ifdef HAVE_DEVPOLL
  693. struct pollfd pfd;
  694. #endif
  695. #ifdef HAVE_EPOLL
  696. int n;
  697. struct epoll_event ep_event;
  698. #endif
  699. if (unlikely((fd<0) || (fd>=h->max_fd_no))){
  700. LOG(L_CRIT, "BUG: io_watch_chg: invalid fd %d, not in [0, %d) \n",
  701. fd, h->fd_no);
  702. goto error;
  703. }
  704. if (unlikely((events&(POLLIN|POLLOUT))==0)){
  705. LOG(L_CRIT, "BUG: io_watch_chg: invalid events: 0x%0x\n", events);
  706. goto error;
  707. }
  708. DBG("DBG: io_watch_chg (%p, %d, 0x%x, 0x%x) fd_no=%d called\n",
  709. h, fd, events, idx, h->fd_no);
  710. e=get_fd_map(h, fd);
  711. /* more sanity checks */
  712. if (unlikely(e==0)){
  713. LOG(L_CRIT, "BUG: io_watch_chg: no corresponding hash entry for %d\n",
  714. fd);
  715. goto error;
  716. }
  717. if (unlikely(e->type==0 /*F_NONE*/)){
  718. LOG(L_ERR, "ERROR: io_watch_chg: trying to change an already erased"
  719. " entry %d in the hash(%d, %d, %p) )\n",
  720. fd, e->fd, e->type, e->data);
  721. goto error;
  722. }
  723. add_events=events & ~e->events;
  724. del_events=e->events & ~events;
  725. e->events=events;
  726. switch(h->poll_method){
  727. case POLL_POLL:
  728. #ifdef POLLRDHUP
  729. /* listen to POLLRDHUP by default (if POLLIN) */
  730. events|=((int)!(events & POLLIN) - 1) & POLLRDHUP;
  731. #endif /* POLLRDHUP */
  732. fd_array_chg(events);
  733. break;
  734. #ifdef HAVE_SELECT
  735. case POLL_SELECT:
  736. fd_array_chg(events);
  737. if (unlikely(del_events & POLLIN))
  738. FD_CLR(fd, &h->master_rset);
  739. else if (unlikely(add_events & POLLIN))
  740. FD_SET(fd, &h->master_rset);
  741. if (likely(del_events & POLLOUT))
  742. FD_CLR(fd, &h->master_wset);
  743. else if (likely(add_events & POLLOUT))
  744. FD_SET(fd, &h->master_wset);
  745. break;
  746. #endif
  747. #ifdef HAVE_SIGIO_RT
  748. case POLL_SIGIO_RT:
  749. fd_array_chg(events);
  750. /* no need for check_io, since SIGIO_RT listens by default for all
  751. * the events */
  752. break;
  753. #endif
  754. #ifdef HAVE_EPOLL
  755. case POLL_EPOLL_LT:
  756. ep_event.events=
  757. #ifdef POLLRDHUP
  758. /* listen for EPOLLRDHUP too */
  759. ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
  760. #else /* POLLRDHUP */
  761. (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
  762. #endif /* POLLRDHUP */
  763. (EPOLLOUT & ((int)!(events & POLLOUT)-1) );
  764. ep_event.data.ptr=e;
  765. again_epoll_lt:
  766. n=epoll_ctl(h->epfd, EPOLL_CTL_MOD, fd, &ep_event);
  767. if (unlikely(n==-1)){
  768. if (errno==EAGAIN) goto again_epoll_lt;
  769. LOG(L_ERR, "ERROR: io_watch_chg: modifying epoll events"
  770. " failed: %s [%d]\n", strerror(errno), errno);
  771. goto error;
  772. }
  773. break;
  774. case POLL_EPOLL_ET:
  775. ep_event.events=
  776. #ifdef POLLRDHUP
  777. /* listen for EPOLLRDHUP too */
  778. ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
  779. #else /* POLLRDHUP */
  780. (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
  781. #endif /* POLLRDHUP */
  782. (EPOLLOUT & ((int)!(events & POLLOUT)-1) ) |
  783. EPOLLET;
  784. ep_event.data.ptr=e;
  785. again_epoll_et:
  786. n=epoll_ctl(h->epfd, EPOLL_CTL_MOD, fd, &ep_event);
  787. if (unlikely(n==-1)){
  788. if (errno==EAGAIN) goto again_epoll_et;
  789. LOG(L_ERR, "ERROR: io_watch_chg: modifying epoll events"
  790. " failed: %s [%d]\n", strerror(errno), errno);
  791. goto error;
  792. }
  793. break;
  794. #endif
  795. #ifdef HAVE_KQUEUE
  796. case POLL_KQUEUE:
  797. if (unlikely(del_events & POLLIN)){
  798. if (unlikely(kq_ev_change(h, fd, EVFILT_READ,
  799. EV_DELETE, 0) ==-1))
  800. goto error;
  801. }else if (unlikely(add_events & POLLIN)){
  802. if (unlikely(kq_ev_change(h, fd, EVFILT_READ, EV_ADD, e) ==-1))
  803. goto error;
  804. }
  805. if (likely(del_events & POLLOUT)){
  806. if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE,
  807. EV_DELETE, 0) ==-1))
  808. goto error;
  809. }else if (likely(add_events & POLLOUT)){
  810. if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE, EV_ADD, e)==-1))
  811. goto error;
  812. }
  813. break;
  814. #endif
  815. #ifdef HAVE_DEVPOLL
  816. case POLL_DEVPOLL:
  817. /* for /dev/poll the closed fds _must_ be removed
  818. (they are not removed automatically on close()) */
  819. pfd.fd=fd;
  820. pfd.events=POLLREMOVE;
  821. pfd.revents=0;
  822. again_devpoll1:
  823. if (unlikely(write(h->dpoll_fd, &pfd, sizeof(pfd))==-1)){
  824. if (errno==EINTR) goto again_devpoll1;
  825. LOG(L_ERR, "ERROR: io_watch_chg: removing fd from "
  826. "/dev/poll failed: %s [%d]\n",
  827. strerror(errno), errno);
  828. goto error;
  829. }
  830. again_devpoll2:
  831. pfd.events=events;
  832. pfd.revents=0;
  833. if (unlikely(write(h->dpoll_fd, &pfd, sizeof(pfd))==-1)){
  834. if (errno==EINTR) goto again_devpoll2;
  835. LOG(L_ERR, "ERROR: io_watch_chg: re-adding fd to "
  836. "/dev/poll failed: %s [%d]\n",
  837. strerror(errno), errno);
  838. goto error;
  839. }
  840. break;
  841. #endif
  842. default:
  843. LOG(L_CRIT, "BUG: io_watch_chg: no support for poll method "
  844. " %s (%d)\n", poll_method_str[h->poll_method],
  845. h->poll_method);
  846. goto error;
  847. }
  848. return 0;
  849. error:
  850. return -1;
  851. #undef fix_fd_array
  852. }
  853. /* io_wait_loop_x style function
  854. * wait for io using poll()
  855. * params: h - io_wait handle
  856. * t - timeout in s
  857. * repeat - if !=0 handle_io will be called until it returns <=0
  858. * returns: number of IO events handled on success (can be 0), -1 on error
  859. */
  860. inline static int io_wait_loop_poll(io_wait_h* h, int t, int repeat)
  861. {
  862. int n, r;
  863. int ret;
  864. struct fd_map* fm;
  865. again:
  866. ret=n=poll(h->fd_array, h->fd_no, t*1000);
  867. if (n==-1){
  868. if (errno==EINTR) goto again; /* signal, ignore it */
  869. else{
  870. LOG(L_ERR, "ERROR:io_wait_loop_poll: poll: %s [%d]\n",
  871. strerror(errno), errno);
  872. goto error;
  873. }
  874. }
  875. for (r=0; (r<h->fd_no) && n; r++){
  876. fm=get_fd_map(h, h->fd_array[r].fd);
  877. if (h->fd_array[r].revents & (fm->events|POLLERR|POLLHUP)){
  878. n--;
  879. /* sanity checks */
  880. if (unlikely((h->fd_array[r].fd >= h->max_fd_no)||
  881. (h->fd_array[r].fd < 0))){
  882. LOG(L_CRIT, "BUG: io_wait_loop_poll: bad fd %d "
  883. "(no in the 0 - %d range)\n",
  884. h->fd_array[r].fd, h->max_fd_no);
  885. /* try to continue anyway */
  886. h->fd_array[r].events=0; /* clear the events */
  887. continue;
  888. }
  889. h->crt_fd_array_idx=r;
  890. /* repeat handle_io if repeat, fd still watched (not deleted
  891. * inside handle_io), handle_io returns that there's still
  892. * IO and the fd is still watched for the triggering event */
  893. while(fm->type &&
  894. (handle_io(fm, h->fd_array[r].revents, r) > 0) &&
  895. repeat && ((fm->events|POLLERR|POLLHUP) &
  896. h->fd_array[r].revents));
  897. r=h->crt_fd_array_idx; /* can change due to io_watch_del(fd)
  898. array shifting */
  899. }
  900. }
  901. error:
  902. return ret;
  903. }
  904. #ifdef HAVE_SELECT
  905. /* wait for io using select */
  906. inline static int io_wait_loop_select(io_wait_h* h, int t, int repeat)
  907. {
  908. fd_set sel_rset;
  909. fd_set sel_wset;
  910. int n, ret;
  911. struct timeval timeout;
  912. int r;
  913. struct fd_map* fm;
  914. int revents;
  915. again:
  916. sel_rset=h->master_rset;
  917. sel_wset=h->master_wset;
  918. timeout.tv_sec=t;
  919. timeout.tv_usec=0;
  920. ret=n=select(h->max_fd_select+1, &sel_rset, &sel_wset, 0, &timeout);
  921. if (n<0){
  922. if (errno==EINTR) goto again; /* just a signal */
  923. LOG(L_ERR, "ERROR: io_wait_loop_select: select: %s [%d]\n",
  924. strerror(errno), errno);
  925. n=0;
  926. /* continue */
  927. }
  928. /* use poll fd array */
  929. for(r=0; (r<h->fd_no) && n; r++){
  930. revents=0;
  931. if (likely(FD_ISSET(h->fd_array[r].fd, &sel_rset)))
  932. revents|=POLLIN;
  933. if (unlikely(FD_ISSET(h->fd_array[r].fd, &sel_wset)))
  934. revents|=POLLOUT;
  935. if (unlikely(revents)){
  936. h->crt_fd_array_idx=r;
  937. fm=get_fd_map(h, h->fd_array[r].fd);
  938. while(fm->type && (fm->events & revents) &&
  939. (handle_io(fm, revents, r)>0) && repeat);
  940. r=h->crt_fd_array_idx; /* can change due to io_watch_del(fd)
  941. array shifting */
  942. n--;
  943. }
  944. };
  945. return ret;
  946. }
  947. #endif
  948. #ifdef HAVE_EPOLL
/* io_wait_loop_x style function: wait for io using epoll.
 * params: h      - io_wait handle
 *         t      - timeout in s
 *         repeat - if !=0 handle_io will be called until it returns <=0
 * returns: number of events reported by epoll_wait (can be 0), -1 on error
 */
inline static int io_wait_loop_epoll(io_wait_h* h, int t, int repeat)
{
	int n, r;
	struct fd_map* fm;
	int revents;
again:
	n=epoll_wait(h->epfd, h->ep_array, h->fd_no, t*1000);
	if (unlikely(n==-1)){
		if (errno==EINTR) goto again; /* signal, ignore it */
		else{
			LOG(L_ERR, "ERROR:io_wait_loop_epoll: "
					"epoll_wait(%d, %p, %d, %d): %s [%d]\n",
					h->epfd, h->ep_array, h->fd_no, t*1000,
					strerror(errno), errno);
			goto error;
		}
	}
#if 0
	if (n>1){
		for(r=0; r<n; r++){
			LOG(L_ERR, "WARNING: ep_array[%d]= %x, %p\n",
					r, h->ep_array[r].events, h->ep_array[r].data.ptr);
		}
	}
#endif
	for (r=0; r<n; r++){
		/* translate EPOLL* bits into poll()-style POLL* bits, branchless:
		 * (!(x & FLAG) - 1) evaluates to all-ones when FLAG is set in x
		 * and to 0 otherwise, so it acts as a mask for the POLL* bit */
		revents= (POLLIN & (!(h->ep_array[r].events & (EPOLLIN|EPOLLPRI))
					-1)) |
				(POLLOUT & (!(h->ep_array[r].events & EPOLLOUT)-1)) |
				(POLLERR & (!(h->ep_array[r].events & EPOLLERR)-1)) |
				(POLLHUP & (!(h->ep_array[r].events & EPOLLHUP)-1))
#ifdef POLLRDHUP
				| (POLLRDHUP & (!(h->ep_array[r].events & EPOLLRDHUP)-1))
#endif
				;
		if (likely(revents)){
			/* data.ptr was set to the fd_map entry when the fd was added */
			fm=(struct fd_map*)h->ep_array[r].data.ptr;
			/* repeat handle_io while the fd is still watched (fm->type!=0),
			 * the triggering (or error/hangup) events are still relevant,
			 * handle_io reports more io and repeat is set */
			while(fm->type && ((fm->events|POLLERR|POLLHUP) & revents) &&
					(handle_io(fm, revents, -1)>0) && repeat);
		}else{
			LOG(L_ERR, "ERROR:io_wait_loop_epoll: unexpected event %x"
					" on %d/%d, data=%p\n", h->ep_array[r].events,
					r+1, n, h->ep_array[r].data.ptr);
		}
	}
error:
	return n;
}
  997. #endif
  998. #ifdef HAVE_KQUEUE
  999. inline static int io_wait_loop_kqueue(io_wait_h* h, int t, int repeat)
  1000. {
  1001. int n, r;
  1002. struct timespec tspec;
  1003. struct fd_map* fm;
  1004. int revents;
  1005. tspec.tv_sec=t;
  1006. tspec.tv_nsec=0;
  1007. again:
  1008. n=kevent(h->kq_fd, h->kq_changes, h->kq_nchanges, h->kq_array,
  1009. h->fd_no, &tspec);
  1010. if (unlikely(n==-1)){
  1011. if (errno==EINTR) goto again; /* signal, ignore it */
  1012. else{
  1013. LOG(L_ERR, "ERROR: io_wait_loop_kqueue: kevent:"
  1014. " %s [%d]\n", strerror(errno), errno);
  1015. goto error;
  1016. }
  1017. }
  1018. h->kq_nchanges=0; /* reset changes array */
  1019. for (r=0; r<n; r++){
  1020. #ifdef EXTRA_DEBUG
  1021. DBG("DBG: kqueue: event %d/%d: fd=%d, udata=%lx, flags=0x%x\n",
  1022. r, n, h->kq_array[r].ident, (long)h->kq_array[r].udata,
  1023. h->kq_array[r].flags);
  1024. #endif
  1025. #if 0
  1026. if (unlikely(h->kq_array[r].flags & EV_ERROR)){
  1027. /* error in changes: we ignore it, it can be caused by
  1028. trying to remove an already closed fd: race between
  1029. adding something to the changes array, close() and
  1030. applying the changes */
  1031. LOG(L_INFO, "INFO: io_wait_loop_kqueue: kevent error on "
  1032. "fd %ld: %s [%ld]\n", h->kq_array[r].ident,
  1033. strerror(h->kq_array[r].data),
  1034. (long)h->kq_array[r].data);
  1035. }else{
  1036. #endif
  1037. fm=(struct fd_map*)h->kq_array[r].udata;
  1038. if (likely(h->kq_array[r].filter==EVFILT_READ)){
  1039. revents=POLLIN |
  1040. (((int)!(h->kq_array[r].flags & EV_EOF)-1)&POLLHUP);
  1041. while(fm->type && (fm->events & revents) &&
  1042. (handle_io(fm, revents, -1)>0) && repeat);
  1043. }else if (h->kq_array[r].filter==EVFILT_WRITE){
  1044. revents=POLLOUT |
  1045. (((int)!(h->kq_array[r].flags & EV_EOF)-1)&POLLHUP);
  1046. while(fm->type && (fm->events & revents) &&
  1047. (handle_io(fm, revents, -1)>0) && repeat);
  1048. }
  1049. /*} */
  1050. }
  1051. error:
  1052. return n;
  1053. }
  1054. #endif
  1055. #ifdef HAVE_SIGIO_RT
/* sigio rt version has no repeat (it doesn't make sense)
 * Waits for one queued real-time io signal (or SIGIO on queue overflow)
 * and dispatches the event to handle_io(); on overflow it drains the
 * signal queue and falls back to io_wait_loop_poll().
 * params: h - io_wait handle
 *         t - timeout in s
 * returns: number of events handled (0 on timeout, normally 1), -1 on error
 */
inline static int io_wait_loop_sigio_rt(io_wait_h* h, int t)
{
	int n;
	int ret;
	struct timespec ts;
	siginfo_t siginfo;
	int sigio_band;
	int sigio_fd;
	struct fd_map* fm;
	int revents;
#ifdef SIGINFO64_WORKARROUND
	int* pi;
#endif
	
	ret=1; /* 1 event per call normally */
	ts.tv_sec=t;
	ts.tv_nsec=0;
	/* sanity: h->sset must contain both the rt signal and SIGIO (the
	 * kernel falls back to SIGIO on rt queue overflow) */
	if (unlikely(!sigismember(&h->sset, h->signo) ||
					!sigismember(&h->sset, SIGIO))) {
		LOG(L_CRIT, "BUG: io_wait_loop_sigio_rt: the signal mask"
				" is not properly set!\n");
		goto error;
	}
again:
	n=sigtimedwait(&h->sset, &siginfo, &ts);
	if (unlikely(n==-1)){
		if (errno==EINTR) goto again; /* some other signal, ignore it */
		else if (errno==EAGAIN){ /* timeout */
			ret=0;
			goto end;
		}else{
			LOG(L_ERR, "ERROR: io_wait_loop_sigio_rt: sigtimed_wait"
					" %s [%d]\n", strerror(errno), errno);
			goto error;
		}
	}
	/* n is the delivered signal number: the rt signal carries the event,
	 * plain SIGIO means the rt signal queue overflowed */
	if (likely(n!=SIGIO)){
#ifdef SIGINFO64_WORKARROUND
		/* on linux siginfo.si_band is defined as long in userspace
		 * and as int in kernel (< 2.6.5) => on 64 bits things will break!
		 * (si_band will include si_fd, and si_fd will contain
		 *  garbage).
		 * see /usr/src/linux/include/asm-generic/siginfo.h and
		 *     /usr/include/bits/siginfo.h
		 * On newer kernels this is fixed (si_band is long in the kernel too).
		 * -- andrei */
		if ((_os_ver<0x020605) && (sizeof(siginfo.si_band)>sizeof(int))){
			pi=(int*)(void*)&siginfo.si_band; /* avoid type punning warnings */
			sigio_band=*pi;
			sigio_fd=*(pi+1); /* si_fd packed right after the 32 bit band */
		}else
#endif
		{
			sigio_band=siginfo.si_band;
			sigio_fd=siginfo.si_fd;
		}
		if (unlikely(siginfo.si_code==SI_SIGIO)){
			/* old style, we don't know the event (linux 2.2.?) */
			LOG(L_WARN, "WARNING: io_wait_loop_sigio_rt: old style sigio"
					" interface\n");
			fm=get_fd_map(h, sigio_fd);
			/* we can have queued signals generated by fds not watched
			 * any more, or by fds in transition, to a child => ignore them*/
			if (fm->type)
				handle_io(fm, POLLIN|POLLOUT, -1);
		}else{
			/* si_code contains the SIGPOLL reason: POLL_IN, POLL_OUT,
			 *  POLL_MSG, POLL_ERR, POLL_PRI or POLL_HUP
			 * and si_band the translated poll event bitmap:
			 *  POLLIN|POLLRDNORM (=POLL_IN),
			 *  POLLOUT|POLLWRNORM|POLLWRBAND (=POLL_OUT),
			 *  POLLIN|POLLRDNORM|POLLMSG (=POLL_MSG),
			 *  POLLERR (=POLL_ERR),
			 *  POLLPRI|POLLRDBAND (=POLL_PRI),
			 *  POLLHUP|POLLERR (=POLL_HUP)
			 *  [linux 2.6.22 fs/fcntl.c:447]
			 */
#ifdef EXTRA_DEBUG
			DBG("io_wait_loop_sigio_rt: siginfo: signal=%d (%d),"
					" si_code=%d, si_band=0x%x,"
					" si_fd=%d\n",
					siginfo.si_signo, n, siginfo.si_code,
					(unsigned)sigio_band,
					sigio_fd);
#endif
			/* on some errors (e.g. when receiving TCP RST), sigio_band will
			 * be set to 0x08 (POLLERR) or 0x18 (POLLERR|POLLHUP - on stream
			 * unix socket close) , so better catch all events --andrei */
			if (likely(sigio_band)){
				fm=get_fd_map(h, sigio_fd);
				revents=sigio_band;
				/* fix revents==POLLPRI case: add POLLIN unless POLLPRI
				 * is already set (branchless mask) */
				revents |= (!(revents & POLLPRI)-1) & POLLIN;
				/* we can have queued signals generated by fds not watched
				 * any more, or by fds in transition, to a child
				 * => ignore them */
				if (fm->type && ((fm->events|POLLERR|POLLHUP) & revents))
					handle_io(fm, revents, -1);
				else
					DBG("WARNING: io_wait_loop_sigio_rt: ignoring event"
							" %x on fd %d, watching for %x, si_code=%x "
							"(fm->type=%d, fm->fd=%d, fm->data=%p)\n",
							sigio_band, sigio_fd, fm->events, siginfo.si_code,
							fm->type, fm->fd, fm->data);
			}else{
				LOG(L_ERR, "ERROR: io_wait_loop_sigio_rt: unexpected event"
						" on fd %d: %x\n", sigio_fd, sigio_band);
			}
		}
	}else{
		/* signal queue overflow
		 * TODO: increase signal queue size: 2.4x /proc/.., 2.6x -rlimits */
		LOG(L_WARN, "WARNING: io_wait_loop_sigio_rt: signal queue overflowed"
				"- falling back to poll\n");
		/* clear real-time signal queue
		 * both SIG_IGN and SIG_DFL are needed , it doesn't work
		 * only with SIG_DFL */
		if (signal(h->signo, SIG_IGN)==SIG_ERR){
			LOG(L_CRIT, "BUG: do_poll: couldn't reset signal to IGN\n");
		}
		if (signal(h->signo, SIG_DFL)==SIG_ERR){
			LOG(L_CRIT, "BUG: do_poll: couldn't reset signal to DFL\n");
		}
		/* falling back to normal poll */
		ret=io_wait_loop_poll(h, -1, 1);
	}
end:
	return ret;
error:
	return -1;
}
  1187. #endif
  1188. #ifdef HAVE_DEVPOLL
  1189. inline static int io_wait_loop_devpoll(io_wait_h* h, int t, int repeat)
  1190. {
  1191. int n, r;
  1192. int ret;
  1193. struct dvpoll dpoll;
  1194. struct fd_map* fm;
  1195. dpoll.dp_timeout=t*1000;
  1196. dpoll.dp_nfds=h->fd_no;
  1197. dpoll.dp_fds=h->fd_array;
  1198. again:
  1199. ret=n=ioctl(h->dpoll_fd, DP_POLL, &dpoll);
  1200. if (unlikely(n==-1)){
  1201. if (errno==EINTR) goto again; /* signal, ignore it */
  1202. else{
  1203. LOG(L_ERR, "ERROR:io_wait_loop_devpoll: ioctl: %s [%d]\n",
  1204. strerror(errno), errno);
  1205. goto error;
  1206. }
  1207. }
  1208. for (r=0; r< n; r++){
  1209. if (h->fd_array[r].revents & (POLLNVAL|POLLERR)){
  1210. LOG(L_ERR, "ERROR: io_wait_loop_devpoll: pollinval returned"
  1211. " for fd %d, revents=%x\n",
  1212. h->fd_array[r].fd, h->fd_array[r].revents);
  1213. }
  1214. /* POLLIN|POLLHUP just go through */
  1215. fm=get_fd_map(h, h->fd_array[r].fd);
  1216. while(fm->type && (fm->events & h->fd_array[r].revents) &&
  1217. (handle_io(fm, h->fd_array[r].revents, r) > 0) && repeat);
  1218. }
  1219. error:
  1220. return ret;
  1221. }
  1222. #endif
  1223. /* init */
  1224. /* initializes the static vars/arrays
  1225. * params: h - pointer to the io_wait_h that will be initialized
  1226. * max_fd - maximum allowed fd number
 * poll_method - poll method (0 for automatic best fit)
  1228. */
  1229. int init_io_wait(io_wait_h* h, int max_fd, enum poll_types poll_method);
  1230. /* destroys everything init_io_wait allocated */
  1231. void destroy_io_wait(io_wait_h* h);
  1232. #endif