- /*
- * $Id$
- *
- * Copyright (C) 2005 iptelorg GmbH
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
- /*
- * tcp io wait common stuff used by tcp_main.c & tcp_read.c
- * All the functions are inline for speed reasons and because they are
- * used only from 2 places.
- * You also have to define:
- * int handle_io(struct fd_map* fm, short events, int idx) (see below)
- * (this could be trivially replaced by a callback pointer entry attached
- * to the io_wait handler if more flexibility rather than performance
- * is needed)
- * fd_type - define to some enum of your choice and also define
- * FD_TYPE_DEFINED (if you don't, fd_type will be defined
- * to int). 0 has a special "not set/not initialized" meaning
- * (a lot of sanity checks and the sigio_rt code are based on
- * this assumption)
- * local_malloc (defaults to pkg_malloc)
- * local_free (defaults to pkg_free)
- *
- */
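- /* Illustrative sketch (not part of this header): roughly what an including
- * file such as tcp_main.c is expected to define before including io_wait.h,
- * per the comment above. The enum values other than F_NONE, and the use of a
- * #define for fd_type, are just assumptions for the example; local_malloc and
- * local_free are shown set to their documented defaults. */
- #if 0
- enum fd_types { F_NONE=0, F_LISTENER, F_CONN }; /* 0 = not used/empty */
- #define fd_type enum fd_types
- #define FD_TYPE_DEFINED
- #define local_malloc pkg_malloc /* the default, shown explicitly */
- #define local_free pkg_free /* the default, shown explicitly */
- #define HANDLE_IO_INLINE /* handle_io() is defined inline by the includer */
- #include "io_wait.h"
- #endif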
- /*
- * History:
- * --------
- * 2005-06-13 created by andrei
- * 2005-06-26 added kqueue (andrei)
- * 2005-07-01 added /dev/poll (andrei)
- * 2006-05-30 sigio 64 bit workaround enabled for kernels < 2.6.5 (andrei)
- * 2007-11-22 when handle_io() is called in a loop check & stop if the fd was
- * removed inside handle_io() (andrei)
- * 2007-11-29 support for write (POLLOUT); added io_watch_chg() (andrei)
- * 2008-02-04 POLLRDHUP & EPOLLRDHUP support (automatically enabled if POLLIN
- * is set) (andrei)
- * 2010-06-17 re-enabled & enhanced the EV_ERROR for kqueue (andrei)
- */
- #ifndef _io_wait_h
- #define _io_wait_h
- #include <errno.h>
- #include <string.h>
- #ifdef HAVE_SIGIO_RT
- #define __USE_GNU /* or else F_SETSIG won't be included */
- #include <sys/types.h> /* recv */
- #include <sys/socket.h> /* recv */
- #include <signal.h> /* sigprocmask, sigwait a.s.o */
- #endif
- #define _GNU_SOURCE /* for POLLRDHUP on linux */
- #include <poll.h>
- #include <fcntl.h>
- #ifdef HAVE_EPOLL
- #include <sys/epoll.h>
- #endif
- #ifdef HAVE_KQUEUE
- #include <sys/types.h> /* needed on freebsd */
- #include <sys/event.h>
- #include <sys/time.h>
- #endif
- #ifdef HAVE_DEVPOLL
- #include <sys/devpoll.h>
- #endif
- #ifdef HAVE_SELECT
- /* needed on openbsd for select*/
- #include <sys/time.h>
- #include <sys/types.h>
- #include <unistd.h>
- /* needed according to POSIX for select*/
- #include <sys/select.h>
- #endif
- #include "dprint.h"
- #include "poll_types.h" /* poll_types*/
- #ifdef HAVE_SIGIO_RT
- #include "pt.h" /* mypid() */
- #endif
- #include "compiler_opt.h"
- #ifdef HAVE_EPOLL
- /* fix defines for EPOLL */
- #if defined POLLRDHUP && ! defined EPOLLRDHUP
- #define EPOLLRDHUP POLLRDHUP /* should work on all linuxes */
- #endif /* POLLRDHUP && EPOLLRDHUP */
- #endif /* HAVE_EPOLL */
- extern int _os_ver; /* os version number, needed to select bug workarounds */
- #if 0
- enum fd_types; /* this should be defined by the including file,
- see tcp_main.c for an example,
- 0 has a special meaning: not used/empty*/
- #endif
- #ifndef FD_TYPE_DEFINED
- typedef int fd_type;
- #define FD_TYPE_DEFINED
- #endif
- /* maps a fd to some other structure; used in almost all cases
- * except epoll and maybe kqueue or /dev/poll */
- struct fd_map{
- int fd; /* fd no */
- fd_type type; /* "data" type */
- void* data; /* pointer to the corresponding structure */
- short events; /* events we are interested in */
- };
- #ifdef HAVE_KQUEUE
- #ifndef KQ_CHANGES_ARRAY_SIZE
- #define KQ_CHANGES_ARRAY_SIZE 256
- #ifdef __OS_netbsd
- #define KEV_UDATA_CAST (intptr_t)
- #else
- #define KEV_UDATA_CAST
- #endif
- #endif
- #endif
- /* handler structure */
- struct io_wait_handler{
- enum poll_types poll_method;
- int flags;
- struct fd_map* fd_hash;
- int fd_no; /* current index used in fd_array and the passed size for
- ep_array (for kq_array at least
- max(twice the size, kq_changes_size) should
- be passed). */
- int max_fd_no; /* maximum fd no, is also the size of fd_array,
- fd_hash and ep_array*/
- /* common stuff for POLL, SIGIO_RT and SELECT
- * since poll support is always compiled => this will always be compiled */
- struct pollfd* fd_array; /* used also by devpoll as devpoll array */
- int crt_fd_array_idx; /* crt idx for which handle_io is called
- (updated also by del -> internal optimization) */
- /* end of common stuff */
- #ifdef HAVE_EPOLL
- int epfd; /* epoll ctrl fd */
- struct epoll_event* ep_array;
- #endif
- #ifdef HAVE_SIGIO_RT
- sigset_t sset; /* signal mask for sigio & sigrtmin */
- int signo; /* real time signal used */
- #endif
- #ifdef HAVE_KQUEUE
- int kq_fd;
- struct kevent* kq_array; /* used for the eventlist*/
- struct kevent* kq_changes; /* used for the changelist */
- size_t kq_nchanges;
- size_t kq_array_size; /* array size */
- size_t kq_changes_size; /* size of the changes array */
- #endif
- #ifdef HAVE_DEVPOLL
- int dpoll_fd;
- #endif
- #ifdef HAVE_SELECT
- fd_set master_rset; /* read set */
- fd_set master_wset; /* write set */
- int max_fd_select; /* maximum select used fd */
- #endif
- };
- typedef struct io_wait_handler io_wait_h;
- /* get the corresponding fd_map structure pointer */
- #define get_fd_map(h, fd) (&(h)->fd_hash[(fd)])
- /* remove a fd_map structure from the hash; the pointer must be returned
- * by get_fd_map or hash_fd_map*/
- #define unhash_fd_map(pfm) \
- do{ \
- (pfm)->type=0 /*F_NONE */; \
- (pfm)->fd=-1; \
- }while(0)
- /* add a fd_map structure to the fd hash */
- static inline struct fd_map* hash_fd_map( io_wait_h* h,
- int fd,
- short events,
- fd_type type,
- void* data)
- {
- h->fd_hash[fd].fd=fd;
- h->fd_hash[fd].events=events;
- h->fd_hash[fd].type=type;
- h->fd_hash[fd].data=data;
- return &h->fd_hash[fd];
- }
- #ifdef HANDLE_IO_INLINE
- /* generic handle io routine; this must be defined in the including file
- * (faster than registering a callback pointer)
- *
- * params: fm - pointer to a fd hash entry
- * events - combinations of POLLIN, POLLOUT, POLLERR & POLLHUP
- * idx - index in the fd_array (or -1 if not known)
- * return: -1 on error
- * 0 on EAGAIN or when by some other way it is known that no more
- * io events are queued on the fd (the receive buffer is empty).
- * Useful to detect when there are no more io events queued for
- * sigio_rt, epoll_et, kqueue.
- * >0 on successful read from the fd (when there might be more io
- * queued -- the receive buffer might still be non-empty)
- */
- inline static int handle_io(struct fd_map* fm, short events, int idx);
- #else
- int handle_io(struct fd_map* fm, short events, int idx);
- #endif
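- /* Minimal handle_io() skeleton (illustrative only): shows the return value
- * contract documented above. F_CONN is a hypothetical fd type whose data
- * pointer would point to some connection structure; a real implementation
- * lives in the including file (e.g. tcp_main.c). */
- #if 0
- inline static int handle_io(struct fd_map* fm, short events, int idx)
- {
- char buf[4096];
- int nr;
- switch(fm->type){
- case F_CONN:
- nr=recv(fm->fd, buf, sizeof(buf), 0);
- if (nr==-1){
- if (errno==EAGAIN || errno==EWOULDBLOCK)
- return 0; /* no more io queued on this fd */
- return -1; /* error */
- }
- if (nr==0)
- return 0; /* EOF: nothing more to read */
- /* ... process the nr bytes in buf ... */
- return 1; /* success; more io might still be queued */
- default:
- return -1; /* unknown fd type */
- }
- }
- #endif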
- #ifdef HAVE_KQUEUE
- /*
- * kqueue specific function: register a change
- * (adds a change to the kevent change array, and if full flushes it first)
- *
- * TODO: check if the event already exists in the change list or if it's
- * complementary to an event in the list (e.g. EVFILT_WRITE, EV_DELETE
- * and EVFILT_WRITE, EV_ADD for the same fd).
- * returns: -1 on error, 0 on success
- */
- static inline int kq_ev_change(io_wait_h* h, int fd, int filter, int flag,
- void* data)
- {
- int n;
- int r;
- struct timespec tspec;
- if (h->kq_nchanges>=h->kq_changes_size){
- /* changes array full ! */
- LOG(L_WARN, "WARNING: kq_ev_change: kqueue changes array full"
- " trying to flush...\n");
- tspec.tv_sec=0;
- tspec.tv_nsec=0;
- again:
- n=kevent(h->kq_fd, h->kq_changes, h->kq_nchanges, 0, 0, &tspec);
- if (unlikely(n == -1)){
- if (unlikely(errno == EINTR)) goto again;
- else {
- /* for a detailed explanation of what follows see
- io_wait_loop_kqueue EV_ERROR case */
- if (unlikely(!(errno == EBADF || errno == ENOENT)))
- BUG("kq_ev_change: kevent flush changes failed"
- " (unexpected error): %s [%d]\n",
- strerror(errno), errno);
- /* ignore the error even if it's not an EBADF/ENOENT */
- /* one of the file descriptors is bad, probably already
- closed => try to apply changes one-by-one */
- for (r = 0; r < h->kq_nchanges; r++) {
- retry2:
- n = kevent(h->kq_fd, &h->kq_changes[r], 1, 0, 0, &tspec);
- if (n==-1) {
- if (unlikely(errno == EINTR))
- goto retry2;
- /* for a detailed explanation of what follows see
- io_wait_loop_kqueue EV_ERROR case */
- if (unlikely(!(errno == EBADF || errno == ENOENT)))
- BUG("kq_ev_change: kevent flush changes failed:"
- " (unexpected error) %s [%d] (%d/%lu)\n",
- strerror(errno), errno,
- r, (unsigned long)h->kq_nchanges);
- continue; /* skip over it */
- }
- }
- }
- }
- h->kq_nchanges=0; /* changes array is empty */
- }
- EV_SET(&h->kq_changes[h->kq_nchanges], fd, filter, flag, 0, 0,
- KEV_UDATA_CAST data);
- h->kq_nchanges++;
- return 0;
- }
- #endif
- /* generic io_watch_add function
- * Params:
- * h - pointer to initialized io_wait handle
- * fd - fd to watch
- * events - bitmap with the fd events for which the fd should be watched
- * (combination of POLLIN and POLLOUT)
- * type - fd type (non 0 value, returned in the call to handle_io)
- * data - pointer/private data returned in the handle_io call
- * returns 0 on success, -1 on error
- *
- * WARNING: handle_io() can be called immediately (from io_watch_add()) so
- * make sure that any dependent initialization (e.g. the data) is done before
- * calling io_watch_add
- *
- * this version should be faster than pointers to poll_method specific
- * functions (it avoids function calls, the overhead being only an extra
- * switch())*/
- inline static int io_watch_add( io_wait_h* h,
- int fd,
- short events,
- fd_type type,
- void* data)
- {
- /* helper macros */
- #define fd_array_setup(ev) \
- do{ \
- h->fd_array[h->fd_no].fd=fd; \
- h->fd_array[h->fd_no].events=(ev); /* useless for select */ \
- h->fd_array[h->fd_no].revents=0; /* useless for select */ \
- }while(0)
-
- #define set_fd_flags(f) \
- do{ \
- flags=fcntl(fd, F_GETFL); \
- if (flags==-1){ \
- LOG(L_ERR, "ERROR: io_watch_add: fnctl: GETFL failed:" \
- " %s [%d]\n", strerror(errno), errno); \
- goto error; \
- } \
- if (fcntl(fd, F_SETFL, flags|(f))==-1){ \
- LOG(L_ERR, "ERROR: io_watch_add: fnctl: SETFL" \
- " failed: %s [%d]\n", strerror(errno), errno); \
- goto error; \
- } \
- }while(0)
-
-
- struct fd_map* e;
- int flags;
- #ifdef HAVE_EPOLL
- struct epoll_event ep_event;
- #endif
- #ifdef HAVE_DEVPOLL
- struct pollfd pfd;
- #endif
- #if defined(HAVE_SIGIO_RT) || defined (HAVE_EPOLL)
- int n;
- #endif
- #if defined(HAVE_SIGIO_RT)
- int idx;
- int check_io;
- struct pollfd pf;
-
- check_io=0; /* set to 1 if we need to check for pre-existing queued
- io/data on the fd */
- idx=-1;
- #endif
- e=0;
- /* sanity checks */
- if (unlikely(fd==-1)){
- LOG(L_CRIT, "BUG: io_watch_add: fd is -1!\n");
- goto error;
- }
- if (unlikely((events&(POLLIN|POLLOUT))==0)){
- LOG(L_CRIT, "BUG: io_watch_add: invalid events: 0x%0x\n", events);
- goto error;
- }
- /* check if not too big */
- if (unlikely(h->fd_no>=h->max_fd_no)){
- LOG(L_CRIT, "ERROR: io_watch_add: maximum fd number exceeded:"
- " %d/%d\n", h->fd_no, h->max_fd_no);
- goto error;
- }
- DBG("DBG: io_watch_add(%p, %d, %d, %p), fd_no=%d\n",
- h, fd, type, data, h->fd_no);
- /* hash sanity check */
- e=get_fd_map(h, fd);
- if (unlikely(e && (e->type!=0 /*F_NONE*/))){
- LOG(L_ERR, "ERROR: io_watch_add: trying to overwrite entry %d"
- " watched for %x in the hash(%d, %d, %p) with (%d, %d, %p)\n",
- fd, events, e->fd, e->type, e->data, fd, type, data);
- e=0;
- goto error;
- }
-
- if (unlikely((e=hash_fd_map(h, fd, events, type, data))==0)){
- LOG(L_ERR, "ERROR: io_watch_add: failed to hash the fd %d\n", fd);
- goto error;
- }
- switch(h->poll_method){ /* faster than pointers to functions */
- case POLL_POLL:
- #ifdef POLLRDHUP
- /* listen to POLLRDHUP by default (if POLLIN) */
- events|=((int)!(events & POLLIN) - 1) & POLLRDHUP;
- #endif /* POLLRDHUP */
- fd_array_setup(events);
- set_fd_flags(O_NONBLOCK);
- break;
- #ifdef HAVE_SELECT
- case POLL_SELECT:
- fd_array_setup(events);
- if (likely(events & POLLIN))
- FD_SET(fd, &h->master_rset);
- if (unlikely(events & POLLOUT))
- FD_SET(fd, &h->master_wset);
- if (h->max_fd_select<fd) h->max_fd_select=fd;
- break;
- #endif
- #ifdef HAVE_SIGIO_RT
- case POLL_SIGIO_RT:
- fd_array_setup(events);
- /* re-setting O_ASYNC might be needed, if not done from
- * io_watch_del (or if somebody wants to add a fd which already
- * has O_ASYNC/F_SETSIG set on a duplicate)
- */
- /* set async & signal */
- if (fcntl(fd, F_SETOWN, my_pid())==-1){
- LOG(L_ERR, "ERROR: io_watch_add: fnctl: SETOWN"
- " failed: %s [%d]\n", strerror(errno), errno);
- goto error;
- }
- if (fcntl(fd, F_SETSIG, h->signo)==-1){
- LOG(L_ERR, "ERROR: io_watch_add: fnctl: SETSIG"
- " failed: %s [%d]\n", strerror(errno), errno);
- goto error;
- }
- /* set both non-blocking and async */
- set_fd_flags(O_ASYNC| O_NONBLOCK);
- #ifdef EXTRA_DEBUG
- DBG("io_watch_add: sigio_rt on f %d, signal %d to pid %d\n",
- fd, h->signo, my_pid());
- #endif
- /* empty the socket receive buffer: if the buffer is already
- * full there is no more space for new packets
- * => no more signals are ever generated;
- * also when moving fds, the freshly moved fd might already
- * have some bytes queued, and we want to get them now
- * rather than later -- andrei */
- idx=h->fd_no;
- check_io=1;
- break;
- #endif
- #ifdef HAVE_EPOLL
- case POLL_EPOLL_LT:
- ep_event.events=
- #ifdef POLLRDHUP
- /* listen for EPOLLRDHUP too */
- ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
- #else /* POLLRDHUP */
- (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
- #endif /* POLLRDHUP */
- (EPOLLOUT & ((int)!(events & POLLOUT)-1) );
- ep_event.data.ptr=e;
- again1:
- n=epoll_ctl(h->epfd, EPOLL_CTL_ADD, fd, &ep_event);
- if (unlikely(n==-1)){
- if (errno==EAGAIN) goto again1;
- LOG(L_ERR, "ERROR: io_watch_add: epoll_ctl failed: %s [%d]\n",
- strerror(errno), errno);
- goto error;
- }
- break;
- case POLL_EPOLL_ET:
- set_fd_flags(O_NONBLOCK);
- ep_event.events=
- #ifdef POLLRDHUP
- /* listen for EPOLLRDHUP too */
- ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
- #else /* POLLRDHUP */
- (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
- #endif /* POLLRDHUP */
- (EPOLLOUT & ((int)!(events & POLLOUT)-1) ) |
- EPOLLET;
- ep_event.data.ptr=e;
- again2:
- n=epoll_ctl(h->epfd, EPOLL_CTL_ADD, fd, &ep_event);
- if (unlikely(n==-1)){
- if (errno==EAGAIN) goto again2;
- LOG(L_ERR, "ERROR: io_watch_add: epoll_ctl failed: %s [%d]\n",
- strerror(errno), errno);
- goto error;
- }
- break;
- #endif
- #ifdef HAVE_KQUEUE
- case POLL_KQUEUE:
- if (likely( events & POLLIN)){
- if (unlikely(kq_ev_change(h, fd, EVFILT_READ, EV_ADD, e)==-1))
- goto error;
- }
- if (unlikely( events & POLLOUT)){
- if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE, EV_ADD, e)==-1))
- {
- if (likely(events & POLLIN)){
- kq_ev_change(h, fd, EVFILT_READ, EV_DELETE, 0);
- }
- goto error;
- }
- }
- break;
- #endif
- #ifdef HAVE_DEVPOLL
- case POLL_DEVPOLL:
- pfd.fd=fd;
- pfd.events=events;
- pfd.revents=0;
- again_devpoll:
- if (write(h->dpoll_fd, &pfd, sizeof(pfd))==-1){
- if (errno==EAGAIN) goto again_devpoll;
- LOG(L_ERR, "ERROR: io_watch_add: /dev/poll write failed:"
- "%s [%d]\n", strerror(errno), errno);
- goto error;
- }
- break;
- #endif
-
- default:
- LOG(L_CRIT, "BUG: io_watch_add: no support for poll method "
- " %s (%d)\n", poll_method_str[h->poll_method],
- h->poll_method);
- goto error;
- }
-
- h->fd_no++; /* "activate" changes, for epoll/kqueue/devpoll it
- has only informative value */
- #if defined(HAVE_SIGIO_RT)
- if (check_io){
- /* handle possible pre-existing events */
- pf.fd=fd;
- pf.events=events;
- check_io_again:
- n=0;
- while(e->type && ((n=poll(&pf, 1, 0))>0) &&
- (handle_io(e, pf.revents, idx)>0) &&
- (pf.revents & (e->events|POLLERR|POLLHUP)));
- if (unlikely(e->type && (n==-1))){
- if (errno==EINTR) goto check_io_again;
- LOG(L_ERR, "ERROR: io_watch_add: check_io poll: %s [%d]\n",
- strerror(errno), errno);
- }
- }
- #endif
- return 0;
- error:
- if (e) unhash_fd_map(e);
- return -1;
- #undef fd_array_setup
- #undef set_fd_flags
- }
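- /* Typical io_watch_add() call (illustrative; the io_h handler, the con
- * pointer and its fields, and the F_CONN type are hypothetical). Per the
- * warning above, the private data must be fully initialized before the call,
- * because handle_io() may fire immediately from within io_watch_add(). */
- #if 0
- con->state=S_CONN_INIT; /* finish initializing the data first */
- if (unlikely(io_watch_add(&io_h, con->fd, POLLIN, F_CONN, con)==-1)){
- LOG(L_ERR, "ERROR: failed to watch fd %d\n", con->fd);
- /* ... cleanup ... */
- }
- #endif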
- #define IO_FD_CLOSING 16
- /* parameters: h - handler
- * fd - file descriptor
- * index - index in the fd_array if known, -1 if not
- * (if index==-1 fd_array will be searched for the
- * corresponding fd* entry -- slower but unavoidable in
- * some cases). index is not used (no fd_array) for epoll,
- * /dev/poll and kqueue
- * flags - optimization flags, e.g. IO_FD_CLOSING, the fd was
- * or will shortly be closed, in some cases we can avoid
- * extra remove operations (e.g.: epoll, kqueue, sigio)
- * returns 0 if ok, -1 on error */
- inline static int io_watch_del(io_wait_h* h, int fd, int idx, int flags)
- {
-
- #define fix_fd_array \
- do{\
- if (unlikely(idx==-1)){ \
- /* fix idx if -1 and needed */ \
- for (idx=0; (idx<h->fd_no) && \
- (h->fd_array[idx].fd!=fd); idx++); \
- } \
- if (likely(idx<h->fd_no)){ \
- memmove(&h->fd_array[idx], &h->fd_array[idx+1], \
- (h->fd_no-(idx+1))*sizeof(*(h->fd_array))); \
- if ((idx<=h->crt_fd_array_idx) && (h->crt_fd_array_idx>=0)) \
- h->crt_fd_array_idx--; \
- } \
- }while(0)
-
- struct fd_map* e;
- int events;
- #ifdef HAVE_EPOLL
- int n;
- struct epoll_event ep_event;
- #endif
- #ifdef HAVE_DEVPOLL
- struct pollfd pfd;
- #endif
- #ifdef HAVE_SIGIO_RT
- int fd_flags;
- #endif
-
- if (unlikely((fd<0) || (fd>=h->max_fd_no))){
- LOG(L_CRIT, "BUG: io_watch_del: invalid fd %d, not in [0, %d) \n",
- fd, h->max_fd_no);
- goto error;
- }
- DBG("DBG: io_watch_del (%p, %d, %d, 0x%x) fd_no=%d called\n",
- h, fd, idx, flags, h->fd_no);
- e=get_fd_map(h, fd);
- /* more sanity checks */
- if (unlikely(e==0)){
- LOG(L_CRIT, "BUG: io_watch_del: no corresponding hash entry for %d\n",
- fd);
- goto error;
- }
- if (unlikely(e->type==0 /*F_NONE*/)){
- LOG(L_ERR, "ERROR: io_watch_del: trying to delete already erased"
- " entry %d in the hash(%d, %d, %p) flags %x)\n",
- fd, e->fd, e->type, e->data, flags);
- goto error;
- }
- events=e->events;
-
- switch(h->poll_method){
- case POLL_POLL:
- fix_fd_array;
- break;
- #ifdef HAVE_SELECT
- case POLL_SELECT:
- if (likely(events & POLLIN))
- FD_CLR(fd, &h->master_rset);
- if (unlikely(events & POLLOUT))
- FD_CLR(fd, &h->master_wset);
- if (unlikely(h->max_fd_select && (h->max_fd_select==fd)))
- /* we don't know the prev. max, so we just decrement it */
- h->max_fd_select--;
- fix_fd_array;
- break;
- #endif
- #ifdef HAVE_SIGIO_RT
- case POLL_SIGIO_RT:
- /* the O_ASYNC flag must be reset all the time, the fd
- * can be changed only if O_ASYNC is reset (if not and
- * the fd is a duplicate, you will get signals from the dup. fd
- * and not from the original, even if the dup. fd was closed
- * and the signals re-set on the original) -- andrei
- */
- /*if (!(flags & IO_FD_CLOSING)){*/
- /* reset ASYNC */
- fd_flags=fcntl(fd, F_GETFL);
- if (unlikely(fd_flags==-1)){
- LOG(L_ERR, "ERROR: io_watch_del: fnctl: GETFL failed:"
- " %s [%d]\n", strerror(errno), errno);
- goto error;
- }
- if (unlikely(fcntl(fd, F_SETFL, fd_flags&(~O_ASYNC))==-1)){
- LOG(L_ERR, "ERROR: io_watch_del: fnctl: SETFL"
- " failed: %s [%d]\n", strerror(errno), errno);
- goto error;
- }
- fix_fd_array; /* only on success */
- break;
- #endif
- #ifdef HAVE_EPOLL
- case POLL_EPOLL_LT:
- case POLL_EPOLL_ET:
- /* epoll doesn't seem to automatically remove sockets,
- * if the socket is a duplicate/moved and the original
- * is still open. The fd is removed from the epoll set
- * only when the original (and all the copies?) is/are
- * closed. This is probably a bug in epoll. --andrei */
- #ifdef EPOLL_NO_CLOSE_BUG
- if (!(flags & IO_FD_CLOSING)){
- #endif
- again_epoll:
- n=epoll_ctl(h->epfd, EPOLL_CTL_DEL, fd, &ep_event);
- if (unlikely(n==-1)){
- if (errno==EAGAIN) goto again_epoll;
- LOG(L_ERR, "ERROR: io_watch_del: removing fd from epoll "
- "list failed: %s [%d]\n", strerror(errno), errno);
- goto error;
- }
- #ifdef EPOLL_NO_CLOSE_BUG
- }
- #endif
- break;
- #endif
- #ifdef HAVE_KQUEUE
- case POLL_KQUEUE:
- if (!(flags & IO_FD_CLOSING)){
- if (likely(events & POLLIN)){
- if (unlikely(kq_ev_change(h, fd, EVFILT_READ,
- EV_DELETE, 0) ==-1)){
- /* try to delete the write filter anyway */
- if (events & POLLOUT){
- kq_ev_change(h, fd, EVFILT_WRITE, EV_DELETE, 0);
- }
- goto error;
- }
- }
- if (unlikely(events & POLLOUT)){
- if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE,
- EV_DELETE, 0) ==-1))
- goto error;
- }
- }
- break;
- #endif
- #ifdef HAVE_DEVPOLL
- case POLL_DEVPOLL:
- /* for /dev/poll the closed fds _must_ be removed
- (they are not removed automatically on close()) */
- pfd.fd=fd;
- pfd.events=POLLREMOVE;
- pfd.revents=0;
- again_devpoll:
- if (write(h->dpoll_fd, &pfd, sizeof(pfd))==-1){
- if (errno==EINTR) goto again_devpoll;
- LOG(L_ERR, "ERROR: io_watch_del: removing fd from "
- "/dev/poll failed: %s [%d]\n",
- strerror(errno), errno);
- goto error;
- }
- break;
- #endif
- default:
- LOG(L_CRIT, "BUG: io_watch_del: no support for poll method "
- " %s (%d)\n", poll_method_str[h->poll_method],
- h->poll_method);
- goto error;
- }
- unhash_fd_map(e); /* only on success */
- h->fd_no--;
- return 0;
- error:
- return -1;
- #undef fix_fd_array
- }
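- /* Typical io_watch_del() call right before closing a fd (illustrative; io_h
- * and con are hypothetical). idx==-1 makes the function search fd_array
- * itself, and IO_FD_CLOSING lets some backends (epoll, kqueue, sigio) skip
- * the explicit remove operation. */
- #if 0
- if (unlikely(io_watch_del(&io_h, con->fd, -1, IO_FD_CLOSING)==-1))
- LOG(L_ERR, "ERROR: failed to stop watching fd %d\n", con->fd);
- close(con->fd);
- #endif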
- /* parameters: h - handler
- * fd - file descriptor
- * events - new events to watch for
- * idx - index in the fd_array if known, -1 if not
- * (if index==-1 fd_array will be searched for the
- * corresponding fd* entry -- slower but unavoidable in
- * some cases). index is not used (no fd_array) for epoll,
- * /dev/poll and kqueue
- * returns 0 if ok, -1 on error */
- inline static int io_watch_chg(io_wait_h* h, int fd, short events, int idx )
- {
-
- #define fd_array_chg(ev) \
- do{\
- if (unlikely(idx==-1)){ \
- /* fix idx if -1 and needed */ \
- for (idx=0; (idx<h->fd_no) && \
- (h->fd_array[idx].fd!=fd); idx++); \
- } \
- if (likely(idx<h->fd_no)){ \
- h->fd_array[idx].events=(ev); \
- } \
- }while(0)
-
- struct fd_map* e;
- int add_events;
- int del_events;
- #ifdef HAVE_DEVPOLL
- struct pollfd pfd;
- #endif
- #ifdef HAVE_EPOLL
- int n;
- struct epoll_event ep_event;
- #endif
-
- if (unlikely((fd<0) || (fd>=h->max_fd_no))){
- LOG(L_CRIT, "BUG: io_watch_chg: invalid fd %d, not in [0, %d) \n",
- fd, h->max_fd_no);
- goto error;
- }
- if (unlikely((events&(POLLIN|POLLOUT))==0)){
- LOG(L_CRIT, "BUG: io_watch_chg: invalid events: 0x%0x\n", events);
- goto error;
- }
- DBG("DBG: io_watch_chg (%p, %d, 0x%x, 0x%x) fd_no=%d called\n",
- h, fd, events, idx, h->fd_no);
- e=get_fd_map(h, fd);
- /* more sanity checks */
- if (unlikely(e==0)){
- LOG(L_CRIT, "BUG: io_watch_chg: no corresponding hash entry for %d\n",
- fd);
- goto error;
- }
- if (unlikely(e->type==0 /*F_NONE*/)){
- LOG(L_ERR, "ERROR: io_watch_chg: trying to change an already erased"
- " entry %d in the hash(%d, %d, %p) )\n",
- fd, e->fd, e->type, e->data);
- goto error;
- }
-
- add_events=events & ~e->events;
- del_events=e->events & ~events;
- switch(h->poll_method){
- case POLL_POLL:
- #ifdef POLLRDHUP
- fd_array_chg(events |
- /* listen to POLLRDHUP by default (if POLLIN) */
- (((int)!(events & POLLIN) - 1) & POLLRDHUP)
- );
- #else /* POLLRDHUP */
- fd_array_chg(events);
- #endif /* POLLRDHUP */
- break;
- #ifdef HAVE_SELECT
- case POLL_SELECT:
- fd_array_chg(events);
- if (unlikely(del_events & POLLIN))
- FD_CLR(fd, &h->master_rset);
- else if (unlikely(add_events & POLLIN))
- FD_SET(fd, &h->master_rset);
- if (likely(del_events & POLLOUT))
- FD_CLR(fd, &h->master_wset);
- else if (likely(add_events & POLLOUT))
- FD_SET(fd, &h->master_wset);
- break;
- #endif
- #ifdef HAVE_SIGIO_RT
- case POLL_SIGIO_RT:
- fd_array_chg(events);
- /* no need for check_io, since SIGIO_RT listens by default for all
- * the events */
- break;
- #endif
- #ifdef HAVE_EPOLL
- case POLL_EPOLL_LT:
- ep_event.events=
- #ifdef POLLRDHUP
- /* listen for EPOLLRDHUP too */
- ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
- #else /* POLLRDHUP */
- (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
- #endif /* POLLRDHUP */
- (EPOLLOUT & ((int)!(events & POLLOUT)-1) );
- ep_event.data.ptr=e;
- again_epoll_lt:
- n=epoll_ctl(h->epfd, EPOLL_CTL_MOD, fd, &ep_event);
- if (unlikely(n==-1)){
- if (errno==EAGAIN) goto again_epoll_lt;
- LOG(L_ERR, "ERROR: io_watch_chg: modifying epoll events"
- " failed: %s [%d]\n", strerror(errno), errno);
- goto error;
- }
- break;
- case POLL_EPOLL_ET:
- ep_event.events=
- #ifdef POLLRDHUP
- /* listen for EPOLLRDHUP too */
- ((EPOLLIN|EPOLLRDHUP) & ((int)!(events & POLLIN)-1) ) |
- #else /* POLLRDHUP */
- (EPOLLIN & ((int)!(events & POLLIN)-1) ) |
- #endif /* POLLRDHUP */
- (EPOLLOUT & ((int)!(events & POLLOUT)-1) ) |
- EPOLLET;
- ep_event.data.ptr=e;
- again_epoll_et:
- n=epoll_ctl(h->epfd, EPOLL_CTL_MOD, fd, &ep_event);
- if (unlikely(n==-1)){
- if (errno==EAGAIN) goto again_epoll_et;
- LOG(L_ERR, "ERROR: io_watch_chg: modifying epoll events"
- " failed: %s [%d]\n", strerror(errno), errno);
- goto error;
- }
- break;
- #endif
- #ifdef HAVE_KQUEUE
- case POLL_KQUEUE:
- if (unlikely(del_events & POLLIN)){
- if (unlikely(kq_ev_change(h, fd, EVFILT_READ,
- EV_DELETE, 0) ==-1))
- goto error;
- }else if (unlikely(add_events & POLLIN)){
- if (unlikely(kq_ev_change(h, fd, EVFILT_READ, EV_ADD, e) ==-1))
- goto error;
- }
- if (likely(del_events & POLLOUT)){
- if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE,
- EV_DELETE, 0) ==-1))
- goto error;
- }else if (likely(add_events & POLLOUT)){
- if (unlikely(kq_ev_change(h, fd, EVFILT_WRITE, EV_ADD, e)==-1))
- goto error;
- }
- break;
- #endif
- #ifdef HAVE_DEVPOLL
- case POLL_DEVPOLL:
- /* for /dev/poll the closed fds _must_ be removed
- (they are not removed automatically on close()) */
- pfd.fd=fd;
- pfd.events=POLLREMOVE;
- pfd.revents=0;
- again_devpoll1:
- if (unlikely(write(h->dpoll_fd, &pfd, sizeof(pfd))==-1)){
- if (errno==EINTR) goto again_devpoll1;
- LOG(L_ERR, "ERROR: io_watch_chg: removing fd from "
- "/dev/poll failed: %s [%d]\n",
- strerror(errno), errno);
- goto error;
- }
- again_devpoll2:
- pfd.events=events;
- pfd.revents=0;
- if (unlikely(write(h->dpoll_fd, &pfd, sizeof(pfd))==-1)){
- if (errno==EINTR) goto again_devpoll2;
- LOG(L_ERR, "ERROR: io_watch_chg: re-adding fd to "
- "/dev/poll failed: %s [%d]\n",
- strerror(errno), errno);
- /* error re-adding the fd => mark it as removed/unhash */
- unhash_fd_map(e);
- goto error;
- }
- break;
- #endif
- default:
- LOG(L_CRIT, "BUG: io_watch_chg: no support for poll method "
- " %s (%d)\n", poll_method_str[h->poll_method],
- h->poll_method);
- goto error;
- }
- e->events=events; /* only on success */
- return 0;
- error:
- return -1;
- #undef fd_array_chg
- }
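- /* Typical io_watch_chg() call (illustrative; io_h and con are hypothetical):
- * switch an already watched fd from read- to write-watching, e.g. after a
- * partial non-blocking write that has to be retried when the socket becomes
- * writable again. */
- #if 0
- if (unlikely(io_watch_chg(&io_h, con->fd, POLLOUT, -1)==-1))
- LOG(L_ERR, "ERROR: failed to change the watched events for fd %d\n",
- con->fd);
- #endif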
- /* io_wait_loop_x style function.
- * wait for io using poll()
- * params: h - io_wait handle
- * t - timeout in s
- * repeat - if !=0 handle_io will be called until it returns <=0
- * returns: number of IO events handled on success (can be 0), -1 on error
- */
- inline static int io_wait_loop_poll(io_wait_h* h, int t, int repeat)
- {
- int n, r;
- int ret;
- struct fd_map* fm;
-
- again:
- ret=n=poll(h->fd_array, h->fd_no, t*1000);
- if (n==-1){
- if (errno==EINTR) goto again; /* signal, ignore it */
- else{
- LOG(L_ERR, "ERROR:io_wait_loop_poll: poll: %s [%d]\n",
- strerror(errno), errno);
- goto error;
- }
- }
- for (r=0; (r<h->fd_no) && n; r++){
- fm=get_fd_map(h, h->fd_array[r].fd);
- if (h->fd_array[r].revents & (fm->events|POLLERR|POLLHUP)){
- n--;
- /* sanity checks */
- if (unlikely((h->fd_array[r].fd >= h->max_fd_no)||
- (h->fd_array[r].fd < 0))){
- LOG(L_CRIT, "BUG: io_wait_loop_poll: bad fd %d "
- "(no in the 0 - %d range)\n",
- h->fd_array[r].fd, h->max_fd_no);
- /* try to continue anyway */
- h->fd_array[r].events=0; /* clear the events */
- continue;
- }
- h->crt_fd_array_idx=r;
- /* repeat handle_io if repeat, fd still watched (not deleted
- * inside handle_io), handle_io returns that there's still
- * IO and the fd is still watched for the triggering event */
- while(fm->type &&
- (handle_io(fm, h->fd_array[r].revents, r) > 0) &&
- repeat && ((fm->events|POLLERR|POLLHUP) &
- h->fd_array[r].revents));
- r=h->crt_fd_array_idx; /* can change due to io_watch_del(fd)
- array shifting */
- }
- }
- error:
- return ret;
- }
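- /* Sketch of a main event loop dispatching on the configured poll method
- * (illustrative; io_loop_forever and the 10 s timeout are made up for the
- * example). The other io_wait_loop_* variants defined below share this
- * calling convention, except io_wait_loop_sigio_rt(), which takes no repeat
- * flag. */
- #if 0
- static void io_loop_forever(io_wait_h* h)
- {
- for(;;){
- switch(h->poll_method){
- case POLL_POLL:
- io_wait_loop_poll(h, 10, 0);
- break;
- #ifdef HAVE_EPOLL
- case POLL_EPOLL_LT:
- case POLL_EPOLL_ET:
- io_wait_loop_epoll(h, 10, 0);
- break;
- #endif
- /* ... similar cases for select, sigio_rt, kqueue and /dev/poll ... */
- default:
- LOG(L_CRIT, "BUG: unsupported poll method %s (%d)\n",
- poll_method_str[h->poll_method], h->poll_method);
- return;
- }
- }
- }
- #endif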
- #ifdef HAVE_SELECT
- /* wait for io using select */
- inline static int io_wait_loop_select(io_wait_h* h, int t, int repeat)
- {
- fd_set sel_rset;
- fd_set sel_wset;
- int n, ret;
- struct timeval timeout;
- int r;
- struct fd_map* fm;
- int revents;
-
- again:
- sel_rset=h->master_rset;
- sel_wset=h->master_wset;
- timeout.tv_sec=t;
- timeout.tv_usec=0;
- ret=n=select(h->max_fd_select+1, &sel_rset, &sel_wset, 0, &timeout);
- if (n<0){
- if (errno==EINTR) goto again; /* just a signal */
- LOG(L_ERR, "ERROR: io_wait_loop_select: select: %s [%d]\n",
- strerror(errno), errno);
- n=0;
- /* continue */
- }
- /* use poll fd array */
- for(r=0; (r<h->fd_no) && n; r++){
- revents=0;
- if (likely(FD_ISSET(h->fd_array[r].fd, &sel_rset)))
- revents|=POLLIN;
- if (unlikely(FD_ISSET(h->fd_array[r].fd, &sel_wset)))
- revents|=POLLOUT;
- if (unlikely(revents)){
- h->crt_fd_array_idx=r;
- fm=get_fd_map(h, h->fd_array[r].fd);
- while(fm->type && (fm->events & revents) &&
- (handle_io(fm, revents, r)>0) && repeat);
- r=h->crt_fd_array_idx; /* can change due to io_watch_del(fd)
- array shifting */
- n--;
- }
- };
- return ret;
- }
- #endif
- #ifdef HAVE_EPOLL
- inline static int io_wait_loop_epoll(io_wait_h* h, int t, int repeat)
- {
- int n, r;
- struct fd_map* fm;
- int revents;
-
- again:
- n=epoll_wait(h->epfd, h->ep_array, h->fd_no, t*1000);
- if (unlikely(n==-1)){
- if (errno==EINTR) goto again; /* signal, ignore it */
- else{
- LOG(L_ERR, "ERROR:io_wait_loop_epoll: "
- "epoll_wait(%d, %p, %d, %d): %s [%d]\n",
- h->epfd, h->ep_array, h->fd_no, t*1000,
- strerror(errno), errno);
- goto error;
- }
- }
- #if 0
- if (n>1){
- for(r=0; r<n; r++){
- LOG(L_ERR, "WARNING: ep_array[%d]= %x, %p\n",
- r, h->ep_array[r].events, h->ep_array[r].data.ptr);
- }
- }
- #endif
- for (r=0; r<n; r++){
- revents= (POLLIN & (!(h->ep_array[r].events & (EPOLLIN|EPOLLPRI))
- -1)) |
- (POLLOUT & (!(h->ep_array[r].events & EPOLLOUT)-1)) |
- (POLLERR & (!(h->ep_array[r].events & EPOLLERR)-1)) |
- (POLLHUP & (!(h->ep_array[r].events & EPOLLHUP)-1))
- #ifdef POLLRDHUP
- | (POLLRDHUP & (!(h->ep_array[r].events & EPOLLRDHUP)-1))
- #endif
- ;
- if (likely(revents)){
- fm=(struct fd_map*)h->ep_array[r].data.ptr;
- while(fm->type && ((fm->events|POLLERR|POLLHUP) & revents) &&
- (handle_io(fm, revents, -1)>0) && repeat);
- }else{
- LOG(L_ERR, "ERROR:io_wait_loop_epoll: unexpected event %x"
- " on %d/%d, data=%p\n", h->ep_array[r].events,
- r+1, n, h->ep_array[r].data.ptr);
- }
- }
- error:
- return n;
- }
- #endif
- #ifdef HAVE_KQUEUE
- inline static int io_wait_loop_kqueue(io_wait_h* h, int t, int repeat)
- {
- int n, r;
- struct timespec tspec;
- struct fd_map* fm;
- int orig_changes;
- int apply_changes;
- int revents;
-
- tspec.tv_sec=t;
- tspec.tv_nsec=0;
- orig_changes=h->kq_nchanges;
- apply_changes=orig_changes;
- do {
- again:
- n=kevent(h->kq_fd, h->kq_changes, apply_changes, h->kq_array,
- h->kq_array_size, &tspec);
- if (unlikely(n==-1)){
- if (unlikely(errno==EINTR)) goto again; /* signal, ignore it */
- else {
- /* for a detailed explanation of what follows see below
- the EV_ERROR case */
- if (unlikely(!(errno==EBADF || errno==ENOENT)))
- BUG("io_wait_loop_kqueue: kevent: unexpected error"
- " %s [%d]\n", strerror(errno), errno);
- /* some of the FDs in kq_changes are bad (already closed)
- and there is not enough space in kq_array to return all
- of them back */
- apply_changes = h->kq_array_size;
- goto again;
- }
- }
- /* remove applied changes */
- h->kq_nchanges -= apply_changes;
- if (unlikely(apply_changes < orig_changes)) {
- orig_changes -= apply_changes;
- memmove(&h->kq_changes[0], &h->kq_changes[apply_changes],
- sizeof(h->kq_changes[0])*h->kq_nchanges);
- apply_changes = (orig_changes < h->kq_array_size) ? orig_changes :
- h->kq_array_size;
- } else {
- orig_changes = 0;
- apply_changes = 0;
- }
- for (r=0; r<n; r++){
- #ifdef EXTRA_DEBUG
- DBG("DBG: kqueue: event %d/%d: fd=%d, udata=%lx, flags=0x%x\n",
- r, n, h->kq_array[r].ident, (long)h->kq_array[r].udata,
- h->kq_array[r].flags);
- #endif
- if (unlikely((h->kq_array[r].flags & EV_ERROR) ||
- h->kq_array[r].udata == 0)){
- /* error in changes: we ignore it if it has to do with a
- bad fd or udata==0. It can be caused by trying to remove an
- already closed fd: race between adding something to the
- changes array, close() and applying the changes (EBADF).
- E.g. for ser tcp: tcp_main sends a fd to child for reading
- => deletes it from the watched fds => the changes array
- will contain an EV_DELETE for it. Before the changes
- are applied (they are at the end of the main io_wait loop,
- after all the fd events were processed), a CON_ERR sent
- to tcp_main by a sender (send fail) is processed and causes
- the fd to be closed. When the changes are applied =>
- error for the EV_DELETE attempt of a closed fd.
- Something similar can happen when a fd is scheduled
- for removal, is close()'ed before being removed and
- re-opened (a new socket gets the same fd). When the
- watched fd changes are applied, the fd will be valid
- (so no EBADF), but it is no longer watched => ENOENT.
- We report a BUG for the other errors (there's nothing
- constructive we can do if we get an error we don't know
- how to handle), but apart from that we ignore it on the
- grounds that it is better to apply the rest of the changes
- rather than dropping all of them.
- */
- /*
- example EV_ERROR for trying to delete a read watched fd,
- that was already closed:
- {
- ident = 63, [fd]
- filter = -1, [EVFILT_READ]
- flags = 16384, [EV_ERROR]
- fflags = 0,
- data = 9, [errno = EBADF]
- udata = 0x0
- }
- */
- if (h->kq_array[r].data != EBADF &&
- h->kq_array[r].data != ENOENT)
- BUG("io_wait_loop_kqueue: kevent unexpected error on "
- "fd %ld udata %lx: %s [%ld]\n",
- (long)h->kq_array[r].ident,
- (long)h->kq_array[r].udata,
- strerror(h->kq_array[r].data),
- (long)h->kq_array[r].data);
- }else{
- fm=(struct fd_map*)h->kq_array[r].udata;
- if (likely(h->kq_array[r].filter==EVFILT_READ)){
- revents=POLLIN |
- (((int)!(h->kq_array[r].flags & EV_EOF)-1)&POLLHUP) |
- (((int)!((h->kq_array[r].flags & EV_EOF) &&
- h->kq_array[r].fflags != 0) - 1)&POLLERR);
- while(fm->type && (fm->events & revents) &&
- (handle_io(fm, revents, -1)>0) && repeat);
- }else if (h->kq_array[r].filter==EVFILT_WRITE){
- revents=POLLOUT |
- (((int)!(h->kq_array[r].flags & EV_EOF)-1)&POLLHUP) |
- (((int)!((h->kq_array[r].flags & EV_EOF) &&
- h->kq_array[r].fflags != 0) - 1)&POLLERR);
- while(fm->type && (fm->events & revents) &&
- (handle_io(fm, revents, -1)>0) && repeat);
- }else{
- BUG("io_wait_loop_kqueue: unknown filter: kqueue: event "
- "%d/%d: fd=%d, filter=%d, flags=0x%x, fflags=0x%x,"
- " data=%lx, udata=%lx\n",
- r, n, (int)h->kq_array[r].ident, (int)h->kq_array[r].filter,
- h->kq_array[r].flags, h->kq_array[r].fflags,
- (unsigned long)h->kq_array[r].data,
- (unsigned long)h->kq_array[r].udata);
- }
- }
- }
- } while(unlikely(orig_changes));
- return n;
- }
- #endif
- #ifdef HAVE_SIGIO_RT
- /* sigio rt version has no repeat (it doesn't make sense)*/
- inline static int io_wait_loop_sigio_rt(io_wait_h* h, int t)
- {
- int n;
- int ret;
- struct timespec ts;
- siginfo_t siginfo;
- int sigio_band;
- int sigio_fd;
- struct fd_map* fm;
- int revents;
- #ifdef SIGINFO64_WORKARROUND
- int* pi;
- #endif
-
-
- ret=1; /* 1 event per call normally */
- ts.tv_sec=t;
- ts.tv_nsec=0;
- if (unlikely(!sigismember(&h->sset, h->signo) ||
- !sigismember(&h->sset, SIGIO))) {
- LOG(L_CRIT, "BUG: io_wait_loop_sigio_rt: the signal mask"
- " is not properly set!\n");
- goto error;
- }
- again:
- n=sigtimedwait(&h->sset, &siginfo, &ts);
- if (unlikely(n==-1)){
- if (errno==EINTR) goto again; /* some other signal, ignore it */
- else if (errno==EAGAIN){ /* timeout */
- ret=0;
- goto end;
- }else{
- LOG(L_ERR, "ERROR: io_wait_loop_sigio_rt: sigtimed_wait"
- " %s [%d]\n", strerror(errno), errno);
- goto error;
- }
- }
- if (likely(n!=SIGIO)){
- #ifdef SIGINFO64_WORKARROUND
- /* on linux siginfo.si_band is defined as long in userspace
- * and as int in kernel (< 2.6.5) => on 64 bits things will break!
- * (si_band will include si_fd, and si_fd will contain
- * garbage).
- * see /usr/src/linux/include/asm-generic/siginfo.h and
- * /usr/include/bits/siginfo.h
- * On newer kernels this is fixed (si_band is long in the kernel too).
- * -- andrei */
- if ((_os_ver<0x020605) && (sizeof(siginfo.si_band)>sizeof(int))){
- pi=(int*)(void*)&siginfo.si_band; /* avoid type punning warnings */
- sigio_band=*pi;
- sigio_fd=*(pi+1);
- }else
- #endif
- {
- sigio_band=siginfo.si_band;
- sigio_fd=siginfo.si_fd;
- }
- if (unlikely(siginfo.si_code==SI_SIGIO)){
- /* old style, we don't know the event (linux 2.2.?) */
- LOG(L_WARN, "WARNING: io_wait_loop_sigio_rt: old style sigio"
- " interface\n");
- fm=get_fd_map(h, sigio_fd);
- /* we can have queued signals generated by fds not watched
- * any more, or by fds in transition to a child => ignore them */
- if (fm->type)
- handle_io(fm, POLLIN|POLLOUT, -1);
- }else{
- /* si_code contains the SIGPOLL reason: POLL_IN, POLL_OUT,
- * POLL_MSG, POLL_ERR, POLL_PRI or POLL_HUP
- * and si_band the translated poll event bitmap:
- * POLLIN|POLLRDNORM (=POLL_IN),
- * POLLOUT|POLLWRNORM|POLLWRBAND (=POLL_OUT),
- * POLLIN|POLLRDNORM|POLLMSG (=POLL_MSG),
- * POLLERR (=POLL_ERR),
- * POLLPRI|POLLRDBAND (=POLL_PRI),
- * POLLHUP|POLLERR (=POLL_HUP)
- * [linux 2.6.22 fs/fcntl.c:447]
- */
- #ifdef EXTRA_DEBUG
- DBG("io_wait_loop_sigio_rt: siginfo: signal=%d (%d),"
- " si_code=%d, si_band=0x%x,"
- " si_fd=%d\n",
- siginfo.si_signo, n, siginfo.si_code,
- (unsigned)sigio_band,
- sigio_fd);
- #endif
- /* on some errors (e.g. when receiving a TCP RST), sigio_band will
- * be set to 0x08 (POLLERR) or 0x18 (POLLERR|POLLHUP - on stream
- * unix socket close), so better to catch all events --andrei */
- if (likely(sigio_band)){
- fm=get_fd_map(h, sigio_fd);
- revents=sigio_band;
- /* fix revents==POLLPRI case */
- revents |= (!(revents & POLLPRI)-1) & POLLIN;
- /* we can have queued signals generated by fds not watched
- * any more, or by fds in transition to a child
- * => ignore them */
- if (fm->type && ((fm->events|POLLERR|POLLHUP) & revents))
- handle_io(fm, revents, -1);
- else
- DBG("WARNING: io_wait_loop_sigio_rt: ignoring event"
- " %x on fd %d, watching for %x, si_code=%x "
- "(fm->type=%d, fm->fd=%d, fm->data=%p)\n",
- sigio_band, sigio_fd, fm->events, siginfo.si_code,
- fm->type, fm->fd, fm->data);
- }else{
- LOG(L_ERR, "ERROR: io_wait_loop_sigio_rt: unexpected event"
- " on fd %d: %x\n", sigio_fd, sigio_band);
- }
- }
- }else{
- /* signal queue overflow
- * TODO: increase signal queue size: 2.4x /proc/.., 2.6x -rlimits */
- LOG(L_WARN, "WARNING: io_wait_loop_sigio_rt: signal queue overflowed"
- "- falling back to poll\n");
- /* clear real-time signal queue
- * both SIG_IGN and SIG_DFL are needed; it doesn't work
- * with SIG_DFL alone */
- if (signal(h->signo, SIG_IGN)==SIG_ERR){
- LOG(L_CRIT, "BUG: do_poll: couldn't reset signal to IGN\n");
- }
-
- if (signal(h->signo, SIG_DFL)==SIG_ERR){
- LOG(L_CRIT, "BUG: do_poll: couldn't reset signal to DFL\n");
- }
- /* falling back to normal poll */
- ret=io_wait_loop_poll(h, -1, 1);
- }
- end:
- return ret;
- error:
- return -1;
- }
- #endif
- #ifdef HAVE_DEVPOLL
- inline static int io_wait_loop_devpoll(io_wait_h* h, int t, int repeat)
- {
- int n, r;
- int ret;
- struct dvpoll dpoll;
- struct fd_map* fm;
- dpoll.dp_timeout=t*1000;
- dpoll.dp_nfds=h->fd_no;
- dpoll.dp_fds=h->fd_array;
- again:
- ret=n=ioctl(h->dpoll_fd, DP_POLL, &dpoll);
- if (unlikely(n==-1)){
- if (errno==EINTR) goto again; /* signal, ignore it */
- else{
- LOG(L_ERR, "ERROR:io_wait_loop_devpoll: ioctl: %s [%d]\n",
- strerror(errno), errno);
- goto error;
- }
- }
- for (r=0; r< n; r++){
- if (h->fd_array[r].revents & (POLLNVAL|POLLERR)){
- LOG(L_ERR, "ERROR: io_wait_loop_devpoll: pollinval returned"
- " for fd %d, revents=%x\n",
- h->fd_array[r].fd, h->fd_array[r].revents);
- }
- /* POLLIN|POLLHUP just go through */
- fm=get_fd_map(h, h->fd_array[r].fd);
- while(fm->type && (fm->events & h->fd_array[r].revents) &&
- (handle_io(fm, h->fd_array[r].revents, r) > 0) && repeat);
- }
- error:
- return ret;
- }
- #endif
- /* init */
- /* initializes the static vars/arrays
- * params: h - pointer to the io_wait_h that will be initialized
- * max_fd - maximum allowed fd number
- * poll_m - poll method (0 for automatic best fit)
- */
- int init_io_wait(io_wait_h* h, int max_fd, enum poll_types poll_method);
- /* destroys everything init_io_wait allocated */
- void destroy_io_wait(io_wait_h* h);
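- /* End-to-end lifecycle sketch (illustrative; MAX_FDS, io_h and the error
- * label are hypothetical, and the usual 0 on success / -1 on error convention
- * is assumed for init_io_wait()): initialize the handler, register fds with
- * io_watch_add(), run one of the io_wait_loop_* functions, then clean up. */
- #if 0
- io_wait_h io_h;
- if (init_io_wait(&io_h, MAX_FDS, 0)<0) /* 0 => automatic best fit */
- goto error;
- /* ... io_watch_add() the initial fds, then run the event loop
- * (see the dispatch sketch after io_wait_loop_poll() above) ... */
- destroy_io_wait(&io_h);
- #endif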
- #endif