/* tcp_main.c */
  1. /*
  2. * $Id$
  3. *
  4. * Copyright (C) 2001-2003 FhG Fokus
  5. *
  6. * This file is part of ser, a free SIP server.
  7. *
  8. * ser is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation; either version 2 of the License, or
  11. * (at your option) any later version
  12. *
  13. * For a license to use the ser software under conditions
  14. * other than those described here, or to purchase support for this
  15. * software, please contact iptel.org by e-mail at the following addresses:
  16. * info@iptel.org
  17. *
  18. * ser is distributed in the hope that it will be useful,
  19. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  21. * GNU General Public License for more details.
  22. *
  23. * You should have received a copy of the GNU General Public License
  24. * along with this program; if not, write to the Free Software
  25. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  26. */
  27. /*
  28. * History:
  29. * --------
  30. * 2002-11-29 created by andrei
  31. * 2002-12-11 added tcp_send (andrei)
  32. * 2003-01-20 locking fixes, hashtables (andrei)
  33. * 2003-02-20 s/lock_t/gen_lock_t/ to avoid a conflict on solaris (andrei)
  34. * 2003-02-25 Nagle is disabled if -DDISABLE_NAGLE (andrei)
  35. * 2003-03-29 SO_REUSEADDR before calling bind to allow
  36. * server restart, Nagle set on the (hopefully)
  37. * correct socket (jiri)
  38. * 2003-03-31 always try to find the corresponding tcp listen socket for
  39. * a temp. socket and store it in *->bind_address: added
  40. * find_tcp_si, modified tcpconn_connect (andrei)
  41. * 2003-04-14 set sockopts to TOS low delay (andrei)
  42. * 2003-06-30 moved tcp new connect checking & handling to
  43. * handle_new_connect (andrei)
  44. * 2003-07-09 tls_close called before closing the tcp connection (andrei)
  45. * 2003-10-24 converted to the new socket_info lists (andrei)
  46. * 2003-10-27 tcp port aliases support added (andrei)
  47. * 2003-11-04 always lock before manipulating refcnt; sendchild
  48. * does not inc refcnt by itself anymore (andrei)
  49. * 2003-11-07 different unix sockets are used for fd passing
  50. * to/from readers/writers (andrei)
  51. * 2003-11-17 handle_new_connect & tcp_connect will close the
  52. * new socket if tcpconn_new return 0 (e.g. out of mem) (andrei)
  53. * 2003-11-28 tcp_blocking_write & tcp_blocking_connect added (andrei)
  54. * 2004-11-08 dropped find_tcp_si and replaced with find_si (andrei)
  55. * 2005-06-07 new tcp optimized code, supports epoll (LT), sigio + real time
  56. * signals, poll & select (andrei)
  57. * 2005-06-26 *bsd kqueue support (andrei)
  58. * 2005-07-04 solaris /dev/poll support (andrei)
  59. * 2005-07-08 tcp_max_connections, tcp_connection_lifetime, don't accept
  60. * more connections if tcp_max_connections is exceeded (andrei)
  61. * 2005-10-21 cleanup all the open connections on exit
  62. * decrement the no. of open connections on timeout too (andrei)
      * 2006-01-30 queue send_fd request and execute them at the end of the
  63. * poll loop (#ifdef) (andrei)
  64. * process all children requests, before attempting to send
  65. * them new stuff (fixes some deadlocks) (andrei)
  66. * 2006-02-03 timers are run only once per s (andrei)
  67. * tcp children fds can be non-blocking; send fds are queued on
  68. * EAGAIN; lots of bug fixes (andrei)
  69. * 2006-02-06 better tcp_max_connections checks, tcp_connections_no moved to
  70. * shm (andrei)
  71. * 2006-04-12 tcp_send() changed to use struct dest_info (andrei)
  72. * 2006-11-02 switched to atomic ops for refcnt, locking improvements
  73. * (andrei)
  74. * 2006-11-04 switched to raw ticks (to fix conversion errors which could
  75. * result in inf. lifetime) (andrei)
  76. * 2007-07-25 tcpconn_connect can now bind the socket on a specified
  77. * source addr/port (andrei)
  78. * 2007-07-26 tcp_send() and tcpconn_get() can now use a specified source
  79. * addr./port (andrei)
  80. * 2007-08-23 getsockname() for INADDR_ANY(SI_IS_ANY) sockets (andrei)
  81. * 2007-08-27 split init_sock_opt into a lightweight init_sock_opt_accept()
  82. * used when accepting connections and init_sock_opt used for
  83. * connect/ new sockets (andrei)
  84. * 2007-11-22 always add the connection & clear the corresponding flags before
  85. * io_watch_add-ing its fd - it's safer this way (andrei)
  86. * 2007-11-26 improved tcp timers: switched to local_timer (andrei)
  87. * 2007-11-27 added send fd cache and reader fd reuse (andrei)
  88. * 2007-11-28 added support for TCP_DEFER_ACCEPT, KEEPALIVE, KEEPINTVL,
  89. * KEEPCNT, QUICKACK, SYNCNT, LINGER2 (andrei)
  90. * 2007-12-04 support for queueing write requests (andrei)
  91. * 2007-12-12 destroy connection asap on wbuf. timeout (andrei)
  92. * 2007-12-13 changed the refcnt and destroy scheme, now refcnt is 1 if
  93. * linked into the hash tables (was 0) (andrei)
  94. * 2007-12-21 support for pending connects (connections are added to the
  95. * hash immediately and writes on them are buffered) (andrei)
  96. * 2008-02-05 handle POLLRDHUP (if supported), POLLERR and
  97. * POLLHUP (andrei)
  98. * on write error check if there's still data in the socket
  99. * read buffer and process it first (andrei)
  100. * 2009-02-26 direct blacklist support (andrei)
  101. * 2009-03-20 s/wq_timeout/send_timeout ; send_timeout is now in ticks
  102. * (andrei)
  103. * 2009-04-09 tcp ev and tcp stats macros added (andrei)
  104. * 2009-09-15 support for force connection reuse and close after send
  105. * send flags (andrei)
  106. */
  107. /*!
  108. * \file
  109. * \brief SIP-router core :: TCP main process (connection handling and tcp_send)
  110. * \ingroup core
  111. * Module: \ref core
  112. */
  113. #ifdef USE_TCP
  114. #ifndef SHM_MEM
  115. #error "shared memory support needed (add -DSHM_MEM to Makefile.defs)"
  116. #endif
  117. #define HANDLE_IO_INLINE
  118. #include "io_wait.h" /* include first to make sure the needed features are
  119. turned on (e.g. _GNU_SOURCE for POLLRDHUP) */
  120. #include <sys/time.h>
  121. #include <sys/types.h>
  122. #include <sys/select.h>
  123. #include <sys/socket.h>
  124. #ifdef HAVE_FILIO_H
  125. #include <sys/filio.h> /* needed on solaris 2.x for FIONREAD */
  126. #elif defined __OS_solaris
  127. #define BSD_COMP /* needed on older solaris for FIONREAD */
  128. #endif /* HAVE_FILIO_H / __OS_solaris */
  129. #include <sys/ioctl.h> /* ioctl() used on write error */
  130. #include <netinet/in.h>
  131. #include <netinet/in_systm.h>
  132. #include <netinet/ip.h>
  133. #include <netinet/tcp.h>
  134. #include <sys/uio.h> /* writev*/
  135. #include <netdb.h>
  136. #include <stdlib.h> /*exit() */
  137. #include <unistd.h>
  138. #include <errno.h>
  139. #include <string.h>
  140. #ifdef HAVE_SELECT
  141. #include <sys/select.h>
  142. #endif
  143. #include <sys/poll.h>
  144. #include "ip_addr.h"
  145. #include "pass_fd.h"
  146. #include "tcp_conn.h"
  147. #include "globals.h"
  148. #include "pt.h"
  149. #include "locking.h"
  150. #include "mem/mem.h"
  151. #include "mem/shm_mem.h"
  152. #include "timer.h"
  153. #include "sr_module.h"
  154. #include "tcp_server.h"
  155. #include "tcp_init.h"
  156. #include "tcp_stats.h"
  157. #include "tcp_ev.h"
  158. #include "tsend.h"
  159. #include "timer_ticks.h"
  160. #include "local_timer.h"
  161. #ifdef CORE_TLS
  162. #include "tls/tls_server.h"
  163. #define tls_loaded() 1
  164. #else
  165. #include "tls_hooks_init.h"
  166. #include "tls_hooks.h"
  167. #endif /* CORE_TLS*/
  168. #ifdef USE_DST_BLACKLIST
  169. #include "dst_blacklist.h"
  170. #endif /* USE_DST_BLACKLIST */
  171. #include "tcp_info.h"
  172. #include "tcp_options.h"
  173. #include "ut.h"
  174. #include "cfg/cfg_struct.h"
  175. #define local_malloc pkg_malloc
  176. #define local_free pkg_free
  177. #include <fcntl.h> /* must be included after io_wait.h if SIGIO_RT is used */
  178. #ifdef NO_MSG_DONTWAIT
  179. #ifndef MSG_DONTWAIT
  180. /* should work inside tcp_main */
  181. #define MSG_DONTWAIT 0
  182. #endif
  183. #endif /*NO_MSG_DONTWAIT */
  184. #define TCP_PASS_NEW_CONNECTION_ON_DATA /* don't pass a new connection
  185. immediately to a child, wait for
  186. some data on it first */
  187. #define TCP_LISTEN_BACKLOG 1024
  188. #define SEND_FD_QUEUE /* queue send fd requests on EAGAIN, instead of sending
  189. them immediately */
  190. #define TCP_CHILD_NON_BLOCKING
  191. #ifdef SEND_FD_QUEUE
  192. #ifndef TCP_CHILD_NON_BLOCKING
  193. #define TCP_CHILD_NON_BLOCKING
  194. #endif
  195. #define MAX_SEND_FD_QUEUE_SIZE tcp_main_max_fd_no
  196. #define SEND_FD_QUEUE_SIZE 128 /* initial size */
  197. #define MAX_SEND_FD_RETRIES 96 /* FIXME: not used for now */
  198. #define SEND_FD_QUEUE_TIMEOUT MS_TO_TICKS(2000) /* 2 s */
  199. #endif
  200. /* minimum interval local_timer_run() is allowed to run, in ticks */
  201. #define TCPCONN_TIMEOUT_MIN_RUN 1 /* once per tick */
  202. #define TCPCONN_WAIT_TIMEOUT 1 /* 1 tick */
  203. #ifdef TCP_ASYNC
  204. static unsigned int* tcp_total_wq=0;
  205. #endif
  206. enum fd_types { F_NONE, F_SOCKINFO /* a tcp_listen fd */,
  207. F_TCPCONN, F_TCPCHILD, F_PROC };
  208. #ifdef TCP_FD_CACHE
  209. #define TCP_FD_CACHE_SIZE 8
  210. struct fd_cache_entry{
  211. struct tcp_connection* con;
  212. int id;
  213. int fd;
  214. };
  215. static struct fd_cache_entry fd_cache[TCP_FD_CACHE_SIZE];
  216. #endif /* TCP_FD_CACHE */
  217. static int is_tcp_main=0;
  218. enum poll_types tcp_poll_method=0; /* by default choose the best method */
  219. int tcp_main_max_fd_no=0;
  220. int tcp_max_connections=DEFAULT_TCP_MAX_CONNECTIONS;
  221. static union sockaddr_union tcp_source_ipv4_addr; /* saved bind/srv v4 addr. */
  222. static union sockaddr_union* tcp_source_ipv4=0;
  223. #ifdef USE_IPV6
  224. static union sockaddr_union tcp_source_ipv6_addr; /* saved bind/src v6 addr. */
  225. static union sockaddr_union* tcp_source_ipv6=0;
  226. #endif
  227. static int* tcp_connections_no=0; /* current open connections */
  228. /* connection hash table (after ip&port) , includes also aliases */
  229. struct tcp_conn_alias** tcpconn_aliases_hash=0;
  230. /* connection hash table (after connection id) */
  231. struct tcp_connection** tcpconn_id_hash=0;
  232. gen_lock_t* tcpconn_lock=0;
  233. struct tcp_child* tcp_children;
  234. static int* connection_id=0; /* unique for each connection, used for
  235. quickly finding the corresponding connection
  236. for a reply */
  237. int unix_tcp_sock;
  238. static int tcp_proto_no=-1; /* tcp protocol number as returned by
  239. getprotobyname */
  240. static io_wait_h io_h;
  241. static struct local_timer tcp_main_ltimer;
  242. static ticks_t tcp_main_prev_ticks;
  243. static ticks_t tcpconn_main_timeout(ticks_t , struct timer_ln* , void* );
  244. inline static int _tcpconn_add_alias_unsafe(struct tcp_connection* c, int port,
  245. struct ip_addr* l_ip, int l_port,
  246. int flags);
  247. /* sets source address used when opening new sockets and no source is specified
  248. * (by default the address is choosen by the kernel)
  249. * Should be used only on init.
  250. * returns -1 on error */
  251. int tcp_set_src_addr(struct ip_addr* ip)
  252. {
  253. switch (ip->af){
  254. case AF_INET:
  255. ip_addr2su(&tcp_source_ipv4_addr, ip, 0);
  256. tcp_source_ipv4=&tcp_source_ipv4_addr;
  257. break;
  258. #ifdef USE_IPV6
  259. case AF_INET6:
  260. ip_addr2su(&tcp_source_ipv6_addr, ip, 0);
  261. tcp_source_ipv6=&tcp_source_ipv6_addr;
  262. break;
  263. #endif
  264. default:
  265. return -1;
  266. }
  267. return 0;
  268. }
  269. static inline int init_sock_keepalive(int s)
  270. {
  271. int optval;
  272. #ifdef HAVE_SO_KEEPALIVE
  273. if (cfg_get(tcp, tcp_cfg, keepalive)){
  274. optval=1;
  275. if (setsockopt(s, SOL_SOCKET, SO_KEEPALIVE, &optval,
  276. sizeof(optval))<0){
  277. LOG(L_WARN, "WARNING: init_sock_keepalive: failed to enable"
  278. " SO_KEEPALIVE: %s\n", strerror(errno));
  279. return -1;
  280. }
  281. }
  282. #endif
  283. #ifdef HAVE_TCP_KEEPINTVL
  284. if ((optval=cfg_get(tcp, tcp_cfg, keepintvl))){
  285. if (setsockopt(s, IPPROTO_TCP, TCP_KEEPINTVL, &optval,
  286. sizeof(optval))<0){
  287. LOG(L_WARN, "WARNING: init_sock_keepalive: failed to set"
  288. " keepalive probes interval: %s\n", strerror(errno));
  289. }
  290. }
  291. #endif
  292. #ifdef HAVE_TCP_KEEPIDLE
  293. if ((optval=cfg_get(tcp, tcp_cfg, keepidle))){
  294. if (setsockopt(s, IPPROTO_TCP, TCP_KEEPIDLE, &optval,
  295. sizeof(optval))<0){
  296. LOG(L_WARN, "WARNING: init_sock_keepalive: failed to set"
  297. " keepalive idle interval: %s\n", strerror(errno));
  298. }
  299. }
  300. #endif
  301. #ifdef HAVE_TCP_KEEPCNT
  302. if ((optval=cfg_get(tcp, tcp_cfg, keepcnt))){
  303. if (setsockopt(s, IPPROTO_TCP, TCP_KEEPCNT, &optval,
  304. sizeof(optval))<0){
  305. LOG(L_WARN, "WARNING: init_sock_keepalive: failed to set"
  306. " maximum keepalive count: %s\n", strerror(errno));
  307. }
  308. }
  309. #endif
  310. return 0;
  311. }
  312. /* set all socket/fd options for new sockets (e.g. before connect):
  313. * disable nagle, tos lowdelay, reuseaddr, non-blocking
  314. *
  315. * return -1 on error */
  316. static int init_sock_opt(int s)
  317. {
  318. int flags;
  319. int optval;
  320. #ifdef DISABLE_NAGLE
  321. flags=1;
  322. if ( (tcp_proto_no!=-1) && (setsockopt(s, tcp_proto_no , TCP_NODELAY,
  323. &flags, sizeof(flags))<0) ){
  324. LOG(L_WARN, "WARNING: init_sock_opt: could not disable Nagle: %s\n",
  325. strerror(errno));
  326. }
  327. #endif
  328. /* tos*/
  329. optval = tos;
  330. if (setsockopt(s, IPPROTO_IP, IP_TOS, (void*)&optval,sizeof(optval)) ==-1){
  331. LOG(L_WARN, "WARNING: init_sock_opt: setsockopt tos: %s\n",
  332. strerror(errno));
  333. /* continue since this is not critical */
  334. }
  335. #if !defined(TCP_DONT_REUSEADDR)
  336. optval=1;
  337. if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,
  338. (void*)&optval, sizeof(optval))==-1){
  339. LOG(L_ERR, "ERROR: setsockopt SO_REUSEADDR %s\n",
  340. strerror(errno));
  341. /* continue, not critical */
  342. }
  343. #endif /* !TCP_DONT_REUSEADDR */
  344. #ifdef HAVE_TCP_SYNCNT
  345. if ((optval=cfg_get(tcp, tcp_cfg, syncnt))){
  346. if (setsockopt(s, IPPROTO_TCP, TCP_SYNCNT, &optval,
  347. sizeof(optval))<0){
  348. LOG(L_WARN, "WARNING: init_sock_opt: failed to set"
  349. " maximum SYN retr. count: %s\n", strerror(errno));
  350. }
  351. }
  352. #endif
  353. #ifdef HAVE_TCP_LINGER2
  354. if ((optval=cfg_get(tcp, tcp_cfg, linger2))){
  355. if (setsockopt(s, IPPROTO_TCP, TCP_LINGER2, &optval,
  356. sizeof(optval))<0){
  357. LOG(L_WARN, "WARNING: init_sock_opt: failed to set"
  358. " maximum LINGER2 timeout: %s\n", strerror(errno));
  359. }
  360. }
  361. #endif
  362. #ifdef HAVE_TCP_QUICKACK
  363. if (cfg_get(tcp, tcp_cfg, delayed_ack)){
  364. optval=0; /* reset quick ack => delayed ack */
  365. if (setsockopt(s, IPPROTO_TCP, TCP_QUICKACK, &optval,
  366. sizeof(optval))<0){
  367. LOG(L_WARN, "WARNING: init_sock_opt: failed to reset"
  368. " TCP_QUICKACK: %s\n", strerror(errno));
  369. }
  370. }
  371. #endif /* HAVE_TCP_QUICKACK */
  372. init_sock_keepalive(s);
  373. /* non-blocking */
  374. flags=fcntl(s, F_GETFL);
  375. if (flags==-1){
  376. LOG(L_ERR, "ERROR: init_sock_opt: fnctl failed: (%d) %s\n",
  377. errno, strerror(errno));
  378. goto error;
  379. }
  380. if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
  381. LOG(L_ERR, "ERROR: init_sock_opt: fcntl: set non-blocking failed:"
  382. " (%d) %s\n", errno, strerror(errno));
  383. goto error;
  384. }
  385. return 0;
  386. error:
  387. return -1;
  388. }
  389. /* set all socket/fd options for "accepted" sockets
  390. * only nonblocking is set since the rest is inherited from the
  391. * "parent" (listening) socket
  392. * Note: setting O_NONBLOCK is required on linux but it's not needed on
  393. * BSD and possibly solaris (where the flag is inherited from the
  394. * parent socket). However since there is no standard document
  395. * requiring a specific behaviour in this case it's safer to always set
  396. * it (at least for now) --andrei
  397. * TODO: check on which OSes O_NONBLOCK is inherited and make this
  398. * function a nop.
  399. *
  400. * return -1 on error */
  401. static int init_sock_opt_accept(int s)
  402. {
  403. int flags;
  404. /* non-blocking */
  405. flags=fcntl(s, F_GETFL);
  406. if (flags==-1){
  407. LOG(L_ERR, "ERROR: init_sock_opt_accept: fnctl failed: (%d) %s\n",
  408. errno, strerror(errno));
  409. goto error;
  410. }
  411. if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
  412. LOG(L_ERR, "ERROR: init_sock_opt_accept: "
  413. "fcntl: set non-blocking failed: (%d) %s\n",
  414. errno, strerror(errno));
  415. goto error;
  416. }
  417. return 0;
  418. error:
  419. return -1;
  420. }
  421. /* blocking connect on a non-blocking fd; it will timeout after
  422. * tcp_connect_timeout
  423. * if BLOCKING_USE_SELECT and HAVE_SELECT are defined it will internally
  424. * use select() instead of poll (bad if fd > FD_SET_SIZE, poll is preferred)
  425. */
  426. static int tcp_blocking_connect(int fd, int type,
  427. const struct sockaddr *servaddr,
  428. socklen_t addrlen)
  429. {
  430. int n;
  431. #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
  432. fd_set sel_set;
  433. fd_set orig_set;
  434. struct timeval timeout;
  435. #else
  436. struct pollfd pf;
  437. #endif
  438. int elapsed;
  439. int to;
  440. int ticks;
  441. int err;
  442. unsigned int err_len;
  443. int poll_err;
  444. poll_err=0;
  445. to=cfg_get(tcp, tcp_cfg, connect_timeout_s);
  446. ticks=get_ticks();
  447. again:
  448. n=connect(fd, servaddr, addrlen);
  449. if (n==-1){
  450. if (errno==EINTR){
  451. elapsed=(get_ticks()-ticks)*TIMER_TICK;
  452. if (elapsed<to) goto again;
  453. else goto error_timeout;
  454. }
  455. if (errno!=EINPROGRESS && errno!=EALREADY){
  456. goto error_errno;
  457. }
  458. }else goto end;
  459. /* poll/select loop */
  460. #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
  461. FD_ZERO(&orig_set);
  462. FD_SET(fd, &orig_set);
  463. #else
  464. pf.fd=fd;
  465. pf.events=POLLOUT;
  466. #endif
  467. while(1){
  468. elapsed=(get_ticks()-ticks)*TIMER_TICK;
  469. if (elapsed>=to)
  470. goto error_timeout;
  471. #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
  472. sel_set=orig_set;
  473. timeout.tv_sec=to-elapsed;
  474. timeout.tv_usec=0;
  475. n=select(fd+1, 0, &sel_set, 0, &timeout);
  476. #else
  477. n=poll(&pf, 1, (to-elapsed)*1000);
  478. #endif
  479. if (n<0){
  480. if (errno==EINTR) continue;
  481. LOG(L_ERR, "ERROR: tcp_blocking_connect %s: poll/select failed:"
  482. " (%d) %s\n",
  483. su2a((union sockaddr_union*)servaddr, addrlen),
  484. errno, strerror(errno));
  485. goto error;
  486. }else if (n==0) /* timeout */ continue;
  487. #if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
  488. if (FD_ISSET(fd, &sel_set))
  489. #else
  490. if (pf.revents&(POLLERR|POLLHUP|POLLNVAL)){
  491. LOG(L_ERR, "ERROR: tcp_blocking_connect %s: poll error: "
  492. "flags %x\n",
  493. su2a((union sockaddr_union*)servaddr, addrlen),
  494. pf.revents);
  495. poll_err=1;
  496. }
  497. #endif
  498. {
  499. err_len=sizeof(err);
  500. getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &err_len);
  501. if ((err==0) && (poll_err==0)) goto end;
  502. if (err!=EINPROGRESS && err!=EALREADY){
  503. LOG(L_ERR, "ERROR: tcp_blocking_connect %s: SO_ERROR (%d) "
  504. "%s\n",
  505. su2a((union sockaddr_union*)servaddr, addrlen),
  506. err, strerror(err));
  507. errno=err;
  508. goto error_errno;
  509. }
  510. }
  511. }
  512. error_errno:
  513. switch(errno){
  514. case ENETUNREACH:
  515. case EHOSTUNREACH:
  516. #ifdef USE_DST_BLACKLIST
  517. if (cfg_get(core, core_cfg, use_dst_blacklist))
  518. dst_blacklist_su(BLST_ERR_CONNECT, type,
  519. (union sockaddr_union*)servaddr, 0);
  520. #endif /* USE_DST_BLACKLIST */
  521. TCP_EV_CONNECT_UNREACHABLE(errno, 0, 0,
  522. (union sockaddr_union*)servaddr, type);
  523. break;
  524. case ETIMEDOUT:
  525. #ifdef USE_DST_BLACKLIST
  526. if (cfg_get(core, core_cfg, use_dst_blacklist))
  527. dst_blacklist_su(BLST_ERR_CONNECT, type,
  528. (union sockaddr_union*)servaddr, 0);
  529. #endif /* USE_DST_BLACKLIST */
  530. TCP_EV_CONNECT_TIMEOUT(errno, 0, 0,
  531. (union sockaddr_union*)servaddr, type);
  532. break;
  533. case ECONNREFUSED:
  534. case ECONNRESET:
  535. #ifdef USE_DST_BLACKLIST
  536. if (cfg_get(core, core_cfg, use_dst_blacklist))
  537. dst_blacklist_su(BLST_ERR_CONNECT, type,
  538. (union sockaddr_union*)servaddr, 0);
  539. #endif /* USE_DST_BLACKLIST */
  540. TCP_EV_CONNECT_RST(errno, 0, 0,
  541. (union sockaddr_union*)servaddr, type);
  542. break;
  543. case EAGAIN: /* not posix, but supported on linux and bsd */
  544. TCP_EV_CONNECT_NO_MORE_PORTS(errno, 0, 0,
  545. (union sockaddr_union*)servaddr, type);
  546. break;
  547. default:
  548. TCP_EV_CONNECT_ERR(errno, 0, 0,
  549. (union sockaddr_union*)servaddr, type);
  550. }
  551. LOG(L_ERR, "ERROR: tcp_blocking_connect %s: (%d) %s\n",
  552. su2a((union sockaddr_union*)servaddr, addrlen),
  553. errno, strerror(errno));
  554. goto error;
  555. error_timeout:
  556. /* timeout */
  557. #ifdef USE_DST_BLACKLIST
  558. if (cfg_get(core, core_cfg, use_dst_blacklist))
  559. dst_blacklist_su(BLST_ERR_CONNECT, type,
  560. (union sockaddr_union*)servaddr, 0);
  561. #endif /* USE_DST_BLACKLIST */
  562. TCP_EV_CONNECT_TIMEOUT(0, 0, 0, (union sockaddr_union*)servaddr, type);
  563. LOG(L_ERR, "ERROR: tcp_blocking_connect %s: timeout %d s elapsed "
  564. "from %d s\n", su2a((union sockaddr_union*)servaddr, addrlen),
  565. elapsed, cfg_get(tcp, tcp_cfg, connect_timeout_s));
  566. error:
  567. TCP_STATS_CONNECT_FAILED();
  568. return -1;
  569. end:
  570. return 0;
  571. }
  572. inline static int _tcpconn_write_nb(int fd, struct tcp_connection* c,
  573. char* buf, int len);
  574. #ifdef TCP_ASYNC
  575. /* unsafe version */
  576. #define _wbufq_empty(con) ((con)->wbuf_q.first==0)
  577. /* unsafe version */
  578. #define _wbufq_non_empty(con) ((con)->wbuf_q.first!=0)
  579. /* unsafe version, call while holding the connection write lock */
  580. inline static int _wbufq_add(struct tcp_connection* c, char* data,
  581. unsigned int size)
  582. {
  583. struct tcp_wbuffer_queue* q;
  584. struct tcp_wbuffer* wb;
  585. unsigned int last_free;
  586. unsigned int wb_size;
  587. unsigned int crt_size;
  588. ticks_t t;
  589. q=&c->wbuf_q;
  590. t=get_ticks_raw();
  591. if (unlikely( ((q->queued+size)>cfg_get(tcp, tcp_cfg, tcpconn_wq_max)) ||
  592. ((*tcp_total_wq+size)>cfg_get(tcp, tcp_cfg, tcp_wq_max)) ||
  593. (q->first &&
  594. TICKS_LT(q->wr_timeout, t)) )){
  595. LOG(L_ERR, "ERROR: wbufq_add(%d bytes): write queue full or timeout "
  596. " (%d, total %d, last write %d s ago)\n",
  597. size, q->queued, *tcp_total_wq,
  598. TICKS_TO_S(t-q->wr_timeout-
  599. cfg_get(tcp, tcp_cfg, send_timeout)));
  600. if (q->first && TICKS_LT(q->wr_timeout, t)){
  601. if (unlikely(c->state==S_CONN_CONNECT)){
  602. #ifdef USE_DST_BLACKLIST
  603. if (likely(cfg_get(core, core_cfg, use_dst_blacklist))){
  604. DBG("blacklisting, state=%d\n", c->state);
  605. dst_blacklist_su( BLST_ERR_CONNECT, c->rcv.proto,
  606. &c->rcv.src_su, 0);
  607. }
  608. #endif /* USE_DST_BLACKLIST */
  609. TCP_EV_CONNECT_TIMEOUT(0, TCP_LADDR(c), TCP_LPORT(c),
  610. TCP_PSU(c), TCP_PROTO(c));
  611. TCP_STATS_CONNECT_FAILED();
  612. }else{
  613. #ifdef USE_DST_BLACKLIST
  614. if (likely(cfg_get(core, core_cfg, use_dst_blacklist))){
  615. DBG("blacklisting, state=%d\n", c->state);
  616. dst_blacklist_su( BLST_ERR_SEND, c->rcv.proto,
  617. &c->rcv.src_su, 0);
  618. }
  619. #endif /* USE_DST_BLACKLIST */
  620. TCP_EV_SEND_TIMEOUT(0, &c->rcv);
  621. TCP_STATS_SEND_TIMEOUT();
  622. }
  623. }else{
  624. /* if it's not a timeout => queue full */
  625. TCP_EV_SENDQ_FULL(0, &c->rcv);
  626. TCP_STATS_SENDQ_FULL();
  627. }
  628. goto error;
  629. }
  630. if (unlikely(q->last==0)){
  631. wb_size=MAX_unsigned(cfg_get(tcp, tcp_cfg, wq_blk_size), size);
  632. wb=shm_malloc(sizeof(*wb)+wb_size-1);
  633. if (unlikely(wb==0))
  634. goto error;
  635. wb->b_size=wb_size;
  636. wb->next=0;
  637. q->last=wb;
  638. q->first=wb;
  639. q->last_used=0;
  640. q->offset=0;
  641. q->wr_timeout=get_ticks_raw()+
  642. ((c->state==S_CONN_CONNECT)?
  643. S_TO_TICKS(cfg_get(tcp, tcp_cfg, connect_timeout_s)):
  644. cfg_get(tcp, tcp_cfg, send_timeout));
  645. }else{
  646. wb=q->last;
  647. }
  648. while(size){
  649. last_free=wb->b_size-q->last_used;
  650. if (last_free==0){
  651. wb_size=MAX_unsigned(cfg_get(tcp, tcp_cfg, wq_blk_size), size);
  652. wb=shm_malloc(sizeof(*wb)+wb_size-1);
  653. if (unlikely(wb==0))
  654. goto error;
  655. wb->b_size=wb_size;
  656. wb->next=0;
  657. q->last->next=wb;
  658. q->last=wb;
  659. q->last_used=0;
  660. last_free=wb->b_size;
  661. }
  662. crt_size=MIN_unsigned(last_free, size);
  663. memcpy(wb->buf+q->last_used, data, crt_size);
  664. q->last_used+=crt_size;
  665. size-=crt_size;
  666. data+=crt_size;
  667. q->queued+=crt_size;
  668. atomic_add_int((int*)tcp_total_wq, crt_size);
  669. }
  670. return 0;
  671. error:
  672. return -1;
  673. }
  674. /* unsafe version, call while holding the connection write lock
  675. * inserts data at the beginning, it ignores the max queue size checks and
  676. * the timeout (use sparingly)
  677. * Note: it should never be called on a write buffer after wbufq_run() */
  678. inline static int _wbufq_insert(struct tcp_connection* c, char* data,
  679. unsigned int size)
  680. {
  681. struct tcp_wbuffer_queue* q;
  682. struct tcp_wbuffer* wb;
  683. q=&c->wbuf_q;
  684. if (likely(q->first==0)) /* if empty, use wbufq_add */
  685. return _wbufq_add(c, data, size);
  686. if (unlikely((*tcp_total_wq+size)>cfg_get(tcp, tcp_cfg, tcp_wq_max))){
  687. LOG(L_ERR, "ERROR: wbufq_insert(%d bytes): write queue full"
  688. " (%d, total %d, last write %d s ago)\n",
  689. size, q->queued, *tcp_total_wq,
  690. TICKS_TO_S(get_ticks_raw()-q->wr_timeout-
  691. cfg_get(tcp, tcp_cfg, send_timeout)));
  692. goto error;
  693. }
  694. if (unlikely(q->offset)){
  695. LOG(L_CRIT, "BUG: wbufq_insert: non-null offset %d (bad call, should"
  696. "never be called after the wbufq_run())\n", q->offset);
  697. goto error;
  698. }
  699. if ((q->first==q->last) && ((q->last->b_size-q->last_used)>=size)){
  700. /* one block with enough space in it for size bytes */
  701. memmove(q->first->buf+size, q->first->buf, size);
  702. memcpy(q->first->buf, data, size);
  703. q->last_used+=size;
  704. }else{
  705. /* create a size bytes block directly */
  706. wb=shm_malloc(sizeof(*wb)+size-1);
  707. if (unlikely(wb==0))
  708. goto error;
  709. wb->b_size=size;
  710. /* insert it */
  711. wb->next=q->first;
  712. q->first=wb;
  713. memcpy(wb->buf, data, size);
  714. }
  715. q->queued+=size;
  716. atomic_add_int((int*)tcp_total_wq, size);
  717. return 0;
  718. error:
  719. return -1;
  720. }
  721. /* unsafe version, call while holding the connection write lock */
  722. inline static void _wbufq_destroy( struct tcp_wbuffer_queue* q)
  723. {
  724. struct tcp_wbuffer* wb;
  725. struct tcp_wbuffer* next_wb;
  726. int unqueued;
  727. unqueued=0;
  728. if (likely(q->first)){
  729. wb=q->first;
  730. do{
  731. next_wb=wb->next;
  732. unqueued+=(wb==q->last)?q->last_used:wb->b_size;
  733. if (wb==q->first)
  734. unqueued-=q->offset;
  735. shm_free(wb);
  736. wb=next_wb;
  737. }while(wb);
  738. }
  739. memset(q, 0, sizeof(*q));
  740. atomic_add_int((int*)tcp_total_wq, -unqueued);
  741. }
  742. /* tries to empty the queue (safe version, c->write_lock must not be hold)
  743. * returns -1 on error, bytes written on success (>=0)
  744. * if the whole queue is emptied => sets *empty*/
  745. inline static int wbufq_run(int fd, struct tcp_connection* c, int* empty)
  746. {
  747. struct tcp_wbuffer_queue* q;
  748. struct tcp_wbuffer* wb;
  749. int n;
  750. int ret;
  751. int block_size;
  752. char* buf;
  753. *empty=0;
  754. ret=0;
  755. lock_get(&c->write_lock);
  756. q=&c->wbuf_q;
  757. while(q->first){
  758. block_size=((q->first==q->last)?q->last_used:q->first->b_size)-
  759. q->offset;
  760. buf=q->first->buf+q->offset;
  761. n=_tcpconn_write_nb(fd, c, buf, block_size);
  762. if (likely(n>0)){
  763. ret+=n;
  764. if (likely(n==block_size)){
  765. wb=q->first;
  766. q->first=q->first->next;
  767. shm_free(wb);
  768. q->offset=0;
  769. q->queued-=block_size;
  770. atomic_add_int((int*)tcp_total_wq, -block_size);
  771. }else{
  772. q->offset+=n;
  773. q->queued-=n;
  774. atomic_add_int((int*)tcp_total_wq, -n);
  775. break;
  776. }
  777. }else{
  778. if (n<0){
  779. /* EINTR is handled inside _tcpconn_write_nb */
  780. if (!(errno==EAGAIN || errno==EWOULDBLOCK)){
  781. if (unlikely(c->state==S_CONN_CONNECT)){
  782. switch(errno){
  783. case ENETUNREACH:
  784. case EHOSTUNREACH: /* not posix for send() */
  785. #ifdef USE_DST_BLACKLIST
  786. if (cfg_get(core, core_cfg, use_dst_blacklist))
  787. dst_blacklist_su(BLST_ERR_CONNECT,
  788. c->rcv.proto,
  789. &c->rcv.src_su, 0);
  790. #endif /* USE_DST_BLACKLIST */
  791. TCP_EV_CONNECT_UNREACHABLE(errno, TCP_LADDR(c),
  792. TCP_LPORT(c), TCP_PSU(c),
  793. TCP_PROTO(c));
  794. break;
  795. case ECONNREFUSED:
  796. case ECONNRESET:
  797. #ifdef USE_DST_BLACKLIST
  798. if (cfg_get(core, core_cfg, use_dst_blacklist))
  799. dst_blacklist_su(BLST_ERR_CONNECT,
  800. c->rcv.proto,
  801. &c->rcv.src_su, 0);
  802. #endif /* USE_DST_BLACKLIST */
  803. TCP_EV_CONNECT_RST(0, TCP_LADDR(c),
  804. TCP_LPORT(c), TCP_PSU(c),
  805. TCP_PROTO(c));
  806. break;
  807. default:
  808. TCP_EV_CONNECT_ERR(errno, TCP_LADDR(c),
  809. TCP_LPORT(c), TCP_PSU(c),
  810. TCP_PROTO(c));
  811. }
  812. TCP_STATS_CONNECT_FAILED();
  813. }else{
  814. switch(errno){
  815. case ECONNREFUSED:
  816. case ECONNRESET:
  817. TCP_STATS_CON_RESET();
  818. /* no break */
  819. case ENETUNREACH:
  820. case EHOSTUNREACH: /* not posix for send() */
  821. #ifdef USE_DST_BLACKLIST
  822. if (cfg_get(core, core_cfg, use_dst_blacklist))
  823. dst_blacklist_su(BLST_ERR_SEND,
  824. c->rcv.proto,
  825. &c->rcv.src_su, 0);
  826. #endif /* USE_DST_BLACKLIST */
  827. break;
  828. }
  829. }
  830. ret=-1;
  831. LOG(L_ERR, "ERROR: wbuf_runq: %s [%d]\n",
  832. strerror(errno), errno);
  833. }
  834. }
  835. break;
  836. }
  837. }
  838. if (likely(q->first==0)){
  839. q->last=0;
  840. q->last_used=0;
  841. q->offset=0;
  842. *empty=1;
  843. }
  844. lock_release(&c->write_lock);
  845. if (likely(ret>0)){
  846. q->wr_timeout=get_ticks_raw()+cfg_get(tcp, tcp_cfg, send_timeout);
  847. if (unlikely(c->state==S_CONN_CONNECT || c->state==S_CONN_ACCEPT)){
  848. TCP_STATS_ESTABLISHED(c->state);
  849. c->state=S_CONN_OK;
  850. }
  851. }
  852. return ret;
  853. }
  854. #endif /* TCP_ASYNC */
  #if 0
  /* (compiled out) blocking write even on non-blocking sockets:
   * sends len bytes from buf on fd, waiting with select() whenever the
   * socket is not writable.
   * returns the initial len on success, -1 on error or send timeout */
  static int tcp_blocking_write(struct tcp_connection* c, int fd, char* buf,
                                  unsigned int len)
  {
      int sent;
      fd_set wr_set;
      struct timeval tv;
      int start_ticks;
      int total_len;

      total_len=len;
  again:
      sent=send(fd, buf, len,
  #ifdef HAVE_MSG_NOSIGNAL
              MSG_NOSIGNAL
  #else
              0
  #endif
          );
      if (sent<0){
          if (errno==EINTR) goto again;
          else if (errno!=EAGAIN && errno!=EWOULDBLOCK){
              LOG(L_ERR, "tcp_blocking_write: failed to send: (%d) %s\n",
                      errno, strerror(errno));
              TCP_EV_SEND_TIMEOUT(errno, &c->rcv);
              TCP_STATS_SEND_TIMEOUT();
              return -1;
          }
      }else if (sent<len){
          /* partial write: skip over what was already sent */
          buf+=sent;
          len-=sent;
      }else{
          /* success: full write */
          return total_len;
      }
      /* wait until the socket becomes writable again */
      for (;;){
          FD_ZERO(&wr_set);
          FD_SET(fd, &wr_set);
          tv.tv_sec=tcp_send_timeout;
          tv.tv_usec=0;
          start_ticks=get_ticks();
          sent=select(fd+1, 0, &wr_set, 0, &tv);
          if (sent<0){
              if (errno==EINTR) continue; /* signal, ignore */
              LOG(L_ERR, "ERROR: tcp_blocking_write: select failed: "
                      " (%d) %s\n", errno, strerror(errno));
              return -1;
          }
          if (sent==0){
              /* timeout */
              if (get_ticks()-start_ticks>=tcp_send_timeout){
                  LOG(L_ERR, "ERROR: tcp_blocking_write: send timeout (%d)\n",
                          tcp_send_timeout);
                  return -1;
              }
              continue;
          }
          if (FD_ISSET(fd, &wr_set)){
              /* we can write again */
              goto again;
          }
      }
  }
  #endif
  924. struct tcp_connection* tcpconn_new(int sock, union sockaddr_union* su,
  925. union sockaddr_union* local_addr,
  926. struct socket_info* ba, int type,
  927. int state)
  928. {
  929. struct tcp_connection *c;
  930. int rd_b_size;
  931. rd_b_size=cfg_get(tcp, tcp_cfg, rd_buf_size);
  932. c=shm_malloc(sizeof(struct tcp_connection) + rd_b_size);
  933. if (c==0){
  934. LOG(L_ERR, "ERROR: tcpconn_new: mem. allocation failure\n");
  935. goto error;
  936. }
  937. memset(c, 0, sizeof(struct tcp_connection)); /* zero init (skip rd buf)*/
  938. c->s=sock;
  939. c->fd=-1; /* not initialized */
  940. if (lock_init(&c->write_lock)==0){
  941. LOG(L_ERR, "ERROR: tcpconn_new: init lock failed\n");
  942. goto error;
  943. }
  944. c->rcv.src_su=*su;
  945. atomic_set(&c->refcnt, 0);
  946. local_timer_init(&c->timer, tcpconn_main_timeout, c, 0);
  947. su2ip_addr(&c->rcv.src_ip, su);
  948. c->rcv.src_port=su_getport(su);
  949. c->rcv.bind_address=ba;
  950. if (likely(local_addr)){
  951. su2ip_addr(&c->rcv.dst_ip, local_addr);
  952. c->rcv.dst_port=su_getport(local_addr);
  953. }else if (ba){
  954. c->rcv.dst_ip=ba->address;
  955. c->rcv.dst_port=ba->port_no;
  956. }
  957. print_ip("tcpconn_new: new tcp connection: ", &c->rcv.src_ip, "\n");
  958. DBG( "tcpconn_new: on port %d, type %d\n", c->rcv.src_port, type);
  959. init_tcp_req(&c->req, (char*)c+sizeof(struct tcp_connection), rd_b_size);
  960. c->id=(*connection_id)++;
  961. c->rcv.proto_reserved1=0; /* this will be filled before receive_message*/
  962. c->rcv.proto_reserved2=0;
  963. c->state=state;
  964. c->extra_data=0;
  965. #ifdef USE_TLS
  966. if (type==PROTO_TLS){
  967. if (tls_tcpconn_init(c, sock)==-1) goto error;
  968. }else
  969. #endif /* USE_TLS*/
  970. {
  971. c->type=PROTO_TCP;
  972. c->rcv.proto=PROTO_TCP;
  973. c->timeout=get_ticks_raw()+cfg_get(tcp, tcp_cfg, con_lifetime);
  974. }
  975. return c;
  976. error:
  977. if (c) shm_free(c);
  978. return 0;
  979. }
  980. /* do the actual connect, set sock. options a.s.o
  981. * returns socket on success, -1 on error
  982. * sets also *res_local_addr, res_si and state (S_CONN_CONNECT for an
  983. * unfinished connect and S_CONN_OK for a finished one)*/
  984. inline static int tcp_do_connect( union sockaddr_union* server,
  985. union sockaddr_union* from,
  986. int type,
  987. union sockaddr_union* res_local_addr,
  988. struct socket_info** res_si,
  989. enum tcp_conn_states *state
  990. )
  991. {
  992. int s;
  993. union sockaddr_union my_name;
  994. socklen_t my_name_len;
  995. struct ip_addr ip;
  996. #ifdef TCP_ASYNC
  997. int n;
  998. #endif /* TCP_ASYNC */
  999. s=socket(AF2PF(server->s.sa_family), SOCK_STREAM, 0);
  1000. if (unlikely(s==-1)){
  1001. LOG(L_ERR, "ERROR: tcp_do_connect %s: socket: (%d) %s\n",
  1002. su2a(server, sizeof(*server)), errno, strerror(errno));
  1003. goto error;
  1004. }
  1005. if (init_sock_opt(s)<0){
  1006. LOG(L_ERR, "ERROR: tcp_do_connect %s: init_sock_opt failed\n",
  1007. su2a(server, sizeof(*server)));
  1008. goto error;
  1009. }
  1010. if (unlikely(from && bind(s, &from->s, sockaddru_len(*from)) != 0)){
  1011. LOG(L_WARN, "WARNING: tcp_do_connect: binding to source address"
  1012. " %s failed: %s [%d]\n", su2a(from, sizeof(*from)),
  1013. strerror(errno), errno);
  1014. }
  1015. *state=S_CONN_OK;
  1016. #ifdef TCP_ASYNC
  1017. if (likely(cfg_get(tcp, tcp_cfg, async))){
  1018. again:
  1019. n=connect(s, &server->s, sockaddru_len(*server));
  1020. if (likely(n==-1)){ /*non-blocking => most probable EINPROGRESS*/
  1021. if (likely(errno==EINPROGRESS))
  1022. *state=S_CONN_CONNECT;
  1023. else if (errno==EINTR) goto again;
  1024. else if (errno!=EALREADY){
  1025. switch(errno){
  1026. case ENETUNREACH:
  1027. case EHOSTUNREACH:
  1028. #ifdef USE_DST_BLACKLIST
  1029. if (cfg_get(core, core_cfg, use_dst_blacklist))
  1030. dst_blacklist_su(BLST_ERR_CONNECT, type, server,0);
  1031. #endif /* USE_DST_BLACKLIST */
  1032. TCP_EV_CONNECT_UNREACHABLE(errno, 0, 0, server, type);
  1033. break;
  1034. case ETIMEDOUT:
  1035. #ifdef USE_DST_BLACKLIST
  1036. if (cfg_get(core, core_cfg, use_dst_blacklist))
  1037. dst_blacklist_su(BLST_ERR_CONNECT, type, server,0);
  1038. #endif /* USE_DST_BLACKLIST */
  1039. TCP_EV_CONNECT_TIMEOUT(errno, 0, 0, server, type);
  1040. break;
  1041. case ECONNREFUSED:
  1042. case ECONNRESET:
  1043. #ifdef USE_DST_BLACKLIST
  1044. if (cfg_get(core, core_cfg, use_dst_blacklist))
  1045. dst_blacklist_su(BLST_ERR_CONNECT, type, server,0);
  1046. #endif /* USE_DST_BLACKLIST */
  1047. TCP_EV_CONNECT_RST(errno, 0, 0, server, type);
  1048. break;
  1049. case EAGAIN:/* not posix, but supported on linux and bsd */
  1050. TCP_EV_CONNECT_NO_MORE_PORTS(errno, 0, 0, server,type);
  1051. break;
  1052. default:
  1053. TCP_EV_CONNECT_ERR(errno, 0, 0, server, type);
  1054. }
  1055. TCP_STATS_CONNECT_FAILED();
  1056. LOG(L_ERR, "ERROR: tcp_do_connect: connect %s: (%d) %s\n",
  1057. su2a(server, sizeof(*server)),
  1058. errno, strerror(errno));
  1059. goto error;
  1060. }
  1061. }
  1062. }else{
  1063. #endif /* TCP_ASYNC */
  1064. if (tcp_blocking_connect(s, type, &server->s,
  1065. sockaddru_len(*server))<0){
  1066. LOG(L_ERR, "ERROR: tcp_do_connect: tcp_blocking_connect %s"
  1067. " failed\n", su2a(server, sizeof(*server)));
  1068. goto error;
  1069. }
  1070. #ifdef TCP_ASYNC
  1071. }
  1072. #endif /* TCP_ASYNC */
  1073. if (from){
  1074. su2ip_addr(&ip, from);
  1075. if (!ip_addr_any(&ip))
  1076. /* we already know the source ip, skip the sys. call */
  1077. goto find_socket;
  1078. }
  1079. my_name_len=sizeof(my_name);
  1080. if (unlikely(getsockname(s, &my_name.s, &my_name_len)!=0)){
  1081. LOG(L_ERR, "ERROR: tcp_do_connect: getsockname failed: %s(%d)\n",
  1082. strerror(errno), errno);
  1083. *res_si=0;
  1084. goto error;
  1085. }
  1086. from=&my_name; /* update from with the real "from" address */
  1087. su2ip_addr(&ip, &my_name);
  1088. find_socket:
  1089. #ifdef USE_TLS
  1090. if (unlikely(type==PROTO_TLS))
  1091. *res_si=find_si(&ip, 0, PROTO_TLS);
  1092. else
  1093. #endif
  1094. *res_si=find_si(&ip, 0, PROTO_TCP);
  1095. if (unlikely(*res_si==0)){
  1096. LOG(L_WARN, "WARNING: tcp_do_connect %s: could not find corresponding"
  1097. " listening socket for %s, using default...\n",
  1098. su2a(server, sizeof(*server)), ip_addr2a(&ip));
  1099. if (server->s.sa_family==AF_INET) *res_si=sendipv4_tcp;
  1100. #ifdef USE_IPV6
  1101. else *res_si=sendipv6_tcp;
  1102. #endif
  1103. }
  1104. *res_local_addr=*from;
  1105. return s;
  1106. error:
  1107. if (s!=-1) close(s);
  1108. return -1;
  1109. }
  1110. struct tcp_connection* tcpconn_connect( union sockaddr_union* server,
  1111. union sockaddr_union* from,
  1112. int type)
  1113. {
  1114. int s;
  1115. struct socket_info* si;
  1116. union sockaddr_union my_name;
  1117. struct tcp_connection* con;
  1118. enum tcp_conn_states state;
  1119. s=-1;
  1120. if (*tcp_connections_no >= cfg_get(tcp, tcp_cfg, max_connections)){
  1121. LOG(L_ERR, "ERROR: tcpconn_connect: maximum number of connections"
  1122. " exceeded (%d/%d)\n",
  1123. *tcp_connections_no,
  1124. cfg_get(tcp, tcp_cfg, max_connections));
  1125. goto error;
  1126. }
  1127. s=tcp_do_connect(server, from, type, &my_name, &si, &state);
  1128. if (s==-1){
  1129. LOG(L_ERR, "ERROR: tcp_do_connect %s: failed (%d) %s\n",
  1130. su2a(server, sizeof(*server)), errno, strerror(errno));
  1131. goto error;
  1132. }
  1133. con=tcpconn_new(s, server, &my_name, si, type, state);
  1134. if (con==0){
  1135. LOG(L_ERR, "ERROR: tcp_connect %s: tcpconn_new failed, closing the "
  1136. " socket\n", su2a(server, sizeof(*server)));
  1137. goto error;
  1138. }
  1139. return con;
  1140. /*FIXME: set sock idx! */
  1141. error:
  1142. if (s!=-1) close(s); /* close the opened socket */
  1143. return 0;
  1144. }
  #ifdef TCP_CONNECT_WAIT
  /* Performs the connect for an already existing connection structure c
   * (destination: c->rcv.src_su, optional source address: from), then
   * updates the local address info of c and its aliases.
   * returns the new socket on success, -1 on error */
  int tcpconn_finish_connect( struct tcp_connection* c,
                              union sockaddr_union* from)
  {
      int sock_fd;
      int idx;
      union sockaddr_union bound_su;
      struct socket_info* bind_si;
      enum tcp_conn_states conn_state;
      struct tcp_conn_alias* alias;
      int alias_flags;

      sock_fd=tcp_do_connect(&c->rcv.src_su, from, c->type, &bound_su,
                              &bind_si, &conn_state);
      if (unlikely(sock_fd==-1)){
          LOG(L_ERR, "ERROR: tcpconn_finish_connect %s: tcp_do_connect for %p"
                      " failed\n", su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
                      c);
          return -1;
      }
      c->rcv.bind_address=bind_si;
      su2ip_addr(&c->rcv.dst_ip, &bound_su);
      c->rcv.dst_port=su_getport(&bound_su);

      /* update aliases if needed: nothing to do if a source address was
       * requested and it is the same as the one we ended up with */
      if (from && (su_cmp(from, &bound_su)==1))
          return sock_fd;

      alias_flags=cfg_get(tcp, tcp_cfg, new_conn_alias_flags);
      TCPCONN_LOCK;
      if (from){
          /* remove all the aliases except the first one and re-add them
           * (there shouldn't be more then the 3 default aliases at this
           * stage) */
          for (idx=1; idx<c->aliases; idx++){
              alias=&c->con_aliases[idx];
              tcpconn_listrm(tcpconn_aliases_hash[alias->hash], alias,
                              next, prev);
          }
          c->aliases=1;
      }
      /* add the local_ip:0 and local_ip:local_port aliases */
      _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip, 0,
                                  alias_flags);
      _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
                                  c->rcv.dst_port, alias_flags);
      TCPCONN_UNLOCK;
      return sock_fd;
  }
  #endif /* TCP_CONNECT_WAIT */
  1197. /* adds a tcp connection to the tcpconn hashes
  1198. * Note: it's called _only_ from the tcp_main process */
  1199. inline static struct tcp_connection* tcpconn_add(struct tcp_connection *c)
  1200. {
  1201. struct ip_addr zero_ip;
  1202. int new_conn_alias_flags;
  1203. if (likely(c)){
  1204. ip_addr_mk_any(c->rcv.src_ip.af, &zero_ip);
  1205. c->id_hash=tcp_id_hash(c->id);
  1206. c->aliases=0;
  1207. new_conn_alias_flags=cfg_get(tcp, tcp_cfg, new_conn_alias_flags);
  1208. TCPCONN_LOCK;
  1209. c->flags|=F_CONN_HASHED;
  1210. /* add it at the begining of the list*/
  1211. tcpconn_listadd(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
  1212. /* set the aliases */
  1213. /* first alias is for (peer_ip, peer_port, 0 ,0) -- for finding
  1214. * any connection to peer_ip, peer_port
  1215. * the second alias is for (peer_ip, peer_port, local_addr, 0) -- for
  1216. * finding any conenction to peer_ip, peer_port from local_addr
  1217. * the third alias is for (peer_ip, peer_port, local_addr, local_port)
  1218. * -- for finding if a fully specified connection exists */
  1219. _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &zero_ip, 0,
  1220. new_conn_alias_flags);
  1221. if (likely(c->rcv.dst_ip.af && ! ip_addr_any(&c->rcv.dst_ip))){
  1222. _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip, 0,
  1223. new_conn_alias_flags);
  1224. _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
  1225. c->rcv.dst_port, new_conn_alias_flags);
  1226. }
  1227. /* ignore add_alias errors, there are some valid cases when one
  1228. * of the add_alias would fail (e.g. first add_alias for 2 connections
  1229. * with the same destination but different src. ip*/
  1230. TCPCONN_UNLOCK;
  1231. DBG("tcpconn_add: hashes: %d:%d:%d, %d\n",
  1232. c->con_aliases[0].hash,
  1233. c->con_aliases[1].hash,
  1234. c->con_aliases[2].hash,
  1235. c->id_hash);
  1236. return c;
  1237. }else{
  1238. LOG(L_CRIT, "tcpconn_add: BUG: null connection pointer\n");
  1239. return 0;
  1240. }
  1241. }
  1242. static inline void _tcpconn_detach(struct tcp_connection *c)
  1243. {
  1244. int r;
  1245. tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
  1246. /* remove all the aliases */
  1247. for (r=0; r<c->aliases; r++)
  1248. tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash],
  1249. &c->con_aliases[r], next, prev);
  1250. }
  1251. static inline void _tcpconn_free(struct tcp_connection* c)
  1252. {
  1253. #ifdef TCP_ASYNC
  1254. if (unlikely(_wbufq_non_empty(c)))
  1255. _wbufq_destroy(&c->wbuf_q);
  1256. #endif
  1257. lock_destroy(&c->write_lock);
  1258. #ifdef USE_TLS
  1259. if (unlikely(c->type==PROTO_TLS)) tls_tcpconn_clean(c);
  1260. #endif
  1261. shm_free(c);
  1262. }
  /* unsafe tcpconn_rm version (nolocks):
   * takes c out of the connection hashes, then frees it */
  void _tcpconn_rm(struct tcp_connection* c)
  {
      _tcpconn_detach(c); /* unlink from the id and aliases hashes */
      _tcpconn_free(c);   /* release the connection and what it owns */
  }
  1269. void tcpconn_rm(struct tcp_connection* c)
  1270. {
  1271. int r;
  1272. TCPCONN_LOCK;
  1273. tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
  1274. /* remove all the aliases */
  1275. for (r=0; r<c->aliases; r++)
  1276. tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash],
  1277. &c->con_aliases[r], next, prev);
  1278. TCPCONN_UNLOCK;
  1279. lock_destroy(&c->write_lock);
  1280. #ifdef USE_TLS
  1281. if ((c->type==PROTO_TLS)&&(c->extra_data)) tls_tcpconn_clean(c);
  1282. #endif
  1283. shm_free(c);
  1284. }
  1285. /* finds a connection, if id=0 uses the ip addr, port, local_ip and local port
  1286. * (host byte order) and tries to find the connection that matches all of
  1287. * them. Wild cards can be used for local_ip and local_port (a 0 filled
  1288. * ip address and/or a 0 local port).
  1289. * WARNING: unprotected (locks) use tcpconn_get unless you really
  1290. * know what you are doing */
  1291. struct tcp_connection* _tcpconn_find(int id, struct ip_addr* ip, int port,
  1292. struct ip_addr* l_ip, int l_port)
  1293. {
  1294. struct tcp_connection *c;
  1295. struct tcp_conn_alias* a;
  1296. unsigned hash;
  1297. int is_local_ip_any;
  1298. #ifdef EXTRA_DEBUG
  1299. DBG("tcpconn_find: %d port %d\n",id, port);
  1300. if (ip) print_ip("tcpconn_find: ip ", ip, "\n");
  1301. #endif
  1302. if (likely(id)){
  1303. hash=tcp_id_hash(id);
  1304. for (c=tcpconn_id_hash[hash]; c; c=c->id_next){
  1305. #ifdef EXTRA_DEBUG
  1306. DBG("c=%p, c->id=%d, port=%d\n",c, c->id, c->rcv.src_port);
  1307. print_ip("ip=", &c->rcv.src_ip, "\n");
  1308. #endif
  1309. if ((id==c->id)&&(c->state!=S_CONN_BAD)) return c;
  1310. }
  1311. }else if (likely(ip)){
  1312. hash=tcp_addr_hash(ip, port, l_ip, l_port);
  1313. is_local_ip_any=ip_addr_any(l_ip);
  1314. for (a=tcpconn_aliases_hash[hash]; a; a=a->next){
  1315. #ifdef EXTRA_DEBUG
  1316. DBG("a=%p, c=%p, c->id=%d, alias port= %d port=%d\n", a, a->parent,
  1317. a->parent->id, a->port, a->parent->rcv.src_port);
  1318. print_ip("ip=",&a->parent->rcv.src_ip,"\n");
  1319. #endif
  1320. if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
  1321. ((l_port==0) || (l_port==a->parent->rcv.dst_port)) &&
  1322. (ip_addr_cmp(ip, &a->parent->rcv.src_ip)) &&
  1323. (is_local_ip_any ||
  1324. ip_addr_cmp(l_ip, &a->parent->rcv.dst_ip))
  1325. )
  1326. return a->parent;
  1327. }
  1328. }
  1329. return 0;
  1330. }
  1331. /* _tcpconn_find with locks and timeout
  1332. * local_addr contains the desired local ip:port. If null any local address
  1333. * will be used. IN*ADDR_ANY or 0 port are wild cards.
  1334. */
  1335. struct tcp_connection* tcpconn_get(int id, struct ip_addr* ip, int port,
  1336. union sockaddr_union* local_addr,
  1337. ticks_t timeout)
  1338. {
  1339. struct tcp_connection* c;
  1340. struct ip_addr local_ip;
  1341. int local_port;
  1342. local_port=0;
  1343. if (likely(ip)){
  1344. if (unlikely(local_addr)){
  1345. su2ip_addr(&local_ip, local_addr);
  1346. local_port=su_getport(local_addr);
  1347. }else{
  1348. ip_addr_mk_any(ip->af, &local_ip);
  1349. local_port=0;
  1350. }
  1351. }
  1352. TCPCONN_LOCK;
  1353. c=_tcpconn_find(id, ip, port, &local_ip, local_port);
  1354. if (likely(c)){
  1355. atomic_inc(&c->refcnt);
  1356. /* update the timeout only if the connection is not handled
  1357. * by a tcp reader (the tcp reader process uses c->timeout for
  1358. * its own internal timeout and c->timeout will be overwritten
  1359. * anyway on return to tcp_main) */
  1360. if (likely(c->reader_pid==0))
  1361. c->timeout=get_ticks_raw()+timeout;
  1362. }
  1363. TCPCONN_UNLOCK;
  1364. return c;
  1365. }
  1366. /* add c->dst:port, local_addr as an alias for the "id" connection,
  1367. * flags: TCP_ALIAS_FORCE_ADD - add an alias even if a previous one exists
  1368. * TCP_ALIAS_REPLACE - if a prev. alias exists, replace it with the
  1369. * new one
  1370. * returns 0 on success, <0 on failure ( -1 - null c, -2 too many aliases,
  1371. * -3 alias already present and pointing to another connection)
  1372. * WARNING: must be called with TCPCONN_LOCK held */
  1373. inline static int _tcpconn_add_alias_unsafe(struct tcp_connection* c, int port,
  1374. struct ip_addr* l_ip, int l_port,
  1375. int flags)
  1376. {
  1377. unsigned hash;
  1378. struct tcp_conn_alias* a;
  1379. struct tcp_conn_alias* nxt;
  1380. struct tcp_connection* p;
  1381. int is_local_ip_any;
  1382. int i;
  1383. int r;
  1384. a=0;
  1385. is_local_ip_any=ip_addr_any(l_ip);
  1386. if (likely(c)){
  1387. hash=tcp_addr_hash(&c->rcv.src_ip, port, l_ip, l_port);
  1388. /* search the aliases for an already existing one */
  1389. for (a=tcpconn_aliases_hash[hash], nxt=0; a; a=nxt){
  1390. nxt=a->next;
  1391. if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
  1392. ( (l_port==0) || (l_port==a->parent->rcv.dst_port)) &&
  1393. (ip_addr_cmp(&c->rcv.src_ip, &a->parent->rcv.src_ip)) &&
  1394. ( is_local_ip_any ||
  1395. ip_addr_cmp(&a->parent->rcv.dst_ip, l_ip))
  1396. ){
  1397. /* found */
  1398. if (unlikely(a->parent!=c)){
  1399. if (flags & TCP_ALIAS_FORCE_ADD)
  1400. /* still have to walk the whole list to check if
  1401. * the alias was not already added */
  1402. continue;
  1403. else if (flags & TCP_ALIAS_REPLACE){
  1404. /* remove the alias =>
  1405. * remove the current alias and all the following
  1406. * ones from the corresponding connection, shift the
  1407. * connection aliases array and re-add the other
  1408. * aliases (!= current one) */
  1409. p=a->parent;
  1410. for (i=0; (i<p->aliases) && (&(p->con_aliases[i])!=a);
  1411. i++);
  1412. if (unlikely(i==p->aliases)){
  1413. LOG(L_CRIT, "BUG: _tcpconn_add_alias_unsafe: "
  1414. " alias %p not found in con %p (id %d)\n",
  1415. a, p, p->id);
  1416. goto error_not_found;
  1417. }
  1418. for (r=i; r<p->aliases; r++){
  1419. tcpconn_listrm(
  1420. tcpconn_aliases_hash[p->con_aliases[r].hash],
  1421. &p->con_aliases[r], next, prev);
  1422. }
  1423. if (likely((i+1)<p->aliases)){
  1424. memmove(&p->con_aliases[i], &p->con_aliases[i+1],
  1425. (p->aliases-i-1)*
  1426. sizeof(p->con_aliases[0]));
  1427. }
  1428. p->aliases--;
  1429. /* re-add the remaining aliases */
  1430. for (r=i; r<p->aliases; r++){
  1431. tcpconn_listadd(
  1432. tcpconn_aliases_hash[p->con_aliases[r].hash],
  1433. &p->con_aliases[r], next, prev);
  1434. }
  1435. }else
  1436. goto error_sec;
  1437. }else goto ok;
  1438. }
  1439. }
  1440. if (unlikely(c->aliases>=TCP_CON_MAX_ALIASES)) goto error_aliases;
  1441. c->con_aliases[c->aliases].parent=c;
  1442. c->con_aliases[c->aliases].port=port;
  1443. c->con_aliases[c->aliases].hash=hash;
  1444. tcpconn_listadd(tcpconn_aliases_hash[hash],
  1445. &c->con_aliases[c->aliases], next, prev);
  1446. c->aliases++;
  1447. }else goto error_not_found;
  1448. ok:
  1449. #ifdef EXTRA_DEBUG
  1450. if (a) DBG("_tcpconn_add_alias_unsafe: alias already present\n");
  1451. else DBG("_tcpconn_add_alias_unsafe: alias port %d for hash %d, id %d\n",
  1452. port, hash, c->id);
  1453. #endif
  1454. return 0;
  1455. error_aliases:
  1456. /* too many aliases */
  1457. return -2;
  1458. error_not_found:
  1459. /* null connection */
  1460. return -1;
  1461. error_sec:
  1462. /* alias already present and pointing to a different connection
  1463. * (hijack attempt?) */
  1464. return -3;
  1465. }
  1466. /* add port as an alias for the "id" connection,
  1467. * returns 0 on success,-1 on failure */
  1468. int tcpconn_add_alias(int id, int port, int proto)
  1469. {
  1470. struct tcp_connection* c;
  1471. int ret;
  1472. struct ip_addr zero_ip;
  1473. int r;
  1474. int alias_flags;
  1475. /* fix the port */
  1476. port=port?port:((proto==PROTO_TLS)?SIPS_PORT:SIP_PORT);
  1477. TCPCONN_LOCK;
  1478. /* check if alias already exists */
  1479. c=_tcpconn_find(id, 0, 0, 0, 0);
  1480. if (likely(c)){
  1481. ip_addr_mk_any(c->rcv.src_ip.af, &zero_ip);
  1482. alias_flags=cfg_get(tcp, tcp_cfg, alias_flags);
  1483. /* alias src_ip:port, 0, 0 */
  1484. ret=_tcpconn_add_alias_unsafe(c, port, &zero_ip, 0,
  1485. alias_flags);
  1486. if (ret<0 && ret!=-3) goto error;
  1487. /* alias src_ip:port, local_ip, 0 */
  1488. ret=_tcpconn_add_alias_unsafe(c, port, &c->rcv.dst_ip, 0,
  1489. alias_flags);
  1490. if (ret<0 && ret!=-3) goto error;
  1491. /* alias src_ip:port, local_ip, local_port */
  1492. ret=_tcpconn_add_alias_unsafe(c, port, &c->rcv.dst_ip, c->rcv.dst_port,
  1493. alias_flags);
  1494. if (unlikely(ret<0)) goto error;
  1495. }else goto error_not_found;
  1496. TCPCONN_UNLOCK;
  1497. return 0;
  1498. error_not_found:
  1499. TCPCONN_UNLOCK;
  1500. LOG(L_ERR, "ERROR: tcpconn_add_alias: no connection found for id %d\n",id);
  1501. return -1;
  1502. error:
  1503. TCPCONN_UNLOCK;
  1504. switch(ret){
  1505. case -2:
  1506. LOG(L_ERR, "ERROR: tcpconn_add_alias: too many aliases (%d)"
  1507. " for connection %p (id %d) %s:%d <- %d\n",
  1508. c->aliases, c, c->id, ip_addr2a(&c->rcv.src_ip),
  1509. c->rcv.src_port, port);
  1510. for (r=0; r<c->aliases; r++){
  1511. LOG(L_ERR, "ERROR: tcpconn_add_alias: alias %d: for %p (%d)"
  1512. " %s:%d <-%d hash %x\n", r, c, c->id,
  1513. ip_addr2a(&c->rcv.src_ip), c->rcv.src_port,
  1514. c->con_aliases[r].port, c->con_aliases[r].hash);
  1515. }
  1516. break;
  1517. case -3:
  1518. LOG(L_ERR, "ERROR: tcpconn_add_alias: possible port"
  1519. " hijack attempt\n");
  1520. LOG(L_ERR, "ERROR: tcpconn_add_alias: alias for %d port %d already"
  1521. " present and points to another connection \n",
  1522. c->id, port);
  1523. break;
  1524. default:
  1525. LOG(L_ERR, "ERROR: tcpconn_add_alias: unkown error %d\n", ret);
  1526. }
  1527. return -1;
  1528. }
  1529. #ifdef TCP_FD_CACHE
  1530. static void tcp_fd_cache_init()
  1531. {
  1532. int r;
  1533. for (r=0; r<TCP_FD_CACHE_SIZE; r++)
  1534. fd_cache[r].fd=-1;
  1535. }
  1536. inline static struct fd_cache_entry* tcp_fd_cache_get(struct tcp_connection *c)
  1537. {
  1538. int h;
  1539. h=c->id%TCP_FD_CACHE_SIZE;
  1540. if ((fd_cache[h].fd>0) && (fd_cache[h].id==c->id) && (fd_cache[h].con==c))
  1541. return &fd_cache[h];
  1542. return 0;
  1543. }
  1544. inline static void tcp_fd_cache_rm(struct fd_cache_entry* e)
  1545. {
  1546. e->fd=-1;
  1547. }
  1548. inline static void tcp_fd_cache_add(struct tcp_connection *c, int fd)
  1549. {
  1550. int h;
  1551. h=c->id%TCP_FD_CACHE_SIZE;
  1552. if (likely(fd_cache[h].fd>0))
  1553. close(fd_cache[h].fd);
  1554. fd_cache[h].fd=fd;
  1555. fd_cache[h].id=c->id;
  1556. fd_cache[h].con=c;
  1557. }
  1558. #endif /* TCP_FD_CACHE */
  1559. inline static int tcpconn_chld_put(struct tcp_connection* tcpconn);
  1560. /* finds a tcpconn & sends on it
  1561. * uses the dst members to, proto (TCP|TLS) and id and tries to send
  1562. * from the "from" address (if non null and id==0)
  1563. * returns: number of bytes written (>=0) on success
  1564. * <0 on error */
  1565. int tcp_send(struct dest_info* dst, union sockaddr_union* from,
  1566. char* buf, unsigned len)
  1567. {
  1568. struct tcp_connection *c;
  1569. struct tcp_connection *tmp;
  1570. struct ip_addr ip;
  1571. int port;
  1572. int fd;
  1573. long response[2];
  1574. int n;
  1575. int do_close_fd;
  1576. ticks_t con_lifetime;
  1577. #ifdef TCP_ASYNC
  1578. int enable_write_watch;
  1579. #endif /* TCP_ASYNC */
  1580. #ifdef TCP_FD_CACHE
  1581. struct fd_cache_entry* fd_cache_e;
  1582. int use_fd_cache;
  1583. use_fd_cache=cfg_get(tcp, tcp_cfg, fd_cache);
  1584. fd_cache_e=0;
  1585. #endif /* TCP_FD_CACHE */
  1586. do_close_fd=1; /* close the fd on exit */
  1587. port=su_getport(&dst->to);
  1588. con_lifetime=cfg_get(tcp, tcp_cfg, con_lifetime);
  1589. if (likely(port)){
  1590. su2ip_addr(&ip, &dst->to);
  1591. c=tcpconn_get(dst->id, &ip, port, from, con_lifetime);
  1592. }else if (likely(dst->id)){
  1593. c=tcpconn_get(dst->id, 0, 0, 0, con_lifetime);
  1594. }else{
  1595. LOG(L_CRIT, "BUG: tcp_send called with null id & to\n");
  1596. return -1;
  1597. }
  1598. if (likely(dst->id)){
  1599. if (unlikely(c==0)) {
  1600. if (likely(port)){
  1601. /* try again w/o id */
  1602. c=tcpconn_get(0, &ip, port, from, con_lifetime);
  1603. }else{
  1604. LOG(L_ERR, "ERROR: tcp_send: id %d not found, dropping\n",
  1605. dst->id);
  1606. return -1;
  1607. }
  1608. }
  1609. }
  1610. /* no_id: */
  1611. if (unlikely((c==0) || tcpconn_close_after_send(c))){
  1612. if (unlikely(c)){
  1613. /* can't use c if it's marked as close-after-send =>
  1614. release it and try opening new one */
  1615. tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
  1616. c=0;
  1617. }
  1618. /* check if connect() is disabled */
  1619. if (unlikely((dst->send_flags & SND_F_FORCE_CON_REUSE) ||
  1620. cfg_get(tcp, tcp_cfg, no_connect)))
  1621. return -1;
  1622. DBG("tcp_send: no open tcp connection found, opening new one\n");
  1623. /* create tcp connection */
  1624. if (likely(from==0)){
  1625. /* check to see if we have to use a specific source addr. */
  1626. switch (dst->to.s.sa_family) {
  1627. case AF_INET:
  1628. from = tcp_source_ipv4;
  1629. break;
  1630. #ifdef USE_IPV6
  1631. case AF_INET6:
  1632. from = tcp_source_ipv6;
  1633. break;
  1634. #endif
  1635. default:
  1636. /* error, bad af, ignore ... */
  1637. break;
  1638. }
  1639. }
  1640. #if defined(TCP_CONNECT_WAIT) && defined(TCP_ASYNC)
  1641. if (likely(cfg_get(tcp, tcp_cfg, tcp_connect_wait) &&
  1642. cfg_get(tcp, tcp_cfg, async) )){
  1643. if (unlikely(*tcp_connections_no >=
  1644. cfg_get(tcp, tcp_cfg, max_connections))){
  1645. LOG(L_ERR, "ERROR: tcp_send %s: maximum number of"
  1646. " connections exceeded (%d/%d)\n",
  1647. su2a(&dst->to, sizeof(dst->to)),
  1648. *tcp_connections_no,
  1649. cfg_get(tcp, tcp_cfg, max_connections));
  1650. return -1;
  1651. }
  1652. c=tcpconn_new(-1, &dst->to, from, 0, dst->proto,
  1653. S_CONN_CONNECT);
  1654. if (unlikely(c==0)){
  1655. LOG(L_ERR, "ERROR: tcp_send %s: could not create new"
  1656. " connection\n",
  1657. su2a(&dst->to, sizeof(dst->to)));
  1658. return -1;
  1659. }
  1660. c->flags|=F_CONN_PENDING|F_CONN_FD_CLOSED;
  1661. tcpconn_set_send_flags(c, dst->send_flags);
  1662. atomic_set(&c->refcnt, 2); /* ref from here and from main hash
  1663. table */
  1664. /* add it to id hash and aliases */
  1665. if (unlikely(tcpconn_add(c)==0)){
  1666. LOG(L_ERR, "ERROR: tcp_send %s: could not add "
  1667. "connection %p\n",
  1668. su2a(&dst->to, sizeof(dst->to)),
  1669. c);
  1670. _tcpconn_free(c);
  1671. n=-1;
  1672. goto end_no_conn;
  1673. }
  1674. /* do connect and if src ip or port changed, update the
  1675. * aliases */
  1676. if (unlikely((fd=tcpconn_finish_connect(c, from))<0)){
  1677. /* tcpconn_finish_connect will automatically blacklist
  1678. on error => no need to do it here */
  1679. LOG(L_ERR, "ERROR: tcp_send %s: tcpconn_finish_connect(%p)"
  1680. " failed\n", su2a(&dst->to, sizeof(dst->to)),
  1681. c);
  1682. goto conn_wait_error;
  1683. }
  1684. /* ? TODO: it might be faster just to queue the write directly
  1685. * and send to main CONN_NEW_PENDING_WRITE */
  1686. /* delay sending the fd to main after the send */
  1687. /* NOTE: no lock here, because the connection is marked as
  1688. * pending and nobody else will try to write on it. However
  1689. * this might produce out-of-order writes. If this is not
  1690. * desired either lock before the write or use
  1691. * _wbufq_insert(...) */
  1692. n=_tcpconn_write_nb(fd, c, buf, len);
  1693. if (unlikely(n<(int)len)){
  1694. if ((n>=0) || errno==EAGAIN || errno==EWOULDBLOCK){
  1695. DBG("tcp_send: pending write on new connection %p "
  1696. " (%d/%d bytes written)\n", c, n, len);
  1697. if (n<0) n=0;
  1698. else{
  1699. TCP_STATS_ESTABLISHED(S_CONN_CONNECT);
  1700. c->state=S_CONN_OK; /* partial write => connect()
  1701. ended */
  1702. }
  1703. /* add to the write queue */
  1704. lock_get(&c->write_lock);
  1705. if (unlikely(_wbufq_insert(c, buf+n, len-n)<0)){
  1706. lock_release(&c->write_lock);
  1707. n=-1;
  1708. LOG(L_ERR, "ERROR: tcp_send %s: EAGAIN and"
  1709. " write queue full or failed for %p\n",
  1710. su2a(&dst->to, sizeof(dst->to)),
  1711. c);
  1712. goto conn_wait_error;
  1713. }
  1714. lock_release(&c->write_lock);
  1715. /* send to tcp_main */
  1716. response[0]=(long)c;
  1717. response[1]=CONN_NEW_PENDING_WRITE;
  1718. if (unlikely(send_fd(unix_tcp_sock, response,
  1719. sizeof(response), fd) <= 0)){
  1720. LOG(L_ERR, "BUG: tcp_send %s: "
  1721. "CONN_NEW_PENDING_WRITE for %p"
  1722. " failed:" " %s (%d)\n",
  1723. su2a(&dst->to, sizeof(dst->to)),
  1724. c, strerror(errno), errno);
  1725. goto conn_wait_error;
  1726. }
  1727. n=len;
  1728. goto end;
  1729. }
  1730. /* if first write failed it's most likely a
  1731. connect error */
  1732. switch(errno){
  1733. case ENETUNREACH:
  1734. case EHOSTUNREACH: /* not posix for send() */
  1735. #ifdef USE_DST_BLACKLIST
  1736. if (cfg_get(core, core_cfg, use_dst_blacklist))
  1737. dst_blacklist_add( BLST_ERR_CONNECT, dst, 0);
  1738. #endif /* USE_DST_BLACKLIST */
  1739. TCP_EV_CONNECT_UNREACHABLE(errno, TCP_LADDR(c),
  1740. TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
  1741. break;
  1742. case ECONNREFUSED:
  1743. case ECONNRESET:
  1744. #ifdef USE_DST_BLACKLIST
  1745. if (cfg_get(core, core_cfg, use_dst_blacklist))
  1746. dst_blacklist_add( BLST_ERR_CONNECT, dst, 0);
  1747. #endif /* USE_DST_BLACKLIST */
  1748. TCP_EV_CONNECT_RST(errno, TCP_LADDR(c),
  1749. TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
  1750. break;
  1751. default:
  1752. TCP_EV_CONNECT_ERR(errno, TCP_LADDR(c),
  1753. TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
  1754. }
  1755. /* error: destroy it directly */
  1756. TCP_STATS_CONNECT_FAILED();
  1757. LOG(L_ERR, "ERROR: tcp_send %s: connect & send "
  1758. " for %p failed:" " %s (%d)\n",
  1759. su2a(&dst->to, sizeof(dst->to)),
  1760. c, strerror(errno), errno);
  1761. goto conn_wait_error;
  1762. }
  1763. LOG(L_INFO, "tcp_send: quick connect for %p\n", c);
  1764. TCP_STATS_ESTABLISHED(S_CONN_CONNECT);
  1765. if (unlikely(dst->send_flags & SND_F_CON_CLOSE)){
  1766. /* if close-after-send requested, don't bother
  1767. sending the fd back to tcp_main, try closing it
  1768. immediately (no other tcp_send should use it,
  1769. because it is marked as close-after-send before
  1770. being added to the hash */
  1771. goto conn_wait_close;
  1772. }
  1773. c->state=S_CONN_OK;
  1774. /* send to tcp_main */
  1775. response[0]=(long)c;
  1776. response[1]=CONN_NEW_COMPLETE;
  1777. if (unlikely(send_fd(unix_tcp_sock, response,
  1778. sizeof(response), fd) <= 0)){
  1779. LOG(L_ERR, "BUG: tcp_send %s: CONN_NEW_COMPLETE for %p"
  1780. " failed:" " %s (%d)\n",
  1781. su2a(&dst->to, sizeof(dst->to)),
  1782. c, strerror(errno), errno);
  1783. goto conn_wait_error;
  1784. }
  1785. goto end;
  1786. }
  1787. #endif /* TCP_CONNECT_WAIT && TCP_ASYNC */
  1788. if (unlikely((c=tcpconn_connect(&dst->to, from, dst->proto))==0)){
  1789. LOG(L_ERR, "ERROR: tcp_send %s: connect failed\n",
  1790. su2a(&dst->to, sizeof(dst->to)));
  1791. return -1;
  1792. }
  1793. tcpconn_set_send_flags(c, dst->send_flags);
  1794. if (likely(c->state==S_CONN_OK))
  1795. TCP_STATS_ESTABLISHED(S_CONN_CONNECT);
  1796. atomic_set(&c->refcnt, 2); /* ref. from here and it will also
  1797. be added in the tcp_main hash */
  1798. fd=c->s;
  1799. c->flags|=F_CONN_FD_CLOSED; /* not yet opened in main */
  1800. /* ? TODO: it might be faster just to queue the write and
  1801. * send to main a CONN_NEW_PENDING_WRITE */
  1802. /* send the new tcpconn to "tcp main" */
  1803. response[0]=(long)c;
  1804. response[1]=CONN_NEW;
  1805. n=send_fd(unix_tcp_sock, response, sizeof(response), c->s);
  1806. if (unlikely(n<=0)){
  1807. LOG(L_ERR, "BUG: tcp_send %s: failed send_fd: %s (%d)\n",
  1808. su2a(&dst->to, sizeof(dst->to)),
  1809. strerror(errno), errno);
  1810. /* we can safely delete it, it's not referenced by anybody */
  1811. _tcpconn_free(c);
  1812. n=-1;
  1813. goto end_no_conn;
  1814. }
  1815. goto send_it;
  1816. }
  1817. /* get_fd: */
  1818. #ifdef TCP_ASYNC
  1819. /* if data is already queued, we don't need the fd any more */
  1820. #ifdef TCP_CONNECT_WAIT
  1821. if (unlikely(cfg_get(tcp, tcp_cfg, async) &&
  1822. (_wbufq_non_empty(c) || (c->flags&F_CONN_PENDING)) ))
  1823. #else /* ! TCP_CONNECT_WAIT */
  1824. if (unlikely(cfg_get(tcp, tcp_cfg, async) && (_wbufq_non_empty(c)) ))
  1825. #endif /* TCP_CONNECT_WAIT */
  1826. {
  1827. lock_get(&c->write_lock);
  1828. #ifdef TCP_CONNECT_WAIT
  1829. if (likely(_wbufq_non_empty(c) || (c->flags&F_CONN_PENDING)))
  1830. #else /* ! TCP_CONNECT_WAIT */
  1831. if (likely(_wbufq_non_empty(c)))
  1832. #endif /* TCP_CONNECT_WAIT */
  1833. {
  1834. do_close_fd=0;
  1835. if (unlikely(_wbufq_add(c, buf, len)<0)){
  1836. lock_release(&c->write_lock);
  1837. n=-1;
  1838. goto error;
  1839. }
  1840. n=len;
  1841. lock_release(&c->write_lock);
  1842. goto release_c;
  1843. }
  1844. lock_release(&c->write_lock);
  1845. }
  1846. #endif /* TCP_ASYNC */
  1847. /* check if this is not the same reader process holding
  1848. * c and if so send directly on c->fd */
  1849. if (c->reader_pid==my_pid()){
  1850. DBG("tcp_send: send from reader (%d (%d)), reusing fd\n",
  1851. my_pid(), process_no);
  1852. fd=c->fd;
  1853. do_close_fd=0; /* don't close the fd on exit, it's in use */
  1854. #ifdef TCP_FD_CACHE
  1855. use_fd_cache=0; /* don't cache: problems would arise due to the
  1856. close() on cache eviction (if the fd is still
  1857. used). If it has to be cached then dup() _must_
  1858. be used */
  1859. }else if (likely(use_fd_cache &&
  1860. ((fd_cache_e=tcp_fd_cache_get(c))!=0))){
  1861. fd=fd_cache_e->fd;
  1862. do_close_fd=0;
  1863. DBG("tcp_send: found fd in cache ( %d, %p, %d)\n",
  1864. fd, c, fd_cache_e->id);
  1865. #endif /* TCP_FD_CACHE */
  1866. }else{
  1867. DBG("tcp_send: tcp connection found (%p), acquiring fd\n", c);
  1868. /* get the fd */
  1869. response[0]=(long)c;
  1870. response[1]=CONN_GET_FD;
  1871. n=send_all(unix_tcp_sock, response, sizeof(response));
  1872. if (unlikely(n<=0)){
  1873. LOG(L_ERR, "BUG: tcp_send: failed to get fd(write):%s (%d)\n",
  1874. strerror(errno), errno);
  1875. n=-1;
  1876. goto release_c;
  1877. }
  1878. DBG("tcp_send, c= %p, n=%d\n", c, n);
  1879. n=receive_fd(unix_tcp_sock, &tmp, sizeof(tmp), &fd, MSG_WAITALL);
  1880. if (unlikely(n<=0)){
  1881. LOG(L_ERR, "BUG: tcp_send: failed to get fd(receive_fd):"
  1882. " %s (%d)\n", strerror(errno), errno);
  1883. n=-1;
  1884. do_close_fd=0;
  1885. goto release_c;
  1886. }
  1887. if (unlikely(c!=tmp)){
  1888. LOG(L_CRIT, "BUG: tcp_send: get_fd: got different connection:"
  1889. " %p (id= %d, refcnt=%d state=%d) != "
  1890. " %p (n=%d)\n",
  1891. c, c->id, atomic_get(&c->refcnt), c->state,
  1892. tmp, n
  1893. );
  1894. n=-1; /* fail */
  1895. goto end;
  1896. }
  1897. DBG("tcp_send: after receive_fd: c= %p n=%d fd=%d\n",c, n, fd);
  1898. }
  1899. send_it:
  1900. DBG("tcp_send: sending...\n");
  1901. lock_get(&c->write_lock);
  1902. /* update connection send flags with the current ones */
  1903. tcpconn_set_send_flags(c, dst->send_flags);
  1904. #ifdef TCP_ASYNC
  1905. if (likely(cfg_get(tcp, tcp_cfg, async))){
  1906. if (_wbufq_non_empty(c)
  1907. #ifdef TCP_CONNECT_WAIT
  1908. || (c->flags&F_CONN_PENDING)
  1909. #endif /* TCP_CONNECT_WAIT */
  1910. ){
  1911. if (unlikely(_wbufq_add(c, buf, len)<0)){
  1912. lock_release(&c->write_lock);
  1913. n=-1;
  1914. goto error;
  1915. }
  1916. lock_release(&c->write_lock);
  1917. n=len;
  1918. goto end;
  1919. }
  1920. n=_tcpconn_write_nb(fd, c, buf, len);
  1921. }else{
  1922. #endif /* TCP_ASYNC */
  1923. #ifdef USE_TLS
  1924. if (c->type==PROTO_TLS)
  1925. n=tls_blocking_write(c, fd, buf, len);
  1926. else
  1927. #endif
  1928. /* n=tcp_blocking_write(c, fd, buf, len); */
  1929. n=tsend_stream(fd, buf, len,
  1930. TICKS_TO_S(cfg_get(tcp, tcp_cfg, send_timeout)) *
  1931. 1000);
  1932. #ifdef TCP_ASYNC
  1933. }
  1934. #else /* ! TCP_ASYNC */
  1935. lock_release(&c->write_lock);
  1936. #endif /* TCP_ASYNC */
  1937. DBG("tcp_send: after real write: c= %p n=%d fd=%d\n",c, n, fd);
  1938. DBG("tcp_send: buf=\n%.*s\n", (int)len, buf);
  1939. if (unlikely(n<(int)len)){
  1940. #ifdef TCP_ASYNC
  1941. if (cfg_get(tcp, tcp_cfg, async) &&
  1942. ((n>=0) || errno==EAGAIN || errno==EWOULDBLOCK)){
  1943. enable_write_watch=_wbufq_empty(c);
  1944. if (n<0) n=0;
  1945. else if (unlikely(c->state==S_CONN_CONNECT ||
  1946. c->state==S_CONN_ACCEPT)){
  1947. TCP_STATS_ESTABLISHED(c->state);
  1948. c->state=S_CONN_OK; /* something was written */
  1949. }
  1950. if (unlikely(_wbufq_add(c, buf+n, len-n)<0)){
  1951. lock_release(&c->write_lock);
  1952. n=-1;
  1953. goto error;
  1954. }
  1955. lock_release(&c->write_lock);
  1956. n=len;
  1957. if (likely(enable_write_watch)){
  1958. response[0]=(long)c;
  1959. response[1]=CONN_QUEUED_WRITE;
  1960. if (send_all(unix_tcp_sock, response, sizeof(response)) <= 0){
  1961. LOG(L_ERR, "BUG: tcp_send: error return failed "
  1962. "(write):%s (%d)\n", strerror(errno), errno);
  1963. n=-1;
  1964. goto error;
  1965. }
  1966. }
  1967. goto end;
  1968. }else{
  1969. lock_release(&c->write_lock);
  1970. }
  1971. #endif /* TCP_ASYNC */
  1972. if (unlikely(c->state==S_CONN_CONNECT)){
  1973. switch(errno){
  1974. case ENETUNREACH:
  1975. case EHOSTUNREACH: /* not posix for send() */
  1976. #ifdef USE_DST_BLACKLIST
  1977. if (cfg_get(core, core_cfg, use_dst_blacklist))
  1978. dst_blacklist_su(BLST_ERR_CONNECT, c->rcv.proto,
  1979. &c->rcv.src_su, 0);
  1980. #endif /* USE_DST_BLACKLIST */
  1981. TCP_EV_CONNECT_UNREACHABLE(errno, TCP_LADDR(c),
  1982. TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
  1983. break;
  1984. case ECONNREFUSED:
  1985. case ECONNRESET:
  1986. #ifdef USE_DST_BLACKLIST
  1987. if (cfg_get(core, core_cfg, use_dst_blacklist))
  1988. dst_blacklist_su(BLST_ERR_CONNECT, c->rcv.proto,
  1989. &c->rcv.src_su, 0);
  1990. #endif /* USE_DST_BLACKLIST */
  1991. TCP_EV_CONNECT_RST(errno, TCP_LADDR(c), TCP_LPORT(c),
  1992. TCP_PSU(c), TCP_PROTO(c));
  1993. break;
  1994. default:
  1995. TCP_EV_CONNECT_ERR(errno, TCP_LADDR(c), TCP_LPORT(c),
  1996. TCP_PSU(c), TCP_PROTO(c));
  1997. }
  1998. TCP_STATS_CONNECT_FAILED();
  1999. }else{
  2000. switch(errno){
  2001. case ECONNREFUSED:
  2002. case ECONNRESET:
  2003. TCP_STATS_CON_RESET();
  2004. /* no break */
  2005. case ENETUNREACH:
  2006. /*case EHOSTUNREACH: -- not posix */
  2007. #ifdef USE_DST_BLACKLIST
  2008. if (cfg_get(core, core_cfg, use_dst_blacklist))
  2009. dst_blacklist_su(BLST_ERR_SEND, c->rcv.proto,
  2010. &c->rcv.src_su, 0);
  2011. #endif /* USE_DST_BLACKLIST */
  2012. break;
  2013. }
  2014. }
  2015. LOG(L_ERR, "ERROR: tcp_send: failed to send on %p (%s:%d->%s): %s (%d)"
  2016. "\n", c, ip_addr2a(&c->rcv.dst_ip), c->rcv.dst_port,
  2017. su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
  2018. strerror(errno), errno);
  2019. #ifdef TCP_ASYNC
  2020. error:
  2021. #endif /* TCP_ASYNC */
  2022. /* error on the connection , mark it as bad and set 0 timeout */
  2023. c->state=S_CONN_BAD;
  2024. c->timeout=get_ticks_raw();
  2025. /* tell "main" it should drop this (optional it will t/o anyway?)*/
  2026. response[0]=(long)c;
  2027. response[1]=CONN_ERROR;
  2028. if (send_all(unix_tcp_sock, response, sizeof(response))<=0){
  2029. LOG(L_CRIT, "BUG: tcp_send: error return failed (write):%s (%d)\n",
  2030. strerror(errno), errno);
  2031. tcpconn_chld_put(c); /* deref. it manually */
  2032. n=-1;
  2033. }
  2034. /* CONN_ERROR will auto-dec refcnt => we must not call tcpconn_put
  2035. * if it succeeds */
  2036. #ifdef TCP_FD_CACHE
  2037. if (unlikely(fd_cache_e)){
  2038. LOG(L_ERR, "ERROR: tcp_send %s: error on cached fd, removing from"
  2039. " the cache (%d, %p, %d)\n",
  2040. su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
  2041. fd, fd_cache_e->con, fd_cache_e->id);
  2042. tcp_fd_cache_rm(fd_cache_e);
  2043. close(fd);
  2044. }else
  2045. #endif /* TCP_FD_CACHE */
  2046. if (do_close_fd) close(fd);
  2047. return n; /* error return, no tcpconn_put */
  2048. }
  2049. #ifdef TCP_ASYNC
  2050. lock_release(&c->write_lock);
  2051. #endif /* TCP_ASYNC */
  2052. /* in non-async mode here we're either in S_CONN_OK or S_CONN_ACCEPT*/
  2053. if (unlikely(c->state==S_CONN_CONNECT || c->state==S_CONN_ACCEPT)){
  2054. TCP_STATS_ESTABLISHED(c->state);
  2055. c->state=S_CONN_OK;
  2056. }
  2057. if (unlikely(dst->send_flags & SND_F_CON_CLOSE)){
  2058. /* close after write => send EOF request to tcp_main */
  2059. c->state=S_CONN_BAD;
  2060. c->timeout=get_ticks_raw();
  2061. /* tell "main" it should drop this*/
  2062. response[0]=(long)c;
  2063. response[1]=CONN_EOF;
  2064. if (send_all(unix_tcp_sock, response, sizeof(response))<=0){
  2065. LOG(L_CRIT, "BUG: tcp_send: error return failed (write):%s (%d)\n",
  2066. strerror(errno), errno);
  2067. tcpconn_chld_put(c); /* deref. it manually */
  2068. n=-1;
  2069. }
  2070. /* CONN_EOF will auto-dec refcnt => we must not call tcpconn_put
  2071. * if it succeeds */
  2072. #ifdef TCP_FD_CACHE
  2073. if (unlikely(fd_cache_e)){
  2074. tcp_fd_cache_rm(fd_cache_e);
  2075. fd_cache_e=0;
  2076. close(fd);
  2077. }else
  2078. #endif /* TCP_FD_CACHE */
  2079. if (do_close_fd) close(fd);
  2080. goto end_no_conn;
  2081. }
  2082. end:
  2083. #ifdef TCP_FD_CACHE
  2084. if (unlikely((fd_cache_e==0) && use_fd_cache)){
  2085. tcp_fd_cache_add(c, fd);
  2086. }else
  2087. #endif /* TCP_FD_CACHE */
  2088. if (do_close_fd) close(fd);
  2089. release_c:
  2090. tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
  2091. end_no_conn:
  2092. return n;
  2093. #ifdef TCP_CONNECT_WAIT
  2094. conn_wait_error:
  2095. n=-1;
  2096. conn_wait_close:
  2097. /* connect or send failed or immediate close-after-send was requested on
  2098. * newly created connection which was not yet sent to tcp_main (but was
  2099. * already hashed) => don't send to main, unhash and destroy directly
  2100. * (if refcnt>2 it will be destroyed when the last sender releases the
  2101. * connection (tcpconn_chld_put(c))) or when tcp_main receives a
  2102. * CONN_ERROR it*/
  2103. c->state=S_CONN_BAD;
  2104. /* we are here only if we opened a new fd (and not reused a cached or
  2105. a reader one) => if the connect was successful close the fd */
  2106. if (fd>=0) close(fd);
  2107. TCPCONN_LOCK;
  2108. if (c->flags & F_CONN_HASHED){
  2109. /* if some other parallel tcp_send did send CONN_ERROR to
  2110. * tcp_main, the connection might be already detached */
  2111. _tcpconn_detach(c);
  2112. c->flags&=~F_CONN_HASHED;
  2113. TCPCONN_UNLOCK;
  2114. tcpconn_put(c);
  2115. }else
  2116. TCPCONN_UNLOCK;
  2117. /* dec refcnt -> mark it for destruction */
  2118. tcpconn_chld_put(c);
  2119. return n;
  2120. #endif /* TCP_CONNET_WAIT */
  2121. }
  2122. int tcp_init(struct socket_info* sock_info)
  2123. {
  2124. union sockaddr_union* addr;
  2125. int optval;
  2126. #ifdef HAVE_TCP_ACCEPT_FILTER
  2127. struct accept_filter_arg afa;
  2128. #endif /* HAVE_TCP_ACCEPT_FILTER */
  2129. #ifdef DISABLE_NAGLE
  2130. int flag;
  2131. struct protoent* pe;
  2132. if (tcp_proto_no==-1){ /* if not already set */
  2133. pe=getprotobyname("tcp");
  2134. if (pe==0){
  2135. LOG(L_ERR, "ERROR: tcp_init: could not get TCP protocol number\n");
  2136. tcp_proto_no=-1;
  2137. }else{
  2138. tcp_proto_no=pe->p_proto;
  2139. }
  2140. }
  2141. #endif
  2142. addr=&sock_info->su;
  2143. /* sock_info->proto=PROTO_TCP; */
  2144. if (init_su(addr, &sock_info->address, sock_info->port_no)<0){
  2145. LOG(L_ERR, "ERROR: tcp_init: could no init sockaddr_union\n");
  2146. goto error;
  2147. }
  2148. DBG("tcp_init: added %s\n", su2a(addr, sizeof(*addr)));
  2149. sock_info->socket=socket(AF2PF(addr->s.sa_family), SOCK_STREAM, 0);
  2150. if (sock_info->socket==-1){
  2151. LOG(L_ERR, "ERROR: tcp_init: socket: %s\n", strerror(errno));
  2152. goto error;
  2153. }
  2154. #ifdef DISABLE_NAGLE
  2155. flag=1;
  2156. if ( (tcp_proto_no!=-1) &&
  2157. (setsockopt(sock_info->socket, tcp_proto_no , TCP_NODELAY,
  2158. &flag, sizeof(flag))<0) ){
  2159. LOG(L_ERR, "ERROR: tcp_init: could not disable Nagle: %s\n",
  2160. strerror(errno));
  2161. }
  2162. #endif
  2163. #if !defined(TCP_DONT_REUSEADDR)
  2164. /* Stevens, "Network Programming", Section 7.5, "Generic Socket
  2165. * Options": "...server started,..a child continues..on existing
  2166. * connection..listening server is restarted...call to bind fails
  2167. * ... ALL TCP servers should specify the SO_REUSEADDRE option
  2168. * to allow the server to be restarted in this situation
  2169. *
  2170. * Indeed, without this option, the server can't restart.
  2171. * -jiri
  2172. */
  2173. optval=1;
  2174. if (setsockopt(sock_info->socket, SOL_SOCKET, SO_REUSEADDR,
  2175. (void*)&optval, sizeof(optval))==-1) {
  2176. LOG(L_ERR, "ERROR: tcp_init: setsockopt %s\n",
  2177. strerror(errno));
  2178. goto error;
  2179. }
  2180. #endif
  2181. /* tos */
  2182. optval = tos;
  2183. if (setsockopt(sock_info->socket, IPPROTO_IP, IP_TOS, (void*)&optval,
  2184. sizeof(optval)) ==-1){
  2185. LOG(L_WARN, "WARNING: tcp_init: setsockopt tos: %s\n", strerror(errno));
  2186. /* continue since this is not critical */
  2187. }
  2188. #ifdef HAVE_TCP_DEFER_ACCEPT
  2189. /* linux only */
  2190. if ((optval=cfg_get(tcp, tcp_cfg, defer_accept))){
  2191. if (setsockopt(sock_info->socket, IPPROTO_TCP, TCP_DEFER_ACCEPT,
  2192. (void*)&optval, sizeof(optval)) ==-1){
  2193. LOG(L_WARN, "WARNING: tcp_init: setsockopt TCP_DEFER_ACCEPT %s\n",
  2194. strerror(errno));
  2195. /* continue since this is not critical */
  2196. }
  2197. }
  2198. #endif /* HAVE_TCP_DEFFER_ACCEPT */
  2199. #ifdef HAVE_TCP_SYNCNT
  2200. if ((optval=cfg_get(tcp, tcp_cfg, syncnt))){
  2201. if (setsockopt(sock_info->socket, IPPROTO_TCP, TCP_SYNCNT, &optval,
  2202. sizeof(optval))<0){
  2203. LOG(L_WARN, "WARNING: tcp_init: failed to set"
  2204. " maximum SYN retr. count: %s\n", strerror(errno));
  2205. }
  2206. }
  2207. #endif
  2208. #ifdef HAVE_TCP_LINGER2
  2209. if ((optval=cfg_get(tcp, tcp_cfg, linger2))){
  2210. if (setsockopt(sock_info->socket, IPPROTO_TCP, TCP_LINGER2, &optval,
  2211. sizeof(optval))<0){
  2212. LOG(L_WARN, "WARNING: tcp_init: failed to set"
  2213. " maximum LINGER2 timeout: %s\n", strerror(errno));
  2214. }
  2215. }
  2216. #endif
  2217. init_sock_keepalive(sock_info->socket);
  2218. if (bind(sock_info->socket, &addr->s, sockaddru_len(*addr))==-1){
  2219. LOG(L_ERR, "ERROR: tcp_init: bind(%x, %p, %d) on %s:%d : %s\n",
  2220. sock_info->socket, &addr->s,
  2221. (unsigned)sockaddru_len(*addr),
  2222. sock_info->address_str.s,
  2223. sock_info->port_no,
  2224. strerror(errno));
  2225. goto error;
  2226. }
  2227. if (listen(sock_info->socket, TCP_LISTEN_BACKLOG)==-1){
  2228. LOG(L_ERR, "ERROR: tcp_init: listen(%x, %p, %d) on %s: %s\n",
  2229. sock_info->socket, &addr->s,
  2230. (unsigned)sockaddru_len(*addr),
  2231. sock_info->address_str.s,
  2232. strerror(errno));
  2233. goto error;
  2234. }
  2235. #ifdef HAVE_TCP_ACCEPT_FILTER
  2236. /* freebsd */
  2237. if (cfg_get(tcp, tcp_cfg, defer_accept)){
  2238. memset(&afa, 0, sizeof(afa));
  2239. strcpy(afa.af_name, "dataready");
  2240. if (setsockopt(sock_info->socket, SOL_SOCKET, SO_ACCEPTFILTER,
  2241. (void*)&afa, sizeof(afa)) ==-1){
  2242. LOG(L_WARN, "WARNING: tcp_init: setsockopt SO_ACCEPTFILTER %s\n",
  2243. strerror(errno));
  2244. /* continue since this is not critical */
  2245. }
  2246. }
  2247. #endif /* HAVE_TCP_ACCEPT_FILTER */
  2248. return 0;
  2249. error:
  2250. if (sock_info->socket!=-1){
  2251. close(sock_info->socket);
  2252. sock_info->socket=-1;
  2253. }
  2254. return -1;
  2255. }
  2256. /* close tcp_main's fd from a tcpconn
  2257. * WARNING: call only in tcp_main context */
  2258. inline static void tcpconn_close_main_fd(struct tcp_connection* tcpconn)
  2259. {
  2260. int fd;
  2261. fd=tcpconn->s;
  2262. #ifdef USE_TLS
  2263. /*FIXME: lock ->writelock ? */
  2264. if (tcpconn->type==PROTO_TLS)
  2265. tls_close(tcpconn, fd);
  2266. #endif
  2267. #ifdef TCP_FD_CACHE
  2268. if (likely(cfg_get(tcp, tcp_cfg, fd_cache))) shutdown(fd, SHUT_RDWR);
  2269. #endif /* TCP_FD_CACHE */
  2270. close_again:
  2271. if (unlikely(close(fd)<0)){
  2272. if (errno==EINTR)
  2273. goto close_again;
  2274. LOG(L_ERR, "ERROR: tcpconn_put_destroy; close() failed: %s (%d)\n",
  2275. strerror(errno), errno);
  2276. }
  2277. }
  2278. /* dec refcnt & frees the connection if refcnt==0
  2279. * returns 1 if the connection is freed, 0 otherwise
  2280. *
  2281. * WARNING: use only from child processes */
  2282. inline static int tcpconn_chld_put(struct tcp_connection* tcpconn)
  2283. {
  2284. if (unlikely(atomic_dec_and_test(&tcpconn->refcnt))){
  2285. DBG("tcpconn_chld_put: destroying connection %p (%d, %d) "
  2286. "flags %04x\n", tcpconn, tcpconn->id,
  2287. tcpconn->s, tcpconn->flags);
  2288. /* sanity checks */
  2289. membar_read_atomic_op(); /* make sure we see the current flags */
  2290. if (unlikely(!(tcpconn->flags & F_CONN_FD_CLOSED) ||
  2291. (tcpconn->flags &
  2292. (F_CONN_HASHED|F_CONN_MAIN_TIMER|
  2293. F_CONN_READ_W|F_CONN_WRITE_W)) )){
  2294. LOG(L_CRIT, "BUG: tcpconn_chld_put: %p bad flags = %0x\n",
  2295. tcpconn, tcpconn->flags);
  2296. abort();
  2297. }
  2298. _tcpconn_free(tcpconn); /* destroys also the wbuf_q if still present*/
  2299. return 1;
  2300. }
  2301. return 0;
  2302. }
  2303. /* simple destroy function (the connection should be already removed
  2304. * from the hashes and the fds should not be watched anymore for IO)
  2305. */
  2306. inline static void tcpconn_destroy(struct tcp_connection* tcpconn)
  2307. {
  2308. DBG("tcpconn_destroy: destroying connection %p (%d, %d) "
  2309. "flags %04x\n", tcpconn, tcpconn->id,
  2310. tcpconn->s, tcpconn->flags);
  2311. if (unlikely(tcpconn->flags & F_CONN_HASHED)){
  2312. LOG(L_CRIT, "BUG: tcpconn_destroy: called with hashed"
  2313. " connection (%p)\n", tcpconn);
  2314. /* try to continue */
  2315. if (likely(tcpconn->flags & F_CONN_MAIN_TIMER))
  2316. local_timer_del(&tcp_main_ltimer, &tcpconn->timer);
  2317. TCPCONN_LOCK;
  2318. _tcpconn_detach(tcpconn);
  2319. TCPCONN_UNLOCK;
  2320. }
  2321. if (likely(!(tcpconn->flags & F_CONN_FD_CLOSED))){
  2322. tcpconn_close_main_fd(tcpconn);
  2323. (*tcp_connections_no)--;
  2324. }
  2325. _tcpconn_free(tcpconn); /* destroys also the wbuf_q if still present*/
  2326. }
  2327. /* tries to destroy the connection: dec. refcnt and if 0 destroys the
  2328. * connection, else it will mark it as BAD and close the main fds
  2329. *
  2330. * returns 1 if the connection was destroyed, 0 otherwise
  2331. *
  2332. * WARNING: - the connection _has_ to be removed from the hash and timer
  2333. * first (use tcpconn_try_unhash() for this )
  2334. * - the fd should not be watched anymore (io_watch_del()...)
  2335. * - must be called _only_ from the tcp_main process context
  2336. * (or else the fd will remain open)
  2337. */
  2338. inline static int tcpconn_put_destroy(struct tcp_connection* tcpconn)
  2339. {
  2340. if (unlikely((tcpconn->flags &
  2341. (F_CONN_WRITE_W|F_CONN_HASHED|F_CONN_MAIN_TIMER|F_CONN_READ_W)) )){
  2342. /* sanity check */
  2343. if (unlikely(tcpconn->flags & F_CONN_HASHED)){
  2344. LOG(L_CRIT, "BUG: tcpconn_destroy: called with hashed and/or"
  2345. "on timer connection (%p), flags = %0x\n",
  2346. tcpconn, tcpconn->flags);
  2347. /* try to continue */
  2348. if (likely(tcpconn->flags & F_CONN_MAIN_TIMER))
  2349. local_timer_del(&tcp_main_ltimer, &tcpconn->timer);
  2350. TCPCONN_LOCK;
  2351. _tcpconn_detach(tcpconn);
  2352. TCPCONN_UNLOCK;
  2353. }else{
  2354. LOG(L_CRIT, "BUG: tcpconn_put_destroy: %p flags = %0x\n",
  2355. tcpconn, tcpconn->flags);
  2356. }
  2357. }
  2358. tcpconn->state=S_CONN_BAD;
  2359. /* in case it's still in a reader timer */
  2360. tcpconn->timeout=get_ticks_raw();
  2361. /* fast close: close fds now */
  2362. if (likely(!(tcpconn->flags & F_CONN_FD_CLOSED))){
  2363. tcpconn_close_main_fd(tcpconn);
  2364. tcpconn->flags|=F_CONN_FD_CLOSED;
  2365. (*tcp_connections_no)--;
  2366. }
  2367. /* all the flags / ops on the tcpconn must be done prior to decrementing
  2368. * the refcnt. and at least a membar_write_atomic_op() mem. barrier or
  2369. * a mb_atomic_* op must * be used to make sure all the changed flags are
  2370. * written into memory prior to the new refcnt value */
  2371. if (unlikely(mb_atomic_dec_and_test(&tcpconn->refcnt))){
  2372. _tcpconn_free(tcpconn);
  2373. return 1;
  2374. }
  2375. return 0;
  2376. }
  2377. /* try to remove a connection from the hashes and timer.
  2378. * returns 1 if the connection was removed, 0 if not (connection not in
  2379. * hash)
  2380. *
  2381. * WARNING: call it only in the tcp_main process context or else the
  2382. * timer removal won't work.
  2383. */
  2384. inline static int tcpconn_try_unhash(struct tcp_connection* tcpconn)
  2385. {
  2386. if (likely(tcpconn->flags & F_CONN_HASHED)){
  2387. tcpconn->state=S_CONN_BAD;
  2388. if (likely(tcpconn->flags & F_CONN_MAIN_TIMER)){
  2389. local_timer_del(&tcp_main_ltimer, &tcpconn->timer);
  2390. tcpconn->flags&=~F_CONN_MAIN_TIMER;
  2391. }else
  2392. /* in case it's still in a reader timer */
  2393. tcpconn->timeout=get_ticks_raw();
  2394. TCPCONN_LOCK;
  2395. if (tcpconn->flags & F_CONN_HASHED){
  2396. tcpconn->flags&=~F_CONN_HASHED;
  2397. _tcpconn_detach(tcpconn);
  2398. TCPCONN_UNLOCK;
  2399. }else{
  2400. /* tcp_send was faster and did unhash it itself */
  2401. TCPCONN_UNLOCK;
  2402. return 0;
  2403. }
  2404. #ifdef TCP_ASYNC
  2405. /* empty possible write buffers (optional) */
  2406. if (unlikely(_wbufq_non_empty(tcpconn))){
  2407. lock_get(&tcpconn->write_lock);
  2408. /* check again, while holding the lock */
  2409. if (likely(_wbufq_non_empty(tcpconn)))
  2410. _wbufq_destroy(&tcpconn->wbuf_q);
  2411. lock_release(&tcpconn->write_lock);
  2412. }
  2413. #endif /* TCP_ASYNC */
  2414. return 1;
  2415. }
  2416. return 0;
  2417. }
  2418. #ifdef SEND_FD_QUEUE
/* one entry of the "send fd" retry queue: a connection whose fd could not
 * be passed to a process yet and has to be re-sent later */
struct send_fd_info{
	struct tcp_connection* tcp_conn; /* connection to send (pointer + its fd) */
	ticks_t expire;        /* tick value after which retrying is given up */
	int unix_sock;         /* unix socket on which the fd is sent */
	unsigned int retries; /* debugging */
};
/* array based queue of send_fd_info entries; valid entries are
 * data[0] .. crt-1 */
struct tcp_send_fd_q{
	struct send_fd_info* data; /* buffer */
	struct send_fd_info* crt;  /* pointer inside the buffer (next free slot) */
	struct send_fd_info* end;  /* points after the last valid position */
};


/* queue of connections waiting to be (re)sent to a child */
static struct tcp_send_fd_q send2child_q;
  2431. static int send_fd_queue_init(struct tcp_send_fd_q *q, unsigned int size)
  2432. {
  2433. q->data=pkg_malloc(size*sizeof(struct send_fd_info));
  2434. if (q->data==0){
  2435. LOG(L_ERR, "ERROR: send_fd_queue_init: out of memory\n");
  2436. return -1;
  2437. }
  2438. q->crt=&q->data[0];
  2439. q->end=&q->data[size];
  2440. return 0;
  2441. }
  2442. static void send_fd_queue_destroy(struct tcp_send_fd_q *q)
  2443. {
  2444. if (q->data){
  2445. pkg_free(q->data);
  2446. q->data=0;
  2447. q->crt=q->end=0;
  2448. }
  2449. }
  2450. static int init_send_fd_queues()
  2451. {
  2452. if (send_fd_queue_init(&send2child_q, SEND_FD_QUEUE_SIZE)!=0)
  2453. goto error;
  2454. return 0;
  2455. error:
  2456. LOG(L_ERR, "ERROR: init_send_fd_queues: init failed\n");
  2457. return -1;
  2458. }
/* Destroys all the send fd queues (currently only send2child_q),
 * releasing their buffers. */
static void destroy_send_fd_queues()
{
	send_fd_queue_destroy(&send2child_q);
}
  2463. inline static int send_fd_queue_add( struct tcp_send_fd_q* q,
  2464. int unix_sock,
  2465. struct tcp_connection *t)
  2466. {
  2467. struct send_fd_info* tmp;
  2468. unsigned long new_size;
  2469. if (q->crt>=q->end){
  2470. new_size=q->end-&q->data[0];
  2471. if (new_size< MAX_SEND_FD_QUEUE_SIZE/2){
  2472. new_size*=2;
  2473. }else new_size=MAX_SEND_FD_QUEUE_SIZE;
  2474. if (unlikely(q->crt>=&q->data[new_size])){
  2475. LOG(L_ERR, "ERROR: send_fd_queue_add: queue full: %ld/%ld\n",
  2476. (long)(q->crt-&q->data[0]-1), new_size);
  2477. goto error;
  2478. }
  2479. LOG(L_CRIT, "INFO: send_fd_queue: queue full: %ld, extending to %ld\n",
  2480. (long)(q->end-&q->data[0]), new_size);
  2481. tmp=pkg_realloc(q->data, new_size*sizeof(struct send_fd_info));
  2482. if (unlikely(tmp==0)){
  2483. LOG(L_ERR, "ERROR: send_fd_queue_add: out of memory\n");
  2484. goto error;
  2485. }
  2486. q->crt=(q->crt-&q->data[0])+tmp;
  2487. q->data=tmp;
  2488. q->end=&q->data[new_size];
  2489. }
  2490. q->crt->tcp_conn=t;
  2491. q->crt->unix_sock=unix_sock;
  2492. q->crt->expire=get_ticks_raw()+SEND_FD_QUEUE_TIMEOUT;
  2493. q->crt->retries=0;
  2494. q->crt++;
  2495. return 0;
  2496. error:
  2497. return -1;
  2498. }
  2499. inline static void send_fd_queue_run(struct tcp_send_fd_q* q)
  2500. {
  2501. struct send_fd_info* p;
  2502. struct send_fd_info* t;
  2503. for (p=t=&q->data[0]; p<q->crt; p++){
  2504. if (unlikely(send_fd(p->unix_sock, &(p->tcp_conn),
  2505. sizeof(struct tcp_connection*), p->tcp_conn->s)<=0)){
  2506. if ( ((errno==EAGAIN)||(errno==EWOULDBLOCK)) &&
  2507. ((s_ticks_t)(p->expire-get_ticks_raw())>0)){
  2508. /* leave in queue for a future try */
  2509. *t=*p;
  2510. t->retries++;
  2511. t++;
  2512. }else{
  2513. LOG(L_ERR, "ERROR: run_send_fd_queue: send_fd failed"
  2514. " on socket %d , queue entry %ld, retries %d,"
  2515. " connection %p, tcp socket %d, errno=%d (%s) \n",
  2516. p->unix_sock, (long)(p-&q->data[0]), p->retries,
  2517. p->tcp_conn, p->tcp_conn->s, errno,
  2518. strerror(errno));
  2519. #ifdef TCP_ASYNC
  2520. if (p->tcp_conn->flags & F_CONN_WRITE_W){
  2521. io_watch_del(&io_h, p->tcp_conn->s, -1, IO_FD_CLOSING);
  2522. p->tcp_conn->flags &=~F_CONN_WRITE_W;
  2523. }
  2524. #endif
  2525. p->tcp_conn->flags &= ~F_CONN_READER;
  2526. if (likely(tcpconn_try_unhash(p->tcp_conn)))
  2527. tcpconn_put(p->tcp_conn);
  2528. tcpconn_put_destroy(p->tcp_conn); /* dec refcnt & destroy */
  2529. }
  2530. }
  2531. }
  2532. q->crt=t;
  2533. }
  2534. #else
  2535. #define send_fd_queue_run(q)
  2536. #endif
  2537. /* non blocking write() on a tcpconnection, unsafe version (should be called
  2538. * while holding c->write_lock). The fd should be non-blocking.
  2539. * returns number of bytes written on success, -1 on error (and sets errno)
  2540. */
  2541. inline static int _tcpconn_write_nb(int fd, struct tcp_connection* c,
  2542. char* buf, int len)
  2543. {
  2544. int n;
  2545. again:
  2546. #ifdef USE_TLS
  2547. if (unlikely(c->type==PROTO_TLS))
  2548. /* FIXME: tls_nonblocking_write !! */
  2549. n=tls_blocking_write(c, fd, buf, len);
  2550. else
  2551. #endif /* USE_TLS */
  2552. n=send(fd, buf, len,
  2553. #ifdef HAVE_MSG_NOSIGNAL
  2554. MSG_NOSIGNAL
  2555. #else
  2556. 0
  2557. #endif /* HAVE_MSG_NOSIGNAL */
  2558. );
  2559. if (unlikely(n<0)){
  2560. if (errno==EINTR) goto again;
  2561. }
  2562. return n;
  2563. }
/* handles io from a tcp child process
 *
 * Reads one fixed size message (2 longs) from the child's unix socket:
 * the first long is the tcp_connection pointer, the second the command
 * (CONN_RELEASE, CONN_ERROR, CONN_DESTROY or CONN_EOF).
 *  - CONN_RELEASE: the child gives the connection back; unless it has to
 *    be destroyed, its timeout is refreshed, the main timer is re-armed
 *    and the fd is watched for read again in tcp_main.
 *  - CONN_ERROR / CONN_DESTROY / CONN_EOF: the connection is unhashed and
 *    put-destroyed.
 *
 * params: tcp_c - pointer in the tcp_children array, to the entry for
 *                 which an io event was detected
 *         fd_i  - fd index in the fd_array (useful for optimizing
 *                 io_watch_deletes)
 * returns:  handle_* return convention: -1 on error, 0 on EAGAIN (no more
 *           io events queued), >0 on success. success/error refer only to
 *           the reads from the fd.
 */
inline static int handle_tcp_child(struct tcp_child* tcp_c, int fd_i)
{
	struct tcp_connection* tcpconn;
	long response[2]; /* [0] = tcpconn pointer, [1] = command */
	int cmd;
	int bytes;
	int n;
	ticks_t t;
	ticks_t crt_timeout;
	ticks_t con_lifetime;

	if (unlikely(tcp_c->unix_sock<=0)){
		/* (we can't have a fd==0, 0 is never closed )*/
		LOG(L_CRIT, "BUG: handle_tcp_child: fd %d for %d "
				"(pid %d, ser no %d)\n", tcp_c->unix_sock,
				(int)(tcp_c-&tcp_children[0]), tcp_c->pid, tcp_c->proc_no);
		goto error;
	}
	/* read until sizeof(response)
	 * (this is a SOCK_STREAM so read is not atomic) */
	bytes=recv_all(tcp_c->unix_sock, response, sizeof(response), MSG_DONTWAIT);
	if (unlikely(bytes<(int)sizeof(response))){
		if (bytes==0){
			/* EOF -> bad, child has died */
			DBG("DBG: handle_tcp_child: dead tcp child %d (pid %d, no %d)"
					" (shutting down?)\n", (int)(tcp_c-&tcp_children[0]),
					tcp_c->pid, tcp_c->proc_no );
			/* don't listen on it any more */
			io_watch_del(&io_h, tcp_c->unix_sock, fd_i, 0);
			goto error; /* eof. so no more io here, it's ok to return error */
		}else if (bytes<0){
			/* EAGAIN is ok if we try to empty the buffer
			 * e.g.: SIGIO_RT overflow mode or EPOLL ET */
			if ((errno!=EAGAIN) && (errno!=EWOULDBLOCK)){
				/* real read error: bytes stays <0 and is returned as such */
				LOG(L_CRIT, "ERROR: handle_tcp_child: read from tcp child %ld "
						" (pid %d, no %d) %s [%d]\n",
						(long)(tcp_c-&tcp_children[0]), tcp_c->pid,
						tcp_c->proc_no, strerror(errno), errno );
			}else{
				bytes=0;
			}
			/* try to ignore ? */
			goto end;
		}else{
			/* should never happen */
			LOG(L_CRIT, "BUG: handle_tcp_child: too few bytes received (%d)\n",
					bytes );
			bytes=0; /* something was read so there is no error; otoh if
						the read returned less than requested => the receive
						buffer is empty => no more io queued on this fd */
			goto end;
		}
	}

	DBG("handle_tcp_child: reader response= %lx, %ld from %d \n",
			response[0], response[1], (int)(tcp_c-&tcp_children[0]));
	cmd=response[1];
	tcpconn=(struct tcp_connection*)response[0];
	if (unlikely(tcpconn==0)){
		/* should never happen */
		LOG(L_CRIT, "BUG: handle_tcp_child: null tcpconn pointer received"
				" from tcp child %d (pid %d): %lx, %lx\n",
				(int)(tcp_c-&tcp_children[0]), tcp_c->pid,
				response[0], response[1]) ;
		goto end;
	}
	switch(cmd){
		case CONN_RELEASE:
			tcp_c->busy--;
			/* NOTE(review): tcpconn_put() presumably drops the child's
			 * reference and returns non-zero when none is left - confirm
			 * against its definition */
			if (unlikely(tcpconn_put(tcpconn))){
				tcpconn_destroy(tcpconn);
				break;
			}
			if (unlikely(tcpconn->state==S_CONN_BAD)){
				/* marked bad in the meantime => stop watching it and try
				 * to get rid of it */
#ifdef TCP_ASYNC
				if (unlikely(tcpconn->flags & F_CONN_WRITE_W)){
					io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
					tcpconn->flags &= ~F_CONN_WRITE_W;
				}
#endif /* TCP_ASYNC */
				if (tcpconn_try_unhash(tcpconn))
					tcpconn_put_destroy(tcpconn);
				break;
			}
			/* update the timeout*/
			t=get_ticks_raw();
			con_lifetime=cfg_get(tcp, tcp_cfg, con_lifetime);
			tcpconn->timeout=t+con_lifetime;
			crt_timeout=con_lifetime;
#ifdef TCP_ASYNC
			if (unlikely(cfg_get(tcp, tcp_cfg, async) &&
							_wbufq_non_empty(tcpconn) )){
				/* data still queued for write: either the write timeout
				 * already passed (=> destroy), or the timer must fire at
				 * the earlier of the two timeouts */
				if (unlikely(TICKS_GE(t, tcpconn->wbuf_q.wr_timeout))){
					DBG("handle_tcp_child: wr. timeout on CONN_RELEASE for %p "
							"refcnt= %d\n", tcpconn,
							atomic_get(&tcpconn->refcnt));
					/* timeout */
					if (unlikely(tcpconn->state==S_CONN_CONNECT)){
#ifdef USE_DST_BLACKLIST
						if (cfg_get(core, core_cfg, use_dst_blacklist))
							dst_blacklist_su( BLST_ERR_CONNECT,
												tcpconn->rcv.proto,
												&tcpconn->rcv.src_su, 0);
#endif /* USE_DST_BLACKLIST */
						TCP_EV_CONNECT_TIMEOUT(0, TCP_LADDR(tcpconn),
										TCP_LPORT(tcpconn), TCP_PSU(tcpconn),
										TCP_PROTO(tcpconn));
						TCP_STATS_CONNECT_FAILED();
					}else{
#ifdef USE_DST_BLACKLIST
						if (cfg_get(core, core_cfg, use_dst_blacklist))
							dst_blacklist_su( BLST_ERR_SEND,
												tcpconn->rcv.proto,
												&tcpconn->rcv.src_su, 0);
#endif /* USE_DST_BLACKLIST */
						TCP_EV_SEND_TIMEOUT(0, &tcpconn->rcv);
						TCP_STATS_SEND_TIMEOUT();
					}
					if (unlikely(tcpconn->flags & F_CONN_WRITE_W)){
						io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
						tcpconn->flags&=~F_CONN_WRITE_W;
					}
					if (tcpconn_try_unhash(tcpconn))
						tcpconn_put_destroy(tcpconn);
					break;
				}else{
					crt_timeout=MIN_unsigned(con_lifetime,
											tcpconn->wbuf_q.wr_timeout-t);
				}
			}
#endif /* TCP_ASYNC */
			/* re-activate the timer */
			tcpconn->timer.f=tcpconn_main_timeout;
			local_timer_reinit(&tcpconn->timer);
			local_timer_add(&tcp_main_ltimer, &tcpconn->timer, crt_timeout, t);
			/* must be after the de-ref*/
			tcpconn->flags|=(F_CONN_MAIN_TIMER|F_CONN_READ_W|F_CONN_WANTS_RD);
			tcpconn->flags&=~(F_CONN_READER|F_CONN_OOB_DATA);
			/* if the fd is already watched (for write) only the event set
			 * is changed, else the fd is added to the watch list */
#ifdef TCP_ASYNC
			if (unlikely(tcpconn->flags & F_CONN_WRITE_W))
				n=io_watch_chg(&io_h, tcpconn->s, POLLIN| POLLOUT, -1);
			else
#endif /* TCP_ASYNC */
				n=io_watch_add(&io_h, tcpconn->s, POLLIN, F_TCPCONN, tcpconn);
			if (unlikely(n<0)){
				LOG(L_CRIT, "ERROR: tcp_main: handle_tcp_child: failed to add"
						" new socket to the fd list\n");
				tcpconn->flags&=~F_CONN_READ_W;
#ifdef TCP_ASYNC
				if (unlikely(tcpconn->flags & F_CONN_WRITE_W)){
					io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
					tcpconn->flags&=~F_CONN_WRITE_W;
				}
#endif /* TCP_ASYNC */
				/* tcpconn_try_unhash() also removes the timer armed above */
				if (tcpconn_try_unhash(tcpconn))
					tcpconn_put_destroy(tcpconn);
				break;
			}
			DBG("handle_tcp_child: CONN_RELEASE  %p refcnt= %d\n",
					tcpconn, atomic_get(&tcpconn->refcnt));
			break;
		case CONN_ERROR:
		case CONN_DESTROY:
		case CONN_EOF:
			/* WARNING: this will auto-dec. refcnt! */
			tcp_c->busy--;
			/* main doesn't listen on it => we don't have to delete it
			 if (tcpconn->s!=-1)
				io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
			*/
#ifdef TCP_ASYNC
			if ((tcpconn->flags & F_CONN_WRITE_W) && (tcpconn->s!=-1)){
				io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
				tcpconn->flags&=~F_CONN_WRITE_W;
			}
#endif /* TCP_ASYNC */
			/* two releases: tcpconn_put() only if we did the unhashing,
			 * then tcpconn_put_destroy() unconditionally */
			if (tcpconn_try_unhash(tcpconn))
				tcpconn_put(tcpconn);
			tcpconn_put_destroy(tcpconn); /* deref & delete if refcnt==0 */
			break;
		default:
			LOG(L_CRIT, "BUG: handle_tcp_child:  unknown cmd %d"
					" from tcp reader %d\n",
					cmd, (int)(tcp_c-&tcp_children[0]));
	}
end:
	return bytes;
error:
	return -1;
}
/* handles io from a "generic" ser process (get fd or new_fd from a tcp_send)
 *
 * Reads one fixed size message (2 longs) plus an optional passed fd from
 * the process' unix socket: the first long is the tcp_connection pointer,
 * the second the command (CONN_ERROR, CONN_EOF, CONN_GET_FD, CONN_NEW and,
 * with TCP_ASYNC, CONN_QUEUED_WRITE, CONN_NEW_COMPLETE,
 * CONN_NEW_PENDING_WRITE).
 *
 * params: p     - pointer in the ser processes array (pt[]), to the entry for
 *                 which an io event was detected
 *         fd_i  - fd index in the fd_array (useful for optimizing
 *                 io_watch_deletes)
 * returns:  handle_* return convention:
 *          -1 on error reading from the fd,
 *           0 on EAGAIN  or when no  more io events are queued
 *             (receive buffer empty),
 *           >0 on successful reads from the fd (the receive buffer might
 *             be non-empty).
 */
inline static int handle_ser_child(struct process_table* p, int fd_i)
{
	struct tcp_connection* tcpconn;
	long response[2]; /* [0] = tcpconn pointer, [1] = command */
	int cmd;
	int bytes;
	int ret;
	int fd;    /* fd received along with the message (-1 is treated as
				  "no fd received" below) */
	int flags; /* poll events to watch for */
	ticks_t t;
	ticks_t con_lifetime;
#ifdef TCP_ASYNC
	ticks_t nxt_timeout;
#endif /* TCP_ASYNC */

	ret=-1;
	if (unlikely(p->unix_sock<=0)){
		/* (we can't have a fd==0, 0 is never closed )*/
		LOG(L_CRIT, "BUG: handle_ser_child: fd %d for %d "
				"(pid %d)\n", p->unix_sock, (int)(p-&pt[0]), p->pid);
		goto error;
	}
	/* get all bytes and the fd (if transmitted)
	 * (this is a SOCK_STREAM so read is not atomic) */
	bytes=receive_fd(p->unix_sock, response, sizeof(response), &fd,
						MSG_DONTWAIT);
	if (unlikely(bytes<(int)sizeof(response))){
		/* too few bytes read */
		if (bytes==0){
			/* EOF -> bad, child has died */
			DBG("DBG: handle_ser_child: dead child %d, pid %d"
					" (shutting down?)\n", (int)(p-&pt[0]), p->pid);
			/* don't listen on it any more */
			io_watch_del(&io_h, p->unix_sock, fd_i, 0);
			goto error; /* child dead => no further io events from it */
		}else if (bytes<0){
			/* EAGAIN is ok if we try to empty the buffer
			 * e.g: SIGIO_RT overflow mode or EPOLL ET */
			if ((errno!=EAGAIN) && (errno!=EWOULDBLOCK)){
				LOG(L_CRIT, "ERROR: handle_ser_child: read from child %d  "
						"(pid %d):  %s [%d]\n", (int)(p-&pt[0]), p->pid,
						strerror(errno), errno);
				ret=-1;
			}else{
				ret=0;
			}
			/* try to ignore ? */
			goto end;
		}else{
			/* should never happen */
			LOG(L_CRIT, "BUG: handle_ser_child: too few bytes received (%d)\n",
					bytes );
			ret=0; /* something was read so there is no error; otoh if
					  receive_fd returned less than requested => the receive
					  buffer is empty => no more io queued on this fd */
			goto end;
		}
	}
	ret=1; /* something was received, there might be more queued */
	DBG("handle_ser_child: read response= %lx, %ld, fd %d from %d (%d)\n",
					response[0], response[1], fd, (int)(p-&pt[0]), p->pid);
	cmd=response[1];
	tcpconn=(struct tcp_connection*)response[0];
	if (unlikely(tcpconn==0)){
		LOG(L_CRIT, "BUG: handle_ser_child: null tcpconn pointer received"
				" from child %d (pid %d): %lx, %lx\n",
				(int)(p-&pt[0]), p->pid, response[0], response[1]) ;
		goto end;
	}
	switch(cmd){
		case CONN_ERROR:
			LOG(L_ERR, "handle_ser_child: ERROR: received CON_ERROR for %p"
					" (id %d), refcnt %d\n",
					tcpconn, tcpconn->id, atomic_get(&tcpconn->refcnt));
			/* fall through: besides the log, CONN_ERROR is handled exactly
			 * like CONN_EOF */
		case CONN_EOF: /* forced EOF after full send, due to send flags */
#ifdef TCP_CONNECT_WAIT
			/* if the connection is pending => it might be on the way of
			 * reaching tcp_main (e.g. CONN_NEW_COMPLETE or
			 *  CONN_NEW_PENDING_WRITE) =>  it cannot be destroyed here */
			if ( !(tcpconn->flags & F_CONN_PENDING) &&
					tcpconn_try_unhash(tcpconn) )
				tcpconn_put(tcpconn);
#else /* ! TCP_CONNECT_WAIT */
			if ( tcpconn_try_unhash(tcpconn) )
				tcpconn_put(tcpconn);
#endif /* TCP_CONNECT_WAIT */
			if ( ((tcpconn->flags & (F_CONN_WRITE_W|F_CONN_READ_W)) ) &&
					(tcpconn->s!=-1)){
				io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
				tcpconn->flags&=~(F_CONN_WRITE_W|F_CONN_READ_W);
			}
			tcpconn_put_destroy(tcpconn); /* dec refcnt & destroy on 0 */
			break;
		case CONN_GET_FD:
			/* send the requested FD  */
			/* WARNING: take care of setting refcnt properly to
			 * avoid race conditions */
			if (unlikely(send_fd(p->unix_sock, &tcpconn, sizeof(tcpconn),
								tcpconn->s)<=0)){
				LOG(L_ERR, "ERROR: handle_ser_child: send_fd failed\n");
			}
			break;
		case CONN_NEW:
			/* update the fd in the requested tcpconn*/
			/* WARNING: take care of setting refcnt properly to
			 * avoid race conditions */
			if (unlikely(fd==-1)){
				LOG(L_CRIT, "BUG: handle_ser_child: CONN_NEW:"
							" no fd received\n");
				/* there is no fd to close in tcp_main => mark it closed so
				 * that tcpconn_put_destroy() skips the close() */
				tcpconn->flags|=F_CONN_FD_CLOSED;
				tcpconn_put_destroy(tcpconn);
				break;
			}
			(*tcp_connections_no)++;
			tcpconn->s=fd;
			/* add tcpconn to the list*/
			tcpconn_add(tcpconn);
			/* update the timeout*/
			t=get_ticks_raw();
			con_lifetime=cfg_get(tcp, tcp_cfg, con_lifetime);
			tcpconn->timeout=t+con_lifetime;
			/* activate the timer (already properly init. in tcpconn_new())
			 * no need for reinit */
			local_timer_add(&tcp_main_ltimer, &tcpconn->timer,
								con_lifetime, t);
			/* (!(x)-1) is 0 when x is 0 and -1 (all bits set) otherwise,
			 * so the masks below select F_CONN_WRITE_W / POLLOUT only if
			 * F_CONN_WANTS_WR is set */
			tcpconn->flags|=(F_CONN_MAIN_TIMER|F_CONN_READ_W|F_CONN_WANTS_RD)
#ifdef TCP_ASYNC
							/* not used for now, the connection is sent to tcp_main
							 * before knowing whether we can write on it or we should
							 * wait */
							| (((int)!(tcpconn->flags & F_CONN_WANTS_WR)-1)&
								F_CONN_WRITE_W)
#endif /* TCP_ASYNC */
							;
			tcpconn->flags&=~F_CONN_FD_CLOSED;
			flags=POLLIN
#ifdef TCP_ASYNC
					/* not used for now, the connection is sent to tcp_main
					 * before knowing if we can write on it or we should
					 * wait */
					| (((int)!(tcpconn->flags & F_CONN_WANTS_WR)-1) & POLLOUT)
#endif /* TCP_ASYNC */
					;
			if (unlikely(
					io_watch_add(&io_h, tcpconn->s, flags,
												F_TCPCONN, tcpconn)<0)){
				LOG(L_CRIT, "ERROR: tcp_main: handle_ser_child: failed to add"
						" new socket to the fd list\n");
				tcpconn->flags&=~(F_CONN_WRITE_W|F_CONN_READ_W);
				/* unhash (also removes the timer); the refcnt is
				 * decremented by tcpconn_put_destroy() */
				tcpconn_try_unhash(tcpconn);
				tcpconn_put_destroy(tcpconn);
			}
			break;
#ifdef TCP_ASYNC
		case CONN_QUEUED_WRITE:
			/* received only if the wr. queue is empty and a write finishes
			 * with EAGAIN (common after connect())
			 * it should only enable write watching on the fd. The connection
			 * should be  already in the hash. The refcnt is not changed.
			 */
			if (unlikely((tcpconn->state==S_CONN_BAD) ||
							!(tcpconn->flags & F_CONN_HASHED) ))
				break;
			if (!(tcpconn->flags & F_CONN_WANTS_WR)){
				tcpconn->flags|=F_CONN_WANTS_WR;
				t=get_ticks_raw();
				/* re-arm the main timer only if the write timeout comes
				 * before the connection timeout and is still in the future */
				if (likely((tcpconn->flags & F_CONN_MAIN_TIMER) &&
					(TICKS_LT(tcpconn->wbuf_q.wr_timeout, tcpconn->timeout)) &&
						TICKS_LT(t, tcpconn->wbuf_q.wr_timeout) )){
					/* _wbufq_nonempty() is guaranteed here */
					/* update the timer */
					local_timer_del(&tcp_main_ltimer, &tcpconn->timer);
					local_timer_reinit(&tcpconn->timer);
					local_timer_add(&tcp_main_ltimer, &tcpconn->timer,
										tcpconn->wbuf_q.wr_timeout-t, t);
					DBG("tcp_main: handle_ser_child: CONN_QUEUED_WRITE; %p "
							"timeout adjusted to %d s\n", tcpconn,
							TICKS_TO_S(tcpconn->wbuf_q.wr_timeout-t));
				}
				if (!(tcpconn->flags & F_CONN_WRITE_W)){
					tcpconn->flags|=F_CONN_WRITE_W;
					if (!(tcpconn->flags & F_CONN_READ_W)){
						/* fd not watched at all => add it, for write only */
						if (unlikely(io_watch_add(&io_h, tcpconn->s, POLLOUT,
												F_TCPCONN, tcpconn)<0)){
							LOG(L_CRIT, "ERROR: tcp_main: handle_ser_child:"
										" failed to enable write watch on"
										" socket\n");
							if (tcpconn_try_unhash(tcpconn))
								tcpconn_put_destroy(tcpconn);
							break;
						}
					}else{
						/* already watched for read => extend the events */
						if (unlikely(io_watch_chg(&io_h, tcpconn->s,
													POLLIN|POLLOUT, -1)<0)){
							LOG(L_CRIT, "ERROR: tcp_main: handle_ser_child:"
									" failed to change socket watch events\n");
							io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
							tcpconn->flags&=~F_CONN_READ_W;
							if (tcpconn_try_unhash(tcpconn))
								tcpconn_put_destroy(tcpconn);
							break;
						}
					}
				}
			}else{
				LOG(L_WARN, "tcp_main: handler_ser_child: connection %p"
							" already watched for write\n", tcpconn);
			}
			break;
#ifdef TCP_CONNECT_WAIT
		case CONN_NEW_COMPLETE:
		case CONN_NEW_PENDING_WRITE:
				/* received when a pending connect completes in the same
				 * tcp_send() that initiated it
				 * the connection is already in the hash with F_CONN_PENDING
				 * flag (added by tcp_send()) and refcnt at least 1 (for the
				 *  hash)*/
			tcpconn->flags&=~(F_CONN_PENDING|F_CONN_FD_CLOSED);
			if (unlikely((tcpconn->state==S_CONN_BAD) || (fd==-1))){
				if (unlikely(fd==-1))
					LOG(L_CRIT, "BUG: handle_ser_child: CONN_NEW_COMPLETE:"
								" no fd received\n");
				else
					LOG(L_WARN, "WARNING: handle_ser_child: CONN_NEW_COMPLETE:"
							" received connection with error\n");
				/* mark the fd as closed so that tcpconn_put_destroy()
				 * does not close() anything in tcp_main */
				tcpconn->flags|=F_CONN_FD_CLOSED;
				tcpconn->state=S_CONN_BAD;
				tcpconn_try_unhash(tcpconn);
				tcpconn_put_destroy(tcpconn);
				break;
			}
			(*tcp_connections_no)++;
			tcpconn->s=fd;
			/* update the timeout*/
			t=get_ticks_raw();
			con_lifetime=cfg_get(tcp, tcp_cfg, con_lifetime);
			tcpconn->timeout=t+con_lifetime;
			nxt_timeout=con_lifetime;
			if (unlikely(cmd==CONN_NEW_COMPLETE)){
				/* check if needs to be watched for write */
				lock_get(&tcpconn->write_lock);
					/* if queue non empty watch it for write */
					/* NOTE(review): relies on _wbufq_empty() returning
					 * exactly 0 or 1 (0-1 => all bits set => POLLOUT,
					 * 1-1 => 0) - confirm */
					flags=(_wbufq_empty(tcpconn)-1)&POLLOUT;
				lock_release(&tcpconn->write_lock);
				if (flags){
					if (TICKS_LT(tcpconn->wbuf_q.wr_timeout, tcpconn->timeout)
							&& TICKS_LT(t, tcpconn->wbuf_q.wr_timeout))
						nxt_timeout=tcpconn->wbuf_q.wr_timeout-t;
					tcpconn->flags|=F_CONN_WRITE_W|F_CONN_WANTS_WR;
				}
				/* activate the timer (already properly init. in
				   tcpconn_new())  no need for reinit */
				local_timer_add(&tcp_main_ltimer, &tcpconn->timer, nxt_timeout,
									t);
				tcpconn->flags|=F_CONN_MAIN_TIMER|F_CONN_READ_W|
								F_CONN_WANTS_RD;
			}else{
				/* CONN_NEW_PENDING_WRITE */
				/* no need to check, we have something queued for write */
				flags=POLLOUT;
				if (TICKS_LT(tcpconn->wbuf_q.wr_timeout, tcpconn->timeout)
						&& TICKS_LT(t, tcpconn->wbuf_q.wr_timeout))
					nxt_timeout=tcpconn->wbuf_q.wr_timeout-t;
				/* activate the timer (already properly init. in
				   tcpconn_new())  no need for reinit */
				local_timer_add(&tcp_main_ltimer, &tcpconn->timer, nxt_timeout,
									t);
				tcpconn->flags|=F_CONN_MAIN_TIMER|F_CONN_READ_W|
								F_CONN_WANTS_RD |
								F_CONN_WRITE_W|F_CONN_WANTS_WR;
			}
			flags|=POLLIN;
			if (unlikely(
					io_watch_add(&io_h, tcpconn->s, flags,
												F_TCPCONN, tcpconn)<0)){
				LOG(L_CRIT, "ERROR: tcp_main: handle_ser_child: failed to add"
						" new socket to the fd list\n");
				tcpconn->flags&=~(F_CONN_WRITE_W|F_CONN_READ_W);
				/* unhash (also removes the timer); the refcnt is
				 * decremented by tcpconn_put_destroy() */
				tcpconn_try_unhash(tcpconn);
				tcpconn_put_destroy(tcpconn);
			}
			break;
#endif /* TCP_CONNECT_WAIT */
#endif /* TCP_ASYNC */
		default:
			LOG(L_CRIT, "BUG: handle_ser_child: unknown cmd %d\n", cmd);
	}
end:
	return ret;
error:
	return -1;
}
  3065. /* sends a tcpconn + fd to a choosen child */
  3066. inline static int send2child(struct tcp_connection* tcpconn)
  3067. {
  3068. int i;
  3069. int min_busy;
  3070. int idx;
  3071. static int crt=0; /* current child */
  3072. int last;
  3073. min_busy=tcp_children[0].busy;
  3074. idx=0;
  3075. last=crt+tcp_children_no;
  3076. for (; crt<last; crt++){
  3077. i=crt%tcp_children_no;
  3078. if (!tcp_children[i].busy){
  3079. idx=i;
  3080. min_busy=0;
  3081. break;
  3082. }else if (min_busy>tcp_children[i].busy){
  3083. min_busy=tcp_children[i].busy;
  3084. idx=i;
  3085. }
  3086. }
  3087. crt=idx+1; /* next time we start with crt%tcp_children_no */
  3088. tcp_children[idx].busy++;
  3089. tcp_children[idx].n_reqs++;
  3090. if (unlikely(min_busy)){
  3091. DBG("WARNING: send2child: no free tcp receiver, "
  3092. " connection passed to the least busy one (%d)\n",
  3093. min_busy);
  3094. }
  3095. DBG("send2child: to tcp child %d %d(%d), %p\n", idx,
  3096. tcp_children[idx].proc_no,
  3097. tcp_children[idx].pid, tcpconn);
  3098. /* first make sure this child doesn't have pending request for
  3099. * tcp_main (to avoid a possible deadlock: e.g. child wants to
  3100. * send a release command, but the master fills its socket buffer
  3101. * with new connection commands => deadlock) */
  3102. /* answer tcp_send requests first */
  3103. while(handle_ser_child(&pt[tcp_children[idx].proc_no], -1)>0);
  3104. /* process tcp readers requests */
  3105. while(handle_tcp_child(&tcp_children[idx], -1)>0);
  3106. #ifdef SEND_FD_QUEUE
  3107. /* if queue full, try to queue the io */
  3108. if (unlikely(send_fd(tcp_children[idx].unix_sock, &tcpconn,
  3109. sizeof(tcpconn), tcpconn->s)<=0)){
  3110. if ((errno==EAGAIN)||(errno==EWOULDBLOCK)){
  3111. /* FIXME: remove after debugging */
  3112. LOG(L_CRIT, "INFO: tcp child %d, socket %d: queue full,"
  3113. " %d requests queued (total handled %d)\n",
  3114. idx, tcp_children[idx].unix_sock, min_busy,
  3115. tcp_children[idx].n_reqs-1);
  3116. if (send_fd_queue_add(&send2child_q, tcp_children[idx].unix_sock,
  3117. tcpconn)!=0){
  3118. LOG(L_ERR, "ERROR: send2child: queue send op. failed\n");
  3119. return -1;
  3120. }
  3121. }else{
  3122. LOG(L_ERR, "ERROR: send2child: send_fd failed\n");
  3123. return -1;
  3124. }
  3125. }
  3126. #else
  3127. if (unlikely(send_fd(tcp_children[idx].unix_sock, &tcpconn,
  3128. sizeof(tcpconn), tcpconn->s)<=0)){
  3129. LOG(L_ERR, "ERROR: send2child: send_fd failed\n");
  3130. return -1;
  3131. }
  3132. #endif
  3133. return 0;
  3134. }
  3135. /* handles a new connection, called internally by tcp_main_loop/handle_io.
  3136. * params: si - pointer to one of the tcp socket_info structures on which
  3137. * an io event was detected (connection attempt)
  3138. * returns: handle_* return convention: -1 on error, 0 on EAGAIN (no more
  3139. * io events queued), >0 on success. success/error refer only to
  3140. * the accept.
  3141. */
  3142. static inline int handle_new_connect(struct socket_info* si)
  3143. {
  3144. union sockaddr_union su;
  3145. union sockaddr_union sock_name;
  3146. unsigned sock_name_len;
  3147. union sockaddr_union* dst_su;
  3148. struct tcp_connection* tcpconn;
  3149. socklen_t su_len;
  3150. int new_sock;
  3151. /* got a connection on r */
  3152. su_len=sizeof(su);
  3153. new_sock=accept(si->socket, &(su.s), &su_len);
  3154. if (unlikely(new_sock==-1)){
  3155. if ((errno==EAGAIN)||(errno==EWOULDBLOCK))
  3156. return 0;
  3157. LOG(L_ERR, "WARNING: handle_new_connect: error while accepting"
  3158. " connection(%d): %s\n", errno, strerror(errno));
  3159. return -1;
  3160. }
  3161. if (unlikely(*tcp_connections_no>=cfg_get(tcp, tcp_cfg, max_connections))){
  3162. LOG(L_ERR, "ERROR: maximum number of connections exceeded: %d/%d\n",
  3163. *tcp_connections_no,
  3164. cfg_get(tcp, tcp_cfg, max_connections));
  3165. close(new_sock);
  3166. TCP_STATS_LOCAL_REJECT();
  3167. return 1; /* success, because the accept was succesfull */
  3168. }
  3169. if (unlikely(init_sock_opt_accept(new_sock)<0)){
  3170. LOG(L_ERR, "ERROR: handle_new_connect: init_sock_opt failed\n");
  3171. close(new_sock);
  3172. return 1; /* success, because the accept was succesfull */
  3173. }
  3174. (*tcp_connections_no)++;
  3175. TCP_STATS_ESTABLISHED(S_CONN_ACCEPT);
  3176. dst_su=&si->su;
  3177. if (unlikely(si->flags & SI_IS_ANY)){
  3178. /* INADDR_ANY => get local dst */
  3179. sock_name_len=sizeof(sock_name);
  3180. if (getsockname(new_sock, &sock_name.s, &sock_name_len)!=0){
  3181. LOG(L_ERR, "ERROR: handle_new_connect:"
  3182. " getsockname failed: %s(%d)\n",
  3183. strerror(errno), errno);
  3184. /* go on with the 0.0.0.0 dst from the sock_info */
  3185. }else{
  3186. dst_su=&sock_name;
  3187. }
  3188. }
  3189. /* add socket to list */
  3190. tcpconn=tcpconn_new(new_sock, &su, dst_su, si, si->proto, S_CONN_ACCEPT);
  3191. if (likely(tcpconn)){
  3192. tcpconn->flags|=F_CONN_PASSIVE;
  3193. #ifdef TCP_PASS_NEW_CONNECTION_ON_DATA
  3194. atomic_set(&tcpconn->refcnt, 1); /* safe, not yet available to the
  3195. outside world */
  3196. tcpconn_add(tcpconn);
  3197. /* activate the timer */
  3198. local_timer_add(&tcp_main_ltimer, &tcpconn->timer,
  3199. cfg_get(tcp, tcp_cfg, con_lifetime),
  3200. get_ticks_raw());
  3201. tcpconn->flags|=(F_CONN_MAIN_TIMER|F_CONN_READ_W|F_CONN_WANTS_RD);
  3202. if (unlikely(io_watch_add(&io_h, tcpconn->s, POLLIN,
  3203. F_TCPCONN, tcpconn)<0)){
  3204. LOG(L_CRIT, "ERROR: tcp_main: handle_new_connect: failed to add"
  3205. " new socket to the fd list\n");
  3206. tcpconn->flags&=~F_CONN_READ_W;
  3207. if (tcpconn_try_unhash(tcpconn))
  3208. tcpconn_put_destroy(tcpconn);
  3209. }
  3210. #else
  3211. atomic_set(&tcpconn->refcnt, 2); /* safe, not yet available to the
  3212. outside world */
  3213. /* prepare it for passing to a child */
  3214. tcpconn->flags|=F_CONN_READER;
  3215. tcpconn_add(tcpconn);
  3216. DBG("handle_new_connect: new connection from %s: %p %d flags: %04x\n",
  3217. su2a(&su, sizeof(su)), tcpconn, tcpconn->s, tcpconn->flags);
  3218. if(unlikely(send2child(tcpconn)<0)){
  3219. LOG(L_ERR,"ERROR: handle_new_connect: no children "
  3220. "available\n");
  3221. tcpconn->flags&=~F_CONN_READER;
  3222. tcpconn_put(tcpconn);
  3223. tcpconn_try_unhash(tcpconn);
  3224. tcpconn_put_destroy(tcpconn);
  3225. }
  3226. #endif
  3227. }else{ /*tcpconn==0 */
  3228. LOG(L_ERR, "ERROR: handle_new_connect: tcpconn_new failed, "
  3229. "closing socket\n");
  3230. close(new_sock);
  3231. (*tcp_connections_no)--;
  3232. }
  3233. return 1; /* accept() was succesfull */
  3234. }
  3235. /* handles an io event on one of the watched tcp connections
  3236. *
  3237. * params: tcpconn - pointer to the tcp_connection for which we have an io ev.
  3238. * fd_i - index in the fd_array table (needed for delete)
  3239. * returns: handle_* return convention, but on success it always returns 0
  3240. * (because it's one-shot, after a succesful execution the fd is
  3241. * removed from tcp_main's watch fd list and passed to a child =>
  3242. * tcp_main is not interested in further io events that might be
  3243. * queued for this fd)
  3244. */
  3245. inline static int handle_tcpconn_ev(struct tcp_connection* tcpconn, short ev,
  3246. int fd_i)
  3247. {
  3248. #ifdef TCP_ASYNC
  3249. int empty_q;
  3250. int bytes;
  3251. #endif /* TCP_ASYNC */
  3252. /* is refcnt!=0 really necessary?
  3253. * No, in fact it's a bug: I can have the following situation: a send only
  3254. * tcp connection used by n processes simultaneously => refcnt = n. In
  3255. * the same time I can have a read event and this situation is perfectly
  3256. * valid. -- andrei
  3257. */
  3258. #if 0
  3259. if ((tcpconn->refcnt!=0)){
  3260. /* FIXME: might be valid for sigio_rt iff fd flags are not cleared
  3261. * (there is a short window in which it could generate a sig
  3262. * that would be catched by tcp_main) */
  3263. LOG(L_CRIT, "BUG: handle_tcpconn_ev: io event on referenced"
  3264. " tcpconn (%p), refcnt=%d, fd=%d\n",
  3265. tcpconn, tcpconn->refcnt, tcpconn->s);
  3266. return -1;
  3267. }
  3268. #endif
  3269. /* pass it to child, so remove it from the io watch list and the local
  3270. * timer */
  3271. #ifdef TCP_ASYNC
  3272. empty_q=0; /* warning fix */
  3273. if (unlikely((ev & (POLLOUT|POLLERR|POLLHUP)) &&
  3274. (tcpconn->flags & F_CONN_WRITE_W))){
  3275. if (unlikely((ev & (POLLERR|POLLHUP)) ||
  3276. (wbufq_run(tcpconn->s, tcpconn, &empty_q)<0) ||
  3277. (empty_q && tcpconn_close_after_send(tcpconn))
  3278. )){
  3279. if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i, 0)<0)){
  3280. LOG(L_ERR, "ERROR: handle_tcpconn_ev: io_watch_del(1) failed:"
  3281. " for %p, fd %d\n", tcpconn, tcpconn->s);
  3282. }
  3283. if ((tcpconn->flags & F_CONN_READ_W) && (ev & POLLIN)){
  3284. /* connection is watched for read and there is a read event
  3285. * (unfortunately if we have POLLIN here we don't know if
  3286. * there's really any data in the read buffer or the POLLIN
  3287. * was generated by the error or EOF => to avoid loosing
  3288. * data it's safer to either directly check the read buffer
  3289. * or try a read)*/
  3290. /* in most cases the read buffer will be empty, so in general
  3291. * is cheaper to check it here and then send the
  3292. * conn. to a a child only if needed (another syscall + at
  3293. * least 2 * syscalls in the reader + ...) */
  3294. if ((ioctl(tcpconn->s, FIONREAD, &bytes)>=0) && (bytes>0)){
  3295. tcpconn->flags&=~(F_CONN_WRITE_W|F_CONN_READ_W|
  3296. F_CONN_WANTS_RD|F_CONN_WANTS_WR);
  3297. tcpconn->flags|=F_CONN_FORCE_EOF|F_CONN_WR_ERROR;
  3298. goto send_to_child;
  3299. }
  3300. /* if bytes==0 or ioctl failed, destroy the connection now */
  3301. }
  3302. tcpconn->flags&=~(F_CONN_WRITE_W|F_CONN_READ_W|
  3303. F_CONN_WANTS_RD|F_CONN_WANTS_WR);
  3304. if (unlikely(ev & POLLERR)){
  3305. if (unlikely(tcpconn->state==S_CONN_CONNECT)){
  3306. #ifdef USE_DST_BLACKLIST
  3307. if (cfg_get(core, core_cfg, use_dst_blacklist))
  3308. dst_blacklist_su(BLST_ERR_CONNECT, tcpconn->rcv.proto,
  3309. &tcpconn->rcv.src_su, 0);
  3310. #endif /* USE_DST_BLACKLIST */
  3311. TCP_EV_CONNECT_ERR(0, TCP_LADDR(tcpconn),
  3312. TCP_LPORT(tcpconn), TCP_PSU(tcpconn),
  3313. TCP_PROTO(tcpconn));
  3314. TCP_STATS_CONNECT_FAILED();
  3315. }else{
  3316. #ifdef USE_DST_BLACKLIST
  3317. if (cfg_get(core, core_cfg, use_dst_blacklist))
  3318. dst_blacklist_su(BLST_ERR_SEND, tcpconn->rcv.proto,
  3319. &tcpconn->rcv.src_su, 0);
  3320. #endif /* USE_DST_BLACKLIST */
  3321. TCP_STATS_CON_RESET(); /* FIXME: it could != RST */
  3322. }
  3323. }
  3324. if (unlikely(!tcpconn_try_unhash(tcpconn))){
  3325. LOG(L_CRIT, "BUG: tcpconn_ev: unhashed connection %p\n",
  3326. tcpconn);
  3327. }
  3328. tcpconn_put_destroy(tcpconn);
  3329. goto error;
  3330. }
  3331. if (empty_q){
  3332. tcpconn->flags&=~F_CONN_WANTS_WR;
  3333. if (!(tcpconn->flags & F_CONN_READ_W)){
  3334. if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i, 0)==-1)){
  3335. LOG(L_ERR, "ERROR: handle_tcpconn_ev: io_watch_del(2)"
  3336. " failed:" " for %p, fd %d\n",
  3337. tcpconn, tcpconn->s);
  3338. goto error;
  3339. }
  3340. }else{
  3341. if (unlikely(io_watch_chg(&io_h, tcpconn->s,
  3342. POLLIN, fd_i)==-1)){
  3343. LOG(L_ERR, "ERROR: handle_tcpconn_ev: io_watch_chg(1)"
  3344. " failed:" " for %p, fd %d\n",
  3345. tcpconn, tcpconn->s);
  3346. goto error;
  3347. }
  3348. }
  3349. tcpconn->flags&=~F_CONN_WRITE_W;
  3350. }
  3351. ev&=~POLLOUT; /* clear POLLOUT */
  3352. }
  3353. if (likely(ev && (tcpconn->flags & F_CONN_READ_W))){
  3354. /* if still some other IO event (POLLIN|POLLHUP|POLLERR) and
  3355. * connection is still watched in tcp_main for reads, send it to a
  3356. * child and stop watching it for input (but continue watching for
  3357. * writes if needed): */
  3358. if (unlikely(tcpconn->flags & F_CONN_WRITE_W)){
  3359. if (unlikely(io_watch_chg(&io_h, tcpconn->s, POLLOUT, fd_i)==-1)){
  3360. LOG(L_ERR, "ERROR: handle_tcpconn_ev: io_watch_chg(2)"
  3361. " failed:" " for %p, fd %d\n",
  3362. tcpconn, tcpconn->s);
  3363. goto error;
  3364. }
  3365. }else
  3366. #else
  3367. {
  3368. #endif /* TCP_ASYNC */
  3369. if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i, 0)==-1)){
  3370. LOG(L_ERR, "ERROR: handle_tcpconn_ev: io_watch_del(3)"
  3371. " failed:" " for %p, fd %d\n",
  3372. tcpconn, tcpconn->s);
  3373. goto error;
  3374. }
  3375. #ifdef TCP_ASYNC
  3376. send_to_child:
  3377. #endif
  3378. DBG("tcp: DBG: sendig to child, events %x\n", ev);
  3379. #ifdef POLLRDHUP
  3380. tcpconn->flags|=((int)!(ev & (POLLRDHUP|POLLHUP|POLLERR)) -1) &
  3381. F_CONN_EOF_SEEN;
  3382. #else /* POLLRDHUP */
  3383. tcpconn->flags|=((int)!(ev & (POLLHUP|POLLERR)) -1) & F_CONN_EOF_SEEN;
  3384. #endif /* POLLRDHUP */
  3385. tcpconn->flags|= ((int)!(ev & POLLPRI) -1) & F_CONN_OOB_DATA;
  3386. tcpconn->flags|=F_CONN_READER;
  3387. local_timer_del(&tcp_main_ltimer, &tcpconn->timer);
  3388. tcpconn->flags&=~(F_CONN_MAIN_TIMER|F_CONN_READ_W|F_CONN_WANTS_RD);
  3389. tcpconn_ref(tcpconn); /* refcnt ++ */
  3390. if (unlikely(send2child(tcpconn)<0)){
  3391. LOG(L_ERR,"ERROR: handle_tcpconn_ev: no children available\n");
  3392. tcpconn->flags&=~F_CONN_READER;
  3393. #ifdef TCP_ASYNC
  3394. if (tcpconn->flags & F_CONN_WRITE_W){
  3395. if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i, 0)<0)){
  3396. LOG(L_ERR, "ERROR: handle_tcpconn_ev: io_watch_del(4)"
  3397. " failed:" " for %p, fd %d\n",
  3398. tcpconn, tcpconn->s);
  3399. }
  3400. tcpconn->flags&=~F_CONN_WRITE_W;
  3401. }
  3402. #endif /* TCP_ASYNC */
  3403. tcpconn_put(tcpconn);
  3404. tcpconn_try_unhash(tcpconn);
  3405. tcpconn_put_destroy(tcpconn); /* because of the tcpconn_ref() */
  3406. }
  3407. }
  3408. return 0; /* we are not interested in possibly queued io events,
  3409. the fd was either passed to a child, closed, or for writes,
  3410. everything possible was already written */
  3411. error:
  3412. return -1;
  3413. }
  3414. /* generic handle io routine, it will call the appropiate
  3415. * handle_xxx() based on the fd_map type
  3416. *
  3417. * params: fm - pointer to a fd hash entry
  3418. * idx - index in the fd_array (or -1 if not known)
  3419. * return: -1 on error
  3420. * 0 on EAGAIN or when by some other way it is known that no more
  3421. * io events are queued on the fd (the receive buffer is empty).
  3422. * Usefull to detect when there are no more io events queued for
  3423. * sigio_rt, epoll_et, kqueue.
  3424. * >0 on successfull read from the fd (when there might be more io
  3425. * queued -- the receive buffer might still be non-empty)
  3426. */
  3427. inline static int handle_io(struct fd_map* fm, short ev, int idx)
  3428. {
  3429. int ret;
  3430. /* update the local config */
  3431. cfg_update();
  3432. switch(fm->type){
  3433. case F_SOCKINFO:
  3434. ret=handle_new_connect((struct socket_info*)fm->data);
  3435. break;
  3436. case F_TCPCONN:
  3437. ret=handle_tcpconn_ev((struct tcp_connection*)fm->data, ev, idx);
  3438. break;
  3439. case F_TCPCHILD:
  3440. ret=handle_tcp_child((struct tcp_child*)fm->data, idx);
  3441. break;
  3442. case F_PROC:
  3443. ret=handle_ser_child((struct process_table*)fm->data, idx);
  3444. break;
  3445. case F_NONE:
  3446. LOG(L_CRIT, "BUG: handle_io: empty fd map: %p {%d, %d, %p},"
  3447. " idx %d\n", fm, fm->fd, fm->type, fm->data, idx);
  3448. goto error;
  3449. default:
  3450. LOG(L_CRIT, "BUG: handle_io: uknown fd type %d\n", fm->type);
  3451. goto error;
  3452. }
  3453. return ret;
  3454. error:
  3455. return -1;
  3456. }
  3457. /* timer handler for tcpconnection handled by tcp_main */
  3458. static ticks_t tcpconn_main_timeout(ticks_t t, struct timer_ln* tl, void* data)
  3459. {
  3460. struct tcp_connection *c;
  3461. int fd;
  3462. int tcp_async;
  3463. c=(struct tcp_connection*)data;
  3464. /* or (struct tcp...*)(tl-offset(c->timer)) */
  3465. #ifdef TCP_ASYNC
  3466. DBG( "tcp_main: entering timer for %p (ticks=%d, timeout=%d (%d s), "
  3467. "wr_timeout=%d (%d s)), write queue: %d bytes\n",
  3468. c, t, c->timeout, TICKS_TO_S(c->timeout-t),
  3469. c->wbuf_q.wr_timeout, TICKS_TO_S(c->wbuf_q.wr_timeout-t),
  3470. c->wbuf_q.queued);
  3471. tcp_async=cfg_get(tcp, tcp_cfg, async);
  3472. if (likely(TICKS_LT(t, c->timeout) && ( !tcp_async | _wbufq_empty(c) |
  3473. TICKS_LT(t, c->wbuf_q.wr_timeout)) )){
  3474. if (unlikely(tcp_async && _wbufq_non_empty(c)))
  3475. return (ticks_t)MIN_unsigned(c->timeout-t, c->wbuf_q.wr_timeout-t);
  3476. else
  3477. return (ticks_t)(c->timeout - t);
  3478. }
  3479. /* if time out due to write, add it to the blacklist */
  3480. if (tcp_async && _wbufq_non_empty(c) && TICKS_GE(t, c->wbuf_q.wr_timeout)){
  3481. if (unlikely(c->state==S_CONN_CONNECT)){
  3482. #ifdef USE_DST_BLACKLIST
  3483. if (cfg_get(core, core_cfg, use_dst_blacklist))
  3484. dst_blacklist_su(BLST_ERR_CONNECT, c->rcv.proto,
  3485. &c->rcv.src_su, 0);
  3486. #endif /* USE_DST_BLACKLIST */
  3487. TCP_EV_CONNECT_TIMEOUT(0, TCP_LADDR(c), TCP_LPORT(c), TCP_PSU(c),
  3488. TCP_PROTO(c));
  3489. TCP_STATS_CONNECT_FAILED();
  3490. }else{
  3491. #ifdef USE_DST_BLACKLIST
  3492. if (cfg_get(core, core_cfg, use_dst_blacklist))
  3493. dst_blacklist_su(BLST_ERR_SEND, c->rcv.proto,
  3494. &c->rcv.src_su, 0);
  3495. #endif /* USE_DST_BLACKLIST */
  3496. TCP_EV_SEND_TIMEOUT(0, &c->rcv);
  3497. TCP_STATS_SEND_TIMEOUT();
  3498. }
  3499. }else{
  3500. /* idle timeout */
  3501. TCP_EV_IDLE_CONN_CLOSED(0, &c->rcv);
  3502. TCP_STATS_CON_TIMEOUT();
  3503. }
  3504. #else /* ! TCP_ASYNC */
  3505. if (TICKS_LT(t, c->timeout)){
  3506. /* timeout extended, exit */
  3507. return (ticks_t)(c->timeout - t);
  3508. }
  3509. /* idle timeout */
  3510. TCP_EV_IDLE_CONN_CLOSED(0, &c->rcv);
  3511. TCP_STATS_CON_TIMEOUT();
  3512. #endif /* TCP_ASYNC */
  3513. DBG("tcp_main: timeout for %p\n", c);
  3514. if (likely(c->flags & F_CONN_HASHED)){
  3515. c->flags&=~(F_CONN_HASHED|F_CONN_MAIN_TIMER);
  3516. c->state=S_CONN_BAD;
  3517. TCPCONN_LOCK;
  3518. _tcpconn_detach(c);
  3519. TCPCONN_UNLOCK;
  3520. }else{
  3521. c->flags&=~F_CONN_MAIN_TIMER;
  3522. LOG(L_CRIT, "BUG: tcp_main: timer: called with unhashed connection %p"
  3523. "\n", c);
  3524. tcpconn_ref(c); /* ugly hack to try to go on */
  3525. }
  3526. fd=c->s;
  3527. if (likely(fd>0)){
  3528. if (likely(c->flags & (F_CONN_READ_W|F_CONN_WRITE_W))){
  3529. io_watch_del(&io_h, fd, -1, IO_FD_CLOSING);
  3530. c->flags&=~(F_CONN_READ_W|F_CONN_WRITE_W);
  3531. }
  3532. }
  3533. tcpconn_put_destroy(c);
  3534. return 0;
  3535. }
  3536. static inline void tcp_timer_run()
  3537. {
  3538. ticks_t ticks;
  3539. ticks=get_ticks_raw();
  3540. if (unlikely((ticks-tcp_main_prev_ticks)<TCPCONN_TIMEOUT_MIN_RUN)) return;
  3541. tcp_main_prev_ticks=ticks;
  3542. local_timer_run(&tcp_main_ltimer, ticks);
  3543. }
  3544. /* keep in sync with tcpconn_destroy, the "delete" part should be
  3545. * the same except for io_watch_del..
  3546. * Note: this function is called only on shutdown by the main ser process via
  3547. * cleanup(). However it's also safe to call it from the tcp_main process.
  3548. * => with the ser shutdown exception, it cannot execute in parallel
  3549. * with tcpconn_add() or tcpconn_destroy()*/
  3550. static inline void tcpconn_destroy_all()
  3551. {
  3552. struct tcp_connection *c, *next;
  3553. unsigned h;
  3554. int fd;
  3555. TCPCONN_LOCK;
  3556. for(h=0; h<TCP_ID_HASH_SIZE; h++){
  3557. c=tcpconn_id_hash[h];
  3558. while(c){
  3559. next=c->id_next;
  3560. if (is_tcp_main){
  3561. /* we cannot close or remove the fd if we are not in the
  3562. * tcp main proc.*/
  3563. if ((c->flags & F_CONN_MAIN_TIMER)){
  3564. local_timer_del(&tcp_main_ltimer, &c->timer);
  3565. c->flags&=~F_CONN_MAIN_TIMER;
  3566. } /* else still in some reader */
  3567. fd=c->s;
  3568. if (fd>0 && (c->flags & (F_CONN_READ_W|F_CONN_WRITE_W))){
  3569. io_watch_del(&io_h, fd, -1, IO_FD_CLOSING);
  3570. c->flags&=~(F_CONN_READ_W|F_CONN_WRITE_W);
  3571. }
  3572. }else{
  3573. fd=-1;
  3574. }
  3575. #ifdef USE_TLS
  3576. if (fd>0 && c->type==PROTO_TLS)
  3577. tls_close(c, fd);
  3578. #endif
  3579. _tcpconn_rm(c);
  3580. if (fd>0) {
  3581. #ifdef TCP_FD_CACHE
  3582. if (likely(cfg_get(tcp, tcp_cfg, fd_cache)))
  3583. shutdown(fd, SHUT_RDWR);
  3584. #endif /* TCP_FD_CACHE */
  3585. close(fd);
  3586. }
  3587. (*tcp_connections_no)--;
  3588. c=next;
  3589. }
  3590. }
  3591. TCPCONN_UNLOCK;
  3592. }
  3593. /* tcp main loop */
  3594. void tcp_main_loop()
  3595. {
  3596. struct socket_info* si;
  3597. int r;
  3598. is_tcp_main=1; /* mark this process as tcp main */
  3599. tcp_main_max_fd_no=get_max_open_fds();
  3600. /* init send fd queues (here because we want mem. alloc only in the tcp
  3601. * process */
  3602. #ifdef SEND_FD_QUEUE
  3603. if (init_send_fd_queues()<0){
  3604. LOG(L_CRIT, "ERROR: init_tcp: could not init send fd queues\n");
  3605. goto error;
  3606. }
  3607. #endif
  3608. /* init io_wait (here because we want the memory allocated only in
  3609. * the tcp_main process) */
  3610. if (init_io_wait(&io_h, tcp_main_max_fd_no, tcp_poll_method)<0)
  3611. goto error;
  3612. /* init: start watching all the fds*/
  3613. /* init local timer */
  3614. tcp_main_prev_ticks=get_ticks_raw();
  3615. if (init_local_timer(&tcp_main_ltimer, get_ticks_raw())!=0){
  3616. LOG(L_ERR, "ERROR: init_tcp: failed to init local timer\n");
  3617. goto error;
  3618. }
  3619. #ifdef TCP_FD_CACHE
  3620. if (cfg_get(tcp, tcp_cfg, fd_cache)) tcp_fd_cache_init();
  3621. #endif /* TCP_FD_CACHE */
  3622. /* add all the sockets we listen on for connections */
  3623. for (si=tcp_listen; si; si=si->next){
  3624. if ((si->proto==PROTO_TCP) &&(si->socket!=-1)){
  3625. if (io_watch_add(&io_h, si->socket, POLLIN, F_SOCKINFO, si)<0){
  3626. LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
  3627. "listen socket to the fd list\n");
  3628. goto error;
  3629. }
  3630. }else{
  3631. LOG(L_CRIT, "BUG: tcp_main_loop: non tcp address in tcp_listen\n");
  3632. }
  3633. }
  3634. #ifdef USE_TLS
  3635. if (!tls_disable && tls_loaded()){
  3636. for (si=tls_listen; si; si=si->next){
  3637. if ((si->proto==PROTO_TLS) && (si->socket!=-1)){
  3638. if (io_watch_add(&io_h, si->socket, POLLIN, F_SOCKINFO, si)<0){
  3639. LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
  3640. "tls listen socket to the fd list\n");
  3641. goto error;
  3642. }
  3643. }else{
  3644. LOG(L_CRIT, "BUG: tcp_main_loop: non tls address"
  3645. " in tls_listen\n");
  3646. }
  3647. }
  3648. }
  3649. #endif
  3650. /* add all the unix sockets used for communcation with other ser processes
  3651. * (get fd, new connection a.s.o) */
  3652. for (r=1; r<process_no; r++){
  3653. if (pt[r].unix_sock>0) /* we can't have 0, we never close it!*/
  3654. if (io_watch_add(&io_h, pt[r].unix_sock, POLLIN,F_PROC, &pt[r])<0){
  3655. LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
  3656. "process %d unix socket to the fd list\n", r);
  3657. goto error;
  3658. }
  3659. }
  3660. /* add all the unix sokets used for communication with the tcp childs */
  3661. for (r=0; r<tcp_children_no; r++){
  3662. if (tcp_children[r].unix_sock>0)/*we can't have 0, we never close it!*/
  3663. if (io_watch_add(&io_h, tcp_children[r].unix_sock, POLLIN,
  3664. F_TCPCHILD, &tcp_children[r]) <0){
  3665. LOG(L_CRIT, "ERROR: tcp_main_loop: init: failed to add "
  3666. "tcp child %d unix socket to the fd list\n", r);
  3667. goto error;
  3668. }
  3669. }
  3670. /* initialize the cfg framework */
  3671. if (cfg_child_init()) goto error;
  3672. /* main loop */
  3673. switch(io_h.poll_method){
  3674. case POLL_POLL:
  3675. while(1){
  3676. /* wait and process IO */
  3677. io_wait_loop_poll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
  3678. send_fd_queue_run(&send2child_q); /* then new io */
  3679. /* remove old connections */
  3680. tcp_timer_run();
  3681. }
  3682. break;
  3683. #ifdef HAVE_SELECT
  3684. case POLL_SELECT:
  3685. while(1){
  3686. io_wait_loop_select(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
  3687. send_fd_queue_run(&send2child_q); /* then new io */
  3688. tcp_timer_run();
  3689. }
  3690. break;
  3691. #endif
  3692. #ifdef HAVE_SIGIO_RT
  3693. case POLL_SIGIO_RT:
  3694. while(1){
  3695. io_wait_loop_sigio_rt(&io_h, TCP_MAIN_SELECT_TIMEOUT);
  3696. send_fd_queue_run(&send2child_q); /* then new io */
  3697. tcp_timer_run();
  3698. }
  3699. break;
  3700. #endif
  3701. #ifdef HAVE_EPOLL
  3702. case POLL_EPOLL_LT:
  3703. while(1){
  3704. io_wait_loop_epoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
  3705. send_fd_queue_run(&send2child_q); /* then new io */
  3706. tcp_timer_run();
  3707. }
  3708. break;
  3709. case POLL_EPOLL_ET:
  3710. while(1){
  3711. io_wait_loop_epoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 1);
  3712. send_fd_queue_run(&send2child_q); /* then new io */
  3713. tcp_timer_run();
  3714. }
  3715. break;
  3716. #endif
  3717. #ifdef HAVE_KQUEUE
  3718. case POLL_KQUEUE:
  3719. while(1){
  3720. io_wait_loop_kqueue(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
  3721. send_fd_queue_run(&send2child_q); /* then new io */
  3722. tcp_timer_run();
  3723. }
  3724. break;
  3725. #endif
  3726. #ifdef HAVE_DEVPOLL
  3727. case POLL_DEVPOLL:
  3728. while(1){
  3729. io_wait_loop_devpoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
  3730. send_fd_queue_run(&send2child_q); /* then new io */
  3731. tcp_timer_run();
  3732. }
  3733. break;
  3734. #endif
  3735. default:
  3736. LOG(L_CRIT, "BUG: tcp_main_loop: no support for poll method "
  3737. " %s (%d)\n",
  3738. poll_method_name(io_h.poll_method), io_h.poll_method);
  3739. goto error;
  3740. }
  3741. error:
  3742. #ifdef SEND_FD_QUEUE
  3743. destroy_send_fd_queues();
  3744. #endif
  3745. destroy_io_wait(&io_h);
  3746. LOG(L_CRIT, "ERROR: tcp_main_loop: exiting...");
  3747. exit(-1);
  3748. }
  3749. /* cleanup before exit */
  3750. void destroy_tcp()
  3751. {
  3752. if (tcpconn_id_hash){
  3753. if (tcpconn_lock)
  3754. TCPCONN_UNLOCK; /* hack: force-unlock the tcp lock in case
  3755. some process was terminated while holding
  3756. it; this will allow an almost gracious
  3757. shutdown */
  3758. tcpconn_destroy_all();
  3759. shm_free(tcpconn_id_hash);
  3760. tcpconn_id_hash=0;
  3761. }
  3762. DESTROY_TCP_STATS();
  3763. if (tcp_connections_no){
  3764. shm_free(tcp_connections_no);
  3765. tcp_connections_no=0;
  3766. }
  3767. #ifdef TCP_ASYNC
  3768. if (tcp_total_wq){
  3769. shm_free(tcp_total_wq);
  3770. tcp_total_wq=0;
  3771. }
  3772. #endif /* TCP_ASYNC */
  3773. if (connection_id){
  3774. shm_free(connection_id);
  3775. connection_id=0;
  3776. }
  3777. if (tcpconn_aliases_hash){
  3778. shm_free(tcpconn_aliases_hash);
  3779. tcpconn_aliases_hash=0;
  3780. }
  3781. if (tcpconn_lock){
  3782. lock_destroy(tcpconn_lock);
  3783. lock_dealloc((void*)tcpconn_lock);
  3784. tcpconn_lock=0;
  3785. }
  3786. if (tcp_children){
  3787. pkg_free(tcp_children);
  3788. tcp_children=0;
  3789. }
  3790. destroy_local_timer(&tcp_main_ltimer);
  3791. }
  3792. int init_tcp()
  3793. {
  3794. char* poll_err;
  3795. tcp_options_check();
  3796. if (tcp_cfg==0){
  3797. BUG("tcp_cfg not initialized\n");
  3798. goto error;
  3799. }
  3800. /* init lock */
  3801. tcpconn_lock=lock_alloc();
  3802. if (tcpconn_lock==0){
  3803. LOG(L_CRIT, "ERROR: init_tcp: could not alloc lock\n");
  3804. goto error;
  3805. }
  3806. if (lock_init(tcpconn_lock)==0){
  3807. LOG(L_CRIT, "ERROR: init_tcp: could not init lock\n");
  3808. lock_dealloc((void*)tcpconn_lock);
  3809. tcpconn_lock=0;
  3810. goto error;
  3811. }
  3812. /* init globals */
  3813. tcp_connections_no=shm_malloc(sizeof(int));
  3814. if (tcp_connections_no==0){
  3815. LOG(L_CRIT, "ERROR: init_tcp: could not alloc globals\n");
  3816. goto error;
  3817. }
  3818. *tcp_connections_no=0;
  3819. if (INIT_TCP_STATS()!=0) goto error;
  3820. connection_id=shm_malloc(sizeof(int));
  3821. if (connection_id==0){
  3822. LOG(L_CRIT, "ERROR: init_tcp: could not alloc globals\n");
  3823. goto error;
  3824. }
  3825. *connection_id=1;
  3826. #ifdef TCP_ASYNC
  3827. tcp_total_wq=shm_malloc(sizeof(*tcp_total_wq));
  3828. if (tcp_total_wq==0){
  3829. LOG(L_CRIT, "ERROR: init_tcp: could not alloc globals\n");
  3830. goto error;
  3831. }
  3832. #endif /* TCP_ASYNC */
  3833. /* alloc hashtables*/
  3834. tcpconn_aliases_hash=(struct tcp_conn_alias**)
  3835. shm_malloc(TCP_ALIAS_HASH_SIZE* sizeof(struct tcp_conn_alias*));
  3836. if (tcpconn_aliases_hash==0){
  3837. LOG(L_CRIT, "ERROR: init_tcp: could not alloc address hashtable\n");
  3838. goto error;
  3839. }
  3840. tcpconn_id_hash=(struct tcp_connection**)shm_malloc(TCP_ID_HASH_SIZE*
  3841. sizeof(struct tcp_connection*));
  3842. if (tcpconn_id_hash==0){
  3843. LOG(L_CRIT, "ERROR: init_tcp: could not alloc id hashtable\n");
  3844. goto error;
  3845. }
  3846. /* init hashtables*/
  3847. memset((void*)tcpconn_aliases_hash, 0,
  3848. TCP_ALIAS_HASH_SIZE * sizeof(struct tcp_conn_alias*));
  3849. memset((void*)tcpconn_id_hash, 0,
  3850. TCP_ID_HASH_SIZE * sizeof(struct tcp_connection*));
  3851. /* fix config variables */
  3852. poll_err=check_poll_method(tcp_poll_method);
  3853. /* set an appropriate poll method */
  3854. if (poll_err || (tcp_poll_method==0)){
  3855. tcp_poll_method=choose_poll_method();
  3856. if (poll_err){
  3857. LOG(L_ERR, "ERROR: init_tcp: %s, using %s instead\n",
  3858. poll_err, poll_method_name(tcp_poll_method));
  3859. }else{
  3860. LOG(L_INFO, "init_tcp: using %s as the io watch method"
  3861. " (auto detected)\n", poll_method_name(tcp_poll_method));
  3862. }
  3863. }else{
  3864. LOG(L_INFO, "init_tcp: using %s io watch method (config)\n",
  3865. poll_method_name(tcp_poll_method));
  3866. }
  3867. return 0;
  3868. error:
  3869. /* clean-up */
  3870. destroy_tcp();
  3871. return -1;
  3872. }
  3873. #ifdef TCP_CHILD_NON_BLOCKING
  3874. /* returns -1 on error */
  3875. static int set_non_blocking(int s)
  3876. {
  3877. int flags;
  3878. /* non-blocking */
  3879. flags=fcntl(s, F_GETFL);
  3880. if (flags==-1){
  3881. LOG(L_ERR, "ERROR: set_non_blocking: fnctl failed: (%d) %s\n",
  3882. errno, strerror(errno));
  3883. goto error;
  3884. }
  3885. if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
  3886. LOG(L_ERR, "ERROR: set_non_blocking: fcntl: set non-blocking failed:"
  3887. " (%d) %s\n", errno, strerror(errno));
  3888. goto error;
  3889. }
  3890. return 0;
  3891. error:
  3892. return -1;
  3893. }
  3894. #endif
  /* Prepares the two descriptors fd[0] and fd[1] of a tcp child.
   * When TCP_CHILD_NON_BLOCKING is defined both are switched to
   * non-blocking mode (stopping at the first failure); otherwise nothing
   * is done.
   * Returns: -1 on error, 0 on success */
  int tcp_fix_child_sockets(int* fd)
  {
  #ifdef TCP_CHILD_NON_BLOCKING
      int i;

      for (i=0; i<2; i++){
          if (set_non_blocking(fd[i])<0)
              return -1;
      }
  #endif
      return 0;
  }
  3906. /* starts the tcp processes */
  3907. int tcp_init_children()
  3908. {
  3909. int r;
  3910. int reader_fd_1; /* for comm. with the tcp children read */
  3911. pid_t pid;
  3912. struct socket_info *si;
  3913. /* estimate max fd. no:
  3914. * 1 tcp send unix socket/all_proc,
  3915. * + 1 udp sock/udp proc + 1 tcp_child sock/tcp child*
  3916. * + no_listen_tcp */
  3917. for(r=0, si=tcp_listen; si; si=si->next, r++);
  3918. #ifdef USE_TLS
  3919. if (! tls_disable)
  3920. for (si=tls_listen; si; si=si->next, r++);
  3921. #endif
  3922. register_fds(r+tcp_max_connections+get_max_procs()-1 /* tcp main */);
  3923. #if 0
  3924. tcp_max_fd_no=get_max_procs()*2 +r-1 /* timer */ +3; /* stdin/out/err*/
  3925. /* max connections can be temporarily exceeded with estimated_process_count
  3926. * - tcp_main (tcpconn_connect called simultaneously in all all the
  3927. * processes) */
  3928. tcp_max_fd_no+=tcp_max_connections+get_max_procs()-1 /* tcp main */;
  3929. #endif
  3930. /* alloc the children array */
  3931. tcp_children=pkg_malloc(sizeof(struct tcp_child)*tcp_children_no);
  3932. if (tcp_children==0){
  3933. LOG(L_ERR, "ERROR: tcp_init_children: out of memory\n");
  3934. goto error;
  3935. }
  3936. /* create the tcp sock_info structures */
  3937. /* copy the sockets --moved to main_loop*/
  3938. /* fork children & create the socket pairs*/
  3939. for(r=0; r<tcp_children_no; r++){
  3940. child_rank++;
  3941. pid=fork_tcp_process(child_rank, "tcp receiver", r, &reader_fd_1);
  3942. if (pid<0){
  3943. LOG(L_ERR, "ERROR: tcp_main: fork failed: %s\n",
  3944. strerror(errno));
  3945. goto error;
  3946. }else if (pid>0){
  3947. /* parent */
  3948. }else{
  3949. /* child */
  3950. bind_address=0; /* force a SEGFAULT if someone uses a non-init.
  3951. bind address on tcp */
  3952. tcp_receive_loop(reader_fd_1);
  3953. }
  3954. }
  3955. return 0;
  3956. error:
  3957. return -1;
  3958. }
  3959. void tcp_get_info(struct tcp_gen_info *ti)
  3960. {
  3961. ti->tcp_readers=tcp_children_no;
  3962. ti->tcp_max_connections=tcp_max_connections;
  3963. ti->tcp_connections_no=*tcp_connections_no;
  3964. #ifdef TCP_ASYNC
  3965. ti->tcp_write_queued=*tcp_total_wq;
  3966. #else
  3967. ti->tcp_write_queued=0;
  3968. #endif /* TCP_ASYNC */
  3969. }
  3970. #endif