/*
 * Copyright (C) 2001-2003 FhG Fokus
 *
 * This file is part of Kamailio, a free SIP server.
 *
 * Kamailio is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Kamailio is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/** Kamailio core: tcp main/dispatcher and tcp send functions.
 * @file tcp_main.c
 * @ingroup core
 * Module: @ref core
 */

#ifdef USE_TCP

#ifndef SHM_MEM
#error "shared memory support needed (add -DSHM_MEM to Makefile.defs)"
#endif

#define HANDLE_IO_INLINE
#include "io_wait.h" /* include first to make sure the needed features are
                        turned on (e.g. _GNU_SOURCE for POLLRDHUP) */

#include <sys/time.h>
#include <sys/types.h>
#include <sys/select.h>
#include <sys/socket.h>
#ifdef HAVE_FILIO_H
#include <sys/filio.h> /* needed on solaris 2.x for FIONREAD */
#elif defined __OS_solaris
#define BSD_COMP /* needed on older solaris for FIONREAD */
#endif /* HAVE_FILIO_H / __OS_solaris */
#include <sys/ioctl.h> /* ioctl() used on write error */
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <sys/uio.h> /* writev */
#include <netdb.h>
#include <stdlib.h> /* exit() */
#include <unistd.h>
#include <errno.h>
#include <string.h>
#ifdef HAVE_SELECT
#include <sys/select.h>
#endif
#include <poll.h>

#include "ip_addr.h"
#include "pass_fd.h"
#include "tcp_conn.h"
#include "globals.h"
#include "pt.h"
#include "locking.h"
#include "mem/mem.h"
#include "mem/shm_mem.h"
#include "timer.h"
#include "sr_module.h"
#include "tcp_server.h"
#include "tcp_init.h"
#include "tcp_int_send.h"
#include "tcp_stats.h"
#include "tcp_ev.h"
#include "tsend.h"
#include "timer_ticks.h"
#include "local_timer.h"
#ifdef CORE_TLS
#include "tls/tls_server.h"
#define tls_loaded() 1
#else
#include "tls_hooks_init.h"
#include "tls_hooks.h"
#endif /* CORE_TLS */
#ifdef USE_DST_BLACKLIST
#include "dst_blacklist.h"
#endif /* USE_DST_BLACKLIST */
#include "tcp_info.h"
#include "tcp_options.h"
#include "ut.h"
#include "cfg/cfg_struct.h"

#define local_malloc pkg_malloc
#define local_free pkg_free

#include <fcntl.h> /* must be included after io_wait.h if SIGIO_RT is used */

#ifdef NO_MSG_DONTWAIT
#ifndef MSG_DONTWAIT
/* should work inside tcp_main */
#define MSG_DONTWAIT 0
#endif
#endif /* NO_MSG_DONTWAIT */

#define TCP_PASS_NEW_CONNECTION_ON_DATA /* don't pass a new connection
                                           immediately to a child, wait for
                                           some data on it first */
#define TCP_LISTEN_BACKLOG 1024
#define SEND_FD_QUEUE /* queue send fd requests on EAGAIN, instead of sending
                         them immediately */
#define TCP_CHILD_NON_BLOCKING
#ifdef SEND_FD_QUEUE
#ifndef TCP_CHILD_NON_BLOCKING
#define TCP_CHILD_NON_BLOCKING
#endif
#define MAX_SEND_FD_QUEUE_SIZE tcp_main_max_fd_no
#define SEND_FD_QUEUE_SIZE 128 /* initial size */
#define SEND_FD_QUEUE_TIMEOUT MS_TO_TICKS(2000) /* 2 s */
#endif

/* minimum interval local_timer_run() is allowed to run, in ticks */
#define TCPCONN_TIMEOUT_MIN_RUN 1 /* once per tick */
#define TCPCONN_WAIT_TIMEOUT 1 /* 1 tick */

#ifdef TCP_ASYNC
static unsigned int* tcp_total_wq=0;
#endif

enum fd_types { F_NONE, F_SOCKINFO /* a tcp_listen fd */,
				F_TCPCONN, F_TCPCHILD, F_PROC };
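/* note (meanings inferred from how the fds are used below): F_NONE - unused
 * entry; F_TCPCONN - established tcp connection watched by tcp_main;
 * F_TCPCHILD - unix socket towards a tcp worker child; F_PROC - unix socket
 * towards another kamailio process */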

#ifdef TCP_FD_CACHE

#define TCP_FD_CACHE_SIZE 8
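
/* small per-process cache mapping a connection (pointer + id) to an fd that
 * was already received from tcp_main, so that a send on the same connection
 * does not have to ask for the fd again */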
struct fd_cache_entry{
	struct tcp_connection* con;
	int id;
	int fd;
};

static struct fd_cache_entry fd_cache[TCP_FD_CACHE_SIZE];
#endif /* TCP_FD_CACHE */

static int is_tcp_main=0;

enum poll_types tcp_poll_method=0; /* by default choose the best method */
int tcp_main_max_fd_no=0;
int tcp_max_connections=DEFAULT_TCP_MAX_CONNECTIONS;
int tls_max_connections=DEFAULT_TLS_MAX_CONNECTIONS;

static union sockaddr_union tcp_source_ipv4_addr; /* saved bind/src v4 addr. */
static union sockaddr_union* tcp_source_ipv4=0;
static union sockaddr_union tcp_source_ipv6_addr; /* saved bind/src v6 addr. */
static union sockaddr_union* tcp_source_ipv6=0;

static int* tcp_connections_no=0; /* current tcp (+tls) open connections */
static int* tls_connections_no=0; /* current tls open connections */

/* connection hash table (hashed by ip & port), includes also the aliases */
struct tcp_conn_alias** tcpconn_aliases_hash=0;
/* connection hash table (hashed by connection id) */
struct tcp_connection** tcpconn_id_hash=0;
gen_lock_t* tcpconn_lock=0;

struct tcp_child* tcp_children=0;
static int* connection_id=0; /* unique for each connection, used for
                                quickly finding the corresponding connection
                                for a reply */
int unix_tcp_sock;

static int tcp_proto_no=-1; /* tcp protocol number as returned by
                               getprotobyname */

static io_wait_h io_h;

static struct local_timer tcp_main_ltimer;
static ticks_t tcp_main_prev_ticks;

/* tell if there are tcp workers that should handle only a specific socket
 * - used to optimize the search of the least loaded worker for a tcp socket
 * - 0 - no workers per tcp socket have been set
 * - 1 + generic_workers - when there are workers per tcp socket
 */
static int tcp_sockets_gworkers = 0;

static ticks_t tcpconn_main_timeout(ticks_t , struct timer_ln* , void* );

inline static int _tcpconn_add_alias_unsafe(struct tcp_connection* c, int port,
											struct ip_addr* l_ip, int l_port,
											int flags);

/* sets the source address used when opening new sockets and no source is
 * specified (by default the address is chosen by the kernel)
 * Should be used only on init.
 * returns -1 on error */
int tcp_set_src_addr(struct ip_addr* ip)
{
	switch (ip->af){
		case AF_INET:
			ip_addr2su(&tcp_source_ipv4_addr, ip, 0);
			tcp_source_ipv4=&tcp_source_ipv4_addr;
			break;
		case AF_INET6:
			ip_addr2su(&tcp_source_ipv6_addr, ip, 0);
			tcp_source_ipv6=&tcp_source_ipv6_addr;
			break;
		default:
			return -1;
	}
	return 0;
}
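
/* enables SO_KEEPALIVE on socket s and, where the platform supports it,
 * tunes the keepalive probe interval, idle time and probe count from the
 * tcp cfg values; failures to set the tuning options are only logged,
 * since they are not critical */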
static inline int init_sock_keepalive(int s)
{
	int optval;

#ifdef HAVE_SO_KEEPALIVE
	if (cfg_get(tcp, tcp_cfg, keepalive)){
		optval=1;
		if (setsockopt(s, SOL_SOCKET, SO_KEEPALIVE, &optval,
						sizeof(optval))<0){
			LM_WARN("failed to enable SO_KEEPALIVE: %s\n", strerror(errno));
			return -1;
		}
	}
#endif
#ifdef HAVE_TCP_KEEPINTVL
	if ((optval=cfg_get(tcp, tcp_cfg, keepintvl))){
		if (setsockopt(s, IPPROTO_TCP, TCP_KEEPINTVL, &optval,
						sizeof(optval))<0){
			LM_WARN("failed to set keepalive probes interval: %s\n",
					strerror(errno));
		}
	}
#endif
#ifdef HAVE_TCP_KEEPIDLE
	if ((optval=cfg_get(tcp, tcp_cfg, keepidle))){
		if (setsockopt(s, IPPROTO_TCP, TCP_KEEPIDLE, &optval,
						sizeof(optval))<0){
			LM_WARN("failed to set keepalive idle interval: %s\n",
					strerror(errno));
		}
	}
#endif
#ifdef HAVE_TCP_KEEPCNT
	if ((optval=cfg_get(tcp, tcp_cfg, keepcnt))){
		if (setsockopt(s, IPPROTO_TCP, TCP_KEEPCNT, &optval,
						sizeof(optval))<0){
			LM_WARN("failed to set maximum keepalive count: %s\n",
					strerror(errno));
		}
	}
#endif
	return 0;
}

/* set all socket/fd options for new sockets (e.g. before connect):
 * disable nagle, tos lowdelay, reuseaddr, non-blocking
 *
 * return -1 on error */
static int init_sock_opt(int s, int af)
{
	int flags;
	int optval;

#ifdef DISABLE_NAGLE
	flags=1;
	if ( (tcp_proto_no!=-1) && (setsockopt(s, tcp_proto_no, TCP_NODELAY,
					&flags, sizeof(flags))<0) ){
		LM_WARN("could not disable Nagle: %s\n", strerror(errno));
	}
#endif
	/* tos */
	optval = tos;
	if(af==AF_INET){
		if (setsockopt(s, IPPROTO_IP, IP_TOS, (void*)&optval,
					sizeof(optval)) ==-1){
			LM_WARN("setsockopt tos: %s\n", strerror(errno));
			/* continue since this is not critical */
		}
	} else if(af==AF_INET6){
		if (setsockopt(s, IPPROTO_IPV6, IPV6_TCLASS,
					(void*)&optval, sizeof(optval)) ==-1) {
			LM_WARN("setsockopt v6 tos: %s\n", strerror(errno));
			/* continue since this is not critical */
		}
	}
#if !defined(TCP_DONT_REUSEADDR)
	optval=1;
	if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,
				(void*)&optval, sizeof(optval))==-1){
		LM_ERR("setsockopt SO_REUSEADDR %s\n", strerror(errno));
		/* continue, not critical */
	}
#endif /* !TCP_DONT_REUSEADDR */
#ifdef HAVE_TCP_SYNCNT
	if ((optval=cfg_get(tcp, tcp_cfg, syncnt))){
		if (setsockopt(s, IPPROTO_TCP, TCP_SYNCNT, &optval,
						sizeof(optval))<0){
			LM_WARN("failed to set maximum SYN retr. count: %s\n",
					strerror(errno));
		}
	}
#endif
#ifdef HAVE_TCP_LINGER2
	if ((optval=cfg_get(tcp, tcp_cfg, linger2))){
		if (setsockopt(s, IPPROTO_TCP, TCP_LINGER2, &optval,
						sizeof(optval))<0){
			LM_WARN("failed to set maximum LINGER2 timeout: %s\n",
					strerror(errno));
		}
	}
#endif
#ifdef HAVE_TCP_QUICKACK
	if (cfg_get(tcp, tcp_cfg, delayed_ack)){
		optval=0; /* reset quick ack => delayed ack */
		if (setsockopt(s, IPPROTO_TCP, TCP_QUICKACK, &optval,
						sizeof(optval))<0){
			LM_WARN("failed to reset TCP_QUICKACK: %s\n", strerror(errno));
		}
	}
#endif /* HAVE_TCP_QUICKACK */
	init_sock_keepalive(s);

	/* non-blocking */
	flags=fcntl(s, F_GETFL);
	if (flags==-1){
		LM_ERR("fcntl failed: (%d) %s\n", errno, strerror(errno));
		goto error;
	}
	if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
		LM_ERR("fcntl: set non-blocking failed: (%d) %s\n", errno,
				strerror(errno));
		goto error;
	}
	return 0;
error:
	return -1;
}

/* set all socket/fd options for "accepted" sockets
 * only non-blocking is set since the rest is inherited from the
 * "parent" (listening) socket
 * Note: setting O_NONBLOCK is required on linux but it's not needed on
 * BSD and possibly solaris (where the flag is inherited from the
 * parent socket). However since there is no standard document
 * requiring a specific behaviour in this case it's safer to always set
 * it (at least for now) --andrei
 * TODO: check on which OSes O_NONBLOCK is inherited and make this
 * function a nop.
 *
 * return -1 on error */
static int init_sock_opt_accept(int s)
{
	int flags;

	/* non-blocking */
	flags=fcntl(s, F_GETFL);
	if (flags==-1){
		LM_ERR("fcntl failed: (%d) %s\n", errno, strerror(errno));
		goto error;
	}
	if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
		LM_ERR("fcntl: set non-blocking failed: (%d) %s\n", errno,
				strerror(errno));
		goto error;
	}
	return 0;
error:
	return -1;
}

/** close a socket, handling errno.
 * On EINTR, repeat the close().
 * Filter expected errors (return success if close() failed because of
 * EPIPE, ECONNRESET and so on). Note that this happens on *BSDs (on linux
 * close() does not fail for socket level errors).
 * @param s - open valid socket.
 * @return - 0 on success, < 0 on error (whatever close() returns). On error
 * errno is set.
 */
static int tcp_safe_close(int s)
{
	int ret;

retry:
	if (unlikely((ret = close(s)) < 0 )) {
		switch(errno) {
			case EINTR:
				goto retry;
			case EPIPE:
			case ENOTCONN:
			case ECONNRESET:
			case ECONNREFUSED:
			case ENETUNREACH:
			case EHOSTUNREACH:
				/* on *BSD we really get these errors at close() time
				 * => ignore them */
				ret = 0;
				break;
			default:
				break;
		}
	}
	return ret;
}

/* blocking connect on a non-blocking fd; it will timeout after
 * tcp_connect_timeout
 * if BLOCKING_USE_SELECT and HAVE_SELECT are defined it will internally
 * use select() instead of poll (bad if fd > FD_SETSIZE, poll is preferred)
 */
static int tcp_blocking_connect(int fd, int type, snd_flags_t* send_flags,
								const struct sockaddr *servaddr,
								socklen_t addrlen)
{
	int n;
#if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
	fd_set sel_set;
	fd_set orig_set;
	struct timeval timeout;
#else
	struct pollfd pf;
#endif
	int elapsed;
	int to;
	int ticks;
	int err;
	unsigned int err_len;
	int poll_err;

	poll_err=0;
	to=cfg_get(tcp, tcp_cfg, connect_timeout_s);
	ticks=get_ticks();
again:
	n=connect(fd, servaddr, addrlen);
	if (n==-1){
		if (errno==EINTR){
			elapsed=(get_ticks()-ticks)*TIMER_TICK;
			if (elapsed<to) goto again;
			else goto error_timeout;
		}
		if (errno!=EINPROGRESS && errno!=EALREADY){
			goto error_errno;
		}
	}else goto end;

	/* poll/select loop */
#if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
	FD_ZERO(&orig_set);
	FD_SET(fd, &orig_set);
#else
	pf.fd=fd;
	pf.events=POLLOUT;
#endif
	while(1){
		elapsed=(get_ticks()-ticks)*TIMER_TICK;
		if (elapsed>=to)
			goto error_timeout;
#if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
		sel_set=orig_set;
		timeout.tv_sec=to-elapsed;
		timeout.tv_usec=0;
		n=select(fd+1, 0, &sel_set, 0, &timeout);
#else
		n=poll(&pf, 1, (to-elapsed)*1000);
#endif
		if (n<0){
			if (errno==EINTR) continue;
			LM_ERR("%s: poll/select failed: (%d) %s\n",
					su2a((union sockaddr_union*)servaddr, addrlen),
					errno, strerror(errno));
			goto error;
		}else if (n==0) /* timeout */ continue;
#if defined(HAVE_SELECT) && defined(BLOCKING_USE_SELECT)
		if (FD_ISSET(fd, &sel_set))
#else
		if (pf.revents&(POLLERR|POLLHUP|POLLNVAL)){
			LM_ERR("%s: poll error: flags %x\n",
					su2a((union sockaddr_union*)servaddr, addrlen),
					pf.revents);
			poll_err=1;
		}
#endif
		{
			err_len=sizeof(err);
			getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &err_len);
			if ((err==0) && (poll_err==0)) goto end;
			if (err!=EINPROGRESS && err!=EALREADY){
				LM_ERR("%s: SO_ERROR (%d) %s\n",
						su2a((union sockaddr_union*)servaddr, addrlen),
						err, strerror(err));
				errno=err;
				goto error_errno;
			}
		}
	}
error_errno:
	switch(errno){
		case ENETUNREACH:
		case EHOSTUNREACH:
#ifdef USE_DST_BLACKLIST
			dst_blacklist_su(BLST_ERR_CONNECT, type,
							(union sockaddr_union*)servaddr, send_flags, 0);
#endif /* USE_DST_BLACKLIST */
			TCP_EV_CONNECT_UNREACHABLE(errno, 0, 0,
							(union sockaddr_union*)servaddr, type);
			break;
		case ETIMEDOUT:
#ifdef USE_DST_BLACKLIST
			dst_blacklist_su(BLST_ERR_CONNECT, type,
							(union sockaddr_union*)servaddr, send_flags, 0);
#endif /* USE_DST_BLACKLIST */
			TCP_EV_CONNECT_TIMEOUT(errno, 0, 0,
							(union sockaddr_union*)servaddr, type);
			break;
		case ECONNREFUSED:
		case ECONNRESET:
#ifdef USE_DST_BLACKLIST
			dst_blacklist_su(BLST_ERR_CONNECT, type,
							(union sockaddr_union*)servaddr, send_flags, 0);
#endif /* USE_DST_BLACKLIST */
			TCP_EV_CONNECT_RST(errno, 0, 0,
							(union sockaddr_union*)servaddr, type);
			break;
		case EAGAIN: /* not posix, but supported on linux and bsd */
			TCP_EV_CONNECT_NO_MORE_PORTS(errno, 0, 0,
							(union sockaddr_union*)servaddr, type);
			break;
		default:
			TCP_EV_CONNECT_ERR(errno, 0, 0,
							(union sockaddr_union*)servaddr, type);
	}
	LM_ERR("%s: (%d) %s\n",
			su2a((union sockaddr_union*)servaddr, addrlen),
			errno, strerror(errno));
	goto error;
error_timeout:
	/* timeout */
#ifdef USE_DST_BLACKLIST
	dst_blacklist_su(BLST_ERR_CONNECT, type,
					(union sockaddr_union*)servaddr, send_flags, 0);
#endif /* USE_DST_BLACKLIST */
	TCP_EV_CONNECT_TIMEOUT(0, 0, 0, (union sockaddr_union*)servaddr, type);
	LM_ERR("%s: timeout %d s elapsed from %d s\n",
			su2a((union sockaddr_union*)servaddr, addrlen),
			elapsed, cfg_get(tcp, tcp_cfg, connect_timeout_s));
error:
	TCP_STATS_CONNECT_FAILED();
	return -1;
end:
	return 0;
}

#ifdef TCP_ASYNC
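
/* Async write support: when a send on a non-blocking fd cannot complete,
 * the remaining data is copied into the per-connection write-buffer queue
 * (c->wbuf_q, a linked list of shm-allocated tcp_wbuffer blocks) and is
 * flushed later by wbufq_run() once the fd becomes writable again.
 * *tcp_total_wq tracks the total amount of queued data over all
 * connections. */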
/* unsafe version */
#define _wbufq_empty(con) ((con)->wbuf_q.first==0)
/* unsafe version */
#define _wbufq_non_empty(con) ((con)->wbuf_q.first!=0)

/* unsafe version, call while holding the connection write lock */
inline static int _wbufq_add(struct tcp_connection* c, const char* data,
							unsigned int size)
{
	struct tcp_wbuffer_queue* q;
	struct tcp_wbuffer* wb;
	unsigned int last_free;
	unsigned int wb_size;
	unsigned int crt_size;
	ticks_t t;

	q=&c->wbuf_q;
	t=get_ticks_raw();
	if (unlikely( ((q->queued+size)>cfg_get(tcp, tcp_cfg, tcpconn_wq_max)) ||
				((*tcp_total_wq+size)>cfg_get(tcp, tcp_cfg, tcp_wq_max)) ||
				(q->first &&
				TICKS_LT(q->wr_timeout, t)) )){
		LM_ERR("(%d bytes): write queue full or timeout"
				" (%d, total %d, last write %d s ago)\n",
				size, q->queued, *tcp_total_wq,
				TICKS_TO_S(t-(q->wr_timeout-
								cfg_get(tcp, tcp_cfg, send_timeout))));
		if (q->first && TICKS_LT(q->wr_timeout, t)){
			if (unlikely(c->state==S_CONN_CONNECT)){
#ifdef USE_DST_BLACKLIST
				(void)dst_blacklist_su( BLST_ERR_CONNECT, c->rcv.proto,
										&c->rcv.src_su, &c->send_flags, 0);
#endif /* USE_DST_BLACKLIST */
				TCP_EV_CONNECT_TIMEOUT(0, TCP_LADDR(c), TCP_LPORT(c),
										TCP_PSU(c), TCP_PROTO(c));
				TCP_STATS_CONNECT_FAILED();
			}else{
#ifdef USE_DST_BLACKLIST
				(void)dst_blacklist_su( BLST_ERR_SEND, c->rcv.proto,
										&c->rcv.src_su, &c->send_flags, 0);
#endif /* USE_DST_BLACKLIST */
				TCP_EV_SEND_TIMEOUT(0, &c->rcv);
				TCP_STATS_SEND_TIMEOUT();
			}
		}else{
			/* if it's not a timeout => queue full */
			TCP_EV_SENDQ_FULL(0, &c->rcv);
			TCP_STATS_SENDQ_FULL();
		}
		goto error;
	}

	if (unlikely(q->last==0)){
		wb_size=MAX_unsigned(cfg_get(tcp, tcp_cfg, wq_blk_size), size);
		wb=shm_malloc(sizeof(*wb)+wb_size-1);
		if (unlikely(wb==0))
			goto error;
		wb->b_size=wb_size;
		wb->next=0;
		q->last=wb;
		q->first=wb;
		q->last_used=0;
		q->offset=0;
		q->wr_timeout=get_ticks_raw()+
			((c->state==S_CONN_CONNECT)?
					S_TO_TICKS(cfg_get(tcp, tcp_cfg, connect_timeout_s)):
					cfg_get(tcp, tcp_cfg, send_timeout));
	}else{
		wb=q->last;
	}

	while(size){
		last_free=wb->b_size-q->last_used;
		if (last_free==0){
			wb_size=MAX_unsigned(cfg_get(tcp, tcp_cfg, wq_blk_size), size);
			wb=shm_malloc(sizeof(*wb)+wb_size-1);
			if (unlikely(wb==0))
				goto error;
			wb->b_size=wb_size;
			wb->next=0;
			q->last->next=wb;
			q->last=wb;
			q->last_used=0;
			last_free=wb->b_size;
		}
		crt_size=MIN_unsigned(last_free, size);
		memcpy(wb->buf+q->last_used, data, crt_size);
		q->last_used+=crt_size;
		size-=crt_size;
		data+=crt_size;
		q->queued+=crt_size;
		atomic_add_int((int*)tcp_total_wq, crt_size);
	}
	return 0;
error:
	return -1;
}

/* unsafe version, call while holding the connection write lock
 * inserts data at the beginning, it ignores the max queue size checks and
 * the timeout (use sparingly)
 * Note: it should never be called on a write buffer after wbufq_run() */
inline static int _wbufq_insert(struct tcp_connection* c, const char* data,
								unsigned int size)
{
	struct tcp_wbuffer_queue* q;
	struct tcp_wbuffer* wb;

	q=&c->wbuf_q;
	if (likely(q->first==0)) /* if empty, use wbufq_add */
		return _wbufq_add(c, data, size);

	if (unlikely((*tcp_total_wq+size)>cfg_get(tcp, tcp_cfg, tcp_wq_max))){
		LM_ERR("(%d bytes): write queue full"
				" (%d, total %d, last write %d s ago)\n",
				size, q->queued, *tcp_total_wq,
				TICKS_TO_S(get_ticks_raw()-q->wr_timeout-
							cfg_get(tcp, tcp_cfg, send_timeout)));
		goto error;
	}
	if (unlikely(q->offset)){
		LM_CRIT("non-null offset %d (bad call, should never be called"
				" after wbufq_run())\n", q->offset);
		goto error;
	}
	if ((q->first==q->last) && ((q->last->b_size-q->last_used)>=size)){
		/* one block with enough space in it for size bytes */
		memmove(q->first->buf+size, q->first->buf, q->last_used);
		memcpy(q->first->buf, data, size);
		q->last_used+=size;
	}else{
		/* create a size bytes block directly */
		wb=shm_malloc(sizeof(*wb)+size-1);
		if (unlikely(wb==0))
			goto error;
		wb->b_size=size;
		/* insert it */
		wb->next=q->first;
		q->first=wb;
		memcpy(wb->buf, data, size);
	}
	q->queued+=size;
	atomic_add_int((int*)tcp_total_wq, size);
	return 0;
error:
	return -1;
}

/* unsafe version, call while holding the connection write lock */
inline static void _wbufq_destroy( struct tcp_wbuffer_queue* q)
{
	struct tcp_wbuffer* wb;
	struct tcp_wbuffer* next_wb;
	int unqueued;

	unqueued=0;
	if (likely(q->first)){
		wb=q->first;
		do{
			next_wb=wb->next;
			unqueued+=(wb==q->last)?q->last_used:wb->b_size;
			if (wb==q->first)
				unqueued-=q->offset;
			shm_free(wb);
			wb=next_wb;
		}while(wb);
	}
	memset(q, 0, sizeof(*q));
	atomic_add_int((int*)tcp_total_wq, -unqueued);
}

/* tries to empty the queue (safe version, c->write_lock must not be held)
 * returns -1 on error, bytes written on success (>=0)
 * if the whole queue is emptied => sets *empty */
inline static int wbufq_run(int fd, struct tcp_connection* c, int* empty)
{
	struct tcp_wbuffer_queue* q;
	struct tcp_wbuffer* wb;
	int n;
	int ret;
	int block_size;
	char* buf;

	*empty=0;
	ret=0;
	lock_get(&c->write_lock);
	q=&c->wbuf_q;
	while(q->first){
		block_size=((q->first==q->last)?q->last_used:q->first->b_size)-
						q->offset;
		buf=q->first->buf+q->offset;
		n=_tcpconn_write_nb(fd, c, buf, block_size);
		if (likely(n>0)){
			ret+=n;
			if (likely(n==block_size)){
				wb=q->first;
				q->first=q->first->next;
				shm_free(wb);
				q->offset=0;
				q->queued-=block_size;
				atomic_add_int((int*)tcp_total_wq, -block_size);
			}else{
				q->offset+=n;
				q->queued-=n;
				atomic_add_int((int*)tcp_total_wq, -n);
				break;
			}
		}else{
			if (n<0){
				/* EINTR is handled inside _tcpconn_write_nb */
				if (!(errno==EAGAIN || errno==EWOULDBLOCK)){
					if (unlikely(c->state==S_CONN_CONNECT)){
						switch(errno){
							case ENETUNREACH:
							case EHOSTUNREACH: /* not posix for send() */
#ifdef USE_DST_BLACKLIST
								dst_blacklist_su(BLST_ERR_CONNECT,
													c->rcv.proto,
													&c->rcv.src_su,
													&c->send_flags, 0);
#endif /* USE_DST_BLACKLIST */
								TCP_EV_CONNECT_UNREACHABLE(errno, TCP_LADDR(c),
													TCP_LPORT(c), TCP_PSU(c),
													TCP_PROTO(c));
								break;
							case ECONNREFUSED:
							case ECONNRESET:
#ifdef USE_DST_BLACKLIST
								dst_blacklist_su(BLST_ERR_CONNECT,
													c->rcv.proto,
													&c->rcv.src_su,
													&c->send_flags, 0);
#endif /* USE_DST_BLACKLIST */
								TCP_EV_CONNECT_RST(0, TCP_LADDR(c),
													TCP_LPORT(c), TCP_PSU(c),
													TCP_PROTO(c));
								break;
							default:
								TCP_EV_CONNECT_ERR(errno, TCP_LADDR(c),
													TCP_LPORT(c), TCP_PSU(c),
													TCP_PROTO(c));
						}
						TCP_STATS_CONNECT_FAILED();
					}else{
						switch(errno){
							case ECONNREFUSED:
							case ECONNRESET:
								TCP_STATS_CON_RESET();
								/* no break */
							case ENETUNREACH:
							case EHOSTUNREACH: /* not posix for send() */
#ifdef USE_DST_BLACKLIST
								dst_blacklist_su(BLST_ERR_SEND,
													c->rcv.proto,
													&c->rcv.src_su,
													&c->send_flags, 0);
#endif /* USE_DST_BLACKLIST */
								break;
						}
					}
					ret=-1;
					LM_ERR("%s [%d]\n", strerror(errno), errno);
				}
			}
			break;
		}
	}
	if (likely(q->first==0)){
		q->last=0;
		q->last_used=0;
		q->offset=0;
		*empty=1;
	}
	lock_release(&c->write_lock);
	if (likely(ret>0)){
		q->wr_timeout=get_ticks_raw()+cfg_get(tcp, tcp_cfg, send_timeout);
		if (unlikely(c->state==S_CONN_CONNECT || c->state==S_CONN_ACCEPT)){
			TCP_STATS_ESTABLISHED(c->state);
			c->state=S_CONN_OK;
		}
	}
	return ret;
}
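
/* Usage sketch (illustrative only, assuming the caller already holds a
 * reference to the connection c and knows its fd):
 *
 *	lock_get(&c->write_lock);
 *	if (_wbufq_add(c, buf, len)<0)      // copy buf into the queue
 *		... handle queue full / send timeout ...
 *	lock_release(&c->write_lock);
 *	...
 *	// later, when poll reports the fd as writable:
 *	if (wbufq_run(fd, c, &empty)<0)
 *		... handle write error ...
 *	else if (empty)
 *		... stop watching the fd for POLLOUT ...
 */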
#endif /* TCP_ASYNC */

#if 0
/* blocking write even on non-blocking sockets;
 * returns with error if the send timeout (tcp_send_timeout) is exceeded */
static int tcp_blocking_write(struct tcp_connection* c, int fd, char* buf,
								unsigned int len)
{
	int n;
	fd_set sel_set;
	struct timeval timeout;
	int ticks;
	int initial_len;

	initial_len=len;
again:
	n=send(fd, buf, len,
#ifdef HAVE_MSG_NOSIGNAL
			MSG_NOSIGNAL
#else
			0
#endif
		);
	if (n<0){
		if (errno==EINTR) goto again;
		else if (errno!=EAGAIN && errno!=EWOULDBLOCK){
			LM_ERR("failed to send: (%d) %s\n", errno, strerror(errno));
			TCP_EV_SEND_TIMEOUT(errno, &c->rcv);
			TCP_STATS_SEND_TIMEOUT();
			goto error;
		}
	}else if (n<len){
		/* partial write */
		buf+=n;
		len-=n;
	}else{
		/* success: full write */
		goto end;
	}
	while(1){
		FD_ZERO(&sel_set);
		FD_SET(fd, &sel_set);
		timeout.tv_sec=tcp_send_timeout;
		timeout.tv_usec=0;
		ticks=get_ticks();
		n=select(fd+1, 0, &sel_set, 0, &timeout);
		if (n<0){
			if (errno==EINTR) continue; /* signal, ignore */
			LM_ERR("select failed: (%d) %s\n", errno, strerror(errno));
			goto error;
		}else if (n==0){
			/* timeout */
			if (get_ticks()-ticks>=tcp_send_timeout){
				LM_ERR("send timeout (%d)\n", tcp_send_timeout);
				goto error;
			}
			continue;
		}
		if (FD_ISSET(fd, &sel_set)){
			/* we can write again */
			goto again;
		}
	}
error:
	return -1;
end:
	return initial_len;
}
#endif
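
/* creates a new tcp_connection structure in shared memory for socket `sock`
 * and peer address *su; the read buffer is allocated in the same shm block,
 * the fd is left unset (c->fd==-1) and the reference count starts at 0.
 * returns the new connection or 0 on error */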
struct tcp_connection* tcpconn_new(int sock, union sockaddr_union* su,
									union sockaddr_union* local_addr,
									struct socket_info* ba, int type,
									int state)
{
	struct tcp_connection *c;
	int rd_b_size;

	rd_b_size=cfg_get(tcp, tcp_cfg, rd_buf_size);
	c=shm_malloc(sizeof(struct tcp_connection) + rd_b_size);
	if (c==0){
		LM_ERR("mem. allocation failure\n");
		goto error;
	}
	memset(c, 0, sizeof(struct tcp_connection)); /* zero init (skip rd buf) */
	c->s=sock;
	c->fd=-1; /* not initialized */
	if (lock_init(&c->write_lock)==0){
		LM_ERR("init lock failed\n");
		goto error;
	}

	c->rcv.src_su=*su;

	atomic_set(&c->refcnt, 0);
	local_timer_init(&c->timer, tcpconn_main_timeout, c, 0);
	su2ip_addr(&c->rcv.src_ip, su);
	c->rcv.src_port=su_getport(su);
	c->rcv.bind_address=ba;
	if (likely(local_addr)){
		su2ip_addr(&c->rcv.dst_ip, local_addr);
		c->rcv.dst_port=su_getport(local_addr);
	}else if (ba){
		c->rcv.dst_ip=ba->address;
		c->rcv.dst_port=ba->port_no;
	}
	print_ip("tcpconn_new: new tcp connection: ", &c->rcv.src_ip, "\n");
	LM_DBG("on port %d, type %d\n", c->rcv.src_port, type);
	init_tcp_req(&c->req, (char*)c+sizeof(struct tcp_connection), rd_b_size);
	c->id=(*connection_id)++;
	c->rcv.proto_reserved1=0; /* this will be filled before receive_message */
	c->rcv.proto_reserved2=0;
	c->state=state;
	c->extra_data=0;
#ifdef USE_TLS
	if (type==PROTO_TLS){
		if (tls_tcpconn_init(c, sock)==-1) goto error;
	}else
#endif /* USE_TLS */
	{
		c->type=PROTO_TCP;
		c->rcv.proto=PROTO_TCP;
		c->timeout=get_ticks_raw()+cfg_get(tcp, tcp_cfg, con_lifetime);
		c->lifetime = cfg_get(tcp, tcp_cfg, con_lifetime);
	}

	return c;

error:
	if (c) shm_free(c);
	return 0;
}

/* do the actual connect, set sock. options a.s.o
 * returns socket on success, -1 on error
 * sets also *res_local_addr, res_si and state (S_CONN_CONNECT for an
 * unfinished connect and S_CONN_OK for a finished one) */
inline static int tcp_do_connect( union sockaddr_union* server,
									union sockaddr_union* from,
									int type,
									snd_flags_t* send_flags,
									union sockaddr_union* res_local_addr,
									struct socket_info** res_si,
									enum tcp_conn_states *state
									)
{
	int s;
	union sockaddr_union my_name;
	socklen_t my_name_len;
	struct ip_addr ip;
#ifdef TCP_ASYNC
	int n;
#endif /* TCP_ASYNC */

	s=socket(AF2PF(server->s.sa_family), SOCK_STREAM, 0);
	if (unlikely(s==-1)){
		LM_ERR("%s: socket: (%d) %s\n",
				su2a(server, sizeof(*server)), errno, strerror(errno));
		goto error;
	}
	if (init_sock_opt(s, server->s.sa_family)<0){
		LM_ERR("%s: init_sock_opt failed\n",
				su2a(server, sizeof(*server)));
		goto error;
	}

	if (unlikely(from && bind(s, &from->s, sockaddru_len(*from)) != 0)){
		LM_WARN("binding to source address %s failed: %s [%d]\n",
				su2a(from, sizeof(*from)),
				strerror(errno), errno);
	}
	*state=S_CONN_OK;
#ifdef TCP_ASYNC
	if (likely(cfg_get(tcp, tcp_cfg, async))){
again:
		n=connect(s, &server->s, sockaddru_len(*server));
		if (likely(n==-1)){ /* non-blocking => most probable EINPROGRESS */
			if (likely(errno==EINPROGRESS))
				*state=S_CONN_CONNECT;
			else if (errno==EINTR) goto again;
			else if (errno!=EALREADY){
				switch(errno){
					case ENETUNREACH:
					case EHOSTUNREACH:
#ifdef USE_DST_BLACKLIST
						dst_blacklist_su(BLST_ERR_CONNECT, type, server,
											send_flags, 0);
#endif /* USE_DST_BLACKLIST */
						TCP_EV_CONNECT_UNREACHABLE(errno, 0, 0, server, type);
						break;
					case ETIMEDOUT:
#ifdef USE_DST_BLACKLIST
						dst_blacklist_su(BLST_ERR_CONNECT, type, server,
											send_flags, 0);
#endif /* USE_DST_BLACKLIST */
						TCP_EV_CONNECT_TIMEOUT(errno, 0, 0, server, type);
						break;
					case ECONNREFUSED:
					case ECONNRESET:
#ifdef USE_DST_BLACKLIST
						dst_blacklist_su(BLST_ERR_CONNECT, type, server,
											send_flags, 0);
#endif /* USE_DST_BLACKLIST */
						TCP_EV_CONNECT_RST(errno, 0, 0, server, type);
						break;
					case EAGAIN: /* not posix, but supported on linux and bsd */
						TCP_EV_CONNECT_NO_MORE_PORTS(errno, 0, 0, server, type);
						break;
					default:
						TCP_EV_CONNECT_ERR(errno, 0, 0, server, type);
				}
				TCP_STATS_CONNECT_FAILED();
				LM_ERR("connect %s: (%d) %s\n",
						su2a(server, sizeof(*server)),
						errno, strerror(errno));
				goto error;
			}
		}
	}else{
#endif /* TCP_ASYNC */
		if (tcp_blocking_connect(s, type, send_flags, &server->s,
									sockaddru_len(*server))<0){
			LM_ERR("tcp_blocking_connect %s failed\n",
					su2a(server, sizeof(*server)));
			goto error;
		}
#ifdef TCP_ASYNC
	}
#endif /* TCP_ASYNC */
	if (from){
		su2ip_addr(&ip, from);
		if (!ip_addr_any(&ip))
			/* we already know the source ip, skip the sys. call */
			goto find_socket;
	}
	my_name_len=sizeof(my_name);
	if (unlikely(getsockname(s, &my_name.s, &my_name_len)!=0)){
		LM_ERR("getsockname failed: %s(%d)\n", strerror(errno), errno);
		*res_si=0;
		goto error;
	}
	from=&my_name; /* update from with the real "from" address */
	su2ip_addr(&ip, &my_name);
find_socket:
#ifdef USE_TLS
	if (unlikely(type==PROTO_TLS))
		*res_si=find_si(&ip, 0, PROTO_TLS);
	else
#endif
		*res_si=find_si(&ip, 0, PROTO_TCP);

	if (unlikely(*res_si==0)){
		LM_WARN("%s: could not find corresponding"
				" listening socket for %s, using default...\n",
				su2a(server, sizeof(*server)), ip_addr2a(&ip));
		if (server->s.sa_family==AF_INET) *res_si=sendipv4_tcp;
		else *res_si=sendipv6_tcp;
	}
	*res_local_addr=*from;
	return s;
error:
	if (s!=-1) tcp_safe_close(s);
	return -1;
}
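
/* opens a new outgoing connection to `server` (optionally binding the local
 * end to `from`) and wraps the resulting socket in a tcp_connection
 * structure; enforces the configured tcp and tls connection limits.
 * returns the new connection or 0 on error */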
struct tcp_connection* tcpconn_connect( union sockaddr_union* server,
										union sockaddr_union* from,
										int type, snd_flags_t* send_flags)
{
	int s;
	struct socket_info* si;
	union sockaddr_union my_name;
	struct tcp_connection* con;
	enum tcp_conn_states state;

	s=-1;

	if (*tcp_connections_no >= cfg_get(tcp, tcp_cfg, max_connections)){
		LM_ERR("maximum number of connections exceeded (%d/%d)\n",
				*tcp_connections_no,
				cfg_get(tcp, tcp_cfg, max_connections));
		goto error;
	}
	if (unlikely(type==PROTO_TLS)) {
		if (*tls_connections_no >= cfg_get(tcp, tcp_cfg, max_tls_connections)){
			LM_ERR("maximum number of tls connections exceeded (%d/%d)\n",
					*tls_connections_no,
					cfg_get(tcp, tcp_cfg, max_tls_connections));
			goto error;
		}
	}
	s=tcp_do_connect(server, from, type, send_flags, &my_name, &si, &state);
	if (s==-1){
		LM_ERR("tcp_do_connect %s: failed (%d) %s\n",
				su2a(server, sizeof(*server)), errno, strerror(errno));
		goto error;
	}
	con=tcpconn_new(s, server, &my_name, si, type, state);
	if (con==0){
		LM_ERR("%s: tcpconn_new failed, closing the socket\n",
				su2a(server, sizeof(*server)));
		goto error;
	}
	tcpconn_set_send_flags(con, *send_flags);
	return con;
error:
	if (s!=-1) tcp_safe_close(s); /* close the opened socket */
	return 0;
}

#ifdef TCP_CONNECT_WAIT
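
/* completes a delayed connect() for a connection created with
 * TCP_CONNECT_WAIT: performs the actual connect, updates the connection's
 * local address/port and rebuilds the aliases that depend on them.
 * returns the new socket on success, -1 on error */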
  1077. int tcpconn_finish_connect( struct tcp_connection* c,
  1078. union sockaddr_union* from)
  1079. {
  1080. int s;
  1081. int r;
  1082. union sockaddr_union local_addr;
  1083. struct socket_info* si;
  1084. enum tcp_conn_states state;
  1085. struct tcp_conn_alias* a;
  1086. int new_conn_alias_flags;
  1087. s=tcp_do_connect(&c->rcv.src_su, from, c->type, &c->send_flags,
  1088. &local_addr, &si, &state);
  1089. if (unlikely(s==-1)){
  1090. LM_ERR("%s: tcp_do_connect for %p failed\n",
  1091. su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)), c);
  1092. return -1;
  1093. }
  1094. c->rcv.bind_address=si;
  1095. su2ip_addr(&c->rcv.dst_ip, &local_addr);
  1096. c->rcv.dst_port=su_getport(&local_addr);
  1097. /* update aliases if needed */
  1098. if (likely(from==0)){
  1099. new_conn_alias_flags=cfg_get(tcp, tcp_cfg, new_conn_alias_flags);
  1100. /* add aliases */
  1101. TCPCONN_LOCK;
  1102. _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip, 0,
  1103. new_conn_alias_flags);
  1104. _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
  1105. c->rcv.dst_port, new_conn_alias_flags);
  1106. TCPCONN_UNLOCK;
  1107. }else if (su_cmp(from, &local_addr)!=1){
  1108. new_conn_alias_flags=cfg_get(tcp, tcp_cfg, new_conn_alias_flags);
  1109. TCPCONN_LOCK;
  1110. /* remove all the aliases except the first one and re-add them
  1111. * (there shouldn't be more then the 3 default aliases at this
  1112. * stage) */
  1113. if (c->aliases > 1) {
  1114. for (r=1; r<c->aliases; r++){
  1115. a=&c->con_aliases[r];
  1116. tcpconn_listrm(tcpconn_aliases_hash[a->hash],
  1117. a, next, prev);
  1118. }
  1119. c->aliases=1;
  1120. }
  1121. /* add the local_ip:0 and local_ip:local_port aliases */
  1122. _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
  1123. 0, new_conn_alias_flags);
  1124. _tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
  1125. c->rcv.dst_port, new_conn_alias_flags);
  1126. TCPCONN_UNLOCK;
  1127. }
  1128. return s;
  1129. }
  1130. #endif /* TCP_CONNECT_WAIT */
/* adds a tcp connection to the tcpconn hashes
 * Note: it's called _only_ from the tcp_main process */
inline static struct tcp_connection* tcpconn_add(struct tcp_connection *c)
{
	struct ip_addr zero_ip;
	int new_conn_alias_flags;

	if (likely(c)){
		ip_addr_mk_any(c->rcv.src_ip.af, &zero_ip);
		c->id_hash=tcp_id_hash(c->id);
		c->aliases=0;
		new_conn_alias_flags=cfg_get(tcp, tcp_cfg, new_conn_alias_flags);
		TCPCONN_LOCK;
		c->flags|=F_CONN_HASHED;
		/* add it at the beginning of the list */
		tcpconn_listadd(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
		/* set the aliases */
		/* the first alias is for (peer_ip, peer_port, 0, 0) -- for finding
		 * any connection to peer_ip, peer_port;
		 * the second alias is for (peer_ip, peer_port, local_addr, 0) -- for
		 * finding any connection to peer_ip, peer_port from local_addr;
		 * the third alias is for (peer_ip, peer_port, local_addr, local_port)
		 * -- for finding if a fully specified connection exists */
		_tcpconn_add_alias_unsafe(c, c->rcv.src_port, &zero_ip, 0,
									new_conn_alias_flags);
		if (likely(c->rcv.dst_ip.af && ! ip_addr_any(&c->rcv.dst_ip))){
			_tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip, 0,
										new_conn_alias_flags);
			_tcpconn_add_alias_unsafe(c, c->rcv.src_port, &c->rcv.dst_ip,
										c->rcv.dst_port,
										new_conn_alias_flags);
		}
		/* ignore add_alias errors, there are some valid cases when one
		 * of the add_alias calls would fail (e.g. the first add_alias for 2
		 * connections with the same destination but different src. ip) */
		TCPCONN_UNLOCK;
		LM_DBG("hashes: %d:%d:%d, %d\n",
				c->con_aliases[0].hash,
				c->con_aliases[1].hash,
				c->con_aliases[2].hash,
				c->id_hash);
		return c;
	}else{
		LM_CRIT("null connection pointer\n");
		return 0;
	}
}
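/* Illustrative sketch (editor's addition, not part of the original source):
 * the three aliases installed above support lookups at three levels of
 * specificity. A hypothetical lookup sequence, assuming peer_ip/peer_port/
 * local_ip/local_port are filled in and TCPCONN_LOCK is held: */
#if 0
	/* any connection to peer_ip:peer_port */
	c = _tcpconn_find(0, &peer_ip, peer_port, &zero_ip, 0);
	/* any connection to peer_ip:peer_port from local_ip */
	c = _tcpconn_find(0, &peer_ip, peer_port, &local_ip, 0);
	/* the fully specified connection */
	c = _tcpconn_find(0, &peer_ip, peer_port, &local_ip, local_port);
#endif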
static inline void _tcpconn_detach(struct tcp_connection *c)
{
	int r;

	tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
	/* remove all the aliases */
	for (r=0; r<c->aliases; r++)
		tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash],
						&c->con_aliases[r], next, prev);
	c->aliases = 0;
}

static inline void _tcpconn_free(struct tcp_connection* c)
{
#ifdef TCP_ASYNC
	if (unlikely(_wbufq_non_empty(c)))
		_wbufq_destroy(&c->wbuf_q);
#endif
	lock_destroy(&c->write_lock);
#ifdef USE_TLS
	if (unlikely(c->type==PROTO_TLS || c->type==PROTO_WSS))
		tls_tcpconn_clean(c);
#endif
	shm_free(c);
}

/* unsafe tcpconn_rm version (no locks) */
void _tcpconn_rm(struct tcp_connection* c)
{
	_tcpconn_detach(c);
	_tcpconn_free(c);
}

void tcpconn_rm(struct tcp_connection* c)
{
	int r;

	TCPCONN_LOCK;
	tcpconn_listrm(tcpconn_id_hash[c->id_hash], c, id_next, id_prev);
	/* remove all the aliases */
	for (r=0; r<c->aliases; r++)
		tcpconn_listrm(tcpconn_aliases_hash[c->con_aliases[r].hash],
						&c->con_aliases[r], next, prev);
	c->aliases = 0;
	TCPCONN_UNLOCK;
	lock_destroy(&c->write_lock);
#ifdef USE_TLS
	if ((c->type==PROTO_TLS || c->type==PROTO_WSS) && (c->extra_data))
		tls_tcpconn_clean(c);
#endif
	shm_free(c);
}
/* finds a connection; if id=0 it uses the ip addr, port, local_ip and local
 * port (host byte order) and tries to find the connection that matches all
 * of them. Wild cards can be used for local_ip and local_port (a 0 filled
 * ip address and/or a 0 local port).
 * WARNING: unprotected (no locks); use tcpconn_get unless you really
 * know what you are doing */
struct tcp_connection* _tcpconn_find(int id, struct ip_addr* ip, int port,
										struct ip_addr* l_ip, int l_port)
{
	struct tcp_connection *c;
	struct tcp_conn_alias* a;
	unsigned hash;
	int is_local_ip_any;

#ifdef EXTRA_DEBUG
	LM_DBG("%d port %d\n", id, port);
	if (ip) print_ip("tcpconn_find: ip ", ip, "\n");
#endif
	if (likely(id)){
		hash=tcp_id_hash(id);
		for (c=tcpconn_id_hash[hash]; c; c=c->id_next){
#ifdef EXTRA_DEBUG
			LM_DBG("c=%p, c->id=%d, port=%d\n", c, c->id, c->rcv.src_port);
			print_ip("ip=", &c->rcv.src_ip, "\n");
#endif
			if ((id==c->id)&&(c->state!=S_CONN_BAD)) return c;
		}
	}else if (likely(ip)){
		hash=tcp_addr_hash(ip, port, l_ip, l_port);
		is_local_ip_any=ip_addr_any(l_ip);
		for (a=tcpconn_aliases_hash[hash]; a; a=a->next){
#ifdef EXTRA_DEBUG
			LM_DBG("a=%p, c=%p, c->id=%d, alias port= %d port=%d\n", a,
					a->parent, a->parent->id, a->port,
					a->parent->rcv.src_port);
			print_ip("ip=", &a->parent->rcv.src_ip, "\n");
#endif
			if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
					((l_port==0) || (l_port==a->parent->rcv.dst_port)) &&
					(ip_addr_cmp(ip, &a->parent->rcv.src_ip)) &&
					(is_local_ip_any ||
						ip_addr_cmp(l_ip, &a->parent->rcv.dst_ip))
				)
				return a->parent;
		}
	}
	return 0;
}
/* _tcpconn_find with locks and timeout
 * local_addr contains the desired local ip:port. If null any local address
 * will be used. IN*ADDR_ANY or 0 port are wild cards.
 * If found, the connection's reference counter will be incremented, you might
 * want to decrement it after use.
 */
struct tcp_connection* tcpconn_get(int id, struct ip_addr* ip, int port,
									union sockaddr_union* local_addr,
									ticks_t timeout)
{
	struct tcp_connection* c;
	struct ip_addr local_ip;
	int local_port;

	local_port=0;
	if (likely(ip)){
		if (unlikely(local_addr)){
			su2ip_addr(&local_ip, local_addr);
			local_port=su_getport(local_addr);
		}else{
			ip_addr_mk_any(ip->af, &local_ip);
			local_port=0;
		}
	}
	TCPCONN_LOCK;
	c=_tcpconn_find(id, ip, port, &local_ip, local_port);
	if (likely(c)){
		atomic_inc(&c->refcnt);
		/* update the timeout only if the connection is not handled
		 * by a tcp reader _and_ the timeout is non-zero (the tcp
		 * reader process uses c->timeout for its own internal
		 * timeout and c->timeout will be overwritten anyway on
		 * return to tcp_main) */
		if (likely(c->reader_pid==0 && timeout != 0))
			c->timeout=get_ticks_raw()+timeout;
	}
	TCPCONN_UNLOCK;
	return c;
}
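/* Illustrative sketch (editor's addition, not part of the original source):
 * the usual get/put pairing around tcpconn_get(); the example_* name is
 * hypothetical. */
#if 0
static int example_with_connection(int id)
{
	struct tcp_connection* c;

	/* ip==0 => lookup by id only; timeout 0 leaves c->timeout alone */
	c = tcpconn_get(id, 0, 0, 0, 0);
	if (c == 0)
		return -1;
	/* ... use c ... */
	tcpconn_put(c); /* drop the reference taken by tcpconn_get() */
	return 0;
}
#endif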
/* add c->dst:port, local_addr as an alias for the "id" connection,
 * flags: TCP_ALIAS_FORCE_ADD - add an alias even if a previous one exists
 *        TCP_ALIAS_REPLACE   - if a prev. alias exists, replace it with the
 *                              new one
 * returns 0 on success, <0 on failure (-1 - null c, -2 - too many aliases,
 * -3 - alias already present and pointing to another connection)
 * WARNING: must be called with TCPCONN_LOCK held */
inline static int _tcpconn_add_alias_unsafe(struct tcp_connection* c, int port,
											struct ip_addr* l_ip, int l_port,
											int flags)
{
	unsigned hash;
	struct tcp_conn_alias* a;
	struct tcp_conn_alias* nxt;
	struct tcp_connection* p;
	int is_local_ip_any;
	int i;
	int r;

	a=0;
	is_local_ip_any=ip_addr_any(l_ip);
	if (likely(c)){
		hash=tcp_addr_hash(&c->rcv.src_ip, port, l_ip, l_port);
		/* search the aliases for an already existing one */
		for (a=tcpconn_aliases_hash[hash], nxt=0; a; a=nxt){
			nxt=a->next;
			if ( (a->parent->state!=S_CONN_BAD) && (port==a->port) &&
					((l_port==0) || (l_port==a->parent->rcv.dst_port)) &&
					(ip_addr_cmp(&c->rcv.src_ip, &a->parent->rcv.src_ip)) &&
					(is_local_ip_any ||
						ip_addr_cmp(&a->parent->rcv.dst_ip, l_ip))
				){
				/* found */
				if (unlikely(a->parent!=c)){
					if (flags & TCP_ALIAS_FORCE_ADD)
						/* still have to walk the whole list to check if
						 * the alias was not already added */
						continue;
					else if (flags & TCP_ALIAS_REPLACE){
						/* remove the alias =>
						 * remove the current alias and all the following
						 * ones from the corresponding connection, shift the
						 * connection aliases array and re-add the other
						 * aliases (!= current one) */
						p=a->parent;
						for (i=0; (i<p->aliases) && (&(p->con_aliases[i])!=a);
								i++);
						if (unlikely(i==p->aliases)){
							LM_CRIT("alias %p not found in con %p (id %d)\n",
									a, p, p->id);
							goto error_not_found;
						}
						for (r=i; r<p->aliases; r++){
							tcpconn_listrm(
								tcpconn_aliases_hash[p->con_aliases[r].hash],
								&p->con_aliases[r], next, prev);
						}
						if (likely((i+1)<p->aliases)){
							memmove(&p->con_aliases[i], &p->con_aliases[i+1],
									(p->aliases-i-1)*
										sizeof(p->con_aliases[0]));
						}
						p->aliases--;
						/* re-add the remaining aliases */
						for (r=i; r<p->aliases; r++){
							tcpconn_listadd(
								tcpconn_aliases_hash[p->con_aliases[r].hash],
								&p->con_aliases[r], next, prev);
						}
					}else
						goto error_sec;
				}else goto ok;
			}
		}
		if (unlikely(c->aliases>=TCP_CON_MAX_ALIASES)) goto error_aliases;
		c->con_aliases[c->aliases].parent=c;
		c->con_aliases[c->aliases].port=port;
		c->con_aliases[c->aliases].hash=hash;
		tcpconn_listadd(tcpconn_aliases_hash[hash],
						&c->con_aliases[c->aliases], next, prev);
		c->aliases++;
	}else goto error_not_found;
ok:
#ifdef EXTRA_DEBUG
	if (a) LM_DBG("alias already present\n");
	else LM_DBG("alias port %d for hash %d, id %d\n", port, hash, c->id);
#endif
	return 0;
error_aliases:
	/* too many aliases */
	return -2;
error_not_found:
	/* null connection */
	return -1;
error_sec:
	/* alias already present and pointing to a different connection
	 * (hijack attempt?) */
	return -3;
}
/* add port as an alias for the "id" connection,
 * returns 0 on success, -1 on failure */
int tcpconn_add_alias(int id, int port, int proto)
{
	struct tcp_connection* c;
	int ret;
	struct ip_addr zero_ip;
	int r;
	int alias_flags;

	/* fix the port */
	port=port?port:((proto==PROTO_TLS)?SIPS_PORT:SIP_PORT);
	TCPCONN_LOCK;
	/* check if the alias already exists */
	c=_tcpconn_find(id, 0, 0, 0, 0);
	if (likely(c)){
		ip_addr_mk_any(c->rcv.src_ip.af, &zero_ip);
		alias_flags=cfg_get(tcp, tcp_cfg, alias_flags);
		/* alias src_ip:port, 0, 0 */
		ret=_tcpconn_add_alias_unsafe(c, port, &zero_ip, 0, alias_flags);
		if (ret<0 && ret!=-3) goto error;
		/* alias src_ip:port, local_ip, 0 */
		ret=_tcpconn_add_alias_unsafe(c, port, &c->rcv.dst_ip, 0,
										alias_flags);
		if (ret<0 && ret!=-3) goto error;
		/* alias src_ip:port, local_ip, local_port */
		ret=_tcpconn_add_alias_unsafe(c, port, &c->rcv.dst_ip,
										c->rcv.dst_port, alias_flags);
		if (unlikely(ret<0)) goto error;
	}else goto error_not_found;
	TCPCONN_UNLOCK;
	return 0;
error_not_found:
	TCPCONN_UNLOCK;
	LM_ERR("no connection found for id %d\n", id);
	return -1;
error:
	TCPCONN_UNLOCK;
	switch(ret){
		case -2:
			LM_ERR("too many aliases (%d) for connection %p (id %d)"
					" %s:%d <- %d\n",
					c->aliases, c, c->id, ip_addr2a(&c->rcv.src_ip),
					c->rcv.src_port, port);
			for (r=0; r<c->aliases; r++){
				LM_ERR("alias %d: for %p (%d) %s:%d <- %d hash %x\n", r, c,
						c->id, ip_addr2a(&c->rcv.src_ip), c->rcv.src_port,
						c->con_aliases[r].port, c->con_aliases[r].hash);
			}
			break;
		case -3:
			LM_ERR("possible port hijack attempt\n");
			LM_ERR("alias for %d port %d already present and points to"
					" another connection\n", c->id, port);
			break;
		default:
			LM_ERR("unknown error %d\n", ret);
	}
	return -1;
}
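/* Illustrative sketch (editor's addition, not part of the original source):
 * typical use of tcpconn_add_alias(), e.g. when a Via header carries
 * ";alias" and the advertised port should be routed back over the existing
 * connection. msg_con_id and via_port are hypothetical names. */
#if 0
	if (tcpconn_add_alias(msg_con_id, via_port, PROTO_TCP) < 0)
		LM_ERR("could not add alias for connection %d\n", msg_con_id);
#endif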
#ifdef TCP_FD_CACHE

static void tcp_fd_cache_init(void)
{
	int r;

	for (r=0; r<TCP_FD_CACHE_SIZE; r++)
		fd_cache[r].fd=-1;
}

inline static struct fd_cache_entry* tcp_fd_cache_get(struct tcp_connection *c)
{
	int h;

	h=c->id%TCP_FD_CACHE_SIZE;
	if ((fd_cache[h].fd>0) && (fd_cache[h].id==c->id) && (fd_cache[h].con==c))
		return &fd_cache[h];
	return 0;
}

inline static void tcp_fd_cache_rm(struct fd_cache_entry* e)
{
	e->fd=-1;
}

inline static void tcp_fd_cache_add(struct tcp_connection *c, int fd)
{
	int h;

	h=c->id%TCP_FD_CACHE_SIZE;
	if (likely(fd_cache[h].fd>0))
		tcp_safe_close(fd_cache[h].fd);
	fd_cache[h].fd=fd;
	fd_cache[h].id=c->id;
	fd_cache[h].con=c;
}

#endif /* TCP_FD_CACHE */
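/* Illustrative sketch (editor's addition, not part of the original source):
 * the intended fd-cache cycle around a send; example_get_fd_from_main() is
 * a hypothetical placeholder for the CONN_GET_FD exchange performed in
 * tcpconn_send_put() below. */
#if 0
	struct fd_cache_entry* e;
	int fd;

	if ((e = tcp_fd_cache_get(c)) != 0) {
		fd = e->fd; /* cache hit: reuse the duplicated descriptor */
	} else {
		fd = example_get_fd_from_main(c); /* hypothetical helper */
		if (fd >= 0)
			tcp_fd_cache_add(c, fd); /* cache it for the next send */
	}
#endif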
inline static int tcpconn_chld_put(struct tcp_connection* tcpconn);

static int tcpconn_send_put(struct tcp_connection* c, const char* buf,
							unsigned len, snd_flags_t send_flags);
static int tcpconn_do_send(int fd, struct tcp_connection* c,
							const char* buf, unsigned len,
							snd_flags_t send_flags, long* resp, int locked);
static int tcpconn_1st_send(int fd, struct tcp_connection* c,
							const char* buf, unsigned len,
							snd_flags_t send_flags, long* resp, int locked);

/* finds a tcpconn & sends on it
 * uses the dst members: to, proto (TCP|TLS) and id, and tries to send
 * from the "from" address (if non null and id==0)
 * returns: number of bytes written (>=0) on success
 *          <0 on error */
int tcp_send(struct dest_info* dst, union sockaddr_union* from,
				const char* buf, unsigned len)
{
	struct tcp_connection *c;
	struct ip_addr ip;
	int port;
	int fd;
	long response[2];
	int n;
	ticks_t con_lifetime;
#ifdef USE_TLS
	const char* rest_buf;
	const char* t_buf;
	unsigned rest_len, t_len;
	long resp;
	snd_flags_t t_send_flags;
#endif /* USE_TLS */

	port=su_getport(&dst->to);
	con_lifetime=cfg_get(tcp, tcp_cfg, con_lifetime);
	if (likely(port)){
		su2ip_addr(&ip, &dst->to);
		c=tcpconn_get(dst->id, &ip, port, from, con_lifetime);
	}else if (likely(dst->id)){
		c=tcpconn_get(dst->id, 0, 0, 0, con_lifetime);
	}else{
		LM_CRIT("null id & to\n");
		return -1;
	}
	if (likely(dst->id)){
		if (unlikely(c==0)) {
			if (likely(port)){
				/* try again w/o id */
				c=tcpconn_get(0, &ip, port, from, con_lifetime);
			}else{
				LM_ERR("id %d not found, dropping\n", dst->id);
				return -1;
			}
		}
	}
	/* connection not found or unusable => open a new one and send on it */
	if (unlikely((c==0) || tcpconn_close_after_send(c))){
		if (unlikely(c)){
			/* can't use c if it's marked as close-after-send =>
			   release it and try opening a new one */
			tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
			c=0;
		}
		/* check if connect() is disabled */
		if (unlikely((dst->send_flags.f & SND_F_FORCE_CON_REUSE) ||
						cfg_get(tcp, tcp_cfg, no_connect)))
			return -1;
		LM_DBG("no open tcp connection found, opening new one\n");
		/* create tcp connection */
		if (likely(from==0)){
			/* check to see if we have to use a specific source addr. */
			switch (dst->to.s.sa_family) {
				case AF_INET:
					from = tcp_source_ipv4;
					break;
				case AF_INET6:
					from = tcp_source_ipv6;
					break;
				default:
					/* error, bad af, ignore ... */
					break;
			}
		}
#if defined(TCP_CONNECT_WAIT) && defined(TCP_ASYNC)
		if (likely(cfg_get(tcp, tcp_cfg, tcp_connect_wait) &&
					cfg_get(tcp, tcp_cfg, async) )){
			if (unlikely(*tcp_connections_no >=
							cfg_get(tcp, tcp_cfg, max_connections))){
				LM_ERR("%s: maximum number of connections exceeded"
						" (%d/%d)\n",
						su2a(&dst->to, sizeof(dst->to)),
						*tcp_connections_no,
						cfg_get(tcp, tcp_cfg, max_connections));
				return -1;
			}
			if (unlikely(dst->proto==PROTO_TLS)) {
				if (unlikely(*tls_connections_no >=
								cfg_get(tcp, tcp_cfg, max_tls_connections))){
					LM_ERR("%s: maximum number of tls connections"
							" exceeded (%d/%d)\n",
							su2a(&dst->to, sizeof(dst->to)),
							*tls_connections_no,
							cfg_get(tcp, tcp_cfg, max_tls_connections));
					return -1;
				}
			}
			c=tcpconn_new(-1, &dst->to, from, 0, dst->proto,
							S_CONN_CONNECT);
			if (unlikely(c==0)){
				LM_ERR("%s: could not create new connection\n",
						su2a(&dst->to, sizeof(dst->to)));
				return -1;
			}
			c->flags|=F_CONN_PENDING|F_CONN_FD_CLOSED;
			tcpconn_set_send_flags(c, dst->send_flags);
			atomic_set(&c->refcnt, 2); /* ref from here and from the main
										  hash table */
			/* add it to the id hash and aliases */
			if (unlikely(tcpconn_add(c)==0)){
				LM_ERR("%s: could not add connection %p\n",
						su2a(&dst->to, sizeof(dst->to)), c);
				_tcpconn_free(c);
				n=-1;
				goto end_no_conn;
			}
			/* do connect and if the src ip or port changed, update the
			 * aliases */
			if (unlikely((fd=tcpconn_finish_connect(c, from))<0)){
				/* tcpconn_finish_connect will automatically blacklist
				   on error => no need to do it here */
				LM_ERR("%s: tcpconn_finish_connect(%p) failed\n",
						su2a(&dst->to, sizeof(dst->to)), c);
				goto conn_wait_error;
			}
			/* ? TODO: it might be faster just to queue the write directly
			 *  and send to main CONN_NEW_PENDING_WRITE */
			/* delay sending the fd to main until after the send */
			/* NOTE: no lock here, because the connection is marked as
			 * pending and nobody else will try to write on it. However
			 * this might produce out-of-order writes. If this is not
			 * desired either lock before the write or use
			 * _wbufq_insert(...)
			 * NOTE2: _wbufq_insert() is used now (no out-of-order).
			 */
#ifdef USE_TLS
			if (unlikely(c->type==PROTO_TLS)) {
				/* for TLS the TLS processing and the send must happen
				   atomically w/ respect to other sends on the same
				   connection (otherwise reordering might occur which
				   would break TLS) => lock. However in this case this
				   send will always be the first. We can have the send()
				   outside the lock only if this is the first and only
				   send (tls_encode is not called again), or this is the
				   last send for a tls_encode() loop and all the previous
				   ones did return CONN_NEW_COMPLETE or CONN_EOF.
				 */
				response[1] = CONN_NOP;
				t_buf = buf;
				t_len = len;
				lock_get(&c->write_lock);
redo_tls_encode:
				t_send_flags = dst->send_flags;
				n = tls_encode(c, &t_buf, &t_len, &rest_buf, &rest_len,
								&t_send_flags);
				/* There are 4 cases:
				   1. entire buffer consumed on the first try
				      (rest_len == rest_buf == 0)
				   2. rest_buf & first call
				   3. rest_buf & not first call
				      3a. CONN_NEW_COMPLETE or CONN_EOF
				      3b. CONN_NEW_PENDING_WRITE
				   4. entire buffer consumed, but not first call
				      4a. CONN_NEW_COMPLETE or CONN_EOF
				      4b. CONN_NEW_PENDING_WRITE
				   We misuse response[1] == CONN_NOP to test for the
				   first call.
				 */
				if (unlikely(n < 0)) {
					lock_release(&c->write_lock);
					goto conn_wait_error;
				}
				if (likely(rest_len == 0)) {
					/* 1 or 4*: CONN_NEW_COMPLETE, CONN_EOF, CONN_NOP
					   or CONN_NEW_PENDING_WRITE (rest_len == 0) */
					if (likely(response[1] != CONN_NEW_PENDING_WRITE)) {
						/* 1 or 4a => it's safe to do the send outside the
						   lock (it will either send directly or
						   wbufq_insert())
						 */
						lock_release(&c->write_lock);
						if (likely(t_len != 0)) {
							n=tcpconn_1st_send(fd, c, t_buf, t_len,
												t_send_flags,
												&response[1], 0);
						} else { /* t_len == 0 */
							if (response[1] == CONN_NOP) {
								/* nothing to send (e.g. parallel send:
								   tls_encode queues some data and then
								   WANT_READ => this tls_encode will queue
								   the cleartext too and will have nothing
								   to send right now) and initial send =>
								   behave as if the send was successful
								   (but never return EOF here) */
								response[1] = CONN_NEW_COMPLETE;
							}
						}
						/* exit */
					} else {
						/* CONN_NEW_PENDING_WRITE: 4b: it was a
						   repeated tls_encode() (or otherwise we would
						   have CONN_NOP here) => add to the queue */
						if (unlikely(t_len &&
										_wbufq_add(c, t_buf, t_len) < 0)) {
							response[1] = CONN_ERROR;
							n = -1;
						}
						lock_release(&c->write_lock);
						/* exit (no send) */
					}
				} else { /* rest_len != 0 */
					/* 2 or 3*: if tls_encode hasn't finished, we have to
					   call tcpconn_1st_send() under lock (otherwise if it
					   returns CONN_NEW_PENDING_WRITE, there is no way
					   to find the right place to add the new queued
					   data from the 2nd tls_encode()) */
					if (likely((response[1] == CONN_NOP /*2*/ ||
								response[1] == CONN_NEW_COMPLETE /*3a*/ ||
								response[1] == CONN_EOF /*3a*/) && t_len))
						n = tcpconn_1st_send(fd, c, t_buf, t_len,
												t_send_flags,
												&response[1], 1);
					else if (unlikely(t_len &&
										_wbufq_add(c, t_buf, t_len) < 0)) {
						/* 3b: CONN_NEW_PENDING_WRITE */
						response[1] = CONN_ERROR;
						n = -1;
					}
					if (likely(n >= 0)) {
						/* if t_len == 0 => nothing was sent => the previous
						   response will be kept */
						t_buf = rest_buf;
						t_len = rest_len;
						goto redo_tls_encode;
					} else {
						lock_release(&c->write_lock);
						/* error exit */
					}
				}
			} else
#endif /* USE_TLS */
				n=tcpconn_1st_send(fd, c, buf, len, dst->send_flags,
									&response[1], 0);
			if (unlikely(n<0)) /* this will catch CONN_ERROR too */
				goto conn_wait_error;
			if (unlikely(response[1]==CONN_EOF)){
				/* if close-after-send was requested, don't bother
				   sending the fd back to tcp_main, try closing it
				   immediately (no other tcp_send should use it,
				   because it is marked as close-after-send before
				   being added to the hash) */
				goto conn_wait_close;
			}
			/* send to tcp_main */
			response[0]=(long)c;
			if (unlikely(send_fd(unix_tcp_sock, response,
									sizeof(response), fd) <= 0)){
				LM_ERR("%s: %ld for %p failed: %s (%d)\n",
						su2a(&dst->to, sizeof(dst->to)),
						response[1], c, strerror(errno), errno);
				goto conn_wait_error;
			}
			goto conn_wait_success;
		}
#endif /* TCP_CONNECT_WAIT && TCP_ASYNC */
		if (unlikely((c=tcpconn_connect(&dst->to, from, dst->proto,
										&dst->send_flags))==0)){
			LM_ERR("%s: connect failed\n", su2a(&dst->to, sizeof(dst->to)));
			return -1;
		}
		tcpconn_set_send_flags(c, dst->send_flags);
		if (likely(c->state==S_CONN_OK))
			TCP_STATS_ESTABLISHED(S_CONN_CONNECT);
		atomic_set(&c->refcnt, 2); /* ref. from here and it will also
									  be added in the tcp_main hash */
		fd=c->s;
		c->flags|=F_CONN_FD_CLOSED; /* not yet opened in main */
		/* ? TODO: it might be faster just to queue the write and
		 * send to main a CONN_NEW_PENDING_WRITE */
		/* send the new tcpconn to "tcp main" */
		response[0]=(long)c;
		response[1]=CONN_NEW;
		n=send_fd(unix_tcp_sock, response, sizeof(response), c->s);
		if (unlikely(n<=0)){
			LM_ERR("%s: failed send_fd: %s (%d)\n",
					su2a(&dst->to, sizeof(dst->to)),
					strerror(errno), errno);
			/* we can safely delete it, it's not referenced by anybody */
			_tcpconn_free(c);
			n=-1;
			goto end_no_conn;
		}
		/* new connection => send on it directly */
#ifdef USE_TLS
		if (unlikely(c->type==PROTO_TLS)) {
			/* for TLS the TLS processing and the send must happen
			   atomically w/ respect to other sends on the same connection
			   (otherwise reordering might occur which would break TLS) =>
			   lock.
			 */
			response[1] = CONN_NOP;
			t_buf = buf;
			t_len = len;
			lock_get(&c->write_lock);
			do {
				t_send_flags = dst->send_flags;
				n = tls_encode(c, &t_buf, &t_len, &rest_buf, &rest_len,
								&t_send_flags);
				if (likely(n > 0)) {
					n = tcpconn_do_send(fd, c, t_buf, t_len, t_send_flags,
										&resp, 1);
					if (likely(response[1] != CONN_QUEUED_WRITE ||
								resp == CONN_ERROR))
						/* don't overwrite a previous CONN_QUEUED_WRITE
						   unless there is an error */
						response[1] = resp;
				} else if (unlikely(n < 0)) {
					response[1] = CONN_ERROR;
					break;
				}
				/* else do nothing for n (t_len) == 0, keep
				   the last response */
				t_buf = rest_buf;
				t_len = rest_len;
			} while(unlikely(rest_len && n > 0));
			lock_release(&c->write_lock);
		} else
#endif /* USE_TLS */
			n = tcpconn_do_send(fd, c, buf, len, dst->send_flags,
								&response[1], 0);
		if (unlikely(response[1] != CONN_NOP)) {
			response[0]=(long)c;
			if (send_all(unix_tcp_sock, response, sizeof(response)) <= 0) {
				BUG("tcp_main command %ld sending failed (write):"
					" %s (%d)\n", response[1], strerror(errno), errno);
				/* all commands != CONN_NOP returned by tcpconn_do_send()
				   (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE) will auto-dec
				   the refcnt => if sending the command fails we have to
				   dec. the refcnt by hand */
				tcpconn_chld_put(c); /* deref. it manually */
				n=-1;
			}
			/* here the refcnt for c is already decremented => c contents
			   can no longer be used and the refcnt _must_ _not_ be
			   decremented again on exit */
			if (unlikely(n < 0 || response[1] == CONN_EOF)) {
				/* on error or eof, close the fd */
				tcp_safe_close(fd);
			} else if (response[1] == CONN_QUEUED_WRITE) {
#ifdef TCP_FD_CACHE
				if (cfg_get(tcp, tcp_cfg, fd_cache)) {
					tcp_fd_cache_add(c, fd);
				} else
#endif /* TCP_FD_CACHE */
					tcp_safe_close(fd);
			} else {
				BUG("unexpected tcpconn_do_send() return & response:"
					" %d, %ld\n", n, response[1]);
			}
			goto end_no_deref;
		}
#ifdef TCP_FD_CACHE
		if (cfg_get(tcp, tcp_cfg, fd_cache)) {
			tcp_fd_cache_add(c, fd);
		}else
#endif /* TCP_FD_CACHE */
			tcp_safe_close(fd);
		/* here we can have only commands that _do_ _not_ dec the refcnt.
		   (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE are all treated
		   above) */
		goto release_c;
	} /* if (c==0 or unusable) new connection */
	/* existing connection, send on it */
	n = tcpconn_send_put(c, buf, len, dst->send_flags);
	/* no deref needed (it is done automatically inside
	   tcpconn_send_put()) */
	return n;
#ifdef TCP_CONNECT_WAIT
conn_wait_success:
#ifdef TCP_FD_CACHE
	if (cfg_get(tcp, tcp_cfg, fd_cache)) {
		tcp_fd_cache_add(c, fd);
	} else
#endif /* TCP_FD_CACHE */
		if (unlikely(tcp_safe_close(fd) < 0))
			LM_ERR("closing temporary send fd for %p: %s: "
					"close(%d) failed (flags 0x%x): %s (%d)\n", c,
					su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
					fd, c->flags, strerror(errno), errno);
	tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
	return n;
conn_wait_error:
	n=-1;
conn_wait_close:
	/* connect or send failed or an immediate close-after-send was requested
	 * on a newly created connection which was not yet sent to tcp_main (but
	 * was already hashed) => don't send it to main, unhash and destroy it
	 * directly (if refcnt>2 it will be destroyed when the last sender
	 * releases the connection (tcpconn_chld_put(c)) or when tcp_main
	 * receives a CONN_ERROR for it) */
	c->state=S_CONN_BAD;
	/* we are here only if we opened a new fd (and not reused a cached or
	   a reader one) => if the connect was successful close the fd */
	if (fd>=0) {
		if (unlikely(tcp_safe_close(fd) < 0 ))
			LM_ERR("closing temporary send fd for %p: %s: "
					"close(%d) failed (flags 0x%x): %s (%d)\n", c,
					su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
					fd, c->flags, strerror(errno), errno);
	}
	/* here the connection is for sure in the hash (tcp_main will not
	   remove it because it's marked as PENDing) and the refcnt is at
	   least 2
	 */
	TCPCONN_LOCK;
	_tcpconn_detach(c);
	c->flags&=~F_CONN_HASHED;
	tcpconn_put(c);
	TCPCONN_UNLOCK;
	/* dec refcnt -> mark it for destruction */
	tcpconn_chld_put(c);
	return n;
#endif /* TCP_CONNECT_WAIT */
release_c:
	tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
end_no_deref:
end_no_conn:
	return n;
}
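/* Illustrative sketch (editor's addition, not part of the original source):
 * a minimal tcp_send() caller, assuming init_dest_info() and init_su()
 * behave as they do elsewhere in this code base. */
#if 0
static int example_tcp_send(struct ip_addr* ip, unsigned short port,
							const char* buf, unsigned len)
{
	struct dest_info dst;

	init_dest_info(&dst);   /* zero/default initialization */
	dst.proto = PROTO_TCP;
	dst.id = 0;             /* no known connection id => find/open one */
	init_su(&dst.to, ip, port);
	/* from==0 => let tcp_send pick the source address */
	return tcp_send(&dst, 0, buf, len);
}
#endif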
/** sends on an existing tcpconn and auto-decrements the conn. ref counter.
 * As opposed to tcp_send(), this function requires an existing
 * tcp connection.
 * WARNING: the tcp_connection will be de-referenced.
 * @param c - existing tcp connection pointer.
 * @param buf - data to be sent.
 * @param len - data length.
 * @return >=0 on success, -1 on error.
 */
static int tcpconn_send_put(struct tcp_connection* c, const char* buf,
							unsigned len, snd_flags_t send_flags)
{
	struct tcp_connection *tmp;
	int fd;
	long response[2];
	int n;
	int do_close_fd;
#ifdef USE_TLS
	const char* rest_buf;
	const char* t_buf;
	unsigned rest_len, t_len;
	long resp;
	snd_flags_t t_send_flags;
#endif /* USE_TLS */
#ifdef TCP_FD_CACHE
	struct fd_cache_entry* fd_cache_e;
	int use_fd_cache;

	use_fd_cache=cfg_get(tcp, tcp_cfg, fd_cache);
	fd_cache_e=0;
#endif /* TCP_FD_CACHE */
	do_close_fd=1; /* close the fd on exit */
	response[1] = CONN_NOP;
#ifdef TCP_ASYNC
	/* if data is already queued, we don't need the fd */
#ifdef TCP_CONNECT_WAIT
	if (unlikely(cfg_get(tcp, tcp_cfg, async) &&
					(_wbufq_non_empty(c) || (c->flags&F_CONN_PENDING)) ))
#else /* ! TCP_CONNECT_WAIT */
	if (unlikely(cfg_get(tcp, tcp_cfg, async) && (_wbufq_non_empty(c)) ))
#endif /* TCP_CONNECT_WAIT */
	{
		lock_get(&c->write_lock);
#ifdef TCP_CONNECT_WAIT
		if (likely(_wbufq_non_empty(c) || (c->flags&F_CONN_PENDING)))
#else /* ! TCP_CONNECT_WAIT */
		if (likely(_wbufq_non_empty(c)))
#endif /* TCP_CONNECT_WAIT */
		{
			do_close_fd=0;
#ifdef USE_TLS
			if (unlikely(c->type==PROTO_TLS || c->type==PROTO_WSS)) {
				t_buf = buf;
				t_len = len;
				do {
					t_send_flags = send_flags;
					n = tls_encode(c, &t_buf, &t_len,
									&rest_buf, &rest_len,
									&t_send_flags);
					if (unlikely((n < 0) || (t_len &&
									(_wbufq_add(c, t_buf, t_len) < 0)))) {
						lock_release(&c->write_lock);
						n=-1;
						response[1] = CONN_ERROR;
						c->state=S_CONN_BAD;
						c->timeout=get_ticks_raw(); /* force timeout */
						goto error;
					}
					t_buf = rest_buf;
					t_len = rest_len;
				} while(unlikely(rest_len && n > 0));
			} else
#endif /* USE_TLS */
				if (unlikely(len && (_wbufq_add(c, buf, len)<0))){
					lock_release(&c->write_lock);
					n=-1;
					response[1] = CONN_ERROR;
					c->state=S_CONN_BAD;
					c->timeout=get_ticks_raw(); /* force timeout */
					goto error;
				}
			n=len;
			lock_release(&c->write_lock);
			goto release_c;
		}
		lock_release(&c->write_lock);
	}
#endif /* TCP_ASYNC */
	/* check if this is not the same reader process holding
	 * c and if so send directly on c->fd */
	if (c->reader_pid==my_pid()){
		LM_DBG("send from reader (%d (%d)), reusing fd\n",
				my_pid(), process_no);
		fd=c->fd;
		do_close_fd=0; /* don't close the fd on exit, it's in use */
#ifdef TCP_FD_CACHE
		use_fd_cache=0; /* don't cache: problems would arise due to the
						   close() on cache eviction (if the fd is still
						   used). If it has to be cached then dup() _must_
						   be used */
	}else if (likely(use_fd_cache &&
						((fd_cache_e=tcp_fd_cache_get(c))!=0))){
		fd=fd_cache_e->fd;
		do_close_fd=0;
		LM_DBG("found fd in cache (%d, %p, %d)\n", fd, c, fd_cache_e->id);
#endif /* TCP_FD_CACHE */
	}else{
		LM_DBG("tcp connection found (%p), acquiring fd\n", c);
		/* get the fd */
		response[0]=(long)c;
		response[1]=CONN_GET_FD;
		n=send_all(unix_tcp_sock, response, sizeof(response));
		if (unlikely(n<=0)){
			LM_ERR("failed to get fd(write): %s (%d)\n", strerror(errno),
					errno);
			n=-1;
			goto release_c;
		}
		LM_DBG("c=%p, n=%d\n", c, n);
		n=receive_fd(unix_tcp_sock, &tmp, sizeof(tmp), &fd, MSG_WAITALL);
		if (unlikely(n<=0)){
			LM_ERR("failed to get fd(receive_fd): %s (%d)\n",
					strerror(errno), errno);
			n=-1;
			do_close_fd=0;
			goto release_c;
		}
		/* handle fd closed or bad connection/error
		   (it's possible that this happened in the time between
		   we found the initial connection and the time when we got
		   the fd)
		 */
		if (unlikely(c!=tmp || fd==-1 || c->state==S_CONN_BAD)){
			if (unlikely(c!=tmp && tmp!=0))
				BUG("tcp_send: get_fd: got a different connection:"
					" %p (id= %d, refcnt=%d state=%d) !="
					" %p (n=%d)\n",
					c, c->id, atomic_get(&c->refcnt), c->state,
					tmp, n);
			n=-1; /* fail */
			/* don't cache the fd & close it */
			do_close_fd = (fd==-1)?0:1;
#ifdef TCP_FD_CACHE
			use_fd_cache = 0;
#endif /* TCP_FD_CACHE */
			goto end;
		}
		LM_DBG("after receive_fd: c=%p n=%d fd=%d\n", c, n, fd);
	}
#ifdef USE_TLS
	if (unlikely(c->type==PROTO_TLS || c->type==PROTO_WSS)) {
		/* for TLS the TLS processing and the send must happen
		   atomically w/ respect to other sends on the same connection
		   (otherwise reordering might occur which would break TLS) =>
		   lock.
		 */
		response[1] = CONN_NOP;
		t_buf = buf;
		t_len = len;
		lock_get(&c->write_lock);
		do {
			t_send_flags = send_flags;
			n = tls_encode(c, &t_buf, &t_len, &rest_buf, &rest_len,
							&t_send_flags);
			if (likely(n > 0)) {
				n = tcpconn_do_send(fd, c, t_buf, t_len, t_send_flags,
									&resp, 1);
				if (likely(response[1] != CONN_QUEUED_WRITE ||
							resp == CONN_ERROR))
					/* don't overwrite a previous CONN_QUEUED_WRITE
					   unless there is an error */
					response[1] = resp;
			} else if (unlikely(n < 0)) {
				response[1] = CONN_ERROR;
				break;
			}
			/* else do nothing for n (t_len) == 0, keep
			   the last response */
			t_buf = rest_buf;
			t_len = rest_len;
		} while(unlikely(rest_len && n > 0));
		lock_release(&c->write_lock);
	} else
#endif
		n = tcpconn_do_send(fd, c, buf, len, send_flags, &response[1], 0);
	if (unlikely(response[1] != CONN_NOP)) {
error:
		response[0]=(long)c;
		if (send_all(unix_tcp_sock, response, sizeof(response)) <= 0) {
			BUG("tcp_main command %ld sending failed (write): %s (%d)\n",
				response[1], strerror(errno), errno);
			/* all commands != CONN_NOP returned by tcpconn_do_send()
			   (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE) will auto-dec the
			   refcnt => if sending the command fails we have to dec. the
			   refcnt by hand */
			tcpconn_chld_put(c); /* deref. it manually */
			n=-1;
		}
		/* here the refcnt for c is already decremented => c contents can
		   no longer be used and the refcnt _must_ _not_ be decremented
		   again on exit */
		if (unlikely(n < 0 || response[1] == CONN_EOF)) {
			/* on error or eof, remove from cache or close the fd */
#ifdef TCP_FD_CACHE
			if (unlikely(fd_cache_e)){
				tcp_fd_cache_rm(fd_cache_e);
				fd_cache_e = 0;
				tcp_safe_close(fd);
			}else
#endif /* TCP_FD_CACHE */
				if (do_close_fd) tcp_safe_close(fd);
		} else if (response[1] == CONN_QUEUED_WRITE) {
#ifdef TCP_FD_CACHE
			if (unlikely((fd_cache_e==0) && use_fd_cache)){
				tcp_fd_cache_add(c, fd);
			}else
#endif /* TCP_FD_CACHE */
				if (do_close_fd) tcp_safe_close(fd);
		} else {
			BUG("unexpected tcpconn_do_send() return & response: %d, %ld\n",
				n, response[1]);
		}
		return n; /* no tcpconn_put */
	}
end:
#ifdef TCP_FD_CACHE
	if (unlikely((fd_cache_e==0) && use_fd_cache)){
		tcp_fd_cache_add(c, fd);
	}else
#endif /* TCP_FD_CACHE */
		if (do_close_fd) {
			if (unlikely(tcp_safe_close(fd) < 0))
				LM_ERR("closing temporary send fd for %p: %s: "
						"close(%d) failed (flags 0x%x): %s (%d)\n", c,
						su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
						fd, c->flags, strerror(errno), errno);
		}
	/* here we can have only commands that _do_ _not_ dec the refcnt.
	   (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE are all treated above) */
release_c:
	tcpconn_chld_put(c); /* release c (dec refcnt & free on 0) */
	return n;
}
/* unsafe send on a known tcp connection.
 * Directly send on a known tcp connection with a given fd.
 * It is assumed that the connection locks are already held.
 * Side effects: if needed it will send state update commands to
 * tcp_main (e.g. CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE).
 * @param fd - fd used for sending.
 * @param c - existing tcp connection pointer (state and flags might be
 *            changed).
 * @param buf - data to be sent.
 * @param len - data length.
 * @param send_flags
 * @return <0 on error, number of bytes sent on success.
 */
int tcpconn_send_unsafe(int fd, struct tcp_connection *c,
						const char* buf, unsigned len, snd_flags_t send_flags)
{
	int n;
	long response[2];

	n = tcpconn_do_send(fd, c, buf, len, send_flags, &response[1], 1);
	if (unlikely(response[1] != CONN_NOP)) {
		/* all commands != CONN_NOP returned by tcpconn_do_send()
		   (CONN_EOF, CONN_ERROR, CONN_QUEUED_WRITE) will auto-dec the
		   refcnt => increment it (we don't want the connection to be
		   destroyed from under us)
		 */
		atomic_inc(&c->refcnt);
		response[0]=(long)c;
		if (send_all(unix_tcp_sock, response, sizeof(response)) <= 0) {
			BUG("connection %p command %ld sending failed (write): %s (%d)\n",
				c, response[1], strerror(errno), errno);
			/* send failed => deref. it back by hand */
			tcpconn_chld_put(c);
			n=-1;
		}
		/* here the refcnt for c is already decremented => c contents can
		   no longer be used and the refcnt _must_ _not_ be decremented
		   again on exit */
		return n;
	}
	return n;
}
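/* Illustrative sketch (editor's addition, not part of the original source):
 * tcpconn_send_unsafe() is meant for callers that already hold
 * c->write_lock and a usable fd for c (e.g. the tls code): */
#if 0
	lock_get(&c->write_lock);
	/* ... processing that must stay atomic w/ respect to other sends ... */
	n = tcpconn_send_unsafe(fd, c, buf, len, c->send_flags);
	lock_release(&c->write_lock);
#endif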
/** lower level send (connection and fd should be known).
 * It takes care of possible write-queueing, blacklisting a.s.o.
 * It expects a valid tcp connection. It doesn't touch the ref. counters.
 * It will also set the connection flags from send_flags (it's better
 * to do it here, because it's guaranteed to be under lock).
 * @param fd - fd used for sending.
 * @param c - existing tcp connection pointer (state and flags might be
 *            changed).
 * @param buf - data to be sent.
 * @param len - data length.
 * @param send_flags
 * @param resp - filled with a cmd. for tcp_main:
 *               CONN_NOP - nothing needs to be done (do not send
 *                          anything to tcp_main).
 *               CONN_ERROR - error, connection should be closed.
 *               CONN_EOF - no error, but connection should be closed.
 *               CONN_QUEUED_WRITE - new write queue (connection
 *                          should be watched for write and the wr.
 *                          queue flushed).
 * @param locked - if set assume the connection is already locked (call from
 *                 tls) and do not lock/unlock the connection.
 * @return >=0 on success, < 0 on error && *resp == CONN_ERROR.
 *
 */
static int tcpconn_do_send(int fd, struct tcp_connection* c,
							const char* buf, unsigned len,
							snd_flags_t send_flags, long* resp,
							int locked)
{
	int n;
#ifdef TCP_ASYNC
	int enable_write_watch;
#endif /* TCP_ASYNC */

	LM_DBG("sending...\n");
	*resp = CONN_NOP;
	if (likely(!locked)) lock_get(&c->write_lock);
	/* update connection send flags with the current ones */
	tcpconn_set_send_flags(c, send_flags);
#ifdef TCP_ASYNC
	if (likely(cfg_get(tcp, tcp_cfg, async))){
		if (_wbufq_non_empty(c)
#ifdef TCP_CONNECT_WAIT
			|| (c->flags&F_CONN_PENDING)
#endif /* TCP_CONNECT_WAIT */
			){
			if (unlikely(_wbufq_add(c, buf, len)<0)){
				if (likely(!locked)) lock_release(&c->write_lock);
				n=-1;
				goto error;
			}
			if (likely(!locked)) lock_release(&c->write_lock);
			n=len;
			goto end;
		}
		n=_tcpconn_write_nb(fd, c, buf, len);
	}else{
#endif /* TCP_ASYNC */
		/* n=tcp_blocking_write(c, fd, buf, len); */
		n=tsend_stream(fd, buf, len,
						TICKS_TO_S(cfg_get(tcp, tcp_cfg, send_timeout)) *
						1000);
#ifdef TCP_ASYNC
	}
#else /* ! TCP_ASYNC */
	if (likely(!locked)) lock_release(&c->write_lock);
#endif /* TCP_ASYNC */

	LM_DBG("after real write: c=%p n=%d fd=%d\n", c, n, fd);
	LM_DBG("buf=\n%.*s\n", (int)len, buf);
	if (unlikely(n<(int)len)){
#ifdef TCP_ASYNC
		if (cfg_get(tcp, tcp_cfg, async) &&
				((n>=0) || errno==EAGAIN || errno==EWOULDBLOCK)){
			enable_write_watch=_wbufq_empty(c);
			if (n<0) n=0;
			else if (unlikely(c->state==S_CONN_CONNECT ||
						c->state==S_CONN_ACCEPT)){
				TCP_STATS_ESTABLISHED(c->state);
				c->state=S_CONN_OK; /* something was written */
			}
			if (unlikely(_wbufq_add(c, buf+n, len-n)<0)){
				if (likely(!locked)) lock_release(&c->write_lock);
				n=-1;
				goto error;
			}
			if (likely(!locked)) lock_release(&c->write_lock);
			n=len;
			if (likely(enable_write_watch))
				*resp=CONN_QUEUED_WRITE;
			goto end;
		}else{
			if (likely(!locked)) lock_release(&c->write_lock);
		}
#endif /* TCP_ASYNC */
		if (unlikely(c->state==S_CONN_CONNECT)){
			switch(errno){
				case ENETUNREACH:
				case EHOSTUNREACH: /* not posix for send() */
#ifdef USE_DST_BLACKLIST
					dst_blacklist_su(BLST_ERR_CONNECT, c->rcv.proto,
										&c->rcv.src_su, &c->send_flags, 0);
#endif /* USE_DST_BLACKLIST */
					TCP_EV_CONNECT_UNREACHABLE(errno, TCP_LADDR(c),
										TCP_LPORT(c), TCP_PSU(c),
										TCP_PROTO(c));
					break;
				case ECONNREFUSED:
				case ECONNRESET:
#ifdef USE_DST_BLACKLIST
					dst_blacklist_su(BLST_ERR_CONNECT, c->rcv.proto,
										&c->rcv.src_su, &c->send_flags, 0);
#endif /* USE_DST_BLACKLIST */
					TCP_EV_CONNECT_RST(errno, TCP_LADDR(c), TCP_LPORT(c),
										TCP_PSU(c), TCP_PROTO(c));
					break;
				default:
					TCP_EV_CONNECT_ERR(errno, TCP_LADDR(c), TCP_LPORT(c),
										TCP_PSU(c), TCP_PROTO(c));
			}
			TCP_STATS_CONNECT_FAILED();
		}else{
			switch(errno){
				case ECONNREFUSED:
				case ECONNRESET:
					TCP_STATS_CON_RESET();
					/* no break */
				case ENETUNREACH:
				/* case EHOSTUNREACH: -- not posix */
#ifdef USE_DST_BLACKLIST
					dst_blacklist_su(BLST_ERR_SEND, c->rcv.proto,
										&c->rcv.src_su, &c->send_flags, 0);
#endif /* USE_DST_BLACKLIST */
					break;
			}
		}
		LM_ERR("failed to send on %p (%s:%d->%s): %s (%d)\n",
				c, ip_addr2a(&c->rcv.dst_ip), c->rcv.dst_port,
				su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
				strerror(errno), errno);
		n = -1;
#ifdef TCP_ASYNC
error:
#endif /* TCP_ASYNC */
		/* error on the connection, mark it as bad and set a 0 timeout */
		c->state=S_CONN_BAD;
		c->timeout=get_ticks_raw();
		/* tell "main" it should drop this (optional, it will t/o anyway?) */
		*resp=CONN_ERROR;
		return n; /* error return, no tcpconn_put */
	}
#ifdef TCP_ASYNC
	if (likely(!locked)) lock_release(&c->write_lock);
#endif /* TCP_ASYNC */
	/* in non-async mode here we're either in S_CONN_OK or S_CONN_ACCEPT */
	if (unlikely(c->state==S_CONN_CONNECT || c->state==S_CONN_ACCEPT)){
		TCP_STATS_ESTABLISHED(c->state);
		c->state=S_CONN_OK;
	}
	if (unlikely(send_flags.f & SND_F_CON_CLOSE)){
		/* close after write => send an EOF request to tcp_main */
		c->state=S_CONN_BAD;
		c->timeout=get_ticks_raw();
		/* tell "main" it should drop this */
		*resp=CONN_EOF;
		return n;
	}
end:
	return n;
}
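/* Illustrative sketch (editor's addition, not part of the original source):
 * how a caller is expected to act on *resp after tcpconn_do_send(); the
 * forwarding to tcp_main is only sketched here. */
#if 0
	long resp;
	int n;

	n = tcpconn_do_send(fd, c, buf, len, send_flags, &resp, 0);
	if (resp != CONN_NOP) {
		/* CONN_ERROR / CONN_EOF: the connection should be closed;
		 * CONN_QUEUED_WRITE: the fd should be watched for write.
		 * In all three cases forward the command (and c) to tcp_main
		 * over unix_tcp_sock, as done in tcpconn_send_put() above. */
	}
#endif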
/** low level 1st send on a new connection.
 * It takes care of possible write-queueing, blacklisting a.s.o.
 * It expects a valid just-opened tcp connection. It doesn't touch the
 * ref. counters. It's used only in the async first-send case.
 * @param fd - fd used for sending.
 * @param c - existing tcp connection pointer (state and flags might be
 *            changed). The connection must be new (no previous send on it).
 * @param buf - data to be sent.
 * @param len - data length.
 * @param send_flags
 * @param resp - filled with a fd sending cmd. for tcp_main on success. It
 *               _must_ be one of the commands listed below:
 *               CONN_NEW_PENDING_WRITE - new connection, first write
 *                          was partially successful (or EAGAIN) and
 *                          was queued (connection should be watched
 *                          for write and the write queue flushed).
 *                          The fd should be sent to tcp_main.
 *               CONN_NEW_COMPLETE - new connection, first write
 *                          completed successfully and no data is
 *                          queued. The fd should be sent to tcp_main.
 *               CONN_EOF - no error, but the connection should be
 *                          closed (e.g. SND_F_CON_CLOSE send flag).
 *               CONN_ERROR - error, _must_ return < 0.
 * @param locked - if set assume the connection is already locked (call from
 *                 tls) and do not lock/unlock the connection.
 * @return >=0 on success, < 0 on error (on error *resp is undefined).
 *
 */
static int tcpconn_1st_send(int fd, struct tcp_connection* c,
							const char* buf, unsigned len,
							snd_flags_t send_flags, long* resp,
							int locked)
{
	int n;

	n=_tcpconn_write_nb(fd, c, buf, len);
	if (unlikely(n<(int)len)){
		/* on EAGAIN or ENOTCONN return success.
		   ENOTCONN appears on newer FreeBSD versions (non-blocking socket,
		   connect() & send immediately) */
		if ((n>=0) || errno==EAGAIN || errno==EWOULDBLOCK ||
				errno==ENOTCONN){
			LM_DBG("pending write on new connection %p"
					" (%d/%d bytes written)\n", c, n, len);
			if (unlikely(n<0)) n=0;
			else{
				if (likely(c->state == S_CONN_CONNECT))
					TCP_STATS_ESTABLISHED(S_CONN_CONNECT);
				c->state=S_CONN_OK; /* partial write => connect() ended */
			}
			/* add to the write queue */
			if (likely(!locked)) lock_get(&c->write_lock);
			if (unlikely(_wbufq_insert(c, buf+n, len-n)<0)){
				if (likely(!locked)) lock_release(&c->write_lock);
				n=-1;
				LM_ERR("%s: EAGAIN and write queue full or failed for %p\n",
						su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)), c);
				goto error;
			}
			if (likely(!locked)) lock_release(&c->write_lock);
			/* send to tcp_main */
			*resp=CONN_NEW_PENDING_WRITE;
			n=len;
			goto end;
		}
		/* n < 0 and not EAGAIN => write error */
		/* if the first write failed it's most likely a
		   connect error */
		switch(errno){
			case ENETUNREACH:
			case EHOSTUNREACH: /* not posix for send() */
#ifdef USE_DST_BLACKLIST
				dst_blacklist_su( BLST_ERR_CONNECT, c->rcv.proto,
									&c->rcv.src_su, &c->send_flags, 0);
#endif /* USE_DST_BLACKLIST */
				TCP_EV_CONNECT_UNREACHABLE(errno, TCP_LADDR(c),
									TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
				break;
			case ECONNREFUSED:
			case ECONNRESET:
#ifdef USE_DST_BLACKLIST
				dst_blacklist_su( BLST_ERR_CONNECT, c->rcv.proto,
									&c->rcv.src_su, &c->send_flags, 0);
#endif /* USE_DST_BLACKLIST */
				TCP_EV_CONNECT_RST(errno, TCP_LADDR(c),
									TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
				break;
			default:
				TCP_EV_CONNECT_ERR(errno, TCP_LADDR(c),
									TCP_LPORT(c), TCP_PSU(c), TCP_PROTO(c));
		}
		/* error: destroy it directly */
		TCP_STATS_CONNECT_FAILED();
		LM_ERR("%s: connect & send for %p failed: %s (%d)\n",
				su2a(&c->rcv.src_su, sizeof(c->rcv.src_su)),
				c, strerror(errno), errno);
		goto error;
	}
	LM_INFO("quick connect for %p\n", c);
	if (likely(c->state == S_CONN_CONNECT))
		TCP_STATS_ESTABLISHED(S_CONN_CONNECT);
	if (unlikely(send_flags.f & SND_F_CON_CLOSE)){
		/* close after write => EOF => close immediately */
		c->state=S_CONN_BAD;
		/* tell our caller that it should drop this */
		*resp=CONN_EOF;
	}else{
		c->state=S_CONN_OK;
		/* send to tcp_main */
		*resp=CONN_NEW_COMPLETE;
	}
end:
	return n; /* >= 0 */
error:
	*resp=CONN_ERROR;
	return -1;
}
int tcp_init(struct socket_info* sock_info)
{
	union sockaddr_union* addr;
	int optval;
#ifdef HAVE_TCP_ACCEPT_FILTER
	struct accept_filter_arg afa;
#endif /* HAVE_TCP_ACCEPT_FILTER */
#ifdef DISABLE_NAGLE
	int flag;
	struct protoent* pe;

	if (tcp_proto_no==-1){ /* if not already set */
		pe=getprotobyname("tcp");
		if (pe==0){
			LM_ERR("could not get TCP protocol number\n");
			tcp_proto_no=-1;
		}else{
			tcp_proto_no=pe->p_proto;
		}
	}
#endif

	addr=&sock_info->su;
	/* sock_info->proto=PROTO_TCP; */
	if (init_su(addr, &sock_info->address, sock_info->port_no)<0){
		LM_ERR("could not init sockaddr_union\n");
		goto error;
	}
	LM_DBG("added %s\n", su2a(addr, sizeof(*addr)));
	sock_info->socket=socket(AF2PF(addr->s.sa_family), SOCK_STREAM, 0);
	if (sock_info->socket==-1){
		LM_ERR("tcp_init: socket: %s\n", strerror(errno));
		goto error;
	}
#ifdef DISABLE_NAGLE
	flag=1;
	if ( (tcp_proto_no!=-1) &&
			(setsockopt(sock_info->socket, tcp_proto_no, TCP_NODELAY,
						&flag, sizeof(flag))<0) ){
		LM_ERR("could not disable Nagle: %s\n", strerror(errno));
	}
#endif
#if !defined(TCP_DONT_REUSEADDR)
	/* Stevens, "Network Programming", Section 7.5, "Generic Socket
	 * Options": "...server started,..a child continues..on existing
	 * connection..listening server is restarted...call to bind fails
	 * ... ALL TCP servers should specify the SO_REUSEADDR option
	 * to allow the server to be restarted in this situation"
	 *
	 * Indeed, without this option, the server can't restart.
	 *   -jiri
	 */
	optval=1;
	if (setsockopt(sock_info->socket, SOL_SOCKET, SO_REUSEADDR,
					(void*)&optval, sizeof(optval))==-1) {
		LM_ERR("setsockopt %s\n", strerror(errno));
		goto error;
	}
#endif
	/* tos */
	optval = tos;
	if (setsockopt(sock_info->socket, IPPROTO_IP, IP_TOS, (void*)&optval,
					sizeof(optval)) ==-1){
		LM_WARN("setsockopt tos: %s\n", strerror(errno));
		/* continue since this is not critical */
	}
#ifdef HAVE_TCP_DEFER_ACCEPT
	/* linux only */
	if ((optval=cfg_get(tcp, tcp_cfg, defer_accept))){
		if (setsockopt(sock_info->socket, IPPROTO_TCP, TCP_DEFER_ACCEPT,
						(void*)&optval, sizeof(optval)) ==-1){
			LM_WARN("setsockopt TCP_DEFER_ACCEPT %s\n", strerror(errno));
			/* continue since this is not critical */
		}
	}
#endif /* HAVE_TCP_DEFER_ACCEPT */
#ifdef HAVE_TCP_SYNCNT
	if ((optval=cfg_get(tcp, tcp_cfg, syncnt))){
		if (setsockopt(sock_info->socket, IPPROTO_TCP, TCP_SYNCNT, &optval,
						sizeof(optval))<0){
			LM_WARN("failed to set maximum SYN retr. count: %s\n",
					strerror(errno));
		}
	}
#endif
#ifdef HAVE_TCP_LINGER2
	if ((optval=cfg_get(tcp, tcp_cfg, linger2))){
		if (setsockopt(sock_info->socket, IPPROTO_TCP, TCP_LINGER2, &optval,
						sizeof(optval))<0){
			LM_WARN("failed to set maximum LINGER2 timeout: %s\n",
					strerror(errno));
		}
	}
#endif
	init_sock_keepalive(sock_info->socket);
	if (bind(sock_info->socket, &addr->s, sockaddru_len(*addr))==-1){
		LM_ERR("bind(%x, %p, %d) on %s:%d : %s\n",
				sock_info->socket, &addr->s,
				(unsigned)sockaddru_len(*addr),
				sock_info->address_str.s,
				sock_info->port_no,
				strerror(errno));
		goto error;
	}
	if (listen(sock_info->socket, TCP_LISTEN_BACKLOG)==-1){
		LM_ERR("listen(%x, %p, %d) on %s: %s\n",
				sock_info->socket, &addr->s,
				(unsigned)sockaddru_len(*addr),
				sock_info->address_str.s,
				strerror(errno));
		goto error;
	}
#ifdef HAVE_TCP_ACCEPT_FILTER
	/* freebsd */
	if (cfg_get(tcp, tcp_cfg, defer_accept)){
		memset(&afa, 0, sizeof(afa));
		strcpy(afa.af_name, "dataready");
		if (setsockopt(sock_info->socket, SOL_SOCKET, SO_ACCEPTFILTER,
						(void*)&afa, sizeof(afa)) ==-1){
			LM_WARN("setsockopt SO_ACCEPTFILTER %s\n",
					strerror(errno));
			/* continue since this is not critical */
		}
	}
#endif /* HAVE_TCP_ACCEPT_FILTER */
	return 0;
error:
	if (sock_info->socket!=-1){
		tcp_safe_close(sock_info->socket);
		sock_info->socket=-1;
	}
	return -1;
}
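/* Illustrative sketch (editor's addition, not part of the original source):
 * tcp_init() is run once per configured TCP listen socket; the tcp_listen
 * list name is assumed from the socket_info API. */
#if 0
	struct socket_info* si;

	for (si = tcp_listen; si; si = si->next)
		if (tcp_init(si) < 0) {
			LM_ERR("could not init listen socket %s:%d\n",
					si->address_str.s, si->port_no);
			return -1;
		}
#endif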
/* close tcp_main's fd from a tcpconn
 * WARNING: call only in the tcp_main context */
inline static void tcpconn_close_main_fd(struct tcp_connection* tcpconn)
{
	int fd;

	fd=tcpconn->s;
#ifdef USE_TLS
	if (tcpconn->type==PROTO_TLS || tcpconn->type==PROTO_WSS)
		tls_close(tcpconn, fd);
#endif
#ifdef TCP_FD_CACHE
	if (likely(cfg_get(tcp, tcp_cfg, fd_cache))) shutdown(fd, SHUT_RDWR);
#endif /* TCP_FD_CACHE */
	if (unlikely(tcp_safe_close(fd)<0))
		LM_ERR("(%p): %s close(%d) failed (flags 0x%x): %s (%d)\n", tcpconn,
				su2a(&tcpconn->rcv.src_su, sizeof(tcpconn->rcv.src_su)),
				fd, tcpconn->flags, strerror(errno), errno);
	tcpconn->s=-1;
}

/* dec refcnt & frees the connection if refcnt==0
 * returns 1 if the connection is freed, 0 otherwise
 *
 * WARNING: use only from child processes */
inline static int tcpconn_chld_put(struct tcp_connection* tcpconn)
{
	if (unlikely(atomic_dec_and_test(&tcpconn->refcnt))){
		LM_DBG("destroying connection %p (%d, %d) flags %04x\n",
				tcpconn, tcpconn->id, tcpconn->s, tcpconn->flags);
		/* sanity checks */
		membar_read_atomic_op(); /* make sure we see the current flags */
		if (unlikely(!(tcpconn->flags & F_CONN_FD_CLOSED) ||
						(tcpconn->flags &
							(F_CONN_HASHED|F_CONN_MAIN_TIMER|
							 F_CONN_READ_W|F_CONN_WRITE_W)) )){
			LM_CRIT("%p bad flags = %0x\n", tcpconn, tcpconn->flags);
			abort();
		}
		_tcpconn_free(tcpconn); /* destroys also the wbuf_q if still
								   present */
		return 1;
	}
	return 0;
}
  2662. /* simple destroy function (the connection should be already removed
  2663. * from the hashes. refcnt 0 and the fds should not be watched anymore for IO)
  2664. */
  2665. inline static void tcpconn_destroy(struct tcp_connection* tcpconn)
  2666. {
  2667. LM_DBG("destroying connection %p (%d, %d) flags %04x\n",
  2668. tcpconn, tcpconn->id, tcpconn->s, tcpconn->flags);
  2669. if (unlikely(tcpconn->flags & F_CONN_HASHED)){
  2670. LM_CRIT("called with hashed connection (%p)\n", tcpconn);
  2671. /* try to continue */
  2672. if (likely(tcpconn->flags & F_CONN_MAIN_TIMER))
  2673. local_timer_del(&tcp_main_ltimer, &tcpconn->timer);
  2674. TCPCONN_LOCK;
  2675. _tcpconn_detach(tcpconn);
  2676. tcpconn->flags &= ~(F_CONN_HASHED|F_CONN_MAIN_TIMER);
  2677. TCPCONN_UNLOCK;
  2678. }
  2679. if (likely(!(tcpconn->flags & F_CONN_FD_CLOSED))){
  2680. tcpconn_close_main_fd(tcpconn);
  2681. tcpconn->flags|=F_CONN_FD_CLOSED;
  2682. (*tcp_connections_no)--;
  2683. if (unlikely(tcpconn->type==PROTO_TLS || tcpconn->type==PROTO_WSS))
  2684. (*tls_connections_no)--;
  2685. }
  2686. _tcpconn_free(tcpconn); /* destroys also the wbuf_q if still present*/
  2687. }
  2688. /* tries to destroy the connection: dec. refcnt and if 0 destroys the
  2689. * connection, else it will mark it as BAD and close the main fds
  2690. *
  2691. * returns 1 if the connection was destroyed, 0 otherwise
  2692. *
  2693. * WARNING: - the connection _has_ to be removed from the hash and timer
  2694. * first (use tcpconn_try_unhash() for this )
  2695. * - the fd should not be watched anymore (io_watch_del()...)
  2696. * - must be called _only_ from the tcp_main process context
  2697. * (or else the fd will remain open)
  2698. */
  2699. inline static int tcpconn_put_destroy(struct tcp_connection* tcpconn)
  2700. {
  2701. if (unlikely((tcpconn->flags &
  2702. (F_CONN_WRITE_W|F_CONN_HASHED|F_CONN_MAIN_TIMER|F_CONN_READ_W)) )){
  2703. /* sanity check */
  2704. if (unlikely(tcpconn->flags & F_CONN_HASHED)){
  2705. LM_CRIT("called with hashed and/or"
  2706. "on timer connection (%p), flags = %0x\n",
  2707. tcpconn, tcpconn->flags);
  2708. /* try to continue */
  2709. if (likely(tcpconn->flags & F_CONN_MAIN_TIMER))
  2710. local_timer_del(&tcp_main_ltimer, &tcpconn->timer);
  2711. TCPCONN_LOCK;
  2712. _tcpconn_detach(tcpconn);
  2713. tcpconn->flags &= ~(F_CONN_HASHED|F_CONN_MAIN_TIMER);
  2714. TCPCONN_UNLOCK;
  2715. }else{
  2716. LM_CRIT("%p flags = %0x\n", tcpconn, tcpconn->flags);
  2717. }
  2718. }
  2719. tcpconn->state=S_CONN_BAD;
  2720. /* in case it's still in a reader timer */
  2721. tcpconn->timeout=get_ticks_raw();
  2722. /* fast close: close fds now */
  2723. if (likely(!(tcpconn->flags & F_CONN_FD_CLOSED))){
  2724. tcpconn_close_main_fd(tcpconn);
  2725. tcpconn->flags|=F_CONN_FD_CLOSED;
  2726. (*tcp_connections_no)--;
  2727. if (unlikely(tcpconn->type==PROTO_TLS || tcpconn->type==PROTO_WSS))
  2728. (*tls_connections_no)--;
  2729. }
  2730. /* all the flags / ops on the tcpconn must be done prior to decrementing
  2731. * the refcnt. and at least a membar_write_atomic_op() mem. barrier or
  2732. * a mb_atomic_* op must * be used to make sure all the changed flags are
  2733. * written into memory prior to the new refcnt value */
  2734. if (unlikely(mb_atomic_dec_and_test(&tcpconn->refcnt))){
  2735. _tcpconn_free(tcpconn);
  2736. return 1;
  2737. }
  2738. return 0;
  2739. }
  2740. /* try to remove a connection from the hashes and timer.
  2741. * returns 1 if the connection was removed, 0 if not (connection not in
  2742. * hash)
  2743. *
  2744. * WARNING: call it only in the tcp_main process context or else the
  2745. * timer removal won't work.
  2746. */
  2747. inline static int tcpconn_try_unhash(struct tcp_connection* tcpconn)
  2748. {
  2749. if (likely(tcpconn->flags & F_CONN_HASHED)){
  2750. tcpconn->state=S_CONN_BAD;
  2751. if (likely(tcpconn->flags & F_CONN_MAIN_TIMER)){
  2752. local_timer_del(&tcp_main_ltimer, &tcpconn->timer);
  2753. tcpconn->flags&=~F_CONN_MAIN_TIMER;
  2754. }else
  2755. /* in case it's still in a reader timer */
  2756. tcpconn->timeout=get_ticks_raw();
  2757. TCPCONN_LOCK;
  2758. if (tcpconn->flags & F_CONN_HASHED){
  2759. tcpconn->flags&=~F_CONN_HASHED;
  2760. _tcpconn_detach(tcpconn);
  2761. TCPCONN_UNLOCK;
  2762. }else{
  2763. /* tcp_send was faster and did unhash it itself */
  2764. TCPCONN_UNLOCK;
  2765. return 0;
  2766. }
  2767. #ifdef TCP_ASYNC
  2768. /* empty possible write buffers (optional) */
  2769. if (unlikely(_wbufq_non_empty(tcpconn))){
  2770. lock_get(&tcpconn->write_lock);
  2771. /* check again, while holding the lock */
  2772. if (likely(_wbufq_non_empty(tcpconn)))
  2773. _wbufq_destroy(&tcpconn->wbuf_q);
  2774. lock_release(&tcpconn->write_lock);
  2775. }
  2776. #endif /* TCP_ASYNC */
  2777. return 1;
  2778. }
  2779. return 0;
  2780. }
  2781. #ifdef SEND_FD_QUEUE
  2782. struct send_fd_info{
  2783. struct tcp_connection* tcp_conn;
  2784. ticks_t expire;
  2785. int unix_sock;
  2786. unsigned int retries; /* debugging */
  2787. };
  2788. struct tcp_send_fd_q{
  2789. struct send_fd_info* data; /* buffer */
  2790. struct send_fd_info* crt; /* pointer inside the buffer */
  2791. struct send_fd_info* end; /* points after the last valid position */
  2792. };
  2793. static struct tcp_send_fd_q send2child_q;
  2794. static int send_fd_queue_init(struct tcp_send_fd_q *q, unsigned int size)
  2795. {
  2796. q->data=pkg_malloc(size*sizeof(struct send_fd_info));
  2797. if (q->data==0){
  2798. LM_ERR("out of memory\n");
  2799. return -1;
  2800. }
  2801. q->crt=&q->data[0];
  2802. q->end=&q->data[size];
  2803. return 0;
  2804. }
  2805. static void send_fd_queue_destroy(struct tcp_send_fd_q *q)
  2806. {
  2807. if (q->data){
  2808. pkg_free(q->data);
  2809. q->data=0;
  2810. q->crt=q->end=0;
  2811. }
  2812. }
  2813. static int init_send_fd_queues(void)
  2814. {
  2815. if (send_fd_queue_init(&send2child_q, SEND_FD_QUEUE_SIZE)!=0)
  2816. goto error;
  2817. return 0;
  2818. error:
  2819. LM_ERR("init failed\n");
  2820. return -1;
  2821. }
  2822. static void destroy_send_fd_queues(void)
  2823. {
  2824. send_fd_queue_destroy(&send2child_q);
  2825. }
  2826. inline static int send_fd_queue_add( struct tcp_send_fd_q* q,
  2827. int unix_sock,
  2828. struct tcp_connection *t)
  2829. {
  2830. struct send_fd_info* tmp;
  2831. unsigned long new_size;
  2832. if (q->crt>=q->end){
  2833. new_size=q->end-&q->data[0];
  2834. if (new_size< MAX_SEND_FD_QUEUE_SIZE/2){
  2835. new_size*=2;
  2836. }else new_size=MAX_SEND_FD_QUEUE_SIZE;
  2837. if (unlikely(q->crt>=&q->data[new_size])){
  2838. LM_ERR("queue full: %ld/%ld\n",
  2839. (long)(q->crt-&q->data[0]-1), new_size);
  2840. goto error;
  2841. }
  2842. LM_CRIT("queue full: %ld, extending to %ld\n",
  2843. (long)(q->end-&q->data[0]), new_size);
  2844. tmp=pkg_realloc(q->data, new_size*sizeof(struct send_fd_info));
  2845. if (unlikely(tmp==0)){
  2846. LM_ERR("out of memory\n");
  2847. goto error;
  2848. }
  2849. q->crt=(q->crt-&q->data[0])+tmp;
  2850. q->data=tmp;
  2851. q->end=&q->data[new_size];
  2852. }
  2853. q->crt->tcp_conn=t;
  2854. q->crt->unix_sock=unix_sock;
  2855. q->crt->expire=get_ticks_raw()+SEND_FD_QUEUE_TIMEOUT;
  2856. q->crt->retries=0;
  2857. q->crt++;
  2858. return 0;
  2859. error:
  2860. return -1;
  2861. }
  2862. inline static void send_fd_queue_run(struct tcp_send_fd_q* q)
  2863. {
  2864. struct send_fd_info* p;
  2865. struct send_fd_info* t;
  2866. for (p=t=&q->data[0]; p<q->crt; p++){
  2867. if (unlikely(p->tcp_conn->state == S_CONN_BAD ||
  2868. p->tcp_conn->flags & F_CONN_FD_CLOSED ||
  2869. p->tcp_conn->s ==-1)) {
  2870. /* bad and/or already closed connection => remove */
  2871. goto rm_con;
  2872. }
  2873. if (unlikely(send_fd(p->unix_sock, &(p->tcp_conn),
  2874. sizeof(struct tcp_connection*), p->tcp_conn->s)<=0)){
  2875. if ( ((errno==EAGAIN)||(errno==EWOULDBLOCK)) &&
  2876. ((s_ticks_t)(p->expire-get_ticks_raw())>0)){
  2877. /* leave in queue for a future try */
  2878. *t=*p;
  2879. t->retries++;
  2880. t++;
  2881. }else{
  2882. LM_ERR("send_fd failed on socket %d , queue entry %ld, retries %d,"
  2883. " connection %p, tcp socket %d, errno=%d (%s) \n",
  2884. p->unix_sock, (long)(p-&q->data[0]), p->retries,
  2885. p->tcp_conn, p->tcp_conn->s, errno,
  2886. strerror(errno));
  2887. rm_con:
  2888. #ifdef TCP_ASYNC
  2889. /* if a connection is on the send_fd queue it means it's
  2890. not watched for read anymore => could be watched only for
  2891. write */
  2892. if (p->tcp_conn->flags & F_CONN_WRITE_W){
  2893. io_watch_del(&io_h, p->tcp_conn->s, -1, IO_FD_CLOSING);
  2894. p->tcp_conn->flags &=~F_CONN_WRITE_W;
  2895. }
  2896. #endif
  2897. p->tcp_conn->flags &= ~F_CONN_READER;
  2898. if (likely(tcpconn_try_unhash(p->tcp_conn)))
  2899. tcpconn_put(p->tcp_conn);
  2900. tcpconn_put_destroy(p->tcp_conn); /* dec refcnt & destroy */
  2901. }
  2902. }
  2903. }
  2904. q->crt=t;
  2905. }
  2906. #else
  2907. #define send_fd_queue_run(q)
  2908. #endif
  2909. /* non blocking write() on a tcpconnection, unsafe version (should be called
  2910. * while holding c->write_lock). The fd should be non-blocking.
  2911. * returns number of bytes written on success, -1 on error (and sets errno)
  2912. */
  2913. int _tcpconn_write_nb(int fd, struct tcp_connection* c,
  2914. const char* buf, int len)
  2915. {
  2916. int n;
  2917. again:
  2918. n=send(fd, buf, len,
  2919. #ifdef HAVE_MSG_NOSIGNAL
  2920. MSG_NOSIGNAL
  2921. #else
  2922. 0
  2923. #endif /* HAVE_MSG_NOSIGNAL */
  2924. );
  2925. if (unlikely(n<0)){
  2926. if (errno==EINTR) goto again;
  2927. }
  2928. return n;
  2929. }
  2930. /* handles io from a tcp child process
  2931. * params: tcp_c - pointer in the tcp_children array, to the entry for
  2932. * which an io event was detected
  2933. * fd_i - fd index in the fd_array (usefull for optimizing
  2934. * io_watch_deletes)
  2935. * returns: handle_* return convention: -1 on error, 0 on EAGAIN (no more
  2936. * io events queued), >0 on success. success/error refer only to
  2937. * the reads from the fd.
  2938. */
  2939. inline static int handle_tcp_child(struct tcp_child* tcp_c, int fd_i)
  2940. {
  2941. struct tcp_connection* tcpconn;
  2942. long response[2];
  2943. int cmd;
  2944. int bytes;
  2945. int n;
  2946. ticks_t t;
  2947. ticks_t crt_timeout;
  2948. ticks_t con_lifetime;
  2949. if (unlikely(tcp_c->unix_sock<=0)){
  2950. /* (we can't have a fd==0, 0 is never closed )*/
  2951. LM_CRIT("fd %d for %d (pid %ld, ser no %d)\n", tcp_c->unix_sock,
  2952. (int)(tcp_c-&tcp_children[0]), (long)tcp_c->pid,
  2953. tcp_c->proc_no);
  2954. goto error;
  2955. }
  2956. /* read until sizeof(response)
  2957. * (this is a SOCK_STREAM so read is not atomic) */
  2958. bytes=recv_all(tcp_c->unix_sock, response, sizeof(response), MSG_DONTWAIT);
  2959. if (unlikely(bytes<(int)sizeof(response))){
  2960. if (bytes==0){
  2961. /* EOF -> bad, child has died */
  2962. LM_DBG("dead tcp child %d (pid %ld, no %d) (shutting down?)\n",
  2963. (int)(tcp_c-&tcp_children[0]),
  2964. (long)tcp_c->pid, tcp_c->proc_no );
  2965. /* don't listen on it any more */
  2966. io_watch_del(&io_h, tcp_c->unix_sock, fd_i, 0);
  2967. goto error; /* eof. so no more io here, it's ok to return error */
  2968. }else if (bytes<0){
  2969. /* EAGAIN is ok if we try to empty the buffer
  2970. * e.g.: SIGIO_RT overflow mode or EPOLL ET */
  2971. if ((errno!=EAGAIN) && (errno!=EWOULDBLOCK)){
  2972. LM_CRIT("read from tcp child %ld (pid %ld, no %d) %s [%d]\n",
  2973. (long)(tcp_c-&tcp_children[0]), (long)tcp_c->pid,
  2974. tcp_c->proc_no, strerror(errno), errno );
  2975. }else{
  2976. bytes=0;
  2977. }
  2978. /* try to ignore ? */
  2979. goto end;
  2980. }else{
  2981. /* should never happen */
  2982. LM_CRIT("too few bytes received (%d)\n", bytes );
  2983. bytes=0; /* something was read so there is no error; otoh if
  2984. receive_fd returned less then requested => the receive
  2985. buffer is empty => no more io queued on this fd */
  2986. goto end;
  2987. }
  2988. }
  2989. LM_DBG("reader response= %lx, %ld from %d \n",
  2990. response[0], response[1], (int)(tcp_c-&tcp_children[0]));
  2991. cmd=response[1];
  2992. tcpconn=(struct tcp_connection*)response[0];
  2993. if (unlikely(tcpconn==0)){
  2994. /* should never happen */
  2995. LM_CRIT("null tcpconn pointer received from tcp child %d (pid %ld): %lx, %lx\n",
  2996. (int)(tcp_c-&tcp_children[0]), (long)tcp_c->pid,
  2997. response[0], response[1]) ;
  2998. goto end;
  2999. }
  3000. switch(cmd){
  3001. case CONN_RELEASE:
  3002. tcp_c->busy--;
  3003. if (unlikely(tcpconn_put(tcpconn))){
  3004. /* if refcnt was 1 => it was used only in the
  3005. tcp reader => it's not hashed or watched for IO
  3006. anymore => no need to io_watch_del() */
  3007. tcpconn_destroy(tcpconn);
  3008. break;
  3009. }
  3010. if (unlikely(tcpconn->state==S_CONN_BAD)){
  3011. if (tcpconn_try_unhash(tcpconn)) {
  3012. #ifdef TCP_ASYNC
  3013. if (unlikely(tcpconn->flags & F_CONN_WRITE_W)){
  3014. io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
  3015. tcpconn->flags &= ~F_CONN_WRITE_W;
  3016. }
  3017. #endif /* TCP_ASYNC */
  3018. tcpconn_put_destroy(tcpconn);
  3019. }
  3020. #ifdef TCP_ASYNC
  3021. else if (unlikely(tcpconn->flags & F_CONN_WRITE_W)){
  3022. /* should never happen: if it's already unhashed, it
  3023. should not be watched for IO */
  3024. BUG("unhashed connection watched for write\n");
  3025. io_watch_del(&io_h, tcpconn->s, -1, 0);
  3026. tcpconn->flags &= ~F_CONN_WRITE_W;
  3027. }
  3028. #endif /* TCP_ASYNC */
  3029. break;
  3030. }
  3031. /* update the timeout*/
  3032. t=get_ticks_raw();
  3033. con_lifetime=tcpconn->lifetime;
  3034. tcpconn->timeout=t+con_lifetime;
  3035. crt_timeout=con_lifetime;
  3036. #ifdef TCP_ASYNC
  3037. if (unlikely(cfg_get(tcp, tcp_cfg, async) &&
  3038. _wbufq_non_empty(tcpconn) )){
  3039. if (unlikely(TICKS_GE(t, tcpconn->wbuf_q.wr_timeout))){
  3040. LM_DBG("wr. timeout on CONN_RELEASE for %p refcnt= %d\n",
  3041. tcpconn, atomic_get(&tcpconn->refcnt));
  3042. /* timeout */
  3043. if (unlikely(tcpconn->state==S_CONN_CONNECT)){
  3044. #ifdef USE_DST_BLACKLIST
  3045. (void)dst_blacklist_su( BLST_ERR_CONNECT,
  3046. tcpconn->rcv.proto,
  3047. &tcpconn->rcv.src_su,
  3048. &tcpconn->send_flags, 0);
  3049. #endif /* USE_DST_BLACKLIST */
  3050. TCP_EV_CONNECT_TIMEOUT(0, TCP_LADDR(tcpconn),
  3051. TCP_LPORT(tcpconn), TCP_PSU(tcpconn),
  3052. TCP_PROTO(tcpconn));
  3053. TCP_STATS_CONNECT_FAILED();
  3054. }else{
  3055. #ifdef USE_DST_BLACKLIST
  3056. (void)dst_blacklist_su( BLST_ERR_SEND,
  3057. tcpconn->rcv.proto,
  3058. &tcpconn->rcv.src_su,
  3059. &tcpconn->send_flags, 0);
  3060. #endif /* USE_DST_BLACKLIST */
  3061. TCP_EV_SEND_TIMEOUT(0, &tcpconn->rcv);
  3062. TCP_STATS_SEND_TIMEOUT();
  3063. }
  3064. if (tcpconn_try_unhash(tcpconn)) {
  3065. if (unlikely(tcpconn->flags & F_CONN_WRITE_W)){
  3066. io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
  3067. tcpconn->flags&=~F_CONN_WRITE_W;
  3068. }
  3069. tcpconn_put_destroy(tcpconn);
  3070. } else if (unlikely(tcpconn->flags & F_CONN_WRITE_W)){
  3071. BUG("unhashed connection watched for write\n");
  3072. io_watch_del(&io_h, tcpconn->s, -1, 0);
  3073. tcpconn->flags&=~F_CONN_WRITE_W;
  3074. }
  3075. break;
  3076. }else{
  3077. crt_timeout=MIN_unsigned(con_lifetime,
  3078. tcpconn->wbuf_q.wr_timeout-t);
  3079. }
  3080. }
  3081. #endif /* TCP_ASYNC */
  3082. /* re-activate the timer */
  3083. tcpconn->timer.f=tcpconn_main_timeout;
  3084. local_timer_reinit(&tcpconn->timer);
  3085. local_timer_add(&tcp_main_ltimer, &tcpconn->timer, crt_timeout, t);
  3086. /* must be after the de-ref*/
  3087. tcpconn->flags|=(F_CONN_MAIN_TIMER|F_CONN_READ_W|F_CONN_WANTS_RD);
  3088. tcpconn->flags&=~(F_CONN_READER|F_CONN_OOB_DATA);
  3089. #ifdef TCP_ASYNC
  3090. if (unlikely(tcpconn->flags & F_CONN_WRITE_W))
  3091. n=io_watch_chg(&io_h, tcpconn->s, POLLIN| POLLOUT, -1);
  3092. else
  3093. #endif /* TCP_ASYNC */
  3094. n=io_watch_add(&io_h, tcpconn->s, POLLIN, F_TCPCONN, tcpconn);
  3095. if (unlikely(n<0)){
  3096. LM_CRIT("failed to add new socket to the fd list\n");
  3097. tcpconn->flags&=~F_CONN_READ_W;
  3098. if (tcpconn_try_unhash(tcpconn)) {
  3099. #ifdef TCP_ASYNC
  3100. if (unlikely(tcpconn->flags & F_CONN_WRITE_W)){
  3101. io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
  3102. tcpconn->flags&=~F_CONN_WRITE_W;
  3103. }
  3104. #endif /* TCP_ASYNC */
  3105. tcpconn_put_destroy(tcpconn);
  3106. }
  3107. #ifdef TCP_ASYNC
  3108. else if (unlikely(tcpconn->flags & F_CONN_WRITE_W)) {
  3109. BUG("unhashed connection watched for write\n");
  3110. io_watch_del(&io_h, tcpconn->s, -1, 0);
  3111. tcpconn->flags&=~F_CONN_WRITE_W;
  3112. }
  3113. #endif /* TCP_ASYNC */
  3114. break;
  3115. }
  3116. LM_DBG("CONN_RELEASE %p refcnt= %d\n",
  3117. tcpconn, atomic_get(&tcpconn->refcnt));
  3118. break;
  3119. case CONN_ERROR:
  3120. case CONN_DESTROY:
  3121. case CONN_EOF:
  3122. /* WARNING: this will auto-dec. refcnt! */
  3123. tcp_c->busy--;
  3124. /* main doesn't listen on it => we don't have to delete it
  3125. if (tcpconn->s!=-1)
  3126. io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
  3127. */
  3128. #ifdef TCP_ASYNC
  3129. if ((tcpconn->flags & F_CONN_WRITE_W) && (tcpconn->s!=-1)){
  3130. io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
  3131. tcpconn->flags&=~F_CONN_WRITE_W;
  3132. }
  3133. #endif /* TCP_ASYNC */
  3134. if (tcpconn_try_unhash(tcpconn))
  3135. tcpconn_put(tcpconn);
  3136. tcpconn_put_destroy(tcpconn); /* deref & delete if refcnt==0 */
  3137. break;
  3138. default:
  3139. LM_CRIT("unknown cmd %d from tcp reader %d\n",
  3140. cmd, (int)(tcp_c-&tcp_children[0]));
  3141. }
  3142. end:
  3143. return bytes;
  3144. error:
  3145. return -1;
  3146. }
  3147. /* handles io from a "generic" ser process (get fd or new_fd from a tcp_send)
  3148. *
  3149. * params: p - pointer in the ser processes array (pt[]), to the entry for
  3150. * which an io event was detected
  3151. * fd_i - fd index in the fd_array (usefull for optimizing
  3152. * io_watch_deletes)
  3153. * returns: handle_* return convention:
  3154. * -1 on error reading from the fd,
  3155. * 0 on EAGAIN or when no more io events are queued
  3156. * (receive buffer empty),
  3157. * >0 on successfull reads from the fd (the receive buffer might
  3158. * be non-empty).
  3159. */
  3160. inline static int handle_ser_child(struct process_table* p, int fd_i)
  3161. {
  3162. struct tcp_connection* tcpconn;
  3163. struct tcp_connection* tmp;
  3164. long response[2];
  3165. int cmd;
  3166. int bytes;
  3167. int ret;
  3168. int fd;
  3169. int flags;
  3170. ticks_t t;
  3171. ticks_t con_lifetime;
  3172. #ifdef TCP_ASYNC
  3173. ticks_t nxt_timeout;
  3174. #endif /* TCP_ASYNC */
  3175. ret=-1;
  3176. if (unlikely(p->unix_sock<=0)){
  3177. /* (we can't have a fd==0, 0 is never closed )*/
  3178. LM_CRIT("fd %d for %d (pid %d)\n", p->unix_sock, (int)(p-&pt[0]), p->pid);
  3179. goto error;
  3180. }
  3181. /* get all bytes and the fd (if transmitted)
  3182. * (this is a SOCK_STREAM so read is not atomic) */
  3183. bytes=receive_fd(p->unix_sock, response, sizeof(response), &fd,
  3184. MSG_DONTWAIT);
  3185. if (unlikely(bytes<(int)sizeof(response))){
  3186. /* too few bytes read */
  3187. if (bytes==0){
  3188. /* EOF -> bad, child has died */
  3189. LM_DBG("dead child %d, pid %d (shutting down?)\n",
  3190. (int)(p-&pt[0]), p->pid);
  3191. /* don't listen on it any more */
  3192. io_watch_del(&io_h, p->unix_sock, fd_i, 0);
  3193. goto error; /* child dead => no further io events from it */
  3194. }else if (bytes<0){
  3195. /* EAGAIN is ok if we try to empty the buffer
  3196. * e.g: SIGIO_RT overflow mode or EPOLL ET */
  3197. if ((errno!=EAGAIN) && (errno!=EWOULDBLOCK)){
  3198. LM_CRIT("read from child %d (pid %d): %s [%d]\n",
  3199. (int)(p-&pt[0]), p->pid,
  3200. strerror(errno), errno);
  3201. ret=-1;
  3202. }else{
  3203. ret=0;
  3204. }
  3205. /* try to ignore ? */
  3206. goto end;
  3207. }else{
  3208. /* should never happen */
  3209. LM_CRIT("too few bytes received (%d)\n", bytes );
  3210. ret=0; /* something was read so there is no error; otoh if
  3211. receive_fd returned less then requested => the receive
  3212. buffer is empty => no more io queued on this fd */
  3213. goto end;
  3214. }
  3215. }
  3216. ret=1; /* something was received, there might be more queued */
  3217. LM_DBG("read response= %lx, %ld, fd %d from %d (%d)\n",
  3218. response[0], response[1], fd, (int)(p-&pt[0]), p->pid);
  3219. cmd=response[1];
  3220. tcpconn=(struct tcp_connection*)response[0];
  3221. if (unlikely(tcpconn==0)){
  3222. LM_CRIT("null tcpconn pointer received from child %d (pid %d): %lx, %lx\n",
  3223. (int)(p-&pt[0]), p->pid, response[0], response[1]) ;
  3224. goto end;
  3225. }
  3226. switch(cmd){
  3227. case CONN_ERROR:
  3228. LM_ERR("received CON_ERROR for %p (id %d), refcnt %d, flags 0x%0x\n",
  3229. tcpconn, tcpconn->id, atomic_get(&tcpconn->refcnt),
  3230. tcpconn->flags);
  3231. case CONN_EOF: /* forced EOF after full send, due to send flags */
  3232. #ifdef TCP_CONNECT_WAIT
  3233. /* if the connection is marked as pending => it might be on
  3234. * the way of reaching tcp_main (e.g. CONN_NEW_COMPLETE or
  3235. * CONN_NEW_PENDING_WRITE) => it cannot be destroyed here,
  3236. * it will be destroyed on CONN_NEW_COMPLETE /
  3237. * CONN_NEW_PENDING_WRITE or in the send error case by the
  3238. * sender process */
  3239. if (unlikely(tcpconn->flags & F_CONN_PENDING)) {
  3240. if (tcpconn_put(tcpconn))
  3241. tcpconn_destroy(tcpconn);
  3242. /* no need for io_watch_del(), if PENDING it should not
  3243. be watched for anything in tcp_main */
  3244. break;
  3245. }
  3246. #endif /* TCP_CONNECT_WAIT */
  3247. if ( tcpconn_try_unhash(tcpconn) )
  3248. tcpconn_put(tcpconn);
  3249. if ( ((tcpconn->flags & (F_CONN_WRITE_W|F_CONN_READ_W)) ) &&
  3250. (tcpconn->s!=-1)){
  3251. io_watch_del(&io_h, tcpconn->s, -1, IO_FD_CLOSING);
  3252. tcpconn->flags&=~(F_CONN_WRITE_W|F_CONN_READ_W);
  3253. }
  3254. tcpconn_put_destroy(tcpconn); /* dec refcnt & destroy on 0 */
  3255. break;
  3256. case CONN_GET_FD:
  3257. /* send the requested FD */
  3258. /* WARNING: take care of setting refcnt properly to
  3259. * avoid race conditions */
  3260. if (unlikely(tcpconn->state == S_CONN_BAD ||
  3261. (tcpconn->flags & F_CONN_FD_CLOSED) ||
  3262. tcpconn->s ==-1)) {
  3263. /* connection is already marked as bad and/or has no
  3264. fd => don't try to send the fd (trying to send a
  3265. closed fd _will_ fail) */
  3266. tmp = 0;
  3267. if (unlikely(send_all(p->unix_sock, &tmp, sizeof(tmp)) <= 0))
  3268. BUG("handle_ser_child: CONN_GET_FD: send_all failed\n");
  3269. /* no need to attempt to destroy the connection, it should
  3270. be already in the process of being destroyed */
  3271. } else if (unlikely(send_fd(p->unix_sock, &tcpconn,
  3272. sizeof(tcpconn), tcpconn->s)<=0)){
  3273. LM_ERR("CONN_GET_FD: send_fd failed\n");
  3274. /* try sending error (better then not sending anything) */
  3275. tmp = 0;
  3276. if (unlikely(send_all(p->unix_sock, &tmp, sizeof(tmp)) <= 0))
  3277. BUG("handle_ser_child: CONN_GET_FD:"
  3278. " send_fd send_all fallback failed\n");
  3279. }
  3280. break;
  3281. case CONN_NEW:
  3282. /* update the fd in the requested tcpconn*/
  3283. /* WARNING: take care of setting refcnt properly to
  3284. * avoid race conditions */
  3285. if (unlikely(fd==-1)){
  3286. LM_CRIT("CONN_NEW: no fd received\n");
  3287. tcpconn->flags|=F_CONN_FD_CLOSED;
  3288. tcpconn_put_destroy(tcpconn);
  3289. break;
  3290. }
  3291. (*tcp_connections_no)++;
  3292. if (unlikely(tcpconn->type==PROTO_TLS))
  3293. (*tls_connections_no)++;
  3294. tcpconn->s=fd;
  3295. /* add tcpconn to the list*/
  3296. tcpconn_add(tcpconn);
  3297. /* update the timeout*/
  3298. t=get_ticks_raw();
  3299. con_lifetime=cfg_get(tcp, tcp_cfg, con_lifetime);
  3300. tcpconn->timeout=t+con_lifetime;
  3301. /* activate the timer (already properly init. in tcpconn_new())
  3302. * no need for reinit */
  3303. local_timer_add(&tcp_main_ltimer, &tcpconn->timer,
  3304. con_lifetime, t);
  3305. tcpconn->flags|=(F_CONN_MAIN_TIMER|F_CONN_READ_W|F_CONN_WANTS_RD)
  3306. #ifdef TCP_ASYNC
  3307. /* not used for now, the connection is sent to tcp_main
  3308. * before knowing whether we can write on it or we should
  3309. * wait */
  3310. | (((int)!(tcpconn->flags & F_CONN_WANTS_WR)-1)&
  3311. F_CONN_WRITE_W)
  3312. #endif /* TCP_ASYNC */
  3313. ;
  3314. tcpconn->flags&=~F_CONN_FD_CLOSED;
  3315. flags=POLLIN
  3316. #ifdef TCP_ASYNC
  3317. /* not used for now, the connection is sent to tcp_main
  3318. * before knowing if we can write on it or we should
  3319. * wait */
  3320. | (((int)!(tcpconn->flags & F_CONN_WANTS_WR)-1) & POLLOUT)
  3321. #endif /* TCP_ASYNC */
  3322. ;
  3323. if (unlikely(
  3324. io_watch_add(&io_h, tcpconn->s, flags,
  3325. F_TCPCONN, tcpconn)<0)){
  3326. LM_CRIT("failed to add new socket to the fd list\n");
  3327. tcpconn->flags&=~(F_CONN_WRITE_W|F_CONN_READ_W);
  3328. tcpconn_try_unhash(tcpconn); /* unhash & dec refcnt */
  3329. tcpconn_put_destroy(tcpconn);
  3330. }
  3331. break;
  3332. #ifdef TCP_ASYNC
  3333. case CONN_QUEUED_WRITE:
  3334. /* received only if the wr. queue is empty and a write finishes
  3335. * with EAGAIN (common after connect())
  3336. * it should only enable write watching on the fd. The connection
  3337. * should be already in the hash. The refcnt is automatically
  3338. * decremented.
  3339. */
  3340. /* auto-dec refcnt */
  3341. if (unlikely(tcpconn_put(tcpconn))){
  3342. tcpconn_destroy(tcpconn);
  3343. break;
  3344. }
  3345. if (unlikely((tcpconn->state==S_CONN_BAD) ||
  3346. !(tcpconn->flags & F_CONN_HASHED) ))
  3347. /* in the process of being destroyed => do nothing */
  3348. break;
  3349. if (!(tcpconn->flags & F_CONN_WANTS_WR)){
  3350. tcpconn->flags|=F_CONN_WANTS_WR;
  3351. t=get_ticks_raw();
  3352. if (likely((tcpconn->flags & F_CONN_MAIN_TIMER) &&
  3353. (TICKS_LT(tcpconn->wbuf_q.wr_timeout, tcpconn->timeout)) &&
  3354. TICKS_LT(t, tcpconn->wbuf_q.wr_timeout) )){
  3355. /* _wbufq_nonempty() is guaranteed here */
  3356. /* update the timer */
  3357. local_timer_del(&tcp_main_ltimer, &tcpconn->timer);
  3358. local_timer_reinit(&tcpconn->timer);
  3359. local_timer_add(&tcp_main_ltimer, &tcpconn->timer,
  3360. tcpconn->wbuf_q.wr_timeout-t, t);
  3361. LM_DBG("CONN_QUEUED_WRITE; %p timeout adjusted to %d s\n",
  3362. tcpconn, TICKS_TO_S(tcpconn->wbuf_q.wr_timeout-t));
  3363. }
  3364. if (!(tcpconn->flags & F_CONN_WRITE_W)){
  3365. tcpconn->flags|=F_CONN_WRITE_W;
  3366. if (!(tcpconn->flags & F_CONN_READ_W)){
  3367. if (unlikely(io_watch_add(&io_h, tcpconn->s, POLLOUT,
  3368. F_TCPCONN, tcpconn)<0)){
  3369. LM_CRIT("failed to enable write watch on socket\n");
  3370. if (tcpconn_try_unhash(tcpconn))
  3371. tcpconn_put_destroy(tcpconn);
  3372. break;
  3373. }
  3374. }else{
  3375. if (unlikely(io_watch_chg(&io_h, tcpconn->s,
  3376. POLLIN|POLLOUT, -1)<0)){
  3377. LM_CRIT("failed to change socket watch events\n");
  3378. if (tcpconn_try_unhash(tcpconn)) {
  3379. io_watch_del(&io_h, tcpconn->s, -1,
  3380. IO_FD_CLOSING);
  3381. tcpconn->flags&=~F_CONN_READ_W;
  3382. tcpconn_put_destroy(tcpconn);
  3383. } else {
  3384. BUG("unhashed connection watched for IO\n");
  3385. io_watch_del(&io_h, tcpconn->s, -1, 0);
  3386. tcpconn->flags&=~F_CONN_READ_W;
  3387. }
  3388. break;
  3389. }
  3390. }
  3391. }
  3392. }else{
  3393. LM_WARN("connection %p already watched for write\n", tcpconn);
  3394. }
  3395. break;
  3396. #ifdef TCP_CONNECT_WAIT
  3397. case CONN_NEW_COMPLETE:
  3398. case CONN_NEW_PENDING_WRITE:
  3399. /* received when a pending connect completes in the same
  3400. * tcp_send() that initiated it
  3401. * the connection is already in the hash with F_CONN_PENDING
  3402. * flag (added by tcp_send()) and refcnt at least 1 (for the
  3403. * hash)*/
  3404. tcpconn->flags&=~(F_CONN_PENDING|F_CONN_FD_CLOSED);
  3405. if (unlikely((tcpconn->state==S_CONN_BAD) || (fd==-1))){
  3406. if (unlikely(fd==-1))
  3407. LM_CRIT("CONN_NEW_COMPLETE: no fd received\n");
  3408. else
  3409. LM_WARN("CONN_NEW_COMPLETE: received connection with error\n");
  3410. tcpconn->flags|=F_CONN_FD_CLOSED;
  3411. tcpconn->state=S_CONN_BAD;
  3412. tcpconn_try_unhash(tcpconn);
  3413. tcpconn_put_destroy(tcpconn);
  3414. break;
  3415. }
  3416. (*tcp_connections_no)++;
  3417. if (unlikely(tcpconn->type==PROTO_TLS))
  3418. (*tls_connections_no)++;
  3419. tcpconn->s=fd;
  3420. /* update the timeout*/
  3421. t=get_ticks_raw();
  3422. con_lifetime=cfg_get(tcp, tcp_cfg, con_lifetime);
  3423. tcpconn->timeout=t+con_lifetime;
  3424. nxt_timeout=con_lifetime;
  3425. if (unlikely(cmd==CONN_NEW_COMPLETE)){
  3426. /* check if needs to be watched for write */
  3427. lock_get(&tcpconn->write_lock);
  3428. /* if queue non empty watch it for write */
  3429. flags=(_wbufq_empty(tcpconn)-1)&POLLOUT;
  3430. lock_release(&tcpconn->write_lock);
  3431. if (flags){
  3432. if (TICKS_LT(tcpconn->wbuf_q.wr_timeout, tcpconn->timeout)
  3433. && TICKS_LT(t, tcpconn->wbuf_q.wr_timeout))
  3434. nxt_timeout=tcpconn->wbuf_q.wr_timeout-t;
  3435. tcpconn->flags|=F_CONN_WRITE_W|F_CONN_WANTS_WR;
  3436. }
  3437. /* activate the timer (already properly init. in
  3438. tcpconn_new()) no need for reinit */
  3439. local_timer_add(&tcp_main_ltimer, &tcpconn->timer, nxt_timeout,
  3440. t);
  3441. tcpconn->flags|=F_CONN_MAIN_TIMER|F_CONN_READ_W|
  3442. F_CONN_WANTS_RD;
  3443. }else{
  3444. /* CONN_NEW_PENDING_WRITE */
  3445. /* no need to check, we have something queued for write */
  3446. flags=POLLOUT;
  3447. if (TICKS_LT(tcpconn->wbuf_q.wr_timeout, tcpconn->timeout)
  3448. && TICKS_LT(t, tcpconn->wbuf_q.wr_timeout))
  3449. nxt_timeout=tcpconn->wbuf_q.wr_timeout-t;
  3450. /* activate the timer (already properly init. in
  3451. tcpconn_new()) no need for reinit */
  3452. local_timer_add(&tcp_main_ltimer, &tcpconn->timer, nxt_timeout,
  3453. t);
  3454. tcpconn->flags|=F_CONN_MAIN_TIMER|F_CONN_READ_W|
  3455. F_CONN_WANTS_RD |
  3456. F_CONN_WRITE_W|F_CONN_WANTS_WR;
  3457. }
  3458. flags|=POLLIN;
  3459. if (unlikely(
  3460. io_watch_add(&io_h, tcpconn->s, flags,
  3461. F_TCPCONN, tcpconn)<0)){
  3462. LM_CRIT("failed to add new socket to the fd list\n");
  3463. tcpconn->flags&=~(F_CONN_WRITE_W|F_CONN_READ_W);
  3464. tcpconn_try_unhash(tcpconn); /* unhash & dec refcnt */
  3465. tcpconn_put_destroy(tcpconn);
  3466. }
  3467. break;
  3468. #endif /* TCP_CONNECT_WAIT */
  3469. #endif /* TCP_ASYNC */
  3470. default:
  3471. LM_CRIT("unknown cmd %d\n", cmd);
  3472. }
  3473. end:
  3474. return ret;
  3475. error:
  3476. return -1;
  3477. }
  3478. /* sends a tcpconn + fd to a choosen child */
  3479. inline static int send2child(struct tcp_connection* tcpconn)
  3480. {
  3481. int i;
  3482. int min_busy;
  3483. int idx;
  3484. int wfirst;
  3485. int wlast;
  3486. static int crt=0; /* current child */
  3487. int last;
  3488. if(likely(tcp_sockets_gworkers==0)) {
  3489. /* no child selection based on received socket
  3490. * - use least loaded over all */
  3491. min_busy=tcp_children[0].busy;
  3492. idx=0;
  3493. last=crt+tcp_children_no;
  3494. for (; crt<last; crt++){
  3495. i=crt%tcp_children_no;
  3496. if (!tcp_children[i].busy){
  3497. idx=i;
  3498. min_busy=0;
  3499. break;
  3500. }else if (min_busy>tcp_children[i].busy){
  3501. min_busy=tcp_children[i].busy;
  3502. idx=i;
  3503. }
  3504. }
  3505. crt=idx+1; /* next time we start with crt%tcp_children_no */
  3506. } else {
  3507. /* child selection based on received socket
  3508. * - use least loaded per received socket, starting with the first
  3509. * in its group */
  3510. if(tcpconn->rcv.bind_address->workers>0) {
  3511. wfirst = tcpconn->rcv.bind_address->workers_tcpidx;
  3512. wlast = wfirst + tcpconn->rcv.bind_address->workers;
  3513. LM_DBG("checking per-socket specific workers (%d/%d..%d/%d) [%s]\n",
  3514. tcp_children[wfirst].pid, tcp_children[wfirst].proc_no,
  3515. tcp_children[wlast-1].pid, tcp_children[wlast-1].proc_no,
  3516. (tcpconn->rcv.bind_address)?tcpconn->rcv.bind_address->sock_str.s:"");
  3517. } else {
  3518. wfirst = 0;
  3519. wlast = tcp_sockets_gworkers - 1;
  3520. LM_DBG("checking per-socket generic workers (%d/%d..%d/%d) [%s]\n",
  3521. tcp_children[wfirst].pid, tcp_children[wfirst].proc_no,
  3522. tcp_children[wlast-1].pid, tcp_children[wlast-1].proc_no,
  3523. (tcpconn->rcv.bind_address)?tcpconn->rcv.bind_address->sock_str.s:"");
  3524. }
  3525. idx = wfirst;
  3526. min_busy = tcp_children[idx].busy;
  3527. for(i=wfirst; i<wlast; i++) {
  3528. if (!tcp_children[i].busy){
  3529. idx=i;
  3530. min_busy=0;
  3531. break;
  3532. } else {
  3533. if (min_busy>tcp_children[i].busy) {
  3534. min_busy=tcp_children[i].busy;
  3535. idx=i;
  3536. }
  3537. }
  3538. }
  3539. }
  3540. tcp_children[idx].busy++;
  3541. tcp_children[idx].n_reqs++;
  3542. if (unlikely(min_busy)){
  3543. LM_DBG("WARNING: no free tcp receiver, "
  3544. "connection passed to the least busy one (%d)\n",
  3545. min_busy);
  3546. }
  3547. LM_DBG("selected tcp worker %d %d(%ld) for activity on [%s], %p\n",
  3548. idx, tcp_children[idx].proc_no, (long)tcp_children[idx].pid,
  3549. (tcpconn->rcv.bind_address)?tcpconn->rcv.bind_address->sock_str.s:"",
  3550. tcpconn);
  3551. /* first make sure this child doesn't have pending request for
  3552. * tcp_main (to avoid a possible deadlock: e.g. child wants to
  3553. * send a release command, but the master fills its socket buffer
  3554. * with new connection commands => deadlock) */
  3555. /* answer tcp_send requests first */
  3556. while(unlikely((tcpconn->state != S_CONN_BAD) &&
  3557. (handle_ser_child(&pt[tcp_children[idx].proc_no], -1)>0)));
  3558. /* process tcp readers requests */
  3559. while(unlikely((tcpconn->state != S_CONN_BAD &&
  3560. (handle_tcp_child(&tcp_children[idx], -1)>0))));
  3561. /* the above possible pending requests might have included a
  3562. command to close this tcpconn (e.g. CONN_ERROR, CONN_EOF).
  3563. In this case the fd is already closed here (and possible
  3564. even replaced by another one with the same number) so it
  3565. must not be sent to a reader anymore */
  3566. if (unlikely(tcpconn->state == S_CONN_BAD ||
  3567. (tcpconn->flags & F_CONN_FD_CLOSED)))
  3568. return -1;
  3569. #ifdef SEND_FD_QUEUE
  3570. /* if queue full, try to queue the io */
  3571. if (unlikely(send_fd(tcp_children[idx].unix_sock, &tcpconn,
  3572. sizeof(tcpconn), tcpconn->s)<=0)){
  3573. if ((errno==EAGAIN)||(errno==EWOULDBLOCK)){
  3574. /* FIXME: remove after debugging */
  3575. LM_CRIT("tcp child %d, socket %d: queue full, %d requests queued (total handled %d)\n",
  3576. idx, tcp_children[idx].unix_sock, min_busy,
  3577. tcp_children[idx].n_reqs-1);
  3578. if (send_fd_queue_add(&send2child_q, tcp_children[idx].unix_sock,
  3579. tcpconn)!=0){
  3580. LM_ERR("queue send op. failed\n");
  3581. return -1;
  3582. }
  3583. }else{
  3584. LM_ERR("send_fd failed for %p (flags 0x%0x), fd %d\n",
  3585. tcpconn, tcpconn->flags, tcpconn->s);
  3586. return -1;
  3587. }
  3588. }
  3589. #else
  3590. if (unlikely(send_fd(tcp_children[idx].unix_sock, &tcpconn,
  3591. sizeof(tcpconn), tcpconn->s)<=0)){
  3592. LM_ERR("send_fd failed for %p (flags 0x%0x), fd %d\n",
  3593. tcpconn, tcpconn->flags, tcpconn->s);
  3594. return -1;
  3595. }
  3596. #endif
  3597. return 0;
  3598. }
  3599. /* handles a new connection, called internally by tcp_main_loop/handle_io.
  3600. * params: si - pointer to one of the tcp socket_info structures on which
  3601. * an io event was detected (connection attempt)
  3602. * returns: handle_* return convention: -1 on error, 0 on EAGAIN (no more
  3603. * io events queued), >0 on success. success/error refer only to
  3604. * the accept.
  3605. */
  3606. static inline int handle_new_connect(struct socket_info* si)
  3607. {
  3608. union sockaddr_union su;
  3609. union sockaddr_union sock_name;
  3610. unsigned sock_name_len;
  3611. union sockaddr_union* dst_su;
  3612. struct tcp_connection* tcpconn;
  3613. socklen_t su_len;
  3614. int new_sock;
  3615. /* got a connection on r */
  3616. su_len=sizeof(su);
  3617. new_sock=accept(si->socket, &(su.s), &su_len);
  3618. if (unlikely(new_sock==-1)){
  3619. if ((errno==EAGAIN)||(errno==EWOULDBLOCK))
  3620. return 0;
  3621. LM_ERR("error while accepting connection(%d): %s\n", errno, strerror(errno));
  3622. return -1;
  3623. }
  3624. if (unlikely(*tcp_connections_no>=cfg_get(tcp, tcp_cfg, max_connections))){
  3625. LM_ERR("maximum number of connections exceeded: %d/%d\n",
  3626. *tcp_connections_no,
  3627. cfg_get(tcp, tcp_cfg, max_connections));
  3628. tcp_safe_close(new_sock);
  3629. TCP_STATS_LOCAL_REJECT();
  3630. return 1; /* success, because the accept was succesfull */
  3631. }
  3632. if (unlikely(si->proto==PROTO_TLS)) {
  3633. if (unlikely(*tls_connections_no>=cfg_get(tcp, tcp_cfg, max_tls_connections))){
  3634. LM_ERR("maximum number of tls connections exceeded: %d/%d\n",
  3635. *tls_connections_no,
  3636. cfg_get(tcp, tcp_cfg, max_tls_connections));
  3637. tcp_safe_close(new_sock);
  3638. TCP_STATS_LOCAL_REJECT();
  3639. return 1; /* success, because the accept was succesfull */
  3640. }
  3641. }
  3642. if (unlikely(init_sock_opt_accept(new_sock)<0)){
  3643. LM_ERR("init_sock_opt failed\n");
  3644. tcp_safe_close(new_sock);
  3645. return 1; /* success, because the accept was succesfull */
  3646. }
  3647. (*tcp_connections_no)++;
  3648. if (unlikely(si->proto==PROTO_TLS))
  3649. (*tls_connections_no)++;
  3650. /* stats for established connections are incremented after
  3651. the first received or sent packet.
  3652. Alternatively they could be incremented here for accepted
  3653. connections, but then the connection state must be changed to
  3654. S_CONN_OK:
  3655. TCP_STATS_ESTABLISHED(S_CONN_ACCEPT);
  3656. ...
  3657. tcpconn=tcpconn_new(new_sock, &su, dst_su, si, si->proto, S_CONN_OK);
  3658. */
  3659. dst_su=&si->su;
  3660. if (unlikely(si->flags & SI_IS_ANY)){
  3661. /* INADDR_ANY => get local dst */
  3662. sock_name_len=sizeof(sock_name);
  3663. if (getsockname(new_sock, &sock_name.s, &sock_name_len)!=0){
  3664. LM_ERR("getsockname failed: %s(%d)\n",
  3665. strerror(errno), errno);
  3666. /* go on with the 0.0.0.0 dst from the sock_info */
  3667. }else{
  3668. dst_su=&sock_name;
  3669. }
  3670. }
  3671. /* add socket to list */
  3672. tcpconn=tcpconn_new(new_sock, &su, dst_su, si, si->proto, S_CONN_ACCEPT);
  3673. if (likely(tcpconn)){
  3674. tcpconn->flags|=F_CONN_PASSIVE;
  3675. #ifdef TCP_PASS_NEW_CONNECTION_ON_DATA
  3676. atomic_set(&tcpconn->refcnt, 1); /* safe, not yet available to the
  3677. outside world */
  3678. tcpconn_add(tcpconn);
  3679. /* activate the timer */
  3680. local_timer_add(&tcp_main_ltimer, &tcpconn->timer,
  3681. cfg_get(tcp, tcp_cfg, con_lifetime),
  3682. get_ticks_raw());
  3683. tcpconn->flags|=(F_CONN_MAIN_TIMER|F_CONN_READ_W|F_CONN_WANTS_RD);
  3684. if (unlikely(io_watch_add(&io_h, tcpconn->s, POLLIN,
  3685. F_TCPCONN, tcpconn)<0)){
  3686. LM_CRIT("failed to add new socket to the fd list\n");
  3687. tcpconn->flags&=~F_CONN_READ_W;
  3688. if (tcpconn_try_unhash(tcpconn))
  3689. tcpconn_put_destroy(tcpconn);
  3690. }
  3691. #else
  3692. atomic_set(&tcpconn->refcnt, 2); /* safe, not yet available to the
  3693. outside world */
  3694. /* prepare it for passing to a child */
  3695. tcpconn->flags|=F_CONN_READER;
  3696. tcpconn_add(tcpconn);
  3697. LM_DBG("new connection from %s: %p %d flags: %04x\n",
  3698. su2a(&su, sizeof(su)), tcpconn, tcpconn->s, tcpconn->flags);
  3699. if(unlikely(send2child(tcpconn)<0)){
  3700. tcpconn->flags&=~F_CONN_READER;
  3701. if (tcpconn_try_unhash(tcpconn))
  3702. tcpconn_put(tcpconn);
  3703. tcpconn_put_destroy(tcpconn);
  3704. }
  3705. #endif
  3706. }else{ /*tcpconn==0 */
  3707. LM_ERR("tcpconn_new failed, closing socket\n");
  3708. tcp_safe_close(new_sock);
  3709. (*tcp_connections_no)--;
  3710. if (unlikely(si->proto==PROTO_TLS))
  3711. (*tls_connections_no)--;
  3712. }
  3713. return 1; /* accept() was succesfull */
  3714. }
  3715. /* handles an io event on one of the watched tcp connections
  3716. *
  3717. * params: tcpconn - pointer to the tcp_connection for which we have an io ev.
  3718. * fd_i - index in the fd_array table (needed for delete)
  3719. * returns: handle_* return convention, but on success it always returns 0
  3720. * (because it's one-shot, after a succesful execution the fd is
  3721. * removed from tcp_main's watch fd list and passed to a child =>
  3722. * tcp_main is not interested in further io events that might be
  3723. * queued for this fd)
  3724. */
  3725. inline static int handle_tcpconn_ev(struct tcp_connection* tcpconn, short ev,
  3726. int fd_i)
  3727. {
  3728. #ifdef TCP_ASYNC
  3729. int empty_q;
  3730. int bytes;
  3731. #endif /* TCP_ASYNC */
  3732. /* is refcnt!=0 really necessary?
  3733. * No, in fact it's a bug: I can have the following situation: a send only
  3734. * tcp connection used by n processes simultaneously => refcnt = n. In
  3735. * the same time I can have a read event and this situation is perfectly
  3736. * valid. -- andrei
  3737. */
  3738. #if 0
  3739. if ((tcpconn->refcnt!=0)){
  3740. /* FIXME: might be valid for sigio_rt iff fd flags are not cleared
  3741. * (there is a short window in which it could generate a sig
  3742. * that would be catched by tcp_main) */
  3743. LM_CRIT("handle_tcpconn_ev: io event on referenced"
  3744. " tcpconn (%p), refcnt=%d, fd=%d\n",
  3745. tcpconn, tcpconn->refcnt, tcpconn->s);
  3746. return -1;
  3747. }
  3748. #endif
  3749. /* pass it to child, so remove it from the io watch list and the local
  3750. * timer */
  3751. #ifdef TCP_ASYNC
  3752. empty_q=0; /* warning fix */
  3753. if (unlikely((ev & (POLLOUT|POLLERR|POLLHUP)) &&
  3754. (tcpconn->flags & F_CONN_WRITE_W))){
  3755. if (unlikely((ev & (POLLERR|POLLHUP)) ||
  3756. (wbufq_run(tcpconn->s, tcpconn, &empty_q)<0) ||
  3757. (empty_q && tcpconn_close_after_send(tcpconn))
  3758. )){
  3759. if ((tcpconn->flags & F_CONN_READ_W) && (ev & POLLIN)){
  3760. /* connection is watched for read and there is a read event
  3761. * (unfortunately if we have POLLIN here we don't know if
  3762. * there's really any data in the read buffer or the POLLIN
  3763. * was generated by the error or EOF => to avoid loosing
  3764. * data it's safer to either directly check the read buffer
  3765. * or try a read)*/
  3766. /* in most cases the read buffer will be empty, so in general
  3767. * is cheaper to check it here and then send the
  3768. * conn. to a a child only if needed (another syscall + at
  3769. * least 2 * syscalls in the reader + ...) */
  3770. if ((ioctl(tcpconn->s, FIONREAD, &bytes)>=0) && (bytes>0)){
  3771. if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i, 0)<0)){
  3772. LM_ERR("io_watch_del(1) failed: for %p, fd %d\n",
  3773. tcpconn, tcpconn->s);
  3774. }
  3775. tcpconn->flags&=~(F_CONN_WRITE_W|F_CONN_READ_W|
  3776. F_CONN_WANTS_RD|F_CONN_WANTS_WR);
  3777. tcpconn->flags|=F_CONN_FORCE_EOF|F_CONN_WR_ERROR;
  3778. goto send_to_child;
  3779. }
  3780. /* if bytes==0 or ioctl failed, destroy the connection now */
  3781. }
  3782. if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i,
  3783. IO_FD_CLOSING) < 0)){
  3784. LM_ERR("io_watch_del() failed: for %p, fd %d\n", tcpconn, tcpconn->s);
  3785. }
  3786. tcpconn->flags&=~(F_CONN_WRITE_W|F_CONN_READ_W|
  3787. F_CONN_WANTS_RD|F_CONN_WANTS_WR);
  3788. if (unlikely(ev & POLLERR)){
  3789. if (unlikely(tcpconn->state==S_CONN_CONNECT)){
  3790. #ifdef USE_DST_BLACKLIST
  3791. (void)dst_blacklist_su(BLST_ERR_CONNECT, tcpconn->rcv.proto,
  3792. &tcpconn->rcv.src_su,
  3793. &tcpconn->send_flags, 0);
  3794. #endif /* USE_DST_BLACKLIST */
  3795. TCP_EV_CONNECT_ERR(0, TCP_LADDR(tcpconn),
  3796. TCP_LPORT(tcpconn), TCP_PSU(tcpconn),
  3797. TCP_PROTO(tcpconn));
  3798. TCP_STATS_CONNECT_FAILED();
  3799. }else{
  3800. #ifdef USE_DST_BLACKLIST
  3801. (void)dst_blacklist_su(BLST_ERR_SEND, tcpconn->rcv.proto,
  3802. &tcpconn->rcv.src_su,
  3803. &tcpconn->send_flags, 0);
  3804. #endif /* USE_DST_BLACKLIST */
  3805. TCP_STATS_CON_RESET(); /* FIXME: it could != RST */
  3806. }
  3807. }
  3808. if (unlikely(!tcpconn_try_unhash(tcpconn))){
  3809. LM_CRIT("unhashed connection %p\n", tcpconn);
  3810. }
  3811. tcpconn_put_destroy(tcpconn);
  3812. goto error;
  3813. }
  3814. if (empty_q){
  3815. tcpconn->flags&=~F_CONN_WANTS_WR;
  3816. if (!(tcpconn->flags & F_CONN_READ_W)){
  3817. if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i, 0)==-1)){
  3818. LM_ERR("io_watch_del(2) failed: for %p, fd %d\n",
  3819. tcpconn, tcpconn->s);
  3820. goto error;
  3821. }
  3822. }else{
  3823. if (unlikely(io_watch_chg(&io_h, tcpconn->s,
  3824. POLLIN, fd_i)==-1)){
  3825. LM_ERR("io_watch_chg(1) failed: for %p, fd %d\n",
  3826. tcpconn, tcpconn->s);
  3827. goto error;
  3828. }
  3829. }
  3830. tcpconn->flags&=~F_CONN_WRITE_W;
  3831. }
  3832. ev&=~POLLOUT; /* clear POLLOUT */
  3833. }
  3834. if (likely(ev && (tcpconn->flags & F_CONN_READ_W))){
  3835. /* if still some other IO event (POLLIN|POLLHUP|POLLERR) and
  3836. * connection is still watched in tcp_main for reads, send it to a
  3837. * child and stop watching it for input (but continue watching for
  3838. * writes if needed): */
  3839. if (unlikely(tcpconn->flags & F_CONN_WRITE_W)){
  3840. if (unlikely(io_watch_chg(&io_h, tcpconn->s, POLLOUT, fd_i)==-1)){
  3841. LM_ERR("io_watch_chg(2) failed: for %p, fd %d\n",
  3842. tcpconn, tcpconn->s);
  3843. goto error;
  3844. }
  3845. }else
  3846. #else
  3847. {
  3848. #endif /* TCP_ASYNC */
  3849. if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i, 0)==-1)){
  3850. LM_ERR("io_watch_del(3) failed: for %p, fd %d\n",
  3851. tcpconn, tcpconn->s);
  3852. goto error;
  3853. }
  3854. #ifdef TCP_ASYNC
  3855. send_to_child:
  3856. #endif
  3857. LM_DBG("sending to child, events %x\n", ev);
  3858. #ifdef POLLRDHUP
  3859. tcpconn->flags|=((int)!(ev & (POLLRDHUP|POLLHUP|POLLERR)) -1) &
  3860. F_CONN_EOF_SEEN;
  3861. #else /* POLLRDHUP */
  3862. tcpconn->flags|=((int)!(ev & (POLLHUP|POLLERR)) -1) & F_CONN_EOF_SEEN;
  3863. #endif /* POLLRDHUP */
  3864. tcpconn->flags|= ((int)!(ev & POLLPRI) -1) & F_CONN_OOB_DATA;
  3865. tcpconn->flags|=F_CONN_READER;
  3866. local_timer_del(&tcp_main_ltimer, &tcpconn->timer);
  3867. tcpconn->flags&=~(F_CONN_MAIN_TIMER|F_CONN_READ_W|F_CONN_WANTS_RD);
  3868. tcpconn_ref(tcpconn); /* refcnt ++ */
  3869. if (unlikely(send2child(tcpconn)<0)){
  3870. tcpconn->flags&=~F_CONN_READER;
  3871. #ifdef TCP_ASYNC
  3872. if (tcpconn->flags & F_CONN_WRITE_W){
  3873. if (unlikely(io_watch_del(&io_h, tcpconn->s, fd_i,
  3874. IO_FD_CLOSING) < 0)){
  3875. LM_ERR("io_watch_del(4) failed:" " for %p, fd %d\n",
  3876. tcpconn, tcpconn->s);
  3877. }
  3878. tcpconn->flags&=~F_CONN_WRITE_W;
  3879. }
  3880. #endif /* TCP_ASYNC */
  3881. if (tcpconn_try_unhash(tcpconn))
  3882. tcpconn_put(tcpconn);
  3883. tcpconn_put_destroy(tcpconn); /* because of the tcpconn_ref() */
  3884. }
  3885. }
  3886. return 0; /* we are not interested in possibly queued io events,
  3887. the fd was either passed to a child, closed, or for writes,
  3888. everything possible was already written */
  3889. error:
  3890. return -1;
  3891. }
  3892. /* generic handle io routine, it will call the appropiate
  3893. * handle_xxx() based on the fd_map type
  3894. *
  3895. * params: fm - pointer to a fd hash entry
  3896. * idx - index in the fd_array (or -1 if not known)
  3897. * return: -1 on error
  3898. * 0 on EAGAIN or when by some other way it is known that no more
  3899. * io events are queued on the fd (the receive buffer is empty).
  3900. * Usefull to detect when there are no more io events queued for
  3901. * sigio_rt, epoll_et, kqueue.
  3902. * >0 on successfull read from the fd (when there might be more io
  3903. * queued -- the receive buffer might still be non-empty)
  3904. */
  3905. inline static int handle_io(struct fd_map* fm, short ev, int idx)
  3906. {
  3907. int ret;
  3908. /* update the local config */
  3909. cfg_update();
  3910. switch(fm->type){
  3911. case F_SOCKINFO:
  3912. ret=handle_new_connect((struct socket_info*)fm->data);
  3913. break;
  3914. case F_TCPCONN:
  3915. ret=handle_tcpconn_ev((struct tcp_connection*)fm->data, ev, idx);
  3916. break;
  3917. case F_TCPCHILD:
  3918. ret=handle_tcp_child((struct tcp_child*)fm->data, idx);
  3919. break;
  3920. case F_PROC:
  3921. ret=handle_ser_child((struct process_table*)fm->data, idx);
  3922. break;
  3923. case F_NONE:
  3924. LM_CRIT("empty fd map: %p {%d, %d, %p}, idx %d\n",
  3925. fm, fm->fd, fm->type, fm->data, idx);
  3926. goto error;
  3927. default:
  3928. LM_CRIT("unknown fd type %d\n", fm->type);
  3929. goto error;
  3930. }
  3931. return ret;
  3932. error:
  3933. return -1;
  3934. }
  3935. /* timer handler for tcpconnection handled by tcp_main */
  3936. static ticks_t tcpconn_main_timeout(ticks_t t, struct timer_ln* tl, void* data)
  3937. {
  3938. struct tcp_connection *c;
  3939. int fd;
  3940. int tcp_async;
  3941. c=(struct tcp_connection*)data;
  3942. /* or (struct tcp...*)(tl-offset(c->timer)) */
  3943. #ifdef TCP_ASYNC
  3944. LM_DBG("entering timer for %p (ticks=%d, timeout=%d (%d s), "
  3945. "wr_timeout=%d (%d s)), write queue: %d bytes\n",
  3946. c, t, c->timeout, TICKS_TO_S(c->timeout-t),
  3947. c->wbuf_q.wr_timeout, TICKS_TO_S(c->wbuf_q.wr_timeout-t),
  3948. c->wbuf_q.queued);
  3949. tcp_async=cfg_get(tcp, tcp_cfg, async);
  3950. if (likely(TICKS_LT(t, c->timeout) && ( !tcp_async || _wbufq_empty(c) ||
  3951. TICKS_LT(t, c->wbuf_q.wr_timeout)) )){
  3952. if (unlikely(tcp_async && _wbufq_non_empty(c)))
  3953. return (ticks_t)MIN_unsigned(c->timeout-t, c->wbuf_q.wr_timeout-t);
  3954. else
  3955. return (ticks_t)(c->timeout - t);
  3956. }
  3957. /* if time out due to write, add it to the blacklist */
  3958. if (tcp_async && _wbufq_non_empty(c) && TICKS_GE(t, c->wbuf_q.wr_timeout)){
  3959. if (unlikely(c->state==S_CONN_CONNECT)){
  3960. #ifdef USE_DST_BLACKLIST
  3961. (void)dst_blacklist_su(BLST_ERR_CONNECT, c->rcv.proto, &c->rcv.src_su,
  3962. &c->send_flags, 0);
  3963. #endif /* USE_DST_BLACKLIST */
  3964. TCP_EV_CONNECT_TIMEOUT(0, TCP_LADDR(c), TCP_LPORT(c), TCP_PSU(c),
  3965. TCP_PROTO(c));
  3966. TCP_STATS_CONNECT_FAILED();
  3967. }else{
  3968. #ifdef USE_DST_BLACKLIST
  3969. (void)dst_blacklist_su(BLST_ERR_SEND, c->rcv.proto, &c->rcv.src_su,
  3970. &c->send_flags, 0);
  3971. #endif /* USE_DST_BLACKLIST */
  3972. TCP_EV_SEND_TIMEOUT(0, &c->rcv);
  3973. TCP_STATS_SEND_TIMEOUT();
  3974. }
  3975. }else{
  3976. /* idle timeout */
  3977. TCP_EV_IDLE_CONN_CLOSED(0, &c->rcv);
  3978. TCP_STATS_CON_TIMEOUT();
  3979. }
  3980. #else /* ! TCP_ASYNC */
  3981. if (TICKS_LT(t, c->timeout)){
  3982. /* timeout extended, exit */
  3983. return (ticks_t)(c->timeout - t);
  3984. }
  3985. /* idle timeout */
  3986. TCP_EV_IDLE_CONN_CLOSED(0, &c->rcv);
  3987. TCP_STATS_CON_TIMEOUT();
  3988. #endif /* TCP_ASYNC */
  3989. LM_DBG("timeout for %p\n", c);
  3990. if (likely(c->flags & F_CONN_HASHED)){
  3991. c->flags&=~(F_CONN_HASHED|F_CONN_MAIN_TIMER);
  3992. c->state=S_CONN_BAD;
  3993. TCPCONN_LOCK;
  3994. _tcpconn_detach(c);
  3995. TCPCONN_UNLOCK;
  3996. }else{
  3997. c->flags&=~F_CONN_MAIN_TIMER;
  3998. LM_CRIT("timer: called with unhashed connection %p\n", c);
  3999. tcpconn_ref(c); /* ugly hack to try to go on */
  4000. }
  4001. fd=c->s;
  4002. if (likely(fd>0)){
  4003. if (likely(c->flags & (F_CONN_READ_W|F_CONN_WRITE_W))){
  4004. io_watch_del(&io_h, fd, -1, IO_FD_CLOSING);
  4005. c->flags&=~(F_CONN_READ_W|F_CONN_WRITE_W);
  4006. }
  4007. }
  4008. tcpconn_put_destroy(c);
  4009. return 0;
  4010. }
  4011. static inline void tcp_timer_run(void)
  4012. {
  4013. ticks_t ticks;
  4014. ticks=get_ticks_raw();
  4015. if (unlikely((ticks-tcp_main_prev_ticks)<TCPCONN_TIMEOUT_MIN_RUN)) return;
  4016. tcp_main_prev_ticks=ticks;
  4017. local_timer_run(&tcp_main_ltimer, ticks);
  4018. }
  4019. /* keep in sync with tcpconn_destroy, the "delete" part should be
  4020. * the same except for io_watch_del..
  4021. * Note: this function is called only on shutdown by the main ser process via
  4022. * cleanup(). However it's also safe to call it from the tcp_main process.
  4023. * => with the ser shutdown exception, it cannot execute in parallel
  4024. * with tcpconn_add() or tcpconn_destroy()*/
  4025. static inline void tcpconn_destroy_all(void)
  4026. {
  4027. struct tcp_connection *c, *next;
  4028. unsigned h;
  4029. int fd;
  4030. TCPCONN_LOCK;
  4031. for(h=0; h<TCP_ID_HASH_SIZE; h++){
  4032. c=tcpconn_id_hash[h];
  4033. while(c){
  4034. next=c->id_next;
  4035. if (is_tcp_main){
  4036. /* we cannot close or remove the fd if we are not in the
  4037. * tcp main proc.*/
  4038. if ((c->flags & F_CONN_MAIN_TIMER)){
  4039. local_timer_del(&tcp_main_ltimer, &c->timer);
  4040. c->flags&=~F_CONN_MAIN_TIMER;
  4041. } /* else still in some reader */
  4042. fd=c->s;
  4043. if (fd>0 && (c->flags & (F_CONN_READ_W|F_CONN_WRITE_W))){
  4044. io_watch_del(&io_h, fd, -1, IO_FD_CLOSING);
  4045. c->flags&=~(F_CONN_READ_W|F_CONN_WRITE_W);
  4046. }
  4047. }else{
  4048. fd=-1;
  4049. }
  4050. #ifdef USE_TLS
  4051. if (fd>0 && (c->type==PROTO_TLS || c->type==PROTO_WSS))
  4052. tls_close(c, fd);
  4053. if (unlikely(c->type==PROTO_TLS || c->type==PROTO_WSS))
  4054. (*tls_connections_no)--;
  4055. #endif
  4056. (*tcp_connections_no)--;
  4057. c->flags &= ~F_CONN_HASHED;
  4058. _tcpconn_rm(c);
  4059. if (fd>0) {
  4060. #ifdef TCP_FD_CACHE
  4061. if (likely(cfg_get(tcp, tcp_cfg, fd_cache)))
  4062. shutdown(fd, SHUT_RDWR);
  4063. #endif /* TCP_FD_CACHE */
  4064. tcp_safe_close(fd);
  4065. }
  4066. c=next;
  4067. }
  4068. }
  4069. TCPCONN_UNLOCK;
  4070. }
  4071. /* tcp main loop */
  4072. void tcp_main_loop()
  4073. {
  4074. struct socket_info* si;
  4075. int r;
  4076. is_tcp_main=1; /* mark this process as tcp main */
  4077. tcp_main_max_fd_no=get_max_open_fds();
  4078. /* init send fd queues (here because we want mem. alloc only in the tcp
  4079. * process */
  4080. #ifdef SEND_FD_QUEUE
  4081. if (init_send_fd_queues()<0){
  4082. LM_CRIT("could not init send fd queues\n");
  4083. goto error;
  4084. }
  4085. #endif
    /* init io_wait (here because we want the memory allocated only in
     * the tcp_main process) */
    if (init_io_wait(&io_h, tcp_main_max_fd_no, tcp_poll_method)<0)
        goto error;
    /* init the local timer */
    tcp_main_prev_ticks=get_ticks_raw();
    if (init_local_timer(&tcp_main_ltimer, get_ticks_raw())!=0){
        LM_ERR("failed to init local timer\n");
        goto error;
    }
#ifdef TCP_FD_CACHE
    if (cfg_get(tcp, tcp_cfg, fd_cache)) tcp_fd_cache_init();
#endif /* TCP_FD_CACHE */
    /* init: start watching all the fds */
    /* add all the sockets we listen on for connections */
    for (si=tcp_listen; si; si=si->next){
        if ((si->proto==PROTO_TCP) && (si->socket!=-1)){
            if (io_watch_add(&io_h, si->socket, POLLIN, F_SOCKINFO, si)<0){
                LM_CRIT("failed to add listen socket to the fd list\n");
                goto error;
            }
        }else{
            LM_CRIT("non tcp address in tcp_listen\n");
        }
    }
#ifdef USE_TLS
    if (!tls_disable && tls_loaded()){
        for (si=tls_listen; si; si=si->next){
            if ((si->proto==PROTO_TLS) && (si->socket!=-1)){
                if (io_watch_add(&io_h, si->socket, POLLIN, F_SOCKINFO, si)<0){
                    LM_CRIT("failed to add tls listen socket to the fd list\n");
                    goto error;
                }
            }else{
                LM_CRIT("non tls address in tls_listen\n");
            }
        }
    }
#endif
    /* add all the unix sockets used for communication with the other ser
     * processes (get fd, new connection a.s.o.) */
    for (r=1; r<process_no; r++){
        if (pt[r].unix_sock>0) /* we can't have 0, we never close it! */
            if (io_watch_add(&io_h, pt[r].unix_sock, POLLIN, F_PROC, &pt[r])<0){
                LM_CRIT("failed to add process %d unix socket to the fd list\n", r);
                goto error;
            }
    }
    /* add all the unix sockets used for communication with the tcp children */
    for (r=0; r<tcp_children_no; r++){
        if (tcp_children[r].unix_sock>0) /* we can't have 0, we never close it! */
            if (io_watch_add(&io_h, tcp_children[r].unix_sock, POLLIN,
                                F_TCPCHILD, &tcp_children[r])<0){
                LM_CRIT("failed to add tcp child %d unix socket to the fd list\n", r);
                goto error;
            }
    }
    /* initialize the cfg framework */
    if (cfg_child_init()) goto error;

    /* main loop */
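    /* every branch below runs the same steps forever: wait for and handle
     * io events, flush the fd send queue (handling io may have queued fds
     * for the workers) and then expire idle connections */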
    switch(io_h.poll_method){
        case POLL_POLL:
            while(1){
                /* wait and process IO */
                io_wait_loop_poll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
                send_fd_queue_run(&send2child_q); /* then new io */
                /* remove old connections */
                tcp_timer_run();
            }
            break;
#ifdef HAVE_SELECT
        case POLL_SELECT:
            while(1){
                io_wait_loop_select(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
                send_fd_queue_run(&send2child_q); /* then new io */
                tcp_timer_run();
            }
            break;
#endif
#ifdef HAVE_SIGIO_RT
        case POLL_SIGIO_RT:
            while(1){
                io_wait_loop_sigio_rt(&io_h, TCP_MAIN_SELECT_TIMEOUT);
                send_fd_queue_run(&send2child_q); /* then new io */
                tcp_timer_run();
            }
            break;
#endif
#ifdef HAVE_EPOLL
        case POLL_EPOLL_LT:
            while(1){
                io_wait_loop_epoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
                send_fd_queue_run(&send2child_q); /* then new io */
                tcp_timer_run();
            }
            break;
        case POLL_EPOLL_ET:
            while(1){
                io_wait_loop_epoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 1);
                send_fd_queue_run(&send2child_q); /* then new io */
                tcp_timer_run();
            }
            break;
#endif
#ifdef HAVE_KQUEUE
        case POLL_KQUEUE:
            while(1){
                io_wait_loop_kqueue(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
                send_fd_queue_run(&send2child_q); /* then new io */
                tcp_timer_run();
            }
            break;
#endif
#ifdef HAVE_DEVPOLL
        case POLL_DEVPOLL:
            while(1){
                io_wait_loop_devpoll(&io_h, TCP_MAIN_SELECT_TIMEOUT, 0);
                send_fd_queue_run(&send2child_q); /* then new io */
                tcp_timer_run();
            }
            break;
#endif
        default:
            LM_CRIT("no support for poll method %s (%d)\n",
                    poll_method_name(io_h.poll_method), io_h.poll_method);
            goto error;
    }
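    /* only reachable on error: the io loops above never return */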
error:
#ifdef SEND_FD_QUEUE
    destroy_send_fd_queues();
#endif
    destroy_io_wait(&io_h);
    LM_CRIT("exiting...\n");
    exit(-1);
}

/* cleanup before exit */
void destroy_tcp()
{
    if (tcpconn_id_hash){
        if (tcpconn_lock)
            TCPCONN_UNLOCK; /* hack: force-unlock the tcp lock in case
                               some process was terminated while holding
                               it; this allows an almost graceful
                               shutdown */
        tcpconn_destroy_all();
        shm_free(tcpconn_id_hash);
        tcpconn_id_hash=0;
    }
    DESTROY_TCP_STATS();
    if (tcp_connections_no){
        shm_free(tcp_connections_no);
        tcp_connections_no=0;
    }
    if (tls_connections_no){
        shm_free(tls_connections_no);
        tls_connections_no=0;
    }
#ifdef TCP_ASYNC
    if (tcp_total_wq){
        shm_free(tcp_total_wq);
        tcp_total_wq=0;
    }
#endif /* TCP_ASYNC */
    if (connection_id){
        shm_free(connection_id);
        connection_id=0;
    }
    if (tcpconn_aliases_hash){
        shm_free(tcpconn_aliases_hash);
        tcpconn_aliases_hash=0;
    }
    if (tcpconn_lock){
        lock_destroy(tcpconn_lock);
        lock_dealloc((void*)tcpconn_lock);
        tcpconn_lock=0;
    }
    if (tcp_children){
        pkg_free(tcp_children);
        tcp_children=0;
    }
    destroy_local_timer(&tcp_main_ltimer);
}
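
/* allocates and initializes the tcp lock, the shared counters and the
 * connection hashtables, then picks a poll method; returns 0 on success
 * and -1 on error (partial allocations are released via destroy_tcp()) */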
int init_tcp()
{
    char* poll_err;

    tcp_options_check();
    if (tcp_cfg==0){
        BUG("tcp_cfg not initialized\n");
        goto error;
    }
    /* init lock */
    tcpconn_lock=lock_alloc();
    if (tcpconn_lock==0){
        LM_CRIT("could not alloc lock\n");
        goto error;
    }
    if (lock_init(tcpconn_lock)==0){
        LM_CRIT("could not init lock\n");
        lock_dealloc((void*)tcpconn_lock);
        tcpconn_lock=0;
        goto error;
    }
    /* init globals */
    tcp_connections_no=shm_malloc(sizeof(int));
    if (tcp_connections_no==0){
        LM_CRIT("could not alloc globals\n");
        goto error;
    }
    *tcp_connections_no=0;
    tls_connections_no=shm_malloc(sizeof(int));
    if (tls_connections_no==0){
        LM_CRIT("could not alloc globals\n");
        goto error;
    }
    *tls_connections_no=0;
    if (INIT_TCP_STATS()!=0) goto error;
    connection_id=shm_malloc(sizeof(int));
    if (connection_id==0){
        LM_CRIT("could not alloc globals\n");
        goto error;
    }
    *connection_id=1;
#ifdef TCP_ASYNC
    tcp_total_wq=shm_malloc(sizeof(*tcp_total_wq));
    if (tcp_total_wq==0){
        LM_CRIT("could not alloc globals\n");
        goto error;
    }
    *tcp_total_wq=0; /* shm_malloc() does not zero the memory */
#endif /* TCP_ASYNC */
    /* alloc hashtables */
    tcpconn_aliases_hash=(struct tcp_conn_alias**)
            shm_malloc(TCP_ALIAS_HASH_SIZE * sizeof(struct tcp_conn_alias*));
    if (tcpconn_aliases_hash==0){
        LM_CRIT("could not alloc address hashtable\n");
        goto error;
    }
    tcpconn_id_hash=(struct tcp_connection**)
            shm_malloc(TCP_ID_HASH_SIZE * sizeof(struct tcp_connection*));
    if (tcpconn_id_hash==0){
        LM_CRIT("could not alloc id hashtable\n");
        goto error;
    }
    /* init hashtables */
    memset((void*)tcpconn_aliases_hash, 0,
            TCP_ALIAS_HASH_SIZE * sizeof(struct tcp_conn_alias*));
    memset((void*)tcpconn_id_hash, 0,
            TCP_ID_HASH_SIZE * sizeof(struct tcp_connection*));
    /* fix config variables */
    poll_err=check_poll_method(tcp_poll_method);
    /* set an appropriate poll method */
    if (poll_err || (tcp_poll_method==0)){
        tcp_poll_method=choose_poll_method();
        if (poll_err){
            LM_ERR("%s, using %s instead\n",
                    poll_err, poll_method_name(tcp_poll_method));
        }else{
            LM_INFO("using %s as the io watch method (auto detected)\n",
                    poll_method_name(tcp_poll_method));
        }
    }else{
        LM_INFO("using %s io watch method (config)\n",
                poll_method_name(tcp_poll_method));
    }
    return 0;
error:
    /* clean-up */
    destroy_tcp();
    return -1;
}
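
/* helpers for the tcp worker socketpairs: when TCP_CHILD_NON_BLOCKING is
 * defined, both ends of the main<->worker unix sockets are put in
 * non-blocking mode, so that tcp_main does not block on a busy worker */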
#ifdef TCP_CHILD_NON_BLOCKING
/* returns -1 on error */
static int set_non_blocking(int s)
{
    int flags;

    /* non-blocking */
    flags=fcntl(s, F_GETFL);
    if (flags==-1){
        LM_ERR("fcntl failed: (%d) %s\n", errno, strerror(errno));
        goto error;
    }
    if (fcntl(s, F_SETFL, flags|O_NONBLOCK)==-1){
        LM_ERR("fcntl: set non-blocking failed: (%d) %s\n",
                errno, strerror(errno));
        goto error;
    }
    return 0;
error:
    return -1;
}
#endif

/* returns -1 on error, 0 on success */
int tcp_fix_child_sockets(int* fd)
{
#ifdef TCP_CHILD_NON_BLOCKING
    if ((set_non_blocking(fd[0])<0) ||
            (set_non_blocking(fd[1])<0)){
        return -1;
    }
#endif
    return 0;
}
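
/* sketch of the intended use (illustrative only -- the real socketpairs
 * are created by the process forking helpers, not here):
 *   int fds[2];
 *   if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds)<0 ||
 *           tcp_fix_child_sockets(fds)<0)
 *       goto error;
 */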

/* starts the tcp worker processes */
int tcp_init_children()
{
    int r, i;
    int reader_fd_1; /* fd for communication with the forked tcp reader */
    pid_t pid;
    char si_desc[MAX_PT_DESC];
    struct socket_info *si;

    /* estimate max fd. no:
     * 1 tcp send unix socket/all_proc,
     * + 1 udp sock/udp proc + 1 tcp_child sock/tcp child
     * + no_listen_tcp */
    for(r=0, si=tcp_listen; si; si=si->next, r++);
#ifdef USE_TLS
    if (!tls_disable)
        for (si=tls_listen; si; si=si->next, r++);
#endif
    register_fds(r+tcp_max_connections+get_max_procs()-1 /* tcp main */);
#if 0
    tcp_max_fd_no=get_max_procs()*2 + r - 1 /* timer */ + 3; /* stdin/out/err */
    /* max connections can be temporarily exceeded with
     * estimated_process_count - tcp_main (tcpconn_connect() called
     * simultaneously in all the processes) */
    tcp_max_fd_no+=tcp_max_connections+get_max_procs()-1 /* tcp main */;
#endif
    /* alloc the children array */
    tcp_children=pkg_malloc(sizeof(struct tcp_child)*tcp_children_no);
    if (tcp_children==0){
        LM_ERR("out of memory\n");
        goto error;
    }
    memset(tcp_children, 0, sizeof(struct tcp_child)*tcp_children_no);
    /* assign their own socket to tcp workers, where configured
     * - fill the array from the end towards the start, so that the
     *   generic tcp workers end up at the beginning */
    i = tcp_children_no-1;
    for(si=tcp_listen; si; si=si->next) {
        if(si->workers>0) {
            si->workers_tcpidx = i - si->workers + 1;
            for(r=0; r<si->workers; r++) {
                tcp_children[i].mysocket = si;
                i--;
            }
        }
    }
#ifdef USE_TLS
    for(si=tls_listen; si; si=si->next) {
        if(si->workers>0) {
            si->workers_tcpidx = i - si->workers + 1;
            for(r=0; r<si->workers; r++) {
                tcp_children[i].mysocket = si;
                i--;
            }
        }
    }
#endif
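    /* at this point slots 0..i hold the generic workers (no own socket);
     * tcp_sockets_gworkers stays 0 when no socket-specific workers were
     * configured, otherwise it becomes i+2, i.e. one past the number of
     * generic workers */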
    tcp_sockets_gworkers = (i != tcp_children_no-1) ? (1 + i + 1) : 0;

    /* create the tcp sock_info structures */
    /* copy the sockets -- moved to main_loop */

    /* fork the children & create the socket pairs */
    for(r=0; r<tcp_children_no; r++){
        child_rank++;
        snprintf(si_desc, MAX_PT_DESC, "tcp receiver (%s)",
                (tcp_children[r].mysocket!=NULL)?
                    tcp_children[r].mysocket->sock_str.s : "generic");
        pid=fork_tcp_process(child_rank, si_desc, r, &reader_fd_1);
        if (pid<0){
            LM_ERR("fork failed: %s\n", strerror(errno));
            goto error;
        }else if (pid>0){
            /* parent */
        }else{
            /* child */
            bind_address=0; /* force a SEGFAULT if someone uses a
                               non-initialized bind address on tcp */
            tcp_receive_loop(reader_fd_1);
        }
    }
    return 0;
error:
    return -1;
}
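
/* fills *ti with a snapshot of the tcp runtime info: number of readers,
 * configured connection limits and the current counters (the counters live
 * in shared memory and are read without locking, so values are
 * approximate) */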
void tcp_get_info(struct tcp_gen_info *ti)
{
    ti->tcp_readers=tcp_children_no;
    ti->tcp_max_connections=tcp_max_connections;
    ti->tls_max_connections=tls_max_connections;
    ti->tcp_connections_no=*tcp_connections_no;
    ti->tls_connections_no=*tls_connections_no;
#ifdef TCP_ASYNC
    ti->tcp_write_queued=*tcp_total_wq;
#else
    ti->tcp_write_queued=0;
#endif /* TCP_ASYNC */
}

#endif /* USE_TCP */