| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762
77727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763
27732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763
77737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764
27742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642464346444645464646474648464946504651465246534654465546564657465846594660466146624663466446654666466746684669467046714672467346744675467646774678467946804681468246834684468546864687468846894690469146924693469446954696469746984699470047014702470347044705470647074708470947104711471247134714471547164717471847194720472147224723472447254726472747284729473047314732473347344735473647374738473947404741474247434744474547464747474847494750475147524753475447554756475747584759476047614762476347644765476647674768476947704771477247734774477547764
77747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989499049914992499349944995499649974998499950005001500250035004500550065007500850095010501150125013501450155016501750185019502050215022502350245025502650275028502950305031503250335034503550365037503850395040504150425043504450455046504750485049505050515052505350545055505650575058505950605061506250635064506550665067506850695070507150725073507450755076507750785079508050815082508350845085508650875088508950905091509250935094509550965097509850995100510151025103510451055106510751085109511051115112511351145115511651175118511951205121512251235124512551265127512851295130513151325133513451355136513751385139514051415142514351445145514651475148514951505151515251535154515551565157515851595160516151625163516451655166516751685169517051715172517351745175517651775178517951805181518251835184518551865187518851895190519151925193519451955196519751985199520052015202520352045205520652075208520952105211521252135214521552165217521852195220522152225223522452255226522752285229523052315232523352345235523652375238523952405241524252435244524552465247524852495250525152525253525452555256525752585259526052615262526352645265526652675268526952705271527252735274527552765
27752785279528052815282528352845285528652875288528952905291529252935294529552965297529852995300530153025303530453055306530753085309531053115312531353145315531653175318531953205321532253235324532553265327532853295330533153325333533453355336533753385339534053415342534353445345534653475348534953505351535253535354535553565357535853595360536153625363536453655366536753685369537053715372537353745375537653775378537953805381538253835384538553865387538853895390539153925393539453955396539753985399540054015402540354045405540654075408540954105411541254135414541554165417541854195420542154225423542454255426542754285429543054315432543354345435543654375438543954405441544254435444544554465447544854495450545154525453545454555456545754585459546054615462546354645465546654675468546954705471547254735474547554765477547854795480548154825483548454855486548754885489549054915492549354945495549654975498549955005501550255035504550555065507550855095510551155125513551455155516551755185519552055215522552355245525552655275528552955305531553255335534553555365537553855395540554155425543554455455546554755485549555055515552555355545555555655575558555955605561556255635564556555665567556855695570557155725573557455755576557755785579558055815582558355845585558655875588558955905591559255935594559555965597559855995600560156025603560456055606560756085609561056115612561356145615561656175618561956205621562256235624562556265627562856295630563156325633563456355636563756385639564056415642564356445645564656475648564956505651565256535654565556565657565856595660566156625663566456655666566756685669567056715672567356745675567656775678567956805681568256835684568556865687568856895690569156925693569456955696569756985699570057015702570357045705570657075708570957105711571257135714571557165717571857195720572157225723572457255726572757285729573057315732573357345735573657375738573957405741574257435744574557465747574857495750575157525753575457555756575757585759576057615762576357645765576657675768576957705771577257735774577557765
77757785779578057815782578357845785578657875788578957905791579257935794579557965797579857995800580158025803580458055806580758085809581058115812581358145815581658175818581958205821582258235824582558265827582858295830583158325833583458355836583758385839584058415842584358445845584658475848584958505851585258535854585558565857585858595860586158625863586458655866586758685869587058715872587358745875587658775878587958805881588258835884588558865887588858895890589158925893589458955896589758985899590059015902590359045905590659075908590959105911591259135914591559165917591859195920592159225923592459255926592759285929593059315932593359345935593659375938593959405941594259435944594559465947594859495950595159525953595459555956595759585959596059615962596359645965596659675968596959705971597259735974597559765977597859795980598159825983598459855986598759885989599059915992599359945995599659975998599960006001600260036004600560066007600860096010601160126013601460156016601760186019602060216022602360246025602660276028602960306031603260336034603560366037603860396040604160426043604460456046604760486049605060516052605360546055605660576058605960606061606260636064606560666067606860696070607160726073607460756076607760786079608060816082608360846085608660876088608960906091609260936094609560966097609860996100610161026103610461056106610761086109611061116112611361146115611661176118611961206121612261236124612561266127612861296130613161326133613461356136613761386139614061416142614361446145614661476148614961506151615261536154615561566157615861596160616161626163616461656166616761686169617061716172617361746175617661776178617961806181618261836184618561866187618861896190619161926193619461956196619761986199620062016202620362046205620662076208620962106211621262136214621562166217621862196220622162226223622462256226622762286229623062316232623362346235623662376238623962406241624262436244624562466247624862496250625162526253625462556256625762586259626062616262626362646265626662676268626962706271627262736274627562766
27762786279628062816282628362846285628662876288628962906291629262936294629562966297629862996300630163026303630463056306630763086309631063116312631363146315631663176318631963206321632263236324632563266327632863296330633163326333633463356336633763386339634063416342634363446345634663476348634963506351635263536354635563566357635863596360636163626363636463656366636763686369637063716372637363746375637663776378637963806381638263836384638563866387638863896390639163926393639463956396639763986399640064016402640364046405640664076408640964106411641264136414641564166417641864196420642164226423642464256426642764286429643064316432643364346435643664376438643964406441644264436444644564466447644864496450645164526453645464556456645764586459646064616462646364646465646664676468646964706471647264736474647564766477647864796480648164826483648464856486648764886489649064916492649364946495649664976498649965006501650265036504650565066507650865096510651165126513651465156516651765186519652065216522652365246525652665276528652965306531653265336534653565366537653865396540654165426543654465456546654765486549655065516552655365546555655665576558655965606561656265636564656565666567656865696570657165726573657465756576657765786579658065816582658365846585658665876588658965906591659265936594659565966597659865996600660166026603660466056606660766086609661066116612661366146615661666176618661966206621662266236624662566266627662866296630663166326633663466356636663766386639664066416642664366446645664666476648664966506651665266536654665566566657665866596660666166626663666466656666666766686669667066716672667366746675667666776678667966806681668266836684668566866687668866896690669166926693669466956696669766986699670067016702670367046705670667076708670967106711671267136714671567166717671867196720672167226723672467256726672767286729673067316732673367346735673667376738673967406741674267436744674567466747674867496750675167526753675467556756675767586759676067616762676367646765676667676768676967706771677267736774677567766
77767786779678067816782678367846785678667876788678967906791679267936794679567966797679867996800680168026803680468056806680768086809681068116812681368146815681668176818681968206821682268236824682568266827682868296830683168326833683468356836683768386839684068416842684368446845684668476848684968506851685268536854685568566857685868596860686168626863686468656866686768686869687068716872687368746875687668776878687968806881688268836884688568866887688868896890689168926893689468956896689768986899690069016902690369046905690669076908690969106911691269136914691569166917691869196920692169226923692469256926692769286929693069316932693369346935693669376938693969406941694269436944694569466947694869496950695169526953695469556956695769586959696069616962696369646965696669676968696969706971697269736974697569766977697869796980698169826983698469856986698769886989699069916992699369946995699669976998699970007001700270037004700570067007700870097010701170127013701470157016701770187019702070217022702370247025702670277028702970307031703270337034703570367037703870397040704170427043704470457046704770487049705070517052705370547055705670577058705970607061706270637064706570667067706870697070707170727073707470757076707770787079708070817082708370847085708670877088708970907091709270937094709570967097709870997100710171027103710471057106710771087109711071117112711371147115711671177118711971207121712271237124712571267127712871297130713171327133713471357136713771387139714071417142714371447145714671477148714971507151715271537154715571567157715871597160716171627163716471657166716771687169717071717172717371747175717671777178717971807181718271837184718571867187718871897190719171927193719471957196719771987199720072017202720372047205720672077208720972107211721272137214721572167217721872197220722172227223722472257226722772287229723072317232723372347235723672377238723972407241724272437244724572467247724872497250725172527253725472557256725772587259726072617262726372647265726672677268726972707271727272737274727572767
27772787279728072817282728372847285728672877288728972907291729272937294729572967297729872997300730173027303730473057306730773087309731073117312731373147315731673177318731973207321732273237324732573267327732873297330733173327333733473357336733773387339734073417342734373447345734673477348734973507351735273537354735573567357735873597360736173627363736473657366736773687369737073717372737373747375737673777378737973807381738273837384738573867387738873897390739173927393739473957396739773987399740074017402740374047405740674077408740974107411741274137414741574167417741874197420742174227423742474257426742774287429743074317432743374347435743674377438743974407441744274437444744574467447744874497450745174527453745474557456745774587459746074617462746374647465746674677468746974707471747274737474747574767477747874797480748174827483748474857486748774887489749074917492749374947495749674977498749975007501750275037504750575067507750875097510751175127513751475157516751775187519752075217522752375247525752675277528752975307531753275337534753575367537753875397540754175427543754475457546754775487549755075517552755375547555755675577558755975607561756275637564756575667567756875697570757175727573757475757576757775787579758075817582758375847585758675877588758975907591759275937594759575967597759875997600760176027603760476057606760776087609761076117612761376147615761676177618761976207621762276237624762576267627762876297630763176327633763476357636763776387639764076417642764376447645764676477648764976507651765276537654765576567657765876597660766176627663766476657666766776687669767076717672767376747675767676777678767976807681768276837684768576867687768876897690769176927693769476957696769776987699770077017702770377047705770677077708770977107711771277137714771577167717771877197720772177227723772477257726772777287729773077317732773377347735773677377738773977407741774277437744774577467747774877497750775177527753775477557756775777587759776077617762776377647765776677677768776977707771777277737774777577767
77777787779778077817782778377847785778677877788778977907791779277937794779577967797779877997800780178027803780478057806780778087809781078117812781378147815781678177818781978207821782278237824782578267827782878297830783178327833783478357836783778387839784078417842784378447845784678477848784978507851785278537854785578567857785878597860786178627863786478657866786778687869787078717872787378747875787678777878787978807881788278837884788578867887788878897890789178927893789478957896789778987899790079017902790379047905790679077908790979107911791279137914791579167917791879197920792179227923792479257926792779287929793079317932793379347935793679377938793979407941794279437944794579467947794879497950795179527953795479557956795779587959796079617962796379647965796679677968796979707971797279737974797579767977797879797980798179827983798479857986798779887989799079917992799379947995799679977998799980008001800280038004800580068007800880098010801180128013801480158016801780188019802080218022802380248025802680278028802980308031803280338034803580368037803880398040804180428043804480458046804780488049805080518052805380548055805680578058805980608061806280638064806580668067806880698070807180728073807480758076807780788079808080818082808380848085808680878088808980908091809280938094809580968097809880998100810181028103810481058106810781088109811081118112811381148115811681178118811981208121812281238124812581268127812881298130813181328133813481358136813781388139814081418142814381448145814681478148814981508151815281538154815581568157815881598160816181628163816481658166816781688169817081718172817381748175817681778178817981808181818281838184818581868187818881898190819181928193819481958196819781988199820082018202820382048205820682078208820982108211821282138214821582168217821882198220822182228223822482258226822782288229823082318232823382348235823682378238823982408241824282438244824582468247824882498250825182528253825482558256825782588259826082618262826382648265826682678268826982708271827282738274827582768
27782788279828082818282828382848285828682878288828982908291829282938294829582968297829882998300830183028303830483058306830783088309831083118312831383148315831683178318831983208321832283238324832583268327832883298330833183328333833483358336833783388339834083418342834383448345834683478348834983508351835283538354835583568357835883598360836183628363836483658366836783688369837083718372837383748375837683778378837983808381838283838384838583868387838883898390839183928393839483958396839783988399840084018402840384048405840684078408840984108411841284138414841584168417841884198420842184228423842484258426842784288429843084318432843384348435843684378438843984408441844284438444844584468447844884498450845184528453845484558456845784588459846084618462846384648465846684678468846984708471847284738474847584768477847884798480848184828483848484858486848784888489849084918492849384948495849684978498849985008501850285038504850585068507850885098510851185128513851485158516851785188519852085218522852385248525852685278528852985308531853285338534853585368537853885398540854185428543854485458546854785488549855085518552855385548555855685578558855985608561856285638564856585668567856885698570857185728573857485758576857785788579858085818582858385848585858685878588858985908591859285938594859585968597859885998600860186028603860486058606860786088609861086118612861386148615861686178618861986208621862286238624862586268627862886298630863186328633863486358636863786388639864086418642864386448645864686478648864986508651865286538654865586568657865886598660866186628663866486658666866786688669867086718672867386748675867686778678867986808681868286838684868586868687868886898690869186928693869486958696869786988699870087018702870387048705870687078708870987108711871287138714871587168717871887198720872187228723872487258726872787288729873087318732873387348735873687378738873987408741874287438744874587468747874887498750875187528753875487558756875787588759876087618762876387648765876687678768876987708771877287738774877587768
77787788779878087818782878387848785878687878788878987908791879287938794879587968797879887998800880188028803880488058806880788088809881088118812881388148815881688178818881988208821882288238824882588268827882888298830883188328833883488358836883788388839884088418842884388448845884688478848884988508851885288538854885588568857885888598860886188628863886488658866886788688869887088718872887388748875887688778878887988808881888288838884888588868887888888898890889188928893889488958896889788988899890089018902890389048905890689078908890989108911891289138914891589168917891889198920892189228923892489258926892789288929893089318932893389348935893689378938893989408941894289438944894589468947894889498950895189528953895489558956895789588959896089618962896389648965896689678968896989708971897289738974897589768977897889798980898189828983898489858986898789888989899089918992899389948995899689978998899990009001900290039004900590069007900890099010901190129013901490159016901790189019902090219022902390249025902690279028902990309031903290339034903590369037903890399040904190429043904490459046904790489049905090519052905390549055905690579058905990609061906290639064906590669067906890699070907190729073907490759076907790789079908090819082908390849085908690879088908990909091909290939094909590969097909890999100910191029103910491059106910791089109911091119112911391149115911691179118911991209121912291239124912591269127912891299130913191329133913491359136913791389139914091419142914391449145914691479148914991509151915291539154915591569157915891599160916191629163916491659166916791689169917091719172917391749175917691779178917991809181918291839184918591869187918891899190919191929193919491959196919791989199920092019202920392049205920692079208920992109211921292139214921592169217921892199220922192229223922492259226922792289229923092319232923392349235923692379238923992409241924292439244924592469247924892499250925192529253925492559256925792589259926092619262926392649265926692679268926992709271927292739274927592769
27792789279928092819282928392849285928692879288928992909291929292939294929592969297929892999300930193029303930493059306930793089309931093119312931393149315931693179318931993209321932293239324932593269327932893299330933193329333933493359336933793389339934093419342934393449345934693479348934993509351935293539354935593569357935893599360936193629363936493659366936793689369937093719372937393749375937693779378937993809381938293839384938593869387938893899390939193929393939493959396939793989399940094019402940394049405940694079408940994109411941294139414941594169417941894199420942194229423942494259426942794289429943094319432943394349435943694379438943994409441944294439444944594469447944894499450945194529453945494559456945794589459946094619462946394649465946694679468946994709471947294739474947594769477947894799480948194829483948494859486948794889489949094919492949394949495949694979498949995009501950295039504950595069507950895099510951195129513951495159516951795189519952095219522952395249525952695279528952995309531953295339534953595369537953895399540954195429543954495459546954795489549955095519552955395549555955695579558955995609561956295639564956595669567956895699570957195729573957495759576957795789579958095819582958395849585958695879588958995909591959295939594959595969597959895999600960196029603960496059606960796089609961096119612961396149615961696179618961996209621962296239624962596269627962896299630963196329633963496359636963796389639964096419642964396449645964696479648964996509651965296539654965596569657965896599660966196629663966496659666966796689669967096719672967396749675967696779678967996809681968296839684968596869687968896899690969196929693969496959696969796989699970097019702970397049705970697079708970997109711971297139714971597169717971897199720972197229723972497259726972797289729973097319732973397349735973697379738973997409741974297439744974597469747974897499750975197529753975497559756975797589759976097619762976397649765976697679768976997709771977297739774977597769
77797789779978097819782978397849785978697879788978997909791979297939794979597969797979897999800980198029803980498059806980798089809981098119812981398149815981698179818981998209821982298239824982598269827982898299830983198329833983498359836983798389839984098419842984398449845984698479848984998509851985298539854985598569857985898599860986198629863986498659866986798689869987098719872987398749875987698779878987998809881988298839884988598869887988898899890989198929893989498959896989798989899990099019902990399049905990699079908990999109911991299139914991599169917991899199920992199229923992499259926992799289929993099319932993399349935993699379938993999409941994299439944994599469947994899499950995199529953995499559956995799589959996099619962996399649965996699679968996999709971997299739974997599769977997899799980998199829983998499859986998799889989999099919992999399949995999699979998999910000100011000210003100041000510006100071000810009100101001110012100131001410015100161001710018100191002010021100221002310024100251002610027100281002910030100311003210033100341003510036100371003810039100401004110042100431004410045100461004710048100491005010051100521005310054100551005610057100581005910060100611006210063100641006510066100671006810069100701007110072100731007410075100761007710078100791008010081100821008310084100851008610087100881008910090100911009210093100941009510096100971009810099101001010110102101031010410105101061010710108101091011010111101121011310114101151011610117101181011910120101211012210123101241012510126101271012810129101301013110132101331013410135101361013710138101391014010141101421014310144101451014610147101481014910150101511015210153101541015510156101571015810159101601016110162101631016410165101661016710168101691017010171101721017310174101751017610177101781017910180101811018210183101841018510186101871018810189101901019110192101931019410195101961019710198101991020010201102021020310204102051020610207102081020910210102111021210213102141021510216102171021810219102201022
11022210223102241022510226102271022810229102301023110232102331023410235102361023710238102391024010241102421024310244102451024610247102481024910250102511025210253102541025510256102571025810259102601026110262102631026410265102661026710268102691027010271102721027310274102751027610277102781027910280102811028210283102841028510286102871028810289102901029110292102931029410295102961029710298102991030010301103021030310304103051030610307103081030910310103111031210313103141031510316103171031810319103201032110322103231032410325103261032710328103291033010331103321033310334103351033610337103381033910340103411034210343103441034510346103471034810349103501035110352103531035410355103561035710358103591036010361103621036310364103651036610367103681036910370103711037210373103741037510376103771037810379103801038110382103831038410385103861038710388103891039010391103921039310394103951039610397103981039910400104011040210403104041040510406104071040810409104101041110412104131041410415104161041710418104191042010421104221042310424104251042610427104281042910430104311043210433104341043510436104371043810439104401044110442104431044410445104461044710448104491045010451104521045310454104551045610457104581045910460104611046210463104641046510466104671046810469104701047110472104731047410475104761047710478104791048010481104821048310484104851048610487104881048910490104911049210493104941049510496104971049810499105001050110502105031050410505105061050710508105091051010511105121051310514105151051610517105181051910520105211052210523105241052510526105271052810529105301053110532105331053410535105361053710538105391054010541105421054310544105451054610547105481054910550105511055210553105541055510556105571055810559105601056110562105631056410565105661056710568105691057010571105721057310574105751057610577105781057910580105811058210583105841058510586105871058810589105901059110592105931059410595105961059710598105991060010601106021060310604106051060610607106081060910610106111061210613106141061510616106171061810619106201062
11062210623106241062510626106271062810629106301063110632106331063410635106361063710638106391064010641106421064310644106451064610647106481064910650106511065210653106541065510656106571065810659106601066110662106631066410665106661066710668106691067010671106721067310674106751067610677106781067910680106811068210683106841068510686106871068810689106901069110692106931069410695106961069710698106991070010701107021070310704107051070610707107081070910710107111071210713107141071510716107171071810719107201072110722107231072410725107261072710728107291073010731107321073310734107351073610737107381073910740107411074210743107441074510746107471074810749107501075110752107531075410755107561075710758107591076010761107621076310764107651076610767107681076910770107711077210773107741077510776107771077810779107801078110782107831078410785107861078710788107891079010791107921079310794107951079610797107981079910800108011080210803108041080510806108071080810809108101081110812108131081410815108161081710818108191082010821108221082310824108251082610827108281082910830108311083210833108341083510836108371083810839108401084110842108431084410845108461084710848108491085010851108521085310854108551085610857108581085910860108611086210863108641086510866108671086810869108701087110872108731087410875108761087710878108791088010881108821088310884108851088610887108881088910890108911089210893108941089510896108971089810899109001090110902109031090410905109061090710908109091091010911109121091310914109151091610917109181091910920109211092210923109241092510926109271092810929109301093110932109331093410935109361093710938109391094010941109421094310944109451094610947109481094910950109511095210953109541095510956109571095810959109601096110962109631096410965109661096710968109691097010971109721097310974109751097610977109781097910980109811098210983109841098510986109871098810989109901099110992109931099410995109961099710998109991100011001110021100311004110051100611007110081100911010110111101211013110141101511016110171101811019110201102
11102211023110241102511026110271102811029110301103111032110331103411035110361103711038110391104011041110421104311044110451104611047110481104911050110511105211053110541105511056110571105811059110601106111062110631106411065110661106711068110691107011071110721107311074110751107611077110781107911080110811108211083110841108511086110871108811089110901109111092110931109411095110961109711098110991110011101111021110311104111051110611107111081110911110111111111211113111141111511116111171111811119111201112111122111231112411125111261112711128111291113011131111321113311134111351113611137111381113911140111411114211143111441114511146111471114811149111501115111152111531115411155111561115711158111591116011161111621116311164111651116611167111681116911170111711117211173111741117511176111771117811179111801118111182111831118411185111861118711188111891119011191111921119311194111951119611197111981119911200112011120211203112041120511206112071120811209112101121111212112131121411215112161121711218112191122011221112221122311224112251122611227112281122911230112311123211233112341123511236112371123811239112401124111242112431124411245112461124711248112491125011251112521125311254112551125611257112581125911260112611126211263112641126511266112671126811269112701127111272112731127411275112761127711278112791128011281112821128311284112851128611287112881128911290112911129211293112941129511296112971129811299113001130111302113031130411305113061130711308113091131011311113121131311314113151131611317113181131911320113211132211323113241132511326113271132811329113301133111332113331133411335113361133711338113391134011341113421134311344113451134611347113481134911350113511135211353113541135511356113571135811359113601136111362113631136411365113661136711368113691137011371113721137311374113751137611377113781137911380113811138211383113841138511386113871138811389113901139111392113931139411395113961139711398113991140011401114021140311404114051140611407114081140911410114111141211413114141141511416114171141811419114201142
11142211423114241142511426114271142811429114301143111432114331143411435114361143711438114391144011441114421144311444114451144611447114481144911450114511145211453114541145511456114571145811459114601146111462114631146411465114661146711468114691147011471114721147311474114751147611477114781147911480114811148211483114841148511486114871148811489114901149111492114931149411495114961149711498114991150011501115021150311504115051150611507115081150911510115111151211513115141151511516115171151811519115201152111522115231152411525115261152711528115291153011531115321153311534115351153611537115381153911540115411154211543115441154511546115471154811549115501155111552115531155411555115561155711558115591156011561115621156311564115651156611567115681156911570115711157211573115741157511576115771157811579115801158111582115831158411585115861158711588115891159011591115921159311594115951159611597115981159911600116011160211603116041160511606116071160811609116101161111612116131161411615116161161711618116191162011621116221162311624116251162611627116281162911630116311163211633116341163511636116371163811639116401164111642116431164411645116461164711648116491165011651116521165311654116551165611657116581165911660116611166211663116641166511666116671166811669116701167111672116731167411675116761167711678116791168011681116821168311684116851168611687116881168911690116911169211693116941169511696116971169811699117001170111702117031170411705117061170711708117091171011711117121171311714117151171611717117181171911720117211172211723117241172511726117271172811729117301173111732117331173411735117361173711738117391174011741117421174311744117451174611747117481174911750117511175211753117541175511756117571175811759117601176111762117631176411765117661176711768117691177011771117721177311774117751177611777117781177911780117811178211783117841178511786117871178811789117901179111792117931179411795117961179711798117991180011801118021180311804118051180611807118081180911810118111181211813118141181511816118171181811819118201182
11182211823118241182511826118271182811829118301183111832118331183411835118361183711838118391184011841118421184311844118451184611847118481184911850118511185211853118541185511856118571185811859118601186111862118631186411865118661186711868118691187011871118721187311874118751187611877118781187911880118811188211883118841188511886118871188811889118901189111892118931189411895118961189711898118991190011901119021190311904119051190611907119081190911910119111191211913119141191511916119171191811919119201192111922119231192411925119261192711928119291193011931119321193311934119351193611937119381193911940119411194211943119441194511946119471194811949119501195111952119531195411955119561195711958119591196011961119621196311964119651196611967119681196911970119711197211973119741197511976119771197811979119801198111982119831198411985119861198711988119891199011991119921199311994119951199611997119981199912000120011200212003120041200512006120071200812009120101201112012120131201412015120161201712018120191202012021120221202312024120251202612027120281202912030120311203212033120341203512036120371203812039120401204112042120431204412045120461204712048120491205012051120521205312054120551205612057120581205912060120611206212063120641206512066120671206812069120701207112072120731207412075120761207712078120791208012081120821208312084120851208612087120881208912090120911209212093120941209512096120971209812099121001210112102121031210412105121061210712108121091211012111121121211312114121151211612117121181211912120121211212212123121241212512126121271212812129121301213112132121331213412135121361213712138121391214012141121421214312144121451214612147121481214912150121511215212153121541215512156121571215812159121601216112162121631216412165121661216712168121691217012171121721217312174121751217612177121781217912180121811218212183121841218512186121871218812189121901219112192121931219412195121961219712198121991220012201122021220312204122051220612207122081220912210122111221212213122141221512216122171221812219122201222
11222212223122241222512226122271222812229122301223112232122331223412235122361223712238122391224012241122421224312244122451224612247122481224912250122511225212253122541225512256122571225812259122601226112262122631226412265122661226712268122691227012271122721227312274122751227612277122781227912280122811228212283122841228512286122871228812289122901229112292122931229412295122961229712298122991230012301123021230312304123051230612307123081230912310123111231212313123141231512316123171231812319123201232112322123231232412325123261232712328123291233012331123321233312334123351233612337123381233912340123411234212343123441234512346123471234812349123501235112352123531235412355123561235712358123591236012361123621236312364123651236612367123681236912370123711237212373123741237512376123771237812379123801238112382123831238412385123861238712388123891239012391123921239312394123951239612397123981239912400124011240212403124041240512406124071240812409124101241112412124131241412415124161241712418124191242012421124221242312424124251242612427124281242912430124311243212433124341243512436124371243812439124401244112442124431244412445124461244712448124491245012451124521245312454124551245612457124581245912460124611246212463124641246512466124671246812469124701247112472124731247412475124761247712478124791248012481124821248312484124851248612487124881248912490124911249212493124941249512496124971249812499125001250112502125031250412505125061250712508125091251012511125121251312514125151251612517125181251912520125211252212523125241252512526125271252812529125301253112532125331253412535125361253712538125391254012541125421254312544125451254612547125481254912550125511255212553125541255512556125571255812559125601256112562125631256412565125661256712568125691257012571125721257312574125751257612577125781257912580125811258212583125841258512586125871258812589125901259112592125931259412595125961259712598125991260012601126021260312604126051260612607126081260912610126111261212613126141261512616126171261812619126201262
11262212623126241262512626126271262812629126301263112632126331263412635126361263712638126391264012641126421264312644126451264612647126481264912650126511265212653126541265512656126571265812659126601266112662126631266412665126661266712668126691267012671126721267312674126751267612677126781267912680126811268212683126841268512686126871268812689126901269112692126931269412695126961269712698126991270012701127021270312704127051270612707127081270912710127111271212713127141271512716127171271812719127201272112722127231272412725127261272712728127291273012731127321273312734127351273612737127381273912740127411274212743127441274512746127471274812749127501275112752127531275412755127561275712758127591276012761127621276312764127651276612767127681276912770127711277212773127741277512776127771277812779127801278112782127831278412785127861278712788127891279012791127921279312794127951279612797127981279912800128011280212803128041280512806128071280812809128101281112812128131281412815128161281712818128191282012821128221282312824128251282612827128281282912830128311283212833128341283512836128371283812839128401284112842128431284412845128461284712848128491285012851128521285312854128551285612857128581285912860128611286212863128641286512866128671286812869128701287112872128731287412875128761287712878128791288012881128821288312884128851288612887128881288912890128911289212893128941289512896128971289812899129001290112902129031290412905129061290712908129091291012911129121291312914129151291612917129181291912920129211292212923129241292512926129271292812929129301293112932129331293412935129361293712938129391294012941129421294312944129451294612947129481294912950129511295212953129541295512956129571295812959129601296112962129631296412965129661296712968129691297012971129721297312974129751297612977129781297912980129811298212983129841298512986129871298812989129901299112992129931299412995129961299712998129991300013001130021300313004130051300613007130081300913010130111301213013130141301513016130171301813019130201302
11302213023130241302513026130271302813029130301303113032130331303413035130361303713038130391304013041130421304313044130451304613047130481304913050130511305213053130541305513056130571305813059130601306113062130631306413065130661306713068130691307013071130721307313074130751307613077130781307913080130811308213083130841308513086130871308813089130901309113092130931309413095130961309713098130991310013101131021310313104131051310613107131081310913110131111311213113131141311513116131171311813119131201312113122131231312413125131261312713128131291313013131131321313313134131351313613137131381313913140131411314213143131441314513146131471314813149131501315113152131531315413155131561315713158131591316013161131621316313164131651316613167131681316913170131711317213173131741317513176131771317813179131801318113182131831318413185131861318713188131891319013191131921319313194131951319613197131981319913200132011320213203132041320513206132071320813209132101321113212132131321413215132161321713218132191322013221132221322313224132251322613227132281322913230132311323213233132341323513236132371323813239132401324113242132431324413245132461324713248132491325013251132521325313254132551325613257132581325913260132611326213263132641326513266132671326813269132701327113272132731327413275132761327713278132791328013281132821328313284132851328613287132881328913290132911329213293132941329513296132971329813299133001330113302133031330413305133061330713308133091331013311133121331313314133151331613317133181331913320133211332213323133241332513326133271332813329133301333113332133331333413335133361333713338133391334013341133421334313344133451334613347133481334913350133511335213353133541335513356133571335813359133601336113362133631336413365133661336713368133691337013371133721337313374133751337613377133781337913380133811338213383133841338513386133871338813389133901339113392133931339413395133961339713398133991340013401134021340313404134051340613407134081340913410134111341213413134141341513416134171341813419134201342
113422134231342413425134261342713428134291343013431134321343313434134351343613437134381343913440134411344213443134441344513446134471344813449134501345113452134531345413455134561345713458134591346013461134621346313464134651346613467134681346913470134711347213473134741347513476134771347813479134801348113482134831348413485134861348713488134891349013491134921349313494134951349613497134981349913500135011350213503135041350513506135071350813509135101351113512135131351413515135161351713518135191352013521135221352313524135251352613527135281352913530 |
- <html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>Sphinx 2.2.7-dev reference manual</title><meta name="generator" content="DocBook XSL Stylesheets V1.76.1">
- <style type="text/css">pre.programlisting { background-color: #f0f0f0; padding: 0.5em; margin-left: 2em; margin-right: 2em; }</style>
- </head>
- <body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div lang="en" class="book" title="Sphinx 2.2.7-dev reference manual"><div class="titlepage"><div><div><h1 class="title"><a name="idp20034560"></a>Sphinx 2.2.7-dev reference manual</h1></div>
- <div><h2 class="subtitle">Free open-source SQL full-text search engine</h2></div>
- <div><p class="copyright">Copyright © 2001-2014 Andrew Aksyonoff</p></div>
- <div><p class="copyright">Copyright © 2008-2014 Sphinx Technologies Inc, <a class="ulink" href="http://sphinxsearch.com" target="_top">http://sphinxsearch.com</a></p></div></div>
- <hr></div>
- <div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="chapter"><a href="#intro">1. Introduction</a></span></dt>
- <dd><dl><dt><span class="sect1"><a href="#about">1.1. About</a></span></dt>
- <dt><span class="sect1"><a href="#features">1.2. Sphinx features</a></span></dt>
- <dt><span class="sect1"><a href="#getting">1.3. Where to get Sphinx</a></span></dt>
- <dt><span class="sect1"><a href="#license">1.4. License</a></span></dt>
- <dt><span class="sect1"><a href="#credits">1.5. Credits</a></span></dt>
- <dt><span class="sect1"><a href="#history">1.6. History</a></span></dt>
- </dl></dd><dt><span class="chapter"><a href="#installation">2. Installation</a></span></dt>
- <dd><dl><dt><span class="sect1"><a href="#supported-system">2.1. Supported systems</a></span></dt>
- <dt><span class="sect1"><a href="#compiling-from-source">2.2. Compiling Sphinx from source</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#required-tools">2.2.1. Required tools</a></span></dt>
- <dt><span class="sect2"><a href="#compiling-source-linux">2.2.2. Compiling on Linux</a></span></dt>
- <dt><span class="sect2"><a href="#compiling-source-problems">2.2.3. Known compilation issues</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#installing-debian">2.3. Installing Sphinx packages on Debian and Ubuntu</a></span></dt>
- <dt><span class="sect1"><a href="#installing-redhat">2.4. Installing Sphinx packages on RedHat and CentOS</a></span></dt>
- <dt><span class="sect1"><a href="#installing-windows">2.5. Installing Sphinx on Windows</a></span></dt>
- <dt><span class="sect1"><a href="#sphinx-deprecations-defaults">2.6. Sphinx deprecations and changes in default configuration</a></span></dt>
- <dt><span class="sect1"><a href="#quick-tour">2.7. Quick Sphinx usage tour</a></span></dt>
- </dl></dd><dt><span class="chapter"><a href="#indexing">3. Indexing</a></span></dt>
- <dd><dl><dt><span class="sect1"><a href="#sources">3.1. Data sources</a></span></dt>
- <dt><span class="sect1"><a href="#fields">3.2. Full-text fields</a></span></dt>
- <dt><span class="sect1"><a href="#attributes">3.3. Attributes</a></span></dt>
- <dt><span class="sect1"><a href="#mva">3.4. MVA (multi-valued attributes)</a></span></dt>
- <dt><span class="sect1"><a href="#indexes">3.5. Indexes</a></span></dt>
- <dt><span class="sect1"><a href="#data-restrictions">3.6. Restrictions on the source data</a></span></dt>
- <dt><span class="sect1"><a href="#charsets">3.7. Charsets, case folding, translation tables, and replacement rules</a></span></dt>
- <dt><span class="sect1"><a href="#sql">3.8. SQL data sources (MySQL, PostgreSQL)</a></span></dt>
- <dt><span class="sect1"><a href="#xmlpipe2">3.9. xmlpipe2 data source</a></span></dt>
- <dt><span class="sect1"><a href="#tsvpipe">3.10. tsvpipe (Tab Separated Values) data source</a></span></dt>
- <dt><span class="sect1"><a href="#live-updates">3.11. Live index updates</a></span></dt>
- <dt><span class="sect1"><a href="#delta-updates">3.12. Delta index updates</a></span></dt>
- <dt><span class="sect1"><a href="#index-merging">3.13. Index merging</a></span></dt>
- </dl></dd><dt><span class="chapter"><a href="#rt-indexes">4. Real-time indexes</a></span></dt>
- <dd><dl><dt><span class="sect1"><a href="#rt-overview">4.1. RT indexes overview</a></span></dt>
- <dt><span class="sect1"><a href="#rt-caveats">4.2. Known caveats with RT indexes</a></span></dt>
- <dt><span class="sect1"><a href="#rt-internals">4.3. RT index internals</a></span></dt>
- <dt><span class="sect1"><a href="#rt-binlog">4.4. Binary logging</a></span></dt>
- </dl></dd><dt><span class="chapter"><a href="#searching">5. Searching</a></span></dt>
- <dd><dl><dt><span class="sect1"><a href="#matching-modes">5.1. Matching modes</a></span></dt>
- <dt><span class="sect1"><a href="#boolean-syntax">5.2. Boolean query syntax</a></span></dt>
- <dt><span class="sect1"><a href="#extended-syntax">5.3. Extended query syntax</a></span></dt>
- <dt><span class="sect1"><a href="#weighting">5.4. Search results ranking</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#ranking-overview">5.4.1. Ranking overview</a></span></dt>
- <dt><span class="sect2"><a href="#builtin-rankers">5.4.2. Available built-in rankers</a></span></dt>
- <dt><span class="sect2"><a href="#expression-ranker">5.4.3. Expression based ranker (SPH_RANK_EXPR)</a></span></dt>
- <dt><span class="sect2"><a href="#ranking-factors">5.4.4. Quick summary of the ranking factors</a></span></dt>
- <dt><span class="sect2"><a href="#document-factors">5.4.5. Document-level ranking factors</a></span></dt>
- <dt><span class="sect2"><a href="#field-factors">5.4.6. Field-level ranking factors</a></span></dt>
- <dt><span class="sect2"><a href="#factor-aggr-functions">5.4.7. Ranking factor aggregation functions</a></span></dt>
- <dt><span class="sect2"><a href="#formulas-for-builtin-rankers">5.4.8. Formula expressions for all the built-in rankers</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#expressions">5.5. Expressions, functions, and operators</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#operators">5.5.1. Operators</a></span></dt>
- <dt><span class="sect2"><a href="#numeric-functions">5.5.2. Numeric functions</a></span></dt>
- <dt><span class="sect2"><a href="#date-time-functions">5.5.3. Date and time functions</a></span></dt>
- <dt><span class="sect2"><a href="#type-conversion-functions">5.5.4. Type conversion functions</a></span></dt>
- <dt><span class="sect2"><a href="#comparison-functions">5.5.5. Comparison functions</a></span></dt>
- <dt><span class="sect2"><a href="#misc-functions">5.5.6. Miscellaneous functions</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#sorting-modes">5.6. Sorting modes</a></span></dt>
- <dt><span class="sect1"><a href="#clustering">5.7. Grouping (clustering) search results</a></span></dt>
- <dt><span class="sect1"><a href="#distributed">5.8. Distributed searching</a></span></dt>
- <dt><span class="sect1"><a href="#query-log-format">5.9. <code class="filename">searchd</code> query log formats</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#plain-log-format">5.9.1. Plain log format</a></span></dt>
- <dt><span class="sect2"><a href="#sphinxql-log-format">5.9.2. SphinxQL log format</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#sphinxql">5.10. MySQL protocol support and SphinxQL</a></span></dt>
- <dt><span class="sect1"><a href="#multi-queries">5.11. Multi-queries</a></span></dt>
- <dt><span class="sect1"><a href="#collations">5.12. Collations</a></span></dt>
- </dl></dd><dt><span class="chapter"><a href="#extending-sphinx">6. Extending Sphinx</a></span></dt>
- <dd><dl><dt><span class="sect1"><a href="#sphinx-udfs">6.1. Sphinx UDFs (User Defined Functions)</a></span></dt>
- <dt><span class="sect1"><a href="#sphinx-plugins">6.2. Sphinx plugins</a></span></dt>
- <dt><span class="sect1"><a href="#ranker-plugins">6.3. Ranker plugins</a></span></dt>
- </dl></dd><dt><span class="chapter"><a href="#command-line-tools">7. Command line tools reference</a></span></dt>
- <dd><dl><dt><span class="sect1"><a href="#ref-indexer">7.1. <code class="filename">indexer</code> command reference</a></span></dt>
- <dt><span class="sect1"><a href="#ref-searchd">7.2. <code class="filename">searchd</code> command reference</a></span></dt>
- <dt><span class="sect1"><a href="#ref-spelldump">7.3. <code class="filename">spelldump</code> command reference</a></span></dt>
- <dt><span class="sect1"><a href="#ref-indextool">7.4. <code class="filename">indextool</code> command reference</a></span></dt>
- <dt><span class="sect1"><a href="#ref-wordbreaker">7.5. <code class="filename">wordbreaker</code> command reference</a></span></dt>
- </dl></dd><dt><span class="chapter"><a href="#sphinxql-reference">8. SphinxQL reference</a></span></dt>
- <dd><dl><dt><span class="sect1"><a href="#sphinxql-select">8.1. SELECT syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-select-sysvar">8.2. SELECT @@system_variable syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-meta">8.3. SHOW META syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-warnings">8.4. SHOW WARNINGS syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-status">8.5. SHOW STATUS syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-insert">8.6. INSERT and REPLACE syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-replace">8.7. REPLACE syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-delete">8.8. DELETE syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-set">8.9. SET syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-set-transaction">8.10. SET TRANSACTION syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-commit">8.11. BEGIN, COMMIT, and ROLLBACK syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-begin">8.12. BEGIN syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-rollback">8.13. ROLLBACK syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-call-snippets">8.14. CALL SNIPPETS syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-call-keywords">8.15. CALL KEYWORDS syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-tables">8.16. SHOW TABLES syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-describe">8.17. DESCRIBE syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-create-function">8.18. CREATE FUNCTION syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-drop-function">8.19. DROP FUNCTION syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-variables">8.20. SHOW VARIABLES syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-collation">8.21. SHOW COLLATION syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-character-set">8.22. SHOW CHARACTER SET syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-update">8.23. UPDATE syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-attach">8.24. ALTER syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-attach-index">8.25. ATTACH INDEX syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-flush-rtindex">8.26. FLUSH RTINDEX syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-flush-ramchunk">8.27. FLUSH RAMCHUNK syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-truncate-rtindex">8.28. TRUNCATE RTINDEX syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-agent-status">8.29. SHOW AGENT STATUS</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-profile">8.30. SHOW PROFILE syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-index-status">8.31. SHOW INDEX STATUS syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-index-settings">8.32. SHOW INDEX SETTINGS syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-optimize-index">8.33. OPTIMIZE INDEX syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-plan">8.34. SHOW PLAN syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-databases">8.35. SHOW DATABASES syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-create-plugin">8.36. CREATE PLUGIN syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-drop-plugin">8.37. DROP PLUGIN syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-plugins">8.38. SHOW PLUGINS syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-threads">8.39. SHOW THREADS syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-multi-queries">8.40. Multi-statement queries</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-comment-syntax">8.41. Comment syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-reserved-keywords">8.42. List of SphinxQL reserved keywords</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-upgrading-magics">8.43. SphinxQL upgrade notes, version 2.0.1-beta</a></span></dt>
- </dl></dd><dt><span class="chapter"><a href="#api-reference">9. API reference</a></span></dt>
- <dd><dl><dt><span class="sect1"><a href="#api-funcgroup-general">9.1. General API functions</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#api-func-getlasterror">9.1.1. GetLastError</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-getlastwarning">9.1.2. GetLastWarning</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setserver">9.1.3. SetServer</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setretries">9.1.4. SetRetries</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setconnecttimeout">9.1.5. SetConnectTimeout</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setarrayresult">9.1.6. SetArrayResult</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-isconnecterror">9.1.7. IsConnectError</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#api-funcgroup-general-query-settings">9.2. General query settings</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#api-func-setlimits">9.2.1. SetLimits</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setmaxquerytime">9.2.2. SetMaxQueryTime</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setoverride">9.2.3. SetOverride</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setselect">9.2.4. SetSelect</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#api-funcgroup-fulltext-query-settings">9.3. Full-text search query settings</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#api-func-setmatchmode">9.3.1. SetMatchMode</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setrankingmode">9.3.2. SetRankingMode</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setsortmode">9.3.3. SetSortMode</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setweights">9.3.4. SetWeights</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setfieldweights">9.3.5. SetFieldWeights</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setindexweights">9.3.6. SetIndexWeights</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#api-funcgroup-filtering">9.4. Result set filtering settings</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#api-func-setidrange">9.4.1. SetIDRange</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setfilter">9.4.2. SetFilter</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setfilterrange">9.4.3. SetFilterRange</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setfilterfloatrange">9.4.4. SetFilterFloatRange</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setgeoanchor">9.4.5. SetGeoAnchor</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setfilterstring">9.4.6. SetFilterString</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#api-funcgroup-groupby">9.5. GROUP BY settings</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#api-func-setgroupby">9.5.1. SetGroupBy</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setgroupdistinct">9.5.2. SetGroupDistinct</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#api-funcgroup-querying">9.6. Querying</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#api-func-query">9.6.1. Query</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-addquery">9.6.2. AddQuery</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-runqueries">9.6.3. RunQueries</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-resetfilters">9.6.4. ResetFilters</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-resetgroupby">9.6.5. ResetGroupBy</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#api-funcgroup-additional-functionality">9.7. Additional functionality</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#api-func-buildexcerpts">9.7.1. BuildExcerpts</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-updateatttributes">9.7.2. UpdateAttributes</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-buildkeywords">9.7.3. BuildKeywords</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-escapestring">9.7.4. EscapeString</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-status">9.7.5. Status</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-flushattributes">9.7.6. FlushAttributes</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#api-funcgroup-pconn">9.8. Persistent connections</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#api-func-open">9.8.1. Open</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-close">9.8.2. Close</a></span></dt>
- </dl></dd></dl></dd><dt><span class="chapter"><a href="#sphinxse">10. MySQL storage engine (SphinxSE)</a></span></dt>
- <dd><dl><dt><span class="sect1"><a href="#sphinxse-overview">10.1. SphinxSE overview</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxse-installing">10.2. Installing SphinxSE</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#sphinxse-mysql50">10.2.1. Compiling MySQL 5.0.x with SphinxSE</a></span></dt>
- <dt><span class="sect2"><a href="#sphinxse-mysql51">10.2.2. Compiling MySQL 5.1.x with SphinxSE</a></span></dt>
- <dt><span class="sect2"><a href="#sphinxse-checking">10.2.3. Checking SphinxSE installation</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#sphinxse-using">10.3. Using SphinxSE</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxse-snippets">10.4. Building snippets (excerpts) via MySQL</a></span></dt>
- </dl></dd><dt><span class="chapter"><a href="#reporting-bugs">11. Reporting bugs</a></span></dt>
- <dt><span class="chapter"><a href="#conf-reference">12. <code class="filename">sphinx.conf</code> options reference</a></span></dt>
- <dd><dl><dt><span class="sect1"><a href="#confgroup-source">12.1. Data source configuration options</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#conf-source-type">12.1.1. type</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-host">12.1.2. sql_host</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-port">12.1.3. sql_port</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-user">12.1.4. sql_user</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-pass">12.1.5. sql_pass</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-db">12.1.6. sql_db</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-sock">12.1.7. sql_sock</a></span></dt>
- <dt><span class="sect2"><a href="#conf-mysql-connect-flags">12.1.8. mysql_connect_flags</a></span></dt>
- <dt><span class="sect2"><a href="#conf-mysql-ssl">12.1.9. mysql_ssl_cert, mysql_ssl_key, mysql_ssl_ca</a></span></dt>
- <dt><span class="sect2"><a href="#conf-odbc-dsn">12.1.10. odbc_dsn</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-query-pre">12.1.11. sql_query_pre</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-query">12.1.12. sql_query</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-joined-field">12.1.13. sql_joined_field</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-query-range">12.1.14. sql_query_range</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-range-step">12.1.15. sql_range_step</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-query-killlist">12.1.16. sql_query_killlist</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-attr-uint">12.1.17. sql_attr_uint</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-attr-bool">12.1.18. sql_attr_bool</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-attr-bigint">12.1.19. sql_attr_bigint</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-attr-timestamp">12.1.20. sql_attr_timestamp</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-attr-float">12.1.21. sql_attr_float</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-attr-multi">12.1.22. sql_attr_multi</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-attr-string">12.1.23. sql_attr_string</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-attr-json">12.1.24. sql_attr_json</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-column-buffers">12.1.25. sql_column_buffers</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-field-string">12.1.26. sql_field_string</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-file-field">12.1.27. sql_file_field</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-query-post">12.1.28. sql_query_post</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-query-post-index">12.1.29. sql_query_post_index</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-ranged-throttle">12.1.30. sql_ranged_throttle</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-command">12.1.31. xmlpipe_command</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-field">12.1.32. xmlpipe_field</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-field-string">12.1.33. xmlpipe_field_string</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-attr-uint">12.1.34. xmlpipe_attr_uint</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-attr-bigint">12.1.35. xmlpipe_attr_bigint</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-attr-bool">12.1.36. xmlpipe_attr_bool</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-attr-timestamp">12.1.37. xmlpipe_attr_timestamp</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-attr-float">12.1.38. xmlpipe_attr_float</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-attr-multi">12.1.39. xmlpipe_attr_multi</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-attr-multi-64">12.1.40. xmlpipe_attr_multi_64</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-attr-string">12.1.41. xmlpipe_attr_string</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-attr-json">12.1.42. xmlpipe_attr_json</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-fixup-utf8">12.1.43. xmlpipe_fixup_utf8</a></span></dt>
- <dt><span class="sect2"><a href="#conf-mssql-winauth">12.1.44. mssql_winauth</a></span></dt>
- <dt><span class="sect2"><a href="#conf-unpack-zlib">12.1.45. unpack_zlib</a></span></dt>
- <dt><span class="sect2"><a href="#conf-unpack-mysqlcompress">12.1.46. unpack_mysqlcompress</a></span></dt>
- <dt><span class="sect2"><a href="#conf-unpack-mysqlcompress-maxsize">12.1.47. unpack_mysqlcompress_maxsize</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#confgroup-index">12.2. Index configuration options</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#conf-index-type">12.2.1. type</a></span></dt>
- <dt><span class="sect2"><a href="#conf-source">12.2.2. source</a></span></dt>
- <dt><span class="sect2"><a href="#conf-path">12.2.3. path</a></span></dt>
- <dt><span class="sect2"><a href="#conf-docinfo">12.2.4. docinfo</a></span></dt>
- <dt><span class="sect2"><a href="#conf-mlock">12.2.5. mlock</a></span></dt>
- <dt><span class="sect2"><a href="#conf-morphology">12.2.6. morphology</a></span></dt>
- <dt><span class="sect2"><a href="#conf-dict">12.2.7. dict</a></span></dt>
- <dt><span class="sect2"><a href="#conf-index-sp">12.2.8. index_sp</a></span></dt>
- <dt><span class="sect2"><a href="#conf-index-zones">12.2.9. index_zones</a></span></dt>
- <dt><span class="sect2"><a href="#conf-min-stemming-len">12.2.10. min_stemming_len</a></span></dt>
- <dt><span class="sect2"><a href="#conf-stopwords">12.2.11. stopwords</a></span></dt>
- <dt><span class="sect2"><a href="#conf-wordforms">12.2.12. wordforms</a></span></dt>
- <dt><span class="sect2"><a href="#conf-embedded-limit">12.2.13. embedded_limit</a></span></dt>
- <dt><span class="sect2"><a href="#conf-exceptions">12.2.14. exceptions</a></span></dt>
- <dt><span class="sect2"><a href="#conf-min-word-len">12.2.15. min_word_len</a></span></dt>
- <dt><span class="sect2"><a href="#conf-charset-table">12.2.16. charset_table</a></span></dt>
- <dt><span class="sect2"><a href="#conf-ignore-chars">12.2.17. ignore_chars</a></span></dt>
- <dt><span class="sect2"><a href="#conf-min-prefix-len">12.2.18. min_prefix_len</a></span></dt>
- <dt><span class="sect2"><a href="#conf-min-infix-len">12.2.19. min_infix_len</a></span></dt>
- <dt><span class="sect2"><a href="#conf-max-substring-len">12.2.20. max_substring_len</a></span></dt>
- <dt><span class="sect2"><a href="#conf-prefix-fields">12.2.21. prefix_fields</a></span></dt>
- <dt><span class="sect2"><a href="#conf-infix-fields">12.2.22. infix_fields</a></span></dt>
- <dt><span class="sect2"><a href="#conf-ngram-len">12.2.23. ngram_len</a></span></dt>
- <dt><span class="sect2"><a href="#conf-ngram-chars">12.2.24. ngram_chars</a></span></dt>
- <dt><span class="sect2"><a href="#conf-phrase-boundary">12.2.25. phrase_boundary</a></span></dt>
- <dt><span class="sect2"><a href="#conf-phrase-boundary-step">12.2.26. phrase_boundary_step</a></span></dt>
- <dt><span class="sect2"><a href="#conf-html-strip">12.2.27. html_strip</a></span></dt>
- <dt><span class="sect2"><a href="#conf-html-index-attrs">12.2.28. html_index_attrs</a></span></dt>
- <dt><span class="sect2"><a href="#conf-html-remove-elements">12.2.29. html_remove_elements</a></span></dt>
- <dt><span class="sect2"><a href="#conf-local">12.2.30. local</a></span></dt>
- <dt><span class="sect2"><a href="#conf-agent">12.2.31. agent</a></span></dt>
- <dt><span class="sect2"><a href="#conf-agent-persistent">12.2.32. agent_persistent</a></span></dt>
- <dt><span class="sect2"><a href="#conf-agent-blackhole">12.2.33. agent_blackhole</a></span></dt>
- <dt><span class="sect2"><a href="#conf-agent-connect-timeout">12.2.34. agent_connect_timeout</a></span></dt>
- <dt><span class="sect2"><a href="#conf-agent-query-timeout">12.2.35. agent_query_timeout</a></span></dt>
- <dt><span class="sect2"><a href="#conf-preopen">12.2.36. preopen</a></span></dt>
- <dt><span class="sect2"><a href="#conf-inplace-enable">12.2.37. inplace_enable</a></span></dt>
- <dt><span class="sect2"><a href="#conf-inplace-hit-gap">12.2.38. inplace_hit_gap</a></span></dt>
- <dt><span class="sect2"><a href="#conf-inplace-docinfo-gap">12.2.39. inplace_docinfo_gap</a></span></dt>
- <dt><span class="sect2"><a href="#conf-inplace-reloc-factor">12.2.40. inplace_reloc_factor</a></span></dt>
- <dt><span class="sect2"><a href="#conf-inplace-write-factor">12.2.41. inplace_write_factor</a></span></dt>
- <dt><span class="sect2"><a href="#conf-index-exact-words">12.2.42. index_exact_words</a></span></dt>
- <dt><span class="sect2"><a href="#conf-overshort-step">12.2.43. overshort_step</a></span></dt>
- <dt><span class="sect2"><a href="#conf-stopword-step">12.2.44. stopword_step</a></span></dt>
- <dt><span class="sect2"><a href="#conf-hitless-words">12.2.45. hitless_words</a></span></dt>
- <dt><span class="sect2"><a href="#conf-expand-keywords">12.2.46. expand_keywords</a></span></dt>
- <dt><span class="sect2"><a href="#conf-blend-chars">12.2.47. blend_chars</a></span></dt>
- <dt><span class="sect2"><a href="#conf-blend-mode">12.2.48. blend_mode</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-mem-limit">12.2.49. rt_mem_limit</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-field">12.2.50. rt_field</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-attr-uint">12.2.51. rt_attr_uint</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-attr-bool">12.2.52. rt_attr_bool</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-attr-bigint">12.2.53. rt_attr_bigint</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-attr-float">12.2.54. rt_attr_float</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-attr-multi">12.2.55. rt_attr_multi</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-attr-multi-64">12.2.56. rt_attr_multi_64</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-attr-timestamp">12.2.57. rt_attr_timestamp</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-attr-string">12.2.58. rt_attr_string</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-attr-json">12.2.59. rt_attr_json</a></span></dt>
- <dt><span class="sect2"><a href="#conf-ha-strategy">12.2.60. ha_strategy</a></span></dt>
- <dt><span class="sect2"><a href="#conf-bigram-freq-words">12.2.61. bigram_freq_words</a></span></dt>
- <dt><span class="sect2"><a href="#conf-bigram-index">12.2.62. bigram_index</a></span></dt>
- <dt><span class="sect2"><a href="#conf-index-field-lengths">12.2.63. index_field_lengths</a></span></dt>
- <dt><span class="sect2"><a href="#conf-regexp-filter">12.2.64. regexp_filter</a></span></dt>
- <dt><span class="sect2"><a href="#conf-stopwords-unstemmed">12.2.65. stopwords_unstemmed</a></span></dt>
- <dt><span class="sect2"><a href="#conf-global-idf">12.2.66. global_idf</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rlp-context">12.2.67. rlp_context</a></span></dt>
- <dt><span class="sect2"><a href="#conf-ondisk-attrs">12.2.68. ondisk_attrs</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#confgroup-indexer">12.3. <code class="filename">indexer</code> program configuration options</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#conf-mem-limit">12.3.1. mem_limit</a></span></dt>
- <dt><span class="sect2"><a href="#conf-max-iops">12.3.2. max_iops</a></span></dt>
- <dt><span class="sect2"><a href="#conf-max-iosize">12.3.3. max_iosize</a></span></dt>
- <dt><span class="sect2"><a href="#conf-max-xmlpipe2-field">12.3.4. max_xmlpipe2_field</a></span></dt>
- <dt><span class="sect2"><a href="#conf-write-buffer">12.3.5. write_buffer</a></span></dt>
- <dt><span class="sect2"><a href="#conf-max-file-field-buffer">12.3.6. max_file_field_buffer</a></span></dt>
- <dt><span class="sect2"><a href="#conf-on-file-field-error">12.3.7. on_file_field_error</a></span></dt>
- <dt><span class="sect2"><a href="#conf-lemmatizer-cache">12.3.8. lemmatizer_cache</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#confgroup-searchd">12.4. <code class="filename">searchd</code> program configuration options</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#conf-listen">12.4.1. listen</a></span></dt>
- <dt><span class="sect2"><a href="#conf-log">12.4.2. log</a></span></dt>
- <dt><span class="sect2"><a href="#conf-query-log">12.4.3. query_log</a></span></dt>
- <dt><span class="sect2"><a href="#conf-query-log-format">12.4.4. query_log_format</a></span></dt>
- <dt><span class="sect2"><a href="#conf-read-timeout">12.4.5. read_timeout</a></span></dt>
- <dt><span class="sect2"><a href="#conf-client-timeout">12.4.6. client_timeout</a></span></dt>
- <dt><span class="sect2"><a href="#conf-max-children">12.4.7. max_children</a></span></dt>
- <dt><span class="sect2"><a href="#conf-pid-file">12.4.8. pid_file</a></span></dt>
- <dt><span class="sect2"><a href="#conf-seamless-rotate">12.4.9. seamless_rotate</a></span></dt>
- <dt><span class="sect2"><a href="#conf-preopen-indexes">12.4.10. preopen_indexes</a></span></dt>
- <dt><span class="sect2"><a href="#conf-unlink-old">12.4.11. unlink_old</a></span></dt>
- <dt><span class="sect2"><a href="#conf-attr-flush-period">12.4.12. attr_flush_period</a></span></dt>
- <dt><span class="sect2"><a href="#conf-max-packet-size">12.4.13. max_packet_size</a></span></dt>
- <dt><span class="sect2"><a href="#conf-mva-updates-pool">12.4.14. mva_updates_pool</a></span></dt>
- <dt><span class="sect2"><a href="#conf-max-filters">12.4.15. max_filters</a></span></dt>
- <dt><span class="sect2"><a href="#conf-max-filter-values">12.4.16. max_filter_values</a></span></dt>
- <dt><span class="sect2"><a href="#conf-listen-backlog">12.4.17. listen_backlog</a></span></dt>
- <dt><span class="sect2"><a href="#conf-read-buffer">12.4.18. read_buffer</a></span></dt>
- <dt><span class="sect2"><a href="#conf-read-unhinted">12.4.19. read_unhinted</a></span></dt>
- <dt><span class="sect2"><a href="#conf-max-batch-queries">12.4.20. max_batch_queries</a></span></dt>
- <dt><span class="sect2"><a href="#conf-subtree-docs-cache">12.4.21. subtree_docs_cache</a></span></dt>
- <dt><span class="sect2"><a href="#conf-subtree-hits-cache">12.4.22. subtree_hits_cache</a></span></dt>
- <dt><span class="sect2"><a href="#conf-workers">12.4.23. workers</a></span></dt>
- <dt><span class="sect2"><a href="#conf-dist-threads">12.4.24. dist_threads</a></span></dt>
- <dt><span class="sect2"><a href="#conf-binlog-path">12.4.25. binlog_path</a></span></dt>
- <dt><span class="sect2"><a href="#conf-binlog-flush">12.4.26. binlog_flush</a></span></dt>
- <dt><span class="sect2"><a href="#conf-binlog-max-log-size">12.4.27. binlog_max_log_size</a></span></dt>
- <dt><span class="sect2"><a href="#conf-snippets-file-prefix">12.4.28. snippets_file_prefix</a></span></dt>
- <dt><span class="sect2"><a href="#conf-collation-server">12.4.29. collation_server</a></span></dt>
- <dt><span class="sect2"><a href="#conf-collation-libc-locale">12.4.30. collation_libc_locale</a></span></dt>
- <dt><span class="sect2"><a href="#conf-plugin-dir">12.4.31. plugin_dir</a></span></dt>
- <dt><span class="sect2"><a href="#conf-mysql-version-string">12.4.32. mysql_version_string</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-flush-period">12.4.33. rt_flush_period</a></span></dt>
- <dt><span class="sect2"><a href="#conf-thread-stack">12.4.34. thread_stack</a></span></dt>
- <dt><span class="sect2"><a href="#conf-expansion-limit">12.4.35. expansion_limit</a></span></dt>
- <dt><span class="sect2"><a href="#conf-watchdog">12.4.36. watchdog</a></span></dt>
- <dt><span class="sect2"><a href="#conf-prefork-rotation-throttle">12.4.37. prefork_rotation_throttle</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sphinxql-state">12.4.38. sphinxql_state</a></span></dt>
- <dt><span class="sect2"><a href="#conf-ha-ping-interval">12.4.39. ha_ping_interval</a></span></dt>
- <dt><span class="sect2"><a href="#conf-ha-period-karma">12.4.40. ha_period_karma</a></span></dt>
- <dt><span class="sect2"><a href="#conf-persistent-connections-limit">12.4.41. persistent_connections_limit</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-merge-iops">12.4.42. rt_merge_iops</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-merge-maxiosize">12.4.43. rt_merge_maxiosize</a></span></dt>
- <dt><span class="sect2"><a href="#conf-predicted-time-costs">12.4.44. predicted_time_costs</a></span></dt>
- <dt><span class="sect2"><a href="#conf-shutdown-timeout">12.4.45. shutdown_timeout</a></span></dt>
- <dt><span class="sect2"><a href="#conf-ondisk-attrs-default">12.4.46. ondisk_attrs_default</a></span></dt>
- <dt><span class="sect2"><a href="#conf-query-log-min-msec">12.4.47. query_log_min_msec</a></span></dt>
- <dt><span class="sect2"><a href="#conf-agent-connect-timeout-default">12.4.48. agent_connect_timeout</a></span></dt>
- <dt><span class="sect2"><a href="#conf-agent-query-timeout-default">12.4.49. agent_query_timeout</a></span></dt>
- <dt><span class="sect2"><a href="#conf-agent-retry-count">12.4.50. agent_retry_count</a></span></dt>
- <dt><span class="sect2"><a href="#conf-agent-retry-delay">12.4.51. agent_retry_delay</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#confgroup-common">12.5. Common section configuration options</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#conf-lemmatizer-base">12.5.1. lemmatizer_base</a></span></dt>
- <dt><span class="sect2"><a href="#conf-on-json-attr-error">12.5.2. on_json_attr_error</a></span></dt>
- <dt><span class="sect2"><a href="#conf-json-autoconv-numbers">12.5.3. json_autoconv_numbers</a></span></dt>
- <dt><span class="sect2"><a href="#conf-json-autoconv-keynames">12.5.4. json_autoconv_keynames</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rlp-root">12.5.5. rlp_root</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rlp-environment">12.5.6. rlp_environment</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rlp-max-batch-size">12.5.7. rlp_max_batch_size</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rlp-max-batch-docs">12.5.8. rlp_max_batch_docs</a></span></dt>
- </dl></dd></dl></dd><dt><span class="appendix"><a href="#changelog">A. Sphinx revision history</a></span></dt>
- <dd><dl><dt><span class="sect1"><a href="#rel226">A.1. Version 2.2.6-release, 13 nov 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel225">A.2. Version 2.2.5-release, 06 oct 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel224">A.3. Version 2.2.4-release, 11 sep 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel223">A.4. Version 2.2.3-beta, 13 may 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel222">A.5. Version 2.2.2-beta, 11 feb 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel221">A.6. Version 2.2.1-beta, 13 nov 2013</a></span></dt>
- <dt><span class="sect1"><a href="#rel219">A.7. Version 2.1.9-release, 03 jul 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel218">A.8. Version 2.1.8-release, 28 apr 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel217">A.9. Version 2.1.7-release, 30 mar 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel216">A.10. Version 2.1.6-release, 24 feb 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel215">A.11. Version 2.1.5-release, 22 jan 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel214">A.12. Version 2.1.4-release, 18 dec 2013</a></span></dt>
- <dt><span class="sect1"><a href="#rel213">A.13. Version 2.1.3-release, 12 nov 2013</a></span></dt>
- <dt><span class="sect1"><a href="#rel212">A.14. Version 2.1.2-release, 10 oct 2013</a></span></dt>
- <dt><span class="sect1"><a href="#rel211">A.15. Version 2.1.1-beta, 20 feb 2013</a></span></dt>
- <dt><span class="sect1"><a href="#rel2011">A.16. Version 2.0.11-dev, xx xxx xxxx</a></span></dt>
- <dt><span class="sect1"><a href="#rel2010">A.17. Version 2.0.10-release, 22 jan 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel209">A.18. Version 2.0.9-release, 26 aug 2013</a></span></dt>
- <dt><span class="sect1"><a href="#rel208">A.19. Version 2.0.8-release, 26 apr 2013</a></span></dt>
- <dt><span class="sect1"><a href="#rel207">A.20. Version 2.0.7-release, 26 mar 2013</a></span></dt>
- <dt><span class="sect1"><a href="#rel206">A.21. Version 2.0.6-release, 22 oct 2012</a></span></dt>
- <dt><span class="sect1"><a href="#rel205">A.22. Version 2.0.5-release, 28 jul 2012</a></span></dt>
- <dt><span class="sect1"><a href="#rel204">A.23. Version 2.0.4-release, 02 mar 2012</a></span></dt>
- <dt><span class="sect1"><a href="#rel203">A.24. Version 2.0.3-release, 23 dec 2011</a></span></dt>
- <dt><span class="sect1"><a href="#rel202">A.25. Version 2.0.2-beta, 15 nov 2011</a></span></dt>
- <dt><span class="sect1"><a href="#rel201">A.26. Version 2.0.1-beta, 22 apr 2011</a></span></dt>
- <dt><span class="sect1"><a href="#rel110">A.27. Version 1.10-beta, 19 jul 2010</a></span></dt>
- <dt><span class="sect1"><a href="#rel099">A.28. Version 0.9.9-release, 02 dec 2009</a></span></dt>
- <dt><span class="sect1"><a href="#rel099rc2">A.29. Version 0.9.9-rc2, 08 apr 2009</a></span></dt>
- <dt><span class="sect1"><a href="#rel099rc1">A.30. Version 0.9.9-rc1, 17 nov 2008</a></span></dt>
- <dt><span class="sect1"><a href="#rel0981">A.31. Version 0.9.8.1, 30 oct 2008</a></span></dt>
- <dt><span class="sect1"><a href="#rel098">A.32. Version 0.9.8, 14 jul 2008</a></span></dt>
- <dt><span class="sect1"><a href="#rel097">A.33. Version 0.9.7, 02 apr 2007</a></span></dt>
- <dt><span class="sect1"><a href="#rel097rc2">A.34. Version 0.9.7-rc2, 15 dec 2006</a></span></dt>
- <dt><span class="sect1"><a href="#rel097rc">A.35. Version 0.9.7-rc1, 26 oct 2006</a></span></dt>
- <dt><span class="sect1"><a href="#rel096">A.36. Version 0.9.6, 24 jul 2006</a></span></dt>
- <dt><span class="sect1"><a href="#rel096rc1">A.37. Version 0.9.6-rc1, 26 jun 2006</a></span></dt>
- </dl></dd></dl></div>
- <div class="list-of-tables"><p><b>List of Tables</b></p><dl><dt>5.1. <a href="#ranking-factors-table"></a></dt>
- </dl></div>
- <div class="list-of-examples"><p><b>List of Examples</b></p><dl><dt>3.1. <a href="#ex-ranged-queries">Ranged query usage example</a></dt>
- <dt>3.2. <a href="#ex-xmlpipe2-document">xmlpipe2 document stream</a></dt>
- <dt>3.3. <a href="#ex-live-updates">Fully automated live updates</a></dt>
- <dt>4.1. <a href="#ex-rt-updates">RT index declaration</a></dt>
- <dt>5.1. <a href="#ex-boolean-query">Boolean query example</a></dt>
- <dt>5.2. <a href="#ex-extended-query">Extended matching mode: query example</a></dt>
- </dl></div>
- <div class="chapter" title="Chapter 1. Introduction"><div class="titlepage"><div><div><h2 class="title"><a name="intro"></a>Chapter 1. Introduction</h2></div></div></div>
- <div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="sect1"><a href="#about">1.1. About</a></span></dt>
- <dt><span class="sect1"><a href="#features">1.2. Sphinx features</a></span></dt>
- <dt><span class="sect1"><a href="#getting">1.3. Where to get Sphinx</a></span></dt>
- <dt><span class="sect1"><a href="#license">1.4. License</a></span></dt>
- <dt><span class="sect1"><a href="#credits">1.5. Credits</a></span></dt>
- <dt><span class="sect1"><a href="#history">1.6. History</a></span></dt>
- </dl></div>
- <div class="sect1" title="1.1. About"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="about"></a>1.1. About</h2></div></div></div>
- <p>
- Sphinx is a full-text search engine, publicly distributed under GPL version 2.
- Commercial licensing (eg. for embedded use) is available upon request.
- </p><p>
- Technically, Sphinx is a standalone software package that provides
- fast and relevant full-text search functionality to client applications.
- It was specially designed to integrate well with SQL databases storing
- the data, and to be easily accessed by scripting languages. However, Sphinx
- does not depend on nor require any specific database to function.
- </p><p>
- Applications can access Sphinx search daemon (searchd) using any of
- the three different access methods: a) via Sphinx's own implementation of the MySQL
- network protocol (using a small SQL subset called SphinxQL; this is the recommended
- way), b) via native search API (SphinxAPI) or c) via MySQL server with a
- pluggable storage engine (SphinxSE).
- </p><p>
- Official native SphinxAPI implementations for PHP, Perl, Python, Ruby and Java
- are included within the distribution package. API is very lightweight
- so porting it to a new language is known to take a few hours or days.
- Third party API ports and plugins exist for Perl, C#, Haskell,
- Ruby-on-Rails, and possibly other languages and frameworks.
- </p><p>
- Starting from version 1.10-beta, Sphinx supports two different indexing
- backends: "disk" index backend, and "realtime" (RT) index backend.
- Disk indexes support online full-text index rebuilds, but online updates
- can only be done on non-text (attribute) data. RT indexes additionally
- allow for online full-text index updates. Previous versions only
- supported disk indexes.
- </p><p>
- Data can be loaded into disk indexes using a so-called data source.
- Built-in sources can fetch data directly from MySQL, PostgreSQL, MSSQL, ODBC
- compliant database (Oracle, etc) or a pipe in TSV or a custom XML format.
- Adding new data source drivers (eg. to natively support other DBMSes)
- is designed to be as easy as possible. RT indexes, as of 1.10-beta,
- can only be populated using SphinxQL.
- </p><p>
- As for the name, Sphinx is an acronym which is officially decoded
- as SQL Phrase Index. Yes, I know about CMU's Sphinx project.
- </p></div>
- <div class="sect1" title="1.2. Sphinx features"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="features"></a>1.2. Sphinx features</h2></div></div></div>
- <p>
- Key Sphinx features are:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>high indexing and searching performance;</p></li>
- <li class="listitem"><p>advanced indexing and querying tools (flexible and feature-rich text tokenizer, querying language, several different ranking modes, etc);</p></li>
- <li class="listitem"><p>advanced result set post-processing (SELECT with expressions, WHERE, ORDER BY, GROUP BY, HAVING etc over text search results);</p></li>
- <li class="listitem"><p>proven scalability up to billions of documents, terabytes of data, and thousands of queries per second;</p></li>
- <li class="listitem"><p>easy integration with SQL and XML data sources, and SphinxQL, SphinxAPI, or SphinxSE search interfaces;</p></li>
- <li class="listitem"><p>easy scaling with distributed searches.</p></li>
- </ul></div>
- <p>
- To expand a bit, Sphinx:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>has high indexing speed (up to 10-15 MB/sec per core on an internal benchmark);</p></li>
- <li class="listitem"><p>has high search speed (up to 150-250 queries/sec per core against 1,000,000 documents, 1.2 GB of data on an internal benchmark);</p></li>
- <li class="listitem"><p>has high scalability (biggest known cluster indexes over 3,000,000,000 documents, and busiest one peaks over 50,000,000 queries/day);</p></li>
- <li class="listitem"><p>provides good relevance ranking through combination of phrase proximity ranking and statistical (BM25) ranking;</p></li>
- <li class="listitem"><p>provides distributed searching capabilities;</p></li>
- <li class="listitem"><p>provides document excerpts (snippets) generation;</p></li>
- <li class="listitem"><p>provides searching from within application with SphinxQL or SphinxAPI interfaces, and from within MySQL with pluggable SphinxSE storage engine;</p></li>
- <li class="listitem"><p>supports boolean, phrase, word proximity and other types of queries;</p></li>
- <li class="listitem"><p>supports multiple full-text fields per document (up to 32 by default);</p></li>
- <li class="listitem"><p>supports multiple additional attributes per document (ie. groups, timestamps, etc);</p></li>
- <li class="listitem"><p>supports stopwords;</p></li>
- <li class="listitem"><p>supports morphological word forms dictionaries;</p></li>
- <li class="listitem"><p>supports tokenizing exceptions;</p></li>
- <li class="listitem"><p>supports UTF-8 encoding;</p></li>
- <li class="listitem"><p>supports stemming (stemmers for English, Russian, Czech and Arabic are built-in; and stemmers for
- French, Spanish, Portuguese, Italian, Romanian, German, Dutch, Swedish, Norwegian, Danish, Finnish and Hungarian
- are available by building third party <a class="ulink" href="http://snowball.tartarus.org/" target="_top">libstemmer library</a>);</p></li>
- <li class="listitem"><p>supports MySQL natively (all types of tables, including MyISAM, InnoDB, NDB, Archive, etc are supported);</p></li>
- <li class="listitem"><p>supports PostgreSQL natively;</p></li>
- <li class="listitem"><p>supports ODBC compliant databases (MS SQL, Oracle, etc) natively;</p></li>
- <li class="listitem"><p>...has 50+ other features not listed here, refer to the configuration manual!</p></li>
- </ul></div>
- <p>
- </p></div>
- <div class="sect1" title="1.3. Where to get Sphinx"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="getting"></a>1.3. Where to get Sphinx</h2></div></div></div>
- <p>Sphinx is available through its official Web site at <a class="ulink" href="http://sphinxsearch.com/" target="_top">http://sphinxsearch.com/</a>.
- </p><p>Currently, Sphinx distribution tarball includes the following software:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p><code class="filename">indexer</code>: a utility which creates fulltext indexes;</p></li>
- <li class="listitem"><p><code class="filename">searchd</code>: a daemon which enables external software (eg. Web applications) to search through fulltext indexes;</p></li>
- <li class="listitem"><p><code class="filename">sphinxapi</code>: a set of searchd client API libraries for popular Web scripting languages (PHP, Python, Perl, Ruby).</p></li>
- <li class="listitem"><p><code class="filename">spelldump</code>: a simple command-line tool to extract the items from an <code class="filename">ispell</code> or <code class="filename">MySpell</code>
- (as bundled with OpenOffice) format dictionary to help customize your index, for use with <a class="link" href="#conf-wordforms" title="12.2.12. wordforms">wordforms</a>.</p></li>
- <li class="listitem"><p><code class="filename">indextool</code>: a utility to dump miscellaneous debug information about the index, added in version 0.9.9-rc2.</p></li>
- <li class="listitem"><p><code class="filename">wordbreaker</code>: a utility to break down compound words into separate words, added in version 2.1.1.</p></li>
- </ul></div>
- <p>
- </p></div>
- <div class="sect1" title="1.4. License"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="license"></a>1.4. License</h2></div></div></div>
- <p>
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License,
- or (at your option) any later version. See COPYING file for details.
- </p><p>
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
- </p><p>
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software Foundation, Inc.,
- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- </p><p>
- Non-GPL licensing (for OEM/ISV embedded use) can also be arranged, please
- <a class="ulink" href="http://sphinxsearch.com/contacts.html" target="_top">contact us</a> to discuss
- commercial licensing possibilities.
- </p></div>
- <div class="sect1" title="1.5. Credits"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="credits"></a>1.5. Credits</h2></div></div></div>
- <h3><a name="idp25805760"></a>Author</h3><p>
- Sphinx initial author (and a benevolent dictator ever since):
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>Andrew Aksyonoff, <a class="ulink" href="http://shodan.ru" target="_top">http://shodan.ru</a></p></li>
- </ul></div>
- <p>
- </p><h3><a name="idp31004880"></a>Team</h3><p>
- Past and present employees of Sphinx Technologies Inc who should be
- noted for their work on Sphinx (in alphabetical order):
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>Adam Rice</p></li>
- <li class="listitem"><p>Adrian Nuta</p></li>
- <li class="listitem"><p>Alexander Klimenko</p></li>
- <li class="listitem"><p>Alexey Dvoichenkov</p></li>
- <li class="listitem"><p>Alexey Vinogradov</p></li>
- <li class="listitem"><p>Anton Tsitlionok</p></li>
- <li class="listitem"><p>Eugene Kosov</p></li>
- <li class="listitem"><p>Gloria Vinogradova</p></li>
- <li class="listitem"><p>Ilya Kuznetsov</p></li>
- <li class="listitem"><p>Kirill Shmatov</p></li>
- <li class="listitem"><p>Rich Kelm</p></li>
- <li class="listitem"><p>Stanislav Klinov</p></li>
- <li class="listitem"><p>Steven Barker</p></li>
- <li class="listitem"><p>Vladimir Fedorkov</p></li>
- <li class="listitem"><p>Yuri Schapov</p></li>
- </ul></div>
- <p>
- </p><h3><a name="idp31013856"></a>Contributors</h3><p>People who contributed to Sphinx and their contributions (in no particular order):
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>Robert "coredev" Bengtsson (Sweden), initial version of PostgreSQL data source</p></li>
- <li class="listitem"><p>Len Kranendonk, Perl API</p></li>
- <li class="listitem"><p>Dmytro Shteflyuk, Ruby API</p></li>
- </ul></div>
- <p>
- </p><p>
- Many other people have contributed ideas, bug reports, fixes, etc.
- Thank you!
- </p></div>
- <div class="sect1" title="1.6. History"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="history"></a>1.6. History</h2></div></div></div>
- <p>
- Sphinx development was started back in 2001, because I didn't manage
- to find an acceptable search solution (for a database driven Web site)
- which would meet my requirements. Actually, each and every important aspect was a problem:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>search quality (ie. good relevance)
- </p><div class="itemizedlist"><ul class="itemizedlist" type="circle"><li class="listitem"><p>statistical ranking methods performed rather badly, especially on large collections of small documents (forums, blogs, etc)</p></li>
- </ul></div>
- <p>
- </p></li>
- <li class="listitem"><p>search speed
- </p><div class="itemizedlist"><ul class="itemizedlist" type="circle"><li class="listitem"><p>especially if searching for phrases which contain stopwords, as in "to be or not to be"</p></li>
- </ul></div>
- <p>
- </p></li>
- <li class="listitem"><p>moderate disk and CPU requirements when indexing
- </p><div class="itemizedlist"><ul class="itemizedlist" type="circle"><li class="listitem"><p>important in shared hosting environment, not to mention the indexing speed.</p></li>
- </ul></div>
- <p>
- </p></li>
- </ul></div>
- <p>
- </p><p>
- Despite the amount of time passed and numerous improvements made in the
- other solutions, there's still no solution which I personally would
- be eager to migrate to.
- </p><p>
- Considering that and a lot of positive feedback received from Sphinx users
- during the last years, the obvious decision is to continue developing Sphinx
- (and, eventually, to take over the world).
- </p></div></div>
- <div class="chapter" title="Chapter 2. Installation"><div class="titlepage"><div><div><h2 class="title"><a name="installation"></a>Chapter 2. Installation</h2></div></div></div>
- <div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="sect1"><a href="#supported-system">2.1. Supported systems</a></span></dt>
- <dt><span class="sect1"><a href="#compiling-from-source">2.2. Compiling Sphinx from source</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#required-tools">2.2.1. Required tools</a></span></dt>
- <dt><span class="sect2"><a href="#compiling-source-linux">2.2.2. Compiling on Linux</a></span></dt>
- <dt><span class="sect2"><a href="#compiling-source-problems">2.2.3. Known compilation issues</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#installing-debian">2.3. Installing Sphinx packages on Debian and Ubuntu</a></span></dt>
- <dt><span class="sect1"><a href="#installing-redhat">2.4. Installing Sphinx packages on RedHat and CentOS</a></span></dt>
- <dt><span class="sect1"><a href="#installing-windows">2.5. Installing Sphinx on Windows</a></span></dt>
- <dt><span class="sect1"><a href="#sphinx-deprecations-defaults">2.6. Sphinx deprecations and changes in default configuration</a></span></dt>
- <dt><span class="sect1"><a href="#quick-tour">2.7. Quick Sphinx usage tour</a></span></dt>
- </dl></div>
- <div class="sect1" title="2.1. Supported systems"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="supported-system"></a>2.1. Supported systems</h2></div></div></div>
- <p>
- Sphinx can be compiled either from source or installed using prebuilt
- packages. Most modern UNIX systems with a C++ compiler should be able
- to compile and run Sphinx without any modifications.
- </p><p>
- Currently known systems Sphinx has been successfully running on are:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>Linux 2.4.x, 2.6.x, 3.x (many various distributions)</p></li>
- <li class="listitem"><p>Windows 2000, XP, 7, 8</p></li>
- <li class="listitem"><p>FreeBSD 4.x, 5.x, 6.x, 7.x, 8.x</p></li>
- <li class="listitem"><p>NetBSD 1.6, 3.0</p></li>
- <li class="listitem"><p>Solaris 9, 11</p></li>
- <li class="listitem"><p>Mac OS X</p></li>
- </ul></div>
- <p>
- </p><p>
- CPU architectures known to work include i386 (aka x86), amd64 (aka x86_64),
- SPARC64, and ARM.
- </p><p>
- Chances are good that Sphinx should work on other Unix platforms and/or
- CPU architectures just as well. Please report any other platforms that
- worked for you!
- </p><p>
- All platforms are production quality. There are no principal functional
- limitations on any platform.
- </p></div>
- <div class="sect1" title="2.2. Compiling Sphinx from source"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="compiling-from-source"></a>2.2. Compiling Sphinx from source</h2></div></div></div>
- <div class="sect2" title="2.2.1. Required tools"><div class="titlepage"><div><div><h3 class="title"><a name="required-tools"></a>2.2.1. Required tools</h3></div></div></div>
- <p>
- On UNIX, you will need the following tools to build
- and install Sphinx:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>a working C++ compiler. GNU gcc and clang are known to work.</p></li>
- <li class="listitem"><p>a good make program. GNU make is known to work.</p></li>
- </ul></div>
- <p>
- </p><p>
- On Windows, you will need Microsoft Visual C/C++ Studio .NET 2005 or above.
- Other compilers/environments will probably work as well, but for the
- time being, you will have to build makefile (or other environment
- specific project files) manually.
- </p></div>
- <div class="sect2" title="2.2.2. Compiling on Linux"><div class="titlepage"><div><div><h3 class="title"><a name="compiling-source-linux"></a>2.2.2. Compiling on Linux</h3></div></div></div>
- <div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>
- Extract everything from the distribution tarball (haven't you already?)
- and go to the <code class="filename">sphinx</code> subdirectory. (We are using
- version 2.2.1-beta here for the sake of example only; be sure to change this
- to a specific version you're using.)
- </p><div class="literallayout"><p><strong class="userinput"><code>$ tar xzvf sphinx-2.2.1-beta.tar.gz<br>
- $ cd sphinx<br>
- </code></strong></p></div></li>
- <li class="listitem"><p>Run the configuration program:</p><div class="literallayout"><p><strong class="userinput"><code>$ ./configure</code></strong></p></div>
- <p>
- There's a number of options to configure. The complete listing may
- be obtained by using <code class="option">--help</code> switch. The most important ones are:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p><code class="option">--prefix</code>, which specifies where to install Sphinx; such as <code class="option">--prefix=/usr/local/sphinx</code> (all of the examples use this prefix)</p></li>
- <li class="listitem"><p><code class="option">--with-mysql</code>, which specifies where to look for MySQL include and library files, if auto-detection fails;</p></li>
- <li class="listitem"><p><code class="option">--with-static-mysql</code>, which builds Sphinx with statically linked MySQL support;</p></li>
- <li class="listitem"><p><code class="option">--with-pgsql</code>, which specifies where to look for PostgreSQL include and library files.</p></li>
- <li class="listitem"><p><code class="option">--with-static-pgsql</code>, which builds Sphinx with statically linked PostgreSQL support;</p></li>
- </ul></div>
- <p>
- </p></li>
- <li class="listitem"><p>Build the binaries:</p><div class="literallayout"><p><strong class="userinput"><code>$ make</code></strong></p></div></li>
- <li class="listitem"><p>Install the binaries in the directory of your choice:
- (defaults to <code class="filename">/usr/local/bin/</code> on *nix systems,
- but is overridden with <code class="option">configure --prefix</code>)</p><div class="literallayout"><p><strong class="userinput"><code>$ make install</code></strong></p></div></li>
- </ol></div></div>
- <div class="sect2" title="2.2.3. Known compilation issues"><div class="titlepage"><div><div><h3 class="title"><a name="compiling-source-problems"></a>2.2.3. Known compilation issues</h3></div></div></div>
- <p>
- If <code class="filename">configure</code> fails to locate MySQL headers and/or libraries,
- try checking for and installing <code class="filename">mysql-devel</code> package. On some systems,
- it is not installed by default.
- </p><p>
- If <code class="filename">make</code> fails with a message which looks like
- </p><pre class="programlisting">
- /bin/sh: g++: command not found
- make[1]: *** [libsphinx_a-sphinx.o] Error 127
- </pre><p>
- try checking for and installing <code class="filename">gcc-c++</code> package.
- </p><p>
- If you are getting compile-time errors which look like
- </p><pre class="programlisting">
- sphinx.cpp:67: error: invalid application of `sizeof' to
- incomplete type `Private::SizeError&lt;false&gt;'
- </pre><p>
- this means that some compile-time type size check failed.
- The most probable reason is that off_t type is less than 64-bit
- on your system. As a quick hack, you can edit sphinx.h and replace off_t
- with DWORD in a typedef for SphOffset_t, but note that this will prohibit
- you from using full-text indexes larger than 2 GB. Even if the hack helps,
- please report such issues, providing the exact error message and
- compiler/OS details, so I could properly fix them in next releases.
- </p><p>
- If you keep getting any other error, or the suggestions above
- do not seem to help you, please don't hesitate to contact me.
- </p></div></div>
- <div class="sect1" title="2.3. Installing Sphinx packages on Debian and Ubuntu"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="installing-debian"></a>2.3. Installing Sphinx packages on Debian and Ubuntu</h2></div></div></div>
- <p>There are two ways of getting Sphinx for Ubuntu: regular deb packages and the Launchpad PPA repository.</p><p>Deb packages:</p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>Sphinx requires a few libraries to be installed on Debian/Ubuntu. Use apt-get to download and install these dependencies:</p><strong class="userinput"><code>$ sudo apt-get install mysql-client unixodbc libpq5</code></strong></li>
- <li class="listitem"><p>Now you can install Sphinx:</p><strong class="userinput"><code>$ sudo dpkg -i sphinxsearch_2.2.1-beta-0ubuntu11~precise_amd64.deb</code></strong></li>
- </ol></div>
- <p>PPA repository (Ubuntu only).</p><p>Installing Sphinx is much easier from Sphinxsearch PPA repository, because you will get all dependencies and can also update Sphinx to the latest version with the same command.</p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>First, add Sphinxsearch repository and update the list of packages:</p><p><strong class="userinput"><code>$ sudo add-apt-repository ppa:builds/sphinxsearch-daily</code></strong></p><p><strong class="userinput"><code>$ sudo apt-get update</code></strong></p></li>
- <li class="listitem"><p>Install/update sphinxsearch package:</p><p><strong class="userinput"><code>$ sudo apt-get install sphinxsearch</code></strong></p></li>
- </ol></div>
- <p>Sphinx <code class="filename">searchd</code> daemon can be started/stopped using service command:</p><p><strong class="userinput"><code>$ sudo service sphinxsearch start</code></strong></p></div>
- <div class="sect1" title="2.4. Installing Sphinx packages on RedHat and CentOS"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="installing-redhat"></a>2.4. Installing Sphinx packages on RedHat and CentOS</h2></div></div></div>
- <p>Currently we distribute Sphinx RPMS and SRPMS on our website for both 5.x and 6.x
- versions of Red Hat Enterprise Linux, but they can be installed on CentOS as well.</p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>Before installation make sure you have these packages installed:</p><p><strong class="userinput"><code>$ yum install postgresql-libs unixODBC</code></strong></p></li>
- <li class="listitem"><p>Download RedHat RPM from Sphinx website and install it:</p><p><strong class="userinput"><code>$ rpm -Uhv sphinx-2.2.1-1.rhel6.x86_64.rpm</code></strong></p></li>
- <li class="listitem"><p>After preparing configuration file (see <a class="link" href="#quick-tour" title="2.7. Quick Sphinx usage tour">Quick tour</a>), you can start searchd daemon:</p><p><strong class="userinput"><code>$ service searchd start</code></strong></p></li>
- </ol></div></div>
- <div class="sect1" title="2.5. Installing Sphinx on Windows"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="installing-windows"></a>2.5. Installing Sphinx on Windows</h2></div></div></div>
- <p>Installing Sphinx on a Windows server is often easier than installing on a Linux environment;
- unless you are preparing code patches, you can use the pre-compiled binary files from the Downloads
- area on the website.</p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>Extract everything from the .zip file you have downloaded -
- <code class="filename">sphinx-2.2.1-beta-win32.zip</code>,
- or <code class="filename">sphinx-2.2.1-beta-win32-pgsql.zip</code> if you need PostgreSQL support as well.
- (We are using version 2.2.1-beta here for the sake of example only;
- be sure to change this to a specific version you're using.)
- You can use Windows Explorer in Windows XP and up to extract the files,
- or a freeware package like 7Zip to open the archive.</p><p>For the remainder of this guide, we will assume that the folders are unzipped into <code class="filename">C:\Sphinx</code>,
- such that <code class="filename">searchd.exe</code> can be found in <code class="filename">C:\Sphinx\bin\searchd.exe</code>. If you decide
- to use any different location for the folders or configuration file, please change it accordingly.</p></li>
- <li class="listitem"><p>Edit the contents of sphinx.conf.in - specifically entries relating to @CONFDIR@ - to paths suitable for your system.</p></li>
- <li class="listitem"><p>Install the <code class="filename">searchd</code> system as a Windows service:</p><p><strong class="userinput"><code>C:\Sphinx\bin> C:\Sphinx\bin\searchd --install --config C:\Sphinx\sphinx.conf.in --servicename SphinxSearch</code></strong></p></li>
- <li class="listitem"><p>The <code class="filename">searchd</code> service will now be listed in the Services panel
- within the Management Console, available from Administrative Tools. It will not have been
- started, as you will need to configure it and build your indexes with <code class="filename">indexer</code>
- before starting the service. A guide to do this can be found under
- <a class="link" href="#quick-tour" title="2.7. Quick Sphinx usage tour">Quick tour</a>.</p><p>During the next steps of the install (which involve running indexer pretty much as
- you would on Linux) you may find that you get an error relating to libmysql.dll not being found.
- If you have MySQL installed, you should find a copy of this library in your Windows directory,
- or sometimes in Windows\System32, or failing that in the MySQL core directories. If you
- do receive an error please copy libmysql.dll into the bin directory.</p></li>
- </ol></div></div>
- <div class="sect1" title="2.6. Sphinx deprecations and changes in default configuration"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinx-deprecations-defaults"></a>2.6. Sphinx deprecations and changes in default configuration</h2></div></div></div>
- <p>
- In 2.2.1-beta version we decided to start removing some old features. All
- of them were 'unofficially' deprecated for some time, and we're informing
- you about it now.
- </p><p>
- Changes are as follows:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>32-bit document IDs are now deprecated. Our binary releases
- are now all built with 64-bit IDs by default. Note that they can still
- load older indexes with 32-bit IDs, but that support will eventually be
- removed. In fact, that was deprecated a while ago, but now we just want to
- make it clear: we don't see any sense in trying to save your server's RAM
- this way.</p></li>
- <li class="listitem"><p>dict=crc is now deprecated. It has a bunch of limitations,
- the most important ones being keyword collisions, and no (good) wildcard
- matching support. You can read more about those limitations in our
- documentation.</p></li>
- <li class="listitem"><p>charset_type=sbcs is now deprecated, we're slowly switching
- to UTF-only. Even if your database is SBCS (likely for legacy reasons
- too, eh?), this should be absolutely trivial to work around, just add a
- pre-query to fetch your data in UTF-8 and you're all set. Also, in fact,
- our current UTF-8 tokenizer is even faster than the SBCS one.</p></li>
- <li class="listitem"><p>custom sort (@custom) is now removed from Sphinx. This
- feature was introduced long before sort by expression became a reality
- and it has been deprecated for a very long time.</p></li>
- <li class="listitem"><p>enable_star is deprecated now. Previous default mode was
- enable_star=0 which was due to compatibility with a very old Sphinx
- version. Such implicit star search isn't very intuitive. So, we've decided
- to eventually remove it and have marked it as deprecated just recently. We plan
- to totally remove this configuration key in the 2.2.X branch.</p></li>
- <li class="listitem"><p>str2ordinal attributes are deprecated. This feature allows
- you to perform sorting by a string. But it's also possible to do this with
- ordinary string attributes, which is much easier to use. str2ordinal only
- covers a small part of this functionality and is not needed now.</p></li>
- <li class="listitem"><p>str2wordcount attributes are deprecated.
- <a class="link" href="#conf-index-field-lengths" title="12.2.63. index_field_lengths">index_field_lengths=1</a>
- will create an integer attribute with field length set automatically and we
- recommend using this configuration key when you need to store field
- lengths. Also, index_field_lengths=1 allows you to use new ranking formulas
- like BM25F().</p></li>
- <li class="listitem"><p>hit_format is deprecated. This is a hidden configuration
- key - it's not mentioned in our documentation. But, it's there and it's
- possible that someone may use it. And now we're urging you: don't use it.
- The default value is 'inline' and it's a new standard. 'plain' hit_format
- is obsolete and will be removed in the near future.</p></li>
- <li class="listitem"><p>docinfo=inline is deprecated. You can now use
- <a class="link" href="#conf-ondisk-attrs" title="12.2.68. ondisk_attrs">ondisk_attrs</a> or
- <a class="link" href="#conf-ondisk-attrs-default" title="12.4.46. ondisk_attrs_default">ondisk_attrs_default</a> instead.</p></li>
- <li class="listitem"><p>workers=threads is a new default for all OS now.
- We're gonna get rid of other modes in future.</p></li>
- <li class="listitem"><p>mem_limit=128M is a new default.</p></li>
- <li class="listitem"><p>rt_mem_limit=128M is a new default.</p></li>
- <li class="listitem"><p>ondisk_dict is deprecated. No need to save RAM this way.</p></li>
- <li class="listitem"><p>ondisk_dict_default is deprecated. No need to save RAM this way.
- </p></li>
- <li class="listitem"><p>compat_sphinxql_magics was removed. Now you can't use an old
- result format and SphinxQL always looks more like ANSI SQL.</p></li>
- <li class="listitem"><p>Completely removed xmlpipe. This was a very old ad hoc solution
- for a particular customer. xmlpipe2 surpasses it in every single aspect.</p></li>
- </ul></div>
- <p>
- </p><p>None of the different querying methods are deprecated, but as of
- version 2.2.1-beta, SphinxQL is the most advanced method. We plan to
- remove SphinxAPI and Sphinx SE someday so it would be a good idea to
- start using SphinxQL.</p><p>
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>The SetWeights() API call has been deprecated for a long
- time and has now been removed from official APIs.</p></li>
- <li class="listitem"><p>The default matching mode for the API is now 'extended'.
- Actually, all other modes are deprecated. We recommend using the
- <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">extended query syntax</a> instead.
- </p></li>
- </ul></div>
- <p>
- </p><p>
- Changes for 2.2.2-beta:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>Removed deprecated "address" and "port" directives.
- Use "listen" instead.</p></li>
- <li class="listitem"><p>Removed str2wordcount attributes.
- Use <a class="link" href="#conf-index-field-lengths" title="12.2.63. index_field_lengths">index_field_lengths=1</a>
- instead.</p></li>
- <li class="listitem"><p>Removed str2ordinal attributes. Use string attributes
- for sorting.</p></li>
- <li class="listitem"><p>ondisk_dict and ondisk_dict_default were removed.
- </p></li>
- <li class="listitem"><p>Removed charset_type and mssql_unicode - we now support
- only UTF-8 encoding.</p></li>
- <li class="listitem"><p>Removed deprecated enable_star. Sphinx now always works as
- with enable_star=1.</p></li>
- <li class="listitem"><p>Removed CLI search (which confused people instead of
- helping them) and sql_query_info.</p></li>
- <li class="listitem"><p>Deprecated SetMatchMode() API call.</p></li>
- <li class="listitem"><p>Changed default <a class="link" href="#conf-thread-stack" title="12.4.34. thread_stack">thread_stack
- </a> value to 1M.</p></li>
- <li class="listitem"><p>Deprecated SetOverride() API call.</p></li>
- </ul></div>
- <p>
- </p><p>
- Changes for 2.2.3-beta:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>Removed unneeded max_matches key from config file.</p></li>
- </ul></div>
- <p>
- </p></div>
- <div class="sect1" title="2.7. Quick Sphinx usage tour"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="quick-tour"></a>2.7. Quick Sphinx usage tour</h2></div></div></div>
- <p>
- All the example commands below assume that you installed Sphinx
- in <code class="filename">/usr/local/sphinx</code>, so <code class="filename">searchd</code> can
- be found in <code class="filename">/usr/local/sphinx/bin/searchd</code>.
- </p><p>
- To use Sphinx, you will need to:
- </p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>Create a configuration file.</p><p>
- Default configuration file name is <code class="filename">sphinx.conf</code>.
- All Sphinx programs look for this file in current working directory
- by default.
- </p><p>
- Sample configuration file, <code class="filename">sphinx.conf.dist</code>, which has
- all the options documented, is created by <code class="filename">configure</code>.
- Copy and edit that sample file to make your own configuration: (assuming Sphinx is installed into <code class="filename">/usr/local/sphinx/</code>)
- </p><div class="literallayout"><p><strong class="userinput"><code>$ cd /usr/local/sphinx/etc<br>
- $ cp sphinx.conf.dist sphinx.conf<br>
- $ vi sphinx.conf</code></strong></p></div>
- <p>
- Sample configuration file is set up to index <code class="filename">documents</code>
- table from MySQL database <code class="filename">test</code>; so there's <code class="filename">example.sql</code>
- sample data file to populate that table with a few documents for testing purposes:
- </p><div class="literallayout"><p><strong class="userinput"><code>$ mysql -u test < /usr/local/sphinx/etc/example.sql</code></strong></p></div></li>
- <li class="listitem"><p>Run the indexer to create full-text index from your data:</p><div class="literallayout"><p><strong class="userinput"><code>$ cd /usr/local/sphinx/etc<br>
- $ /usr/local/sphinx/bin/indexer --all</code></strong></p></div></li>
- <li class="listitem"><p>Query your newly created index!</p></li>
- </ol></div>
- <p>Now query your indexes!</p><p>Connect to server:</p><div class="literallayout"><p><strong class="userinput"><code>$ mysql -h0 -P9306</code></strong></p></div>
- <div class="literallayout"><p><strong class="userinput"><code>SELECT * FROM test1 WHERE MATCH('my document');</code></strong></p></div>
- <div class="literallayout"><p><strong class="userinput"><code>INSERT INTO rt VALUES (1, 'this is', 'a sample text', 11);</code></strong></p></div>
- <div class="literallayout"><p><strong class="userinput"><code>INSERT INTO rt VALUES (2, 'some more', 'text here', 22);</code></strong></p></div>
- <div class="literallayout"><p><strong class="userinput"><code>SELECT gid/11 FROM rt WHERE MATCH('text') GROUP BY gid;</code></strong></p></div>
- <div class="literallayout"><p><strong class="userinput"><code>SELECT * FROM rt ORDER BY gid DESC;</code></strong></p></div>
- <div class="literallayout"><p><strong class="userinput"><code>SHOW TABLES;</code></strong></p></div>
- <div class="literallayout"><p><strong class="userinput"><code>SELECT *, WEIGHT() FROM test1 WHERE MATCH('"document one"/1');SHOW META;</code></strong></p></div>
- <div class="literallayout"><p><strong class="userinput"><code>SET profiling=1;SELECT * FROM test1 WHERE id IN (1,2,4);SHOW PROFILE;</code></strong></p></div>
- <div class="literallayout"><p><strong class="userinput"><code>SELECT id, id%3 idd FROM test1 WHERE MATCH('this is | nothing') GROUP BY idd;SHOW PROFILE;</code></strong></p></div>
- <div class="literallayout"><p><strong class="userinput"><code>SELECT id FROM test1 WHERE MATCH('is this a good plan?');SHOW PLAN;</code></strong></p></div>
- <div class="literallayout"><p><strong class="userinput"><code>SELECT COUNT(*) c, id%3 idd FROM test1 GROUP BY idd HAVING COUNT(*)>1;</code></strong></p></div>
- <div class="literallayout"><p><strong class="userinput"><code>SELECT COUNT(*) FROM test1;</code></strong></p></div>
- <div class="literallayout"><p><strong class="userinput"><code>CALL KEYWORDS ('one two three', 'test1');</code></strong></p></div>
- <div class="literallayout"><p><strong class="userinput"><code>CALL KEYWORDS ('one two three', 'test1', 1);</code></strong></p></div>
- <p>
- Happy searching!
- </p></div></div>
- <div class="chapter" title="Chapter 3. Indexing"><div class="titlepage"><div><div><h2 class="title"><a name="indexing"></a>Chapter 3. Indexing</h2></div></div></div>
- <div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="sect1"><a href="#sources">3.1. Data sources</a></span></dt>
- <dt><span class="sect1"><a href="#fields">3.2. Full-text fields</a></span></dt>
- <dt><span class="sect1"><a href="#attributes">3.3. Attributes</a></span></dt>
- <dt><span class="sect1"><a href="#mva">3.4. MVA (multi-valued attributes)</a></span></dt>
- <dt><span class="sect1"><a href="#indexes">3.5. Indexes</a></span></dt>
- <dt><span class="sect1"><a href="#data-restrictions">3.6. Restrictions on the source data</a></span></dt>
- <dt><span class="sect1"><a href="#charsets">3.7. Charsets, case folding, translation tables, and replacement rules</a></span></dt>
- <dt><span class="sect1"><a href="#sql">3.8. SQL data sources (MySQL, PostgreSQL)</a></span></dt>
- <dt><span class="sect1"><a href="#xmlpipe2">3.9. xmlpipe2 data source</a></span></dt>
- <dt><span class="sect1"><a href="#tsvpipe">3.10. tsvpipe (Tab Separated Values) data source</a></span></dt>
- <dt><span class="sect1"><a href="#live-updates">3.11. Live index updates</a></span></dt>
- <dt><span class="sect1"><a href="#delta-updates">3.12. Delta index updates</a></span></dt>
- <dt><span class="sect1"><a href="#index-merging">3.13. Index merging</a></span></dt>
- </dl></div>
- <div class="sect1" title="3.1. Data sources"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sources"></a>3.1. Data sources</h2></div></div></div>
- <p>
- The data to be indexed can generally come from very different
- sources: SQL databases, plain text files, HTML files, mailboxes,
- and so on. From Sphinx's point of view, the data it indexes is a
- set of structured <em class="glossterm">documents</em>, each of which has the
- same set of <em class="glossterm">fields</em> and <em class="glossterm">attributes</em>.
- This is similar to SQL, where each row would correspond to a document,
- and each column to either a field or an attribute.
- </p><p>
- Depending on what source Sphinx should get the data from,
- different code is required to fetch the data and prepare it for indexing.
- This code is called <em class="glossterm">data source driver</em> (or simply
- <em class="glossterm">driver</em> or <em class="glossterm">data source</em> for brevity).
- </p><p>
- At the time of this writing, there are built-in drivers for
- MySQL, PostgreSQL, MS SQL (on Windows), and ODBC. There is also
- a generic driver called xmlpipe2, which runs a specified command
- and reads the data from its <code class="filename">stdout</code>.
- See <a class="xref" href="#xmlpipe2" title="3.9. xmlpipe2 data source">Section 3.9, “xmlpipe2 data source”</a> for the format description.
- In 2.2.1-beta a tsvpipe (Tab Separated Values) data source was added.
- You can get more information in <a class="xref" href="#tsvpipe" title="3.10. tsvpipe (Tab Separated Values) data source">Section 3.10, “tsvpipe (Tab Separated Values) data source”</a>.
- </p><p>
- There can be as many sources per index as necessary. They will be
- sequentially processed in the very same order which was specified in
- index definition. All the documents coming from those sources
- will be merged as if they were coming from a single source.
- </p></div>
- <div class="sect1" title="3.2. Full-text fields"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="fields"></a>3.2. Full-text fields</h2></div></div></div>
- <p>
- Full-text fields (or just <em class="glossterm">fields</em> for brevity)
- are the textual document contents that get indexed by Sphinx, and can be
- (quickly) searched for keywords.
- </p><p>
- Fields are named, and you can limit your searches to a single
- field (eg. search through "title" only) or a subset of fields
- (eg. to "title" and "abstract" only). Sphinx index format generally
- supports up to 256 fields. However, up to version 2.0.1-beta indexes
- were forcibly limited to 32 fields, because of certain complications
- in the matching engine. Full support for up to 256 fields was added
- in version 2.0.2-beta.
- </p><p>
- Note that the original contents of the fields are <span class="bold"><strong>not</strong></span> stored
- in the Sphinx index. The text that you send to Sphinx gets processed,
- and a full-text index (a special data structure that enables quick
- searches for a keyword) gets built from that text. But the original
- text contents are then simply discarded. Sphinx assumes that you store
- those contents elsewhere anyway.
- </p><p>
- Moreover, it is impossible to <span class="emphasis"><em>fully</em></span> reconstruct
- the original text, because the specific whitespace, capitalization,
- punctuation, etc will all be lost during indexing. It is theoretically
- possible to partially reconstruct a given document from the Sphinx
- full-text index, but that would be a slow process (especially if
- the <a class="link" href="#conf-dict" title="12.2.7. dict">CRC dictionary</a> is used,
- which does not even store the original keywords and works with
- their hashes instead).
- </p></div>
- <div class="sect1" title="3.3. Attributes"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="attributes"></a>3.3. Attributes</h2></div></div></div>
- <p>
- Attributes are additional values associated with each document
- that can be used to perform additional filtering and sorting during search.
- </p><p>
- It is often desired to additionally process full-text search results
- based not only on matching document ID and its rank, but on a number
- of other per-document values as well. For instance, one might need to
- sort news search results by date and then relevance,
- or search through products within specified price range,
- or limit blog search to posts made by selected users,
- or group results by month. To do that efficiently, Sphinx allows you
- to attach a number of additional <em class="glossterm">attributes</em>
- to each document, and store their values in the full-text index.
- It's then possible to use stored values to filter, sort,
- or group full-text matches.
- </p><p>Attributes, unlike the fields, are not full-text indexed. They
- are stored in the index, but it is not possible to search them as full-text,
- and attempting to do so results in an error.</p><p>For example, it is impossible to use the extended matching mode expression
- <code class="option">@column 1</code> to match documents where column is 1, if column is an
- attribute, and this is still true even if the numeric digits are normally indexed.</p><p>Attributes can be used for filtering, though, to restrict returned
- rows, as well as sorting or <a class="link" href="#clustering" title="5.7. Grouping (clustering) search results">result grouping</a>;
- it is entirely possible to sort results purely based on attributes, and ignore the search
- relevance tools. Additionally, attributes are returned from the search daemon, while the
- indexed text is not.</p><p>
- A good example for attributes would be a forum posts table. Assume
- that only title and content fields need to be full-text searchable -
- but that sometimes it is also required to limit search to a certain
- author or a sub-forum (ie. search only those rows that have some
- specific values of author_id or forum_id columns in the SQL table);
- or to sort matches by post_date column; or to group matching posts
- by month of the post_date and calculate per-group match counts.
- </p><p>
- This can be achieved by specifying all the mentioned columns
- (excluding title and content, that are full-text fields) as
- attributes, indexing them, and then using API calls to
- set up filtering, sorting, and grouping. Here is an example.
- </p><h3><a name="idp31184032"></a>Example sphinx.conf part:</h3><pre class="programlisting">
- ...
- sql_query = SELECT id, title, content, \
- author_id, forum_id, post_date FROM my_forum_posts
- sql_attr_uint = author_id
- sql_attr_uint = forum_id
- sql_attr_timestamp = post_date
- ...
- </pre><h3><a name="idp31185248"></a>Example application code (in PHP):</h3><pre class="programlisting">
- // only search posts by author whose ID is 123
- $cl->SetFilter ( "author_id", array ( 123 ) );
- // only search posts in sub-forums 1, 3 and 7
- $cl->SetFilter ( "forum_id", array ( 1,3,7 ) );
- // sort found posts by posting date in descending order
- $cl->SetSortMode ( SPH_SORT_ATTR_DESC, "post_date" );
- </pre><p>
- Attributes are named. Attribute names are case insensitive.
- Attributes are <span class="emphasis"><em>not</em></span> full-text indexed; they are stored in the index as is.
- Currently supported attribute types are:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>unsigned integers (1-bit to 32-bit wide);</p></li>
- <li class="listitem"><p>UNIX timestamps;</p></li>
- <li class="listitem"><p>floating point values (32-bit, IEEE 754 single precision);</p></li>
- <li class="listitem"><p><a class="link" href="#conf-sql-attr-string" title="12.1.23. sql_attr_string">strings</a> (since 1.10-beta);</p></li>
- <li class="listitem"><p><a class="link" href="#conf-sql-attr-json" title="12.1.24. sql_attr_json">JSON</a> (since 2.1.1-beta);</p></li>
- <li class="listitem"><p><a class="link" href="#mva" title="3.4. MVA (multi-valued attributes)">MVA</a>, multi-value attributes (variable-length lists of 32-bit unsigned integers).</p></li>
- </ul></div>
- <p>
- </p><p>
- The complete set of per-document attribute values is sometimes
- referred to as <em class="glossterm">docinfo</em>. Docinfos can either be
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>stored separately from the main full-text index data ("extern" storage, in <code class="filename">.spa</code> file), or</p></li>
- <li class="listitem"><p>attached to each occurrence of document ID in full-text index data ("inline" storage, in <code class="filename">.spd</code> file).</p></li>
- </ul></div>
- <p>
- </p><p>
- When using extern storage, a copy of <code class="filename">.spa</code> file
- (with all the attribute values for all the documents) is kept in RAM by
- <code class="filename">searchd</code> at all times. This is for performance reasons;
- random disk I/O would be too slow. On the contrary, inline storage does not
- require any additional RAM at all, but that comes at the cost of greatly
- inflating the index size: remember that it copies <span class="emphasis"><em>all</em></span>
- attribute values <span class="emphasis"><em>every</em></span> time the document ID
- is mentioned, and that is exactly as many times as there are
- different keywords in the document. Inline may be the only viable
- option if you have only a few attributes and need to work with big
- datasets in limited RAM. However, in most cases extern storage
- makes both indexing and searching <span class="emphasis"><em>much</em></span> more efficient.
- </p><p>
- Search-time memory requirements for extern storage are
- (1+number_of_attrs)*number_of_docs*4 bytes, ie. 10 million docs with
- 2 groups and 1 timestamp will take (1+2+1)*10M*4 = 160 MB of RAM.
- This is <span class="emphasis"><em>PER DAEMON</em></span>, not per query. <code class="filename">searchd</code>
- will allocate 160 MB on startup, read the data and keep it shared between queries.
- The children will <span class="emphasis"><em>NOT</em></span> allocate any additional
- copies of this data.
- </p></div>
- <div class="sect1" title="3.4. MVA (multi-valued attributes)"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="mva"></a>3.4. MVA (multi-valued attributes)</h2></div></div></div>
- <p>
- MVAs, or multi-valued attributes, are an important special type of per-document attributes in Sphinx.
- MVAs let you attach sets of numeric values to every document.
- That is useful to implement article tags, product categories, etc.
- Filtering and group-by (but not sorting) on MVA attributes are supported.
- </p><p>
- As of version 2.0.2-beta, MVA values can either be unsigned 32-bit integers
- (UNSIGNED INTEGER) or signed 64-bit integers (BIGINT). Up to version 2.0.1-beta,
- only the unsigned 32-bit values were supported.
- </p><p>
- The set size is not limited, you can have an arbitrary number of values
- attached to each document as long as RAM permits (<code class="filename">.spm</code> file
- that contains the MVA values will be precached in RAM by <code class="filename">searchd</code>).
- The source data can be taken either from a separate query, or from a document field;
- see source type in <a class="link" href="#conf-sql-attr-multi" title="12.1.22. sql_attr_multi">sql_attr_multi</a>.
- In the first case the query will have to return pairs of document ID and MVA values,
- in the second one the field will be parsed for integer values.
- There are absolutely no requirements as to incoming data order; the values will be
- automatically grouped by document ID (and internally sorted within the same ID)
- during indexing anyway.
- </p><p>
- When filtering, a document will match the filter on MVA attribute
- if <span class="emphasis"><em>any</em></span> of the values satisfy the filtering condition.
- (Therefore, documents that pass through exclude filters will not
- contain any of the forbidden values.)
- When grouping by MVA attribute, a document will contribute to as
- many groups as there are different MVA values associated with that document.
- For instance, if the collection contains exactly 1 document having a 'tag' MVA
- with values 5, 7, and 11, grouping on 'tag' will produce 3 groups with
- 'COUNT(*)' equal to 1 and 'GROUPBY()' key values of 5, 7, and 11 respectively.
- Also note that grouping by MVA might lead to duplicate documents in the result set:
- because each document can participate in many groups, it can be chosen as the best
- one in more than one group, leading to duplicate IDs. PHP API historically
- uses ordered hash on the document ID for the resulting rows; so you'll also need to use
- <a class="link" href="#api-func-setarrayresult" title="9.1.6. SetArrayResult">SetArrayResult()</a> in order
- to employ group-by on MVA with PHP API.
- </p></div>
- <div class="sect1" title="3.5. Indexes"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="indexes"></a>3.5. Indexes</h2></div></div></div>
- <p>
- To be able to answer full-text search queries fast, Sphinx needs
- to build a special data structure optimized for such queries from
- your text data. This structure is called <em class="glossterm">index</em>; and
- the process of building index from text is called <em class="glossterm">indexing</em>.
- </p><p>
- Different index types are well suited for different tasks.
- For example, a disk-based tree-based index would be easy to
- update (ie. insert new documents to existing index), but rather
- slow to search. Sphinx architecture allows internally for different
- <em class="glossterm">index types</em>, or <em class="glossterm">backends</em>,
- to be implemented comparatively easily.
- </p><p>
- Starting with 1.10-beta, Sphinx provides 2 different backends:
- a <span class="bold"><strong>disk index</strong></span> backend, and a <span class="bold"><strong>RT (realtime) index</strong></span> backend.
- </p><p>
- <span class="bold"><strong>Disk indexes</strong></span> are designed to provide maximum indexing and searching
- speed, while keeping the RAM footprint as low as possible. That comes
- at a cost of text index updates. You cannot update an existing document or
- incrementally add a new document to a disk index. You can only batch
- rebuild the entire disk index from scratch. (Note that you still can
- update document's <span class="bold"><strong>attributes</strong></span> on the fly, even with the disk
- indexes.)
- </p><p>
- This "rebuild only" limitation might look like a big constraint
- at first glance. But in reality, it can very frequently be worked
- around rather easily by setting up multiple disk indexes, searching
- through them all, and only rebuilding the one with a fraction
- of the most recently changed data.
- See <a class="xref" href="#live-updates" title="3.11. Live index updates">Section 3.11, “Live index updates”</a> for details.
- </p><p>
- <span class="bold"><strong>RT indexes</strong></span> enable you to implement dynamic updates and
- incremental additions to the full text index. RT stands for Real Time
- and they are indeed "soft realtime" in terms of writes, meaning that
- most index changes become available for searching as quick as 1 millisecond
- or less, but could occasionally stall for seconds. (Searches will still work
- even during that occasional writing stall.) Refer to
- <a class="xref" href="#rt-indexes" title="Chapter 4. Real-time indexes">Chapter 4, <i>Real-time indexes</i></a> for details.
- </p><p>
- Last but not least, Sphinx supports so-called <span class="bold"><strong>distributed indexes</strong></span>.
- Compared to disk and RT indexes, those are not a real physical backend,
- but rather just lists of either local or remote indexes that can be
- searched transparently to the application, with Sphinx doing all the chores
- of sending search requests to remote machines in the cluster, aggregating
- the result sets, retrying the failed requests, and even doing some
- load balancing. See <a class="xref" href="#distributed" title="5.8. Distributed searching">Section 5.8, “Distributed searching”</a> for a discussion
- of distributed indexes.
- </p><p>
- There can be as many indexes per configuration file as necessary.
- <code class="filename">indexer</code> utility can reindex either all of them
- (if <code class="option">--all</code> option is specified), or a certain explicitly
- specified subset. <code class="filename">searchd</code> utility will serve all
- the specified indexes, and the clients can specify what indexes to
- search at run time.
- </p></div>
- <div class="sect1" title="3.6. Restrictions on the source data"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="data-restrictions"></a>3.6. Restrictions on the source data</h2></div></div></div>
- <p>
- There are a few different restrictions imposed on the source data
- which is going to be indexed by Sphinx, of which the single most
- important one is:
- </p><p><span class="bold"><strong>
- ALL DOCUMENT IDS MUST BE UNIQUE UNSIGNED NON-ZERO INTEGER NUMBERS (32-BIT OR 64-BIT, DEPENDING ON BUILD TIME SETTINGS).
- </strong></span></p><p>
- If this requirement is not met, different bad things can happen.
- For instance, Sphinx can crash with an internal assertion while indexing;
- or produce strange results when searching due to conflicting IDs.
- Also, a 1000-pound gorilla might eventually come out of your
- display and start throwing barrels at you. You've been warned.
- </p></div>
- <div class="sect1" title="3.7. Charsets, case folding, translation tables, and replacement rules"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="charsets"></a>3.7. Charsets, case folding, translation tables, and replacement rules</h2></div></div></div>
- <p>
- When indexing some index, Sphinx fetches documents from
- the specified sources, splits the text into words, and does
- case folding so that "Abc", "ABC" and "abc" would be treated
- as the same word (or, to be pedantic, <em class="glossterm">term</em>).
- </p><p>
- To do that properly, Sphinx needs to know
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>what encoding the source text is in (and this encoding should always be UTF-8);</p></li>
- <li class="listitem"><p>what characters are letters and what are not;</p></li>
- <li class="listitem"><p>what letters should be folded to what letters.</p></li>
- </ul></div>
- <p>
- This should be configured on a per-index basis using
- <code class="option"><a class="link" href="#conf-charset-table" title="12.2.16. charset_table">charset_table</a></code> option.
- <code class="option"><a class="link" href="#conf-charset-table" title="12.2.16. charset_table">charset_table</a></code>
- specifies the table that maps letter characters to their case
- folded versions. The characters that are not in the table are considered
- to be non-letters and will be treated as word separators when indexing
- or searching through this index.
- </p><p>
- Default tables currently include English and Russian characters.
- Please do submit your tables for other languages!
- </p><p>As of version 2.1.1-beta, you can also specify text pattern replacement rules.
- For example, given the rules</p><pre class="programlisting">
- regexp_filter = \b(\d+)\" => \1 inch
- regexp_filter = (BLUE|RED) => COLOR
- </pre><p>the text 'RED TUBE 5" LONG' would be indexed as 'COLOR TUBE 5 INCH LONG', and
- 'PLANK 2" x 4"' as 'PLANK 2 INCH x 4 INCH'. Rules are applied in the given order.
- Text in queries is also replaced; a search for "BLUE TUBE" would
- actually become a search for "COLOR TUBE". Note that Sphinx must
- be built with the --with-re2 option to use this feature.</p></div>
- <div class="sect1" title="3.8. SQL data sources (MySQL, PostgreSQL)"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sql"></a>3.8. SQL data sources (MySQL, PostgreSQL)</h2></div></div></div>
- <p>
- With all the SQL drivers, indexing generally works as follows.
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>connection to the database is established;</p></li>
- <li class="listitem"><p>pre-query (see <a class="xref" href="#conf-sql-query-pre" title="12.1.11. sql_query_pre">Section 12.1.11, “sql_query_pre”</a>) is executed
- to perform any necessary initial setup, such as setting per-connection encoding with MySQL;</p></li>
- <li class="listitem"><p>main query (see <a class="xref" href="#conf-sql-query" title="12.1.12. sql_query">Section 12.1.12, “sql_query”</a>) is executed and the rows it returns are indexed;</p></li>
- <li class="listitem"><p>post-query (see <a class="xref" href="#conf-sql-query-post" title="12.1.28. sql_query_post">Section 12.1.28, “sql_query_post”</a>) is executed
- to perform any necessary cleanup;</p></li>
- <li class="listitem"><p>connection to the database is closed;</p></li>
- <li class="listitem"><p>indexer does the sorting phase (to be pedantic, index-type specific post-processing);</p></li>
- <li class="listitem"><p>connection to the database is established again;</p></li>
- <li class="listitem"><p>post-index query (see <a class="xref" href="#conf-sql-query-post-index" title="12.1.29. sql_query_post_index">Section 12.1.29, “sql_query_post_index”</a>) is executed
- to perform any necessary final cleanup;</p></li>
- <li class="listitem"><p>connection to the database is closed again.</p></li>
- </ul></div>
- <p>
- Most options, such as database user/host/password, are straightforward.
- However, there are a few subtle things, which are discussed in more detail here.
- </p><h3><a name="ranged-queries"></a>Ranged queries</h3><p>
- Main query, which needs to fetch all the documents, can impose
- a read lock on the whole table and stall the concurrent queries
- (eg. INSERTs to MyISAM table), waste a lot of memory for result set, etc.
- To avoid this, Sphinx supports so-called <em class="glossterm">ranged queries</em>.
- With ranged queries, Sphinx first fetches min and max document IDs from
- the table, and then substitutes different ID intervals into main query text
- and runs the modified query to fetch another chunk of documents.
- Here's an example.
- </p><div class="example"><a name="ex-ranged-queries"></a><p class="title"><b>Example 3.1. Ranged query usage example</b></p><div class="example-contents"><pre class="programlisting">
- # in sphinx.conf
- sql_query_range = SELECT MIN(id),MAX(id) FROM documents
- sql_range_step = 1000
- sql_query = SELECT * FROM documents WHERE id>=$start AND id<=$end
- </pre></div></div>
- <br class="example-break"><p>
- If the table contains document IDs from 1 to, say, 2345, then sql_query would
- be run three times:
- </p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>with <code class="option">$start</code> replaced with 1 and <code class="option">$end</code> replaced with 1000;</p></li>
- <li class="listitem"><p>with <code class="option">$start</code> replaced with 1001 and <code class="option">$end</code> replaced with 2000;</p></li>
- <li class="listitem"><p>with <code class="option">$start</code> replaced with 2001 and <code class="option">$end</code> replaced with 2345.</p></li>
- </ol></div>
- <p>
- Obviously, that's not much of a difference for 2000-row table,
- but when it comes to indexing 10-million-row MyISAM table,
- ranged queries might be of some help.
- </p><h3><a name="idp31253776"></a><code class="option">sql_query_post</code> vs. <code class="option">sql_query_post_index</code></h3><p>
- The difference between post-query and post-index query is that post-query
- is run immediately after Sphinx has received all the documents, but further indexing
- <span class="bold"><strong>may</strong></span> still fail for some other reason. On the contrary,
- by the time the post-index query gets executed, it is <span class="bold"><strong>guaranteed</strong></span>
- that the indexing was successful. Database connection is dropped and re-established
- because the sorting phase can be very lengthy and the connection would just time out otherwise.
- </p></div>
- <div class="sect1" title="3.9. xmlpipe2 data source"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="xmlpipe2"></a>3.9. xmlpipe2 data source</h2></div></div></div>
- <p>
- xmlpipe2 lets you pass arbitrary full-text and attribute data to Sphinx
- in yet another custom XML format. It also lets you specify the schema
- (ie. the set of fields and attributes) either in the XML stream itself,
- or in the source settings.
- </p><p>
- When indexing xmlpipe2 source, indexer runs the given command, opens
- a pipe to its stdout, and expects well-formed XML stream. Here's sample
- stream data:
- </p><div class="example"><a name="ex-xmlpipe2-document"></a><p class="title"><b>Example 3.2. xmlpipe2 document stream</b></p><div class="example-contents"><pre class="programlisting">
- <?xml version="1.0" encoding="utf-8"?>
- <sphinx:docset>
- <sphinx:schema>
- <sphinx:field name="subject"/>
- <sphinx:field name="content"/>
- <sphinx:attr name="published" type="timestamp"/>
- <sphinx:attr name="author_id" type="int" bits="16" default="1"/>
- </sphinx:schema>
- <sphinx:document id="1234">
- <content>this is the main content <![CDATA[[and this <cdata> entry
- must be handled properly by xml parser lib]]></content>
- <published>1012325463</published>
- <subject>note how field/attr tags can be
- in <b class="red">randomized</b> order</subject>
- <misc>some undeclared element</misc>
- </sphinx:document>
- <sphinx:document id="1235">
- <subject>another subject</subject>
- <content>here comes another document, and i am given to understand,
- that in-document field order must not matter, sir</content>
- <published>1012325467</published>
- </sphinx:document>
- <!-- ... even more sphinx:document entries here ... -->
- <sphinx:killlist>
- <id>1234</id>
- <id>4567</id>
- </sphinx:killlist>
- </sphinx:docset>
- </pre></div></div>
- <p><br class="example-break">
- </p><p>
- Arbitrary fields and attributes are allowed.
- They also can occur in the stream in arbitrary order within each document; the order is ignored.
- There is a restriction on maximum field length; fields longer than 2 MB will be truncated to 2 MB (this limit can be changed in the source).
- </p><p>
- The schema, ie. complete fields and attributes list, must be declared
- before any document could be parsed. This can be done either in the
- configuration file using <code class="option">xmlpipe_field</code> and <code class="option">xmlpipe_attr_XXX</code>
- settings, or right in the stream using <sphinx:schema> element.
- <sphinx:schema> is optional. It is only allowed to occur as the very
- first sub-element in <sphinx:docset>. If there is no in-stream
- schema definition, settings from the configuration file will be used.
- Otherwise, stream settings take precedence.
- </p><p>
- Unknown tags (which were declared neither as fields nor as attributes)
- will be ignored with a warning. In the example above, <misc> will be ignored.
- All embedded tags and their attributes (such as <b> in <subject>
- in the example above) will be silently ignored.
- </p><p>
- Support for incoming stream encodings depends on whether <code class="filename">iconv</code>
- is installed on the system. xmlpipe2 is parsed using <code class="filename">libexpat</code>
- parser that understands US-ASCII, ISO-8859-1, UTF-8 and a few UTF-16 variants
- natively. Sphinx <code class="filename">configure</code> script will also check
- for <code class="filename">libiconv</code> presence, and utilize it to handle
- other encodings. <code class="filename">libexpat</code> also enforces the
- requirement to use UTF-8 charset on Sphinx side, because the
- parsed data it returns is always in UTF-8.
- </p><p>
- XML elements (tags) recognized by xmlpipe2 (and their attributes where applicable) are:
- </p><div class="variablelist"><dl><dt><span class="term">sphinx:docset</span></dt>
- <dd><p>Mandatory top-level element, denotes and contains xmlpipe2 document set.</p></dd><dt><span class="term">sphinx:schema</span></dt>
- <dd><p>Optional element, must either occur as the very first child
- of sphinx:docset, or never occur at all. Declares the document schema.
- Contains field and attribute declarations. If present, overrides
- per-source settings from the configuration file.
- </p></dd><dt><span class="term">sphinx:field</span></dt>
- <dd><p>Optional element, child of sphinx:schema. Declares a full-text field.
- Known attributes are:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>"name", specifies the XML element name that will be treated as a full-text field in the subsequent documents.</p></li>
- <li class="listitem"><p>"attr", specifies whether to also index this field as a string. Possible value is "string". Introduced in version 1.10-beta.</p></li>
- </ul></div>
- <p>
- </p></dd><dt><span class="term">sphinx:attr</span></dt>
- <dd><p>Optional element, child of sphinx:schema. Declares an attribute.
- Known attributes are:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>"name", specifies the element name that should be treated as an attribute in the subsequent documents.</p></li>
- <li class="listitem"><p>"type", specifies the attribute type. Possible values are "int", "bigint", "timestamp", "bool", "float", "multi" and "json".</p></li>
- <li class="listitem"><p>"bits", specifies the bit size for "int" attribute type. Valid values are 1 to 32.</p></li>
- <li class="listitem"><p>"default", specifies the default value for this attribute that should be used if the attribute's element is not present in the document.</p></li>
- </ul></div>
- <p>
- </p></dd><dt><span class="term">sphinx:document</span></dt>
- <dd><p>Mandatory element, must be a child of sphinx:docset.
- Contains arbitrary other elements with field and attribute values
- to be indexed, as declared either using sphinx:field and sphinx:attr
- elements or in the configuration file. The only known attribute
- is "id" that must contain the unique integer document ID.
- </p></dd><dt><span class="term">sphinx:killlist</span></dt>
- <dd><p>Optional element, child of sphinx:docset.
- Contains a number of "id" elements whose contents are document IDs
- to be put into a <a class="link" href="#conf-sql-query-killlist" title="12.1.16. sql_query_killlist">kill-list</a> for this index.
- </p></dd></dl></div>
- <p>
- </p></div>
- <div class="sect1" title="3.10. tsvpipe (Tab Separated Values) data source"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="tsvpipe"></a>3.10. tsvpipe (Tab Separated Values) data source</h2></div></div></div>
- <p>
- This is the simplest way to pass data to the indexer. It was created due to
- xmlpipe2 limitations. Namely, indexer must map each attribute and field tag
- in XML file to corresponding schema element. This mapping requires some time,
- and that time grows with the number of fields and attributes in the
- schema. There is no such issue in tsvpipe because each field and attribute
- is a particular column in TSV file. So, in some cases tsvpipe could work
- slightly faster than xmlpipe2. Added in 2.2.1-beta.
- </p><p>
- The first column in TSV file must be a document ID. The remaining columns must mirror
- the declaration of fields and attributes in schema definition.
- </p><pre class="programlisting">
- source tsv_test
- {
- type = tsvpipe
- tsvpipe_command = cat /tmp/rock_bands.tsv
- tsvpipe_field = name
- tsvpipe_attr_multi = genre_tags
- }
- </pre><pre class="programlisting">
- 1 Led Zeppelin 35,23,16
- 2 Deep Purple 35,92
- 3 Frank Zappa 35,23,16,92,33,24
- </pre></div>
- <div class="sect1" title="3.11. Live index updates"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="live-updates"></a>3.11. Live index updates</h2></div></div></div>
- <p>
- There are two major approaches to maintaining the full-text index
- contents up to date. Note, however, that both these approaches deal
- with the task of <span class="emphasis"><em>full-text data updates</em></span>, and not
- attribute updates. Instant attribute updates are supported since
- version 0.9.8. Refer to <a class="link" href="#api-func-updateatttributes" title="9.7.2. UpdateAttributes">UpdateAttributes()</a>
- API call description for details.
- </p><p>
- First, you can use disk-based indexes, partition them manually,
- and only rebuild the smaller partitions (so-called "deltas") frequently.
- By minimizing the rebuild size, you can reduce the average indexing lag
- to something as low as 30-60 seconds. This approach was the only one
- available in versions 0.9.x. On huge collections it actually might be
- the most efficient one. Refer to <a class="xref" href="#delta-updates" title="3.12. Delta index updates">Section 3.12, “Delta index updates”</a>
- for details.
- </p><p>
- Second, versions 1.x (starting with 1.10-beta) add support for so-called
- real-time indexes (RT indexes for short) that allow on-the-fly updates of the
- full-text data. Updates on an RT index can appear in the search results in
- 1-2 milliseconds, ie. 0.001-0.002 seconds. However, RT indexes are less
- efficient for bulk indexing huge amounts of data. Refer to
- <a class="xref" href="#rt-indexes" title="Chapter 4. Real-time indexes">Chapter 4, <i>Real-time indexes</i></a> for details.
- </p></div>
- <div class="sect1" title="3.12. Delta index updates"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="delta-updates"></a>3.12. Delta index updates</h2></div></div></div>
- <p>
- There's a frequent situation when the total dataset is too big
- to be reindexed from scratch often, but the amount of new records
- is rather small. Example: a forum with 1,000,000 archived posts,
- but only 1,000 new posts per day.
- </p><p>
- In this case, "live" (almost real time) index updates could be
- implemented using so called "main+delta" scheme.
- </p><p>
- The idea is to set up two sources and two indexes, with one
- "main" index for the data which only changes rarely (if ever),
- and one "delta" for the new documents. In the example above,
- 1,000,000 archived posts would go to the main index, and newly
- inserted 1,000 posts/day would go to the delta index. Delta index
- could then be reindexed very frequently, and the documents can
- be made available to search in a matter of minutes.
- </p><p>
- Specifying which documents should go to what index and
- reindexing main index could also be made fully automatic.
- One option would be to make a counter table which would track
- the ID which would split the documents, and update it
- whenever the main index is reindexed.
- </p><div class="example"><a name="ex-live-updates"></a><p class="title"><b>Example 3.3. Fully automated live updates</b></p><div class="example-contents"><pre class="programlisting">
- # in MySQL
- CREATE TABLE sph_counter
- (
- counter_id INTEGER PRIMARY KEY NOT NULL,
- max_doc_id INTEGER NOT NULL
- );
- # in sphinx.conf
- source main
- {
- # ...
- sql_query_pre = SET NAMES utf8
- sql_query_pre = REPLACE INTO sph_counter SELECT 1, MAX(id) FROM documents
- sql_query = SELECT id, title, body FROM documents \
- WHERE id<=( SELECT max_doc_id FROM sph_counter WHERE counter_id=1 )
- }
- source delta : main
- {
- sql_query_pre = SET NAMES utf8
- sql_query = SELECT id, title, body FROM documents \
- WHERE id>( SELECT max_doc_id FROM sph_counter WHERE counter_id=1 )
- }
- index main
- {
- source = main
- path = /path/to/main
- # ... all the other settings
- }
- # note how all other settings are copied from main,
- # but source and path are overridden (they MUST be)
- index delta : main
- {
- source = delta
- path = /path/to/delta
- }
- </pre></div></div>
- <p><br class="example-break">
- </p><p>
- Note how we're overriding <code class="code">sql_query_pre</code> in the delta source.
- We need to explicitly have that override. Otherwise <code class="code">REPLACE</code> query
- would be run when indexing delta source too, effectively nullifying it. However,
- when we issue the directive in the inherited source for the first time, it removes
- <span class="emphasis"><em>all</em></span> inherited values, so the encoding setup is also lost.
- So <code class="code">sql_query_pre</code> in the delta can not just be empty; and we need
- to issue the encoding setup query explicitly once again.
- </p></div>
- <div class="sect1" title="3.13. Index merging"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="index-merging"></a>3.13. Index merging</h2></div></div></div>
- <p>
- Merging two existing indexes can be more efficient than indexing the data
- from scratch, and desired in some cases (such as merging 'main' and 'delta'
- indexes instead of simply reindexing 'main' in 'main+delta' partitioning
- scheme). So <code class="filename">indexer</code> has an option to do that.
- Merging the indexes is normally faster than reindexing but still
- <span class="emphasis"><em>not</em></span> instant on huge indexes. Basically,
- it will need to read the contents of both indexes once and write
- the result once. Merging 100 GB and 1 GB index, for example,
- will result in 202 GB of IO (but that's still likely less than
- the indexing from scratch requires).
- </p><p>
- The basic command syntax is as follows:
- </p><pre class="programlisting">
- indexer --merge DSTINDEX SRCINDEX [--rotate]
- </pre><p>
- Only the DSTINDEX index will be affected: the contents of SRCINDEX will be merged into it.
- <code class="option">--rotate</code> switch will be required if DSTINDEX is already being served by <code class="filename">searchd</code>.
- The initially devised usage pattern is to merge a smaller update from SRCINDEX into DSTINDEX.
- Thus, when merging the attributes, values from SRCINDEX will win if duplicate document IDs are encountered.
- Note, however, that the "old" keywords will <span class="emphasis"><em>not</em></span> be automatically removed in such cases.
- For example, if there's a keyword "old" associated with document 123 in DSTINDEX, and a keyword "new" associated
- with it in SRCINDEX, document 123 will be found by <span class="emphasis"><em>both</em></span> keywords after the merge.
- You can supply an explicit condition to remove documents from DSTINDEX to mitigate that;
- the relevant switch is <code class="option">--merge-dst-range</code>:
- </p><pre class="programlisting">
- indexer --merge main delta --merge-dst-range deleted 0 0
- </pre><p>
- This switch lets you apply filters to the destination index along with merging.
- There can be several filters; all of their conditions must be met in order
- to include the document in the resulting merged index. In the example above,
- the filter passes only those records where 'deleted' is 0, eliminating all
- records that were flagged as deleted (for instance, using
- <a class="link" href="#api-func-updateatttributes" title="9.7.2. UpdateAttributes">UpdateAttributes()</a> call).
- </p></div></div>
- <div class="chapter" title="Chapter 4. Real-time indexes"><div class="titlepage"><div><div><h2 class="title"><a name="rt-indexes"></a>Chapter 4. Real-time indexes</h2></div></div></div>
- <div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="sect1"><a href="#rt-overview">4.1. RT indexes overview</a></span></dt>
- <dt><span class="sect1"><a href="#rt-caveats">4.2. Known caveats with RT indexes</a></span></dt>
- <dt><span class="sect1"><a href="#rt-internals">4.3. RT index internals</a></span></dt>
- <dt><span class="sect1"><a href="#rt-binlog">4.4. Binary logging</a></span></dt>
- </dl></div>
- <p>
- Real-time indexes (or RT indexes for brevity) are a new backend
- that lets you insert, update, or delete documents (rows) on the fly.
- RT indexes were added in version 1.10-beta. While querying of RT indexes
- is possible using any of the SphinxAPI, SphinxQL, or SphinxSE, updating
- them is only possible via SphinxQL at the moment. Full SphinxQL
- reference is available in <a class="xref" href="#sphinxql-reference" title="Chapter 8. SphinxQL reference">Chapter 8, <i>SphinxQL reference</i></a>.
- </p><div class="sect1" title="4.1. RT indexes overview"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rt-overview"></a>4.1. RT indexes overview</h2></div></div></div>
- <p>
- RT indexes should be declared in <code class="filename">sphinx.conf</code>,
- just as every other index type. Notable differences from the regular,
- disk-based indexes are that a) data sources are not required and ignored,
- and b) you should explicitly enumerate all the text fields, not just
- attributes. Here's an example:
- </p><div class="example"><a name="ex-rt-updates"></a><p class="title"><b>Example 4.1. RT index declaration</b></p><div class="example-contents"><pre class="programlisting">
- index rt
- {
- type = rt
- path = /usr/local/sphinx/data/rt
- rt_field = title
- rt_field = content
- rt_attr_uint = gid
- }
- </pre></div></div>
- <br class="example-break"><p>
- As of 2.0.1-beta and above, RT indexes are production quality,
- despite a few missing features.
- </p><p>
- RT index can be accessed using MySQL protocol. INSERT, REPLACE, DELETE, and
- SELECT statements against RT index are supported. For instance, this
- is an example session with the sample index above:
- </p><pre class="programlisting">
- $ mysql -h 127.0.0.1 -P 9306
- Welcome to the MySQL monitor. Commands end with ; or \g.
- Your MySQL connection id is 1
- Server version: 1.10-dev (r2153)
- Type 'help;' or '\h' for help. Type '\c' to clear the buffer.
- mysql> INSERT INTO rt VALUES ( 1, 'first record', 'test one', 123 );
- Query OK, 1 row affected (0.05 sec)
- mysql> INSERT INTO rt VALUES ( 2, 'second record', 'test two', 234 );
- Query OK, 1 row affected (0.00 sec)
- mysql> SELECT * FROM rt;
- +------+--------+------+
- | id | weight | gid |
- +------+--------+------+
- | 1 | 1 | 123 |
- | 2 | 1 | 234 |
- +------+--------+------+
- 2 rows in set (0.02 sec)
- mysql> SELECT * FROM rt WHERE MATCH('test');
- +------+--------+------+
- | id | weight | gid |
- +------+--------+------+
- | 1 | 1643 | 123 |
- | 2 | 1643 | 234 |
- +------+--------+------+
- 2 rows in set (0.01 sec)
- mysql> SELECT * FROM rt WHERE MATCH('@title test');
- Empty set (0.00 sec)
- </pre><p>
- Both partial and batch INSERT syntaxes are supported, ie.
- you can specify a subset of columns, and insert several rows at a time.
- Deletions are also possible using DELETE statement; the only currently
- supported syntax is DELETE FROM <index> WHERE id=<id>.
- REPLACE is also supported, enabling you to implement updates.
- </p><pre class="programlisting">
- mysql> INSERT INTO rt ( id, title ) VALUES ( 3, 'third row' ), ( 4, 'fourth entry' );
- Query OK, 2 rows affected (0.01 sec)
- mysql> SELECT * FROM rt;
- +------+--------+------+
- | id | weight | gid |
- +------+--------+------+
- | 1 | 1 | 123 |
- | 2 | 1 | 234 |
- | 3 | 1 | 0 |
- | 4 | 1 | 0 |
- +------+--------+------+
- 4 rows in set (0.00 sec)
- mysql> DELETE FROM rt WHERE id=2;
- Query OK, 0 rows affected (0.00 sec)
- mysql> SELECT * FROM rt WHERE MATCH('test');
- +------+--------+------+
- | id | weight | gid |
- +------+--------+------+
- | 1 | 1500 | 123 |
- +------+--------+------+
- 1 row in set (0.00 sec)
- mysql> INSERT INTO rt VALUES ( 1, 'first record on steroids', 'test one', 123 );
- ERROR 1064 (42000): duplicate id '1'
- mysql> REPLACE INTO rt VALUES ( 1, 'first record on steroids', 'test one', 123 );
- Query OK, 1 row affected (0.01 sec)
- mysql> SELECT * FROM rt WHERE MATCH('steroids');
- +------+--------+------+
- | id | weight | gid |
- +------+--------+------+
- | 1 | 1500 | 123 |
- +------+--------+------+
- 1 row in set (0.01 sec)
- </pre><p>
- Data stored in RT index should survive clean shutdown. When binary logging
- is enabled, it should also survive crash and/or dirty shutdown, and recover
- on subsequent startup.
- </p></div>
- <div class="sect1" title="4.2. Known caveats with RT indexes"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rt-caveats"></a>4.2. Known caveats with RT indexes</h2></div></div></div>
- <p>
- RT indexes are currently a production quality feature, but there are still a few known
- usage quirks. Those quirks are listed in this section.
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>Prefix indexing is supported with dict = keywords starting 2.0.2-beta. Infix indexing is experimental in trunk.</p></li>
- <li class="listitem"><p>Disk chunks optimization routine is not implemented yet.</p></li>
- <li class="listitem"><p>On initial index creation, attributes are reordered by type,
- in the following order: uint, bigint, float, timestamp, string. So when
- using INSERT without an explicit column names list, specify all uint
- column values first, then bigint, etc.</p></li>
- <li class="listitem"><p>Default conservative RAM chunk limit (<code class="option">rt_mem_limit</code>)
- of 32M can lead to poor performance on bigger indexes, you should raise it to
- 256..1024M if you're planning to index gigabytes.</p></li>
- <li class="listitem"><p>High DELETE/REPLACE rate can lead to kill-list fragmentation
- and impact searching performance.</p></li>
- <li class="listitem"><p>No transaction size limits are currently imposed;
- too many concurrent INSERT/REPLACE transactions might therefore
- consume a lot of RAM.</p></li>
- <li class="listitem"><p>In case of a damaged binlog, recovery will stop on the
- first damaged transaction, even though it's technically possible
- to keep looking further for subsequent undamaged transactions, and
- recover those. This mid-file damage case (due to flaky HDD/CDD/tape?)
- is supposed to be extremely rare, though.</p></li>
- <li class="listitem"><p>Multiple INSERTs grouped in a single transaction perform
- better than equivalent single-row transactions and are recommended for
- batch loading of data.</p></li>
- </ul></div></div>
- <div class="sect1" title="4.3. RT index internals"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rt-internals"></a>4.3. RT index internals</h2></div></div></div>
- <p>
- RT index is internally chunked. It keeps a so-called RAM chunk
- that stores all the most recent changes. RAM chunk memory usage
- is rather strictly limited with per-index
- <a class="link" href="#conf-rt-mem-limit" title="12.2.49. rt_mem_limit">rt_mem_limit</a> directive.
- Once RAM chunk grows over this limit, a new disk chunk is created
- from its data, and RAM chunk is reset. Thus, while most changes
- on the RT index will be performed in RAM only and complete instantly
- (in milliseconds), those changes that overflow the RAM chunk will
- stall for the duration of disk chunk creation (a few seconds).
- </p><p>
- Since version 2.1.1-beta, Sphinx uses double-buffering to avoid INSERT stalls. When data is
- being dumped to disk, the second buffer is used, so further INSERTs
- won't be delayed. The second buffer is defined to be 10% the size
- of the standard buffer, <a class="link" href="#conf-rt-mem-limit" title="12.2.49. rt_mem_limit">rt_mem_limit</a>,
- but future versions of Sphinx may allow configuring this further.
- </p><p>
- Disk chunks are, in fact, just regular disk-based indexes.
- But they're a part of an RT index and automatically managed by it,
- so you need not configure nor manage them manually. Because a new
- disk chunk is created every time RAM chunk overflows the limit, and
- because in-memory chunk format is close to on-disk format, the disk
- chunks will be approximately <code class="option">rt_mem_limit</code> bytes
- in size each.
- </p><p>
- Generally, it is better to set the limit bigger, to minimize both
- the frequency of flushes, and the index fragmentation (number of disk
- chunks). For instance, on a dedicated search server that handles
- a big RT index, it can be advised to set <code class="option">rt_mem_limit</code>
- to 1-2 GB. A global limit on all indexes is also planned, but not yet
- implemented as of 1.10-beta.
- </p><p>
- Disk chunk full-text index data can not be actually modified,
- so the full-text field changes (ie. row deletions and updates)
- suppress a previous row version from a disk chunk using a kill-list,
- but do not actually physically purge the data. Therefore, on workloads
- with high full-text updates ratio index might eventually get polluted
- by these previous row versions, and searching performance would
- degrade. Physical index purging that would improve the performance
- is planned, but not yet implemented as of 1.10-beta.
- </p><p>
- Data in RAM chunk gets saved to disk on clean daemon shutdown, and
- then loaded back on startup. However, on daemon or server crash,
- updates from RAM chunk might be lost. To prevent that, binary logging
- of transactions can be used; see <a class="xref" href="#rt-binlog" title="4.4. Binary logging">Section 4.4, “Binary logging”</a> for details.
- </p><p>
- Full-text changes in RT index are transactional. They are stored
- in a per-thread accumulator until COMMIT, then applied at once.
- Bigger batches per single COMMIT should result in faster indexing.
- </p></div>
- <div class="sect1" title="4.4. Binary logging"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rt-binlog"></a>4.4. Binary logging</h2></div></div></div>
- <p>
- Binary logs are essentially a recovery mechanism. With binary logs
- enabled, <code class="filename">searchd</code> writes every given transaction
- to the binlog file, and uses that for recovery after an unclean shutdown.
- On clean shutdown, RAM chunks are saved to disk, and then all the binlog
- files are unlinked.
- </p><p>
- During normal operation, a new binlog file will be opened every time
- when <code class="option">binlog_max_log_size</code> limit
- is reached. Older, already closed binlog files are kept until all of the
- transactions stored in them (from all indexes) are flushed as a disk chunk.
- Setting the limit to 0 pretty much prevents binlog from being unlinked
- at all while <code class="filename">searchd</code> is running; however, it will
- still be unlinked on clean shutdown. (This is the default case as of
- 2.0.3-release, <code class="option">binlog_max_log_size</code> defaults to 0.)
- </p><p>
- There are 3 different binlog flushing strategies, controlled by
- <a class="link" href="#conf-binlog-flush" title="12.4.26. binlog_flush">binlog_flush</a> directive
- which takes the values of 0, 1, or 2. 0 means to flush the log
- to OS and sync it to disk every second; 1 means flush and sync
- every transaction; and 2 (the default mode) means flush every
- transaction but sync every second. Sync is relatively slow because
- it has to perform physical disk writes, so mode 1 is the safest
- (every committed transaction is guaranteed to be written on disk)
- but the slowest. Flushing the log to OS prevents data loss on
- <code class="filename">searchd</code> crashes but not system crashes.
- Mode 2 is the default.
- </p><p>
- On recovery after an unclean shutdown, binlogs are replayed
- and all logged transactions since the last good on-disk state
- are restored. Transactions are checksummed so in case of binlog
- file corruption garbage data will <span class="bold"><strong>not</strong></span> be replayed; such
- a broken transaction will be detected and, currently, will stop
- replay. Transactions also start with a magic marker and are timestamped,
- so in case of binlog damage in the middle of the file, it's technically
- possible to skip broken transactions and keep replaying from the next
- good one, and/or it's possible to replay transactions until a given
- timestamp (point-in-time recovery), but none of that is implemented yet
- as of 1.10-beta.
- </p><p>
- One unwanted side effect of binlogs is that actively updating
- a small RT index that fully fits into a RAM chunk part will lead
- to an ever-growing binlog that can never be unlinked until clean
- shutdown. Binlogs are essentially append-only deltas against
- the last known good saved state on disk, and unless RAM chunk
- gets saved, they can not be unlinked. An ever-growing binlog
- is not very good for disk use and crash recovery time. Starting
- with 2.0.1-beta you can configure <code class="filename">searchd</code>
- to perform a periodic RAM chunk flush to fix that problem
- using a <a class="link" href="#conf-rt-flush-period" title="12.4.33. rt_flush_period">rt_flush_period</a>
- directive. With periodic flushes enabled, <code class="filename">searchd</code>
- will keep a separate thread, checking whether RT indexes RAM
- chunks need to be written back to disk. Once that happens,
- the respective binlogs can be (and are) safely unlinked.
- </p><p>
- Note that <code class="code">rt_flush_period</code> only controls the
- frequency at which the <span class="emphasis"><em>checks</em></span> happen.
- There are no <span class="emphasis"><em>guarantees</em></span> that the
- particular RAM chunk will get saved. For instance, it does
- not make sense to regularly re-save a huge RAM chunk that
- only gets a few rows worth of updates. The search daemon
- determines whether to actually perform the flush with a few
- heuristics.
- </p></div></div>
- <div class="chapter" title="Chapter 5. Searching"><div class="titlepage"><div><div><h2 class="title"><a name="searching"></a>Chapter 5. Searching</h2></div></div></div>
- <div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="sect1"><a href="#matching-modes">5.1. Matching modes</a></span></dt>
- <dt><span class="sect1"><a href="#boolean-syntax">5.2. Boolean query syntax</a></span></dt>
- <dt><span class="sect1"><a href="#extended-syntax">5.3. Extended query syntax</a></span></dt>
- <dt><span class="sect1"><a href="#weighting">5.4. Search results ranking</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#ranking-overview">5.4.1. Ranking overview</a></span></dt>
- <dt><span class="sect2"><a href="#builtin-rankers">5.4.2. Available built-in rankers</a></span></dt>
- <dt><span class="sect2"><a href="#expression-ranker">5.4.3. Expression based ranker (SPH_RANK_EXPR)</a></span></dt>
- <dt><span class="sect2"><a href="#ranking-factors">5.4.4. Quick summary of the ranking factors</a></span></dt>
- <dt><span class="sect2"><a href="#document-factors">5.4.5. Document-level ranking factors</a></span></dt>
- <dt><span class="sect2"><a href="#field-factors">5.4.6. Field-level ranking factors</a></span></dt>
- <dt><span class="sect2"><a href="#factor-aggr-functions">5.4.7. Ranking factor aggregation functions</a></span></dt>
- <dt><span class="sect2"><a href="#formulas-for-builtin-rankers">5.4.8. Formula expressions for all the built-in rankers</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#expressions">5.5. Expressions, functions, and operators</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#operators">5.5.1. Operators</a></span></dt>
- <dt><span class="sect2"><a href="#numeric-functions">5.5.2. Numeric functions</a></span></dt>
- <dt><span class="sect2"><a href="#date-time-functions">5.5.3. Date and time functions</a></span></dt>
- <dt><span class="sect2"><a href="#type-conversion-functions">5.5.4. Type conversion functions</a></span></dt>
- <dt><span class="sect2"><a href="#comparison-functions">5.5.5. Comparison functions</a></span></dt>
- <dt><span class="sect2"><a href="#misc-functions">5.5.6. Miscellaneous functions</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#sorting-modes">5.6. Sorting modes</a></span></dt>
- <dt><span class="sect1"><a href="#clustering">5.7. Grouping (clustering) search results </a></span></dt>
- <dt><span class="sect1"><a href="#distributed">5.8. Distributed searching</a></span></dt>
- <dt><span class="sect1"><a href="#query-log-format">5.9. <code class="filename">searchd</code> query log formats</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#plain-log-format">5.9.1. Plain log format</a></span></dt>
- <dt><span class="sect2"><a href="#sphinxql-log-format">5.9.2. SphinxQL log format</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#sphinxql">5.10. MySQL protocol support and SphinxQL</a></span></dt>
- <dt><span class="sect1"><a href="#multi-queries">5.11. Multi-queries</a></span></dt>
- <dt><span class="sect1"><a href="#collations">5.12. Collations</a></span></dt>
- </dl></div>
- <div class="sect1" title="5.1. Matching modes"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="matching-modes"></a>5.1. Matching modes</h2></div></div></div>
- <p>
- So-called matching modes are a legacy feature that used to provide
- (very) limited query syntax and ranking support. Currently, they are
- deprecated in favor of <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">full-text query
- language</a> and so-called <a class="link" href="#weighting" title="5.4. Search results ranking">rankers</a>.
- Starting with version 0.9.9-release, it is thus strongly recommended
- to use SPH_MATCH_EXTENDED and proper query syntax rather than any other
- legacy mode. All those other modes are actually internally converted
- to extended syntax anyway. SphinxAPI still defaults to SPH_MATCH_ALL
- but that is for compatibility reasons only.
- </p><p>
- There are the following matching modes available:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>SPH_MATCH_ALL, matches all query words;</p></li>
- <li class="listitem"><p>SPH_MATCH_ANY, matches any of the query words;</p></li>
- <li class="listitem"><p>SPH_MATCH_PHRASE, matches query as a phrase, requiring perfect match;</p></li>
- <li class="listitem"><p>SPH_MATCH_BOOLEAN, matches query as a boolean expression (see <a class="xref" href="#boolean-syntax" title="5.2. Boolean query syntax">Section 5.2, “Boolean query syntax”</a>);</p></li>
- <li class="listitem"><p>SPH_MATCH_EXTENDED, matches query as an expression in Sphinx internal query language
- (see <a class="xref" href="#extended-syntax" title="5.3. Extended query syntax">Section 5.3, “Extended query syntax”</a>);</p></li>
- <li class="listitem"><p>SPH_MATCH_EXTENDED2, an alias for SPH_MATCH_EXTENDED (default mode);</p></li>
- <li class="listitem"><p>SPH_MATCH_FULLSCAN, matches query, forcibly using the "full scan" mode as below.
- NB, any query terms will be ignored, such that filters, filter-ranges and grouping
- will still be applied, but no text-matching.</p></li>
- </ul></div>
- <p>
- </p><p>
- SPH_MATCH_EXTENDED2 was used during 0.9.8 and 0.9.9 development cycle,
- when the internal matching engine was being rewritten (for the sake of
- additional functionality and better performance). By 0.9.9-release,
- the older version was removed, and SPH_MATCH_EXTENDED and SPH_MATCH_EXTENDED2
- are now just aliases.
- </p><p>
- The SPH_MATCH_FULLSCAN mode will be automatically activated in place of the specified matching mode when the following conditions are met:
- </p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>The query string is empty (ie. its length is zero).</p></li>
- <li class="listitem"><p><a class="link" href="#conf-docinfo" title="12.2.4. docinfo">docinfo</a> storage is set to <code class="code">extern</code>.</p></li>
- </ol></div>
- <p>
- In full scan mode, all the indexed documents will be considered as matching.
- Such queries will still apply filters, sorting, and group by, but will not perform any full-text searching.
- This can be useful to unify full-text and non-full-text searching code, or to offload SQL server
- (there are cases when Sphinx scans will perform better than analogous MySQL queries).
- An example of using the full scan mode might be to find posts in a forum.
- By selecting the forum's user ID via <code class="code">SetFilter()</code> but not actually providing any search text,
- Sphinx will match every document (i.e. every post) where <code class="code">SetFilter()</code> would match -
- in this case providing every post from that user. By default this will be ordered by relevancy,
- followed by Sphinx document ID in ascending order (earliest first).
- </p></div>
- <div class="sect1" title="5.2. Boolean query syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="boolean-syntax"></a>5.2. Boolean query syntax</h2></div></div></div>
- <p>
- Boolean queries allow the following special operators to be used:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>explicit operator AND: </p><pre class="programlisting">hello & world</pre></li>
- <li class="listitem"><p>operator OR: </p><pre class="programlisting">hello | world</pre></li>
- <li class="listitem"><p>operator NOT:
- </p><pre class="programlisting">
- hello -world
- hello !world
- </pre><p>
- </p></li>
- <li class="listitem"><p>grouping: </p><pre class="programlisting">( hello world )</pre></li>
- </ul></div>
- <p>
- Here's an example query which uses all these operators:
- </p><div class="example"><a name="ex-boolean-query"></a><p class="title"><b>Example 5.1. Boolean query example</b></p><div class="example-contents"><pre class="programlisting">
- ( cat -dog ) | ( cat -mouse)
- </pre></div></div>
- <p><br class="example-break">
- </p><p>
- There is always an implicit AND operator, so the "hello world" query actually
- means "hello & world".
- </p><p>
- OR operator precedence is higher than AND, so "looking for cat | dog | mouse"
- means "looking for ( cat | dog | mouse )" and <span class="emphasis"><em>not</em></span>
- "(looking for cat) | dog | mouse".
- </p><p>
- Since version 2.1.1-beta, queries may be automatically optimized if OPTION boolean_simplify=1 is specified.
- Some transformations performed by this optimization include:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>Excess brackets: ((A | B) | C) becomes ( A | B | C ); ((A B) C) becomes ( A B C )</p></li>
- <li class="listitem"><p>Excess AND NOT: ((A !N1) !N2) becomes (A !(N1 | N2))</p></li>
- <li class="listitem"><p>Common NOT: ((A !N) | (B !N)) becomes ((A|B) !N)</p></li>
- <li class="listitem"><p>Common Compound NOT: ((A !(N AA)) | (B !(N BB))) becomes (((A|B) !N) | (A !AA) | (B !BB)) if the cost of evaluating N is greater than the added together costs of evaluating A and B</p></li>
- <li class="listitem"><p>Common subterm: ((A (N | AA)) | (B (N | BB))) becomes (((A|B) N) | (A AA) | (B BB)) if the cost of evaluating N is greater than the added together costs of evaluating A and B</p></li>
- <li class="listitem"><p>Common keywords: (A | "A B"~N) becomes A; ("A B" | "A B C") becomes "A B"; ("A B"~N | "A B C"~N) becomes ("A B"~N)</p></li>
- <li class="listitem"><p>Common phrase: ("X A B" | "Y A B") becomes (("X|Y") "A B")</p></li>
- <li class="listitem"><p>Common AND NOT: ((A !X) | (A !Y) | (A !Z)) becomes (A !(X Y Z))</p></li>
- <li class="listitem"><p>Common OR NOT: ((A !(N | N1)) | (B !(N | N2))) becomes (( (A !N1) | (B !N2) ) !N)</p></li>
- </ul></div>
- <p>
- Note that optimizing the queries consumes CPU time, so for simple queries -or for hand-optimized queries- you'll do
- better with the default boolean_simplify=0 value. Simplifications are often better for complex queries, or
- algorithmically generated queries.
- </p><p>
- Queries like "-dog", which implicitly include all documents from the
- collection, can not be evaluated. This is both for technical and performance
- reasons. Technically, Sphinx does not always keep a list of all IDs.
- Performance-wise, when the collection is huge (ie. 10-100M documents),
- evaluating such queries could take very long.
- </p></div>
- <div class="sect1" title="5.3. Extended query syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="extended-syntax"></a>5.3. Extended query syntax</h2></div></div></div>
- <p>
- The following special operators and modifiers can be used when using the extended matching mode:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>operator OR: </p><pre class="programlisting">hello | world</pre></li>
- <li class="listitem"><p>operator MAYBE (introduced in version 2.2.3-beta): </p><pre class="programlisting">hello MAYBE world</pre></li>
- <li class="listitem"><p>operator NOT:
- </p><pre class="programlisting">
- hello -world
- hello !world
- </pre><p>
- </p></li>
- <li class="listitem"><p>field search operator: </p><pre class="programlisting">@title hello @body world</pre></li>
- <li class="listitem"><p>field position limit modifier (introduced in version 0.9.9-rc1): </p><pre class="programlisting">@body[50] hello</pre></li>
- <li class="listitem"><p>multiple-field search operator: </p><pre class="programlisting">@(title,body) hello world</pre></li>
- <li class="listitem"><p>ignore field search operator (will ignore any matches of 'hello world' from field 'title'): </p><pre class="programlisting">@!title hello world</pre></li>
- <li class="listitem"><p>ignore multiple-field search operator (if we have fields title, subject and body then @!(title) is equivalent to @(subject,body)): </p><pre class="programlisting">@!(title,body) hello world</pre></li>
- <li class="listitem"><p>all-field search operator: </p><pre class="programlisting">@* hello</pre></li>
- <li class="listitem"><p>phrase search operator: </p><pre class="programlisting">"hello world"</pre></li>
- <li class="listitem"><p>proximity search operator: </p><pre class="programlisting">"hello world"~10</pre></li>
- <li class="listitem"><p>quorum matching operator: </p><pre class="programlisting">"the world is a wonderful place"/3</pre></li>
- <li class="listitem"><p>strict order operator (aka operator "before"): </p><pre class="programlisting">aaa << bbb << ccc</pre></li>
- <li class="listitem"><p>exact form modifier (introduced in version 0.9.9-rc1): </p><pre class="programlisting">raining =cats and =dogs</pre></li>
- <li class="listitem"><p>field-start and field-end modifier (introduced in version 0.9.9-rc2): </p><pre class="programlisting">^hello world$</pre></li>
- <li class="listitem"><p>keyword IDF boost modifier (introduced in version 2.2.3-beta): </p><pre class="programlisting">boosted^1.234 boostedfieldend$^1.234</pre></li>
- <li class="listitem"><p>NEAR, generalized proximity operator (introduced in version 2.0.1-beta): </p><pre class="programlisting">hello NEAR/3 world NEAR/4 "my test"</pre></li>
- <li class="listitem"><p>SENTENCE operator (introduced in version 2.0.1-beta): </p><pre class="programlisting">all SENTENCE words SENTENCE "in one sentence"</pre></li>
- <li class="listitem"><p>PARAGRAPH operator (introduced in version 2.0.1-beta): </p><pre class="programlisting">"Bill Gates" PARAGRAPH "Steve Jobs"</pre></li>
- <li class="listitem"><p>ZONE limit operator: </p><pre class="programlisting">ZONE:(h3,h4)</pre><p> only in these titles</p></li>
- <li class="listitem"><p>ZONESPAN limit operator: </p><pre class="programlisting">ZONESPAN:(h2)</pre><p> only in a (single) title</p></li>
- </ul></div>
- <p>
- Here's an example query that uses some of these operators:
- </p><div class="example"><a name="ex-extended-query"></a><p class="title"><b>Example 5.2. Extended matching mode: query example</b></p><div class="example-contents"><pre class="programlisting">
- "hello world" @title "example program"~5 @body python -(php|perl) @* code
- </pre></div></div>
- <p><br class="example-break">
- The full meaning of this search is:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>Find the words 'hello' and 'world' adjacently in any field in a document;</p></li>
- <li class="listitem"><p>Additionally, the same document must also contain the words 'example' and 'program'
- in the title field, with up to, but not including, 5 words between the words in question;
- (E.g. "example PHP program" would be matched however "example script to introduce outside data
- into the correct context for your program" would not because two terms have 5 or more words between them)</p></li>
- <li class="listitem"><p>Additionally, the same document must contain the word 'python' in the body field, but not contain either 'php' or 'perl';</p></li>
- <li class="listitem"><p>Additionally, the same document must contain the word 'code' in any field.</p></li>
- </ul></div>
- <p>
- </p><p>
- There is always an implicit AND operator, so "hello world" means that
- both "hello" and "world" must be present in matching document.
- </p><p>
- OR operator precedence is higher than AND, so "looking for cat | dog | mouse"
- means "looking for ( cat | dog | mouse )" and <span class="emphasis"><em>not</em></span>
- "(looking for cat) | dog | mouse".
- </p><p>
- Field limit operator limits subsequent searching to a given field.
- Normally, query will fail with an error message if given field name does not exist
- in the searched index. However, that can be suppressed by specifying "@@relaxed"
- option at the very beginning of the query:
- </p><pre class="programlisting">
- @@relaxed @nosuchfield my query
- </pre><p>
- This can be helpful when searching through heterogeneous indexes with
- different schemas.
- </p><p>
- Field position limit, introduced in version 0.9.9-rc1, additionally restricts the searching
- to the first N positions within the given field (or fields). For example, "@body[50] hello" will
- <span class="bold"><strong>not</strong></span> match the documents where the keyword 'hello' only occurs at position 51 or later
- in the body.
- </p><p>
- Proximity distance is specified in words, adjusted for word count, and
- applies to all words within quotes. For instance, "cat dog mouse"~5 query
- means that there must be less than 8-word span which contains all 3 words,
- ie. "CAT aaa bbb ccc DOG eee fff MOUSE" document will <span class="emphasis"><em>not</em></span>
- match this query, because this span is exactly 8 words long.
- </p><p>
- Quorum matching operator introduces a kind of fuzzy matching.
- It will only match those documents that pass a given threshold of given words.
- The example above ("the world is a wonderful place"/3) will match all documents
- that have at least 3 of the 6 specified words. Operator is limited to 255 keywords.
- Instead of an absolute number, you can also specify a number between 0.0 and 1.0
- (standing for 0% and 100%), and Sphinx will match only documents with at least
- the specified percentage of given words. The same example above could also have
- been written "the world is a wonderful place"/0.5 and it would match documents
- with at least 50% of the 6 words.
- </p><p>
- Strict order operator (aka operator "before"), introduced in version 0.9.9-rc2,
- will match the document only if its argument keywords occur in the document
- exactly in the query order. For instance, "black << cat" query (without
- quotes) will match the document "black and white cat" but <span class="emphasis"><em>not</em></span>
- the "that cat was black" document. Order operator has the lowest priority.
- It can be applied both to just keywords and more complex expressions,
- ie. this is a valid query:
- </p><pre class="programlisting">
- (bag of words) << "exact phrase" << red|green|blue
- </pre><p>
- </p><p>
- Exact form keyword modifier, introduced in version 0.9.9-rc1, will match the document only if the keyword occurred
- in exactly the specified form. The default behavior is to match the document
- if the stemmed keyword matches. For instance, "runs" query will match both
- the document that contains "runs" <span class="emphasis"><em>and</em></span> the document that
- contains "running", because both forms stem to just "run" - while "=runs"
- query will only match the first document. Exact form operator requires
- <a class="link" href="#conf-index-exact-words" title="12.2.42. index_exact_words">index_exact_words</a> option to be enabled.
- This is a modifier that affects the keyword and thus can be used within
- operators such as phrase, proximity, and quorum operators.
- Starting with 2.2.2-beta, it is possible to apply an exact form modifier
- to the phrase operator. It's really just syntax sugar - it adds an exact form
- modifier to all terms contained within the phrase.
- </p><pre class="programlisting">
- ="exact phrase"
- </pre><p>
- </p><p>
- Field-start and field-end keyword modifiers, introduced in version 0.9.9-rc2,
- will make the keyword match only if it occurred at the very start or the very end
- of a fulltext field, respectively. For instance, the query "^hello world$"
- (with quotes and thus combining phrase operator and start/end modifiers)
- will only match documents that contain at least one field that has exactly
- these two keywords.
- </p><p>
- Starting with 0.9.9-rc1, arbitrarily nested brackets and negations are allowed.
- However, the query must be possible to compute without involving an implicit
- list of all documents:
- </p><pre class="programlisting">
- // correct query
- aaa -(bbb -(ccc ddd))
- // queries that are non-computable
- -aaa
- aaa | -bbb
- </pre><p>
- </p><p>
- Starting with 2.2.2-beta, the phrase search operator may include a 'match any term'
- modifier. Terms within the phrase operator are position significant. When
- the 'match any term' modifier is used, the position of the subsequent terms
- from that phrase query will be shifted. Therefore, 'match any' has no impact
- on search performance.
- </p><pre class="programlisting">
- "exact * phrase * * for terms"
- </pre><p>
- </p><p>
- <span class="bold"><strong>NEAR operator</strong></span>, added in 2.0.1-beta, is a generalized version
- of a proximity operator. The syntax is <code class="code">NEAR/N</code>, it is
- case-sensitive, and no spaces are allowed between the NEAR keyword,
- the slash sign, and the distance value.
- </p><p>
- The original proximity operator only worked on sets of keywords.
- NEAR is more generic and can accept arbitrary subexpressions as
- its two arguments, matching the document when both subexpressions
- are found within N words of each other, no matter in which order.
- NEAR is left associative and has the same (lowest) precedence
- as BEFORE.
- </p><p>
- You should also note how a <code class="code">(one NEAR/7 two NEAR/7 three)</code>
- query using NEAR is not really equivalent to a
- <code class="code">("one two three"~7)</code> one using keyword proximity operator.
- The difference here is that the proximity operator allows for up to
- 6 non-matching words between all the 3 matching words, but the version
- with NEAR is less restrictive: it would allow for up to 6 words between
- 'one' and 'two' and then for up to 6 more between that two-word
- matching and a 'three' keyword.
- </p><p>
- <span class="bold"><strong>SENTENCE and PARAGRAPH operators</strong></span>, added in 2.0.1-beta,
- match the document when both of their arguments are within the same
- sentence or the same paragraph of text, respectively. The arguments
- can be either keywords, or phrases, or the instances of the same
- operator. Here are a few examples:
- </p><pre class="programlisting">
- one SENTENCE two
- one SENTENCE "two three"
- one SENTENCE "two three" SENTENCE four
- </pre><p>
- The order of the arguments within the sentence or paragraph
- does not matter. These operators only work on indexes built
- with <a class="link" href="#conf-index-sp" title="12.2.8. index_sp">index_sp</a> (sentence
- and paragraph indexing feature) enabled, and revert to a mere
- AND otherwise. Refer to the <code class="code">index_sp</code> directive
- documentation for the notes on what's considered a sentence
- and a paragraph.
- </p><p>
- <span class="bold"><strong>ZONE limit operator</strong></span>, added in 2.0.1-beta, is quite similar
- to field limit operator, but restricts matching to a given in-field
- zone or a list of zones. Note that the subsequent subexpressions
- are <span class="emphasis"><em>not</em></span> required to match in a single contiguous
- span of a given zone, and may match in multiple spans.
- For instance, <code class="code">(ZONE:th hello world)</code> query
- <span class="emphasis"><em>will</em></span> match this example document:
- </p><pre class="programlisting">
- <th>Table 1. Local awareness of Hello Kitty brand.</th>
- .. some table data goes here ..
- <th>Table 2. World-wide brand awareness.</th>
- </pre><p>
- ZONE operator affects the query until the next
- field or ZONE limit operator, or the closing parenthesis.
- It only works on the indexes built with zones support
- (see <a class="xref" href="#conf-index-zones" title="12.2.9. index_zones">Section 12.2.9, “index_zones”</a>) and will be ignored
- otherwise.
- </p><p>
- <span class="bold"><strong>ZONESPAN limit operator</strong></span>, added in 2.1.1-beta, is similar to the ZONE operator,
- but requires the match to occur in a single contiguous span. In the example
- above, <code class="code">(ZONESPAN:th hello world)</code> would not match the document,
- since "hello" and "world" do not occur within the same span.
- </p><p>
- <span class="bold"><strong>MAYBE</strong></span> operator was added in 2.2.3-beta. It works much like |
- operator but doesn't return documents which match only right subtree expression.
- </p></div>
- <div class="sect1" title="5.4. Search results ranking"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="weighting"></a>5.4. Search results ranking</h2></div></div></div>
- <div class="sect2" title="5.4.1. Ranking overview"><div class="titlepage"><div><div><h3 class="title"><a name="ranking-overview"></a>5.4.1. Ranking overview</h3></div></div></div>
- <p>
- Ranking (aka weighting) of the search results can be defined
- as a process of computing a so-called relevance (aka weight)
- for every given matched document with regards to a given query
- that matched it. So relevance is in the end just a number attached
- to every document that estimates how relevant the document is to
- the query. Search results can then be sorted based on this number
- and/or some additional parameters, so that the most sought after
- results would come up higher on the results page.
- </p><p>
- There is no single standard one-size-fits-all way to rank
- any document in any scenario. Moreover, there can not ever be
- such a way, because relevance is <span class="emphasis"><em>subjective</em></span>.
- As in, what seems relevant to you might not seem relevant to me.
- Hence, in general case it's not just hard to compute, it's
- theoretically impossible.
- </p><p>
- So ranking in Sphinx is configurable. It has a notion of
- a so-called <span class="bold"><strong>ranker</strong></span>. A ranker can formally be defined
- as a function that takes document and query as its input and
- produces a relevance value as output. In layman's terms,
- a ranker controls exactly how (using which specific algorithm)
- Sphinx will assign weights to the document.
- </p><p>
- Previously, this ranking function was rigidly bound to the matching mode.
- So in the legacy matching modes (that is, SPH_MATCH_ALL, SPH_MATCH_ANY,
- SPH_MATCH_PHRASE, and SPH_MATCH_BOOLEAN) you can not choose the ranker.
- You can only do that in the SPH_MATCH_EXTENDED mode. (Which is the only
- mode in SphinxQL and the suggested mode in SphinxAPI anyway.) To choose
- a non-default ranker you can either use
- <a class="link" href="#api-func-setrankingmode" title="9.3.2. SetRankingMode">SetRankingMode()</a>
- with SphinxAPI, or <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">OPTION ranker</a>
- clause in <code class="code">SELECT</code> statement when using SphinxQL.
- </p><p>
- As a sidenote, legacy matching modes are internally implemented via
- the unified syntax anyway. When you use one of those modes, Sphinx just
- internally adjusts the query and sets the associated ranker, then
- executes the query using the very same unified code path.
- </p></div>
- <div class="sect2" title="5.4.2. Available built-in rankers"><div class="titlepage"><div><div><h3 class="title"><a name="builtin-rankers"></a>5.4.2. Available built-in rankers</h3></div></div></div>
- <p>
- Sphinx ships with a number of built-in rankers suited for different
- purposes. A number of them use two factors, phrase proximity (aka LCS)
- and BM25. Phrase proximity works on the keyword positions, while BM25
- works on the keyword frequencies. Basically, the better the degree of
- the phrase match between the document body and the query, the higher
- is the phrase proximity (it maxes out when the document contains
- the entire query as a verbatim quote). And BM25 is higher when
- the document contains more rare words. We'll save the detailed
- discussion for later.
- </p><p>
- Currently implemented rankers are:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>
- SPH_RANK_PROXIMITY_BM25, the default ranking mode that uses and combines
- both phrase proximity and BM25 ranking.
- </p></li>
- <li class="listitem"><p>
- SPH_RANK_BM25, statistical ranking mode which uses BM25 ranking only (similar to
- most other full-text engines). This mode is faster but may result in worse quality
- on queries which contain more than 1 keyword.
- </p></li>
- <li class="listitem"><p>
- SPH_RANK_NONE, no ranking mode. This mode is obviously the fastest.
- A weight of 1 is assigned to all matches. This is sometimes called boolean
- searching that just matches the documents but does not rank them.
- </p></li>
- <li class="listitem"><p>SPH_RANK_WORDCOUNT, ranking by the keyword occurrences count.
- This ranker computes the per-field keyword occurrence counts, then multiplies
- them by field weights, and sums the resulting values.
- </p></li>
- <li class="listitem"><p>
- SPH_RANK_PROXIMITY, added in version 0.9.9-rc1, returns raw phrase proximity
- value as a result. This mode is internally used to emulate SPH_MATCH_ALL queries.
- </p></li>
- <li class="listitem"><p>
- SPH_RANK_MATCHANY, added in version 0.9.9-rc1, returns rank as it was computed
- in SPH_MATCH_ANY mode earlier, and is internally used to emulate SPH_MATCH_ANY queries.
- </p></li>
- <li class="listitem"><p>
- SPH_RANK_FIELDMASK, added in version 0.9.9-rc2, returns a 32-bit mask with
- N-th bit corresponding to N-th fulltext field, numbering from 0. The bit will
- only be set when the respective field has any keyword occurrences satisfying
- the query.
- </p></li>
- <li class="listitem"><p>
- SPH_RANK_SPH04, added in version 1.10-beta, is generally based on the default
- SPH_RANK_PROXIMITY_BM25 ranker, but additionally boosts the matches when
- they occur in the very beginning or the very end of a text field. Thus,
- if a field equals the exact query, SPH04 should rank it higher than a field
- that contains the exact query but is not equal to it. (For instance, when
- the query is "Hyde Park", a document entitled "Hyde Park" should be ranked
- higher than one entitled "Hyde Park, London" or "The Hyde Park Cafe".)
- </p></li>
- <li class="listitem"><p>
- SPH_RANK_EXPR, added in version 2.0.2-beta, lets you specify the ranking
- formula in run time. It exposes a number of internal text factors and lets
- you define how the final weight should be computed from those factors.
- You can find more details about its syntax and a reference of the available
- factors in a subsection below.
- </p></li>
- </ul></div>
- <p>
- </p><p>
- You should specify the <code class="code">SPH_RANK_</code> prefix and use capital letters only
- when using the <a class="link" href="#api-func-setrankingmode" title="9.3.2. SetRankingMode">SetRankingMode()</a>
- call from the SphinxAPI. The API ports expose these as global constants.
- Using SphinxQL syntax, the prefix should be omitted and the ranker name
- is case insensitive. Example:
- </p><pre class="programlisting">
- // SphinxAPI
- $client->SetRankingMode ( SPH_RANK_SPH04 );
- // SphinxQL
- mysql_query ( "SELECT ... OPTION ranker=sph04" );
- </pre><p>
- </p><h4><a name="idp31462512"></a>Legacy matching modes rankers</h4><p>
- Legacy matching modes automatically select a ranker as follows:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>SPH_MATCH_ALL uses SPH_RANK_PROXIMITY ranker;</p></li>
- <li class="listitem"><p>SPH_MATCH_ANY uses SPH_RANK_MATCHANY ranker;</p></li>
- <li class="listitem"><p>SPH_MATCH_PHRASE uses SPH_RANK_PROXIMITY ranker;</p></li>
- <li class="listitem"><p>SPH_MATCH_BOOLEAN uses SPH_RANK_NONE ranker.</p></li>
- </ul></div>
- <p>
- </p></div>
- <div class="sect2" title="5.4.3. Expression based ranker (SPH_RANK_EXPR)"><div class="titlepage"><div><div><h3 class="title"><a name="expression-ranker"></a>5.4.3. Expression based ranker (SPH_RANK_EXPR)</h3></div></div></div>
- <p>
- Expression ranker, added in version 2.0.2-beta, lets you change the ranking
- formula on the fly, on a per-query basis. For a quick kickoff, this is how you
- emulate PROXIMITY_BM25 ranker using the expression based one:
- </p><pre class="programlisting">
- SELECT *, WEIGHT() FROM myindex WHERE MATCH('hello world')
- OPTION ranker=expr('sum(lcs*user_weight)*1000+bm25')
- </pre><p>
- The output of this query must not change if you omit the <code class="code">OPTION</code>
- clause, because the default ranker (PROXIMITY_BM25) behaves exactly like
- specified in the ranker formula above. But the expression ranker is somewhat
- more flexible than just that and provides access to many more factors.
- </p><p>
- The ranking formula is an arbitrary arithmetic expression that can use
- constants, document attributes, built-in functions and operators (described
- in <a class="xref" href="#expressions" title="5.5. Expressions, functions, and operators">Section 5.5, “Expressions, functions, and operators”</a>), and also a few ranking-specific things
- that are only accessible in a ranking formula. Namely, those are field
- aggregation functions, field-level, and document-level ranking factors.
- </p></div>
- <div class="sect2" title="5.4.4. Quick summary of the ranking factors"><div class="titlepage"><div><div><h3 class="title"><a name="ranking-factors"></a>5.4.4. Quick summary of the ranking factors</h3></div></div></div>
- <p>
- </p><div class="table"><a name="ranking-factors-table"></a><p class="title"><b>Table 5.1. </b></p><div class="table-contents"><table border="1"><colgroup><col class="name"><col class="level"><col class="type"></colgroup><thead><tr><th>Name</th><th>Level</th><th>Type</th><th>Summary</th></tr></thead><tbody><tr><td>max_lcs</td><td>query</td><td>int</td><td>maximum possible LCS value for the current query</td></tr><tr><td>bm25</td><td>document</td><td>int</td><td>quick estimate of BM25(1.2, 0) without syntax support</td></tr><tr><td>bm25a(k1, b)</td><td>document</td><td>int</td><td>precise BM25() value with configurable K1, B constants and syntax support</td></tr><tr><td>bm25f(k1, b, {field=weight, ...})</td><td>document</td><td>int</td><td>precise BM25F() value with extra configurable field weights</td></tr><tr><td>field_mask</td><td>document</td><td>int</td><td>bit mask of matched fields</td></tr><tr><td>query_word_count</td><td>document</td><td>int</td><td>number of unique inclusive keywords in a query</td></tr><tr><td>doc_word_count</td><td>document</td><td>int</td><td>number of unique keywords matched in the document</td></tr><tr><td>lcs</td><td>field</td><td>int</td><td>Longest Common Subsequence between query and document, in words</td></tr><tr><td>user_weight</td><td>field</td><td>int</td><td>user field weight</td></tr><tr><td>hit_count</td><td>field</td><td>int</td><td>total number of keyword occurrences</td></tr><tr><td>word_count</td><td>field</td><td>int</td><td>number of unique matched keywords</td></tr><tr><td>tf_idf</td><td>field</td><td>float</td><td>sum(tf*idf) over matched keywords == sum(idf) over occurrences</td></tr><tr><td>min_hit_pos</td><td>field</td><td>int</td><td>first matched occurrence position, in words, 1-based</td></tr><tr><td>min_best_span_pos</td><td>field</td><td>int</td><td>first maximum LCS span position, in words, 1-based</td></tr><tr><td>exact_hit</td><td>field</td><td>bool</td><td>whether query == 
field</td></tr><tr><td>min_idf</td><td>field</td><td>float</td><td>min(idf) over matched keywords</td></tr><tr><td>max_idf</td><td>field</td><td>float</td><td>max(idf) over matched keywords</td></tr><tr><td>sum_idf</td><td>field</td><td>float</td><td>sum(idf) over matched keywords</td></tr><tr><td>exact_order</td><td>field</td><td>bool</td><td>whether all query keywords were a) matched and b) in query order</td></tr><tr><td>min_gaps</td><td>field</td><td>int</td><td>minimum number of gaps between the matched keywords over the matching spans</td></tr><tr><td>lccs</td><td>field</td><td>int</td><td>Longest Common Contiguous Subsequence between query and document, in words</td></tr><tr><td>wlccs</td><td>field</td><td>float</td><td>Weighted Longest Common Contiguous Subsequence, sum(idf) over contiguous keyword spans</td></tr><tr><td>atc</td><td>field</td><td>float</td><td>Aggregate Term Closeness, log(1+sum(idf1*idf2*pow(distance, -1.75))) over the best pairs of keywords</td></tr></tbody></table></div></div>
- <p><br class="table-break">
- </p></div>
- <div class="sect2" title="5.4.5. Document-level ranking factors"><div class="titlepage"><div><div><h3 class="title"><a name="document-factors"></a>5.4.5. Document-level ranking factors</h3></div></div></div>
- <p>
- A <span class="bold"><strong>document-level factor</strong></span> is a numeric value computed by the ranking
- engine for every matched document with regards to the current query.
- (So it differs from a plain document attribute in that attributes
- do not depend on the full text query, while factors might.) Those
- factors can be used anywhere in the ranking expression.
- Currently implemented document-level factors are:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>
- <code class="code">bm25</code> (integer), a document-level BM25 estimate (computed without
- keyword occurrence filtering).
- </p></li>
- <li class="listitem"><p>
- <code class="code">max_lcs</code> (integer), a query-level maximum possible value that
- the sum(lcs*user_weight) expression can ever take. This can be
- useful for weight boost scaling. For instance, MATCHANY ranker
- formula uses this to guarantee that a full phrase match in any
- field ranks higher than any combination of partial matches
- in all fields.
- </p></li>
- <li class="listitem"><p>
- <code class="code">field_mask</code> (integer), a document-level 32-bit mask of matched
- fields.
- </p></li>
- <li class="listitem"><p>
- <code class="code">query_word_count</code> (integer), the number of unique keywords
- in a query, adjusted for the number of excluded keywords. For instance,
- both <code class="code">(one one one one)</code> and <code class="code">(one !two)</code> queries
- should assign a value of 1 to this factor, because there is just one unique
- non-excluded keyword.
- </p></li>
- <li class="listitem"><p>
- <code class="code">doc_word_count</code> (integer), the number of unique keywords
- matched in the entire document.
- </p></li>
- </ul></div>
- <p>
- </p></div>
- <div class="sect2" title="5.4.6. Field-level ranking factors"><div class="titlepage"><div><div><h3 class="title"><a name="field-factors"></a>5.4.6. Field-level ranking factors</h3></div></div></div>
- <p>
- A <span class="bold"><strong>field-level factor</strong></span> is a numeric value computed by the ranking
- engine for every matched in-document text field with regards to the
- current query. As more than one field can be matched by a query,
- but the final weight needs to be a single integer value, these
- values need to be folded into a single one. To achieve that,
- field-level factors can only be used within a field aggregation
- function; they can <span class="bold"><strong>not</strong></span> be used just anywhere in the expression.
- For example, you can not use <code class="code">(lcs+bm25)</code> as your
- ranking expression, as <code class="code">lcs</code> takes multiple values (one
- in every matched field). You should use <code class="code">(sum(lcs)+bm25)</code>
- instead, that expression sums <code class="code">lcs</code> over all matching fields,
- and then adds <code class="code">bm25</code> to that per-field sum.
- Currently implemented field-level factors are:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>
- <code class="code">lcs</code> (integer), the length of a maximum verbatim match between
- the document and the query, counted in words. LCS stands for Longest Common
- Subsequence (or Subset). Takes a minimum value of 1 when only stray keywords
- were matched in a field, and a maximum value of query keywords count
- when the entire query was matched in a field verbatim (in the exact
- query keywords order). For example, if the query is 'hello world'
- and the field contains these two words quoted from the query (that is,
- adjacent to each other, and exactly in the query order), <code class="code">lcs</code>
- will be 2. For example, if the query is 'hello world program' and
- the field contains 'hello world', <code class="code">lcs</code> will be 2.
- Note that any subset of the query keywords works, not just a subset
- of adjacent keywords. For example, if the query is 'hello world program'
- and the field contains 'hello (test program)', <code class="code">lcs</code> will be 2
- just as well, because both 'hello' and 'program' matched in the same
- respective positions as they were in the query. Finally, if the query
- is 'hello world program' and the field contains 'hello world program',
- <code class="code">lcs</code> will be 3. (Hopefully that is unsurprising at this point.)
- </p></li>
- <li class="listitem"><p>
- <code class="code">user_weight</code> (integer), the user specified per-field weight
- (refer to <a class="link" href="#api-func-setfieldweights" title="9.3.5. SetFieldWeights">SetFieldWeights()</a>
- in SphinxAPI and <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">OPTION field_weights</a>
- in SphinxQL respectively). The weights default to 1 if not specified
- explicitly.
- </p></li>
- <li class="listitem"><p>
- <code class="code">hit_count</code> (integer), the number of keyword occurrences
- that matched in the field. Note that a single keyword may occur multiple
- times. For example, if 'hello' occurs 3 times in a field and 'world'
- occurs 5 times, <code class="code">hit_count</code> will be 8.
- </p></li>
- <li class="listitem"><p>
- <code class="code">word_count</code> (integer), the number of unique keywords matched
- in the field. For example, if 'hello' and 'world' occur anywhere in a field,
- <code class="code">word_count</code> will be 2, regardless of how many times both
- keywords occur.
- </p></li>
- <li class="listitem"><p>
- <code class="code">tf_idf</code> (float), the sum of TF*IDF over all the keywords matched in the
- field. IDF is the Inverse Document Frequency, a floating point value
- between 0 and 1 that describes how frequent the keyword is (basically,
- 0 for a keyword that occurs in every document indexed, and 1 for a unique
- keyword that occurs in just a single document). TF is the Term Frequency,
- the number of matched keyword occurrences in the field. As a side note,
- <code class="code">tf_idf</code> is actually computed by summing IDF over all matched
- occurrences. That's by construction equivalent to summing TF*IDF over
- all matched keywords.
- </p></li>
- <li class="listitem"><p>
- <code class="code">min_hit_pos</code> (integer), the position of the first matched keyword occurrence,
- counted in words. Indexing begins from position 1.
- </p></li>
- <li class="listitem"><p>
- <code class="code">min_best_span_pos</code> (integer), the position of the first maximum LCS
- occurrences span. For example, assume that our query was 'hello world
- program' and 'hello world' subphrase was matched twice in the field,
- in positions 13 and 21. Assume that 'hello' and 'world' additionally
- occurred elsewhere in the field, but never next to each other and thus
- never as a subphrase match. In that case, <code class="code">min_best_span_pos</code>
- will be 13. Note how for the single keyword queries
- <code class="code">min_best_span_pos</code> will always equal <code class="code">min_hit_pos</code>.
- </p></li>
- <li class="listitem"><p>
- <code class="code">exact_hit</code> (boolean), whether a query was an exact match
- of the entire current field. Used in the SPH04 ranker.
- </p></li>
- <li class="listitem"><p>
- <code class="code">min_idf</code>, <code class="code">max_idf</code>, and <code class="code">sum_idf</code> (float),
- added in version 2.1.1-beta. These factors respectively represent the min(idf),
- max(idf) and sum(idf) over all keywords that were matched in the field.
- </p></li>
- <li class="listitem"><p>
- <code class="code">exact_order</code> (boolean), added in version 2.2.1-beta. Whether all of the
- query keywords were matched in the field in the exact query order. For example,
- <code class="code">(microsoft office)</code> query would yield exact_order=1 in a field with the
- following contents: <code class="code">(We use Microsoft software in our office.)</code>.
- However, the very same query in a <code class="code">(Our office is Microsoft free.)</code>
- field would yield exact_order=0.
- </p></li>
- <li class="listitem"><p>
- <code class="code">min_gaps</code> (integer), added in version 2.2.1-beta, the minimum number
- of positional gaps between (just) the keywords matched in the field. Always 0 when fewer
- than 2 keywords match; always greater than or equal to 0 otherwise.
- </p><p>
- For example, with a <code class="code">[big wolf]</code> query, <code class="code">[big bad wolf]</code> field
- would yield min_gaps=1; <code class="code">[big bad hairy wolf]</code> field would yield min_gaps=2;
- <code class="code">[the wolf was scary and big]</code> field would yield min_gaps=3; etc.
- However, a field like <code class="code">[i heard a wolf howl]</code> would yield min_gaps=0,
- because only one keyword would be matching in that field, and, naturally, there
- would be no gaps between the <span class="emphasis"><em>matched</em></span> keywords.
- </p><p>
- Therefore, this is a rather low-level, "raw" factor that you would most likely
- want to <span class="emphasis"><em>adjust</em></span> before actually using for ranking. Specific
- adjustments depend heavily on your data and the resulting formula, but here are
- a few ideas you can start with: (a) any min_gaps based boosts could be simply ignored
- when word_count<2; (b) non-trivial min_gaps values (i.e. when word_count>=2)
- could be clamped with a certain "worst case" constant while trivial values
- (i.e. when min_gaps=0 and word_count<2) could be replaced by that constant;
- (c) a transfer function like 1/(1+min_gaps) could be applied (so that better,
- smaller min_gaps values would maximize it and worse, bigger min_gaps values
- would fall off slowly); and so on.
- </p></li>
- <li class="listitem"><p>
- <code class="code">lccs</code> (integer), added in version 2.2.1-beta. Longest Common Contiguous
- Subsequence. A length of the longest subphrase that is common between the query and
- the document, computed in keywords.
- </p><p>
- LCCS factor is rather similar to LCS but more restrictive, in a sense. While LCS could
- be greater than 1 though no two query words are matched next to each other, LCCS
- would only get greater than 1 if there are <span class="emphasis"><em>exact</em></span>, contiguous
- query subphrases in the document. For example, (one two three four five) query
- vs (one hundred three hundred five hundred) document would yield lcs=3, but lccs=1,
- because even though mutual dispositions of 3 keywords (one, three, five) match between
- the query and the document, no 2 matching positions are actually next to each other.
- </p><p>
- Note that LCCS still does not differentiate between the frequent and rare keywords;
- for that, see WLCCS.
- </p></li>
- <li class="listitem"><p>
- <code class="code">wlccs</code> (float), added in version 2.2.1-beta. Weighted Longest Common Contiguous
- Subsequence. A sum of IDFs of the keywords of the longest subphrase that is common
- between the query and the document.
- </p><p>
- WLCCS is computed very similarly to LCCS, but every "suitable" keyword occurrence
- increases it by the keyword IDF rather than just by 1 (which is the case with LCS and
- LCCS). That lets us rank sequences of more rare and important keywords higher than
- sequences of frequent keywords, even if the latter are longer. For example, a query
- <code class="code">(Zanzibar bed and breakfast)</code> would yield lccs=1 for a
- <code class="code">(hotels of Zanzibar)</code> document, but lccs=3 against
- <code class="code">(London bed and breakfast)</code>, even though "Zanzibar" is actually
- somewhat more rare than the entire "bed and breakfast" phrase. WLCCS factor alleviates
- that problem by using the keyword frequencies.
- </p></li>
- <li class="listitem"><p>
- <code class="code">atc</code> (float), added in version 2.2.1-beta. Aggregate Term Closeness.
- A proximity based measure that grows higher when the document contains more groups
- of more closely located and more important (rare) query keywords. <span class="bold"><strong>WARNING:</strong></span>
- you should use ATC with OPTION idf='plain,tfidf_unnormalized'; otherwise you would
- get unexpected results.
- </p><p>
- ATC basically works as follows. For every keyword <span class="emphasis"><em>occurrence</em></span>
- in the document, we compute the so called <span class="emphasis"><em>term closeness</em></span>. For that,
- we examine all the other closest occurrences of all the query keywords (keyword itself
- included too) to the left and to the right of the subject occurrence, compute a distance
- dampening coefficient as k = pow(distance, -1.75) for those occurrences, and sum the
- dampened IDFs. Thus for every occurrence of every keyword, we get a "closeness" value
- that describes the "neighbors" of that occurrence. We then multiply those per-occurrence
- closenesses by their respective subject keyword IDF, sum them all, and finally,
- compute a logarithm of that sum.
- </p><p>
- Or in other words, we process the best (closest) matched keyword pairs in the document,
- and compute pairwise "closenesses" as the product of their IDFs scaled by the distance
- coefficient:
- </p><pre class="programlisting">
- pair_tc = idf(pair_word1) * idf(pair_word2) * pow(pair_distance, -1.75)
- </pre><p>
- We then sum such closenesses, and compute the final, log-dampened ATC value:
- </p><pre class="programlisting">
- atc = log(1+sum(pair_tc))
- </pre><p>
- Note that this final dampening logarithm is exactly the reason you should use
- OPTION idf=plain, because without it, the expression inside the log() could be negative.
- </p><p>
- Having closer keyword occurrences actually contributes <span class="emphasis"><em>much</em></span> more
- to ATC than having more frequent keywords. Indeed, when the keywords are right next to
- each other, distance=1 and k=1; when there is just one word in between them, distance=2 and
- k=0.297, with two words between, distance=3 and k=0.146, and so on. At the same time
- IDF attenuates somewhat slower. For example, in a 1 million document collection, the IDF
- values for keywords that match in 10, 100, and 1000 documents would be respectively
- 0.833, 0.667, and 0.500. So a keyword pair with two rather rare keywords that occur
- in just 10 documents each but with 2 other words in between would yield pair_tc = 0.101
- and thus just barely outweigh a pair with a 100-doc and a 1000-doc keyword with 1 other
- word between them and pair_tc = 0.099. Moreover, a pair of two <span class="emphasis"><em>unique</em></span>,
- 1-doc keywords with 3 words between them would get a pair_tc = 0.088 and lose to a pair of
- two 1000-doc keywords located right next to each other and yielding a pair_tc = 0.25.
- So, basically, while ATC does combine both keyword frequency and proximity, it is still
- somewhat favoring the proximity.
- </p></li>
- </ul></div>
- <p>
- </p></div>
- <div class="sect2" title="5.4.7. Ranking factor aggregation functions"><div class="titlepage"><div><div><h3 class="title"><a name="factor-aggr-functions"></a>5.4.7. Ranking factor aggregation functions</h3></div></div></div>
- <p>
- A <span class="bold"><strong>field aggregation function</strong></span> is a single argument function
- that takes an expression with field-level factors, iterates it over
- all the matched fields, and computes the final results.
- Currently implemented field aggregation functions are:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>
- <code class="code">sum</code>, sums the argument expression over all matched
- fields. For instance, <code class="code">sum(1)</code> should return the number
- of matched fields.
- </p></li>
- <li class="listitem"><p>
- <code class="code">top</code>, returns the greatest value of the argument over all
- matched fields.
- </p></li>
- </ul></div>
- <p>
- </p></div>
- <div class="sect2" title="5.4.8. Formula expressions for all the built-in rankers"><div class="titlepage"><div><div><h3 class="title"><a name="formulas-for-builtin-rankers"></a>5.4.8. Formula expressions for all the built-in rankers</h3></div></div></div>
- <p>
- Most of the other rankers can actually be emulated with the expression
- based ranker. You just need to pass a proper expression. Such emulation is,
- of course, going to be slower than using the built-in, compiled ranker but
- still might be of interest if you want to fine-tune your ranking formula
- starting with one of the existing ones. Also, the formulas define the
- nitty gritty ranker details in a nicely readable fashion.
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>
- SPH_RANK_PROXIMITY_BM25 = sum(lcs*user_weight)*1000+bm25
- </p></li>
- <li class="listitem"><p>
- SPH_RANK_BM25 = bm25
- </p></li>
- <li class="listitem"><p>
- SPH_RANK_NONE = 1
- </p></li>
- <li class="listitem"><p>
- SPH_RANK_WORDCOUNT = sum(hit_count*user_weight)
- </p></li>
- <li class="listitem"><p>
- SPH_RANK_PROXIMITY = sum(lcs*user_weight)
- </p></li>
- <li class="listitem"><p>
- SPH_RANK_MATCHANY = sum((word_count+(lcs-1)*max_lcs)*user_weight)
- </p></li>
- <li class="listitem"><p>
- SPH_RANK_FIELDMASK = field_mask
- </p></li>
- <li class="listitem"><p>
- SPH_RANK_SPH04 = sum((4*lcs+2*(min_hit_pos==1)+exact_hit)*user_weight)*1000+bm25
- </p></li>
- </ul></div></div></div>
- <div class="sect1" title="5.5. Expressions, functions, and operators"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="expressions"></a>5.5. Expressions, functions, and operators</h2></div></div></div>
- <p>
- Sphinx lets you use arbitrary arithmetic expressions both via SphinxQL
- and SphinxAPI, involving attribute values, internal attributes (document ID
- and relevance weight), arithmetic operations, a number of built-in functions,
- and user-defined functions.
- This section documents the supported operators and functions.
- Here's the complete reference list for quick access.
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p><a class="link" href="#expr-ari-ops">Arithmetic operators: +, -, *, /, %, DIV, MOD</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-comp-ops">Comparison operators: <, >, <=, >=, =, <></a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-bool-ops">Boolean operators: AND, OR, NOT</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-bitwise-ops">Bitwise operators: &, |</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-abs">ABS()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-all">ALL()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-any">ANY()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-atan2">ATAN2()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-bigint">BIGINT()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-bitdot">BITDOT()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-ceil">CEIL()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-contains">CONTAINS()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-cos">COS()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-crc32">CRC32()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-day">DAY()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-double">DOUBLE()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-exp">EXP()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-fibonacci">FIBONACCI()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-floor">FLOOR()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-geodist">GEODIST()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-geopoly2d">GEOPOLY2D()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-greatest">GREATEST()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-idiv">IDIV()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-if">IF()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-in">IN()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-indexof">INDEXOF()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-integer">INTEGER()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-interval">INTERVAL()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-least">LEAST()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-length">LENGTH()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-ln">LN()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-log10">LOG10()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-log2">LOG2()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-max">MAX()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-min">MIN()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-min-top-sortval">MIN_TOP_SORTVAL()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-min-top-weight">MIN_TOP_WEIGHT()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-month">MONTH()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-now">NOW()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-poly2d">POLY2D()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-pow">POW()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-remap">REMAP()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-sin">SIN()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-sint">SINT()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-sqrt">SQRT()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-uint">UINT()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-year">YEAR()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-yearmonth">YEARMONTH()</a></p></li>
- <li class="listitem"><p><a class="link" href="#expr-func-yearmonthday">YEARMONTHDAY()</a></p></li>
- </ul></div>
- <p>
- </p><div class="sect2" title="5.5.1. Operators"><div class="titlepage"><div><div><h3 class="title"><a name="operators"></a>5.5.1. Operators</h3></div></div></div>
- <div class="variablelist"><dl><dt><span class="term"><a name="expr-ari-ops"></a>Arithmetic operators: +, -, *, /, %, DIV, MOD</span></dt>
- <dd><p>
- The standard arithmetic operators. Arithmetic calculations involving those
- can be performed in three different modes: (a) using single-precision,
- 32-bit IEEE 754 floating point values (the default), (b) using signed 32-bit integers,
- (c) using 64-bit signed integers. The expression parser will automatically switch
- to integer mode if there are no operations that result in a floating point value.
- Otherwise, it will use the default floating point mode. For instance, <code class="code">a+b</code>
- will be computed using 32-bit integers if both arguments are 32-bit integers;
- or using 64-bit integers if both arguments are integers but one of them is
- 64-bit; or in floats otherwise. However, <code class="code">a/b</code> or <code class="code">sqrt(a)</code>
- will always be computed in floats, because these operations return a result
- of non-integer type. To avoid the first, you can either use <code class="code">IDIV(a,b)</code>
- or <code class="code">a DIV b</code> form. Also, <code class="code">a*b</code>
- will not be automatically promoted to 64-bit when the arguments are 32-bit.
- To enforce 64-bit results, you can use BIGINT(). (But note that if there are
- non-integer operations, BIGINT() will simply be ignored.)
- </p></dd><dt><span class="term"><a name="expr-comp-ops"></a>Comparison operators: <, >, <=, >=, =, <></span></dt>
- <dd><p>
- Comparison operators (eg. = or <=) return 1.0 when the condition is true and 0.0 otherwise.
- For instance, <code class="code">(a=b)+3</code> will evaluate to 4 when attribute 'a' is equal to attribute 'b', and to 3 when 'a' is not.
- Unlike MySQL, the equality comparisons (ie. = and <> operators) introduce a small equality threshold (1e-6 by default).
- If the difference between compared values is within the threshold, they will be considered equal.
- </p></dd><dt><span class="term"><a name="expr-bool-ops"></a>Boolean operators: AND, OR, NOT</span></dt>
- <dd><p>
- Boolean operators (AND, OR, NOT) were introduced in 0.9.9-rc2 and behave as usual.
- They are left-associative and have the least priority compared to other operators.
- NOT has more priority than AND and OR but nevertheless less than any other operator.
- AND and OR have the same priority so brackets use is recommended to avoid confusion
- in complex expressions.
- </p></dd><dt><span class="term"><a name="expr-bitwise-ops"></a>Bitwise operators: &, |</span></dt>
- <dd><p>
- These operators perform bitwise AND and OR respectively. The operands
- must be of an integer type. Introduced in version 1.10-beta.
- </p></dd></dl></div></div>
- <div class="sect2" title="5.5.2. Numeric functions"><div class="titlepage"><div><div><h3 class="title"><a name="numeric-functions"></a>5.5.2. Numeric functions</h3></div></div></div>
- <div class="variablelist"><dl><dt><span class="term"><a name="expr-func-abs"></a>ABS()</span></dt>
- <dd><p>Returns the absolute value of the argument.</p></dd><dt><span class="term"><a name="expr-func-bitdot"></a>BITDOT()</span></dt>
- <dd><p>BITDOT(mask, w0, w1, ...) returns the sum of products of each bit of a mask multiplied by its weight:
- <code class="code">bit0*w0 + bit1*w1 + ...</code></p></dd><dt><span class="term"><a name="expr-func-ceil"></a>CEIL()</span></dt>
- <dd><p>Returns the smallest integer value greater than or equal to the argument.</p></dd><dt><span class="term"><a name="expr-func-contains"></a>CONTAINS()</span></dt>
- <dd><p>CONTAINS(polygon, x, y) checks whether the (x,y) point is within the given polygon,
- and returns 1 if true, or 0 if false. The polygon has to be specified using either the <a class="link" href="#expr-func-poly2d">POLY2D()</a> function
- or the <a class="link" href="#expr-func-geopoly2d">GEOPOLY2D()</a> function. The former function is intended for "small" polygons, meaning less than
- 500 km (300 miles) a side, and it doesn't take into account the Earth's curvature for speed. For larger
- distances, you should use GEOPOLY2D, which tessellates the given polygon in smaller parts, accounting
- for the Earth's curvature.
- These functions were added in version 2.1.1-beta.
- </p></dd><dt><span class="term"><a name="expr-func-cos"></a>COS()</span></dt>
- <dd><p>Returns the cosine of the argument.</p></dd><dt><span class="term"><a name="expr-func-double"></a>DOUBLE()</span></dt>
- <dd><p>Forcibly promotes given argument to floating point type. Intended to help enforce evaluation of numeric JSON fields. Introduced in version 2.2.1-beta.</p></dd><dt><span class="term"><a name="expr-func-exp"></a>EXP()</span></dt>
- <dd><p>Returns the exponent of the argument (e=2.718... to the power of the argument).</p></dd><dt><span class="term"><a name="expr-func-fibonacci"></a>FIBONACCI()</span></dt>
- <dd><p>Returns the N-th Fibonacci number, where N is the integer
- argument. That is, arguments of 0 and up will generate the values 0, 1, 1,
- 2, 3, 5, 8, 13 and so on. Note that the computations are done using 32-bit
- integer math and thus numbers 48th and up will be returned modulo 2^32.
- </p></dd><dt><span class="term"><a name="expr-func-floor"></a>FLOOR()</span></dt>
- <dd><p>Returns the largest integer value less than or equal to the argument.</p></dd><dt><span class="term"><a name="expr-func-geopoly2d"></a>GEOPOLY2D()</span></dt>
- <dd><p>GEOPOLY2D(x1,y1,x2,y2,x3,y3...) produces a polygon to be used with the <a class="link" href="#expr-func-contains">CONTAINS()</a> function.
- This function takes into account the Earth's curvature by tessellating the polygon into smaller ones,
- and should be used for larger areas; see the <a class="link" href="#expr-func-poly2d">POLY2D()</a> function.
- </p></dd><dt><span class="term"><a name="expr-func-idiv"></a>IDIV()</span></dt>
- <dd><p>
- Returns the result of an integer division of the first
- argument by the second argument. Both arguments must be
- of an integer type.
- </p></dd><dt><span class="term"><a name="expr-func-ln"></a>LN()</span></dt>
- <dd><p>Returns the natural logarithm of the argument (with the base of e=2.718...).</p></dd><dt><span class="term"><a name="expr-func-log10"></a>LOG10()</span></dt>
- <dd><p>Returns the common logarithm of the argument (with the base of 10).</p></dd><dt><span class="term"><a name="expr-func-log2"></a>LOG2()</span></dt>
- <dd><p>Returns the binary logarithm of the argument (with the base of 2).</p></dd><dt><span class="term"><a name="expr-func-max"></a>MAX()</span></dt>
- <dd><p>Returns the bigger of two arguments.</p></dd><dt><span class="term"><a name="expr-func-min"></a>MIN()</span></dt>
- <dd><p>Returns the smaller of two arguments.</p></dd><dt><span class="term"><a name="expr-func-poly2d"></a>POLY2D()</span></dt>
- <dd><p>POLY2D(x1,y1,x2,y2,x3,y3...) produces a polygon to be used with the <a class="link" href="#expr-func-contains">CONTAINS()</a> function.
- This polygon assumes a flat Earth, so it should not be too large; for larger areas, see the <a class="link" href="#expr-func-geopoly2d">GEOPOLY2D()</a> function.
- </p></dd><dt><span class="term"><a name="expr-func-pow"></a>POW()</span></dt>
- <dd><p>Returns the first argument raised to the power of the second argument.</p></dd><dt><span class="term"><a name="expr-func-sin"></a>SIN()</span></dt>
- <dd><p>Returns the sine of the argument.</p></dd><dt><span class="term"><a name="expr-func-sqrt"></a>SQRT()</span></dt>
- <dd><p>Returns the square root of the argument.</p></dd><dt><span class="term"><a name="expr-func-uint"></a>UINT()</span></dt>
- <dd><p>Forcibly reinterprets given argument to 64-bit unsigned type. Introduced in version 2.2.1-beta.</p></dd></dl></div></div>
- <div class="sect2" title="5.5.3. Date and time functions"><div class="titlepage"><div><div><h3 class="title"><a name="date-time-functions"></a>5.5.3. Date and time functions</h3></div></div></div>
- <div class="variablelist"><dl><dt><span class="term"><a name="expr-func-day"></a>DAY()</span></dt>
- <dd><p>Returns the integer day of month (in 1..31 range) from a timestamp argument, according to the current timezone. Introduced in version 2.0.1-beta.</p></dd><dt><span class="term"><a name="expr-func-month"></a>MONTH()</span></dt>
- <dd><p>Returns the integer month (in 1..12 range) from a timestamp argument, according to the current timezone. Introduced in version 2.0.1-beta.</p></dd><dt><span class="term"><a name="expr-func-now"></a>NOW()</span></dt>
- <dd><p>Returns the current timestamp as an INTEGER. Introduced in version 0.9.9-rc1.</p></dd><dt><span class="term"><a name="expr-func-year"></a>YEAR()</span></dt>
- <dd><p>Returns the integer year (in 1969..2038 range) from a timestamp argument, according to the current timezone. Introduced in version 2.0.1-beta.</p></dd><dt><span class="term"><a name="expr-func-yearmonth"></a>YEARMONTH()</span></dt>
- <dd><p>Returns the integer year and month code (in 196912..203801 range) from a timestamp argument, according to the current timezone. Introduced in version 2.0.1-beta.</p></dd><dt><span class="term"><a name="expr-func-yearmonthday"></a>YEARMONTHDAY()</span></dt>
- <dd><p>Returns the integer year, month, and date code (in 19691231..20380119 range) from a timestamp argument, according to the current timezone. Introduced in version 2.0.1-beta.</p></dd></dl></div></div>
- <div class="sect2" title="5.5.4. Type conversion functions"><div class="titlepage"><div><div><h3 class="title"><a name="type-conversion-functions"></a>5.5.4. Type conversion functions</h3></div></div></div>
- <div class="variablelist"><dl><dt><span class="term"><a name="expr-func-bigint"></a>BIGINT()</span></dt>
- <dd><p>
- Forcibly promotes the integer argument to 64-bit type,
- and does nothing on floating point argument. It's intended to help enforce evaluation
- of certain expressions (such as <code class="code">a*b</code>) in 64-bit mode even though all the arguments
- are 32-bit.
- Introduced in version 0.9.9-rc1.
- </p></dd><dt><span class="term"><a name="expr-func-integer"></a>INTEGER()</span></dt>
- <dd><p>Forcibly promotes given argument to 64-bit signed type. Intended to help enforce evaluation of numeric JSON fields. Introduced in version 2.2.1-beta.</p></dd><dt><span class="term"><a name="expr-func-sint"></a>SINT()</span></dt>
- <dd><p>
- Forcibly reinterprets its
- 32-bit unsigned integer argument as signed, and also expands it to 64-bit type
- (because 32-bit type is unsigned). It's easily illustrated by the following
- example: 1-2 normally evaluates to 4294967295, but SINT(1-2) evaluates to -1.
- Introduced in version 1.10-beta.
- </p></dd></dl></div></div>
- <div class="sect2" title="5.5.5. Comparison functions"><div class="titlepage"><div><div><h3 class="title"><a name="comparison-functions"></a>5.5.5. Comparison functions</h3></div></div></div>
- <div class="variablelist"><dl><dt><span class="term"><a name="expr-func-if"></a>IF()</span></dt>
- <dd><p>
- <code class="code">IF()</code> behavior is slightly different from that of its MySQL counterpart.
- It takes 3 arguments, checks whether the 1st argument is equal to 0.0, and returns the 2nd argument if it is not zero, or the 3rd one when it is.
- Note that unlike comparison operators, <code class="code">IF()</code> does <span class="bold"><strong>not</strong></span> use a threshold!
- Therefore, it's safe to use comparison results as its 1st argument, but arithmetic operators might produce unexpected results.
- For instance, the following two calls will produce <span class="emphasis"><em>different</em></span> results even though they are logically equivalent:
- </p><pre class="programlisting">
- IF ( sqrt(3)*sqrt(3)-3<>0, a, b )
- IF ( sqrt(3)*sqrt(3)-3, a, b )
- </pre><p>
- In the first case, the comparison operator <> will return 0.0 (false)
- because of a threshold, and <code class="code">IF()</code> will always return 'b' as a result.
- In the second one, the same <code class="code">sqrt(3)*sqrt(3)-3</code> expression will be compared
- with zero <span class="emphasis"><em>without</em></span> threshold by the <code class="code">IF()</code> function itself.
- But its value will be slightly different from zero because of limited floating point
- calculations precision. Because of that, the comparison with 0.0 done by <code class="code">IF()</code>
- will not pass, and the second variant will return 'a' as a result.
- </p></dd><dt><span class="term"><a name="expr-func-in"></a>IN()</span></dt>
- <dd><p>
- IN(expr,val1,val2,...), introduced in version 0.9.9-rc1, takes 2 or more arguments, and returns 1 if 1st argument
- (expr) is equal to any of the other arguments (val1..valN), or 0 otherwise.
- Currently, all the checked values (but not the expression itself!) are required
- to be constant. (It's technically possible to implement arbitrary expressions too,
- and that might be implemented in the future.) Constants are pre-sorted and then
- binary search is used, so IN() even against a big arbitrary list of constants
- will be very quick. Starting with 0.9.9-rc2, first argument can also be
- a MVA attribute. In that case, IN() will return 1 if any of the MVA values
- is equal to any of the other arguments. Starting with 2.0.1-beta, IN() also
- supports <code class="code">IN(expr,@uservar)</code> syntax to check whether the value
- belongs to the list in the given global user variable. First argument can be
- JSON attribute since 2.2.1-beta.
- </p></dd><dt><span class="term"><a name="expr-func-interval"></a>INTERVAL()</span></dt>
- <dd><p>
- INTERVAL(expr,point1,point2,point3,...), introduced in version 0.9.9-rc1, takes 2 or more arguments, and returns
- the index of the argument that is less than the first argument: it returns
- 0 if expr&lt;point1, 1 if point1&lt;=expr&lt;point2, and so on.
- It is required that point1&lt;point2&lt;...&lt;pointN for this function
- to work correctly.
- </p></dd></dl></div></div>
- <div class="sect2" title="5.5.6. Miscellaneous functions"><div class="titlepage"><div><div><h3 class="title"><a name="misc-functions"></a>5.5.6. Miscellaneous functions</h3></div></div></div>
- <div class="variablelist"><dl><dt><span class="term"><a name="expr-func-all"></a>ALL()</span></dt>
- <dd><p>ALL(cond FOR var IN json.array) function was introduced in 2.2.1-beta. It
- applies to JSON arrays and returns 1 if condition is true for all elements in
- array and 0 otherwise. 'cond' is a general expression which additionally can use
- 'var' as current value of an array element within itself.</p><pre class="programlisting">
- SELECT ALL(x>3 AND x<7 FOR x IN j.intarray) FROM test;
- </pre></dd><dt><span class="term"><a name="expr-func-any"></a>ANY()</span></dt>
- <dd><p>ANY(cond FOR var IN json.array) function was introduced in 2.2.1-beta.
- It works similarly to <a class="link" href="#expr-func-all">ALL()</a> except that it
- returns 1 if condition is true for any element in array.</p></dd><dt><span class="term"><a name="expr-func-atan2"></a>ATAN2()</span></dt>
- <dd><p>
- Returns the arctangent function of two arguments, expressed in <span class="bold"><strong>radians</strong></span>.
- </p></dd><dt><span class="term"><a name="expr-func-crc32"></a>CRC32()</span></dt>
- <dd><p>
- Returns the CRC32 value of a string argument. Introduced in version 2.0.1-beta.
- </p></dd><dt><span class="term"><a name="expr-func-geodist"></a>GEODIST()</span></dt>
- <dd><p>
- GEODIST(lat1, lon1, lat2, lon2, [...]) function, introduced in version 0.9.9-rc2,
- computes geosphere distance between two given points specified by their
- coordinates. Note that by default both latitudes and longitudes must be in <span class="bold"><strong>radians</strong></span>
- and the result will be in <span class="bold"><strong>meters</strong></span>. You can use arbitrary expression as any
- of the four coordinates. An optimized path will be selected when one pair
- of the arguments refers directly to a pair of attributes and the other one
- is constant.
- </p><p>
- Starting with version 2.2.1-beta, GEODIST() also takes an optional 5th argument
- that lets you easily convert between input and output units, and pick the specific
- geodistance formula to use. The complete syntax and a few examples are as follows:
- </p><pre class="programlisting">
- GEODIST(lat1, lon1, lat2, lon2, { option=value, ... })
- GEODIST(40.7643929, -73.9997683, 40.7642578, -73.9994565, {in=degrees, out=feet})
- GEODIST(51.50, -0.12, 29.98, 31.13, {in=deg, out=mi})
- </pre><p>
- The known options and their values are:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><code class="code">in = {deg | degrees | rad | radians}</code>, specifies the input units;</li>
- <li class="listitem"><code class="code">out = {m | meters | km | kilometers | ft | feet | mi | miles}</code>, specifies the output units;</li>
- <li class="listitem"><code class="code">method = {haversine | adaptive}</code>, specifies the geodistance calculation method.</li>
- </ul></div>
- <p>
- Up to version 2.1.x (inclusive), the "haversine" method was the default.
- Starting with 2.2.1-beta, the default method changed to "adaptive",
- a new, well optimized implementation that is both more precise
- <span class="emphasis"><em>and</em></span> much faster at all times.
- </p></dd><dt><span class="term"><a name="expr-func-greatest"></a>GREATEST()</span></dt>
- <dd><p>
- GREATEST(attr_json.some_array) was introduced in version 2.2.1-beta. First argument
- is JSON array and return value is the greatest value in that array.
- Also works for MVA.
- </p></dd><dt><span class="term"><a name="expr-func-indexof"></a>INDEXOF()</span></dt>
- <dd><p>INDEXOF(cond FOR var IN json.array) function was introduced in 2.2.1-beta. It
- iterates through all elements in array and returns index of first element for which
- 'cond' is true and -1 if 'cond' is false for every element in array.</p><pre class="programlisting">
- SELECT INDEXOF(name='John' FOR name IN j.peoples) FROM test;
- </pre></dd><dt><span class="term"><a name="expr-func-least"></a>LEAST()</span></dt>
- <dd><p>
- LEAST(attr_json.some_array) was introduced in version 2.2.1-beta. First argument
- is JSON array and return value is the least value in that array.
- Also works for MVA.
- </p></dd><dt><span class="term"><a name="expr-func-length"></a>LENGTH()</span></dt>
- <dd><p>
- LENGTH(attr_mva) function, introduced in version 2.1.2-stable,
- returns amount of elements in MVA set. It works with both 32-bit and
- 64-bit MVA attributes.
- LENGTH(attr_json) was introduced in version 2.2.1-beta. It returns length of
- a field in JSON. Return value depends on type of a field.
- For example LENGTH(json_attr.some_int) always returns 1 and
- LENGTH(json_attr.some_array) returns number of elements in array.
- </p></dd><dt><span class="term"><a name="expr-func-min-top-sortval"></a>MIN_TOP_SORTVAL()</span></dt>
- <dd><p>Returns sort key value of the worst found element in the current top-N matches if sort key is float and 0 otherwise.</p></dd><dt><span class="term"><a name="expr-func-min-top-weight"></a>MIN_TOP_WEIGHT()</span></dt>
- <dd><p>Returns weight of the worst found element in the current top-N matches.</p></dd><dt><span class="term"><a name="expr-func-packedfactors"></a>PACKEDFACTORS()</span></dt>
- <dd><p>
- PACKEDFACTORS(), introduced in version 2.1.1-beta, can be used in queries,
- either to just see all the weighting factors calculated when doing the matching, or to
- provide a binary attribute that can be used to write a custom ranking UDF.
- This function works only if expression ranker is specified and the query
- is not a full scan, otherwise it will return an error. Starting with 2.2.2-beta
- PACKEDFACTORS() can take an optional argument that disables ATC ranking factor calculation:
- </p><pre class="programlisting">
- PACKEDFACTORS({no_atc=1})
- </pre><p>
- Calculating ATC slows down query processing considerably, so this option can be useful
- if you need to see the ranking factors, but do not need ATC.
- Starting with 2.2.3-beta PACKEDFACTORS() can also be told to format its output as JSON:
- </p><pre class="programlisting">
- PACKEDFACTORS({json=1})
- </pre><p>
- The respective outputs in either key-value pair or JSON format would look
- as follows below. (Note that the examples below are wrapped for readability;
- actual returned values would be single-line.)
- </p><pre class="programlisting">
- mysql> SELECT id, PACKEDFACTORS() FROM test1
- -> WHERE MATCH('test one') OPTION ranker=expr('1') \G
- *************************** 1. row ***************************
- id: 1
- packedfactors(): bm25=569, bm25a=0.617197, field_mask=2, doc_word_count=2,
- field1=(lcs=1, hit_count=2, word_count=2, tf_idf=0.152356,
- min_idf=-0.062982, max_idf=0.215338, sum_idf=0.152356, min_hit_pos=4,
- min_best_span_pos=4, exact_hit=0, max_window_hits=1, min_gaps=2,
- exact_order=1, lccs=1, wlccs=0.215338, atc=-0.003974),
- word0=(tf=1, idf=-0.062982),
- word1=(tf=1, idf=0.215338)
- 1 row in set (0.00 sec)
- mysql> SELECT id, PACKEDFACTORS({json=1}) FROM test1
- -> WHERE MATCH('test one') OPTION ranker=expr('1') \G
- *************************** 1. row ***************************
- id: 1
- packedfactors({json=1}):
- {
- "bm25": 569,
- "bm25a": 0.617197,
- "field_mask": 2,
- "doc_word_count": 2,
- "fields": [
- {
- "lcs": 1,
- "hit_count": 2,
- "word_count": 2,
- "tf_idf": 0.152356,
- "min_idf": -0.062982,
- "max_idf": 0.215338,
- "sum_idf": 0.152356,
- "min_hit_pos": 4,
- "min_best_span_pos": 4,
- "exact_hit": 0,
- "max_window_hits": 1,
- "min_gaps": 2,
- "exact_order": 1,
- "lccs": 1,
- "wlccs": 0.215338,
- "atc": -0.003974
- }
- ],
- "words": [
- {
- "tf": 1,
- "idf": -0.062982
- },
- {
- "tf": 1,
- "idf": 0.215338
- }
- ]
- }
- 1 row in set (0.01 sec)
- </pre><p>
- </p><p>
- This function can be used to implement custom ranking functions in UDFs, as in
- </p><pre class="programlisting">
- SELECT *, CUSTOM_RANK(PACKEDFACTORS()) AS r
- FROM my_index
- WHERE match('hello')
- ORDER BY r DESC
- OPTION ranker=expr('1');
- </pre><p>
- Where CUSTOM_RANK() is a function implemented in an UDF. It should declare a
- SPH_UDF_FACTORS structure (defined in <code class="filename">sphinxudf.h</code>), initialize this structure,
- unpack the factors into it before usage, and deinitialize it afterwards, as follows:
- </p><pre class="programlisting">
- SPH_UDF_FACTORS factors;
- sphinx_factors_init(&factors);
- sphinx_factors_unpack((DWORD*)args->arg_values[0], &factors);
- // ... can use the contents of factors variable here ...
- sphinx_factors_deinit(&factors);
- </pre><p>
- </p><p>
- PACKEDFACTORS() data is available at all query stages, not just
- when doing the initial matching and ranking pass. That enables
- another particularly interesting application of PACKEDFACTORS(),
- namely <span class="bold"><strong>re-ranking</strong></span>.
- </p><p>
- In the example just above, we used an expression-based ranker with
- a dummy expression, and sorted the result set by the value computed
- by our UDF. In other words, we used the UDF to <span class="emphasis"><em>rank</em></span>
- all our results. Assume now, for the sake of an example, that our UDF
- is extremely expensive to compute and has a throughput of just
- 10,000 calls per second. Assume that our query matches 1,000,000 documents.
- To maintain reasonable performance, we would then want to use a (much)
- simpler expression to do most of our ranking, and then apply the
- expensive UDF to only a few top results, say, top-100 results.
- Or, in other words, build top-100 results using a simpler ranking
- function and then <span class="emphasis"><em>re-rank</em></span> those with a complex one.
- We can do that just as well with subselects:
- </p><pre class="programlisting">
- SELECT * FROM (
- SELECT *, CUSTOM_RANK(PACKEDFACTORS()) AS r
- FROM my_index WHERE match('hello')
- OPTION ranker=expr('sum(lcs)*1000+bm25')
- ORDER BY WEIGHT() DESC
- LIMIT 100
- ) ORDER BY r DESC LIMIT 10
- </pre><p>
- In this example, expression-based ranker will be called for every
- matched document to compute WEIGHT(). So it will get called 1,000,000
- times. But the UDF computation can be postponed until the outer sort.
- And it also will be done for just the top-100 matches by WEIGHT(),
- according to the inner limit. So the UDF will only get called 100 times.
- And then the final top-10 matches by UDF value will be selected
- and returned to the application.
- </p><p>
- For reference, in the distributed case PACKEDFACTORS() data gets
- sent from the agents to master in a binary format, too. This makes
- it technically feasible to implement additional re-ranking pass
- (or passes) on the master node, if needed.
- </p><p>
- If used with SphinxQL but not called from any UDFs, the result of PACKEDFACTORS()
- is simply formatted as plain text, which can be used to manually assess the ranking
- factors. Note that this feature is not currently supported by the Sphinx API.
- </p></dd><dt><span class="term"><a name="expr-func-remap"></a>REMAP()</span></dt>
- <dd><p>
- REMAP(condition, expression, (cond1, cond2, ...), (expr1, expr2, ...)) function
- was added in 2.2.2-beta. It allows you to make some exceptions to the expression
- values depending on the condition values. The condition expression should always result
- in an integer; the expression can result in an integer or a float.
- </p><pre class="programlisting">
- SELECT REMAP(userid, karmapoints, (1, 67), (999, 0)) FROM users;
- SELECT REMAP(id%10, salary, (0), (0.0)) FROM employes;
- </pre><p>
- </p></dd></dl></div></div></div>
- <div class="sect1" title="5.6. Sorting modes"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sorting-modes"></a>5.6. Sorting modes</h2></div></div></div>
- <p>
- There are the following result sorting modes available:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>SPH_SORT_RELEVANCE mode, that sorts by relevance in descending order (best matches first);</p></li>
- <li class="listitem"><p>SPH_SORT_ATTR_DESC mode, that sorts by an attribute in descending order (bigger attribute values first);</p></li>
- <li class="listitem"><p>SPH_SORT_ATTR_ASC mode, that sorts by an attribute in ascending order (smaller attribute values first);</p></li>
- <li class="listitem"><p>SPH_SORT_TIME_SEGMENTS mode, that sorts by time segments (last hour/day/week/month) in descending order, and then by relevance in descending order;</p></li>
- <li class="listitem"><p>SPH_SORT_EXTENDED mode, that sorts by SQL-like combination of columns in ASC/DESC order;</p></li>
- <li class="listitem"><p>SPH_SORT_EXPR mode, that sorts by an arithmetic expression.</p></li>
- </ul></div>
- <p>
- </p><p>
- SPH_SORT_RELEVANCE ignores any additional parameters and always sorts matches
- by relevance rank. All other modes require an additional sorting clause, with the
- syntax depending on specific mode. SPH_SORT_ATTR_ASC, SPH_SORT_ATTR_DESC and
- SPH_SORT_TIME_SEGMENTS modes require simply an attribute name.
- SPH_SORT_RELEVANCE is equivalent to sorting by "@weight DESC, @id ASC" in extended sorting mode,
- SPH_SORT_ATTR_ASC is equivalent to "attribute ASC, @weight DESC, @id ASC",
- and SPH_SORT_ATTR_DESC to "attribute DESC, @weight DESC, @id ASC" respectively.
- </p><h3><a name="idp31775520"></a>SPH_SORT_TIME_SEGMENTS mode</h3><p>
- In SPH_SORT_TIME_SEGMENTS mode, attribute values are split into so-called
- time segments, and then sorted by time segment first, and by relevance second.
- </p><p>
- The segments are calculated according to the <span class="emphasis"><em>current timestamp</em></span>
- at the time when the search is performed, so the results would change over time.
- The segments are as follows:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>last hour,</p></li>
- <li class="listitem"><p>last day,</p></li>
- <li class="listitem"><p>last week,</p></li>
- <li class="listitem"><p>last month,</p></li>
- <li class="listitem"><p>last 3 months,</p></li>
- <li class="listitem"><p>everything else.</p></li>
- </ul></div>
- <p>
- These segments are hardcoded, but it is trivial to change them if necessary.
- </p><p>
- This mode was added to support searching through blogs, news headlines, etc.
- When using time segments, recent records would be ranked higher because of segment,
- but within the same segment, more relevant records would be ranked higher -
- unlike sorting by just the timestamp attribute, which would not take relevance
- into account at all.
- </p><h3><a name="sort-extended"></a>SPH_SORT_EXTENDED mode</h3><p>
- In SPH_SORT_EXTENDED mode, you can specify an SQL-like sort expression
- with up to 5 attributes (including internal attributes), eg:
- </p><pre class="programlisting">
- @relevance DESC, price ASC, @id DESC
- </pre><p>
- </p><p>
- Both internal attributes (that are computed by the engine on the fly)
- and user attributes that were configured for this index are allowed.
- Internal attribute names must start with magic @-symbol; user attribute
- names can be used as is. In the example above, <code class="option">@relevance</code>
- and <code class="option">@id</code> are internal attributes and <code class="option">price</code> is user-specified.
- </p><p>
- Known internal attributes are:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>@id (match ID)</p></li>
- <li class="listitem"><p>@weight (match weight)</p></li>
- <li class="listitem"><p>@rank (match weight)</p></li>
- <li class="listitem"><p>@relevance (match weight)</p></li>
- <li class="listitem"><p>@random (return results in random order)</p></li>
- </ul></div>
- <p>
- <code class="option">@rank</code> and <code class="option">@relevance</code> are just additional
- aliases to <code class="option">@weight</code>.
- </p><h3><a name="sort-expr"></a>SPH_SORT_EXPR mode</h3><p>
- Expression sorting mode lets you sort the matches by an arbitrary arithmetic
- expression, involving attribute values, internal attributes (@id and @weight),
- arithmetic operations, and a number of built-in functions. Here's an example:
- </p><pre class="programlisting">
- $cl->SetSortMode ( SPH_SORT_EXPR,
- "@weight + ( user_karma + ln(pageviews) )*0.1" );
- </pre><p>
- The operators and functions supported in the expressions are discussed
- in a separate section, <a class="xref" href="#expressions" title="5.5. Expressions, functions, and operators">Section 5.5, “Expressions, functions, and operators”</a>.
- </p></div>
- <div class="sect1" title="5.7. Grouping (clustering) search results"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="clustering"></a>5.7. Grouping (clustering) search results </h2></div></div></div>
- <p>
- Sometimes it could be useful to group (or in other terms, cluster)
- search results and/or count per-group match counts - for instance,
- to draw a nice graph of how many matching blog posts there were per
- each month; or to group Web search results by site; or to group
- matching forum posts by author; etc.
- </p><p>
- In theory, this could be performed by doing only the full-text search
- in Sphinx and then using found IDs to group on SQL server side. However,
- in practice doing this with a big result set (10K-10M matches) would
- typically kill performance.
- </p><p>
- To avoid that, Sphinx offers so-called grouping mode. It is enabled
- with SetGroupBy() API call. When grouping, all matches are assigned to
- different groups based on group-by value. This value is computed from
- specified attribute using one of the following built-in functions:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>SPH_GROUPBY_DAY, extracts year, month and day in YYYYMMDD format from timestamp;</p></li>
- <li class="listitem"><p>SPH_GROUPBY_WEEK, extracts year and first day of the week number (counting from year start) in YYYYNNN format from timestamp;</p></li>
- <li class="listitem"><p>SPH_GROUPBY_MONTH, extracts month in YYYYMM format from timestamp;</p></li>
- <li class="listitem"><p>SPH_GROUPBY_YEAR, extracts year in YYYY format from timestamp;</p></li>
- <li class="listitem"><p>SPH_GROUPBY_ATTR, uses attribute value itself for grouping.</p></li>
- </ul></div>
- <p>
- </p><p>
- The final search result set then contains one best match per group.
- Grouping function value and per-group match count are returned along
- as "virtual" attributes named
- <span class="bold"><strong>@group</strong></span> and
- <span class="bold"><strong>@count</strong></span> respectively.
- </p><p>
- The result set is sorted by group-by sorting clause, with the syntax similar
- to <a class="link" href="#sort-extended" title="5.6. SPH_SORT_EXTENDED mode"><code class="option">SPH_SORT_EXTENDED</code> sorting clause</a>
- syntax. In addition to <code class="option">@id</code> and <code class="option">@weight</code>,
- group-by sorting clause may also include:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>@group (groupby function value),</p></li>
- <li class="listitem"><p>@count (amount of matches in group).</p></li>
- </ul></div>
- <p>
- </p><p>
- The default mode is to sort by groupby value in descending order,
- ie. by <code class="option">"@group desc"</code>.
- </p><p>
- On completion, <code class="option">total_found</code> result parameter would
- contain total amount of matching groups over the whole index.
- </p><p>
- <span class="bold"><strong>WARNING:</strong></span> grouping is done in fixed memory
- and thus its results are only approximate; so there might be more groups reported
- in <code class="option">total_found</code> than actually present. <code class="option">@count</code> might also
- be underestimated. To reduce inaccuracy, one should raise <code class="option">max_matches</code>.
- If <code class="option">max_matches</code> allows to store all found groups, results will be 100% correct.
- </p><p>
- For example, if sorting by relevance and grouping by <code class="code">"published"</code>
- attribute with <code class="code">SPH_GROUPBY_DAY</code> function, then the result set will
- contain
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>one most relevant match per each day when there were any
- matches published,</p></li>
- <li class="listitem"><p>with day number and per-day match count attached,</p></li>
- <li class="listitem"><p>sorted by day number in descending order (ie. recent days first).</p></li>
- </ul></div>
- <p>
- </p><p>
- Starting with version 0.9.9-rc2, aggregate functions (AVG(), MIN(),
- MAX(), SUM()) are supported through <a class="link" href="#api-func-setselect" title="9.2.4. SetSelect">SetSelect()</a> API call
- when using GROUP BY.
- </p></div>
- <div class="sect1" title="5.8. Distributed searching"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="distributed"></a>5.8. Distributed searching</h2></div></div></div>
- <p>
- To scale well, Sphinx has distributed searching capabilities.
- Distributed searching is useful to improve query latency (ie. search
- time) and throughput (ie. max queries/sec) in multi-server, multi-CPU
- or multi-core environments. This is essential for applications which
- need to search through huge amounts of data (ie. billions of records
- and terabytes of text).
- </p><p>
- The key idea is to horizontally partition (HP) searched data
- across search nodes and then process it in parallel.
- </p><p>
- Partitioning is done manually. You should
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>setup several instances
- of Sphinx programs (<code class="filename">indexer</code> and <code class="filename">searchd</code>)
- on different servers;</p></li>
- <li class="listitem"><p>make the instances index (and search) different parts of data;</p></li>
- <li class="listitem"><p>configure a special distributed index on some of the <code class="filename">searchd</code>
- instances;</p></li>
- <li class="listitem"><p>and query this index.</p></li>
- </ul></div>
- <p>
- This index only contains references to other
- local and remote indexes - so it could not be directly reindexed,
- and you should reindex those indexes which it references instead.
- </p><p>
- When <code class="filename">searchd</code> receives a query against distributed index,
- it does the following:
- </p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>connects to configured remote agents;</p></li>
- <li class="listitem"><p>issues the query;</p></li>
- <li class="listitem"><p>sequentially searches configured local indexes (while the remote agents are searching);</p></li>
- <li class="listitem"><p>retrieves remote agents' search results;</p></li>
- <li class="listitem"><p>merges all the results together, removing the duplicates;</p></li>
- <li class="listitem"><p>sends the merged results to client.</p></li>
- </ol></div>
- <p>
- </p><p>
- From the application's point of view, there are no differences
- between searching through a regular index, or a distributed index at all.
- That is, distributed indexes are fully transparent to the application,
- and actually there's no way to tell whether the index you queried
- was distributed or local. (Even though as of 0.9.9 Sphinx does not
- allow to combine searching through distributed indexes with anything else,
- this constraint will be lifted in the future.)
- </p><p>
- Any <code class="filename">searchd</code> instance could serve both as a master
- (which aggregates the results) and a slave (which only does local searching)
- at the same time. This has a number of uses:
- </p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>every machine in a cluster could serve as a master which
- searches the whole cluster, and search requests could be balanced between
- masters to achieve a kind of HA (high availability) in case any of the nodes fails;
- </p></li>
- <li class="listitem"><p>
- if running within a single multi-CPU or multi-core machine, there
- would be only 1 searchd instance querying itself as an agent and thus
- utilizing all CPUs/cores.
- </p></li>
- </ol></div>
- <p>
- </p><p>
- Better HA support is planned, which would allow you
- to specify which agents mirror each other, do health checks, keep track
- of alive agents, load-balance requests, etc.
- </p></div>
- <div class="sect1" title="5.9. searchd query log formats"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="query-log-format"></a>5.9. <code class="filename">searchd</code> query log formats</h2></div></div></div>
- <p>
- In version 2.0.1-beta and above two query log formats are supported.
- Previous versions only supported a custom plain text format. That format
- is still the default one. However, while it might be more convenient for
- manual monitoring and review, it is hard to replay for benchmarks, it only
- logs <span class="emphasis"><em>search</em></span> queries but not the other types
- of requests, does not always contain the complete search query
- data, etc. The default text format is also harder (and sometimes
- impossible) to replay for benchmarking purposes. The new <code class="code">sphinxql</code>
- format alleviates that. It aims to be complete and automatable,
- even though at the cost of brevity and readability.
- </p><div class="sect2" title="5.9.1. Plain log format"><div class="titlepage"><div><div><h3 class="title"><a name="plain-log-format"></a>5.9.1. Plain log format</h3></div></div></div>
- <p>
- By default, <code class="filename">searchd</code> logs all successfully executed search queries
- into a query log file. Here's an example:
- </p><pre class="programlisting">
- [Fri Jun 29 21:17:58 2007] 0.004 sec 0.004 sec [all/0/rel 35254 (0,20)] [lj] test
- [Fri Jun 29 21:20:34 2007] 0.024 sec 0.024 sec [all/0/rel 19886 (0,20) @channel_id] [lj] test
- </pre><p>
- This log format is as follows:
- </p><pre class="programlisting">
- [query-date] real-time wall-time [match-mode/filters-count/sort-mode
- total-matches (offset,limit) @groupby-attr] [index-name] query
- </pre><p>
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>real-time is a time measured just from start to finish of the query</p></li>
- <li class="listitem"><p>wall-time is like real-time, but does not include the time spent waiting for agents and merging result sets</p></li>
- </ul></div>
- <p>
- Match mode can take one of the following values:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>"all" for SPH_MATCH_ALL mode;</p></li>
- <li class="listitem"><p>"any" for SPH_MATCH_ANY mode;</p></li>
- <li class="listitem"><p>"phr" for SPH_MATCH_PHRASE mode;</p></li>
- <li class="listitem"><p>"bool" for SPH_MATCH_BOOLEAN mode;</p></li>
- <li class="listitem"><p>"ext" for SPH_MATCH_EXTENDED mode;</p></li>
- <li class="listitem"><p>"ext2" for SPH_MATCH_EXTENDED2 mode;</p></li>
- <li class="listitem"><p>"scan" if the full scan mode was used, either by being specified with SPH_MATCH_FULLSCAN, or if the query was empty (as documented under <a class="link" href="#matching-modes" title="5.1. Matching modes">Matching Modes</a>)</p></li>
- </ul></div>
- <p>
- Sort mode can take one of the following values:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>"rel" for SPH_SORT_RELEVANCE mode;</p></li>
- <li class="listitem"><p>"attr-" for SPH_SORT_ATTR_DESC mode;</p></li>
- <li class="listitem"><p>"attr+" for SPH_SORT_ATTR_ASC mode;</p></li>
- <li class="listitem"><p>"tsegs" for SPH_SORT_TIME_SEGMENTS mode;</p></li>
- <li class="listitem"><p>"ext" for SPH_SORT_EXTENDED mode.</p></li>
- </ul></div>
- <p>
- </p><p>Additionally, if <code class="filename">searchd</code> was started with <code class="option">--iostats</code>, there will be a block of data after where the index(es) searched are listed.</p><p>A query log entry might take the form of:</p><pre class="programlisting">
- [Fri Jun 29 21:17:58 2007] 0.004 sec [all/0/rel 35254 (0,20)] [lj]
- [ios=6 kb=111.1 ms=0.5] test
- </pre><p>
- This additional block is information regarding I/O operations in performing the search:
- the number of file I/O operations carried out, the amount of data in kilobytes read from
- the index files and time spent on I/O operations (although there is a background processing
- component, the bulk of this time is the I/O operation time).
- </p></div>
- <div class="sect2" title="5.9.2. SphinxQL log format"><div class="titlepage"><div><div><h3 class="title"><a name="sphinxql-log-format"></a>5.9.2. SphinxQL log format</h3></div></div></div>
- <p>
- This is a new log format introduced in 2.0.1-beta, with the goals
- being logging everything and then some, and in a format easy to automate
- (for instance, automatically replay). New format can either be enabled
- via the <a class="link" href="#conf-query-log-format" title="12.4.4. query_log_format">query_log_format</a>
- directive in the configuration file, or switched back and forth
- on the fly with the
- <a class="link" href="#sphinxql-set" title="8.9. SET syntax"><code class="code">SET GLOBAL query_log_format=...</code></a>
- statement via SphinxQL. In the new format, the example from the previous
- section would look as follows. (Wrapped below for readability, but with
- just one query per line in the actual log.)
- </p><pre class="programlisting">
- /* Fri Jun 29 21:17:58.609 2007 conn 2 real 0.004 wall 0.004 found 35254 */
- SELECT * FROM lj WHERE MATCH('test') OPTION ranker=proximity;
- /* Fri Jun 29 21:20:34.555 2007 conn 3 real 0.024 wall 0.024 found 19886 */
- SELECT * FROM lj WHERE MATCH('test') GROUP BY channel_id
- OPTION ranker=proximity;
- </pre><p>
- Note that <span class="bold"><strong>all</strong></span> requests would be logged in this format,
- including those sent via SphinxAPI and SphinxSE, not just those
- sent via SphinxQL. Also note, that this kind of logging works only with plain log
- files and will not work if you use 'syslog' for logging.
- </p><p>
- The features of SphinxQL log format compared to the default text
- one are as follows.
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>All request types should be logged. (This is still work in progress.)</p></li>
- <li class="listitem"><p>Full statement data will be logged where possible.</p></li>
- <li class="listitem"><p>Errors and warnings are logged.</p></li>
- <li class="listitem"><p>The log should be automatically replayable via SphinxQL.</p></li>
- <li class="listitem"><p>Additional performance counters (currently, per-agent distributed query times) are logged.</p></li>
- </ul></div>
- <p>
- </p><p>
- Use sphinxql:compact_in to shorten your IN() clauses in log if you have
- too many values in them.
- </p><p>
- Every request (including both SphinxAPI and SphinxQL requests)
- must result in exactly one log line. (All request types, including
- INSERT, CALL SNIPPETS, etc will eventually get logged, though as of
- time of this writing, that is a work in progress.) Every log line
- must be a valid SphinxQL statement that reconstructs the full request,
- except if the logged request is too big and needs shortening
- for performance reasons. Additional messages, counters, etc can be
- logged in the comments section after the request.
- </p></div></div>
- <div class="sect1" title="5.10. MySQL protocol support and SphinxQL"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql"></a>5.10. MySQL protocol support and SphinxQL</h2></div></div></div>
- <p>
- Starting with version 0.9.9-rc2, Sphinx searchd daemon supports MySQL binary
- network protocol and can be accessed with regular MySQL API. For instance,
- 'mysql' CLI client program works well. Here's an example of querying
- Sphinx using MySQL client:
- </p><pre class="programlisting">
- $ mysql -P 9306
- Welcome to the MySQL monitor. Commands end with ; or \g.
- Your MySQL connection id is 1
- Server version: 0.9.9-dev (r1734)
- Type 'help;' or '\h' for help. Type '\c' to clear the buffer.
- mysql> SELECT * FROM test1 WHERE MATCH('test')
- -> ORDER BY group_id ASC OPTION ranker=bm25;
- +------+--------+----------+------------+
- | id | weight | group_id | date_added |
- +------+--------+----------+------------+
- | 4 | 1442 | 2 | 1231721236 |
- | 2 | 2421 | 123 | 1231721236 |
- | 1 | 2421 | 456 | 1231721236 |
- +------+--------+----------+------------+
- 3 rows in set (0.00 sec)
- </pre><p>
- </p><p>
- Note that mysqld was not even running on the test machine. Everything was
- handled by searchd itself.
- </p><p>
- The new access method is supported <span class="emphasis"><em>in addition</em></span>
- to native APIs which all still work perfectly well. In fact, both
- access methods can be used at the same time. Also, native API is still
- the default access method. MySQL protocol support needs to be additionally
- configured. This is a matter of 1-line config change, adding a new
- <a class="link" href="#conf-listen" title="12.4.1. listen">listener</a> with mysql41 specified
- as a protocol:
- </p><pre class="programlisting">
- listen = localhost:9306:mysql41
- </pre><p>
- </p><p>
- Just supporting the protocol and not the SQL syntax would be useless
- so Sphinx now also supports a subset of SQL that we dubbed SphinxQL.
- It supports standard querying of all the index types with SELECT,
- modifying RT indexes with INSERT, REPLACE, and DELETE, and much more.
- Full SphinxQL reference is available in <a class="xref" href="#sphinxql-reference" title="Chapter 8. SphinxQL reference">Chapter 8, <i>SphinxQL reference</i></a>.
- </p></div>
- <div class="sect1" title="5.11. Multi-queries"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="multi-queries"></a>5.11. Multi-queries</h2></div></div></div>
- <p>
- Multi-queries, or query batches, let you send multiple queries to Sphinx
- in one go (more formally, one network request).
- </p><p>
- Two API methods that implement multi-query mechanism are
- <a class="link" href="#api-func-addquery" title="9.6.2. AddQuery">AddQuery()</a> and
- <a class="link" href="#api-func-runqueries" title="9.6.3. RunQueries">RunQueries()</a>.
- You can also run multiple queries with SphinxQL, see
- <a class="xref" href="#sphinxql-multi-queries" title="8.40. Multi-statement queries">Section 8.40, “Multi-statement queries”</a>.
- (In fact, regular <a class="link" href="#api-func-addquery" title="9.6.2. AddQuery">Query()</a>
- call is internally implemented as a single AddQuery() call immediately
- followed by RunQueries() call.) AddQuery() captures the current state
- of all the query settings set by previous API calls, and memorizes
- the query. RunQueries() actually sends all the memorized queries,
- and returns multiple result sets. There are no restrictions on
- the queries at all, except just a sanity check on a number of queries
- in a single batch (see <a class="xref" href="#conf-max-batch-queries" title="12.4.20. max_batch_queries">Section 12.4.20, “max_batch_queries”</a>).
- </p><p>
- Why use multi-queries? Generally, it all boils down to performance.
- First, by sending requests to <code class="filename">searchd</code> in a batch
- instead of one by one, you always save a bit by doing fewer network
- roundtrips. Second, and somewhat more important, sending queries
- in a batch enables <code class="filename">searchd</code> to perform certain
- internal optimizations. As new types of optimizations are being
- added over time, it generally makes sense to pack all the queries
- into batches where possible, so that simply upgrading Sphinx
- to a new version would automatically enable new optimizations.
- In the case when there aren't any possible batch optimizations
- to apply, queries will be processed one by one internally.
- </p><p>
- Why (or rather when) not use multi-queries? Multi-queries require
- all the queries in a batch to be independent, and sometimes they aren't.
- That is, sometimes query B is based on query A results, and so can only be
- set up after executing query A. For instance, you might want to display
- results from a secondary index if and only if there were no results
- found in a primary index. Or maybe just specify offset into 2nd result set
- based on the amount of matches in the 1st result set. In that case,
- you will have to use separate queries (or separate batches).
- </p><p>
- As of 0.9.10, there are two major optimizations to be aware of:
- common query optimization (available since 0.9.8); and common
- subtree optimization (available since 0.9.10).
- </p><p>
- <span class="bold"><strong>Common query optimization</strong></span> means that <code class="filename">searchd</code>
- will identify all those queries in a batch where only the sorting
- and group-by settings differ, and <span class="emphasis"><em>only perform searching once</em></span>.
- For instance, if a batch consists of 3 queries, all of them are for
- "ipod nano", but 1st query requests top-10 results sorted by price,
- 2nd query groups by vendor ID and requests top-5 vendors sorted by
- rating, and 3rd query requests max price, full-text search for
- "ipod nano" will only be performed once, and its results will be
- reused to build 3 different result sets.
- </p><p>
- So-called <span class="bold"><strong>faceted searching</strong></span> is a particularly important case
- that benefits from this optimization. Indeed, faceted searching
- can be implemented by running a number of queries, one to retrieve
- search results themselves, and a few other ones with same full-text
- query but different group-by settings to retrieve all the required
- groups of results (top-3 authors, top-5 vendors, etc). And as long
- as full-text query and filtering settings stay the same, common
- query optimization will trigger, and greatly improve performance.
- </p><p>
- <span class="bold"><strong>Common subtree optimization</strong></span> is even more interesting.
- It lets <code class="filename">searchd</code> exploit similarities between
- batched full-text queries. It identifies common full-text query parts
- (subtrees) in all queries, and caches them between queries. For instance,
- look at the following query batch:
- </p><pre class="programlisting">
- barack obama president
- barack obama john mccain
- barack obama speech
- </pre><p>
- There's a common two-word part ("barack obama") that can be computed
- only once, then cached and shared across the queries. And common subtree
- optimization does just that. Per-query cache size is strictly controlled
- by <a class="link" href="#conf-subtree-docs-cache" title="12.4.21. subtree_docs_cache">subtree_docs_cache</a>
- and <a class="link" href="#conf-subtree-hits-cache" title="12.4.22. subtree_hits_cache">subtree_hits_cache</a>
- directives (so that caching <span class="emphasis"><em>all</em></span> sixteen gazillions
- of documents that match "i am" does not exhaust the RAM and instantly
- kill your server).
- </p><p>
- Here's a code sample (in PHP) that fires the same query in 3 different
- sorting modes:
- </p><pre class="programlisting">
- require ( "sphinxapi.php" );
- $cl = new SphinxClient ();
- $cl->SetMatchMode ( SPH_MATCH_EXTENDED );
- $cl->SetSortMode ( SPH_SORT_RELEVANCE );
- $cl->AddQuery ( "the", "lj" );
- $cl->SetSortMode ( SPH_SORT_EXTENDED, "published desc" );
- $cl->AddQuery ( "the", "lj" );
- $cl->SetSortMode ( SPH_SORT_EXTENDED, "published asc" );
- $cl->AddQuery ( "the", "lj" );
- $res = $cl->RunQueries();
- </pre><p>
- </p><p>
- How to tell whether the queries in the batch were actually optimized?
- If they were, respective query log will have a "multiplier" field that
- specifies how many queries were processed together:
- </p><pre class="programlisting">
- [Sun Jul 12 15:18:17.000 2009] 0.040 sec x3 [ext/0/rel 747541 (0,20)] [lj] the
- [Sun Jul 12 15:18:17.000 2009] 0.040 sec x3 [ext/0/ext 747541 (0,20)] [lj] the
- [Sun Jul 12 15:18:17.000 2009] 0.040 sec x3 [ext/0/ext 747541 (0,20)] [lj] the
- </pre><p>
- Note the "x3" field. It means that this query was optimized and
- processed in a sub-batch of 3 queries. For reference, this is what
- the regular log would look like if the queries were not batched:
- </p><pre class="programlisting">
- [Sun Jul 12 15:18:17.062 2009] 0.059 sec [ext/0/rel 747541 (0,20)] [lj] the
- [Sun Jul 12 15:18:17.156 2009] 0.091 sec [ext/0/ext 747541 (0,20)] [lj] the
- [Sun Jul 12 15:18:17.250 2009] 0.092 sec [ext/0/ext 747541 (0,20)] [lj] the
- </pre><p>
- Note how per-query time in multi-query case was improved by a factor
- of 1.5x to 2.3x, depending on a particular sorting mode. In fact, for both
- common query and common subtree optimizations, there were reports of 3x and
- even more improvements, and that's from production instances, not just
- synthetic tests.
- </p></div>
- <div class="sect1" title="5.12. Collations"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="collations"></a>5.12. Collations</h2></div></div></div>
- <p>
- Introduced to Sphinx in version 2.0.1-beta to supplement string sorting,
- collations essentially affect the string attribute comparisons. They specify
- both the character set encoding and the strategy that Sphinx uses to compare
- strings when doing ORDER BY or GROUP BY with a string attribute involved.
- </p><p>
- String attributes are stored as is when indexing, and no character set
- or language information is attached to them. That's okay as long as Sphinx
- only needs to store and return the strings to the calling application verbatim.
- But when you ask Sphinx to sort by a string value, that request immediately
- becomes quite ambiguous.
- </p><p>
- First, single-byte (ASCII, or ISO-8859-1, or Windows-1251) strings
- need to be processed differently than the UTF-8 ones that may encode
- every character with a variable number of bytes. So we need to know
- what is the character set type to interpret the raw bytes as meaningful
- characters properly.
- </p><p>
- Second, we additionally need to know the language-specific
- string sorting rules. For instance, when sorting according to US rules
- in en_US locale, the accented character 'ï' (small letter i with diaeresis)
- should be placed somewhere after 'z'. However, when sorting with French rules
- and fr_FR locale in mind, it should be placed between 'i' and 'j'. And some
- other set of rules might choose to ignore accents at all, allowing 'ï'
- and 'i' to be mixed arbitrarily.
- </p><p>
- Third, but not least, we might need case-sensitive sorting in some
- scenarios and case-insensitive sorting in some others.
- </p><p>
- Collations combine all of the above: the character set, the language rules,
- and the case sensitivity. Sphinx currently provides the following four
- collations.
- </p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p><code class="option">libc_ci</code></p></li>
- <li class="listitem"><p><code class="option">libc_cs</code></p></li>
- <li class="listitem"><p><code class="option">utf8_general_ci</code></p></li>
- <li class="listitem"><p><code class="option">binary</code></p></li>
- </ol></div>
- <p>
- </p><p>
- The first two collations rely on several standard C library (libc) calls
- and can thus support any locale that is installed on your system. They provide
- case-insensitive (_ci) and case-sensitive (_cs) comparisons respectively.
- By default they will use C locale, effectively resorting to bytewise
- comparisons. To change that, you need to specify a different available
- locale using <a class="link" href="#conf-collation-libc-locale" title="12.4.30. collation_libc_locale">collation_libc_locale</a>
- directive. The list of locales available on your system can usually be obtained
- with the <code class="filename">locale</code> command:
- </p><pre class="programlisting">
- $ locale -a
- C
- en_AG
- en_AU.utf8
- en_BW.utf8
- en_CA.utf8
- en_DK.utf8
- en_GB.utf8
- en_HK.utf8
- en_IE.utf8
- en_IN
- en_NG
- en_NZ.utf8
- en_PH.utf8
- en_SG.utf8
- en_US.utf8
- en_ZA.utf8
- en_ZW.utf8
- es_ES
- fr_FR
- POSIX
- ru_RU.utf8
- ru_UA.utf8
- </pre><p>
- </p><p>
- The specific list of the system locales may vary. Consult your OS documentation
- to install additional needed locales.
- </p><p>
- <code class="option">utf8_general_ci</code> and <code class="option">binary</code> collations are
- built into Sphinx. The first one is a generic collation for UTF-8 data
- (without any so-called language tailoring); it should behave similarly to
- <code class="option">utf8_general_ci</code> collation in MySQL. The second one
- is a simple bytewise comparison.
- </p><p>
- Collation can be overridden via SphinxQL on a per-session basis using
- <code class="code">SET collation_connection</code> statement. All subsequent SphinxQL
- queries will use this collation. SphinxAPI and SphinxSE queries will use
- the server default collation, as specified in
- <a class="link" href="#conf-collation-server" title="12.4.29. collation_server">collation_server</a> configuration
- directive. Sphinx currently defaults to <code class="option">libc_ci</code> collation.
- </p><p>
- Collations should affect all string attribute comparisons, including
- those within ORDER BY and GROUP BY, so differently ordered or grouped results
- can be returned depending on the collation chosen.
- </p></div></div>
- <div class="chapter" title="Chapter 6. Extending Sphinx"><div class="titlepage"><div><div><h2 class="title"><a name="extending-sphinx"></a>Chapter 6. Extending Sphinx</h2></div></div></div>
- <div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="sect1"><a href="#sphinx-udfs">6.1. Sphinx UDFs (User Defined Functions)</a></span></dt>
- <dt><span class="sect1"><a href="#sphinx-plugins">6.2. Sphinx plugins</a></span></dt>
- <dt><span class="sect1"><a href="#ranker-plugins">6.3. Ranker plugins</a></span></dt>
- </dl></div>
- <div class="sect1" title="6.1. Sphinx UDFs (User Defined Functions)"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinx-udfs"></a>6.1. Sphinx UDFs (User Defined Functions)</h2></div></div></div>
- <p>
- Starting with 2.0.1-beta, our expression engine can be extended with
- user defined functions, or UDFs for short, like this:
- </p><pre class="programlisting">
- SELECT id, attr1, myudf(attr2, attr3+attr4) ...
- </pre><p>
- You can load and unload UDFs dynamically into <code class="filename">searchd</code>
- without having to restart the daemon, and use them in expressions when
- searching, ranking, etc. Quick summary of the UDF features is as follows.
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>UDFs can take integer (both 32-bit and 64-bit), float,
- string, MVA, or PACKEDFACTORS() arguments.</p></li>
- <li class="listitem"><p>UDFs can return integer, float, or string values.</p></li>
- <li class="listitem"><p>UDFs can check the argument number, types, and names
- during the query setup phase, and raise errors.</p></li>
- <li class="listitem"><p>Aggregation UDFs are not yet supported (but might be
- in the future).</p></li>
- </ul></div>
- <p>
- UDFs have a wide variety of uses, for instance:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>adding custom mathematical or string functions;</p></li>
- <li class="listitem"><p>accessing the database or files from within Sphinx;</p></li>
- <li class="listitem"><p>implementing complex ranking functions.</p></li>
- </ul></div>
- <p>
- </p><p>
- UDFs reside in the external dynamic libraries (.so files on UNIX and .dll
- on Windows systems). Library files need to reside in a trusted folder
- specified by <a class="link" href="#conf-plugin-dir" title="12.4.31. plugin_dir">plugin_dir</a> directive,
- for obvious security reasons: securing a single folder is easy; letting
- anyone install arbitrary code into <code class="filename">searchd</code> is a risk.
- You can load and unload them dynamically into searchd
- with <a class="link" href="#sphinxql-create-function" title="8.18. CREATE FUNCTION syntax">CREATE FUNCTION</a> and
- <a class="link" href="#sphinxql-drop-function" title="8.19. DROP FUNCTION syntax">DROP FUNCTION</a> SphinxQL statements
- respectively. Sphinx keeps track of the currently loaded functions, that is,
- every time you create or drop an UDF, <code class="filename">searchd</code> writes
- its state to the <a class="link" href="#conf-sphinxql-state" title="12.4.38. sphinxql_state">sphinxql_state</a> file
- as a plain good old SQL script.
- </p><p>
- Once you successfully load an UDF, you can use it in your SELECT or other
- statements just as well as any of the builtin functions:
- </p><pre class="programlisting">
- SELECT id, MYCUSTOMFUNC(groupid, authorname), ... FROM myindex
- </pre><p>
- </p><p>
- UDFs are completely supported in <a class="link" href="#conf-workers" title="12.4.23. workers">workers=threads</a>
- mode only. They are partially supported in <code class="code">workers=prefork</code> mode too:
- namely, CREATEs from the <code class="code">sphinxql_state</code> startup script will work and
- those UDFs will be accessible. However, DROPs will not be available. UDFs are not
- supported in <code class="code">workers=fork</code> mode.
- </p><p>
- Multiple UDFs (and other plugins) may reside in a single library. That library
- will only be loaded once. It gets automatically unloaded once all the UDFs and
- plugins from it are dropped.
- </p><p>
- In theory you can write an UDF in any language as long as its compiler
- is able to import standard C headers, and emit standard dynamic libraries with
- properly exported functions. Of course, the path of least resistance is to write
- in either C++ or plain C. We provide an example UDF library written in plain C
- and implementing several functions (demonstrating a few different techniques)
- along with our source code, see
- <a class="ulink" href="http://code.google.com/p/sphinxsearch/source/browse/trunk/src/udfexample.c" target="_top">src/udfexample.c</a>.
- That example includes
- <a class="ulink" href="http://code.google.com/p/sphinxsearch/source/browse/trunk/src/sphinxudf.h" target="_top">src/sphinxudf.h</a>
- header file definitions of a few UDF related structures and types. For most
- UDFs and plugins, a mere <code class="code">#include "sphinxudf.h"</code>, like in the example,
- should be completely sufficient, too. However, if you're writing a ranking function and
- need to access the ranking signals (factors) data from within the UDF, you will
- also need to compile and link with <code class="filename">src/sphinxudf.c</code> (also
- available in our source code), because the <span class="emphasis"><em>implementations</em></span>
- of the functions that let you access the signal data from within the UDF reside
- in that file.
- </p><p>
- Both <code class="filename">sphinxudf.h</code> header and <code class="filename">sphinxudf.c</code>
- are standalone. So you can copy around those files only; they do not depend
- on any other bits of Sphinx source code.
- </p><p>
- Within your UDF, you <span class="bold"><strong>must</strong></span> implement and export only a couple functions,
- literally. First, for UDF interface version control, you <span class="bold"><strong>must</strong></span> define
- a function <code class="code">int LIBRARYNAME_ver()</code>, where LIBRARYNAME is the name
- of your library file, and you must return <code class="code">SPH_UDF_VERSION</code> (a value
- defined in <code class="filename">sphinxudf.h</code>) from it. Here's an example.
- </p><pre class="programlisting">
- #include <sphinxudf.h>
- // our library will be called udfexample.so, thus it must define
- // a version function named udfexample_ver()
- int udfexample_ver()
- {
- return SPH_UDF_VERSION;
- }
- </pre><p>
- That protects you from accidentally loading a library with a mismatching
- UDF interface version into a newer or older <code class="filename">searchd</code>.
- Second, you <span class="bold"><strong>must</strong></span> implement the actual function, too.
- </p><pre class="programlisting">
- sphinx_int64_t testfunc ( SPH_UDF_INIT * init, SPH_UDF_ARGS * args,
- char * error_flag )
- {
- return 123;
- }
- </pre><p>
- </p><p>
- UDF function names in SphinxQL are case insensitive. However, the
- respective C function names are not, they need to be all <span class="bold"><strong>lower-case</strong></span>,
- or the UDF will not load. More importantly, it is vital that a) the calling
- convention is C (aka __cdecl), b) arguments list matches the plugin system
- expectations exactly, and c) the return type matches the one you specify in
- <code class="code">CREATE FUNCTION</code>. Unfortunately, there is no (easy) way for us
- to check for those mistakes when loading the function, and they could crash
- the server and/or result in unexpected results. Last but not least,
- all the C functions you implement need to be thread-safe.
- </p><p>
- The first argument, a pointer to SPH_UDF_INIT structure, is essentially
- a pointer to our function state. It is optional. In the example just above
- the function is stateless, it simply returns 123 every time it gets called.
- So we do not have to define an initialization function, and we can simply
- ignore that argument.
- </p><p>
- The second argument, a pointer to SPH_UDF_ARGS, is the most important one.
- All the actual call arguments are passed to your UDF via this structure;
- it contains the call argument count, names, types, etc. So whether your
- function gets called like <code class="code">SELECT id, testfunc(1)</code> or like
- <code class="code">SELECT id, testfunc('abc', 1000*id+gid, WEIGHT())</code> or anyhow
- else, it will receive the very same SPH_UDF_ARGS structure in all of these
- cases. However, the data passed in the <code class="code">args</code> structure will be
- different. In the first example <code class="code">args->arg_count</code> will be set to 1,
- in the second example it will be set to 3, <code class="code">args->arg_types</code> array
- will contain different type data, and so on.
- </p><p>
- Finally, the third argument is an error flag. UDF can raise it to indicate
- that some kind of an internal error happened, the UDF can not continue, and
- the query should terminate early. You should <span class="bold"><strong>not</strong></span> use this for argument
- type checks or for any other error reporting that is likely to happen during
- normal use. This flag is designed to report sudden critical runtime errors,
- such as running out of memory.
- </p><p>
- If we wanted to, say, allocate temporary storage for our function to use,
- or check upfront whether the arguments are of the supported types, then
- we would need to add two more functions, with UDF initialization and deinitialization,
- respectively.
- </p><pre class="programlisting">
- int testfunc_init ( SPH_UDF_INIT * init, SPH_UDF_ARGS * args,
- char * error_message )
- {
- // allocate and initialize a little bit of temporary storage
- init->func_data = malloc ( sizeof(int) );
- *(int*)init->func_data = 123;
- // return a success code
- return 0;
- }
- void testfunc_deinit ( SPH_UDF_INIT * init )
- {
- // free up our temporary storage
- free ( init->func_data );
- }
- </pre><p>
- Note how <code class="code">testfunc_init()</code> also receives the call arguments
- structure. By the time it is called it does not receive any actual values,
- so the <code class="code">args->arg_values</code> will be NULL. But the argument
- names and types are known and will be passed. You can check them in
- the initialization function and return an error if they are of an
- unsupported type.
- </p><p>
- UDFs can receive arguments of pretty much any valid internal Sphinx type.
- Refer to <code class="code">sphinx_udf_argtype</code> enumeration in <code class="filename">sphinxudf.h</code>
- for a full list. Most of the types map straightforwardly to the respective C types.
- The most notable exception is the SPH_UDF_TYPE_FACTORS argument type.
- You get that type by calling your UDF with a
- <a class="link" href="#expr-func-packedfactors">PACKEDFACTORS()</a> argument.
- Its data is a binary blob in a certain internal format, and to extract
- individual ranking signals from that blob, you need to use either of the
- two <code class="code">sphinx_factors_XXX()</code> or <code class="code">sphinx_get_YYY_factor()</code>
- families of functions. The first family consists of just 3 functions,
- <code class="code">sphinx_factors_init()</code> that initializes the unpacked
- SPH_UDF_FACTORS structure, <code class="code">sphinx_factors_unpack()</code> that
- unpacks a binary blob into it, and <code class="code">sphinx_factors_deinit()</code>
- that cleans up and deallocates the SPH_UDF_FACTORS. So you need to call
- init() and unpack(), then you can use the SPH_UDF_FACTORS fields, and
- then you need to cleanup with deinit(). That is simple, but results
- in a bunch of memory allocations per each processed document, and might
- be slow. The other interface, consisting of a bunch of
- <code class="code">sphinx_get_YYY_factor()</code> functions, is a little more wordy
- to use, but accesses the blob data directly and guarantees that there
- will be zero allocations. So for top-notch ranking UDF performance,
- you want to use that one.
- </p><p>
- As for the return types, UDFs can currently return a single INT, BIGINT,
- FLOAT, or STRING value. The C function return type should be sphinx_int64_t,
- sphinx_int64_t, double, or char* respectively. In the last case you <span class="bold"><strong>must</strong></span>
- use <code class="code">args->fn_malloc</code> function to allocate the returned
- string values. Internally in your UDF you can use whatever you want,
- so the <code class="code">testfunc_init()</code> example above is correct code
- even though it uses malloc() directly: you manage that pointer yourself,
- it gets freed up using a matching free() call, and all is well. However,
- the returned string values are managed by Sphinx and we have our own
- allocator, so for the return values specifically, you need to use it too.
- </p><p>
- Depending on how your UDFs are used in the query, the main function
- call (<code class="code">testfunc()</code> in our example) might be called in a rather
- different volume and order. Specifically,
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>UDFs referenced in WHERE, ORDER BY, or GROUP BY clauses
- must and will be evaluated for every matched document. They will be called
- in the natural matching order.
- </p></li>
- <li class="listitem"><p>without subselects, UDFs that can be evaluated at the very
- last stage over the final result set will be evaluated that way, but before
- applying the LIMIT clause. They will be called in the result set order.
- </p></li>
- <li class="listitem"><p>with subselects, such UDFs will also be evaluated after
- applying the inner LIMIT clause.
- </p></li>
- </ul></div>
- <p>
- </p><p>
- The calling sequence of the other functions is fixed, though. Namely,
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p><code class="code">testfunc_init()</code> is called once when initializing
- the query. It can return a non-zero code to indicate a failure; in that case
- query will be terminated, and the error message from the <code class="code">error_message</code>
- buffer will be returned.</p></li>
- <li class="listitem"><p><code class="code">testfunc()</code> is called for every eligible row
- (see above), whenever Sphinx needs to compute the UDF value. It can also
- indicate an (internal) failure error by writing a non-zero byte value to
- <code class="code">error_flag</code>. In that case, it is guaranteed that it will no longer be
- called for subsequent rows, and a default return value of 0 will be substituted.
- Sphinx might or might not choose to terminate such queries early, neither
- behavior is currently guaranteed.
- </p></li>
- <li class="listitem"><p><code class="code">testfunc_deinit()</code> is called once when the query
- processing (in a given index shard) ends.</p></li>
- </ul></div>
- <p>
- </p><p>
- As of 2.2.2-beta, we do not yet support aggregation functions. In other words,
- your UDFs will be called for just a single document at a time and are expected
- to return some value for that document. Writing a function that can compute an
- aggregate value like AVG() over the entire group of documents that share the same
- GROUP BY key is not yet possible. However, you can use UDFs within the builtin
- aggregate functions: that is, even though MYCUSTOMAVG() is not supported yet,
- AVG(MYCUSTOMFUNC()) should work alright!
- </p><p>
- UDFs are local. In order to use them on a cluster, you have to put the same
- library on all its nodes and run CREATEs on all the nodes too. This might change
- in the future versions.
- </p></div>
- <div class="sect1" title="6.2. Sphinx plugins"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinx-plugins"></a>6.2. Sphinx plugins</h2></div></div></div>
- <p>
- Starting with version 2.2.2-beta, we generalized our dynamic plugin
- system, and added a few more types of dynamic plugins. Here's the complete
- plugin type list.
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>UDF plugins;</p></li>
- <li class="listitem"><p>ranker plugins;</p></li>
- <li class="listitem"><p>indexing-time token filter plugins;</p></li>
- <li class="listitem"><p>query-time token filter plugins.</p></li>
- </ul></div>
- <p>
- This section discusses writing and managing plugins in general;
- things specific to writing this or that type of a plugin are then
- discussed in their respective subsections.
- </p><p>
- So, how do you write and use a plugin? Three-line crash course
- goes as follows:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>create a dynamic library (either .so or .dll),
- most likely in C or C++;</p></li>
- <li class="listitem"><p>load that plugin into searchd using
- <a class="link" href="#sphinxql-create-plugin" title="8.36. CREATE PLUGIN syntax">CREATE PLUGIN</a>;
- </p></li>
- <li class="listitem"><p>invoke it using the plugin specific calls
- (typically using this or that OPTION).
- </p></li>
- </ul></div>
- <p>
- Note that while UDFs are first-class plugins they are nevertheless
- installed using a separate
- <a class="link" href="#sphinxql-create-function" title="8.18. CREATE FUNCTION syntax">CREATE FUNCTION</a>
- statement. It lets you specify the return type neatly so there was
- especially little reason to ruin backwards compatibility <span class="emphasis"><em>and</em></span>
- change the syntax.
- </p><p>
- Dynamic plugins are supported in <a class="link" href="#conf-workers" title="12.4.23. workers">workers=threads</a>
- mode only. Multiple plugins (and/or UDFs) may reside in a single library file.
- So you might choose to either put all your project-specific plugins in a single
- common uber-library; or you might choose to have a separate library for every
- UDF and plugin; that is up to you.
- </p><p>
- Just as with UDFs, you want to include <code class="filename">src/sphinxudf.h</code>
- header file. At the very least, you will need the SPH_UDF_VERSION
- constant to implement a proper version function. Depending on the specific
- plugin type, you might or might not need to link your plugin with
- <code class="filename">src/sphinxudf.c</code>. However, as of 2.2.2-beta all
- the functions implemented in <code class="filename">sphinxudf.c</code> are about
- unpacking the PACKEDFACTORS() blob, and no plugin types are exposed to that
- kind of data. So currently, you would never need to link with the C-file,
- just the header would be sufficient. (In fact, if you copy over the
- UDF version number, then for some of the plugin types you would not
- even need the header file.)
- </p><p>
- Formally, plugins are just sets of C functions that follow a certain
- naming pattern. You are typically required to define just one key function
- that does the most important work, but you may define a bunch of other
- functions, too. For example, to implement a ranker called "myrank",
- you must define <code class="code">myrank_finalize()</code> function that actually returns
- the rank value, however, you might also define <code class="code">myrank_init()</code>,
- <code class="code">myrank_update()</code>, and <code class="code">myrank_deinit()</code> functions.
- Specific sets of well-known suffixes and the call arguments do differ
- based on the plugin type, but _init() and _deinit() are generic, every
- plugin has those. Protip: for a quick reference on the known suffixes and
- their argument types, refer to <code class="filename">sphinxplugin.h</code>,
- we define the call prototypes in the very beginning of that file.
- </p><p>
- Despite having the public interface defined in ye good olde good pure C,
- our plugins essentially follow the <span class="emphasis"><em>object-oriented model</em></span>.
- Indeed, every <code class="code">_init()</code> function receives a <code class="code">void ** userdata</code>
- out-parameter. And the pointer value that you store at <code class="code">(*userdata)</code>
- location is then passed as a 1st argument to all the other plugin functions.
- So you can think of a plugin as a <span class="emphasis"><em>class</em></span> that gets instantiated
- every time an object of that class is needed to handle a request: the <code class="code">userdata</code>
- pointer would be its <code class="code">this</code> pointer; the functions would be its methods,
- and the <code class="code">_init()</code> and <code class="code">_deinit()</code> functions would be
- the constructor and destructor respectively.
- </p><p>
- Why this (minor) OOP-in-C complication? Well, plugins run in a multi-threaded
- environment, and some of them have to be stateful. You can't keep that state in
- a global variable in your plugin. So we have to pass around a userdata parameter
- anyway to let you keep that state. And that naturally brings us to the OOP model.
- And if you've got a simple, stateless plugin, the interface lets you omit the
- <code class="code">_init()</code> and <code class="code">_deinit()</code> and whatever other functions
- just as well.
- </p><p>
- To summarize, here goes the simplest complete ranker plugin, in just
- 3 lines of C code.
- </p><pre class="programlisting">
- // gcc -fPIC -shared -o myrank.so myrank.c
- #include "sphinxudf.h"
- int myrank_ver() { return SPH_UDF_VERSION; }
- int myrank_finalize(void *u, int w) { return 123; }
- </pre><p>
- And this is how you use it:
- </p><pre class="programlisting">
- mysql> CREATE PLUGIN myrank TYPE 'ranker' SONAME 'myrank.so';
- Query OK, 0 rows affected (0.00 sec)
- mysql> SELECT id, weight() FROM test1 WHERE MATCH('test')
- -> OPTION ranker=myrank('');
- +------+----------+
- | id | weight() |
- +------+----------+
- | 1 | 123 |
- | 2 | 123 |
- +------+----------+
- 2 rows in set (0.01 sec)
- </pre><p>
- </p></div>
- <div class="sect1" title="6.3. Ranker plugins"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="ranker-plugins"></a>6.3. Ranker plugins</h2></div></div></div>
- <p>
- Ranker plugins let you implement a custom ranker that receives
- all the occurrences of the keywords matched in the document, and
- computes a WEIGHT() value. They can be called as follows:
- </p><pre class="programlisting">
- SELECT id, attr1 FROM test WHERE match('hello')
- OPTION ranker=myranker('option1=1');
- </pre><p>
- </p><p>
- The call workflow is as follows:
- </p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><code class="code">XXX_init()</code> gets called once per query
- per index, in the very beginning. A few query-wide options are
- passed to it through a <code class="code">SPH_RANKER_INIT</code> structure,
- including the user options strings (in the example just above,
- "option1=1" is that string).</li>
- <li class="listitem"><code class="code">XXX_update()</code> gets called multiple times per
- matched document, with every matched keyword occurrence passed as its
- parameter, a <code class="code">SPH_RANKER_HIT</code> structure. The occurrences
- within each document are guaranteed to be passed in the order of
- ascending <code class="code">hit->hit_pos</code> values.</li>
- <li class="listitem"><code class="code">XXX_finalize()</code> gets called once per matched
- document, once there are no more keyword occurrences. It must return
- the WEIGHT() value. This is the only mandatory function.</li>
- <li class="listitem"><code class="code">XXX_deinit()</code> gets called once per query,
- in the very end.</li>
- </ol></div>
- <p>
- </p></div></div>
- <div class="chapter" title="Chapter 7. Command line tools reference"><div class="titlepage"><div><div><h2 class="title"><a name="command-line-tools"></a>Chapter 7. Command line tools reference</h2></div></div></div>
- <div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="sect1"><a href="#ref-indexer">7.1. <code class="filename">indexer</code> command reference</a></span></dt>
- <dt><span class="sect1"><a href="#ref-searchd">7.2. <code class="filename">searchd</code> command reference</a></span></dt>
- <dt><span class="sect1"><a href="#ref-spelldump">7.3. <code class="filename">spelldump</code> command reference</a></span></dt>
- <dt><span class="sect1"><a href="#ref-indextool">7.4. <code class="filename">indextool</code> command reference</a></span></dt>
- <dt><span class="sect1"><a href="#ref-wordbreaker">7.5. <code class="filename">wordbreaker</code> command reference</a></span></dt>
- </dl></div>
- <p>As mentioned elsewhere, Sphinx is not a single program called 'sphinx',
- but a collection of 4 separate programs which collectively form Sphinx. This section
- covers these tools and how to use them.</p><div class="sect1" title="7.1. indexer command reference"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="ref-indexer"></a>7.1. <code class="filename">indexer</code> command reference</h2></div></div></div>
- <p><code class="filename">indexer</code> is the first of the two principal tools
- as part of Sphinx. Invoked from either the command line directly, or as part
- of a larger script, <code class="filename">indexer</code> is solely responsible
- for gathering the data that will be searchable.</p><p>The calling syntax for <code class="filename">indexer</code> is as follows:</p><pre class="programlisting">
- indexer [OPTIONS] [indexname1 [indexname2 [...]]]
- </pre><p>Essentially you would list the different possible indexes (that you would later
- make available to search) in <code class="filename">sphinx.conf</code>, so when calling
- <code class="filename">indexer</code>, as a minimum you need to be telling it what index
- (or indexes) you want to index.</p><p>If <code class="filename">sphinx.conf</code> contained details on 2 indexes,
- <code class="filename">mybigindex</code> and <code class="filename">mysmallindex</code>,
- you could do the following:</p><pre class="programlisting">
- $ indexer mybigindex
- $ indexer mysmallindex mybigindex
- </pre><p>As part of the configuration file, <code class="filename">sphinx.conf</code>, you specify
- one or more indexes for your data. You might call <code class="filename">indexer</code> to reindex
- one of them, ad-hoc, or you can tell it to process all indexes - you are not limited
- to calling just one, or all at once, you can always pick some combination
- of the available indexes.</p><p>The majority of the options for <code class="filename">indexer</code> are given
- in the configuration file, however there are some options you might need to specify
- on the command line as well, as they can affect how the indexing operation is performed.
- These options are:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p><code class="option">--config <file></code> (<code class="option">-c <file></code> for short)
- tells <code class="filename">indexer</code> to use the given file as its configuration. Normally,
- it will look for <code class="filename">sphinx.conf</code> in the installation directory
- (e.g. <code class="filename">/usr/local/sphinx/etc/sphinx.conf</code> if installed into
- <code class="filename">/usr/local/sphinx</code>), followed by the current directory you are
- in when calling <code class="filename">indexer</code> from the shell. This is most of use
- in shared environments where the binary files are installed somewhere like
- <code class="filename">/usr/local/sphinx/</code> but you want to provide users with
- the ability to make their own custom Sphinx set-ups, or if you want to run
- multiple instances on a single server. In cases like those you could allow them
- to create their own <code class="filename">sphinx.conf</code> files and pass them to
- <code class="filename">indexer</code> with this option. For example:
- </p><pre class="programlisting">
- $ indexer --config /home/myuser/sphinx.conf myindex
- </pre><p>
- </p></li>
- <li class="listitem"><p><code class="option">--all</code> tells <code class="filename">indexer</code> to update
- every index listed in <code class="filename">sphinx.conf</code>, instead of listing individual indexes.
- This would be useful in small configurations, or <code class="filename">cron</code>-type or maintenance
- jobs where the entire index set will get rebuilt each day, or week, or whatever period is best.
- Example usage:
- </p><pre class="programlisting">
- $ indexer --config /home/myuser/sphinx.conf --all
- </pre><p>
- </p></li>
- <li class="listitem"><p><code class="option">--rotate</code> is used for rotating indexes. Unless you have the situation
- where you can take the search function offline without troubling users, you will almost certainly
- need to keep search running whilst indexing new documents. <code class="option">--rotate</code> creates
- a second index, parallel to the first (in the same place, simply including <code class="filename">.new</code>
- in the filenames). Once complete, <code class="filename">indexer</code> notifies <code class="filename">searchd</code>
- via sending the <code class="option">SIGHUP</code> signal, and <code class="filename">searchd</code> will attempt
- to rename the indexes (renaming the existing ones to include <code class="filename">.old</code>
- and renaming the <code class="filename">.new</code> to replace them), and then start serving
- from the newer files. Depending on the setting of
- <a class="link" href="#conf-seamless-rotate" title="12.4.9. seamless_rotate">seamless_rotate</a>, there may be a slight delay
- in being able to search the newer indexes. Example usage:
- </p><pre class="programlisting">
- $ indexer --rotate --all
- </pre><p>
- </p></li>
- <li class="listitem"><p><code class="option">--quiet</code> tells <code class="filename">indexer</code> not to output anything,
- unless there is an error. Again, most used for <code class="filename">cron</code>-type, or other script
- jobs where the output is irrelevant or unnecessary, except in the event of some kind of error.
- Example usage:
- </p><pre class="programlisting">
- $ indexer --rotate --all --quiet
- </pre><p>
- </p></li>
- <li class="listitem"><p><code class="option">--noprogress</code> does not display progress details as they occur;
- instead, the final status details (such as documents indexed, speed of indexing and so on)
- are only reported at completion of indexing. In instances where the script is not being
- run on a console (or 'tty'), this will be on by default. Example usage:
- </p><pre class="programlisting">
- $ indexer --rotate --all --noprogress
- </pre><p>
- </p></li>
- <li class="listitem"><p><code class="option">--buildstops <outputfile.txt> <N></code> reviews
- the index source, as if it were indexing the data, and produces a list of the terms
- that are being indexed. In other words, it produces a list of all the searchable terms
- that are becoming part of the index. Note: it does not update the index in question,
- it simply processes the data 'as if' it were indexing, including running queries
- defined with <code class="option">sql_query_pre</code> or <code class="option">sql_query_post</code>.
- <code class="filename">outputfile.txt</code> will contain the list of words, one per line,
- sorted by frequency with most frequent first, and <code class="filename">N</code> specifies
- the maximum number of words that will be listed; if sufficiently large to encompass
- every word in the index, only that many words will be returned. Such a dictionary list
- could be used for client application features around "Did you mean..." functionality,
- usually in conjunction with <code class="option">--buildfreqs</code>, below. Example:
- </p><pre class="programlisting">
- $ indexer myindex --buildstops word_freq.txt 1000
- </pre><p>
- This would produce a document in the current directory, <code class="filename">word_freq.txt</code>
- with the 1,000 most common words in 'myindex', ordered by most common first. Note that
- the file will pertain to the last index indexed when specified with multiple indexes or
- <code class="option">--all</code> (i.e. the last one listed in the configuration file).
- </p></li>
- <li class="listitem"><p><code class="option">--buildfreqs</code> works with <code class="option">--buildstops</code>
- (and is ignored if <code class="option">--buildstops</code> is not specified).
- As <code class="option">--buildstops</code> provides the list of words used within the index,
- <code class="option">--buildfreqs</code> adds the quantity present in the index, which would be
- useful in establishing whether certain words should be considered stopwords
- if they are too prevalent. It will also help with developing "Did you mean..."
- features where you can see how much more common a given word is compared to another,
- similar one. Example:
- </p><pre class="programlisting">
- $ indexer myindex --buildstops word_freq.txt 1000 --buildfreqs
- </pre><p>
- This would produce the <code class="filename">word_freq.txt</code> as above, however after each word would be the number of times it occurred in the index in question.
- </p></li>
- <li class="listitem"><p><code class="option">--merge <dst-index> <src-index></code> is used
- for physically merging indexes together, for example if you have a main+delta scheme,
- where the main index rarely changes, but the delta index is rebuilt frequently,
- and <code class="option">--merge</code> would be used to combine the two. The operation moves
- from right to left - the contents of <code class="filename">src-index</code> get examined
- and physically combined with the contents of <code class="filename">dst-index</code>
- and the result is left in <code class="filename">dst-index</code>.
- In pseudo-code, it might be expressed as: <code class="code">dst-index += src-index</code>
- An example:
- </p><pre class="programlisting">
- $ indexer --merge main delta --rotate
- </pre><p>
- In the above example, where the main is the master, rarely modified index,
- and delta is the more frequently modified one, you might use the above to call
- <code class="filename">indexer</code> to combine the contents of the delta into the
- main index and rotate the indexes.
- </p></li>
- <li class="listitem"><p><code class="option">--merge-dst-range <attr> <min> <max></code>
- runs the filter range given upon merging. Specifically, as the merge is applied
- to the destination index (as part of <code class="option">--merge</code>, and is ignored
- if <code class="option">--merge</code> is not specified), <code class="filename">indexer</code>
- will also filter the documents ending up in the destination index, and only
- documents that pass through the filter given will end up in the final index.
- This could be used for example, in an index where there is a 'deleted' attribute,
- where 0 means 'not deleted'. Such an index could be merged with:
- </p><pre class="programlisting">
- $ indexer --merge main delta --merge-dst-range deleted 0 0
- </pre><p>
- Any documents marked as deleted (value 1) would be removed from the newly-merged
- destination index. It can be added several times to the command line,
- to add successive filters to the merge, all of which must be met in order
- for a document to become part of the final index.
- </p></li>
- <li class="listitem"><p><code class="option">--merge-killlists</code> (and its
- shorter alias <code class="option">--merge-klists</code>) changes the way
- kill lists are processed when merging indexes. By default, both
- kill lists get discarded after a merge. That supports the most typical
- main+delta merge scenario. With this option enabled, however, kill lists
- from both indexes get concatenated and stored into the destination index.
- Note that a source (delta) index kill list will be used to suppress rows
- from a destination (main) index at all times.
- </p></li>
- <li class="listitem"><p><code class="option">--keep-attrs</code> (added in version 2.1.1-beta)
- allows reusing existing attributes on reindexing. Whenever
- the index is rebuilt, each new document id is checked for presence in the
- "old" index, and if it already exists, its attributes are transferred to
- the "new" index; if not found, attributes from the new index are used. If
- the user has updated attributes in the index, but not in the actual source
- used for the index, all updates will be lost when reindexing; using --keep-attrs
- enables saving the updated attribute values from the previous index
- </p></li>
- <li class="listitem"><p><code class="option">--dump-rows <FILE></code> dumps rows fetched
- by SQL source(s) into the specified file, in a MySQL compatible syntax.
- Resulting dumps are the exact representation of data as received by
- <code class="filename">indexer</code> and help to repeat indexing-time issues.
- </p></li>
- <li class="listitem"><p><code class="option">--verbose</code> guarantees that every row that
- caused problems indexing (duplicate, zero, or missing document ID;
- or file field IO issues; etc) will be reported. By default, this option
- is off, and problem summaries may be reported instead.
- </p></li>
- <li class="listitem"><p><code class="option">--sighup-each</code> is useful when you are
- rebuilding many big indexes, and want each one rotated into
- <code class="filename">searchd</code> as soon as possible. With
- <code class="option">--sighup-each</code>, <code class="filename">indexer</code>
- will send a SIGHUP signal to searchd after successfully
- completing the work on each index. (The default behavior
- is to send a single SIGHUP after all the indexes were built.)
- </p></li>
- <li class="listitem"><p><code class="option">--nohup</code> is useful when you want to check your
- index with indextool before actually rotating it. indexer won't send
- SIGHUP if this option is on.
- </p></li>
- <li class="listitem"><p><code class="option">--print-queries</code> prints out
- SQL queries that <code class="filename">indexer</code> sends to
- the database, along with SQL connection and disconnection
- events. That is useful to diagnose and fix problems with
- SQL sources.
- </p></li>
- </ul></div>
- <p>
- </p></div>
- <div class="sect1" title="7.2. searchd command reference"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="ref-searchd"></a>7.2. <code class="filename">searchd</code> command reference</h2></div></div></div>
- <p><code class="filename">searchd</code> is the second of the two principal tools as part of Sphinx.
- <code class="filename">searchd</code> is the part of the system which actually handles searches;
- it functions as a server and is responsible for receiving queries, processing them and
- returning a dataset back to the different APIs for client applications.</p><p>Unlike <code class="filename">indexer</code>, <code class="filename">searchd</code> is not designed
- to be run either from a regular script or command-line calling, but instead either
- as a daemon to be called from init.d (on Unix/Linux type systems) or to be called
- as a service (on Windows-type systems), so not all of the command line options will
- always apply, and so will be build-dependent.</p><p>Calling <code class="filename">searchd</code> is simply a case of:</p><pre class="programlisting">
- $ searchd [OPTIONS]
- </pre><p>The options available to <code class="filename">searchd</code> on all builds are:</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p><code class="option">--help</code> (<code class="option">-h</code> for short) lists all of the
- parameters that can be called in your particular build of <code class="filename">searchd</code>.
- </p></li>
- <li class="listitem"><p><code class="option">--config <file></code> (<code class="option">-c <file></code> for short)
- tells <code class="filename">searchd</code> to use the given file as its configuration,
- just as with <code class="filename">indexer</code> above.
- </p></li>
- <li class="listitem"><p><code class="option">--stop</code> is used to asynchronously stop <code class="filename">searchd</code>,
- using the details of the PID file as specified in the <code class="filename">sphinx.conf</code> file,
- so you may also need to confirm to <code class="filename">searchd</code> which configuration
- file to use with the <code class="option">--config</code> option. NB, calling <code class="option">--stop</code>
- will also make sure any changes applied to the indexes with
- <a class="link" href="#api-func-updateatttributes" title="9.7.2. UpdateAttributes"><code class="code">UpdateAttributes()</code></a>
- will be applied to the index files themselves. Example:
- </p><pre class="programlisting">
- $ searchd --config /home/myuser/sphinx.conf --stop
- </pre><p>
- </p></li>
- <li class="listitem"><p><code class="option">--stopwait</code> is used to synchronously stop <code class="filename">searchd</code>.
- <code class="option">--stop</code> essentially tells the running instance to exit (by sending it a SIGTERM)
- and then immediately returns. <code class="option">--stopwait</code> will also attempt to wait until the
- running <code class="filename">searchd</code> instance actually finishes the shutdown (eg. saves all
- the pending attribute changes) and exits. Example:
- </p><pre class="programlisting">
- $ searchd --config /home/myuser/sphinx.conf --stopwait
- </pre><p>
- Possible exit codes are as follows:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="circle"><li class="listitem"><p>0 on success;</p></li>
- <li class="listitem"><p>1 if connection to running searchd daemon failed;</p></li>
- <li class="listitem"><p>2 if daemon reported an error during shutdown;</p></li>
- <li class="listitem"><p>3 if daemon crashed during shutdown.</p></li>
- </ul></div>
- <p>
- </p></li>
- <li class="listitem"><p><code class="option">--status</code> command is used to query running
- <code class="filename">searchd</code> instance status, using the connection details
- from the (optionally) provided configuration file. It will try to connect
- to the running instance using the first configured UNIX socket or TCP port.
- On success, it will query for a number of status and performance counter
- values and print them. You can use <a class="link" href="#api-func-status" title="9.7.5. Status">Status()</a>
- API call to access the very same counters from your application. Examples:
- </p><pre class="programlisting">
- $ searchd --status
- $ searchd --config /home/myuser/sphinx.conf --status
- </pre><p>
- </p></li>
- <li class="listitem"><p><code class="option">--pidfile</code> is used to explicitly force
- using a PID file (where the <code class="filename">searchd</code> process number
- is stored) despite any other debugging options that say otherwise
- (for instance, <code class="option">--console</code>). This is a debugging option.
- </p><pre class="programlisting">
- $ searchd --console --pidfile
- </pre><p>
- </p></li>
- <li class="listitem"><p><code class="option">--console</code> is used to force <code class="filename">searchd</code>
- into console mode; typically it will be running as a conventional server application,
- and will aim to dump information into the log files (as specified in
- <code class="filename">sphinx.conf</code>). Sometimes though, when debugging issues
- in the configuration or the daemon itself, or trying to diagnose hard-to-track-down
- problems, it may be easier to force it to dump information directly
- to the console/command line from which it is being called. Running in console mode
- also means that the process will not be forked (so searches are done in sequence)
- and logs will not be written to. (It should be noted that console mode
- is not the intended method for running <code class="filename">searchd</code>.)
- You can invoke it as such:
- </p><pre class="programlisting">
- $ searchd --config /home/myuser/sphinx.conf --console
- </pre><p>
- </p></li>
- <li class="listitem"><p><code class="option">--logdebug</code>, <code class="option">--logdebugv</code>,
- and <code class="option">--logdebugvv</code> options enable additional debug output
- in the daemon log. They differ by the logging verboseness level. These are
- debugging options, they pollute the log a lot, and thus they should
- <span class="emphasis"><em>not</em></span> be normally enabled. (The normal use case for
- these is to enable them temporarily on request, to assist with some
- particularly complicated debugging session.)
- </p></li>
- <li class="listitem"><p><code class="option">--iostats</code> is used in conjunction with the
- logging options (the <code class="option">query_log</code> will need to have been
- activated in <code class="filename">sphinx.conf</code>) to provide more detailed
- information on a per-query basis as to the input/output operations
- carried out in the course of that query, with a slight performance hit
- and of course bigger logs. Further details are available under the
- <a class="link" href="#query-log-format" title="5.9. searchd query log formats">query log format</a> section.
- You might start <code class="filename">searchd</code> thus:
- </p><pre class="programlisting">
- $ searchd --config /home/myuser/sphinx.conf --iostats
- </pre><p>
- </p></li>
- <li class="listitem"><p><code class="option">--cpustats</code> is used to provide actual CPU time
- report (in addition to wall time) in both query log file (for every given
- query) and status report (aggregated). It depends on clock_gettime() system
- call and might therefore be unavailable on certain systems. You might start
- <code class="filename">searchd</code> thus:
- </p><pre class="programlisting">
- $ searchd --config /home/myuser/sphinx.conf --cpustats
- </pre><p>
- </p></li>
- <li class="listitem"><p><code class="option">--port portnumber</code> (<code class="option">-p</code> for short)
- is used to specify the port that <code class="filename">searchd</code> should listen on,
- usually for debugging purposes. This will usually default to 9312, but sometimes
- you need to run it on a different port. Specifying it on the command line
- will override anything specified in the configuration file. The valid range
- is 0 to 65535, but ports numbered below 1024 usually require
- a privileged account in order to run. An example of usage:
- </p><pre class="programlisting">
- $ searchd --port 9313
- </pre><p>
- </p></li>
- <li class="listitem"><p><code class="option">--listen ( address ":" port | port | path ) [ ":" protocol ]</code>
- (or <code class="option">-l</code> for short) works like <code class="option">--port</code>, but allows
- you to specify not only the port, but the full endpoint, either an IP address and port or
- a Unix-domain socket path, that <code class="filename">searchd</code> will listen on.
- In other words, you can specify either an IP address (or hostname) and port number, or
- just a port number, or Unix socket path. If you specify port number
- but not the address, searchd will listen on all network interfaces.
- Unix path is identified by a leading slash. As the last param you
- can also specify a protocol handler (listener) to be used for
- connections on this socket. Supported protocol values are 'sphinx'
- (Sphinx 0.9.x API protocol) and 'mysql41' (MySQL protocol used since
- 4.1 up to at least 5.1).</p></li>
- <li class="listitem"><p><code class="option">--index <index></code> (or <code class="option">-i
- <index></code> for short) forces this instance of
- <code class="filename">searchd</code> only to serve the specified index.
- Like <code class="option">--port</code>, above, this is usually for debugging purposes;
- more long-term changes would generally be applied to the configuration file
- itself. Example usage:
- </p><pre class="programlisting">
- $ searchd --index myindex
- </pre><p>
- </p></li>
- <li class="listitem"><p><code class="option">--strip-path</code> strips the path names from
- all the file names referenced from the index (stopwords, wordforms,
- exceptions, etc). This is useful for picking up indexes built on another
- machine with possibly different path layouts.
- </p></li>
- <li class="listitem"><p><code class="option">--replay-flags=<OPTIONS></code> switch,
- added in version 2.0.2-beta, can be used to specify a list of extra binary log
- replay options. The supported options are:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="circle"><li class="listitem"><p><code class="option">accept-desc-timestamp</code>,
- ignore descending transaction timestamps and replay such
- transactions anyway (the default behavior is to exit
- with an error).
- </p></li>
- </ul></div>
- <p>
- Example:
- </p><pre class="programlisting">
- $ searchd --replay-flags=accept-desc-timestamp
- </pre><p>
- </p></li>
- </ul></div>
- <p>There are some options for <code class="filename">searchd</code> that are specific
- to Windows platforms; they concern handling as a service and are only available in Windows binaries.</p><p>Note that on Windows searchd will default to <code class="option">--console</code> mode, unless you install it as a service.</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p><code class="option">--install</code> installs <code class="filename">searchd</code> as a service
- into the Microsoft Management Console (Control Panel / Administrative Tools / Services).
- Any other parameters specified on the command line, where <code class="option">--install</code>
- is specified will also become part of the command line on future starts of the service.
- For example, as part of calling <code class="filename">searchd</code>, you will likely also need
- to specify the configuration file with <code class="option">--config</code>, and you would do that
- as well as specifying <code class="option">--install</code>. Once called, the usual start/stop
- facilities will become available via the management console, so any methods you could
- use for starting, stopping and restarting services would also apply to
- <code class="filename">searchd</code>. Example:
- </p><pre class="programlisting">
- C:\WINDOWS\system32> C:\Sphinx\bin\searchd.exe --install
- --config C:\Sphinx\sphinx.conf
- </pre><p>
- If you wanted to have the I/O stats every time you started <code class="filename">searchd</code>,
- you would specify its option on the same line as the <code class="option">--install</code> command thus:
- </p><pre class="programlisting">
- C:\WINDOWS\system32> C:\Sphinx\bin\searchd.exe --install
- --config C:\Sphinx\sphinx.conf --iostats
- </pre><p>
- </p></li>
- <li class="listitem"><p><code class="option">--delete</code> removes the service from the Microsoft Management Console
- and other places where services are registered, after it was previously installed with
- <code class="option">--install</code>. Note, this does not uninstall the software or delete the indexes.
- It means the service will not be called from the services systems, and will not be started
- on the machine's next start. If currently running as a service, the current instance
- will not be terminated (until the next reboot, or <code class="filename">searchd</code> is called
- with <code class="option">--stop</code>). If the service was installed with a custom name
- (with <code class="option">--servicename</code>), the same name will need to be specified
- with <code class="option">--servicename</code> when calling to uninstall. Example:
- </p><pre class="programlisting">
- C:\WINDOWS\system32> C:\Sphinx\bin\searchd.exe --delete
- </pre><p>
- </p></li>
- <li class="listitem"><p><code class="option">--servicename <name></code> applies the given name to
- <code class="filename">searchd</code> when installing or deleting the service, as would appear
- in the Management Console; this will default to searchd, but if being deployed on servers
- where multiple administrators may log into the system, or a system with multiple
- <code class="filename">searchd</code> instances, a more descriptive name may be applicable.
- Note that unless combined with <code class="option">--install</code> or <code class="option">--delete</code>,
- this option does not do anything. Example:
- </p><pre class="programlisting">
- C:\WINDOWS\system32> C:\Sphinx\bin\searchd.exe --install
- --config C:\Sphinx\sphinx.conf --servicename SphinxSearch
- </pre><p>
- </p></li>
- <li class="listitem"><p><code class="option">--ntservice</code> is the option that is passed by the
- Management Console to <code class="filename">searchd</code> to invoke it as a service
- on Windows platforms. It would not normally be necessary to call this directly;
- this would normally be called by Windows when the service would be started,
- although if you wanted to call this as a regular service from the command-line
- (as the complement to <code class="option">--console</code>) you could do so in theory.
- </p></li>
- <li class="listitem"><p><code class="option">--safetrace</code> forces <code class="filename">searchd</code>
- to only use system backtrace() call in crash reports. In certain (rare) scenarios,
- this might be a "safer" way to get that report. This is a debugging option.
- </p></li>
- <li class="listitem"><p><code class="option">--nodetach</code> switch (Linux only) tells
- <code class="filename">searchd</code> not to detach into background. This will also
- cause log entries to be printed out to the console. Query processing operates
- as usual. This is a debugging option.
- </p></li>
- </ul></div>
- <p>
- Last but not least, as every other daemon, <code class="filename">searchd</code> supports a number of signals.
- </p><div class="variablelist"><dl><dt><span class="term">SIGTERM</span></dt>
- <dd><p>Initiates a clean shutdown. New queries will not be handled; but queries
- that are already started will not be forcibly interrupted.</p></dd><dt><span class="term">SIGHUP</span></dt>
- <dd><p>Initiates index rotation. Depending on the value of
- <a class="link" href="#conf-seamless-rotate" title="12.4.9. seamless_rotate">seamless_rotate</a> setting,
- new queries might be shortly stalled; clients will receive temporary
- errors.</p></dd><dt><span class="term">SIGUSR1</span></dt>
- <dd><p>Forces reopen of searchd log and query log files, letting
- you implement log file rotation.</p></dd></dl></div>
- <p>
- </p></div>
- <div class="sect1" title="7.3. spelldump command reference"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="ref-spelldump"></a>7.3. <code class="filename">spelldump</code> command reference</h2></div></div></div>
- <p><code class="filename">spelldump</code> is one of the helper tools within the Sphinx package.</p><p>It is used to extract the contents of a dictionary file that uses
- <code class="filename">ispell</code> or <code class="filename">MySpell</code> format, which
- can help build word lists for <em class="glossterm">wordforms</em> - all of
- the possible forms are pre-built for you.</p><p>Its general usage is:</p><pre class="programlisting">
- spelldump [options] <dictionary> <affix> [result] [locale-name]
- </pre><p>The two main parameters are the dictionary's main file and its affix
- file; usually these are named as
- <code class="filename">[language-prefix].dict</code> and
- <code class="filename">[language-prefix].aff</code> and will be available with most
- common Linux distributions, as well as various places online.</p><p><code class="option">[result]</code> specifies where the dictionary data should
- be output to, and <code class="option">[locale-name]</code> additionally specifies
- the locale details you wish to use.</p><p>There is an additional option, <code class="option">-c [file]</code>, which
- specifies a file for case conversion details.</p><p>Examples of its usage are:</p><pre class="programlisting">
- spelldump en.dict en.aff
- spelldump ru.dict ru.aff ru.txt ru_RU.CP1251
- spelldump ru.dict ru.aff ru.txt .1251
- </pre><p>The results file will contain a list of all the words in the
- dictionary in alphabetical order, output in the format of a wordforms file,
- which you can use to customize for your specific circumstances. An example
- of the result file:</p><pre class="programlisting">
- zone > zone
- zoned > zoned
- zoning > zoning
- </pre></div>
- <div class="sect1" title="7.4. indextool command reference"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="ref-indextool"></a>7.4. <code class="filename">indextool</code> command reference</h2></div></div></div>
- <p>
- <code class="filename">indextool</code> is one of the helper tools within
- the Sphinx package, introduced in version 0.9.9-rc2. It is used to
- dump miscellaneous debug information about the physical index.
- (Additional functionality such as index verification is planned
- in the future, hence the indextool name rather than just indexdump.)
- Its general usage is:
- </p><pre class="programlisting">
- indextool <command> [options]
- </pre><p>
- Options apply to all commands:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p><code class="option">--config <file></code> (<code class="option">-c <file></code> for short)
- overrides the built-in config file names.
- </p></li>
- <li class="listitem"><p><code class="option">--quiet</code> (<code class="option">-q</code> for short)
- keeps indextool quiet - it will not output the banner, etc.
- </p></li>
- </ul></div>
- <p>
- </p><p>
- The commands are as follows:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p><code class="option">--checkconfig</code> just loads and verifies the
- config file to check if it's valid, without syntax errors.
- This option was added in version 2.1.1-beta.
- </p></li>
- <li class="listitem"><p><code class="option">--build-infixes INDEXNAME</code> builds infixes for
- an existing dict=keywords index (upgrades .sph, .spi in place). You can use
- this option for legacy index files that already use dict=keywords, but now
- need to support infix searching too; updating the index files with indextool
- may prove easier or faster than regenerating them from scratch with indexer.
- This option was added in version 2.1.1-beta.
- </p></li>
- <li class="listitem"><p><code class="option">--dumpheader FILENAME.sph</code> quickly dumps
- the provided index header file without touching any other index files
- or even the configuration file. The report provides a breakdown of
- all the index settings, in particular the entire attribute and
- field list. Prior to 0.9.9-rc2, this command was present in the now-removed
- CLI search utility.
- </p></li>
- <li class="listitem"><p><code class="option">--dumpconfig FILENAME.sph</code> dumps
- the index definition from the given index header file in (almost)
- compliant <code class="filename">sphinx.conf</code> file format.
- Added in version 2.0.1-beta.
- </p></li>
- <li class="listitem"><p><code class="option">--dumpheader INDEXNAME</code> dumps index header
- by index name, looking up the header path in the configuration file.
- </p></li>
- <li class="listitem"><p><code class="option">--dumpdict INDEXNAME</code> dumps dictionary. This was
- added in version 2.1.1-beta.
- </p></li>
- <li class="listitem"><p><code class="option">--dumpdocids INDEXNAME</code> dumps document IDs
- by index name. It takes the data from attribute (.spa) file and therefore
- requires docinfo=extern to work.
- </p></li>
- <li class="listitem"><p><code class="option">--dumphitlist INDEXNAME KEYWORD</code> dumps all
- the hits (occurrences) of a given keyword in a given index, with keyword
- specified as text.
- </p></li>
- <li class="listitem"><p><code class="option">--dumphitlist INDEXNAME --wordid ID</code> dumps all
- the hits (occurrences) of a given keyword in a given index, with keyword
- specified as internal numeric ID.
- </p></li>
- <li class="listitem"><p><code class="option">--fold INDEXNAME OPTFILE</code>
- This option is useful to see how the tokenizer actually processes input.
- You can feed indextool with text from a file if specified, or from stdin otherwise.
- The output will contain spaces instead of separators (according to your
- charset_table settings) and lowercased letters in words.
- </p></li>
- <li class="listitem"><p><code class="option">--htmlstrip INDEXNAME</code> filters stdin using
- HTML stripper settings for a given index, and prints the filtering
- results to stdout. Note that the settings will be taken from sphinx.conf,
- and not the index header.
- </p></li>
- <li class="listitem"><p><code class="option">--morph INDEXNAME</code> applies morphology to the
- given stdin and prints the result to stdout.
- </p></li>
- <li class="listitem"><p><code class="option">--check INDEXNAME</code> checks the index data
- files for consistency errors that might be introduced either by bugs
- in <code class="filename">indexer</code> and/or hardware faults. Starting with
- version 2.1.1-beta, <code class="option">--check</code> also works on RT indexes, RAM and disk chunks.
- </p></li>
- <li class="listitem"><p><code class="option">--strip-path</code> strips the path names from
- all the file names referenced from the index (stopwords, wordforms,
- exceptions, etc). This is useful for checking indexes built on another
- machine with possibly different path layouts.
- </p></li>
- <li class="listitem"><p><code class="option">--optimize-rt-klists</code> optimizes
- the kill list memory use in the disk chunk of a given RT index. That
- is a one-off optimization intended for rather old RT indexes, created
- by development versions prior to 1.10-beta release. As of 1.10-beta
- releases, this kill list optimization (purging) should happen
- automatically, and there should never be a need to use this option.
- </p></li>
- <li class="listitem"><p><code class="option">--rotate</code> works only with <code class="option">--check</code> and defines
- whether to check index waiting for rotation, i.e. with .new extension. This
- is useful when you want to check your index before actually using it.
- </p></li>
- </ul></div></div>
- <div class="sect1" title="7.5. wordbreaker command reference"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="ref-wordbreaker"></a>7.5. <code class="filename">wordbreaker</code> command reference</h2></div></div></div>
- <p>
- <code class="filename">wordbreaker</code> is one of the helper tools within
- the Sphinx package, introduced in version 2.1.1-beta. It is used to
- split compound words, as usual in URLs, into their component words.
- For example, this tool can split "lordoftherings" into its four
- component words, or "http://manofsteel.warnerbros.com" into "man
- of steel warner bros". This helps searching, without requiring
- prefixes or infixes: searching for "sphinx" wouldn't match "sphinxsearch"
- but if you break the compound word and index the separate components,
- you'll get a match without the cost of the larger index files that prefixes and infixes require.
- </p><p>Examples of its usage are:</p><pre class="programlisting">
- echo manofsteel | bin/wordbreaker -dict dict.txt split
- </pre><p>The input stream will be separated in words using the <code class="option">-dict</code>
- dictionary file. (The dictionary should match the language of the compound word.)
- The <code class="option">split</code> command breaks words from the standard input, and
- outputs the result in the standard output. There are also <code class="option">test</code> and
- <code class="option">bench</code> commands that let you test the splitting quality and benchmark
- the splitting functionality.
- </p><p>
- Wordbreaker needs a dictionary to recognize individual substrings within a string. To
- differentiate between different guesses, it uses the relative frequency of each
- word in the dictionary: higher frequency means higher split probability. You can
- generate such a file using the <code class="filename">indexer</code> tool, as in
- </p><pre class="programlisting">
- indexer --buildstops dict.txt 100000 --buildfreqs myindex -c /path/to/sphinx.conf
- </pre><p>
- which will write the 100,000 most frequent words, along with their counts, from
- myindex into dict.txt. The output file is a text file, so you can edit it by hand,
- if need be, to add or remove words.
- </p><p>See
- <a class="ulink" href="http://sphinxsearch.com/blog/2013/01/29/a-new-tool-in-the-trunk-wordbreaker/" target="_top">
- http://sphinxsearch.com/blog/2013/01/29/a-new-tool-in-the-trunk-wordbreaker/</a>
- for more on this tool.
- </p></div></div>
- <div class="chapter" title="Chapter 8. SphinxQL reference"><div class="titlepage"><div><div><h2 class="title"><a name="sphinxql-reference"></a>Chapter 8. SphinxQL reference</h2></div></div></div>
- <div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="sect1"><a href="#sphinxql-select">8.1. SELECT syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-select-sysvar">8.2. SELECT @@system_variable syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-meta">8.3. SHOW META syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-warnings">8.4. SHOW WARNINGS syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-status">8.5. SHOW STATUS syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-insert">8.6. INSERT and REPLACE syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-replace">8.7. REPLACE syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-delete">8.8. DELETE syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-set">8.9. SET syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-set-transaction">8.10. SET TRANSACTION syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-commit">8.11. BEGIN, COMMIT, and ROLLBACK syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-begin">8.12. BEGIN syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-rollback">8.13. ROLLBACK syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-call-snippets">8.14. CALL SNIPPETS syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-call-keywords">8.15. CALL KEYWORDS syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-tables">8.16. SHOW TABLES syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-describe">8.17. DESCRIBE syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-create-function">8.18. CREATE FUNCTION syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-drop-function">8.19. DROP FUNCTION syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-variables">8.20. SHOW VARIABLES syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-collation">8.21. SHOW COLLATION syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-character-set">8.22. SHOW CHARACTER SET syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-update">8.23. UPDATE syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-attach">8.24. ALTER syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-attach-index">8.25. ATTACH INDEX syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-flush-rtindex">8.26. FLUSH RTINDEX syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-flush-ramchunk">8.27. FLUSH RAMCHUNK syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-truncate-rtindex">8.28. TRUNCATE RTINDEX syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-agent-status">8.29. SHOW AGENT STATUS</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-profile">8.30. SHOW PROFILE syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-index-status">8.31. SHOW INDEX STATUS syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-index-settings">8.32. SHOW INDEX SETTINGS syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-optimize-index">8.33. OPTIMIZE INDEX syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-plan">8.34. SHOW PLAN syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-databases">8.35. SHOW DATABASES syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-create-plugin">8.36. CREATE PLUGIN syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-drop-plugin">8.37. DROP PLUGIN syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-show-plugins">8.38. SHOW PLUGINS syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-threads">8.39. SHOW THREADS syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-multi-queries">8.40. Multi-statement queries</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-comment-syntax">8.41. Comment syntax</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-reserved-keywords">8.42. List of SphinxQL reserved keywords</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxql-upgrading-magics">8.43. SphinxQL upgrade notes, version 2.0.1-beta</a></span></dt>
- </dl></div>
- <p>
- SphinxQL is our SQL dialect that exposes all of the search daemon
- functionality using a standard SQL syntax with a few Sphinx-specific
- extensions. Everything available via the SphinxAPI is also available
- via SphinxQL but not vice versa; for instance, writes into RT indexes
- are only available via SphinxQL. This chapter documents supported
- SphinxQL statements syntax.
- </p><div class="sect1" title="8.1. SELECT syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-select"></a>8.1. SELECT syntax</h2></div></div></div>
- <pre class="programlisting">
- SELECT
- select_expr [, select_expr ...]
- FROM index [, index2 ...]
- [WHERE where_condition]
- [GROUP [N] BY {col_name | expr_alias} [, {col_name | expr_alias}]]
- [WITHIN GROUP ORDER BY {col_name | expr_alias} {ASC | DESC}]
- [HAVING having_condition]
- [ORDER BY {col_name | expr_alias} {ASC | DESC} [, ...]]
- [LIMIT [offset,] row_count]
- [OPTION opt_name = opt_value [, ...]]
- [FACET facet_options[ FACET facet_options][ ...]]
- </pre><p>
- <span class="bold"><strong>SELECT</strong></span> statement was introduced in version 0.9.9-rc2.
- Its syntax is based upon regular SQL but adds several Sphinx-specific
- extensions and has a few omissions (such as (currently) missing support for JOINs).
- Specifically,
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>Column list clause. Column names, arbitrary expressions,
- and star ('*') are all allowed (ie.
- <code class="code">SELECT id, group_id*123+456 AS expr1 FROM test1</code>
- will work). Unlike in regular SQL, all computed expressions must be aliased
- with a valid identifier. Starting with version 2.0.1-beta, <code class="code">AS</code>
- is optional.
- </p></li>
- <li class="listitem"><p>EXIST() function (added in version 2.1.1-beta) is supported.
- EXIST ( "attr-name", default-value )
- replaces non-existent columns with default values. It returns either a value
- of an attribute specified by 'attr-name', or 'default-value' if that
- attribute does not exist. As of 2.1.1-beta it does not support STRING
- or MVA attributes. This function is handy when you are searching through
- several indexes with different schemas.
- </p><pre class="programlisting">
- SELECT *, EXIST('gid', 6) as cnd FROM i1, i2 WHERE cnd>5
- </pre></li>
- <li class="listitem"><p>SNIPPET() function (added in version 2.1.1-beta) is supported.
- This is a wrapper around the snippets functionality, similar to what is
- available via CALL SNIPPETS. The first two arguments are: the text
- to highlight, and a query. Starting with 2.2.1-beta it's possible to pass
- <a class="link" href="#api-func-buildexcerpts" title="9.7.1. BuildExcerpts">options</a> to the function.
- The intended use is as follows:
- </p><pre class="programlisting">
- SELECT id, SNIPPET(myUdf(id), 'my.query', 'limit=100')
- FROM myIndex WHERE MATCH('my.query')
- </pre><p>
- where myUdf() would be a UDF that fetches a document by its ID from
- some external storage. This enables applications to fetch the entire
- result set directly from Sphinx in one query, without having to separately
- fetch the documents in the application and then send them back to Sphinx
- for highlighting.
- </p><p>
- SNIPPET() is a so-called "post limit" function, meaning that computing
- snippets is postponed not just until the entire final result set is ready,
- but even after the LIMIT clause is applied. For example, with a LIMIT 20,10
- clause, SNIPPET() will be called at most 10 times.
- </p><p>
- Table functions are a mechanism of post-query result set processing. It was
- added in 2.2.1-beta. Table functions take an arbitrary result set as their
- input, and return a new, processed set as their output. The first argument
- should be the input result set, but a table function can optionally take
- and handle more arguments. Table functions can completely change the result
- set, including the schema. For now, only built in table functions are
- supported. UDFs are planned when the internal call interface is stabilized.
- Table functions work for both outer SELECT and nested SELECT.
- </p><div class="itemizedlist"><ul class="itemizedlist" type="circle"><li class="listitem"><p>REMOVE_REPEATS ( result_set, column, offset, limit ) - removes repeated
- adjacent rows with the same 'column' value.</p></li>
- </ul></div>
- <p>
- </p><pre class="programlisting">
- SELECT REMOVE_REPEATS((SELECT * FROM dist1), gid, 0, 10)
- </pre><p>
- </p></li>
- <li class="listitem"><p>FROM clause. FROM clause should contain the list of indexes
- to search through. Unlike in regular SQL, comma means enumeration of
- full-text indexes as in <a class="link" href="#api-func-query" title="9.6.1. Query">Query()</a>
- API call rather than JOIN. Index name should be according to the rules of
- a C identifier.
- </p></li>
- <li class="listitem"><p>WHERE clause. This clause will map both to fulltext query
- and filters. Comparison operators (=, !=, <, >, <=, >=), IN,
- AND, NOT, and BETWEEN are all supported and map directly to filters.
- OR is not supported yet but will be in the future. MATCH('query')
- is supported and maps to fulltext query. Query will be interpreted
- according to <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">full-text query language rules</a>.
- There must be at most one MATCH() in the clause. Starting with version
- 2.0.1-beta, <code class="code">{col_name | expr_alias} [NOT] IN @uservar</code>
- condition syntax is supported. (Refer to <a class="xref" href="#sphinxql-set" title="8.9. SET syntax">Section 8.9, “SET syntax”</a>
- for a discussion of global user variables.)
- </p></li>
- <li class="listitem"><p>GROUP BY clause. Supports grouping by multiple columns
- or computed expressions:
- </p><pre class="programlisting">
- SELECT *, group_id*1000+article_type AS gkey FROM example GROUP BY gkey
- SELECT id FROM products GROUP BY region, price
- </pre><p>
- Implicit grouping is supported when using aggregate functions without
- specifying a GROUP BY clause. Consider these two queries:
- </p><pre class="programlisting">
- SELECT MAX(id), MIN(id), COUNT(*) FROM books
- SELECT MAX(id), MIN(id), COUNT(*), 1 AS grp FROM books GROUP BY grp
- </pre><p>
- Aggregate functions (AVG(), MIN(), MAX(), SUM()) in column list
- clause are supported. Arguments to aggregate functions can be either
- plain attributes or arbitrary expressions. COUNT(*), COUNT(DISTINCT attr)
- are supported. Currently there can be at most one COUNT(DISTINCT) per
- query and an argument needs to be an attribute. Both current restrictions
- on COUNT(DISTINCT) might be lifted in the future. A special GROUPBY()
- function is also supported. It returns the GROUP BY key. That is
- particularly useful when grouping by an MVA value, in order to pick the
- specific value that was used to create the current group.
- </p><pre class="programlisting">
- SELECT *, AVG(price) AS avgprice, COUNT(DISTINCT storeid), GROUPBY()
- FROM products
- WHERE MATCH('ipod')
- GROUP BY vendorid
- </pre><p>
- </p><p>
- Starting with 2.0.1-beta, GROUP BY on a string attribute is supported,
- with respect for current collation (see <a class="xref" href="#collations" title="5.12. Collations">Section 5.12, “Collations”</a>).
- </p><p>Starting with 2.2.1-beta, you can query Sphinx to return (no more than)
- N top matches for each group according to WITHIN GROUP ORDER BY.</p><pre class="programlisting">
- SELECT id FROM products GROUP 3 BY category
- </pre><p>
- You can sort the result set by (an alias of) the aggregate value.
- </p><pre class="programlisting">
- SELECT group_id, MAX(id) AS max_id
- FROM my_index WHERE MATCH('the')
- GROUP BY group_id ORDER BY max_id DESC
- </pre><p>
- </p></li>
- <li class="listitem"><p>GROUP_CONCAT() function is supported, starting with version 2.1.1-beta.
- When you group by an attribute, the result set only shows attributes from a single document representing the whole group.
- GROUP_CONCAT() produces a comma-separated list of the attribute values of all documents in the group.
- </p><pre class="programlisting">
- SELECT id, GROUP_CONCAT(price) as pricesList, GROUPBY() AS name FROM shops GROUP BY shopName;
- </pre></li>
- <li class="listitem"><p>
- ZONESPANLIST() function returns pairs of matched zone spans. Each pair
- contains the matched zone span identifier, a colon, and the order number
- of the matched zone span. For example, if a document reads
- &lt;b&gt;&lt;i&gt;text&lt;/i&gt; the &lt;i&gt;text&lt;/i&gt;&lt;/b&gt;, and you query for
- 'ZONESPAN:(i,b) text', then ZONESPANLIST() will return the string
- "1:1 1:2 2:1" meaning that the first zone span matched "text"
- in spans 1 and 2, and the second zone span in span 1 only.
- This was added in version 2.1.1-beta.
- </p></li>
- <li class="listitem"><p>WITHIN GROUP ORDER BY clause. This is a Sphinx specific
- extension that lets you control how the best row within a group
- will be selected. The syntax matches that of regular ORDER BY
- clause:
- </p><pre class="programlisting">
- SELECT *, INTERVAL(posted,NOW()-7*86400,NOW()-86400) AS timeseg, WEIGHT() AS w
- FROM example WHERE MATCH('my search query')
- GROUP BY siteid
- WITHIN GROUP ORDER BY w DESC
- ORDER BY timeseg DESC, w DESC
- </pre><p>
- Starting with 2.0.1-beta, WITHIN GROUP ORDER BY on a string attribute is supported,
- with respect for current collation (see <a class="xref" href="#collations" title="5.12. Collations">Section 5.12, “Collations”</a>).
- </p></li>
- <li class="listitem"><p>
- HAVING clause. This is used to filter on GROUP BY values. It was added in
- 2.2.1-beta. Currently supports only one filtering condition.
- </p><pre class="programlisting">
- SELECT id FROM plain GROUP BY title HAVING group_id=16;
- SELECT id FROM plain GROUP BY attribute HAVING COUNT(*)>1;
- </pre><p>
- </p></li>
- <li class="listitem"><p>ORDER BY clause. Unlike in regular SQL, only column names
- (not expressions) are allowed and explicit ASC and DESC are required.
- The columns however can be computed expressions:
- </p><pre class="programlisting">
- SELECT *, WEIGHT()*10+docboost AS skey FROM example ORDER BY skey
- </pre><p>
- Starting with 2.1.1-beta, you can use subqueries to speed up specific searches, which involve reranking, by postponing hard (slow) calculations as
- late as possible. For example, SELECT id,a_slow_expression() AS cond FROM an_index ORDER BY id ASC, cond DESC LIMIT 100; could be
- better written as SELECT * FROM (SELECT id,a_slow_expression() AS cond FROM an_index ORDER BY id ASC LIMIT 100) ORDER BY cond DESC;
- because in the first case the slow expression would be evaluated for the whole set, while in the second one it would be
- evaluated just for a subset of values.
- </p><p>
- Starting with 2.0.1-beta, ORDER BY on a string attribute is supported,
- with respect for current collation (see <a class="xref" href="#collations" title="5.12. Collations">Section 5.12, “Collations”</a>).
- </p><p>
- Starting with 2.0.2-beta, ORDER BY RAND() syntax is supported.
- Note that this syntax is actually going to randomize the weight
- values and then order matches by those randomized weights.
- </p></li>
- <li class="listitem"><p>LIMIT clause. Both LIMIT N and LIMIT M,N forms are supported.
- Unlike in regular SQL (but like in Sphinx API), an implicit LIMIT 0,20
- is present by default.
- </p></li>
- <li class="listitem"><p>OPTION clause. This is a Sphinx specific extension that
- lets you control a number of per-query options. The syntax is:
- </p><pre class="programlisting">
- OPTION &lt;optionname&gt;=&lt;value&gt; [ , ... ]
- </pre><p>
- Supported options and respectively allowed values are:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="circle"><li class="listitem"><p>'agent_query_timeout' - integer (max time in milliseconds to wait for remote queries to complete,
- see <a class="link" href="#conf-agent-query-timeout" title="12.2.35. agent_query_timeout">agent_query_timeout</a> under Index configuration options for details)</p></li>
- <li class="listitem"><p>'boolean_simplify' - 0 or 1, enables simplifying the query to speed it up</p></li>
- <li class="listitem"><p>'comment' - string, user comment that gets copied to a query log file</p></li>
- <li class="listitem"><p>'cutoff' - integer (max found matches threshold)</p></li>
- <li class="listitem"><p>'field_weights' - a named integer list (per-field user weights for ranking)</p></li>
- <li class="listitem"><p>'global_idf' - use global statistics (frequencies)
- from the <a class="link" href="#conf-global-idf" title="12.2.66. global_idf">global_idf file</a> for IDF
- computations, rather than the local index statistics.
- Added in version 2.1.1-beta.
- </p></li>
- <li class="listitem"><p>'idf' - a quoted, comma-separated list of IDF computation flags. Added in version 2.1.1-beta.
- Known flags are:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="square"><li class="listitem"><p>normalized: BM25 variant, idf = log((N-n+1)/n), as per Robertson et al</p></li>
- <li class="listitem"><p>plain: plain variant, idf = log(N/n), as per Sparck-Jones</p></li>
- <li class="listitem"><p>tfidf_normalized (added in 2.2.1-beta): additionally divide IDF
- by query word count, so that TF*IDF fits into [0, 1] range</p></li>
- <li class="listitem"><p>tfidf_unnormalized (added in 2.2.1-beta): do not additionally
- divide IDF by query word count</p></li>
- </ul></div>
- <p>
- where <span class="bold"><strong>N</strong></span> is the collection size and <span class="bold"><strong>n</strong></span> is the number of matched
- documents.
- </p><p>
- The historically default IDF (Inverse Document Frequency) in Sphinx
- is equivalent to <code class="code">OPTION idf='normalized,tfidf_normalized'</code>,
- and those normalizations may cause several undesired effects.
- </p><p>
- First, idf=normalized causes keyword penalization. For instance,
- if you search for [the | something] and [the] occurs
- in more than 50% of the documents, then documents with both keywords
- [the] and [something] will get <span class="bold"><strong>less</strong></span> weight than documents with
- just one keyword [something]. Using <code class="code">OPTION idf=plain</code> avoids this.
- Plain IDF varies in [0, log(N)] range, and keywords
- are never penalized; while the normalized IDF varies in [-log(N), log(N)]
- range, and too frequent keywords are penalized.
- </p><p>
- Second, idf=tfidf_normalized causes IDF drift over queries. Historically,
- we additionally divided IDF by query keyword count, so that the entire
- sum(tf*idf) over all keywords would still fit into [0,1] range. However,
- that means that queries [word1] and [word1 | nonmatchingword2] would
- assign different weights to the exactly same result set, because the IDFs
- for both "word1" and "nonmatchingword2" would be divided by 2.
- <code class="code">OPTION idf=tfidf_unnormalized</code> fixes that. Note that
- BM25, BM25A, BM25F() ranking factors will be scaled accordingly
- once you disable this normalization.
- </p><p>
- IDF flags can be mixed; 'plain' and 'normalized' are mutually exclusive;
- 'tfidf_unnormalized' and 'tfidf_normalized' are mutually exclusive;
- and unspecified flags in such a mutually exclusive group take their
- defaults. That means that <code class="code">OPTION idf=plain</code> is equivalent
- to a complete <code class="code">OPTION idf='plain,tfidf_normalized'</code> specification.
- </p></li>
- <li class="listitem"><p>local_df (added in 2.2.1-beta): 0 or 1, automatically sum DFs over all the
- local parts of a distributed index, so that the IDF is consistent (and precise) over
- a locally sharded index.
- </p></li>
- <li class="listitem"><p>'index_weights' - a named integer list (per-index user weights for ranking)</p></li>
- <li class="listitem"><p>'max_matches' - integer (per-query max matches value)</p><p>
- Maximum amount of matches that the daemon keeps in RAM for each index and can return to the client.
- Default is 1000.
- </p><p>
- Introduced in order to control and limit RAM usage, <code class="code">max_matches</code>
- setting defines how many matches will be kept in RAM while searching each index.
- Every match found will still be <span class="emphasis"><em>processed</em></span>; but only
- the best N of them will be kept in memory and returned to the client in the end.
- Assume that the index contains 2,000,000 matches for the query. You rarely
- (if ever) need to retrieve <span class="emphasis"><em>all</em></span> of them. Rather, you need
- to scan all of them, but only choose at most, say, the "best" 500 by some criteria
- (i.e. sorted by relevance, or price, or anything else), and display those
- 500 matches to the end user in pages of 20 to 100 matches. And tracking
- only the best 500 matches is much more RAM and CPU efficient than keeping
- all 2,000,000 matches, sorting them, and then discarding everything but
- the first 20 needed to display the search results page. <code class="code">max_matches</code>
- controls N in that "best N" amount.
- </p><p>
- This parameter noticeably affects per-query RAM and CPU usage.
- Values of 1,000 to 10,000 are generally fine, but higher limits must be
- used with care. Recklessly raising <code class="code">max_matches</code> to 1,000,000
- means that <code class="filename">searchd</code> will have to allocate and
- initialize 1-million-entry matches buffer for <span class="emphasis"><em>every</em></span>
- query. That will obviously increase per-query RAM usage, and in some cases
- can also noticeably impact performance.
- </p></li>
- <li class="listitem"><p>'max_query_time' - integer (max search time threshold, msec)</p></li>
- <li class="listitem"><p>'max_predicted_time' - integer (max predicted search time, see <a class="xref" href="#conf-predicted-time-costs" title="12.4.44. predicted_time_costs">Section 12.4.44, “predicted_time_costs”</a>)</p></li>
- <li class="listitem"><p>'ranker' - any of 'proximity_bm25', 'bm25', 'none', 'wordcount', 'proximity',
- 'matchany', 'fieldmask', 'sph04', 'expr', or 'export' (refer to <a class="xref" href="#weighting" title="5.4. Search results ranking">Section 5.4, “Search results ranking”</a>
- for more details on each ranker)</p></li>
- <li class="listitem"><p>'retry_count' - integer (distributed retries count)</p></li>
- <li class="listitem"><p>'retry_delay' - integer (distributed retry delay, msec)</p></li>
- <li class="listitem"><p>'reverse_scan' - 0 or 1, lets you control the order in which full-scan query processes the rows</p></li>
- <li class="listitem"><p>'sort_method' - 'pq' (priority queue, set by default) or 'kbuffer' (gives faster sorting for already pre-sorted data, e.g. index data sorted by id). The
- result set is in both cases the same; picking one option or the other may just improve (or worsen!) performance. This option was added in version 2.1.1-beta.</p></li>
- <li class="listitem"><p>'rand_seed' - lets you specify a specific integer seed value
- for an <code class="code">ORDER BY RAND()</code> query, for example: ... OPTION <code class="code">rand_seed=1234</code>.
- By default, a new and different seed value is autogenerated for every query.
- </p></li>
- </ul></div>
- <p>
- Example:
- </p><pre class="programlisting">
- SELECT * FROM test WHERE MATCH('@title hello @body world')
- OPTION ranker=bm25, max_matches=3000,
- field_weights=(title=10, body=3), agent_query_timeout=10000
- </pre><p>
- </p></li>
- <li class="listitem"><p>FACET clause. This Sphinx specific extension enables faceted search with subtree optimization.
- It is capable of returning multiple result sets with a single SQL statement, without the need for complicated <a class="link" href="#sphinxql-multi-queries" title="8.40. Multi-statement queries">multi-queries</a>.
- FACET clauses should be written at the very end of SELECT statements with spaces between them.
- </p><pre class="programlisting">
- FACET {expr_list} [BY {expr_list}] [ORDER BY {expr | FACET()} {ASC | DESC}] [LIMIT [offset,] count]
- SELECT * FROM test FACET brand_id FACET categories;
- SELECT * FROM test FACET brand_name BY brand_id ORDER BY brand_name ASC FACET property;
- </pre><p>
- Working example:
- </p><pre class="programlisting">
- mysql> SELECT *, IN(brand_id,1,2,3,4) AS b FROM facetdemo WHERE MATCH('Product') AND b=1 LIMIT 0,10
- FACET brand_name, brand_id BY brand_id ORDER BY brand_id ASC
- FACET property ORDER BY COUNT(*) DESC
- FACET INTERVAL(price,200,400,600,800) ORDER BY FACET() ASC
- FACET categories ORDER BY FACET() ASC;
- +------+-------+----------+-------------------+-------------+----------+------------+------+
- | id | price | brand_id | title | brand_name | property | categories | b |
- +------+-------+----------+-------------------+-------------+----------+------------+------+
- | 1 | 668 | 3 | Product Four Six | Brand Three | Three | 11,12,13 | 1 |
- | 2 | 101 | 4 | Product Two Eight | Brand Four | One | 12,13,14 | 1 |
- | 8 | 750 | 3 | Product Ten Eight | Brand Three | Five | 13 | 1 |
- | 9 | 49 | 1 | Product Ten Two | Brand One | Three | 13,14,15 | 1 |
- | 13 | 613 | 1 | Product Six Two | Brand One | Eight | 13 | 1 |
- | 20 | 985 | 2 | Product Two Six | Brand Two | Nine | 10 | 1 |
- | 22 | 501 | 3 | Product Five Two | Brand Three | Four | 12,13,14 | 1 |
- | 23 | 765 | 1 | Product Six Seven | Brand One | Nine | 11,12 | 1 |
- | 28 | 992 | 1 | Product Six Eight | Brand One | Two | 12,13 | 1 |
- | 29 | 259 | 1 | Product Nine Ten | Brand One | Five | 12,13,14 | 1 |
- +------+-------+----------+-------------------+-------------+----------+------------+------+
- +-------------+----------+----------+
- | brand_name | brand_id | count(*) |
- +-------------+----------+----------+
- | Brand One | 1 | 1012 |
- | Brand Two | 2 | 1025 |
- | Brand Three | 3 | 994 |
- | Brand Four | 4 | 973 |
- +-------------+----------+----------+
- +----------+----------+
- | property | count(*) |
- +----------+----------+
- | One | 427 |
- | Five | 420 |
- | Seven | 420 |
- | Two | 418 |
- | Three | 407 |
- | Six | 401 |
- | Nine | 396 |
- | Eight | 387 |
- | Four | 371 |
- | Ten | 357 |
- +----------+----------+
- +---------------------------------+----------+
- | interval(price,200,400,600,800) | count(*) |
- +---------------------------------+----------+
- | 0 | 799 |
- | 1 | 795 |
- | 2 | 757 |
- | 3 | 833 |
- | 4 | 820 |
- +---------------------------------+----------+
- +------------+----------+
- | categories | count(*) |
- +------------+----------+
- | 10 | 961 |
- | 11 | 1653 |
- | 12 | 1998 |
- | 13 | 2090 |
- | 14 | 1058 |
- | 15 | 347 |
- +------------+----------+
- </pre><p>
- </p></li>
- </ul></div>
- <p>
- </p></div>
- <div class="sect1" title="8.2. SELECT @@system_variable syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-select-sysvar"></a>8.2. SELECT @@system_variable syntax</h2></div></div></div>
- <pre class="programlisting">
- SELECT @@system_variable [LIMIT [offset,] row_count]
- </pre><p>
- Added in version 2.0.2-beta, this is currently a placeholder
- query that does nothing and reports success. That is in order
- to keep compatibility with frameworks and connectors that
- automatically execute this statement.
- </p></div>
- <div class="sect1" title="8.3. SHOW META syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-show-meta"></a>8.3. SHOW META syntax</h2></div></div></div>
- <pre class="programlisting">
- SHOW META [ LIKE pattern ]
- </pre><p><span class="bold"><strong>SHOW META</strong></span> shows additional meta-information about the latest
- query such as query time and keyword statistics. IO and CPU counters will only be available if searchd was started with --iostats and --cpustats switches respectively.
- Additional predicted_time, dist_predicted_time, [{local|dist}]_fetched_[{docs|hits|skips}] counters will only be available if searchd was configured with
- <a class="link" href="#conf-predicted-time-costs" title="12.4.44. predicted_time_costs">predicted time costs</a> and the query had max_predicted_time in its OPTION clause.
- </p><pre class="programlisting">
- mysql> SELECT * FROM test1 WHERE MATCH('test|one|two');
- +------+--------+----------+------------+
- | id | weight | group_id | date_added |
- +------+--------+----------+------------+
- | 1 | 3563 | 456 | 1231721236 |
- | 2 | 2563 | 123 | 1231721236 |
- | 4 | 1480 | 2 | 1231721236 |
- +------+--------+----------+------------+
- 3 rows in set (0.01 sec)
- mysql> SHOW META;
- +-----------------------+-------+
- | Variable_name | Value |
- +-----------------------+-------+
- | total | 3 |
- | total_found | 3 |
- | time | 0.005 |
- | keyword[0] | test |
- | docs[0] | 3 |
- | hits[0] | 5 |
- | keyword[1] | one |
- | docs[1] | 1 |
- | hits[1] | 2 |
- | keyword[2] | two |
- | docs[2] | 1 |
- | hits[2] | 2 |
- | cpu_time | 0.350 |
- | io_read_time | 0.004 |
- | io_read_ops | 2 |
- | io_read_kbytes | 0.4 |
- | io_write_time | 0.000 |
- | io_write_ops | 0 |
- | io_write_kbytes | 0.0 |
- | agents_cpu_time | 0.000 |
- | agent_io_read_time | 0.000 |
- | agent_io_read_ops | 0 |
- | agent_io_read_kbytes | 0.0 |
- | agent_io_write_time | 0.000 |
- | agent_io_write_ops | 0 |
- | agent_io_write_kbytes | 0.0 |
- +-----------------------+-------+
- 26 rows in set (0.00 sec)
- </pre><p>
- </p><p>
- Starting with version 2.1.1-beta, you can also use the optional LIKE clause.
- It lets you pick just the variables that match a pattern. The pattern syntax
- is that of regular SQL wildcards, that is, '%' means any number of any
- characters, and '_' means a single character:
- </p><pre class="programlisting">
- mysql> SHOW META LIKE 'total%';
- +-----------------------+-------+
- | Variable_name | Value |
- +-----------------------+-------+
- | total | 3 |
- | total_found | 3 |
- +-----------------------+-------+
- 2 rows in set (0.00 sec)
- </pre><p>
- </p></div>
- <div class="sect1" title="8.4. SHOW WARNINGS syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-show-warnings"></a>8.4. SHOW WARNINGS syntax</h2></div></div></div>
- <pre class="programlisting">
- SHOW WARNINGS
- </pre><p><span class="bold"><strong>SHOW WARNINGS</strong></span> statement, introduced in version 0.9.9-rc2,
- can be used to retrieve the warning
- produced by the latest query. The error message will be returned along with
- the query itself:
- </p><pre class="programlisting">
- mysql> SELECT * FROM test1 WHERE MATCH('@@title hello') \G
- ERROR 1064 (42000): index test1: syntax error, unexpected TOK_FIELDLIMIT
- near '@title hello'
- mysql> SELECT * FROM test1 WHERE MATCH('@title -hello') \G
- ERROR 1064 (42000): index test1: query is non-computable (single NOT operator)
- mysql> SELECT * FROM test1 WHERE MATCH('"test doc"/3') \G
- *************************** 1. row ***************************
- id: 4
- weight: 2500
- group_id: 2
- date_added: 1231721236
- 1 row in set, 1 warning (0.00 sec)
- mysql> SHOW WARNINGS \G
- *************************** 1. row ***************************
- Level: warning
- Code: 1000
- Message: quorum threshold too high (words=2, thresh=3); replacing quorum operator
- with AND operator
- 1 row in set (0.00 sec)
- </pre><p>
- </p></div>
- <div class="sect1" title="8.5. SHOW STATUS syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-show-status"></a>8.5. SHOW STATUS syntax</h2></div></div></div>
- <pre class="programlisting">
- SHOW STATUS [ LIKE pattern ]
- </pre><p><span class="bold"><strong>SHOW STATUS</strong></span>, introduced in version 0.9.9-rc2,
- displays a number of useful performance counters. IO and CPU
- counters will only be available if searchd was started with --iostats and --cpustats
- switches respectively.
- </p><pre class="programlisting">
- mysql> SHOW STATUS;
- +--------------------+-------+
- | Variable_name | Value |
- +--------------------+-------+
- | uptime | 216 |
- | connections | 3 |
- | maxed_out | 0 |
- | command_search | 0 |
- | command_excerpt | 0 |
- | command_update | 0 |
- | command_keywords | 0 |
- | command_persist | 0 |
- | command_status | 0 |
- | agent_connect | 0 |
- | agent_retry | 0 |
- | queries | 10 |
- | dist_queries | 0 |
- | query_wall | 0.075 |
- | query_cpu | OFF |
- | dist_wall | 0.000 |
- | dist_local | 0.000 |
- | dist_wait | 0.000 |
- | query_reads | OFF |
- | query_readkb | OFF |
- | query_readtime | OFF |
- | avg_query_wall | 0.007 |
- | avg_query_cpu | OFF |
- | avg_dist_wall | 0.000 |
- | avg_dist_local | 0.000 |
- | avg_dist_wait | 0.000 |
- | avg_query_reads | OFF |
- | avg_query_readkb | OFF |
- | avg_query_readtime | OFF |
- +--------------------+-------+
- 29 rows in set (0.00 sec)
- </pre><p>
- </p><p>
- Starting from version 2.1.1-beta, an optional LIKE clause is supported.
- Refer to <a class="xref" href="#sphinxql-show-meta" title="8.3. SHOW META syntax">Section 8.3, “SHOW META syntax”</a> for its syntax details.
- </p></div>
- <div class="sect1" title="8.6. INSERT and REPLACE syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-insert"></a>8.6. INSERT and REPLACE syntax</h2></div></div></div>
- <pre class="programlisting">
- {INSERT | REPLACE} INTO index [(column, ...)]
- VALUES (value, ...)
- [, (...)]
- </pre><p>
- INSERT statement, introduced in version 1.10-beta, is only supported for RT indexes.
- It inserts new rows (documents) into an existing index, with the provided column values.
- </p><p>
- ID column must be present in all cases. Rows with duplicate IDs will <span class="bold"><strong>not</strong></span>
- be overwritten by INSERT; use REPLACE to do that.
- </p><p>
- <code class="option">index</code> is the name of RT index into which the new row(s)
- should be inserted. The optional column names list lets you only explicitly specify
- values for some of the columns present in the index. All the other columns will be
- filled with their default values (0 for scalar types, empty string for text types).
- </p><p>
- Expressions are not currently supported in INSERT and values should be explicitly
- specified.
- </p><p>
- Multiple rows can be inserted using a single INSERT statement by providing
- several comma-separated, parentheses-enclosed lists of row values.
- </p></div>
- <div class="sect1" title="8.7. REPLACE syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-replace"></a>8.7. REPLACE syntax</h2></div></div></div>
- <pre class="programlisting">
- {INSERT | REPLACE} INTO index [(column, ...)]
- VALUES (value, ...)
- [, (...)]
- </pre><p>
- REPLACE syntax is identical to INSERT syntax and is discussed in <a class="xref" href="#sphinxql-insert" title="8.6. INSERT and REPLACE syntax">Section 8.6, “INSERT and REPLACE syntax”</a>.
- </p></div>
- <div class="sect1" title="8.8. DELETE syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-delete"></a>8.8. DELETE syntax</h2></div></div></div>
- <pre class="programlisting">
- DELETE FROM index WHERE where_condition
- </pre><p>
- DELETE statement, introduced in version 1.10-beta, is only supported for RT indexes and for distributed indexes which contain only RT indexes as agents.
- It deletes existing rows (documents) from an existing index based on ID.
- </p><p>
- <code class="option">index</code> is the name of RT index from which the row should be deleted.
- </p><p>
- <code class="code">where_condition</code> has the same syntax
- as in the SELECT statement (see <a class="xref" href="#sphinxql-select" title="8.1. SELECT syntax">Section 8.1, “SELECT syntax”</a> for details).
- </p><pre class="programlisting">
- mysql> select * from rt;
- +------+------+-------------+------+
- | id | gid | mva1 | mva2 |
- +------+------+-------------+------+
- | 100 | 1000 | 100,201 | 100 |
- | 101 | 1001 | 101,202 | 101 |
- | 102 | 1002 | 102,203 | 102 |
- | 103 | 1003 | 103,204 | 103 |
- | 104 | 1004 | 104,204,205 | 104 |
- | 105 | 1005 | 105,206 | 105 |
- | 106 | 1006 | 106,207 | 106 |
- | 107 | 1007 | 107,208 | 107 |
- +------+------+-------------+------+
- 8 rows in set (0.00 sec)
- mysql> delete from rt where match ('dumy') and mva1>206;
- Query OK, 2 rows affected (0.00 sec)
- mysql> select * from rt;
- +------+------+-------------+------+
- | id | gid | mva1 | mva2 |
- +------+------+-------------+------+
- | 100 | 1000 | 100,201 | 100 |
- | 101 | 1001 | 101,202 | 101 |
- | 102 | 1002 | 102,203 | 102 |
- | 103 | 1003 | 103,204 | 103 |
- | 104 | 1004 | 104,204,205 | 104 |
- | 105 | 1005 | 105,206 | 105 |
- +------+------+-------------+------+
- 6 rows in set (0.00 sec)
- mysql> delete from rt where id in (100,104,105);
- Query OK, 3 rows affected (0.01 sec)
- mysql> select * from rt;
- +------+------+---------+------+
- | id | gid | mva1 | mva2 |
- +------+------+---------+------+
- | 101 | 1001 | 101,202 | 101 |
- | 102 | 1002 | 102,203 | 102 |
- | 103 | 1003 | 103,204 | 103 |
- +------+------+---------+------+
- 3 rows in set (0.00 sec)
- mysql> delete from rt where mva1 in (102,204);
- Query OK, 2 rows affected (0.01 sec)
- mysql> select * from rt;
- +------+------+---------+------+
- | id | gid | mva1 | mva2 |
- +------+------+---------+------+
- | 101 | 1001 | 101,202 | 101 |
- +------+------+---------+------+
- 1 row in set (0.00 sec)
- </pre></div>
- <div class="sect1" title="8.9. SET syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-set"></a>8.9. SET syntax</h2></div></div></div>
- <pre class="programlisting">
- SET [GLOBAL] server_variable_name = value
- SET [INDEX index_name] GLOBAL @user_variable_name = (int_val1 [, int_val2, ...])
- SET NAMES value
- SET @@dummy_variable = ignored_value
- </pre><p>
- SET statement, introduced in version 1.10-beta, modifies a variable value.
- The variable names are case-insensitive. No variable value changes survive
- server restart.
- </p><p>
- SET NAMES statement and SET @@variable_name syntax, both introduced
- in version 2.0.2-beta, do nothing. They were implemented to maintain
- compatibility with 3rd party MySQL client libraries, connectors,
- and frameworks that may need to run this statement when connecting.
- </p><p>
- The following classes of variables are supported:
- </p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>per-session server variable (1.10-beta and above)</p></li>
- <li class="listitem"><p>global server variable (2.0.1-beta and above)</p></li>
- <li class="listitem"><p>global user variable (2.0.1-beta and above)</p></li>
- <li class="listitem"><p>global distributed variable (2.2.3-beta and above)</p></li>
- </ol></div>
- <p>
- </p><p>
- Global user variables are shared between concurrent sessions. Currently,
- the only supported value type is the list of BIGINTs, and these variables
- can only be used along with IN() for filtering purpose. The intended usage
- scenario is uploading huge lists of values to <code class="filename">searchd</code>
- (once) and reusing them (many times) later, saving on network overheads.
- Starting with 2.2.3-beta, global user variables can be either transferred to
- all agents of a distributed index or set locally in case of a local index
- defined in a distributed index. Example:
- </p><pre class="programlisting">
- // in session 1
- mysql> SET GLOBAL @myfilter=(2,3,5,7,11,13);
- Query OK, 0 rows affected (0.00 sec)
- // later in session 2
- mysql> SELECT * FROM test1 WHERE group_id IN @myfilter;
- +------+--------+----------+------------+-----------------+------+
- | id | weight | group_id | date_added | title | tag |
- +------+--------+----------+------------+-----------------+------+
- | 3 | 1 | 2 | 1299338153 | another doc | 15 |
- | 4 | 1 | 2 | 1299338153 | doc number four | 7,40 |
- +------+--------+----------+------------+-----------------+------+
- 2 rows in set (0.02 sec)
- </pre><p>
- </p><p>
- Per-session and global server variables affect certain server settings in the respective scope.
- Known per-session server variables are:
- </p><div class="variablelist"><dl><dt><span class="term"><code class="code">AUTOCOMMIT = {0 | 1}</code></span></dt>
- <dd><p>
- Whether any data modification statement should be implicitly
- wrapped by BEGIN and COMMIT.
- Introduced in version 1.10-beta.
- </p></dd><dt><span class="term"><code class="code">COLLATION_CONNECTION = collation_name</code></span></dt>
- <dd><p>
- Selects the collation to be used for ORDER BY or GROUP BY on string
- values in the subsequent queries. Refer to <a class="xref" href="#collations" title="5.12. Collations">Section 5.12, “Collations”</a>
- for a list of known collation names.
- Introduced in version 2.0.1-beta.
- </p></dd><dt><span class="term"><code class="code">CHARACTER_SET_RESULTS = charset_name</code></span></dt>
- <dd><p>
- Does nothing; a placeholder to support frameworks, clients, and
- connectors that attempt to automatically enforce a charset when
- connecting to a Sphinx server.
- Introduced in version 2.0.1-beta.
- </p></dd><dt><span class="term"><code class="code">SQL_AUTO_IS_NULL = value</code></span></dt>
- <dd><p>
- Does nothing; a placeholder to support frameworks, clients, and
- connectors that attempt to automatically set this variable when
- connecting to a Sphinx server.
- Introduced in version 2.0.2-beta.
- </p></dd><dt><span class="term"><code class="code">SQL_MODE = value</code></span></dt>
- <dd><p>
- Does nothing; a placeholder to support frameworks, clients, and
- connectors that attempt to automatically set this variable when
- connecting to a Sphinx server.
- Introduced in version 2.0.2-beta.
- </p></dd><dt><span class="term"><code class="code">PROFILING = {0 | 1}</code></span></dt>
- <dd><p>
- Enables query profiling in the current session. Defaults to 0.
- See also <a class="xref" href="#sphinxql-show-profile" title="8.30. SHOW PROFILE syntax">Section 8.30, “SHOW PROFILE syntax”</a>.
- Introduced in version 2.1.1-beta.
- </p></dd></dl></div>
- <p>
- </p><p>
- Known global server variables are:
- </p><div class="variablelist"><dl><dt><span class="term"><code class="code">QUERY_LOG_FORMAT = {plain | sphinxql}</code></span></dt>
- <dd><p>
- Changes the current log format.
- Introduced in version 2.0.1-beta.
- </p></dd><dt><span class="term"><code class="code">LOG_LEVEL = {info | debug | debugv | debugvv}</code></span></dt>
- <dd><p>
- Changes the current log verbosity level.
- Introduced in version 2.0.1-beta.
- </p></dd></dl></div>
- <p>
- </p><p>
- Examples:
- </p><pre class="programlisting">
- mysql> SET autocommit=0;
- Query OK, 0 rows affected (0.00 sec)
- mysql> SET GLOBAL query_log_format=sphinxql;
- Query OK, 0 rows affected (0.00 sec)
- </pre><p>
- </p></div>
- <div class="sect1" title="8.10. SET TRANSACTION syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-set-transaction"></a>8.10. SET TRANSACTION syntax</h2></div></div></div>
- <pre class="programlisting">
- SET TRANSACTION ISOLATION LEVEL { READ UNCOMMITTED
- | READ COMMITTED
- | REPEATABLE READ
- | SERIALIZABLE }
- </pre><p>
- SET TRANSACTION statement, introduced in version 2.0.2-beta, does nothing.
- It was implemented to maintain compatibility with 3rd party MySQL client
- libraries, connectors, and frameworks that may need to run this statement
- when connecting.
- </p><p>
- Example:
- </p><pre class="programlisting">
- mysql> SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
- Query OK, 0 rows affected (0.00 sec)
- </pre><p>
- </p></div>
- <div class="sect1" title="8.11. BEGIN, COMMIT, and ROLLBACK syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-commit"></a>8.11. BEGIN, COMMIT, and ROLLBACK syntax</h2></div></div></div>
- <pre class="programlisting">
- START TRANSACTION | BEGIN
- COMMIT
- ROLLBACK
- SET AUTOCOMMIT = {0 | 1}
- </pre><p>
- BEGIN, COMMIT, and ROLLBACK statements were introduced in version 1.10-beta.
- BEGIN statement (or its START TRANSACTION alias) forcibly commits the pending
- transaction, if any, and begins a new one. COMMIT statement commits the current
- transaction, making all its changes permanent. ROLLBACK statement rolls back the
- current transaction, canceling all its changes. SET AUTOCOMMIT controls the
- autocommit mode in the active session.
- </p><p>
- AUTOCOMMIT is set to 1 by default, meaning that every statement that performs
- any changes on any index is implicitly wrapped in BEGIN and COMMIT.
- </p><p>
- Transactions are limited to a single RT index, and also limited in size.
- They are atomic, consistent, overly isolated, and durable. Overly isolated
- means that the changes are not only invisible to the concurrent transactions
- but even to the current session itself.
- </p></div>
- <div class="sect1" title="8.12. BEGIN syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-begin"></a>8.12. BEGIN syntax</h2></div></div></div>
- <pre class="programlisting">
- START TRANSACTION | BEGIN
- </pre><p>
- BEGIN syntax is discussed in detail in <a class="xref" href="#sphinxql-commit" title="8.11. BEGIN, COMMIT, and ROLLBACK syntax">Section 8.11, “BEGIN, COMMIT, and ROLLBACK syntax”</a>.
- </p></div>
- <div class="sect1" title="8.13. ROLLBACK syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-rollback"></a>8.13. ROLLBACK syntax</h2></div></div></div>
- <pre class="programlisting">
- ROLLBACK
- </pre><p>
- ROLLBACK syntax is discussed in detail in <a class="xref" href="#sphinxql-commit" title="8.11. BEGIN, COMMIT, and ROLLBACK syntax">Section 8.11, “BEGIN, COMMIT, and ROLLBACK syntax”</a>.
- </p></div>
- <div class="sect1" title="8.14. CALL SNIPPETS syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-call-snippets"></a>8.14. CALL SNIPPETS syntax</h2></div></div></div>
- <pre class="programlisting">
- CALL SNIPPETS(data, index, query[, opt_value AS opt_name[, ...]])
- </pre><p>
- CALL SNIPPETS statement, introduced in version 1.10-beta, builds a snippet
- from provided data and query, using specified index settings.
- </p><p>
- <code class="option">data</code> is the source data to extract a snippet from. It can be a single string,
- or a list of strings enclosed in parentheses.
- <code class="option">index</code> is the name of the index from which to take the text
- processing settings. <code class="option">query</code> is the full-text query to build
- snippets for. Additional options are documented in
- <a class="xref" href="#api-func-buildexcerpts" title="9.7.1. BuildExcerpts">Section 9.7.1, “BuildExcerpts”</a>. Usage example:
- </p><pre class="programlisting">
- CALL SNIPPETS('this is my document text', 'test1', 'hello world',
- 5 AS around, 200 AS limit);
- CALL SNIPPETS(('this is my document text','this is my another text'), 'test1', 'hello world',
- 5 AS around, 200 AS limit);
- CALL SNIPPETS(('data/doc1.txt','data/doc2.txt','/home/sphinx/doc3.txt'), 'test1', 'hello world',
- 5 AS around, 200 AS limit, 1 AS load_files);
- </pre></div>
- <div class="sect1" title="8.15. CALL KEYWORDS syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-call-keywords"></a>8.15. CALL KEYWORDS syntax</h2></div></div></div>
- <pre class="programlisting">
- CALL KEYWORDS(text, index [, 1])
- </pre><p>
- CALL KEYWORDS statement, introduced in version 1.10-beta, splits text
- into particular keywords. It returns tokenized and normalized forms
- of the keywords, and, optionally, keyword statistics. Since version 2.2.2-beta
- it also returns the position of each keyword in the query and all
- forms of tokenized keywords in the case that lemmatizers were used.
- </p><p>
- <code class="option">text</code> is the text to break down to keywords.
- <code class="option">index</code> is the name of the index from which to take the text
- processing settings. <code class="option">hits</code> is the optional third argument (shown as 1 in the syntax above), a boolean flag
- that specifies whether to return document and hit occurrence statistics.
- </p></div>
- <div class="sect1" title="8.16. SHOW TABLES syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-show-tables"></a>8.16. SHOW TABLES syntax</h2></div></div></div>
- <pre class="programlisting">
- SHOW TABLES [ LIKE pattern ]
- </pre><p>
- SHOW TABLES statement, introduced in version 2.0.1-beta, enumerates
- all currently active indexes along with their types. As of 2.0.1-beta,
- the existing index types are <code class="option">local</code>, <code class="option">distributed</code>,
- and <code class="option">rt</code>.
- Example:
- </p><pre class="programlisting">
- mysql> SHOW TABLES;
- +-------+-------------+
- | Index | Type |
- +-------+-------------+
- | dist1 | distributed |
- | rt | rt |
- | test1 | local |
- | test2 | local |
- +-------+-------------+
- 4 rows in set (0.00 sec)
- </pre><p>
- </p><p>
- Starting from version 2.1.1-beta, an optional LIKE clause is supported.
- Refer to <a class="xref" href="#sphinxql-show-meta" title="8.3. SHOW META syntax">Section 8.3, “SHOW META syntax”</a> for its syntax details.
- </p><pre class="programlisting">
- mysql> SHOW TABLES LIKE '%4';
- +-------+-------------+
- | Index | Type |
- +-------+-------------+
- | dist4 | distributed |
- +-------+-------------+
- 1 row in set (0.00 sec)
- </pre></div>
- <div class="sect1" title="8.17. DESCRIBE syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-describe"></a>8.17. DESCRIBE syntax</h2></div></div></div>
- <pre class="programlisting">
- {DESC | DESCRIBE} index [ LIKE pattern ]
- </pre><p>
- DESCRIBE statement, introduced in version 2.0.1-beta, lists
- index columns and their associated types. Columns are document ID,
- full-text fields, and attributes. The order matches that in which
- fields and attributes are expected by INSERT and REPLACE statements.
- As of 2.0.1-beta, column types are <code class="option">field</code>,
- <code class="option">integer</code>, <code class="option">timestamp</code>,
- <code class="option">ordinal</code>, <code class="option">bool</code>,
- <code class="option">float</code>, <code class="option">bigint</code>,
- <code class="option">string</code>, and <code class="option">mva</code>.
- ID column will be typed either <code class="option">integer</code>
- or <code class="option">bigint</code> based on whether the binaries
- were built with 32-bit or 64-bit document ID support.
- Example:
- </p><pre class="programlisting">
- mysql> DESC rt;
- +---------+---------+
- | Field | Type |
- +---------+---------+
- | id | integer |
- | title | field |
- | content | field |
- | gid | integer |
- +---------+---------+
- 4 rows in set (0.00 sec)
- </pre><p>
- Starting from version 2.1.1-beta, an optional LIKE clause is supported.
- Refer to <a class="xref" href="#sphinxql-show-meta" title="8.3. SHOW META syntax">Section 8.3, “SHOW META syntax”</a> for its syntax details.
- </p></div>
- <div class="sect1" title="8.18. CREATE FUNCTION syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-create-function"></a>8.18. CREATE FUNCTION syntax</h2></div></div></div>
- <pre class="programlisting">
- CREATE FUNCTION udf_name
- RETURNS {INT | BIGINT | FLOAT | STRING}
- SONAME 'udf_lib_file'
- </pre><p>
- CREATE FUNCTION statement, introduced in version 2.0.1-beta,
- installs a <a class="link" href="#sphinx-udfs" title="6.1. Sphinx UDFs (User Defined Functions)">user-defined function (UDF)</a>
- with the given name and type from the given library file.
- The library file must reside in a trusted
- <a class="link" href="#conf-plugin-dir" title="12.4.31. plugin_dir">plugin_dir</a> directory.
- On success, the function is available for use in all subsequent
- queries that the server receives. Example:
- </p><pre class="programlisting">
- mysql> CREATE FUNCTION avgmva RETURNS INT SONAME 'udfexample.dll';
- Query OK, 0 rows affected (0.03 sec)
- mysql> SELECT *, AVGMVA(tag) AS q from test1;
- +------+--------+---------+-----------+
- | id | weight | tag | q |
- +------+--------+---------+-----------+
- | 1 | 1 | 1,3,5,7 | 4.000000 |
- | 2 | 1 | 2,4,6 | 4.000000 |
- | 3 | 1 | 15 | 15.000000 |
- | 4 | 1 | 7,40 | 23.500000 |
- +------+--------+---------+-----------+
- </pre></div>
- <div class="sect1" title="8.19. DROP FUNCTION syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-drop-function"></a>8.19. DROP FUNCTION syntax</h2></div></div></div>
- <pre class="programlisting">
- DROP FUNCTION udf_name
- </pre><p>
- DROP FUNCTION statement, introduced in version 2.0.1-beta,
- deinstalls a <a class="link" href="#sphinx-udfs" title="6.1. Sphinx UDFs (User Defined Functions)">user-defined function (UDF)</a>
- with the given name. On success, the function is no longer available
- for use in subsequent queries. Pending concurrent queries will not be
- affected and the library unload, if necessary, will be postponed
- until those queries complete. Example:
- </p><pre class="programlisting">
- mysql> DROP FUNCTION avgmva;
- Query OK, 0 rows affected (0.00 sec)
- </pre></div>
- <div class="sect1" title="8.20. SHOW VARIABLES syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-show-variables"></a>8.20. SHOW VARIABLES syntax</h2></div></div></div>
- <pre class="programlisting">
- SHOW [{GLOBAL | SESSION}] VARIABLES [WHERE variable_name='xxx']
- </pre><p><span class="bold"><strong>SHOW VARIABLES</strong></span> statement was added in version 2.0.1-beta
- to improve compatibility with 3rd party MySQL connectors and frameworks
- that automatically execute this statement. The WHERE option was added in
- version 2.1.1-beta.
- </p><p>
- In version 2.0.1-beta, it did nothing.
- </p><p>
- Starting from version 2.0.2-beta, it returns the current values of
- a few server-wide variables. Also, support for GLOBAL and SESSION clauses
- was added.
- </p><pre class="programlisting">
- mysql> SHOW GLOBAL VARIABLES;
- +----------------------+----------+
- | Variable_name | Value |
- +----------------------+----------+
- | autocommit | 1 |
- | collation_connection | libc_ci |
- | query_log_format | sphinxql |
- | log_level | info |
- +----------------------+----------+
- 4 rows in set (0.00 sec)
- </pre><p>
- </p><p>
- Starting from 2.1.1-beta, support for WHERE variable_name clause was added,
- to help certain connectors.
- </p><p>
- </p></div>
- <div class="sect1" title="8.21. SHOW COLLATION syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-show-collation"></a>8.21. SHOW COLLATION syntax</h2></div></div></div>
- <pre class="programlisting">
- SHOW COLLATION
- </pre><p>
- Added in version 2.0.1-beta, this is currently a placeholder
- query that does nothing and reports success. It exists in order
- to maintain compatibility with frameworks and connectors that
- automatically execute this statement.
- </p><pre class="programlisting">
- mysql> SHOW COLLATION;
- Query OK, 0 rows affected (0.00 sec)
- </pre></div>
- <div class="sect1" title="8.22. SHOW CHARACTER SET syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-show-character-set"></a>8.22. SHOW CHARACTER SET syntax</h2></div></div></div>
- <pre class="programlisting">
- SHOW CHARACTER SET
- </pre><p>
- Added in version 2.1.1-beta, this is currently a placeholder
- query that does nothing and reports that a UTF-8 character set
- is available. It was added in order
- to keep compatibility with frameworks and connectors that
- automatically execute this statement.
- </p><pre class="programlisting">
- mysql> SHOW CHARACTER SET;
- +---------+---------------+-------------------+--------+
- | Charset | Description | Default collation | Maxlen |
- +---------+---------------+-------------------+--------+
- | utf8 | UTF-8 Unicode | utf8_general_ci | 3 |
- +---------+---------------+-------------------+--------+
- 1 row in set (0.00 sec)
- </pre></div>
- <div class="sect1" title="8.23. UPDATE syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-update"></a>8.23. UPDATE syntax</h2></div></div></div>
- <pre class="programlisting">
- UPDATE index SET col1 = newval1 [, ...] WHERE where_condition [OPTION opt_name = opt_value [, ...]]
- </pre><p>
- UPDATE statement was added in version 2.0.1-beta. Multiple attributes
- and values can be specified in a single statement. Both RT and disk indexes
- are supported.
- </p><p>
- As of version 2.0.2-beta, all attribute types (int, bigint, float, MVA),
- except for string and JSON attributes, can be dynamically updated.
- Previously, some of these types were not supported.
- </p><p>
- <code class="code">where_condition</code> (also added in 2.0.2-beta) has the same syntax
- as in the SELECT statement (see <a class="xref" href="#sphinxql-select" title="8.1. SELECT syntax">Section 8.1, “SELECT syntax”</a> for details).
- </p><p>
- When out-of-range values are assigned to 32-bit attributes, they
- will be silently truncated to their lower 32 bits, without any warning. For example,
- if you try to update the 32-bit unsigned int with a value of 4294967297,
- the value of 1 will actually be stored, because the lower 32 bits of
- 4294967297 (0x100000001 in hex) amount to 1 (0x00000001 in hex).
- </p><p>
- MVA value sets for updating (and also for INSERT or REPLACE, refer
- to <a class="xref" href="#sphinxql-insert" title="8.6. INSERT and REPLACE syntax">Section 8.6, “INSERT and REPLACE syntax”</a>) must be specified as comma-separated
- lists in parentheses. To erase the MVA value, just assign () to it.
- </p><p>
- Starting from version 2.2.1-beta, UPDATE can be used to update integer and float
- values in a JSON array. Strings, arrays, and other types are not supported yet.
- </p><pre class="programlisting">
- mysql> UPDATE myindex SET enabled=0 WHERE id=123;
- Query OK, 1 rows affected (0.00 sec)
- mysql> UPDATE myindex
- SET bigattr=-100000000000,
- fattr=3465.23,
- mvattr1=(3,6,4),
- mvattr2=()
- WHERE MATCH('hehe') AND enabled=1;
- Query OK, 148 rows affected (0.01 sec)
- </pre><p>OPTION clause. This is a Sphinx-specific extension that
- lets you control a number of per-update options. The syntax is:
- </p><pre class="programlisting">
- OPTION &lt;optionname&gt;=&lt;value&gt; [ , ... ]
- </pre><p>
- The list of allowed options is the same as for the <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">SELECT</a> statement. In addition, the following options
- can be used specifically with the UPDATE statement:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>'ignore_nonexistent_columns' - this option, added in version 2.1.1-beta, specifies that the update will silently
- ignore any warnings about attempts to update a column that does not exist in the current index schema.
- </p></li><li class="listitem"><p>'strict' - this option is used when updating JSON attributes. As of
- 2.2.1-beta, only some JSON value types can be updated. If you
- try to update an unsupported type (for example, an array), you will get an error when the 'strict' option
- is on, and a warning otherwise.</p></li>
- </ul></div>
- <p>
- </p></div>
- <div class="sect1" title="8.24. ALTER syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-attach"></a>8.24. ALTER syntax</h2></div></div></div>
- <pre class="programlisting">
- ALTER TABLE index {ADD|DROP} COLUMN column_name [{INTEGER|BIGINT|FLOAT|BOOL|MULTI|MULTI64|JSON|STRING}]
- </pre><p>
- The ALTER statement was added in version 2.2.1-beta. As of 2.2.1-beta, it supports adding one
- attribute at a time for both plain and RT indexes. The int, bigint, float, bool, multi-valued,
- multi-valued 64bit, json and string attribute types are supported. Support for multi, multi64,
- json and string attributes was added in 2.2.2-beta. As of 2.2.2-beta, you can add json and
- string attributes, but you cannot modify their values. The ability to remove attributes was
- added in 2.2.2-beta.
- </p><p>
- Implementation details. As of 2.2.1-beta, the querying of an index is
- impossible (because of a write lock) while adding a column. This may change
- in the future. The newly created attribute values are set to 0. ALTER will
- not work for distributed indexes and indexes without any attributes.
- DROP COLUMN will fail if an index has only one attribute.
- </p><pre class="programlisting">
- ALTER RTINDEX index RECONFIGURE
- </pre><p>
- As of 2.2.3-beta, ALTER can also reconfigure an existing RT index, so that
- new tokenization, morphology, and other text processing settings from sphinx.conf
- take effect on the newly INSERT-ed rows, while retaining the existing rows
- as they were. Internally, it forcibly saves the current RAM chunk as a new
- disk chunk, and adjusts the index header, so that the new rows are tokenized
- using the new rules. Note that as the queries are currently parsed separately
- for every disk chunk, this might result in warnings regarding the keyword sets
- mismatch.
- </p><pre class="programlisting">
- mysql> desc plain;
- +------------+-----------+
- | Field | Type |
- +------------+-----------+
- | id | bigint |
- | text | field |
- | group_id | uint |
- | date_added | timestamp |
- +------------+-----------+
- 4 rows in set (0.01 sec)
- mysql> alter table plain add column test integer;
- Query OK, 0 rows affected (0.04 sec)
- mysql> desc plain;
- +------------+-----------+
- | Field | Type |
- +------------+-----------+
- | id | bigint |
- | text | field |
- | group_id | uint |
- | date_added | timestamp |
- | test | uint |
- +------------+-----------+
- 5 rows in set (0.00 sec)
- mysql> alter table plain drop column group_id;
- Query OK, 0 rows affected (0.01 sec)
- mysql> desc plain;
- +------------+-----------+
- | Field | Type |
- +------------+-----------+
- | id | bigint |
- | text | field |
- | date_added | timestamp |
- | test | uint |
- +------------+-----------+
- 4 rows in set (0.00 sec)
- </pre></div>
- <div class="sect1" title="8.25. ATTACH INDEX syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-attach-index"></a>8.25. ATTACH INDEX syntax</h2></div></div></div>
- <pre class="programlisting">
- ATTACH INDEX diskindex TO RTINDEX rtindex
- </pre><p>
- ATTACH INDEX statement, added in version 2.0.2-beta, lets you move
- data from a regular disk index to an RT index.
- </p><p>
- After a successful ATTACH, the data originally stored in the source
- disk index becomes a part of the target RT index, and the source disk
- index becomes unavailable (until the next rebuild). ATTACH does not
- result in any index data changes. Basically, it just renames the files
- (making the source index a new disk chunk of the target RT index),
- and updates the metadata. So it is generally a quick operation
- that frequently completes in under a second.
- </p><p>
- Note that when an index is attached to an empty RT index, the fields,
- attributes, and text processing settings (tokenizer, wordforms, etc) from
- the <span class="emphasis"><em>source</em></span> index are copied over and take effect.
- The respective parts of the RT index definition from the configuration
- file will be ignored.
- </p><p>
- As of 2.0.2-beta, ATTACH INDEX comes with a number of restrictions.
- Most notably, the target RT index is currently required to be empty,
- making ATTACH INDEX a one-time conversion operation only. Those restrictions
- may be lifted in future releases, as we add the needed functionality to the
- RT indexes. The complete list is as follows.
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>Target RT index needs to be empty. (See <a class="xref" href="#sphinxql-truncate-rtindex" title="8.28. TRUNCATE RTINDEX syntax">Section 8.28, “TRUNCATE RTINDEX syntax”</a>)</p></li>
- <li class="listitem"><p>Source disk index needs to have index_sp=0, boundary_step=0, stopword_step=1.</p></li>
- <li class="listitem"><p>Source disk index needs to have an empty index_zones setting.</p></li>
- </ul></div>
- <p>
- </p><pre class="programlisting">
- mysql> DESC rt;
- +-----------+---------+
- | Field | Type |
- +-----------+---------+
- | id | integer |
- | testfield | field |
- | testattr | uint |
- +-----------+---------+
- 3 rows in set (0.00 sec)
- mysql> SELECT * FROM rt;
- Empty set (0.00 sec)
- mysql> SELECT * FROM disk WHERE MATCH('test');
- +------+--------+----------+------------+
- | id | weight | group_id | date_added |
- +------+--------+----------+------------+
- | 1 | 1304 | 1 | 1313643256 |
- | 2 | 1304 | 1 | 1313643256 |
- | 3 | 1304 | 1 | 1313643256 |
- | 4 | 1304 | 1 | 1313643256 |
- +------+--------+----------+------------+
- 4 rows in set (0.00 sec)
- mysql> ATTACH INDEX disk TO RTINDEX rt;
- Query OK, 0 rows affected (0.00 sec)
- mysql> DESC rt;
- +------------+-----------+
- | Field | Type |
- +------------+-----------+
- | id | integer |
- | title | field |
- | content | field |
- | group_id | uint |
- | date_added | timestamp |
- +------------+-----------+
- 5 rows in set (0.00 sec)
- mysql> SELECT * FROM rt WHERE MATCH('test');
- +------+--------+----------+------------+
- | id | weight | group_id | date_added |
- +------+--------+----------+------------+
- | 1 | 1304 | 1 | 1313643256 |
- | 2 | 1304 | 1 | 1313643256 |
- | 3 | 1304 | 1 | 1313643256 |
- | 4 | 1304 | 1 | 1313643256 |
- +------+--------+----------+------------+
- 4 rows in set (0.00 sec)
- mysql> SELECT * FROM disk WHERE MATCH('test');
- ERROR 1064 (42000): no enabled local indexes to search
- </pre></div>
- <div class="sect1" title="8.26. FLUSH RTINDEX syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-flush-rtindex"></a>8.26. FLUSH RTINDEX syntax</h2></div></div></div>
- <pre class="programlisting">
- FLUSH RTINDEX rtindex
- </pre><p>
- FLUSH RTINDEX statement, added in version 2.0.2-beta, forcibly
- flushes RT index RAM chunk contents to disk.
- </p><p>
- Backing up an RT index is as simple as copying over its data files,
- followed by the binary log. However, recovering from that backup means
- that all the transactions in the log since the last successful RAM chunk
- write would need to be replayed. Those writes normally happen either
- on a clean shutdown, or periodically with a (big enough!) interval
- between writes specified in
- <a class="link" href="#conf-rt-flush-period" title="12.4.33. rt_flush_period">rt_flush_period</a> directive.
- So such a backup made at an arbitrary point in time just might end up
- with way too much binary log data to replay.
- </p><p>
- FLUSH RTINDEX forcibly writes the RAM chunk contents to disk,
- and also causes the subsequent cleanup of (now-redundant) binary
- log files. Thus, recovering from a backup made just after
- FLUSH RTINDEX should be almost instant.
- </p><pre class="programlisting">
- mysql> FLUSH RTINDEX rt;
- Query OK, 0 rows affected (0.05 sec)
- </pre></div>
- <div class="sect1" title="8.27. FLUSH RAMCHUNK syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-flush-ramchunk"></a>8.27. FLUSH RAMCHUNK syntax</h2></div></div></div>
- <pre class="programlisting">
- FLUSH RAMCHUNK rtindex
- </pre><p>
- FLUSH RAMCHUNK statement, added in version 2.1.2-release, forcibly
- creates a new disk chunk in an RT index.
- </p><p>
- Normally, RT index would flush and convert the contents of the
- RAM chunk into a new disk chunk automatically, once the RAM chunk
- reaches the maximum allowed
- <a class="link" href="#conf-rt-mem-limit" title="12.2.49. rt_mem_limit">rt_mem_limit</a> size.
- However, for debugging and testing it might be useful to forcibly
- create a new disk chunk, and FLUSH RAMCHUNK statement does exactly that.
- </p><p>
- Note that using FLUSH RAMCHUNK increases RT index fragmentation.
- Most likely, you want to use FLUSH RTINDEX instead. We suggest that
- you abstain from using this statement unless you're absolutely sure
- what you're doing.
- </p><pre class="programlisting">
- mysql> FLUSH RAMCHUNK rt;
- Query OK, 0 rows affected (0.05 sec)
- </pre></div>
- <div class="sect1" title="8.28. TRUNCATE RTINDEX syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-truncate-rtindex"></a>8.28. TRUNCATE RTINDEX syntax</h2></div></div></div>
- <pre class="programlisting">
- TRUNCATE RTINDEX rtindex
- </pre><p>
- TRUNCATE RTINDEX statement, added in version 2.1.1-beta, clears
- the RT index completely. It disposes of the in-memory data, unlinks
- all the index data files, and releases the associated binary logs.
- </p><pre class="programlisting">
- mysql> TRUNCATE RTINDEX rt;
- Query OK, 0 rows affected (0.05 sec)
- </pre><p>
- You may want to use this if you are using RT indexes as "delta index" files; when
- you build the main index, you need to wipe the delta index, and TRUNCATE RTINDEX does exactly that.
- You also need to use this command before attaching an index; see <a class="xref" href="#sphinxql-attach-index" title="8.25. ATTACH INDEX syntax">Section 8.25, “ATTACH INDEX syntax”</a>.
- </p></div>
- <div class="sect1" title="8.29. SHOW AGENT STATUS"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-show-agent-status"></a>8.29. SHOW AGENT STATUS</h2></div></div></div>
- <pre class="programlisting">
- SHOW AGENT ['agent'|'index'|index] STATUS [ LIKE pattern ]
- </pre><p>
- Displays the statistics of <a class="link" href="#conf-agent" title="12.2.31. agent">remote
- agents</a> or of a distributed index. It includes values such as the age of the last
- request, the last answer, the number of different kinds of errors and
- successes, etc. The statistics are shown for every agent for the last 1, 5
- and 15 intervals, each of them lasting <a class="link" href="#conf-ha-period-karma" title="12.4.40. ha_period_karma">ha_period_karma</a> seconds.
- The command exists only in SphinxQL.
- </p><pre class="programlisting">
- mysql> SHOW AGENT STATUS;
- +------------------------------------+----------------------------+
- | Key | Value |
- +------------------------------------+----------------------------+
- | status_period_seconds | 60 |
- | status_stored_periods | 15 |
- | ag_0_hostname | 192.168.0.202:6713 |
- | ag_0_references | 2 |
- | ag_0_lastquery | 0.41 |
- | ag_0_lastanswer | 0.19 |
- | ag_0_lastperiodmsec | 222 |
- | ag_0_errorsarow | 0 |
- | ag_0_1periods_query_timeouts | 0 |
- | ag_0_1periods_connect_timeouts | 0 |
- | ag_0_1periods_connect_failures | 0 |
- | ag_0_1periods_network_errors | 0 |
- | ag_0_1periods_wrong_replies | 0 |
- | ag_0_1periods_unexpected_closings | 0 |
- | ag_0_1periods_warnings | 0 |
- | ag_0_1periods_succeeded_queries | 27 |
- | ag_0_1periods_msecsperquery | 232.31 |
- | ag_0_5periods_query_timeouts | 0 |
- | ag_0_5periods_connect_timeouts | 0 |
- | ag_0_5periods_connect_failures | 0 |
- | ag_0_5periods_network_errors | 0 |
- | ag_0_5periods_wrong_replies | 0 |
- | ag_0_5periods_unexpected_closings | 0 |
- | ag_0_5periods_warnings | 0 |
- | ag_0_5periods_succeeded_queries | 146 |
- | ag_0_5periods_msecsperquery | 231.83 |
- | ag_1_hostname | 192.168.0.202:6714 |
- | ag_1_references | 2 |
- | ag_1_lastquery | 0.41 |
- | ag_1_lastanswer | 0.19 |
- | ag_1_lastperiodmsec | 220 |
- | ag_1_errorsarow | 0 |
- | ag_1_1periods_query_timeouts | 0 |
- | ag_1_1periods_connect_timeouts | 0 |
- | ag_1_1periods_connect_failures | 0 |
- | ag_1_1periods_network_errors | 0 |
- | ag_1_1periods_wrong_replies | 0 |
- | ag_1_1periods_unexpected_closings | 0 |
- | ag_1_1periods_warnings | 0 |
- | ag_1_1periods_succeeded_queries | 27 |
- | ag_1_1periods_msecsperquery | 231.24 |
- | ag_1_5periods_query_timeouts | 0 |
- | ag_1_5periods_connect_timeouts | 0 |
- | ag_1_5periods_connect_failures | 0 |
- | ag_1_5periods_network_errors | 0 |
- | ag_1_5periods_wrong_replies | 0 |
- | ag_1_5periods_unexpected_closings | 0 |
- | ag_1_5periods_warnings | 0 |
- | ag_1_5periods_succeeded_queries | 146 |
- | ag_1_5periods_msecsperquery | 230.85 |
- +------------------------------------+----------------------------+
- 50 rows in set (0.01 sec)
- </pre><p>
- Starting from version 2.1.1-beta, an optional LIKE clause is supported.
- Refer to <a class="xref" href="#sphinxql-show-meta" title="8.3. SHOW META syntax">Section 8.3, “SHOW META syntax”</a> for its syntax details.
- </p><pre class="programlisting">
- mysql> SHOW AGENT STATUS LIKE '%5period%msec%';
- +-----------------------------+--------+
- | Key | Value |
- +-----------------------------+--------+
- | ag_0_5periods_msecsperquery | 234.72 |
- | ag_1_5periods_msecsperquery | 233.73 |
- | ag_2_5periods_msecsperquery | 343.81 |
- +-----------------------------+--------+
- 3 rows in set (0.00 sec)
- </pre><p>
- You can specify a particular agent by its address. In this case only
- that agent's data will be displayed. Also, 'agent_' prefix will be used
- instead of 'ag_N_':
- </p><pre class="programlisting">
- mysql> SHOW AGENT '192.168.0.202:6714' STATUS LIKE '%15periods%';
- +-------------------------------------+--------+
- | Variable_name | Value |
- +-------------------------------------+--------+
- | agent_15periods_query_timeouts | 0 |
- | agent_15periods_connect_timeouts | 0 |
- | agent_15periods_connect_failures | 0 |
- | agent_15periods_network_errors | 0 |
- | agent_15periods_wrong_replies | 0 |
- | agent_15periods_unexpected_closings | 0 |
- | agent_15periods_warnings | 0 |
- | agent_15periods_succeeded_queries | 439 |
- | agent_15periods_msecsperquery | 231.73 |
- +-------------------------------------+--------+
- 9 rows in set (0.00 sec)
- </pre><p>Finally, you can check the status of the agents in a specific
- distributed index. It can be done with a SHOW AGENT index STATUS statement.
- That statement shows the index HA status (i.e., whether or not it uses
- agent mirrors at all), and then the mirror information (specifically:
- address, blackhole and persistent flags, and the mirror selection
- probability used when one of the
- <a class="link" href="#conf-ha-strategy" title="12.2.60. ha_strategy">weighted-probability strategies</a>
- is in effect).
- </p><pre class="programlisting">
- mysql> SHOW AGENT dist_index STATUS;
- +--------------------------------------+--------------------------------+
- | Variable_name | Value |
- +--------------------------------------+--------------------------------+
- | dstindex_1_is_ha | 1 |
- | dstindex_1mirror1_id | 192.168.0.202:6713:loc |
- | dstindex_1mirror1_probability_weight | 0.372864 |
- | dstindex_1mirror1_is_blackhole | 0 |
- | dstindex_1mirror1_is_persistent | 0 |
- | dstindex_1mirror2_id | 192.168.0.202:6714:loc |
- | dstindex_1mirror2_probability_weight | 0.374635 |
- | dstindex_1mirror2_is_blackhole | 0 |
- | dstindex_1mirror2_is_persistent | 0 |
- | dstindex_1mirror3_id | dev1.sphinxsearch.com:6714:loc |
- | dstindex_1mirror3_probability_weight | 0.252501 |
- | dstindex_1mirror3_is_blackhole | 0 |
- | dstindex_1mirror3_is_persistent | 0 |
- +--------------------------------------+--------------------------------+
- 13 rows in set (0.00 sec)
- </pre></div>
- <div class="sect1" title="8.30. SHOW PROFILE syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-show-profile"></a>8.30. SHOW PROFILE syntax</h2></div></div></div>
- <pre class="programlisting">
- SHOW PROFILE
- </pre><p>
- SHOW PROFILE statement, added in version 2.1.1-beta, shows a detailed
- execution profile of the previous SQL statement executed in the current
- SphinxQL session. Also, profiling must be enabled in the current session
- <span class="bold"><strong>before</strong></span> running the statement to be instrumented. That can be done
- with a <code class="code">SET profiling=1</code> statement. By default, profiling
- is disabled to avoid potential performance implications, and therefore
- the profile will be empty.
- </p><p>
- Here's a complete instrumentation example:
- </p><pre class="programlisting">
- mysql> SET profiling=1;
- Query OK, 0 rows affected (0.00 sec)
- mysql> SELECT id FROM lj WHERE MATCH('the test') LIMIT 1;
- +--------+
- | id |
- +--------+
- | 946418 |
- +--------+
- 1 row in set (0.05 sec)
- mysql> SHOW PROFILE;
- +--------------+----------+----------+
- | Status | Duration | Switches |
- +--------------+----------+----------+
- | unknown | 0.000610 | 6 |
- | net_read | 0.000007 | 1 |
- | dist_connect | 0.000036 | 1 |
- | sql_parse | 0.000048 | 1 |
- | dict_setup | 0.000001 | 1 |
- | parse | 0.000023 | 1 |
- | transforms | 0.000002 | 1 |
- | init | 0.000401 | 3 |
- | open | 0.000104 | 1 |
- | read_docs | 0.001570 | 71 |
- | read_hits | 0.003936 | 222 |
- | get_docs | 0.029837 | 1347 |
- | get_hits | 0.000548 | 1433 |
- | filter | 0.000619 | 1274 |
- | rank | 0.009892 | 2909 |
- | sort | 0.001562 | 52 |
- | finalize | 0.000250 | 1 |
- | dist_wait | 0.000000 | 1 |
- | aggregate | 0.000145 | 1 |
- | net_write | 0.000031 | 1 |
- +--------------+----------+----------+
- 20 rows in set (0.00 sec)
- </pre><p>
- </p><p>
- Status column briefly describes where exactly (in which state)
- was the time spent. Duration column shows the wall clock time,
- in seconds. Switches column displays the number of times query
- engine changed to the given state. Those are just logical engine
- state switches and <span class="bold"><strong>not</strong></span> any OS level context switches nor
- function calls (even though some of the sections can actually map
- to function calls) and they do <span class="bold"><strong>not</strong></span> have any direct effect
- on the performance. In a sense, number of switches is just a number
- of times when the respective instrumentation point was hit.
- </p><p>
- States in the profile are returned in a prerecorded order
- that roughly maps (but is <span class="bold"><strong>not</strong></span> identical) to the actual
- query order.
- </p><p>
- A list of states may (and will) vary over time, as we refine
- the states. Here's a brief description of the currently profiled
- states.
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><span class="bold"><strong>unknown</strong></span>, generic catch-all state. Accounts for both
- not-yet-instrumented code and small miscellaneous tasks that do not
- really belong in any other state, but are too small to deserve their own state.
- </li>
- <li class="listitem"><span class="bold"><strong>net_read</strong></span>, reading the query from the network (that is, the application).</li>
- <li class="listitem"><span class="bold"><strong>io</strong></span>, generic file IO time.</li>
- <li class="listitem"><span class="bold"><strong>dist_connect</strong></span>, connecting to remote agents in the distributed index case.</li>
- <li class="listitem"><span class="bold"><strong>sql_parse</strong></span>, parsing the SphinxQL syntax.</li>
- <li class="listitem"><span class="bold"><strong>dict_setup</strong></span>, dictionary and tokenizer setup.</li>
- <li class="listitem"><span class="bold"><strong>parse</strong></span>, parsing the full-text query syntax.</li>
- <li class="listitem"><span class="bold"><strong>transforms</strong></span>, full-text query transformations (wildcard and other expansions, simplification, etc).</li>
- <li class="listitem"><span class="bold"><strong>init</strong></span>, initializing the query evaluation.</li>
- <li class="listitem"><span class="bold"><strong>open</strong></span>, opening the index files.</li>
- <li class="listitem"><span class="bold"><strong>read_docs</strong></span>, IO time spent reading document lists.</li>
- <li class="listitem"><span class="bold"><strong>read_hits</strong></span>, IO time spent reading keyword positions.</li>
- <li class="listitem"><span class="bold"><strong>get_docs</strong></span>, computing the matching documents.</li>
- <li class="listitem"><span class="bold"><strong>get_hits</strong></span>, computing the matching positions.</li>
- <li class="listitem"><span class="bold"><strong>filter</strong></span>, filtering the full-text matches.</li>
- <li class="listitem"><span class="bold"><strong>rank</strong></span>, computing the relevance rank.</li>
- <li class="listitem"><span class="bold"><strong>sort</strong></span>, sorting the matches.</li>
- <li class="listitem"><span class="bold"><strong>finalize</strong></span>, finalizing the per-index search result set (last stage expressions, etc).</li>
- <li class="listitem"><span class="bold"><strong>dist_wait</strong></span>, waiting for the remote results from the agents in the distributed index case.</li>
- <li class="listitem"><span class="bold"><strong>aggregate</strong></span>, aggregating multiple result sets.</li>
- <li class="listitem"><span class="bold"><strong>net_write</strong></span>, writing the result set to the network.</li>
- </ul></div>
- <p>
- </p></div>
- <div class="sect1" title="8.31. SHOW INDEX STATUS syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-show-index-status"></a>8.31. SHOW INDEX STATUS syntax</h2></div></div></div>
- <pre class="programlisting">
- SHOW INDEX index_name STATUS
- </pre><p>
- Added in version 2.1.1-beta. Displays various per-index statistics. Currently,
- those include:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><span class="bold"><strong>indexed_documents</strong></span> and <span class="bold"><strong>indexed_bytes</strong></span>, number of
- the documents indexed and their text size in bytes, respectively.</li>
- <li class="listitem"><span class="bold"><strong>field_tokens_XXX</strong></span>, sums of per-field lengths (in tokens)
- over the entire index (that is used internally in BM25A and BM25F functions
- for ranking purposes). Only available for indexes built with index_field_lengths=1.</li>
- <li class="listitem"><span class="bold"><strong>ram_bytes</strong></span>, total size (in bytes) of the RAM-resident
- index portion.
- </li>
- </ul></div>
- <p>
- </p><pre class="programlisting">
- mysql> SHOW INDEX lj STATUS;
- +--------------------+-------------+
- | Variable_name | Value |
- +--------------------+-------------+
- | index_type | disk |
- | indexed_documents | 2495219 |
- | indexed_bytes | 10380483879 |
- | field_tokens_title | 6999145 |
- | field_tokens_body | 1501825050 |
- | total_tokens | 1508824195 |
- | ram_bytes | 305963599 |
- | disk_bytes | 5455804365 |
- | mem_limit | 536870912 |
- +--------------------+-------------+
- 9 rows in set (0.00 sec)
- </pre></div>
- <div class="sect1" title="8.32. SHOW INDEX SETTINGS syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-show-index-settings"></a>8.32. SHOW INDEX SETTINGS syntax</h2></div></div></div>
- <pre class="programlisting">
- SHOW INDEX index_name[.N | CHUNK N] SETTINGS
- </pre><p>
- Displays per-index settings in a <code class="filename">sphinx.conf</code> compliant
- file format, similar to the <a class="link" href="#ref-indextool" title="7.4. indextool command reference">--dumpconfig</a>
- option of the indextool. The report provides a breakdown of all the index
- settings, including tokenizer and dictionary options. You may also specify
- a particular <a class="link" href="#conf-rt-mem-limit" title="12.2.49. rt_mem_limit">chunk number</a>
- for the RT indexes.
- </p></div>
- <div class="sect1" title="8.33. OPTIMIZE INDEX syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-optimize-index"></a>8.33. OPTIMIZE INDEX syntax</h2></div></div></div>
- <pre class="programlisting">
- OPTIMIZE INDEX index_name
- </pre><p>
- Available since version 2.1.1-beta, the OPTIMIZE statement enqueues
- an RT index for optimization in a background thread.
- </p><p>
- Over time, RT indexes can grow fragmented into many disk chunks
- and/or tainted with deleted, but unpurged data, impacting search
- performance. When that happens, they can be optimized. Basically,
- the optimization pass merges together pairs of disk chunks, purging
- off documents suppressed by K-list as it goes.
- </p><p>
- That is a lengthy and IO intensive process, so to limit the
- impact, all the actual merge work is executed serially in
- a special background thread, and the OPTIMIZE statement simply
- adds a job to its queue. Currently, there is no way to check
- the index or queue status (that might be added in the future
- to the SHOW INDEX STATUS and SHOW STATUS statements respectively).
- The optimization thread can be IO-throttled, you can control the
- maximum number of IOs per second and the maximum IO size
- with <a class="link" href="#conf-rt-merge-iops" title="12.4.42. rt_merge_iops">rt_merge_iops</a>
- and <a class="link" href="#conf-rt-merge-maxiosize" title="12.4.43. rt_merge_maxiosize">rt_merge_maxiosize</a>
- directives respectively. The optimization jobs queue is lost
- on daemon crash.
- </p><p>
- The RT index being optimized stays online and available
- for both searching and updates at (almost) all times during
- the optimization. It gets locked (very) briefly every time
- that a pair of disk chunks is merged successfully, to rename
- the old and the new files, and update the index header.
- </p><p>
- At the moment, OPTIMIZE needs to be issued manually,
- the indexes will <span class="emphasis"><em>not</em></span> be optimized
- automatically. That might change in the future releases.
- </p><pre class="programlisting">
- mysql> OPTIMIZE INDEX rt;
- Query OK, 0 rows affected (0.00 sec)
- </pre></div>
- <div class="sect1" title="8.34. SHOW PLAN syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-show-plan"></a>8.34. SHOW PLAN syntax</h2></div></div></div>
- <pre class="programlisting">
- SHOW PLAN
- </pre><p>
- SHOW PLAN statement, added in 2.1.2-release, displays the execution plan
- of the previous SELECT statement. The plan gets generated and stored
- during the actual execution, so profiling must be enabled in the current
- session <span class="bold"><strong>before</strong></span> running that statement. That can be done
- with a <code class="code">SET profiling=1</code> statement.
- </p><p>
- Here's a complete instrumentation example:
- </p><pre class="programlisting">
- mysql> SET profiling=1 \G
- Query OK, 0 rows affected (0.00 sec)
- mysql> SELECT id FROM lj WHERE MATCH('the i') LIMIT 1 \G
- *************************** 1. row ***************************
- id: 39815
- 1 row in set (1.53 sec)
- mysql> SHOW PLAN \G
- *************************** 1. row ***************************
- Variable: transformed_tree
- Value: AND(
- AND(KEYWORD(the, querypos=1)),
- AND(KEYWORD(i, querypos=2)))
- 1 row in set (0.00 sec)
- </pre><p>
- And here's a less trivial example that shows how the actually
- evaluated query tree can be rather different from the original one
- because of expansions and other transformations:
- </p><pre class="programlisting">
- mysql> SELECT * FROM test WHERE MATCH('@title abc* @body hey') \G SHOW PLAN \G
- ...
- *************************** 1. row ***************************
- Variable: transformed_tree
- Value: AND(
- OR(fields=(title), KEYWORD(abcx, querypos=1, expanded), KEYWORD(abcm, querypos=1, expanded)),
- AND(fields=(body), KEYWORD(hey, querypos=2)))
- 1 row in set (0.00 sec)
- </pre><p>
- </p></div>
- <div class="sect1" title="8.35. SHOW DATABASES syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-show-databases"></a>8.35. SHOW DATABASES syntax</h2></div></div></div>
- <pre class="programlisting">
- SHOW DATABASES
- </pre><p>
- Added in 2.2.1-beta. This is a dummy statement to support MySQL Workbench
- and other clients that require it. Currently, it does absolutely nothing.
- </p></div>
- <div class="sect1" title="8.36. CREATE PLUGIN syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-create-plugin"></a>8.36. CREATE PLUGIN syntax</h2></div></div></div>
- <pre class="programlisting">
- CREATE PLUGIN plugin_name TYPE 'plugin_type' SONAME 'plugin_library'
- </pre><p>
- Added in 2.2.2-beta. Loads the given library (if it is not loaded yet) and loads
- the specified plugin from it. As of 2.2.2-beta, the known plugin types are:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>ranker</p></li>
- <li class="listitem"><p>index_token_filter</p></li>
- <li class="listitem"><p>query_token_filter</p></li>
- </ul></div>
- <p>
- Refer to <a class="xref" href="#sphinx-plugins" title="6.2. Sphinx plugins">Section 6.2, “Sphinx plugins”</a> for more information regarding
- writing the plugins.
- </p><pre class="programlisting">
- mysql> CREATE PLUGIN myranker TYPE 'ranker' SONAME 'myplugins.so';
- Query OK, 0 rows affected (0.00 sec)
- </pre></div>
- <div class="sect1" title="8.37. DROP PLUGIN syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-drop-plugin"></a>8.37. DROP PLUGIN syntax</h2></div></div></div>
- <pre class="programlisting">
- DROP PLUGIN plugin_name TYPE 'plugin_type'
- </pre><p>
- Added in 2.2.2-beta. Marks the specified plugin for unloading.
- The unloading is <span class="bold"><strong>not</strong></span> immediate, because the concurrent queries
- might be using it. However, after a DROP new queries will not be able
- to use it. Then, once all the currently executing queries using it
- are completed, the plugin will be unloaded. Once all the plugins
- from the given library are unloaded, the library is also automatically
- unloaded.
- </p><pre class="programlisting">
- mysql> DROP PLUGIN myranker TYPE 'ranker';
- Query OK, 0 rows affected (0.00 sec)
- </pre></div>
- <div class="sect1" title="8.38. SHOW PLUGINS syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-show-plugins"></a>8.38. SHOW PLUGINS syntax</h2></div></div></div>
- <pre class="programlisting">
- SHOW PLUGINS
- </pre><p>
- Added in 2.2.2-beta. Displays all the loaded plugins and UDFs.
- "Type" column should be one of the udf, ranker, index_token_filter,
- or query_token_filter. "Users" column is the number of threads that
- are currently using that plugin in a query. "Extra" column is intended
- for various additional plugin-type specific information; currently,
- it shows the return type for the UDFs and is empty for all the other
- plugin types.
- </p><pre class="programlisting">
- mysql> SHOW PLUGINS;
- +------+----------+----------------+-------+-------+
- | Type | Name | Library | Users | Extra |
- +------+----------+----------------+-------+-------+
- | udf | sequence | udfexample.dll | 0 | INT |
- +------+----------+----------------+-------+-------+
- 1 row in set (0.00 sec)
- </pre></div>
- <div class="sect1" title="8.39. SHOW THREADS syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-threads"></a>8.39. SHOW THREADS syntax</h2></div></div></div>
- <pre class="programlisting">
- SHOW THREADS [ OPTION columns=width ]
- </pre><p>
- SHOW THREADS statement, introduced in version 2.2.2-beta, lists all
- currently active client threads, not counting system threads.
- It returns a table with columns that describe:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><span class="bold"><strong>thread id</strong></span></li>
- <li class="listitem"><span class="bold"><strong>connection protocol</strong></span>, possible values are sphinxapi and sphinxql</li>
- <li class="listitem"><span class="bold"><strong>thread state</strong></span>, possible values are handshake, net_read,
- net_write, query, net_idle</li>
- <li class="listitem"><span class="bold"><strong>time</strong></span> since the current state was changed (in seconds,
- with microsecond precision)</li>
- <li class="listitem"><span class="bold"><strong>information</strong></span> about queries</li>
- </ul></div>
- <p>
- The 'Info' column will be cut at the width you've specified in the
- 'columns=width' option (notice the third row in the example table below).
- This column will contain raw SphinxQL queries and, if there are
- API queries, full text syntax and comments will be displayed.
- With an API-snippet, the data size will be displayed along with the query.
- </p><pre class="programlisting">
- mysql> SHOW THREADS OPTION columns=50;
- +------+----------+-------+----------+----------------------------------------------------+
- | Tid | Proto | State | Time | Info |
- +------+----------+-------+----------+----------------------------------------------------+
- | 5168 | sphinxql | query | 0.000002 | show threads option columns=50 |
- | 5175 | sphinxql | query | 0.000002 | select * from rt where match ( 'the box' ) |
- | 1168 | sphinxql | query | 0.000002 | select * from rt where match ( 'the box and faximi |
- +------+----------+-------+----------+----------------------------------------------------+
- 3 rows in set (0.00 sec)
- </pre></div>
- <div class="sect1" title="8.40. Multi-statement queries"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-multi-queries"></a>8.40. Multi-statement queries</h2></div></div></div>
- <p>
- Starting with version 2.0.1-beta, SphinxQL supports multi-statement
- queries, or batches. Possible inter-statement optimizations described
- in <a class="xref" href="#multi-queries" title="5.11. Multi-queries">Section 5.11, “Multi-queries”</a> do apply to SphinxQL just as well.
- The batched queries should be separated by a semicolon. Your MySQL
- client library needs to support the MySQL multi-query mechanism and
- multiple result sets. For instance, the mysqli interface in PHP
- and DBI/DBD libraries in Perl are known to work.
- </p><p>
- Here's a PHP sample showing how to utilize mysqli interface
- with Sphinx.
- </p><pre class="programlisting">
- <?php
- $link = mysqli_connect ( "127.0.0.1", "root", "", "", 9306 );
- if ( mysqli_connect_errno() )
- die ( "connect failed: " . mysqli_connect_error() );
- $batch = "SELECT * FROM test1 ORDER BY group_id ASC;";
- $batch .= "SELECT * FROM test1 ORDER BY group_id DESC";
- if ( !mysqli_multi_query ( $link, $batch ) )
- die ( "query failed" );
- do
- {
- // fetch and print result set
- if ( $result = mysqli_store_result($link) )
- {
- while ( $row = mysqli_fetch_row($result) )
- printf ( "id=%s\n", $row[0] );
- mysqli_free_result($result);
- }
- // print divider
- if ( mysqli_more_results($link) )
- printf ( "------\n" );
- } while ( mysqli_next_result($link) );
- </pre><p>
- Its output with the sample <code class="code">test1</code> index included
- with Sphinx is as follows.
- </p><pre class="programlisting">
- $ php test_multi.php
- id=1
- id=2
- id=3
- id=4
- ------
- id=3
- id=4
- id=1
- id=2
- </pre><p>
- </p><p>
- The following statements can currently be used in a batch:
- SELECT, SHOW WARNINGS, SHOW STATUS, and SHOW META. Arbitrary
- sequences of these statements are allowed. The result sets
- returned should match those that would be returned if the
- batched queries were sent one by one.
- </p></div>
- <div class="sect1" title="8.41. Comment syntax"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-comment-syntax"></a>8.41. Comment syntax</h2></div></div></div>
- <p>
- Since version 2.0.1-beta, SphinxQL supports C-style comment syntax.
- Everything from an opening <code class="code">/*</code> sequence to a closing
- <code class="code">*/</code> sequence is ignored. Comments can span multiple lines,
- can not nest, and should not get logged. MySQL specific
- <code class="code">/*! ... */</code> comments are also currently ignored.
- (Comment support was added for better compatibility
- with <code class="filename">mysqldump</code> produced dumps, rather than
- for improving general query interoperability between Sphinx and MySQL.)
- </p><pre class="programlisting">
- SELECT /*! SQL_CALC_FOUND_ROWS */ col1 FROM table1 WHERE ...
- </pre><p>
- </p></div>
- <div class="sect1" title="8.42. List of SphinxQL reserved keywords"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-reserved-keywords"></a>8.42. List of SphinxQL reserved keywords</h2></div></div></div>
- <p>A complete alphabetical list of keywords that are currently reserved
- in SphinxQL syntax (and therefore can not be used as identifiers).
- </p><pre class="programlisting">
- AND
- AGENT
- AS
- ASC
- AVG
- BEGIN
- BETWEEN
- BY
- CALL
- COLLATION
- COMMIT
- COUNT
- DELETE
- DESC
- DESCRIBE
- DISTINCT
- FALSE
- FROM
- GLOBAL
- GROUP
- ID
- IN
- INSERT
- INTO
- LIMIT
- MATCH
- MAX
- META
- MIN
- NOT
- NULL
- OPTION
- OR
- ORDER
- REPLACE
- ROLLBACK
- SELECT
- SET
- SHOW
- START
- STATUS
- SUM
- TABLES
- TRANSACTION
- TRUE
- UPDATE
- VALUES
- VARIABLES
- WARNINGS
- WEIGHT
- WHERE
- WITHIN
- </pre></div>
- <div class="sect1" title="8.43. SphinxQL upgrade notes, version 2.0.1-beta"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxql-upgrading-magics"></a>8.43. SphinxQL upgrade notes, version 2.0.1-beta</h2></div></div></div>
- <p>
- This section only applies to existing applications that
- use SphinxQL versions prior to 2.0.1-beta.
- </p><p>
- In previous versions, SphinxQL just wrapped around SphinxAPI
- and inherited its magic columns and column set quirks. Essentially,
- SphinxQL queries could return (slightly) different columns and
- in a (slightly) different order than was explicitly requested
- in the query. Namely, <code class="code">weight</code> magic column (which is not
- a real column in any index) was added at all times, and GROUP BY
- related <code class="code">@count</code>, <code class="code">@group</code>, and <code class="code">@distinct</code>
- magic columns were conditionally added when grouping. Also, the order
- of columns (attributes) in the result set was actually taken from the
- index rather than the query. (So if you asked for columns C, B, A
- in your query but they were in the A, B, C order in the index,
- they would have been returned in the A, B, C order.)
- </p><p>
- In version 2.0.1-beta, we fixed that. SphinxQL is now more
- SQL compliant (and will be brought further into as much compliance
- with standard SQL syntax as possible).
- </p><p>
- The important changes are as follows:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>
- <span class="bold"><strong><code class="code">@ID</code> magic name is deprecated in favor of
- <code class="code">ID</code>.</strong></span> Document ID is considered an attribute.
- </p></li>
- <li class="listitem"><p>
- <span class="bold"><strong><code class="code">WEIGHT</code> is no longer implicitly returned</strong></span>,
- because it is not actually a column (an index attribute),
- but rather an internal function computed per each row (a match).
- You have to explicitly ask for it, using the <code class="code">WEIGHT()</code>
- function. (The requirement to alias the result will be lifted
- in the next release.)
- </p><pre class="programlisting">
- SELECT id, WEIGHT() w FROM myindex WHERE MATCH('test')
- </pre><p>
- </p></li>
- <li class="listitem"><p>
- <span class="bold"><strong>You can now use quoted reserved keywords as aliases.</strong></span>
- The quote character is backtick ("`", ASCII code 96 decimal,
- 60 hex). One particularly useful example would be returning
- <code class="code">weight</code> column like the old mode:
- </p><pre class="programlisting">
- SELECT id, WEIGHT() `weight` FROM myindex WHERE MATCH('test')
- </pre><p>
- </p></li>
- <li class="listitem"><p>
- The column order is now different and should now match the
- one explicitly defined in the query. So if you are accessing
- columns based on their position in the result set rather than
- the name (for instance, by using <code class="code">mysql_fetch_row()</code>
- rather than <code class="code">mysql_fetch_assoc()</code> in PHP),
- <span class="bold"><strong>check and fix the order of columns in your queries.</strong></span>
- </p></li>
- <li class="listitem"><p>
- <code class="code">SELECT *</code> returns the columns in index order,
- as it used to, including the ID column. However,
- <span class="bold"><strong><code class="code">SELECT *</code> does not automatically return WEIGHT().</strong></span>
- To update such queries in case you access columns by names,
- simply add it to the query:
- </p><pre class="programlisting">
- SELECT *, WEIGHT() `weight` FROM myindex WHERE MATCH('test')
- </pre><p>
- Otherwise, i.e., in case you rely on column order, select
- ID, weight, and then other columns:
- </p><pre class="programlisting">
- SELECT id, *, WEIGHT() `weight` FROM myindex WHERE MATCH('test')
- </pre><p>
- </p></li>
- <li class="listitem"><p>
- <span class="bold"><strong>Magic <code class="code">@count</code> and <code class="code">@distinct</code>
- attributes are no longer implicitly returned</strong></span>. You now
- have to explicitly ask for them when using GROUP BY.
- (Also note that you currently have to alias them;
- that requirement will be lifted in the future.)
- </p><pre class="programlisting">
- SELECT gid, COUNT(*) q FROM myindex WHERE MATCH('test')
- GROUP BY gid ORDER BY q DESC
- </pre><p>
- </p></li>
- </ul></div>
- <p>
- </p></div></div>
- <div class="chapter" title="Chapter 9. API reference"><div class="titlepage"><div><div><h2 class="title"><a name="api-reference"></a>Chapter 9. API reference</h2></div></div></div>
- <div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="sect1"><a href="#api-funcgroup-general">9.1. General API functions</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#api-func-getlasterror">9.1.1. GetLastError</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-getlastwarning">9.1.2. GetLastWarning</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setserver">9.1.3. SetServer</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setretries">9.1.4. SetRetries</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setconnecttimeout">9.1.5. SetConnectTimeout</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setarrayresult">9.1.6. SetArrayResult</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-isconnecterror">9.1.7. IsConnectError</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#api-funcgroup-general-query-settings">9.2. General query settings</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#api-func-setlimits">9.2.1. SetLimits</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setmaxquerytime">9.2.2. SetMaxQueryTime</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setoverride">9.2.3. SetOverride</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setselect">9.2.4. SetSelect</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#api-funcgroup-fulltext-query-settings">9.3. Full-text search query settings</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#api-func-setmatchmode">9.3.1. SetMatchMode</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setrankingmode">9.3.2. SetRankingMode</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setsortmode">9.3.3. SetSortMode</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setweights">9.3.4. SetWeights</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setfieldweights">9.3.5. SetFieldWeights</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setindexweights">9.3.6. SetIndexWeights</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#api-funcgroup-filtering">9.4. Result set filtering settings</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#api-func-setidrange">9.4.1. SetIDRange</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setfilter">9.4.2. SetFilter</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setfilterrange">9.4.3. SetFilterRange</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setfilterfloatrange">9.4.4. SetFilterFloatRange</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setgeoanchor">9.4.5. SetGeoAnchor</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setfilterstring">9.4.6. SetFilterString</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#api-funcgroup-groupby">9.5. GROUP BY settings</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#api-func-setgroupby">9.5.1. SetGroupBy</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-setgroupdistinct">9.5.2. SetGroupDistinct</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#api-funcgroup-querying">9.6. Querying</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#api-func-query">9.6.1. Query</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-addquery">9.6.2. AddQuery</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-runqueries">9.6.3. RunQueries</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-resetfilters">9.6.4. ResetFilters</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-resetgroupby">9.6.5. ResetGroupBy</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#api-funcgroup-additional-functionality">9.7. Additional functionality</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#api-func-buildexcerpts">9.7.1. BuildExcerpts</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-updateatttributes">9.7.2. UpdateAttributes</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-buildkeywords">9.7.3. BuildKeywords</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-escapestring">9.7.4. EscapeString</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-status">9.7.5. Status</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-flushattributes">9.7.6. FlushAttributes</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#api-funcgroup-pconn">9.8. Persistent connections</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#api-func-open">9.8.1. Open</a></span></dt>
- <dt><span class="sect2"><a href="#api-func-close">9.8.2. Close</a></span></dt>
- </dl></dd></dl></div>
- <p>
- There are a number of native searchd client API implementations
- for Sphinx. As of time of this writing, we officially support our own
- PHP, Python, and Java implementations. There also are third party
- free, open-source API implementations for Perl, Ruby, and C++.
- </p><p>
- The reference API implementation is in PHP, because (we believe)
- Sphinx is more widely used with PHP than with any other language.
- This reference documentation is in turn based on reference PHP API,
- and all code samples in this section will be given in PHP.
- </p><p>
- However, all other APIs provide the same methods and implement
- the very same network protocol. Therefore the documentation does
- apply to them as well. There might be minor differences as to the
- method naming conventions or specific data structures used.
- But the provided functionality must not differ across languages.
- </p><div class="sect1" title="9.1. General API functions"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="api-funcgroup-general"></a>9.1. General API functions</h2></div></div></div>
- <div class="sect2" title="9.1.1. GetLastError"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-getlasterror"></a>9.1.1. GetLastError</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function GetLastError()</p><p>
- Returns last error message, as a string, in human readable format.
- If there were no errors during the previous API call, empty string is returned.
- </p><p>
- You should call it when any other function (such as <a class="link" href="#api-func-query" title="9.6.1. Query">Query()</a>)
- fails (typically, the failing function returns false). The returned string will
- contain the error description.
- </p><p>
- The error message is <span class="emphasis"><em>not</em></span> reset by this call; so you can safely
- call it several times if needed.
- </p></div>
- <div class="sect2" title="9.1.2. GetLastWarning"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-getlastwarning"></a>9.1.2. GetLastWarning</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function GetLastWarning ()</p><p>
- Returns last warning message, as a string, in human readable format.
- If there were no warnings during the previous API call, empty string is returned.
- </p><p>
- You should call it to verify whether your request
- (such as <a class="link" href="#api-func-query" title="9.6.1. Query">Query()</a>) was completed but with warnings.
- For instance, search query against a distributed index might complete
- successfully even if several remote agents timed out. In that case,
- a warning message would be produced.
- </p><p>
- The warning message is <span class="emphasis"><em>not</em></span> reset by this call; so you can safely
- call it several times if needed.
- </p></div>
- <div class="sect2" title="9.1.3. SetServer"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setserver"></a>9.1.3. SetServer</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function SetServer ( $host, $port )</p><p>
- Sets <code class="filename">searchd</code> host name and TCP port.
- All subsequent requests will use the new host and port settings.
- Default host and port are 'localhost' and 9312, respectively.
- </p></div>
- <div class="sect2" title="9.1.4. SetRetries"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setretries"></a>9.1.4. SetRetries</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function SetRetries ( $count, $delay=0 )</p><p>
- Sets distributed retry count and delay.
- </p><p>
- On temporary failures <code class="filename">searchd</code> will attempt up to
- <code class="code">$count</code> retries per agent. <code class="code">$delay</code> is the delay
- between the retries, in milliseconds. Retries are disabled by default.
- Note that this call will <span class="bold"><strong>not</strong></span> make the API itself retry on
- temporary failure; it only tells <code class="filename">searchd</code> to do so.
- Currently, the list of temporary failures includes all kinds of connect()
- failures and maxed out (too busy) remote agents.
- </p></div>
- <div class="sect2" title="9.1.5. SetConnectTimeout"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setconnecttimeout"></a>9.1.5. SetConnectTimeout</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function SetConnectTimeout ( $timeout )</p><p>
- Sets the time allowed to spend connecting to the server before giving up.
- </p><p>Under some circumstances, the server can be delayed in responding, either
- due to network delays, or a query backlog. In either instance, this allows
- the client application programmer some degree of control over how their
- program interacts with <code class="filename">searchd</code> when not available,
- and can ensure that the client application does not fail due to exceeding
- the script execution limits (especially in PHP).
- </p><p>In the event of a failure to connect, an appropriate error code should
- be returned back to the application in order for application-level error handling
- to advise the user.
- </p></div>
- <div class="sect2" title="9.1.6. SetArrayResult"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setarrayresult"></a>9.1.6. SetArrayResult</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function SetArrayResult ( $arrayresult )</p><p>
- PHP specific. Controls matches format in the search results set
- (whether matches should be returned as an array or a hash).
- </p><p>
- <code class="code">$arrayresult</code> argument must be boolean. If <code class="code">$arrayresult</code> is <code class="code">false</code>
- (the default mode), matches will be returned in PHP hash format with
- document IDs as keys, and other information (weight, attributes)
- as values. If <code class="code">$arrayresult</code> is true, matches will be returned
- as a plain array with complete per-match information including
- document ID.
- </p><p>
- Introduced along with GROUP BY support on MVA attributes.
- Group-by-MVA result sets may contain duplicate document IDs.
- Thus they need to be returned as plain arrays, because hashes
- will only keep one entry per document ID.
- </p></div>
- <div class="sect2" title="9.1.7. IsConnectError"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-isconnecterror"></a>9.1.7. IsConnectError</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function IsConnectError ()</p><p>
- Checks whether the last error was a network error on API side, or a remote error
- reported by searchd. Returns true if the last connection attempt to searchd failed on API side,
- false otherwise (if the error was remote, or there were no connection attempts at all).
- Introduced in version 0.9.9-rc1.
- </p></div></div>
- <div class="sect1" title="9.2. General query settings"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="api-funcgroup-general-query-settings"></a>9.2. General query settings</h2></div></div></div>
- <div class="sect2" title="9.2.1. SetLimits"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setlimits"></a>9.2.1. SetLimits</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function SetLimits ( $offset, $limit, $max_matches=1000, $cutoff=0 )</p><p>
- Sets offset into server-side result set (<code class="code">$offset</code>) and amount of matches
- to return to client starting from that offset (<code class="code">$limit</code>). Can additionally
- control maximum server-side result set size for current query (<code class="code">$max_matches</code>)
- and the threshold amount of matches to stop searching at (<code class="code">$cutoff</code>).
- All parameters must be non-negative integers.
- </p><p>
- First two parameters to SetLimits() are identical in behavior to MySQL
- LIMIT clause. They instruct <code class="filename">searchd</code> to return at
- most <code class="code">$limit</code> matches starting from match number <code class="code">$offset</code>.
- The default offset and limit settings are 0 and 20, that is, to return
- first 20 matches.
- </p><p>
- <code class="code">max_matches</code> setting controls how many matches <code class="filename">searchd</code>
- will keep in RAM while searching. <span class="bold"><strong>All</strong></span> matching documents will be normally
- processed, ranked, filtered, and sorted even if <code class="code">max_matches</code> is set to 1.
- But only best N documents are stored in memory at any given moment for performance
- and RAM usage reasons, and this setting controls that N. Note that there are
- <span class="bold"><strong>two</strong></span> places where <code class="code">max_matches</code> limit is enforced. Per-query
- limit is controlled by this API call, but there also is per-server limit
- controlled by <code class="code">max_matches</code> setting in the config file. To prevent
- RAM usage abuse, server will not allow to set per-query limit
- higher than the per-server limit.
- </p><p>
- You can't retrieve more than <code class="code">max_matches</code> matches to the client application.
- The default limit is set to 1000. Normally, you should not have to go over
- this limit. One thousand records is enough to present to the end user.
- And if you're thinking about pulling the results to application
- for further sorting or filtering, that would be <span class="bold"><strong>much</strong></span> more efficient
- if performed on Sphinx side.
- </p><p>
- <code class="code">$cutoff</code> setting is intended for advanced performance control.
- It tells <code class="filename">searchd</code> to forcibly stop search query
- once <code class="code">$cutoff</code> matches had been found and processed.
- </p></div>
- <div class="sect2" title="9.2.2. SetMaxQueryTime"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setmaxquerytime"></a>9.2.2. SetMaxQueryTime</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function SetMaxQueryTime ( $max_query_time )</p><p>
- Sets maximum search query time, in milliseconds. Parameter must be
- a non-negative integer. Default value is 0 which means "do not limit".
- </p><p>Similar to <code class="code">$cutoff</code> setting from <a class="link" href="#api-func-setlimits" title="9.2.1. SetLimits">SetLimits()</a>,
- but limits elapsed query time instead of processed matches count. Local search queries
- will be stopped once that much time has elapsed. Note that if you're performing
- a search which queries several local indexes, this limit applies to each index
- separately.
- </p></div>
- <div class="sect2" title="9.2.3. SetOverride"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setoverride"></a>9.2.3. SetOverride</h3></div></div></div>
- <p><span class="bold"><strong>DEPRECATED</strong></span></p><p><span class="bold"><strong>Prototype:</strong></span> function SetOverride ( $attrname, $attrtype, $values )</p><p>
- Sets temporary (per-query) per-document attribute value overrides.
- Only supports scalar attributes. $values must be a hash that maps document
- IDs to overridden attribute values. Introduced in version 0.9.9-rc1.
- </p><p>
- Override feature lets you "temporarily" update attribute values for some documents
- within a single query, leaving all other queries unaffected. This might be useful
- for personalized data. For example, assume you're implementing a personalized
- search function that wants to boost the posts that the user's friends recommend.
- Such data is not just dynamic, but also personal; so you can't simply put it
- in the index because you don't want everyone's searches affected. Overrides,
- on the other hand, are local to a single query and invisible to everyone else.
- So you can, say, setup a "friends_weight" value for every document, defaulting to 0,
- then temporarily override it with 1 for documents 123, 456 and 789 (recommended by
- exactly the friends of current user), and use that value when ranking.
- </p></div>
- <div class="sect2" title="9.2.4. SetSelect"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setselect"></a>9.2.4. SetSelect</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function SetSelect ( $clause )</p><p>
- Sets the select clause, listing specific attributes to fetch, and <a class="link" href="#sort-expr" title="5.6. SPH_SORT_EXPR mode">expressions</a>
- to compute and fetch. Clause syntax mimics SQL. Introduced in version 0.9.9-rc1.</p><p>
- SetSelect() is very similar to the part of a typical SQL query between SELECT and FROM.
- It lets you choose what attributes (columns) to fetch, and also what expressions
- over the columns to compute and fetch. A certain difference from SQL is that expressions
- <span class="bold"><strong>must</strong></span> always be aliased to a correct identifier (consisting of letters and digits)
- using 'AS' keyword. SQL also lets you do that but does not require it. Sphinx enforces
- aliases so that the computation results can always be returned under a "normal" name
- in the result set, used in other clauses, etc.
- </p><p>
- Everything else is basically identical to SQL. Star ('*') is supported.
- Functions are supported. Arbitrary amount of expressions is supported.
- Computed expressions can be used for sorting, filtering, and grouping,
- just as the regular attributes.
- </p><p>
- Starting with version 0.9.9-rc2, aggregate functions (AVG(), MIN(),
- MAX(), SUM()) are supported when using GROUP BY.
- </p><p>
- Expression sorting (<a class="xref" href="#sort-expr" title="5.6. SPH_SORT_EXPR mode">Section 5.6, “SPH_SORT_EXPR mode”</a>) and geodistance functions
- (<a class="xref" href="#api-func-setgeoanchor" title="9.4.5. SetGeoAnchor">Section 9.4.5, “SetGeoAnchor”</a>) are now internally implemented using
- this computed expressions mechanism, using magic names '@expr' and '@geodist'
- respectively.
- </p><h4><a name="idp32629808"></a>Example:</h4><pre class="programlisting">
- $cl->SetSelect ( "*, @weight+(user_karma+ln(pageviews))*0.1 AS myweight" );
- $cl->SetSelect ( "exp_years, salary_gbp*{$gbp_usd_rate} AS salary_usd,
- IF(age>40,1,0) AS over40" );
- $cl->SetSelect ( "*, AVG(price) AS avgprice" );
- </pre></div></div>
- <div class="sect1" title="9.3. Full-text search query settings"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="api-funcgroup-fulltext-query-settings"></a>9.3. Full-text search query settings</h2></div></div></div>
- <div class="sect2" title="9.3.1. SetMatchMode"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setmatchmode"></a>9.3.1. SetMatchMode</h3></div></div></div>
- <p><span class="bold"><strong>DEPRECATED</strong></span></p><p><span class="bold"><strong>Prototype:</strong></span> function SetMatchMode ( $mode )</p><p>
- Sets full-text query matching mode, as described in <a class="xref" href="#matching-modes" title="5.1. Matching modes">Section 5.1, “Matching modes”</a>.
- Parameter must be a constant specifying one of the known modes.
- </p><p>
- <span class="bold"><strong>WARNING:</strong></span> (PHP specific) you <span class="bold"><strong>must not</strong></span> take the matching mode
- constant name in quotes, that syntax specifies a string and is incorrect:
- </p><pre class="programlisting">
- $cl->SetMatchMode ( "SPH_MATCH_ANY" ); // INCORRECT! will not work as expected
- $cl->SetMatchMode ( SPH_MATCH_ANY ); // correct, works OK
- </pre><p>
- </p></div>
- <div class="sect2" title="9.3.2. SetRankingMode"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setrankingmode"></a>9.3.2. SetRankingMode</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function SetRankingMode ( $ranker, $rankexpr="" )</p><p>
- Sets ranking mode (aka ranker). Only available in SPH_MATCH_EXTENDED
- matching mode. Parameter must be a constant specifying one of the known
- rankers.
- </p><p>
- By default, in the EXTENDED matching mode Sphinx computes two factors
- which contribute to the final match weight. The major part is a phrase
- proximity value between the document text and the query.
- The minor part is so-called BM25 statistical function, which varies
- from 0 to 1 depending on the keyword frequency within document
- (more occurrences yield higher weight) and within the whole index
- (more rare keywords yield higher weight).
- </p><p>
- However, in some cases you'd want to compute weight differently -
- or maybe avoid computing it at all for performance reasons because
- you're sorting the result set by something else anyway. This can be
- accomplished by setting the appropriate ranking mode. The list of
- the modes is available in <a class="xref" href="#weighting" title="5.4. Search results ranking">Section 5.4, “Search results ranking”</a>.
- </p><p>
- <code class="code">$rankexpr</code> argument was added in version 2.0.2-beta.
- It lets you specify a ranking formula to use with the
- <a class="link" href="#expression-ranker" title="5.4.3. Expression based ranker (SPH_RANK_EXPR)">expression based ranker</a>,
- that is, when <code class="code">$ranker</code> is set to SPH_RANK_EXPR.
- In all other cases, <code class="code">$rankexpr</code> is ignored.
- </p></div>
- <div class="sect2" title="9.3.3. SetSortMode"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setsortmode"></a>9.3.3. SetSortMode</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function SetSortMode ( $mode, $sortby="" )</p><p>
- Set matches sorting mode, as described in <a class="xref" href="#sorting-modes" title="5.6. Sorting modes">Section 5.6, “Sorting modes”</a>.
- Parameter must be a constant specifying one of the known modes.
- </p><p>
- <span class="bold"><strong>WARNING:</strong></span> (PHP specific) you <span class="bold"><strong>must not</strong></span> take the sorting mode
- constant name in quotes, that syntax specifies a string and is incorrect:
- </p><pre class="programlisting">
- $cl->SetSortMode ( "SPH_SORT_ATTR_DESC" ); // INCORRECT! will not work as expected
- $cl->SetSortMode ( SPH_SORT_ATTR_ASC ); // correct, works OK
- </pre><p>
- </p></div>
- <div class="sect2" title="9.3.4. SetWeights"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setweights"></a>9.3.4. SetWeights</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function SetWeights ( $weights )</p><p>
- Binds per-field weights in the order of appearance in the index.
- <span class="bold"><strong>DEPRECATED</strong></span>, use <a class="link" href="#api-func-setfieldweights" title="9.3.5. SetFieldWeights">SetFieldWeights()</a> instead.
- </p></div>
- <div class="sect2" title="9.3.5. SetFieldWeights"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setfieldweights"></a>9.3.5. SetFieldWeights</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function SetFieldWeights ( $weights )</p><p>
- Binds per-field weights by name. Parameter must be a hash (associative array)
- mapping string field names to integer weights.
- </p><p>
- Match ranking can be affected by per-field weights. For instance,
- see <a class="xref" href="#weighting" title="5.4. Search results ranking">Section 5.4, “Search results ranking”</a> for an explanation how phrase proximity
- ranking is affected. This call lets you specify what non-default
- weights to assign to different full-text fields.
- </p><p>
- The weights must be positive 32-bit integers. The final weight
- will be a 32-bit integer too. Default weight value is 1. Unknown
- field names will be silently ignored.
- </p><p>
- There is no enforced limit on the maximum weight value at the
- moment. However, beware that if you set it too high you can start
- hitting 32-bit wraparound issues. For instance, if you set
- a weight of 10,000,000 and search in extended mode, then
- maximum possible weight will be equal to 10 million (your weight)
- by 1 thousand (internal BM25 scaling factor, see <a class="xref" href="#weighting" title="5.4. Search results ranking">Section 5.4, “Search results ranking”</a>)
- by 1 or more (phrase proximity rank). The result is at least 10 billion
- that does not fit in 32 bits and will be wrapped around, producing
- unexpected results.
- </p></div>
- <div class="sect2" title="9.3.6. SetIndexWeights"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setindexweights"></a>9.3.6. SetIndexWeights</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function SetIndexWeights ( $weights )</p><p>
- Sets per-index weights, and enables weighted summing of match weights
- across different indexes. Parameter must be a hash (associative array)
- mapping string index names to integer weights. Default is empty array
- that means to disable weighted summing.
- </p><p>
- When a match with the same document ID is found in several different
- local indexes, by default Sphinx simply chooses the match from the index
- specified last in the query. This is to support searching through
- partially overlapping index partitions.
- </p><p>
- However in some cases the indexes are not just partitions, and you
- might want to sum the weights across the indexes instead of picking one.
- <code class="code">SetIndexWeights()</code> lets you do that. With summing enabled,
- final match weight in result set will be computed as a sum of match
- weight coming from the given index multiplied by respective per-index
- weight specified in this call. Ie. if the document 123 is found in
- index A with the weight of 2, and also in index B with the weight of 3,
- and you called <code class="code">SetIndexWeights ( array ( "A"=>100, "B"=>10 ) )</code>,
- the final weight returned to the client will be 2*100+3*10 = 230.
- </p></div></div>
- <div class="sect1" title="9.4. Result set filtering settings"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="api-funcgroup-filtering"></a>9.4. Result set filtering settings</h2></div></div></div>
- <div class="sect2" title="9.4.1. SetIDRange"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setidrange"></a>9.4.1. SetIDRange</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function SetIDRange ( $min, $max )</p><p>
- Sets an accepted range of document IDs. Parameters must be integers.
- Defaults are 0 and 0; that combination means to not limit by range.
- </p><p>
- After this call, only those records that have document ID
- between <code class="code">$min</code> and <code class="code">$max</code> (including IDs
- exactly equal to <code class="code">$min</code> or <code class="code">$max</code>)
- will be matched.
- </p></div>
- <div class="sect2" title="9.4.2. SetFilter"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setfilter"></a>9.4.2. SetFilter</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function SetFilter ( $attribute, $values, $exclude=false )</p><p>
- Adds new integer values set filter.
- </p><p>
- On this call, additional new filter is added to the existing
- list of filters. <code class="code">$attribute</code> must be a string with
- attribute name. <code class="code">$values</code> must be a plain array
- containing integer values. <code class="code">$exclude</code> must be a boolean
- value; it controls whether to accept the matching documents
- (default mode, when <code class="code">$exclude</code> is false) or reject them.
- </p><p>
- Only those documents where <code class="code">$attribute</code> column value
- stored in the index matches any of the values from <code class="code">$values</code>
- array will be matched (or rejected, if <code class="code">$exclude</code> is true).
- </p></div>
- <div class="sect2" title="9.4.3. SetFilterRange"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setfilterrange"></a>9.4.3. SetFilterRange</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function SetFilterRange ( $attribute, $min, $max, $exclude=false )</p><p>
- Adds new integer range filter.
- </p><p>
- On this call, additional new filter is added to the existing
- list of filters. <code class="code">$attribute</code> must be a string with
- attribute name. <code class="code">$min</code> and <code class="code">$max</code> must be
- integers that define the acceptable attribute values range
- (including the boundaries). <code class="code">$exclude</code> must be a boolean
- value; it controls whether to accept the matching documents
- (default mode, when <code class="code">$exclude</code> is false) or reject them.
- </p><p>
- Only those documents where <code class="code">$attribute</code> column value
- stored in the index is between <code class="code">$min</code> and <code class="code">$max</code>
- (including values that are exactly equal to <code class="code">$min</code> or <code class="code">$max</code>)
- will be matched (or rejected, if <code class="code">$exclude</code> is true).
- </p></div>
- <div class="sect2" title="9.4.4. SetFilterFloatRange"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setfilterfloatrange"></a>9.4.4. SetFilterFloatRange</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false )</p><p>
- Adds new float range filter.
- </p><p>
- On this call, additional new filter is added to the existing
- list of filters. <code class="code">$attribute</code> must be a string with
- attribute name. <code class="code">$min</code> and <code class="code">$max</code> must be
- floats that define the acceptable attribute values range
- (including the boundaries). <code class="code">$exclude</code> must be a boolean
- value; it controls whether to accept the matching documents
- (default mode, when <code class="code">$exclude</code> is false) or reject them.
- </p><p>
- Only those documents where <code class="code">$attribute</code> column value
- stored in the index is between <code class="code">$min</code> and <code class="code">$max</code>
- (including values that are exactly equal to <code class="code">$min</code> or <code class="code">$max</code>)
- will be matched (or rejected, if <code class="code">$exclude</code> is true).
- </p></div>
- <div class="sect2" title="9.4.5. SetGeoAnchor"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setgeoanchor"></a>9.4.5. SetGeoAnchor</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long )</p><p>
- Sets anchor point for geosphere distance (geodistance) calculations, and enables them.
- </p><p>
- <code class="code">$attrlat</code> and <code class="code">$attrlong</code> must be strings that contain the names
- of latitude and longitude attributes, respectively. <code class="code">$lat</code> and <code class="code">$long</code>
- are floats that specify anchor point latitude and longitude, in radians.
- </p><p>
- Once an anchor point is set, you can use magic <code class="code">"@geodist"</code> attribute
- name in your filters and/or sorting expressions. Sphinx will compute geosphere distance
- between the given anchor point and a point specified by latitude and longitude
- attributes from each full-text match, and attach this value to the resulting match.
- The latitude and longitude values both in <code class="code">SetGeoAnchor</code> and the index
- attribute data are expected to be in radians. The result will be returned in meters,
- so geodistance value of 1000.0 means 1 km. 1 mile is approximately 1609.344 meters.
- </p></div>
- <div class="sect2" title="9.4.6. SetFilterString"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setfilterstring"></a>9.4.6. SetFilterString</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function SetFilterString ( $attribute, $value, $exclude=false )</p><p>
- Adds new string value filter.
- </p><p>
- On this call, additional new filter is added to the existing
- list of filters. <code class="code">$attribute</code> must be a string with
- attribute name. <code class="code">$value</code> must be a string. <code class="code">$exclude</code> must be a boolean
- value; it controls whether to accept the matching documents
- (default mode, when <code class="code">$exclude</code> is false) or reject them.
- </p><p>
- Only those documents where <code class="code">$attribute</code> column value
- stored in the index matches string value from <code class="code">$value</code>
- will be matched (or rejected, if <code class="code">$exclude</code> is true).
- </p></div></div>
- <div class="sect1" title="9.5. GROUP BY settings"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="api-funcgroup-groupby"></a>9.5. GROUP BY settings</h2></div></div></div>
- <div class="sect2" title="9.5.1. SetGroupBy"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setgroupby"></a>9.5.1. SetGroupBy</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function SetGroupBy ( $attribute, $func, $groupsort="@group desc" )</p><p>
- Sets grouping attribute, function, and groups sorting mode; and enables grouping
- (as described in <a class="xref" href="#clustering" title="5.7. Grouping (clustering) search results">Section 5.7, “Grouping (clustering) search results ”</a>).
- </p><p>
- <code class="code">$attribute</code> is a string that contains group-by attribute name.
- <code class="code">$func</code> is a constant that chooses a function applied to the attribute value in order to compute group-by key.
- <code class="code">$groupsort</code> is a clause that controls how the groups will be sorted. Its syntax is similar
- to that described in <a class="xref" href="#sort-extended" title="5.6. SPH_SORT_EXTENDED mode">Section 5.6, “SPH_SORT_EXTENDED mode”</a>.
- </p><p>
- Grouping feature is very similar in nature to GROUP BY clause from SQL.
- Results produced by this function call are going to be the same as produced
- by the following pseudo code:
- </p><pre class="programlisting">
- SELECT ... GROUP BY $func($attribute) ORDER BY $groupsort
- </pre><p>
- Note that it's <code class="code">$groupsort</code> that affects the order of matches
- in the final result set. Sorting mode (see <a class="xref" href="#api-func-setsortmode" title="9.3.3. SetSortMode">Section 9.3.3, “SetSortMode”</a>)
- affects the ordering of matches <span class="emphasis"><em>within</em></span> group, i.e.
- what match will be selected as the best one from the group.
- So you can for instance order the groups by matches count
- and select the most relevant match within each group at the same time.
- </p><p>
- Starting with version 0.9.9-rc2, aggregate functions (AVG(), MIN(),
- MAX(), SUM()) are supported through <a class="link" href="#api-func-setselect" title="9.2.4. SetSelect">SetSelect()</a> API call
- when using GROUP BY.
- </p><p>
- Starting with version 2.0.1-beta, grouping on string attributes
- is supported, with respect to current collation.
- </p></div>
- <div class="sect2" title="9.5.2. SetGroupDistinct"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-setgroupdistinct"></a>9.5.2. SetGroupDistinct</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function SetGroupDistinct ( $attribute )</p><p>
- Sets attribute name for per-group distinct values count calculations.
- Only available for grouping queries.
- </p><p>
- <code class="code">$attribute</code> is a string that contains the attribute name.
- For each group, all values of this attribute will be stored (as RAM limits
- permit), then the amount of distinct values will be calculated and returned
- to the client. This feature is similar to <code class="code">COUNT(DISTINCT)</code>
- clause in standard SQL; so these Sphinx calls:
- </p><pre class="programlisting">
- $cl->SetGroupBy ( "category", SPH_GROUPBY_ATTR, "@count desc" );
- $cl->SetGroupDistinct ( "vendor" );
- </pre><p>
- can be expressed using the following SQL clauses:
- </p><pre class="programlisting">
- SELECT id, weight, all-attributes,
- COUNT(DISTINCT vendor) AS @distinct,
- COUNT(*) AS @count
- FROM products
- GROUP BY category
- ORDER BY @count DESC
- </pre><p>
- In the sample pseudo code shown just above, <code class="code">SetGroupDistinct()</code> call
- corresponds to <code class="code">COUNT(DISTINCT vendor)</code> clause only.
- <code class="code">GROUP BY</code>, <code class="code">ORDER BY</code>, and <code class="code">COUNT(*)</code>
- clauses are all an equivalent of <code class="code">SetGroupBy()</code> settings. Both queries
- will return one matching row for each category. In addition to indexed attributes,
- matches will also contain total per-category matches count, and the count
- of distinct vendor IDs within each category.
- </p></div></div>
- <div class="sect1" title="9.6. Querying"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="api-funcgroup-querying"></a>9.6. Querying</h2></div></div></div>
- <div class="sect2" title="9.6.1. Query"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-query"></a>9.6.1. Query</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function Query ( $query, $index="*", $comment="" )</p><p>
- Connects to <code class="filename">searchd</code> server, runs given search query
- with current settings, obtains and returns the result set.
- </p><p>
- <code class="code">$query</code> is a query string. <code class="code">$index</code> is an index name (or names) string.
- Returns false and sets <code class="code">GetLastError()</code> message on general error.
- Returns search result set on success.
- Additionally, the contents of <code class="code">$comment</code> are sent to the query log, marked in square brackets, just before the search terms, which can be very useful for debugging.
- Currently, the comment is limited to 128 characters.
- </p><p>
- Default value for <code class="code">$index</code> is <code class="code">"*"</code> that means
- to query all local indexes. Characters allowed in index names include
- Latin letters (a-z), numbers (0-9) and underscore (_);
- everything else is considered a separator. Note that index name should
- not start with underscore character. Therefore, all of the
- following sample calls are valid and will search the same
- two indexes:
- </p><pre class="programlisting">
- $cl->Query ( "test query", "main delta" );
- $cl->Query ( "test query", "main;delta" );
- $cl->Query ( "test query", "main, delta" );
- </pre><p>
- Index specification order matters. If documents with identical IDs are found
- in two or more indexes, weight and attribute values from the very last matching
- index will be used for sorting and returning to client (unless explicitly
- overridden with <a class="link" href="#api-func-setindexweights" title="9.3.6. SetIndexWeights">SetIndexWeights()</a>). Therefore,
- in the example above, matches from "delta" index will always win over
- matches from "main".
- </p><p>
- On success, <code class="code">Query()</code> returns a result set that contains
- some of the found matches (as requested by <a class="link" href="#api-func-setlimits" title="9.2.1. SetLimits">SetLimits()</a>)
- and additional general per-query statistics. The result set is a hash
- (PHP specific; other languages might utilize other structures instead
- of hash) with the following keys and values:
- </p><div class="variablelist"><dl><dt><span class="term">"matches":</span></dt>
- <dd><p>Hash which maps found document IDs to another small hash containing document weight and attribute values
- (or an array of the similar small hashes if <a class="link" href="#api-func-setarrayresult" title="9.1.6. SetArrayResult">SetArrayResult()</a> was enabled).
- </p></dd><dt><span class="term">"total":</span></dt>
- <dd><p>Total amount of matches retrieved <span class="emphasis"><em>on server</em></span> (ie. to the server side result set) by this query.
- You can retrieve up to this amount of matches from server for this query text with current query settings.
- </p></dd><dt><span class="term">"total_found":</span></dt>
- <dd><p>Total amount of matching documents in index (that were found and processed on server).</p></dd><dt><span class="term">"words":</span></dt>
- <dd><p>Hash which maps query keywords (case-folded, stemmed, and otherwise processed) to a small hash with per-keyword statistics ("docs", "hits").</p></dd><dt><span class="term">"error":</span></dt>
- <dd><p>Query error message reported by <code class="filename">searchd</code> (string, human readable). Empty if there were no errors.</p></dd><dt><span class="term">"warning":</span></dt>
- <dd><p>Query warning message reported by <code class="filename">searchd</code> (string, human readable). Empty if there were no warnings.</p></dd></dl></div>
- <p>
- </p><p>
- It should be noted that <code class="code">Query()</code> carries out the same actions as
- <code class="code">AddQuery()</code> and <code class="code">RunQueries()</code> without the intermediate steps;
- it is analogous to a single <code class="code">AddQuery()</code> call, followed by a corresponding
- <code class="code">RunQueries()</code>, then returning the first array element of matches
- (from the first, and only, query.)
- </p></div>
- <div class="sect2" title="9.6.2. AddQuery"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-addquery"></a>9.6.2. AddQuery</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function AddQuery ( $query, $index="*", $comment="" )</p><p>
- Adds additional query with current settings to multi-query batch.
- <code class="code">$query</code> is a query string. <code class="code">$index</code> is an index name (or names) string.
- Additionally if provided, the contents of <code class="code">$comment</code> are sent to the query log,
- marked in square brackets, just before the search terms, which can be very useful for debugging.
- Currently, this is limited to 128 characters.
- Returns index to results array returned from <a class="link" href="#api-func-runqueries" title="9.6.3. RunQueries">RunQueries()</a>.
- </p><p>
- Batch queries (or multi-queries) enable <code class="filename">searchd</code> to perform internal
- optimizations if possible. They also reduce network connection overheads and search process
- creation overheads in all cases. They do not result in any additional overheads compared
- to simple queries. Thus, if you run several different queries from your web page,
- you should always consider using multi-queries.
- </p><p>
- For instance, running the same full-text query but with different
- sorting or group-by settings will enable <code class="filename">searchd</code>
- to perform expensive full-text search and ranking operation only once,
- but compute multiple group-by results from its output.
- </p><p>
- This can be a big saver when you need to display not just plain
- search results but also some per-category counts, such as the amount of
- products grouped by vendor. Without multi-query, you would have to run several
- queries which perform essentially the same search and retrieve the
- same matches, but create result sets differently. With multi-query,
- you simply pass all these queries in a single batch and Sphinx
- optimizes the redundant full-text search internally.
- </p><p>
- <code class="code">AddQuery()</code> internally saves full current settings state
- along with the query, and you can safely change them afterwards for subsequent
- <code class="code">AddQuery()</code> calls. Already added queries will not be affected;
- there's actually no way to change them at all. Here's an example:
- </p><pre class="programlisting">
- $cl->SetSortMode ( SPH_SORT_RELEVANCE );
- $cl->AddQuery ( "hello world", "documents" );
- $cl->SetSortMode ( SPH_SORT_ATTR_DESC, "price" );
- $cl->AddQuery ( "ipod", "products" );
- $cl->AddQuery ( "harry potter", "books" );
- $results = $cl->RunQueries ();
- </pre><p>
- With the code above, 1st query will search for "hello world" in "documents" index
- and sort results by relevance, 2nd query will search for "ipod" in "products"
- index and sort results by price, and 3rd query will search for "harry potter"
- in "books" index while still sorting by price. Note that 2nd <code class="code">SetSortMode()</code> call
- does not affect the first query (because it's already added) but affects both other
- subsequent queries.
- </p><p>
- Additionally, any filters set up before an <code class="code">AddQuery()</code> will fall through to subsequent
- queries. So, if <code class="code">SetFilter()</code> is called before the first query, the same filter
- will be in place for the second (and subsequent) queries batched through <code class="code">AddQuery()</code>
- unless you call <code class="code">ResetFilters()</code> first. Alternatively, you can add additional filters
- as well.</p><p>This would also be true for grouping options and sorting options; no current sorting,
- filtering, and grouping settings are affected by this call; so subsequent queries will reuse
- current query settings.
- </p><p>
- <code class="code">AddQuery()</code> returns an index into an array of results
- that will be returned from <code class="code">RunQueries()</code> call. It is simply
- a sequentially increasing 0-based integer, ie. first call will return 0,
- second will return 1, and so on. Just a small helper so you won't have
- to track the indexes manually if you need them.
- </p></div>
- <div class="sect2" title="9.6.3. RunQueries"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-runqueries"></a>9.6.3. RunQueries</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function RunQueries ()</p><p>
- Connects to searchd, runs a batch of all queries added using <code class="code">AddQuery()</code>,
- obtains and returns the result sets. Returns false and sets <code class="code">GetLastError()</code>
- message on general error (such as network I/O failure). Returns a plain array
- of result sets on success.
- </p><p>
- Each result set in the returned array is exactly the same as
- the result set returned from <a class="link" href="#api-func-query" title="9.6.1. Query"><code class="code">Query()</code></a>.
- </p><p>
- Note that the batch query request itself almost always succeeds -
- unless there's a network error, blocking index rotation in progress,
- or another general failure which prevents the whole request from being
- processed.
- </p><p>
- However individual queries within the batch might very well fail.
- In this case their respective result sets will contain non-empty <code class="code">"error"</code> message,
- but no matches or query statistics. In the extreme case all queries within the batch
- could fail. There still will be no general error reported, because API was able to
- successfully connect to <code class="filename">searchd</code>, submit the batch, and receive
- the results - but every result set will have a specific error message.
- </p></div>
- <div class="sect2" title="9.6.4. ResetFilters"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-resetfilters"></a>9.6.4. ResetFilters</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function ResetFilters ()</p><p>
- Clears all currently set filters.
- </p><p>
- This call is only normally required when using multi-queries. You might want
- to set different filters for different queries in the batch. To do that,
- you should call <code class="code">ResetFilters()</code> and add new filters using
- the respective calls.
- </p></div>
- <div class="sect2" title="9.6.5. ResetGroupBy"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-resetgroupby"></a>9.6.5. ResetGroupBy</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function ResetGroupBy ()</p><p>
- Clears all current group-by settings, and disables group-by.
- </p><p>
- This call is only normally required when using multi-queries.
- You can change individual group-by settings using <code class="code">SetGroupBy()</code>
- and <code class="code">SetGroupDistinct()</code> calls, but you can not disable
- group-by using those calls. <code class="code">ResetGroupBy()</code>
- fully resets previous group-by settings and disables group-by mode
- in the current state, so that subsequent <code class="code">AddQuery()</code>
- calls can perform non-grouping searches.
- </p></div></div>
- <div class="sect1" title="9.7. Additional functionality"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="api-funcgroup-additional-functionality"></a>9.7. Additional functionality</h2></div></div></div>
- <div class="sect2" title="9.7.1. BuildExcerpts"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-buildexcerpts"></a>9.7.1. BuildExcerpts</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function BuildExcerpts ( $docs, $index, $words, $opts=array() )</p><p>
- Excerpts (snippets) builder function. Connects to <code class="filename">searchd</code>,
- asks it to generate excerpts (snippets) from given documents, and returns the results.
- </p><p>
- <code class="code">$docs</code> is a plain array of strings that carry the documents' contents.
- <code class="code">$index</code> is an index name string. Different settings (such as charset,
- morphology, wordforms) from given index will be used.
- <code class="code">$words</code> is a string that contains the keywords to highlight. They will
- be processed with respect to index settings. For instance, if English stemming
- is enabled in the index, "shoes" will be highlighted even if keyword is "shoe".
- Starting with version 0.9.9-rc1, keywords can contain wildcards, that work similarly to
- star-syntax available in queries.
- <code class="code">$opts</code> is a hash which contains additional optional highlighting parameters:
- </p><div class="variablelist"><dl><dt><span class="term">"before_match":</span></dt>
- <dd><p>A string to insert before a keyword match. Starting with version 1.10-beta,
- a %PASSAGE_ID% macro can be used in this string. The macro is replaced with an incrementing
- passage number within a current snippet. Numbering starts at 1 by default but can be
- overridden with "start_passage_id" option. In a multi-document call, %PASSAGE_ID% would
- restart at every given document. Default is "<b>".</p></dd><dt><span class="term">"after_match":</span></dt>
- <dd><p>A string to insert after a keyword match. Starting with version 1.10-beta,
- a %PASSAGE_ID% macro can be used in this string. Default is "</b>".</p></dd><dt><span class="term">"chunk_separator":</span></dt>
- <dd><p>A string to insert between snippet chunks (passages). Default is " ... ".</p></dd><dt><span class="term">"limit":</span></dt>
- <dd><p>Maximum snippet size, in symbols (codepoints). Integer, default is 256.</p></dd><dt><span class="term">"around":</span></dt>
- <dd><p>How many words to pick around each matching keyword block. Integer, default is 5.</p></dd><dt><span class="term">"exact_phrase":</span></dt>
- <dd><p>Whether to highlight exact query phrase matches only instead of individual keywords. Boolean, default is false.</p></dd><dt><span class="term">"use_boundaries":</span></dt>
- <dd><p>Whether to additionally break passages by phrase
- boundary characters, as configured in index settings with
- <a class="link" href="#conf-phrase-boundary" title="12.2.25. phrase_boundary">phrase_boundary</a>
- directive. Boolean, default is false.
- </p></dd><dt><span class="term">"weight_order":</span></dt>
- <dd><p>Whether to sort the extracted passages in order of relevance (decreasing weight),
- or in order of appearance in the document (increasing position). Boolean, default is false.</p></dd><dt><span class="term">"query_mode":</span></dt>
- <dd><p>Added in version 1.10-beta. Whether to handle $words as a query in
- <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">extended syntax</a>, or as a bag of words
- (default behavior). For instance, in query mode ("one two" | "three four") will
- only highlight and include those occurrences "one two" or "three four" when
- the two words from each pair are adjacent to each other. In default mode,
- any single occurrence of "one", "two", "three", or "four" would be
- highlighted. Boolean, default is false.
- </p></dd><dt><span class="term">"force_all_words":</span></dt>
- <dd><p>Added in version 1.10-beta. Ignores the snippet length limit until it
- includes all the keywords. Boolean, default is false.
- </p></dd><dt><span class="term">"limit_passages":</span></dt>
- <dd><p>Added in version 1.10-beta. Limits the maximum number of passages
- that can be included into the snippet. Integer, default is 0 (no limit).
- </p></dd><dt><span class="term">"limit_words":</span></dt>
- <dd><p>Added in version 1.10-beta. Limits the maximum number of words
- that can be included into the snippet. Note the limit applies to any words, and
- not just the matched keywords to highlight. For example, if we are highlighting
- "Mary" and a passage "Mary had a little lamb" is selected, then it contributes
- 5 words to this limit, not just 1. Integer, default is 0 (no limit).
- </p></dd><dt><span class="term">"start_passage_id":</span></dt>
- <dd><p>Added in version 1.10-beta. Specifies the starting value of
- %PASSAGE_ID% macro (that gets detected and expanded in <code class="option">before_match</code>,
- <code class="option">after_match</code> strings). Integer, default is 1.
- </p></dd><dt><span class="term">"load_files":</span></dt>
- <dd><p>Added in version 1.10-beta. Whether to handle $docs as data
- to extract snippets from (default behavior), or to treat it as file names,
- and load data from specified files on the server side. Starting with
- version 2.0.1-beta, up to <a class="link" href="#conf-dist-threads" title="12.4.24. dist_threads">dist_threads</a>
- worker threads per request will be created to parallelize the work
- when this flag is enabled. Boolean, default is false. Starting with version 2.0.2-beta,
- building of the snippets could be parallelized between remote agents. Just set the <a class="link" href="#conf-dist-threads" title="12.4.24. dist_threads">'dist_threads'</a> param in the config
- to the value greater than 1, and then invoke the snippets
- generation over the distributed index, which contains only one(!) <a class="link" href="#conf-local" title="12.2.30. local">local</a> agent and several remotes.
- Starting with version 2.1.1-beta, the <a class="link" href="#conf-snippets-file-prefix" title="12.4.28. snippets_file_prefix">snippets_file_prefix</a> option is
- also in the game and the final filename is calculated by concatenation of the prefix with given name.
- In other words, when snippets_file_prefix is '/var/data' and the filename is 'text.txt', Sphinx will try to generate the snippets
- from the file '/var/datatext.txt', which is exactly '/var/data' + 'text.txt'.
- </p></dd><dt><span class="term">"load_files_scattered":</span></dt>
- <dd><p>Added in version 2.0.2-beta. It works only with distributed snippets generation
- with remote agents. The source files for snippets could be distributed among different agents, and the main daemon will merge
- together all non-erroneous results. So, if one agent of the distributed index has 'file1.txt', another has 'file2.txt' and you call for the snippets
- with both these files, the sphinx will merge results from the agents together, so you will get the snippets from both 'file1.txt' and 'file2.txt'.
- Boolean, default is false.
- </p><p>If "load_files" is also set, the request will return an error if any of the files is not available anywhere. Otherwise (if "load_files" is not set)
- it will just return empty strings for all absent files. The master instance resets this flag when it distributes the snippets among agents. So, for agents the absence of a file
- is not a critical error, but for the master it might be. If you want to be sure that all snippets are actually created, set both "load_files_scattered" and "load_files". If the
- absence of some snippets caused by some agents is not critical for you - set just "load_files_scattered", leaving "load_files" not set.
- </p></dd><dt><span class="term">"html_strip_mode":</span></dt>
- <dd><p>Added in version 1.10-beta. HTML stripping mode setting.
- Defaults to "index", which means that index settings will be used.
- The other values are "none" and "strip", that forcibly skip or apply
- stripping regardless of index settings; and "retain", that retains
- HTML markup and protects it from highlighting. The "retain" mode can
- only be used when highlighting full documents and thus requires that
- no snippet size limits are set. String, allowed values are "none",
- "strip", "index", and "retain".
- </p></dd><dt><span class="term">"allow_empty":</span></dt>
- <dd><p>Added in version 1.10-beta. Allows empty string to be
- returned as highlighting result when a snippet could not be generated
- (no keywords match, or no passages fit the limit). By default,
- the beginning of original text would be returned instead of an empty
- string. Boolean, default is false.
- </p></dd><dt><span class="term">"passage_boundary":</span></dt>
- <dd><p>Added in version 2.0.1-beta. Ensures that passages do not
- cross a sentence, paragraph, or zone boundary (when used with an index
- that has the respective indexing settings enabled). String, allowed
- values are "sentence", "paragraph", and "zone".
- </p></dd><dt><span class="term">"emit_zones":</span></dt>
- <dd><p>Added in version 2.0.1-beta. Emits an HTML tag with
- an enclosing zone name before each passage. Boolean, default is false.
- </p></dd></dl></div>
- <p>
- </p><p>
- Snippets extraction algorithm currently favors better passages
- (with closer phrase matches), and then passages with keywords not
- yet in snippet. Generally, it will try to highlight the best match
- with the query, and it will also try to highlight all the query keywords,
- as made possible by the limits. In case the document does not match
- the query, beginning of the document trimmed down according to the
- limits will be returned by default. Starting with 1.10-beta, you can
- also return an empty snippet instead in this case by setting "allow_empty"
- option to true.
- </p><p>
- Returns false on failure. Returns a plain array of strings with excerpts (snippets) on success.
- </p></div>
- <div class="sect2" title="9.7.2. UpdateAttributes"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-updateatttributes"></a>9.7.2. UpdateAttributes</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function UpdateAttributes ( $index, $attrs, $values, $mva=false, $ignorenonexistent=false )</p><p>
- Instantly updates given attribute values in given documents.
- Returns number of actually updated documents (0 or more) on success, or -1 on failure.
- </p><p>
- <code class="code">$index</code> is a name of the index (or indexes) to be updated.
- <code class="code">$attrs</code> is a plain array with string attribute names, listing attributes that are updated.
- <code class="code">$values</code> is a hash where key is document ID, and value is a plain array of new attribute values.
- Optional boolean parameter <code class="code">$mva</code> indicates that MVA attributes
- are being updated. In this case <code class="code">$values</code> must be a hash with an integer key (document ID)
- and a list of lists of integer values (new MVA attribute values).
- Optional boolean parameter <code class="code">$ignorenonexistent</code>
- (added in version 2.1.1-beta) indicates that the
- update will silently ignore any warnings about trying to update a
- column which does not exist in the current index schema. </p><p>
- <code class="code">$index</code> can be either a single index name or a list, like in <code class="code">Query()</code>.
- Unlike <code class="code">Query()</code>, wildcard is not allowed and all the indexes
- to update must be specified explicitly. The list of indexes can include
- distributed index names. Updates on distributed indexes will be pushed
- to all agents.
- </p><p>
- The updates only work with <code class="code">docinfo=extern</code> storage strategy.
- They are very fast because they're working fully in RAM, but they can also
- be made persistent: updates are saved on disk on clean <code class="filename">searchd</code>
- shutdown initiated by SIGTERM signal. With additional restrictions, updates
- are also possible on MVA attributes; refer to <a class="link" href="#conf-mva-updates-pool" title="12.4.14. mva_updates_pool">mva_updates_pool</a>
- directive for details.
- </p><p>
- Usage example:
- </p><pre class="programlisting">
- $cl->UpdateAttributes ( "test1", array("group_id"), array(1=>array(456)) );
- $cl->UpdateAttributes ( "products", array ( "price", "amount_in_stock" ),
- array ( 1001=>array(123,5), 1002=>array(37,11), 1003=>array(25,129) ) );
- </pre><p>
- The first sample statement will update document 1 in index "test1", setting "group_id" to 456.
- The second one will update documents 1001, 1002 and 1003 in index "products". For document 1001,
- the new price will be set to 123 and the new amount in stock to 5; for document 1002, the new price
- will be 37 and the new amount will be 11; etc.
- </p></div>
- <div class="sect2" title="9.7.3. BuildKeywords"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-buildkeywords"></a>9.7.3. BuildKeywords</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function BuildKeywords ( $query, $index, $hits )</p><p>
- Extracts keywords from query using tokenizer settings for given index, optionally with per-keyword occurrence statistics.
- Returns an array of hashes with per-keyword information.
- </p><p>
- <code class="code">$query</code> is a query to extract keywords from.
- <code class="code">$index</code> is a name of the index to get tokenizing settings and keyword occurrence statistics from.
- <code class="code">$hits</code> is a boolean flag that indicates whether keyword occurrence statistics are required.
- </p><p>
- Usage example:
- </p><pre class="programlisting">
- $keywords = $cl->BuildKeywords ( "this.is.my query", "test1", false );
- </pre></div>
- <div class="sect2" title="9.7.4. EscapeString"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-escapestring"></a>9.7.4. EscapeString</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function EscapeString ( $string )</p><p>
- Escapes characters that are treated as special operators by the query language parser.
- Returns an escaped string.
- </p><p>
- <code class="code">$string</code> is a string to escape.
- </p><p>
- This function might seem redundant because it's trivial to implement in any calling
- application. However, as the set of special characters might change over time, it makes
- sense to have an API call that is guaranteed to escape all such characters at all times.
- </p><p>
- Usage example:
- </p><pre class="programlisting">
- $escaped = $cl->EscapeString ( "escaping-sample@query/string" );
- </pre></div>
- <div class="sect2" title="9.7.5. Status"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-status"></a>9.7.5. Status</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function Status ()</p><p>
- Queries searchd status, and returns an array of status variable name and value pairs.
- </p><p>
- Usage example:
- </p><pre class="programlisting">
- $status = $cl->Status ();
- foreach ( $status as $row )
- print join ( ": ", $row ) . "\n";
- </pre></div>
- <div class="sect2" title="9.7.6. FlushAttributes"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-flushattributes"></a>9.7.6. FlushAttributes</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function FlushAttributes ()</p><p>
- Forces <code class="filename">searchd</code> to flush pending attribute updates
- to disk, and blocks until completion. Returns a non-negative internal
- "flush tag" on success. Returns -1 and sets an error message on error.
- Introduced in version 1.10-beta.
- </p><p>
- Attribute values updated using <a class="link" href="#api-func-updateatttributes" title="9.7.2. UpdateAttributes">UpdateAttributes()</a>
- API call are only kept in RAM until a so-called flush (which writes
- the current, possibly updated attribute values back to disk). FlushAttributes()
- call lets you enforce a flush. The call will block until <code class="filename">searchd</code>
- finishes writing the data to disk, which might take seconds or even minutes
- depending on the total data size (.spa file size). All the currently updated
- indexes will be flushed.
- </p><p>
- Flush tag should be treated as an ever growing magic number that does not
- mean anything. It's guaranteed to be non-negative. It is guaranteed to grow over
- time, though not necessarily in a sequential fashion; for instance, two calls that
- return 10 and then 1000 respectively are a valid situation. If two calls to
- FlushAttributes() return the same tag, it means that there were no actual attribute
- updates in between them, and therefore current flushed state remained the same
- (for all indexes).
- </p><p>
- Usage example:
- </p><pre class="programlisting">
- $status = $cl->FlushAttributes ();
- if ( $status<0 )
- print "ERROR: " . $cl->GetLastError();
- </pre></div></div>
- <div class="sect1" title="9.8. Persistent connections"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="api-funcgroup-pconn"></a>9.8. Persistent connections</h2></div></div></div>
- <p>
- Persistent connections allow using a single network connection to run
- multiple commands that would otherwise require reconnects.
- </p><div class="sect2" title="9.8.1. Open"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-open"></a>9.8.1. Open</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function Open ()</p><p>
- Opens persistent connection to the server.
- </p></div>
- <div class="sect2" title="9.8.2. Close"><div class="titlepage"><div><div><h3 class="title"><a name="api-func-close"></a>9.8.2. Close</h3></div></div></div>
- <p><span class="bold"><strong>Prototype:</strong></span> function Close ()</p><p>
- Closes previously opened persistent connection.
- </p></div></div></div>
- <div class="chapter" title="Chapter 10. MySQL storage engine (SphinxSE)"><div class="titlepage"><div><div><h2 class="title"><a name="sphinxse"></a>Chapter 10. MySQL storage engine (SphinxSE)</h2></div></div></div>
- <div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="sect1"><a href="#sphinxse-overview">10.1. SphinxSE overview</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxse-installing">10.2. Installing SphinxSE</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#sphinxse-mysql50">10.2.1. Compiling MySQL 5.0.x with SphinxSE</a></span></dt>
- <dt><span class="sect2"><a href="#sphinxse-mysql51">10.2.2. Compiling MySQL 5.1.x with SphinxSE</a></span></dt>
- <dt><span class="sect2"><a href="#sphinxse-checking">10.2.3. Checking SphinxSE installation</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#sphinxse-using">10.3. Using SphinxSE</a></span></dt>
- <dt><span class="sect1"><a href="#sphinxse-snippets">10.4. Building snippets (excerpts) via MySQL</a></span></dt>
- </dl></div>
- <div class="sect1" title="10.1. SphinxSE overview"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxse-overview"></a>10.1. SphinxSE overview</h2></div></div></div>
- <p>
- SphinxSE is a MySQL storage engine which can be compiled
- into MySQL server 5.x using its pluggable architecture.
- It is not available for MySQL 4.x series. It also requires
- MySQL 5.0.22 or higher in 5.0.x series, or MySQL 5.1.12
- or higher in 5.1.x series.
- </p><p>
- Despite the name, SphinxSE does <span class="emphasis"><em>not</em></span>
- actually store any data itself. It is actually a built-in client
- which allows MySQL server to talk to <code class="filename">searchd</code>,
- run search queries, and obtain search results. All indexing and
- searching happen outside MySQL.
- </p><p>
- Obvious SphinxSE applications include:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>easier porting of MySQL FTS applications to Sphinx;</p></li>
- <li class="listitem"><p>allowing Sphinx use with programming languages for which native APIs are not available yet;</p></li>
- <li class="listitem"><p>optimizations when additional Sphinx result set processing on MySQL side is required
- (eg. JOINs with original document tables, additional MySQL-side filtering, etc).</p></li>
- </ul></div>
- <p>
- </p></div>
- <div class="sect1" title="10.2. Installing SphinxSE"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxse-installing"></a>10.2. Installing SphinxSE</h2></div></div></div>
- <p>
- You will need to obtain a copy of MySQL sources, prepare those,
- and then recompile MySQL binary.
- MySQL sources (mysql-5.x.yy.tar.gz) could be obtained from
- <a class="ulink" href="http://dev.mysql.com" target="_top">dev.mysql.com</a> Web site.
- </p><p>
- For some MySQL versions, there are delta tarballs with already
- prepared source versions available from Sphinx Web site. After unzipping
- those over original sources MySQL would be ready to be configured and
- built with Sphinx support.
- </p><p>
- If such tarball is not available, or does not work for you for any
- reason, you would have to prepare sources manually. You will need to have the
- GNU Autotools framework (autoconf, automake and libtool) installed
- to do that.
- </p><div class="sect2" title="10.2.1. Compiling MySQL 5.0.x with SphinxSE"><div class="titlepage"><div><div><h3 class="title"><a name="sphinxse-mysql50"></a>10.2.1. Compiling MySQL 5.0.x with SphinxSE</h3></div></div></div>
- <div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>copy <code class="filename">sphinx.5.0.yy.diff</code> patch file
- into MySQL sources directory and run
- </p><pre class="programlisting">
- patch -p1 < sphinx.5.0.yy.diff
- </pre><p>
- If there's no .diff file exactly for the specific version you need
- to build, try applying .diff with closest version numbers. It is important
- that the patch should apply with no rejects.
- </p></li>
- <li class="listitem"><p>in MySQL sources directory, run
- </p><pre class="programlisting">
- sh BUILD/autorun.sh
- </pre><p>
- </p></li>
- <li class="listitem"><p>in MySQL sources directory, create <code class="filename">sql/sphinx</code>
- directory and copy all files in <code class="filename">mysqlse</code> directory
- from Sphinx sources there. Example:
- </p><pre class="programlisting">
- cp -R /root/builds/sphinx-0.9.7/mysqlse /root/builds/mysql-5.0.24/sql/sphinx
- </pre><p>
- </p></li>
- <li class="listitem"><p>
- configure MySQL and enable Sphinx engine:
- </p><pre class="programlisting">
- ./configure --with-sphinx-storage-engine
- </pre><p>
- </p></li>
- <li class="listitem"><p>
- build and install MySQL:
- </p><pre class="programlisting">
- make
- make install
- </pre><p>
- </p></li>
- </ol></div></div>
- <div class="sect2" title="10.2.2. Compiling MySQL 5.1.x with SphinxSE"><div class="titlepage"><div><div><h3 class="title"><a name="sphinxse-mysql51"></a>10.2.2. Compiling MySQL 5.1.x with SphinxSE</h3></div></div></div>
- <div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>in MySQL sources directory, create <code class="filename">storage/sphinx</code>
- directory and copy all files in <code class="filename">mysqlse</code> directory
- from Sphinx sources there. Example:
- </p><pre class="programlisting">
- cp -R /root/builds/sphinx-0.9.7/mysqlse /root/builds/mysql-5.1.14/storage/sphinx
- </pre><p>
- </p></li>
- <li class="listitem"><p>in MySQL sources directory, run
- </p><pre class="programlisting">
- sh BUILD/autorun.sh
- </pre><p>
- </p></li>
- <li class="listitem"><p>
- configure MySQL and enable Sphinx engine:
- </p><pre class="programlisting">
- ./configure --with-plugins=sphinx
- </pre><p>
- </p></li>
- <li class="listitem"><p>
- build and install MySQL:
- </p><pre class="programlisting">
- make
- make install
- </pre><p>
- </p></li>
- </ol></div></div>
- <div class="sect2" title="10.2.3. Checking SphinxSE installation"><div class="titlepage"><div><div><h3 class="title"><a name="sphinxse-checking"></a>10.2.3. Checking SphinxSE installation</h3></div></div></div>
- <p>
- To check whether SphinxSE has been successfully compiled
- into MySQL, launch newly built servers, run mysql client and
- issue <code class="code">SHOW ENGINES</code> query. You should see a list
- of all available engines. Sphinx should be present and "Support"
- column should contain "YES":
- </p><pre class="programlisting">
- mysql> show engines;
- +------------+----------+-------------------------------------------------------------+
- | Engine | Support | Comment |
- +------------+----------+-------------------------------------------------------------+
- | MyISAM | DEFAULT | Default engine as of MySQL 3.23 with great performance |
- ...
- | SPHINX | YES | Sphinx storage engine |
- ...
- +------------+----------+-------------------------------------------------------------+
- 13 rows in set (0.00 sec)
- </pre></div></div>
- <div class="sect1" title="10.3. Using SphinxSE"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxse-using"></a>10.3. Using SphinxSE</h2></div></div></div>
- <p>
- To search via SphinxSE, you would need to create special ENGINE=SPHINX "search table",
- and then SELECT from it with full text query put into WHERE clause for query column.
- </p><p>
- Let's begin with an example create statement and search query:
- </p><pre class="programlisting">
- CREATE TABLE t1
- (
- id INTEGER UNSIGNED NOT NULL,
- weight INTEGER NOT NULL,
- query VARCHAR(3072) NOT NULL,
- group_id INTEGER,
- INDEX(query)
- ) ENGINE=SPHINX CONNECTION="sphinx://localhost:9312/test";
- SELECT * FROM t1 WHERE query='test it;mode=any';
- </pre><p>
- </p><p>
- First 3 columns of search table <span class="emphasis"><em>must</em></span> have the types of
- <code class="code">INTEGER UNSIGNED</code> or <code class="code">BIGINT</code> for the 1st column (document id),
- <code class="code">INTEGER</code> or <code class="code">BIGINT</code> for the 2nd column (match weight), and
- <code class="code">VARCHAR</code> or <code class="code">TEXT</code> for the 3rd column (your query), respectively.
- This mapping is fixed; you can not omit any of these three required columns,
- or move them around, or change types. Also, query column must be indexed;
- all the others must be kept unindexed. Columns' names are ignored so you
- can use arbitrary ones.
- </p><p>
- Additional columns must be either <code class="code">INTEGER</code>, <code class="code">TIMESTAMP</code>,
- <code class="code">BIGINT</code>, <code class="code">VARCHAR</code>, or <code class="code">FLOAT</code>.
- They will be bound to attributes provided in Sphinx result set by name, so their
- names must match attribute names specified in <code class="filename">sphinx.conf</code>.
- If there's no such attribute name in Sphinx search results, column will have
- <code class="code">NULL</code> values.
- </p><p>
- Special "virtual" attributes names can also be bound to SphinxSE columns.
- <code class="code">_sph_</code> needs to be used instead of <code class="code">@</code> for that.
- For instance, to obtain the values of <code class="code">@groupby</code>, <code class="code">@count</code>,
- or <code class="code">@distinct</code> virtual attributes, use <code class="code">_sph_groupby</code>,
- <code class="code">_sph_count</code> or <code class="code">_sph_distinct</code> column names, respectively.
- </p><p>
- <code class="code">CONNECTION</code> string parameter can be used to specify default
- searchd host, port and indexes for queries issued using this table.
- If no connection string is specified in <code class="code">CREATE TABLE</code>,
- index name "*" (ie. search all indexes) and localhost:9312 are assumed.
- Connection string syntax is as follows:
- </p><pre class="programlisting">
- CONNECTION="sphinx://HOST:PORT/INDEXNAME"
- </pre><p>
- You can change the default connection string later:
- </p><pre class="programlisting">
- ALTER TABLE t1 CONNECTION="sphinx://NEWHOST:NEWPORT/NEWINDEXNAME";
- </pre><p>
- You can also override all these parameters per-query.
- </p><p>
- As seen in example, both query text and search options should be put
- into WHERE clause on search query column (ie. 3rd column); the options
- are separated by semicolons; and their names are separated from values by an equality sign.
- Any number of options can be specified. Available options are:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>query - query text;</p></li>
- <li class="listitem"><p>mode - matching mode. Must be one of "all", "any", "phrase",
- "boolean", or "extended". Default is "all";</p></li>
- <li class="listitem"><p>sort - match sorting mode. Must be one of "relevance", "attr_desc",
- "attr_asc", "time_segments", or "extended". In all modes besides "relevance"
- attribute name (or sorting clause for "extended") is also required after a colon:
- </p><pre class="programlisting">
- ... WHERE query='test;sort=attr_asc:group_id';
- ... WHERE query='test;sort=extended:@weight desc, group_id asc';
- </pre><p>
- </p></li>
- <li class="listitem"><p>offset - offset into result set, default is 0;</p></li>
- <li class="listitem"><p>limit - amount of matches to retrieve from result set, default is 20;</p></li>
- <li class="listitem"><p>index - names of the indexes to search:
- </p><pre class="programlisting">
- ... WHERE query='test;index=test1;';
- ... WHERE query='test;index=test1,test2,test3;';
- </pre><p>
- </p></li>
- <li class="listitem"><p>minid, maxid - min and max document ID to match;</p></li>
- <li class="listitem"><p>weights - comma-separated list of weights to be assigned to Sphinx full-text fields:
- </p><pre class="programlisting">
- ... WHERE query='test;weights=1,2,3;';
- </pre><p>
- </p></li>
- <li class="listitem"><p>filter, !filter - comma-separated attribute name and a set of values to match:
- </p><pre class="programlisting">
- # only include groups 1, 5 and 19
- ... WHERE query='test;filter=group_id,1,5,19;';
- # exclude groups 3 and 11
- ... WHERE query='test;!filter=group_id,3,11;';
- </pre><p>
- </p></li>
- <li class="listitem"><p>range, !range - comma-separated (integer or bigint) Sphinx attribute name,
- and min and max values to match:
- </p><pre class="programlisting">
- # include groups from 3 to 7, inclusive
- ... WHERE query='test;range=group_id,3,7;';
- # exclude groups from 5 to 25
- ... WHERE query='test;!range=group_id,5,25;';
- </pre><p>
- </p></li>
- <li class="listitem"><p>floatrange, !floatrange - comma-separated (floating point) Sphinx attribute name,
- and min and max values to match:
- </p><pre class="programlisting">
- # filter by a float size
- ... WHERE query='test;floatrange=size,2,3;';
- # pick all results within 1000 meters from geoanchor
- ... WHERE query='test;floatrange=@geodist,0,1000;';
- </pre><p>
- </p></li>
- <li class="listitem"><p>maxmatches - per-query max matches value, as in max_matches parameter to
- <a class="link" href="#api-func-setlimits" title="9.2.1. SetLimits">SetLimits()</a> API call:
- </p><pre class="programlisting">
- ... WHERE query='test;maxmatches=2000;';
- </pre><p>
- </p></li>
- <li class="listitem"><p>cutoff - maximum allowed matches, as in cutoff parameter to
- <a class="link" href="#api-func-setlimits" title="9.2.1. SetLimits">SetLimits()</a> API call:
- </p><pre class="programlisting">
- ... WHERE query='test;cutoff=10000;';
- </pre><p>
- </p></li>
- <li class="listitem"><p>maxquerytime - maximum allowed query time (in milliseconds), as in
- <a class="link" href="#api-func-setmaxquerytime" title="9.2.2. SetMaxQueryTime">SetMaxQueryTime()</a> API call:
- </p><pre class="programlisting">
- ... WHERE query='test;maxquerytime=1000;';
- </pre><p>
- </p></li>
- <li class="listitem"><p>groupby - group-by function and attribute, corresponding to
- <a class="link" href="#api-func-setgroupby" title="9.5.1. SetGroupBy">SetGroupBy()</a> API call:
- </p><pre class="programlisting">
- ... WHERE query='test;groupby=day:published_ts;';
- ... WHERE query='test;groupby=attr:group_id;';
- </pre><p>
- </p></li>
- <li class="listitem"><p>groupsort - group-by sorting clause:
- </p><pre class="programlisting">
- ... WHERE query='test;groupsort=@count desc;';
- </pre><p>
- </p></li>
- <li class="listitem"><p>distinct - an attribute to compute COUNT(DISTINCT) for when doing group-by, as in
- <a class="link" href="#api-func-setgroupdistinct" title="9.5.2. SetGroupDistinct">SetGroupDistinct()</a> API call:
- </p><pre class="programlisting">
- ... WHERE query='test;groupby=attr:country_id;distinct=site_id';
- </pre><p>
- </p></li>
- <li class="listitem"><p>indexweights - comma-separated list of index names and weights
- to use when searching through several indexes:
- </p><pre class="programlisting">
- ... WHERE query='test;indexweights=idx_exact,2,idx_stemmed,1;';
- </pre><p>
- </p></li>
- <li class="listitem"><p>fieldweights - comma-separated list of per-field weights
- that can be used by the ranker:
- </p><pre class="programlisting">
- ... WHERE query='test;fieldweights=title,10,abstract,3,content,1;';
- </pre><p>
- </p></li>
- <li class="listitem"><p>comment - a string to mark this query in query log
- (mapping to $comment parameter in <a class="link" href="#api-func-query" title="9.6.1. Query">Query()</a> API call):
- </p><pre class="programlisting">
- ... WHERE query='test;comment=marker001;';
- </pre><p>
- </p></li>
- <li class="listitem"><p>select - a string with expressions to compute
- (mapping to <a class="link" href="#api-func-setselect" title="9.2.4. SetSelect">SetSelect()</a> API call):
- </p><pre class="programlisting">
- ... WHERE query='test;select=2*a+3*b as myexpr;';
- </pre><p>
- </p></li>
- <li class="listitem"><p>host, port - remote <code class="filename">searchd</code> host name
- and TCP port, respectively:
- </p><pre class="programlisting">
- ... WHERE query='test;host=sphinx-test.loc;port=7312;';
- </pre><p>
- </p></li>
- <li class="listitem"><p>ranker - a ranking function to use with "extended" matching mode,
- as in <a class="link" href="#api-func-setrankingmode" title="9.3.2. SetRankingMode">SetRankingMode()</a> API call
- (the only mode that supports full query syntax).
- Known values are "proximity_bm25", "bm25", "none", "wordcount", "proximity",
- "matchany", "fieldmask", "sph04" (starting with 1.10-beta),
- "expr:EXPRESSION" (starting with 2.0.4-release)
- syntax to support expression-based ranker (where EXPRESSION should be replaced
- with your specific ranking formula), and "export:EXPRESSION" (starting with 2.1.1-beta):
- </p><pre class="programlisting">
- ... WHERE query='test;mode=extended;ranker=bm25;';
- ... WHERE query='test;mode=extended;ranker=expr:sum(lcs);';
- </pre><p>
- The "export" ranker works exactly like ranker=expr, but it stores the per-document
- factor values, while ranker=expr discards them after computing the final WEIGHT() value.
- Note that ranker=export is meant to be used but rarely, only to train a ML (machine learning)
- function or to define your own ranking function by hand, and never in actual production. When using
- this ranker, you'll probably want to examine the output of the RANKFACTORS() function (added in
- version 2.1.1-beta) that produces a string with all the field level factors for each document.
- </p><pre class="programlisting">
- SELECT *, WEIGHT(), RANKFACTORS()
- FROM myindex
- WHERE MATCH('dog')
- OPTION ranker=export('100*bm25')
- </pre><p>would produce something like</p><pre class="programlisting">
- *************************** 1. row ***************************
- id: 555617
- published: 1110067331
- channel_id: 1059819
- title: 7
- content: 428
- weight(): 69900
- rankfactors(): bm25=699, bm25a=0.666478, field_mask=2,
- doc_word_count=1, field1=(lcs=1, hit_count=4, word_count=1,
- tf_idf=1.038127, min_idf=0.259532, max_idf=0.259532, sum_idf=0.259532,
- min_hit_pos=120, min_best_span_pos=120, exact_hit=0,
- max_window_hits=1), word1=(tf=4, idf=0.259532)
- *************************** 2. row ***************************
- id: 555313
- published: 1108438365
- channel_id: 1058561
- title: 8
- content: 249
- weight(): 68500
- rankfactors(): bm25=685, bm25a=0.675213, field_mask=3,
- doc_word_count=1, field0=(lcs=1, hit_count=1, word_count=1,
- tf_idf=0.259532, min_idf=0.259532, max_idf=0.259532, sum_idf=0.259532,
- min_hit_pos=8, min_best_span_pos=8, exact_hit=0, max_window_hits=1),
- field1=(lcs=1, hit_count=2, word_count=1, tf_idf=0.519063,
- min_idf=0.259532, max_idf=0.259532, sum_idf=0.259532, min_hit_pos=36,
- min_best_span_pos=36, exact_hit=0, max_window_hits=1), word1=(tf=3,
- idf=0.259532)
- </pre></li>
- <li class="listitem"><p>geoanchor - geodistance anchor, as in
- <a class="link" href="#api-func-setgeoanchor" title="9.4.5. SetGeoAnchor">SetGeoAnchor()</a> API call.
- Takes 4 parameters which are latitude and longitude attribute names,
- and anchor point coordinates respectively:
- </p><pre class="programlisting">
- ... WHERE query='test;geoanchor=latattr,lonattr,0.123,0.456';
- </pre><p>
- </p></li>
- </ul></div>
- <p>
- </p><p>
- One <span class="bold"><strong>very important</strong></span> note is that it is
- <span class="bold"><strong>much</strong></span> more efficient to allow Sphinx
- to perform sorting, filtering and slicing the result set than to raise
- max matches count and use WHERE, ORDER BY and LIMIT clauses on MySQL
- side. This is for two reasons. First, Sphinx does a number of
- optimizations and performs better than MySQL on these tasks.
- Second, less data would need to be packed by searchd, transferred
- and unpacked by SphinxSE.
- </p><p>
- Starting with version 0.9.9-rc1, additional query info besides result set could be
- retrieved with <code class="code">SHOW ENGINE SPHINX STATUS</code> statement:
- </p><pre class="programlisting">
- mysql> SHOW ENGINE SPHINX STATUS;
- +--------+-------+-------------------------------------------------+
- | Type | Name | Status |
- +--------+-------+-------------------------------------------------+
- | SPHINX | stats | total: 25, total found: 25, time: 126, words: 2 |
- | SPHINX | words | sphinx:591:1256 soft:11076:15945 |
- +--------+-------+-------------------------------------------------+
- 2 rows in set (0.00 sec)
- </pre><p>
- This information can also be accessed through status variables. Note
- that this method does not require super-user privileges.
- </p><pre class="programlisting">
- mysql> SHOW STATUS LIKE 'sphinx_%';
- +--------------------+----------------------------------+
- | Variable_name | Value |
- +--------------------+----------------------------------+
- | sphinx_total | 25 |
- | sphinx_total_found | 25 |
- | sphinx_time | 126 |
- | sphinx_word_count | 2 |
- | sphinx_words | sphinx:591:1256 soft:11076:15945 |
- +--------------------+----------------------------------+
- 5 rows in set (0.00 sec)
- </pre><p>
- </p><p>
- You could perform JOINs on SphinxSE search table and tables using
- other engines. Here's an example with "documents" from example.sql:
- </p><pre class="programlisting">
- mysql> SELECT content, date_added FROM test.documents docs
- -> JOIN t1 ON (docs.id=t1.id)
- -> WHERE query="one document;mode=any";
- +-------------------------------------+---------------------+
- | content | date_added |
- +-------------------------------------+---------------------+
- | this is my test document number two | 2006-06-17 14:04:28 |
- | this is my test document number one | 2006-06-17 14:04:28 |
- +-------------------------------------+---------------------+
- 2 rows in set (0.00 sec)
- mysql> SHOW ENGINE SPHINX STATUS;
- +--------+-------+---------------------------------------------+
- | Type | Name | Status |
- +--------+-------+---------------------------------------------+
- | SPHINX | stats | total: 2, total found: 2, time: 0, words: 2 |
- | SPHINX | words | one:1:2 document:2:2 |
- +--------+-------+---------------------------------------------+
- 2 rows in set (0.00 sec)
- </pre><p>
- </p></div>
- <div class="sect1" title="10.4. Building snippets (excerpts) via MySQL"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="sphinxse-snippets"></a>10.4. Building snippets (excerpts) via MySQL</h2></div></div></div>
- <p>
- Starting with version 0.9.9-rc2, SphinxSE also includes a UDF function
- that lets you create snippets through MySQL. The functionality is fully
- similar to <a class="link" href="#api-func-buildexcerpts" title="9.7.1. BuildExcerpts">BuildExcerpts</a>
- API call but accessible through MySQL+SphinxSE.
- </p><p>
- The binary that provides the UDF is named <code class="filename">sphinx.so</code>
- and should be automatically built and installed to proper location
- along with SphinxSE itself. If it does not get installed automatically
- for some reason, look for <code class="filename">sphinx.so</code> in the build
- directory and copy it to the plugins directory of your MySQL instance.
- After that, register the UDF using the following statement:
- </p><pre class="programlisting">
- CREATE FUNCTION sphinx_snippets RETURNS STRING SONAME 'sphinx.so';
- </pre><p>
- </p><p>
- Function name <span class="emphasis"><em>must</em></span> be sphinx_snippets,
- you can not use an arbitrary name. Function arguments are as follows:
- </p><p>
- <span class="bold"><strong>Prototype:</strong></span> function sphinx_snippets ( document, index, words, [options] );
- </p><p>
- Document and words arguments can be either strings or table columns.
- Options must be specified like this: <code class="code">'value' AS option_name</code>.
- For a list of supported options, refer to
- <a class="link" href="#api-func-buildexcerpts" title="9.7.1. BuildExcerpts">BuildExcerpts()</a> API call.
- The only UDF-specific additional option is named <code class="code">'sphinx'</code>
- and lets you specify searchd location (host and port).
- </p><p>
- Usage examples:
- </p><pre class="programlisting">
- SELECT sphinx_snippets('hello world doc', 'main', 'world',
- 'sphinx://192.168.1.1/' AS sphinx, true AS exact_phrase,
- '[b]' AS before_match, '[/b]' AS after_match)
- FROM documents;
- SELECT title, sphinx_snippets(text, 'index', 'mysql php') AS text
- FROM sphinx, documents
- WHERE query='mysql php' AND sphinx.id=documents.id;
- </pre><p>
- </p></div></div>
- <div class="chapter" title="Chapter 11. Reporting bugs"><div class="titlepage"><div><div><h2 class="title"><a name="reporting-bugs"></a>Chapter 11. Reporting bugs</h2></div></div></div>
- <p>
- Unfortunately, Sphinx is not yet 100% bug free (even though we're working hard
- towards that), so you might occasionally run into some issues.
- </p><p>
- Reporting as much as possible about each bug is very important -
- because to fix it, we need to be able either to reproduce and fix the bug,
- or to deduce what's causing it from the information that you provide.
- So here are some instructions on how to do that.
- </p><h2><a name="idp32999280"></a>Bug-tracker</h2><p>Nothing special to say here. Here is the
- <a href="http://sphinxsearch.com/bugs">link</a>. Create a new
- ticket and describe your bug in detail so that both you and the developers can
- save time.</p><h2><a name="idp33000656"></a>Crashes</h2><p>In case of crashes we sometimes can get enough info to fix the bug from the
- backtrace.</p><p>Sphinx tries to write crash backtrace to its log file. It may look like
- this:
- </p><pre class="programlisting">
- ./indexer(_Z12sphBacktraceib+0x2d6)[0x5d337e]
- ./indexer(_Z7sigsegvi+0xbc)[0x4ce26a]
- /lib64/libpthread.so.0[0x3f75a0dd40]
- /lib64/libc.so.6(fwrite+0x34)[0x3f74e5f564]
- ./indexer(_ZN27CSphCharsetDefinitionParser5ParseEPKcR10CSphVectorI14CSphRemapRange16CSphVe
- ctorPolicyIS3_EE+0x5b)[0x51701b]
- ./indexer(_ZN13ISphTokenizer14SetCaseFoldingEPKcR10CSphString+0x62)[0x517e4c]
- ./indexer(_ZN17CSphTokenizerBase14SetCaseFoldingEPKcR10CSphString+0xbd)[0x518283]
- ./indexer(_ZN18CSphTokenizer_SBCSILb0EEC1Ev+0x3f)[0x5b312b]
- ./indexer(_Z22sphCreateSBCSTokenizerv+0x20)[0x51835c]
- ./indexer(_ZN13ISphTokenizer6CreateERK21CSphTokenizerSettingsPK17CSphEmbeddedFilesR10CSphS
- tring+0x47)[0x5183d7]
- ./indexer(_Z7DoIndexRK17CSphConfigSectionPKcRK17SmallStringHash_TIS_EbP8_IO_FILE+0x494)[0x
- 4d31c8]
- ./indexer(main+0x1a17)[0x4d6719]
- /lib64/libc.so.6(__libc_start_main+0xf4)[0x3f74e1d8a4]
- ./indexer(__gxx_personality_v0+0x231)[0x4cd779]
- </pre><p>
- This is an example of a good backtrace - we can see mangled function names
- here.</p><p>But sometimes backtrace may look like this:
- </p><pre class="programlisting">
- /opt/piler/bin/indexer[0x4c4919]
- /opt/piler/bin/indexer[0x405cf0]
- /lib/x86_64-linux-gnu/libpthread.so.0(+0xfcb0)[0x7fc659cb6cb0]
- /opt/piler/bin/indexer[0x4237fd]
- /opt/piler/bin/indexer[0x491de6]
- /opt/piler/bin/indexer[0x451704]
- /opt/piler/bin/indexer[0x40861a]
- /opt/piler/bin/indexer[0x40442c]
- /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xed)[0x7fc6588aa76d]
- /opt/piler/bin/indexer[0x405b89]
- </pre><p>
- Developers can get nothing useful from those cryptic numbers. They're
- ordinary humans and want to see function names. To help them you need to
- provide symbols (function and variable names). If you've installed Sphinx by
- building from the sources, run the following command over your binary:
- </p><pre class="programlisting">
- nm -n indexer > indexer.sym
- </pre><p>
- Attach this file to bug report along with backtrace. You should however ensure
- that the binary is not stripped. Our official binary packages should be fine.
- (That, or we have the symbols stored.) However, if you manually build Sphinx
- from the source tarball, do not run <code class="filename">strip</code> utility on that
- binary, and/or do not let your build/packaging system do that!</p><h2><a name="idp33007312"></a>Uploading your data</h2><p>To fix your bug developers often need to reproduce it on their machines.
- To do this they need your sphinx.conf, index files, binlog (if present),
- sometimes data to index (like SQL tables or XMLpipe2 data files) and queries.
- </p><p>
- Attach your data to the ticket. In case it's too big to attach, ask the developers and
- they will give you the address of a write-only FTP created exactly for such purposes.
- </p></div>
- <div class="chapter" title="Chapter 12. sphinx.conf options reference"><div class="titlepage"><div><div><h2 class="title"><a name="conf-reference"></a>Chapter 12. <code class="filename">sphinx.conf</code> options reference</h2></div></div></div>
- <div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="sect1"><a href="#confgroup-source">12.1. Data source configuration options</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#conf-source-type">12.1.1. type</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-host">12.1.2. sql_host</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-port">12.1.3. sql_port</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-user">12.1.4. sql_user</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-pass">12.1.5. sql_pass</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-db">12.1.6. sql_db</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-sock">12.1.7. sql_sock</a></span></dt>
- <dt><span class="sect2"><a href="#conf-mysql-connect-flags">12.1.8. mysql_connect_flags</a></span></dt>
- <dt><span class="sect2"><a href="#conf-mysql-ssl">12.1.9. mysql_ssl_cert, mysql_ssl_key, mysql_ssl_ca</a></span></dt>
- <dt><span class="sect2"><a href="#conf-odbc-dsn">12.1.10. odbc_dsn</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-query-pre">12.1.11. sql_query_pre</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-query">12.1.12. sql_query</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-joined-field">12.1.13. sql_joined_field</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-query-range">12.1.14. sql_query_range</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-range-step">12.1.15. sql_range_step</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-query-killlist">12.1.16. sql_query_killlist</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-attr-uint">12.1.17. sql_attr_uint</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-attr-bool">12.1.18. sql_attr_bool</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-attr-bigint">12.1.19. sql_attr_bigint</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-attr-timestamp">12.1.20. sql_attr_timestamp</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-attr-float">12.1.21. sql_attr_float</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-attr-multi">12.1.22. sql_attr_multi</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-attr-string">12.1.23. sql_attr_string</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-attr-json">12.1.24. sql_attr_json</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-column-buffers">12.1.25. sql_column_buffers</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-field-string">12.1.26. sql_field_string</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-file-field">12.1.27. sql_file_field</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-query-post">12.1.28. sql_query_post</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-query-post-index">12.1.29. sql_query_post_index</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sql-ranged-throttle">12.1.30. sql_ranged_throttle</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-command">12.1.31. xmlpipe_command</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-field">12.1.32. xmlpipe_field</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-field-string">12.1.33. xmlpipe_field_string</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-attr-uint">12.1.34. xmlpipe_attr_uint</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-attr-bigint">12.1.35. xmlpipe_attr_bigint</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-attr-bool">12.1.36. xmlpipe_attr_bool</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-attr-timestamp">12.1.37. xmlpipe_attr_timestamp</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-attr-float">12.1.38. xmlpipe_attr_float</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-attr-multi">12.1.39. xmlpipe_attr_multi</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-attr-multi-64">12.1.40. xmlpipe_attr_multi_64</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-attr-string">12.1.41. xmlpipe_attr_string</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-attr-json">12.1.42. xmlpipe_attr_json</a></span></dt>
- <dt><span class="sect2"><a href="#conf-xmlpipe-fixup-utf8">12.1.43. xmlpipe_fixup_utf8</a></span></dt>
- <dt><span class="sect2"><a href="#conf-mssql-winauth">12.1.44. mssql_winauth</a></span></dt>
- <dt><span class="sect2"><a href="#conf-unpack-zlib">12.1.45. unpack_zlib</a></span></dt>
- <dt><span class="sect2"><a href="#conf-unpack-mysqlcompress">12.1.46. unpack_mysqlcompress</a></span></dt>
- <dt><span class="sect2"><a href="#conf-unpack-mysqlcompress-maxsize">12.1.47. unpack_mysqlcompress_maxsize</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#confgroup-index">12.2. Index configuration options</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#conf-index-type">12.2.1. type</a></span></dt>
- <dt><span class="sect2"><a href="#conf-source">12.2.2. source</a></span></dt>
- <dt><span class="sect2"><a href="#conf-path">12.2.3. path</a></span></dt>
- <dt><span class="sect2"><a href="#conf-docinfo">12.2.4. docinfo</a></span></dt>
- <dt><span class="sect2"><a href="#conf-mlock">12.2.5. mlock</a></span></dt>
- <dt><span class="sect2"><a href="#conf-morphology">12.2.6. morphology</a></span></dt>
- <dt><span class="sect2"><a href="#conf-dict">12.2.7. dict</a></span></dt>
- <dt><span class="sect2"><a href="#conf-index-sp">12.2.8. index_sp</a></span></dt>
- <dt><span class="sect2"><a href="#conf-index-zones">12.2.9. index_zones</a></span></dt>
- <dt><span class="sect2"><a href="#conf-min-stemming-len">12.2.10. min_stemming_len</a></span></dt>
- <dt><span class="sect2"><a href="#conf-stopwords">12.2.11. stopwords</a></span></dt>
- <dt><span class="sect2"><a href="#conf-wordforms">12.2.12. wordforms</a></span></dt>
- <dt><span class="sect2"><a href="#conf-embedded-limit">12.2.13. embedded_limit</a></span></dt>
- <dt><span class="sect2"><a href="#conf-exceptions">12.2.14. exceptions</a></span></dt>
- <dt><span class="sect2"><a href="#conf-min-word-len">12.2.15. min_word_len</a></span></dt>
- <dt><span class="sect2"><a href="#conf-charset-table">12.2.16. charset_table</a></span></dt>
- <dt><span class="sect2"><a href="#conf-ignore-chars">12.2.17. ignore_chars</a></span></dt>
- <dt><span class="sect2"><a href="#conf-min-prefix-len">12.2.18. min_prefix_len</a></span></dt>
- <dt><span class="sect2"><a href="#conf-min-infix-len">12.2.19. min_infix_len</a></span></dt>
- <dt><span class="sect2"><a href="#conf-max-substring-len">12.2.20. max_substring_len</a></span></dt>
- <dt><span class="sect2"><a href="#conf-prefix-fields">12.2.21. prefix_fields</a></span></dt>
- <dt><span class="sect2"><a href="#conf-infix-fields">12.2.22. infix_fields</a></span></dt>
- <dt><span class="sect2"><a href="#conf-ngram-len">12.2.23. ngram_len</a></span></dt>
- <dt><span class="sect2"><a href="#conf-ngram-chars">12.2.24. ngram_chars</a></span></dt>
- <dt><span class="sect2"><a href="#conf-phrase-boundary">12.2.25. phrase_boundary</a></span></dt>
- <dt><span class="sect2"><a href="#conf-phrase-boundary-step">12.2.26. phrase_boundary_step</a></span></dt>
- <dt><span class="sect2"><a href="#conf-html-strip">12.2.27. html_strip</a></span></dt>
- <dt><span class="sect2"><a href="#conf-html-index-attrs">12.2.28. html_index_attrs</a></span></dt>
- <dt><span class="sect2"><a href="#conf-html-remove-elements">12.2.29. html_remove_elements</a></span></dt>
- <dt><span class="sect2"><a href="#conf-local">12.2.30. local</a></span></dt>
- <dt><span class="sect2"><a href="#conf-agent">12.2.31. agent</a></span></dt>
- <dt><span class="sect2"><a href="#conf-agent-persistent">12.2.32. agent_persistent</a></span></dt>
- <dt><span class="sect2"><a href="#conf-agent-blackhole">12.2.33. agent_blackhole</a></span></dt>
- <dt><span class="sect2"><a href="#conf-agent-connect-timeout">12.2.34. agent_connect_timeout</a></span></dt>
- <dt><span class="sect2"><a href="#conf-agent-query-timeout">12.2.35. agent_query_timeout</a></span></dt>
- <dt><span class="sect2"><a href="#conf-preopen">12.2.36. preopen</a></span></dt>
- <dt><span class="sect2"><a href="#conf-inplace-enable">12.2.37. inplace_enable</a></span></dt>
- <dt><span class="sect2"><a href="#conf-inplace-hit-gap">12.2.38. inplace_hit_gap</a></span></dt>
- <dt><span class="sect2"><a href="#conf-inplace-docinfo-gap">12.2.39. inplace_docinfo_gap</a></span></dt>
- <dt><span class="sect2"><a href="#conf-inplace-reloc-factor">12.2.40. inplace_reloc_factor</a></span></dt>
- <dt><span class="sect2"><a href="#conf-inplace-write-factor">12.2.41. inplace_write_factor</a></span></dt>
- <dt><span class="sect2"><a href="#conf-index-exact-words">12.2.42. index_exact_words</a></span></dt>
- <dt><span class="sect2"><a href="#conf-overshort-step">12.2.43. overshort_step</a></span></dt>
- <dt><span class="sect2"><a href="#conf-stopword-step">12.2.44. stopword_step</a></span></dt>
- <dt><span class="sect2"><a href="#conf-hitless-words">12.2.45. hitless_words</a></span></dt>
- <dt><span class="sect2"><a href="#conf-expand-keywords">12.2.46. expand_keywords</a></span></dt>
- <dt><span class="sect2"><a href="#conf-blend-chars">12.2.47. blend_chars</a></span></dt>
- <dt><span class="sect2"><a href="#conf-blend-mode">12.2.48. blend_mode</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-mem-limit">12.2.49. rt_mem_limit</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-field">12.2.50. rt_field</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-attr-uint">12.2.51. rt_attr_uint</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-attr-bool">12.2.52. rt_attr_bool</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-attr-bigint">12.2.53. rt_attr_bigint</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-attr-float">12.2.54. rt_attr_float</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-attr-multi">12.2.55. rt_attr_multi</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-attr-multi-64">12.2.56. rt_attr_multi_64</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-attr-timestamp">12.2.57. rt_attr_timestamp</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-attr-string">12.2.58. rt_attr_string</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-attr-json">12.2.59. rt_attr_json</a></span></dt>
- <dt><span class="sect2"><a href="#conf-ha-strategy">12.2.60. ha_strategy</a></span></dt>
- <dt><span class="sect2"><a href="#conf-bigram-freq-words">12.2.61. bigram_freq_words</a></span></dt>
- <dt><span class="sect2"><a href="#conf-bigram-index">12.2.62. bigram_index</a></span></dt>
- <dt><span class="sect2"><a href="#conf-index-field-lengths">12.2.63. index_field_lengths</a></span></dt>
- <dt><span class="sect2"><a href="#conf-regexp-filter">12.2.64. regexp_filter</a></span></dt>
- <dt><span class="sect2"><a href="#conf-stopwords-unstemmed">12.2.65. stopwords_unstemmed</a></span></dt>
- <dt><span class="sect2"><a href="#conf-global-idf">12.2.66. global_idf</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rlp-context">12.2.67. rlp_context</a></span></dt>
- <dt><span class="sect2"><a href="#conf-ondisk-attrs">12.2.68. ondisk_attrs</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#confgroup-indexer">12.3. <code class="filename">indexer</code> program configuration options</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#conf-mem-limit">12.3.1. mem_limit</a></span></dt>
- <dt><span class="sect2"><a href="#conf-max-iops">12.3.2. max_iops</a></span></dt>
- <dt><span class="sect2"><a href="#conf-max-iosize">12.3.3. max_iosize</a></span></dt>
- <dt><span class="sect2"><a href="#conf-max-xmlpipe2-field">12.3.4. max_xmlpipe2_field</a></span></dt>
- <dt><span class="sect2"><a href="#conf-write-buffer">12.3.5. write_buffer</a></span></dt>
- <dt><span class="sect2"><a href="#conf-max-file-field-buffer">12.3.6. max_file_field_buffer</a></span></dt>
- <dt><span class="sect2"><a href="#conf-on-file-field-error">12.3.7. on_file_field_error</a></span></dt>
- <dt><span class="sect2"><a href="#conf-lemmatizer-cache">12.3.8. lemmatizer_cache</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#confgroup-searchd">12.4. <code class="filename">searchd</code> program configuration options</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#conf-listen">12.4.1. listen</a></span></dt>
- <dt><span class="sect2"><a href="#conf-log">12.4.2. log</a></span></dt>
- <dt><span class="sect2"><a href="#conf-query-log">12.4.3. query_log</a></span></dt>
- <dt><span class="sect2"><a href="#conf-query-log-format">12.4.4. query_log_format</a></span></dt>
- <dt><span class="sect2"><a href="#conf-read-timeout">12.4.5. read_timeout</a></span></dt>
- <dt><span class="sect2"><a href="#conf-client-timeout">12.4.6. client_timeout</a></span></dt>
- <dt><span class="sect2"><a href="#conf-max-children">12.4.7. max_children</a></span></dt>
- <dt><span class="sect2"><a href="#conf-pid-file">12.4.8. pid_file</a></span></dt>
- <dt><span class="sect2"><a href="#conf-seamless-rotate">12.4.9. seamless_rotate</a></span></dt>
- <dt><span class="sect2"><a href="#conf-preopen-indexes">12.4.10. preopen_indexes</a></span></dt>
- <dt><span class="sect2"><a href="#conf-unlink-old">12.4.11. unlink_old</a></span></dt>
- <dt><span class="sect2"><a href="#conf-attr-flush-period">12.4.12. attr_flush_period</a></span></dt>
- <dt><span class="sect2"><a href="#conf-max-packet-size">12.4.13. max_packet_size</a></span></dt>
- <dt><span class="sect2"><a href="#conf-mva-updates-pool">12.4.14. mva_updates_pool</a></span></dt>
- <dt><span class="sect2"><a href="#conf-max-filters">12.4.15. max_filters</a></span></dt>
- <dt><span class="sect2"><a href="#conf-max-filter-values">12.4.16. max_filter_values</a></span></dt>
- <dt><span class="sect2"><a href="#conf-listen-backlog">12.4.17. listen_backlog</a></span></dt>
- <dt><span class="sect2"><a href="#conf-read-buffer">12.4.18. read_buffer</a></span></dt>
- <dt><span class="sect2"><a href="#conf-read-unhinted">12.4.19. read_unhinted</a></span></dt>
- <dt><span class="sect2"><a href="#conf-max-batch-queries">12.4.20. max_batch_queries</a></span></dt>
- <dt><span class="sect2"><a href="#conf-subtree-docs-cache">12.4.21. subtree_docs_cache</a></span></dt>
- <dt><span class="sect2"><a href="#conf-subtree-hits-cache">12.4.22. subtree_hits_cache</a></span></dt>
- <dt><span class="sect2"><a href="#conf-workers">12.4.23. workers</a></span></dt>
- <dt><span class="sect2"><a href="#conf-dist-threads">12.4.24. dist_threads</a></span></dt>
- <dt><span class="sect2"><a href="#conf-binlog-path">12.4.25. binlog_path</a></span></dt>
- <dt><span class="sect2"><a href="#conf-binlog-flush">12.4.26. binlog_flush</a></span></dt>
- <dt><span class="sect2"><a href="#conf-binlog-max-log-size">12.4.27. binlog_max_log_size</a></span></dt>
- <dt><span class="sect2"><a href="#conf-snippets-file-prefix">12.4.28. snippets_file_prefix</a></span></dt>
- <dt><span class="sect2"><a href="#conf-collation-server">12.4.29. collation_server</a></span></dt>
- <dt><span class="sect2"><a href="#conf-collation-libc-locale">12.4.30. collation_libc_locale</a></span></dt>
- <dt><span class="sect2"><a href="#conf-plugin-dir">12.4.31. plugin_dir</a></span></dt>
- <dt><span class="sect2"><a href="#conf-mysql-version-string">12.4.32. mysql_version_string</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-flush-period">12.4.33. rt_flush_period</a></span></dt>
- <dt><span class="sect2"><a href="#conf-thread-stack">12.4.34. thread_stack</a></span></dt>
- <dt><span class="sect2"><a href="#conf-expansion-limit">12.4.35. expansion_limit</a></span></dt>
- <dt><span class="sect2"><a href="#conf-watchdog">12.4.36. watchdog</a></span></dt>
- <dt><span class="sect2"><a href="#conf-prefork-rotation-throttle">12.4.37. prefork_rotation_throttle</a></span></dt>
- <dt><span class="sect2"><a href="#conf-sphinxql-state">12.4.38. sphinxql_state</a></span></dt>
- <dt><span class="sect2"><a href="#conf-ha-ping-interval">12.4.39. ha_ping_interval</a></span></dt>
- <dt><span class="sect2"><a href="#conf-ha-period-karma">12.4.40. ha_period_karma</a></span></dt>
- <dt><span class="sect2"><a href="#conf-persistent-connections-limit">12.4.41. persistent_connections_limit</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-merge-iops">12.4.42. rt_merge_iops</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rt-merge-maxiosize">12.4.43. rt_merge_maxiosize</a></span></dt>
- <dt><span class="sect2"><a href="#conf-predicted-time-costs">12.4.44. predicted_time_costs</a></span></dt>
- <dt><span class="sect2"><a href="#conf-shutdown-timeout">12.4.45. shutdown_timeout</a></span></dt>
- <dt><span class="sect2"><a href="#conf-ondisk-attrs-default">12.4.46. ondisk_attrs_default</a></span></dt>
- <dt><span class="sect2"><a href="#conf-query-log-min-msec">12.4.47. query_log_min_msec</a></span></dt>
- <dt><span class="sect2"><a href="#conf-agent-connect-timeout-default">12.4.48. agent_connect_timeout</a></span></dt>
- <dt><span class="sect2"><a href="#conf-agent-query-timeout-default">12.4.49. agent_query_timeout</a></span></dt>
- <dt><span class="sect2"><a href="#conf-agent-retry-count">12.4.50. agent_retry_count</a></span></dt>
- <dt><span class="sect2"><a href="#conf-agent-retry-delay">12.4.51. agent_retry_delay</a></span></dt>
- </dl></dd><dt><span class="sect1"><a href="#confgroup-common">12.5. Common section configuration options</a></span></dt>
- <dd><dl><dt><span class="sect2"><a href="#conf-lemmatizer-base">12.5.1. lemmatizer_base</a></span></dt>
- <dt><span class="sect2"><a href="#conf-on-json-attr-error">12.5.2. on_json_attr_error</a></span></dt>
- <dt><span class="sect2"><a href="#conf-json-autoconv-numbers">12.5.3. json_autoconv_numbers</a></span></dt>
- <dt><span class="sect2"><a href="#conf-json-autoconv-keynames">12.5.4. json_autoconv_keynames</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rlp-root">12.5.5. rlp_root</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rlp-environment">12.5.6. rlp_environment</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rlp-max-batch-size">12.5.7. rlp_max_batch_size</a></span></dt>
- <dt><span class="sect2"><a href="#conf-rlp-max-batch-docs">12.5.8. rlp_max_batch_docs</a></span></dt>
- </dl></dd></dl></div>
- <div class="sect1" title="12.1. Data source configuration options"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="confgroup-source"></a>12.1. Data source configuration options</h2></div></div></div>
- <div class="sect2" title="12.1.1. type"><div class="titlepage"><div><div><h3 class="title"><a name="conf-source-type"></a>12.1.1. type</h3></div></div></div>
- <p>
- Data source type.
- Mandatory, no default value.
- Known types are <code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>,
- <code class="option">xmlpipe2</code>, <code class="option">tsvpipe</code>, and <code class="option">odbc</code>.
- </p><p>
- All other per-source options depend on source type selected by this option.
- Names of the options used for SQL sources (i.e. MySQL, PostgreSQL, MS SQL) start with "sql_";
- names of the ones used for xmlpipe2 or tsvpipe start with "xmlpipe_" and "tsvpipe_" respectively.
- All source types are conditional; they might or might
- not be supported depending on your build settings, installed client libraries, etc.
- <code class="option">mssql</code> type is currently only available on Windows.
- <code class="option">odbc</code> type is available both on Windows natively and on
- Linux through <a class="ulink" href="http://www.unixodbc.org/" target="_top">UnixODBC library</a>.
- </p><h4><a name="idp33017584"></a>Example:</h4><pre class="programlisting">
- type = mysql
- </pre></div>
- <div class="sect2" title="12.1.2. sql_host"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-host"></a>12.1.2. sql_host</h3></div></div></div>
- <p>
- SQL server host to connect to.
- Mandatory, no default value.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- </p><p>
- In the simplest case when Sphinx resides on the same host as your MySQL
- or PostgreSQL installation, you would simply specify "localhost". Note that
- MySQL client library chooses whether to connect over TCP/IP or over UNIX
- socket based on the host name. Specifically "localhost" will force it
- to use UNIX socket (this is the default and generally recommended mode)
- and "127.0.0.1" will force TCP/IP usage. Refer to
- <a class="ulink" href="http://dev.mysql.com/doc/refman/5.0/en/mysql-real-connect.html" target="_top">MySQL manual</a>
- for more details.
- </p><h4><a name="idp33023200"></a>Example:</h4><pre class="programlisting">
- sql_host = localhost
- </pre></div>
- <div class="sect2" title="12.1.3. sql_port"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-port"></a>12.1.3. sql_port</h3></div></div></div>
- <p>
- SQL server IP port to connect to.
- Optional, default is 3306 for <code class="option">mysql</code> source type and 5432 for <code class="option">pgsql</code> type.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- Note that it depends on <a class="link" href="#conf-sql-host" title="12.1.2. sql_host">sql_host</a> setting whether this value will actually be used.
- </p><h4><a name="idp33029072"></a>Example:</h4><pre class="programlisting">
- sql_port = 3306
- </pre></div>
- <div class="sect2" title="12.1.4. sql_user"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-user"></a>12.1.4. sql_user</h3></div></div></div>
- <p>
- SQL user to use when connecting to <a class="link" href="#conf-sql-host" title="12.1.2. sql_host">sql_host</a>.
- Mandatory, no default value.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- </p><h4><a name="idp33033920"></a>Example:</h4><pre class="programlisting">
- sql_user = test
- </pre></div>
- <div class="sect2" title="12.1.5. sql_pass"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-pass"></a>12.1.5. sql_pass</h3></div></div></div>
- <p>
- SQL user password to use when connecting to <a class="link" href="#conf-sql-host" title="12.1.2. sql_host">sql_host</a>.
- Mandatory, no default value.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- </p><h4><a name="idp33038736"></a>Example:</h4><pre class="programlisting">
- sql_pass = mysecretpassword
- </pre></div>
- <div class="sect2" title="12.1.6. sql_db"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-db"></a>12.1.6. sql_db</h3></div></div></div>
- <p>
- SQL database (in MySQL terms) to select after connecting, and to perform all further queries within.
- Mandatory, no default value.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- </p><h4><a name="idp33042848"></a>Example:</h4><pre class="programlisting">
- sql_db = test
- </pre></div>
- <div class="sect2" title="12.1.7. sql_sock"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-sock"></a>12.1.7. sql_sock</h3></div></div></div>
- <p>
- UNIX socket name to connect to for local SQL servers.
- Optional, default value is empty (use client library default settings).
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- </p><p>
- On Linux, it would typically be <code class="filename">/var/lib/mysql/mysql.sock</code>.
- On FreeBSD, it would typically be <code class="filename">/tmp/mysql.sock</code>.
- Note that it depends on <a class="link" href="#conf-sql-host" title="12.1.2. sql_host">sql_host</a> setting whether this value will actually be used.
- </p><h4><a name="idp33049728"></a>Example:</h4><pre class="programlisting">
- sql_sock = /tmp/mysql.sock
- </pre></div>
- <div class="sect2" title="12.1.8. mysql_connect_flags"><div class="titlepage"><div><div><h3 class="title"><a name="conf-mysql-connect-flags"></a>12.1.8. mysql_connect_flags</h3></div></div></div>
- <p>
- MySQL client connection flags.
- Optional, default value is 0 (do not set any flags).
- Applies to <code class="option">mysql</code> source type only.
- </p><p>
- This option must contain an integer value with the sum of the flags.
- The value will be passed to <a class="ulink" href="http://dev.mysql.com/doc/refman/5.0/en/mysql-real-connect.html" target="_top">mysql_real_connect()</a> verbatim.
- The flags are enumerated in mysql_com.h include file.
- Flags that are especially interesting in regard to indexing, with their respective values, are as follows:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>CLIENT_COMPRESS = 32; can use compression protocol</p></li>
- <li class="listitem"><p>CLIENT_SSL = 2048; switch to SSL after handshake</p></li>
- <li class="listitem"><p>CLIENT_SECURE_CONNECTION = 32768; new 4.1 authentication</p></li>
- </ul></div>
- <p>
- For instance, you can specify 2080 (2048+32) to use both compression and SSL,
- or 32768 to use new authentication only. Initially, this option was introduced
- to be able to use compression when the <code class="filename">indexer</code>
- and <code class="filename">mysqld</code> are on different hosts. Compression on 1 Gbps
- links is most likely to hurt indexing time though it reduces network traffic,
- both in theory and in practice. However, enabling compression on 100 Mbps links
- may improve indexing time significantly (an improvement of up to 20-30% of the total indexing time
- improvement was reported). Your mileage may vary.
- </p><h4><a name="idp33058432"></a>Example:</h4><pre class="programlisting">
- mysql_connect_flags = 32 # enable compression
- </pre></div>
- <div class="sect2" title="12.1.9. mysql_ssl_cert, mysql_ssl_key, mysql_ssl_ca"><div class="titlepage"><div><div><h3 class="title"><a name="conf-mysql-ssl"></a>12.1.9. mysql_ssl_cert, mysql_ssl_key, mysql_ssl_ca</h3></div></div></div>
- <p>
- SSL certificate settings to use for connecting to MySQL server.
- Optional, default values are empty strings (do not use SSL).
- Applies to <code class="option">mysql</code> source type only.
- </p><p>
- These directives let you set up secure SSL connection between
- <code class="filename">indexer</code> and MySQL. The details on creating
- the certificates and setting up MySQL server can be found in
- MySQL documentation.
- </p><h4><a name="idp33062912"></a>Example:</h4><pre class="programlisting">
- mysql_ssl_cert = /etc/ssl/client-cert.pem
- mysql_ssl_key = /etc/ssl/client-key.pem
- mysql_ssl_ca = /etc/ssl/cacert.pem
- </pre></div>
- <div class="sect2" title="12.1.10. odbc_dsn"><div class="titlepage"><div><div><h3 class="title"><a name="conf-odbc-dsn"></a>12.1.10. odbc_dsn</h3></div></div></div>
- <p>
- ODBC DSN to connect to.
- Mandatory, no default value.
- Applies to <code class="option">odbc</code> source type only.
- </p><p>
- ODBC DSN (Data Source Name) specifies the credentials (host, user, password, etc)
- to use when connecting to ODBC data source. The format depends on specific ODBC
- driver used.
- </p><h4><a name="idp33066640"></a>Example:</h4><pre class="programlisting">
- odbc_dsn = Driver={Oracle ODBC Driver};Dbq=myDBName;Uid=myUsername;Pwd=myPassword
- </pre></div>
- <div class="sect2" title="12.1.11. sql_query_pre"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-query-pre"></a>12.1.11. sql_query_pre</h3></div></div></div>
- <p>
- Pre-fetch query, or pre-query.
- Multi-value, optional, default is empty list of queries.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- </p><p>
- Multi-value means that you can specify several pre-queries.
- They are executed before <a class="link" href="#conf-sql-query" title="12.1.12. sql_query">the main fetch query</a>,
- and they will be executed exactly in order of appearance in the configuration file.
- Pre-query results are ignored.
- </p><p>
- Pre-queries are useful in a lot of ways. They are used to set up encoding,
- mark records that are going to be indexed, update internal counters,
- set various per-connection SQL server options and variables, and so on.
- </p><p>
- Perhaps the most frequent pre-query usage is to specify the encoding
- that the server will use for the rows it returns. Note that Sphinx accepts
- only UTF-8 texts.
- Two MySQL specific examples of setting the encoding are:
- </p><pre class="programlisting">
- sql_query_pre = SET CHARACTER_SET_RESULTS=utf8
- sql_query_pre = SET NAMES utf8
- </pre><p>
- Also specific to MySQL sources, it is useful to disable query cache
- (for indexer connection only) in pre-query, because indexing queries
- are not going to be re-run frequently anyway, and there's no sense
- in caching their results. That could be achieved with:
- </p><pre class="programlisting">
- sql_query_pre = SET SESSION query_cache_type=OFF
- </pre><p>
- </p><h4><a name="idp33075104"></a>Example:</h4><pre class="programlisting">
- sql_query_pre = SET NAMES utf8
- sql_query_pre = SET SESSION query_cache_type=OFF
- </pre></div>
- <div class="sect2" title="12.1.12. sql_query"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-query"></a>12.1.12. sql_query</h3></div></div></div>
- <p>
- Main document fetch query.
- Mandatory, no default value.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- </p><p>
- There can be only one main query.
- This is the query which is used to retrieve documents from SQL server.
- You can specify up to 32 full-text fields (formally, up to SPH_MAX_FIELDS from sphinx.h), and an arbitrary amount of attributes.
- All of the columns that are neither document ID (the first one) nor attributes will be full-text indexed.
- </p><p>
- Document ID <span class="bold"><strong>MUST</strong></span> be the very first field,
- and it <span class="bold"><strong>MUST BE UNIQUE UNSIGNED POSITIVE (NON-ZERO, NON-NEGATIVE) INTEGER NUMBER</strong></span>.
- It can be either 32-bit or 64-bit, depending on how you built Sphinx;
- by default it builds with 32-bit IDs support but <code class="option">--enable-id64</code> option
- to <code class="filename">configure</code> allows building with 64-bit document and word IDs support.
- </p><h4><a name="idp33083488"></a>Example:</h4><pre class="programlisting">
- sql_query = \
- SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, \
- title, content \
- FROM documents
- </pre></div>
- <div class="sect2" title="12.1.13. sql_joined_field"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-joined-field"></a>12.1.13. sql_joined_field</h3></div></div></div>
- <p>
- Joined/payload field fetch query.
- Multi-value, optional, default is empty list of queries.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- </p><p>
- <code class="option">sql_joined_field</code> lets you use two different features:
- joined fields, and payloads (payload fields). Its syntax is as follows:
- </p><pre class="programlisting">
- sql_joined_field = FIELD-NAME 'from' ( 'query' | 'payload-query' \
- | 'ranged-query' ); QUERY [ ; RANGE-QUERY ]
- </pre><p>
- where
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>FIELD-NAME is a joined/payload field name;</p></li>
- <li class="listitem"><p>QUERY is an SQL query that must fetch values to index.</p></li>
- <li class="listitem"><p>RANGE-QUERY is an optional SQL query that fetches a range
- of values to index. (Added in version 2.0.1-beta.)</p></li>
- </ul></div>
- <p>
- </p><p>
- <span class="bold"><strong>Joined fields</strong></span> let you avoid JOIN and/or GROUP_CONCAT statements in the main
- document fetch query (sql_query). This can be useful when SQL-side JOIN is slow,
- or needs to be offloaded on Sphinx side, or simply to emulate MySQL-specific
- GROUP_CONCAT functionality in case your database server does not support it.
- </p><p>
- The query must return exactly 2 columns: document ID, and text to append
- to a joined field. Document IDs can be duplicate, but they <span class="bold"><strong>must</strong></span> be
- in ascending order. All the text rows fetched for a given ID will be
- concatenated together, and the concatenation result will be indexed
- as the entire contents of a joined field. Rows will be concatenated
- in the order returned from the query, and separating whitespace
- will be inserted between them. For instance, if joined field query
- returns the following rows:
- </p><pre class="programlisting">
- ( 1, 'red' )
- ( 1, 'right' )
- ( 1, 'hand' )
- ( 2, 'mysql' )
- ( 2, 'sphinx' )
- </pre><p>
- then the indexing results would be equivalent to that of adding
- a new text field with a value of 'red right hand' to document 1 and
- 'mysql sphinx' to document 2.
- </p><p>
- Joined fields are only indexed differently. There are no other differences
- between joined fields and regular text fields.
- </p><p>
- Starting with 2.0.1-beta, <span class="bold"><strong>ranged queries</strong></span> can be used when
- a single query is not efficient enough or does not work because of
- the database driver limitations. They work similarly to the ranged
- queries in the main indexing loop, see <a class="xref" href="#ranged-queries" title="3.8. Ranged queries">Section 3.8, “Ranged queries”</a>.
- The range will be queried for and fetched upfront once,
- then multiple queries with different <code class="code">$start</code>
- and <code class="code">$end</code> substitutions will be run to fetch
- the actual data.
- </p><p>
- <span class="bold"><strong>Payloads</strong></span> let you create a special field in which, instead of
- keyword positions, so-called user payloads are stored. Payloads are
- custom integer values attached to every keyword. They can then be used
- at search time to affect the ranking.
- </p><p>
- The payload query must return exactly 3 columns: document ID; keyword;
- and integer payload value. Document IDs can be duplicate, but they <span class="bold"><strong>must</strong></span> be
- in ascending order. Payloads must be unsigned integers within 24-bit range,
- i.e. from 0 to 16777215. For reference, payloads are currently internally
- stored as in-field keyword positions, but that is not guaranteed
- and might change in the future.
- </p><p>
- Currently, the only method to account for payloads is to use
- SPH_RANK_PROXIMITY_BM25 ranker. On indexes with payload fields,
- it will automatically switch to a variant that matches keywords
- in those fields, computes a sum of matched payloads multiplied
- by field weights, and adds that sum to the final rank.
- </p><h4><a name="idp33102560"></a>Example:</h4><pre class="programlisting">
- sql_joined_field = \
- tagstext from query; \
- SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC
- sql_joined_field = tag from ranged-query; \
- SELECT id, tag FROM tags WHERE id>=$start AND id<=$end ORDER BY id ASC; \
- SELECT MIN(id), MAX(id) FROM tags
- </pre></div>
- <div class="sect2" title="12.1.14. sql_query_range"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-query-range"></a>12.1.14. sql_query_range</h3></div></div></div>
- <p>
- Range query setup.
- Optional, default is empty.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- </p><p>
- Setting this option enables ranged document fetch queries (see <a class="xref" href="#ranged-queries" title="3.8. Ranged queries">Section 3.8, “Ranged queries”</a>).
- Ranged queries are useful to avoid notorious MyISAM table locks when indexing
- lots of data. (They also help with other less notorious issues, such as reduced
- performance caused by big result sets, or additional resources consumed by InnoDB
- to serialize big read transactions.)
- </p><p>
- The query specified in this option must fetch min and max document IDs that will be
- used as range boundaries. It must return exactly two integer fields, min ID first
- and max ID second; the field names are ignored.
- </p><p>
- When ranged queries are enabled, <a class="link" href="#conf-sql-query" title="12.1.12. sql_query">sql_query</a>
- will be required to contain <code class="option">$start</code> and <code class="option">$end</code> macros
- (because it obviously would be a mistake to index the whole table many times over).
- Note that the intervals specified by <code class="option">$start</code>..<code class="option">$end</code>
- will not overlap, so you should <span class="bold"><strong>not</strong></span> remove document IDs that are
- exactly equal to <code class="option">$start</code> or <code class="option">$end</code> from your query.
- The example in <a class="xref" href="#ranged-queries" title="3.8. Ranged queries">Section 3.8, “Ranged queries”</a> illustrates that; note how it
- uses greater-or-equal and less-or-equal comparisons.
- </p><h4><a name="idp33114560"></a>Example:</h4><pre class="programlisting">
- sql_query_range = SELECT MIN(id),MAX(id) FROM documents
- </pre></div>
- <div class="sect2" title="12.1.15. sql_range_step"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-range-step"></a>12.1.15. sql_range_step</h3></div></div></div>
- <p>
- Range query step.
- Optional, default is 1024.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- </p><p>
- Only used when <a class="link" href="#ranged-queries" title="3.8. Ranged queries">ranged queries</a> are enabled.
- The full document IDs interval fetched by <a class="link" href="#conf-sql-query-range" title="12.1.14. sql_query_range">sql_query_range</a>
- will be walked in steps of this size. For example, if min and max IDs fetched
- are 12 and 3456 respectively, and the step is 1000, indexer will call
- <a class="link" href="#conf-sql-query" title="12.1.12. sql_query">sql_query</a> several times with the
- following substitutions:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>$start=12, $end=1011</p></li>
- <li class="listitem"><p>$start=1012, $end=2011</p></li>
- <li class="listitem"><p>$start=2012, $end=3011</p></li>
- <li class="listitem"><p>$start=3012, $end=3456</p></li>
- </ul></div>
- <p>
- </p><h4><a name="idp33124224"></a>Example:</h4><pre class="programlisting">
- sql_range_step = 1000
- </pre></div>
- <div class="sect2" title="12.1.16. sql_query_killlist"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-query-killlist"></a>12.1.16. sql_query_killlist</h3></div></div></div>
- <p>
- Kill-list query.
- Optional, default is empty (no query).
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- Introduced in version 0.9.9-rc1.
- </p><p>
- This query is expected to return a number of 1-column rows, each containing
- just the document ID. The returned document IDs are stored within an index.
- Kill-list for a given index suppresses results from <span class="emphasis"><em>other</em></span>
- indexes, depending on index order in the query. The intended use is to help
- implement deletions and updates on existing indexes without rebuilding
- them (actually, without even touching them), and especially to fight the
- phantom results problem.
- </p><p>
- Let us dissect an example. Assume we have two indexes, 'main' and 'delta'.
- Assume that documents 2, 3, and 5 were deleted since last reindex of 'main',
- and documents 7 and 11 were updated (ie. their text contents were changed).
- Assume that a keyword 'test' occurred in all these mentioned documents
- when we were indexing 'main'; still occurs in document 7 as we index 'delta';
- but does not occur in document 11 any more. We now reindex delta and then
- search through both these indexes in proper (least to most recent) order:
- </p><pre class="programlisting">
- $res = $cl->Query ( "test", "main delta" );
- </pre><p>
- </p><p>
- First, we need to properly handle deletions. The result set should not
- contain documents 2, 3, or 5. Second, we also need to avoid phantom results.
- Unless we do something about it, document 11 <span class="emphasis"><em>will</em></span>
- appear in search results! It will be found in 'main' (but not 'delta').
- And it will make it to the final result set unless something stops it.
- </p><p>
- Kill-list, or K-list for short, is that something. Kill-list attached
- to 'delta' will suppress the specified rows from <span class="bold"><strong>all</strong></span> the preceding
- indexes, in this case just 'main'. So to get the expected results,
- we should put all the updated <span class="emphasis"><em>and</em></span> deleted
- document IDs into it.
- </p><p>
- Note that in the distributed index setup, K-lists are <span class="bold"><strong>local
- to every node in the cluster</strong></span>. They are <span class="bold"><strong>not</strong></span> transmitted
- over the network when sending queries. (Because that might be too much
- of an impact when the K-list is huge.) You will need to set up
- separate per-server K-lists in that case.
- </p><h4><a name="idp33136032"></a>Example:</h4><pre class="programlisting">
- sql_query_killlist = \
- SELECT id FROM documents WHERE updated_ts>=@last_reindex UNION \
- SELECT id FROM documents_deleted WHERE deleted_ts>=@last_reindex
- </pre></div>
- <div class="sect2" title="12.1.17. sql_attr_uint"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-attr-uint"></a>12.1.17. sql_attr_uint</h3></div></div></div>
- <p>
- Unsigned integer <a class="link" href="#attributes" title="3.3. Attributes">attribute</a> declaration.
- Multi-value (there might be multiple attributes declared), optional.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- </p><p>
- The column value should fit into 32-bit unsigned integer range.
- Values outside this range will be accepted but wrapped around.
- For instance, -1 will be wrapped around to 2^32-1 or 4,294,967,295.
- </p><p>
- You can specify bit count for integer attributes by appending
- ':BITCOUNT' to attribute name (see example below). Attributes with
- less than default 32-bit size, or bitfields, perform slower.
- But they require less RAM when using <a class="link" href="#conf-docinfo" title="12.2.4. docinfo">extern storage</a>:
- such bitfields are packed together in 32-bit chunks in <code class="filename">.spa</code>
- attribute data file. Bit size settings are ignored if using
- <a class="link" href="#conf-docinfo" title="12.2.4. docinfo">inline storage</a>.
- </p><h4><a name="idp33144720"></a>Example:</h4><pre class="programlisting">
- sql_attr_uint = group_id
- sql_attr_uint = forum_id:9 # 9 bits for forum_id
- </pre></div>
- <div class="sect2" title="12.1.18. sql_attr_bool"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-attr-bool"></a>12.1.18. sql_attr_bool</h3></div></div></div>
- <p>
- Boolean <a class="link" href="#attributes" title="3.3. Attributes">attribute</a> declaration.
- Multi-value (there might be multiple attributes declared), optional.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- Equivalent to <a class="link" href="#conf-sql-attr-uint" title="12.1.17. sql_attr_uint">sql_attr_uint</a> declaration with a bit count of 1.
- </p><h4><a name="idp33150576"></a>Example:</h4><pre class="programlisting">
- sql_attr_bool = is_deleted # will be packed to 1 bit
- </pre></div>
- <div class="sect2" title="12.1.19. sql_attr_bigint"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-attr-bigint"></a>12.1.19. sql_attr_bigint</h3></div></div></div>
- <p>
- 64-bit signed integer <a class="link" href="#attributes" title="3.3. Attributes">attribute</a> declaration.
- Multi-value (there might be multiple attributes declared), optional.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- Note that unlike <a class="link" href="#conf-sql-attr-uint" title="12.1.17. sql_attr_uint">sql_attr_uint</a>,
- these values are <span class="bold"><strong>signed</strong></span>.
- Introduced in version 0.9.9-rc1.
- </p><h4><a name="idp33157008"></a>Example:</h4><pre class="programlisting">
- sql_attr_bigint = my_bigint_id
- </pre></div>
- <div class="sect2" title="12.1.20. sql_attr_timestamp"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-attr-timestamp"></a>12.1.20. sql_attr_timestamp</h3></div></div></div>
- <p>
- UNIX timestamp <a class="link" href="#attributes" title="3.3. Attributes">attribute</a> declaration.
- Multi-value (there might be multiple attributes declared), optional.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- </p><p>
- Timestamps can store date and time in the range of Jan 01, 1970
- to Jan 19, 2038 with a precision of one second.
- The expected column value should be a timestamp in UNIX format, ie. 32-bit unsigned
- integer number of seconds elapsed since midnight, January 01, 1970, GMT.
- Timestamps are internally stored and handled as integers everywhere.
- But in addition to working with timestamps as integers, it's also legal
- to use them along with different date-based functions, such as time segments
- sorting mode, or day/week/month/year extraction for GROUP BY.
- </p><p>
- Note that DATE or DATETIME column types in MySQL can <span class="bold"><strong>not</strong></span> be directly
- used as timestamp attributes in Sphinx; you need to explicitly convert such
- columns using UNIX_TIMESTAMP function (if data is in range).
- </p><p>
- Note that timestamps can not represent dates before January 01, 1970,
- and UNIX_TIMESTAMP() in MySQL will not return the expected result for them.
- If you only need to work with dates, not times, consider TO_DAYS()
- function in MySQL instead.
- </p><h4><a name="idp33164624"></a>Example:</h4><pre class="programlisting">
- # sql_query = ... UNIX_TIMESTAMP(added_datetime) AS added_ts ...
- sql_attr_timestamp = added_ts
- </pre></div>
- <div class="sect2" title="12.1.21. sql_attr_float"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-attr-float"></a>12.1.21. sql_attr_float</h3></div></div></div>
- <p>
- Floating point <a class="link" href="#attributes" title="3.3. Attributes">attribute</a> declaration.
- Multi-value (there might be multiple attributes declared), optional.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- </p><p>
- The values will be stored in single precision, 32-bit IEEE 754 format.
- Represented range is approximately from 1e-38 to 1e+38. The amount
- of decimal digits that can be stored precisely is approximately 7.
- One important usage of the float attributes is storing latitude
- and longitude values (in radians), for further usage in query-time
- geosphere distance calculations.
- </p><h4><a name="idp33170288"></a>Example:</h4><pre class="programlisting">
- sql_attr_float = lat_radians
- sql_attr_float = long_radians
- </pre></div>
- <div class="sect2" title="12.1.22. sql_attr_multi"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-attr-multi"></a>12.1.22. sql_attr_multi</h3></div></div></div>
- <p>
- <a class="link" href="#mva" title="3.4. MVA (multi-valued attributes)">Multi-valued attribute</a> (MVA) declaration.
- Multi-value (ie. there may be more than one such attribute declared), optional.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- </p><p>
- Plain attributes only allow attaching one value to each document.
- However, there are cases (such as tags or categories) when it is
- desired to attach multiple values of the same attribute and be able
- to apply filtering or grouping to value lists.
- </p><p>
- The declaration format is as follows (backslashes are for clarity only;
- everything can be declared in a single line as well):
- </p><pre class="programlisting">
- sql_attr_multi = ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE \
- [;QUERY] \
- [;RANGE-QUERY]
- </pre><p>
- where
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>ATTR-TYPE is 'uint', 'bigint' or 'timestamp'</p></li>
- <li class="listitem"><p>SOURCE-TYPE is 'field', 'query', or 'ranged-query'</p></li>
- <li class="listitem"><p>QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs</p></li>
- <li class="listitem"><p>RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range'</p></li>
- </ul></div>
- <p>
- </p><h4><a name="idp33179840"></a>Example:</h4><pre class="programlisting">
- sql_attr_multi = uint tag from query; SELECT id, tag FROM tags
- sql_attr_multi = bigint tag from ranged-query; \
- SELECT id, tag FROM tags WHERE id>=$start AND id<=$end; \
- SELECT MIN(id), MAX(id) FROM tags
- </pre></div>
- <div class="sect2" title="12.1.23. sql_attr_string"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-attr-string"></a>12.1.23. sql_attr_string</h3></div></div></div>
- <p>
- String attribute declaration.
- Multi-value (ie. there may be more than one such attribute declared), optional.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- Introduced in version 1.10-beta.
- </p><p>
- String attributes can store arbitrary strings attached to every document.
- There's a fixed size limit of 4 MB per value. Also, <code class="filename">searchd</code>
- will currently cache all the values in RAM, which is an additional implicit limit.
- </p><p>
- Starting from 2.0.1-beta string attributes can be used for sorting and
- grouping (ORDER BY, GROUP BY, WITHIN GROUP ORDER BY). Note that attributes
- declared using <code class="option">sql_attr_string</code> will <span class="bold"><strong>not</strong></span> be full-text
- indexed; you can use <a class="link" href="#conf-sql-field-string" title="12.1.26. sql_field_string">sql_field_string</a>
- directive for that.
- </p><h4><a name="idp33188160"></a>Example:</h4><pre class="programlisting">
- sql_attr_string = title # will be stored but will not be indexed
- </pre></div>
- <div class="sect2" title="12.1.24. sql_attr_json"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-attr-json"></a>12.1.24. sql_attr_json</h3></div></div></div>
- <p>
- JSON attribute declaration.
- Multi-value (ie. there may be more than one such attribute declared), optional.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- Introduced in version 2.1.1-beta.
- </p><p>
- When indexing JSON attributes, Sphinx expects a text field
- with JSON formatted data. As of 2.2.1-beta JSON attributes support arbitrary
- JSON data with no limitation in nested levels or types.
- </p><pre class="programlisting">
- {
- "id": 1,
- "gid": 2,
- "title": "some title",
- "tags": [
- "tag1",
- "tag2",
- "tag3",
- {
- "one": "two",
- "three": [4, 5]
- }
- ]
- }
- </pre><p>
- These attributes allow Sphinx to work with documents without a fixed set of
- attribute columns. When you filter on a key of a JSON attribute, documents
- that don't include the key will simply be ignored.
- </p><p>
- You can read more on JSON attributes in <a class="ulink" href="http://sphinxsearch.com/blog/2013/08/08/full-json-support-in-trunk/" target="_top">
- http://sphinxsearch.com/blog/2013/08/08/full-json-support-in-trunk/</a>.
- </p><h4><a name="idp33195168"></a>Example:</h4><pre class="programlisting">
- sql_attr_json = properties
- </pre></div>
- <div class="sect2" title="12.1.25. sql_column_buffers"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-column-buffers"></a>12.1.25. sql_column_buffers</h3></div></div></div>
- <p>
- Per-column buffer sizes.
- Optional, default is empty (deduce the sizes automatically).
- Applies to <code class="option">odbc</code>, <code class="option">mssql</code> source types only.
- Introduced in version 2.0.1-beta.
- </p><p>
- ODBC and MS SQL drivers sometimes can not return the maximum
- actual column size to be expected. For instance, NVARCHAR(MAX) columns
- always report their length as 2147483647 bytes to
- <code class="filename">indexer</code> even though the actually used length
- is likely considerably less. However, the receiving buffers still
- need to be allocated upfront, and their sizes have to be determined.
- When the driver does not report the column length at all, Sphinx
- allocates default 1 KB buffers for each non-char column, and 1 MB
- buffers for each char column. Driver-reported column length
- also gets clamped by an upper limit of 8 MB, so in case the
- driver reports (almost) a 2 GB column length, it will be clamped
- and an 8 MB buffer will be allocated instead for that column.
- These hard-coded limits can be overridden using the
- <code class="code">sql_column_buffers</code> directive, either in order
- to save memory on actually shorter columns, or overcome
- the 8 MB limit on actually longer columns. The directive value
- must be a comma-separated list of selected column names and sizes:
- </p><pre class="programlisting">
- sql_column_buffers = &lt;colname&gt;=&lt;size&gt;[K|M] [, ...]
- </pre><p>
- </p><h4><a name="idp33202656"></a>Example:</h4><pre class="programlisting">
- sql_query = SELECT id, mytitle, mycontent FROM documents
- sql_column_buffers = mytitle=64K, mycontent=10M
- </pre></div>
- <div class="sect2" title="12.1.26. sql_field_string"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-field-string"></a>12.1.26. sql_field_string</h3></div></div></div>
- <p>
- Combined string attribute and full-text field declaration.
- Multi-value (ie. there may be more than one such attribute declared), optional.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- Introduced in version 1.10-beta.
- </p><p>
- <a class="link" href="#conf-sql-attr-string" title="12.1.23. sql_attr_string">sql_attr_string</a> only stores the column
- value but does not full-text index it. In some cases it might be desired to both full-text
- index the column and store it as attribute. <code class="option">sql_field_string</code> lets you do
- exactly that. Both the field and the attribute will be named the same.
- </p><h4><a name="idp33208768"></a>Example:</h4><pre class="programlisting">
- sql_field_string = title # will be both indexed and stored
- </pre></div>
- <div class="sect2" title="12.1.27. sql_file_field"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-file-field"></a>12.1.27. sql_file_field</h3></div></div></div>
- <p>
- File based field declaration.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- Introduced in version 1.10-beta.
- </p><p>
- This directive makes <code class="filename">indexer</code> interpret field contents
- as a file name, and load and index the referred file. Files larger than
- <a class="link" href="#conf-max-file-field-buffer" title="12.3.6. max_file_field_buffer">max_file_field_buffer</a>
- in size are skipped. Any errors during the file loading (IO errors, missed
- limits, etc) will be reported as indexing warnings and will <span class="bold"><strong>not</strong></span> early
- terminate the indexing. No content will be indexed for such files.
- </p><h4><a name="idp33215968"></a>Example:</h4><pre class="programlisting">
- sql_file_field = my_file_path # load and index files referred to by my_file_path
- </pre></div>
- <div class="sect2" title="12.1.28. sql_query_post"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-query-post"></a>12.1.28. sql_query_post</h3></div></div></div>
- <p>
- Post-fetch query.
- Optional, default value is empty.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- </p><p>
- This query is executed immediately after <a class="link" href="#conf-sql-query" title="12.1.12. sql_query">sql_query</a>
- completes successfully. When post-fetch query produces errors,
- they are reported as warnings, but indexing is <span class="bold"><strong>not</strong></span> terminated.
- Its result set is ignored. Note that indexing is <span class="bold"><strong>not</strong></span> yet completed
- at the point when this query gets executed, and further indexing still may fail.
- Therefore, any permanent updates should not be done from here.
- For instance, updates on helper table that permanently change
- the last successfully indexed ID should not be run from post-fetch
- query; they should be run from <a class="link" href="#conf-sql-query-post-index" title="12.1.29. sql_query_post_index">post-index query</a> instead.
- </p><h4><a name="idp33223904"></a>Example:</h4><pre class="programlisting">
- sql_query_post = DROP TABLE my_tmp_table
- </pre></div>
- <div class="sect2" title="12.1.29. sql_query_post_index"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-query-post-index"></a>12.1.29. sql_query_post_index</h3></div></div></div>
- <p>
- Post-index query.
- Optional, default value is empty.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- </p><p>
- This query is executed when indexing is fully and successfully completed.
- If this query produces errors, they are reported as warnings,
- but indexing is <span class="bold"><strong>not</strong></span> terminated. Its result set is ignored.
- <code class="code">$maxid</code> macro can be used in its text; it will be
- expanded to maximum document ID which was actually fetched
- from the database during indexing. If no documents were indexed,
- $maxid will be expanded to 0.
- </p><h4><a name="idp33229808"></a>Example:</h4><pre class="programlisting">
- sql_query_post_index = REPLACE INTO counters ( id, val ) \
- VALUES ( 'max_indexed_id', $maxid )
- </pre></div>
- <div class="sect2" title="12.1.30. sql_ranged_throttle"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sql-ranged-throttle"></a>12.1.30. sql_ranged_throttle</h3></div></div></div>
- <p>
- Ranged query throttling period, in milliseconds.
- Optional, default is 0 (no throttling).
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- </p><p>
- Throttling can be useful when indexer imposes too much load on the
- database server. It causes the indexer to sleep for the given number of
- milliseconds once per ranged query step. This sleep is unconditional,
- and is performed before the fetch query.
- </p><h4><a name="idp33234608"></a>Example:</h4><pre class="programlisting">
- sql_ranged_throttle = 1000 # sleep for 1 sec before each query step
- </pre></div>
- <div class="sect2" title="12.1.31. xmlpipe_command"><div class="titlepage"><div><div><h3 class="title"><a name="conf-xmlpipe-command"></a>12.1.31. xmlpipe_command</h3></div></div></div>
- <p>
- Shell command that invokes xmlpipe2 stream producer.
- Mandatory.
- Applies to <code class="option">xmlpipe2</code> source types only.
- </p><p>
- Specifies a command that will be executed and whose output
- will be parsed for documents. Refer to <a class="xref" href="#xmlpipe2" title="3.9. xmlpipe2 data source">Section 3.9, “xmlpipe2 data source”</a> for specific format description.
- </p><h4><a name="idp33239008"></a>Example:</h4><pre class="programlisting">
- xmlpipe_command = cat /home/sphinx/test.xml
- </pre></div>
- <div class="sect2" title="12.1.32. xmlpipe_field"><div class="titlepage"><div><div><h3 class="title"><a name="conf-xmlpipe-field"></a>12.1.32. xmlpipe_field</h3></div></div></div>
- <p>
- xmlpipe field declaration.
- Multi-value, optional.
- Applies to <code class="option">xmlpipe2</code> source type only. Refer to <a class="xref" href="#xmlpipe2" title="3.9. xmlpipe2 data source">Section 3.9, “xmlpipe2 data source”</a>.
- </p><h4><a name="idp33242864"></a>Example:</h4><pre class="programlisting">
- xmlpipe_field = subject
- xmlpipe_field = content
- </pre></div>
- <div class="sect2" title="12.1.33. xmlpipe_field_string"><div class="titlepage"><div><div><h3 class="title"><a name="conf-xmlpipe-field-string"></a>12.1.33. xmlpipe_field_string</h3></div></div></div>
- <p>
- xmlpipe field and string attribute declaration.
- Multi-value, optional.
- Applies to <code class="option">xmlpipe2</code> source type only. Refer to <a class="xref" href="#xmlpipe2" title="3.9. xmlpipe2 data source">Section 3.9, “xmlpipe2 data source”</a>.
- Introduced in version 1.10-beta.
- </p><p>
- Makes the specified XML element indexed as both a full-text field and a string attribute.
- Equivalent to &lt;sphinx:field name="field" attr="string"/&gt; declaration within the XML file.
- </p><h4><a name="idp33247568"></a>Example:</h4><pre class="programlisting">
- xmlpipe_field_string = subject
- </pre></div>
- <div class="sect2" title="12.1.34. xmlpipe_attr_uint"><div class="titlepage"><div><div><h3 class="title"><a name="conf-xmlpipe-attr-uint"></a>12.1.34. xmlpipe_attr_uint</h3></div></div></div>
- <p>
- xmlpipe integer attribute declaration.
- Multi-value, optional.
- Applies to <code class="option">xmlpipe2</code> source type only.
- Syntax fully matches that of <a class="link" href="#conf-sql-attr-uint" title="12.1.17. sql_attr_uint">sql_attr_uint</a>.
- </p><h4><a name="idp33251392"></a>Example:</h4><pre class="programlisting">
- xmlpipe_attr_uint = author_id
- </pre></div>
- <div class="sect2" title="12.1.35. xmlpipe_attr_bigint"><div class="titlepage"><div><div><h3 class="title"><a name="conf-xmlpipe-attr-bigint"></a>12.1.35. xmlpipe_attr_bigint</h3></div></div></div>
- <p>
- xmlpipe signed 64-bit integer attribute declaration.
- Multi-value, optional.
- Applies to <code class="option">xmlpipe2</code> source type only.
- Syntax fully matches that of <a class="link" href="#conf-sql-attr-bigint" title="12.1.19. sql_attr_bigint">sql_attr_bigint</a>.
- </p><h4><a name="idp33255600"></a>Example:</h4><pre class="programlisting">
- xmlpipe_attr_bigint = my_bigint_id
- </pre></div>
- <div class="sect2" title="12.1.36. xmlpipe_attr_bool"><div class="titlepage"><div><div><h3 class="title"><a name="conf-xmlpipe-attr-bool"></a>12.1.36. xmlpipe_attr_bool</h3></div></div></div>
- <p>
- xmlpipe boolean attribute declaration.
- Multi-value, optional.
- Applies to <code class="option">xmlpipe2</code> source type only.
- Syntax fully matches that of <a class="link" href="#conf-sql-attr-bool" title="12.1.18. sql_attr_bool">sql_attr_bool</a>.
- </p><h4><a name="idp33259760"></a>Example:</h4><pre class="programlisting">
- xmlpipe_attr_bool = is_deleted # will be packed to 1 bit
- </pre></div>
- <div class="sect2" title="12.1.37. xmlpipe_attr_timestamp"><div class="titlepage"><div><div><h3 class="title"><a name="conf-xmlpipe-attr-timestamp"></a>12.1.37. xmlpipe_attr_timestamp</h3></div></div></div>
- <p>
- xmlpipe UNIX timestamp attribute declaration.
- Multi-value, optional.
- Applies to <code class="option">xmlpipe2</code> source type only.
- Syntax fully matches that of <a class="link" href="#conf-sql-attr-timestamp" title="12.1.20. sql_attr_timestamp">sql_attr_timestamp</a>.
- </p><h4><a name="idp33263920"></a>Example:</h4><pre class="programlisting">
- xmlpipe_attr_timestamp = published
- </pre></div>
- <div class="sect2" title="12.1.38. xmlpipe_attr_float"><div class="titlepage"><div><div><h3 class="title"><a name="conf-xmlpipe-attr-float"></a>12.1.38. xmlpipe_attr_float</h3></div></div></div>
- <p>
- xmlpipe floating point attribute declaration.
- Multi-value, optional.
- Applies to <code class="option">xmlpipe2</code> source type only.
- Syntax fully matches that of <a class="link" href="#conf-sql-attr-float" title="12.1.21. sql_attr_float">sql_attr_float</a>.
- </p><h4><a name="idp33267952"></a>Example:</h4><pre class="programlisting">
- xmlpipe_attr_float = lat_radians
- xmlpipe_attr_float = long_radians
- </pre></div>
- <div class="sect2" title="12.1.39. xmlpipe_attr_multi"><div class="titlepage"><div><div><h3 class="title"><a name="conf-xmlpipe-attr-multi"></a>12.1.39. xmlpipe_attr_multi</h3></div></div></div>
- <p>
- xmlpipe MVA attribute declaration.
- Multi-value, optional.
- Applies to <code class="option">xmlpipe2</code> source type only.
- </p><p>
- This setting declares an MVA attribute tag in xmlpipe2 stream.
- The contents of the specified tag will be parsed and a list of integers
- that will constitute the MVA will be extracted, similar to how
- <a class="link" href="#conf-sql-attr-multi" title="12.1.22. sql_attr_multi">sql_attr_multi</a> parses
- SQL column contents when 'field' MVA source type is specified.
- </p><h4><a name="idp33272592"></a>Example:</h4><pre class="programlisting">
- xmlpipe_attr_multi = taglist
- </pre></div>
- <div class="sect2" title="12.1.40. xmlpipe_attr_multi_64"><div class="titlepage"><div><div><h3 class="title"><a name="conf-xmlpipe-attr-multi-64"></a>12.1.40. xmlpipe_attr_multi_64</h3></div></div></div>
- <p>
- xmlpipe MVA attribute declaration. Declares the BIGINT (signed 64-bit integer) MVA attribute.
- Multi-value, optional.
- Applies to <code class="option">xmlpipe2</code> source type only.
- </p><p>
- This setting declares an MVA attribute tag in xmlpipe2 stream.
- The contents of the specified tag will be parsed and a list of integers
- that will constitute the MVA will be extracted, similar to how
- <a class="link" href="#conf-sql-attr-multi" title="12.1.22. sql_attr_multi">sql_attr_multi</a> parses
- SQL column contents when 'field' MVA source type is specified.
- </p><h4><a name="idp33277280"></a>Example:</h4><pre class="programlisting">
- xmlpipe_attr_multi_64 = taglist
- </pre></div>
- <div class="sect2" title="12.1.41. xmlpipe_attr_string"><div class="titlepage"><div><div><h3 class="title"><a name="conf-xmlpipe-attr-string"></a>12.1.41. xmlpipe_attr_string</h3></div></div></div>
- <p>
- xmlpipe string declaration.
- Multi-value, optional.
- Applies to <code class="option">xmlpipe2</code> source type only.
- Introduced in version 1.10-beta.
- </p><p>
- This setting declares a string attribute tag in xmlpipe2 stream.
- The contents of the specified tag will be parsed and stored as a string value.
- </p><h4><a name="idp33281008"></a>Example:</h4><pre class="programlisting">
- xmlpipe_attr_string = subject
- </pre></div>
- <div class="sect2" title="12.1.42. xmlpipe_attr_json"><div class="titlepage"><div><div><h3 class="title"><a name="conf-xmlpipe-attr-json"></a>12.1.42. xmlpipe_attr_json</h3></div></div></div>
- <p>
- JSON attribute declaration.
- Multi-value (ie. there may be more than one such attribute declared), optional.
- Introduced in version 2.1.1-beta.
- </p><p>
- This directive is used to declare that the contents of a given
- XML tag are to be treated as a JSON document and stored into a Sphinx
- index for later use. Refer to <a class="xref" href="#conf-sql-attr-json" title="12.1.24. sql_attr_json">Section 12.1.24, “sql_attr_json”</a>
- for more details on the JSON attributes.
- </p><h4><a name="idp33285072"></a>Example:</h4><pre class="programlisting">
- xmlpipe_attr_json = properties
- </pre></div>
- <div class="sect2" title="12.1.43. xmlpipe_fixup_utf8"><div class="titlepage"><div><div><h3 class="title"><a name="conf-xmlpipe-fixup-utf8"></a>12.1.43. xmlpipe_fixup_utf8</h3></div></div></div>
- <p>
- Perform Sphinx-side UTF-8 validation and filtering to prevent XML parser from choking on non-UTF-8 documents.
- Optional, default is 0.
- Applies to <code class="option">xmlpipe2</code> source type only.
- </p><p>
- On certain occasions it might be hard or even impossible to guarantee
- that the incoming XMLpipe2 document bodies are in perfectly valid and
- conforming UTF-8 encoding. For instance, documents with national
- single-byte encodings could sneak into the stream. libexpat XML parser
- is fragile, meaning that it will stop processing in such cases.
- UTF8 fixup feature lets you avoid that. When fixup is enabled,
- Sphinx will preprocess the incoming stream before passing it to the
- XML parser and replace invalid UTF-8 sequences with spaces.
- </p><h4><a name="idp33289200"></a>Example:</h4><pre class="programlisting">
- xmlpipe_fixup_utf8 = 1
- </pre></div>
- <div class="sect2" title="12.1.44. mssql_winauth"><div class="titlepage"><div><div><h3 class="title"><a name="conf-mssql-winauth"></a>12.1.44. mssql_winauth</h3></div></div></div>
- <p>
- MS SQL Windows authentication flag.
- Boolean, optional, default value is 0 (false).
- Applies to <code class="option">mssql</code> source type only.
- Introduced in version 0.9.9-rc1.
- </p><p>
- Whether to use currently logged in Windows account credentials for
- authentication when connecting to MS SQL Server. Note that when running
- <code class="filename">searchd</code> as a service, account user can differ
- from the account you used to install the service.
- </p><h4><a name="idp33293792"></a>Example:</h4><pre class="programlisting">
- mssql_winauth = 1
- </pre></div>
- <div class="sect2" title="12.1.45. unpack_zlib"><div class="titlepage"><div><div><h3 class="title"><a name="conf-unpack-zlib"></a>12.1.45. unpack_zlib</h3></div></div></div>
- <p>
- Columns to unpack using zlib (aka deflate, aka gunzip).
- Multi-value, optional, default value is empty list of columns.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- Introduced in version 0.9.9-rc1.
- </p><p>
- Columns specified using this directive will be unpacked by <code class="filename">indexer</code>
- using standard zlib algorithm (called deflate and also implemented by <code class="filename">gunzip</code>).
- When indexing on a different box than the database, this lets you offload the database, and save on network traffic.
- The feature is only available if zlib and zlib-devel were both available during build time.
- </p><h4><a name="idp33300000"></a>Example:</h4><pre class="programlisting">
- unpack_zlib = col1
- unpack_zlib = col2
- </pre></div>
- <div class="sect2" title="12.1.46. unpack_mysqlcompress"><div class="titlepage"><div><div><h3 class="title"><a name="conf-unpack-mysqlcompress"></a>12.1.46. unpack_mysqlcompress</h3></div></div></div>
- <p>
- Columns to unpack using MySQL UNCOMPRESS() algorithm.
- Multi-value, optional, default value is empty list of columns.
- Applies to SQL source types (<code class="option">mysql</code>, <code class="option">pgsql</code>, <code class="option">mssql</code>) only.
- Introduced in version 0.9.9-rc1.
- </p><p>
- Columns specified using this directive will be unpacked by <code class="filename">indexer</code>
- using modified zlib algorithm used by MySQL COMPRESS() and UNCOMPRESS() functions.
- When indexing on a different box than the database, this lets you offload the database, and save on network traffic.
- The feature is only available if zlib and zlib-devel were both available during build time.
- </p><h4><a name="idp33305664"></a>Example:</h4><pre class="programlisting">
- unpack_mysqlcompress = body_compressed
- unpack_mysqlcompress = description_compressed
- </pre></div>
- <div class="sect2" title="12.1.47. unpack_mysqlcompress_maxsize"><div class="titlepage"><div><div><h3 class="title"><a name="conf-unpack-mysqlcompress-maxsize"></a>12.1.47. unpack_mysqlcompress_maxsize</h3></div></div></div>
- <p>
- Buffer size for UNCOMPRESS()ed data.
- Optional, default value is 16M.
- Introduced in version 0.9.9-rc1.
- </p><p>
- When using <a class="link" href="#conf-unpack-mysqlcompress" title="12.1.46. unpack_mysqlcompress">unpack_mysqlcompress</a>,
- due to implementation intricacies it is not possible to deduce the required buffer size
- from the compressed data. So the buffer must be preallocated in advance, and unpacked
- data can not go over the buffer size. This option lets you control the buffer size,
- both to limit <code class="filename">indexer</code> memory use, and to enable unpacking
- of really long data fields if necessary.
- </p><h4><a name="idp33310864"></a>Example:</h4><pre class="programlisting">
- unpack_mysqlcompress_maxsize = 1M
- </pre></div></div>
- <div class="sect1" title="12.2. Index configuration options"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="confgroup-index"></a>12.2. Index configuration options</h2></div></div></div>
- <div class="sect2" title="12.2.1. type"><div class="titlepage"><div><div><h3 class="title"><a name="conf-index-type"></a>12.2.1. type</h3></div></div></div>
- <p>
- Index type.
- Known values are 'plain', 'distributed', 'rt' and 'template'.
- Optional, default is 'plain' (plain local index).
- </p><p>
- Sphinx supports several different types of indexes.
- Versions 0.9.x supported two index types: plain local indexes
- that are stored and processed on the local machine; and distributed indexes,
- that involve not only local searching but querying remote <code class="filename">searchd</code>
- instances over the network as well (see <a class="xref" href="#distributed" title="5.8. Distributed searching">Section 5.8, “Distributed searching”</a>).
- Version 1.10-beta also adds support
- for so-called real-time indexes (or RT indexes for short) that
- are also stored and processed locally, but additionally allow
- for on-the-fly updates of the full-text index (see <a class="xref" href="#rt-indexes" title="Chapter 4. Real-time indexes">Chapter 4, <i>Real-time indexes</i></a>).
- Note that <span class="emphasis"><em>attributes</em></span> can be updated on-the-fly using
- either plain local indexes or RT ones.
- In 2.2.1-beta template indexes were introduced. They are actually
- pseudo-indexes because they do not store any data. That means they do not create
- any files on your hard drive. But you can use them for keywords and snippets
- generation, which may be useful in some cases.
- </p><p>
- Index type setting lets you choose the needed type.
- By default, plain local index type will be assumed.
- </p><h4><a name="idp33319024"></a>Example:</h4><pre class="programlisting">
- type = distributed
- </pre></div>
- <div class="sect2" title="12.2.2. source"><div class="titlepage"><div><div><h3 class="title"><a name="conf-source"></a>12.2.2. source</h3></div></div></div>
- <p>
- Adds document source to local index.
- Multi-value, mandatory.
- </p><p>
- Specifies document source to get documents from when the current
- index is indexed. There must be at least one source. There may be multiple
- sources, without any restrictions on the source types: ie. you can pull
- part of the data from MySQL server, part from PostgreSQL, part from
- the filesystem using xmlpipe2 wrapper.
- </p><p>
- However, there are some restrictions on the source data. First,
- document IDs must be globally unique across all sources. If that
- condition is not met, you might get unexpected search results.
- Second, source schemas must be the same in order to be stored
- within the same index.
- </p><p>
- No source ID is stored automatically. Therefore, in order to be able
- to tell what source the matched document came from, you will need to
- store some additional information yourself. Two typical approaches
- include:
- </p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>mangling document ID and encoding source ID in it:
- </p><pre class="programlisting">
- source src1
- {
- sql_query = SELECT id*10+1, ... FROM table1
- ...
- }
- source src2
- {
- sql_query = SELECT id*10+2, ... FROM table2
- ...
- }
- </pre><p>
- </p></li>
- <li class="listitem"><p>
- storing source ID simply as an attribute:
- </p><pre class="programlisting">
- source src1
- {
- sql_query = SELECT id, 1 AS source_id FROM table1
- sql_attr_uint = source_id
- ...
- }
- source src2
- {
- sql_query = SELECT id, 2 AS source_id FROM table2
- sql_attr_uint = source_id
- ...
- }
- </pre><p>
- </p></li>
- </ol></div>
- <p>
- </p><h4><a name="idp33327488"></a>Example:</h4><pre class="programlisting">
- source = srcpart1
- source = srcpart2
- source = srcpart3
- </pre></div>
- <div class="sect2" title="12.2.3. path"><div class="titlepage"><div><div><h3 class="title"><a name="conf-path"></a>12.2.3. path</h3></div></div></div>
- <p>
- Index files path and file name (without extension).
- Mandatory.
- </p><p>
- Path specifies both directory and file name, but without extension.
- <code class="filename">indexer</code> will append different extensions
- to this path when generating final names for both permanent and
- temporary index files. Permanent data files have several different
- extensions starting with '.sp'; temporary files' extensions
- start with '.tmp'. It's safe to remove <code class="filename">.tmp*</code>
- files if indexer fails to remove them automatically.
- </p><p>
- For reference, different index files store the following data:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p><code class="filename">.spa</code> stores document attributes (used in <a class="link" href="#conf-docinfo" title="12.2.4. docinfo">extern docinfo</a> storage mode only);</p></li>
- <li class="listitem"><p><code class="filename">.spd</code> stores matching document ID lists for each word ID;</p></li>
- <li class="listitem"><p><code class="filename">.sph</code> stores index header information;</p></li>
- <li class="listitem"><p><code class="filename">.spi</code> stores word lists (word IDs and pointers to <code class="filename">.spd</code> file);</p></li>
- <li class="listitem"><p><code class="filename">.spk</code> stores kill-lists;</p></li>
- <li class="listitem"><p><code class="filename">.spm</code> stores MVA data;</p></li>
- <li class="listitem"><p><code class="filename">.spp</code> stores hit (aka posting, aka word occurrence) lists for each word ID;</p></li>
- <li class="listitem"><p><code class="filename">.sps</code> stores string attribute data.</p></li>
- </ul></div>
- <p>
- </p><h4><a name="idp33343840"></a>Example:</h4><pre class="programlisting">
- path = /var/data/test1
- </pre></div>
- <div class="sect2" title="12.2.4. docinfo"><div class="titlepage"><div><div><h3 class="title"><a name="conf-docinfo"></a>12.2.4. docinfo</h3></div></div></div>
- <p>
- Document attribute values (docinfo) storage mode.
- Optional, default is 'extern'.
- Known values are 'none', 'extern' and 'inline'.
- </p><p>
- Docinfo storage mode defines how exactly docinfo will be
- physically stored on disk and RAM. "none" means that there will be
- no docinfo at all (ie. no attributes). Normally you need not set
- "none" explicitly because Sphinx will automatically select "none"
- when there are no attributes configured. "inline" means that the
- docinfo will be stored in the <code class="filename">.spd</code> file,
- along with the document ID lists. "extern" means that the docinfo
- will be stored separately (externally) from document ID lists,
- in a special <code class="filename">.spa</code> file.
- </p><p>
- Basically, externally stored docinfo must be kept in RAM when querying,
- for performance reasons. So in some cases "inline" might be the only option.
- However, such cases are infrequent, and docinfo defaults to "extern".
- Refer to <a class="xref" href="#attributes" title="3.3. Attributes">Section 3.3, “Attributes”</a> for in-depth discussion
- and RAM usage estimates.
- </p><h4><a name="idp33350272"></a>Example:</h4><pre class="programlisting">
- docinfo = inline
- </pre></div>
- <div class="sect2" title="12.2.5. mlock"><div class="titlepage"><div><div><h3 class="title"><a name="conf-mlock"></a>12.2.5. mlock</h3></div></div></div>
- <p>
- Memory locking for cached data.
- Optional, default is 0 (do not call mlock()).
- </p><p>
- For search performance, <code class="filename">searchd</code> preloads
- a copy of <code class="filename">.spa</code> and <code class="filename">.spi</code>
- files in RAM, and keeps that copy in RAM at all times. But if there
- are no searches on the index for some time, there are no accesses
- to that cached copy, and OS might decide to swap it out to disk.
- First queries to such "cooled down" index will cause swap-in
- and their latency will suffer.
- </p><p>
- Setting mlock option to 1 makes Sphinx lock physical RAM used
- for that cached data using mlock(2) system call, and that prevents
- swapping (see man 2 mlock for details). mlock(2) is a privileged call,
- so it will require <code class="filename">searchd</code> to be either run
- from root account, or be granted enough privileges otherwise.
- If mlock() fails, a warning is emitted, but index continues
- working.
- </p><h4><a name="idp33357264"></a>Example:</h4><pre class="programlisting">
- mlock = 1
- </pre></div>
- <div class="sect2" title="12.2.6. morphology"><div class="titlepage"><div><div><h3 class="title"><a name="conf-morphology"></a>12.2.6. morphology</h3></div></div></div>
- <p>
- A list of morphology preprocessors (stemmers or lemmatizers) to apply.
- Optional, default is empty (do not apply any preprocessor).
- </p><p>
- Morphology preprocessors can be applied to the words being
- indexed to replace different forms of the same word with the base,
- normalized form. For instance, English stemmer will normalize
- both "dogs" and "dog" to "dog", making search results for
- both searches the same.
- </p><p>
- There are 3 different morphology preprocessors that Sphinx implements:
- lemmatizers, stemmers, and phonetic algorithms.
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>Lemmatizer reduces a keyword form to a so-called lemma,
- a proper normal form, or in other words, a valid natural language
- root word. For example, "running" could be reduced to "run",
- the infinitive verb form, and "octopi" would be reduced to "octopus",
- the singular noun form. Note that sometimes a word form can have
- multiple corresponding root words. For instance, by looking at
- "dove" it is not possible to tell whether this is a past tense
- of "dive" the verb as in "He dove into a pool.", or "dove" the noun
- as in "White dove flew over the cuckoo's nest." In this case
- lemmatizer can generate all the possible root forms.
- </p></li>
- <li class="listitem"><p>Stemmer reduces a keyword form to a so-called stem
- by removing and/or replacing certain well-known suffixes.
- The resulting stem is however <span class="bold"><strong>not</strong></span> guaranteed to be
- a valid word by itself. For instance, with the Porter English
- stemmer "running" would still reduce to "run", which is fine,
- but "business" would reduce to "busi", which is not a word,
- and "octopi" would not reduce at all. Stemmers are essentially
- (much) simpler but still pretty good replacements of full-blown
- lemmatizers.
- </p></li>
- <li class="listitem"><p>Phonetic algorithms replace the words with specially
- crafted phonetic codes that are equal even when the original words
- are different, but phonetically close.
- </p></li>
- </ul></div>
- <p>
- The morphology processors that come with our own built-in Sphinx
- implementations are:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>English, Russian, and German lemmatizers;</p></li>
- <li class="listitem"><p>English, Russian, Arabic, and Czech stemmers;</p></li>
- <li class="listitem"><p>SoundEx and MetaPhone phonetic algorithms.</p></li>
- </ul></div>
- <p>
- You can also link with <span class="bold"><strong>libstemmer</strong></span> library for even more
- stemmers (see details below). With libstemmer, Sphinx also supports
- morphological processing for more than 15 other languages. Binary
- packages should come prebuilt with libstemmer support, too.
- </p><p>
- Lemmatizer support was added in version 2.1.1-beta, starting with
- a Russian lemmatizer. English and German lemmatizers were then added
- in version 2.2.1-beta.
- </p><p>
- Lemmatizers require a dictionary that needs to be
- additionally downloaded from the Sphinx website. That dictionary
- needs to be installed in a directory specified by
- <a class="link" href="#conf-lemmatizer-base" title="12.5.1. lemmatizer_base">lemmatizer_base</a>
- directive. Also, there is a
- <a class="link" href="#conf-lemmatizer-cache" title="12.3.8. lemmatizer_cache">lemmatizer_cache</a>
- directive that lets you speed up lemmatizing (and therefore
- indexing) by spending more RAM for, basically, an uncompressed
- cache of a dictionary.
- </p><p>
- Chinese segmentation using Rosette Linguistics Platform was added in 2.2.1-beta.
- It is a much more precise but slower way (compared to n-grams) to segment Chinese documents.
- <code class="option"><a class="link" href="#conf-charset-table" title="12.2.16. charset_table">charset_table</a></code> must contain all Chinese characters except
- Chinese punctuation marks because incoming documents are first processed by sphinx tokenizer and then the result
- is processed by RLP. Sphinx performs per-token language detection on the incoming documents. If token language is
- identified as Chinese, it will only be processed by the RLP, even if multiple morphology processors are specified.
- Otherwise, it will be processed by all the morphology processors specified in the "morphology" option. Rosette
- Linguistics Platform must be installed and configured and sphinx must be built with a --with-rlp switch. See also
- <code class="option"><a class="link" href="#conf-rlp-root" title="12.5.5. rlp_root">rlp_root</a></code>,
- <code class="option"><a class="link" href="#conf-rlp-environment" title="12.5.6. rlp_environment">rlp_environment</a></code> and
- <code class="option"><a class="link" href="#conf-rlp-context" title="12.2.67. rlp_context">rlp_context</a></code> options.
- A batched version of RLP segmentation is also available (<code class="option">rlp_chinese_batched</code>). It provides the
- same functionality as the basic <code class="option">rlp_chinese</code> segmentation, but enables batching documents before
- processing them by the RLP. Processing several documents at once can result in a substantial indexing speedup if
- the documents are small (for example, less than 1k). See also
- <code class="option"><a class="link" href="#conf-rlp-max-batch-size" title="12.5.7. rlp_max_batch_size">rlp_max_batch_size</a></code> and
- <code class="option"><a class="link" href="#conf-rlp-max-batch-docs" title="12.5.8. rlp_max_batch_docs">rlp_max_batch_docs</a></code> options.
- </p><p>
- Additional stemmers provided by <a class="ulink" href="http://snowball.tartarus.org/" target="_top">Snowball</a>
- project <a class="ulink" href="http://snowball.tartarus.org/dist/libstemmer_c.tgz" target="_top">libstemmer</a> library
- can be enabled at compile time using <code class="option">--with-libstemmer</code> <code class="filename">configure</code> option.
- Built-in English and Russian stemmers should be faster than their
- libstemmer counterparts, but can produce slightly different results,
- because they are based on an older version.
- </p><p>
- Soundex implementation matches that of MySQL. Metaphone implementation
- is based on Double Metaphone algorithm and indexes the primary code.
- </p><p>
- Built-in values that are available for use in <code class="option">morphology</code>
- option are as follows:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>none - do not perform any morphology processing;</p></li>
- <li class="listitem"><p>lemmatize_ru - apply Russian lemmatizer and pick a single root form (added in 2.1.1-beta);</p></li>
- <li class="listitem"><p>lemmatize_en - apply English lemmatizer and pick a single root form (added in 2.2.1-beta);</p></li>
- <li class="listitem"><p>lemmatize_de - apply German lemmatizer and pick a single root form (added in 2.2.1-beta);</p></li>
- <li class="listitem"><p>lemmatize_ru_all - apply Russian lemmatizer and index all possible root forms (added in 2.1.1-beta);</p></li>
- <li class="listitem"><p>lemmatize_en_all - apply English lemmatizer and index all possible root forms (added in 2.2.1-beta);</p></li>
- <li class="listitem"><p>lemmatize_de_all - apply German lemmatizer and index all possible root forms (added in 2.2.1-beta);</p></li>
- <li class="listitem"><p>stem_en - apply Porter's English stemmer;</p></li>
- <li class="listitem"><p>stem_ru - apply Porter's Russian stemmer;</p></li>
- <li class="listitem"><p>stem_enru - apply Porter's English and Russian stemmers;</p></li>
- <li class="listitem"><p>stem_cz - apply Czech stemmer;</p></li>
- <li class="listitem"><p>stem_ar - apply Arabic stemmer (added in 2.1.1-beta);</p></li>
- <li class="listitem"><p>soundex - replace keywords with their SOUNDEX code;</p></li>
- <li class="listitem"><p>metaphone - replace keywords with their METAPHONE code.</p></li>
- <li class="listitem"><p>rlp_chinese - apply Chinese text segmentation using Rosette Linguistics Platform</p></li>
- <li class="listitem"><p>rlp_chinese_batched - apply Chinese text segmentation using Rosette Linguistics Platform with document batching</p></li>
- </ul></div>
- <p>
- Additional values provided by libstemmer are in 'libstemmer_XXX' format,
- where XXX is libstemmer algorithm codename (refer to
- <code class="filename">libstemmer_c/libstemmer/modules.txt</code> for a complete list).
- </p><p>
- Several stemmers can be specified (comma-separated). They will be applied
- to incoming words in the order they are listed, and the processing will stop
- once one of the stemmers actually modifies the word.
- Also when <a class="link" href="#conf-wordforms" title="12.2.12. wordforms">wordforms</a> feature is enabled
- the word will be looked up in word forms dictionary first, and if there is
- a matching entry in the dictionary, stemmers will not be applied at all.
- Or in other words, <a class="link" href="#conf-wordforms" title="12.2.12. wordforms">wordforms</a> can be
- used to implement stemming exceptions.
- </p><h4><a name="idp33398320"></a>Example:</h4><pre class="programlisting">
- morphology = stem_en, libstemmer_sv
- </pre></div>
- <div class="sect2" title="12.2.7. dict"><div class="titlepage"><div><div><h3 class="title"><a name="conf-dict"></a>12.2.7. dict</h3></div></div></div>
- <p>
- The keywords dictionary type.
- Known values are 'crc' and 'keywords'.
- 'crc' is DEPRECATED. Use 'keywords' instead.
- Optional, default is 'keywords'.
- Introduced in version 2.0.1-beta.
- </p><p>
- CRC dictionary mode (dict=crc) was originally the default dictionary type
- in Sphinx, and the only one available until version 2.0.1-beta.
- Keywords dictionary mode (dict=keywords) was added in 2.0.1-beta,
- primarily to (greatly) reduce indexing impact and enable substring
- searches on huge collections. They also eliminate the chance of
- CRC32 collisions. In 2.0.1-beta, that mode was only supported
- for disk indexes. Starting with 2.0.2-beta, RT indexes are
- also supported.
- </p><p>
- CRC dictionaries never store the original keyword text in the index.
- Instead, keywords are replaced with their control sum value (either CRC32 or
- FNV64, depending whether Sphinx was built with <code class="option">--enable-id64</code>)
- both when searching and indexing, and that value is used internally
- in the index.
- </p><p>
- That approach has two drawbacks. First, in CRC32 case there is
- a chance of control sum collision between several pairs of different
- keywords, growing quadratically with the number of unique keywords
- in the index. (FNV64 case is unaffected in practice, as a chance
- of a single FNV64 collision in a dictionary of 1 billion entries
- is approximately 1:16, or 6.25 percent. And most dictionaries
- will be much more compact than a billion keywords, as a typical
- spoken human language has in the region of 1 to 10 million word
- forms.) Second, and more importantly, substring searches are not
- directly possible with control sums. Sphinx alleviated that by
- pre-indexing all the possible substrings as separate keywords
- (see <a class="xref" href="#conf-min-prefix-len" title="12.2.18. min_prefix_len">Section 12.2.18, “min_prefix_len”</a>, <a class="xref" href="#conf-min-infix-len" title="12.2.19. min_infix_len">Section 12.2.19, “min_infix_len”</a>
- directives). That actually has an added benefit of matching
- substrings in the quickest way possible. But at the same time
- pre-indexing all substrings grows the index size a lot (factors
- of 3-10x and even more would not be unusual) and impacts the
- indexing time respectively, rendering substring searches
- on big indexes rather impractical.
- </p><p>
- Keywords dictionary, introduced in 2.0.1-beta, fixes both these
- drawbacks. It stores the keywords in the index and performs
- search-time wildcard expansion. For example, a search for a
- 'test*' prefix could internally expand to 'test|tests|testing'
- query based on the dictionary contents. That expansion is fully
- transparent to the application, except that the separate
- per-keyword statistics for all the actually matched keywords
- would now also be reported.
- </p><p>
- Version 2.1.1-beta introduced extended wildcards support, now special
- symbols like '?' and '%' are supported along with substring (infix) search (e.g. "t?st*", "run%", "*abc*").
- Note, however, these wildcards work only with dict=keywords, and not elsewhere.
- </p><p>
- Indexing with keywords dictionary should be 1.1x to 1.3x slower
- compared to regular, non-substring indexing - but times faster
- compared to substring indexing (either prefix or infix). Index size
- should only be slightly bigger than that of the regular non-substring
- index, with a 1..10% total difference.
- Regular keyword searching time must be very close or identical across
- all three discussed index kinds (CRC non-substring, CRC substring,
- keywords). Substring searching time can vary greatly depending
- on how many actual keywords match the given substring (in other
- words, into how many keywords does the search term expand).
- The maximum number of keywords matched is restricted by the
- <a class="link" href="#conf-expansion-limit" title="12.4.35. expansion_limit">expansion_limit</a>
- directive.
- </p><p>
- Essentially, keywords and CRC dictionaries represent the two
- different trade-off substring searching decisions. You can choose
- to either sacrifice indexing time and index size in favor of
- top-speed worst-case searches (CRC dictionary), or only slightly
- impact indexing time but sacrifice worst-case searching time when
- the prefix expands into very many keywords (keywords dictionary).
- </p><h4><a name="idp33411072"></a>Example:</h4><pre class="programlisting">
- dict = keywords
- </pre></div>
- <div class="sect2" title="12.2.8. index_sp"><div class="titlepage"><div><div><h3 class="title"><a name="conf-index-sp"></a>12.2.8. index_sp</h3></div></div></div>
- <p>
- Whether to detect and index sentence and paragraph boundaries.
- Optional, default is 0 (do not detect and index).
- Introduced in version 2.0.1-beta.
- </p><p>
- This directive enables sentence and paragraph boundary indexing.
- It's required for the SENTENCE and PARAGRAPH operators to work.
- Sentence boundary detection is based on plain text analysis, so you
- only need to set <code class="code">index_sp = 1</code> to enable it. Paragraph
- detection is however based on HTML markup, and happens in the
- <a class="link" href="#conf-html-strip" title="12.2.27. html_strip">HTML stripper</a>.
- So to index paragraph locations you also need to enable the stripper
- by specifying <code class="code">html_strip = 1</code>. Both types of boundaries
- are detected based on a few built-in rules enumerated just below.
- </p><p>
- Sentence boundary detection rules are as follows.
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>Question and exclamation signs (? and !) are always a sentence boundary.</p></li>
- <li class="listitem"><p>Trailing dot (.) is a sentence boundary, except:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="circle"><li class="listitem"><p>When followed by a letter. That's considered a part of an abbreviation (as in "S.T.A.L.K.E.R" or "Goldman Sachs S.p.A.").</p></li>
- <li class="listitem"><p>When followed by a comma. That's considered an abbreviation followed by a comma (as in "Telecom Italia S.p.A., founded in 1994").</p></li>
- <li class="listitem"><p>When followed by a space and a small letter. That's considered an abbreviation within a sentence (as in "News Corp. announced in February").</p></li>
- <li class="listitem"><p>When preceded by a space and a capital letter, and followed by a space. That's considered a middle initial (as in "John D. Doe").</p></li>
- </ul></div>
- <p>
- </p></li>
- </ul></div>
- <p>
- </p><p>
- Paragraph boundaries are inserted at every block-level HTML tag.
- Namely, those are (as taken from HTML 4 standard) ADDRESS, BLOCKQUOTE,
- CAPTION, CENTER, DD, DIV, DL, DT, H1, H2, H3, H4, H5, LI, MENU, OL, P,
- PRE, TABLE, TBODY, TD, TFOOT, TH, THEAD, TR, and UL.
- </p><p>
- Both sentences and paragraphs increment the keyword position counter by 1.
- </p><h4><a name="idp33422656"></a>Example:</h4><pre class="programlisting">
- index_sp = 1
- </pre></div>
- <div class="sect2" title="12.2.9. index_zones"><div class="titlepage"><div><div><h3 class="title"><a name="conf-index-zones"></a>12.2.9. index_zones</h3></div></div></div>
- <p>
- A list of in-field HTML/XML zones to index.
- Optional, default is empty (do not index zones).
- Introduced in version 2.0.1-beta.
- </p><p>
- Zones can be formally defined as follows. Everything between
- an opening and a matching closing tag is called a span, and
- the aggregate of all spans sharing the same
- tag name is called a zone. For instance, everything between
- the occurrences of &lt;H1&gt; and &lt;/H1&gt; in the document
- field belongs to H1 zone.
- </p><p>
- Zone indexing, enabled by <code class="code">index_zones</code> directive,
- is an optional extension of the HTML stripper. So it will also
- require that the <a class="link" href="#conf-html-strip" title="12.2.27. html_strip">stripper</a>
- is enabled (with <code class="code">html_strip = 1</code>). The value of the
- <code class="code">index_zones</code> should be a comma-separated list of
- those tag names and wildcards (ending with a star) that should
- be indexed as zones.
- </p><p>
- Zones can nest and overlap arbitrarily. The only requirement
- is that every opening tag has a matching closing tag. You can also have
- an arbitrary number of both zones (as in unique zone names,
- such as H1) and spans (all the occurrences of those H1 tags)
- in a document.
- Once indexed, zones can then be used for matching with
- the ZONE operator, see <a class="xref" href="#extended-syntax" title="5.3. Extended query syntax">Section 5.3, “Extended query syntax”</a>.
- </p><h4><a name="idp33430464"></a>Example:</h4><pre class="programlisting">
- index_zones = h*, th, title
- </pre><p>
- Versions earlier than 2.1.1-beta only provided this feature for plain
- index files; currently, RT index files also provide it.
- </p></div>
- <div class="sect2" title="12.2.10. min_stemming_len"><div class="titlepage"><div><div><h3 class="title"><a name="conf-min-stemming-len"></a>12.2.10. min_stemming_len</h3></div></div></div>
- <p>
- Minimum word length at which to enable stemming.
- Optional, default is 1 (stem everything).
- Introduced in version 0.9.9-rc1.
- </p><p>
- Stemmers are not perfect, and might sometimes produce undesired results.
- For instance, running "gps" keyword through Porter stemmer for English
- results in "gp", which is not really the intent. <code class="option">min_stemming_len</code>
- feature lets you suppress stemming based on the source word length,
- ie. to avoid stemming too short words. Keywords that are shorter than
- the given threshold will not be stemmed. Note that keywords that are
- exactly as long as specified <span class="bold"><strong>will</strong></span> be stemmed. So in order to avoid
- stemming 3-character keywords, you should specify 4 for the value.
- For more finely grained control, refer to <a class="link" href="#conf-wordforms" title="12.2.12. wordforms">wordforms</a> feature.
- </p><h4><a name="idp33436640"></a>Example:</h4><pre class="programlisting">
- min_stemming_len = 4
- </pre></div>
- <div class="sect2" title="12.2.11. stopwords"><div class="titlepage"><div><div><h3 class="title"><a name="conf-stopwords"></a>12.2.11. stopwords</h3></div></div></div>
- <p>
- Stopword files list (space separated).
- Optional, default is empty.
- </p><p>
- Stopwords are the words that will not be indexed. Typically you'd
- put most frequent words in the stopwords list because they do not add
- much value to search results but consume a lot of resources to process.
- </p><p>
- You can specify several file names, separated by spaces. All the files
- will be loaded. Stopwords file format is simple plain text. The encoding
- must be UTF-8.
- File data will be tokenized with respect to <a class="link" href="#conf-charset-table" title="12.2.16. charset_table">charset_table</a>
- settings, so you can use the same separators as in the indexed data.
- </p><p>
- The <a class="link" href="#conf-morphology" title="12.2.6. morphology">stemmers</a> will normally be
- applied when parsing stopwords file. That might however lead to undesired
- results. Starting with 2.1.1-beta, you can turn that off with
- <a class="link" href="#conf-stopwords-unstemmed" title="12.2.65. stopwords_unstemmed">stopwords_unstemmed</a>.
- </p><p>
- Starting with version 2.1.1-beta small enough files are stored in the index
- header, see <a class="xref" href="#conf-embedded-limit" title="12.2.13. embedded_limit">Section 12.2.13, “embedded_limit”</a> for details.
- </p><p>
- While stopwords are not indexed, they still do affect the keyword positions.
- For instance, assume that "the" is a stopword, that document 1 contains the line
- "in office", and that document 2 contains "in the office". Searching for "in office"
- as an exact phrase will only return the first document, as expected, even though
- "the" in the second one is stopped. That behavior can be tweaked through the
- <a class="link" href="#conf-stopword-step" title="12.2.44. stopword_step">stopword_step</a> directive.
- </p><p>
- Stopwords files can either be created manually, or semi-automatically.
- <code class="filename">indexer</code> provides a mode that creates a frequency dictionary
- of the index, sorted by the keyword frequency, see <code class="option">--buildstops</code>
- and <code class="option">--buildfreqs</code> switches in <a class="xref" href="#ref-indexer" title="7.1. indexer command reference">Section 7.1, “<code class="filename">indexer</code> command reference”</a>.
- Top keywords from that dictionary can usually be used as stopwords.
- </p><h4><a name="idp33449904"></a>Example:</h4><pre class="programlisting">
- stopwords = /usr/local/sphinx/data/stopwords.txt
- stopwords = stopwords-ru.txt stopwords-en.txt
- </pre></div>
- <div class="sect2" title="12.2.12. wordforms"><div class="titlepage"><div><div><h3 class="title"><a name="conf-wordforms"></a>12.2.12. wordforms</h3></div></div></div>
- <p>
- Word forms dictionary.
- Optional, default is empty.
- </p><p>
- Word forms are applied after tokenizing the incoming text
- by <a class="link" href="#conf-charset-table" title="12.2.16. charset_table">charset_table</a> rules.
- They essentially let you replace one word with another. Normally,
- that would be used to bring different word forms to a single
- normal form (eg. to normalize all the variants such as "walks",
- "walked", "walking" to the normal form "walk"). It can also be used
- to implement stemming exceptions, because stemming is not applied
- to words found in the forms list.
- </p><p>
- Starting with version 2.1.1-beta small enough files are stored in the index
- header, see <a class="xref" href="#conf-embedded-limit" title="12.2.13. embedded_limit">Section 12.2.13, “embedded_limit”</a> for details.
- </p><p>
- Dictionaries are used to normalize incoming words both during indexing
- and searching. Therefore, to pick up changes in wordforms file
- it's required to rotate the index.
- </p><p>
- Word forms support in Sphinx is designed to support big dictionaries well.
- They moderately affect indexing speed: for instance, a dictionary with 1 million
- entries slows down indexing about 1.5 times. Searching speed is not affected at all.
- Additional RAM impact is roughly equal to the dictionary file size,
- and dictionaries are shared across indexes: ie. if the very same 50 MB wordforms
- file is specified for 10 different indexes, additional <code class="filename">searchd</code>
- RAM usage will be about 50 MB.
- </p><p>
- Dictionary file should be in a simple plain text format. Each line
- should contain source and destination word forms, in UTF-8 encoding,
- separated by a "greater than" sign (">"). Rules from the
- <a class="link" href="#conf-charset-table" title="12.2.16. charset_table">charset_table</a> will be
- applied when the file is loaded. So basically it's as case sensitive
- as your other full-text indexed data, ie. typically case insensitive.
- Here's the file contents sample:
- </p><pre class="programlisting">
- walks > walk
- walked > walk
- walking > walk
- </pre><p>
- </p><p>
- There is a bundled <code class="filename">spelldump</code> utility that
- helps you create a dictionary file in the format Sphinx can read
- from source <code class="filename">.dict</code> and <code class="filename">.aff</code>
- dictionary files in <code class="filename">ispell</code> or <code class="filename">MySpell</code>
- format (as bundled with OpenOffice).
- </p><p>
- Starting with version 0.9.9-rc1, you can map several source words
- to a single destination word. Because the work happens on tokens,
- not the source text, differences in whitespace and markup are ignored.
- </p><p>
- Starting with version 2.1.1-beta, you can use "=>" instead of ">". Comments
- (starting with "#") are also allowed. Finally, if a line starts with a tilde ("~")
- the wordform will be applied after morphology, instead of before.
- </p><pre class="programlisting">
- core 2 duo > c2d
- e6600 > c2d
- core 2duo => c2d # Some people write '2duo' together...
- </pre><p>
- </p><p>
- Starting with version 2.2.4, you can specify multiple destination tokens:
- </p><pre class="programlisting">
- s02e02 > season 2 episode 2
- s3 e3 > season 3 episode 3
- </pre><p>
- </p><h4><a name="idp33467520"></a>Example:</h4><pre class="programlisting">
- wordforms = /usr/local/sphinx/data/wordforms.txt
- wordforms = /usr/local/sphinx/data/alternateforms.txt
- wordforms = /usr/local/sphinx/private/dict*.txt
- </pre><p>
- Starting with version 2.1.1-beta you can specify several files rather than
- just one. Masks can be used as a pattern, and all matching files will
- be processed in simple ascending order. (If multi-byte codepages are used,
- and file names can include foreign characters, the resulting order may not
- be exactly alphabetic.) If the same wordform definition is found in several
- files, the last one is used, and it overrides previous definitions.
- </p></div>
- <div class="sect2" title="12.2.13. embedded_limit"><div class="titlepage"><div><div><h3 class="title"><a name="conf-embedded-limit"></a>12.2.13. embedded_limit</h3></div></div></div>
- <p>
- Embedded exceptions, wordforms, or stopwords file size limit.
- Optional, default is 16K.
- Added in version 2.1.1-beta.
- </p><p>
- Before 2.1.1-beta, the contents of exceptions, wordforms, or stopwords
- files were always kept in the files. Only the file names were stored into
- the index. Starting with 2.1.1-beta, indexer can either save the file name,
- or embed the file contents directly into the index. Files sized under
- <code class="code">embedded_limit</code> get stored into the index. For bigger files,
- only the file names are stored. This also simplifies moving index files
- to a different machine; you may get by just copying a single file.
- </p><p>
- With smaller files, such embedding reduces the number of the external
- files on which the index depends, and helps maintenance. But at the same
- time it makes no sense to embed a 100 MB wordforms dictionary into a tiny
- delta index. So there needs to be a size threshold, and <code class="code">embedded_limit</code>
- is that threshold.
- </p><h4><a name="idp33473600"></a>Example:</h4><pre class="programlisting">
- embedded_limit = 32K
- </pre></div>
- <div class="sect2" title="12.2.14. exceptions"><div class="titlepage"><div><div><h3 class="title"><a name="conf-exceptions"></a>12.2.14. exceptions</h3></div></div></div>
- <p>
- Tokenizing exceptions file.
- Optional, default is empty.
- </p><p>
- Exceptions allow you to map one or more tokens (including tokens with
- characters that would normally be excluded) to a single keyword.
- They are similar to <a class="link" href="#conf-wordforms" title="12.2.12. wordforms">wordforms</a>
- in that they also perform mapping, but have a number of important
- differences.
- </p><p>
- Starting with version 2.1.1-beta small enough files are stored in the index
- header, see <a class="xref" href="#conf-embedded-limit" title="12.2.13. embedded_limit">Section 12.2.13, “embedded_limit”</a> for details.
- </p><p>
- Short summary of the differences is as follows:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>exceptions are case sensitive, wordforms are not;</p></li>
- <li class="listitem"><p>exceptions can use special characters that are <span class="bold"><strong>not</strong></span> in charset_table, wordforms fully obey charset_table;</p></li>
- <li class="listitem"><p>exceptions can underperform on huge dictionaries, wordforms handle millions of entries well.</p></li>
- </ul></div>
- <p>
- </p><p>
- The expected file format is also plain text, with one line per exception,
- and the line format is as follows:
- </p><pre class="programlisting">
- map-from-tokens => map-to-token
- </pre><p>
- Example file:
- </p><pre class="programlisting">
- at & t => at&t
- AT&T => AT&T
- Standarten Fuehrer => standartenfuhrer
- Standarten Fuhrer => standartenfuhrer
- MS Windows => ms windows
- Microsoft Windows => ms windows
- C++ => cplusplus
- c++ => cplusplus
- C plus plus => cplusplus
- </pre><p>
- All tokens here are case sensitive: they will <span class="bold"><strong>not</strong></span> be processed by
- <a class="link" href="#conf-charset-table" title="12.2.16. charset_table">charset_table</a> rules. Thus, with
- the example exceptions file above, "at&t" text will be tokenized as two
- keywords "at" and "t", because of lowercase letters. On the other hand,
- "AT&T" will match exactly and produce single "AT&T" keyword.
- </p><p>
- Note that this map-to keyword is a) always interpreted
- as a <span class="emphasis"><em>single</em></span> word, and b) is both case and space
- sensitive! In our sample, "ms windows" query will <span class="emphasis"><em>not</em></span>
- match the document with "MS Windows" text. The query will be interpreted
- as a query for two keywords, "ms" and "windows". And what "MS Windows"
- gets mapped to is a <span class="emphasis"><em>single</em></span> keyword "ms windows",
- with a space in the middle. On the other hand, "standartenfuhrer"
- will retrieve documents with "Standarten Fuhrer" or "Standarten Fuehrer"
- contents (capitalized exactly like this), or any capitalization variant
- of the keyword itself, eg. "staNdarTenfUhreR". (It won't catch
- "standarten fuhrer", however: this text does not match any of the
- listed exceptions because of case sensitivity, and gets indexed
- as two separate keywords.)
- </p><p>
- Whitespace in the map-from tokens list matters, but its amount does not.
- Any amount of the whitespace in the map-from list will match any other amount
- of whitespace in the indexed document or query. For instance, "AT & T"
- map-from token will match "AT & T" text,
- whatever the amount of space in both map-from part and the indexed text.
- Such text will therefore be indexed as a special "AT&T" keyword,
- thanks to the very first entry from the sample.
- </p><p>
- Exceptions also allow you to capture special characters (that are exceptions
- from general <a class="link" href="#conf-charset-table" title="12.2.16. charset_table">charset_table</a> rules;
- hence the name). Assume that you generally do not want to treat '+'
- as a valid character, but still want to be able to search for some exceptions
- from this rule such as 'C++'. The sample above will do just that, totally
- independent of what characters are in the table and what are not.
- </p><p>
- Exceptions are applied to raw incoming document and query data
- during indexing and searching respectively. Therefore, to pick up
- changes in the file it's required to reindex and restart
- <code class="filename">searchd</code>.
- </p><h4><a name="idp33493312"></a>Example:</h4><pre class="programlisting">
- exceptions = /usr/local/sphinx/data/exceptions.txt
- </pre></div>
- <div class="sect2" title="12.2.15. min_word_len"><div class="titlepage"><div><div><h3 class="title"><a name="conf-min-word-len"></a>12.2.15. min_word_len</h3></div></div></div>
- <p>
- Minimum indexed word length.
- Optional, default is 1 (index everything).
- </p><p>
- Only those words that are not shorter than this minimum will be indexed.
- For instance, if min_word_len is 4, then 'the' won't be indexed, but 'they' will be.
- </p><h4><a name="idp33496640"></a>Example:</h4><pre class="programlisting">
- min_word_len = 4
- </pre></div>
- <div class="sect2" title="12.2.16. charset_table"><div class="titlepage"><div><div><h3 class="title"><a name="conf-charset-table"></a>12.2.16. charset_table</h3></div></div></div>
- <p>
- Accepted characters table, with case folding rules.
- Optional, the default value includes Latin and Cyrillic characters.
- </p><p>
- charset_table is the main workhorse of Sphinx tokenizing process,
- ie. the process of extracting keywords from document text or query text.
- It controls what characters are accepted as valid and what are not,
- and how the accepted characters should be transformed (eg. should
- the case be removed or not).
- </p><p>
- You can think of charset_table as a big table that has a mapping
- for each and every one of the 100K+ characters in Unicode. By default,
- every character maps to 0, which means that it does not occur
- within keywords and should be treated as a separator. Once
- mentioned in the table, character is mapped to some other
- character (most frequently, either to itself or to a lowercase
- letter), and is treated as a valid keyword part.
- </p><p>
- The expected value format is a comma-separated list of mappings.
- Two simplest mappings simply declare a character as valid, and map
- a single character to another single character, respectively.
- But specifying the whole table in such form would result
- in bloated and barely manageable specifications. So there are
- several syntax shortcuts that let you map ranges of characters
- at once. The complete list is as follows:
- </p><div class="variablelist"><dl><dt><span class="term">A->a</span></dt>
- <dd><p>Single char mapping, declares source char 'A' as allowed
- to occur within keywords and maps it to destination char 'a'
- (but does <span class="emphasis"><em>not</em></span> declare 'a' as allowed).
- </p></dd><dt><span class="term">A..Z->a..z</span></dt>
- <dd><p>Range mapping, declares all chars in source range
- as allowed and maps them to the destination range. Does <span class="emphasis"><em>not</em></span>
- declare destination range as allowed. Also checks ranges' lengths
- (the lengths must be equal).
- </p></dd><dt><span class="term">a</span></dt>
- <dd><p>Stray char mapping, declares a character as allowed
- and maps it to itself. Equivalent to a->a single char mapping.
- </p></dd><dt><span class="term">a..z</span></dt>
- <dd><p>Stray range mapping, declares all characters in range
- as allowed and maps them to themselves. Equivalent to
- a..z->a..z range mapping.
- </p></dd><dt><span class="term">A..Z/2</span></dt>
- <dd><p>Checkerboard range map. Maps every pair of chars
- to the second char. More formally, declares odd characters
- in range as allowed and maps them to the even ones; also
- declares even characters as allowed and maps them to themselves.
- For instance, A..Z/2 is equivalent to A->B, B->B, C->D, D->D,
- ..., Y->Z, Z->Z. This mapping shortcut is helpful for
- a number of Unicode blocks where uppercase and lowercase
- letters go in such interleaved order instead of contiguous
- chunks.
- </p></dd></dl></div>
- <p>
- </p><p>
- Control characters with codes from 0 to 31 are always treated as separators.
- Characters with codes 32 to 127, ie. 7-bit ASCII characters, can be used
- in the mappings as is. To avoid configuration file encoding issues,
- 8-bit ASCII characters and Unicode characters must be specified in U+xxx form,
- where 'xxx' is hexadecimal codepoint number. This form can also be used
- for 7-bit ASCII characters to encode special ones: eg. use U+20 to
- encode space, U+2E to encode dot, U+2C to encode comma.
- </p><p>
- Starting with 2.2.3-beta, the aliases "english" and "russian" can be used
- in place of explicit character mappings.
- </p><h4><a name="idp33512336"></a>Example:</h4><pre class="programlisting">
- # default are English and Russian letters
- charset_table = 0..9, A..Z->a..z, _, a..z, \
- U+410..U+42F->U+430..U+44F, U+430..U+44F, U+401->U+451, U+451
-
- # english charset defined with alias
- charset_table = 0..9, english, _
- </pre></div>
- <div class="sect2" title="12.2.17. ignore_chars"><div class="titlepage"><div><div><h3 class="title"><a name="conf-ignore-chars"></a>12.2.17. ignore_chars</h3></div></div></div>
- <p>
- Ignored characters list.
- Optional, default is empty.
- </p><p>
- Useful in the cases when some characters, such as soft hyphenation mark (U+00AD),
- should be not just treated as separators but rather fully ignored.
- For example, if '-' is simply not in the charset_table,
- "abc-def" text will be indexed as "abc" and "def" keywords.
- On the contrary, if '-' is added to ignore_chars list, the same
- text will be indexed as a single "abcdef" keyword.
- </p><p>
- The syntax is the same as for <a class="link" href="#conf-charset-table" title="12.2.16. charset_table">charset_table</a>,
- but it's only allowed to declare characters, and not allowed to map them. Also,
- the ignored characters must not be present in charset_table.
- </p><h4><a name="idp33517728"></a>Example:</h4><pre class="programlisting">
- ignore_chars = U+AD
- </pre></div>
- <div class="sect2" title="12.2.18. min_prefix_len"><div class="titlepage"><div><div><h3 class="title"><a name="conf-min-prefix-len"></a>12.2.18. min_prefix_len</h3></div></div></div>
- <p>
- Minimum word prefix length to index.
- Optional, default is 0 (do not index prefixes).
- </p><p>
- Prefix indexing allows you to implement wildcard searching by 'wordstart*' wildcards.
- When minimum prefix length is set to a positive number, indexer will index
- all the possible keyword prefixes (ie. word beginnings) in addition to the keywords
- themselves. Too short prefixes (below the minimum allowed length) will not
- be indexed.
- </p><p>
- For instance, indexing a keyword "example" with min_prefix_len=3
- will result in indexing "exa", "exam", "examp", "exampl" prefixes along
- with the word itself. Searches against such index for "exam" will match
- documents that contain "example" word, even if they do not contain "exam"
- by itself. However, indexing prefixes will make the index grow significantly
- (because of many more indexed keywords), and will degrade both indexing
- and searching times.
- </p><p>
- There's no automatic way to rank perfect word matches higher
- in a prefix index, but there's a number of tricks to achieve that.
- First, you can set up two indexes, one with prefix indexing and one
- without it, search through both, and use <a class="link" href="#api-func-setindexweights" title="9.3.6. SetIndexWeights">SetIndexWeights()</a>
- call to combine weights. Second, you can rewrite your extended-mode queries:
- </p><pre class="programlisting">
- $cl->Query ( "( keyword | keyword* ) other keywords" );
- </pre><p>
- </p><h4><a name="idp33524352"></a>Example:</h4><pre class="programlisting">
- min_prefix_len = 3
- </pre></div>
- <div class="sect2" title="12.2.19. min_infix_len"><div class="titlepage"><div><div><h3 class="title"><a name="conf-min-infix-len"></a>12.2.19. min_infix_len</h3></div></div></div>
- <p>
- Minimum infix length to index.
- Optional, default is 0 (do not index infixes).
- </p><p>
- Infix indexing allows you to implement wildcard searching by 'start*', '*end', and '*middle*' wildcards.
- When minimum infix length is set to a positive number, indexer will index all the possible keyword infixes
- (ie. substrings) in addition to the keywords themselves. Too short infixes
- (below the minimum allowed length) will not be indexed. For instance,
- indexing a keyword "test" with min_infix_len=2 will result in indexing
- "te", "es", "st", "tes", "est" infixes along with the word itself.
- Searches against such index for "es" will match documents that contain
- "test" word, even if they do not contain "es" by itself. However,
- indexing infixes will make the index grow significantly (because of
- many more indexed keywords), and will degrade both indexing and
- searching times.</p><p>
- There's no automatic way to rank perfect word matches higher
- in an infix index, but the same tricks as with <a class="link" href="#conf-min-prefix-len" title="12.2.18. min_prefix_len">prefix indexes</a>
- can be applied.
- </p><h4><a name="idp33529520"></a>Example:</h4><pre class="programlisting">
- min_infix_len = 3
- </pre></div>
- <div class="sect2" title="12.2.20. max_substring_len"><div class="titlepage"><div><div><h3 class="title"><a name="conf-max-substring-len"></a>12.2.20. max_substring_len</h3></div></div></div>
- <p>
- Maximum substring (either prefix or infix) length to index.
- Optional, default is 0 (do not limit indexed substrings).
- Applies to dict=crc only.
- </p><p>
- By default, substring (either prefix or infix) indexing in the
- <a class="link" href="#conf-dict" title="12.2.7. dict">dict=crc mode</a> will index <span class="bold"><strong>all</strong></span>
- the possible substrings as separate keywords. That might result
- in an overly large index. So the <code class="code">max_substring_len</code>
- directive lets you limit the impact of substring indexing
- by skipping too-long substrings (which, chances are, will never
- get searched for anyway).
- </p><p>
- For example, a test index of 10,000 blog posts takes this
- much disk space depending on the settings:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">6.4 MB baseline (no substrings)</li>
- <li class="listitem">24.3 MB (3.8x) with min_prefix_len = 3</li>
- <li class="listitem">22.2 MB (3.5x) with min_prefix_len = 3, max_substring_len = 8</li>
- <li class="listitem">19.3 MB (3.0x) with min_prefix_len = 3, max_substring_len = 6</li>
- <li class="listitem">94.3 MB (14.7x) with min_infix_len = 3</li>
- <li class="listitem">84.6 MB (13.2x) with min_infix_len = 3, max_substring_len = 8</li>
- <li class="listitem">70.7 MB (11.0x) with min_infix_len = 3, max_substring_len = 6</li>
- </ul></div>
- <p>
- So in this test limiting the max substring length saved us
- 10-15% on the index size.
- </p><p>
- There is no performance impact associated with substring length
- when using dict=keywords mode, so this directive is not applicable
- and intentionally forbidden in that case. If required, you can still
- limit the length of a substring that you search for in the application
- code.
- </p><h4><a name="idp33539760"></a>Example:</h4><pre class="programlisting">
- max_substring_len = 12
- </pre></div>
- <div class="sect2" title="12.2.21. prefix_fields"><div class="titlepage"><div><div><h3 class="title"><a name="conf-prefix-fields"></a>12.2.21. prefix_fields</h3></div></div></div>
- <p>
- The list of full-text fields to limit prefix indexing to.
- Optional, default is empty (index all fields in prefix mode).
- </p><p>
- Because prefix indexing impacts both indexing and searching performance,
- it might be desired to limit it to specific full-text fields only:
- for instance, to provide prefix searching through URLs, but not through
- page contents. prefix_fields specifies what fields will be prefix-indexed;
- all other fields will be indexed in normal mode. The value format is a
- comma-separated list of field names.
- </p><h4><a name="idp33543360"></a>Example:</h4><pre class="programlisting">
- prefix_fields = url, domain
- </pre></div>
- <div class="sect2" title="12.2.22. infix_fields"><div class="titlepage"><div><div><h3 class="title"><a name="conf-infix-fields"></a>12.2.22. infix_fields</h3></div></div></div>
- <p>
- The list of full-text fields to limit infix indexing to.
- Optional, default is empty (index all fields in infix mode).
- </p><p>
- Similar to <a class="link" href="#conf-prefix-fields" title="12.2.21. prefix_fields">prefix_fields</a>,
- but lets you limit infix-indexing to given fields.
- </p><h4><a name="idp33547552"></a>Example:</h4><pre class="programlisting">
- infix_fields = url, domain
- </pre></div>
- <div class="sect2" title="12.2.23. ngram_len"><div class="titlepage"><div><div><h3 class="title"><a name="conf-ngram-len"></a>12.2.23. ngram_len</h3></div></div></div>
- <p>
- N-gram lengths for N-gram indexing.
- Optional, default is 0 (disable n-gram indexing).
- Known values are 0 and 1 (other lengths to be implemented).
- </p><p>
- N-grams provide basic CJK (Chinese, Japanese, Korean) support for
- unsegmented texts. The issue with CJK searching is that there could be no
- clear separators between the words. Ideally, the texts would be filtered
- through a special program called segmenter that would insert separators
- in proper locations. However, segmenters are slow and error prone,
- and it's common to index contiguous groups of N characters, or n-grams,
- instead.
- </p><p>
- When this feature is enabled, streams of CJK characters are indexed
- as N-grams. For example, if incoming text is "ABCDEF" (where A to F represent
- some CJK characters) and length is 1, it will be indexed as if
- it was "A B C D E F". (With length equal to 2, it would produce "AB BC CD DE EF";
- but only 1 is supported at the moment.) Only those characters that are
- listed in <a class="link" href="#conf-ngram-chars" title="12.2.24. ngram_chars">ngram_chars</a> table
- will be split this way; other ones will not be affected.
- </p><p>
- Note that if search query is segmented, ie. there are separators between
- individual words, then wrapping the words in quotes and using extended mode
- will result in proper matches being found even if the text was <span class="bold"><strong>not</strong></span>
- segmented. For instance, assume that the original query is BC DEF.
- After wrapping in quotes on the application side, it should look
- like "BC" "DEF" (<span class="emphasis"><em>with</em></span> quotes). This query
- will be passed to Sphinx and internally split into 1-grams too,
- resulting in "B C" "D E F" query, still with
- quotes that are the phrase matching operator. And it will match
- the text even though there were no separators in the text.
- </p><p>
- Even if the search query is not segmented, Sphinx should still produce
- good results, thanks to phrase based ranking: it will pull closer phrase
- matches (which in case of N-gram CJK words can mean closer multi-character
- word matches) to the top.
- </p><h4><a name="idp33556432"></a>Example:</h4><pre class="programlisting">
- ngram_len = 1
- </pre></div>
- <div class="sect2" title="12.2.24. ngram_chars"><div class="titlepage"><div><div><h3 class="title"><a name="conf-ngram-chars"></a>12.2.24. ngram_chars</h3></div></div></div>
- <p>
- N-gram characters list.
- Optional, default is empty.
- </p><p>
- To be used in conjunction with <a class="link" href="#conf-ngram-len" title="12.2.23. ngram_len">ngram_len</a>,
- this list defines characters, sequences of which are subject to N-gram extraction.
- Words comprised of other characters will not be affected by N-gram indexing
- feature. The value format is identical to <a class="link" href="#conf-charset-table" title="12.2.16. charset_table">charset_table</a>.
- </p><h4><a name="idp33561344"></a>Example:</h4><pre class="programlisting">
- ngram_chars = U+3000..U+2FA1F
- </pre></div>
- <div class="sect2" title="12.2.25. phrase_boundary"><div class="titlepage"><div><div><h3 class="title"><a name="conf-phrase-boundary"></a>12.2.25. phrase_boundary</h3></div></div></div>
- <p>
- Phrase boundary characters list.
- Optional, default is empty.
- </p><p>
- This list controls what characters will be treated as phrase boundaries,
- in order to adjust word positions and enable phrase-level search
- emulation through proximity search. The syntax is similar
- to <a class="link" href="#conf-charset-table" title="12.2.16. charset_table">charset_table</a>.
- Mappings are not allowed and the boundary characters must not
- overlap with anything else.
- </p><p>
- On phrase boundary, additional word position increment (specified by
- <a class="link" href="#conf-phrase-boundary-step" title="12.2.26. phrase_boundary_step">phrase_boundary_step</a>)
- will be added to current word position. This enables phrase-level
- searching through proximity queries: words in different phrases
- will be guaranteed to be more than phrase_boundary_step distance
- away from each other; so proximity search within that distance
- will be equivalent to phrase-level search.
- </p><p>
- Phrase boundary condition will be raised if and only if such character
- is followed by a separator; this is to avoid abbreviations such as
- S.T.A.L.K.E.R or URLs being treated as several phrases.
- </p><h4><a name="idp33567984"></a>Example:</h4><pre class="programlisting">
- phrase_boundary = ., ?, !, U+2026 # horizontal ellipsis
- </pre></div>
- <div class="sect2" title="12.2.26. phrase_boundary_step"><div class="titlepage"><div><div><h3 class="title"><a name="conf-phrase-boundary-step"></a>12.2.26. phrase_boundary_step</h3></div></div></div>
- <p>
- Phrase boundary word position increment.
- Optional, default is 0.
- </p><p>
- On phrase boundary, current word position will be additionally incremented
- by this number. See <a class="link" href="#conf-phrase-boundary" title="12.2.25. phrase_boundary">phrase_boundary</a> for details.
- </p><h4><a name="idp33571952"></a>Example:</h4><pre class="programlisting">
- phrase_boundary_step = 100
- </pre></div>
- <div class="sect2" title="12.2.27. html_strip"><div class="titlepage"><div><div><h3 class="title"><a name="conf-html-strip"></a>12.2.27. html_strip</h3></div></div></div>
- <p>
- Whether to strip HTML markup from incoming full-text data.
- Optional, default is 0.
- Known values are 0 (disable stripping) and 1 (enable stripping).
- </p><p>
- Both HTML tags and entities are considered markup and get processed.
- </p><p>HTML tags are removed, their contents (i.e., everything between
- <P> and </P>) are left intact by default. You can choose
- to keep and index attributes of the tags (e.g., HREF attribute in
- an A tag, or ALT in an IMG one). Several well-known inline tags are
- completely removed, all other tags are treated as block level and
- replaced with whitespace. For example, 'te<B>st</B>'
- text will be indexed as a single keyword 'test', however,
- 'te<P>st</P>' will be indexed as two keywords
- 'te' and 'st'. Known inline tags are as follows: A, B, I, S, U, BASEFONT,
- BIG, EM, FONT, IMG, LABEL, SMALL, SPAN, STRIKE, STRONG, SUB, SUP, TT.
- </p><p>
- HTML entities get decoded and replaced with corresponding UTF-8
- characters. Stripper supports both numeric forms (such as &#239;)
- and text forms (such as &oacute; or &nbsp;). All entities
- as specified by HTML4 standard are supported.
- </p><p>
- Stripping should work with
- properly formed HTML and XHTML, but, just as most browsers, may produce
- unexpected results on malformed input (such as HTML with stray <'s
- or unclosed >'s).
- </p><p>
- Only the tags themselves, and also HTML comments, are stripped.
- To strip the contents of the tags too (eg. to strip embedded scripts),
- see <a class="link" href="#conf-html-remove-elements" title="12.2.29. html_remove_elements">html_remove_elements</a> option.
- There are no restrictions on tag names; ie. everything
- that looks like a valid tag start, or end, or a comment
- will be stripped.
- </p><h4><a name="idp33579504"></a>Example:</h4><pre class="programlisting">
- html_strip = 1
- </pre></div>
- <div class="sect2" title="12.2.28. html_index_attrs"><div class="titlepage"><div><div><h3 class="title"><a name="conf-html-index-attrs"></a>12.2.28. html_index_attrs</h3></div></div></div>
- <p>
- A list of markup attributes to index when stripping HTML.
- Optional, default is empty (do not index markup attributes).
- </p><p>
- Specifies HTML markup attributes whose contents should be retained and indexed
- even though other HTML markup is stripped. The format is per-tag enumeration of
- indexable attributes, as shown in the example below.
- </p><h4><a name="idp33582848"></a>Example:</h4><pre class="programlisting">
- html_index_attrs = img=alt,title; a=title;
- </pre></div>
- <div class="sect2" title="12.2.29. html_remove_elements"><div class="titlepage"><div><div><h3 class="title"><a name="conf-html-remove-elements"></a>12.2.29. html_remove_elements</h3></div></div></div>
- <p>
- A list of HTML elements for which to strip contents along with the elements themselves.
- Optional, default is empty string (do not strip contents of any elements).
- </p><p>
- This feature allows stripping element contents, ie. everything that
- is between the opening and the closing tags. It is useful to remove
- embedded scripts, CSS, etc. Short tag form for empty elements
- (ie. <br />) is properly supported; ie. the text that
- follows such tag will <span class="bold"><strong>not</strong></span> be removed.
- </p><p>
- The value is a comma-separated list of element (tag) names whose
- contents should be removed. Tag names are case insensitive.
- </p><h4><a name="idp33587760"></a>Example:</h4><pre class="programlisting">
- html_remove_elements = style, script
- </pre></div>
- <div class="sect2" title="12.2.30. local"><div class="titlepage"><div><div><h3 class="title"><a name="conf-local"></a>12.2.30. local</h3></div></div></div>
- <p>
- Local index declaration in the <a class="link" href="#distributed" title="5.8. Distributed searching">distributed index</a>.
- Multi-value, optional, default is empty.
- </p><p>
- This setting is used to declare local indexes that will be searched when
- given distributed index is searched. Many local indexes can be declared per
- each distributed index. Any local index can also be mentioned several times
- in different distributed indexes.
- </p><p>
- Note that by default all local indexes will be searched <span class="bold"><strong>sequentially</strong></span>,
- utilizing only 1 CPU or core. To parallelize processing of the local parts
- in the distributed index, you should use <code class="option">dist_threads</code> directive,
- see <a class="xref" href="#conf-dist-threads" title="12.4.24. dist_threads">Section 12.4.24, “dist_threads”</a>.
- </p><p>
- Before <code class="option">dist_threads</code>, there also was a legacy solution
- to configure <code class="filename">searchd</code> to query itself instead of using
- local indexes (refer to <a class="xref" href="#conf-agent" title="12.2.31. agent">Section 12.2.31, “agent”</a> for the details). However,
- that creates redundant CPU and network load, and <code class="option">dist_threads</code>
- is now strongly suggested instead.
- </p><h4><a name="idp33597120"></a>Example:</h4><pre class="programlisting">
- local = chunk1
- local = chunk2
- </pre></div>
- <div class="sect2" title="12.2.31. agent"><div class="titlepage"><div><div><h3 class="title"><a name="conf-agent"></a>12.2.31. agent</h3></div></div></div>
- <p>
- Remote agent declaration in the <a class="link" href="#distributed" title="5.8. Distributed searching">distributed index</a>.
- Multi-value, optional, default is empty.
- </p><p>
- <code class="code">agent</code> directive declares remote agents that are searched
- every time when the enclosing distributed index is searched. The agents
- are, essentially, pointers to networked indexes. Prior to version 2.1.1-beta,
- the value format was:
- </p><pre class="programlisting">
- agent = address:index-list
- </pre><p>
- Starting with 2.1.1-beta, the value can additionally specify multiple
- alternatives (agent mirrors) for either the address only, or the address
- and index list:
- </p><pre class="programlisting">
- agent = address1 [ | address2 [...] ]:index-list
- agent = address1:index-list [ | address2:index-list [...] ]
- </pre><p>
- In both cases the address specification must be one of the following:
- </p><pre class="programlisting">
- address = hostname:port # eg. server2:9312
- address = /absolute/unix/socket/path # eg. /var/run/sphinx2.sock
- </pre><p>
- Where
- <code class="code">hostname</code> is the remote host name,
- <code class="code">port</code> is the remote TCP port number,
- <code class="code">index-list</code> is a comma-separated list of index names,
- and square braces [] designate an optional clause.
- </p><p>
- In other words, you can point every single agent to one or more remote
- indexes, residing on one or more networked servers. There are absolutely
- no restrictions on the pointers. To point out a couple important things,
- the host can be localhost, and the remote index can be a distributed
- index in turn, all that is legal. That enables a bunch of very different
- usage modes:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>sharding over multiple agent servers, and creating
- an arbitrary cluster topology;</p></li>
- <li class="listitem"><p>sharding over multiple agent servers, mirrored
- for HA/LB (High Availability and Load Balancing) purposes
- (starting with 2.1.1-beta);</p></li>
- <li class="listitem"><p>sharding within localhost, to utilize multiple cores
- (historical and not recommended in versions 1.x and above, use multiple
- local indexes and dist_threads directive instead);</p></li>
- </ul></div>
- <p>
- </p><p>
- All agents are searched in parallel. An index list is passed verbatim
- to the remote agent. How exactly that list is searched within the agent
- (ie. sequentially or in parallel too) depends solely on the agent
- configuration (ie. dist_threads directive). Master has no remote
- control over that.
- </p><h4><a name="idp33609504"></a>Example:</h4><pre class="programlisting">
- # config on box1
- # sharding an index over 3 servers
- agent = box2:9312:chunk2
- agent = box3:9312:chunk3
- # config on box2
- # sharding an index over 3 servers
- agent = box1:9312:chunk2
- agent = box3:9312:chunk3
- # config on box3
- # sharding an index over 3 servers
- agent = box1:9312:chunk2
- agent = box2:9312:chunk3
- </pre><h4><a name="idp33610896"></a>Agent mirrors</h4><p>
- New syntax added in 2.1.1-beta lets you define so-called <span class="bold"><strong>agent mirrors</strong></span>
- that can be used interchangeably when processing a search query. Master server
- keeps track of mirror status (alive or dead) and response times, and does
- automatic failover and load balancing based on that. For example, this line:</p><pre class="programlisting">
- agent = box1:9312|box2:9312|box3:9312:chunk2
- </pre><p>Declares that box1:9312, box2:9312, and box3:9312 all have an index
- called chunk2, and can be used as interchangeable mirrors. If any single
- one of those servers goes down, the queries will be distributed between
- the other two. When it gets back up, master will detect that and begin
- routing queries to all three boxes again.
- </p><p>
- Another way to define the mirrors is to explicitly specify the index list
- for every mirror:</p><pre class="programlisting">
- agent = box1:9312:box1chunk2|box2:9312:box2chunk2
- </pre><p>This works essentially the same as the previous example, but different
- index names will be used when querying different servers: box1chunk2 when querying
- box1:9312, and box2chunk2 when querying box2:9312.
- </p><p>
- By default, all queries are routed to the best of the mirrors. The best one
- is picked based on the recent statistics, as controlled by the
- <a class="link" href="#conf-ha-period-karma" title="12.4.40. ha_period_karma">ha_period_karma</a> config directive.
- Master stores a number of metrics (total query count, error count, response
- time, etc) recently observed for every agent. It groups those by time spans,
- and karma is that time span length. The best agent mirror is then determined
- dynamically based on the last 2 such time spans. The specific algorithm that
- will be used to pick a mirror can be configured with the
- <a class="link" href="#conf-ha-strategy" title="12.2.60. ha_strategy">ha_strategy</a> directive.
- </p><p>
- The karma period is in seconds and defaults to 60 seconds. Master stores
- up to 15 karma spans with per-agent statistics for instrumentation purposes
- (see <a class="link" href="#sphinxql-show-agent-status" title="8.29. SHOW AGENT STATUS">SHOW AGENT STATUS</a>
- statement). However, only the last 2 spans out of those are ever used for
- HA/LB logic.
- </p><p>
- When there are no queries, master sends a regular ping command every
- <a class="link" href="#conf-ha-ping-interval" title="12.4.39. ha_ping_interval">ha_ping_interval</a> milliseconds
- in order to have some statistics and at least check whether the remote
- host is still alive. ha_ping_interval defaults to 1000 msec. Setting it to 0
- disables pings and statistics will only be accumulated based on actual queries.
- </p><h4><a name="idp33621808"></a>Example:</h4><pre class="programlisting">
- # sharding index over 4 servers total
- # in just 2 chunks but with 2 failover mirrors for each chunk
- # box1, box2 carry chunk1 as local
- # box3, box4 carry chunk2 as local
- # config on box1, box2
- agent = box3:9312|box4:9312:chunk2
- # config on box3, box4
- agent = box1:9312|box2:9312:chunk1
- </pre></div>
- <div class="sect2" title="12.2.32. agent_persistent"><div class="titlepage"><div><div><h3 class="title"><a name="conf-agent-persistent"></a>12.2.32. agent_persistent</h3></div></div></div>
- <p>
- Persistently connected remote agent declaration.
- Multi-value, optional, default is empty.
- Introduced in version 2.1.1-beta.
- </p><p>
- <code class="option">agent_persistent</code> directive syntax matches that of
- the <a class="link" href="#conf-agent" title="12.2.31. agent">agent</a> directive. The only difference
- is that the master will <span class="bold"><strong>not</strong></span> open a new connection to the agent for
- every query and then close it. Rather, it will keep a connection open and
- attempt to reuse it for the subsequent queries. The maximal number of such persistent connections per one agent host
- is limited by <a class="link" href="#conf-persistent-connections-limit" title="12.4.41. persistent_connections_limit">persistent_connections_limit</a> option of searchd section.
- </p><p>
- Note that you <span class="bold"><strong>have</strong></span> to set the latter to a value greater than 0 if you want to use persistent agent connections.
- Otherwise, when <a class="link" href="#conf-persistent-connections-limit" title="12.4.41. persistent_connections_limit">persistent_connections_limit</a> is not defined, the number
- of persistent connections is assumed to be zero, and 'agent_persistent' acts exactly like a plain 'agent'.
- </p><p>
- Persistent master-agent connections reduce TCP port pressure, and
- save on connection handshakes. As of time of this writing, they are supported <span class="bold"><strong>only</strong></span>
- in workers=threads mode. In other modes, simple non-persistent connections
- (i.e., one connection per operation) will be used, and a warning will show
- up in the console.
- </p><h4><a name="idp33632112"></a>Example:</h4><pre class="programlisting">
- agent_persistent = remotebox:9312:index2
- </pre></div>
- <div class="sect2" title="12.2.33. agent_blackhole"><div class="titlepage"><div><div><h3 class="title"><a name="conf-agent-blackhole"></a>12.2.33. agent_blackhole</h3></div></div></div>
- <p>
- Remote blackhole agent declaration in the <a class="link" href="#distributed" title="5.8. Distributed searching">distributed index</a>.
- Multi-value, optional, default is empty.
- Introduced in version 0.9.9-rc1.
- </p><p>
- <code class="option">agent_blackhole</code> lets you fire-and-forget queries
- to remote agents. That is useful for debugging (or just testing)
- production clusters: you can setup a separate debugging/testing searchd
- instance, and forward the requests to this instance from your production
- master (aggregator) instance without interfering with production work.
- Master searchd will attempt to connect and query blackhole agent
- normally, but it will neither wait nor process any responses.
- Also, all network errors on blackhole agents will be ignored.
- The value format is completely identical to regular
- <a class="link" href="#conf-agent" title="12.2.31. agent">agent</a> directive.
- </p><h4><a name="idp33637904"></a>Example:</h4><pre class="programlisting">
- agent_blackhole = testbox:9312:testindex1,testindex2
- </pre></div>
- <div class="sect2" title="12.2.34. agent_connect_timeout"><div class="titlepage"><div><div><h3 class="title"><a name="conf-agent-connect-timeout"></a>12.2.34. agent_connect_timeout</h3></div></div></div>
- <p>
- Remote agent connection timeout, in milliseconds.
- Optional, default is 1000 (ie. 1 second).
- </p><p>
- When connecting to remote agents, <code class="filename">searchd</code>
- will wait at most this much time for connect() call to complete
- successfully. If the timeout is reached but connect() does not complete,
- and <a class="link" href="#api-func-setretries" title="9.1.4. SetRetries">retries</a> are enabled,
- retry will be initiated.
- </p><h4><a name="idp33642912"></a>Example:</h4><pre class="programlisting">
- agent_connect_timeout = 300
- </pre></div>
- <div class="sect2" title="12.2.35. agent_query_timeout"><div class="titlepage"><div><div><h3 class="title"><a name="conf-agent-query-timeout"></a>12.2.35. agent_query_timeout</h3></div></div></div>
- <p>
- Remote agent query timeout, in milliseconds.
- Optional, default is 3000 (ie. 3 seconds).
- Added in version 2.1.1-beta.
- </p><p>
- After connection, <code class="filename">searchd</code> will wait at most this
- much time for remote queries to complete. This timeout is fully separate
- from connection timeout; so the maximum possible delay caused by
- a remote agent equals the sum of <code class="code">agent_connect_timeout</code> and
- <code class="code">agent_query_timeout</code>. Queries will <span class="bold"><strong>not</strong></span> be retried
- if this timeout is reached; a warning will be produced instead.
- </p><h4><a name="idp33648656"></a>Example:</h4><pre class="programlisting">
- agent_query_timeout = 10000 # our query can be long, allow up to 10 sec
- </pre></div>
- <div class="sect2" title="12.2.36. preopen"><div class="titlepage"><div><div><h3 class="title"><a name="conf-preopen"></a>12.2.36. preopen</h3></div></div></div>
- <p>
- Whether to pre-open all index files, or open them per each query.
- Optional, default is 0 (do not preopen).
- </p><p>
- This option tells <code class="filename">searchd</code> that it should pre-open
- all index files on startup (or rotation) and keep them open while it runs.
- Currently, the default mode is <span class="bold"><strong>not</strong></span> to pre-open the files (this may
- change in the future). Preopened indexes take a few (currently 2) file
- descriptors per index. However, they save on per-query <code class="code">open()</code> calls;
- and also they are invulnerable to subtle race conditions that may happen during
- index rotation under high load. On the other hand, when serving many indexes
- (100s to 1000s), it still might be desired to open them on a per-query basis
- in order to save file descriptors.
- </p><p>
- This directive does not affect <code class="filename">indexer</code> in any way,
- it only affects <code class="filename">searchd</code>.
- </p><h4><a name="idp33656128"></a>Example:</h4><pre class="programlisting">
- preopen = 1
- </pre></div>
- <div class="sect2" title="12.2.37. inplace_enable"><div class="titlepage"><div><div><h3 class="title"><a name="conf-inplace-enable"></a>12.2.37. inplace_enable</h3></div></div></div>
- <p>
- Whether to enable in-place index inversion.
- Optional, default is 0 (use separate temporary files).
- Introduced in version 0.9.9-rc1.
- </p><p>
- <code class="option">inplace_enable</code> greatly reduces indexing disk footprint,
- at a cost of slightly slower indexing (it uses around 2x less disk,
- but yields around 90-95% the original performance).
- </p><p>
- Indexing involves two major phases. The first phase collects,
- processes, and partially sorts documents by keyword, and writes
- the intermediate result to temporary files (.tmp*). The second
- phase fully sorts the documents, and creates the final index
- files. Thus, rebuilding a production index on the fly involves
- around 3x peak disk footprint: 1st copy for the intermediate
- temporary files, 2nd copy for newly constructed copy, and 3rd copy
- for the old index that will be serving production queries in the meantime.
- (Intermediate data is comparable in size to the final index.)
- That might be too much disk footprint for big data collections,
- and <code class="option">inplace_enable</code> allows to reduce it.
- When enabled, it reuses the temporary files, outputs the
- final data back to them, and renames them on completion.
- However, this might require additional temporary data chunk
- relocation, which is where the performance impact comes from.
- </p><p>
- This directive does not affect <code class="filename">searchd</code> in any way,
- it only affects <code class="filename">indexer</code>.
- </p><h4><a name="idp33663408"></a>Example:</h4><pre class="programlisting">
- inplace_enable = 1
- </pre></div>
- <div class="sect2" title="12.2.38. inplace_hit_gap"><div class="titlepage"><div><div><h3 class="title"><a name="conf-inplace-hit-gap"></a>12.2.38. inplace_hit_gap</h3></div></div></div>
- <p>
- <a class="link" href="#conf-inplace-enable" title="12.2.37. inplace_enable">In-place inversion</a> fine-tuning option.
- Controls preallocated hitlist gap size.
- Optional, default is 0.
- Introduced in version 0.9.9-rc1.
- </p><p>
- This directive does not affect <code class="filename">searchd</code> in any way,
- it only affects <code class="filename">indexer</code>.
- </p><h4><a name="idp33668848"></a>Example:</h4><pre class="programlisting">
- inplace_hit_gap = 1M
- </pre></div>
- <div class="sect2" title="12.2.39. inplace_docinfo_gap"><div class="titlepage"><div><div><h3 class="title"><a name="conf-inplace-docinfo-gap"></a>12.2.39. inplace_docinfo_gap</h3></div></div></div>
- <p>
- <a class="link" href="#conf-inplace-enable" title="12.2.37. inplace_enable">In-place inversion</a> fine-tuning option.
- Controls preallocated docinfo gap size.
- Optional, default is 0.
- Introduced in version 0.9.9-rc1.
- </p><p>
- This directive does not affect <code class="filename">searchd</code> in any way,
- it only affects <code class="filename">indexer</code>.
- </p><h4><a name="idp33674192"></a>Example:</h4><pre class="programlisting">
- inplace_docinfo_gap = 1M
- </pre></div>
- <div class="sect2" title="12.2.40. inplace_reloc_factor"><div class="titlepage"><div><div><h3 class="title"><a name="conf-inplace-reloc-factor"></a>12.2.40. inplace_reloc_factor</h3></div></div></div>
- <p>
- <a class="link" href="#conf-inplace-enable" title="12.2.37. inplace_enable">In-place inversion</a> fine-tuning option.
- Controls relocation buffer size within indexing memory arena.
- Optional, default is 0.1.
- Introduced in version 0.9.9-rc1.
- </p><p>
- This directive does not affect <code class="filename">searchd</code> in any way,
- it only affects <code class="filename">indexer</code>.
- </p><h4><a name="idp33679856"></a>Example:</h4><pre class="programlisting">
- inplace_reloc_factor = 0.1
- </pre></div>
- <div class="sect2" title="12.2.41. inplace_write_factor"><div class="titlepage"><div><div><h3 class="title"><a name="conf-inplace-write-factor"></a>12.2.41. inplace_write_factor</h3></div></div></div>
- <p>
- <a class="link" href="#conf-inplace-enable" title="12.2.37. inplace_enable">In-place inversion</a> fine-tuning option.
- Controls in-place write buffer size within indexing memory arena.
- Optional, default is 0.1.
- Introduced in version 0.9.9-rc1.
- </p><p>
- This directive does not affect <code class="filename">searchd</code> in any way,
- it only affects <code class="filename">indexer</code>.
- </p><h4><a name="idp33685520"></a>Example:</h4><pre class="programlisting">
- inplace_write_factor = 0.1
- </pre></div>
- <div class="sect2" title="12.2.42. index_exact_words"><div class="titlepage"><div><div><h3 class="title"><a name="conf-index-exact-words"></a>12.2.42. index_exact_words</h3></div></div></div>
- <p>
- Whether to index the original keywords along with the stemmed/remapped versions.
- Optional, default is 0 (do not index).
- Introduced in version 0.9.9-rc1.
- </p><p>
- When enabled, <code class="option">index_exact_words</code> forces <code class="filename">indexer</code>
- to put the raw keywords in the index along with the stemmed versions. That, in turn,
- enables <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">exact form operator</a> in the query language to work.
- This impacts the index size and the indexing time. However, searching performance
- is not impacted at all.
- </p><h4><a name="idp33690864"></a>Example:</h4><pre class="programlisting">
- index_exact_words = 1
- </pre></div>
- <div class="sect2" title="12.2.43. overshort_step"><div class="titlepage"><div><div><h3 class="title"><a name="conf-overshort-step"></a>12.2.43. overshort_step</h3></div></div></div>
- <p>
- Position increment on overshort (less than <a class="link" href="#conf-min-word-len" title="12.2.15. min_word_len">min_word_len</a>) keywords.
- Optional, allowed values are 0 and 1, default is 1.
- Introduced in version 0.9.9-rc1.
- </p><p>
- This directive does not affect <code class="filename">searchd</code> in any way,
- it only affects <code class="filename">indexer</code>.
- </p><h4><a name="idp33696464"></a>Example:</h4><pre class="programlisting">
- overshort_step = 1
- </pre></div>
- <div class="sect2" title="12.2.44. stopword_step"><div class="titlepage"><div><div><h3 class="title"><a name="conf-stopword-step"></a>12.2.44. stopword_step</h3></div></div></div>
- <p>
- Position increment on <a class="link" href="#conf-stopwords" title="12.2.11. stopwords">stopwords</a>.
- Optional, allowed values are 0 and 1, default is 1.
- Introduced in version 0.9.9-rc1.
- </p><p>
- This directive does not affect <code class="filename">searchd</code> in any way,
- it only affects <code class="filename">indexer</code>.
- </p><h4><a name="idp33701840"></a>Example:</h4><pre class="programlisting">
- stopword_step = 1
- </pre></div>
- <div class="sect2" title="12.2.45. hitless_words"><div class="titlepage"><div><div><h3 class="title"><a name="conf-hitless-words"></a>12.2.45. hitless_words</h3></div></div></div>
- <p>
- Hitless words list.
- Optional, allowed values are 'all', or a list file name.
- Introduced in version 1.10-beta.
- </p><p>
- By default, Sphinx full-text index stores not only a list of matching
- documents for every given keyword, but also a list of its in-document positions
- (aka hitlist). Hitlists enable phrase, proximity, strict order and other
- advanced types of searching, as well as phrase proximity ranking. However,
- hitlists for specific frequent keywords (that can not be stopped for
- some reason despite being frequent) can get huge and thus slow to process
- while querying. Also, in some cases we might only care about boolean
- keyword matching, and never need position-based searching operators
- (such as phrase matching) nor phrase ranking.
- </p><p>
- <code class="option">hitless_words</code> lets you create indexes that either
- do not have positional information (hitlists) at all, or skip it for
- specific keywords.
- </p><p>
- Hitless index will generally use less space than the respective
- regular index (about 1.5x can be expected). Both indexing and searching
- should be faster, at a cost of missing positional query and ranking support.
- When searching, positional queries (eg. phrase queries) will be automatically
- converted to respective non-positional (document-level) or combined queries.
- For instance, if keywords "hello" and "world" are hitless, "hello world"
- phrase query will be converted to (hello & world) bag-of-words query,
- matching all documents that mention both of the keywords but not necessarily
- the exact phrase. And if, in addition, keywords "simon" and "says" are not
- hitless, "simon says hello world" will be converted to ("simon says" &
- hello & world) query, matching all documents that contain "hello" and
- "world" anywhere in the document, and also "simon says" as an exact phrase.
- </p><h4><a name="idp33708528"></a>Example:</h4><pre class="programlisting">
- hitless_words = all
- </pre></div>
- <div class="sect2" title="12.2.46. expand_keywords"><div class="titlepage"><div><div><h3 class="title"><a name="conf-expand-keywords"></a>12.2.46. expand_keywords</h3></div></div></div>
- <p>
- Expand keywords with exact forms and/or stars when possible.
- Optional, default is 0 (do not expand keywords).
- Introduced in version 1.10-beta.
- </p><p>
- Queries against indexes with <code class="option">expand_keywords</code> feature
- enabled are internally expanded as follows. If the index was built with
- prefix or infix indexing enabled, every keyword gets internally replaced
- with a disjunction of keyword itself and a respective prefix or infix
- (keyword with stars). If the index was built with both stemming and
- <a class="link" href="#conf-index-exact-words" title="12.2.42. index_exact_words">index_exact_words</a> enabled,
- exact form is also added. Here's an example that shows how internal
- expansion works when all of the above (infixes, stemming, and exact
- words) are combined:
- </p><pre class="programlisting">
- running -> ( running | *running* | =running )
- </pre><p>
- </p><p>
- Expanded queries take naturally longer to complete, but can possibly
- improve the search quality, as the documents with exact form matches
- should be ranked generally higher than documents with stemmed or infix matches.
- </p><p>
- Note that the existing query syntax does not allow to emulate this
- kind of expansion, because internal expansion works on keyword level and
- expands keywords within phrase or quorum operators too (which is not
- possible through the query syntax).
- </p><p>
- This directive does not affect <code class="filename">indexer</code> in any way,
- it only affects <code class="filename">searchd</code>.
- </p><h4><a name="idp33717232"></a>Example:</h4><pre class="programlisting">
- expand_keywords = 1
- </pre></div>
- <div class="sect2" title="12.2.47. blend_chars"><div class="titlepage"><div><div><h3 class="title"><a name="conf-blend-chars"></a>12.2.47. blend_chars</h3></div></div></div>
- <p>
- Blended characters list.
- Optional, default is empty.
- Introduced in version 1.10-beta.
- </p><p>
- Blended characters are indexed both as separators and valid characters.
- For instance, assume that & is configured as blended and AT&T
- occurs in an indexed document. Three different keywords will get indexed,
- namely "at&t", treating blended characters as valid, plus "at" and "t",
- treating them as separators.
- </p><p>
- Positions for tokens obtained by replacing blended characters with whitespace
- are assigned as usual, so regular keywords will be indexed just as if there was
- no <code class="option">blend_chars</code> specified at all. An additional token that
- mixes blended and non-blended characters will be put at the starting position.
- For instance, if "AT&T company" occurs in the very
- beginning of the text field, "at" will be given position 1, "t" position 2,
- "company" position 3, and "AT&T" will also be given position 1 ("blending"
- with the opening regular keyword). Thus, querying for either AT&T or just
- AT will match that document, and querying for "AT T" as a phrase will also match it.
- Last but not least, phrase query for "AT&T company" will <span class="emphasis"><em>also</em></span>
- match it, despite the position gap between the two tokens.
- </p><p>
- Blended characters can overlap with special characters used in query
- syntax (think of T-Mobile or @twitter). Where possible, query parser will
- automatically handle blended character as blended. For instance, "hello @twitter"
- within quotes (a phrase operator) would handle @-sign as blended, because
- @-syntax for field operator is not allowed within phrases. Otherwise,
- the character would be handled as an operator. So you might want to
- escape the keywords.
- </p><p>
- Starting with version 2.0.1-beta, blended characters can be remapped,
- so that multiple different blended characters could be normalized into
- just one base form. This is useful when indexing multiple alternative
- Unicode codepoints with equivalent glyphs.
- </p><h4><a name="idp33724752"></a>Example:</h4><pre class="programlisting">
- blend_chars = +, &, U+23
- blend_chars = +, &->+ # 2.0.1 and above
- </pre></div>
- <div class="sect2" title="12.2.48. blend_mode"><div class="titlepage"><div><div><h3 class="title"><a name="conf-blend-mode"></a>12.2.48. blend_mode</h3></div></div></div>
- <p>
- Blended tokens indexing mode.
- Optional, default is <code class="option">trim_none</code>.
- Introduced in version 2.0.1-beta.
- </p><p>
- By default, tokens that mix blended and non-blended characters
- get indexed in their entirety. For instance, when both at-sign and
- an exclamation are in <code class="option">blend_chars</code>, "@dude!" will
- result in two tokens being indexed: "@dude!" (with all the blended characters)
- and "dude" (without any). Therefore "@dude" query will <span class="emphasis"><em>not</em></span>
- match it.
- </p><p>
- <code class="option">blend_mode</code> directive adds flexibility to this indexing
- behavior. It takes a comma-separated list of options.
- </p><pre class="programlisting">
- blend_mode = option [, option [, ...]]
- option = trim_none | trim_head | trim_tail | trim_both | skip_pure
- </pre><p>
- </p><p>
- Options specify token indexing variants. If multiple options are
- specified, multiple variants of the same token will be indexed.
- Regular keywords (resulting from that token by replacing blended
- with whitespace) are always indexed.
- </p><div class="variablelist"><dl><dt><span class="term">trim_none</span></dt>
- <dd><p>Index the entire token.</p></dd><dt><span class="term">trim_head</span></dt>
- <dd><p>Trim heading blended characters, and index the resulting token.</p></dd><dt><span class="term">trim_tail</span></dt>
- <dd><p>Trim trailing blended characters, and index the resulting token.</p></dd><dt><span class="term">trim_both</span></dt>
- <dd><p>Trim both heading and trailing blended characters, and index the resulting token.</p></dd><dt><span class="term">skip_pure</span></dt>
- <dd><p>Do not index the token if it's purely blended, that is, consists of blended characters only.</p></dd></dl></div>
- <p>
- Returning to the "@dude!" example above, setting <code class="option">blend_mode = trim_head,
- trim_tail</code> will result in two tokens being indexed, "@dude" and "dude!".
- In this particular example, <code class="option">trim_both</code> would have no effect,
- because trimming both blended characters results in "dude" which is already
- indexed as a regular keyword. Indexing "@U.S.A." with <code class="option">trim_both</code>
- (and assuming that dot is blended too) would result in "U.S.A" being indexed.
- Last but not least, <code class="option">skip_pure</code> enables you to fully ignore
- sequences of blended characters only. For example, "one @@@ two" would be
- indexed exactly as "one two", and match that as a phrase. That is not the case
- by default because a fully blended token gets indexed and offsets the second
- keyword position.
- </p><p>
- Default behavior is to index the entire token, equivalent to
- <code class="option">blend_mode = trim_none</code>.
- </p><h4><a name="idp33742464"></a>Example:</h4><pre class="programlisting">
- blend_mode = trim_tail, skip_pure
- </pre></div>
- <div class="sect2" title="12.2.49. rt_mem_limit"><div class="titlepage"><div><div><h3 class="title"><a name="conf-rt-mem-limit"></a>12.2.49. rt_mem_limit</h3></div></div></div>
- <p>
- RAM chunk size limit.
- Optional, default is 128M.
- Introduced in version 1.10-beta.
- </p><p>
- RT index keeps some data in memory (so-called RAM chunk) and
- also maintains a number of on-disk indexes (so-called disk chunks).
- This directive lets you control the RAM chunk size. Once there's
- too much data to keep in RAM, RT index will flush it to disk,
- activate a newly created disk chunk, and reset the RAM chunk.
- </p><p>
- The limit is pretty strict; RT index should never allocate more
- memory than it's limited to. The memory is not preallocated either,
- hence, specifying 512 MB limit and only inserting 3 MB of data
- should result in allocating 3 MB, not 512 MB.
- </p><p>
- </p><h4><a name="idp33746912"></a>Example:</h4><pre class="programlisting">
- rt_mem_limit = 512M
- </pre></div>
- <div class="sect2" title="12.2.50. rt_field"><div class="titlepage"><div><div><h3 class="title"><a name="conf-rt-field"></a>12.2.50. rt_field</h3></div></div></div>
- <p>
- Full-text field declaration.
- Multi-value, mandatory.
- Introduced in version 1.10-beta.
- </p><p>
- Full-text fields to be indexed are declared using <code class="option">rt_field</code>
- directive. The names must be unique. The order is preserved; and so field values
- in INSERT statements without an explicit list of inserted columns will have to be
- in the same order as configured.
- </p><p>
- </p><h4><a name="idp33751104"></a>Example:</h4><pre class="programlisting">
- rt_field = author
- rt_field = title
- rt_field = content
- </pre></div>
- <div class="sect2" title="12.2.51. rt_attr_uint"><div class="titlepage"><div><div><h3 class="title"><a name="conf-rt-attr-uint"></a>12.2.51. rt_attr_uint</h3></div></div></div>
- <p>
- Unsigned integer attribute declaration.
- Multi-value (an arbitrary number of attributes is allowed), optional.
- Declares an unsigned 32-bit attribute.
- Introduced in version 1.10-beta.
- </p><h4><a name="idp33753920"></a>Example:</h4><pre class="programlisting">
- rt_attr_uint = gid
- </pre></div>
- <div class="sect2" title="12.2.52. rt_attr_bool"><div class="titlepage"><div><div><h3 class="title"><a name="conf-rt-attr-bool"></a>12.2.52. rt_attr_bool</h3></div></div></div>
- <p>
- Boolean attribute declaration.
- Multi-value (there might be multiple attributes declared), optional.
- Declares a 1-bit unsigned integer attribute.
- Introduced in version 2.1.2-release.
- </p><h4><a name="idp33756752"></a>Example:</h4><pre class="programlisting">
- rt_attr_bool = available
- </pre></div>
- <div class="sect2" title="12.2.53. rt_attr_bigint"><div class="titlepage"><div><div><h3 class="title"><a name="conf-rt-attr-bigint"></a>12.2.53. rt_attr_bigint</h3></div></div></div>
- <p>
- BIGINT attribute declaration.
- Multi-value (an arbitrary number of attributes is allowed), optional.
- Declares a signed 64-bit attribute.
- Introduced in version 1.10-beta.
- </p><h4><a name="idp33759600"></a>Example:</h4><pre class="programlisting">
- rt_attr_bigint = guid
- </pre></div>
- <div class="sect2" title="12.2.54. rt_attr_float"><div class="titlepage"><div><div><h3 class="title"><a name="conf-rt-attr-float"></a>12.2.54. rt_attr_float</h3></div></div></div>
- <p>
- Floating point attribute declaration.
- Multi-value (an arbitrary number of attributes is allowed), optional.
- Declares a single precision, 32-bit IEEE 754 format float attribute.
- Introduced in version 1.10-beta.
- </p><h4><a name="idp33762400"></a>Example:</h4><pre class="programlisting">
- rt_attr_float = gpa
- </pre></div>
- <div class="sect2" title="12.2.55. rt_attr_multi"><div class="titlepage"><div><div><h3 class="title"><a name="conf-rt-attr-multi"></a>12.2.55. rt_attr_multi</h3></div></div></div>
- <p>
- <a class="link" href="#mva" title="3.4. MVA (multi-valued attributes)">Multi-valued attribute</a> (MVA) declaration.
- Declares the UNSIGNED INTEGER (unsigned 32-bit) MVA attribute.
- Multi-value (ie. there may be more than one such attribute declared), optional.
- Applies to RT indexes only.
- </p><h4><a name="idp33766016"></a>Example:</h4><pre class="programlisting">
- rt_attr_multi = my_tags
- </pre></div>
- <div class="sect2" title="12.2.56. rt_attr_multi_64"><div class="titlepage"><div><div><h3 class="title"><a name="conf-rt-attr-multi-64"></a>12.2.56. rt_attr_multi_64</h3></div></div></div>
- <p>
- <a class="link" href="#mva" title="3.4. MVA (multi-valued attributes)">Multi-valued attribute</a> (MVA) declaration.
- Declares the BIGINT (signed 64-bit) MVA attribute.
- Multi-value (ie. there may be more than one such attribute declared), optional.
- Applies to RT indexes only.
- </p><h4><a name="idp33769520"></a>Example:</h4><pre class="programlisting">
- rt_attr_multi_64 = my_wide_tags
- </pre></div>
- <div class="sect2" title="12.2.57. rt_attr_timestamp"><div class="titlepage"><div><div><h3 class="title"><a name="conf-rt-attr-timestamp"></a>12.2.57. rt_attr_timestamp</h3></div></div></div>
- <p>
- Timestamp attribute declaration.
- Multi-value (an arbitrary number of attributes is allowed), optional.
- Introduced in version 1.10-beta.
- </p><h4><a name="idp33772272"></a>Example:</h4><pre class="programlisting">
- rt_attr_timestamp = date_added
- </pre></div>
- <div class="sect2" title="12.2.58. rt_attr_string"><div class="titlepage"><div><div><h3 class="title"><a name="conf-rt-attr-string"></a>12.2.58. rt_attr_string</h3></div></div></div>
- <p>
- String attribute declaration.
- Multi-value (an arbitrary number of attributes is allowed), optional.
- Introduced in version 1.10-beta.
- </p><h4><a name="idp33775024"></a>Example:</h4><pre class="programlisting">
- rt_attr_string = author
- </pre></div>
- <div class="sect2" title="12.2.59. rt_attr_json"><div class="titlepage"><div><div><h3 class="title"><a name="conf-rt-attr-json"></a>12.2.59. rt_attr_json</h3></div></div></div>
- <p>
- JSON attribute declaration.
- Multi-value (ie. there may be more than one such attribute declared), optional.
- Introduced in version 2.1.1-beta.
- </p><p>
- Refer to <a class="xref" href="#conf-sql-attr-json" title="12.1.24. sql_attr_json">Section 12.1.24, “sql_attr_json”</a> for more details on the JSON attributes.
- </p><h4><a name="idp33778992"></a>Example:</h4><pre class="programlisting">
- rt_attr_json = properties
- </pre></div>
- <div class="sect2" title="12.2.60. ha_strategy"><div class="titlepage"><div><div><h3 class="title"><a name="conf-ha-strategy"></a>12.2.60. ha_strategy</h3></div></div></div>
- <p>
- Agent mirror selection strategy, for load balancing.
- Optional, default is random.
- Added in 2.1.1-beta.
- </p><p>
- The strategy used for mirror selection, or in other words, choosing
- a specific <a class="link" href="#conf-agent" title="12.2.31. agent">agent mirror</a> in a distributed
- index. Essentially, this directive controls how exactly master does the
- load balancing between the configured mirror agent nodes.
- As of 2.1.1-beta, the following strategies are implemented:
- </p><h4><a name="idp33783200"></a>Simple random balancing</h4><pre class="programlisting">ha_strategy = random</pre><p>
- The default balancing mode. Simple linear random distribution among the mirrors.
- That is, equal selection probabilities are assigned to every mirror. Kind of similar
- to round-robin (RR), but unlike RR, does not impose a strict selection order.
- </p><h4><a name="idp33784944"></a>Adaptive randomized balancing</h4><p>
- The default simple random strategy does not take mirror status, error rate,
- and, most importantly, actual response latencies into account. So to accommodate
- for heterogeneous clusters and/or temporary spikes in agent node load, we have
- a group of balancing strategies that dynamically adjusts the probabilities
- based on the actual query latencies observed by the master.
- </p><p>
- The adaptive strategies based on <span class="bold"><strong>latency-weighted probabilities</strong></span>
- basically work as follows:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>latency stats are accumulated, in blocks of ha_period_karma seconds;</p></li>
- <li class="listitem"><p>once per karma period, latency-weighted probabilities get recomputed;</p></li>
- <li class="listitem"><p>once per request (including ping requests), "dead or alive" flag is adjusted.</p></li>
- </ul></div>
- <p>
- Currently (as of 2.1.1-beta), we begin with equal probabilities (or percentages,
- for brevity), and on every step, scale them by the inverse of the latencies observed
- during the last "karma" period, and then renormalize them. For example, if during
- the first 60 seconds after the master startup 4 mirrors had latencies of
- 10, 5, 30, and 3 msec/query respectively, the first adjustment step
- would go as follows:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>initial percentages: 0.25, 0.25, 0.25, 0.25;</p></li>
- <li class="listitem"><p>observed latencies: 10 ms, 5 ms, 30 ms, 3 ms;</p></li>
- <li class="listitem"><p>inverse latencies: 0.1, 0.2, 0.0333, 0.333;</p></li>
- <li class="listitem"><p>scaled percentages: 0.025, 0.05, 0.008333, 0.0833;</p></li>
- <li class="listitem"><p>renormalized percentages: 0.15, 0.30, 0.05, 0.50.</p></li>
- </ul></div>
- <p>
- Meaning that the 1st mirror would have a 15% chance of being chosen during
- the next karma period, the 2nd one a 30% chance, the 3rd one (slowest at 30 ms)
- only a 5% chance, and the 4th and the fastest one (at 3 ms) a 50% chance.
- Then, after that period, the second adjustment step would update those chances
- again, and so on.
- </p><p>
- The rationale here is, once the <span class="bold"><strong>observed latencies</strong></span> stabilize,
- the <span class="bold"><strong>latency weighted probabilities</strong></span> stabilize as well. So all these
- adjustment iterations are supposed to converge at a point where the average
- latencies are (roughly) equal over all mirrors.
- </p><pre class="programlisting">ha_strategy = nodeads</pre><p>
- Latency-weighted probabilities, but dead mirrors are excluded from
- the selection. "Dead" mirror is defined as a mirror that resulted
- in multiple hard errors (eg. network failure, or no answer, etc) in a row.
- </p><pre class="programlisting">ha_strategy = noerrors</pre><p>
- Latency-weighted probabilities, but mirrors with worse errors/success ratio
- are excluded from the selection.
- </p><h4><a name="idp33798064"></a>Round-robin balancing</h4><pre class="programlisting">ha_strategy = roundrobin</pre><p>Simple round-robin selection, that is, selecting the 1st mirror
- in the list, then the 2nd one, then the 3rd one, etc, and then repeating
- the process once the last mirror in the list is reached. Unlike with
- the randomized strategies, RR imposes a strict querying order (1, 2, 3, ..,
- N-1, N, 1, 2, 3, ... and so on) and <span class="emphasis"><em>guarantees</em></span> that
- no two subsequent queries will be sent to the same mirror.
- </p></div>
- <div class="sect2" title="12.2.61. bigram_freq_words"><div class="titlepage"><div><div><h3 class="title"><a name="conf-bigram-freq-words"></a>12.2.61. bigram_freq_words</h3></div></div></div>
- <p>
- A list of keywords considered "frequent" when indexing bigrams.
- Optional, default is empty.
- Added in 2.1.1-beta.
- </p><p>
- Bigram indexing is a feature to accelerate phrase searches.
- When indexing, it stores a document list for either all or some
- of the adjacent words pairs into the index. Such a list can then be used
- at searching time to significantly accelerate phrase or sub-phrase
- matching.
- </p><p>
- Some of the bigram indexing modes (see <a class="xref" href="#conf-bigram-index" title="12.2.62. bigram_index">Section 12.2.62, “bigram_index”</a>)
- require you to define a list of frequent keywords. These are <span class="bold"><strong>not</strong></span> to be
- confused with stopwords! Stopwords are completely eliminated when both indexing
- and searching. Frequent keywords are only used by bigrams to determine whether
- to index a current word pair or not.
- </p><p>
- <code class="code">bigram_freq_words</code> lets you define a list of such keywords.
- </p><h4><a name="idp33805712"></a>Example:</h4><pre class="programlisting">
- bigram_freq_words = the, a, you, i
- </pre></div>
- <div class="sect2" title="12.2.62. bigram_index"><div class="titlepage"><div><div><h3 class="title"><a name="conf-bigram-index"></a>12.2.62. bigram_index</h3></div></div></div>
- <p>
- Bigram indexing mode.
- Optional, default is none.
- Added in 2.1.1-beta.
- </p><p>
- Bigram indexing is a feature to accelerate phrase searches.
- When indexing, it stores a document list for either all or some
- of the adjacent words pairs into the index. Such a list can then be used
- at searching time to significantly accelerate phrase or sub-phrase
- matching.
- </p><p>
- <code class="code">bigram_index</code> controls the selection of specific word pairs.
- The known modes are:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p><code class="code">all</code>, index every single word pair.
- (NB: probably totally not worth it even on a moderately sized index,
- but added anyway for the sake of completeness.)
- </p></li>
- <li class="listitem"><p><code class="code">first_freq</code>, only index word pairs
- where the <span class="emphasis"><em>first</em></span> word is in a list of frequent words
- (see <a class="xref" href="#conf-bigram-freq-words" title="12.2.61. bigram_freq_words">Section 12.2.61, “bigram_freq_words”</a>). For example, with
- <code class="code">bigram_freq_words = the, in, i, a</code>, indexing
- "alone in the dark" text will result in "in the" and "the dark" pairs
- being stored as bigrams, because they begin with a frequent keyword
- (either "in" or "the" respectively), but "alone in" would <span class="bold"><strong>not</strong></span>
- be indexed, because "in" is a <span class="emphasis"><em>second</em></span> word in that pair.
- </p></li>
- <li class="listitem"><p><code class="code">both_freq</code>, only index word pairs where
- both words are frequent. Continuing with the same example, in this mode
- indexing "alone in the dark" would only store "in the" (the very worst
- of them all from searching perspective) as a bigram, but none of the
- other word pairs.
- </p></li>
- </ul></div>
- <p>
- For most usecases, <code class="code">both_freq</code> would be the best mode, but
- your mileage may vary.
- </p><h4><a name="idp33816784"></a>Example:</h4><pre class="programlisting">
- bigram_index = both_freq
- </pre></div>
- <div class="sect2" title="12.2.63. index_field_lengths"><div class="titlepage"><div><div><h3 class="title"><a name="conf-index-field-lengths"></a>12.2.63. index_field_lengths</h3></div></div></div>
- <p>
- Enables computing and storing of field lengths (both per-document and
- average per-index values) into the index.
- Optional, default is 0 (do not compute and store).
- Added in 2.1.1-beta.
- </p><p>
- When <code class="code">index_field_lengths</code> is set to 1, <code class="filename">indexer</code>
- will 1) create a respective length attribute for every full-text field,
- sharing the same name; 2) compute a field length (counted in keywords) for
- every document and store it into a respective attribute; 3) compute the per-index
- averages. The lengths attributes will have a special TOKENCOUNT type, but their
- values are in fact regular 32-bit integers, and their values are generally
- accessible.
- </p><p>
- BM25A() and BM25F() functions in the expression ranker are based
- on these lengths and require <code class="code">index_field_lengths</code> to be enabled.
- Historically, Sphinx used a simplified, stripped-down variant of BM25 that,
- unlike the complete function, did <span class="bold"><strong>not</strong></span> account for document length.
- (We later realized that it should have been called BM15 from the start.)
- Starting with 2.1.1-beta, we added support for both a complete variant of BM25,
- and its extension towards multiple fields, called BM25F. They require
- per-document length and per-field lengths, respectively. Hence the additional
- directive.
- </p><h4><a name="idp33823712"></a>Example:</h4><pre class="programlisting">
- index_field_lengths = 1
- </pre></div>
- <div class="sect2" title="12.2.64. regexp_filter"><div class="titlepage"><div><div><h3 class="title"><a name="conf-regexp-filter"></a>12.2.64. regexp_filter</h3></div></div></div>
- <p>
- Regular expressions (regexps) to filter the fields and queries with.
- Optional, multi-value, default is an empty list of regexps.
- Added in 2.1.1-beta.
- </p><p>
- In certain applications (like product search) there can be
- many different ways to call a model, or a product, or a property,
- and so on. For instance, 'iphone 3gs' and 'iphone 3 gs'
- (or even 'iphone3 gs') are very likely to mean the same
- product. Or, for a more tricky example, '13-inch', '13 inch',
- '13"', and '13in' in a laptop screen size descriptions do mean
- the same.
- </p><p>
- Regexps provide you with a mechanism to specify a number of rules
- specific to your application to handle such cases. In the first
- 'iphone 3gs' example, you could possibly get away with a wordforms
- files tailored to handle a handful of iPhone models. However even
- in a comparatively simple second '13-inch' example there are just
- way too many individual forms and you are better off specifying
- rules that would normalize both '13-inch' and '13in' to something
- identical.
- </p><p>
- Regular expressions listed in <code class="code">regexp_filter</code> are
- applied in the order they are listed. That happens at the earliest
- stage possible, before any other processing, even before tokenization.
- That is, regexps are applied to the raw source fields when indexing,
- and to the raw search query text when searching.
- </p><p>
- We use the <a class="ulink" href="http://code.google.com/p/re2/" target="_top">RE2 engine</a>
- to implement regexps. So when building from the source, the library must be
- installed in the system and Sphinx must be configured and built with a
- <code class="code">--with-re2</code> switch. Binary packages should come with RE2
- builtin.
- </p><h4><a name="idp33830880"></a>Example:</h4><pre class="programlisting">
- # index '13"' as '13inch'
- regexp_filter = \b(\d+)\" => \1inch
- # index 'blue' or 'red' as 'color'
- regexp_filter = (blue|red) => color
- </pre></div>
- <div class="sect2" title="12.2.65. stopwords_unstemmed"><div class="titlepage"><div><div><h3 class="title"><a name="conf-stopwords-unstemmed"></a>12.2.65. stopwords_unstemmed</h3></div></div></div>
- <p>
- Whether to apply stopwords before or after stemming.
- Optional, default is 0 (apply stopword filter after stemming).
- Added in 2.1.1-beta.
- </p><p>
- By default, stopwords are stemmed themselves, and applied to
- tokens <span class="emphasis"><em>after</em></span> stemming (or any other morphology
- processing). In other words, by default, a token is stopped when
- stem(token) == stem(stopword). That can lead to unexpected results
- when a token gets (erroneously) stemmed to a stopped root. For example,
- 'Andes' gets stemmed to 'and' by our current stemmer implementation,
- so when 'and' is a stopword, 'Andes' is also stopped.
- </p><p>
- stopwords_unstemmed directive fixes that issue. When it's enabled,
- stopwords are applied before stemming (and therefore to the original
- word forms), and the tokens are stopped when token == stopword.
- </p><h4><a name="idp33835712"></a>Example:</h4><pre class="programlisting">
- stopwords_unstemmed = 1
- </pre></div>
- <div class="sect2" title="12.2.66. global_idf"><div class="titlepage"><div><div><h3 class="title"><a name="conf-global-idf"></a>12.2.66. global_idf</h3></div></div></div>
- <p>
- The path to a file with global (cluster-wide) keyword IDFs.
- Optional, default is empty (use local IDFs).
- Added in 2.1.1-beta.
- </p><p>
- On a multi-index cluster, per-keyword frequencies are quite
- likely to differ across different indexes. That means that when
- the ranking function uses TF-IDF based values, such as BM25 family
- of factors, the results might be ranked slightly differently
- depending on which cluster node they reside on.
- </p><p>
- The easiest way to fix that issue is to create and utilize
- a global frequency dictionary, or a global IDF file for short.
- This directive lets you specify the location of that file.
- It is suggested (but not required) to use a .idf extension.
- When the IDF file is specified for a given index <span class="emphasis"><em>and</em></span>
- OPTION global_idf is set to 1, the engine will use the keyword
- frequencies and collection documents count from the global_idf file,
- rather than just the local index. That way, IDFs and the values
- that depend on them will stay consistent across the cluster.
- </p><p>
- IDF files can be shared across multiple indexes. Only a single
- copy of an IDF file will be loaded by <code class="filename">searchd</code>,
- even when many indexes refer to that file. Should the contents of
- an IDF file change, the new contents can be loaded with a SIGHUP.
- </p><p>
- You can build an .idf file using <code class="filename">indextool</code>
- utility, by dumping dictionaries using <code class="code">--dumpdict</code> switch
- first, then converting those to .idf format using <code class="code">--buildidf</code>,
- then merging all .idf files across cluster using <code class="code">--mergeidf</code>.
- Refer to <a class="xref" href="#ref-indextool" title="7.4. indextool command reference">Section 7.4, “<code class="filename">indextool</code> command reference”</a> for more information.
- </p><h4><a name="idp33845056"></a>Example:</h4><pre class="programlisting">
- global_idf = /usr/local/sphinx/var/global.idf
- </pre></div>
- <div class="sect2" title="12.2.67. rlp_context"><div class="titlepage"><div><div><h3 class="title"><a name="conf-rlp-context"></a>12.2.67. rlp_context</h3></div></div></div>
- <p>
- RLP context configuration file. Mandatory if RLP is used.
- Added in 2.2.1-beta.
- </p><h4><a name="idp33847776"></a>Example:</h4><pre class="programlisting">
- rlp_context = /home/myuser/RLP/rlp-context.xml
- </pre></div>
- <div class="sect2" title="12.2.68. ondisk_attrs"><div class="titlepage"><div><div><h3 class="title"><a name="conf-ondisk-attrs"></a>12.2.68. ondisk_attrs</h3></div></div></div>
- <p>
- Allows for fine-grain control over how attributes are loaded into memory
- when using indexes with external storage. It is now possible (since
- version 2.2.1-beta) to keep attributes on disk. Although, the daemon does
- map them to memory and the OS loads small chunks of data on demand. This
- allows use of docinfo = extern instead of docinfo = inline, but still
- leaves plenty of free memory for cases when you have large collections
- of pooled attributes (string/JSON/MVA) or when you're using many indexes
- per daemon that don't consume memory. It is not possible to update
- attributes left on disk when this option is enabled and the constraint
- of 4Gb of entries per pool is still in effect.
- </p><p>
- Note that this option also affects RT indexes. When it is enabled, all attribute updates
- will be disabled, and also all disk chunks of RT indexes will behave as described above. However
- inserting and deleting of docs from RT indexes is still possible with enabled ondisk_attrs.
- </p><h4><a name="idp33851776"></a>Possible values:</h4><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">
- 0 - disabled and default value, all attributes are loaded in memory
- (the normal behaviour of docinfo = extern)
- </li>
- <li class="listitem">
- 1 - all attributes stay on disk. Daemon loads no files (spa, spm, sps).
- This is the most memory conserving mode, however it is also the slowest
- as the whole doc-id-list and block index doesn't load.
- </li>
- <li class="listitem">
- pool - only pooled attributes stay on disk. Pooled attributes are string,
- MVA, and JSON attributes (sps, spm files). Scalar attributes stored in
- docinfo (spa file) load as usual.
- </li>
- </ul></div>
- <p>
- This option does not affect indexing in any way, it only requires daemon
- restart.
- </p><h4><a name="idp33854752"></a>Example:</h4><pre class="programlisting">
- ondisk_attrs = pool #keep pooled attributes on disk
- </pre></div></div>
- <div class="sect1" title="12.3. indexer program configuration options"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="confgroup-indexer"></a>12.3. <code class="filename">indexer</code> program configuration options</h2></div></div></div>
- <div class="sect2" title="12.3.1. mem_limit"><div class="titlepage"><div><div><h3 class="title"><a name="conf-mem-limit"></a>12.3.1. mem_limit</h3></div></div></div>
- <p>
- Indexing RAM usage limit.
- Optional, default is 128M.
- </p><p>
- Enforced memory usage limit that the <code class="filename">indexer</code>
- will not go above. Can be specified in bytes, or kilobytes
- (using K postfix), or megabytes (using M postfix); see the example.
- This limit will be automatically raised if set to extremely low value
- causing I/O buffers to be less than 8 KB; the exact lower bound
- for that depends on the indexed data size. If the buffers are
- less than 256 KB, a warning will be produced.
- </p><p>
- Maximum possible limit is 2047M. Too low values can hurt
- indexing speed, but 256M to 1024M should be enough for most
- if not all datasets. Setting this value too high can cause
- SQL server timeouts. During the document collection phase,
- there will be periods when the memory buffer is partially
- sorted and no communication with the database is performed;
- and the database server can timeout. You can resolve that
- either by raising timeouts on SQL server side or by lowering
- <code class="code">mem_limit</code>.
- </p><h4><a name="idp33861968"></a>Example:</h4><pre class="programlisting">
- mem_limit = 256M
- # mem_limit = 262144K # same, but in KB
- # mem_limit = 268435456 # same, but in bytes
- </pre></div>
- <div class="sect2" title="12.3.2. max_iops"><div class="titlepage"><div><div><h3 class="title"><a name="conf-max-iops"></a>12.3.2. max_iops</h3></div></div></div>
- <p>
- Maximum I/O operations per second, for I/O throttling.
- Optional, default is 0 (unlimited).
- </p><p>
- I/O throttling related option.
- It limits maximum count of I/O operations (reads or writes) per any given second.
- A value of 0 means that no limit is imposed.
- </p><p>
- <code class="filename">indexer</code> can cause bursts of intensive disk I/O during
- indexing, and it might be desired to limit its disk activity (and keep something
- for other programs running on the same machine, such as <code class="filename">searchd</code>).
- I/O throttling helps to do that. It works by enforcing a minimum guaranteed
- delay between subsequent disk I/O operations performed by <code class="filename">indexer</code>.
- Modern SATA HDDs are able to perform up to 70-100+ I/O operations per second
- (that's mostly limited by disk heads seek time). Limiting indexing I/O
- to a fraction of that can help reduce search performance degradation
- caused by indexing.
- </p><h4><a name="idp33868416"></a>Example:</h4><pre class="programlisting">
- max_iops = 40
- </pre></div>
- <div class="sect2" title="12.3.3. max_iosize"><div class="titlepage"><div><div><h3 class="title"><a name="conf-max-iosize"></a>12.3.3. max_iosize</h3></div></div></div>
- <p>
- Maximum allowed I/O operation size, in bytes, for I/O throttling.
- Optional, default is 0 (unlimited).
- </p><p>
- I/O throttling related option. It limits maximum file I/O operation
- (read or write) size for all operations performed by <code class="filename">indexer</code>.
- A value of 0 means that no limit is imposed.
- Reads or writes that are bigger than the limit
- will be split in several smaller operations, and counted as several operations
- by <a class="link" href="#conf-max-iops" title="12.3.2. max_iops">max_iops</a> setting. At the time of this
- writing, all I/O calls should be under 256 KB (default internal buffer size)
- anyway, so <code class="code">max_iosize</code> values higher than 256 KB must not affect anything.
- </p><h4><a name="idp33874048"></a>Example:</h4><pre class="programlisting">
- max_iosize = 1048576
- </pre></div>
- <div class="sect2" title="12.3.4. max_xmlpipe2_field"><div class="titlepage"><div><div><h3 class="title"><a name="conf-max-xmlpipe2-field"></a>12.3.4. max_xmlpipe2_field</h3></div></div></div>
- <p>
- Maximum allowed field size for XMLpipe2 source type, bytes.
- Optional, default is 2 MB.
- </p><h4><a name="idp33876800"></a>Example:</h4><pre class="programlisting">
- max_xmlpipe2_field = 8M
- </pre></div>
- <div class="sect2" title="12.3.5. write_buffer"><div class="titlepage"><div><div><h3 class="title"><a name="conf-write-buffer"></a>12.3.5. write_buffer</h3></div></div></div>
- <p>
- Write buffer size, bytes.
- Optional, default is 1 MB.
- </p><p>
- Write buffers are used to write both temporary and final index
- files when indexing. Larger buffers reduce the number of required
- disk writes. Memory for the buffers is allocated in addition to
- <a class="link" href="#conf-mem-limit" title="12.3.1. mem_limit">mem_limit</a>. Note that several
- (currently up to 4) buffers for different files will be allocated,
- proportionally increasing the RAM usage.
- </p><h4><a name="idp33881120"></a>Example:</h4><pre class="programlisting">
- write_buffer = 4M
- </pre></div>
- <div class="sect2" title="12.3.6. max_file_field_buffer"><div class="titlepage"><div><div><h3 class="title"><a name="conf-max-file-field-buffer"></a>12.3.6. max_file_field_buffer</h3></div></div></div>
- <p>
- Maximum file field adaptive buffer size, bytes.
- Optional, default is 8 MB, minimum is 1 MB.
- </p><p>
- File field buffer is used to load files referred to from
- <a class="link" href="#conf-sql-file-field" title="12.1.27. sql_file_field">sql_file_field</a> columns.
- This buffer is adaptive, starting at 1 MB at first allocation,
- and growing in 2x steps until either file contents can be loaded,
- or maximum buffer size, specified by <code class="option">max_file_field_buffer</code>
- directive, is reached.
- </p><p>
- Thus, if no file fields are specified, no buffer
- is allocated at all. If all files loaded during indexing are under
- (for example) 2 MB in size, but <code class="option">max_file_field_buffer</code>
- value is 128 MB, peak buffer usage would still be only 2 MB. However,
- files over 128 MB would be entirely skipped.
- </p><h4><a name="idp33886944"></a>Example:</h4><pre class="programlisting">
- max_file_field_buffer = 128M
- </pre></div>
- <div class="sect2" title="12.3.7. on_file_field_error"><div class="titlepage"><div><div><h3 class="title"><a name="conf-on-file-field-error"></a>12.3.7. on_file_field_error</h3></div></div></div>
- <p>
- How to handle IO errors in file fields.
- Optional, default is <code class="code">ignore_field</code>.
- Introduced in version 2.0.2-beta.
- </p><p>
- When there is a problem indexing a file referenced by a file field
- (<a class="xref" href="#conf-sql-file-field" title="12.1.27. sql_file_field">Section 12.1.27, “sql_file_field”</a>), <code class="filename">indexer</code> can
- either index the document, assuming empty content in this particular field,
- or skip the document, or fail indexing entirely. <code class="option">on_file_field_error</code>
- directive controls that behavior. The values it takes are:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p><code class="code">ignore_field</code>, index the current document without field;</p></li>
- <li class="listitem"><p><code class="code">skip_document</code>, skip the current document but continue indexing;</p></li>
- <li class="listitem"><p><code class="code">fail_index</code>, fail indexing with an error message.</p></li>
- </ul></div>
- <p>
- </p><p>
- The problems that can arise are: open error, size error (file too big),
- and data read error. Warning messages on any problem will be given at all times,
- regardless of the phase and the <code class="code">on_file_field_error</code> setting.
- </p><p>
- Note that with <code class="option">on_file_field_error = skip_document</code>
- documents will only be ignored if problems are detected during
- an early check phase, and <span class="bold"><strong>not</strong></span> during the actual file parsing
- phase. <code class="filename">indexer</code> will open every referenced file
- and check its size before doing any work, and then open it again
- when doing actual parsing work. So in case a file goes away
- between these two open attempts, the document will still be
- indexed.
- </p><h4><a name="idp33899184"></a>Example:</h4><pre class="programlisting">
- on_file_field_error = skip_document
- </pre></div>
- <div class="sect2" title="12.3.8. lemmatizer_cache"><div class="titlepage"><div><div><h3 class="title"><a name="conf-lemmatizer-cache"></a>12.3.8. lemmatizer_cache</h3></div></div></div>
- <p>
- Lemmatizer cache size.
- Optional, default is 256K.
- Added in version 2.1.1-beta.
- </p><p>
- Our lemmatizer implementation (see <a class="xref" href="#conf-morphology" title="12.2.6. morphology">Section 12.2.6, “morphology”</a>
- for a discussion of what lemmatizers are) uses a compressed dictionary
- format that enables a space/speed tradeoff. It can either perform
- lemmatization off the compressed data, using more CPU but less RAM,
- or it can decompress and precache the dictionary either partially
- or fully, thus using less CPU but more RAM. And the lemmatizer_cache
- directive lets you control how much RAM exactly can be spent for that
- uncompressed dictionary cache.
- </p><p>
- Currently, the only available dictionary is ru.pak, the Russian one.
- The compressed dictionary is approximately 10 MB in size. Note that the
- dictionary stays in memory at all times, too. The default cache size
- is 256 KB. The accepted cache sizes are 0 to 2047 MB. It's safe to raise
- the cache size too high; the lemmatizer will only use the needed memory.
- For instance, the entire Russian dictionary decompresses to approximately
- 110 MB; and thus setting lemmatizer_cache anywhere higher than that will
- not affect the memory use: even when 1024 MB is allowed for the cache,
- if only 110 MB is needed, it will only use those 110 MB.
- </p><p>
- On our benchmarks, the total indexing time with different cache
- sizes was as follows:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">9.07 sec, morphology = lemmatize_ru, lemmatizer_cache = 0</li>
- <li class="listitem">8.60 sec, morphology = lemmatize_ru, lemmatizer_cache = 256K</li>
- <li class="listitem">8.33 sec, morphology = lemmatize_ru, lemmatizer_cache = 8M</li>
- <li class="listitem">7.95 sec, morphology = lemmatize_ru, lemmatizer_cache = 128M</li>
- <li class="listitem">6.85 sec, morphology = stem_ru (baseline)</li>
- </ul></div>
- <p>
- Your mileage may vary, but a simple rule of thumb would be to either
- go with the small default 256 KB cache when pressed for memory, or spend
- 128 MB extra RAM and cache the entire dictionary for maximum indexing
- performance.
- </p><h4><a name="idp33907904"></a>Example:</h4><pre class="programlisting">
- lemmatizer_cache = 256M # cache it all
- </pre></div></div>
- <div class="sect1" title="12.4. searchd program configuration options"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="confgroup-searchd"></a>12.4. <code class="filename">searchd</code> program configuration options</h2></div></div></div>
- <div class="sect2" title="12.4.1. listen"><div class="titlepage"><div><div><h3 class="title"><a name="conf-listen"></a>12.4.1. listen</h3></div></div></div>
- <p>
- This setting lets you specify IP address and port, or Unix-domain
- socket path, that <code class="code">searchd</code> will listen on.
- Introduced in version 0.9.9-rc1.
- </p><p>
- The informal grammar for <code class="code">listen</code> setting is:
- </p><pre class="programlisting">
- listen = ( address ":" port | port | path ) [ ":" protocol ]
- </pre><p>
- I.e. you can specify either an IP address (or hostname) and port
- number, or just a port number, or Unix socket path. If you specify
- port number but not the address, <code class="code">searchd</code> will listen on
- all network interfaces. Unix path is identified by a leading slash.
- </p><p>
- Starting with version 0.9.9-rc2, you can also specify a protocol
- handler (listener) to be used for connections on this socket.
- Supported protocol values are 'sphinx' (Sphinx 0.9.x API protocol)
- and 'mysql41' (MySQL protocol used since 4.1 up to at least 5.1).
- More details on MySQL protocol support can be found in
- <a class="xref" href="#sphinxql" title="5.10. MySQL protocol support and SphinxQL">Section 5.10, “MySQL protocol support and SphinxQL”</a> section.
- </p><h4><a name="idp33916512"></a>Examples:</h4><pre class="programlisting">
- listen = localhost
- listen = localhost:5000
- listen = 192.168.0.1:5000
- listen = /var/run/sphinx.s
- listen = 9312
- listen = localhost:9306:mysql41
- </pre><p>
- There can be multiple listen directives, <code class="code">searchd</code> will
- listen for client connections on all specified ports and sockets. If
- no <code class="code">listen</code> directives are found then the server will listen
- on all available interfaces using the default SphinxAPI port 9312.
- Starting with 1.10-beta, it will also listen on default SphinxQL
- port 9306. Both port numbers are assigned by IANA (see
- <a class="ulink" href="http://www.iana.org/assignments/port-numbers" target="_top">http://www.iana.org/assignments/port-numbers</a>
- for details) and should therefore be available.
- </p><p>
- Unix-domain sockets are not supported on Windows.
- </p></div>
- <div class="sect2" title="12.4.2. log"><div class="titlepage"><div><div><h3 class="title"><a name="conf-log"></a>12.4.2. log</h3></div></div></div>
- <p>
- Log file name.
- Optional, default is 'searchd.log'.
- All <code class="filename">searchd</code> run time events will be logged in this file.
- </p><p>
- You can also use 'syslog' as the file name. In this case the events will be sent to the syslog daemon.
- To use the syslog option, Sphinx must be configured with '--with-syslog' at build time.
- </p><h4><a name="idp33923568"></a>Example:</h4><pre class="programlisting">
- log = /var/log/searchd.log
- </pre></div>
- <div class="sect2" title="12.4.3. query_log"><div class="titlepage"><div><div><h3 class="title"><a name="conf-query-log"></a>12.4.3. query_log</h3></div></div></div>
- <p>
- Query log file name.
- Optional, default is empty (do not log queries).
- All search queries will be logged in this file. The format is described in <a class="xref" href="#query-log-format" title="5.9. searchd query log formats">Section 5.9, “<code class="filename">searchd</code> query log formats”</a>.
- </p><p>
- In case of 'plain' format, you can use the 'syslog' as the path to the log file.
- In this case all search queries will be sent to syslog daemon with LOG_INFO priority,
- prefixed with '[query]' instead of timestamp.
- To use the syslog option, Sphinx must be configured with '--with-syslog' at build time.
- </p><h4><a name="idp33927712"></a>Example:</h4><pre class="programlisting">
- query_log = /var/log/query.log
- </pre></div>
- <div class="sect2" title="12.4.4. query_log_format"><div class="titlepage"><div><div><h3 class="title"><a name="conf-query-log-format"></a>12.4.4. query_log_format</h3></div></div></div>
- <p>
- Query log format.
- Optional, allowed values are 'plain' and 'sphinxql', default is 'plain'.
- Introduced in version 2.0.1-beta.
- </p><p>
- Starting with version 2.0.1-beta, two different log formats are supported.
- The default one logs queries in a custom text format. The new one logs
- valid SphinxQL statements. This directive allows switching between the two
- formats on search daemon startup. The log format can also be altered
- on the fly, using <code class="code">SET GLOBAL query_log_format=sphinxql</code> syntax.
- Refer to <a class="xref" href="#query-log-format" title="5.9. searchd query log formats">Section 5.9, “<code class="filename">searchd</code> query log formats”</a> for more discussion and format
- details.
- </p><h4><a name="idp33932336"></a>Example:</h4><pre class="programlisting">
- query_log_format = sphinxql
- </pre></div>
- <div class="sect2" title="12.4.5. read_timeout"><div class="titlepage"><div><div><h3 class="title"><a name="conf-read-timeout"></a>12.4.5. read_timeout</h3></div></div></div>
- <p>
- Network client request read timeout, in seconds.
- Optional, default is 5 seconds.
- <code class="filename">searchd</code> will forcibly close the client connections which fail to send a query within this timeout.
- </p><h4><a name="idp33935856"></a>Example:</h4><pre class="programlisting">
- read_timeout = 1
- </pre></div>
- <div class="sect2" title="12.4.6. client_timeout"><div class="titlepage"><div><div><h3 class="title"><a name="conf-client-timeout"></a>12.4.6. client_timeout</h3></div></div></div>
- <p>
- Maximum time to wait between requests (in seconds) when using
- persistent connections. Optional, default is five minutes.
- </p><h4><a name="idp33938576"></a>Example:</h4><pre class="programlisting">
- client_timeout = 3600
- </pre></div>
- <div class="sect2" title="12.4.7. max_children"><div class="titlepage"><div><div><h3 class="title"><a name="conf-max-children"></a>12.4.7. max_children</h3></div></div></div>
- <p>
- Maximum amount of children to fork (or in other words, concurrent searches to run in parallel).
- Optional, default is 0 (unlimited).
- </p><p>
- Useful to control server load. There will be no more than this many concurrent
- searches running, at all times. When the limit is reached, additional incoming
- clients are dismissed with a temporary failure (SEARCHD_RETRY) status code
- and a message stating that the server is maxed out.
- </p><h4><a name="idp33942048"></a>Example:</h4><pre class="programlisting">
- max_children = 10
- </pre></div>
- <div class="sect2" title="12.4.8. pid_file"><div class="titlepage"><div><div><h3 class="title"><a name="conf-pid-file"></a>12.4.8. pid_file</h3></div></div></div>
- <p>
- <code class="filename">searchd</code> process ID file name.
- Mandatory.
- </p><p>
- PID file will be re-created (and locked) on startup. It will contain
- head daemon process ID while the daemon is running, and it will be unlinked
- on daemon shutdown. It's mandatory because Sphinx uses it internally
- for a number of things: to check whether there already is a running instance
- of <code class="filename">searchd</code>; to stop <code class="filename">searchd</code>;
- to notify it that it should rotate the indexes. Can also be used for
- different external automation scripts.
- </p><h4><a name="idp33947600"></a>Example:</h4><pre class="programlisting">
- pid_file = /var/run/searchd.pid
- </pre></div>
- <div class="sect2" title="12.4.9. seamless_rotate"><div class="titlepage"><div><div><h3 class="title"><a name="conf-seamless-rotate"></a>12.4.9. seamless_rotate</h3></div></div></div>
- <p>
- Prevents <code class="filename">searchd</code> stalls while rotating indexes with huge amounts of data to precache.
- Optional, default is 1 (enable seamless rotation).
- </p><p>
- Indexes may contain some data that needs to be precached in RAM.
- At the moment, <code class="filename">.spa</code>, <code class="filename">.spi</code> and
- <code class="filename">.spm</code> files are fully precached (they contain attribute data,
- keyword index, and MVA data, respectively.)
- Without seamless rotate, rotating an index tries to use as little RAM
- as possible and works as follows:
- </p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>new queries are temporarily rejected (with "retry" error code);</p></li>
- <li class="listitem"><p><code class="filename">searchd</code> waits for all currently running queries to finish;</p></li>
- <li class="listitem"><p>old index is deallocated and its files are renamed;</p></li>
- <li class="listitem"><p>new index files are renamed and required RAM is allocated;</p></li>
- <li class="listitem"><p>new index attribute and dictionary data is preloaded to RAM;</p></li>
- <li class="listitem"><p><code class="filename">searchd</code> resumes serving queries from new index.</p></li>
- </ol></div>
- <p>
- </p><p>
- However, if there's a lot of attribute or dictionary data, then preloading step
- could take noticeable time - up to several minutes in case of preloading 1-5+ GB files.
- </p><p>
- With seamless rotate enabled, rotation works as follows:
- </p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>new index RAM storage is allocated;</p></li>
- <li class="listitem"><p>new index attribute and dictionary data is asynchronously preloaded to RAM;</p></li>
- <li class="listitem"><p>on success, old index is deallocated and both indexes' files are renamed;</p></li>
- <li class="listitem"><p>on failure, new index is deallocated;</p></li>
- <li class="listitem"><p>at any given moment, queries are served either from old or new index copy.</p></li>
- </ol></div>
- <p>
- </p><p>
- Seamless rotate comes at the cost of higher <span class="bold"><strong>peak</strong></span>
- memory usage during the rotation (because both old and new copies of
- <code class="filename">.spa/.spi/.spm</code> data need to be in RAM while
- preloading new copy). Average usage stays the same.
- </p><h4><a name="idp33966336"></a>Example:</h4><pre class="programlisting">
- seamless_rotate = 1
- </pre></div>
- <div class="sect2" title="12.4.10. preopen_indexes"><div class="titlepage"><div><div><h3 class="title"><a name="conf-preopen-indexes"></a>12.4.10. preopen_indexes</h3></div></div></div>
- <p>
- Whether to forcibly preopen all indexes on startup.
- Optional, default is 1 (preopen everything).
- </p><p>
- Starting with 2.0.1-beta, the default value for this
- option is now 1 (forcibly preopen all indexes). In prior
- versions, it used to be 0 (use per-index settings).
- </p><p>
- When set to 1, this directive overrides and enforces
- <a class="link" href="#conf-preopen" title="12.2.36. preopen">preopen</a> on all indexes.
- They will be preopened, no matter what is the per-index
- <code class="code">preopen</code> setting. When set to 0, per-index
- settings can take effect. (And they default to 0.)
- </p><p>
- Pre-opened indexes avoid races between search queries
- and rotations that can cause queries to fail occasionally.
- They also make <code class="filename">searchd</code> use more file
- handles. In most scenarios it's therefore preferred and
- recommended to preopen indexes.
- </p><h4><a name="idp33972848"></a>Example:</h4><pre class="programlisting">
- preopen_indexes = 1
- </pre></div>
- <div class="sect2" title="12.4.11. unlink_old"><div class="titlepage"><div><div><h3 class="title"><a name="conf-unlink-old"></a>12.4.11. unlink_old</h3></div></div></div>
- <p>
- Whether to unlink .old index copies on successful rotation.
- Optional, default is 1 (do unlink).
- </p><h4><a name="idp33975600"></a>Example:</h4><pre class="programlisting">
- unlink_old = 0
- </pre></div>
- <div class="sect2" title="12.4.12. attr_flush_period"><div class="titlepage"><div><div><h3 class="title"><a name="conf-attr-flush-period"></a>12.4.12. attr_flush_period</h3></div></div></div>
- <p>
- When calling <code class="code">UpdateAttributes()</code> to update document attributes in
- real-time, changes are first written to the in-memory copy of attributes
- (<code class="option">docinfo</code> must be set to <code class="option">extern</code>).
- Then, once <code class="filename">searchd</code> shuts down normally (via <code class="code">SIGTERM</code>
- being sent), the changes are written to disk.
- Introduced in version 0.9.9-rc1.
- </p><p>Starting with 0.9.9-rc1, it is possible to tell <code class="filename">searchd</code>
- to periodically write these changes back to disk, to avoid them being lost. The time
- between those intervals is set with <code class="option">attr_flush_period</code>, in seconds.
- </p><p>It defaults to 0, which disables the periodic flushing, but flushing will
- still occur at normal shut-down.
- </p><h4><a name="idp33983152"></a>Example:</h4><pre class="programlisting">
- attr_flush_period = 900 # persist updates to disk every 15 minutes
- </pre></div>
- <div class="sect2" title="12.4.13. max_packet_size"><div class="titlepage"><div><div><h3 class="title"><a name="conf-max-packet-size"></a>12.4.13. max_packet_size</h3></div></div></div>
- <p>
- Maximum allowed network packet size.
- Limits both query packets from clients, and response packets from remote agents in distributed environment.
- Only used for internal sanity checks, does not directly affect RAM use or performance.
- Optional, default is 8M.
- Introduced in version 0.9.9-rc1.
- </p><h4><a name="idp33986144"></a>Example:</h4><pre class="programlisting">
- max_packet_size = 32M
- </pre></div>
- <div class="sect2" title="12.4.14. mva_updates_pool"><div class="titlepage"><div><div><h3 class="title"><a name="conf-mva-updates-pool"></a>12.4.14. mva_updates_pool</h3></div></div></div>
- <p>
- Shared pool size for in-memory MVA updates storage.
- Optional, default size is 1M.
- Introduced in version 0.9.9-rc1.
- </p><p>
- This setting controls the size of the shared storage pool for updated MVA values.
- Specifying 0 for the size disables MVA updates altogether. Once the pool size limit
- is hit, MVA update attempts will result in an error. However, updates on regular
- (scalar) attributes will still work. Due to internal technical difficulties,
- currently it is <span class="bold"><strong>not</strong></span> possible to store (flush) <span class="bold"><strong>any</strong></span> updates on indexes
- where MVA were updated; though this might be implemented in the future.
- In the meantime, MVA updates are intended to be used as a measure to quickly
- catch up with latest changes in the database until the next index rebuild;
- not as a persistent storage mechanism.
- </p><h4><a name="idp33991264"></a>Example:</h4><pre class="programlisting">
- mva_updates_pool = 16M
- </pre></div>
- <div class="sect2" title="12.4.15. max_filters"><div class="titlepage"><div><div><h3 class="title"><a name="conf-max-filters"></a>12.4.15. max_filters</h3></div></div></div>
- <p>
- Maximum allowed per-query filter count.
- Only used for internal sanity checks, does not directly affect RAM use or performance.
- Optional, default is 256.
- Introduced in version 0.9.9-rc1.
- </p><h4><a name="idp33994080"></a>Example:</h4><pre class="programlisting">
- max_filters = 1024
- </pre></div>
- <div class="sect2" title="12.4.16. max_filter_values"><div class="titlepage"><div><div><h3 class="title"><a name="conf-max-filter-values"></a>12.4.16. max_filter_values</h3></div></div></div>
- <p>
- Maximum allowed per-filter values count.
- Only used for internal sanity checks, does not directly affect RAM use or performance.
- Optional, default is 4096.
- Introduced in version 0.9.9-rc1.
- </p><h4><a name="idp33996928"></a>Example:</h4><pre class="programlisting">
- max_filter_values = 16384
- </pre></div>
- <div class="sect2" title="12.4.17. listen_backlog"><div class="titlepage"><div><div><h3 class="title"><a name="conf-listen-backlog"></a>12.4.17. listen_backlog</h3></div></div></div>
- <p>
- TCP listen backlog.
- Optional, default is 5.
- </p><p>
- Windows builds currently (as of 0.9.9) can only process the requests
- one by one. Concurrent requests will be enqueued by the TCP stack
- on OS level, and requests that can not be enqueued will immediately
- fail with "connection refused" message. listen_backlog directive
- controls the length of the connection queue. Non-Windows builds
- should work fine with the default value.
- </p><h4><a name="idp34000368"></a>Example:</h4><pre class="programlisting">
- listen_backlog = 20
- </pre></div>
- <div class="sect2" title="12.4.18. read_buffer"><div class="titlepage"><div><div><h3 class="title"><a name="conf-read-buffer"></a>12.4.18. read_buffer</h3></div></div></div>
- <p>
- Per-keyword read buffer size.
- Optional, default is 256K.
- </p><p>
- For every keyword occurrence in every search query, there are
- two associated read buffers (one for document list and one for
- hit list). This setting lets you control their sizes, increasing
- per-query RAM use, but possibly decreasing IO time.
- </p><h4><a name="idp34003728"></a>Example:</h4><pre class="programlisting">
- read_buffer = 1M
- </pre></div>
- <div class="sect2" title="12.4.19. read_unhinted"><div class="titlepage"><div><div><h3 class="title"><a name="conf-read-unhinted"></a>12.4.19. read_unhinted</h3></div></div></div>
- <p>
- Unhinted read size.
- Optional, default is 32K.
- </p><p>
- When querying, some reads know in advance exactly how much data
- is there to be read, but some currently do not. Most prominently,
- hit list size is not currently known in advance. This setting
- lets you control how much data to read in such cases. It will
- impact hit list IO time, reducing it for lists larger than
- unhinted read size, but raising it for smaller lists. It will
- <span class="bold"><strong>not</strong></span> affect RAM use because read buffer will be already
- allocated. So it should not be greater than read_buffer.
- </p><h4><a name="idp34007936"></a>Example:</h4><pre class="programlisting">
- read_unhinted = 32K
- </pre></div>
- <div class="sect2" title="12.4.20. max_batch_queries"><div class="titlepage"><div><div><h3 class="title"><a name="conf-max-batch-queries"></a>12.4.20. max_batch_queries</h3></div></div></div>
- <p>
- Limits the amount of queries per batch.
- Optional, default is 32.
- </p><p>
- Makes searchd perform a sanity check of the amount of the queries
- submitted in a single batch when using <a class="link" href="#multi-queries" title="5.11. Multi-queries">multi-queries</a>.
- Set it to 0 to skip the check.
- </p><h4><a name="idp34011968"></a>Example:</h4><pre class="programlisting">
- max_batch_queries = 256
- </pre></div>
- <div class="sect2" title="12.4.21. subtree_docs_cache"><div class="titlepage"><div><div><h3 class="title"><a name="conf-subtree-docs-cache"></a>12.4.21. subtree_docs_cache</h3></div></div></div>
- <p>
- Max common subtree document cache size, per-query.
- Optional, default is 0 (disabled).
- </p><p>
- Limits RAM usage of a common subtree optimizer (see <a class="xref" href="#multi-queries" title="5.11. Multi-queries">Section 5.11, “Multi-queries”</a>).
- At most this much RAM will be spent to cache document entries per each query.
- Setting the limit to 0 disables the optimizer.
- </p><h4><a name="idp34015936"></a>Example:</h4><pre class="programlisting">
- subtree_docs_cache = 8M
- </pre></div>
- <div class="sect2" title="12.4.22. subtree_hits_cache"><div class="titlepage"><div><div><h3 class="title"><a name="conf-subtree-hits-cache"></a>12.4.22. subtree_hits_cache</h3></div></div></div>
- <p>
- Max common subtree hit cache size, per-query.
- Optional, default is 0 (disabled).
- </p><p>
- Limits RAM usage of a common subtree optimizer (see <a class="xref" href="#multi-queries" title="5.11. Multi-queries">Section 5.11, “Multi-queries”</a>).
- At most this much RAM will be spent to cache keyword occurrences (hits) per each query.
- Setting the limit to 0 disables the optimizer.
- </p><h4><a name="idp34019888"></a>Example:</h4><pre class="programlisting">
- subtree_hits_cache = 16M
- </pre></div>
- <div class="sect2" title="12.4.23. workers"><div class="titlepage"><div><div><h3 class="title"><a name="conf-workers"></a>12.4.23. workers</h3></div></div></div>
- <p>
- Multi-processing mode (MPM).
- Optional; allowed values are none, fork, prefork, and threads.
- Default is threads.
- Introduced in version 1.10-beta.
- </p><p>
- Lets you choose how <code class="filename">searchd</code> processes multiple
- concurrent requests. The possible values are:
- </p><div class="variablelist"><dl><dt><span class="term">none</span></dt>
- <dd><p>All requests will be handled serially, one-by-one.
- Prior to 1.x, this was the only mode available on Windows.
- </p></dd><dt><span class="term">fork</span></dt>
- <dd><p>A new child process will be forked to handle every
- incoming request.
- </p></dd><dt><span class="term">prefork</span></dt>
- <dd><p>On startup, <code class="filename">searchd</code> will pre-fork
- a number of worker processes, and pass the incoming requests
- to one of those children.
- </p></dd><dt><span class="term">threads</span></dt>
- <dd><p>A new thread will be created to handle every
- incoming request. This is the only mode compatible with
- RT indexing backend. This is a default value.
- </p></dd></dl></div>
- <p>
- </p><p>
- Historically, <code class="filename">searchd</code> used fork-based model,
- which generally performs OK but spends a noticeable amount of CPU
- in fork() system call when there's a high amount of (tiny) requests
- per second. Prefork mode was implemented to alleviate that; with
- prefork, worker processes are basically only created on startup
- and re-created on index rotation, somewhat reducing fork() call
- pressure.
- </p><p>
- Threads mode was implemented along with RT backend and is required
- to use RT indexes. (Regular disk-based indexes work in all the
- available modes.)
- </p><h4><a name="idp34032752"></a>Example:</h4><pre class="programlisting">
- workers = threads
- </pre></div>
- <div class="sect2" title="12.4.24. dist_threads"><div class="titlepage"><div><div><h3 class="title"><a name="conf-dist-threads"></a>12.4.24. dist_threads</h3></div></div></div>
- <p>
- Max local worker threads to use for parallelizable requests (searching a distributed index; building a batch of snippets).
- Optional, default is 0, which means to disable in-request parallelism.
- Introduced in version 1.10-beta.
- </p><p>
- Distributed index can include several local indexes. <code class="option">dist_threads</code>
- lets you easily utilize multiple CPUs/cores for that (previously existing
- alternative was to specify the indexes as remote agents, pointing searchd
- to itself and paying some network overheads).
- </p><p>
- When set to a value N greater than 1, this directive will create up to
- N threads for every query, and schedule the specific searches within these
- threads. For example, if there are 7 local indexes to search and dist_threads
- is set to 2, then 2 parallel threads would be created: one that sequentially
- searches 4 indexes, and another one that searches the other 3 indexes.
- </p><p>
- In case of CPU bound workload, setting <code class="option">dist_threads</code>
- to 1x the number of cores is advised (creating more threads than cores
- will not improve query time). In case of mixed CPU/disk bound workload
- it might sometimes make sense to use more (so that all cores could be
- utilized even when there are threads that wait for I/O completion).
- </p><p>
- Note that <code class="option">dist_threads</code> does <span class="bold"><strong>not</strong></span> require
- threads MPM. You can perfectly use it with fork or prefork MPMs too.
- </p><p>
- Starting with version 2.0.1-beta, building a batch of snippets
- with <code class="option">load_files</code> flag enabled can also be parallelized.
- Up to <code class="option">dist_threads</code> threads will be created to process
- those files. That speeds up snippet extraction when the total amount
- of document data to process is significant (hundreds of megabytes).
- </p><h4><a name="idp34041760"></a>Example:</h4><pre class="programlisting">
- index dist_test
- {
- type = distributed
- local = chunk1
- local = chunk2
- local = chunk3
- local = chunk4
- }
- # ...
- dist_threads = 4
- </pre></div>
- <div class="sect2" title="12.4.25. binlog_path"><div class="titlepage"><div><div><h3 class="title"><a name="conf-binlog-path"></a>12.4.25. binlog_path</h3></div></div></div>
- <p>
- Binary log (aka transaction log) files path.
- Optional, default is build-time configured data directory.
- Introduced in version 1.10-beta.
- </p><p>
- Binary logs are used for crash recovery of RT index data, and also of
- attributes updates of plain disk indices that
- would otherwise only be stored in RAM until flush. When logging is enabled,
- every transaction COMMIT-ted into RT index gets written into
- a log file. Logs are then automatically replayed on startup
- after an unclean shutdown, recovering the logged changes.
- </p><p>
- <code class="option">binlog_path</code> directive specifies the binary log
- files location. It should contain just the path; <code class="option">searchd</code>
- will create and unlink multiple binlog.* files in that path as necessary
- (binlog data, metadata, and lock files, etc).
- </p><p>
- Empty value disables binary logging. That improves performance,
- but puts RT index data at risk.
- </p><p>
- WARNING! It is strongly recommended to always explicitly define 'binlog_path' option in your config.
- Otherwise, the default path, which in most cases is the same as working folder, may point to the
- folder with no write access (for example, /usr/local/var/data). In this case, the searchd
- will not start at all.
- </p><h4><a name="idp34048080"></a>Example:</h4><pre class="programlisting">
- binlog_path = # disable logging
- binlog_path = /var/data # /var/data/binlog.001 etc will be created
- </pre></div>
- <div class="sect2" title="12.4.26. binlog_flush"><div class="titlepage"><div><div><h3 class="title"><a name="conf-binlog-flush"></a>12.4.26. binlog_flush</h3></div></div></div>
- <p>
- Binary log transaction flush/sync mode.
- Optional, default is 2 (flush every transaction, sync every second).
- Introduced in version 1.10-beta.
- </p><p>
- This directive controls how frequently the binary log will be flushed
- to OS and synced to disk. Three modes are supported:
- </p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>0, flush and sync every second. Best performance,
- but up to 1 second worth of committed transactions can be lost
- both on daemon crash, or OS/hardware crash.
- </p></li>
- <li class="listitem"><p>1, flush and sync every transaction. Worst performance,
- but every committed transaction data is guaranteed to be saved.
- </p></li>
- <li class="listitem"><p>2, flush every transaction, sync every second.
- Good performance, and every committed transaction is guaranteed
- to be saved in case of daemon crash. However, in case of OS/hardware
- crash up to 1 second worth of committed transactions can be lost.
- </p></li>
- </ul></div>
- <p>
- </p><p>
- For those familiar with MySQL and InnoDB, this directive is entirely
- similar to <code class="option">innodb_flush_log_at_trx_commit</code>. In most
- cases, the default hybrid mode 2 provides a nice balance of speed
- and safety, with full RT index data protection against daemon crashes,
- and some protection against hardware ones.
- </p><h4><a name="idp34055056"></a>Example:</h4><pre class="programlisting">
- binlog_flush = 1 # ultimate safety, low speed
- </pre></div>
- <div class="sect2" title="12.4.27. binlog_max_log_size"><div class="titlepage"><div><div><h3 class="title"><a name="conf-binlog-max-log-size"></a>12.4.27. binlog_max_log_size</h3></div></div></div>
- <p>
- Maximum binary log file size.
- Optional, default is 0 (do not reopen binlog file based on size).
- Introduced in version 1.10-beta.
- </p><p>
- A new binlog file will be forcibly opened once the current binlog file
- reaches this limit. This achieves a finer granularity of logs and can yield
- more efficient binlog disk usage under certain borderline workloads.
- </p><h4><a name="idp34058528"></a>Example:</h4><pre class="programlisting">
- binlog_max_log_size = 16M
- </pre></div>
- <div class="sect2" title="12.4.28. snippets_file_prefix"><div class="titlepage"><div><div><h3 class="title"><a name="conf-snippets-file-prefix"></a>12.4.28. snippets_file_prefix</h3></div></div></div>
- <p>
- A prefix to prepend to the local file names when generating snippets.
- Optional, default is empty.
- Introduced in version 2.1.1-beta.
- </p><p>
- This prefix can be used in distributed snippets generation along with
- <code class="option">load_files</code> or <code class="option">load_files_scattered</code> options.
- </p><p>
- Note how this is a prefix, and <span class="bold"><strong>not</strong></span> a path! Meaning that if a prefix
- is set to "server1" and the request refers to "file23", <code class="filename">searchd</code>
- will attempt to open "server1file23" (all of that without quotes). So if you
- need it to be a path, you have to mention the trailing slash.
- </p><p>
- Note also that this is a local option, it does not affect the agents anyhow.
- So you can safely set a prefix on a master server. The requests routed to the
- agents will not be affected by the master's setting. They will however be affected
- by the agent's own settings.
- </p><p>
- This might be useful, for instance, when the document storage locations
- (be those local storage or NAS mountpoints) are inconsistent across the servers.
- </p><h4><a name="idp34065840"></a>Example:</h4><pre class="programlisting">
- snippets_file_prefix = /mnt/common/server1/
- </pre></div>
- <div class="sect2" title="12.4.29. collation_server"><div class="titlepage"><div><div><h3 class="title"><a name="conf-collation-server"></a>12.4.29. collation_server</h3></div></div></div>
- <p>
- Default server collation.
- Optional, default is libc_ci.
- Introduced in version 2.0.1-beta.
- </p><p>
- Specifies the default collation used for incoming requests.
- The collation can be overridden on a per-query basis.
- Refer to <a class="xref" href="#collations" title="5.12. Collations">Section 5.12, “Collations”</a> section for the list of available collations and other details.
- </p><h4><a name="idp34069856"></a>Example:</h4><pre class="programlisting">
- collation_server = utf8_ci
- </pre></div>
- <div class="sect2" title="12.4.30. collation_libc_locale"><div class="titlepage"><div><div><h3 class="title"><a name="conf-collation-libc-locale"></a>12.4.30. collation_libc_locale</h3></div></div></div>
- <p>
- Server libc locale.
- Optional, default is C.
- Introduced in version 2.0.1-beta.
- </p><p>
- Specifies the libc locale, affecting the libc-based collations.
- Refer to <a class="xref" href="#collations" title="5.12. Collations">Section 5.12, “Collations”</a> section for the details.
- </p><h4><a name="idp34073760"></a>Example:</h4><pre class="programlisting">
- collation_libc_locale = fr_FR
- </pre></div>
- <div class="sect2" title="12.4.31. plugin_dir"><div class="titlepage"><div><div><h3 class="title"><a name="conf-plugin-dir"></a>12.4.31. plugin_dir</h3></div></div></div>
- <p>
- Trusted location for the dynamic libraries (UDFs).
- Optional, default is empty (no location).
- Introduced in version 2.0.1-beta.
- </p><p>
- Specifies the trusted directory from which the
- <a class="link" href="#sphinx-udfs" title="6.1. Sphinx UDFs (User Defined Functions)">UDF libraries</a> can be loaded. Requires
- <a class="link" href="#conf-workers" title="12.4.23. workers">workers = threads</a> to take effect.
- </p><h4><a name="idp34078608"></a>Example:</h4><pre class="programlisting">
- workers = threads
- plugin_dir = /usr/local/sphinx/lib
- </pre></div>
- <div class="sect2" title="12.4.32. mysql_version_string"><div class="titlepage"><div><div><h3 class="title"><a name="conf-mysql-version-string"></a>12.4.32. mysql_version_string</h3></div></div></div>
- <p>
- A server version string to return via MySQL protocol.
- Optional, default is empty (return Sphinx version).
- Introduced in version 2.0.1-beta.
- </p><p>
- Several picky MySQL client libraries depend on a particular version
- number format used by MySQL, and moreover, sometimes choose a different
- execution path based on the reported version number (rather than the
- indicated capabilities flags). For instance, Python MySQLdb 1.2.2 throws
- an exception when the version number is not in X.Y.ZZ format; MySQL .NET
- connector 6.3.x fails internally on version numbers 1.x along with
- a certain combination of flags, etc. To work around that, you can use
- <code class="option">mysql_version_string</code> directive and have <code class="filename">searchd</code>
- report a different version to clients connecting over MySQL protocol.
- (By default, it reports its own version.)
- </p><h4><a name="idp34083568"></a>Example:</h4><pre class="programlisting">
- mysql_version_string = 5.0.37
- </pre></div>
- <div class="sect2" title="12.4.33. rt_flush_period"><div class="titlepage"><div><div><h3 class="title"><a name="conf-rt-flush-period"></a>12.4.33. rt_flush_period</h3></div></div></div>
- <p>
- RT indexes RAM chunk flush check period, in seconds.
- Optional, default is 10 hours.
- Introduced in version 2.0.1-beta.
- </p><p>
- Actively updated RT indexes that however fully fit in RAM chunks
- can result in ever-growing binlogs, impacting disk use and crash
- recovery time. With this directive the search daemon performs
- periodic flush checks, and eligible RAM chunks can get saved,
- enabling consequential binlog cleanup. See <a class="xref" href="#rt-binlog" title="4.4. Binary logging">Section 4.4, “Binary logging”</a>
- for more details.
- </p><h4><a name="idp34087632"></a>Example:</h4><pre class="programlisting">
- rt_flush_period = 3600 # 1 hour
- </pre></div>
- <div class="sect2" title="12.4.34. thread_stack"><div class="titlepage"><div><div><h3 class="title"><a name="conf-thread-stack"></a>12.4.34. thread_stack</h3></div></div></div>
- <p>
- Per-thread stack size.
- Optional, default is 1M.
- Introduced in version 2.0.1-beta.
- </p><p>
- In the <code class="code">workers = threads</code> mode, every request is processed
- with a separate thread that needs its own stack space. By default, 1M per
- thread are allocated for stack. However, extremely complex search requests
- might eventually exhaust the default stack and require more. For instance,
- a query that matches thousands of keywords (either directly or through
- term expansion) can eventually run out of stack. Previously, that resulted
- in crashes. Starting with 2.0.1-beta, <code class="filename">searchd</code> attempts
- to estimate the expected stack use, and blocks the potentially dangerous
- queries. To process such queries, you can either increase the thread stack size
- by using the <code class="code">thread_stack</code> directive (or switch to a different
- <code class="code">workers</code> setting if that is possible).
- </p><p>
- A query with N levels of nesting is estimated to require approximately
- 30+0.16*N KB of stack, meaning that a 64K stack is enough for queries
- with up to 250 levels, 150K for up to 700 levels, etc. If the stack size limit
- is not met, <code class="filename">searchd</code> fails the query and reports
- the required stack size in the error message.
- </p><h4><a name="idp34094784"></a>Example:</h4><pre class="programlisting">
- thread_stack = 256K
- </pre></div>
- <div class="sect2" title="12.4.35. expansion_limit"><div class="titlepage"><div><div><h3 class="title"><a name="conf-expansion-limit"></a>12.4.35. expansion_limit</h3></div></div></div>
- <p>
- The maximum number of expanded keywords for a single wildcard.
- Optional, default is 0 (no limit).
- Introduced in version 2.0.1-beta.
- </p><p>
- When doing substring searches against indexes built with
- <code class="code">dict = keywords</code> enabled, a single wildcard may
- potentially result in thousands and even millions of matched
- keywords (think of matching 'a*' against the entire Oxford
- dictionary). This directive lets you limit the impact
- of such expansions. Setting <code class="code">expansion_limit = N</code>
- restricts expansions to no more than N of the most frequent
- matching keywords (per each wildcard in the query).
- </p><h4><a name="idp34099136"></a>Example:</h4><pre class="programlisting">
- expansion_limit = 16
- </pre></div>
- <div class="sect2" title="12.4.36. watchdog"><div class="titlepage"><div><div><h3 class="title"><a name="conf-watchdog"></a>12.4.36. watchdog</h3></div></div></div>
- <p>
- Threaded server watchdog.
- Optional, default is 1 (watchdog enabled).
- Introduced in version 2.0.1-beta.
- </p><p>
- A crashed query in <code class="code">threads</code> multi-processing mode
- (<code class="code"><a class="link" href="#conf-workers" title="12.4.23. workers">workers</a> = threads</code>)
- can take down the entire server. With watchdog feature enabled,
- <code class="filename">searchd</code> additionally keeps a separate lightweight
- process that monitors the main server process, and automatically
- restarts the latter in case of abnormal termination. Watchdog
- is enabled by default.
- </p><h4><a name="idp34104800"></a>Example:</h4><pre class="programlisting">
- watchdog = 0 # disable watchdog
- </pre></div>
- <div class="sect2" title="12.4.37. prefork_rotation_throttle"><div class="titlepage"><div><div><h3 class="title"><a name="conf-prefork-rotation-throttle"></a>12.4.37. prefork_rotation_throttle</h3></div></div></div>
- <p>
- Delay between restarting preforked children on index rotation, in milliseconds.
- Optional, default is 0 (no delay).
- Introduced in version 2.0.2-beta.
- </p><p>
- When running in <code class="code"><a class="link" href="#conf-workers" title="12.4.23. workers">workers</a> = prefork</code>
- mode, every index rotation needs to restart all children to propagate the newly
- loaded index data changes. Restarting all of them at once might put excessive
- strain on CPU and/or network connections. (For instance, when the application
- keeps a bunch of open persistent connections to different children, and all those
- children restart.) Those bursts can be throttled down with
- <code class="option">prefork_rotation_throttle</code> directive. Note that
- the children will be restarted sequentially, and thus "old" results might
- persist for a few more seconds. For instance, if
- <code class="option">prefork_rotation_throttle</code> is set to 50 (milliseconds), and
- there are 30 children, then the last one would only be <span class="emphasis"><em>actually</em></span>
- restarted 1.5 seconds (50*30=1500 milliseconds) <span class="emphasis"><em>after</em></span>
- the "rotation finished" message in the <code class="filename">searchd</code> event log.
- </p><h4><a name="idp34112384"></a>Example:</h4><pre class="programlisting">
- prefork_rotation_throttle = 50 # throttle children restarts by 50 msec each
- </pre></div>
- <div class="sect2" title="12.4.38. sphinxql_state"><div class="titlepage"><div><div><h3 class="title"><a name="conf-sphinxql-state"></a>12.4.38. sphinxql_state</h3></div></div></div>
- <p>
- Path to a file where current SphinxQL state will be serialized.
- Available since version 2.1.1-beta.
- </p><p>
- On daemon startup, this file gets replayed. On eligible state changes (eg. SET GLOBAL),
- this file gets rewritten automatically. This can prevent a hard-to-diagnose problem:
- If you load UDF functions, but Sphinx crashes, when it
- gets (automatically) restarted, your UDF and global variables will no longer be available;
- using persistent state helps a graceful recovery with no such surprises.
- </p><h4><a name="idp34115952"></a>Example:</h4><pre class="programlisting">
- sphinxql_state = uservars.sql
- </pre></div>
- <div class="sect2" title="12.4.39. ha_ping_interval"><div class="titlepage"><div><div><h3 class="title"><a name="conf-ha-ping-interval"></a>12.4.39. ha_ping_interval</h3></div></div></div>
- <p>
- Interval between agent mirror pings, in milliseconds.
- Optional, default is 1000.
- Added in 2.1.1-beta.
- </p><p>
- For a distributed index with agent mirrors in it (see more in <a class="xref" href="#conf-agent" title="12.2.31. agent">Section 12.2.31, “agent”</a>),
- master sends all mirrors a ping command during the idle periods.
- This is to track the current agent status (alive or dead, network
- roundtrip, etc). The interval between such pings is defined
- by this directive.
- </p><p>
- To disable pings, set ha_ping_interval to 0.
- </p><h4><a name="idp34119728"></a>Example:</h4><pre class="programlisting">
- ha_ping_interval = 0
- </pre></div>
- <div class="sect2" title="12.4.40. ha_period_karma"><div class="titlepage"><div><div><h3 class="title"><a name="conf-ha-period-karma"></a>12.4.40. ha_period_karma</h3></div></div></div>
- <p>
- Agent mirror statistics window size, in seconds.
- Optional, default is 60.
- Added in 2.1.1-beta.
- </p><p>
- For a distributed index with agent mirrors in it (see more in <a class="xref" href="#conf-agent" title="12.2.31. agent">Section 12.2.31, “agent”</a>),
- master tracks several different per-mirror counters. These counters
- are then used for failover and balancing. (Master picks the best
- mirror to use based on the counters.) Counters are accumulated in
- blocks of <code class="code">ha_period_karma</code> seconds.
- </p><p>
- After beginning a new block, master may still use the accumulated
- values from the previous one, until the new one is half full. Thus,
- any previous history stops affecting the mirror choice after
- 1.5 times ha_period_karma seconds at most.
- </p><p>
- Despite that at most 2 blocks are used for mirror selection,
- up to the last 15 blocks are actually stored, for instrumentation purposes.
- They can be inspected using
- <a class="link" href="#sphinxql-show-agent-status" title="8.29. SHOW AGENT STATUS">SHOW AGENT STATUS</a>
- statement.
- </p><h4><a name="idp34125664"></a>Example:</h4><pre class="programlisting">
- ha_period_karma = 120
- </pre></div>
- <div class="sect2" title="12.4.41. persistent_connections_limit"><div class="titlepage"><div><div><h3 class="title"><a name="conf-persistent-connections-limit"></a>12.4.41. persistent_connections_limit</h3></div></div></div>
- <p>
- The maximum # of simultaneous persistent connections to remote <a class="link" href="#conf-agent-persistent" title="12.2.32. agent_persistent">persistent agents</a>.
- Each time we connect to an agent defined under 'agent_persistent', we try to reuse an existing connection (if any), or connect and save the connection for the future.
- However, we can't hold an unlimited # of such persistent connections, since each one holds a worker on the agent side (and eventually we'll receive the 'maxed out' error,
- when all of them are busy). This directive limits that number. It affects the number of connections to each agent's host, across all distributed indexes.
- </p><p>
- It is reasonable to set the value equal to or less than the <a class="link" href="#conf-max-children" title="12.4.7. max_children">max_children</a> option of the agents.
- </p><h4><a name="idp34131104"></a>Example:</h4><pre class="programlisting">
- persistent_connections_limit = 29 # assume that each host of agents has max_children = 30 (or 29).
- </pre></div>
- <div class="sect2" title="12.4.42. rt_merge_iops"><div class="titlepage"><div><div><h3 class="title"><a name="conf-rt-merge-iops"></a>12.4.42. rt_merge_iops</h3></div></div></div>
- <p>
- A maximum number of I/O operations (per second) that the RT chunks merge thread is allowed to start.
- Optional, default is 0 (no limit). Added in 2.1.1-beta.
- </p><p>
- This directive lets you throttle down the I/O impact arising from
- the <code class="code">OPTIMIZE</code> statements. It is guaranteed that all the
- RT optimization activity will not generate more disk iops (I/Os per second)
- than the configured limit. Modern SATA drives can perform up to around 100 I/O operations per
- second, and limiting rt_merge_iops can reduce search performance degradation caused by merging.
- </p><h4><a name="idp34135152"></a>Example:</h4><pre class="programlisting">
- rt_merge_iops = 40
- </pre></div>
- <div class="sect2" title="12.4.43. rt_merge_maxiosize"><div class="titlepage"><div><div><h3 class="title"><a name="conf-rt-merge-maxiosize"></a>12.4.43. rt_merge_maxiosize</h3></div></div></div>
- <p>
- A maximum size of an I/O operation that the RT chunks merge
- thread is allowed to start.
- Optional, default is 0 (no limit).
- Added in 2.1.1-beta.
- </p><p>
- This directive lets you throttle down the I/O impact arising from
- the <code class="code">OPTIMIZE</code> statements. I/Os bigger than this limit will be
- broken down into 2 or more I/Os, which will then be accounted as separate I/Os
- with regards to the <a class="link" href="#conf-rt-merge-iops" title="12.4.42. rt_merge_iops">rt_merge_iops</a>
- limit. Thus, it is guaranteed that all the optimization activity will not
- generate more than (rt_merge_iops * rt_merge_maxiosize) bytes of disk I/O
- per second.
- </p><h4><a name="idp34139920"></a>Example:</h4><pre class="programlisting">
- rt_merge_maxiosize = 1M
- </pre></div>
- <div class="sect2" title="12.4.44. predicted_time_costs"><div class="titlepage"><div><div><h3 class="title"><a name="conf-predicted-time-costs"></a>12.4.44. predicted_time_costs</h3></div></div></div>
- <p>
- Costs for the query time prediction model, in nanoseconds.
- Optional, default is "doc=64, hit=48, skip=2048, match=64" (without the quotes).
- Added in 2.1.1-beta.
- </p><p>
- Terminating queries before completion based on their execution time
- (via either <a class="link" href="#api-func-setmaxquerytime" title="9.2.2. SetMaxQueryTime">SetMaxQueryTime()</a>
- API call, or <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">SELECT ... OPTION max_query_time</a>
- SphinxQL statement) is a nice safety net, but it comes with an inborn drawback:
- indeterministic (unstable) results. That is, if you repeat the very same (complex)
- search query with a time limit several times, the time limit will get hit
- at different stages, and you will get <span class="emphasis"><em>different</em></span> result sets.
- </p><p>
- Starting with 2.1.1-beta, there is a new option,
- <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">SELECT ... OPTION max_predicted_time</a>,
- that lets you limit the query time <span class="emphasis"><em>and</em></span> get stable,
- repeatable results. Instead of regularly checking the actual current time
- while evaluating the query, which is indeterministic, it predicts the current
- running time using a simple linear model instead:
- </p><pre class="programlisting">
- predicted_time =
- doc_cost * processed_documents +
- hit_cost * processed_hits +
- skip_cost * skiplist_jumps +
- match_cost * found_matches
- </pre><p>
- The query is then terminated early when the <code class="code">predicted_time</code>
- reaches a given limit.
- </p><p>
- Of course, this is not a hard limit on the actual time spent (it is, however,
- a hard limit on the amount of <span class="emphasis"><em>processing</em></span> work done), and
- a simple linear model is in no way an ideally precise one. So the wall clock time
- <span class="emphasis"><em>may</em></span> be either below or over the target limit. However,
- the error margins are quite acceptable: for instance, in our experiments with
- a 100 msec target limit the majority of the test queries fell into a 95 to 105 msec
- range, and <span class="emphasis"><em>all</em></span> of the queries were in a 80 to 120 msec range.
- Also, as a nice side effect, using the modeled query time instead of measuring
- actual run time results in somewhat fewer gettimeofday() calls, too.
- </p><p>
- No two server makes and models are identical, so <code class="code">predicted_time_costs</code>
- directive lets you configure the costs for the model above. For convenience, they are
- integers, counted in nanoseconds. (The limit in max_predicted_time is counted
- in milliseconds, and having to specify cost values as 0.000128 ms instead of 128 ns
- is somewhat more error prone.) It is not necessary to specify all 4 costs at once,
- as the missing ones will take their default values. However, we strongly suggest
- to specify all of them, for readability.
- </p><h4><a name="idp34152416"></a>Example:</h4><pre class="programlisting">
- predicted_time_costs = doc=128, hit=96, skip=4096, match=128
- </pre></div>
- <div class="sect2" title="12.4.45. shutdown_timeout"><div class="titlepage"><div><div><h3 class="title"><a name="conf-shutdown-timeout"></a>12.4.45. shutdown_timeout</h3></div></div></div>
- <p>
- searchd --stopwait wait time, in seconds.
- Optional, default is 3 seconds.
- Added in 2.2.1-beta.
- </p><p>
- When you run searchd --stopwait your daemon needs to perform some
- activities before stopping like finishing queries, flushing RT RAM chunk,
- flushing attributes and updating binlog. And it requires some time.
- searchd --stopwait will wait up to shutdown_timeout seconds for the daemon to
- finish its jobs. Suitable time depends on your index size and load.
- </p><h4><a name="idp34155856"></a>Example:</h4><pre class="programlisting">
- shutdown_timeout = 5 # wait for up to 5 seconds
- </pre></div>
- <div class="sect2" title="12.4.46. ondisk_attrs_default"><div class="titlepage"><div><div><h3 class="title"><a name="conf-ondisk-attrs-default"></a>12.4.46. ondisk_attrs_default</h3></div></div></div>
- <p>
- Instance-wide defaults for <a class="link" href="#conf-ondisk-attrs" title="12.2.68. ondisk_attrs">ondisk_attrs</a>
- directive. Optional, default is 0 (all attributes are loaded in memory). This
- directive lets you specify the default value of ondisk_attrs for all indexes
- served by this copy of searchd. Per-index directives take precedence, and will
- overwrite this instance-wide default value, allowing for fine-grain control.
- </p></div>
- <div class="sect2" title="12.4.47. query_log_min_msec"><div class="titlepage"><div><div><h3 class="title"><a name="conf-query-log-min-msec"></a>12.4.47. query_log_min_msec</h3></div></div></div>
- <p>
- Limit (in milliseconds) that prevents the query from being written to the query log.
- Optional, default is 0 (all queries are written to the query log). This directive
- specifies that only queries with execution times that exceed the specified limit will be logged.
- </p></div>
- <div class="sect2" title="12.4.48. agent_connect_timeout"><div class="titlepage"><div><div><h3 class="title"><a name="conf-agent-connect-timeout-default"></a>12.4.48. agent_connect_timeout</h3></div></div></div>
- <p>
- Instance-wide defaults for <a class="link" href="#conf-agent-connect-timeout" title="12.2.34. agent_connect_timeout">agent_connect_timeout</a> parameter.
- The latter is defined in distributed (network) indexes.
- </p></div>
- <div class="sect2" title="12.4.49. agent_query_timeout"><div class="titlepage"><div><div><h3 class="title"><a name="conf-agent-query-timeout-default"></a>12.4.49. agent_query_timeout</h3></div></div></div>
- <p>
- Instance-wide defaults for <a class="link" href="#conf-agent-query-timeout" title="12.2.35. agent_query_timeout">agent_query_timeout</a> parameter.
- The latter is defined in distributed (network) indexes, and may also be overridden per-query using the OPTION clause.
- </p></div>
- <div class="sect2" title="12.4.50. agent_retry_count"><div class="titlepage"><div><div><h3 class="title"><a name="conf-agent-retry-count"></a>12.4.50. agent_retry_count</h3></div></div></div>
- <p>
- Integer, specifies how many times sphinx will try to connect and query remote agents in distributed index before reporting
- fatal query error. Default is 0 (i.e. no retries). This value may be also specified on per-query basis using
- 'OPTION retry_count=XXX' clause. If per-query option exists, it will override the one specified in config.
- </p></div>
- <div class="sect2" title="12.4.51. agent_retry_delay"><div class="titlepage"><div><div><h3 class="title"><a name="conf-agent-retry-delay"></a>12.4.51. agent_retry_delay</h3></div></div></div>
- <p>
- Integer, in milliseconds. Specifies how long Sphinx waits before retrying a query to a remote agent in case it fails.
- The value makes sense only if a non-zero <a class="link" href="#conf-agent-retry-count" title="12.4.50. agent_retry_count">agent_retry_count</a>
- or a non-zero per-query OPTION retry_count is specified. Default is 500. This value may also be specified
- on per-query basis using 'OPTION retry_delay=XXX' clause. If per-query option exists, it will override the one specified in config.
- </p></div></div>
- <div class="sect1" title="12.5. Common section configuration options"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="confgroup-common"></a>12.5. Common section configuration options</h2></div></div></div>
- <div class="sect2" title="12.5.1. lemmatizer_base"><div class="titlepage"><div><div><h3 class="title"><a name="conf-lemmatizer-base"></a>12.5.1. lemmatizer_base</h3></div></div></div>
- <p>
- Lemmatizer dictionaries base path.
- Optional, default is /usr/local/share (as in --datadir switch to ./configure script).
- Added in version 2.1.1-beta.
- </p><p>
- Our lemmatizer implementation (see <a class="xref" href="#conf-morphology" title="12.2.6. morphology">Section 12.2.6, “morphology”</a> for a discussion
- of what lemmatizers are) is dictionary driven. lemmatizer_base directive configures
- the base dictionary path. File names are hardcoded and specific to a given lemmatizer;
- the Russian lemmatizer uses ru.pak dictionary file. The dictionaries can be obtained
- from the Sphinx website.
- </p><h4><a name="idp34175424"></a>Example:</h4><pre class="programlisting">
- lemmatizer_base = /usr/local/share/sphinx/dicts/
- </pre></div>
- <div class="sect2" title="12.5.2. on_json_attr_error"><div class="titlepage"><div><div><h3 class="title"><a name="conf-on-json-attr-error"></a>12.5.2. on_json_attr_error</h3></div></div></div>
- <p>
- What to do if JSON format errors are found.
- Optional, default value is <code class="option">ignore_attr</code> (ignore errors).
- Applies only to <code class="option">sql_attr_json</code> attributes.
- Added in 2.1.1-beta.
- </p><p>
- By default, JSON format errors are ignored (<code class="option">ignore_attr</code>) and
- the indexer tool will just show a warning. Setting this option to <code class="option">fail_index</code>
- will rather make indexing fail at the first JSON format error.
- </p><h4><a name="idp34180544"></a>Example:</h4><pre class="programlisting">
- on_json_attr_error = ignore_attr
- </pre></div>
- <div class="sect2" title="12.5.3. json_autoconv_numbers"><div class="titlepage"><div><div><h3 class="title"><a name="conf-json-autoconv-numbers"></a>12.5.3. json_autoconv_numbers</h3></div></div></div>
- <p>
- Automatically detect and convert possible JSON
- strings that represent numbers, into numeric attributes.
- Optional, default value is 0 (do not convert strings into numbers).
- Added in 2.1.1-beta.
- </p><p>
- When this option is 1, values such as "1234" will be indexed as numbers instead
- of strings; if the option is 0, such values will be indexed as strings.
- This conversion applies to any data source, that is, JSON attributes originating
- from either SQL or XMLpipe2 sources will all be affected.
- </p><h4><a name="idp34184176"></a>Example:</h4><pre class="programlisting">
- json_autoconv_numbers = 1
- </pre></div>
- <div class="sect2" title="12.5.4. json_autoconv_keynames"><div class="titlepage"><div><div><h3 class="title"><a name="conf-json-autoconv-keynames"></a>12.5.4. json_autoconv_keynames</h3></div></div></div>
- <p>
- Whether and how to auto-convert key names within JSON attributes.
- Known value is 'lowercase'.
- Optional, default value is unspecified (do not convert anything).
- Added in 2.1.1-beta.
- </p><p>
- When this directive is set to 'lowercase', key names within JSON attributes
- will be automatically brought to lower case when indexing.
- This conversion applies to any data source, that is, JSON attributes originating
- from either SQL or XMLpipe2 sources will all be affected.
- </p><h4><a name="idp34187728"></a>Example:</h4><pre class="programlisting">
- json_autoconv_keynames = lowercase
- </pre></div>
- <div class="sect2" title="12.5.5. rlp_root"><div class="titlepage"><div><div><h3 class="title"><a name="conf-rlp-root"></a>12.5.5. rlp_root</h3></div></div></div>
- <p>
- Path to the RLP root folder. Mandatory if RLP is used.
- Added in 2.2.1-beta.
- </p><h4><a name="idp34190384"></a>Example:</h4><pre class="programlisting">
- rlp_root = /home/myuser/RLP
- </pre></div>
- <div class="sect2" title="12.5.6. rlp_environment"><div class="titlepage"><div><div><h3 class="title"><a name="conf-rlp-environment"></a>12.5.6. rlp_environment</h3></div></div></div>
- <p>
- RLP environment configuration file. Mandatory if RLP is used.
- Added in 2.2.1-beta.
- </p><h4><a name="idp34193088"></a>Example:</h4><pre class="programlisting">
- rlp_environment = /home/myuser/RLP/rlp-environment.xml
- </pre></div>
- <div class="sect2" title="12.5.7. rlp_max_batch_size"><div class="titlepage"><div><div><h3 class="title"><a name="conf-rlp-max-batch-size"></a>12.5.7. rlp_max_batch_size</h3></div></div></div>
- <p>
- Maximum total size of documents batched before processing them by the RLP. Optional, default is 51200.
- Do not set this value to more than 10Mb because Sphinx splits large documents into 10Mb chunks before processing them by the RLP.
- This option has effect only if <code class="option">morphology = rlp_chinese_batched</code> is specified.
- Added in 2.2.1-beta.
- </p><h4><a name="idp34196480"></a>Example:</h4><pre class="programlisting">
- rlp_max_batch_size = 100k
- </pre></div>
- <div class="sect2" title="12.5.8. rlp_max_batch_docs"><div class="titlepage"><div><div><h3 class="title"><a name="conf-rlp-max-batch-docs"></a>12.5.8. rlp_max_batch_docs</h3></div></div></div>
- <p>
- Maximum number of documents batched before processing them by the RLP. Optional, default is 50.
- This option has effect only if <code class="option">morphology = rlp_chinese_batched</code> is specified.
- Added in 2.2.1-beta.
- </p><h4><a name="idp34199712"></a>Example:</h4><pre class="programlisting">
- rlp_max_batch_docs = 100
- </pre></div></div></div>
- <div class="appendix" title="Appendix A. Sphinx revision history"><div class="titlepage"><div><div><h2 class="title"><a name="changelog"></a>Appendix A. Sphinx revision history</h2></div></div></div>
- <div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="sect1"><a href="#rel226">A.1. Version 2.2.6-release, 13 nov 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel225">A.2. Version 2.2.5-release, 06 oct 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel224">A.3. Version 2.2.4-release, 11 sep 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel223">A.4. Version 2.2.3-beta, 13 may 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel222">A.5. Version 2.2.2-beta, 11 feb 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel221">A.6. Version 2.2.1-beta, 13 nov 2013</a></span></dt>
- <dt><span class="sect1"><a href="#rel219">A.7. Version 2.1.9-release, 03 jul 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel218">A.8. Version 2.1.8-release, 28 apr 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel217">A.9. Version 2.1.7-release, 30 mar 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel216">A.10. Version 2.1.6-release, 24 feb 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel215">A.11. Version 2.1.5-release, 22 jan 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel214">A.12. Version 2.1.4-release, 18 dec 2013</a></span></dt>
- <dt><span class="sect1"><a href="#rel213">A.13. Version 2.1.3-release, 12 nov 2013</a></span></dt>
- <dt><span class="sect1"><a href="#rel212">A.14. Version 2.1.2-release, 10 oct 2013</a></span></dt>
- <dt><span class="sect1"><a href="#rel211">A.15. Version 2.1.1-beta, 20 feb 2013</a></span></dt>
- <dt><span class="sect1"><a href="#rel2011">A.16. Version 2.0.11-dev, xx xxx xxxx</a></span></dt>
- <dt><span class="sect1"><a href="#rel2010">A.17. Version 2.0.10-release, 22 jan 2014</a></span></dt>
- <dt><span class="sect1"><a href="#rel209">A.18. Version 2.0.9-release, 26 aug 2013</a></span></dt>
- <dt><span class="sect1"><a href="#rel208">A.19. Version 2.0.8-release, 26 apr 2013</a></span></dt>
- <dt><span class="sect1"><a href="#rel207">A.20. Version 2.0.7-release, 26 mar 2013</a></span></dt>
- <dt><span class="sect1"><a href="#rel206">A.21. Version 2.0.6-release, 22 oct 2012</a></span></dt>
- <dt><span class="sect1"><a href="#rel205">A.22. Version 2.0.5-release, 28 jul 2012</a></span></dt>
- <dt><span class="sect1"><a href="#rel204">A.23. Version 2.0.4-release, 02 mar 2012</a></span></dt>
- <dt><span class="sect1"><a href="#rel203">A.24. Version 2.0.3-release, 23 dec 2011</a></span></dt>
- <dt><span class="sect1"><a href="#rel202">A.25. Version 2.0.2-beta, 15 nov 2011</a></span></dt>
- <dt><span class="sect1"><a href="#rel201">A.26. Version 2.0.1-beta, 22 apr 2011</a></span></dt>
- <dt><span class="sect1"><a href="#rel110">A.27. Version 1.10-beta, 19 jul 2010</a></span></dt>
- <dt><span class="sect1"><a href="#rel099">A.28. Version 0.9.9-release, 02 dec 2009</a></span></dt>
- <dt><span class="sect1"><a href="#rel099rc2">A.29. Version 0.9.9-rc2, 08 apr 2009</a></span></dt>
- <dt><span class="sect1"><a href="#rel099rc1">A.30. Version 0.9.9-rc1, 17 nov 2008</a></span></dt>
- <dt><span class="sect1"><a href="#rel0981">A.31. Version 0.9.8.1, 30 oct 2008</a></span></dt>
- <dt><span class="sect1"><a href="#rel098">A.32. Version 0.9.8, 14 jul 2008</a></span></dt>
- <dt><span class="sect1"><a href="#rel097">A.33. Version 0.9.7, 02 apr 2007</a></span></dt>
- <dt><span class="sect1"><a href="#rel097rc2">A.34. Version 0.9.7-rc2, 15 dec 2006</a></span></dt>
- <dt><span class="sect1"><a href="#rel097rc">A.35. Version 0.9.7-rc1, 26 oct 2006</a></span></dt>
- <dt><span class="sect1"><a href="#rel096">A.36. Version 0.9.6, 24 jul 2006</a></span></dt>
- <dt><span class="sect1"><a href="#rel096rc1">A.37. Version 0.9.6-rc1, 26 jun 2006</a></span></dt>
- </dl></div>
- <div class="sect1" title="A.1. Version 2.2.6-release, 13 nov 2014"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel226"></a>A.1. Version 2.2.6-release, 13 nov 2014</h2></div></div></div>
- <h3><a name="idp34204000"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=2104" target="_top">#2104</a>, <a class="link" href="#expr-func-all">ALL()</a>/ANY()/INDEXOF() support for distributed indexes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=2102" target="_top">#2102</a>, show agent status misses warnings from agents</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=2100" target="_top">#2100</a>, crash of <code class="filename">indexer</code> while loading stopwords with tokenizer plugin</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=2098" target="_top">#2098</a>, arbitrary JSON subkeys and IS NULL for distributed indexes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=2097" target="_top">#2097</a>, escaping of field-start modifier</p></li>
- <li class="listitem"><p>fixed possible memory leak in plugin creation function</p></li>
- <li class="listitem"><p>fixed indexation of duplicate documents</p></li>
- </ul></div></div>
- <div class="sect1" title="A.2. Version 2.2.5-release, 06 oct 2014"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel225"></a>A.2. Version 2.2.5-release, 06 oct 2014</h2></div></div></div>
- <h3><a name="idp34215216"></a>New minor features</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added OPTION <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">rand_seed</a> which affects ORDER BY RAND()</p></li>
- </ul></div>
- <h3><a name="idp34217376"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=2042" target="_top">#2042</a>, <code class="filename">indextool</code> fails with field mask on 32+ fields</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=2031" target="_top">#2031</a>, wrong encoding with UnixODBC/Oracle source</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=2056" target="_top">#2056</a>, several bugs in RLP tokenizer</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=2054" target="_top">#2054</a>, <a class="link" href="#sphinxql-threads" title="8.39. SHOW THREADS syntax">SHOW THREADS</a> hangs if queries in prefork mode</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=2057" target="_top">#2057</a>, WARNING at <code class="filename">indexer</code> on duplicated wordforms</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=2066" target="_top">#2066</a>, snippet generation with <a class="link" href="#api-func-buildexcerpts" title="9.7.1. BuildExcerpts">weight_order</a> enabled</p></li>
- <li class="listitem"><p>fixed exception parsing in queries</p></li>
- <li class="listitem"><p>fixed crash in config parser</p></li>
- <li class="listitem"><p>fixed MySQL protocol response when daemon maxed out</p></li>
- </ul></div></div>
- <div class="sect1" title="A.3. Version 2.2.4-release, 11 sep 2014"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel224"></a>A.3. Version 2.2.4-release, 11 sep 2014</h2></div></div></div>
- <h3><a name="idp34232064"></a>New major features</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added <a class="link" href="#sphinxql-attach" title="8.24. ALTER syntax">ALTER</a> RTINDEX rt1 RECONFIGURE which allows changing RT index settings on the fly</p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-show-index-settings" title="8.32. SHOW INDEX SETTINGS syntax">SHOW INDEX idx1 SETTINGS</a> statement</p></li>
- <li class="listitem"><p>added ability to specify several destination forms for the same source wordform (as a result, N:M mapping is now available)</p></li>
- <li class="listitem"><p>added blended chars support to exceptions</p></li>
- </ul></div>
- <h3><a name="idp34237184"></a>New minor features</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added <a class="link" href="#expr-func-any">ANY()</a>/<a class="link" href="#expr-func-all">ALL()</a>/<a class="link" href="#expr-func-indexof">INDEXOF()</a> support for JSON string arrays</p></li>
- <li class="listitem"><p>added FACTORS() alias for <a class="link" href="#expr-func-packedfactors">PACKEDFACTORS()</a> function</p></li>
- <li class="listitem"><p>added <code class="code">LIMIT</code> clause for the <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">FACET</a> keyword</p></li>
- <li class="listitem"><p>added JSON-formatted output to <code class="code">PACKEDFACTORS()</code> function</p></li>
- <li class="listitem"><p>added #1999 <a class="link" href="#expr-func-atan2">ATAN2()</a> function</p></li>
- <li class="listitem"><p>added connections counter and also avg and max timers to agent status</p></li>
- <li class="listitem"><p>added <code class="filename">searchd</code> configuration keys <a class="link" href="#conf-agent-connect-timeout" title="12.2.34. agent_connect_timeout">agent_connect_timeout</a>, <a class="link" href="#conf-agent-query-timeout" title="12.2.35. agent_query_timeout">agent_query_timeout</a>, <a class="link" href="#conf-agent-retry-count" title="12.4.50. agent_retry_count">agent_retry_count</a> and <a class="link" href="#conf-agent-retry-delay" title="12.4.51. agent_retry_delay">agent_retry_delay</a></p></li>
- <li class="listitem"><p><a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">GROUPBY()</a> function now returns strings for string attributes</p></li>
- </ul></div>
- <h3><a name="idp34252912"></a>Optimizations and removals</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>optimized <a class="link" href="#conf-json-autoconv-numbers" title="12.5.3. json_autoconv_numbers">json_autoconv_numbers</a> option speed</p></li>
- <li class="listitem"><p>optimized tokenizing with exceptions on</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1970" target="_top">#1970</a>, speeding up <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">ZONE and ZONESPAN</a> operators</p></li>
- </ul></div>
- <h3><a name="idp34257888"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=2027" target="_top">#2027</a>, slow queries to multiple indexes with large kill-lists</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=2022" target="_top">#2022</a>, blend characters of matched word must not be outside of snippet passage</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=2021" target="_top">#2021</a>, output units in <a class="link" href="#expr-func-geodist">GEODIST()</a> function</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=2018" target="_top">#2018</a>, different wildcard behaviour in RT and plain indexes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=2005" target="_top">#2005</a>, aggregate functions improperly calculate aliased expressions</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1972" target="_top">#1972</a>, daemon crashes trying to read a big (>8G) .spm file</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1966" target="_top">#1966</a>, <a class="link" href="#expr-func-interval">INTERVAL()</a> function does not work with JSON fields</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1963" target="_top">#1963</a>, <code class="code">GROUPBY()</code> on JSON attributes sometimes yields NULL</p></li>
- <li class="listitem"><p>fixed <code class="code">GROUPBY()</code> on empty JSON arrays to return NULL instead of []</p></li>
- <li class="listitem"><p>fixed buffer overrun when sizing packed factors (with way too many fields) in expression ranker</p></li>
- <li class="listitem"><p>fixed cpu time logging for cases where work is done in child threads or agents</p></li>
- </ul></div></div>
- <div class="sect1" title="A.4. Version 2.2.3-beta, 13 may 2014"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel223"></a>A.4. Version 2.2.3-beta, 13 may 2014</h2></div></div></div>
- <h3><a name="idp34274848"></a>New features</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1920" target="_top">#1920</a>, <a class="link" href="#conf-charset-table" title="12.2.16. charset_table">charset_table</a> aliases</p></li>
- <li class="listitem"><p>added <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1887" target="_top">#1887</a>, filtering over string attributes</p></li>
- <li class="listitem"><p>added <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1860" target="_top">#1860</a>, <a class="link" href="#sphinxql-set" title="8.9. SET syntax">USERVARs</a> for distributed indexes</p></li>
- <li class="listitem"><p>added <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1689" target="_top">#1689</a>, <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">GROUP BY JSON</a> attributes</p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">FACET</a> keyword</p></li>
- <li class="listitem"><p>added Go MySQL connector support</p></li>
- <li class="listitem"><p>added <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">IDF boost</a> keyword modifier</p></li>
- <li class="listitem"><p>added <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">MAYBE</a> fulltext operator</p></li>
- </ul></div>
- <h3><a name="idp34287920"></a>Optimizations and removals</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>improved speed of concurrent insertion in RT indexes</p></li>
- <li class="listitem"><p>removed <a class="link" href="#sphinx-deprecations-defaults" title="2.6. Sphinx deprecations and changes in default configuration">max_matches</a> config key</p></li>
- </ul></div>
- <h3><a name="idp34290832"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1946" target="_top">#1946</a>, <a class="link" href="#expr-func-in">IN()</a> function support for string attributes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1942" target="_top">#1942</a>, crash in <a class="link" href="#sphinxql-threads" title="8.39. SHOW THREADS syntax">SHOW THREADS</a> command</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1922" target="_top">#1922</a>, crash on snippet generation for queries with duplicated words</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1919" target="_top">#1919</a>, <a class="link" href="#tsvpipe" title="3.10. tsvpipe (Tab Separated Values) data source">TSV</a> bitcount attributes indexation issue</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1916" target="_top">#1916</a>, <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">COUNT(*)</a> with empty result set</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1910" target="_top">#1910</a>, JSON parsing issue</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1906" target="_top">#1906</a>, <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">ZONE</a> constraints for expanded terms</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1904" target="_top">#1904</a>, race condition in RT indexes on saving disk chunk</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1899" target="_top">#1899</a>, crash on <a class="link" href="#sphinxql-call-keywords" title="8.15. CALL KEYWORDS syntax">CALL KEYWORDS</a></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1893" target="_top">#1893</a>, <code class="filename">searchd</code> crashes on expressions like 'a&lt;&lt;(*!b)'</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1884" target="_top">#1884</a>, crash with <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">SNIPPET()</a> function over distributed index</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1883" target="_top">#1883</a>, crash at expanded keyword with hitless index</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1870" target="_top">#1870</a>, crash on <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">ORDER BY JSON</a> attributes</p></li>
- <li class="listitem"><p>fixed template index removing on rotation</p></li>
- </ul></div></div>
- <div class="sect1" title="A.5. Version 2.2.2-beta, 11 feb 2014"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel222"></a>A.5. Version 2.2.2-beta, 11 feb 2014</h2></div></div></div>
- <h3><a name="idp34317632"></a>New features</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added #1604, <a class="link" href="#sphinxql-call-keywords" title="8.15. CALL KEYWORDS syntax">CALL KEYWORDS</a> can now show multiple lemmas for a keyword</p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-attach" title="8.24. ALTER syntax">ALTER TABLE DROP COLUMN</a></p></li>
- <li class="listitem"><p>added ALTER for JSON/string/MVA attributes</p></li>
- <li class="listitem"><p>added <a class="link" href="#expr-func-remap">REMAP()</a> function which supersedes SetOverride() API</p></li>
- <li class="listitem"><p>added an argument to <a class="link" href="#misc-functions" title="5.5.6. Miscellaneous functions">PACKEDFACTORS()</a> to disable ATC calculation (syntax: PACKEDFACTORS({no_atc=1}))</p></li>
- <li class="listitem"><p>added exact phrase query syntax</p></li>
- <li class="listitem"><p>added flag <code class="option">'--enable-dl'</code> to configure script which works with <code class="filename">libmysqlclient</code>, <code class="filename">libpostgresql</code>, <code class="filename">libexpat</code>, <code class="filename">libunixodbc</code></p></li>
- <li class="listitem"><p>added new plugin system: <a class="link" href="#sphinxql-create-plugin" title="8.36. CREATE PLUGIN syntax">CREATE</a>/<a class="link" href="#sphinxql-drop-plugin" title="8.37. DROP PLUGIN syntax">DROP PLUGIN</a>, <a class="link" href="#sphinxql-show-plugins" title="8.38. SHOW PLUGINS syntax">SHOW PLUGINS</a>, <a class="link" href="#conf-plugin-dir" title="12.4.31. plugin_dir">plugin_dir</a> now in common, <a class="link" href="#sphinxql-create-plugin" title="8.36. CREATE PLUGIN syntax">index/query_token_filter</a> plugins</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-ondisk-attrs" title="12.2.68. ondisk_attrs">ondisk_attrs</a> support for RT indexes</p></li>
- <li class="listitem"><p>added position shift operator to phrase operator</p></li>
- <li class="listitem"><p>added possibility to add user-defined rankers (via <a class="link" href="#extending-sphinx" title="Chapter 6. Extending Sphinx">plugins</a>)</p></li>
- </ul></div>
- <h3><a name="idp34337104"></a>Optimizations, behavior changes, and removals</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>changed #1797, per-term statistics report (expanded terms fold to their respective substrings)</p></li>
- <li class="listitem"><p>changed default <a class="link" href="#conf-thread-stack" title="12.4.34. thread_stack">thread_stack</a> value to 1M</p></li>
- <li class="listitem"><p>changed local directive in a distributed index which now takes a list (e.g. <code class="option">local=shard1,shard2,shard3</code>)</p></li>
- <li class="listitem"><p>deprecated <a class="link" href="#api-func-setmatchmode" title="9.3.1. SetMatchMode">SetMatchMode()</a> API call</p></li>
- <li class="listitem"><p>deprecated <a class="link" href="#api-func-setoverride" title="9.2.3. SetOverride">SetOverride()</a> API call</p></li>
- <li class="listitem"><p>optimized infix searches for dict=keywords</p></li>
- <li class="listitem"><p>optimized kill lists in plain and RT indexes</p></li>
- <li class="listitem"><p>removed deprecated <code class="option">"address"</code> and <code class="option">"port"</code> config keys</p></li>
- <li class="listitem"><p>removed deprecated CLI <code class="filename">search</code> and <code class="option">sql_query_info</code></p></li>
- <li class="listitem"><p>removed deprecated <code class="option">charset_type</code> and <code class="option">mssql_unicode</code></p></li>
- <li class="listitem"><p>removed deprecated <code class="option">enable_star</code></p></li>
- <li class="listitem"><p>removed deprecated <code class="option">ondisk_dict</code> and <code class="option">ondisk_dict_default</code></p></li>
- <li class="listitem"><p>removed deprecated <code class="option">str2ordinal</code> attributes</p></li>
- <li class="listitem"><p>removed deprecated <code class="option">str2wordcount</code> attributes</p></li>
- <li class="listitem"><p>removed support for client versions 0.9.6 and below</p></li>
- </ul></div></div>
- <div class="sect1" title="A.6. Version 2.2.1-beta, 13 nov 2013"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel221"></a>A.6. Version 2.2.1-beta, 13 nov 2013</h2></div></div></div>
- <h3><a name="idp34355216"></a>Major new features</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">added <a class="link" href="#sphinxql-attach" title="8.24. ALTER syntax">ALTER TABLE</a> that can add attributes to disk and RT indexes on the fly</li>
- <li class="listitem">added ATTACH support for non-empty RT target indexes</li>
- <li class="listitem">added Chinese segmentation with <a class="link" href="#conf-morphology" title="12.2.6. morphology">RLP</a> (Rosette Linguistics platform) support</li>
- <li class="listitem">added English, German <a class="link" href="#conf-morphology" title="12.2.6. morphology">lemmatization</a> support</li>
- <li class="listitem">added <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">HAVING</a> support to SELECT statement, filtering on aggregate values is now possible</li>
- <li class="listitem">added <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">N-best GROUP BY</a> extension to return more than 1 row per group</li>
- <li class="listitem">added RT index support for <a class="link" href="#conf-index-field-lengths" title="12.2.63. index_field_lengths">index_field_lengths=1</a>, bitfield attributes, and multiforms</li>
- <li class="listitem">added CSV, <a class="link" href="#tsvpipe" title="3.10. tsvpipe (Tab Separated Values) data source">TSV</a> data sources</li>
- <li class="listitem">added full <a class="link" href="#conf-sql-attr-json" title="12.1.24. sql_attr_json">JSON</a> attributes support, arbitrary JSON documents (with subobjects etc) can now be stored</li>
- <li class="listitem">added in-place JSON updates for scalar values</li>
- <li class="listitem">added index <a class="link" href="#conf-index-type" title="12.2.1. type">type=template</a> directive (allows CALL KEYWORDS, CALL SNIPPETS)</li>
- <li class="listitem">added <a class="link" href="#conf-ondisk-attrs" title="12.2.68. ondisk_attrs">ondisk_attrs</a>, <a class="link" href="#conf-ondisk-attrs-default" title="12.4.46. ondisk_attrs_default">ondisk_attrs_default</a> directives that keep attributes on disk</li>
- <li class="listitem">added table functions mechanism, and <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">REMOVE_REPEATS()</a> table function</li>
- <li class="listitem">added support for arbitrary expressions in WHERE for DELETE queries</li>
- </ul></div>
- <h3><a name="idp34372400"></a>Ranking related features</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">added OPTION <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">local_df=1</a>, an option to aggregate IDFs over local indexes (shards)</li>
- <li class="listitem">added <a class="link" href="#sphinx-udfs" title="6.1. Sphinx UDFs (User Defined Functions)">UDF</a> XXX_reinit() method to reload UDFs with <code class="option">workers=prefork</code></li>
- <li class="listitem">added comma-separated syntax to <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">OPTION</a><code class="option"> idf</code>, <code class="option">tfidf_unnormalized</code> and <code class="option">tfidf_normalized</code> flags</li>
- <li class="listitem">added <code class="option">lccs</code>, <code class="option">wlccs</code>, <code class="option">exact_order</code>, <code class="option">min_gaps</code>, and <code class="option">atc </code><a class="link" href="#field-factors" title="5.4.6. Field-level ranking factors">ranking factors</a></li>
- <li class="listitem">added <code class="code">sphinx_get_XXX_factors()</code>, a faster interface to access <a class="link" href="#misc-functions" title="5.5.6. Miscellaneous functions">PACKEDFACTORS()</a> in UDFs</li>
- <li class="listitem">added support for <a class="link" href="#field-factors" title="5.4.6. Field-level ranking factors">exact_hit</a>, <a class="link" href="#field-factors" title="5.4.6. Field-level ranking factors">exact_order</a> field factors when using more than 32 fields (exact_hit, exact_order)</li>
- </ul></div>
- <h3><a name="idp34385680"></a>Instrumentation features</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">added <a class="link" href="#sphinxql-describe" title="8.17. DESCRIBE syntax">DESCRIBE</a> and <a class="link" href="#ref-indextool" title="7.4. indextool command reference">--dumpheader</a> support for tokencount attributes (generated by index_field_lengths=1 directive)</li>
- <li class="listitem">added RT index query profile, percentages, totals to <a class="link" href="#sphinxql-show-profile" title="8.30. SHOW PROFILE syntax">SHOW PROFILE</a></li>
- <li class="listitem">added <code class="option">predicted_time</code>, <code class="option">dist_predicted_time</code>, <code class="option">fetched_docs</code>, <code class="option">fetched_hits</code> counters to <a class="link" href="#sphinxql-show-meta" title="8.3. SHOW META syntax">SHOW META</a></li>
- <li class="listitem">added <code class="option">total_tokens</code> and <code class="option">disk_bytes</code> counters to <a class="link" href="#sphinxql-show-index-status" title="8.31. SHOW INDEX STATUS syntax">SHOW INDEX STATUS</a></li>
- </ul></div>
- <h3><a name="idp34394928"></a>General features</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">added <a class="link" href="#expr-func-all">ALL()</a>, <a class="link" href="#expr-func-any">ANY()</a> and <a class="link" href="#expr-func-indexof">INDEXOF()</a> functions for JSON subarrays</li>
- <li class="listitem">added <a class="link" href="#expr-func-min-top-weight">MIN_TOP_WEIGHT()</a>, <a class="link" href="#expr-func-min-top-sortval">MIN_TOP_SORTVAL()</a> functions</li>
- <li class="listitem">added <a class="link" href="#factor-aggr-functions" title="5.4.7. Ranking factor aggregation functions">TOP()</a> aggregate function to expression ranker</li>
- <li class="listitem">added a check for duplicated tail hit positions in <a class="link" href="#ref-indextool" title="7.4. indextool command reference">indextool --check</a></li>
- <li class="listitem">added <a class="link" href="#sphinxql-log-format" title="5.9.2. SphinxQL log format">compact_in</a> option to <a class="link" href="#conf-query-log-format" title="12.4.4. query_log_format">query_log_format=sphinxql</a></li>
- <li class="listitem">added distance units and calculation method options to <a class="link" href="#expr-func-geodist">GEODIST()</a> function, optimized it a lot</li>
- <li class="listitem">added embedded stopwords/exceptions/wordforms to <code class="option">--dumpheader</code></li>
- <li class="listitem">added <a class="link" href="#ref-indexer" title="7.1. indexer command reference">indexer --nohup</a> and <a class="link" href="#ref-indextool" title="7.4. indextool command reference">indextool --rotate</a> switches to check index files before rotating them</li>
- <li class="listitem">added scientific notation support for JSON attributes (as per <a class="ulink" href="http://www.ietf.org/rfc/rfc4627.txt" target="_top">RFC 4627</a>)</li>
- <li class="listitem">added several SphinxQL statements to fix MySQL Workbench connection issues (LIKE for session variables, etc.)</li>
- <li class="listitem">added <a class="link" href="#conf-shutdown-timeout" title="12.4.45. shutdown_timeout">shutdown_timeout</a> directive to <code class="filename">searchd</code> config section</li>
- <li class="listitem">added signed values support for <a class="link" href="#expr-func-integer">INTEGER()</a> and <a class="link" href="#expr-func-uint">UINT()</a> functions</li>
- <li class="listitem">added snippet generation options to <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">SNIPPET()</a> function</li>
- <li class="listitem">added string filter support in distributed queries, SphinxAPI, SphinxQL query log</li>
- <li class="listitem">added support for mixed distributed and local index queries (SELECT * FROM dist1,dist2,local3), and <code class="option">index_weights</code> option for that case</li>
- </ul></div>
- <h3><a name="idp34417920"></a>Optimizations, behavior changes, and removals</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">optimized JSON attributes access (1.12x to 2.0x+ total query speedup depending on the JSON data)</li>
- <li class="listitem">optimized SELECT (1.02x to 3.5x speedup, depending on index schema size)</li>
- <li class="listitem">optimized <a class="link" href="#sphinxql-update" title="8.23. UPDATE syntax">UPDATE</a> (up to 3x faster on big updates)</li>
- <li class="listitem">optimized away internal threads table mutex contention with <code class="option">workers=threads</code> and 1000s of threads</li>
- <li class="listitem">changed [emptyword -foo] query behavior in cases when emptyword is a stopword or an overshort word, made such queries computable rather than erroneous</li>
- <li class="listitem">changed post-morphology <a class="link" href="#conf-wordforms" title="12.2.12. wordforms">wordforms</a> behavior, now it works as <code class="code">'if ( stem(token)==stem(abc) ) emit(def)'</code></li>
- <li class="listitem">changed the <a class="link" href="#sphinx-deprecations-defaults" title="2.6. Sphinx deprecations and changes in default configuration">config defaults</a> to <code class="option">id64</code>, <code class="option">dict=keywords</code>, <code class="option">charset_type=utf-8</code>, <code class="option">enable_star=1</code>, <code class="option">workers=threads</code>, <code class="option">mem_limit=128M</code>, <code class="option">rt_mem_limit=128M</code></li>
- <li class="listitem">changed the default SphinxAPI matching mode to <a class="link" href="#matching-modes" title="5.1. Matching modes">SPH_MATCH_EXTENDED2</a></li>
- <li class="listitem">disallowed dashes in index names in API requests (just like in SphinxQL)</li>
- <li class="listitem">removed legacy <code class="option">xmlpipe</code> data source v1, <code class="option">compat_sphinxql_magics</code> directive, <code class="option">SetWeights()</code> SphinxAPI call, and SPH_SORT_CUSTOM SphinxAPI mode</li>
- </ul></div>
- <h3><a name="idp34431632"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1734" target="_top">#1734</a>, unquoted literal in json subscript could cause a crash, returns 'unknown column' now.</li>
- <li class="listitem">fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1683" target="_top">#1683</a>, under certain conditions <a class="link" href="#conf-stopwords" title="12.2.11. stopwords">stopwords</a> were not taken into account in RT indexes</li>
- <li class="listitem">fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1648" target="_top">#1648</a>, #1644, when using AOT lemmas with snippet generation, not all the forms got highlighted</li>
- <li class="listitem">fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1549" target="_top">#1549</a>, <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">OPTION</a> <code class="option">idf=tfidf_normalized</code> was ignored for distributed queries</li>
- <li class="listitem">fixed that <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">ORDER BY RAND()</a> was not affected by <code class="option">index_weights</code></li>
- <li class="listitem">fixed that float updates with integer values in SphinxQL mistakenly set the float to 0</li>
- <li class="listitem">fixed that <code class="option">predicted_time</code> was not accumulated with <a class="link" href="#conf-dist-threads" title="12.4.24. dist_threads">dist_threads</a></li>
- <li class="listitem">fixed <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">GROUP_CONCAT</a> result length limit (was implicitly limited by 1024 bytes)</li>
- <li class="listitem">fixed agent query distribution in HA mirroring</li>
- <li class="listitem">fixed duplicates check for <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">quorum operator</a>, it works ok now for expanded keywords</li>
- <li class="listitem">fixed off-by-1 query positions of words in indexes with wordforms and <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">blended characters</a></li>
- <li class="listitem">fixed wrong <code class="option">lcs</code> and <a class="link" href="#field-factors" title="5.4.6. Field-level ranking factors">min_best_span_pos</a> ranking factor values when any expansion (<a class="link" href="#conf-expand-keywords" title="12.2.46. expand_keywords">expand_keywords</a> or lemmatize) occurred</li>
- <li class="listitem">fixed a crash while creating indexes with <a class="link" href="#conf-sql-joined-field" title="12.1.13. sql_joined_field">sql_joined_field</a></li>
- </ul></div></div>
- <div class="sect1" title="A.7. Version 2.1.9-release, 03 jul 2014"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel219"></a>A.7. Version 2.1.9-release, 03 jul 2014</h2></div></div></div>
- <h3><a name="idp34452080"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1994" target="_top">#1994</a>, parsing of empty JSON arrays</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1987" target="_top">#1987</a>, handling of <a class="link" href="#conf-index-exact-words" title="12.2.42. index_exact_words">index_exact_words</a> with AOT morphology and infixes on</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1984" target="_top">#1984</a>, teaching HTML parser to handle hex numbers</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1983" target="_top">#1983</a>, master and agents networking issue</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1977" target="_top">#1977</a>, escaping of characters doesn't work with exceptions</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1968" target="_top">#1968</a>, parsing of <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">WEIGHT()</a> function (queries to distributed indexes affected)</p></li>
- </ul></div></div>
- <div class="sect1" title="A.8. Version 2.1.8-release, 28 apr 2014"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel218"></a>A.8. Version 2.1.8-release, 28 apr 2014</h2></div></div></div>
- <h3><a name="idp34463728"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1937" target="_top">#1937</a>, crash at <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">SENTENCE</a> operator</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1933" target="_top">#1933</a>, quorum operator works incorrectly if its number is an exception</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1932" target="_top">#1932</a>, fixed daemon index recovery after failed rotation</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1923" target="_top">#1923</a>, crash at <code class="filename">indexer</code> with <code class="option">dict=keywords</code></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1918" target="_top">#1918</a>, fixed crash while hitless words are used within fulltext operators which require hits</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1878" target="_top">#1878</a>, daemon doesn't reset <a class="link" href="#conf-regexp-filter" title="12.2.64. regexp_filter">regexp_filter</a> after rotation with <a class="link" href="#conf-seamless-rotate" title="12.4.9. seamless_rotate">seamless_rotate=0</a></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1769" target="_top">#1769</a>, crash after unsuccessful <a class="link" href="#sphinxql-insert" title="8.6. INSERT and REPLACE syntax">INSERT</a> at RT index</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1682" target="_top">#1682</a>, field end modifier doesn't work with words containing blended chars</p></li>
- </ul></div></div>
- <div class="sect1" title="A.9. Version 2.1.7-release, 30 mar 2014"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel217"></a>A.9. Version 2.1.7-release, 30 mar 2014</h2></div></div></div>
- <h3><a name="idp34480576"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1917" target="_top">#1917</a>, field limit propagation outside of group</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1915" target="_top">#1915</a>, exact form passes to index skipping stopwords filter</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1905" target="_top">#1905</a>, multiple lemmas at the end of a field</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1903" target="_top">#1903</a>, <code class="filename">indextool</code> check mode for hitless indexes and indexes with large amount of documents</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1902" target="_top">#1902</a>, crash on JSON field in the <a class="link" href="#expr-func-in">IN()</a> function</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1884" target="_top">#1884</a>, crash at <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">SNIPPET()</a> with local indexes at distributed index</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1802" target="_top">#1802</a>, loading large keywords dictionary</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1786" target="_top">#1786</a>, <code class="filename">indextool</code> fails to handle indexes with AOT morphology</p></li>
- <li class="listitem"><p>fixed crash of daemon on logging extra large message</p></li>
- <li class="listitem"><p>fixed expression engine: division by zero, log and sqrt() functions of non-positive arguments</p></li>
- <li class="listitem"><p>fixed LCS and min_best_span_pos computation</p></li>
- <li class="listitem"><p>fixed unnecessary escaping in JSON result set</p></li>
- <li class="listitem"><p>fixed Quick Tour documentation chapter</p></li>
- </ul></div></div>
- <div class="sect1" title="A.10. Version 2.1.6-release, 24 feb 2014"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel216"></a>A.10. Version 2.1.6-release, 24 feb 2014</h2></div></div></div>
- <h3><a name="idp34499040"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1857" target="_top">#1857</a>, crash in arabic stemmer</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1875" target="_top">#1875</a>, fixed crash on adding documents with long words in dict=keywords index with morphology and infixes enabled</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1876" target="_top">#1876</a>, crash on words with large codepoints and infix searches</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1880" target="_top">#1880</a>, crash on multiquery with one incorrect query</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1882" target="_top">#1882</a>, race of periodic and forced FLUSHing on an RT index</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1881" target="_top">#1881</a>, quorum syntax with '.' as blended char</p></li>
- <li class="listitem"><p>fixed evaluating of LCS by an expression ranker</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1864" target="_top">#1864</a>, <code class="filename">indexer</code> crash on badly formed JSON, e.g. '[,1,2,3,4,]'</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1853" target="_top">#1853</a>, incomplete <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">ORDER BY JSON</a> attribute in distributed indexes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1847" target="_top">#1847</a>, broken infix searches in RT indexes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1844" target="_top">#1844</a>, clash of mix cased attribute and field names at CSV source</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1840" target="_top">#1840</a>, filter by <a class="link" href="#sphinxql-set" title="8.9. SET syntax">@uservar</a> in distributed indexes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1832" target="_top">#1832</a>,#1833,#1834, some big-endianness issues</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1830" target="_top">#1830</a>, loss of <a class="link" href="#conf-ondisk-attrs" title="12.2.68. ondisk_attrs">ondisk_attrs</a> after rotation</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1762" target="_top">#1762</a>, memory leak in <a class="link" href="#conf-regexp-filter" title="12.2.64. regexp_filter">regexp_filter</a></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1759" target="_top">#1759</a>, <code class="filename">indextool</code> false positives on persistent MVA checking</p></li>
- <li class="listitem"><p>fixed <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">GROUP BY</a> id</p></li>
- <li class="listitem"><p>fixed crash on sending empty snippet result</p></li>
- <li class="listitem"><p>fixed index corruption in <a class="link" href="#sphinxql-update" title="8.23. UPDATE syntax">UPDATE</a> queries with non-existent attributes</p></li>
- </ul></div></div>
- <div class="sect1" title="A.11. Version 2.1.5-release, 22 jan 2014"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel215"></a>A.11. Version 2.1.5-release, 22 jan 2014</h2></div></div></div>
- <h3><a name="idp34529312"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1848" target="_top">#1848</a>, infixes and morphology clash</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1823" target="_top">#1823</a>, <code class="filename">indextool</code> fails to handle indexes with lemmatizer morphology</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1799" target="_top">#1799</a>, crash in queries to distributed indexes with <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">GROUP BY</a> on multiple values</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1718" target="_top">#1718</a>, <code class="option">expand_keywords</code> option lost in disk chunks of RT indexes</p></li>
- <li class="listitem"><p>fixed documentation on <a class="link" href="#conf-rt-flush-period" title="12.4.33. rt_flush_period">rt_flush_period</a></p></li>
- <li class="listitem"><p>fixed network protocol issue which results in timeouts of <code class="filename">libmysqlclient</code> for big Sphinx responses</p></li>
- </ul></div></div>
- <div class="sect1" title="A.12. Version 2.1.4-release, 18 dec 2013"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel214"></a>A.12. Version 2.1.4-release, 18 dec 2013</h2></div></div></div>
- <h3><a name="idp34541056"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1778" target="_top">#1778</a>, indexes with more than 255 attributes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1777" target="_top">#1777</a>, <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">ORDER BY WEIGHT()</a></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1796" target="_top">#1796</a>, missing results in queries with quorum operator of indexes with some lemmatizer</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1780" target="_top">#1780</a>, incorrect results while querying indexes with wordforms, some lemmatizer and enable_star=1</p></li>
- <li class="listitem"><p>fixed, SHOW PROFILE for fullscan queries</p></li>
- <li class="listitem"><p>fixed, --with-re2 check</p></li>
- </ul></div></div>
- <div class="sect1" title="A.13. Version 2.1.3-release, 12 nov 2013"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel213"></a>A.13. Version 2.1.3-release, 12 nov 2013</h2></div></div></div>
- <h3><a name="idp34550080"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1753" target="_top">#1753</a>, path to re2 sources could not be set using <code class="option">--with-re2</code>, options <code class="option">--with-re2-libs</code> and <code class="option">--with-re2-includes</code> added to <code class="filename">configure</code></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1739" target="_top">#1739</a>, erroneous conversion of RAM chunk into disk chunk when loading id32 index with id64 binary</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1738" target="_top">#1738</a>, unlinking RAM chunk when converting it to disk chunk</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1710" target="_top">#1710</a>, unable to filter by attributes created by index_field_lengths=1</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1716" target="_top">#1716</a>, random crash with multiple running threads</p></li>
- <li class="listitem"><p>fixed crash while querying index with lemmatizer and wordforms</p></li>
- </ul></div></div>
- <div class="sect1" title="A.14. Version 2.1.2-release, 10 oct 2013"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel212"></a>A.14. Version 2.1.2-release, 10 oct 2013</h2></div></div></div>
- <h3><a name="idp34561248"></a>New features</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added <a class="link" href="#sphinxql-flush-ramchunk" title="8.27. FLUSH RAMCHUNK syntax">FLUSH RAMCHUNK</a> statement</p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-show-plan" title="8.34. SHOW PLAN syntax">SHOW PLAN</a> statement</p></li>
- <li class="listitem"><p>added support for <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">GROUP BY</a> on multiple attributes</p></li>
- <li class="listitem"><p>added <a class="link" href="#expression-ranker" title="5.4.3. Expression based ranker (SPH_RANK_EXPR)">BM25F()</a> function to <code class="code">SELECT</code> expressions (now works with the expression based ranker)</p></li>
- <li class="listitem"><p>added <a class="link" href="#ref-indextool" title="7.4. indextool command reference">indextool</a> <code class="option">--fold</code> command and <code class="option">-q</code> switch</p></li>
- <li class="listitem"><p>added JSON debug check for RT index RAM chunk</p></li>
- <li class="listitem"><p>added <a class="link" href="#expr-func-length">LENGTH()</a> function for MVA</p></li>
- <li class="listitem"><p>added missing <a class="link" href="#conf-rt-attr-bool" title="12.2.52. rt_attr_bool">rt_attr_bool</a> directive</p></li>
- <li class="listitem"><p>added support for selecting over 250 columns via SphinxQL</p></li>
- <li class="listitem"><p>deprecated custom sort mode, and <code class="option">str2ordinal</code> and <code class="option">str2wordcount</code> attribute types</p></li>
- <li class="listitem"><p>optimized <code class="code">SELECT</code>, <code class="code">UPDATE</code> for indexes with many attributes (up to 3.5x speedup in extreme cases)</p></li>
- <li class="listitem"><p>optimized <code class="code">JSON</code> attributes (up to 5-20% faster <code class="code">SELECTs</code> using JSON objects)</p></li>
- <li class="listitem"><p>optimized <a class="link" href="#xmlpipe2" title="3.9. xmlpipe2 data source">xmlpipe2</a> indexing (up to 9 times faster on some schemas)</p></li>
- </ul></div>
- <h3><a name="idp34580192"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1684" target="_top">#1684</a>, <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">COUNT(DISTINCT smth)</a> with implicit <code class="code">GROUP BY</code> returns correct value now</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1672" target="_top">#1672</a>, exact token AOT vs lemma (<code class="filename">indexer</code> skips exact form of token that passed AOT through tokenizer)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1659" target="_top">#1659</a>, fail while loading empty infix dictionary with <a class="link" href="#conf-dict" title="12.2.7. dict">dict=keywords</a></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1638" target="_top">#1638</a>, force explicit JSON type conversion for aggregate functions</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1628" target="_top">#1628</a>, <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">GROUP_CONCAT()</a> and <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">GROUPBY()</a> support for distributed agents</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1619" target="_top">#1619</a>, <code class="code">INTEGER()</code> conversion function doesn't support signed integers</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1615" target="_top">#1615</a>, global IDF vs exact term (=term), fixed global IDF for missed terms, fixed SphinxQL <a class="link" href="#conf-global-idf" title="12.2.66. global_idf">global_idf=0 option</a></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1607" target="_top">#1607</a>, now ignoring binlog when running daemon with <code class="option">--console</code> flag</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1606" target="_top">#1606</a>, hard interruption of the daemon by Ctrl+C (SIGINT) signal</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1592" target="_top">#1592</a>, duplicates vs expression ranker</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1578" target="_top">#1578</a>, <a class="link" href="#sorting-modes" title="5.6. Sorting modes">SORT BY</a> string attribute via API <code class="option">attr_asc</code> \ <code class="option">attr_desc</code></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1575" target="_top">#1575</a>, crash of daemon on MVA receive from agents with <a class="link" href="#conf-dist-threads" title="12.4.24. dist_threads">dist_threads</a> enabled</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1574" target="_top">#1574</a>, agent got kill list of local indexes of distributed index</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1573" target="_top">#1573</a>, ranker expression vs expanded terms</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1572" target="_top">#1572</a>, <code class="code">BM25F</code> vs negative terms</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1550" target="_top">#1550</a>, float got cut at full-text part of a query</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1541" target="_top">#1541</a>, <code class="code">BM25F</code> expression in distributed indexes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1508" target="_top">#1508</a>, <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1522" target="_top">#1522</a>, distributed index query lasts up to <a class="link" href="#conf-agent-connect-timeout" title="12.2.34. agent_connect_timeout">agent_connect_timeout</a> with epoll path</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1508" target="_top">#1508</a>, master failed to connect waiting agents up to <a class="link" href="#conf-agent-connect-timeout" title="12.2.34. agent_connect_timeout">agent_connect_timeout</a> time</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1489" target="_top">#1489</a>, filtering by integer field in JSON using floating point precision</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1485" target="_top">#1485</a>, <a class="link" href="#conf-index-exact-words" title="12.2.42. index_exact_words">index_exact_words</a> vs keyword dict with infix</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1484" target="_top">#1484</a>, <a class="link" href="#sphinxql-insert" title="8.6. INSERT and REPLACE syntax">INSERT</a> into RT vs no JSON attribute</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1478" target="_top">#1478</a>, memory leaks at daemon <a class="link" href="#misc-functions" title="5.5.6. Miscellaneous functions">PACKEDFACTORS()</a> as UDF argument, index query tokenizer, expression ranker SUM()</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1470" target="_top">#1470</a>, broken UDF unpack (since r3738 UDF version 2)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1468" target="_top">#1468</a>, multiple conditions in <code class="code">WHERE</code> for JSON attributes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1466" target="_top">#1466</a>, <a class="link" href="#conf-index-field-lengths" title="12.2.63. index_field_lengths">index_field_lengths</a> vs XML data source</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1463" target="_top">#1463</a>, daemon shutdown vs RT index optimize (added forced terminate of long merging operation)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1460" target="_top">#1460</a>, <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">aggregate functions</a> <code class="code">AVG()</code>, <code class="code">MAX()</code>, <code class="code">MIN()</code>, <code class="code">SUM()</code> do not work for JSON attributes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1459" target="_top">#1459</a>, <code class="code">BM25F</code> doesn't work with <a class="link" href="#conf-sql-field-string" title="12.1.26. sql_field_string">field_string</a> fields</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1458" target="_top">#1458</a>, factors to copy <code class="code">field_tf</code> at UDF</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1450" target="_top">#1450</a>, garbage in JSON fields when selecting them from a RT index</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1449" target="_top">#1449</a>, broken build on Mac OS X</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1446" target="_top">#1446</a>, <a class="link" href="#weighting" title="5.4. Search results ranking">WEIGHT()</a> did not work in <code class="code">SELECT</code> expressions</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1445" target="_top">#1445</a>, field-start/field-end modifiers did not work for star-expanded keywords</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1443" target="_top">#1443</a>, <a class="link" href="#conf-morphology" title="12.2.6. morphology">morphology=lemmatizer_ru_all</a> now works with <a class="link" href="#conf-index-exact-words" title="12.2.42. index_exact_words">index_exact_words=1</a> (exact forms can be matched)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1442" target="_top">#1442</a>, incorrect <code class="code">COUNT(*)</code> value in queries to distributed indexes with implicit <code class="code">GROUP BY</code></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1439" target="_top">#1439</a>, filters on float values in JSON issue, string values quoting issue</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1399" target="_top">#1399</a>, filter error message on string attribute</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1384" target="_top">#1384</a>, added the ability to define a custom DSN line with <a class="link" href="#confgroup-source" title="12.1. Data source configuration options">source=mssql</a> (as in <code class="code">source=odbc</code>)</p></li>
- <li class="listitem"><p>fixed <a class="link" href="#sphinxql-attach-index" title="8.25. ATTACH INDEX syntax">ATTACH</a> vs wordforms or stopwords; after daemon was restarted this setting was getting lost in RT indexes</p></li>
- <li class="listitem"><p>fixed balancing of agents in HA</p></li>
- <li class="listitem"><p>fixed co-working of <code class="code">index_exact_words</code> + AOT lemmatizer</p></li>
- <li class="listitem"><p>fixed epoll invoking and turned on by default</p></li>
- <li class="listitem"><p>fixed incorrect handling of wildcards in tokenizer</p></li>
- <li class="listitem"><p>fixed infix indexing with <code class="option">dict=keywords</code></p></li>
- <li class="listitem"><p>fixed <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">max_predicted_time</a> integer overflows</p></li>
- <li class="listitem"><p>fixed memory error in tokenizer</p></li>
- <li class="listitem"><p>fixed several memory leaks</p></li>
- <li class="listitem"><p>fixed <code class="code">PACKEDFACTORS()</code> to work in different <code class="code">GROUP BY</code> queries</p></li>
- <li class="listitem"><p>fixed preprocessor definitions for <a class="link" href="#conf-regexp-filter" title="12.2.64. regexp_filter">RE2</a> in VS solution</p></li>
- <li class="listitem"><p>fixed rotation of global IDF for <code class="option">workers=threads</code> and <code class="option">seamless_rotate=1</code></p></li>
- <li class="listitem"><p>fixed rotation of old indexes</p></li>
- <li class="listitem"><p>fixed RT kill list survives <code class="code">TRUNCATE</code> and works in newly <code class="code">ATTACH</code>ed index</p></li>
- <li class="listitem"><p>fixed saving id32 RT index with id64 daemon</p></li>
- <li class="listitem"><p>fixed stemmer vs RT index <code class="code">INSERT</code></p></li>
- <li class="listitem"><p>fixed string case error with JSON attributes in select list of a query</p></li>
- <li class="listitem"><p>fixed <code class="code">TOP_COUNT</code> usage in <code class="filename">misc/suggest</code> and updated to PHP 5.3 and UTF-8</p></li>
- </ul></div></div>
- <div class="sect1" title="A.15. Version 2.1.1-beta, 20 feb 2013"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel211"></a>A.15. Version 2.1.1-beta, 20 feb 2013</h2></div></div></div>
- <h3><a name="idp34675536"></a>Major new features</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added query profiling (SET PROFILING=1 and <a class="link" href="#sphinxql-show-profile" title="8.30. SHOW PROFILE syntax">SHOW PROFILE</a> statements)</p></li>
- <li class="listitem"><p>added AOT-based Russian lemmatizer (<a class="link" href="#conf-morphology" title="12.2.6. morphology">morphology={lemmatize_ru | lemmatize_ru_all}</a>, <a class="link" href="#conf-lemmatizer-base" title="12.5.1. lemmatizer_base">lemmatizer_base</a>, and <a class="link" href="#conf-lemmatizer-cache" title="12.3.8. lemmatizer_cache">lemmatizer_cache</a> directives)</p></li>
- <li class="listitem"><p>added <a class="link" href="#ref-wordbreaker" title="7.5. wordbreaker command reference">wordbreaker</a>, a tool to split compounds into individual words</p></li>
- <li class="listitem"><p>added JSON attributes support (<a class="link" href="#conf-sql-attr-json" title="12.1.24. sql_attr_json">sql_attr_json</a>, <a class="link" href="#conf-on-json-attr-error" title="12.5.2. on_json_attr_error">on_json_attr_error</a>, <a class="link" href="#conf-json-autoconv-numbers" title="12.5.3. json_autoconv_numbers">json_autoconv_numbers</a>, <a class="link" href="#conf-json-autoconv-keynames" title="12.5.4. json_autoconv_keynames">json_autoconv_keynames</a> directives)</p></li>
- <li class="listitem"><p>added initial subselects support, SELECT * FROM (SELECT ... ORDER BY cond1 LIMIT X) ORDER BY cond2 LIMIT Y</p></li>
- <li class="listitem"><p>added bigram indexing, and phrase searching with bigrams (<a class="link" href="#conf-bigram-index" title="12.2.62. bigram_index">bigram_index</a>, <a class="link" href="#conf-bigram-freq-words" title="12.2.61. bigram_freq_words">bigram_freq_words</a> directives)</p></li>
- <li class="listitem"><p>added HA/LB support, ha_strategy and agent_persistent directives, SHOW AGENT STATUS statement</p></li>
- <li class="listitem"><p>added RT index optimization (<a class="link" href="#sphinxql-optimize-index" title="8.33. OPTIMIZE INDEX syntax">OPTIMIZE INDEX</a> statement, <a class="link" href="#conf-rt-merge-iops" title="12.4.42. rt_merge_iops">rt_merge_iops</a> and <a class="link" href="#conf-rt-merge-maxiosize" title="12.4.43. rt_merge_maxiosize">rt_merge_maxiosize</a> directives)</p></li>
- <li class="listitem"><p>added wildcards support to <a class="link" href="#conf-dict" title="12.2.7. dict">dict=keywords</a> (eg. "t?st*")</p></li>
- <li class="listitem"><p>added substring search support (min_infix_len=2 and above) to <a class="link" href="#conf-dict" title="12.2.7. dict">dict=keywords</a></p></li>
- </ul></div>
- <h3><a name="idp34695744"></a>New features</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added --checkconfig switch to <a class="link" href="#ref-indextool" title="7.4. indextool command reference">indextool</a> to check config file for correctness (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1395" target="_top">#1395</a>)</p></li>
- <li class="listitem"><p>added global IDF support (<a class="link" href="#conf-global-idf" title="12.2.66. global_idf">global_idf</a> directive, <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">OPTION global_idf</a>)</p></li>
- <li class="listitem"><p>added "term1 term2 term3"/0.5 <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">quorum fraction syntax</a> (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1372" target="_top">#1372</a>)</p></li>
- <li class="listitem"><p>added an option to apply stopwords before morphology, <a class="link" href="#conf-stopwords-unstemmed" title="12.2.65. stopwords_unstemmed">stopwords_unstemmed</a> directive</p></li>
- <li class="listitem"><p>added an alternative method to compute keyword IDFs, <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">OPTION idf=plain</a></p></li>
- <li class="listitem"><p>added boolean query optimizations, <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">OPTION boolean_simplify=1</a> (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1294" target="_top">#1294</a>)</p></li>
- <li class="listitem"><p>added stringptr return type support to UDFs, and <a class="link" href="#sphinxql-create-function" title="8.18. CREATE FUNCTION syntax">CREATE FUNCTION ... RETURNS STRING syntax</a></p></li>
- <li class="listitem"><p>added early query termination by predicted execution time (<a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">OPTION max_predicted_time</a>, and <a class="link" href="#conf-predicted-time-costs" title="12.4.44. predicted_time_costs">predicted_time_costs</a> directive)</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-index-field-lengths" title="12.2.63. index_field_lengths">index_field_lengths</a> directive, BM25A() and BM25F() functions to <a class="link" href="#expression-ranker" title="5.4.3. Expression based ranker (SPH_RANK_EXPR)">expression ranker</a></p></li>
- <li class="listitem"><p>added ranker=export, and <a class="link" href="#expr-func-packedfactors">PACKEDFACTORS()</a> function</p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">OPTION agent_query_timeout</a></p></li>
- <li class="listitem"><p>added support for attribute files over 4 GB (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1274" target="_top">#1274</a>)</p></li>
- <li class="listitem"><p>added addr2line output to crash reports (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1265" target="_top">#1265</a>)</p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-update" title="8.23. UPDATE syntax">OPTION ignore_nonexistent_columns</a> to UPDATE, and a respective <a class="link" href="#api-func-updateatttributes" title="9.7.2. UpdateAttributes">UpdateAttributes()</a> argument</p></li>
- <li class="listitem"><p>added --keep-attrs switch to <a class="link" href="#ref-indexer" title="7.1. indexer command reference">indexer</a></p></li>
- <li class="listitem"><p>added --with-static-mysql, --with-static-pgsql switches to configure</p></li>
- <li class="listitem"><p>added double-buffering for RT <a class="link" href="#sphinxql-insert" title="8.6. INSERT and REPLACE syntax">INSERTs</a> (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1200" target="_top">#1200</a>)</p></li>
- <li class="listitem"><p>added --morph, --dumpdict switches to <a class="link" href="#ref-indextool" title="7.4. indextool command reference">indextool</a></p></li>
- <li class="listitem"><p>added support for multiple wordforms files, comment syntax, and pre/post-morphology <a class="link" href="#conf-wordforms" title="12.2.12. wordforms">wordforms</a></p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">ZONESPANLIST()</a> builtin function</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-regexp-filter" title="12.2.64. regexp_filter">regexp_filter</a> directive, regexp document/query filtering support (uses RE2)</p></li>
- <li class="listitem"><p>added min_idf, max_idf, sum_idf <a class="link" href="#expression-ranker" title="5.4.3. Expression based ranker (SPH_RANK_EXPR)">ranking factors</a></p></li>
- <li class="listitem"><p>added uservars persistence, and <a class="link" href="#conf-sphinxql-state" title="12.4.38. sphinxql_state">sphinxql_state</a> directive (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1132" target="_top">#1132</a>)</p></li>
- <li class="listitem"><p>added <a class="link" href="#expr-func-poly2d">POLY2D</a>, <a class="link" href="#expr-func-geopoly2d">GEOPOLY2D</a>, <a class="link" href="#expr-func-contains">CONTAINS</a> functions</p></li>
- <li class="listitem"><p>added <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">ZONESPAN</a> operator</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-snippets-file-prefix" title="12.4.28. snippets_file_prefix">snippets_file_prefix</a> directive</p></li>
- <li class="listitem"><p>added Arabic stemmer, <a class="link" href="#conf-morphology" title="12.2.6. morphology">morphology=stem_ar</a> directive (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=519" target="_top">#519</a>)</p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">OPTION sort_method={pq | kbuffer}</a>, an alternative match sorting method</p></li>
- <li class="listitem"><p>added SPZ (<a class="link" href="#conf-index-sp" title="12.2.8. index_sp">sentence, paragraph</a>, <a class="link" href="#conf-index-zones" title="12.2.9. index_zones">zone</a>) support to RT indexes</p></li>
- <li class="listitem"><p>added support for up to 255 keywords in <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">quorum operator</a> (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1030" target="_top">#1030</a>)</p></li>
- <li class="listitem"><p>added multi-threaded agent querying (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1000" target="_top">#1000</a>)</p></li>
- </ul></div>
- <h3><a name="idp34748624"></a>New SphinxQL features</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added <a class="link" href="#sphinxql-show-index-status" title="8.31. SHOW INDEX STATUS syntax">SHOW INDEX indexname STATUS</a> statement</p></li>
- <li class="listitem"><p>added LIKE clause support to multiple SHOW xxx statements</p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">SNIPPET()</a> function</p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">GROUP_CONCAT()</a> aggregate function</p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">GROUPBY()</a> builtin function</p></li>
- <li class="listitem"><p>added iostats and cpustats to <a class="link" href="#sphinxql-show-meta" title="8.3. SHOW META syntax">SHOW META</a></p></li>
- <li class="listitem"><p>added support for <a class="link" href="#sphinxql-delete" title="8.8. DELETE syntax">DELETE</a> statement over distributed indexes (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1104" target="_top">#1104</a>)</p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">EXIST('attr_name', default_value)</a> builtin function (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1037" target="_top">#1037</a>)</p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-show-variables" title="8.20. SHOW VARIABLES syntax">SHOW VARIABLES WHERE variable_name='xxx'</a> syntax</p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-truncate-rtindex" title="8.28. TRUNCATE RTINDEX syntax">TRUNCATE RTINDEX</a> statement</p></li>
- </ul></div>
- <h3><a name="idp34764416"></a>Major behavior changes and optimizations</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>changed that UDFs are now allowed in fork/prefork modes via <a class="link" href="#conf-sphinxql-state" title="12.4.38. sphinxql_state">sphinxql_state</a> startup script</p></li>
- <li class="listitem"><p>changed that compat_sphinxql_magics now defaults to 0</p></li>
- <li class="listitem"><p>changed that small enough exceptions, wordforms, stopwords files are now embedded into the index header</p></li>
- <li class="listitem"><p>changed that <a class="link" href="#conf-rt-mem-limit" title="12.2.49. rt_mem_limit">rt_mem_limit</a> can now be over 2 GB (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1059" target="_top">#1059</a>)</p></li>
- <li class="listitem"><p>optimized tokenizer (up to 1.25x indexing and snippets speedup)</p></li>
- <li class="listitem"><p>optimized multi-keyword searching (added skiplists)</p></li>
- <li class="listitem"><p>optimized filtering and scan in several frequent cases (single-value, 2-arg, 3-arg WHERE clauses)</p></li>
- </ul></div></div>
- <div class="sect1" title="A.16. Version 2.0.11-dev, xx xxx xxxx"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel2011"></a>A.16. Version 2.0.11-dev, xx xxx xxxx</h2></div></div></div>
- <h3><a name="idp34772880"></a>Bug fixes</h3></div>
- <div class="sect1" title="A.17. Version 2.0.10-release, 22 jan 2014"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel2010"></a>A.17. Version 2.0.10-release, 22 jan 2014</h2></div></div></div>
- <h3><a name="idp34774464"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1778" target="_top">#1778</a>, <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">SENTENCE and PARAGRAPH</a> operators and infix stars clash</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1774" target="_top">#1774</a>, stack overflow on parsing large expressions</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1744" target="_top">#1744</a>, daemon failed to write to log file bigger than 4G</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1705" target="_top">#1705</a>, expression ranker handling of indexes with more than 32 fields</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1700" target="_top">#1700</a>, crash and cutoff in fullscan <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">reverse_scan=1</a> queries</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1698" target="_top">#1698</a>, proper handling of stopword with blended chars</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1682" target="_top">#1682</a>, field end modifier and <a class="link" href="#conf-index-exact-words" title="12.2.42. index_exact_words">index_exact_words</a> clash</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1678" target="_top">#1678</a>, memory leak in SUM() function of an expression ranker</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1670" target="_top">#1670</a>, updating of MVA attributes in distributed indexes via API</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1662" target="_top">#1662</a>, <a class="link" href="#api-func-escapestring" title="9.7.4. EscapeString">EscapeString()</a> API escapes '&lt;' too now</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1520" target="_top">#1520</a>, <a class="link" href="#api-func-setlimits" title="9.2.1. SetLimits">SetLimits()</a> API documentation</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1491" target="_top">#1491</a>, documentation: space character is prohibited in <a class="link" href="#conf-charset-table" title="12.2.16. charset_table">charset_table</a></p></li>
- <li class="listitem"><p>fixed memory leak in expressions with max_window_hits</p></li>
- <li class="listitem"><p>fixed <a class="link" href="#conf-rt-flush-period" title="12.4.33. rt_flush_period">rt_flush_period</a> - less strict internal check and more frequent flushes overall</p></li>
- </ul></div></div>
- <div class="sect1" title="A.18. Version 2.0.9-release, 26 aug 2013"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel209"></a>A.18. Version 2.0.9-release, 26 aug 2013</h2></div></div></div>
- <h3><a name="idp34799056"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1655" target="_top">#1655</a>, special characters like ()?* were not processed correctly by exceptions</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1651" target="_top">#1651</a>, <a class="link" href="#sphinxql-create-function" title="8.18. CREATE FUNCTION syntax">CREATE FUNCTION</a> can now be used with BIGINT return type</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1649" target="_top">#1649</a>, incorrect warning message (about statistics mismatch) was returned when mixing wildcards and regular keywords</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1603" target="_top">#1603</a>, passing MVA64 arguments to non-MVA functions caused unpredictable behavior and crashes (now explicitly forbidden)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1601" target="_top">#1601</a>, negative numbers in <a class="link" href="#expr-func-in">IN()</a> clause caused a syntax error</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1581" target="_top">#1581</a>, <a class="link" href="#conf-dict" title="12.2.7. dict">dict=keywords</a> and <a class="link" href="#conf-sql-joined-field" title="12.1.13. sql_joined_field">sql_joined_field</a> occasionally caused <code class="filename">indexer</code> to build corrupted indexes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1546" target="_top">#1546</a>, file descriptor leaked on index rotation (that eventually prevented <code class="filename">searchd</code> from reloading indexes)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1537" target="_top">#1537</a>, <code class="code">COUNT(*)</code> and compat_sphinxql_magics=0 via SphinxAPI caused an incorrect error message</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1531" target="_top">#1531</a>, #1589, several matching and highlighting issues when using both <a class="link" href="#conf-blend-chars" title="12.2.47. blend_chars">blend_chars</a> and multi-wordforms</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1521" target="_top">#1521</a>, <code class="filename">indextool --check</code> did not handle empty RT MVA and gave an incorrect warning</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1392" target="_top">#1392</a>, SphinxSE builds with MySQL 5.6 now</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1346" target="_top">#1346</a>, <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">NEAR</a> handles duplicated keywords properly now</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=757" target="_top">#757</a>, wordforms shared between multiple indexes with different tokenizer settings failed to load (they now load with a warning)</p></li>
- <li class="listitem"><p>fixed that batch queries did not batch in some cases (because of internal expression alias issues)</p></li>
- <li class="listitem"><p>fixed that <a class="link" href="#sphinxql-call-keywords" title="8.15. CALL KEYWORDS syntax">CALL KEYWORDS</a> occasionally gave incorrect error messages</p></li>
- <li class="listitem"><p>fixed searchd crashes on <a class="link" href="#sphinxql-attach-index" title="8.25. ATTACH INDEX syntax">ATTACHing</a> plain indexes with MVAs</p></li>
- <li class="listitem"><p>fixed several deadlocks and other threading issues</p></li>
- <li class="listitem"><p>fixed incorrect sorting order with <a class="link" href="#collations" title="5.12. Collations">utf8_general_ci</a></p></li>
- <li class="listitem"><p>fixed that in some cases incorrect attribute values were returned when using expression aliases</p></li>
- <li class="listitem"><p>optimized <a class="link" href="#xmlpipe2" title="3.9. xmlpipe2 data source">xmlpipe2</a> indexing</p></li>
- <li class="listitem"><p>added a warning for missed stopwords, exception, wordforms files on index load and in <code class="filename">indextool --check</code></p></li>
- </ul></div></div>
- <div class="sect1" title="A.19. Version 2.0.8-release, 26 apr 2013"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel208"></a>A.19. Version 2.0.8-release, 26 apr 2013</h2></div></div></div>
- <h3><a name="idp34834576"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1515" target="_top">#1515</a>, log strings over 2KB were clipped when <a class="link" href="#conf-query-log-format" title="12.4.4. query_log_format">query_log_format=plain</a></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1514" target="_top">#1514</a>, RT index disk chunks lost attribute updates on daemon restart</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1512" target="_top">#1512</a>, crash while formatting log messages</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1511" target="_top">#1511</a>, crash on indexing PostgreSQL data source with <a class="link" href="#mva" title="3.4. MVA (multi-valued attributes)">MVA</a> attributes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1509" target="_top">#1509</a>, <a class="link" href="#conf-blend-chars" title="12.2.47. blend_chars">blend_chars</a> vs incomplete multi-form and overshort</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1504" target="_top">#1504</a>, RT binlog replay vs descending tid on update</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1499" target="_top">#1499</a>, <code class="option">sql_field_str2wordcount</code> actually is int, not string</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1498" target="_top">#1498</a>, now working with exceptions starting with number too</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1496" target="_top">#1496</a>, multiple destination keywords in wordform</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1494" target="_top">#1494</a>, lost 'mod', '%' operations in select list. Also corrected a few typos in the doc.</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1490" target="_top">#1490</a>, <a class="link" href="#conf-expand-keywords" title="12.2.46. expand_keywords">expand_keywords</a> vs prefix</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1487" target="_top">#1487</a>, `id` in expression fixed</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1483" target="_top">#1483</a>, snippets limits fix</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1481" target="_top">#1481</a>, shebang config changes check on rotation</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1479" target="_top">#1479</a>, port handling in <a class="link" href="#api-reference" title="Chapter 9. API reference">PHP Sphinx API</a></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1474" target="_top">#1474</a>, daemon crash at SphinxQL packet overflows <a class="link" href="#conf-max-packet-size" title="12.4.13. max_packet_size">max_packet_size</a></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1472" target="_top">#1472</a>, crash on loading index to <code class="filename">indextool</code> for check</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1465" target="_top">#1465</a>, <a class="link" href="#conf-expansion-limit" title="12.4.35. expansion_limit">expansion_limit</a> got lost in index rotation</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1427" target="_top">#1427</a>, #1506, utf8 3 and 4-bytes codepoints</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1405" target="_top">#1405</a>, between with mixed int float values</p></li>
- </ul></div></div>
- <div class="sect1" title="A.20. Version 2.0.7-release, 26 mar 2013"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel207"></a>A.20. Version 2.0.7-release, 26 mar 2013</h2></div></div></div>
- <h3><a name="idp34869328"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1475" target="_top">#1475</a>, memory leak in the expression parser</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1457" target="_top">#1457</a>, error messages over 2KB were clipped</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1454" target="_top">#1454</a>, searchd did not display an error message when the binlog path did not exist</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1441" target="_top">#1441</a>, SHOW META in a query batch was returning the last non-batch error</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1435" target="_top">#1435</a>, typo in the documentation</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1430" target="_top">#1430</a>, rt_flush_period now works even with a disabled binlog</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1427" target="_top">#1427</a>, overlong 4-byte UTF-8 codes in source text could cause indexer crashes or index corruption</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1418" target="_top">#1418</a>, warnings from local index searches were lost with dist_threads>0</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1417" target="_top">#1417</a>, crash handler now works on searchd startup stage, too (eg. to report index load time crashes)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1410" target="_top">#1410</a>, bad numerics like '123abc' now result in a proper SphinxQL error message</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1404" target="_top">#1404</a>, a tiny memory leak in shared mutex</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1394" target="_top">#1394</a>, race in --iostats caused incorrect I/O statistics in threaded modes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1391" target="_top">#1391</a>, QUORUM operator vs docinfo=inline returned wrong attribute values</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1389" target="_top">#1389</a>, edge case in the ORDER operator occasionally caused searchd crashes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1382" target="_top">#1382</a>, query parts with field limits but without real keywords (like '@name {') are now simply ignored and no longer cause a query syntax error</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1370" target="_top">#1370</a>, Windows indexer builds failed to fetch rows from MSSQL 2012</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1368" target="_top">#1368</a>, ORDER BY RAND() did not work in RT indexes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1364" target="_top">#1364</a>, queries with hitless words could occasionally crash searchd</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1363" target="_top">#1363</a>, '*' in charset_table was causing query syntax errors with enable_star=1</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1353" target="_top">#1353</a>, added filtering by 'id' syntax (in addition to '@id') to SphinxSE</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1346" target="_top">#1346</a>, fixed NEAR operator behavior vs duplicated keywords</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1345" target="_top">#1345</a>, invalid PROXIMITY operator threshold now causes a query syntax error rather than unexpected search behavior</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1343" target="_top">#1343</a>, misconfigured indexes with 0 full text fields are now explicitly forbidden</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1342" target="_top">#1342</a>, specific error messages (from the preload stage) went missing when failing to load the indexes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1339" target="_top">#1339</a>, no warning on inconsistent word statistics</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1335" target="_top">#1335</a>, typo in searchd help screen</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1334" target="_top">#1334</a>, typo in SELECT documentation</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1316" target="_top">#1316</a>, PHRASE operator did not match in a rare self-repeating document/query case</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1297" target="_top">#1297</a>, letting queries complete gracefully instead of killing them off in seamless_rotate=1, workers=prefork case</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1295" target="_top">#1295</a>, mentioned index naming requirements (proper identifier) in the FROM clause docs</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1221" target="_top">#1221</a>, incorrect results when using @groupby in select list via SphinxAPI with compat_sphinxql_magics=0</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1180" target="_top">#1180</a>, special SPZ chars occasionally leaking into snippets</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1171" target="_top">#1171</a>, preforked children did not reload logs on SIGUSR1</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1150" target="_top">#1150</a>, added support for `id` syntax in DELETE and parents in WHERE</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1135" target="_top">#1135</a>, crashes when using MVA/strings attributes in expression ranker</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1124" target="_top">#1124</a>, corrupted attributes after merging with an empty index</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1090" target="_top">#1090</a>, SphinxSE snippets UDF updated to support MySQL 5.5</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1041" target="_top">#1041</a>, added initial support for MVA updates (and other mutex protected things) on FreeBSD</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=999" target="_top">#999</a>, fullscan returned empty result sets in mixed batches of fullscan and fulltext queries</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=921" target="_top">#921</a>, document count/bytes 32bit overflow in indexer progress output</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=539" target="_top">#539</a>, added processing suffix rules with dots in .affix file to spelldump</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=481" target="_top">#481</a>, rotation did not work on Windows with preopen=1</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=268" target="_top">#268</a>, added warnings about duplicate elements in xmlpipe2</p></li>
- <li class="listitem"><p>fixed CSphStaticMutex (double initialization issue)</p></li>
- <li class="listitem"><p>fixed documentation typo in SQL data sources</p></li>
- <li class="listitem"><p>fixed too-late initialization of mutex at daemon</p></li>
- <li class="listitem"><p>fixed that an instance of searchd resurrected by watchdog could leak resources and/or crash</p></li>
- <li class="listitem"><p>added a console message about crashes during index loading at startup</p></li>
- <li class="listitem"><p>added more debug info about failed index loading</p></li>
- </ul></div></div>
- <div class="sect1" title="A.21. Version 2.0.6-release, 22 oct 2012"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel206"></a>A.21. Version 2.0.6-release, 22 oct 2012</h2></div></div></div>
- <h3><a name="idp34932704"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1322" target="_top">#1322</a>, J connector seems to be broken in rel20, but works in trunk</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1321" target="_top">#1321</a>, 'set names utf8' passes, but 'set names utf-8' doesn't because of syntax error '-'</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1318" target="_top">#1318</a>, unhandled float comparison operators at filter</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1317" target="_top">#1317</a>, FD leaks on thread seamless rotation</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1313" target="_top">#1313</a>, crash on stopping daemon with incorrect RT index config</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1306" target="_top">#1306</a>, 'jolly roger ;)', and '(((((((((9 brackets)' crashes <code class="filename">searchd</code></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1304" target="_top">#1304</a>, OS X debug compilation</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1302" target="_top">#1302</a>, daemon random crashes on OS X</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1301" target="_top">#1301</a>, <code class="filename">indexer</code> fails to send rotate signal</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1300" target="_top">#1300</a>, lost index settings on attach</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1299" target="_top">#1299</a>, daemon failed to rotate <a class="link" href="#sphinxql-attach-index" title="8.25. ATTACH INDEX syntax">ATTACH</a>ed plain index</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1289" target="_top">#1289</a>, <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">SENTENCE</a> or <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">PARAGRAPH</a> searching leaked memory</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1285" target="_top">#1285</a>, crash on running <code class="filename">searchd</code> with <code class="filename">syslog</code> and <code class="filename">watchdog</code></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1279" target="_top">#1279</a>, linking against explicitly disabled iconv. Also added <code class="code">--with-libexpat</code> to config options, which is sometimes required on systems without XML support</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1278" target="_top">#1278</a>, broken <a class="link" href="#conf-odbc-dsn" title="12.1.10. odbc_dsn">unixODBC</a> detection in configure script.</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1277" target="_top">#1277</a>, broken build on some toolchains (like uClibc) where <code class="code">LLONG_MIN</code> is not defined, added <code class="code">ULLONG_MAX</code></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1274" target="_top">#1274</a>, large <code class="filename">spa</code> (>4GB) file failed to load</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1269" target="_top">#1269</a>, crash at RT index with <a class="link" href="#mva" title="3.4. MVA (multi-valued attributes)">MVA</a> from disk chunk previously updated</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1268" target="_top">#1268</a>, useless warning removed</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1264" target="_top">#1264</a>, string and MVA attributes aliasing works again</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1254" target="_top">#1254</a>, it's now possible to add indexes using <a class="link" href="#ref-indexer" title="7.1. indexer command reference">--rotate</a></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1249" target="_top">#1249</a>, <a class="link" href="#sphinxql-reference" title="Chapter 8. SphinxQL reference">SphinxQL</a> unusable with PHP >= 5.4.5</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1246" target="_top">#1246</a>, attributes of 100 character length not being saved</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1234" target="_top">#1234</a>, case sensitive <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">GROUP BY</a> attribute</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1216" target="_top">#1216</a>, typos, <a class="link" href="#conf-mem-limit" title="12.3.1. mem_limit">mem_limit</a> default size and <a class="link" href="#rt-indexes" title="Chapter 4. Real-time indexes">RT documentation</a></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1148" target="_top">#1148</a>, RT documentation updated</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1140" target="_top">#1140</a>, mem_limit default value</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1138" target="_top">#1138</a>, updated documentation on <a class="link" href="#conf-sql-attr-string" title="12.1.23. sql_attr_string">sql_attr_string</a></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1129" target="_top">#1129</a>, snippets vs empty files and empty filenames</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1123" target="_top">#1123</a>, configure compatibility fix</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1122" target="_top">#1122</a>, 64bit <a class="link" href="#conf-sql-range-step" title="12.1.15. sql_range_step">sql_range_step</a></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1082" target="_top">#1082</a>, crashes and deadlocks on OS X with <code class="code">workers=threads</code> and field leak of read-write lock</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1081" target="_top">#1081</a>, select only count distinct attr1 but group by attr2</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1064" target="_top">#1064</a>, mistake while working with timestamp functions</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1043" target="_top">#1043</a>, inaccurate distinct count in case of many indexes or a distributed index</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1042" target="_top">#1042</a>, arithmetic expressions overflow</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1007" target="_top">#1007</a>, Russian stemming on big endian systems</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=986" target="_top">#986</a>, asserting in <a class="link" href="#api-func-setrankingmode" title="9.3.2. SetRankingMode">SetRankingMode</a> (PHP API)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=975" target="_top">#975</a>, incorrect ranking in some rare cases</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=967" target="_top">#967</a>, Python API type checking error</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=934" target="_top">#934</a>, API vs fullscan vs non-empty query</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=899" target="_top">#899</a>, error if using <a class="link" href="#api-func-setfilterrange" title="9.4.3. SetFilterRange">SetFilterRange</a> as HAVING from SQL</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=867" target="_top">#867</a>, <code class="filename">indexer</code> accepts index names starting with digit or _</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=699" target="_top">#699</a>, signed vs unsigned 64-bit DocIDs in SphinxQL</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=668" target="_top">#668</a>, now ignoring single @ character (incorrect field operator)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=611" target="_top">#611</a>, @! operator vs non-existent field, updated documentation</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=412" target="_top">#412</a>, multiple <code class="code">--filter</code> arguments work as they should in search utility</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=108" target="_top">#108</a>, support for system libstemmer library. The sources of libstemmer placed into <code class="filename">libstemmer_c</code> are preferred, but the system lib will be tried if no sources are found</p></li>
- <li class="listitem"><p>fixed <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">ORDER BY</a> output at query log with SphinxQL mode</p></li>
- <li class="listitem"><p>fixed documentation entry about <a class="link" href="#conf-sql-joined-field" title="12.1.13. sql_joined_field">sql_joined_field</a></p></li>
- <li class="listitem"><p>fixed sample config file</p></li>
- <li class="listitem"><p>fixed x64 configurations for libstemmer</p></li>
- </ul></div></div>
- <div class="sect1" title="A.22. Version 2.0.5-release, 28 jul 2012"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel205"></a>A.22. Version 2.0.5-release, 28 jul 2012</h2></div></div></div>
- <h3><a name="idp35019888"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1258" target="_top">#1258</a>, <code class="code">xmlpipe2</code> refused to index indexes with <code class="code">docinfo=inline</code></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1257" target="_top">#1257</a>, legacy groupby modes vs <code class="code">dist_threads</code> could occasionally return wrong search results (race condition)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1253" target="_top">#1253</a>, missing single-word query performance optimization (simplified ranker) vs prefix-expanded keywords vs <code class="code">dict=keywords</code></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1252" target="_top">#1252</a>, COUNT(*) vs <a class="link" href="#conf-dist-threads" title="12.4.24. dist_threads">dist_threads</a> could occasionally crash (race condition)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1251" target="_top">#1251</a>, missing expression support in the <a class="link" href="#expr-func-in">IN()</a> function</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1245" target="_top">#1245</a>, <a class="link" href="#api-func-flushattributes" title="9.7.6. FlushAttributes">FlushAttributes</a> mistakenly disabled by <a class="link" href="#conf-attr-flush-period" title="12.4.12. attr_flush_period">attr_flush_period=0</a> setting</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1244" target="_top">#1244</a>, per-API-command (search, update, etc) statistics were not updated by SphinxQL requests</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1243" target="_top">#1243</a>, misc issues (broken statistics, weights, checks) with very long keywords having blended parts in RT indexes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1240" target="_top">#1240</a>, embedded <code class="code">xmlpipe2</code> schema with more attributes than the <code class="code">sphinx.conf</code> one caused <code class="filename">indexer</code> to crash</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1239" target="_top">#1239</a>, memory leak when optimizing <code class="code">ABS(const)</code> and other 1-arg functions</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1228" target="_top">#1228</a>, #761, #1183, #1190, #1198, misc issues occasionally caused by MVA updates (crash on SaveAttributes; index rotation vs index name and TID; looped MVA updates; persistent MVA removal on rotation)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1227" target="_top">#1227</a>, API queries with <code class="code">SetGeoAnchor()</code> were logged incorrectly in SphinxQL-format query logs (<code class="code">query_log_format=sphinxql</code>)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1214" target="_top">#1214</a>, phrase query parsing issues when <a class="link" href="#conf-blend-chars" title="12.2.47. blend_chars">blend_chars</a> contained a quote (") symbol</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1213" target="_top">#1213</a>, attribute aliases were not recognized by the subsequent <code class="code">SELECT</code> items</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1212" target="_top">#1212</a>, <a class="link" href="#ref-indextool" title="7.4. indextool command reference"><code class="filename">indextool</code></a> failed to check hitless keywords</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1210" target="_top">#1210</a>, crash when indexing an index with joined fields only (no regular fields)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1209" target="_top">#1209</a>, <code class="code">xmlpipe_fixup_utf8</code> off by a byte on certain (pretty rare) malformed sequences</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1202" target="_top">#1202</a>, various issues with <code class="code">CALL KEYWORDS</code> vs RT indexes (crashes vs <code class="code">dict=keywords</code>, missing modifiers in output)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1201" target="_top">#1201</a>, snippets vs <code class="code">query_mode=1</code> vs complex OR-queries could occasionally crash</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1197" target="_top">#1197</a>, <code class="filename">indexer</code> running out of disk space could either crash, or fail to display a proper error message</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1185" target="_top">#1185</a>, keywords with wildcards were not handled when highlighting the entire document</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1184" target="_top">#1184</a>, <code class="filename">indexer</code> crash when <a class="link" href="#conf-ngram-chars" title="12.2.24. ngram_chars">ngram_chars</a> was set, but <a class="link" href="#conf-ngram-len" title="12.2.23. ngram_len">ngram_len=0</a></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1182" target="_top">#1182</a>, <code class="filename">indexer</code> crash on certain combinations of <a class="link" href="#conf-docinfo" title="12.2.4. docinfo"><code class="code">docinfo=inline</code></a> vs bitfields</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1181" target="_top">#1181</a>, <code class="code">GROUP BY</code> on a MVA64 was truncated at 32 bits</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1179" target="_top">#1179</a>, <code class="code">passage_boundary</code> in snippets could get ignored (when highlighting the entire document)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1178" target="_top">#1178</a>, <code class="filename">indexer</code> could crash when <code class="code">charset_table</code> specified out-of-bounds codes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1177" target="_top">#1177</a>, SPZ queries in snippets erroneously required <a class="link" href="#api-func-buildexcerpts" title="9.7.1. BuildExcerpts">passage_boundary</a> option to be explicitly set</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1176" target="_top">#1176</a>, multi-queries with a <code class="code">GROUP/ORDER BY</code> on a string attribute crashed</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1175" target="_top">#1175</a>, connection id mismatch in SphinxQL-format query logs</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1167" target="_top">#1167</a>, nested parentheses in a full-text query could mistakenly reset preceding field or zone limit operator</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1158" target="_top">#1158</a>, float range filters were not supported in a multi-query batch optimizer</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1157" target="_top">#1157</a>, broken gcc-4.7 build</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1156" target="_top">#1156</a>, empty result set instead of an error message when querying distributed indexes with compat_sphinxql_magics=1 and hitting an error</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1143" target="_top">#1143</a>, dash after a number incorrectly parsed as an operator <code class="code">NOT</code></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1137" target="_top">#1137</a>, <code class="filename">searchd</code> <a class="link" href="#ref-searchd" title="7.2. searchd command reference">--stopwait</a> hung when the running instance crashed during shutdown</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1136" target="_top">#1136</a>, high idle CPU load on systems without <code class="code">pthread_timed_lock()</code></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1134" target="_top">#1134</a>, issues with <code class="code">prefork</code> workers on systems without <code class="code">pthread_timed_lock()</code></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1133" target="_top">#1133</a>, <a class="link" href="#api-func-buildexcerpts" title="9.7.1. BuildExcerpts"><code class="code">BuildExcerpts()</code></a> on a distributed index with <code class="code">load_files</code> did not distribute the jobs</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1126" target="_top">#1126</a>, inaccurate hits sorting progress report on joined field indexing</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1121" target="_top">#1121</a>, occasional bad entries (wrong characters or invalid SQL) in SphinxQL-format query log</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1118" target="_top">#1118</a>, <code class="code">libsphinxclient</code> requests failed when using <code class="code">SPH_RANK_EXPR</code></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1073" target="_top">#1073</a>, improved handling of wordforms/multiforms rules referring to stopwords</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1062" target="_top">#1062</a>, bigint filter ranges truncated when searching via <a class="link" href="#sphinxql-reference" title="Chapter 8. SphinxQL reference">SphinxQL</a></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1052" target="_top">#1052</a>, SphinxSE range arguments with leading zeroes mistakenly parsed as octal</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1011" target="_top">#1011</a>, negative MVA64 values mistakenly converted to positive (on indexing and/or output)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=974" target="_top">#974</a>, crash when logging queries over 2048 bytes with performance counters enabled</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=909" target="_top">#909</a>, field-end modifier was ignored when followed by a non-whitespace syntax character (eg quote or bracket)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=907" target="_top">#907</a>, issue with bigint filtering (large positive or negative values)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=906" target="_top">#906</a>, #1074, Mac OS X 10.7.3 builds (conflicting memory allocation routines in Sphinx and external libs)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=901" target="_top">#901</a>, #1066, sending bigger request packets was broken in Python API</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=879" target="_top">#879</a>, filters on weight-dependent expressions did not work correctly</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=553" target="_top">#553</a>, default/missing port value was not handled properly in <a class="link" href="#api-func-setserver" title="9.1.3. SetServer">SetServer()</a> API call</p></li>
- <li class="listitem"><p>fixed that blended vs multiforms vs <a class="link" href="#conf-min-word-len" title="12.2.15. min_word_len">min_word_len</a> could hang the query parser</p></li>
- <li class="listitem"><p>fixed missing command-line switches documentation</p></li>
- </ul></div></div>
- <div class="sect1" title="A.23. Version 2.0.4-release, 02 mar 2012"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel204"></a>A.23. Version 2.0.4-release, 02 mar 2012</h2></div></div></div>
- <h3><a name="idp35120240"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=605" target="_top">#605</a>, pack vs mysql compress</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=783" target="_top">#783</a>, #862, #917, #985, #990, #1032 documentation bugs</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=885" target="_top">#885</a>, bitwise AND/OR were not available via API</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=984" target="_top">#984</a>, crash on indexing data with MAGIC_CODE_ZONE symbol</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1004" target="_top">#1004</a>, RT index loses words from dictionary on segments merging with <code class="code">id64</code> enabled</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1035" target="_top">#1035</a>, daemon doesn't properly handle FDs in case of socket overflow FD_SETSIZE ( *nix, <code class="code">preopen_indexes=0</code>, <code class="code">workers=threads</code> )</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1038" target="_top">#1038</a>, quoted string for API select</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1046" target="_top">#1046</a>, head SPZ overflow, snippet generation at non fast with SPZ</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1048" target="_top">#1048</a>, distributed index can't sort \ filter because of missed attributes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1050" target="_top">#1050</a>, expression ranker vs agents</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1051" target="_top">#1051</a>, added <a class="link" href="#mva" title="3.4. MVA (multi-valued attributes)">MVA64</a> support to <a class="link" href="#sphinx-udfs" title="6.1. Sphinx UDFs (User Defined Functions)">UDFs</a></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1054" target="_top">#1054</a>, <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">max_query_time</a> not handled properly on searching at <a class="link" href="#rt-indexes" title="Chapter 4. Real-time indexes">RT index</a></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1055" target="_top">#1055</a>, <a class="link" href="#conf-expansion-limit" title="12.4.35. expansion_limit">expansion_limit</a> on searching at RT disk chunks</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1057" target="_top">#1057</a>, daemon crashes on generating snippet with 0 documents provided</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1060" target="_top">#1060</a>, <a class="link" href="#api-func-buildexcerpts" title="9.7.1. BuildExcerpts">load_files_scattered</a> did not work</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1065" target="_top">#1065</a>, libsphinxclient vs distributed index (agents)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1067" target="_top">#1067</a>, modifiers were not escaped in legacy query emulation</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1071" target="_top">#1071</a>, master - agent communication got slower for a large query</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1076" target="_top">#1076</a>, #1077, (redundant copying, and a possible mutex leak with uservars)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1078" target="_top">#1078</a>, <code class="code">blended</code> vs FIELD_END</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1084" target="_top">#1084</a> crash \ index corruption on loading persist MVA</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1091" target="_top">#1091</a>, RT attach of plain index with string \ MVA attributes prior to regular attributes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1092" target="_top">#1092</a>, update got binlogged with wrong TID</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1098" target="_top">#1098</a>, crash on creating large expression</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1099" target="_top">#1099</a>, cleaning up temporary files on fail of indexing</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1100" target="_top">#1100</a>, missing <a class="link" href="#conf-xmlpipe-attr-bigint" title="12.1.35. xmlpipe_attr_bigint">xmlpipe_attr_bigint</a> config directive</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1101" target="_top">#1101</a>, now ignoring dashes within keywords when dash is not in charset_table</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1103" target="_top">#1103</a>, <code class="code">ZONE</code> operator worked incorrectly on more than one keyword in a simple zone</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1106" target="_top">#1106</a>, optimized <code class="code">WHERE id=value</code>, <code class="code">WHERE id IN (values_list)</code> clauses used in <code class="code">SELECT</code>, <code class="code">UPDATE</code> statements</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1112" target="_top">#1112</a>, Sphinx doesn't work out-of-the-box because of the collision of <code class="code">binlog_path</code> option</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1116" target="_top">#1116</a>, crash on <code class="code">FLUSH RTINDEX</code> unknown-index-name</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1117" target="_top">#1117</a>, occasional RT headers corruption (leading to crashes and/or missing results)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1119" target="_top">#1119</a>, missing expression ranker support in SphinxSE</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1120" target="_top">#1120</a>, negative <a class="link" href="#api-funcgroup-querying" title="9.6. Querying">total_found</a>, docs and hits counter on huge indexes</p></li>
- </ul></div></div>
- <div class="sect1" title="A.24. Version 2.0.3-release, 23 dec 2011"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel203"></a>A.24. Version 2.0.3-release, 23 dec 2011</h2></div></div></div>
- <h3><a name="idp35178576"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1031" target="_top">#1031</a>, SphinxQL parsing syntax for MVA at insert \ replace statements</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1027" target="_top">#1027</a>, stalls on attribute update in high-concurrency load</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1026" target="_top">#1026</a>, daemon crash on malformed API command</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1021" target="_top">#1021</a>, <code class="code">max_children</code> option has been ignored with <code class="code">workers=threads</code></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1020" target="_top">#1020</a>, crash on large attribute files loading</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1014" target="_top">#1014</a>, crash on rotation when index has been removed from config file (<code class="code">workers=threads</code>, *nix box)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=1001" target="_top">#1001</a>, broken MVA files in RT index while saving disk chunk</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=995" target="_top">#995</a>, crash on empty MVA updates</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=994" target="_top">#994</a>, crash on daemon shutdown with <code class="code">seamless_rotate=0</code> and <code class="code">workers=threads</code></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=993" target="_top">#993</a>, #998, crash on replay <code class="code">DELETE</code> statement vs RT index with <code class="code">dict=keywords</code>, fixed sequential <code class="code">INSERT</code> into <code class="code">dict=keywords</code> index right after <code class="code">INSERT</code> into <code class="code">dict=crc</code> index</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=991" target="_top">#991</a>, crash on indexing mssql source with <code class="code">mssql_unicode</code> enabled</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=983" target="_top">#983</a>, #950, crash on host name lookup (SphinxSE with MySQL 5.5)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=981" target="_top">#981</a>, snippet inconsistency with <code class="code">allow_empty=0</code></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=980" target="_top">#980</a>, broken index produced by index merge in rare cases</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=971" target="_top">#971</a>, absent error message at master on agent "maxed out"</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=695" target="_top">#695</a>, #815, #835, #866, malformed warnings in SphinxQL</p></li>
- <li class="listitem"><p>fixed build of SphinxSE with MySQL 5.1</p></li>
- <li class="listitem"><p>fixed crash log for 'fork' and 'prefork' workers</p></li>
- </ul></div></div>
- <div class="sect1" title="A.25. Version 2.0.2-beta, 15 nov 2011"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel202"></a>A.25. Version 2.0.2-beta, 15 nov 2011</h2></div></div></div>
- <h3><a name="idp35208192"></a>Major new features</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added keywords dictionary (<a class="link" href="#conf-dict" title="12.2.7. dict"><code class="code">dict=keywords</code></a>) support to RT indexes</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-rt-attr-multi" title="12.2.55. rt_attr_multi">MVA</a>, <a class="link" href="#conf-index-exact-words" title="12.2.42. index_exact_words">index_exact_words</a> support to RT indexes (#888)</p></li>
- <li class="listitem"><p>added <a class="link" href="#mva" title="3.4. MVA (multi-valued attributes)">MVA64</a> (a set of BIGINTs) support to both disk and RT indexes (<a class="link" href="#conf-rt-attr-multi-64" title="12.2.56. rt_attr_multi_64">rt_attr_multi_64</a> directive)</p></li>
- <li class="listitem"><p>added an <a class="link" href="#expression-ranker" title="5.4.3. Expression based ranker (SPH_RANK_EXPR)">expression-based ranker</a>, and a number of new ranking factors</p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-attach-index" title="8.25. ATTACH INDEX syntax">ATTACH INDEX</a> statement that converts a disk index to RT index</p></li>
- <li class="listitem"><p>added <code class="code">WHERE</code> clause support to <a class="link" href="#sphinxql-update" title="8.23. UPDATE syntax">UPDATE</a> statement</p></li>
- <li class="listitem"><p>added <code class="code">bigint</code>, <code class="code">float</code>, and <code class="code">MVA</code> attribute support to <a class="link" href="#sphinxql-update" title="8.23. UPDATE syntax">UPDATE</a> statement</p></li>
- </ul></div>
- <h3><a name="idp35222400"></a>New features</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added support for up to <a class="link" href="#fields" title="3.2. Full-text fields">256 searchable fields</a> (was up to 32 before)</p></li>
- <li class="listitem"><p>added <a class="link" href="#expr-func-fibonacci"><code class="code">FIBONACCI()</code></a> function to <a class="link" href="#expressions" title="5.5. Expressions, functions, and operators">expressions</a></p></li>
- <li class="listitem"><p>added <a class="link" href="#api-func-buildexcerpts" title="9.7.1. BuildExcerpts">load_files_scattered option</a> to snippets</p></li>
- <li class="listitem"><p>added implicit attribute type promotions in multi-index result sets (#939)</p></li>
- <li class="listitem"><p>added index names to <code class="filename">indexer</code> progress message on merge (#928)</p></li>
- <li class="listitem"><p>added <a class="link" href="#ref-searchd" title="7.2. searchd command reference"><code class="option">--replay-flags</code></a> switch to <code class="filename">searchd</code></p></li>
- <li class="listitem"><p>added string attribute support and a few previously missing <a class="link" href="#sphinxse-snippets" title="10.4. Building snippets (excerpts) via MySQL">snippets options</a> to SphinxSE</p></li>
- <li class="listitem"><p>added previously missing <a class="link" href="#api-func-status" title="9.7.5. Status"><code class="code">Status()</code></a>, <a class="link" href="#api-func-setconnecttimeout" title="9.1.5. SetConnectTimeout"><code class="code">SetConnectTimeout()</code></a> API calls to Python API</p></li>
- <li class="listitem"><p>added <code class="code">ORDER BY RAND()</code> support to <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">SELECT</a> statement</p></li>
- <li class="listitem"><p>added Sphinx version to Windows crash log</p></li>
- <li class="listitem"><p>added RT index support to <a class="link" href="#ref-indextool" title="7.4. indextool command reference">indextool</a> <code class="code">--check</code> (checks disk chunks only) (#877)</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-prefork-rotation-throttle" title="12.4.37. prefork_rotation_throttle">prefork_rotation_throttle</a> directive (preforked children restart delay, in milliseconds) (#873)</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-on-file-field-error" title="12.3.7. on_file_field_error">on_file_field_error</a> directive (different <code class="code">sql_file_field</code> handling modes)</p></li>
- <li class="listitem"><p>added manpages for all the programs</p></li>
- <li class="listitem"><p>added syslog logging support</p></li>
- <li class="listitem"><p>added sentence, paragraph, and zone support in <code class="code">html_strip_mode=retain</code> mode to snippets</p></li>
- <li class="listitem"><p>optimized search performance with many <code class="code">ZONE</code> operators</p></li>
- <li class="listitem"><p>improved suggestion tool (added Levenshtein limit, removed extra DB fetch)</p></li>
- <li class="listitem"><p>improved <a class="link" href="#conf-index-sp" title="12.2.8. index_sp">sentence extraction</a> (handles salutations, starting initials better now)</p></li>
- <li class="listitem"><p>changed <a class="link" href="#conf-max-filter-values" title="12.4.16. max_filter_values">max_filter_values</a> sanity check to 10M values</p></li>
- </ul></div>
- <h3><a name="idp35250832"></a>New SphinxQL features</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added <a class="link" href="#sphinxql-flush-rtindex" title="8.26. FLUSH RTINDEX syntax">FLUSH RTINDEX</a> statement</p></li>
- <li class="listitem"><p>added <code class="code">dist_threads</code> directive (parallel processing), <code class="code">load_files</code>, <code class="code">load_files_scattered</code>, batch syntax (multiple documents) support to <a class="link" href="#sphinxql-call-snippets" title="8.14. CALL SNIPPETS syntax">CALL SNIPPETS</a> statement</p></li>
- <li class="listitem"><p>added <code class="code">OPTION comment='...'</code> support to <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">SELECT</a> statement (#944)</p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-show-variables" title="8.20. SHOW VARIABLES syntax">SHOW VARIABLES</a> statement</p></li>
- <li class="listitem"><p>added dummy handlers for <a class="link" href="#sphinxql-set-transaction" title="8.10. SET TRANSACTION syntax">SET TRANSACTION</a>, <a class="link" href="#sphinxql-set" title="8.9. SET syntax">SET NAMES</a>, <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">SELECT @@sysvar</a> statements, and for <code class="code">sql_auto_is_null</code>, <code class="code">sql_mode</code>, and @@-style variables (like @@tx_isolation) in <a class="link" href="#sphinxql-set" title="8.9. SET syntax">SET</a> statement (better MySQL frameworks/connectors support)</p></li>
- <li class="listitem"><p>added complete <a class="link" href="#sphinxql-log-format" title="5.9.2. SphinxQL log format">SphinxQL error logging</a> (all errors are logged now, not just <code class="code">SELECT</code>s)</p></li>
- <li class="listitem"><p>improved <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">SELECT</a> statement syntax, made expressions aliases optional</p></li>
- </ul></div>
- <h3><a name="idp35267216"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=982" target="_top">#982</a>, empty binlogs prevented upgraded daemon from starting up</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=978" target="_top">#978</a>, libsphinxclient build failed on sparc/sparc64 solaris</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=977" target="_top">#977</a>, eliminated (most) compiler warnings</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=969" target="_top">#969</a>, broken expression MVA/string argument type check prevented IF(IN(mva..)) and other valid expressions from working</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=966" target="_top">#966</a>, NOT IN @global_var syntax was not supported</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=958" target="_top">#958</a>, mem_limit over INT_MAX was not clamped</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=954" target="_top">#954</a>, UTF-8 snippets could crash on malformed data</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=951" target="_top">#951</a>, UTF-8 snippets could hang on malformed data</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=947" target="_top">#947</a>, bad float column type was reported via SphinxQL, breaking some clients</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=940" target="_top">#940</a>, group-by with a small enough <code class="code">max_matches</code> limit could occasionally crash and/or sort wrongly</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=932" target="_top">#932</a>, sending huge queries to agents occasionally failed (mainly on Windows)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=926" target="_top">#926</a>, snippets did not highlight wildcard matches with morphology enabled</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=918" target="_top">#918</a>, crash logger did not report a proper query in <code class="code">dist_threads</code> case</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=916" target="_top">#916</a>, watchdog caused (endless) respawns if there was a crash during shutdown</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=904" target="_top">#904</a>, attribute names were not forcibly case-folded in some API calls (eg. <code class="code">SetGroupDistinct</code>)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=902" target="_top">#902</a>, query parser did not support <code class="code">stopword_step=0</code></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=897" target="_top">#897</a>, network sockets dangled (open but unattended) while replaying binlog</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=855" target="_top">#855</a>, <code class="code">allow_empty</code> option in snippets did not always work correctly</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=854" target="_top">#854</a>, indexing with many <code class="code">bigint</code> attributes and <code class="code">docinfo=inline</code> crashed</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=838" target="_top">#838</a>, RT MVA insertion did not sort MVA values, caused matching issues</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=833" target="_top">#833</a>, duplicate MVA values were not eliminated on update</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=832" target="_top">#832</a>, certain (overshort/incorrect) documents crashed indexing MS SQL Unicode columns</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=829" target="_top">#829</a>, query parser did not properly handle numerics with <code class="code">blend_chars</code></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=814" target="_top">#814</a>, group-by string attributes in RT indexes did not always work correctly</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=812" target="_top">#812</a>, utf8 stemming produced unexpected stems on words with single-byte chars</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=808" target="_top">#808</a>, huge queries crashed logging with <code class="code">query_log_format=sphinxql</code></p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=806" target="_top">#806</a>, stray single-star keyword crashed on querying</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=798" target="_top">#798</a>, snippets ignored <code class="code">index_exact_words</code> in query_mode</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=797" target="_top">#797</a>, RT klist loader had an occasional off-by-one crash</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=791" target="_top">#791</a>, <code class="code">preopen_indexes</code> erroneously defaulted to 0 on Windows</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=790" target="_top">#790</a>, huge dictionaries (over 4 GB) did not work</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=786" target="_top">#786</a>, <code class="code">inplace_enable</code> could occasionally corrupt the indexes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=775" target="_top">#775</a>, doc had a typo (soundex vs metaphone)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=772" target="_top">#772</a>, snippets duplicated blended chars on a SPZ boundary</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=762" target="_top">#762</a>, query parser truncated digit-only keywords over 15 digits</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=736" target="_top">#736</a>, query parser did not properly handle blended/special char sequence</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=726" target="_top">#726</a>, rotation of an index with a changed attribute count crashed</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=687" target="_top">#687</a>, querying multiple indexes with index weights and sort-by expression produced incorrect (unadjusted) weights</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=585" target="_top">#585</a>, (unsupported) string ordinals were silently zeroed out with <code class="code">docinfo=inline</code> (instead of failing)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=583" target="_top">#583</a>, certain keywords could occasionally crash multiforms</p></li>
- <li class="listitem"><p>fixed that concurrent MVA updates could crash</p></li>
- <li class="listitem"><p>fixed that query parser did not ignore a pure blended token with a leading modifier</p></li>
- <li class="listitem"><p>fixed that query parser did not properly handle a modifier followed by a dash</p></li>
- <li class="listitem"><p>fixed that substring indexing with <code class="code">dict=crc</code> did not support <code class="code">index_exact_words</code> and <code class="code">zones</code></p></li>
- <li class="listitem"><p>fixed that in a rare edge case common subtree cache could crash</p></li>
- <li class="listitem"><p>fixed that empty result set returned the full schema (rather than <code class="code">SELECT</code>-ed columns)</p></li>
- <li class="listitem"><p>fixed that SphinxQL did not have a sanity check for (currently unsupported) result set schemas over 250 attributes</p></li>
- <li class="listitem"><p>fixed that updates on regular indexes were not binlogged</p></li>
- <li class="listitem"><p>fixed that multi-query optimization check for expressions did not handle multi-index case</p></li>
- <li class="listitem"><p>fixed that SphinxSE did not build vs MySQL 5.5 release</p></li>
- <li class="listitem"><p>fixed that <code class="code">proximity_bm25</code> ranker could yield incorrect weight on duplicated keywords</p></li>
- <li class="listitem"><p>fixed that prefix expansion with <code class="code">dict=keywords</code> occasionally crashed</p></li>
- <li class="listitem"><p>fixed that <code class="code">strip_path</code> did not work on RT disk chunks</p></li>
- <li class="listitem"><p>fixed that exclude filters were not properly logged in <code class="code">query_log_format=sphinxql</code> mode</p></li>
- <li class="listitem"><p>fixed that plain string attribute check in <code class="filename">indextool</code> <code class="code">--check</code> was broken</p></li>
- <li class="listitem"><p>fixed that Java API did not allow specifying a connection timeout</p></li>
- <li class="listitem"><p>fixed that ordinal and wordcount attributes could not be fetched via SphinxQL</p></li>
- <li class="listitem"><p>fixed that in a rare edge case <code class="code">OR/ORDER</code> would not match properly</p></li>
- <li class="listitem"><p>fixed that sending (huge) query response did not handle <code class="code">EINTR</code> properly</p></li>
- <li class="listitem"><p>fixed that <code class="code">SPH04</code> ranker could yield incorrectly high weight in some cases</p></li>
- <li class="listitem"><p>fixed that C API did not allow zeroing out cutoff, <code class="code">max_matches</code> settings</p></li>
- <li class="listitem"><p>fixed that on a persistent connection there were occasionally issues handling signals while doing network reads/waits</p></li>
- <li class="listitem"><p>fixed that in a rare edge case (field start modifier in a certain complex query) querying crashed</p></li>
- <li class="listitem"><p>fixed that snippets did not support <code class="code">dist_threads</code> with <code class="code">load_files=0</code></p></li>
- <li class="listitem"><p>fixed that in some extremely rare edge cases tiny parts of an index could end up corrupted with <code class="code">dict=keywords</code></p></li>
- <li class="listitem"><p>fixed that field/zone conditions were not propagated to expanded keywords with <code class="code">dict=keywords</code></p></li>
- </ul></div></div>
- <div class="sect1" title="A.26. Version 2.0.1-beta, 22 apr 2011"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel201"></a>A.26. Version 2.0.1-beta, 22 apr 2011</h2></div></div></div>
- <h3><a name="idp35350992"></a>New general features</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added remapping support to <a class="link" href="#conf-blend-chars" title="12.2.47. blend_chars">blend_chars</a> directive</p></li>
- <li class="listitem"><p>added multi-threaded snippet batches support (requires a batch sent via API, <a class="link" href="#conf-dist-threads" title="12.4.24. dist_threads">dist_threads</a>, and <code class="code">load_files</code>)</p></li>
- <li class="listitem"><p>added collations (<a class="link" href="#conf-collation-server" title="12.4.29. collation_server">collation_server</a>, <a class="link" href="#conf-collation-libc-locale" title="12.4.30. collation_libc_locale">collation_libc_locale directives</a>)</p></li>
- <li class="listitem"><p>added support for sorting and grouping on string attributes (<code class="code">ORDER BY</code>, <code class="code">GROUP BY</code>, <code class="code">WITHIN GROUP ORDER BY</code>)</p></li>
- <li class="listitem"><p>added UDF support (<a class="link" href="#conf-plugin-dir" title="12.4.31. plugin_dir">plugin_dir</a> directive; <a class="link" href="#sphinxql-create-function" title="8.18. CREATE FUNCTION syntax">CREATE FUNCTION</a>, <a class="link" href="#sphinxql-drop-function" title="8.19. DROP FUNCTION syntax">DROP FUNCTION</a> statements)</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-query-log-format" title="12.4.4. query_log_format">query_log_format</a> directive, <a class="link" href="#sphinxql-set" title="8.9. SET syntax">SET GLOBAL query_log_format | log_level = ...</a> statements; and connection id tracking</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-sql-column-buffers" title="12.1.25. sql_column_buffers">sql_column_buffers</a> directive, fixed out-of-buffer column handling in ODBC/MS SQL sources</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-blend-mode" title="12.2.48. blend_mode">blend_mode</a> directive that enables indexing multiple variants of a blended sequence</p></li>
- <li class="listitem"><p>added UNIX socket support to C, Ruby APIs</p></li>
- <li class="listitem"><p>added ranged query support to <a class="link" href="#conf-sql-joined-field" title="12.1.13. sql_joined_field">sql_joined_field</a></p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-rt-flush-period" title="12.4.33. rt_flush_period">rt_flush_period</a> directive</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-thread-stack" title="12.4.34. thread_stack">thread_stack</a> directive</p></li>
- <li class="listitem"><p>added SENTENCE, PARAGRAPH, ZONE operators (and <a class="link" href="#conf-index-sp" title="12.2.8. index_sp">index_sp</a>, <a class="link" href="#conf-index-zones" title="12.2.9. index_zones">index_zones</a> directives)</p></li>
- <li class="listitem"><p>added keywords dictionary support (and <a class="link" href="#conf-dict" title="12.2.7. dict">dict</a>, <a class="link" href="#conf-expansion-limit" title="12.4.35. expansion_limit">expansion_limit</a> directives)</p></li>
- <li class="listitem"><p>added <code class="code">passage_boundary</code>, <code class="code">emit_zones</code> options to snippets</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-watchdog" title="12.4.36. watchdog">a watchdog process</a> in threaded mode</p></li>
- <li class="listitem"><p>added persistent MVA updates</p></li>
- <li class="listitem"><p>added crash dumps to <code class="filename">searchd.log</code>, deprecated <code class="code">crash_log_path</code> directive</p></li>
- <li class="listitem"><p>added id32 index support in id64 binaries (EXPERIMENTAL)</p></li>
- <li class="listitem"><p>added SphinxSE support for DELETE and REPLACE on SphinxQL tables</p></li>
- </ul></div>
- <h3><a name="idp35382784"></a>New SphinxQL features</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added new, more SQL compliant SphinxQL syntax; and a compat_sphinxql_magics directive</p></li>
- <li class="listitem"><p>added <a class="link" href="#expr-func-crc32">CRC32()</a>, <a class="link" href="#expr-func-day">DAY()</a>, <a class="link" href="#expr-func-month">MONTH()</a>, <a class="link" href="#expr-func-year">YEAR()</a>, <a class="link" href="#expr-func-yearmonth">YEARMONTH()</a>, <a class="link" href="#expr-func-yearmonthday">YEARMONTHDAY()</a> functions</p></li>
- <li class="listitem"><p>added <a class="link" href="#expr-ari-ops">DIV, MOD, and % operators</a></p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">reverse_scan=(0|1)</a> option to SELECT</p></li>
- <li class="listitem"><p>added support for MySQL packets over 16M</p></li>
- <li class="listitem"><p>added dummy SHOW VARIABLES, SHOW COLLATION, and SET character_set_results support (to support handshake with certain client libraries and frameworks)</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-mysql-version-string" title="12.4.32. mysql_version_string">mysql_version_string</a> directive (to work around picky MySQL client libraries)</p></li>
- <li class="listitem"><p>added support for global filter variables, <a class="link" href="#sphinxql-set" title="8.9. SET syntax">SET GLOBAL @uservar=(int_list)</a> </p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-delete" title="8.8. DELETE syntax">DELETE ... IN (id_list)</a> syntax support</p></li>
- <li class="listitem"><p>added C-style comments syntax (for example, <code class="code">SELECT /*!40000 some comment*/ id FROM test</code>)</p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-update" title="8.23. UPDATE syntax">UPDATE ... WHERE id=X</a> syntax support</p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-multi-queries" title="8.40. Multi-statement queries">SphinxQL multi-query support</a></p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-describe" title="8.17. DESCRIBE syntax">DESCRIBE</a>, <a class="link" href="#sphinxql-show-tables" title="8.16. SHOW TABLES syntax">SHOW TABLES</a> statements</p></li>
- </ul></div>
- <h3><a name="idp35403696"></a>New command-line switches</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added <code class="code">--print-queries</code> switch to <code class="filename">indexer</code> that dumps SQL queries it runs</p></li>
- <li class="listitem"><p>added <code class="code">--sighup-each</code> switch to <code class="filename">indexer</code> that rotates indexes one by one</p></li>
- <li class="listitem"><p>added <code class="code">--strip-path</code> switch to <code class="filename">searchd</code> that skips file paths embedded in the index(-es)</p></li>
- <li class="listitem"><p>added <code class="code">--dumpconfig</code> switch to <code class="filename">indextool</code> that dumps an index header in <code class="filename">sphinx.conf</code> format</p></li>
- </ul></div>
- <h3><a name="idp35412160"></a>Major changes and optimizations</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>changed default preopen_indexes value to 1</p></li>
- <li class="listitem"><p>optimized English stemmer (results in 1.3x faster snippets and indexing with morphology=stem_en)</p></li>
- <li class="listitem"><p>optimized snippets, 1.6x general speedup</p></li>
- <li class="listitem"><p>optimized const-list parsing in SphinxQL</p></li>
- <li class="listitem"><p>optimized full-document highlighting CPU/RAM use</p></li>
- <li class="listitem"><p>optimized binlog replay (improved performance on K-list update)</p></li>
- </ul></div>
- <h3><a name="idp35416496"></a>Bug fixes</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=767" target="_top">#767</a>, joined fields vs ODBC sources</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=757" target="_top">#757</a>, wordforms shared by indexes with different settings</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=733" target="_top">#733</a>, loading of indexes in formats prior to v.14</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=763" target="_top">#763</a>, occasional snippets failures</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=648" target="_top">#648</a>, occasionally missed rotations on multiple SIGHUPs</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=750" target="_top">#750</a>, an RT segment merge leading to false positives and/or crashes in some cases</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=755" target="_top">#755</a>, zones in snippets output</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=754" target="_top">#754</a>, stopwords counting at snippet passage generation</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=723" target="_top">#723</a>, fork/prefork index rotation in children processes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=696" target="_top">#696</a>, freeze on zero threshold in quorum operator</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=732" target="_top">#732</a>, query escaping in SphinxSE</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=739" target="_top">#739</a>, occasional crashes in MT mode on result set send</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=746" target="_top">#746</a>, crash with a named list in SphinxQL option</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=674" target="_top">#674</a>, AVG vs group order</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=734" target="_top">#734</a>, occasional crashes attempting to report NULL errors</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=829" target="_top">#829</a>, tail hits within field position modifier</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=712" target="_top">#712</a>, missing query_mode, force_all_words snippet option defaults in Java API</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=721" target="_top">#721</a>, added dupe removal on RT batch INSERT/REPLACE</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=720" target="_top">#720</a>, potential extraneous highlighting after a blended keyword</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=702" target="_top">#702</a>, exceptions vs star search</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=666" target="_top">#666</a>, ext2 query grouping vs exceptions</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=688" target="_top">#688</a>, WITHIN GROUP ORDER BY related crash</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=660" target="_top">#660</a>, multi-queue batches vs dist_threads</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=678" target="_top">#678</a>, crash on dict=keywords vs xmlpipe vs min_prefix_len</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=596" target="_top">#596</a>, ECHILD vs scripted configs</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=653" target="_top">#653</a>, dependency in expression, sorting, grouping</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=661" target="_top">#661</a>, concurrent distributed searches vs workers=threads</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=646" target="_top">#646</a>, crash on status query via UNIX socket</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=589" target="_top">#589</a>, libexpat.dll missing from some Win32 build types</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=574" target="_top">#574</a>, quorum match order</p></li>
- <li class="listitem"><p>fixed multiple documentation issues (#372, #483, #495, #601, #623, #632, #654)</p></li>
- <li class="listitem"><p>fixed that ondisk_dict did not affect RT indexes</p></li>
- <li class="listitem"><p>fixed that string attributes check in indextool --check was erroneously sensitive to string data order</p></li>
- <li class="listitem"><p>fixed a rare crash when using BEFORE operator</p></li>
- <li class="listitem"><p>fixed an issue with multiforms vs BuildKeywords()</p></li>
- <li class="listitem"><p>fixed an edge case in OR operator (emitted wrong hits order sometimes)</p></li>
- <li class="listitem"><p>fixed aliasing in docinfo accessors that led to very rare crashes and/or missing results</p></li>
- <li class="listitem"><p>fixed a syntax error on a short token at the end of a query</p></li>
- <li class="listitem"><p>fixed id64 filtering and performance degradation with range filters</p></li>
- <li class="listitem"><p>fixed missing rankers in libsphinxclient</p></li>
- <li class="listitem"><p>fixed missing SPH04 ranker in SphinxSE</p></li>
- <li class="listitem"><p>fixed column names in sql_attr_multi sample (works with example.sql now)</p></li>
- <li class="listitem"><p>fixed an issue with distributed local+remote setup vs aggregate functions</p></li>
- <li class="listitem"><p>fixed case-sensitive column names in RT indexes</p></li>
- <li class="listitem"><p>fixed a crash vs strings from multiple indexes in result set</p></li>
- <li class="listitem"><p>fixed blended keywords vs snippets</p></li>
- <li class="listitem"><p>fixed secure_connection vs MySQL protocol vs MySQL.NET connector</p></li>
- <li class="listitem"><p>fixed that Python API did not work with Python 2.3</p></li>
- <li class="listitem"><p>fixed overshort_step vs snippets</p></li>
- <li class="listitem"><p>fixed keyword statistics vs dist_threads searching</p></li>
- <li class="listitem"><p>fixed multiforms vs query parsing (vs quorum)</p></li>
- <li class="listitem"><p>fixed missed quorum words vs RT segments</p></li>
- <li class="listitem"><p>fixed blended keywords occasionally skipping extra character when querying (eg "abc[]")</p></li>
- <li class="listitem"><p>fixed Python API to handle int32 values</p></li>
- <li class="listitem"><p>fixed prefix and infix indexing of joined fields</p></li>
- <li class="listitem"><p>fixed MVA ranged query</p></li>
- <li class="listitem"><p>fixed missing blended state reset on document boundary</p></li>
- <li class="listitem"><p>fixed a crash on missing index while replaying binlog</p></li>
- <li class="listitem"><p>fixed an error message on filter values overrun</p></li>
- <li class="listitem"><p>fixed passage duplication in snippets in weight_order mode</p></li>
- <li class="listitem"><p>fixed select clauses over 1K vs remote agents</p></li>
- <li class="listitem"><p>fixed overshort accounting vs soft-whitespace tokens</p></li>
- <li class="listitem"><p>fixed rotation vs workers=threads</p></li>
- <li class="listitem"><p>fixed schema issues vs distributed indexes</p></li>
- <li class="listitem"><p>fixed blended-escaped sequence parsing issue</p></li>
- <li class="listitem"><p>fixed MySQL IN clause (values order etc)</p></li>
- <li class="listitem"><p>fixed that post_index did not execute when 0 documents were successfully indexed</p></li>
- <li class="listitem"><p>fixed field position limit vs many hits</p></li>
- <li class="listitem"><p>fixed that joined fields missed an end marker at field end</p></li>
- <li class="listitem"><p>fixed that xxx_step settings were missing from .sph index header</p></li>
- <li class="listitem"><p>fixed libsphinxclient missing request cleanup in sphinx_query() (eg after network errors)</p></li>
- <li class="listitem"><p>fixed that index_weights were ignored when grouping</p></li>
- <li class="listitem"><p>fixed multi wordforms vs blend_chars</p></li>
- <li class="listitem"><p>fixed broken MVA output in SphinxQL</p></li>
- <li class="listitem"><p>fixed a few RT leaks</p></li>
- <li class="listitem"><p>fixed an issue with RT string storage going missing</p></li>
- <li class="listitem"><p>fixed an issue with repeated queries vs dist_threads</p></li>
- <li class="listitem"><p>fixed an issue with string attributes vs buffer overrun in SphinxQL</p></li>
- <li class="listitem"><p>fixed unexpected character data warnings within ignored xmlpipe tags</p></li>
- <li class="listitem"><p>fixed a crash in snippets with NEAR syntax query</p></li>
- <li class="listitem"><p>fixed passage duplication in snippets</p></li>
- <li class="listitem"><p>fixed libsphinxclient SIGPIPE handling</p></li>
- <li class="listitem"><p>fixed libsphinxclient vs VS2003 compiler bug</p></li>
- </ul></div></div>
- <div class="sect1" title="A.27. Version 1.10-beta, 19 jul 2010"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel110"></a>A.27. Version 1.10-beta, 19 jul 2010</h2></div></div></div>
- <div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added RT indexes support (<a class="xref" href="#rt-indexes" title="Chapter 4. Real-time indexes">Chapter 4, <i>Real-time indexes</i></a>)</p></li>
- <li class="listitem"><p>added prefork and threads support (<a class="link" href="#conf-workers" title="12.4.23. workers">workers</a> directives)</p></li>
- <li class="listitem"><p>added multi-threaded local searches in distributed indexes (<a class="link" href="#conf-dist-threads" title="12.4.24. dist_threads">dist_threads</a> directive)</p></li>
- <li class="listitem"><p>added common subquery cache (<a class="link" href="#conf-subtree-docs-cache" title="12.4.21. subtree_docs_cache">subtree_docs_cache</a>,
- <a class="link" href="#conf-subtree-hits-cache" title="12.4.22. subtree_hits_cache">subtree_hits_cache</a> directives)</p></li>
- <li class="listitem"><p>added string attributes support (<a class="link" href="#conf-sql-attr-string" title="12.1.23. sql_attr_string">sql_attr_string</a>,
- <a class="link" href="#conf-sql-field-string" title="12.1.26. sql_field_string">sql_field_string</a>,
- <a class="link" href="#conf-xmlpipe-attr-string" title="12.1.41. xmlpipe_attr_string">xmlpipe_attr_string</a>,
- <a class="link" href="#conf-xmlpipe-field-string" title="12.1.33. xmlpipe_field_string">xmlpipe_field_string</a> directives)</p></li>
- <li class="listitem"><p>added indexing-time word counter (<code class="option">sql_attr_str2wordcount</code>,
- <code class="option">sql_field_str2wordcount</code> directives)</p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql-call-snippets" title="8.14. CALL SNIPPETS syntax">CALL SNIPPETS()</a>,
- <a class="link" href="#sphinxql-call-keywords" title="8.15. CALL KEYWORDS syntax">CALL KEYWORDS()</a> SphinxQL statements</p></li>
- <li class="listitem"><p>added <code class="option">field_weights</code>, <code class="option">index_weights</code> options to
- SphinxQL <a class="link" href="#sphinxql-select" title="8.1. SELECT syntax">SELECT</a> statement</p></li>
- <li class="listitem"><p>added insert-only SphinxQL-talking tables to SphinxSE (connection='sphinxql://host[:port]/index')</p></li>
- <li class="listitem"><p>added <code class="option">select</code> option to SphinxSE queries</p></li>
- <li class="listitem"><p>added backtrace on crash to <code class="filename">searchd</code></p></li>
- <li class="listitem"><p>added SQL+FS indexing, aka loading files by names fetched from SQL
- (<a class="link" href="#conf-sql-file-field" title="12.1.27. sql_file_field">sql_file_field</a> directive)</p></li>
- <li class="listitem"><p>added a watchdog in threads mode to <code class="filename">searchd</code></p></li>
- <li class="listitem"><p>added automatic row phantoms elimination to index merge</p></li>
- <li class="listitem"><p>added hitless indexing support (hitless_words directive)</p></li>
- <li class="listitem"><p>added --check, --strip-path, --htmlstrip, --dumphitlist ... --wordid switches to <a class="link" href="#ref-indextool" title="7.4. indextool command reference">indextool</a></p></li>
- <li class="listitem"><p>added --stopwait, --logdebug switches to <a class="link" href="#ref-searchd" title="7.2. searchd command reference">searchd</a></p></li>
- <li class="listitem"><p>added --dump-rows, --verbose switches to <a class="link" href="#ref-indexer" title="7.1. indexer command reference">indexer</a></p></li>
- <li class="listitem"><p>added "blended" characters indexing support (<a class="link" href="#conf-blend-chars" title="12.2.47. blend_chars">blend_chars</a> directive)</p></li>
- <li class="listitem"><p>added joined/payload field indexing (<a class="link" href="#conf-sql-joined-field" title="12.1.13. sql_joined_field">sql_joined_field</a> directive)</p></li>
- <li class="listitem"><p>added <a class="link" href="#api-func-flushattributes" title="9.7.6. FlushAttributes">FlushAttributes() API call</a></p></li>
- <li class="listitem"><p>added query_mode, force_all_words, limit_passages, limit_words, start_passage_id, load_files, html_strip_mode,
- allow_empty options, and %PASSAGE_ID% macro in before_match, after_match options
- to <a class="link" href="#api-func-buildexcerpts" title="9.7.1. BuildExcerpts">BuildExcerpts()</a> API call</p></li>
- <li class="listitem"><p>added @groupby/@count/@distinct columns support to SELECT (but not to expressions)</p></li>
- <li class="listitem"><p>added query-time keyword expansion support (<a class="link" href="#conf-expand-keywords" title="12.2.46. expand_keywords">expand_keywords</a> directive,
- <a class="link" href="#api-func-setrankingmode" title="9.3.2. SetRankingMode">SPH_RANK_SPH04</a> ranker)</p></li>
- <li class="listitem"><p>added query batch size limit option (<a class="link" href="#conf-max-batch-queries" title="12.4.20. max_batch_queries">max_batch_queries</a> directive; was hardcoded)</p></li>
- <li class="listitem"><p>added SINT() function to expressions</p></li>
- <li class="listitem"><p>improved SphinxQL syntax error reporting</p></li>
- <li class="listitem"><p>improved expression optimizer (better constant handling)</p></li>
- <li class="listitem"><p>improved dash handling within keywords (no longer treated as an operator)</p></li>
- <li class="listitem"><p>improved snippets (better passage selection/trimming, around option now a hard limit)</p></li>
- <li class="listitem"><p>optimized index format that yields ~20-30% smaller indexes</p></li>
- <li class="listitem"><p>optimized sorting code (indexing time 1-5% faster on average; 100x faster in worst case)</p></li>
- <li class="listitem"><p>optimized searchd startup time (moved .spa preindexing to indexer), added a progress bar</p></li>
- <li class="listitem"><p>optimized queries against indexes with many attributes (eliminated redundant copying)</p></li>
- <li class="listitem"><p>optimized 1-keyword queries (performance regression introduced in 0.9.9)</p></li>
- <li class="listitem"><p>optimized SphinxQL protocol overheads, and performance on bigger result sets</p></li>
- <li class="listitem"><p>optimized unbuffered attributes writes on index merge</p></li>
- <li class="listitem"><p>changed attribute handling, duplicate names are strictly forbidden now</p></li>
- <li class="listitem"><p>fixed that SphinxQL sessions could stall shutdown</p></li>
- <li class="listitem"><p>fixed consts with leading minus in SphinxQL</p></li>
- <li class="listitem"><p>fixed AND/OR precedence in expressions</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=334" target="_top">#334</a>, AVG() on integers was not computed in floats</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=371" target="_top">#371</a>, attribute flush vs 2+ GB files</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=373" target="_top">#373</a>, segfault on distributed queries vs certain libc versions</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=398" target="_top">#398</a>, stopwords not stopped in prefix/infix indexes</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=404" target="_top">#404</a>, erroneous MVA failures in indextool --check</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=408" target="_top">#408</a>, segfault on certain query batches (regular scan, plus a scan with MVA groupby)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=431" target="_top">#431</a>, occasional shutdown hangs in preforked workers</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=436" target="_top">#436</a>, trunk checkout builds vs Solaris sh</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=440" target="_top">#440</a>, escaping vs parentheses declared as valid in charset_table</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=442" target="_top">#442</a>, occasional non-aligned free in MVA indexing</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=447" target="_top">#447</a>, occasional crashes in MVA indexing</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=449" target="_top">#449</a>, pconn busyloop on aborted clients on certain arches</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=465" target="_top">#465</a>, build issue on Alpha</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=468" target="_top">#468</a>, build issue in libsphinxclient</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=472" target="_top">#472</a>, multiple stopword files failing to load</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=489" target="_top">#489</a>, buffer overflow in query logging</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=493" target="_top">#493</a>, Python API assertion after error returned from Query()</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=500" target="_top">#500</a>, malformed MySQL packet when sending MVAs</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=504" target="_top">#504</a>, SIGPIPE in libsphinxclient</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=506" target="_top">#506</a>, better MySQL protocol commands support in SphinxQL (PING etc)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=509" target="_top">#509</a>, indexing ranged results from stored procedures</p></li>
- </ul></div></div>
- <div class="sect1" title="A.28. Version 0.9.9-release, 02 dec 2009"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel099"></a>A.28. Version 0.9.9-release, 02 dec 2009</h2></div></div></div>
- <div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added Open, Close, Status calls to libsphinxclient (C API)</p></li>
- <li class="listitem"><p>added automatic persistent connection reopening to PHP, Python APIs</p></li>
- <li class="listitem"><p>added 64-bit value/range filters, fullscan mode support to SphinxSE</p></li>
- <li class="listitem"><p>MAJOR CHANGE, our IANA assigned ports are 9312 and 9306 respectively (goodbye, trusty 3312)</p></li>
- <li class="listitem"><p>MAJOR CHANGE, erroneous filters now fail with an error (were silently ignored before)</p></li>
- <li class="listitem"><p>optimized unbuffered .spa writes on merge</p></li>
- <li class="listitem"><p>optimized 1-keyword queries ranking in extended2 mode</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=441" target="_top">#441</a> (IO race in case of highly concurrent load on a preopened)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=434" target="_top">#434</a> (distributed indexes were not searchable via MySQL protocol)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=317" target="_top">#317</a> (indexer MVA progress counter)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=398" target="_top">#398</a> (stopwords not removed from search query)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=328" target="_top">#328</a> (broken cutoff)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=250" target="_top">#250</a> (now quoting paths w/spaces when installing Windows service)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=348" target="_top">#348</a> (K-list was not updated on merge)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=357" target="_top">#357</a> (destination index was not K-list-filtered on merge)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=369" target="_top">#369</a> (precaching .spi files over 2 GBs)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=438" target="_top">#438</a> (missing boundary proximity matches)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=371" target="_top">#371</a> (.spa flush in case of files over 2 GBs)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=373" target="_top">#373</a> (crashes on distributed queries via mysql proto)</p></li>
- <li class="listitem"><p>fixed critical bugs in hit merging code</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=424" target="_top">#424</a> (ordinals could be misplaced during indexing in case of bitfields etc)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=426" target="_top">#426</a> (failing SE build on Solaris; thanks to Ben Beecher)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=423" target="_top">#423</a> (typo in SE caused crash on SHOW STATUS)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=363" target="_top">#363</a> (handling of read_timeout over 2147 seconds)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=376" target="_top">#376</a> (minor error message mismatch)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=413" target="_top">#413</a> (minus in SphinxQL)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=417" target="_top">#417</a> (floats w/o leading digit in SphinxQL)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=403" target="_top">#403</a> (typo in SetFieldWeights name in Java API)</p></li>
- <li class="listitem"><p>fixed index rotation vs persistent connections</p></li>
- <li class="listitem"><p>fixed backslash handling in SphinxQL parser</p></li>
- <li class="listitem"><p>fixed uint unpacking vs. PHP 5.2.9 (possibly other versions)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=325" target="_top">#325</a> (filter settings send from SphinxSE)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=352" target="_top">#352</a> (removed mysql wrapper around close() in SphinxSE)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=389" target="_top">#389</a> (display error messages through SphinxSE status variable)</p></li>
- <li class="listitem"><p>fixed linking with port-installed iconv on OS X</p></li>
- <li class="listitem"><p>fixed negative 64-bit unpacking in PHP API</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=349" target="_top">#349</a> (escaping backslash in query emulation mode)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=320" target="_top">#320</a> (disabled multi-query route when select items differ)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=353" target="_top">#353</a> (better quorum counts check)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=341" target="_top">#341</a> (merging of trailing hits; maybe other ranking issues too)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=368" target="_top">#368</a> (partially; @field "" caused crashes; now resets field limit)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=365" target="_top">#365</a> (field mask was leaking on field-limited terms)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=339" target="_top">#339</a> (updated debug query dumper)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=361" target="_top">#361</a> (added SetConnectTimeout() to Java API)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=338" target="_top">#338</a> (added missing fullscan to mode check in Java API)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=323" target="_top">#323</a> (added floats support to SphinxQL)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=340" target="_top">#340</a> (support listen=port:proto syntax too)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=332" target="_top">#332</a> (\r is legal SphinxQL space now)</p></li>
- <li class="listitem"><p>fixed xmlpipe2 K-lists</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=322" target="_top">#322</a> (safety gaps in mysql protocol row buffer)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=313" target="_top">#313</a> (return keyword stats for empty indexes too)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=344" target="_top">#344</a> (invalid checkpoints after merge)</p></li>
- <li class="listitem"><p>fixed <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=326" target="_top">#326</a> (missing CLOCK_xxx on FreeBSD)</p></li>
- </ul></div></div>
- <div class="sect1" title="A.29. Version 0.9.9-rc2, 08 apr 2009"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel099rc2"></a>A.29. Version 0.9.9-rc2, 08 apr 2009</h2></div></div></div>
- <div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added IsConnectError(), Open(), Close() calls to Java API (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=240" target="_top">#240</a>)</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-read-buffer" title="12.4.18. read_buffer">read_buffer</a>, <a class="link" href="#conf-read-unhinted" title="12.4.19. read_unhinted">read_unhinted</a> directives</p></li>
- <li class="listitem"><p>added checks for build options returned by mysql_config (builds on Solaris now)</p></li>
- <li class="listitem"><p>added fixed-RAM index merge (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=169" target="_top">#169</a>)</p></li>
- <li class="listitem"><p>added logging chained queries count in case of (optimized) multi-queries</p></li>
- <li class="listitem"><p>added <a class="link" href="#sort-expr" title="5.6. SPH_SORT_EXPR mode">GEODIST()</a> function</p></li>
- <li class="listitem"><p>added <a class="link" href="#ref-searchd" title="7.2. searchd command reference">--status switch to searchd</a></p></li>
- <li class="listitem"><p>added MySpell (OpenOffice) affix file support (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=281" target="_top">#281</a>)</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-odbc-dsn" title="12.1.10. odbc_dsn">ODBC support</a> (both Windows and UnixODBC)</p></li>
- <li class="listitem"><p>added support for @id in IN() (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=292" target="_top">#292</a>)</p></li>
- <li class="listitem"><p>added support for <a class="link" href="#api-func-setselect" title="9.2.4. SetSelect">aggregate functions</a> in GROUP BY (namely AVG, MAX, MIN, SUM)</p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxse-snippets" title="10.4. Building snippets (excerpts) via MySQL">MySQL UDF that builds snippets</a> using searchd</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-write-buffer" title="12.3.5. write_buffer">write_buffer</a> directive (defaults to 1M)</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-xmlpipe-fixup-utf8" title="12.1.43. xmlpipe_fixup_utf8">xmlpipe_fixup_utf8</a> directive</p></li>
- <li class="listitem"><p>added suggestions sample</p></li>
- <li class="listitem"><p>added microsecond precision int64 timer (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=282" target="_top">#282</a>)</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-listen-backlog" title="12.4.17. listen_backlog">listen_backlog directive</a></p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-max-xmlpipe2-field" title="12.3.4. max_xmlpipe2_field">max_xmlpipe2_field</a> directive</p></li>
- <li class="listitem"><p>added <a class="link" href="#sphinxql" title="5.10. MySQL protocol support and SphinxQL">initial SphinxQL support</a> to mysql41 handler, SELECT .../SHOW WARNINGS/STATUS/META are handled</p></li>
- <li class="listitem"><p>added support for different network protocols, and mysql41 protocol</p></li>
- <li class="listitem"><p>added <a class="link" href="#api-func-setrankingmode" title="9.3.2. SetRankingMode">fieldmask ranker</a>, updated SphinxSE list of rankers</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-mysql-ssl" title="12.1.9. mysql_ssl_cert, mysql_ssl_key, mysql_ssl_ca">mysql_ssl_xxx</a> directives</p></li>
- <li class="listitem"><p>added <a class="link" href="#ref-searchd" title="7.2. searchd command reference">--cpustats (requires clock_gettime()) and --status switches</a> to searchd</p></li>
- <li class="listitem"><p>added performance counters, <a class="link" href="#api-func-status" title="9.7.5. Status">Status()</a> API call</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-overshort-step" title="12.2.43. overshort_step">overshort_step</a> and <a class="link" href="#conf-stopword-step" title="12.2.44. stopword_step">stopword_step</a> directives</p></li>
- <li class="listitem"><p>added <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">strict order operator</a> (aka operator before, eg. "one &lt;&lt; two &lt;&lt; three")</p></li>
- <li class="listitem"><p>added <a class="link" href="#ref-indextool" title="7.4. indextool command reference">indextool</a> utility, moved --dumpheader there, added --debugdocids, --dumphitlist options</p></li>
- <li class="listitem"><p>added own RNG, reseeded on @random sort query (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=183" target="_top">#183</a>)</p></li>
- <li class="listitem"><p>added <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">field-start and field-end modifiers support</a> (syntax is "^hello world$"; field-end requires reindex)</p></li>
- <li class="listitem"><p>added MVA attribute support to IN() function</p></li>
- <li class="listitem"><p>added <a class="link" href="#sort-expr" title="5.6. SPH_SORT_EXPR mode">AND, OR, and NOT support</a> to expressions</p></li>
- <li class="listitem"><p>improved logging of (optimized) multi-queries (now logging chained query count)</p></li>
- <li class="listitem"><p>improved handshake error handling, fixed protocol version byte order (omg)</p></li>
- <li class="listitem"><p>updated SphinxSE to protocol 1.22</p></li>
- <li class="listitem"><p>allowed phrase_boundary_step=-1 (trick to emulate keyword expansion)</p></li>
- <li class="listitem"><p>removed SPH_MAX_QUERY_WORDS limit</p></li>
- <li class="listitem"><p>fixed CLI search vs documents missing from DB (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=257" target="_top">#257</a>)</p></li>
- <li class="listitem"><p>fixed libsphinxclient results leak on subsequent sphinx_run_queries call (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=256" target="_top">#256</a>)</p></li>
- <li class="listitem"><p>fixed libsphinxclient handling of zero max_matches and cutoff (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=208" target="_top">#208</a>)</p></li>
- <li class="listitem"><p>fixed over-64K string reads (eg. big snippets) in Java API (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=181" target="_top">#181</a>)</p></li>
- <li class="listitem"><p>fixed Java API 2nd Query() after network error in 1st Query() call (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=308" target="_top">#308</a>)</p></li>
- <li class="listitem"><p>fixed typo-class bugs in SetFilterFloatRange (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=259" target="_top">#259</a>), SetSortMode (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=248" target="_top">#248</a>)</p></li>
- <li class="listitem"><p>fixed missing @@relaxed support (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=276" target="_top">#276</a>), fixed missing error on @nosuchfield queries, documented @@relaxed</p></li>
- <li class="listitem"><p>fixed UNIX socket permissions to 0777 (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=288" target="_top">#288</a>)</p></li>
- <li class="listitem"><p>fixed xmlpipe2 crash on schemas with no fields, added better document structure checks</p></li>
- <li class="listitem"><p>fixed (and optimized) expr parser vs IN() with huge (10K+) args count</p></li>
- <li class="listitem"><p>fixed double EarlyCalc() in fullscan mode (minor performance impact)</p></li>
- <li class="listitem"><p>fixed phrase boundary handling in some cases (on buffer end, on trailing whitespace)</p></li>
- <li class="listitem"><p>fixes in snippets (aka excerpts) generation</p></li>
- <li class="listitem"><p>fixed inline attrs vs id64 index corruption</p></li>
- <li class="listitem"><p>fixed head searchd crash on config re-parse failure</p></li>
- <li class="listitem"><p>fixed handling of numeric keywords with leading zeroes such as "007" (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=251" target="_top">#251</a>)</p></li>
- <li class="listitem"><p>fixed junk in SphinxSE status variables (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=304" target="_top">#304</a>)</p></li>
- <li class="listitem"><p>fixed wordlist checkpoints serialization (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=236" target="_top">#236</a>)</p></li>
- <li class="listitem"><p>fixed unaligned docinfo id access (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=230" target="_top">#230</a>)</p></li>
- <li class="listitem"><p>fixed GetRawBytes() vs oversized blocks (headers with over 32K charset_table should now work, bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=300" target="_top">#300</a>)</p></li>
- <li class="listitem"><p>fixed buffer overflow caused by too long dest wordform, updated tests</p></li>
- <li class="listitem"><p>fixed IF() return type (was always int, is deduced now)</p></li>
- <li class="listitem"><p>fixed legacy queries vs. special chars vs. multiple indexes</p></li>
- <li class="listitem"><p>fixed write-write-read socket access pattern vs Nagle vs delays vs FreeBSD (oh wow)</p></li>
- <li class="listitem"><p>fixed exceptions vs query-parser issue</p></li>
- <li class="listitem"><p>fixed late calc vs @weight in expressions (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=285" target="_top">#285</a>)</p></li>
- <li class="listitem"><p>fixed early lookup/calc vs filters (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=284" target="_top">#284</a>)</p></li>
- <li class="listitem"><p>fixed emulated MATCH_ANY queries (empty proximity and phrase queries are allowed now)</p></li>
- <li class="listitem"><p>fixed MATCH_ANY ranker vs fields with no matches</p></li>
- <li class="listitem"><p>fixed index file size vs inplace_enable (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=245" target="_top">#245</a>)</p></li>
- <li class="listitem"><p>fixed that old logs were not closed on USR1 (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=221" target="_top">#221</a>)</p></li>
- <li class="listitem"><p>fixed handling of '!' alias to NOT operator (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=237" target="_top">#237</a>)</p></li>
- <li class="listitem"><p>fixed error handling vs query steps (step failure was not reported)</p></li>
- <li class="listitem"><p>fixed querying vs inline attributes</p></li>
- <li class="listitem"><p>fixed stupid bug in escaping code, fixed EscapeString() and made it static</p></li>
- <li class="listitem"><p>fixed parser vs @field -keyword, foo|@field bar, "" queries (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=310" target="_top">#310</a>)</p></li>
- </ul></div></div>
- <div class="sect1" title="A.30. Version 0.9.9-rc1, 17 nov 2008"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel099rc1"></a>A.30. Version 0.9.9-rc1, 17 nov 2008</h2></div></div></div>
- <div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added <a class="link" href="#conf-min-stemming-len" title="12.2.10. min_stemming_len">min_stemming_len</a> directive</p></li>
- <li class="listitem"><p>added <a class="link" href="#api-func-isconnecterror" title="9.1.7. IsConnectError">IsConnectError()</a> API call (helps distinguish API vs remote errors)</p></li>
- <li class="listitem"><p>added duplicate log messages filter to searchd</p></li>
- <li class="listitem"><p>added --nodetach debugging switch to searchd</p></li>
- <li class="listitem"><p>added blackhole agents support for debugging/testing (<a class="link" href="#conf-agent-blackhole" title="12.2.33. agent_blackhole">agent_blackhole</a> directive)</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-max-filters" title="12.4.15. max_filters">max_filters</a>, <a class="link" href="#conf-max-filter-values" title="12.4.16. max_filter_values">max_filter_values</a> directives (were hardcoded before)</p></li>
- <li class="listitem"><p>added int64 expression evaluation path, automatic inference, and BIGINT() enforcer function</p></li>
- <li class="listitem"><p>added crash handler for debugging (<code class="option">crash_log_path</code> directive)</p></li>
- <li class="listitem"><p>added MS SQL (aka SQL Server) source support (Windows only, <a class="link" href="#conf-mssql-winauth" title="12.1.44. mssql_winauth">mssql_winauth</a> and mssql_unicode directives)</p></li>
- <li class="listitem"><p>added indexer-side column unpacking feature (<a class="link" href="#conf-unpack-zlib" title="12.1.45. unpack_zlib">unpack_zlib</a>, <a class="link" href="#conf-unpack-mysqlcompress" title="12.1.46. unpack_mysqlcompress">unpack_mysqlcompress</a> directives)</p></li>
- <li class="listitem"><p>added nested brackets and NOTs support to <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">query language</a>, rewritten query parser</p></li>
- <li class="listitem"><p>added persistent connections support (<a class="link" href="#api-func-open" title="9.8.1. Open">Open()</a> and <a class="link" href="#api-func-close" title="9.8.2. Close">Close()</a> API calls)</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-index-exact-words" title="12.2.42. index_exact_words">index_exact_words</a> feature, and exact form operator to query language ("hello =world")</p></li>
- <li class="listitem"><p>added status variables support to SphinxSE (SHOW STATUS LIKE 'sphinx_%')</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-max-packet-size" title="12.4.13. max_packet_size">max_packet_size</a> directive (was hardcoded at 8M before)</p></li>
- <li class="listitem"><p>added UNIX socket support, and multi-interface support (<a class="link" href="#conf-listen" title="12.4.1. listen">listen</a> directive)</p></li>
- <li class="listitem"><p>added star-syntax support to <a class="link" href="#api-func-buildexcerpts" title="9.7.1. BuildExcerpts">BuildExcerpts()</a> API call</p></li>
- <li class="listitem"><p>added inplace inversion of .spa and .spp (<a class="link" href="#conf-inplace-enable" title="12.2.37. inplace_enable">inplace_enable</a> directive, 1.5-2x less disk space for indexing)</p></li>
- <li class="listitem"><p>added builtin Czech stemmer (morphology=stem_cz)</p></li>
- <li class="listitem"><p>added <a class="link" href="#sort-expr" title="5.6. SPH_SORT_EXPR mode">IDIV(), NOW(), INTERVAL(), IN() functions</a> to expressions</p></li>
- <li class="listitem"><p>added index-level early-reject based on filters</p></li>
- <li class="listitem"><p>added MVA updates feature (<a class="link" href="#conf-mva-updates-pool" title="12.4.14. mva_updates_pool">mva_updates_pool</a> directive)</p></li>
- <li class="listitem"><p>added select-list feature with computed expressions support (see <a class="link" href="#api-func-setselect" title="9.2.4. SetSelect">SetSelect()</a> API call, test.php --select switch), protocol 1.22</p></li>
- <li class="listitem"><p>added integer expressions support (2x faster than float)</p></li>
- <li class="listitem"><p>added multiforms support (multiple source words in wordforms file)</p></li>
- <li class="listitem"><p>added <a class="link" href="#api-func-setrankingmode" title="9.3.2. SetRankingMode">legacy rankers</a> (MATCH_ALL/MATCH_ANY/etc), removed legacy matching code (everything runs on V2 engine now)</p></li>
- <li class="listitem"><p>added <a class="link" href="#extended-syntax" title="5.3. Extended query syntax">field position limit</a> modifier to field operator (syntax: @title[50] hello world)</p></li>
- <li class="listitem"><p>added killlist support (<a class="link" href="#conf-sql-query-killlist" title="12.1.16. sql_query_killlist">sql_query_killlist</a> directive, --merge-killlists switch)</p></li>
- <li class="listitem"><p>added on-disk SPI support (ondisk_dict directive)</p></li>
- <li class="listitem"><p>added indexer IO stats</p></li>
- <li class="listitem"><p>added periodic .spa flush (<a class="link" href="#conf-attr-flush-period" title="12.4.12. attr_flush_period">attr_flush_period</a> directive)</p></li>
- <li class="listitem"><p>added config reload on SIGHUP</p></li>
- <li class="listitem"><p>added per-query attribute overrides feature (see <a class="link" href="#api-func-setoverride" title="9.2.3. SetOverride">SetOverride()</a> API call); protocol 1.21</p></li>
- <li class="listitem"><p>added signed 64bit attrs support (<a class="link" href="#conf-sql-attr-bigint" title="12.1.19. sql_attr_bigint">sql_attr_bigint</a> directive)</p></li>
- <li class="listitem"><p>improved HTML stripper to also skip PIs (&lt;? ... ?&gt;, such as &lt;?php ... ?&gt;)</p></li>
- <li class="listitem"><p>improved excerpts speed (up to 50x faster on big documents)</p></li>
- <li class="listitem"><p>fixed a short window of searchd inaccessibility on startup (started listen()ing too early before)</p></li>
- <li class="listitem"><p>fixed .spa loading on systems where read() is 2GB capped</p></li>
- <li class="listitem"><p>fixed infixes vs morphology issues</p></li>
- <li class="listitem"><p>fixed backslash escaping, added backslash to EscapeString()</p></li>
- <li class="listitem"><p>fixed handling of over-2GB dictionary files (.spi)</p></li>
- </ul></div></div>
- <div class="sect1" title="A.31. Version 0.9.8.1, 30 oct 2008"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel0981"></a>A.31. Version 0.9.8.1, 30 oct 2008</h2></div></div></div>
- <div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added configure script to libsphinxclient</p></li>
- <li class="listitem"><p>changed proximity/quorum operator syntax to require whitespace after length</p></li>
- <li class="listitem"><p>fixed potential head process crash on SIGPIPE during "maxed out" message</p></li>
- <li class="listitem"><p>fixed handling of incomplete remote replies (caused over-degraded distributed results, in rare cases)</p></li>
- <li class="listitem"><p>fixed sending of big remote requests (caused distributed requests to fail, in rare cases)</p></li>
- <li class="listitem"><p>fixed FD_SET() overflow (caused searchd to crash on startup, in rare cases)</p></li>
- <li class="listitem"><p>fixed MVA vs distributed indexes (caused loss of 1st MVA value in result set)</p></li>
- <li class="listitem"><p>fixed tokenizing of exceptions terminated by specials (eg. "GPS AT&amp;T" in extended mode)</p></li>
- <li class="listitem"><p>fixed buffer overrun in stemmer on overlong tokens occasionally emitted by proximity/quorum operator parser (caused crashes on certain proximity/quorum queries)</p></li>
- <li class="listitem"><p>fixed wordcount ranker (could be dropping hits)</p></li>
- <li class="listitem"><p>fixed --merge feature (numerous different fixes, caused broken indexes)</p></li>
- <li class="listitem"><p>fixed --merge-dst-range performance</p></li>
- <li class="listitem"><p>fixed prefix/infix generation for stopwords</p></li>
- <li class="listitem"><p>fixed ignore_chars vs specials</p></li>
- <li class="listitem"><p>fixed misplaced F_SETLKW check (caused certain build types, eg. RPM build on FC8, to fail)</p></li>
- <li class="listitem"><p>fixed dictionary-defined charsets support in spelldump, added \x-style wordchars support</p></li>
- <li class="listitem"><p>fixed Java API to properly send long strings (over 64K; eg. long document bodies for excerpts)</p></li>
- <li class="listitem"><p>fixed Python API to accept offset/limit of 'long' type</p></li>
- <li class="listitem"><p>fixed default ID range (that filtered out all 64-bit values) in Java and Python APIs</p></li>
- </ul></div></div>
- <div class="sect1" title="A.32. Version 0.9.8, 14 jul 2008"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel098"></a>A.32. Version 0.9.8, 14 jul 2008</h2></div></div></div>
- <h3><a name="idp35767200"></a>Indexing</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added support for 64-bit document and keyword IDs, --enable-id64 switch to configure</p></li>
- <li class="listitem"><p>added support for floating point attributes</p></li>
- <li class="listitem"><p>added support for bitfields in attributes, <a class="link" href="#conf-sql-attr-bool" title="12.1.18. sql_attr_bool">sql_attr_bool</a> directive and bit-widths part in <a class="link" href="#conf-sql-attr-uint" title="12.1.17. sql_attr_uint">sql_attr_uint</a> directive</p></li>
- <li class="listitem"><p>added support for multi-valued attributes (MVA)</p></li>
- <li class="listitem"><p>added metaphone preprocessor</p></li>
- <li class="listitem"><p>added libstemmer library support, provides stemmers for a number of additional languages</p></li>
- <li class="listitem"><p>added xmlpipe2 source type, that supports arbitrary fields and attributes</p></li>
- <li class="listitem"><p>added word form dictionaries, <a class="link" href="#conf-wordforms" title="12.2.12. wordforms">wordforms</a> directive (and spelldump utility)</p></li>
- <li class="listitem"><p>added tokenizing exceptions, <a class="link" href="#conf-exceptions" title="12.2.14. exceptions">exceptions</a> directive</p></li>
- <li class="listitem"><p>added an option to fully remove element contents to HTML stripper, <a class="link" href="#conf-html-remove-elements" title="12.2.29. html_remove_elements">html_remove_elements</a> directive</p></li>
- <li class="listitem"><p>added HTML entities decoder (with full XHTML1 set support) to HTML stripper</p></li>
- <li class="listitem"><p>added per-index HTML stripping settings, <a class="link" href="#conf-html-strip" title="12.2.27. html_strip">html_strip</a>, <a class="link" href="#conf-html-index-attrs" title="12.2.28. html_index_attrs">html_index_attrs</a>, and <a class="link" href="#conf-html-remove-elements" title="12.2.29. html_remove_elements">html_remove_elements</a> directives</p></li>
- <li class="listitem"><p>added IO load throttling, <a class="link" href="#conf-max-iops" title="12.3.2. max_iops">max_iops</a> and <a class="link" href="#conf-max-iosize" title="12.3.3. max_iosize">max_iosize</a> directives</p></li>
- <li class="listitem"><p>added SQL load throttling, <a class="link" href="#conf-sql-ranged-throttle" title="12.1.30. sql_ranged_throttle">sql_ranged_throttle</a> directive</p></li>
- <li class="listitem"><p>added an option to index prefixes/infixes for given fields only, <a class="link" href="#conf-prefix-fields" title="12.2.21. prefix_fields">prefix_fields</a> and <a class="link" href="#conf-infix-fields" title="12.2.22. infix_fields">infix_fields</a> directives</p></li>
- <li class="listitem"><p>added an option to ignore certain characters (instead of just treating them as whitespace), <a class="link" href="#conf-ignore-chars" title="12.2.17. ignore_chars">ignore_chars</a> directive</p></li>
- <li class="listitem"><p>added an option to increment word position on phrase boundary characters, <a class="link" href="#conf-phrase-boundary" title="12.2.25. phrase_boundary">phrase_boundary</a> and <a class="link" href="#conf-phrase-boundary-step" title="12.2.26. phrase_boundary_step">phrase_boundary_step</a> directives</p></li>
- <li class="listitem"><p>added --merge-dst-range switch (and filters) to index merging feature (--merge switch)</p></li>
- <li class="listitem"><p>added <a class="link" href="#conf-mysql-connect-flags" title="12.1.8. mysql_connect_flags">mysql_connect_flags</a> directive (eg. to reduce indexing time MySQL network traffic and/or time)</p></li>
- <li class="listitem"><p>improved ordinals sorting; now runs in fixed RAM</p></li>
- <li class="listitem"><p>improved handling of documents with zero/NULL ids, now skipping them instead of aborting</p></li>
- </ul></div>
- <h3><a name="idp35795408"></a>Search daemon</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added an option to unlink old index on successful rotation, <a class="link" href="#conf-unlink-old" title="12.4.11. unlink_old">unlink_old</a> directive</p></li>
- <li class="listitem"><p>added an option to keep index files open at all times (fixes subtle races on rotation), <a class="link" href="#conf-preopen" title="12.2.36. preopen">preopen</a> and <a class="link" href="#conf-preopen-indexes" title="12.4.10. preopen_indexes">preopen_indexes</a> directives</p></li>
- <li class="listitem"><p>added an option to profile searchd disk I/O, --iostats command-line option</p></li>
- <li class="listitem"><p>added an option to rotate index seamlessly (fully avoids query stalls), <a class="link" href="#conf-seamless-rotate" title="12.4.9. seamless_rotate">seamless_rotate</a> directive</p></li>
- <li class="listitem"><p>added HTML stripping support to excerpts (uses per-index settings)</p></li>
- <li class="listitem"><p>added 'exact_phrase', 'single_passage', 'use_boundaries', 'weight_order' options to <a class="link" href="#api-func-buildexcerpts" title="9.7.1. BuildExcerpts">BuildExcerpts()</a> API call</p></li>
- <li class="listitem"><p>added distributed attribute updates propagation</p></li>
- <li class="listitem"><p>added distributed retries on master node side</p></li>
- <li class="listitem"><p>added log reopen on SIGUSR1</p></li>
- <li class="listitem"><p>added --stop switch (sends SIGTERM to running instance)</p></li>
- <li class="listitem"><p>added Windows service mode, and --servicename switch</p></li>
- <li class="listitem"><p>added Windows --rotate support</p></li>
- <li class="listitem"><p>improved log timestamping, now with millisecond precision</p></li>
- </ul></div>
- <h3><a name="idp35808112"></a>Querying</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added extended engine V2 (faster, cleaner, better; SPH_MATCH_EXTENDED2 mode)</p></li>
- <li class="listitem"><p>added ranking modes support (V2 engine only; <a class="link" href="#api-func-setrankingmode" title="9.3.2. SetRankingMode">SetRankingMode()</a> API call)</p></li>
- <li class="listitem"><p>added quorum searching support to query language (V2 engine only; example: "any three of all these words"/3)</p></li>
- <li class="listitem"><p>added query escaping support to query language, and <a class="link" href="#api-func-escapestring" title="9.7.4. EscapeString">EscapeString()</a> API call</p></li>
- <li class="listitem"><p>added multi-field syntax support to query language (example: "@(field1,field2) something"), and @@relaxed field checks option</p></li>
- <li class="listitem"><p>added optional star-syntax ('word*') support in keywords, enable_star directive (for prefix/infix indexes only)</p></li>
- <li class="listitem"><p>added full-scan support (query must be fully empty; can perform block-reject optimization)</p></li>
- <li class="listitem"><p>added COUNT(DISTINCT(attr)) calculation support, <a class="link" href="#api-func-setgroupdistinct" title="9.5.2. SetGroupDistinct">SetGroupDistinct()</a> API call</p></li>
- <li class="listitem"><p>added group-by on MVA support, <a class="link" href="#api-func-setarrayresult" title="9.1.6. SetArrayResult">SetArrayResult()</a> PHP API call</p></li>
- <li class="listitem"><p>added per-index weights feature, <a class="link" href="#api-func-setindexweights" title="9.3.6. SetIndexWeights">SetIndexWeights()</a> API call</p></li>
- <li class="listitem"><p>added geodistance support, <a class="link" href="#api-func-setgeoanchor" title="9.4.5. SetGeoAnchor">SetGeoAnchor()</a> API call</p></li>
- <li class="listitem"><p>added result set sorting by arbitrary expressions in run time (eg. "@weight+log(price)*2.5"), SPH_SORT_EXPR mode</p></li>
- <li class="listitem"><p>added result set sorting by @custom compile-time sorting function (see src/sphinxcustomsort.inl)</p></li>
- <li class="listitem"><p>added result set sorting by @random value</p></li>
- <li class="listitem"><p>added result set merging for indexes with different schemas</p></li>
- <li class="listitem"><p>added query comments support (3rd arg to <a class="link" href="#api-func-query" title="9.6.1. Query">Query()</a>/<a class="link" href="#api-func-addquery" title="9.6.2. AddQuery">AddQuery()</a> API calls, copied verbatim to query log)</p></li>
- <li class="listitem"><p>added keyword extraction support, <a class="link" href="#api-func-buildkeywords" title="9.7.3. BuildKeywords">BuildKeywords()</a> API call</p></li>
- <li class="listitem"><p>added binding field weights by name, <a class="link" href="#api-func-setfieldweights" title="9.3.5. SetFieldWeights">SetFieldWeights()</a> API call</p></li>
- <li class="listitem"><p>added optional limit on query time, <a class="link" href="#api-func-setmaxquerytime" title="9.2.2. SetMaxQueryTime">SetMaxQueryTime()</a> API call</p></li>
- <li class="listitem"><p>added optional limit on found matches count (4th arg to <a class="link" href="#api-func-setlimits" title="9.2.1. SetLimits">SetLimits()</a> API call, so-called 'cutoff')</p></li>
- </ul></div>
- <h3><a name="idp35830960"></a>APIs and SphinxSE</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added pure C API (libsphinxclient)</p></li>
- <li class="listitem"><p>added Ruby API (thanks to Dmytro Shteflyuk)</p></li>
- <li class="listitem"><p>added Java API</p></li>
- <li class="listitem"><p>added SphinxSE support for MVAs (use varchar), floats (use float), 64bit docids (use bigint)</p></li>
- <li class="listitem"><p>added SphinxSE options "floatrange", "geoanchor", "fieldweights", "indexweights", "maxquerytime", "comment", "host" and "port"; and support for "expr:CLAUSE"</p></li>
- <li class="listitem"><p>improved SphinxSE max query size (using MySQL condition pushdown), up to 256K now</p></li>
- </ul></div>
- <h3><a name="idp35835360"></a>General</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added scripting (shebang syntax) support to config files (example: #!/usr/bin/php in the first line)</p></li>
- <li class="listitem"><p>added unified config handling and validation to all programs</p></li>
- <li class="listitem"><p>added unified documentation </p></li>
- <li class="listitem"><p>added .spec file for RPM builds</p></li>
- <li class="listitem"><p>added automated testing suite</p></li>
- <li class="listitem"><p>improved index locking, now fcntl()-based instead of buggy file-existence-based</p></li>
- <li class="listitem"><p>fixed unaligned RAM accesses, now works on SPARC and ARM</p></li>
- </ul></div>
- <h3><a name="rel098-fixes-since-rc2"></a>Changes and fixes since 0.9.8-rc2</h3><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added pure C API (libsphinxclient)</p></li>
- <li class="listitem"><p>added Ruby API</p></li>
- <li class="listitem"><p>added SetConnectTimeout() PHP API call</p></li>
- <li class="listitem"><p>added allowed type check to UpdateAttributes() handler (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=174" target="_top">#174</a>)</p></li>
- <li class="listitem"><p>added defensive MVA checks on index preload (protection against broken indexes, bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=168" target="_top">#168</a>)</p></li>
- <li class="listitem"><p>added sphinx-min.conf sample file</p></li>
- <li class="listitem"><p>added --without-iconv switch to configure</p></li>
- <li class="listitem"><p>removed redundant -lz dependency in searchd</p></li>
- <li class="listitem"><p>removed erroneous "xmlpipe2 deprecated" warning</p></li>
- <li class="listitem"><p>fixed EINTR handling in piped read (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=166" target="_top">#166</a>)</p></li>
- <li class="listitem"><p>fixup query time before logging and sending to client (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=153" target="_top">#153</a>)</p></li>
- <li class="listitem"><p>fixed attribute updates vs full-scan early-reject index (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=149" target="_top">#149</a>)</p></li>
- <li class="listitem"><p>fixed gcc warnings (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=160" target="_top">#160</a>)</p></li>
- <li class="listitem"><p>fixed mysql connection attempt vs pgsql source type (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=165" target="_top">#165</a>)</p></li>
- <li class="listitem"><p>fixed 32-bit wraparound when preloading over 2 GB files</p></li>
- <li class="listitem"><p>fixed "out of memory" message vs over 2 GB allocs (bug <a class="ulink" href="http://sphinxsearch.com/bugs/view.php?id=116" target="_top">#116</a>)</p></li>
- <li class="listitem"><p>fixed unaligned RAM access detection on ARM (where unaligned reads do not crash but produce wrong results)</p></li>
- <li class="listitem"><p>fixed missing full scan results in some cases</p></li>
- <li class="listitem"><p>fixed several bugs in --merge, --merge-dst-range</p></li>
- <li class="listitem"><p>fixed @geodist vs MultiQuery and filters, @expr vs MultiQuery</p></li>
- <li class="listitem"><p>fixed GetTokenEnd() vs 1-grams (was causing crash in excerpts)</p></li>
- <li class="listitem"><p>fixed sql_query_range to handle empty strings in addition to NULL strings (Postgres specific)</p></li>
- <li class="listitem"><p>fixed morphology=none vs infixes</p></li>
- <li class="listitem"><p>fixed case-sensitive attribute names in UpdateAttributes()</p></li>
- <li class="listitem"><p>fixed ext2 ranking vs. stopwords (now using atompos from query parser)</p></li>
- <li class="listitem"><p>fixed EscapeString() call</p></li>
- <li class="listitem"><p>fixed escaped specials (now handled as whitespace if not in charset)</p></li>
- <li class="listitem"><p>fixed schema minimizer (now handles type/size mismatches)</p></li>
- <li class="listitem"><p>fixed word stats in extended2; stemmed form is now returned</p></li>
- <li class="listitem"><p>fixed spelldump case folding vs dictionary-defined character sets</p></li>
- <li class="listitem"><p>fixed Postgres BOOLEAN handling </p></li>
- <li class="listitem"><p>fixed enforced "inline" docinfo on empty indexes (normally ok, but index merge was really confused)</p></li>
- <li class="listitem"><p>fixed rare count(distinct) out-of-bounds issue (it occasionally caused too high @distinct values)</p></li>
- <li class="listitem"><p>fixed hangups on documents with id=DOCID_MAX in some cases</p></li>
- <li class="listitem"><p>fixed rare crash in tokenizer (prefixed synonym vs. input stream eof)</p></li>
- <li class="listitem"><p>fixed query parser vs "aaa (bbb ccc)|ddd" queries</p></li>
- <li class="listitem"><p>fixed BuildExcerpts() request in Java API</p></li>
- <li class="listitem"><p>fixed Postgres specific memory leak</p></li>
- <li class="listitem"><p>fixed handling of overshort keywords (less than min_word_len)</p></li>
- <li class="listitem"><p>fixed HTML stripper (now emits space after indexed attributes)</p></li>
- <li class="listitem"><p>fixed 32-field case in query parser</p></li>
- <li class="listitem"><p>fixed rare count(distinct) vs. querying multiple local indexes vs. reusable sorter issue</p></li>
- <li class="listitem"><p>fixed sorting of negative floats in SPH_SORT_EXTENDED mode</p></li>
- </ul></div></div>
- <div class="sect1" title="A.33. Version 0.9.7, 02 apr 2007"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel097"></a>A.33. Version 0.9.7, 02 apr 2007</h2></div></div></div>
- <div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added support for <code class="option">sql_str2ordinal_column</code></p></li>
- <li class="listitem"><p>added support for up to 5 sort-by attrs (in extended sorting mode)</p></li>
- <li class="listitem"><p>added support for separate groups sorting clause (in group-by mode)</p></li>
- <li class="listitem"><p>added support for on-the-fly attribute updates (PRE-ALPHA; will change heavily; use for preliminary testing ONLY)</p></li>
- <li class="listitem"><p>added support for zero/NULL attributes</p></li>
- <li class="listitem"><p>added support for 0.9.7 features to SphinxSE</p></li>
- <li class="listitem"><p>added support for n-grams (alpha, 1-grams only for now)</p></li>
- <li class="listitem"><p>added support for warnings reported to client</p></li>
- <li class="listitem"><p>added support for exclude-filters</p></li>
- <li class="listitem"><p>added support for prefix and infix indexing (see <code class="option">max_prefix_len</code>, <code class="option">max_infix_len</code>)</p></li>
- <li class="listitem"><p>added <code class="option">@*</code> syntax to reset current field to query language</p></li>
- <li class="listitem"><p>added removal of duplicate entries in query index order</p></li>
- <li class="listitem"><p>added PHP API workarounds for PHP signed/unsigned braindamage</p></li>
- <li class="listitem"><p>added locks to avoid two concurrent indexers working on same index</p></li>
- <li class="listitem"><p>added check for existing attributes vs. <code class="option">docinfo=none</code> case</p></li>
- <li class="listitem"><p>improved groupby code a lot (better precision, and up to 25x faster in extreme cases)</p></li>
- <li class="listitem"><p>improved error handling and reporting</p></li>
- <li class="listitem"><p>improved handling of broken indexes (reports error instead of hanging/crashing)</p></li>
- <li class="listitem"><p>improved <code class="option">mmap()</code> limits for attributes and wordlists (now able to map over 4 GB on x64 and over 2 GB on x32 where possible)</p></li>
- <li class="listitem"><p>improved <code class="option">malloc()</code> pressure in head daemon (search time should not degrade with time any more)</p></li>
- <li class="listitem"><p>improved <code class="filename">test.php</code> command line options</p></li>
- <li class="listitem"><p>improved error reporting (distributed query, broken index etc issues now reported to client)</p></li>
- <li class="listitem"><p>changed default network packet size to be 8M, added extra checks</p></li>
- <li class="listitem"><p>fixed division by zero in BM25 on 1-document collections (in extended matching mode)</p></li>
- <li class="listitem"><p>fixed <code class="filename">.spl</code> files getting unlinked</p></li>
- <li class="listitem"><p>fixed crash in schema compatibility test</p></li>
- <li class="listitem"><p>fixed UTF-8 Russian stemmer</p></li>
- <li class="listitem"><p>fixed requested matches count when querying distributed agents</p></li>
- <li class="listitem"><p>fixed signed vs. unsigned issues everywhere (ranged queries, CLI search output, and obtaining docid)</p></li>
- <li class="listitem"><p>fixed potential crashes vs. negative query offsets</p></li>
- <li class="listitem"><p>fixed 0-match docs vs. extended mode vs. stats</p></li>
- <li class="listitem"><p>fixed group/timestamp filters being ignored if querying from older clients</p></li>
- <li class="listitem"><p>fixed docs to mention <code class="option">pgsql</code> source type</p></li>
- <li class="listitem"><p>fixed issues with explicit '&amp;' in extended matching mode</p></li>
- <li class="listitem"><p>fixed wrong assertion in SBCS encoder</p></li>
- <li class="listitem"><p>fixed crashes with no-attribute indexes after rotate</p></li>
- </ul></div></div>
- <div class="sect1" title="A.34. Version 0.9.7-rc2, 15 dec 2006"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel097rc2"></a>A.34. Version 0.9.7-rc2, 15 dec 2006</h2></div></div></div>
- <div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added support for extended matching mode (query language)</p></li>
- <li class="listitem"><p>added support for extended sorting mode (sorting clauses)</p></li>
- <li class="listitem"><p>added support for SBCS excerpts</p></li>
- <li class="listitem"><p>added <code class="option">mmap()ing</code> for attributes and wordlist (improves search time, speeds up <code class="option">fork()</code> greatly)</p></li>
- <li class="listitem"><p>fixed attribute name handling to be case insensitive</p></li>
- <li class="listitem"><p>fixed default compiler options to simplify post-mortem debugging (added <code class="option">-g</code>, removed <code class="option">-fomit-frame-pointer</code>)</p></li>
- <li class="listitem"><p>fixed rare memory leak</p></li>
- <li class="listitem"><p>fixed "hello hello" queries in "match phrase" mode</p></li>
- <li class="listitem"><p>fixed issue with excerpts, texts and overlong queries</p></li>
- <li class="listitem"><p>fixed logging multiple index name (no longer tokenized)</p></li>
- <li class="listitem"><p>fixed trailing stopword not flushed from tokenizer</p></li>
- <li class="listitem"><p>fixed boolean evaluation</p></li>
- <li class="listitem"><p>fixed pidfile being wrongly <code class="option">unlink()ed</code> on <code class="option">bind()</code> failure</p></li>
- <li class="listitem"><p>fixed <code class="option">--with-mysql-includes/libs</code> (they conflicted with well-known paths)</p></li>
- <li class="listitem"><p>fixes for 64-bit platforms</p></li>
- </ul></div></div>
- <div class="sect1" title="A.35. Version 0.9.7-rc1, 26 oct 2006"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel097rc"></a>A.35. Version 0.9.7-rc1, 26 oct 2006</h2></div></div></div>
- <div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added alpha index merging code</p></li>
- <li class="listitem"><p>added an option to decrease <code class="option">max_matches</code> per-query</p></li>
- <li class="listitem"><p>added an option to specify IP address for searchd to listen on</p></li>
- <li class="listitem"><p>added support for unlimited amount of configured sources and indexes</p></li>
- <li class="listitem"><p>added support for group-by queries</p></li>
- <li class="listitem"><p>added support for /2 range modifier in charset_table</p></li>
- <li class="listitem"><p>added support for arbitrary amount of document attributes</p></li>
- <li class="listitem"><p>added logging filter count and index name</p></li>
- <li class="listitem"><p>added <code class="option">--with-debug</code> option to configure to compile in debug mode</p></li>
- <li class="listitem"><p>added <code class="option">-DNDEBUG</code> when compiling in default mode</p></li>
- <li class="listitem"><p>improved search time (added doclist size hints, in-memory wordlist cache, and used VLB coding everywhere)</p></li>
- <li class="listitem"><p>improved (refactored) SQL driver code (adding new drivers should be very easy now)</p></li>
- <li class="listitem"><p>improved excerpts generation</p></li>
- <li class="listitem"><p>fixed issue with empty sources and ranged queries</p></li>
- <li class="listitem"><p>fixed querying purely remote distributed indexes</p></li>
- <li class="listitem"><p>fixed suffix length check in English stemmer in some cases</p></li>
- <li class="listitem"><p>fixed UTF-8 decoder for codes over U+20000 (for CJK)</p></li>
- <li class="listitem"><p>fixed UTF-8 encoder for 3-byte sequences (for CJK)</p></li>
- <li class="listitem"><p>fixed overshort (less than <code class="option">min_word_len</code>) words prepended to next field</p></li>
- <li class="listitem"><p>fixed source connection order (indexer does not connect to all sources at once now)</p></li>
- <li class="listitem"><p>fixed line numbering in config parser</p></li>
- <li class="listitem"><p>fixed some issues with index rotation</p></li>
- </ul></div></div>
- <div class="sect1" title="A.36. Version 0.9.6, 24 jul 2006"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel096"></a>A.36. Version 0.9.6, 24 jul 2006</h2></div></div></div>
- <div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added support for empty indexes</p></li>
- <li class="listitem"><p>added support for multiple sql_query_pre/post/post_index</p></li>
- <li class="listitem"><p>fixed timestamp ranges filter in "match any" mode</p></li>
- <li class="listitem"><p>fixed configure issues with --without-mysql and --with-pgsql options</p></li>
- <li class="listitem"><p>fixed building on Solaris 9</p></li>
- </ul></div></div>
- <div class="sect1" title="A.37. Version 0.9.6-rc1, 26 jun 2006"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rel096rc1"></a>A.37. Version 0.9.6-rc1, 26 jun 2006</h2></div></div></div>
- <div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>added boolean queries support (experimental, beta version)</p></li>
- <li class="listitem"><p>added simple file-based query cache (experimental, beta version)</p></li>
- <li class="listitem"><p>added storage engine for MySQL 5.0 and 5.1 (experimental, beta version)</p></li>
- <li class="listitem"><p>added GNU style <code class="filename">configure</code> script</p></li>
- <li class="listitem"><p>added new searchd protocol (all binary, and should be backwards compatible)</p></li>
- <li class="listitem"><p>added distributed searching support to searchd</p></li>
- <li class="listitem"><p>added PostgreSQL driver</p></li>
- <li class="listitem"><p>added excerpts generation</p></li>
- <li class="listitem"><p>added <code class="option">min_word_len</code> option to index</p></li>
- <li class="listitem"><p>added <code class="option">max_matches</code> option to searchd, removed hardcoded MAX_MATCHES limit</p></li>
- <li class="listitem"><p>added initial documentation, and a working <code class="filename">example.sql</code></p></li>
- <li class="listitem"><p>added support for multiple sources per index</p></li>
- <li class="listitem"><p>added soundex support</p></li>
- <li class="listitem"><p>added group ID ranges support</p></li>
- <li class="listitem"><p>added <code class="option">--stdin</code> command-line option to search utility</p></li>
- <li class="listitem"><p>added <code class="option">--noprogress</code> option to indexer</p></li>
- <li class="listitem"><p>added <code class="option">--index</code> option to search</p></li>
- <li class="listitem"><p>fixed UTF-8 decoder (3-byte codepoints did not work)</p></li>
- <li class="listitem"><p>fixed PHP API to handle big result sets faster</p></li>
- <li class="listitem"><p>fixed config parser to handle empty values properly</p></li>
- <li class="listitem"><p>fixed redundant <code class="code">time(NULL)</code> calls in time-segments mode</p></li>
- </ul></div></div></div></div>
- </body></html>
|