// sphinxjsonquery.cpp (114 KB)
  1. //
  2. // Copyright (c) 2017-2026, Manticore Software LTD (https://manticoresearch.com)
  3. // All rights reserved
  4. //
  5. // This program is free software; you can redistribute it and/or modify
  6. // it under the terms of the GNU General Public License. You should have
  7. // received a copy of the GPL license along with this program; if you
  8. // did not, you can find it at http://www.gnu.org/
  9. //
  10. #include "sphinxquery/xqparser.h"
  11. #include "sphinxquery/parse_helper.h"
  12. #include "sphinxsearch.h"
  13. #include "sphinxplugin.h"
  14. #include "sphinxutils.h"
  15. #include "searchdaemon.h"
  16. #include "jsonqueryfilter.h"
  17. #include "attribute.h"
  18. #include "searchdsql.h"
  19. #include "searchdha.h"
  20. #include "knnmisc.h"
  21. #include "sorterscroll.h"
  22. #include "sphinxexcerpt.h"
  // Special names used by the JSON query code.
  // g_szAll is the field name that HandleSpecialFields() expands to "all fields".
  // NOTE(review): g_szHighlight / g_szOrder are not referenced in this part of the
  // file; presumably internal item names - confirm at their use sites.
  static const char * g_szAll       = "_all";
  static const char * g_szHighlight = "_@highlight_";
  static const char * g_szOrder     = "_@order_";
  26. class QueryTreeBuilder_c;
  27. struct ErrorPathGuard_t
  28. {
  29. ErrorPathGuard_t ( QueryTreeBuilder_c & tBuilder, bool bEnabled, const JsonObj_c & tPath );
  30. ~ErrorPathGuard_t ();
  31. QueryTreeBuilder_c & m_tBuilder;
  32. const bool m_bEnabled;
  33. };
  34. class QueryTreeBuilder_c : public XQParseHelper_c
  35. {
  36. public:
  37. QueryTreeBuilder_c ( const CSphQuery * pQuery, TokenizerRefPtr_c pQueryTokenizerQL, const CSphIndexSettings & tSettings );
  38. void CollectKeywords ( const char * szStr, XQNode_t * pNode, const XQLimitSpec_t & tLimitSpec, float fBoost );
  39. bool HandleFieldBlockStart ( const char * & /*pPtr*/ ) override { return true; }
  40. bool HandleSpecialFields ( const char * & pPtr, FieldMask_t & dFields ) override;
  41. bool NeedTrailingSeparator() override { return false; }
  42. XQNode_t * CreateNode ( XQLimitSpec_t & tLimitSpec );
  43. const TokenizerRefPtr_c & GetQLTokenizer() { return m_pQueryTokenizerQL; }
  44. const CSphIndexSettings & GetIndexSettings() { return m_tSettings; }
  45. const CSphQuery * GetQuery() { return m_pQuery; }
  46. bool m_bHasFulltext = false;
  47. bool m_bHasFilter = false;
  48. void ResetNodesFlags() { m_bHasFulltext = m_bHasFilter = false; }
  49. QueryTreeBuilder_c CreateCollectPath ( const CSphSchema * pSchema );
  50. void ErrorPrintPath ( QueryTreeBuilder_c & tOrig );
  51. ErrorPathGuard_t ErrorAddPath ( const JsonObj_c & tPath );
  52. private:
  53. const CSphQuery * m_pQuery {nullptr};
  54. const TokenizerRefPtr_c m_pQueryTokenizerQL;
  55. const CSphIndexSettings & m_tSettings;
  56. XQNode_t * AddChildKeyword ( XQNode_t * pParent, const char * szKeyword, int iSkippedPosBeforeToken, const XQLimitSpec_t & tLimitSpec, float fBoost );
  57. friend ErrorPathGuard_t;
  58. CSphVector< std::pair<CSphString, const void *> > m_dErrorPath;
  59. bool m_bErrorCollectPath = false;
  60. };
  61. QueryTreeBuilder_c::QueryTreeBuilder_c ( const CSphQuery * pQuery, TokenizerRefPtr_c pQueryTokenizerQL, const CSphIndexSettings & tSettings )
  62. : m_pQuery ( pQuery )
  63. , m_pQueryTokenizerQL ( std::move (pQueryTokenizerQL) )
  64. , m_tSettings ( tSettings )
  65. {}
  66. void QueryTreeBuilder_c::CollectKeywords ( const char * szStr, XQNode_t * pNode, const XQLimitSpec_t & tLimitSpec, float fBoost )
  67. {
  68. m_pTokenizer->SetBuffer ( (const BYTE*)szStr, (int) strlen ( szStr ) );
  69. while (true)
  70. {
  71. int iSkippedPosBeforeToken = 0;
  72. if ( m_bWasBlended )
  73. {
  74. iSkippedPosBeforeToken = m_pTokenizer->SkipBlended();
  75. // just add all skipped blended parts except blended head (already added to atomPos)
  76. if ( iSkippedPosBeforeToken>1 )
  77. m_iAtomPos += iSkippedPosBeforeToken - 1;
  78. }
  79. // FIXME!!! only wildcard node need tokes with wildcard symbols
  80. const char * sToken = (const char *) m_pTokenizer->GetToken ();
  81. if ( !sToken )
  82. {
  83. AddChildKeyword ( pNode, nullptr, iSkippedPosBeforeToken, tLimitSpec, fBoost );
  84. break;
  85. }
  86. // now let's do some token post-processing
  87. m_bWasBlended = m_pTokenizer->TokenIsBlended();
  88. int iPrevDeltaPos = 0;
  89. if ( m_pPlugin && m_pPlugin->m_fnPushToken )
  90. sToken = m_pPlugin->m_fnPushToken ( m_pPluginData, const_cast<char*>(sToken), &iPrevDeltaPos, m_pTokenizer->GetTokenStart(), int ( m_pTokenizer->GetTokenEnd() - m_pTokenizer->GetTokenStart() ) );
  91. m_iAtomPos += 1 + iPrevDeltaPos;
  92. bool bMultiDestHead = false;
  93. bool bMultiDest = false;
  94. int iDestCount = 0;
  95. // do nothing inside phrase
  96. if ( !m_pTokenizer->IsPhraseMode() )
  97. bMultiDest = m_pTokenizer->WasTokenMultiformDestination ( bMultiDestHead, iDestCount );
  98. // check for stopword, and create that node
  99. // temp buffer is required, because GetWordID() might expand (!) the keyword in-place
  100. BYTE sTmp [ MAX_TOKEN_BYTES ];
  101. strncpy ( (char*)sTmp, sToken, MAX_TOKEN_BYTES );
  102. sTmp[MAX_TOKEN_BYTES-1] = '\0';
  103. int iStopWord = 0;
  104. if ( m_pPlugin && m_pPlugin->m_fnPreMorph )
  105. m_pPlugin->m_fnPreMorph ( m_pPluginData, (char*)sTmp, &iStopWord );
  106. SphWordID_t uWordId = iStopWord ? 0 : m_pDict->GetWordID ( sTmp );
  107. if ( uWordId && m_pPlugin && m_pPlugin->m_fnPostMorph )
  108. {
  109. int iRes = m_pPlugin->m_fnPostMorph ( m_pPluginData, (char*)sTmp, &iStopWord );
  110. if ( iStopWord )
  111. uWordId = 0;
  112. else if ( iRes )
  113. uWordId = m_pDict->GetWordIDNonStemmed ( sTmp );
  114. }
  115. if ( !uWordId )
  116. {
  117. sToken = nullptr;
  118. // stopwords with step=0 must not affect pos
  119. if ( m_bEmptyStopword )
  120. m_iAtomPos--;
  121. }
  122. XQNode_t * pChildNode = nullptr;
  123. if ( bMultiDest && !bMultiDestHead )
  124. {
  125. assert ( m_dMultiforms.GetLength() );
  126. m_dMultiforms.Last().m_iDestCount++;
  127. m_dDestForms.Add ( sToken );
  128. } else
  129. pChildNode = AddChildKeyword ( pNode, sToken, iSkippedPosBeforeToken, tLimitSpec, fBoost );
  130. if ( bMultiDestHead )
  131. {
  132. MultiformNode_t & tMulti = m_dMultiforms.Add();
  133. tMulti.m_pNode = pChildNode;
  134. tMulti.m_iDestStart = m_dDestForms.GetLength();
  135. tMulti.m_iDestCount = 0;
  136. }
  137. }
  138. }
  139. bool QueryTreeBuilder_c::HandleSpecialFields ( const char * & pPtr, FieldMask_t & dFields )
  140. {
  141. if ( *pPtr=='_' )
  142. {
  143. auto iLen = (int) strlen(g_szAll);
  144. if ( !strncmp ( pPtr, g_szAll, iLen ) )
  145. {
  146. pPtr += iLen;
  147. dFields.SetAll();
  148. return true;
  149. }
  150. }
  151. return false;
  152. }
  153. XQNode_t * QueryTreeBuilder_c::CreateNode ( XQLimitSpec_t & tLimitSpec )
  154. {
  155. auto * pNode = new XQNode_t(tLimitSpec);
  156. m_dSpawned.Add ( pNode );
  157. return pNode;
  158. }
  159. XQNode_t * QueryTreeBuilder_c::AddChildKeyword ( XQNode_t * pParent, const char * szKeyword, int iSkippedPosBeforeToken, const XQLimitSpec_t & tLimitSpec, float fBoost )
  160. {
  161. XQKeyword_t tKeyword ( szKeyword, m_iAtomPos );
  162. tKeyword.m_iSkippedBefore = iSkippedPosBeforeToken;
  163. tKeyword.m_fBoost = fBoost;
  164. auto * pNode = new XQNode_t ( tLimitSpec );
  165. pNode->AddDirtyWord ( tKeyword );
  166. pParent->AddNewChild ( pNode );
  167. m_dSpawned.Add ( pNode );
  168. return pNode;
  169. }
  170. ErrorPathGuard_t QueryTreeBuilder_c::ErrorAddPath ( const JsonObj_c & tPath )
  171. {
  172. return ErrorPathGuard_t ( *this, m_bErrorCollectPath, tPath );
  173. }
  174. void QueryTreeBuilder_c::ErrorPrintPath ( QueryTreeBuilder_c & tOrig )
  175. {
  176. assert ( IsError() );
  177. StringBuilder_c tBuilder;
  178. tBuilder.Appendf ( "%s at '", tOrig.m_pParsed->m_sParseError.cstr() );
  179. const void * pLast = nullptr;
  180. for ( const auto & tEntry : m_dErrorPath )
  181. {
  182. // skip duplicates
  183. if ( !tEntry.second || pLast!=tEntry.second )
  184. tBuilder.Appendf ( "/%s", tEntry.first.scstr() );
  185. pLast = tEntry.second;
  186. }
  187. tBuilder << "'";
  188. tOrig.m_pParsed->m_sParseError = (CSphString)tBuilder;
  189. }
  190. QueryTreeBuilder_c QueryTreeBuilder_c::CreateCollectPath ( const CSphSchema * pSchema )
  191. {
  192. QueryTreeBuilder_c tOther ( m_pQuery, std::move ( m_pQueryTokenizerQL ), m_tSettings );
  193. tOther.Setup ( pSchema, m_pTokenizer->Clone ( SPH_CLONE ), std::move ( m_pDict ), m_pParsed, m_tSettings );
  194. tOther.m_bErrorCollectPath = true;
  195. tOther.m_dErrorPath.Add ( { "query", nullptr } );
  196. return tOther;
  197. }
  198. ErrorPathGuard_t::ErrorPathGuard_t ( QueryTreeBuilder_c & tBuilder, bool bEnabled, const JsonObj_c & tPath )
  199. : m_tBuilder ( tBuilder )
  200. , m_bEnabled ( bEnabled )
  201. {
  202. // add path entry only in the collect pass and only prior to error point
  203. if ( m_bEnabled && !m_tBuilder.IsError() )
  204. m_tBuilder.m_dErrorPath.Add ( { tPath.Name(), tPath.GetRoot() } );
  205. }
  206. ErrorPathGuard_t::~ErrorPathGuard_t ()
  207. {
  208. if ( m_bEnabled && !m_tBuilder.IsError() )
  209. m_tBuilder.m_dErrorPath.Pop();
  210. }
  211. //////////////////////////////////////////////////////////////////////////
  212. class QueryParserJson_c : public QueryParser_i
  213. {
  214. public:
  215. bool IsFullscan ( const CSphQuery & tQuery ) const final;
  216. bool ParseQuery ( XQQuery_t & tParsed, const char * sQuery, const CSphQuery * pQuery, TokenizerRefPtr_c pQueryTokenizer, TokenizerRefPtr_c pQueryTokenizerJson, const CSphSchema * pSchema, const DictRefPtr_c& pDict, const CSphIndexSettings & tSettings, const CSphBitvec * pMorphFields ) const final;
  217. QueryParser_i * Clone() const final { return new QueryParserJson_c; }
  218. private:
  219. XQNode_t * ConstructMatchNode ( const JsonObj_c & tJson, bool bPhrase, bool bTerms, bool bSingleTerm, QueryTreeBuilder_c & tBuilder ) const;
  220. XQNode_t * ConstructBoolNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const;
  221. XQNode_t * ConstructQLNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const;
  222. XQNode_t * ConstructMatchAllNode ( QueryTreeBuilder_c & tBuilder ) const;
  223. bool ConstructBoolNodeItems ( const JsonObj_c & tClause, CSphVector<XQNode_t *> & dItems, QueryTreeBuilder_c & tBuilder ) const;
  224. bool ConstructNodeOrFilter ( const JsonObj_c & tItem, CSphVector<XQNode_t *> & dNodes, QueryTreeBuilder_c & tBuilder ) const;
  225. XQNode_t * ConstructNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const;
  226. };
  227. bool QueryParserJson_c::IsFullscan ( const CSphQuery & tQuery ) const
  228. {
  229. const char * szQ = tQuery.m_sQuery.cstr();
  230. if ( !szQ ) return true;
  231. if ( strstr ( szQ, R"("match")" ) ) return false;
  232. if ( strstr ( szQ, R"("terms")" ) ) return false;
  233. if ( strstr ( szQ, R"("match_phrase")" ) ) return false;
  234. if ( strstr ( szQ, R"("term")" ) ) return false;
  235. if ( strstr ( szQ, R"("query_string")" ) ) return false;
  236. if ( strstr ( szQ, R"("simple_query_string")" ) ) return false;
  237. return true;
  238. }
  239. static bool IsFullText ( const CSphString & sName );
  240. static bool IsBoolNode ( const CSphString & sName );
  241. bool CheckRootNode ( const JsonObj_c & tRoot, CSphString & sError )
  242. {
  243. bool bFilter = false;
  244. bool bBool = false;
  245. bool bFullText = false;
  246. for ( const auto & tItem : tRoot )
  247. {
  248. const CSphString & sName = tItem.Name();
  249. if ( IsFilter ( tItem ) )
  250. {
  251. if ( bFilter )
  252. {
  253. sError = "\"query\" has multiple filter properties, use bool node";
  254. return false;
  255. }
  256. bFilter = true;
  257. }
  258. else if ( IsBoolNode ( sName ) )
  259. {
  260. if ( bBool )
  261. {
  262. sError = "\"query\" has multiple bool properties";
  263. return false;
  264. }
  265. bBool = true;
  266. }
  267. else if ( IsFullText ( sName ) )
  268. {
  269. if ( bFullText )
  270. {
  271. sError = "\"query\" has multiple full-text properties, use bool node";
  272. return false;
  273. }
  274. bFullText = true;
  275. }
  276. }
  277. return true;
  278. }
  279. static JsonObj_c FindFullTextQueryNode ( const JsonObj_c & tRoot )
  280. {
  281. for ( JsonObj_c tChild : tRoot )
  282. {
  283. if ( !IsFilter ( tChild ) )
  284. return tChild;
  285. }
  286. return tRoot[0];
  287. }
  288. static bool HasFulltext ( const XQNode_t * pRoot )
  289. {
  290. if ( !pRoot )
  291. return false;
  292. CSphVector<const XQNode_t *> dNodes;
  293. dNodes.Add ( pRoot );
  294. ARRAY_FOREACH ( iNode, dNodes )
  295. {
  296. const XQNode_t * pNode = dNodes[iNode];
  297. if ( pNode->dWords().GetLength() )
  298. return true;
  299. dNodes.Append ( pNode->dChildren() );
  300. }
  301. return false;
  302. }
  303. bool QueryParserJson_c::ParseQuery ( XQQuery_t & tParsed, const char * szQuery, const CSphQuery * pQuery, TokenizerRefPtr_c pQueryTokenizerQL, TokenizerRefPtr_c pQueryTokenizerJson, const CSphSchema * pSchema, const DictRefPtr_c & pDict, const CSphIndexSettings & tSettings, const CSphBitvec * pMorphFields ) const
  304. {
  305. JsonObj_c tRoot ( szQuery );
  306. // take only the first item of the query; ignore the rest
  307. int iNumIndexes = ( tRoot.Empty() ? 0 : tRoot.Size() );
  308. if ( !iNumIndexes )
  309. {
  310. tParsed.m_sParseError = "\"query\" property is empty";
  311. return false;
  312. }
  313. if ( iNumIndexes!=1 && !CheckRootNode ( tRoot, tParsed.m_sParseError ) )
  314. return false;
  315. assert ( pQueryTokenizerJson->IsQueryTok() );
  316. DictRefPtr_c pMyDict = GetStatelessDict ( pDict );
  317. QueryTreeBuilder_c tBuilder ( pQuery, std::move ( pQueryTokenizerQL ), tSettings );
  318. tBuilder.Setup ( pSchema, pQueryTokenizerJson->Clone ( SPH_CLONE ), pMyDict, &tParsed, tSettings );
  319. const JsonObj_c tFtNode = FindFullTextQueryNode ( tRoot );
  320. XQNode_t * pRoot = ConstructNode ( tFtNode, tBuilder );
  321. if ( tBuilder.IsError() )
  322. {
  323. tBuilder.Cleanup();
  324. QueryTreeBuilder_c tErrorBuilder { tBuilder.CreateCollectPath ( pSchema ) };
  325. ConstructNode ( tFtNode, tErrorBuilder );
  326. tErrorBuilder.Cleanup();
  327. tErrorBuilder.ErrorPrintPath ( tBuilder );
  328. return false;
  329. }
  330. tParsed.m_bWasFullText = HasFulltext ( pRoot );
  331. XQLimitSpec_t tLimitSpec;
  332. pRoot = tBuilder.FixupTree ( pRoot, tLimitSpec, pMorphFields, IsAllowOnlyNot() );
  333. if ( tBuilder.IsError() )
  334. {
  335. tBuilder.Cleanup();
  336. return false;
  337. }
  338. tParsed.m_bSingleWord = ( pRoot && pRoot->dChildren().IsEmpty() && pRoot->dWords().GetLength() == 1 );
  339. tParsed.m_pRoot = pRoot;
  340. return true;
  341. }
  // Operator names accepted by StrToNodeOp(); the position of a name in this
  // table is the value that gets cast to XQOperator_e, so the order matters.
  static const char * g_szOperatorNames[] = { "and", "or" };
  347. static XQOperator_e StrToNodeOp ( const char * szStr )
  348. {
  349. if ( !szStr )
  350. return SPH_QUERY_TOTAL;
  351. int iOp=0;
  352. for ( auto i : g_szOperatorNames )
  353. {
  354. if ( !strcmp ( szStr, i ) )
  355. return XQOperator_e(iOp);
  356. iOp++;
  357. }
  358. return SPH_QUERY_TOTAL;
  359. }
  360. static bool IsBoolNode ( const JsonObj_c & tJson )
  361. {
  362. if ( !tJson )
  363. return false;
  364. return CSphString ( tJson.Name() )=="bool";
  365. }
  366. bool IsBoolNode ( const CSphString & sName )
  367. {
  368. return ( sName=="bool" );
  369. }
  370. static float GetBoost ( const JsonObj_c & tFields )
  371. {
  372. const float fBoostDefault = 1.0f;
  373. if ( !tFields.IsObj() )
  374. return fBoostDefault;
  375. JsonObj_c tBoost = tFields.GetItem ( "boost" );
  376. if ( !tBoost || !tBoost.IsNum() )
  377. return fBoostDefault;
  378. return tBoost.FltVal();
  379. }
  380. XQNode_t * QueryParserJson_c::ConstructMatchNode ( const JsonObj_c & tJson, bool bPhrase, bool bTerms, bool bSingleTerm, QueryTreeBuilder_c & tBuilder ) const
  381. {
  382. ErrorPathGuard_t tGuard = tBuilder.ErrorAddPath ( tJson );
  383. if ( !tJson.IsObj() )
  384. {
  385. tBuilder.Error ( "\"match\" value should be an object" );
  386. return nullptr;
  387. }
  388. if ( tJson.Size()!=1 )
  389. {
  390. tBuilder.Error ( "ill-formed \"match\" property" );
  391. return nullptr;
  392. }
  393. JsonObj_c tFields = tJson[0];
  394. tBuilder.SetString ( tFields.Name() );
  395. XQLimitSpec_t tLimitSpec;
  396. const char * szQuery = nullptr;
  397. XQOperator_e eNodeOp = bPhrase ? SPH_QUERY_PHRASE : SPH_QUERY_OR;
  398. bool bIgnore = false;
  399. StringBuilder_c tTermsBuf ( " " );
  400. if ( !tBuilder.ParseFields ( tLimitSpec.m_dFieldMask, tLimitSpec.m_iFieldMaxPos, bIgnore ) )
  401. return nullptr;
  402. if ( bIgnore )
  403. {
  404. tBuilder.Warning ( R"(ignoring fields in "%s", using "_all")", tFields.Name() );
  405. tLimitSpec.Reset();
  406. }
  407. tLimitSpec.m_bFieldSpec = true;
  408. if ( bTerms )
  409. {
  410. if ( !tFields.IsArray() )
  411. {
  412. tBuilder.Warning ( "values of properties in \"terms\" should be an array" );
  413. return nullptr;
  414. }
  415. for ( const auto & tTerm : tFields )
  416. {
  417. if ( !tTerm.IsStr() )
  418. {
  419. tBuilder.Error ( "\"terms\" value should be a string" );
  420. return nullptr;
  421. }
  422. tTermsBuf += tTerm.SzVal();
  423. }
  424. szQuery = tTermsBuf.cstr();
  425. } else if ( tFields.IsObj() )
  426. {
  427. // matching with flags
  428. CSphString sError;
  429. JsonObj_c tQuery = ( bSingleTerm ? tFields.GetStrItem ( "value", sError ) : tFields.GetStrItem ( "query", sError ) );
  430. if ( !tQuery )
  431. {
  432. tBuilder.Error ( "%s", sError.cstr() );
  433. return nullptr;
  434. }
  435. szQuery = tQuery.SzVal();
  436. if ( !bPhrase )
  437. {
  438. JsonObj_c tOp = tFields.GetItem ( "operator" );
  439. if ( tOp ) // "and", "or"
  440. {
  441. eNodeOp = StrToNodeOp ( tOp.SzVal() );
  442. if ( eNodeOp==SPH_QUERY_TOTAL )
  443. {
  444. tBuilder.Error ( "unknown operator: \"%s\"", tOp.SzVal() );
  445. return nullptr;
  446. }
  447. }
  448. }
  449. } else
  450. {
  451. // simple list of keywords
  452. if ( !tFields.IsStr() )
  453. {
  454. tBuilder.Warning ( "values of properties in \"match\" should be strings or objects" );
  455. return nullptr;
  456. }
  457. szQuery = tFields.SzVal();
  458. }
  459. assert ( szQuery );
  460. XQNode_t * pNewNode = tBuilder.CreateNode ( tLimitSpec );
  461. pNewNode->SetOp ( eNodeOp );
  462. float fBoost = GetBoost ( tFields );
  463. tBuilder.CollectKeywords ( szQuery, pNewNode, tLimitSpec, fBoost );
  464. return pNewNode;
  465. }
  466. bool QueryParserJson_c::ConstructNodeOrFilter ( const JsonObj_c & tItem, CSphVector<XQNode_t *> & dNodes, QueryTreeBuilder_c & tBuilder ) const
  467. {
  468. if ( !tItem )
  469. return true;
  470. // we created filters before, no need to process them again
  471. if ( IsFilter(tItem) )
  472. {
  473. tBuilder.m_bHasFilter = true;
  474. return true;
  475. }
  476. XQNode_t * pNode = ConstructNode ( tItem, tBuilder );
  477. if ( !pNode )
  478. return IsBoolNode ( tItem ); // need walk down the tree for compart mode
  479. dNodes.Add ( pNode );
  480. return true;
  481. }
  482. bool QueryParserJson_c::ConstructBoolNodeItems ( const JsonObj_c & tClause, CSphVector<XQNode_t *> & dItems, QueryTreeBuilder_c & tBuilder ) const
  483. {
  484. ErrorPathGuard_t tGuard = tBuilder.ErrorAddPath ( tClause );
  485. if ( tClause.IsArray() )
  486. {
  487. for ( const auto & tObject : tClause )
  488. {
  489. if ( !tObject.IsObj() )
  490. {
  491. tBuilder.Error ( "\"%s\" array value should be an object", tClause.Name() );
  492. return false;
  493. }
  494. if ( !ConstructNodeOrFilter ( tObject[0], dItems, tBuilder ) )
  495. return false;
  496. }
  497. } else if ( tClause.IsObj() )
  498. {
  499. if ( !ConstructNodeOrFilter ( tClause[0], dItems, tBuilder ) )
  500. return false;
  501. } else
  502. {
  503. tBuilder.Error ( "\"%s\" value should be an object or an array", tClause.Name() );
  504. return false;
  505. }
  506. return true;
  507. }
  508. XQNode_t * QueryParserJson_c::ConstructBoolNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const
  509. {
  510. ErrorPathGuard_t tGuard = tBuilder.ErrorAddPath ( tJson );
  511. if ( !tJson.IsObj() )
  512. {
  513. tBuilder.Error ( "\"bool\" value should be an object" );
  514. return nullptr;
  515. }
  516. CSphVector<XQNode_t *> dMust, dShould, dMustNot;
  517. for ( const auto & tClause : tJson )
  518. {
  519. tBuilder.ResetNodesFlags();
  520. CSphString sName = tClause.Name();
  521. if ( sName=="must" )
  522. {
  523. if ( !ConstructBoolNodeItems ( tClause, dMust, tBuilder ) )
  524. return nullptr;
  525. } else if ( sName=="should" )
  526. {
  527. if ( !ConstructBoolNodeItems ( tClause, dShould, tBuilder ) )
  528. return nullptr;
  529. if ( tBuilder.m_bHasFilter && tBuilder.m_bHasFulltext )
  530. {
  531. tBuilder.Error ( "filter and full-text can be used together only inside \"must\" node" );
  532. return nullptr;
  533. }
  534. } else if ( sName=="must_not" )
  535. {
  536. if ( !ConstructBoolNodeItems ( tClause, dMustNot, tBuilder ) )
  537. return nullptr;
  538. } else if ( sName=="filter" )
  539. {
  540. if ( !ConstructBoolNodeItems ( tClause, dMust, tBuilder ) )
  541. return nullptr;
  542. } else if ( sName=="minimum_should_match" ) // FIXME!!! add to should as option
  543. {
  544. continue;
  545. } else
  546. {
  547. tBuilder.Error ( "unknown bool query type: \"%s\"", sName.cstr() );
  548. return nullptr;
  549. }
  550. }
  551. XQNode_t * pMustNode = nullptr;
  552. XQNode_t * pShouldNode = nullptr;
  553. XQNode_t * pMustNotNode = nullptr;
  554. XQLimitSpec_t tLimitSpec;
  555. if ( dMust.GetLength() )
  556. {
  557. // no need to construct AND node for a single child
  558. if ( dMust.GetLength()==1 )
  559. pMustNode = dMust[0];
  560. else
  561. {
  562. XQNode_t * pAndNode = tBuilder.CreateNode ( tLimitSpec );
  563. pAndNode->SetOp ( SPH_QUERY_AND );
  564. for ( auto & i : dMust )
  565. pAndNode->AddNewChild ( i);
  566. pMustNode = pAndNode;
  567. }
  568. }
  569. if ( dShould.GetLength() )
  570. {
  571. if ( dShould.GetLength()==1 )
  572. pShouldNode = dShould[0];
  573. else
  574. {
  575. XQNode_t * pOrNode = tBuilder.CreateNode ( tLimitSpec );
  576. pOrNode->SetOp ( SPH_QUERY_OR );
  577. for ( auto & i : dShould )
  578. pOrNode->AddNewChild (i);
  579. pShouldNode = pOrNode;
  580. }
  581. }
  582. // slightly different case - we need to construct the NOT node anyway
  583. if ( dMustNot.GetLength() )
  584. {
  585. XQNode_t * pNotNode = tBuilder.CreateNode ( tLimitSpec );
  586. pNotNode->SetOp ( SPH_QUERY_NOT );
  587. if ( dMustNot.GetLength()==1 )
  588. {
  589. pNotNode->AddNewChild ( dMustNot[0] );
  590. } else
  591. {
  592. XQNode_t * pOrNode = tBuilder.CreateNode ( tLimitSpec );
  593. pOrNode->SetOp ( SPH_QUERY_OR );
  594. for ( auto & i : dMustNot )
  595. pOrNode->AddNewChild ( i );
  596. pNotNode->AddNewChild ( pOrNode );
  597. }
  598. pMustNotNode = pNotNode;
  599. }
  600. int iTotalNodes = 0;
  601. iTotalNodes += pMustNode ? 1 : 0;
  602. iTotalNodes += pShouldNode ? 1 : 0;
  603. iTotalNodes += pMustNotNode ? 1 : 0;
  604. XQNode_t * pResultNode = nullptr;
  605. if ( !iTotalNodes )
  606. return nullptr;
  607. else if ( iTotalNodes==1 )
  608. {
  609. if ( pMustNode )
  610. pResultNode = pMustNode;
  611. else if ( pShouldNode )
  612. pResultNode = pShouldNode;
  613. else
  614. pResultNode = pMustNotNode;
  615. assert ( pResultNode );
  616. } else
  617. {
  618. pResultNode = pMustNode ? pMustNode : pMustNotNode;
  619. assert ( pResultNode );
  620. // combine 'must' and 'must_not' with AND
  621. if ( pMustNode && pMustNotNode )
  622. {
  623. XQNode_t * pAndNode = tBuilder.CreateNode(tLimitSpec);
  624. pAndNode->SetOp(SPH_QUERY_AND);
  625. pAndNode->AddNewChild ( pMustNode );
  626. pAndNode->AddNewChild ( pMustNotNode );
  627. pResultNode = pAndNode;
  628. }
  629. // combine 'result' node and 'should' node with MAYBE
  630. if ( pShouldNode )
  631. {
  632. XQNode_t * pMaybeNode = tBuilder.CreateNode ( tLimitSpec );
  633. pMaybeNode->SetOp ( SPH_QUERY_MAYBE );
  634. pMaybeNode->AddNewChild ( pResultNode );
  635. pMaybeNode->AddNewChild ( pShouldNode );
  636. pResultNode = pMaybeNode;
  637. }
  638. }
  639. return pResultNode;
  640. }
  641. XQNode_t * QueryParserJson_c::ConstructQLNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const
  642. {
  643. ErrorPathGuard_t tGuard = tBuilder.ErrorAddPath ( tJson );
  644. CSphString sQueryString;
  645. // query_string could be either {"query_string":{"query":"term"}} or {"query_string":"term"}
  646. if ( tJson.IsObj() )
  647. {
  648. CSphString sError;
  649. JsonObj_c tNestedQuery = tJson.GetStrItem ( "query", sError, false );
  650. if ( !tNestedQuery )
  651. {
  652. tBuilder.Error ( "\"query_string\" value should be an object with the \"query\" string" );
  653. return nullptr;
  654. }
  655. sQueryString = tNestedQuery.StrVal();
  656. }
  657. if ( sQueryString.IsEmpty() )
  658. {
  659. if ( tJson.IsStr() )
  660. {
  661. sQueryString = tJson.StrVal();
  662. } else
  663. {
  664. tBuilder.Error ( "\"query_string\" value should be an string" );
  665. return nullptr;
  666. }
  667. }
  668. XQQuery_t tParsed;
  669. tParsed.m_dZones = tBuilder.GetZone(); // should keep the same zone list for whole tree
  670. // no need to pass morph fields here as upper level does fixup
  671. if ( !sphParseExtendedQuery ( tParsed, sQueryString.cstr(), tBuilder.GetQuery(), tBuilder.GetQLTokenizer(), tBuilder.GetSchema(), tBuilder.GetDict(), tBuilder.GetIndexSettings(), nullptr ) )
  672. {
  673. tBuilder.Error ( "%s", tParsed.m_sParseError.cstr() );
  674. return nullptr;
  675. }
  676. if ( !tParsed.m_sParseWarning.IsEmpty() )
  677. tBuilder.Warning ( "%s", tParsed.m_sParseWarning.cstr() );
  678. XQNode_t * pRoot = tParsed.m_pRoot;
  679. tParsed.m_pRoot = nullptr;
  680. tBuilder.SetZone ( tParsed.m_dZones );
  681. return pRoot;
  682. }
  683. XQNode_t * QueryParserJson_c::ConstructMatchAllNode ( QueryTreeBuilder_c & tBuilder ) const
  684. {
  685. XQLimitSpec_t tLimitSpec;
  686. XQNode_t * pNewNode = tBuilder.CreateNode ( tLimitSpec );
  687. pNewNode->SetOp ( SPH_QUERY_NULL );
  688. return pNewNode;
  689. }
  690. static bool IsFtMatch ( const CSphString & sName )
  691. {
  692. return ( sName=="match" );
  693. }
  694. static bool IsFtTerms ( const CSphString & sName )
  695. {
  696. return ( sName=="terms" );
  697. }
  698. static bool IsFtPhrase ( const CSphString & sName )
  699. {
  700. return ( sName=="match_phrase" );
  701. }
  702. static bool IsFtTerm ( const CSphString & sName )
  703. {
  704. return ( sName=="term" );
  705. }
  706. static bool IsFtMatchAll ( const CSphString & sName )
  707. {
  708. return ( sName=="match_all" );
  709. }
  710. static bool IsFtQueryString ( const CSphString & sName )
  711. {
  712. return ( sName=="query_string" );
  713. }
  714. static bool IsFtQueryStringSimple ( const CSphString & sName )
  715. {
  716. return ( sName=="simple_query_string" );
  717. }
  718. bool IsFullText ( const CSphString & sName )
  719. {
  720. return ( IsFtMatch ( sName ) || IsFtTerms ( sName ) || IsFtPhrase ( sName ) || IsFtTerm ( sName ) || IsFtMatchAll ( sName ) || IsFtQueryString ( sName ) || IsFtQueryStringSimple ( sName ));
  721. }
  722. XQNode_t * QueryParserJson_c::ConstructNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const
  723. {
  724. ErrorPathGuard_t tGuard = tBuilder.ErrorAddPath ( tJson );
  725. CSphString sName = tJson.Name();
  726. if ( !tJson || sName.IsEmpty() )
  727. {
  728. tBuilder.Error ( "empty json found" );
  729. return nullptr;
  730. }
  731. bool bMatch = IsFtMatch ( sName );
  732. bool bTerms = IsFtTerms ( sName );
  733. bool bPhrase = IsFtPhrase ( sName );
  734. bool bSingleTerm = IsFtTerm ( sName );
  735. if ( bMatch || bPhrase || bTerms || bSingleTerm )
  736. {
  737. tBuilder.m_bHasFulltext = true;
  738. return ConstructMatchNode ( tJson, bPhrase, bTerms, bSingleTerm, tBuilder );
  739. }
  740. if ( IsFtMatchAll ( sName ) )
  741. {
  742. tBuilder.m_bHasFulltext = true;
  743. return ConstructMatchAllNode ( tBuilder );
  744. }
  745. if ( IsBoolNode ( sName ) )
  746. return ConstructBoolNode ( tJson, tBuilder );
  747. if ( IsFtQueryString ( sName ) )
  748. {
  749. tBuilder.m_bHasFulltext = true;
  750. return ConstructQLNode ( tJson, tBuilder );
  751. }
  752. if ( IsFtQueryStringSimple ( sName ) && tJson.IsObj() )
  753. {
  754. tBuilder.m_bHasFulltext = true;
  755. return ConstructQLNode ( tJson.GetItem ( "query" ), tBuilder );
  756. }
  757. tBuilder.Error ( "unknown full-text node '%s'", sName.cstr() );
  758. return nullptr;
  759. }
  760. bool NonEmptyQuery ( const JsonObj_c & tQuery )
  761. {
  762. return ( tQuery.HasItem("match")
  763. || tQuery.HasItem("match_phrase")
  764. || tQuery.HasItem("bool") )
  765. || tQuery.HasItem("query_string");
  766. }
  767. //////////////////////////////////////////////////////////////////////////
  768. static bool ParseSnippet ( const JsonObj_c & tSnip, CSphQuery & tQuery, CSphString & sError );
  769. static bool ParseSort ( const JsonObj_c & tSort, JsonQuery_c & tQuery, bool & bGotWeight, CSphString & sError, CSphString & sWarning );
  770. static bool ParseSelect ( const JsonObj_c & tSelect, CSphQuery & tQuery, CSphString & sError );
  771. static bool ParseScriptFields ( const JsonObj_c & tExpr, CSphQuery & tQuery, CSphString & sError );
  772. static bool ParseExpressions ( const JsonObj_c & tExpr, CSphQuery & tQuery, CSphString & sError );
  773. static bool ParseDocFields ( const JsonObj_c & tDocFields, JsonQuery_c & tQuery, CSphString & sError );
  774. static bool ParseAggregates ( const JsonObj_c & tAggs, JsonQuery_c & tQuery, CSphString & sError );
  775. static bool ParseIndex ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, CSphString & sError )
  776. {
  777. if ( !tRoot )
  778. {
  779. sError.SetSprintf ( "unable to parse: %s", tRoot.GetErrorPtr() );
  780. return false;
  781. }
  782. JsonObj_c tIndex = tRoot.GetStrItem ( "table", sError );
  783. if ( !tIndex )
  784. {
  785. tIndex = tRoot.GetStrItem ( "index", sError, true );
  786. if ( !tIndex )
  787. return false;
  788. sError = "";
  789. }
  790. tStmt.m_sIndex = tIndex.StrVal();
  791. tStmt.m_tQuery.m_sIndexes = tStmt.m_sIndex;
  792. const char * sIndexStart = strchr ( tStmt.m_sIndex.cstr(), ':' );
  793. if ( sIndexStart!=nullptr )
  794. {
  795. const char * sIndex = tStmt.m_sIndex.cstr();
  796. sError.SetSprintf ( "wrong table at cluster syntax, use \"cluster\": \"%.*s\" and \"index\": \"%s\" properties, instead of '%s'",
  797. (int)(sIndexStart-sIndex), sIndex, sIndexStart+1, sIndex );
  798. return false;
  799. }
  800. return true;
  801. }
  802. static bool ParseIndexId ( const JsonObj_c & tRoot, bool bArrayIds, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
  803. {
  804. if ( !ParseIndex ( tRoot, tStmt, sError ) )
  805. return false;
  806. JsonObj_c tId = tRoot.GetItem ( "id" );
  807. if ( tId )
  808. {
  809. if ( !tId.IsInt() && !tId.IsUint() && !tId.IsArray() )
  810. {
  811. sError = "Document ids should be integer or array of integers";
  812. return false;
  813. }
  814. if ( !bArrayIds && tId.IsArray() )
  815. {
  816. sError = "Document ids should be integer";
  817. return false;
  818. }
  819. if ( !tId.IsArray() )
  820. {
  821. if ( tId.IsInt() && tId.IntVal()<0 )
  822. {
  823. sError = "Negative document ids are not allowed";
  824. return false;
  825. }
  826. } else
  827. {
  828. for ( const auto & tItem : tId )
  829. {
  830. if ( !tItem.IsInt() && !tItem.IsUint() )
  831. {
  832. sError = "Document ids should be integer";
  833. return false;
  834. }
  835. if ( tItem.IsInt() && tItem.IntVal()<0 )
  836. {
  837. sError = "Negative document ids are not allowed";
  838. return false;
  839. }
  840. }
  841. }
  842. }
  843. if ( tId && !tId.IsArray() )
  844. tDocId = tId.IntVal();
  845. else
  846. tDocId = 0; // enable auto-id
  847. return true;
  848. }
  849. static bool ParseCluster ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, CSphString & sError )
  850. {
  851. if ( !tRoot )
  852. {
  853. sError.SetSprintf ( "unable to parse: %s", tRoot.GetErrorPtr() );
  854. return false;
  855. }
  856. // cluster is optional
  857. JsonObj_c tCluster = tRoot.GetStrItem ( "cluster", sError, true );
  858. if ( tCluster )
  859. tStmt.m_sCluster = tCluster.StrVal();
  860. return true;
  861. }
  862. std::unique_ptr<QueryParser_i> sphCreateJsonQueryParser()
  863. {
  864. return std::make_unique<QueryParserJson_c>();
  865. }
  866. static bool ParseLimits ( const JsonObj_c & tRoot, CSphQuery & tQuery, CSphString & sError )
  867. {
  868. JsonObj_c tLimit = tRoot.GetIntItem ( "limit", "size", sError );
  869. if ( !sError.IsEmpty() )
  870. return false;
  871. if ( tLimit )
  872. tQuery.m_iLimit = (int)tLimit.IntVal();
  873. JsonObj_c tOffset = tRoot.GetIntItem ( "offset", "from", sError );
  874. if ( !sError.IsEmpty() )
  875. return false;
  876. if ( tOffset )
  877. tQuery.m_iOffset = (int)tOffset.IntVal();
  878. JsonObj_c tCutoff = tRoot.GetIntItem ( "cutoff", sError, true );
  879. if ( !sError.IsEmpty() )
  880. return false;
  881. if ( tCutoff )
  882. tQuery.m_iCutoff = (int)tCutoff.IntVal();
  883. JsonObj_c tMaxMatches = tRoot.GetIntItem ( "max_matches", sError, true );
  884. if ( !sError.IsEmpty() )
  885. return false;
  886. if ( tMaxMatches )
  887. {
  888. tQuery.m_iMaxMatches = (int)tMaxMatches.IntVal();
  889. tQuery.m_bExplicitMaxMatches = true;
  890. }
  891. return true;
  892. }
  893. static bool ParseOptions ( const JsonObj_c & tOptions, CSphQuery & tQuery, CSphString & sError )
  894. {
  895. if ( !tOptions.IsObj() )
  896. {
  897. sError = "\"options\" property value should be an object";
  898. return false;
  899. }
  900. for ( const auto & i : tOptions )
  901. {
  902. AddOption_e eAdd = AddOption_e::NOT_FOUND;
  903. CSphString sOpt = i.Name();
  904. if ( i.IsInt() || i.IsBool() )
  905. eAdd = AddOption ( tQuery, sOpt, i.StrVal(), i.IntVal(), STMT_SELECT, sError );
  906. else if ( i.IsStr() )
  907. {
  908. CSphString sRanker = i.StrVal();
  909. const char * szRanker = sRanker.cstr();
  910. while ( sphIsAlpha(*szRanker) )
  911. szRanker++;
  912. if ( *szRanker=='(' && sRanker.Ends(")") )
  913. {
  914. int iRankerNameLen = szRanker-sRanker.cstr();
  915. CSphString sExpr = sRanker.SubString (iRankerNameLen+1, sRanker.Length()-iRankerNameLen-2 );
  916. sExpr.Unquote();
  917. sRanker = sRanker.SubString ( 0, iRankerNameLen );
  918. eAdd = ::AddOptionRanker ( tQuery, sOpt, sRanker, [sExpr]{ return sExpr; }, STMT_SELECT, sError );
  919. }
  920. if ( eAdd==AddOption_e::NOT_FOUND )
  921. eAdd = AddOption ( tQuery, sOpt, i.StrVal(), i.StrVal(), [&i]{ return i.StrVal(); }, STMT_SELECT, sError );
  922. }
  923. else if ( i.IsObj() )
  924. {
  925. CSphVector<CSphNamedVariant> dNamed;
  926. for ( const auto & tNamed : i )
  927. {
  928. if ( !tNamed.IsInt() )
  929. {
  930. sError.SetSprintf ( "\"%s\" property of \"%s\"' option should be integer", sOpt.cstr(), tNamed.Name() );
  931. return false;
  932. }
  933. auto& dNewNamed = dNamed.Add();
  934. dNewNamed.m_sKey = tNamed.Name();
  935. dNewNamed.m_iValue = tNamed.IntVal();
  936. dNewNamed.m_eType = VariantType_e::BIGINT;
  937. }
  938. eAdd = ::AddOption ( tQuery, sOpt, dNamed, STMT_SELECT, sError );
  939. }
  940. if ( eAdd==AddOption_e::NOT_FOUND )
  941. {
  942. sError.SetSprintf ( "unknown option '%s'", sOpt.cstr () );
  943. return false;
  944. }
  945. else if ( eAdd==AddOption_e::FAILED )
  946. return false;
  947. }
  948. return true;
  949. }
  950. static bool ParseOptions ( const JsonObj_c & tRoot, ParsedJsonQuery_t & tPJQuery, CSphString & sError )
  951. {
  952. CSphQuery & tQuery = tPJQuery.m_tQuery;
  953. // different from SQL: in sql it is requested by default
  954. tQuery.m_tScrollSettings.m_bRequested = false;
  955. JsonObj_c tOptions = tRoot.GetItem("options");
  956. if ( !tOptions )
  957. return true;
  958. if ( tQuery.m_eJoinType!=JoinType_e::NONE )
  959. for ( const auto & i : tOptions )
  960. {
  961. if ( !i.IsObj() )
  962. continue;
  963. CSphString sTable = i.Name();
  964. sTable.ToLower();
  965. StrVec_t dQueryIndexes;
  966. ParseIndexList ( tQuery.m_sIndexes, dQueryIndexes );
  967. bool bLeftTable = false;
  968. for ( const auto & i : dQueryIndexes )
  969. if ( sTable==i )
  970. {
  971. bLeftTable = true;
  972. break;
  973. }
  974. if ( bLeftTable )
  975. return ParseOptions ( i, tQuery, sError );
  976. if ( sTable==tQuery.m_sJoinIdx )
  977. return ParseOptions ( i, tPJQuery.m_tJoinQueryOptions, sError );
  978. sError.SetSprintf ( "Unknown table '%s' in OPTIONS", sTable.cstr() );
  979. return false;
  980. }
  981. return ParseOptions ( tOptions, tQuery, sError );
  982. }
  983. static bool FillQueryVec ( KnnSearchSettings_t & tKNN, const JsonObj_c & tQueryVec, CSphString & sError )
  984. {
  985. for ( const auto & tArrayItem : tQueryVec )
  986. {
  987. if ( !tArrayItem.IsInt() && !tArrayItem.IsDbl() )
  988. {
  989. sError = "\"query_vector\" items should be integer of float";
  990. return false;
  991. }
  992. tKNN.m_dVec.Add ( tArrayItem.FltVal() );
  993. }
  994. return true;
  995. }
  996. static bool ParseKNNQuery ( const JsonObj_c & tJson, CSphQuery & tQuery, CSphString & sError, CSphString & sWarning )
  997. {
  998. if ( !tJson )
  999. return true;
  1000. if ( !tJson.IsObj() )
  1001. {
  1002. sError = "\"knn\" property value should be an object";
  1003. return false;
  1004. }
  1005. auto & tKNN = tQuery.m_tKnnSettings;
  1006. if ( !tJson.FetchStrItem ( tKNN.m_sAttr, "field", sError ) ) return false;
  1007. JsonObj_c tK = tJson.GetIntItem ( "k", sError, true );
  1008. if ( !sError.IsEmpty() )
  1009. return false;
  1010. if ( tK )
  1011. {
  1012. tKNN.m_iK = (int)tK.IntVal();
  1013. if ( tKNN.m_iK <= 0 )
  1014. {
  1015. sError = "k parameter must be positive";
  1016. return false;
  1017. }
  1018. }
  1019. else
  1020. tKNN.m_iK = -1;
  1021. if ( !tJson.FetchIntItem ( tKNN.m_iEf, "ef", sError, true ) ) return false;
  1022. if ( tKNN.m_iEf < 0 )
  1023. {
  1024. sError = "ef parameter must be non-negative";
  1025. return false;
  1026. }
  1027. if ( !tJson.FetchBoolItem ( tKNN.m_bRescore, "rescore", sError, true ) ) return false;
  1028. if ( !tJson.FetchFltItem ( tKNN.m_fOversampling, "oversampling", sError, true ) ) return false;
  1029. if ( tKNN.m_fOversampling < 1.0f )
  1030. {
  1031. sError = "oversampling parameter must be >= 1.0";
  1032. return false;
  1033. }
  1034. JsonObj_c tQueryVec = tJson.GetArrayItem ( "query_vector", sError, true );
  1035. if ( tQueryVec )
  1036. {
  1037. if ( !FillQueryVec ( tKNN, tQueryVec, sError ) )
  1038. return false;
  1039. }
  1040. else
  1041. {
  1042. // mayber a "query" is present?
  1043. JsonObj_c tQuery = tJson.GetItem("query");
  1044. if ( !tQuery )
  1045. return false;
  1046. if ( tQuery.IsArray() )
  1047. {
  1048. if ( !FillQueryVec ( tKNN, tQuery, sError ) )
  1049. return false;
  1050. }
  1051. else if ( tQuery.IsStr() )
  1052. tKNN.m_sEmbStr = tQuery.StrVal();
  1053. else
  1054. {
  1055. sError = "\"query\" property value should be string or a vector";
  1056. return false;
  1057. }
  1058. }
  1059. return true;
  1060. }
  1061. static bool ParseOnCond ( const JsonObj_c & tRoot, CSphString & sIdx, CSphString & sAttr, ESphAttr & eType, CSphString & sError )
  1062. {
  1063. CSphString sType;
  1064. if ( !tRoot.FetchStrItem ( sIdx, "table", sError ) ) return false;
  1065. if ( !tRoot.FetchStrItem ( sAttr, "field", sError ) ) return false;
  1066. if ( !tRoot.FetchStrItem ( sType, "type", sError, true ) ) return false;
  1067. if ( !sType.IsEmpty() )
  1068. {
  1069. if ( sType=="int" || sType=="integer" )
  1070. eType = SPH_ATTR_INTEGER;
  1071. else if ( sType=="float" )
  1072. eType = SPH_ATTR_FLOAT;
  1073. else if ( sType=="string" )
  1074. eType = SPH_ATTR_STRING;
  1075. else
  1076. {
  1077. sError.SetSprintf ( "unknown \"type\" value: \"%s\"", sType.cstr() );
  1078. return false;
  1079. }
  1080. }
  1081. return true;
  1082. }
  1083. static bool ParseOnFilter ( const JsonObj_c & tRoot, OnFilter_t & tOnFilter, CSphString & sError )
  1084. {
  1085. if ( !tRoot.IsObj() )
  1086. {
  1087. sError = "\"on\" items should be objects";
  1088. return false;
  1089. }
  1090. CSphString sOp;
  1091. if ( !tRoot.FetchStrItem ( sOp, "operator", sError ) )
  1092. return false;
  1093. if ( sOp!="eq" )
  1094. {
  1095. sError = "Unknown \"operator\" value";
  1096. return false;
  1097. }
  1098. JsonObj_c tLeft = tRoot.GetObjItem ( "left", sError );
  1099. if ( !tLeft )
  1100. return false;
  1101. JsonObj_c tRight = tRoot.GetObjItem ( "right", sError );
  1102. if ( !tRight )
  1103. return false;
  1104. if ( !ParseOnCond ( tLeft, tOnFilter.m_sIdx1, tOnFilter.m_sAttr1, tOnFilter.m_eTypeCast1, sError ) )
  1105. return false;
  1106. if ( !ParseOnCond ( tRight, tOnFilter.m_sIdx2, tOnFilter.m_sAttr2, tOnFilter.m_eTypeCast2, sError ) )
  1107. return false;
  1108. return true;
  1109. }
  1110. static bool ParseJoin ( const JsonObj_c & tRoot, CSphQuery & tQuery, CSphString & sError, CSphString & sWarning )
  1111. {
  1112. JsonObj_c tJoin = tRoot.GetArrayItem ( "join", sError, true );
  1113. if ( !tJoin )
  1114. return true;
  1115. int iNumJoins = 0;
  1116. for ( const auto & tJoinItem : tJoin )
  1117. {
  1118. if ( iNumJoins>0 )
  1119. {
  1120. sError = "Only single table joins are currently supported";
  1121. return false;
  1122. }
  1123. CSphString sJoinType;
  1124. if ( !tJoinItem.FetchStrItem ( sJoinType, "type", sError ) )
  1125. return false;
  1126. if ( sJoinType=="inner" )
  1127. tQuery.m_eJoinType = JoinType_e::INNER;
  1128. else if ( sJoinType=="left" )
  1129. tQuery.m_eJoinType = JoinType_e::LEFT;
  1130. else
  1131. {
  1132. sError.SetSprintf ( "unknown join type '%s'", sJoinType.cstr() );
  1133. return false;
  1134. }
  1135. if ( !tJoinItem.FetchStrItem ( tQuery.m_sJoinIdx, "table", sError ) )
  1136. return false;
  1137. JsonObj_c tMatchQuery = tJoinItem.GetObjItem ( "query", sError, true );
  1138. if ( tMatchQuery )
  1139. {
  1140. CSphQuery tStubQuery;
  1141. CSphString sStubError, sStubWarning;
  1142. if ( !ParseJsonQueryFilters ( tMatchQuery, tStubQuery, sStubError, sStubWarning ) || tStubQuery.m_dFilters.GetLength() )
  1143. {
  1144. sError = "only fulltext is allowed in joined queries; place filters in the main query";
  1145. return false;
  1146. }
  1147. tQuery.m_sJoinQuery = tMatchQuery.AsString();
  1148. }
  1149. JsonObj_c tOn = tJoinItem.GetArrayItem ( "on", sError );
  1150. if ( !tOn )
  1151. return false;
  1152. for ( const auto & tCond : tOn )
  1153. {
  1154. OnFilter_t tOnFilter;
  1155. if ( !ParseOnFilter ( tCond, tOnFilter, sError ) )
  1156. return false;
  1157. tQuery.m_dOnFilters.Add(tOnFilter);
  1158. }
  1159. iNumJoins++;
  1160. }
  1161. return true;
  1162. }
  1163. bool sphParseJsonQuery ( Str_t sQuery, ParsedJsonQuery_t & tPJQuery )
  1164. {
  1165. JsonObj_c tRoot ( sQuery );
  1166. tPJQuery.m_tQuery.m_sRawQuery = sQuery;
  1167. return sphParseJsonQuery ( tRoot, tPJQuery );
  1168. }
  1169. bool sphParseJsonQuery ( const JsonObj_c & tRoot, ParsedJsonQuery_t & tPJQuery )
  1170. {
  1171. TlsMsg::ResetErr();
  1172. if ( !tRoot )
  1173. return TlsMsg::Err ( "unable to parse: %s", tRoot.GetErrorPtr() );
  1174. TLS_MSG_STRING ( sError );
  1175. JsonObj_c tIndex = tRoot.GetStrItem ( "table", sError );
  1176. if ( !tIndex )
  1177. {
  1178. tIndex = tRoot.GetStrItem ( "index", sError, true );
  1179. if ( !tIndex )
  1180. return false;
  1181. sError = "";
  1182. }
  1183. auto & tQuery = tPJQuery.m_tQuery;
  1184. tQuery.m_sIndexes = tIndex.StrVal();
  1185. if ( tQuery.m_sIndexes==g_szAll )
  1186. tQuery.m_sIndexes = "*";
  1187. if ( !ParseLimits ( tRoot, tQuery, sError ) )
  1188. return false;
  1189. JsonObj_c tJsonQuery = tRoot.GetItem("query");
  1190. JsonObj_c tKNNQuery = tRoot.GetItem("knn");
  1191. if ( tJsonQuery && tKNNQuery )
  1192. return TlsMsg::Err ( "\"query\" can't be used together with \"knn\"" );
  1193. // common code used by search queries and update/delete by query
  1194. if ( !ParseJsonQueryFilters ( tJsonQuery, tQuery, sError, tPJQuery.m_sWarning ) )
  1195. return false;
  1196. if ( !ParseKNNQuery ( tKNNQuery, tQuery, sError, tPJQuery.m_sWarning ) )
  1197. return false;
  1198. if ( tKNNQuery && !ParseJsonQueryFilters ( tKNNQuery, tQuery, sError, tPJQuery.m_sWarning ) )
  1199. return false;
  1200. if ( !ParseJoin ( tRoot, tQuery, sError, tPJQuery.m_sWarning ) )
  1201. return false;
  1202. if ( !ParseOptions ( tRoot, tPJQuery, sError ) )
  1203. return false;
  1204. if ( !tRoot.FetchBoolItem ( tPJQuery.m_bProfile, "profile", sError, true ) )
  1205. return false;
  1206. if ( !tRoot.FetchIntItem ( tPJQuery.m_iPlan, "plan", sError, true ) )
  1207. return false;
  1208. // expression columns go first to select list
  1209. JsonObj_c tScriptFields = tRoot.GetItem ( "script_fields" );
  1210. if ( tScriptFields && !ParseScriptFields ( tScriptFields, tQuery, sError ) )
  1211. return false;
  1212. // a synonym to "script_fields"
  1213. JsonObj_c tExpressions = tRoot.GetItem ( "expressions" );
  1214. if ( tExpressions && !ParseExpressions ( tExpressions, tQuery, sError ) )
  1215. return false;
  1216. JsonObj_c tSnip = tRoot.GetObjItem ( "highlight", sError, true );
  1217. if ( tSnip )
  1218. {
  1219. if ( !ParseSnippet ( tSnip, tQuery, sError ) )
  1220. return false;
  1221. }
  1222. else if ( !sError.IsEmpty() )
  1223. return false;
  1224. JsonObj_c tSort = tRoot.GetItem("sort");
  1225. if ( tSort && !( tSort.IsArray() || tSort.IsObj() ) )
  1226. {
  1227. sError = "\"sort\" property value should be an array or an object";
  1228. return false;
  1229. }
  1230. if ( tSort )
  1231. {
  1232. bool bGotWeight = false;
  1233. if ( !ParseSort ( tSort, tQuery, bGotWeight, sError, tPJQuery.m_sWarning ) )
  1234. return false;
  1235. JsonObj_c tTrackScore = tRoot.GetBoolItem ( "track_scores", sError, true );
  1236. if ( !sError.IsEmpty() )
  1237. return false;
  1238. bool bTrackScore = tTrackScore && tTrackScore.BoolVal();
  1239. if ( !bGotWeight && !bTrackScore )
  1240. tQuery.m_eRanker = SPH_RANK_NONE;
  1241. }
  1242. else
  1243. {
  1244. // set defaults
  1245. tQuery.m_eSort = SPH_SORT_EXTENDED;
  1246. tQuery.m_sSortBy = "@weight desc";
  1247. tQuery.m_sOrderBy = "@weight desc";
  1248. }
  1249. // source \ select filter
  1250. JsonObj_c tSelect = tRoot.GetItem("_source");
  1251. bool bParsedSelect = ( !tSelect || ParseSelect ( tSelect, tQuery, sError ) );
  1252. if ( !bParsedSelect )
  1253. return false;
  1254. // docvalue_fields
  1255. JsonObj_c tDocFields = tRoot.GetItem ( "docvalue_fields" );
  1256. if ( tDocFields && !ParseDocFields ( tDocFields, tQuery, sError ) )
  1257. return false;
  1258. // aggs
  1259. JsonObj_c tAggs = tRoot.GetItem ( "aggs" );
  1260. if ( tAggs && !ParseAggregates ( tAggs, tQuery, sError ) )
  1261. return false;
  1262. if ( !SetupScroll ( tQuery, sError ) )
  1263. return false;
  1264. SetupKNNLimit(tQuery);
  1265. return true;
  1266. }
  1267. bool ParseJsonInsert ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, DocID_t & tDocId, bool bReplace, CSphString & sError )
  1268. {
  1269. if ( !ParseIndexId ( tRoot, false, tStmt, tDocId, sError ) )
  1270. return false;
  1271. if ( !ParseCluster ( tRoot, tStmt, sError ) )
  1272. return false;
  1273. tStmt.m_dInsertSchema.Add ( sphGetDocidName() );
  1274. SqlInsert_t & tId = tStmt.m_dInsertValues.Add();
  1275. tId.m_iType = SqlInsert_t::CONST_INT;
  1276. tId.SetValueInt ( (uint64_t)tDocId, false );
  1277. // "doc" is optional
  1278. JsonObj_c tSource = tRoot.GetItem("doc");
  1279. return ParseJsonInsertSource ( tSource, tStmt, bReplace, sError );
  1280. }
  1281. static bool ParseJsonInsertSource ( const JsonObj_c & tSource, StrVec_t & dInsertSchema, CSphVector<SqlInsert_t> & dInsertValues, CSphString & sError )
  1282. {
  1283. if ( !tSource )
  1284. return true;
  1285. for ( const auto & tItem : tSource )
  1286. {
  1287. dInsertSchema.Add ( tItem.Name() );
  1288. dInsertSchema.Last().ToLower();
  1289. SqlInsert_t & tNewValue = dInsertValues.Add();
  1290. if ( tItem.IsStr() || tItem.IsNull() )
  1291. {
  1292. tNewValue.m_iType = ( tItem.IsStr() ? SqlInsert_t::QUOTED_STRING : SqlInsert_t::TOK_NULL );
  1293. tNewValue.m_sVal = tItem.StrVal();
  1294. } else if ( tItem.IsDbl() )
  1295. {
  1296. tNewValue.m_iType = SqlInsert_t::CONST_FLOAT;
  1297. tNewValue.m_fVal = tItem.FltVal();
  1298. } else if ( tItem.IsInt() || tItem.IsBool() || tItem.IsUint() )
  1299. {
  1300. tNewValue.m_iType = SqlInsert_t::CONST_INT;
  1301. tNewValue.SetValueInt ( tItem.IntVal() );
  1302. } else if ( tItem.IsArray() || tItem.IsObj() )
  1303. {
  1304. // could be either object or array
  1305. // all fit to JSON attribute
  1306. // array of int fits MVA attribute
  1307. tNewValue.m_sVal = tItem.AsString();
  1308. bool bMVA = false;
  1309. if ( tItem.IsArray() )
  1310. {
  1311. tNewValue.m_iType = SqlInsert_t::CONST_MVA;
  1312. tNewValue.m_pVals = new RefcountedVector_c<AttrValue_t>;
  1313. for ( const auto & tArrayItem : tItem )
  1314. {
  1315. if ( !tArrayItem.IsInt() && !tArrayItem.IsDbl() )
  1316. break;
  1317. tNewValue.m_pVals->Add ( { tArrayItem.IntVal(), tArrayItem.FltVal() } );
  1318. bMVA = true;
  1319. }
  1320. if ( !bMVA && !tItem.Size() )
  1321. bMVA = true;
  1322. }
  1323. if ( !bMVA )
  1324. {
  1325. tNewValue.m_iType = SqlInsert_t::QUOTED_STRING;
  1326. tNewValue.m_pVals = nullptr;
  1327. }
  1328. } else
  1329. {
  1330. sError.SetSprintf ( "unsupported value type '%s' in field '%s'", tItem.TypeName(), tItem.Name() );
  1331. return false;
  1332. }
  1333. }
  1334. return true;
  1335. }
  1336. bool ParseJsonInsertSource ( const JsonObj_c & tSource, SqlStmt_t & tStmt, bool bReplace, CSphString & sError )
  1337. {
  1338. tStmt.m_eStmt = bReplace ? STMT_REPLACE : STMT_INSERT;
  1339. if ( !ParseJsonInsertSource ( tSource, tStmt.m_dInsertSchema, tStmt.m_dInsertValues, sError ) )
  1340. return false;
  1341. if ( !tStmt.CheckInsertIntegrity() )
  1342. {
  1343. sError = "wrong number of values";
  1344. return false;
  1345. }
  1346. return true;
  1347. }
  1348. bool sphParseJsonInsert ( Str_t sInsert, SqlStmt_t & tStmt, DocID_t & tDocId, bool bReplace, CSphString & sError )
  1349. {
  1350. JsonObj_c tRoot ( sInsert );
  1351. return ParseJsonInsert ( tRoot, tStmt, tDocId, bReplace, sError );
  1352. }
  1353. static bool ParseUpdateDeleteQueries ( const JsonObj_c & tRoot, bool bDelete, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
  1354. {
  1355. tStmt.m_tQuery.m_sSelect = "id";
  1356. if ( !ParseIndex ( tRoot, tStmt, sError ) )
  1357. return false;
  1358. if ( !ParseCluster ( tRoot, tStmt, sError ) )
  1359. return false;
  1360. JsonObj_c tId = tRoot.GetItem ( "id" );
  1361. if ( tId )
  1362. {
  1363. if ( !ParseIndexId ( tRoot, bDelete, tStmt, tDocId, sError ) )
  1364. return false;
  1365. CSphFilterSettings & tFilter = tStmt.m_tQuery.m_dFilters.Add();
  1366. tFilter.m_eType = SPH_FILTER_VALUES;
  1367. if ( bDelete && tId.IsArray() )
  1368. {
  1369. for ( const auto & tItem : tId )
  1370. tFilter.m_dValues.Add ( tItem.IntVal() );
  1371. } else
  1372. {
  1373. tFilter.m_dValues.Add ( tId.IntVal() );
  1374. }
  1375. tFilter.m_sAttrName = "id";
  1376. tDocId = tFilter.m_dValues[0];
  1377. }
  1378. // "query" is optional
  1379. JsonObj_c tQuery = tRoot.GetItem("query");
  1380. if ( tQuery && tId )
  1381. {
  1382. sError = R"(both "id" and "query" specified)";
  1383. return false;
  1384. }
  1385. CSphString sWarning; // fixme: add to results
  1386. return ParseJsonQueryFilters ( tQuery, tStmt.m_tQuery, sError, sWarning );
  1387. }
  1388. bool ParseJsonUpdate ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
  1389. {
  1390. CSphAttrUpdate & tUpd = tStmt.AttrUpdate();
  1391. tStmt.m_eStmt = STMT_UPDATE;
  1392. if ( !ParseUpdateDeleteQueries ( tRoot, false, tStmt, tDocId, sError ) )
  1393. return false;
  1394. JsonObj_c tSource = tRoot.GetObjItem ( "doc", sError );
  1395. if ( !tSource )
  1396. return false;
  1397. CSphVector<int64_t> dMVA;
  1398. for ( const auto & tItem : tSource )
  1399. {
  1400. bool bFloat = tItem.IsNum();
  1401. bool bInt = tItem.IsInt();
  1402. bool bBool = tItem.IsBool();
  1403. bool bString = tItem.IsStr();
  1404. bool bArray = tItem.IsArray();
  1405. bool bObject = tItem.IsObj();
  1406. if ( !bFloat && !bInt && !bBool && !bString && !bArray && !bObject )
  1407. {
  1408. sError.SetSprintf ( "unsupported value type '%s' in field '%s'", tItem.TypeName(), tItem.Name() );
  1409. return false;
  1410. }
  1411. CSphString sAttr = tItem.Name();
  1412. TypedAttribute_t & tTypedAttr = tUpd.m_dAttributes.Add();
  1413. tTypedAttr.m_sName = sAttr.ToLower();
  1414. if ( bInt || bBool )
  1415. {
  1416. int64_t iValue = tItem.IntVal();
  1417. tUpd.m_dPool.Add ( (DWORD)iValue );
  1418. auto uHi = (DWORD)( iValue>>32 );
  1419. if ( uHi )
  1420. {
  1421. tUpd.m_dPool.Add ( uHi );
  1422. tTypedAttr.m_eType = SPH_ATTR_BIGINT;
  1423. } else
  1424. tTypedAttr.m_eType = SPH_ATTR_INTEGER;
  1425. }
  1426. else if ( bFloat )
  1427. {
  1428. auto fValue = tItem.FltVal();
  1429. tUpd.m_dPool.Add ( sphF2DW ( fValue ) );
  1430. tTypedAttr.m_eType = SPH_ATTR_FLOAT;
  1431. }
  1432. else if ( bString || bObject )
  1433. {
  1434. CSphString sEncoded;
  1435. const char * szValue = tItem.SzVal();
  1436. if ( bObject )
  1437. {
  1438. sEncoded = tItem.AsString();
  1439. szValue = sEncoded.cstr();
  1440. }
  1441. auto iLength = (int) strlen ( szValue );
  1442. tUpd.m_dPool.Add ( tUpd.m_dBlobs.GetLength() );
  1443. tUpd.m_dPool.Add ( iLength );
  1444. if ( iLength )
  1445. {
  1446. BYTE * pBlob = tUpd.m_dBlobs.AddN ( iLength+2 ); // a couple of extra \0 for json parser to be happy
  1447. memcpy ( pBlob, szValue, iLength );
  1448. pBlob[iLength] = 0;
  1449. pBlob[iLength+1] = 0;
  1450. }
  1451. tTypedAttr.m_eType = SPH_ATTR_STRING;
  1452. } else if ( bArray )
  1453. {
  1454. dMVA.Resize ( 0 );
  1455. for ( const auto & tArrayItem : tItem )
  1456. {
  1457. if ( !tArrayItem.IsInt() )
  1458. {
  1459. sError = "MVA elements should be integers";
  1460. return false;
  1461. }
  1462. dMVA.Add ( tArrayItem.IntVal() );
  1463. }
  1464. dMVA.Uniq();
  1465. tUpd.m_dPool.Add ( dMVA.GetLength()*2 ); // as 64 bit stored into DWORD vector
  1466. tTypedAttr.m_eType = SPH_ATTR_UINT32SET;
  1467. for ( int64_t uVal : dMVA )
  1468. {
  1469. if ( uVal>UINT_MAX )
  1470. tTypedAttr.m_eType = SPH_ATTR_INT64SET;
  1471. *(( int64_t* ) tUpd.m_dPool.AddN ( 2 )) = uVal;
  1472. }
  1473. }
  1474. }
  1475. return true;
  1476. }
  1477. bool sphParseJsonUpdate ( Str_t sUpdate, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
  1478. {
  1479. JsonObj_c tRoot ( sUpdate );
  1480. return ParseJsonUpdate ( tRoot, tStmt, tDocId, sError );
  1481. }
  1482. static bool ParseJsonDelete ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
  1483. {
  1484. tStmt.m_eStmt = STMT_DELETE;
  1485. return ParseUpdateDeleteQueries ( tRoot, true, tStmt, tDocId, sError );
  1486. }
  1487. bool sphParseJsonDelete ( Str_t sDelete, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
  1488. {
  1489. JsonObj_c tRoot ( sDelete );
  1490. return ParseJsonDelete ( tRoot, tStmt, tDocId, sError );
  1491. }
  1492. bool sphParseJsonStatement ( const char * szStmt, SqlStmt_t & tStmt, CSphString & sStmt, CSphString & sQuery, DocID_t & tDocId, CSphString & sError )
  1493. {
  1494. JsonObj_c tRoot ( szStmt );
  1495. if ( !tRoot )
  1496. {
  1497. sError.SetSprintf ( "unable to parse: %s", tRoot.GetErrorPtr() );
  1498. return false;
  1499. }
  1500. JsonObj_c tJsonStmt = tRoot[0];
  1501. if ( !tJsonStmt )
  1502. {
  1503. sError = "no statement found";
  1504. return false;
  1505. }
  1506. sStmt = tJsonStmt.Name();
  1507. if ( !tJsonStmt.IsObj() )
  1508. {
  1509. sError.SetSprintf ( "statement %s should be an object", sStmt.cstr() );
  1510. return false;
  1511. }
  1512. if ( sStmt=="index" || sStmt=="replace" )
  1513. {
  1514. if ( !ParseJsonInsert ( tJsonStmt, tStmt, tDocId, true, sError ) )
  1515. return false;
  1516. } else if ( sStmt=="create" || sStmt=="insert" )
  1517. {
  1518. if ( !ParseJsonInsert ( tJsonStmt, tStmt, tDocId, false, sError ) )
  1519. return false;
  1520. } else if ( sStmt=="update" )
  1521. {
  1522. if ( !ParseJsonUpdate ( tJsonStmt, tStmt, tDocId, sError ) )
  1523. return false;
  1524. } else if ( sStmt=="delete" )
  1525. {
  1526. if ( !ParseJsonDelete ( tJsonStmt, tStmt, tDocId, sError ) )
  1527. return false;
  1528. } else
  1529. {
  1530. sError.SetSprintf ( "unknown bulk operation: %s", sStmt.cstr() );
  1531. return false;
  1532. }
  1533. sQuery = tJsonStmt.AsString();
  1534. return true;
  1535. }
  1536. //////////////////////////////////////////////////////////////////////////
  1537. static void PackedShortMVA2Json ( StringBuilder_c & tOut, const BYTE * pMVA )
  1538. {
  1539. auto dMVA = sphUnpackPtrAttr ( pMVA );
  1540. auto nValues = dMVA.second / sizeof ( DWORD );
  1541. auto pValues = ( const DWORD * ) dMVA.first;
  1542. for ( int i = 0; i<(int) nValues; ++i )
  1543. tOut.NtoA(pValues[i]);
  1544. }
  1545. static void PackedWideMVA2Json ( StringBuilder_c & tOut, const BYTE * pMVA )
  1546. {
  1547. auto dMVA = sphUnpackPtrAttr ( pMVA );
  1548. auto nValues = dMVA.second / sizeof ( int64_t );
  1549. auto pValues = ( const int64_t * ) dMVA.first;
  1550. for ( int i = 0; i<(int) nValues; ++i )
  1551. tOut.NtoA(pValues[i]);
  1552. }
  1553. static void PackedFloatVec2Json ( StringBuilder_c & tOut, const BYTE * pFV )
  1554. {
  1555. auto tFV = sphUnpackPtrAttr(pFV);
  1556. int iNumValues = tFV.second / sizeof(float);
  1557. auto pValues = (const float *)tFV.first;
  1558. for ( int i = 0; i<iNumValues; i++ )
  1559. tOut.FtoA(pValues[i]);
  1560. }
  1561. static void JsonObjAddAttr ( JsonEscapedBuilder & tOut, ESphAttr eAttrType, const CSphMatch & tMatch, const CSphAttrLocator & tLoc, int iMulti=1 )
  1562. {
  1563. switch ( eAttrType )
  1564. {
  1565. case SPH_ATTR_INTEGER:
  1566. case SPH_ATTR_TIMESTAMP:
  1567. case SPH_ATTR_TOKENCOUNT:
  1568. case SPH_ATTR_BIGINT:
  1569. tOut.NtoA ( tMatch.GetAttr(tLoc) * iMulti );
  1570. break;
  1571. case SPH_ATTR_UINT64:
  1572. tOut.NtoA ( (uint64_t)tMatch.GetAttr(tLoc) * iMulti );
  1573. break;
  1574. case SPH_ATTR_FLOAT:
  1575. tOut.FtoA ( tMatch.GetAttrFloat(tLoc) * iMulti );
  1576. break;
  1577. case SPH_ATTR_DOUBLE:
  1578. tOut.DtoA ( tMatch.GetAttrDouble(tLoc) * iMulti );
  1579. break;
  1580. case SPH_ATTR_BOOL:
  1581. tOut << ( tMatch.GetAttr ( tLoc ) ? "true" : "false" );
  1582. break;
  1583. case SPH_ATTR_UINT32SET_PTR:
  1584. case SPH_ATTR_INT64SET_PTR:
  1585. case SPH_ATTR_FLOAT_VECTOR_PTR:
  1586. {
  1587. auto _ = tOut.Array ();
  1588. const auto * pMVA = ( const BYTE * ) tMatch.GetAttr ( tLoc );
  1589. if ( eAttrType==SPH_ATTR_UINT32SET_PTR )
  1590. PackedShortMVA2Json ( tOut, pMVA );
  1591. else if ( eAttrType==SPH_ATTR_INT64SET_PTR )
  1592. PackedWideMVA2Json ( tOut, pMVA );
  1593. else
  1594. PackedFloatVec2Json ( tOut, pMVA );
  1595. }
  1596. break;
  1597. case SPH_ATTR_STRINGPTR:
  1598. {
  1599. const auto * pString = ( const BYTE * ) tMatch.GetAttr ( tLoc );
  1600. auto dString = sphUnpackPtrAttr ( pString );
  1601. // special process for legacy typed strings
  1602. if ( dString.second>1 && dString.first[dString.second-2]=='\0')
  1603. {
  1604. auto uSubtype = dString.first[dString.second-1];
  1605. dString.second -= 2;
  1606. switch ( uSubtype)
  1607. {
  1608. case 1: // ql
  1609. {
  1610. ScopedComma_c sBrackets ( tOut, nullptr, R"({"ql":)", "}" );
  1611. tOut.AppendEscapedWithComma (( const char* ) dString.first, dString.second);
  1612. break;
  1613. }
  1614. case 0: // json
  1615. tOut << ( const char* ) dString.first;
  1616. break;
  1617. default:
  1618. tOut.Sprintf ("\"internal error! wrong subtype of stringptr %d\"", uSubtype );
  1619. }
  1620. break;
  1621. }
  1622. tOut.AppendEscapedWithComma ( ( const char * ) dString.first, dString.second );
  1623. }
  1624. break;
  1625. case SPH_ATTR_JSON_PTR:
  1626. {
  1627. const auto * pJSON = ( const BYTE * ) tMatch.GetAttr ( tLoc );
  1628. auto dJson = sphUnpackPtrAttr ( pJSON );
  1629. // no object at all? return NULL
  1630. if ( IsEmpty ( dJson ) )
  1631. tOut << "null";
  1632. else
  1633. sphJsonFormat ( tOut, dJson.first );
  1634. }
  1635. break;
  1636. case SPH_ATTR_FACTORS:
  1637. case SPH_ATTR_FACTORS_JSON:
  1638. {
  1639. const auto * pFactors = ( const BYTE * ) tMatch.GetAttr ( tLoc );
  1640. auto dFactors = sphUnpackPtrAttr ( pFactors );
  1641. if ( IsEmpty ( dFactors ))
  1642. tOut << "null";
  1643. else
  1644. sphFormatFactors ( tOut, (const unsigned int *) dFactors.first, true );
  1645. }
  1646. break;
  1647. case SPH_ATTR_JSON_FIELD_PTR:
  1648. {
  1649. const auto * pField = ( const BYTE * ) tMatch.GetAttr ( tLoc );
  1650. auto dField = sphUnpackPtrAttr ( pField );
  1651. if ( IsEmpty ( dField ))
  1652. {
  1653. tOut << "null";
  1654. break;
  1655. }
  1656. auto eJson = ESphJsonType ( *dField.first++ );
  1657. if ( eJson==JSON_NULL )
  1658. tOut << "null";
  1659. else
  1660. sphJsonFieldFormat ( tOut, dField.first, eJson, true );
  1661. }
  1662. break;
  1663. default:
  1664. assert ( 0 && "Unknown attribute" );
  1665. break;
  1666. }
  1667. }
  1668. static void JsonObjAddAttr ( JsonEscapedBuilder & tOut, ESphAttr eAttrType, const char * szCol, const CSphMatch & tMatch, const CSphAttrLocator & tLoc )
  1669. {
  1670. assert ( sphPlainAttrToPtrAttr ( eAttrType )==eAttrType );
  1671. tOut.AppendName ( szCol );
  1672. JsonObjAddAttr ( tOut, eAttrType, tMatch, tLoc );
  1673. }
  1674. static bool IsHighlightAttr ( const CSphString & sName )
  1675. {
  1676. return sName.Begins ( g_szHighlight );
  1677. }
  1678. static CSphString GetJoinedWeightName ( const CSphQuery & tQuery )
  1679. {
  1680. CSphString sWeight;
  1681. sWeight.SetSprintf ( "%s.weight()", tQuery.m_sJoinIdx.cstr() );
  1682. return sWeight;
  1683. }
  1684. static bool IsJoinedWeight ( const CSphString & sAttr, const CSphQuery & tQuery )
  1685. {
  1686. if ( tQuery.m_sJoinIdx.IsEmpty() )
  1687. return false;
  1688. return sAttr==GetJoinedWeightName(tQuery);
  1689. }
  1690. static bool NeedToSkipAttr ( const CSphString & sName, const CSphQuery & tQuery )
  1691. {
  1692. const char * szName = sName.cstr();
  1693. if ( szName[0]=='i' && szName[1]=='d' && szName[2]=='\0' ) return true;
  1694. if ( sName.Begins ( g_szHighlight ) ) return true;
  1695. if ( sName.Begins ( GetFilterAttrPrefix() ) ) return true;
  1696. if ( sName.Begins ( g_szOrder ) ) return true;
  1697. if ( sName.Begins ( GetKnnDistAttrName() ) ) return true;
  1698. if ( IsJoinedWeight ( sName, tQuery ) ) return true;
  1699. if ( !tQuery.m_dIncludeItems.GetLength() && !tQuery.m_dExcludeItems.GetLength () )
  1700. return false;
  1701. // empty include - shows all select list items
  1702. // exclude with only "*" - skip all select list items
  1703. bool bInclude = ( tQuery.m_dIncludeItems.GetLength()==0 );
  1704. for ( const auto &iItem: tQuery.m_dIncludeItems )
  1705. {
  1706. if ( sphWildcardMatch ( szName, iItem.cstr() ) )
  1707. {
  1708. bInclude = true;
  1709. break;
  1710. }
  1711. }
  1712. if ( bInclude && tQuery.m_dExcludeItems.GetLength() )
  1713. {
  1714. for ( const auto& iItem: tQuery.m_dExcludeItems )
  1715. {
  1716. if ( sphWildcardMatch ( szName, iItem.cstr() ) )
  1717. {
  1718. bInclude = false;
  1719. break;
  1720. }
  1721. }
  1722. }
  1723. return !bInclude;
  1724. }
  1725. namespace { // static
  1726. void EncodeHighlight ( const CSphMatch & tMatch, int iAttr, const ISphSchema & tSchema, JsonEscapedBuilder & tOut )
  1727. {
  1728. const CSphColumnInfo & tCol = tSchema.GetAttr(iAttr);
  1729. ScopedComma_c tHighlightComma ( tOut, ",", R"("highlight":{)", "}", false );
  1730. auto dSnippet = sphUnpackPtrAttr ((const BYTE *) tMatch.GetAttr ( tCol.m_tLocator ));
  1731. SnippetResult_t tRes = UnpackSnippetData ( dSnippet );
  1732. for ( const auto & tField : tRes.m_dFields )
  1733. {
  1734. tOut.AppendName ( tField.m_sName.cstr() );
  1735. ScopedComma_c tHighlight ( tOut, ",", "[", "]", false );
  1736. // we might want to add passage separators to field text here
  1737. for ( const auto & tPassage : tField.m_dPassages )
  1738. tOut.AppendEscapedWithComma ( (const char *)tPassage.m_dText.Begin(), tPassage.m_dText.GetLength() );
  1739. }
  1740. }
  1741. static const char * GetName ( const CSphString & sName )
  1742. {
  1743. return sName.cstr();
  1744. }
  1745. static const char * GetName ( const JsonDocField_t & tDF )
  1746. {
  1747. return tDF.m_sName.cstr();
  1748. }
  1749. template <typename T>
  1750. void EncodeFields ( const CSphVector<T> & dFields, const AggrResult_t & tRes, const CSphMatch & tMatch, const ISphSchema & tSchema, bool bValArray, const char * sPrefix, const char * sEnd, JsonEscapedBuilder & tOut )
  1751. {
  1752. JsonEscapedBuilder tDFVal;
  1753. tOut.StartBlock ( ",", sPrefix, sEnd );
  1754. for ( const T & tDF : dFields )
  1755. {
  1756. const CSphColumnInfo * pCol = tSchema.GetAttr ( GetName ( tDF ) );
  1757. if ( !pCol )
  1758. {
  1759. tOut += R"("Default")";
  1760. continue;
  1761. }
  1762. // FIXME!!! add format support
  1763. tDFVal.Clear();
  1764. JsonObjAddAttr ( tDFVal, pCol->m_eAttrType, tMatch, pCol->m_tLocator );
  1765. if ( bValArray )
  1766. tOut.Sprintf ( "%s", tDFVal.cstr() );
  1767. else
  1768. tOut.Sprintf ( R"("%s":["%s"])", GetName ( tDF ), tDFVal.cstr() );
  1769. }
  1770. tOut.FinishBlock ( false ); // close obj
  1771. }
  1772. struct CompositeLocator_t
  1773. {
  1774. ESphAttr m_eAttrType = SPH_ATTR_NONE;
  1775. CSphAttrLocator m_tLocator;
  1776. const char * m_sName = nullptr;
  1777. CompositeLocator_t ( const CSphColumnInfo & tCol, const char * sName )
  1778. : m_eAttrType ( tCol.m_eAttrType )
  1779. , m_tLocator ( tCol.m_tLocator )
  1780. , m_sName ( sName )
  1781. {}
  1782. CompositeLocator_t() = default;
  1783. };
  1784. struct AggrKeyTrait_t
  1785. {
  1786. const CSphColumnInfo * m_pKey = nullptr;
  1787. CSphVector<CompositeLocator_t> m_dCompositeKeys;
  1788. bool m_bKeyed = false;
  1789. RangeNameHash_t m_tRangeNames;
  1790. };
  1791. static bool GetAggrKey ( const JsonAggr_t & tAggr, const CSphSchema & tSchema, int iAggrItem, int iNow, AggrKeyTrait_t & tRes )
  1792. {
  1793. if ( tAggr.m_eAggrFunc==Aggr_e::NONE )
  1794. {
  1795. tRes.m_pKey = tSchema.GetAttr ( tAggr.m_sCol.cstr() );
  1796. } else if ( tAggr.m_eAggrFunc==Aggr_e::COMPOSITE )
  1797. {
  1798. for ( const auto & tItem : tAggr.m_dComposite )
  1799. {
  1800. const CSphColumnInfo * pCol = tSchema.GetAttr ( tItem.m_sColumn.cstr() );
  1801. CSphString sJsonCol;
  1802. if ( !pCol && sphJsonNameSplit ( tItem.m_sColumn.cstr(), nullptr, &sJsonCol ) )
  1803. pCol = tSchema.GetAttr ( sJsonCol.cstr() );
  1804. if ( !pCol )
  1805. return false;
  1806. tRes.m_dCompositeKeys.Add ( CompositeLocator_t ( *pCol, tItem.m_sAlias.cstr() ) );
  1807. }
  1808. } else
  1809. {
  1810. tRes.m_pKey = tSchema.GetAttr ( GetAggrName ( iAggrItem, tAggr.m_sCol ).cstr() );
  1811. switch ( tAggr.m_eAggrFunc )
  1812. {
  1813. case Aggr_e::RANGE:
  1814. GetRangeKeyNames ( tAggr.m_tRange, tRes.m_tRangeNames );
  1815. tRes.m_bKeyed = tAggr.m_tRange.m_bKeyed;
  1816. break;
  1817. case Aggr_e::DATE_RANGE:
  1818. GetRangeKeyNames ( tAggr.m_tDateRange, iNow, tRes.m_tRangeNames );
  1819. tRes.m_bKeyed = tAggr.m_tDateRange.m_bKeyed;
  1820. break;
  1821. case Aggr_e::HISTOGRAM:
  1822. tRes.m_bKeyed = tAggr.m_tHist.m_bKeyed;
  1823. break;
  1824. case Aggr_e::DATE_HISTOGRAM:
  1825. tRes.m_bKeyed = tAggr.m_tDateHist.m_bKeyed;
  1826. break;
  1827. default:
  1828. break;
  1829. }
  1830. }
  1831. return ( tRes.m_pKey || tRes.m_dCompositeKeys.GetLength() );
  1832. }
  1833. static const char * GetBucketPrefix ( const AggrKeyTrait_t & tKey, Aggr_e eAggrFunc, const RangeKeyDesc_t * pRange, const CSphMatch & tMatch, JsonEscapedBuilder & tPrefixBucketBlock )
  1834. {
  1835. const char * sPrefix = "{";
  1836. if ( tKey.m_bKeyed )
  1837. {
  1838. switch ( eAggrFunc )
  1839. {
  1840. case Aggr_e::RANGE:
  1841. case Aggr_e::DATE_RANGE:
  1842. {
  1843. tPrefixBucketBlock.Clear();
  1844. tPrefixBucketBlock.Appendf ( "\"%s\":{", pRange->m_sKey.cstr() );
  1845. sPrefix = tPrefixBucketBlock.cstr();
  1846. }
  1847. break;
  1848. case Aggr_e::HISTOGRAM:
  1849. {
  1850. tPrefixBucketBlock.Clear();
  1851. tPrefixBucketBlock.Appendf ( "\"");
  1852. JsonObjAddAttr ( tPrefixBucketBlock, tKey.m_pKey->m_eAttrType, tMatch, tKey.m_pKey->m_tLocator );
  1853. tPrefixBucketBlock.Appendf ( "\":{" );
  1854. sPrefix = tPrefixBucketBlock.cstr();
  1855. }
  1856. break;
  1857. case Aggr_e::DATE_HISTOGRAM:
  1858. {
  1859. tPrefixBucketBlock.Clear();
  1860. tPrefixBucketBlock.Appendf ( "\"");
  1861. time_t tSrcTime = tMatch.GetAttr ( tKey.m_pKey->m_tLocator );
  1862. FormatDate ( tSrcTime, tPrefixBucketBlock );
  1863. tPrefixBucketBlock.Appendf ( "\":{" );
  1864. sPrefix = tPrefixBucketBlock.cstr();
  1865. }
  1866. break;
  1867. default: break;
  1868. }
  1869. }
  1870. return sPrefix;
  1871. }
  1872. static void PrintKey ( const AggrKeyTrait_t & tKey, Aggr_e eAggrFunc, const RangeKeyDesc_t * pRange, const CSphMatch & tMatch, ResultSetFormat_e eFormat, const sph::StringSet & hDatetime, JsonEscapedBuilder & tBuf, JsonEscapedBuilder & tOut )
  1873. {
  1874. if ( eAggrFunc==Aggr_e::DATE_RANGE )
  1875. {
  1876. if ( !tKey.m_bKeyed )
  1877. tOut.Sprintf ( R"("key":"%s")", pRange->m_sKey.cstr() );
  1878. if ( !pRange->m_sFrom.IsEmpty() )
  1879. tOut.Sprintf ( R"("from":"%s")", pRange->m_sFrom.cstr() );
  1880. if ( !pRange->m_sTo.IsEmpty() )
  1881. tOut.Sprintf ( R"("to":"%s")", pRange->m_sTo.cstr() );
  1882. } else if ( eAggrFunc==Aggr_e::RANGE )
  1883. {
  1884. if ( !tKey.m_bKeyed )
  1885. tOut.Sprintf ( R"("key":"%s")", pRange->m_sKey.cstr() );
  1886. if ( !pRange->m_sFrom.IsEmpty() )
  1887. tOut.Sprintf ( R"("from":%s)", pRange->m_sFrom.cstr() );
  1888. if ( !pRange->m_sTo.IsEmpty() )
  1889. tOut.Sprintf ( R"("to":%s)", pRange->m_sTo.cstr() );
  1890. } else if ( eAggrFunc==Aggr_e::DATE_HISTOGRAM )
  1891. {
  1892. tBuf.Clear();
  1893. JsonObjAddAttr ( tBuf, tKey.m_pKey->m_eAttrType, tMatch, tKey.m_pKey->m_tLocator );
  1894. tOut.Sprintf ( R"("key":%s)", tBuf.cstr() );
  1895. tBuf.Clear();
  1896. time_t tSrcTime = tMatch.GetAttr ( tKey.m_pKey->m_tLocator );
  1897. FormatDate ( tSrcTime, tBuf );
  1898. tOut.Sprintf ( R"("key_as_string":"%s")", tBuf.cstr() );
  1899. } else if ( eAggrFunc==Aggr_e::COMPOSITE )
  1900. {
  1901. ScopedComma_c sBlock ( tOut, ",", R"("key":{)", "}" );
  1902. for ( const auto & tItem : tKey.m_dCompositeKeys )
  1903. JsonObjAddAttr ( tOut, tItem.m_eAttrType, tItem.m_sName, tMatch, tItem.m_tLocator );
  1904. } else if ( eFormat==ResultSetFormat_e::MntSearch )
  1905. {
  1906. JsonObjAddAttr ( tOut, tKey.m_pKey->m_eAttrType, "key", tMatch, tKey.m_pKey->m_tLocator );
  1907. } else
  1908. {
  1909. // FIXME!!! remove after proper data type added but now need to multiple datatime values by 1000 for compat aggs result set
  1910. int iMulti = 1;
  1911. if ( eFormat==ResultSetFormat_e::ES && hDatetime [ tKey.m_pKey->m_sName ] )
  1912. iMulti = 1000;
  1913. tBuf.Clear();
  1914. JsonObjAddAttr ( tBuf, tKey.m_pKey->m_eAttrType, tMatch, tKey.m_pKey->m_tLocator, iMulti );
  1915. tOut.Sprintf ( R"("key":%s)", tBuf.cstr() );
  1916. if ( tKey.m_pKey->m_eAttrType==SPH_ATTR_STRINGPTR )
  1917. tOut.Sprintf ( R"("key_as_string":%s)", tBuf.cstr() );
  1918. else
  1919. tOut.Sprintf ( R"("key_as_string":"%s")", tBuf.cstr() );
  1920. }
  1921. }
  1922. static VecTraits_T<CSphMatch> GetResultMatches ( const VecTraits_T<CSphMatch> & dMatches, const CSphSchema & tSchema, int iOff, int iCount, const JsonAggr_t & tAggr )
  1923. {
  1924. bool bHasCompositeAfter = ( dMatches.GetLength() && tAggr.m_eAggrFunc==Aggr_e::COMPOSITE && tAggr.m_dCompositeAfterKey.GetLength() );
  1925. if ( !bHasCompositeAfter )
  1926. return dMatches.Slice ( iOff, iCount );
  1927. CSphString sError;
  1928. CreateFilterContext_t tCtx;
  1929. tCtx.m_pFilters = &tAggr.m_dCompositeAfterKey;
  1930. tCtx.m_pMatchSchema = &tSchema;
  1931. tCtx.m_bScan = true;
  1932. if ( !sphCreateFilters ( tCtx, sError, sError ) || !sError.IsEmpty() )
  1933. {
  1934. sphWarning ( "failed to create \"after\" filter: %s", sError.cstr() );
  1935. return dMatches.Slice ( iOff, iCount );
  1936. }
  1937. int iFound = dMatches.GetFirst ( [&] ( const CSphMatch & tMatch ) { return tCtx.m_pFilter->Eval ( tMatch ); } );
  1938. if ( iOff<0 )
  1939. return dMatches.Slice ( iOff, iCount );
  1940. else
  1941. return dMatches.Slice ( iFound+1, iCount );
  1942. }
  1943. static bool IsSingleValue ( Aggr_e eAggr )
  1944. {
  1945. return ( eAggr==Aggr_e::MIN || eAggr==Aggr_e::MAX || eAggr==Aggr_e::SUM || eAggr==Aggr_e::AVG );
  1946. }
  1947. static void EncodeAggr ( const JsonAggr_t & tAggr, int iAggrItem, const AggrResult_t & tRes, ResultSetFormat_e eFormat, const sph::StringSet & hDatetime, int iNow, const CSphString & sDistinctName, JsonEscapedBuilder & tOut )
  1948. {
  1949. if ( tAggr.m_eAggrFunc==Aggr_e::COUNT )
  1950. return;
  1951. const CSphColumnInfo * pCount = tRes.m_tSchema.GetAttr ( "count(*)" );
  1952. AggrKeyTrait_t tKey;
  1953. bool bHasKey = GetAggrKey ( tAggr, tRes.m_tSchema, iAggrItem, iNow, tKey );
  1954. const CSphColumnInfo * pDistinct = nullptr;
  1955. if ( !sDistinctName.IsEmpty() )
  1956. pDistinct = tRes.m_tSchema.GetAttr ( sDistinctName.cstr() );
  1957. // might be null for empty result set
  1958. auto dMatches = GetResultMatches ( tRes.m_dResults.First().m_dMatches, tRes.m_tSchema, tRes.m_iOffset, tRes.m_iCount, tAggr );
  1959. CSphString sBucketName;
  1960. sBucketName.SetSprintf ( R"("%s":{)", tAggr.m_sBucketName.cstr() );
  1961. tOut.StartBlock ( ",", sBucketName.cstr(), "}" );
  1962. // aggr.significant
  1963. switch ( tAggr.m_eAggrFunc )
  1964. {
  1965. case Aggr_e::SIGNIFICANT: // FIXME!!! add support
  1966. tOut.Appendf ( "\"doc_count\":" INT64_FMT ",", tRes.m_iTotalMatches );
  1967. tOut.Appendf ( "\"bg_count\":" INT64_FMT ",", tRes.m_iTotalMatches );
  1968. break;
  1969. default: break;
  1970. }
  1971. // after_key for aggr.composite
  1972. if ( bHasKey && pCount && tAggr.m_eAggrFunc==Aggr_e::COMPOSITE && dMatches.GetLength() )
  1973. {
  1974. tOut.StartBlock ( ",", R"("after_key":{)", "}" );
  1975. for ( const auto & tItem : tKey.m_dCompositeKeys )
  1976. JsonObjAddAttr ( tOut, tItem.m_eAttrType, tItem.m_sName, dMatches.Last(), tItem.m_tLocator );
  1977. tOut.FinishBlock ( false ); // named bucket obj
  1978. }
  1979. if ( !IsSingleValue ( tAggr.m_eAggrFunc ) )
  1980. {
  1981. // buckets might be named objects or array
  1982. if ( tKey.m_bKeyed )
  1983. tOut.StartBlock ( ",", R"("buckets":{)", "}" );
  1984. else
  1985. tOut.StartBlock ( ",", R"("buckets":[)", "]" );
  1986. // might be null for empty result set
  1987. if ( bHasKey && pCount )
  1988. {
  1989. JsonEscapedBuilder tPrefixBucketBlock;
  1990. JsonEscapedBuilder tBufMatch;
  1991. for ( const CSphMatch & tMatch : dMatches )
  1992. {
  1993. RangeKeyDesc_t * pRange = nullptr;
  1994. if ( tAggr.m_eAggrFunc==Aggr_e::RANGE || tAggr.m_eAggrFunc==Aggr_e::DATE_RANGE )
  1995. {
  1996. int iBucket = tMatch.GetAttr ( tKey.m_pKey->m_tLocator );
  1997. pRange = tKey.m_tRangeNames ( iBucket );
  1998. // lets skip bucket with out of ranges index, ie _all
  1999. if ( !pRange )
  2000. continue;
  2001. }
  2002. // bucket item is array item or dict item
  2003. const char * sBucketPrefix = GetBucketPrefix ( tKey, tAggr.m_eAggrFunc, pRange, tMatch, tPrefixBucketBlock );
  2004. ScopedComma_c sBucketBlock ( tOut, ",", sBucketPrefix, "}" );
  2005. PrintKey ( tKey, tAggr.m_eAggrFunc, pRange, tMatch, eFormat, hDatetime, tBufMatch, tOut );
  2006. JsonObjAddAttr ( tOut, pCount->m_eAttrType, "doc_count", tMatch, pCount->m_tLocator );
  2007. // FIXME!!! add support
  2008. if ( tAggr.m_eAggrFunc==Aggr_e::SIGNIFICANT )
  2009. {
  2010. tOut.Sprintf ( R"("score":0.001)" );
  2011. JsonObjAddAttr ( tOut, pCount->m_eAttrType, "bg_count", tMatch, pCount->m_tLocator );
  2012. }
  2013. if ( pDistinct )
  2014. JsonObjAddAttr ( tOut, pDistinct->m_eAttrType, pDistinct->m_sName.cstr(), tMatch, pDistinct->m_tLocator );
  2015. }
  2016. }
  2017. tOut.FinishBlock ( false ); // buckets array
  2018. } else
  2019. {
  2020. if ( bHasKey && pCount && dMatches.GetLength() )
  2021. {
  2022. const CSphMatch & tMatch = dMatches[0];
  2023. JsonObjAddAttr ( tOut, tKey.m_pKey->m_eAttrType, "value", tMatch, tKey.m_pKey->m_tLocator );
  2024. }
  2025. }
  2026. tOut.FinishBlock ( false ); // named bucket obj
  2027. }
  2028. void JsonRenderAccessSpecs ( JsonEscapedBuilder & tRes, const bson::Bson_c & tBson, bool bWithZones )
  2029. {
  2030. using namespace bson;
  2031. {
  2032. ScopedComma_c sFieldsArray ( tRes, ",", "\"fields\":[", "]" );
  2033. Bson_c ( tBson.ChildByName ( SZ_FIELDS ) ).ForEach ( [&tRes] ( const NodeHandle_t & tNode ) {
  2034. tRes.AppendEscapedWithComma ( String ( tNode ).cstr() );
  2035. } );
  2036. }
  2037. int iPos = (int)Int ( tBson.ChildByName ( SZ_MAX_FIELD_POS ) );
  2038. if ( iPos )
  2039. tRes.Sprintf ( "\"max_field_pos\":%d", iPos );
  2040. if ( !bWithZones )
  2041. return;
  2042. auto tZones = tBson.GetFirstOf ( { SZ_ZONES, SZ_ZONESPANS } );
  2043. ScopedComma_c dZoneDelim ( tRes, ", ", ( tZones.first==1 ) ? "\"zonespans\":[" : "\"zones\":[", "]" );
  2044. Bson_c ( tZones.second ).ForEach ( [&tRes] ( const NodeHandle_t & tNode ) {
  2045. tRes << String ( tNode );
  2046. } );
  2047. }
  2048. bool JsonRenderKeywordNode ( JsonEscapedBuilder & tRes, const bson::Bson_c& tBson )
  2049. {
  2050. using namespace bson;
  2051. auto tWord = tBson.ChildByName ( SZ_WORD );
  2052. if ( IsNullNode ( tWord ) )
  2053. return false;
  2054. ScopedComma_c sRoot ( tRes.Object() );
  2055. tRes << R"("type":"KEYWORD")";
  2056. tRes << "\"word\":";
  2057. tRes.AppendEscapedSkippingComma ( String ( tWord ).cstr () );
  2058. tRes.Sprintf ( R"("querypos":%d)", Int ( tBson.ChildByName ( SZ_QUERYPOS ) ) );
  2059. if ( Bool ( tBson.ChildByName ( SZ_EXCLUDED ) ) )
  2060. tRes << R"("excluded":true)";
  2061. if ( Bool ( tBson.ChildByName ( SZ_EXPANDED ) ) )
  2062. tRes << R"("expanded":true)";
  2063. if ( Bool ( tBson.ChildByName ( SZ_FIELD_START ) ) )
  2064. tRes << R"("field_start":true)";
  2065. if ( Bool ( tBson.ChildByName ( SZ_FIELD_END ) ) )
  2066. tRes << R"("field_end":true)";
  2067. if ( Bool ( tBson.ChildByName ( SZ_FIELD_END ) ) )
  2068. tRes << R"("morphed":true)";
  2069. auto tBoost = tBson.ChildByName ( SZ_BOOST );
  2070. if ( !IsNullNode ( tBoost ) )
  2071. {
  2072. auto fBoost = Double ( tBoost );
  2073. if ( fBoost!=1.0f ) // really comparing floats?
  2074. tRes.Sprintf ( R"("boost":%f)", fBoost );
  2075. }
  2076. return true;
  2077. }
  2078. void FormatJsonPlanFromBson ( JsonEscapedBuilder& tOut, bson::NodeHandle_t dBson, PLAN_FLAVOUR ePlanFlavour )
  2079. {
  2080. using namespace bson;
  2081. if ( dBson==nullnode )
  2082. return;
  2083. if ( ePlanFlavour == PLAN_FLAVOUR::EDESCR )
  2084. {
  2085. auto dRootBlock = tOut.ObjectBlock();
  2086. tOut << "\"description\":";
  2087. tOut.AppendEscapedSkippingComma ( sph::RenderBsonPlanBrief ( dBson ).cstr() );
  2088. tOut.FinishBlocks ( dRootBlock );
  2089. return;
  2090. }
  2091. Bson_c tBson ( dBson );
  2092. if ( JsonRenderKeywordNode ( tOut, tBson) )
  2093. return;
  2094. auto dRootBlock = tOut.ObjectBlock();
  2095. tOut << "\"type\":";
  2096. tOut.AppendEscapedSkippingComma ( String ( tBson.ChildByName ( SZ_TYPE ) ).cstr() );
  2097. if ( ePlanFlavour==PLAN_FLAVOUR::EBOTH )
  2098. {
  2099. tOut << "\"description\":";
  2100. tOut.AppendEscapedSkippingComma ( sph::RenderBsonPlanBrief ( dBson ).cstr () );
  2101. }
  2102. Bson_c ( tBson.ChildByName ( SZ_OPTIONS ) ).ForEach ( [&tOut] ( CSphString&& sName, const NodeHandle_t & tNode ) {
  2103. tOut.Sprintf ( R"("options":"%s=%d")", sName.cstr (), (int) Int ( tNode ) );
  2104. } );
  2105. JsonRenderAccessSpecs ( tOut, dBson, true );
  2106. tOut.StartBlock ( ",", "\"children\":[", "]" );
  2107. Bson_c ( tBson.ChildByName ( SZ_CHILDREN ) ).ForEach ( [&] ( const NodeHandle_t & tNode ) {
  2108. FormatJsonPlanFromBson ( tOut, tNode, ePlanFlavour );
  2109. } );
  2110. tOut.FinishBlocks ( dRootBlock );
  2111. }
  2112. } // static
  2113. CSphString JsonEncodeResultError ( const CSphString & sError, int iStatus )
  2114. {
  2115. JsonEscapedBuilder tOut;
  2116. CSphString sResult;
  2117. tOut.StartBlock ( ",", "{ \"error\":", "}" );
  2118. tOut.AppendEscaped ( sError.cstr(), EscBld::eEscape );
  2119. tOut.AppendName ( "status" );
  2120. tOut << iStatus;
  2121. tOut.FinishBlock ( false );
  2122. tOut.MoveTo ( sResult ); // since simple return tOut.cstr() will cause copy of string, then returning it.
  2123. return sResult;
  2124. }
  2125. static CSphString JsonEncodeResultError ( const CSphString & sError, const char * sErrorType=nullptr, int * pStatus=nullptr, const char * sIndex=nullptr )
  2126. {
  2127. JsonEscapedBuilder tOut;
  2128. CSphString sResult;
  2129. tOut.StartBlock ( ",", "{", "}" );
  2130. tOut.StartBlock ( ",", R"("error":{)", "}" );
  2131. tOut.AppendName ( "type" );
  2132. tOut.AppendEscaped ( ( sErrorType ? sErrorType : "Error" ), EscBld::eEscape );
  2133. tOut.AppendName ( "reason" );
  2134. tOut.AppendEscaped ( sError.cstr(), EscBld::eEscape );
  2135. if ( sIndex )
  2136. {
  2137. tOut.AppendName ( "table" );
  2138. tOut.AppendEscaped ( sIndex, EscBld::eEscape );
  2139. }
  2140. tOut.FinishBlock ( false );
  2141. if ( pStatus )
  2142. {
  2143. tOut.AppendName ( "status" );
  2144. tOut << *pStatus;
  2145. }
  2146. tOut.FinishBlock ( false );
  2147. tOut.MoveTo ( sResult ); // since simple return tOut.cstr() will cause copy of string, then returning it.
  2148. return sResult;
  2149. }
  2150. CSphString JsonEncodeResultError ( const CSphString & sError, const char * sErrorType, int iStatus )
  2151. {
  2152. return JsonEncodeResultError ( sError, sErrorType, &iStatus );
  2153. }
  2154. CSphString JsonEncodeResultError ( const CSphString & sError, const char * sErrorType, int iStatus, const char * sIndex )
  2155. {
  2156. return JsonEncodeResultError ( sError, sErrorType, &iStatus, sIndex );
  2157. }
  2158. CSphString HandleShowProfile ( const QueryProfile_c& p )
  2159. {
  2160. #define SPH_QUERY_STATE( _name, _desc ) _desc,
  2161. static const char* dStates[SPH_QSTATE_TOTAL] = { SPH_QUERY_STATES };
  2162. #undef SPH_QUERY_STATES
  2163. JsonEscapedBuilder sProfile;
  2164. int64_t tmTotal = 0;
  2165. int iCount = 0;
  2166. for ( int i = 0; i < SPH_QSTATE_TOTAL; ++i )
  2167. {
  2168. if ( p.m_dSwitches[i] <= 0 )
  2169. continue;
  2170. tmTotal += p.m_tmTotal[i];
  2171. iCount += p.m_dSwitches[i];
  2172. }
  2173. {
  2174. auto arrayw = sProfile.ArrayW();
  2175. for ( int i = 0; i < SPH_QSTATE_TOTAL; ++i )
  2176. {
  2177. if ( p.m_dSwitches[i] <= 0 )
  2178. continue;
  2179. auto _ = sProfile.ObjectW();
  2180. sProfile.NamedString ( "status", dStates[i] );
  2181. sProfile.NamedVal ( "duration", FixedFrac_T<int64_t, 6> ( p.m_tmTotal[i] ) );
  2182. sProfile.NamedVal ( "switches", p.m_dSwitches[i] );
  2183. sProfile.NamedVal ( "percent", FixedFrac_T<int64_t, 2> ( PercentOf ( p.m_tmTotal[i], tmTotal, 2 ) ) );
  2184. }
  2185. {
  2186. auto _ = sProfile.ObjectW();
  2187. sProfile.NamedString ( "status", "total" );
  2188. sProfile.NamedVal ( "duration", FixedFrac_T<int64_t, 6> ( tmTotal ) );
  2189. sProfile.NamedVal ( "switches", iCount );
  2190. sProfile.NamedVal ( "percent", FixedFrac_T<int64_t, 2> ( PercentOf ( tmTotal, tmTotal, 2 ) ) );
  2191. }
  2192. }
  2193. return (CSphString)sProfile;
  2194. }
  2195. static void AddJoinedWeight ( JsonEscapedBuilder & tOut, const CSphQuery & tQuery, const CSphMatch & tMatch, const CSphColumnInfo * pJoinedWeightAttr )
  2196. {
  2197. if ( !pJoinedWeightAttr )
  2198. return;
  2199. tOut.Sprintf ( R"("%s._score":%d)", tQuery.m_sJoinIdx.cstr(), (int)tMatch.GetAttr ( pJoinedWeightAttr->m_tLocator ) );
  2200. }
  2201. CSphString sphEncodeResultJson ( const VecTraits_T<AggrResult_t>& dRes, const JsonQuery_c & tQuery, QueryProfile_c * pProfile, ResultSetFormat_e eFormat )
  2202. {
  2203. assert ( dRes.GetLength()>=1 );
  2204. const AggrResult_t & tRes = dRes[0];
  2205. if ( !tRes.m_iSuccesses )
  2206. return JsonEncodeResultError ( tRes.m_sError );
  2207. JsonEscapedBuilder tOut;
  2208. CSphString sResult;
  2209. tOut.ObjectBlock();
  2210. tOut.Sprintf (R"("took":%d,"timed_out":false)", tRes.m_iQueryTime);
  2211. if ( !tRes.m_sWarning.IsEmpty() )
  2212. {
  2213. tOut.StartBlock ( nullptr, R"("warning":{"reason":)", "}" );
  2214. tOut.AppendEscapedWithComma ( tRes.m_sWarning.cstr () );
  2215. tOut.FinishBlock ( false );
  2216. }
  2217. if ( eFormat==ResultSetFormat_e::ES )
  2218. tOut += R"("_shards":{ "total": 1, "successful": 1, "skipped": 0, "failed": 0 })";
  2219. auto sHitMeta = tOut.StartBlock ( ",", R"("hits":{)", "}" );
  2220. tOut.Sprintf ( R"("total":%d)", tRes.m_iTotalMatches );
  2221. tOut.Sprintf ( R"("total_relation":%s)", tRes.m_bTotalMatchesApprox ? R"("gte")" : R"("eq")" );
  2222. if ( eFormat==ResultSetFormat_e::ES )
  2223. tOut += R"("max_score": null)";
  2224. const ISphSchema & tSchema = tRes.m_tSchema;
  2225. CSphVector<BYTE> dTmp;
  2226. CSphBitvec tAttrsToSend;
  2227. sphGetAttrsToSend ( tSchema, false, true, tAttrsToSend );
  2228. const CSphColumnInfo * pJoinedWeightAttr = tQuery.m_sJoinIdx.IsEmpty() ? nullptr : tSchema.GetAttr ( GetJoinedWeightName(tQuery).cstr() );
  2229. int iHighlightAttr = -1;
  2230. int nSchemaAttrs = tSchema.GetAttrsCount();
  2231. CSphBitvec dSkipAttrs ( nSchemaAttrs );
  2232. for ( int iAttr=0; iAttr<nSchemaAttrs; iAttr++ )
  2233. {
  2234. if ( !tAttrsToSend.BitGet(iAttr) )
  2235. continue;
  2236. const CSphColumnInfo & tCol = tSchema.GetAttr(iAttr);
  2237. const CSphString & sName = tCol.m_sName;
  2238. if ( IsHighlightAttr ( sName ) )
  2239. iHighlightAttr = iAttr;
  2240. if ( NeedToSkipAttr ( sName, tQuery ) )
  2241. dSkipAttrs.BitSet ( iAttr );
  2242. if ( eFormat==ResultSetFormat_e::ES && tCol.m_eAttrType==SPH_ATTR_TOKENCOUNT )
  2243. dSkipAttrs.BitSet ( iAttr );
  2244. }
  2245. tOut.StartBlock ( ",", R"("hits":[)", "]" );
  2246. if ( !tQuery.m_bGroupEmulation )
  2247. {
  2248. const CSphColumnInfo * pId = tSchema.GetAttr ( sphGetDocidName() );
  2249. const CSphColumnInfo * pKNNDist = tSchema.GetAttr ( GetKnnDistAttrName() );
  2250. bool bCompatId = false;
  2251. const CSphColumnInfo * pCompatRaw = nullptr;
  2252. const CSphColumnInfo * pCompatVer = nullptr;
  2253. if ( eFormat==ResultSetFormat_e::ES )
  2254. {
  2255. const CSphColumnInfo * pCompatId = tSchema.GetAttr ( "_id" );
  2256. if ( pCompatId )
  2257. {
  2258. bCompatId = true;
  2259. pId = pCompatId;
  2260. }
  2261. pCompatRaw = tSchema.GetAttr ( "_raw" );
  2262. pCompatVer = tSchema.GetAttr ( "_version" );
  2263. }
  2264. bool bTag = tRes.m_bTagsAssigned;
  2265. int iTag = ( bTag ? 0 : tRes.m_dResults.First().m_iTag );
  2266. auto dMatches = tRes.m_dResults.First ().m_dMatches.Slice ( tRes.m_iOffset, tRes.m_iCount );
  2267. for ( const auto & tMatch : dMatches )
  2268. {
  2269. ScopedComma_c sQueryComma ( tOut, ",", "{", "}" );
  2270. // note, that originally there is string UID, so we just output number in quotes for docid here
  2271. // number in quotes in compat mode or just number for _id
  2272. if ( bCompatId || ( eFormat==ResultSetFormat_e::ES ) )
  2273. {
  2274. DocID_t tDocID = tMatch.GetAttr ( pId->m_tLocator );
  2275. tOut.Sprintf ( R"("_id":"%llu","_score":%d)", tDocID, tMatch.m_iWeight );
  2276. }
  2277. else if ( pId )
  2278. {
  2279. DocID_t tDocID = tMatch.GetAttr ( pId->m_tLocator );
  2280. tOut.Sprintf ( R"("_id":%U,"_score":%d)", tDocID, tMatch.m_iWeight );
  2281. }
  2282. else
  2283. tOut.Sprintf ( R"("_score":%d)", tMatch.m_iWeight );
  2284. AddJoinedWeight ( tOut, tQuery, tMatch, pJoinedWeightAttr );
  2285. if ( eFormat==ResultSetFormat_e::ES )
  2286. {
  2287. tOut.Sprintf ( R"("_index":"%s")", tRes.m_dIndexNames[bTag ? tMatch.m_iTag : iTag].scstr() ); // FIXME!!! breaks for multiple indexes
  2288. tOut += R"("_type": "doc")";
  2289. if ( pCompatVer )
  2290. JsonObjAddAttr ( tOut, pCompatVer->m_eAttrType, "_version", tMatch, pCompatVer->m_tLocator );
  2291. else
  2292. tOut += R"("_version": 1)";
  2293. }
  2294. if ( pKNNDist )
  2295. tOut.Sprintf( R"("_knn_dist":%f)", tMatch.GetAttrFloat ( pKNNDist->m_tLocator ) );
  2296. tOut.StartBlock ( ",", "\"_source\":{", "}");
  2297. if ( pCompatRaw )
  2298. JsonObjAddAttr ( tOut, pCompatRaw->m_eAttrType, "_raw", tMatch, pCompatRaw->m_tLocator );
  2299. else
  2300. for ( int iAttr=0; iAttr<nSchemaAttrs; iAttr++ )
  2301. {
  2302. if ( !tAttrsToSend.BitGet(iAttr) )
  2303. continue;
  2304. if ( dSkipAttrs.BitGet ( iAttr ) )
  2305. continue;
  2306. const CSphColumnInfo & tCol = tSchema.GetAttr(iAttr);
  2307. JsonObjAddAttr ( tOut, tCol.m_eAttrType, tCol.m_sName.cstr(), tMatch, tCol.m_tLocator );
  2308. }
  2309. tOut.FinishBlock ( false ); // _source obj
  2310. if ( iHighlightAttr!=-1 )
  2311. EncodeHighlight ( tMatch, iHighlightAttr, tSchema, tOut );
  2312. if ( eFormat==ResultSetFormat_e::ES )
  2313. {
  2314. if ( tQuery.m_dDocFields.GetLength() )
  2315. EncodeFields ( tQuery.m_dDocFields, tRes, tMatch, tSchema, false, R"("fields":{)", "}", tOut );
  2316. if ( tQuery.m_dSortFields.GetLength() )
  2317. EncodeFields ( tQuery.m_dSortFields, tRes, tMatch, tSchema, true, R"("sort":[)", "]", tOut );
  2318. }
  2319. }
  2320. }
  2321. tOut.FinishBlocks ( sHitMeta, false ); // hits array, hits meta
  2322. if ( tQuery.m_bGroupEmulation || dRes.GetLength()>1 )
  2323. {
  2324. sph::StringSet hDatetime;
  2325. if ( eFormat==ResultSetFormat_e::ES )
  2326. {
  2327. tQuery.m_dDocFields.for_each ( [&hDatetime]( const auto & tDocfield )
  2328. {
  2329. if ( tDocfield.m_bDateTime )
  2330. hDatetime.Add ( tDocfield.m_sName );
  2331. });
  2332. }
  2333. CSphString sDistinctName;
  2334. tQuery.m_dItems.any_of ( [&]( const CSphQueryItem & tItem ) {
  2335. if ( tItem.m_sExpr=="@distinct" )
  2336. {
  2337. sDistinctName = tItem.m_sAlias;
  2338. return true;
  2339. }
  2340. return false;
  2341. });
  2342. if ( tQuery.m_bGroupEmulation )
  2343. {
  2344. tOut.StartBlock ( ",", R"("aggregations":{)", "}");
  2345. EncodeAggr ( tQuery.m_dAggs[0], 1, dRes[0], eFormat, hDatetime, tQuery.m_iNow, sDistinctName, tOut );
  2346. tOut.FinishBlock ( false ); // aggregations obj
  2347. } else
  2348. {
  2349. assert ( dRes.GetLength()==tQuery.m_dAggs.GetLength()+1 );
  2350. tOut.StartBlock ( ",", R"("aggregations":{)", "}");
  2351. ARRAY_FOREACH ( i, tQuery.m_dAggs )
  2352. EncodeAggr ( tQuery.m_dAggs[i], i, dRes[i+1], eFormat, hDatetime, tQuery.m_iNow, sDistinctName, tOut );
  2353. tOut.FinishBlock ( false ); // aggregations obj
  2354. }
  2355. }
  2356. CSphString sScroll;
  2357. if ( dRes.GetLength() && FormatScrollSettings ( dRes.Last(), tQuery, sScroll ) )
  2358. tOut.Sprintf ( R"("scroll":"%s")", sScroll.cstr() );
  2359. if ( eFormat==ResultSetFormat_e::ES )
  2360. tOut += R"("status": 200)";
  2361. if ( pProfile && pProfile->m_bNeedProfile )
  2362. {
  2363. auto sProfile = HandleShowProfile ( *pProfile );
  2364. tOut.Sprintf ( R"("profile":{"query":%s})", sProfile.cstr () );
  2365. }
  2366. if ( pProfile && pProfile->m_eNeedPlan != PLAN_FLAVOUR::ENONE )
  2367. {
  2368. JsonEscapedBuilder sPlan;
  2369. FormatJsonPlanFromBson ( sPlan, bson::MakeHandle ( pProfile->m_dPlan ), pProfile->m_eNeedPlan );
  2370. if ( sPlan.IsEmpty() )
  2371. tOut << R"("plan":null)";
  2372. else
  2373. tOut.Sprintf ( R"("plan":{"query":%s})", sPlan.cstr() );
  2374. }
  2375. tOut.FinishBlocks (); tOut.MoveTo ( sResult ); return sResult;
  2376. }
  2377. JsonObj_c sphEncodeInsertResultJson ( const char * szIndex, bool bReplace, DocID_t tDocId, ResultSetFormat_e eFormat )
  2378. {
  2379. JsonObj_c tObj;
  2380. tObj.AddStr ( ( eFormat==ResultSetFormat_e::ES ? "_index" : "table" ), szIndex );
  2381. tObj.AddUint ( "id", tDocId );
  2382. tObj.AddBool ( "created", !bReplace );
  2383. tObj.AddStr ( "result", bReplace ? "updated" : "created" );
  2384. tObj.AddInt ( "status", bReplace ? 200 : 201 );
  2385. return tObj;
  2386. }
  2387. JsonObj_c sphEncodeTxnResultJson ( const char* szIndex, DocID_t tDocId, int iInserts, int iDeletes, int iUpdates, ResultSetFormat_e eFormat )
  2388. {
  2389. JsonObj_c tObj;
  2390. tObj.AddStr ( ( eFormat==ResultSetFormat_e::ES ? "_index" : "table" ), szIndex );
  2391. tObj.AddInt ( "_id", tDocId );
  2392. tObj.AddInt ( "created", iInserts );
  2393. tObj.AddInt ( "deleted", iDeletes );
  2394. tObj.AddInt ( "updated", iUpdates );
  2395. bool bReplaced = (iInserts!=0 && iDeletes!=0);
  2396. tObj.AddStr ( "result", bReplaced ? "updated" : "created" );
  2397. tObj.AddInt ( "status", bReplaced ? 200 : 201 );
  2398. return tObj;
  2399. }
  2400. JsonObj_c sphEncodeUpdateResultJson ( const char * szIndex, DocID_t tDocId, int iAffected, ResultSetFormat_e eFormat )
  2401. {
  2402. JsonObj_c tObj;
  2403. tObj.AddStr ( ( eFormat==ResultSetFormat_e::ES ? "_index" : "table" ), szIndex );
  2404. if ( !tDocId )
  2405. tObj.AddInt ( "updated", iAffected );
  2406. else
  2407. {
  2408. tObj.AddInt ( "id", tDocId );
  2409. tObj.AddStr ( "result", iAffected ? "updated" : "noop" );
  2410. }
  2411. return tObj;
  2412. }
  2413. JsonObj_c sphEncodeDeleteResultJson ( const char * szIndex, DocID_t tDocId, int iAffected, ResultSetFormat_e eFormat )
  2414. {
  2415. JsonObj_c tObj;
  2416. tObj.AddStr ( ( eFormat==ResultSetFormat_e::ES ? "_index" : "table" ), szIndex );
  2417. if ( !tDocId )
  2418. tObj.AddInt ( "deleted", iAffected );
  2419. else
  2420. {
  2421. tObj.AddInt ( "id", tDocId );
  2422. tObj.AddBool ( "found", !!iAffected );
  2423. tObj.AddStr ( "result", iAffected ? "deleted" : "not found" );
  2424. }
  2425. return tObj;
  2426. }
  2427. JsonObj_c sphEncodeInsertErrorJson ( const char * szIndex, const char * szError, ResultSetFormat_e eFormat )
  2428. {
  2429. JsonObj_c tObj, tErr;
  2430. tErr.AddStr ( "type", szError );
  2431. tErr.AddStr ( ( eFormat==ResultSetFormat_e::ES ? "_index" : "table" ), szIndex );
  2432. tObj.AddItem ( "error", tErr );
  2433. tObj.AddInt ( "status", HttpGetStatusCodes ( EHTTP_STATUS::_409 ) );
  2434. return tObj;
  2435. }
  2436. bool sphGetResultStats ( const char * szResult, int & iAffected, int & iWarnings, bool bUpdate, CSphString & sError )
  2437. {
  2438. JsonObj_c tJsonRoot ( szResult );
  2439. if ( !tJsonRoot )
  2440. return false;
  2441. // no warnings in json results for now
  2442. iWarnings = 0;
  2443. CSphString sParseError;
  2444. if ( tJsonRoot.HasItem("error") )
  2445. {
  2446. JsonObj_c tReplyError = tJsonRoot.GetItem ( "error" );
  2447. if ( tReplyError.IsObj() )
  2448. {
  2449. JsonObj_c tReason = tReplyError.GetItem ( "reason" );
  2450. if ( tReason && tReason.IsStr() )
  2451. sError = tReason.StrVal();
  2452. } else if ( tReplyError.IsStr() )
  2453. sError = tReplyError.StrVal();
  2454. else if ( sError.IsEmpty() )
  2455. sError = tReplyError.AsString();
  2456. iAffected = 0;
  2457. return false;
  2458. }
  2459. // its either update or delete
  2460. JsonObj_c tAffected = tJsonRoot.GetIntItem ( bUpdate ? "updated" : "deleted", sParseError );
  2461. if ( tAffected )
  2462. {
  2463. iAffected = (int)tAffected.IntVal();
  2464. return true;
  2465. }
  2466. // it was probably a query with an "id"
  2467. JsonObj_c tId = tJsonRoot.GetIntItem ( "id", sParseError );
  2468. if ( tId )
  2469. {
  2470. iAffected = 1;
  2471. return true;
  2472. }
  2473. return false;
  2474. }
  2475. //////////////////////////////////////////////////////////////////////////
  2476. // Highlight
  2477. static void FormatSnippetOpts ( const CSphString & sQuery, const SnippetQuerySettings_t & tSnippetQuery, CSphQuery & tQuery )
  2478. {
  2479. StringBuilder_c sItem;
  2480. sItem << "HIGHLIGHT(";
  2481. sItem << tSnippetQuery.AsString();
  2482. sItem << ",";
  2483. auto & hFieldHash = tSnippetQuery.m_hPerFieldLimits;
  2484. if ( tSnippetQuery.m_hPerFieldLimits.GetLength() )
  2485. {
  2486. sItem.StartBlock ( ",", "'", "'" );
  2487. for ( const auto& tField : hFieldHash )
  2488. sItem << tField.first;
  2489. sItem.FinishBlock(false);
  2490. }
  2491. else
  2492. sItem << "''";
  2493. if ( !sQuery.IsEmpty() )
  2494. sItem.Appendf ( ",'%s'", sQuery.cstr() );
  2495. sItem << ")";
  2496. CSphQueryItem & tItem = tQuery.m_dItems.Add();
  2497. tItem.m_sExpr = sItem.cstr ();
  2498. tItem.m_sAlias.SetSprintf ( "%s", g_szHighlight );
  2499. }
  2500. static bool ParseFieldsArray ( const JsonObj_c & tFields, SnippetQuerySettings_t & tSettings, CSphString & sError )
  2501. {
  2502. for ( const auto & tField : tFields )
  2503. {
  2504. if ( !tField.IsStr() )
  2505. {
  2506. sError.SetSprintf ( "\"%s\" field should be an string", tField.Name() );
  2507. return false;
  2508. }
  2509. SnippetLimits_t tDefault;
  2510. tSettings.m_hPerFieldLimits.Add( tDefault, tField.StrVal() );
  2511. }
  2512. return true;
  2513. }
  2514. static bool ParseSnippetLimitsElastic ( const JsonObj_c & tSnip, SnippetLimits_t & tLimits, CSphString & sError )
  2515. {
  2516. if ( !tSnip.FetchIntItem ( tLimits.m_iLimit, "fragment_size", sError, true ) ) return false;
  2517. if ( !tSnip.FetchIntItem ( tLimits.m_iLimitPassages, "number_of_fragments", sError, true ) ) return false;
  2518. return true;
  2519. }
  2520. static bool ParseSnippetLimitsSphinx ( const JsonObj_c & tSnip, SnippetLimits_t & tLimits, CSphString & sError )
  2521. {
  2522. if ( !tSnip.FetchIntItem ( tLimits.m_iLimit, "limit", sError, true ) ) return false;
  2523. if ( !tSnip.FetchIntItem ( tLimits.m_iLimitPassages, "limit_passages", sError, true ) ) return false;
  2524. if ( !tSnip.FetchIntItem ( tLimits.m_iLimitPassages, "limit_snippets", sError, true ) ) return false;
  2525. if ( !tSnip.FetchIntItem ( tLimits.m_iLimitWords, "limit_words", sError, true ) ) return false;
  2526. return true;
  2527. }
  2528. static bool ParseFieldsObject ( const JsonObj_c & tFields, SnippetQuerySettings_t & tSettings, CSphString & sError )
  2529. {
  2530. for ( const auto & tField : tFields )
  2531. {
  2532. if ( !tField.IsObj() )
  2533. {
  2534. sError.SetSprintf ( "\"%s\" field should be an object", tField.Name() );
  2535. return false;
  2536. }
  2537. SnippetLimits_t & tLimits = tSettings.m_hPerFieldLimits.AddUnique ( tField.Name() );
  2538. if ( !ParseSnippetLimitsElastic ( tField, tLimits, sError ) )
  2539. return false;
  2540. if ( !ParseSnippetLimitsSphinx ( tField, tLimits, sError ) )
  2541. return false;
  2542. }
  2543. return true;
  2544. }
  2545. static bool ParseSnippetFields ( const JsonObj_c & tSnip, SnippetQuerySettings_t & tSettings, CSphString & sError )
  2546. {
  2547. JsonObj_c tFields = tSnip.GetItem("fields");
  2548. if ( !tFields )
  2549. return true;
  2550. if ( tFields.IsArray() )
  2551. return ParseFieldsArray ( tFields, tSettings, sError );
  2552. if ( tFields.IsObj() )
  2553. return ParseFieldsObject ( tFields, tSettings, sError );
  2554. sError = R"("fields" property value should be an array or an object)";
  2555. return false;
  2556. }
  2557. static bool FetchTags ( const char * sName, const JsonObj_c & tSnip, CSphString & sVal, CSphString & sError )
  2558. {
  2559. JsonObj_c tTag = tSnip.GetItem ( sName );
  2560. if ( !tTag )
  2561. return true;
  2562. if ( tTag.IsStr() )
  2563. {
  2564. sVal = tTag.StrVal();
  2565. return true;
  2566. }
  2567. if ( tTag.IsArray() )
  2568. {
  2569. if ( tTag.Size() )
  2570. sVal = tTag[0].StrVal();
  2571. return true;
  2572. }
  2573. sError.SetSprintf ( R"("%s" property value should be an array or sting)", sName );
  2574. return false;
  2575. }
  2576. static bool ParseSnippetOptsElastic ( const JsonObj_c & tSnip, CSphString & sQuery, SnippetQuerySettings_t & tQuery, CSphString & sError )
  2577. {
  2578. JsonObj_c tEncoder = tSnip.GetStrItem ( "encoder", sError, true );
  2579. if ( tEncoder )
  2580. {
  2581. if ( tEncoder.StrVal()=="html" )
  2582. tQuery.m_sStripMode = "retain";
  2583. }
  2584. else if ( !sError.IsEmpty() )
  2585. return false;
  2586. JsonObj_c tHlQuery = tSnip.GetObjItem ( "highlight_query", sError, true );
  2587. if ( tHlQuery )
  2588. sQuery = tHlQuery.AsString();
  2589. else if ( !sError.IsEmpty() )
  2590. return false;
  2591. if ( !FetchTags ( "pre_tags", tSnip, tQuery.m_sBeforeMatch, sError ) ) return false;
  2592. if ( !FetchTags ( "post_tags", tSnip, tQuery.m_sAfterMatch, sError ) ) return false;
  2593. JsonObj_c tNoMatchSize = tSnip.GetItem ( "no_match_size" );
  2594. if ( tNoMatchSize )
  2595. {
  2596. int iNoMatch = 0;
  2597. if ( !tSnip.FetchIntItem ( iNoMatch, "no_match_size", sError, true ) )
  2598. return false;
  2599. tQuery.m_bAllowEmpty = iNoMatch<1;
  2600. }
  2601. JsonObj_c tOrder = tSnip.GetStrItem ( "order", sError, true );
  2602. if ( tOrder )
  2603. tQuery.m_bWeightOrder = tOrder.StrVal()=="score";
  2604. else if ( !sError.IsEmpty() )
  2605. return false;
  2606. if ( !ParseSnippetLimitsElastic ( tSnip, tQuery, sError ) )
  2607. return false;
  2608. return true;
  2609. }
  2610. static bool ParseSnippetOptsSphinx ( const JsonObj_c & tSnip, SnippetQuerySettings_t & tOpt, CSphString & sError )
  2611. {
  2612. if ( !ParseSnippetLimitsSphinx ( tSnip, tOpt, sError ) )
  2613. return false;
  2614. if ( !tSnip.FetchStrItem ( tOpt.m_sBeforeMatch, "before_match", sError, true ) ) return false;
  2615. if ( !tSnip.FetchStrItem ( tOpt.m_sAfterMatch, "after_match", sError, true ) ) return false;
  2616. if ( !tSnip.FetchIntItem ( tOpt.m_iAround, "around", sError, true ) ) return false;
  2617. if ( !tSnip.FetchBoolItem ( tOpt.m_bUseBoundaries, "use_boundaries", sError, true ) ) return false;
  2618. if ( !tSnip.FetchBoolItem ( tOpt.m_bWeightOrder, "weight_order", sError, true ) ) return false;
  2619. if ( !tSnip.FetchBoolItem ( tOpt.m_bForceAllWords, "force_all_words", sError, true ) ) return false;
  2620. if ( !tSnip.FetchStrItem ( tOpt.m_sStripMode, "html_strip_mode", sError, true ) ) return false;
  2621. if ( !tSnip.FetchBoolItem ( tOpt.m_bAllowEmpty, "allow_empty", sError, true ) ) return false;
  2622. if ( !tSnip.FetchBoolItem ( tOpt.m_bEmitZones, "emit_zones", sError, true ) ) return false;
  2623. if ( !tSnip.FetchBoolItem ( tOpt.m_bForcePassages, "force_passages", sError, true ) ) return false;
  2624. if ( !tSnip.FetchBoolItem ( tOpt.m_bForcePassages, "force_snippets", sError, true ) ) return false;
  2625. if ( !tSnip.FetchBoolItem ( tOpt.m_bPackFields, "pack_fields", sError, true ) ) return false;
  2626. if ( !tSnip.FetchBoolItem ( tOpt.m_bLimitsPerField, "limits_per_field", sError, true ) )return false;
  2627. JsonObj_c tBoundary = tSnip.GetStrItem ( "passage_boundary", "snippet_boundary", sError );
  2628. if ( tBoundary )
  2629. tOpt.m_ePassageSPZ = GetPassageBoundary ( tBoundary.StrVal() );
  2630. else if ( !sError.IsEmpty() )
  2631. return false;
  2632. return true;
  2633. }
  2634. static bool ParseSnippet ( const JsonObj_c & tSnip, CSphQuery & tQuery, CSphString & sError )
  2635. {
  2636. CSphString sQuery;
  2637. SnippetQuerySettings_t tSettings;
  2638. tSettings.m_bJsonQuery = true;
  2639. tSettings.m_bPackFields = true;
  2640. if ( !ParseSnippetFields ( tSnip, tSettings, sError ) )
  2641. return false;
  2642. // elastic-style options
  2643. if ( !ParseSnippetOptsElastic ( tSnip, sQuery, tSettings, sError ) )
  2644. return false;
  2645. // sphinx-style options
  2646. if ( !ParseSnippetOptsSphinx ( tSnip, tSettings, sError ) )
  2647. return false;
  2648. FormatSnippetOpts ( sQuery, tSettings, tQuery );
  2649. return true;
  2650. }
  2651. //////////////////////////////////////////////////////////////////////////
  2652. // Sort
  2653. struct SortField_t : public GeoDistInfo_c
  2654. {
  2655. CSphString m_sName;
  2656. CSphString m_sMode;
  2657. bool m_bAsc {true};
  2658. };
  2659. static void FormatSortBy ( const CSphVector<SortField_t> & dSort, JsonQuery_c & tQuery, bool & bGotWeight )
  2660. {
  2661. StringBuilder_c sSortBuf;
  2662. Comma_c sComma ({", ",2});
  2663. for ( const SortField_t &tItem : dSort )
  2664. {
  2665. const char * sSort = ( tItem.m_bAsc ? " asc" : " desc" );
  2666. if ( tItem.IsGeoDist() )
  2667. {
  2668. // ORDER BY statement
  2669. sSortBuf << sComma << g_szOrder << tItem.m_sName << sSort;
  2670. // query item
  2671. CSphQueryItem & tQueryItem = tQuery.m_dItems.Add();
  2672. tQueryItem.m_sExpr = tItem.BuildExprString();
  2673. tQueryItem.m_sAlias.SetSprintf ( "%s%s", g_szOrder, tItem.m_sName.cstr() );
  2674. // select list
  2675. StringBuilder_c sTmp;
  2676. sTmp << tQuery.m_sSelect << ", " << tQueryItem.m_sExpr << " as " << tQueryItem.m_sAlias;
  2677. sTmp.MoveTo ( tQuery.m_sSelect );
  2678. } else if ( tItem.m_sMode.IsEmpty() )
  2679. {
  2680. const char * sName = tItem.m_sName.cstr();
  2681. if ( tItem.m_sName=="_score" )
  2682. sName = "@weight";
  2683. else if ( tItem.m_sName=="_count" )
  2684. sName = "count(*)";
  2685. // sort by attribute or weight
  2686. sSortBuf << sComma << sName << sSort;
  2687. bGotWeight |= ( tItem.m_sName=="_score" );
  2688. } else
  2689. {
  2690. // sort by MVA
  2691. // ORDER BY statement
  2692. sSortBuf << sComma << g_szOrder << tItem.m_sName << sSort;
  2693. // query item
  2694. StringBuilder_c sTmp;
  2695. sTmp << ( tItem.m_sMode=="min" ? "least" : "greatest" ) << "(" << tItem.m_sName << ")";
  2696. CSphQueryItem & tQueryItem = tQuery.m_dItems.Add();
  2697. sTmp.MoveTo (tQueryItem.m_sExpr);
  2698. tQueryItem.m_sAlias.SetSprintf ( "%s%s", g_szOrder, tItem.m_sName.cstr() );
  2699. // select list
  2700. sTmp << tQuery.m_sSelect << ", " << tQueryItem.m_sExpr << " as " << tQueryItem.m_sAlias;
  2701. sTmp.MoveTo ( tQuery.m_sSelect );
  2702. }
  2703. tQuery.m_dSortFields.Add ( tItem.m_sName );
  2704. }
  2705. if ( !dSort.GetLength() )
  2706. {
  2707. sSortBuf += "@weight desc";
  2708. bGotWeight = true;
  2709. }
  2710. tQuery.m_eSort = SPH_SORT_EXTENDED;
  2711. sSortBuf.MoveTo ( tQuery.m_sSortBy );
  2712. }
  2713. static bool ParseSortObj ( const JsonObj_c & tSortItem, CSphVector<SortField_t> & dSort, CSphString & sError, CSphString & sWarning )
  2714. {
  2715. bool bSortString = tSortItem.IsStr();
  2716. bool bSortObj = tSortItem.IsObj();
  2717. CSphString sSortName = tSortItem.Name();
  2718. if ( ( !bSortString && !bSortObj ) || !tSortItem.Name() || ( bSortString && !tSortItem.SzVal() ) )
  2719. {
  2720. sError.SetSprintf ( R"("sort" property 0("%s") should be %s)", sSortName.scstr(), ( bSortObj ? "a string" : "an object" ) );
  2721. return false;
  2722. }
  2723. // [ { "attr_name" : "sort_mode" } ]
  2724. if ( bSortString )
  2725. {
  2726. CSphString sOrder = tSortItem.StrVal();
  2727. if ( sOrder!="asc" && sOrder!="desc" )
  2728. {
  2729. sError.SetSprintf ( R"("sort" property "%s" order is invalid %s)", sSortName.scstr(), sOrder.cstr() );
  2730. return false;
  2731. }
  2732. SortField_t & tAscItem = dSort.Add();
  2733. tAscItem.m_sName = sSortName;
  2734. tAscItem.m_bAsc = ( sOrder=="asc" );
  2735. return true;
  2736. }
  2737. // [ { "attr_name" : { "order" : "sort_mode" } } ]
  2738. SortField_t & tSortField = dSort.Add();
  2739. tSortField.m_sName = sSortName;
  2740. JsonObj_c tAttrItems = tSortItem.GetItem("order");
  2741. if ( tAttrItems )
  2742. {
  2743. if ( !tAttrItems.IsStr() )
  2744. {
  2745. sError.SetSprintf ( R"("sort" property "%s" order is invalid)", tAttrItems.Name() );
  2746. return false;
  2747. }
  2748. CSphString sOrder = tAttrItems.StrVal();
  2749. tSortField.m_bAsc = ( sOrder=="asc" );
  2750. }
  2751. JsonObj_c tMode = tSortItem.GetItem("mode");
  2752. if ( tMode )
  2753. {
  2754. if ( tAttrItems && !tMode.IsStr() )
  2755. {
  2756. sError.SetSprintf ( R"("mode" property "%s" order is invalid)", tAttrItems.Name() );
  2757. return false;
  2758. }
  2759. CSphString sMode = tMode.StrVal();
  2760. if ( sMode!="min" && sMode!="max" )
  2761. {
  2762. sError.SetSprintf ( R"("mode" supported are "min" and "max", got "%s", not supported)", sMode.cstr() );
  2763. return false;
  2764. }
  2765. tSortField.m_sMode = sMode;
  2766. }
  2767. // geodist
  2768. if ( tSortField.m_sName=="_geo_distance" )
  2769. {
  2770. if ( tMode )
  2771. {
  2772. sError = R"("mode" property not supported with "_geo_distance")";
  2773. return false;
  2774. }
  2775. if ( tSortItem.HasItem("unit") )
  2776. {
  2777. sError = R"("unit" property not supported with "_geo_distance")";
  2778. return false;
  2779. }
  2780. if ( !tSortField.Parse ( tSortItem, false, sError, sWarning ) )
  2781. return false;
  2782. }
  2783. // FXIME!!! "unmapped_type" should be replaced with expression EXIST
  2784. // unsupported options
  2785. const char * dUnsupported[] = { "missing", "nested_path", "nested_filter"};
  2786. for ( auto szOption : dUnsupported )
  2787. {
  2788. if ( tSortItem.HasItem(szOption) )
  2789. {
  2790. sError.SetSprintf ( R"("%s" property not supported)", szOption );
  2791. return false;
  2792. }
  2793. }
  2794. return true;
  2795. }
  2796. static bool ParseSort ( const JsonObj_c & tSort, JsonQuery_c & tQuery, bool & bGotWeight, CSphString & sError, CSphString & sWarning )
  2797. {
  2798. bGotWeight = false;
  2799. // unsupported options
  2800. if ( tSort.HasItem("_script") )
  2801. {
  2802. sError = "\"_script\" property not supported";
  2803. return false;
  2804. }
  2805. CSphVector<SortField_t> dSort;
  2806. dSort.Reserve ( tSort.Size() );
  2807. if ( tSort.IsObj() )
  2808. {
  2809. if ( !ParseSortObj ( tSort[0], dSort, sError, sWarning ) )
  2810. return false;
  2811. } else
  2812. {
  2813. for ( const auto & tItem : tSort )
  2814. {
  2815. CSphString sName = tItem.Name();
  2816. bool bString = tItem.IsStr();
  2817. bool bObj = tItem.IsObj();
  2818. if ( !bString && !bObj )
  2819. {
  2820. sError.SetSprintf ( R"("sort" property "%s" should be a string or an object)", sName.scstr() );
  2821. return false;
  2822. }
  2823. if ( bObj && tItem.Size()!=1 )
  2824. {
  2825. sError.SetSprintf ( R"("sort" property "%s" should be an object)", sName.scstr() );
  2826. return false;
  2827. }
  2828. // [ "attr_name" ]
  2829. if ( bString )
  2830. {
  2831. SortField_t & tSortField = dSort.Add();
  2832. tSortField.m_sName = tItem.StrVal();
  2833. // order defaults to desc when sorting on the _score, and defaults to asc when sorting on anything else
  2834. tSortField.m_bAsc = ( tSortField.m_sName!="_score" );
  2835. // _random name should be on pair with _score \ _geo_distance
  2836. if ( tSortField.m_sName=="_random" )
  2837. tSortField.m_sName = "@random";
  2838. continue;
  2839. }
  2840. JsonObj_c tSortItem = tItem[0];
  2841. if ( !tSortItem )
  2842. {
  2843. sError = R"(invalid "sort" property item)";
  2844. return false;
  2845. }
  2846. if ( !ParseSortObj ( tSortItem, dSort, sError, sWarning ) )
  2847. return false;
  2848. }
  2849. }
  2850. FormatSortBy ( dSort, tQuery, bGotWeight );
  2851. return true;
  2852. }
  2853. //////////////////////////////////////////////////////////////////////////
  2854. // _source / select list
  2855. static bool ParseStringArray ( const JsonObj_c & tArray, const char * szProp, StrVec_t & dItems, CSphString & sError )
  2856. {
  2857. for ( const auto & tItem : tArray )
  2858. {
  2859. if ( !tItem.IsStr() )
  2860. {
  2861. sError.SetSprintf ( R"("%s" property should be a string)", szProp );
  2862. return false;
  2863. }
  2864. dItems.Add ( tItem.StrVal() );
  2865. }
  2866. return true;
  2867. }
  2868. static bool ParseSelect ( const JsonObj_c & tSelect, CSphQuery & tQuery, CSphString & sError )
  2869. {
  2870. bool bString = tSelect.IsStr();
  2871. bool bArray = tSelect.IsArray();
  2872. bool bObj = tSelect.IsObj();
  2873. if ( !bString && !bArray && !bObj )
  2874. {
  2875. sError = R"("_source" property should be a string or an array or an object)";
  2876. return false;
  2877. }
  2878. if ( bString )
  2879. {
  2880. tQuery.m_dIncludeItems.Add ( tSelect.StrVal() );
  2881. if ( tQuery.m_dIncludeItems[0]=="*" || tQuery.m_dIncludeItems[0].IsEmpty() )
  2882. tQuery.m_dIncludeItems.Reset();
  2883. return true;
  2884. }
  2885. if ( bArray )
  2886. return ParseStringArray ( tSelect, R"("_source")", tQuery.m_dIncludeItems, sError );
  2887. assert ( bObj );
  2888. // includes part of _source object
  2889. JsonObj_c tInclude = tSelect.GetArrayItem ( "includes", sError, true );
  2890. if ( tInclude )
  2891. {
  2892. if ( !ParseStringArray ( tInclude, R"("_source" "includes")", tQuery.m_dIncludeItems, sError ) )
  2893. return false;
  2894. if ( tQuery.m_dIncludeItems.GetLength()==1 && tQuery.m_dIncludeItems[0]=="*" )
  2895. tQuery.m_dIncludeItems.Reset();
  2896. } else if ( !sError.IsEmpty() )
  2897. return false;
  2898. // excludes part of _source object
  2899. JsonObj_c tExclude = tSelect.GetArrayItem ( "excludes", sError, true );
  2900. if ( tExclude )
  2901. {
  2902. if ( !ParseStringArray ( tExclude, R"("_source" "excludes")", tQuery.m_dExcludeItems, sError ) )
  2903. return false;
  2904. } else if ( !sError.IsEmpty() )
  2905. return false;
  2906. return true;
  2907. }
  2908. //////////////////////////////////////////////////////////////////////////
  2909. // script_fields / expressions
  2910. static bool ParseScriptFields ( const JsonObj_c & tExpr, CSphQuery & tQuery, CSphString & sError )
  2911. {
  2912. if ( !tExpr )
  2913. return true;
  2914. if ( !tExpr.IsObj() )
  2915. {
  2916. sError = R"("script_fields" property should be an object)";
  2917. return false;
  2918. }
  2919. StringBuilder_c sSelect;
  2920. sSelect << tQuery.m_sSelect;
  2921. for ( const auto & tAlias : tExpr )
  2922. {
  2923. if ( !tAlias.IsObj() )
  2924. {
  2925. sError = R"("script_fields" properties should be objects)";
  2926. return false;
  2927. }
  2928. if ( CSphString ( tAlias.Name() ).IsEmpty() )
  2929. {
  2930. sError = R"("script_fields" empty property name)";
  2931. return false;
  2932. }
  2933. JsonObj_c tAliasScript = tAlias.GetItem("script");
  2934. if ( !tAliasScript )
  2935. {
  2936. sError = R"("script_fields" property should have "script" object)";
  2937. return false;
  2938. }
  2939. CSphString sExpr;
  2940. if ( !tAliasScript.FetchStrItem ( sExpr, "inline", sError ) )
  2941. return false;
  2942. const char * dUnsupported[] = { "lang", "params", "stored", "file" };
  2943. for ( auto szOption : dUnsupported )
  2944. if ( tAliasScript.HasItem(szOption) )
  2945. {
  2946. sError.SetSprintf ( R"("%s" property not supported in "script_fields")", szOption );
  2947. return false;
  2948. }
  2949. // add to query
  2950. CSphQueryItem & tQueryItem = tQuery.m_dItems.Add();
  2951. tQueryItem.m_sExpr = sExpr;
  2952. tQueryItem.m_sAlias = tAlias.Name();
  2953. // add to select list
  2954. sSelect.Appendf ( ", %s as %s", tQueryItem.m_sExpr.cstr(), tQueryItem.m_sAlias.cstr() );
  2955. }
  2956. sSelect.MoveTo ( tQuery.m_sSelect );
  2957. return true;
  2958. }
  2959. static bool ParseExpressions ( const JsonObj_c & tExpr, CSphQuery & tQuery, CSphString & sError )
  2960. {
  2961. if ( !tExpr )
  2962. return true;
  2963. if ( !tExpr.IsObj() )
  2964. {
  2965. sError = R"("expressions" property should be an object)";
  2966. return false;
  2967. }
  2968. StringBuilder_c sSelect;
  2969. sSelect << tQuery.m_sSelect;
  2970. for ( const auto & tAlias : tExpr )
  2971. {
  2972. if ( !tAlias.IsStr() )
  2973. {
  2974. sError = R"("expressions" properties should be strings)";
  2975. return false;
  2976. }
  2977. if ( CSphString ( tAlias.Name() ).IsEmpty() )
  2978. {
  2979. sError = R"("expressions" empty property name)";
  2980. return false;
  2981. }
  2982. // add to query
  2983. CSphQueryItem & tQueryItem = tQuery.m_dItems.Add();
  2984. tQueryItem.m_sExpr = tAlias.StrVal();
  2985. tQueryItem.m_sAlias = tAlias.Name();
  2986. // add to select list
  2987. sSelect.Appendf ( ", %s as %s", tQueryItem.m_sExpr.cstr(), tQueryItem.m_sAlias.cstr() );
  2988. }
  2989. sSelect.MoveTo ( tQuery.m_sSelect );
  2990. return true;
  2991. }
  2992. //////////////////////////////////////////////////////////////////////////
  2993. // docvalue_fields
  2994. bool ParseDocFields ( const JsonObj_c & tDocFields, JsonQuery_c & tQuery, CSphString & sError )
  2995. {
  2996. if ( !tDocFields || !tDocFields.IsArray() )
  2997. {
  2998. sError = R"("docvalue_fields" property should be an array or an object")";
  2999. return false;
  3000. }
  3001. for ( const auto & tItem : tDocFields )
  3002. {
  3003. if ( !tItem.IsObj() )
  3004. {
  3005. sError = R"("docvalue_fields" property item should be an object)";
  3006. return false;
  3007. }
  3008. CSphString sFieldName;
  3009. if ( !tItem.FetchStrItem ( sFieldName, "field", sError, false ) )
  3010. return false;
  3011. if ( tQuery.m_dItems.GetFirst ( [&sFieldName] ( const CSphQueryItem & tVal ) { return ( tVal.m_sExpr=="*" || tVal.m_sExpr==sFieldName ); } )==-1 )
  3012. {
  3013. CSphQueryItem & tDFItem = tQuery.m_dItems.Add();
  3014. tDFItem.m_sExpr = sFieldName;
  3015. tDFItem.m_sAlias = sFieldName;
  3016. }
  3017. // FIXME!!! collect format type
  3018. bool bDateTime = false;
  3019. CSphString sFormat;
  3020. if ( tItem.FetchStrItem ( sFormat, "format", sError, true ) )
  3021. bDateTime = ( sFormat=="date_time" );
  3022. tQuery.m_dDocFields.Add ( { sFieldName, bDateTime } );
  3023. }
  3024. return true;
  3025. }
  3026. static Aggr_e GetAggrFunc ( const JsonObj_c & tBucket, bool bCheckAggType )
  3027. {
  3028. if ( StrEq ( tBucket.Name(), "significant_terms" ) )
  3029. return Aggr_e::SIGNIFICANT;
  3030. if ( StrEq ( tBucket.Name(), "histogram" ) )
  3031. return Aggr_e::HISTOGRAM;
  3032. if ( StrEq ( tBucket.Name(), "date_histogram" ) )
  3033. return Aggr_e::DATE_HISTOGRAM;
  3034. if ( StrEq ( tBucket.Name(), "range") )
  3035. return Aggr_e::RANGE;
  3036. if ( StrEq ( tBucket.Name(), "date_range") )
  3037. return Aggr_e::DATE_RANGE;
  3038. if ( StrEq ( tBucket.Name(), "composite") )
  3039. return Aggr_e::COMPOSITE;
  3040. if ( StrEq ( tBucket.Name(), "min") )
  3041. return Aggr_e::MIN;
  3042. if ( StrEq ( tBucket.Name(), "max") )
  3043. return Aggr_e::MAX;
  3044. if ( StrEq ( tBucket.Name(), "sum") )
  3045. return Aggr_e::SUM;
  3046. if ( StrEq ( tBucket.Name(), "avg") )
  3047. return Aggr_e::AVG;
  3048. if ( bCheckAggType )
  3049. sphWarning ( "unsupported aggregate type '%s'", tBucket.Name() );
  3050. return Aggr_e::NONE;
  3051. }
  3052. static void SetRangeFrom ( const JsonObj_c & tSrc, bool bForceFloat, RangeSetting_t & tItem )
  3053. {
  3054. if ( tSrc.IsDbl() )
  3055. tItem.m_fFrom = tSrc.DblVal();
  3056. else if ( bForceFloat )
  3057. tItem.m_fFrom = tSrc.IntVal();
  3058. else
  3059. tItem.m_iFrom = tSrc.IntVal();
  3060. }
  3061. static void SetRangeTo ( const JsonObj_c & tSrc, bool bForceFloat, RangeSetting_t & tItem )
  3062. {
  3063. if ( tSrc.IsDbl() )
  3064. tItem.m_fTo = tSrc.DblVal();
  3065. else if ( bForceFloat )
  3066. tItem.m_fTo = tSrc.IntVal();
  3067. else
  3068. tItem.m_iTo = tSrc.IntVal();
  3069. }
  3070. static bool GetKeyed ( const JsonObj_c & tBucket, bool & bKeyed, CSphString & sError )
  3071. {
  3072. if ( !tBucket.HasItem ( "keyed" ) )
  3073. return true;
  3074. const auto tKeyed = tBucket.GetBoolItem ( "keyed", sError, false );
  3075. if ( !tKeyed )
  3076. return false;
  3077. bKeyed = tKeyed.BoolVal();
  3078. return true;
  3079. }
  3080. static bool ParseAggrRange ( const JsonObj_c & tRanges, const CSphString & sCol, AggrRangeSetting_t & dRanges, CSphString & sError );
  3081. static bool ParseAggrRange ( const JsonObj_c & tRanges, const CSphString & sCol, AggrDateRangeSetting_t & dRanges, CSphString & sError );
  3082. static bool ParseAggrRange ( const JsonObj_c & tBucket, JsonAggr_t & tItem, bool bDate, CSphString & sError )
  3083. {
  3084. JsonObj_c tRanges = tBucket.GetItem( "ranges" );
  3085. if ( !tRanges || !tRanges.IsArray() )
  3086. {
  3087. if ( !tRanges )
  3088. sError.SetSprintf ( "\"%s\" missed \"ranges\" property", tItem.m_sCol.cstr() );
  3089. else
  3090. sError.SetSprintf ( "\"%s\" \"ranges\" should be an array", tItem.m_sCol.cstr() );
  3091. return false;
  3092. }
  3093. int iCount = tRanges.Size();
  3094. if ( !iCount )
  3095. {
  3096. sError.SetSprintf ( "\"%s\" empty \"ranges\" property", tItem.m_sCol.cstr() );
  3097. return false;
  3098. }
  3099. bool bKeyed = false;
  3100. if ( !GetKeyed ( tBucket, bKeyed, sError ) )
  3101. return false;
  3102. if ( !bDate )
  3103. {
  3104. auto & dRanges = tItem.m_tRange;
  3105. dRanges.Resize ( iCount );
  3106. dRanges.m_bKeyed = bKeyed;
  3107. return ParseAggrRange ( tRanges, tItem.m_sCol, dRanges, sError );
  3108. } else
  3109. {
  3110. auto & dRanges = tItem.m_tDateRange;
  3111. dRanges.Resize ( iCount );
  3112. dRanges.m_bKeyed = bKeyed;
  3113. return ParseAggrRange ( tRanges, tItem.m_sCol, dRanges, sError );
  3114. }
  3115. }
  3116. bool ParseAggrRange ( const JsonObj_c & tRanges, const CSphString & sCol, AggrRangeSetting_t & dRanges, CSphString & sError )
  3117. {
  3118. int iFloatStart = -1;
  3119. for ( int i=0; i<dRanges.GetLength(); i++ )
  3120. {
  3121. const auto tRangeItem = tRanges[i];
  3122. const auto tFrom = tRangeItem.GetItem ( "from" );
  3123. const auto tTo = tRangeItem.GetItem ( "to" );
  3124. const bool bHasFrom = tFrom;
  3125. const bool bHasTo = tTo;
  3126. if ( !bHasFrom && i!=0 )
  3127. {
  3128. sError.SetSprintf ( "\"%s\" ranges[%d] \"from\" empty", sCol.cstr(), i );
  3129. return false;
  3130. }
  3131. if ( !bHasTo && i!=dRanges.GetLength()-1 )
  3132. {
  3133. sError.SetSprintf ( "\"%s\" ranges[%d] \"to\" empty", sCol.cstr(), i );
  3134. return false;
  3135. }
  3136. if ( ( bHasFrom && tFrom.IsDbl() ) || ( bHasTo && tTo.IsDbl() ) )
  3137. {
  3138. dRanges.m_bFloat = true;
  3139. if ( iFloatStart!=-1 )
  3140. iFloatStart = i;
  3141. }
  3142. if ( bHasFrom )
  3143. SetRangeFrom ( tFrom, ( iFloatStart!=-1 ), dRanges[i] );
  3144. else
  3145. dRanges.m_bOpenLeft = true;
  3146. if ( bHasTo )
  3147. SetRangeTo ( tTo, ( iFloatStart!=-1 ), dRanges[i] );
  3148. else
  3149. dRanges.m_bOpenRight = true;
  3150. }
  3151. // convert int to float values for head of array values
  3152. if ( iFloatStart>0 )
  3153. {
  3154. for ( int i=iFloatStart; i<dRanges.GetLength(); i++ )
  3155. {
  3156. dRanges[i].m_fFrom = dRanges[i].m_iFrom;
  3157. dRanges[i].m_fTo = dRanges[i].m_iTo;
  3158. }
  3159. }
  3160. if ( dRanges.m_bOpenLeft )
  3161. {
  3162. if ( dRanges.m_bFloat )
  3163. dRanges[0].m_fFrom = -FLT_MAX;
  3164. else
  3165. dRanges[0].m_iFrom = -LLONG_MAX;
  3166. }
  3167. if ( dRanges.m_bOpenRight )
  3168. {
  3169. if ( dRanges.m_bFloat )
  3170. dRanges.Last().m_fTo = FLT_MAX;
  3171. else
  3172. dRanges.Last().m_iTo = LLONG_MAX;
  3173. }
  3174. return true;
  3175. }
  3176. bool ParseAggrRange ( const JsonObj_c & tRanges, const CSphString & sCol, AggrDateRangeSetting_t & dRanges, CSphString & sError )
  3177. {
  3178. for ( int i=0; i<dRanges.GetLength(); i++ )
  3179. {
  3180. const auto tRangeItem = tRanges[i];
  3181. const auto tFrom = tRangeItem.GetItem ( "from" );
  3182. const auto tTo = tRangeItem.GetItem ( "to" );
  3183. const bool bHasFrom = tFrom;
  3184. const bool bHasTo = tTo;
  3185. if ( !bHasFrom && i!=0 )
  3186. {
  3187. sError.SetSprintf ( "\"%s\" ranges[%d] \"from\" empty", sCol.cstr(), i );
  3188. return false;
  3189. }
  3190. if ( !bHasTo && i!=dRanges.GetLength()-1 )
  3191. {
  3192. sError.SetSprintf ( "\"%s\" ranges[%d] \"to\" empty", sCol.cstr(), i );
  3193. return false;
  3194. }
  3195. if ( bHasFrom )
  3196. dRanges[i].m_sFrom = tFrom.StrVal();
  3197. if ( bHasTo )
  3198. dRanges[i].m_sTo = tTo.StrVal();
  3199. }
  3200. return true;
  3201. }
  3202. static bool ParseAggrHistogram ( const JsonObj_c & tBucket, JsonAggr_t & tItem, CSphString & sError )
  3203. {
  3204. AggrHistSetting_t & tHist = tItem.m_tHist;
  3205. JsonObj_c tInterval = tBucket.GetItem ( "interval" );
  3206. if ( tInterval.Empty() )
  3207. {
  3208. sError.SetSprintf ( "\"%s\" interval missed", tItem.m_sCol.cstr() );
  3209. return false;
  3210. }
  3211. if ( !tInterval.IsNum() )
  3212. {
  3213. sError.SetSprintf ( "\"%s\" interval should be numeric", tItem.m_sCol.cstr() );
  3214. return false;
  3215. }
  3216. if ( tInterval.IsInt() )
  3217. tHist.m_tInterval = tInterval.IntVal();
  3218. else
  3219. tHist.m_tInterval = tInterval.FltVal();
  3220. JsonObj_c tOffset = tBucket.GetItem ( "offset" );
  3221. if ( !tOffset.Empty() )
  3222. {
  3223. if ( !tOffset.IsNum() )
  3224. {
  3225. sError.SetSprintf ( "\"%s\" offset should be numeric", tItem.m_sCol.cstr() );
  3226. return false;
  3227. }
  3228. if ( tOffset.IsInt() )
  3229. tHist.m_tOffset = tOffset.IntVal();
  3230. else
  3231. tHist.m_tOffset = tOffset.FltVal();
  3232. } else
  3233. {
  3234. tHist.m_tOffset = INT64_C ( 0 );
  3235. }
  3236. if ( !GetKeyed ( tBucket, tHist.m_bKeyed, sError ) )
  3237. return false;
  3238. FixFloat ( tHist );
  3239. return true;
  3240. }
  3241. static bool ParseAggrDateHistogram ( const JsonObj_c & tBucket, JsonAggr_t & tItem, CSphString & sError )
  3242. {
  3243. AggrDateHistSetting_t & tHist = tItem.m_tDateHist;
  3244. JsonObj_c tCalendar = tBucket.GetItem ( "calendar_interval" );
  3245. JsonObj_c tFixed = tBucket.GetItem ( "fixed_interval" );
  3246. if ( tCalendar.Empty() && tFixed.Empty() )
  3247. {
  3248. sError.SetSprintf ( "\"%s\" calendar_interval or fixed_interval missed", tItem.m_sCol.cstr() );
  3249. return false;
  3250. }
  3251. if ( !tCalendar.Empty() && !tFixed.Empty() )
  3252. {
  3253. sError.SetSprintf ( "\"%s\" both calendar_interval and fixed_interval supplied", tItem.m_sCol.cstr() );
  3254. return false;
  3255. }
  3256. tHist.m_bFixed = !tFixed.Empty();
  3257. const JsonObj_c & tInterval = ( tHist.m_bFixed ? tFixed : tCalendar );
  3258. if ( !tInterval.IsStr() )
  3259. {
  3260. sError.SetSprintf ( "\"%s\" calendar_interval should be string", tItem.m_sCol.cstr() );
  3261. return false;
  3262. }
  3263. tHist.m_sInterval = tInterval.StrVal();
  3264. if ( !GetKeyed ( tBucket, tHist.m_bKeyed, sError ) )
  3265. return false;
  3266. return true;
  3267. }
  3268. static bool ParseAggrComposite ( const JsonObj_c & tBucket, JsonAggr_t & tAggr, CSphString & sError )
  3269. {
  3270. JsonObj_c tComposite = tBucket.GetObjItem ( "composite", sError, false );
  3271. if ( !tComposite )
  3272. return false;
  3273. JsonObj_c tSource = tComposite.GetArrayItem ( "sources", sError, false );
  3274. if ( !tSource )
  3275. return false;
  3276. if ( !tSource.IsArray() )
  3277. {
  3278. sError = R"("sources" property item should be an array)";
  3279. return false;
  3280. }
  3281. SmallStringHash_T<AggrComposite_t> hColumns;
  3282. for ( const auto & tArrayItem : tSource )
  3283. {
  3284. if ( !tArrayItem.IsObj() )
  3285. {
  3286. sError = R"("sources" items should be an object)";
  3287. return false;
  3288. }
  3289. JsonObj_c tItem = tArrayItem.begin();
  3290. JsonObj_c tTerms = tItem.GetObjItem ( "terms", sError, false );
  3291. if ( !tTerms )
  3292. return false;
  3293. AggrComposite_t tCol;
  3294. if ( !tTerms.FetchStrItem ( tCol.m_sColumn, "field", sError, false ) )
  3295. return false;
  3296. tCol.m_sAlias = tItem.Name();
  3297. if ( !hColumns.Add ( tCol, tItem.Name() ) )
  3298. {
  3299. sError.SetSprintf ( R"("composite" has multiple "%s" aggregates)", tItem.Name() );
  3300. return false;
  3301. }
  3302. }
  3303. if ( hColumns.IsEmpty() )
  3304. {
  3305. sError = R"(empty "composite" aggregate)";
  3306. return false;
  3307. }
  3308. JsonObj_c tAfter = tComposite.GetObjItem ( "after", sError, false );
  3309. if ( tAfter && tAfter.Size() )
  3310. {
  3311. JsonObj_c tJsonQuery ( R"( {"query":{"bool":{"must":[] }}} )" );
  3312. JsonObj_c tFilters = tJsonQuery.GetItem ( "query" ).GetItem ( "bool" ).GetItem ( "must" );
  3313. for ( const auto & tItem : tAfter )
  3314. {
  3315. AggrComposite_t * pCol = hColumns ( tItem.Name() );
  3316. if ( !pCol )
  3317. {
  3318. sError.SetSprintf ( R"("after" missed "%s" aggregate)", tItem.Name() );
  3319. return false;
  3320. }
  3321. JsonObj_c tFilterVal = tItem.Clone();
  3322. JsonObj_c tEqItem ( R"( {"equals":{} } )") ;
  3323. tEqItem.begin().AddItem ( pCol->m_sColumn.cstr(), tFilterVal );
  3324. tFilters.AddItem ( tEqItem );
  3325. }
  3326. CSphQuery tTmpQuery;
  3327. if ( !ParseJsonQueryFilters ( tJsonQuery.GetItem( "query" ), tTmpQuery, sError, sError ) )
  3328. return false;
  3329. if ( !sError.IsEmpty() )
  3330. return false;
  3331. assert ( tTmpQuery.m_dFilterTree.IsEmpty() );
  3332. tAggr.m_dCompositeAfterKey = std::move ( tTmpQuery.m_dFilters );
  3333. }
  3334. tAggr.m_iSize = DEFAULT_MAX_MATCHES;
  3335. tComposite.FetchIntItem ( tAggr.m_iSize, "size", sError, true );
  3336. StringBuilder_c sColName ( "," );
  3337. tAggr.m_dComposite.Reserve ( hColumns.GetLength() );
  3338. for ( const auto & tCol : hColumns )
  3339. {
  3340. sColName += tCol.second.m_sColumn.cstr();
  3341. tAggr.m_dComposite.Add ( tCol.second );
  3342. }
  3343. tAggr.m_sCol = sColName.cstr();
  3344. return true;
  3345. }
  3346. static bool ParseAggsNode ( const JsonObj_c & tBucket, const JsonObj_c & tJsonItem, bool bRoot, JsonAggr_t & tItem, CSphString & sError )
  3347. {
  3348. if ( !tBucket.IsObj() )
  3349. {
  3350. sError.SetSprintf ( R"("aggs" bucket '%s' should be an object)", tItem.m_sBucketName.cstr() );
  3351. return false;
  3352. }
  3353. if ( !StrEq ( tBucket.Name(), "composite" ) && !tBucket.FetchStrItem ( tItem.m_sCol, "field", sError, false ) )
  3354. return false;
  3355. tBucket.FetchIntItem ( tItem.m_iSize, "size", sError, true );
  3356. int iShardSize = 0;
  3357. tBucket.FetchIntItem ( iShardSize, "shard_size", sError, true );
  3358. tItem.m_iSize = Max ( tItem.m_iSize, iShardSize ); // FIXME!!! use (size * 1.5 + 10) for shard size
  3359. tItem.m_eAggrFunc = GetAggrFunc ( tBucket, !bRoot );
  3360. switch ( tItem.m_eAggrFunc )
  3361. {
  3362. case Aggr_e::DATE_HISTOGRAM:
  3363. if ( !ParseAggrDateHistogram ( tBucket, tItem, sError ) )
  3364. return false;
  3365. tItem.m_iSize = Max ( tItem.m_iSize, 1000 ); // set max_matches to min\max / interval
  3366. break;
  3367. case Aggr_e::HISTOGRAM:
  3368. if ( !ParseAggrHistogram ( tBucket, tItem, sError ) )
  3369. return false;
  3370. tItem.m_iSize = Max ( tItem.m_iSize, 1000 ); // set max_matches to min\max / interval
  3371. break;
  3372. case Aggr_e::RANGE:
  3373. if ( !ParseAggrRange ( tBucket, tItem, false, sError ) )
  3374. return false;
  3375. tItem.m_iSize = Max ( tItem.m_iSize, tItem.m_tRange.GetLength() + 1 ); // set max_matches to buckets count + _all bucket
  3376. break;
  3377. case Aggr_e::DATE_RANGE:
  3378. if ( !ParseAggrRange ( tBucket, tItem, true, sError ) )
  3379. return false;
  3380. tItem.m_iSize = Max ( tItem.m_iSize, tItem.m_tDateRange.GetLength() + 1 ); // set max_matches to buckets count + _all bucket
  3381. break;
  3382. case Aggr_e::COMPOSITE:
  3383. if ( !ParseAggrComposite ( tJsonItem, tItem, sError ) )
  3384. return false;
  3385. break;
  3386. case Aggr_e::MIN:
  3387. case Aggr_e::MAX:
  3388. case Aggr_e::SUM:
  3389. case Aggr_e::AVG:
  3390. tItem.m_iSize = 1;
  3391. break;
  3392. default: break;
  3393. }
  3394. return true;
  3395. }
  3396. static bool ParseAggsNodeSort ( const JsonObj_c & tJsonItem, bool bOrder, JsonAggr_t & tItem, CSphString & sError )
  3397. {
  3398. if ( !( tJsonItem.IsArray() || tJsonItem.IsObj() ) )
  3399. {
  3400. sError.SetSprintf ( "\"%s\" property value should be an array or an object", ( bOrder ? "order" : "sort" ) );
  3401. return false;
  3402. }
  3403. bool bGotWeight = false;
  3404. JsonQuery_c tTmpQuery;
  3405. tTmpQuery.m_sSortBy = "";
  3406. tTmpQuery.m_eSort = SPH_SORT_RELEVANCE;
  3407. // FIXME!!! reports warnings for geodist sort
  3408. CSphString sWarning;
  3409. if ( !ParseSort ( tJsonItem, tTmpQuery, bGotWeight, sError, sWarning ) )
  3410. return false;
  3411. tItem.m_sSort = tTmpQuery.m_sSortBy;
  3412. return true;
  3413. }
  3414. static bool AddSubAggregate ( const JsonObj_c & tAggs, bool bRoot, CSphVector<JsonAggr_t> & dParentItems, CSphString & sError )
  3415. {
  3416. if ( bRoot && tAggs.begin().Empty() )
  3417. {
  3418. JsonAggr_t & tCount = dParentItems.Add();
  3419. tCount.m_eAggrFunc = Aggr_e::COUNT;
  3420. tCount.m_iSize = 1;
  3421. return true;
  3422. }
  3423. for ( const auto & tJsonItem : tAggs )
  3424. {
  3425. if ( !tJsonItem.IsObj() )
  3426. {
  3427. sError = R"("aggs" property item should be an object)";
  3428. return false;
  3429. }
  3430. JsonAggr_t tItem;
  3431. tItem.m_sBucketName = tJsonItem.Name();
  3432. for ( const auto & tAggsItem : tJsonItem )
  3433. {
  3434. // could be a sort object at the aggs item or order object at the bucket
  3435. if ( strcmp ( tAggsItem.Name(), "sort" )==0 )
  3436. {
  3437. if ( !ParseAggsNodeSort ( tAggsItem, false, tItem, sError ) )
  3438. return false;
  3439. } else
  3440. {
  3441. if ( StrEq ( tAggsItem.Name(), "aggs" ) || tAggsItem.HasItem ( "aggs" ) )
  3442. {
  3443. sError = R"(nested "aggs" is not supported)";
  3444. return false;
  3445. }
  3446. if ( tAggsItem==tAggsItem.end() )
  3447. {
  3448. sError.SetSprintf ( R"("aggs" bucket '%s' with only nested items)", tAggsItem.Name() );
  3449. return false;
  3450. }
  3451. if ( !ParseAggsNode ( tAggsItem, tJsonItem, bRoot, tItem, sError ) )
  3452. return false;
  3453. // bucket could have its own order item
  3454. if ( tAggsItem.HasItem ( "order" ) )
  3455. {
  3456. if ( !ParseAggsNodeSort ( tAggsItem.GetItem("order"), true, tItem, sError ) )
  3457. return false;
  3458. }
  3459. }
  3460. }
  3461. if ( tItem.m_eAggrFunc==Aggr_e::NONE && !bRoot )
  3462. {
  3463. sError.SetSprintf ( R"(bucket '%s' without aggregate items)", tItem.m_sBucketName.cstr() );
  3464. return false;
  3465. }
  3466. dParentItems.Add ( tItem );
  3467. }
  3468. return true;
  3469. }
  3470. bool ParseAggregates ( const JsonObj_c & tAggs, JsonQuery_c & tQuery, CSphString & sError )
  3471. {
  3472. if ( !tAggs || !tAggs.IsObj() )
  3473. {
  3474. sError = R"("aggs" property should be an object")";
  3475. return false;
  3476. }
  3477. if ( !AddSubAggregate ( tAggs, true, tQuery.m_dAggs, sError ) )
  3478. return false;
  3479. // set query now for any date aggregate to make sure they will have the same now timestamp
  3480. if ( tQuery.m_dAggs.any_of ( [] ( const JsonAggr_t & tAggr ) { return !tAggr.m_tDateRange.IsEmpty(); } ) )
  3481. tQuery.m_iNow = time ( nullptr );
  3482. return true;
  3483. }
  3484. CSphString JsonAggr_t::GetAliasName () const
  3485. {
  3486. CSphString sName;
  3487. sName.SetSprintf ( "%s_%s", m_sCol.cstr(), m_sBucketName.cstr() );
  3488. return sName;
  3489. }
  3490. ParsedJsonQuery_t::ParsedJsonQuery_t()
  3491. {
  3492. SetQueryDefaultsExt2 ( m_tQuery );
  3493. SetQueryDefaultsExt2 ( m_tJoinQueryOptions );
  3494. }