sphinxjsonquery.cpp 112 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189
  1. //
  2. // Copyright (c) 2017-2025, Manticore Software LTD (https://manticoresearch.com)
  3. // All rights reserved
  4. //
  5. // This program is free software; you can redistribute it and/or modify
  6. // it under the terms of the GNU General Public License. You should have
  7. // received a copy of the GPL license along with this program; if you
  8. // did not, you can find it at http://www.gnu.org/
  9. //
  10. #include "sphinxquery/xqparser.h"
  11. #include "sphinxquery/parse_helper.h"
  12. #include "sphinxsearch.h"
  13. #include "sphinxplugin.h"
  14. #include "sphinxutils.h"
  15. #include "searchdaemon.h"
  16. #include "jsonqueryfilter.h"
  17. #include "attribute.h"
  18. #include "searchdsql.h"
  19. #include "searchdha.h"
  20. #include "knnmisc.h"
  21. #include "sorterscroll.h"
  22. #include "sphinxexcerpt.h"
  // Reserved names used by the JSON query code.
  // "_all" is the pseudo-field that HandleSpecialFields() turns into "every field".
  static const char * g_szAll = "_all";
  // The two names below are not referenced in this part of the file.
  static const char * g_szHighlight = "_@highlight_";
  static const char * g_szOrder = "_@order_";

  // forward declaration: ErrorPathGuard_t keeps a reference to the builder
  class QueryTreeBuilder_c;
  27. struct ErrorPathGuard_t
  28. {
  29. ErrorPathGuard_t ( QueryTreeBuilder_c & tBuilder, bool bEnabled, const JsonObj_c & tPath );
  30. ~ErrorPathGuard_t ();
  31. QueryTreeBuilder_c & m_tBuilder;
  32. const bool m_bEnabled;
  33. };
  34. class QueryTreeBuilder_c : public XQParseHelper_c
  35. {
  36. public:
  37. QueryTreeBuilder_c ( const CSphQuery * pQuery, TokenizerRefPtr_c pQueryTokenizerQL, const CSphIndexSettings & tSettings );
  38. void CollectKeywords ( const char * szStr, XQNode_t * pNode, const XQLimitSpec_t & tLimitSpec, float fBoost );
  39. bool HandleFieldBlockStart ( const char * & /*pPtr*/ ) override { return true; }
  40. bool HandleSpecialFields ( const char * & pPtr, FieldMask_t & dFields ) override;
  41. bool NeedTrailingSeparator() override { return false; }
  42. XQNode_t * CreateNode ( XQLimitSpec_t & tLimitSpec );
  43. const TokenizerRefPtr_c & GetQLTokenizer() { return m_pQueryTokenizerQL; }
  44. const CSphIndexSettings & GetIndexSettings() { return m_tSettings; }
  45. const CSphQuery * GetQuery() { return m_pQuery; }
  46. bool m_bHasFulltext = false;
  47. bool m_bHasFilter = false;
  48. void ResetNodesFlags() { m_bHasFulltext = m_bHasFilter = false; }
  49. QueryTreeBuilder_c CreateCollectPath ( const CSphSchema * pSchema );
  50. void ErrorPrintPath ( QueryTreeBuilder_c & tOrig );
  51. ErrorPathGuard_t ErrorAddPath ( const JsonObj_c & tPath );
  52. private:
  53. const CSphQuery * m_pQuery {nullptr};
  54. const TokenizerRefPtr_c m_pQueryTokenizerQL;
  55. const CSphIndexSettings & m_tSettings;
  56. XQNode_t * AddChildKeyword ( XQNode_t * pParent, const char * szKeyword, int iSkippedPosBeforeToken, const XQLimitSpec_t & tLimitSpec, float fBoost );
  57. friend ErrorPathGuard_t;
  58. CSphVector< std::pair<CSphString, const void *> > m_dErrorPath;
  59. bool m_bErrorCollectPath = false;
  60. };
  61. QueryTreeBuilder_c::QueryTreeBuilder_c ( const CSphQuery * pQuery, TokenizerRefPtr_c pQueryTokenizerQL, const CSphIndexSettings & tSettings )
  62. : m_pQuery ( pQuery )
  63. , m_pQueryTokenizerQL ( std::move (pQueryTokenizerQL) )
  64. , m_tSettings ( tSettings )
  65. {}
  66. void QueryTreeBuilder_c::CollectKeywords ( const char * szStr, XQNode_t * pNode, const XQLimitSpec_t & tLimitSpec, float fBoost )
  67. {
  68. m_pTokenizer->SetBuffer ( (const BYTE*)szStr, (int) strlen ( szStr ) );
  69. while (true)
  70. {
  71. int iSkippedPosBeforeToken = 0;
  72. if ( m_bWasBlended )
  73. {
  74. iSkippedPosBeforeToken = m_pTokenizer->SkipBlended();
  75. // just add all skipped blended parts except blended head (already added to atomPos)
  76. if ( iSkippedPosBeforeToken>1 )
  77. m_iAtomPos += iSkippedPosBeforeToken - 1;
  78. }
  79. // FIXME!!! only wildcard node need tokes with wildcard symbols
  80. const char * sToken = (const char *) m_pTokenizer->GetToken ();
  81. if ( !sToken )
  82. {
  83. AddChildKeyword ( pNode, nullptr, iSkippedPosBeforeToken, tLimitSpec, fBoost );
  84. break;
  85. }
  86. // now let's do some token post-processing
  87. m_bWasBlended = m_pTokenizer->TokenIsBlended();
  88. int iPrevDeltaPos = 0;
  89. if ( m_pPlugin && m_pPlugin->m_fnPushToken )
  90. sToken = m_pPlugin->m_fnPushToken ( m_pPluginData, const_cast<char*>(sToken), &iPrevDeltaPos, m_pTokenizer->GetTokenStart(), int ( m_pTokenizer->GetTokenEnd() - m_pTokenizer->GetTokenStart() ) );
  91. m_iAtomPos += 1 + iPrevDeltaPos;
  92. bool bMultiDestHead = false;
  93. bool bMultiDest = false;
  94. int iDestCount = 0;
  95. // do nothing inside phrase
  96. if ( !m_pTokenizer->IsPhraseMode() )
  97. bMultiDest = m_pTokenizer->WasTokenMultiformDestination ( bMultiDestHead, iDestCount );
  98. // check for stopword, and create that node
  99. // temp buffer is required, because GetWordID() might expand (!) the keyword in-place
  100. BYTE sTmp [ MAX_TOKEN_BYTES ];
  101. strncpy ( (char*)sTmp, sToken, MAX_TOKEN_BYTES );
  102. sTmp[MAX_TOKEN_BYTES-1] = '\0';
  103. int iStopWord = 0;
  104. if ( m_pPlugin && m_pPlugin->m_fnPreMorph )
  105. m_pPlugin->m_fnPreMorph ( m_pPluginData, (char*)sTmp, &iStopWord );
  106. SphWordID_t uWordId = iStopWord ? 0 : m_pDict->GetWordID ( sTmp );
  107. if ( uWordId && m_pPlugin && m_pPlugin->m_fnPostMorph )
  108. {
  109. int iRes = m_pPlugin->m_fnPostMorph ( m_pPluginData, (char*)sTmp, &iStopWord );
  110. if ( iStopWord )
  111. uWordId = 0;
  112. else if ( iRes )
  113. uWordId = m_pDict->GetWordIDNonStemmed ( sTmp );
  114. }
  115. if ( !uWordId )
  116. {
  117. sToken = nullptr;
  118. // stopwords with step=0 must not affect pos
  119. if ( m_bEmptyStopword )
  120. m_iAtomPos--;
  121. }
  122. XQNode_t * pChildNode = nullptr;
  123. if ( bMultiDest && !bMultiDestHead )
  124. {
  125. assert ( m_dMultiforms.GetLength() );
  126. m_dMultiforms.Last().m_iDestCount++;
  127. m_dDestForms.Add ( sToken );
  128. } else
  129. pChildNode = AddChildKeyword ( pNode, sToken, iSkippedPosBeforeToken, tLimitSpec, fBoost );
  130. if ( bMultiDestHead )
  131. {
  132. MultiformNode_t & tMulti = m_dMultiforms.Add();
  133. tMulti.m_pNode = pChildNode;
  134. tMulti.m_iDestStart = m_dDestForms.GetLength();
  135. tMulti.m_iDestCount = 0;
  136. }
  137. }
  138. }
  139. bool QueryTreeBuilder_c::HandleSpecialFields ( const char * & pPtr, FieldMask_t & dFields )
  140. {
  141. if ( *pPtr=='_' )
  142. {
  143. auto iLen = (int) strlen(g_szAll);
  144. if ( !strncmp ( pPtr, g_szAll, iLen ) )
  145. {
  146. pPtr += iLen;
  147. dFields.SetAll();
  148. return true;
  149. }
  150. }
  151. return false;
  152. }
  153. XQNode_t * QueryTreeBuilder_c::CreateNode ( XQLimitSpec_t & tLimitSpec )
  154. {
  155. auto * pNode = new XQNode_t(tLimitSpec);
  156. m_dSpawned.Add ( pNode );
  157. return pNode;
  158. }
  159. XQNode_t * QueryTreeBuilder_c::AddChildKeyword ( XQNode_t * pParent, const char * szKeyword, int iSkippedPosBeforeToken, const XQLimitSpec_t & tLimitSpec, float fBoost )
  160. {
  161. XQKeyword_t tKeyword ( szKeyword, m_iAtomPos );
  162. tKeyword.m_iSkippedBefore = iSkippedPosBeforeToken;
  163. tKeyword.m_fBoost = fBoost;
  164. auto * pNode = new XQNode_t ( tLimitSpec );
  165. pNode->AddDirtyWord ( tKeyword );
  166. pParent->AddNewChild ( pNode );
  167. m_dSpawned.Add ( pNode );
  168. return pNode;
  169. }
  170. ErrorPathGuard_t QueryTreeBuilder_c::ErrorAddPath ( const JsonObj_c & tPath )
  171. {
  172. return ErrorPathGuard_t ( *this, m_bErrorCollectPath, tPath );
  173. }
  174. void QueryTreeBuilder_c::ErrorPrintPath ( QueryTreeBuilder_c & tOrig )
  175. {
  176. assert ( IsError() );
  177. StringBuilder_c tBuilder;
  178. tBuilder.Appendf ( "%s at '", tOrig.m_pParsed->m_sParseError.cstr() );
  179. const void * pLast = nullptr;
  180. for ( const auto & tEntry : m_dErrorPath )
  181. {
  182. // skip duplicates
  183. if ( !tEntry.second || pLast!=tEntry.second )
  184. tBuilder.Appendf ( "/%s", tEntry.first.scstr() );
  185. pLast = tEntry.second;
  186. }
  187. tBuilder << "'";
  188. tOrig.m_pParsed->m_sParseError = (CSphString)tBuilder;
  189. }
  190. QueryTreeBuilder_c QueryTreeBuilder_c::CreateCollectPath ( const CSphSchema * pSchema )
  191. {
  192. QueryTreeBuilder_c tOther ( m_pQuery, std::move ( m_pQueryTokenizerQL ), m_tSettings );
  193. tOther.Setup ( pSchema, m_pTokenizer->Clone ( SPH_CLONE ), std::move ( m_pDict ), m_pParsed, m_tSettings );
  194. tOther.m_bErrorCollectPath = true;
  195. tOther.m_dErrorPath.Add ( { "query", nullptr } );
  196. return tOther;
  197. }
  198. ErrorPathGuard_t::ErrorPathGuard_t ( QueryTreeBuilder_c & tBuilder, bool bEnabled, const JsonObj_c & tPath )
  199. : m_tBuilder ( tBuilder )
  200. , m_bEnabled ( bEnabled )
  201. {
  202. // add path entry only in the collect pass and only prior to error point
  203. if ( m_bEnabled && !m_tBuilder.IsError() )
  204. m_tBuilder.m_dErrorPath.Add ( { tPath.Name(), tPath.GetRoot() } );
  205. }
  206. ErrorPathGuard_t::~ErrorPathGuard_t ()
  207. {
  208. if ( m_bEnabled && !m_tBuilder.IsError() )
  209. m_tBuilder.m_dErrorPath.Pop();
  210. }
  211. //////////////////////////////////////////////////////////////////////////
  212. class QueryParserJson_c : public QueryParser_i
  213. {
  214. public:
  215. bool IsFullscan ( const CSphQuery & tQuery ) const final;
  216. bool IsFullscan ( const XQQuery_t & tQuery ) const final;
  217. bool ParseQuery ( XQQuery_t & tParsed, const char * sQuery, const CSphQuery * pQuery, TokenizerRefPtr_c pQueryTokenizer, TokenizerRefPtr_c pQueryTokenizerJson, const CSphSchema * pSchema, const DictRefPtr_c& pDict, const CSphIndexSettings & tSettings, const CSphBitvec * pMorphFields ) const final;
  218. QueryParser_i * Clone() const final { return new QueryParserJson_c; }
  219. private:
  220. XQNode_t * ConstructMatchNode ( const JsonObj_c & tJson, bool bPhrase, bool bTerms, bool bSingleTerm, QueryTreeBuilder_c & tBuilder ) const;
  221. XQNode_t * ConstructBoolNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const;
  222. XQNode_t * ConstructQLNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const;
  223. XQNode_t * ConstructMatchAllNode ( QueryTreeBuilder_c & tBuilder ) const;
  224. bool ConstructBoolNodeItems ( const JsonObj_c & tClause, CSphVector<XQNode_t *> & dItems, QueryTreeBuilder_c & tBuilder ) const;
  225. bool ConstructNodeOrFilter ( const JsonObj_c & tItem, CSphVector<XQNode_t *> & dNodes, QueryTreeBuilder_c & tBuilder ) const;
  226. XQNode_t * ConstructNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const;
  227. };
  228. bool QueryParserJson_c::IsFullscan ( const CSphQuery & tQuery ) const
  229. {
  230. const char * szQ = tQuery.m_sQuery.cstr();
  231. if ( !szQ ) return true;
  232. if ( strstr ( szQ, R"("match")" ) ) return false;
  233. if ( strstr ( szQ, R"("terms")" ) ) return false;
  234. if ( strstr ( szQ, R"("match_phrase")" ) ) return false;
  235. if ( strstr ( szQ, R"("term")" ) ) return false;
  236. if ( strstr ( szQ, R"("query_string")" ) ) return false;
  237. if ( strstr ( szQ, R"("simple_query_string")" ) ) return false;
  238. return true;
  239. }
  240. bool QueryParserJson_c::IsFullscan ( const XQQuery_t & tQuery ) const
  241. {
  242. return !( tQuery.m_pRoot && ( tQuery.m_pRoot->dChildren().GetLength () || tQuery.m_pRoot->dWords().GetLength () ) );
  243. }
  244. static bool IsFullText ( const CSphString & sName );
  245. static bool IsBoolNode ( const CSphString & sName );
  246. bool CheckRootNode ( const JsonObj_c & tRoot, CSphString & sError )
  247. {
  248. bool bFilter = false;
  249. bool bBool = false;
  250. bool bFullText = false;
  251. for ( const auto & tItem : tRoot )
  252. {
  253. const CSphString & sName = tItem.Name();
  254. if ( IsFilter ( tItem ) )
  255. {
  256. if ( bFilter )
  257. {
  258. sError = "\"query\" has multiple filter properties, use bool node";
  259. return false;
  260. }
  261. bFilter = true;
  262. }
  263. else if ( IsBoolNode ( sName ) )
  264. {
  265. if ( bBool )
  266. {
  267. sError = "\"query\" has multiple bool properties";
  268. return false;
  269. }
  270. bBool = true;
  271. }
  272. else if ( IsFullText ( sName ) )
  273. {
  274. if ( bFullText )
  275. {
  276. sError = "\"query\" has multiple full-text properties, use bool node";
  277. return false;
  278. }
  279. bFullText = true;
  280. }
  281. }
  282. return true;
  283. }
  284. static JsonObj_c FindFullTextQueryNode ( const JsonObj_c & tRoot )
  285. {
  286. for ( JsonObj_c tChild : tRoot )
  287. {
  288. if ( !IsFilter ( tChild ) )
  289. return tChild;
  290. }
  291. return tRoot[0];
  292. }
  293. bool QueryParserJson_c::ParseQuery ( XQQuery_t & tParsed, const char * szQuery, const CSphQuery * pQuery, TokenizerRefPtr_c pQueryTokenizerQL, TokenizerRefPtr_c pQueryTokenizerJson, const CSphSchema * pSchema, const DictRefPtr_c & pDict, const CSphIndexSettings & tSettings, const CSphBitvec * pMorphFields ) const
  294. {
  295. JsonObj_c tRoot ( szQuery );
  296. // take only the first item of the query; ignore the rest
  297. int iNumIndexes = ( tRoot.Empty() ? 0 : tRoot.Size() );
  298. if ( !iNumIndexes )
  299. {
  300. tParsed.m_sParseError = "\"query\" property is empty";
  301. return false;
  302. }
  303. if ( iNumIndexes!=1 && !CheckRootNode ( tRoot, tParsed.m_sParseError ) )
  304. return false;
  305. assert ( pQueryTokenizerJson->IsQueryTok() );
  306. DictRefPtr_c pMyDict = GetStatelessDict ( pDict );
  307. QueryTreeBuilder_c tBuilder ( pQuery, std::move ( pQueryTokenizerQL ), tSettings );
  308. tBuilder.Setup ( pSchema, pQueryTokenizerJson->Clone ( SPH_CLONE ), pMyDict, &tParsed, tSettings );
  309. const JsonObj_c tFtNode = FindFullTextQueryNode ( tRoot );
  310. XQNode_t * pRoot = ConstructNode ( tFtNode, tBuilder );
  311. if ( tBuilder.IsError() )
  312. {
  313. tBuilder.Cleanup();
  314. QueryTreeBuilder_c tErrorBuilder { tBuilder.CreateCollectPath ( pSchema ) };
  315. ConstructNode ( tFtNode, tErrorBuilder );
  316. tErrorBuilder.Cleanup();
  317. tErrorBuilder.ErrorPrintPath ( tBuilder );
  318. return false;
  319. }
  320. XQLimitSpec_t tLimitSpec;
  321. pRoot = tBuilder.FixupTree ( pRoot, tLimitSpec, pMorphFields, IsAllowOnlyNot() );
  322. if ( tBuilder.IsError() )
  323. {
  324. tBuilder.Cleanup();
  325. return false;
  326. }
  327. tParsed.m_bSingleWord = ( pRoot && pRoot->dChildren().IsEmpty() && pRoot->dWords().GetLength() == 1 );
  328. tParsed.m_pRoot = pRoot;
  329. return true;
  330. }
  331. static const char * g_szOperatorNames[]=
  332. {
  333. "and",
  334. "or"
  335. };
  336. static XQOperator_e StrToNodeOp ( const char * szStr )
  337. {
  338. if ( !szStr )
  339. return SPH_QUERY_TOTAL;
  340. int iOp=0;
  341. for ( auto i : g_szOperatorNames )
  342. {
  343. if ( !strcmp ( szStr, i ) )
  344. return XQOperator_e(iOp);
  345. iOp++;
  346. }
  347. return SPH_QUERY_TOTAL;
  348. }
  349. static bool IsBoolNode ( const JsonObj_c & tJson )
  350. {
  351. if ( !tJson )
  352. return false;
  353. return CSphString ( tJson.Name() )=="bool";
  354. }
  355. bool IsBoolNode ( const CSphString & sName )
  356. {
  357. return ( sName=="bool" );
  358. }
  359. static float GetBoost ( const JsonObj_c & tFields )
  360. {
  361. const float fBoostDefault = 1.0f;
  362. if ( !tFields.IsObj() )
  363. return fBoostDefault;
  364. JsonObj_c tBoost = tFields.GetItem ( "boost" );
  365. if ( !tBoost || !tBoost.IsNum() )
  366. return fBoostDefault;
  367. return tBoost.FltVal();
  368. }
// Builds a full-text node from a "match"-like property (ConstructNode() routes "match", "match_phrase",
// "terms" and "term" here).
// tJson must be an object with exactly one member; the member name is handed to the builder via SetString()
// before ParseFields() is called, and the member value supplies the query text.
//   bPhrase     - use SPH_QUERY_PHRASE as the node operator (otherwise SPH_QUERY_OR, unless "operator" overrides it)
//   bTerms      - the member value must be an array of strings
//   bSingleTerm - when the member value is an object, read the text from "value" instead of "query"
// Returns the new node, or nullptr after reporting an error or a warning to tBuilder.
XQNode_t * QueryParserJson_c::ConstructMatchNode ( const JsonObj_c & tJson, bool bPhrase, bool bTerms, bool bSingleTerm, QueryTreeBuilder_c & tBuilder ) const
{
	ErrorPathGuard_t tGuard = tBuilder.ErrorAddPath ( tJson );
	if ( !tJson.IsObj() )
	{
		tBuilder.Error ( "\"match\" value should be an object" );
		return nullptr;
	}
	if ( tJson.Size()!=1 )
	{
		tBuilder.Error ( "ill-formed \"match\" property" );
		return nullptr;
	}
	// the single member of the property
	JsonObj_c tFields = tJson[0];
	tBuilder.SetString ( tFields.Name() );
	XQLimitSpec_t tLimitSpec;
	// points into the JSON tree or into tTermsBuf; both outlive the CollectKeywords() call below
	const char * szQuery = nullptr;
	XQOperator_e eNodeOp = bPhrase ? SPH_QUERY_PHRASE : SPH_QUERY_OR;
	bool bIgnore = false;
	// collects the "terms" values; constructed with " " (presumably the delimiter put between appended items)
	StringBuilder_c tTermsBuf ( " " );
	if ( !tBuilder.ParseFields ( tLimitSpec.m_dFieldMask, tLimitSpec.m_iFieldMaxPos, bIgnore ) )
		return nullptr;
	if ( bIgnore )
	{
		tBuilder.Warning ( R"(ignoring fields in "%s", using "_all")", tFields.Name() );
		tLimitSpec.Reset();
	}
	tLimitSpec.m_bFieldSpec = true;
	if ( bTerms )
	{
		if ( !tFields.IsArray() )
		{
			// NOTE(review): this path returns nullptr with only a warning recorded, no error - confirm that is intended
			tBuilder.Warning ( "values of properties in \"terms\" should be an array" );
			return nullptr;
		}
		for ( const auto & tTerm : tFields )
		{
			if ( !tTerm.IsStr() )
			{
				tBuilder.Error ( "\"terms\" value should be a string" );
				return nullptr;
			}
			tTermsBuf += tTerm.SzVal();
		}
		szQuery = tTermsBuf.cstr();
	} else if ( tFields.IsObj() )
	{
		// matching with flags
		CSphString sError;
		JsonObj_c tQuery = ( bSingleTerm ? tFields.GetStrItem ( "value", sError ) : tFields.GetStrItem ( "query", sError ) );
		if ( !tQuery )
		{
			tBuilder.Error ( "%s", sError.cstr() );
			return nullptr;
		}
		szQuery = tQuery.SzVal();
		// "operator" is only consulted for non-phrase matching
		if ( !bPhrase )
		{
			JsonObj_c tOp = tFields.GetItem ( "operator" );
			if ( tOp ) // "and", "or"
			{
				eNodeOp = StrToNodeOp ( tOp.SzVal() );
				if ( eNodeOp==SPH_QUERY_TOTAL )
				{
					// NOTE(review): if SzVal() can return nullptr for a non-string "operator" (StrToNodeOp guards against it),
					// a null pointer is passed to "%s" here - confirm
					tBuilder.Error ( "unknown operator: \"%s\"", tOp.SzVal() );
					return nullptr;
				}
			}
		}
	} else
	{
		// simple list of keywords
		if ( !tFields.IsStr() )
		{
			// NOTE(review): as with "terms" above, only a warning is recorded before returning nullptr
			tBuilder.Warning ( "values of properties in \"match\" should be strings or objects" );
			return nullptr;
		}
		szQuery = tFields.SzVal();
	}
	assert ( szQuery );
	XQNode_t * pNewNode = tBuilder.CreateNode ( tLimitSpec );
	pNewNode->SetOp ( eNodeOp );
	// GetBoost() yields 1.0 unless tFields is an object with a numeric "boost"
	float fBoost = GetBoost ( tFields );
	tBuilder.CollectKeywords ( szQuery, pNewNode, tLimitSpec, fBoost );
	return pNewNode;
}
  455. bool QueryParserJson_c::ConstructNodeOrFilter ( const JsonObj_c & tItem, CSphVector<XQNode_t *> & dNodes, QueryTreeBuilder_c & tBuilder ) const
  456. {
  457. if ( !tItem )
  458. return true;
  459. // we created filters before, no need to process them again
  460. if ( IsFilter(tItem) )
  461. {
  462. tBuilder.m_bHasFilter = true;
  463. return true;
  464. }
  465. XQNode_t * pNode = ConstructNode ( tItem, tBuilder );
  466. if ( !pNode )
  467. return IsBoolNode ( tItem ); // need walk down the tree for compart mode
  468. dNodes.Add ( pNode );
  469. return true;
  470. }
  471. bool QueryParserJson_c::ConstructBoolNodeItems ( const JsonObj_c & tClause, CSphVector<XQNode_t *> & dItems, QueryTreeBuilder_c & tBuilder ) const
  472. {
  473. ErrorPathGuard_t tGuard = tBuilder.ErrorAddPath ( tClause );
  474. if ( tClause.IsArray() )
  475. {
  476. for ( const auto & tObject : tClause )
  477. {
  478. if ( !tObject.IsObj() )
  479. {
  480. tBuilder.Error ( "\"%s\" array value should be an object", tClause.Name() );
  481. return false;
  482. }
  483. if ( !ConstructNodeOrFilter ( tObject[0], dItems, tBuilder ) )
  484. return false;
  485. }
  486. } else if ( tClause.IsObj() )
  487. {
  488. if ( !ConstructNodeOrFilter ( tClause[0], dItems, tBuilder ) )
  489. return false;
  490. } else
  491. {
  492. tBuilder.Error ( "\"%s\" value should be an object or an array", tClause.Name() );
  493. return false;
  494. }
  495. return true;
  496. }
// Builds the query subtree for a "bool" property.
// Each clause ("must", "should", "must_not", "filter") is collected into its own node list
// ("filter" items are added to the "must" list; "minimum_should_match" is skipped),
// then the lists are folded into AND / OR / NOT nodes and combined into a single result.
// Returns nullptr when an error was reported or when no clause produced any node.
XQNode_t * QueryParserJson_c::ConstructBoolNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const
{
	ErrorPathGuard_t tGuard = tBuilder.ErrorAddPath ( tJson );
	if ( !tJson.IsObj() )
	{
		tBuilder.Error ( "\"bool\" value should be an object" );
		return nullptr;
	}
	CSphVector<XQNode_t *> dMust, dShould, dMustNot;
	for ( const auto & tClause : tJson )
	{
		// the filter / full-text flags are tracked per clause
		tBuilder.ResetNodesFlags();
		CSphString sName = tClause.Name();
		if ( sName=="must" )
		{
			if ( !ConstructBoolNodeItems ( tClause, dMust, tBuilder ) )
				return nullptr;
		} else if ( sName=="should" )
		{
			if ( !ConstructBoolNodeItems ( tClause, dShould, tBuilder ) )
				return nullptr;
			// a "should" clause must not contain both filters and full-text nodes
			if ( tBuilder.m_bHasFilter && tBuilder.m_bHasFulltext )
			{
				tBuilder.Error ( "filter and full-text can be used together only inside \"must\" node" );
				return nullptr;
			}
		} else if ( sName=="must_not" )
		{
			if ( !ConstructBoolNodeItems ( tClause, dMustNot, tBuilder ) )
				return nullptr;
		} else if ( sName=="filter" )
		{
			// "filter" items are collected together with the "must" items
			if ( !ConstructBoolNodeItems ( tClause, dMust, tBuilder ) )
				return nullptr;
		} else if ( sName=="minimum_should_match" ) // FIXME!!! add to should as option
		{
			continue;
		} else
		{
			tBuilder.Error ( "unknown bool query type: \"%s\"", sName.cstr() );
			return nullptr;
		}
	}
	XQNode_t * pMustNode = nullptr;
	XQNode_t * pShouldNode = nullptr;
	XQNode_t * pMustNotNode = nullptr;
	XQLimitSpec_t tLimitSpec;
	if ( dMust.GetLength() )
	{
		// no need to construct AND node for a single child
		if ( dMust.GetLength()==1 )
			pMustNode = dMust[0];
		else
		{
			XQNode_t * pAndNode = tBuilder.CreateNode ( tLimitSpec );
			pAndNode->SetOp ( SPH_QUERY_AND );
			for ( auto & i : dMust )
				pAndNode->AddNewChild ( i);
			pMustNode = pAndNode;
		}
	}
	if ( dShould.GetLength() )
	{
		// same shortcut as for "must": a single child is used directly, several are grouped under OR
		if ( dShould.GetLength()==1 )
			pShouldNode = dShould[0];
		else
		{
			XQNode_t * pOrNode = tBuilder.CreateNode ( tLimitSpec );
			pOrNode->SetOp ( SPH_QUERY_OR );
			for ( auto & i : dShould )
				pOrNode->AddNewChild (i);
			pShouldNode = pOrNode;
		}
	}
	// slightly different case - we need to construct the NOT node anyway
	if ( dMustNot.GetLength() )
	{
		XQNode_t * pNotNode = tBuilder.CreateNode ( tLimitSpec );
		pNotNode->SetOp ( SPH_QUERY_NOT );
		if ( dMustNot.GetLength()==1 )
		{
			pNotNode->AddNewChild ( dMustNot[0] );
		} else
		{
			// several "must_not" items: NOT ( item1 OR item2 OR ... )
			XQNode_t * pOrNode = tBuilder.CreateNode ( tLimitSpec );
			pOrNode->SetOp ( SPH_QUERY_OR );
			for ( auto & i : dMustNot )
				pOrNode->AddNewChild ( i );
			pNotNode->AddNewChild ( pOrNode );
		}
		pMustNotNode = pNotNode;
	}
	// count how many of the three clause nodes were actually built
	int iTotalNodes = 0;
	iTotalNodes += pMustNode ? 1 : 0;
	iTotalNodes += pShouldNode ? 1 : 0;
	iTotalNodes += pMustNotNode ? 1 : 0;
	XQNode_t * pResultNode = nullptr;
	if ( !iTotalNodes )
		return nullptr;
	else if ( iTotalNodes==1 )
	{
		// a single clause node is returned as is
		if ( pMustNode )
			pResultNode = pMustNode;
		else if ( pShouldNode )
			pResultNode = pShouldNode;
		else
			pResultNode = pMustNotNode;
		assert ( pResultNode );
	} else
	{
		// with two or more clause nodes at least one of "must" / "must_not" is present
		pResultNode = pMustNode ? pMustNode : pMustNotNode;
		assert ( pResultNode );
		// combine 'must' and 'must_not' with AND
		if ( pMustNode && pMustNotNode )
		{
			XQNode_t * pAndNode = tBuilder.CreateNode(tLimitSpec);
			pAndNode->SetOp(SPH_QUERY_AND);
			pAndNode->AddNewChild ( pMustNode );
			pAndNode->AddNewChild ( pMustNotNode );
			pResultNode = pAndNode;
		}
		// combine 'result' node and 'should' node with MAYBE
		if ( pShouldNode )
		{
			XQNode_t * pMaybeNode = tBuilder.CreateNode ( tLimitSpec );
			pMaybeNode->SetOp ( SPH_QUERY_MAYBE );
			pMaybeNode->AddNewChild ( pResultNode );
			pMaybeNode->AddNewChild ( pShouldNode );
			pResultNode = pMaybeNode;
		}
	}
	return pResultNode;
}
  630. XQNode_t * QueryParserJson_c::ConstructQLNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const
  631. {
  632. ErrorPathGuard_t tGuard = tBuilder.ErrorAddPath ( tJson );
  633. CSphString sQueryString;
  634. // query_string could be either {"query_string":{"query":"term"}} or {"query_string":"term"}
  635. if ( tJson.IsObj() )
  636. {
  637. CSphString sError;
  638. JsonObj_c tNestedQuery = tJson.GetStrItem ( "query", sError, false );
  639. if ( !tNestedQuery )
  640. {
  641. tBuilder.Error ( "\"query_string\" value should be an object with the \"query\" string" );
  642. return nullptr;
  643. }
  644. sQueryString = tNestedQuery.StrVal();
  645. }
  646. if ( sQueryString.IsEmpty() )
  647. {
  648. if ( tJson.IsStr() )
  649. {
  650. sQueryString = tJson.StrVal();
  651. } else
  652. {
  653. tBuilder.Error ( "\"query_string\" value should be an string" );
  654. return nullptr;
  655. }
  656. }
  657. XQQuery_t tParsed;
  658. tParsed.m_dZones = tBuilder.GetZone(); // should keep the same zone list for whole tree
  659. // no need to pass morph fields here as upper level does fixup
  660. if ( !sphParseExtendedQuery ( tParsed, sQueryString.cstr(), tBuilder.GetQuery(), tBuilder.GetQLTokenizer(), tBuilder.GetSchema(), tBuilder.GetDict(), tBuilder.GetIndexSettings(), nullptr ) )
  661. {
  662. tBuilder.Error ( "%s", tParsed.m_sParseError.cstr() );
  663. return nullptr;
  664. }
  665. if ( !tParsed.m_sParseWarning.IsEmpty() )
  666. tBuilder.Warning ( "%s", tParsed.m_sParseWarning.cstr() );
  667. XQNode_t * pRoot = tParsed.m_pRoot;
  668. tParsed.m_pRoot = nullptr;
  669. tBuilder.SetZone ( tParsed.m_dZones );
  670. return pRoot;
  671. }
  672. XQNode_t * QueryParserJson_c::ConstructMatchAllNode ( QueryTreeBuilder_c & tBuilder ) const
  673. {
  674. XQLimitSpec_t tLimitSpec;
  675. XQNode_t * pNewNode = tBuilder.CreateNode ( tLimitSpec );
  676. pNewNode->SetOp ( SPH_QUERY_NULL );
  677. return pNewNode;
  678. }
  679. static bool IsFtMatch ( const CSphString & sName )
  680. {
  681. return ( sName=="match" );
  682. }
  683. static bool IsFtTerms ( const CSphString & sName )
  684. {
  685. return ( sName=="terms" );
  686. }
  687. static bool IsFtPhrase ( const CSphString & sName )
  688. {
  689. return ( sName=="match_phrase" );
  690. }
  691. static bool IsFtTerm ( const CSphString & sName )
  692. {
  693. return ( sName=="term" );
  694. }
  695. static bool IsFtMatchAll ( const CSphString & sName )
  696. {
  697. return ( sName=="match_all" );
  698. }
  699. static bool IsFtQueryString ( const CSphString & sName )
  700. {
  701. return ( sName=="query_string" );
  702. }
  703. static bool IsFtQueryStringSimple ( const CSphString & sName )
  704. {
  705. return ( sName=="simple_query_string" );
  706. }
  707. bool IsFullText ( const CSphString & sName )
  708. {
  709. return ( IsFtMatch ( sName ) || IsFtTerms ( sName ) || IsFtPhrase ( sName ) || IsFtTerm ( sName ) || IsFtMatchAll ( sName ) || IsFtQueryString ( sName ) || IsFtQueryStringSimple ( sName ));
  710. }
  711. XQNode_t * QueryParserJson_c::ConstructNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const
  712. {
  713. ErrorPathGuard_t tGuard = tBuilder.ErrorAddPath ( tJson );
  714. CSphString sName = tJson.Name();
  715. if ( !tJson || sName.IsEmpty() )
  716. {
  717. tBuilder.Error ( "empty json found" );
  718. return nullptr;
  719. }
  720. bool bMatch = IsFtMatch ( sName );
  721. bool bTerms = IsFtTerms ( sName );
  722. bool bPhrase = IsFtPhrase ( sName );
  723. bool bSingleTerm = IsFtTerm ( sName );
  724. if ( bMatch || bPhrase || bTerms || bSingleTerm )
  725. {
  726. tBuilder.m_bHasFulltext = true;
  727. return ConstructMatchNode ( tJson, bPhrase, bTerms, bSingleTerm, tBuilder );
  728. }
  729. if ( IsFtMatchAll ( sName ) )
  730. {
  731. tBuilder.m_bHasFulltext = true;
  732. return ConstructMatchAllNode ( tBuilder );
  733. }
  734. if ( IsBoolNode ( sName ) )
  735. return ConstructBoolNode ( tJson, tBuilder );
  736. if ( IsFtQueryString ( sName ) )
  737. {
  738. tBuilder.m_bHasFulltext = true;
  739. return ConstructQLNode ( tJson, tBuilder );
  740. }
  741. if ( IsFtQueryStringSimple ( sName ) && tJson.IsObj() )
  742. {
  743. tBuilder.m_bHasFulltext = true;
  744. return ConstructQLNode ( tJson.GetItem ( "query" ), tBuilder );
  745. }
  746. tBuilder.Error ( "unknown full-text node '%s'", sName.cstr() );
  747. return nullptr;
  748. }
  749. bool NonEmptyQuery ( const JsonObj_c & tQuery )
  750. {
  751. return ( tQuery.HasItem("match")
  752. || tQuery.HasItem("match_phrase")
  753. || tQuery.HasItem("bool") )
  754. || tQuery.HasItem("query_string");
  755. }
  756. //////////////////////////////////////////////////////////////////////////
// Forward declarations of file-local parsers for the individual sections of a JSON search request;
// they are called from sphParseJsonQuery() before their definitions appear.
static bool ParseSnippet ( const JsonObj_c & tSnip, CSphQuery & tQuery, CSphString & sError );
static bool ParseSort ( const JsonObj_c & tSort, JsonQuery_c & tQuery, bool & bGotWeight, CSphString & sError, CSphString & sWarning );
static bool ParseSelect ( const JsonObj_c & tSelect, CSphQuery & tQuery, CSphString & sError );
static bool ParseScriptFields ( const JsonObj_c & tExpr, CSphQuery & tQuery, CSphString & sError );
static bool ParseExpressions ( const JsonObj_c & tExpr, CSphQuery & tQuery, CSphString & sError );
static bool ParseDocFields ( const JsonObj_c & tDocFields, JsonQuery_c & tQuery, CSphString & sError );
static bool ParseAggregates ( const JsonObj_c & tAggs, JsonQuery_c & tQuery, CSphString & sError );
  764. static bool ParseIndex ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, CSphString & sError )
  765. {
  766. if ( !tRoot )
  767. {
  768. sError.SetSprintf ( "unable to parse: %s", tRoot.GetErrorPtr() );
  769. return false;
  770. }
  771. JsonObj_c tIndex = tRoot.GetStrItem ( "table", sError );
  772. if ( !tIndex )
  773. {
  774. tIndex = tRoot.GetStrItem ( "index", sError, true );
  775. if ( !tIndex )
  776. return false;
  777. sError = "";
  778. }
  779. tStmt.m_sIndex = tIndex.StrVal();
  780. tStmt.m_tQuery.m_sIndexes = tStmt.m_sIndex;
  781. const char * sIndexStart = strchr ( tStmt.m_sIndex.cstr(), ':' );
  782. if ( sIndexStart!=nullptr )
  783. {
  784. const char * sIndex = tStmt.m_sIndex.cstr();
  785. sError.SetSprintf ( "wrong table at cluster syntax, use \"cluster\": \"%.*s\" and \"index\": \"%s\" properties, instead of '%s'",
  786. (int)(sIndexStart-sIndex), sIndex, sIndexStart+1, sIndex );
  787. return false;
  788. }
  789. return true;
  790. }
  791. static bool ParseIndexId ( const JsonObj_c & tRoot, bool bArrayIds, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
  792. {
  793. if ( !ParseIndex ( tRoot, tStmt, sError ) )
  794. return false;
  795. JsonObj_c tId = tRoot.GetItem ( "id" );
  796. if ( tId )
  797. {
  798. if ( !tId.IsInt() && !tId.IsUint() && !tId.IsArray() )
  799. {
  800. sError = "Document ids should be integer or array of integers";
  801. return false;
  802. }
  803. if ( !bArrayIds && tId.IsArray() )
  804. {
  805. sError = "Document ids should be integer";
  806. return false;
  807. }
  808. if ( !tId.IsArray() )
  809. {
  810. if ( tId.IsInt() && tId.IntVal()<0 )
  811. {
  812. sError = "Negative document ids are not allowed";
  813. return false;
  814. }
  815. } else
  816. {
  817. for ( const auto & tItem : tId )
  818. {
  819. if ( !tItem.IsInt() && !tItem.IsUint() )
  820. {
  821. sError = "Document ids should be integer";
  822. return false;
  823. }
  824. if ( tItem.IsInt() && tItem.IntVal()<0 )
  825. {
  826. sError = "Negative document ids are not allowed";
  827. return false;
  828. }
  829. }
  830. }
  831. }
  832. if ( tId && !tId.IsArray() )
  833. tDocId = tId.IntVal();
  834. else
  835. tDocId = 0; // enable auto-id
  836. return true;
  837. }
  838. static bool ParseCluster ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, CSphString & sError )
  839. {
  840. if ( !tRoot )
  841. {
  842. sError.SetSprintf ( "unable to parse: %s", tRoot.GetErrorPtr() );
  843. return false;
  844. }
  845. // cluster is optional
  846. JsonObj_c tCluster = tRoot.GetStrItem ( "cluster", sError, true );
  847. if ( tCluster )
  848. tStmt.m_sCluster = tCluster.StrVal();
  849. return true;
  850. }
  851. std::unique_ptr<QueryParser_i> sphCreateJsonQueryParser()
  852. {
  853. return std::make_unique<QueryParserJson_c>();
  854. }
  855. static bool ParseLimits ( const JsonObj_c & tRoot, CSphQuery & tQuery, CSphString & sError )
  856. {
  857. JsonObj_c tLimit = tRoot.GetIntItem ( "limit", "size", sError );
  858. if ( !sError.IsEmpty() )
  859. return false;
  860. if ( tLimit )
  861. tQuery.m_iLimit = (int)tLimit.IntVal();
  862. JsonObj_c tOffset = tRoot.GetIntItem ( "offset", "from", sError );
  863. if ( !sError.IsEmpty() )
  864. return false;
  865. if ( tOffset )
  866. tQuery.m_iOffset = (int)tOffset.IntVal();
  867. JsonObj_c tCutoff = tRoot.GetIntItem ( "cutoff", sError, true );
  868. if ( !sError.IsEmpty() )
  869. return false;
  870. if ( tCutoff )
  871. tQuery.m_iCutoff = (int)tCutoff.IntVal();
  872. JsonObj_c tMaxMatches = tRoot.GetIntItem ( "max_matches", sError, true );
  873. if ( !sError.IsEmpty() )
  874. return false;
  875. if ( tMaxMatches )
  876. {
  877. tQuery.m_iMaxMatches = (int)tMaxMatches.IntVal();
  878. tQuery.m_bExplicitMaxMatches = true;
  879. }
  880. return true;
  881. }
  882. static bool ParseOptions ( const JsonObj_c & tOptions, CSphQuery & tQuery, CSphString & sError )
  883. {
  884. if ( !tOptions.IsObj() )
  885. {
  886. sError = "\"options\" property value should be an object";
  887. return false;
  888. }
  889. for ( const auto & i : tOptions )
  890. {
  891. AddOption_e eAdd = AddOption_e::NOT_FOUND;
  892. CSphString sOpt = i.Name();
  893. if ( i.IsInt() || i.IsBool() )
  894. eAdd = AddOption ( tQuery, sOpt, i.StrVal(), i.IntVal(), STMT_SELECT, sError );
  895. else if ( i.IsStr() )
  896. {
  897. CSphString sRanker = i.StrVal();
  898. const char * szRanker = sRanker.cstr();
  899. while ( sphIsAlpha(*szRanker) )
  900. szRanker++;
  901. if ( *szRanker=='(' && sRanker.Ends(")") )
  902. {
  903. int iRankerNameLen = szRanker-sRanker.cstr();
  904. CSphString sExpr = sRanker.SubString (iRankerNameLen+1, sRanker.Length()-iRankerNameLen-2 );
  905. sExpr.Unquote();
  906. sRanker = sRanker.SubString ( 0, iRankerNameLen );
  907. eAdd = ::AddOptionRanker ( tQuery, sOpt, sRanker, [sExpr]{ return sExpr; }, STMT_SELECT, sError );
  908. }
  909. if ( eAdd==AddOption_e::NOT_FOUND )
  910. eAdd = AddOption ( tQuery, sOpt, i.StrVal(), i.StrVal(), [&i]{ return i.StrVal(); }, STMT_SELECT, sError );
  911. }
  912. else if ( i.IsObj() )
  913. {
  914. CSphVector<CSphNamedInt> dNamed;
  915. for ( const auto & tNamed : i )
  916. {
  917. if ( !tNamed.IsInt() )
  918. {
  919. sError.SetSprintf ( "\"%s\" property of \"%s\"' option should be integer", sOpt.cstr(), tNamed.Name() );
  920. return false;
  921. }
  922. dNamed.Add ( { tNamed.Name(), tNamed.IntVal() } );
  923. }
  924. eAdd = ::AddOption ( tQuery, sOpt, dNamed, STMT_SELECT, sError );
  925. }
  926. if ( eAdd==AddOption_e::NOT_FOUND )
  927. {
  928. sError.SetSprintf ( "unknown option '%s'", sOpt.cstr () );
  929. return false;
  930. }
  931. else if ( eAdd==AddOption_e::FAILED )
  932. return false;
  933. }
  934. return true;
  935. }
  936. static bool ParseOptions ( const JsonObj_c & tRoot, ParsedJsonQuery_t & tPJQuery, CSphString & sError )
  937. {
  938. CSphQuery & tQuery = tPJQuery.m_tQuery;
  939. // different from SQL: in sql it is requested by default
  940. tQuery.m_tScrollSettings.m_bRequested = false;
  941. JsonObj_c tOptions = tRoot.GetItem("options");
  942. if ( !tOptions )
  943. return true;
  944. if ( tQuery.m_eJoinType!=JoinType_e::NONE )
  945. for ( const auto & i : tOptions )
  946. {
  947. if ( !i.IsObj() )
  948. continue;
  949. CSphString sTable = i.Name();
  950. sTable.ToLower();
  951. StrVec_t dQueryIndexes;
  952. ParseIndexList ( tQuery.m_sIndexes, dQueryIndexes );
  953. bool bLeftTable = false;
  954. for ( const auto & i : dQueryIndexes )
  955. if ( sTable==i )
  956. {
  957. bLeftTable = true;
  958. break;
  959. }
  960. if ( bLeftTable )
  961. return ParseOptions ( i, tQuery, sError );
  962. if ( sTable==tQuery.m_sJoinIdx )
  963. return ParseOptions ( i, tPJQuery.m_tJoinQueryOptions, sError );
  964. sError.SetSprintf ( "Unknown table '%s' in OPTIONS", sTable.cstr() );
  965. return false;
  966. }
  967. return ParseOptions ( tOptions, tQuery, sError );
  968. }
  969. static bool ParseKNNQuery ( const JsonObj_c & tJson, CSphQuery & tQuery, CSphString & sError, CSphString & sWarning )
  970. {
  971. if ( !tJson )
  972. return true;
  973. if ( !tJson.IsObj() )
  974. {
  975. sError = "\"knn\" property value should be an object";
  976. return false;
  977. }
  978. if ( !tJson.FetchStrItem ( tQuery.m_sKNNAttr, "field", sError ) ) return false;
  979. if ( !tJson.FetchIntItem ( tQuery.m_iKNNK, "k", sError ) ) return false;
  980. if ( !tJson.FetchIntItem ( tQuery.m_iKnnEf, "ef", sError, true ) ) return false;
  981. JsonObj_c tQueryVec = tJson.GetArrayItem ( "query_vector", sError );
  982. if ( !tQueryVec )
  983. return false;
  984. for ( const auto & tArrayItem : tQueryVec )
  985. {
  986. if ( !tArrayItem.IsInt() && !tArrayItem.IsDbl() )
  987. {
  988. sError = "\"query_vector\" items should be integer of float";
  989. return false;
  990. }
  991. tQuery.m_dKNNVec.Add ( tArrayItem.FltVal() );
  992. }
  993. return true;
  994. }
  995. static bool ParseOnCond ( const JsonObj_c & tRoot, CSphString & sIdx, CSphString & sAttr, ESphAttr & eType, CSphString & sError )
  996. {
  997. CSphString sType;
  998. if ( !tRoot.FetchStrItem ( sIdx, "table", sError ) ) return false;
  999. if ( !tRoot.FetchStrItem ( sAttr, "field", sError ) ) return false;
  1000. if ( !tRoot.FetchStrItem ( sType, "type", sError, true ) ) return false;
  1001. if ( !sType.IsEmpty() )
  1002. {
  1003. if ( sType=="int" || sType=="integer" )
  1004. eType = SPH_ATTR_INTEGER;
  1005. else if ( sType=="float" )
  1006. eType = SPH_ATTR_FLOAT;
  1007. else if ( sType=="string" )
  1008. eType = SPH_ATTR_STRING;
  1009. else
  1010. {
  1011. sError.SetSprintf ( "unknown \"type\" value: \"%s\"", sType.cstr() );
  1012. return false;
  1013. }
  1014. }
  1015. return true;
  1016. }
  1017. static bool ParseOnFilter ( const JsonObj_c & tRoot, OnFilter_t & tOnFilter, CSphString & sError )
  1018. {
  1019. if ( !tRoot.IsObj() )
  1020. {
  1021. sError = "\"on\" items should be objects";
  1022. return false;
  1023. }
  1024. CSphString sOp;
  1025. if ( !tRoot.FetchStrItem ( sOp, "operator", sError ) )
  1026. return false;
  1027. if ( sOp!="eq" )
  1028. {
  1029. sError = "Unknown \"operator\" value";
  1030. return false;
  1031. }
  1032. JsonObj_c tLeft = tRoot.GetObjItem ( "left", sError );
  1033. if ( !tLeft )
  1034. return false;
  1035. JsonObj_c tRight = tRoot.GetObjItem ( "right", sError );
  1036. if ( !tRight )
  1037. return false;
  1038. if ( !ParseOnCond ( tLeft, tOnFilter.m_sIdx1, tOnFilter.m_sAttr1, tOnFilter.m_eTypeCast1, sError ) )
  1039. return false;
  1040. if ( !ParseOnCond ( tRight, tOnFilter.m_sIdx2, tOnFilter.m_sAttr2, tOnFilter.m_eTypeCast2, sError ) )
  1041. return false;
  1042. return true;
  1043. }
  1044. static bool ParseJoin ( const JsonObj_c & tRoot, CSphQuery & tQuery, CSphString & sError, CSphString & sWarning )
  1045. {
  1046. JsonObj_c tJoin = tRoot.GetArrayItem ( "join", sError, true );
  1047. if ( !tJoin )
  1048. return true;
  1049. int iNumJoins = 0;
  1050. for ( const auto & tJoinItem : tJoin )
  1051. {
  1052. if ( iNumJoins>0 )
  1053. {
  1054. sError = "Only single table joins are currently supported";
  1055. return false;
  1056. }
  1057. CSphString sJoinType;
  1058. if ( !tJoinItem.FetchStrItem ( sJoinType, "type", sError ) )
  1059. return false;
  1060. if ( sJoinType=="inner" )
  1061. tQuery.m_eJoinType = JoinType_e::INNER;
  1062. else if ( sJoinType=="left" )
  1063. tQuery.m_eJoinType = JoinType_e::LEFT;
  1064. else
  1065. {
  1066. sError.SetSprintf ( "unknown join type '%s'", sJoinType.cstr() );
  1067. return false;
  1068. }
  1069. if ( !tJoinItem.FetchStrItem ( tQuery.m_sJoinIdx, "table", sError ) )
  1070. return false;
  1071. JsonObj_c tMatchQuery = tJoinItem.GetObjItem ( "query", sError, true );
  1072. if ( tMatchQuery )
  1073. tQuery.m_sJoinQuery = tMatchQuery.AsString();
  1074. JsonObj_c tOn = tJoinItem.GetArrayItem ( "on", sError );
  1075. if ( !tOn )
  1076. return false;
  1077. for ( const auto & tCond : tOn )
  1078. {
  1079. OnFilter_t tOnFilter;
  1080. if ( !ParseOnFilter ( tCond, tOnFilter, sError ) )
  1081. return false;
  1082. tQuery.m_dOnFilters.Add(tOnFilter);
  1083. }
  1084. iNumJoins++;
  1085. }
  1086. return true;
  1087. }
  1088. bool sphParseJsonQuery ( Str_t sQuery, ParsedJsonQuery_t & tPJQuery )
  1089. {
  1090. JsonObj_c tRoot ( sQuery );
  1091. tPJQuery.m_tQuery.m_sRawQuery = sQuery;
  1092. return sphParseJsonQuery ( tRoot, tPJQuery );
  1093. }
// Parses an already decoded JSON search request into tPJQuery.
// The sections are processed in a fixed order: table name, limits, "query" / "knn" (mutually exclusive),
// join, options, "profile" / "plan", "script_fields" / "expressions", "highlight", "sort" / "track_scores",
// "_source", "docvalue_fields", "aggs" and finally the scroll setup.
// Returns false on the first failure; errors are reported through TlsMsg / the sError string declared by TLS_MSG_STRING.
bool sphParseJsonQuery ( const JsonObj_c & tRoot, ParsedJsonQuery_t & tPJQuery )
{
	TlsMsg::ResetErr();
	if ( !tRoot )
		return TlsMsg::Err ( "unable to parse: %s", tRoot.GetErrorPtr() );
	// declares sError (presumably bound to the thread-local message buffer - confirm against the macro)
	TLS_MSG_STRING ( sError );
	// the table name comes from "table" or, failing that, from "index"
	JsonObj_c tIndex = tRoot.GetStrItem ( "table", sError );
	if ( !tIndex )
	{
		tIndex = tRoot.GetStrItem ( "index", sError, true );
		if ( !tIndex )
			return false;
		// "index" was found: drop the error left by the failed "table" lookup
		sError = "";
	}
	auto & tQuery = tPJQuery.m_tQuery;
	tQuery.m_sIndexes = tIndex.StrVal();
	// the g_szAll name is replaced with "*"
	if ( tQuery.m_sIndexes==g_szAll )
		tQuery.m_sIndexes = "*";
	if ( !ParseLimits ( tRoot, tQuery, sError ) )
		return false;
	JsonObj_c tJsonQuery = tRoot.GetItem("query");
	JsonObj_c tKNNQuery = tRoot.GetItem("knn");
	if ( tJsonQuery && tKNNQuery )
		return TlsMsg::Err ( "\"query\" can't be used together with \"knn\"" );
	// common code used by search queries and update/delete by query
	if ( !ParseJsonQueryFilters ( tJsonQuery, tQuery, sError, tPJQuery.m_sWarning ) )
		return false;
	if ( !ParseKNNQuery ( tKNNQuery, tQuery, sError, tPJQuery.m_sWarning ) )
		return false;
	// the "knn" object is also run through the common filter parser
	if ( tKNNQuery && !ParseJsonQueryFilters ( tKNNQuery, tQuery, sError, tPJQuery.m_sWarning ) )
		return false;
	// the join is parsed before the options: ParseOptions() checks tQuery.m_eJoinType and m_sJoinIdx
	if ( !ParseJoin ( tRoot, tQuery, sError, tPJQuery.m_sWarning ) )
		return false;
	if ( !ParseOptions ( tRoot, tPJQuery, sError ) )
		return false;
	if ( !tRoot.FetchBoolItem ( tPJQuery.m_bProfile, "profile", sError, true ) )
		return false;
	if ( !tRoot.FetchIntItem ( tPJQuery.m_iPlan, "plan", sError, true ) )
		return false;
	// expression columns go first to select list
	JsonObj_c tScriptFields = tRoot.GetItem ( "script_fields" );
	if ( tScriptFields && !ParseScriptFields ( tScriptFields, tQuery, sError ) )
		return false;
	// a synonym to "script_fields"
	JsonObj_c tExpressions = tRoot.GetItem ( "expressions" );
	if ( tExpressions && !ParseExpressions ( tExpressions, tQuery, sError ) )
		return false;
	// "highlight" is optional; an error left by the lookup still fails the parse
	JsonObj_c tSnip = tRoot.GetObjItem ( "highlight", sError, true );
	if ( tSnip )
	{
		if ( !ParseSnippet ( tSnip, tQuery, sError ) )
			return false;
	}
	else if ( !sError.IsEmpty() )
		return false;
	JsonObj_c tSort = tRoot.GetItem("sort");
	if ( tSort && !( tSort.IsArray() || tSort.IsObj() ) )
	{
		sError = "\"sort\" property value should be an array or an object";
		return false;
	}
	if ( tSort )
	{
		bool bGotWeight = false;
		if ( !ParseSort ( tSort, tQuery, bGotWeight, sError, tPJQuery.m_sWarning ) )
			return false;
		JsonObj_c tTrackScore = tRoot.GetBoolItem ( "track_scores", sError, true );
		if ( !sError.IsEmpty() )
			return false;
		bool bTrackScore = tTrackScore && tTrackScore.BoolVal();
		// ranking is switched off when ParseSort() did not set bGotWeight and "track_scores" is not enabled
		if ( !bGotWeight && !bTrackScore )
			tQuery.m_eRanker = SPH_RANK_NONE;
	}
	else
	{
		// set defaults
		tQuery.m_eSort = SPH_SORT_EXTENDED;
		tQuery.m_sSortBy = "@weight desc";
		tQuery.m_sOrderBy = "@weight desc";
	}
	// source \ select filter
	JsonObj_c tSelect = tRoot.GetItem("_source");
	bool bParsedSelect = ( !tSelect || ParseSelect ( tSelect, tQuery, sError ) );
	if ( !bParsedSelect )
		return false;
	// docvalue_fields
	JsonObj_c tDocFields = tRoot.GetItem ( "docvalue_fields" );
	if ( tDocFields && !ParseDocFields ( tDocFields, tQuery, sError ) )
		return false;
	// aggs
	JsonObj_c tAggs = tRoot.GetItem ( "aggs" );
	if ( tAggs && !ParseAggregates ( tAggs, tQuery, sError ) )
		return false;
	if ( !SetupScroll ( tQuery, sError ) )
		return false;
	return true;
}
  1191. bool ParseJsonInsert ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, DocID_t & tDocId, bool bReplace, CSphString & sError )
  1192. {
  1193. if ( !ParseIndexId ( tRoot, false, tStmt, tDocId, sError ) )
  1194. return false;
  1195. if ( !ParseCluster ( tRoot, tStmt, sError ) )
  1196. return false;
  1197. tStmt.m_dInsertSchema.Add ( sphGetDocidName() );
  1198. SqlInsert_t & tId = tStmt.m_dInsertValues.Add();
  1199. tId.m_iType = SqlInsert_t::CONST_INT;
  1200. tId.SetValueInt ( (uint64_t)tDocId, false );
  1201. // "doc" is optional
  1202. JsonObj_c tSource = tRoot.GetItem("doc");
  1203. return ParseJsonInsertSource ( tSource, tStmt, bReplace, sError );
  1204. }
  1205. static bool ParseJsonInsertSource ( const JsonObj_c & tSource, StrVec_t & dInsertSchema, CSphVector<SqlInsert_t> & dInsertValues, CSphString & sError )
  1206. {
  1207. if ( !tSource )
  1208. return true;
  1209. for ( const auto & tItem : tSource )
  1210. {
  1211. dInsertSchema.Add ( tItem.Name() );
  1212. dInsertSchema.Last().ToLower();
  1213. SqlInsert_t & tNewValue = dInsertValues.Add();
  1214. if ( tItem.IsStr() || tItem.IsNull() )
  1215. {
  1216. tNewValue.m_iType = ( tItem.IsStr() ? SqlInsert_t::QUOTED_STRING : SqlInsert_t::TOK_NULL );
  1217. tNewValue.m_sVal = tItem.StrVal();
  1218. } else if ( tItem.IsDbl() )
  1219. {
  1220. tNewValue.m_iType = SqlInsert_t::CONST_FLOAT;
  1221. tNewValue.m_fVal = tItem.FltVal();
  1222. } else if ( tItem.IsInt() || tItem.IsBool() || tItem.IsUint() )
  1223. {
  1224. tNewValue.m_iType = SqlInsert_t::CONST_INT;
  1225. tNewValue.SetValueInt ( tItem.IntVal() );
  1226. } else if ( tItem.IsArray() || tItem.IsObj() )
  1227. {
  1228. // could be either object or array
  1229. // all fit to JSON attribute
  1230. // array of int fits MVA attribute
  1231. tNewValue.m_sVal = tItem.AsString();
  1232. bool bMVA = false;
  1233. if ( tItem.IsArray() )
  1234. {
  1235. tNewValue.m_iType = SqlInsert_t::CONST_MVA;
  1236. tNewValue.m_pVals = new RefcountedVector_c<AttrValue_t>;
  1237. for ( const auto & tArrayItem : tItem )
  1238. {
  1239. if ( !tArrayItem.IsInt() && !tArrayItem.IsDbl() )
  1240. break;
  1241. tNewValue.m_pVals->Add ( { tArrayItem.IntVal(), tArrayItem.FltVal() } );
  1242. bMVA = true;
  1243. }
  1244. if ( !bMVA && !tItem.Size() )
  1245. bMVA = true;
  1246. }
  1247. if ( !bMVA )
  1248. {
  1249. tNewValue.m_iType = SqlInsert_t::QUOTED_STRING;
  1250. tNewValue.m_pVals = nullptr;
  1251. }
  1252. } else
  1253. {
  1254. sError.SetSprintf ( "unsupported value type '%s' in field '%s'", tItem.TypeName(), tItem.Name() );
  1255. return false;
  1256. }
  1257. }
  1258. return true;
  1259. }
  1260. bool ParseJsonInsertSource ( const JsonObj_c & tSource, SqlStmt_t & tStmt, bool bReplace, CSphString & sError )
  1261. {
  1262. tStmt.m_eStmt = bReplace ? STMT_REPLACE : STMT_INSERT;
  1263. if ( !ParseJsonInsertSource ( tSource, tStmt.m_dInsertSchema, tStmt.m_dInsertValues, sError ) )
  1264. return false;
  1265. if ( !tStmt.CheckInsertIntegrity() )
  1266. {
  1267. sError = "wrong number of values";
  1268. return false;
  1269. }
  1270. return true;
  1271. }
  1272. bool sphParseJsonInsert ( Str_t sInsert, SqlStmt_t & tStmt, DocID_t & tDocId, bool bReplace, CSphString & sError )
  1273. {
  1274. JsonObj_c tRoot ( sInsert );
  1275. return ParseJsonInsert ( tRoot, tStmt, tDocId, bReplace, sError );
  1276. }
  1277. static bool ParseUpdateDeleteQueries ( const JsonObj_c & tRoot, bool bDelete, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
  1278. {
  1279. tStmt.m_tQuery.m_sSelect = "id";
  1280. if ( !ParseIndex ( tRoot, tStmt, sError ) )
  1281. return false;
  1282. if ( !ParseCluster ( tRoot, tStmt, sError ) )
  1283. return false;
  1284. JsonObj_c tId = tRoot.GetItem ( "id" );
  1285. if ( tId )
  1286. {
  1287. if ( !ParseIndexId ( tRoot, bDelete, tStmt, tDocId, sError ) )
  1288. return false;
  1289. CSphFilterSettings & tFilter = tStmt.m_tQuery.m_dFilters.Add();
  1290. tFilter.m_eType = SPH_FILTER_VALUES;
  1291. if ( bDelete && tId.IsArray() )
  1292. {
  1293. for ( const auto & tItem : tId )
  1294. tFilter.m_dValues.Add ( tItem.IntVal() );
  1295. } else
  1296. {
  1297. tFilter.m_dValues.Add ( tId.IntVal() );
  1298. }
  1299. tFilter.m_sAttrName = "id";
  1300. tDocId = tFilter.m_dValues[0];
  1301. }
  1302. // "query" is optional
  1303. JsonObj_c tQuery = tRoot.GetItem("query");
  1304. if ( tQuery && tId )
  1305. {
  1306. sError = R"(both "id" and "query" specified)";
  1307. return false;
  1308. }
  1309. CSphString sWarning; // fixme: add to results
  1310. return ParseJsonQueryFilters ( tQuery, tStmt.m_tQuery, sError, sWarning );
  1311. }
  1312. bool ParseJsonUpdate ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
  1313. {
  1314. CSphAttrUpdate & tUpd = tStmt.AttrUpdate();
  1315. tStmt.m_eStmt = STMT_UPDATE;
  1316. if ( !ParseUpdateDeleteQueries ( tRoot, false, tStmt, tDocId, sError ) )
  1317. return false;
  1318. JsonObj_c tSource = tRoot.GetObjItem ( "doc", sError );
  1319. if ( !tSource )
  1320. return false;
  1321. CSphVector<int64_t> dMVA;
  1322. for ( const auto & tItem : tSource )
  1323. {
  1324. bool bFloat = tItem.IsNum();
  1325. bool bInt = tItem.IsInt();
  1326. bool bBool = tItem.IsBool();
  1327. bool bString = tItem.IsStr();
  1328. bool bArray = tItem.IsArray();
  1329. bool bObject = tItem.IsObj();
  1330. if ( !bFloat && !bInt && !bBool && !bString && !bArray && !bObject )
  1331. {
  1332. sError.SetSprintf ( "unsupported value type '%s' in field '%s'", tItem.TypeName(), tItem.Name() );
  1333. return false;
  1334. }
  1335. CSphString sAttr = tItem.Name();
  1336. TypedAttribute_t & tTypedAttr = tUpd.m_dAttributes.Add();
  1337. tTypedAttr.m_sName = sAttr.ToLower();
  1338. if ( bInt || bBool )
  1339. {
  1340. int64_t iValue = tItem.IntVal();
  1341. tUpd.m_dPool.Add ( (DWORD)iValue );
  1342. auto uHi = (DWORD)( iValue>>32 );
  1343. if ( uHi )
  1344. {
  1345. tUpd.m_dPool.Add ( uHi );
  1346. tTypedAttr.m_eType = SPH_ATTR_BIGINT;
  1347. } else
  1348. tTypedAttr.m_eType = SPH_ATTR_INTEGER;
  1349. }
  1350. else if ( bFloat )
  1351. {
  1352. auto fValue = tItem.FltVal();
  1353. tUpd.m_dPool.Add ( sphF2DW ( fValue ) );
  1354. tTypedAttr.m_eType = SPH_ATTR_FLOAT;
  1355. }
  1356. else if ( bString || bObject )
  1357. {
  1358. CSphString sEncoded;
  1359. const char * szValue = tItem.SzVal();
  1360. if ( bObject )
  1361. {
  1362. sEncoded = tItem.AsString();
  1363. szValue = sEncoded.cstr();
  1364. }
  1365. auto iLength = (int) strlen ( szValue );
  1366. tUpd.m_dPool.Add ( tUpd.m_dBlobs.GetLength() );
  1367. tUpd.m_dPool.Add ( iLength );
  1368. if ( iLength )
  1369. {
  1370. BYTE * pBlob = tUpd.m_dBlobs.AddN ( iLength+2 ); // a couple of extra \0 for json parser to be happy
  1371. memcpy ( pBlob, szValue, iLength );
  1372. pBlob[iLength] = 0;
  1373. pBlob[iLength+1] = 0;
  1374. }
  1375. tTypedAttr.m_eType = SPH_ATTR_STRING;
  1376. } else if ( bArray )
  1377. {
  1378. dMVA.Resize ( 0 );
  1379. for ( const auto & tArrayItem : tItem )
  1380. {
  1381. if ( !tArrayItem.IsInt() )
  1382. {
  1383. sError = "MVA elements should be integers";
  1384. return false;
  1385. }
  1386. dMVA.Add ( tArrayItem.IntVal() );
  1387. }
  1388. dMVA.Uniq();
  1389. tUpd.m_dPool.Add ( dMVA.GetLength()*2 ); // as 64 bit stored into DWORD vector
  1390. tTypedAttr.m_eType = SPH_ATTR_UINT32SET;
  1391. for ( int64_t uVal : dMVA )
  1392. {
  1393. if ( uVal>UINT_MAX )
  1394. tTypedAttr.m_eType = SPH_ATTR_INT64SET;
  1395. *(( int64_t* ) tUpd.m_dPool.AddN ( 2 )) = uVal;
  1396. }
  1397. }
  1398. }
  1399. return true;
  1400. }
  1401. bool sphParseJsonUpdate ( Str_t sUpdate, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
  1402. {
  1403. JsonObj_c tRoot ( sUpdate );
  1404. return ParseJsonUpdate ( tRoot, tStmt, tDocId, sError );
  1405. }
  1406. static bool ParseJsonDelete ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
  1407. {
  1408. tStmt.m_eStmt = STMT_DELETE;
  1409. return ParseUpdateDeleteQueries ( tRoot, true, tStmt, tDocId, sError );
  1410. }
  1411. bool sphParseJsonDelete ( Str_t sDelete, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
  1412. {
  1413. JsonObj_c tRoot ( sDelete );
  1414. return ParseJsonDelete ( tRoot, tStmt, tDocId, sError );
  1415. }
  1416. bool sphParseJsonStatement ( const char * szStmt, SqlStmt_t & tStmt, CSphString & sStmt, CSphString & sQuery, DocID_t & tDocId, CSphString & sError )
  1417. {
  1418. JsonObj_c tRoot ( szStmt );
  1419. if ( !tRoot )
  1420. {
  1421. sError.SetSprintf ( "unable to parse: %s", tRoot.GetErrorPtr() );
  1422. return false;
  1423. }
  1424. JsonObj_c tJsonStmt = tRoot[0];
  1425. if ( !tJsonStmt )
  1426. {
  1427. sError = "no statement found";
  1428. return false;
  1429. }
  1430. sStmt = tJsonStmt.Name();
  1431. if ( !tJsonStmt.IsObj() )
  1432. {
  1433. sError.SetSprintf ( "statement %s should be an object", sStmt.cstr() );
  1434. return false;
  1435. }
  1436. if ( sStmt=="index" || sStmt=="replace" )
  1437. {
  1438. if ( !ParseJsonInsert ( tJsonStmt, tStmt, tDocId, true, sError ) )
  1439. return false;
  1440. } else if ( sStmt=="create" || sStmt=="insert" )
  1441. {
  1442. if ( !ParseJsonInsert ( tJsonStmt, tStmt, tDocId, false, sError ) )
  1443. return false;
  1444. } else if ( sStmt=="update" )
  1445. {
  1446. if ( !ParseJsonUpdate ( tJsonStmt, tStmt, tDocId, sError ) )
  1447. return false;
  1448. } else if ( sStmt=="delete" )
  1449. {
  1450. if ( !ParseJsonDelete ( tJsonStmt, tStmt, tDocId, sError ) )
  1451. return false;
  1452. } else
  1453. {
  1454. sError.SetSprintf ( "unknown bulk operation: %s", sStmt.cstr() );
  1455. return false;
  1456. }
  1457. sQuery = tJsonStmt.AsString();
  1458. return true;
  1459. }
  1460. //////////////////////////////////////////////////////////////////////////
  1461. static void PackedShortMVA2Json ( StringBuilder_c & tOut, const BYTE * pMVA )
  1462. {
  1463. auto dMVA = sphUnpackPtrAttr ( pMVA );
  1464. auto nValues = dMVA.second / sizeof ( DWORD );
  1465. auto pValues = ( const DWORD * ) dMVA.first;
  1466. for ( int i = 0; i<(int) nValues; ++i )
  1467. tOut.NtoA(pValues[i]);
  1468. }
  1469. static void PackedWideMVA2Json ( StringBuilder_c & tOut, const BYTE * pMVA )
  1470. {
  1471. auto dMVA = sphUnpackPtrAttr ( pMVA );
  1472. auto nValues = dMVA.second / sizeof ( int64_t );
  1473. auto pValues = ( const int64_t * ) dMVA.first;
  1474. for ( int i = 0; i<(int) nValues; ++i )
  1475. tOut.NtoA(pValues[i]);
  1476. }
  1477. static void PackedFloatVec2Json ( StringBuilder_c & tOut, const BYTE * pFV )
  1478. {
  1479. auto tFV = sphUnpackPtrAttr(pFV);
  1480. int iNumValues = tFV.second / sizeof(float);
  1481. auto pValues = (const float *)tFV.first;
  1482. for ( int i = 0; i<iNumValues; i++ )
  1483. tOut.FtoA(pValues[i]);
  1484. }
  1485. static void JsonObjAddAttr ( JsonEscapedBuilder & tOut, ESphAttr eAttrType, const CSphMatch & tMatch, const CSphAttrLocator & tLoc, int iMulti=1 )
  1486. {
  1487. switch ( eAttrType )
  1488. {
  1489. case SPH_ATTR_INTEGER:
  1490. case SPH_ATTR_TIMESTAMP:
  1491. case SPH_ATTR_TOKENCOUNT:
  1492. case SPH_ATTR_BIGINT:
  1493. tOut.NtoA ( tMatch.GetAttr(tLoc) * iMulti );
  1494. break;
  1495. case SPH_ATTR_UINT64:
  1496. tOut.NtoA ( (uint64_t)tMatch.GetAttr(tLoc) * iMulti );
  1497. break;
  1498. case SPH_ATTR_FLOAT:
  1499. tOut.FtoA ( tMatch.GetAttrFloat(tLoc) * iMulti );
  1500. break;
  1501. case SPH_ATTR_DOUBLE:
  1502. tOut.DtoA ( tMatch.GetAttrDouble(tLoc) * iMulti );
  1503. break;
  1504. case SPH_ATTR_BOOL:
  1505. tOut << ( tMatch.GetAttr ( tLoc ) ? "true" : "false" );
  1506. break;
  1507. case SPH_ATTR_UINT32SET_PTR:
  1508. case SPH_ATTR_INT64SET_PTR:
  1509. case SPH_ATTR_FLOAT_VECTOR_PTR:
  1510. {
  1511. auto _ = tOut.Array ();
  1512. const auto * pMVA = ( const BYTE * ) tMatch.GetAttr ( tLoc );
  1513. if ( eAttrType==SPH_ATTR_UINT32SET_PTR )
  1514. PackedShortMVA2Json ( tOut, pMVA );
  1515. else if ( eAttrType==SPH_ATTR_INT64SET_PTR )
  1516. PackedWideMVA2Json ( tOut, pMVA );
  1517. else
  1518. PackedFloatVec2Json ( tOut, pMVA );
  1519. }
  1520. break;
  1521. case SPH_ATTR_STRINGPTR:
  1522. {
  1523. const auto * pString = ( const BYTE * ) tMatch.GetAttr ( tLoc );
  1524. auto dString = sphUnpackPtrAttr ( pString );
  1525. // special process for legacy typed strings
  1526. if ( dString.second>1 && dString.first[dString.second-2]=='\0')
  1527. {
  1528. auto uSubtype = dString.first[dString.second-1];
  1529. dString.second -= 2;
  1530. switch ( uSubtype)
  1531. {
  1532. case 1: // ql
  1533. {
  1534. ScopedComma_c sBrackets ( tOut, nullptr, R"({"ql":)", "}" );
  1535. tOut.AppendEscapedWithComma (( const char* ) dString.first, dString.second);
  1536. break;
  1537. }
  1538. case 0: // json
  1539. tOut << ( const char* ) dString.first;
  1540. break;
  1541. default:
  1542. tOut.Sprintf ("\"internal error! wrong subtype of stringptr %d\"", uSubtype );
  1543. }
  1544. break;
  1545. }
  1546. tOut.AppendEscapedWithComma ( ( const char * ) dString.first, dString.second );
  1547. }
  1548. break;
  1549. case SPH_ATTR_JSON_PTR:
  1550. {
  1551. const auto * pJSON = ( const BYTE * ) tMatch.GetAttr ( tLoc );
  1552. auto dJson = sphUnpackPtrAttr ( pJSON );
  1553. // no object at all? return NULL
  1554. if ( IsEmpty ( dJson ) )
  1555. tOut << "null";
  1556. else
  1557. sphJsonFormat ( tOut, dJson.first );
  1558. }
  1559. break;
  1560. case SPH_ATTR_FACTORS:
  1561. case SPH_ATTR_FACTORS_JSON:
  1562. {
  1563. const auto * pFactors = ( const BYTE * ) tMatch.GetAttr ( tLoc );
  1564. auto dFactors = sphUnpackPtrAttr ( pFactors );
  1565. if ( IsEmpty ( dFactors ))
  1566. tOut << "null";
  1567. else
  1568. sphFormatFactors ( tOut, (const unsigned int *) dFactors.first, true );
  1569. }
  1570. break;
  1571. case SPH_ATTR_JSON_FIELD_PTR:
  1572. {
  1573. const auto * pField = ( const BYTE * ) tMatch.GetAttr ( tLoc );
  1574. auto dField = sphUnpackPtrAttr ( pField );
  1575. if ( IsEmpty ( dField ))
  1576. {
  1577. tOut << "null";
  1578. break;
  1579. }
  1580. auto eJson = ESphJsonType ( *dField.first++ );
  1581. if ( eJson==JSON_NULL )
  1582. tOut << "null";
  1583. else
  1584. sphJsonFieldFormat ( tOut, dField.first, eJson, true );
  1585. }
  1586. break;
  1587. default:
  1588. assert ( 0 && "Unknown attribute" );
  1589. break;
  1590. }
  1591. }
  1592. static void JsonObjAddAttr ( JsonEscapedBuilder & tOut, ESphAttr eAttrType, const char * szCol, const CSphMatch & tMatch, const CSphAttrLocator & tLoc )
  1593. {
  1594. assert ( sphPlainAttrToPtrAttr ( eAttrType )==eAttrType );
  1595. tOut.AppendName ( szCol );
  1596. JsonObjAddAttr ( tOut, eAttrType, tMatch, tLoc );
  1597. }
  1598. static bool IsHighlightAttr ( const CSphString & sName )
  1599. {
  1600. return sName.Begins ( g_szHighlight );
  1601. }
  1602. static CSphString GetJoinedWeightName ( const CSphQuery & tQuery )
  1603. {
  1604. CSphString sWeight;
  1605. sWeight.SetSprintf ( "%s.weight()", tQuery.m_sJoinIdx.cstr() );
  1606. return sWeight;
  1607. }
  1608. static bool IsJoinedWeight ( const CSphString & sAttr, const CSphQuery & tQuery )
  1609. {
  1610. if ( tQuery.m_sJoinIdx.IsEmpty() )
  1611. return false;
  1612. return sAttr==GetJoinedWeightName(tQuery);
  1613. }
  1614. static bool NeedToSkipAttr ( const CSphString & sName, const CSphQuery & tQuery )
  1615. {
  1616. const char * szName = sName.cstr();
  1617. if ( szName[0]=='i' && szName[1]=='d' && szName[2]=='\0' ) return true;
  1618. if ( sName.Begins ( g_szHighlight ) ) return true;
  1619. if ( sName.Begins ( GetFilterAttrPrefix() ) ) return true;
  1620. if ( sName.Begins ( g_szOrder ) ) return true;
  1621. if ( sName.Begins ( GetKnnDistAttrName() ) ) return true;
  1622. if ( IsJoinedWeight ( sName, tQuery ) ) return true;
  1623. if ( !tQuery.m_dIncludeItems.GetLength() && !tQuery.m_dExcludeItems.GetLength () )
  1624. return false;
  1625. // empty include - shows all select list items
  1626. // exclude with only "*" - skip all select list items
  1627. bool bInclude = ( tQuery.m_dIncludeItems.GetLength()==0 );
  1628. for ( const auto &iItem: tQuery.m_dIncludeItems )
  1629. {
  1630. if ( sphWildcardMatch ( szName, iItem.cstr() ) )
  1631. {
  1632. bInclude = true;
  1633. break;
  1634. }
  1635. }
  1636. if ( bInclude && tQuery.m_dExcludeItems.GetLength() )
  1637. {
  1638. for ( const auto& iItem: tQuery.m_dExcludeItems )
  1639. {
  1640. if ( sphWildcardMatch ( szName, iItem.cstr() ) )
  1641. {
  1642. bInclude = false;
  1643. break;
  1644. }
  1645. }
  1646. }
  1647. return !bInclude;
  1648. }
  1649. namespace { // static
  1650. void EncodeHighlight ( const CSphMatch & tMatch, int iAttr, const ISphSchema & tSchema, JsonEscapedBuilder & tOut )
  1651. {
  1652. const CSphColumnInfo & tCol = tSchema.GetAttr(iAttr);
  1653. ScopedComma_c tHighlightComma ( tOut, ",", R"("highlight":{)", "}", false );
  1654. auto dSnippet = sphUnpackPtrAttr ((const BYTE *) tMatch.GetAttr ( tCol.m_tLocator ));
  1655. SnippetResult_t tRes = UnpackSnippetData ( dSnippet );
  1656. for ( const auto & tField : tRes.m_dFields )
  1657. {
  1658. tOut.AppendName ( tField.m_sName.cstr() );
  1659. ScopedComma_c tHighlight ( tOut, ",", "[", "]", false );
  1660. // we might want to add passage separators to field text here
  1661. for ( const auto & tPassage : tField.m_dPassages )
  1662. tOut.AppendEscapedWithComma ( (const char *)tPassage.m_dText.Begin(), tPassage.m_dText.GetLength() );
  1663. }
  1664. }
  1665. static const char * GetName ( const CSphString & sName )
  1666. {
  1667. return sName.cstr();
  1668. }
  1669. static const char * GetName ( const JsonDocField_t & tDF )
  1670. {
  1671. return tDF.m_sName.cstr();
  1672. }
  1673. template <typename T>
  1674. void EncodeFields ( const CSphVector<T> & dFields, const AggrResult_t & tRes, const CSphMatch & tMatch, const ISphSchema & tSchema, bool bValArray, const char * sPrefix, const char * sEnd, JsonEscapedBuilder & tOut )
  1675. {
  1676. JsonEscapedBuilder tDFVal;
  1677. tOut.StartBlock ( ",", sPrefix, sEnd );
  1678. for ( const T & tDF : dFields )
  1679. {
  1680. const CSphColumnInfo * pCol = tSchema.GetAttr ( GetName ( tDF ) );
  1681. if ( !pCol )
  1682. {
  1683. tOut += R"("Default")";
  1684. continue;
  1685. }
  1686. // FIXME!!! add format support
  1687. tDFVal.Clear();
  1688. JsonObjAddAttr ( tDFVal, pCol->m_eAttrType, tMatch, pCol->m_tLocator );
  1689. if ( bValArray )
  1690. tOut.Sprintf ( "%s", tDFVal.cstr() );
  1691. else
  1692. tOut.Sprintf ( R"("%s":["%s"])", GetName ( tDF ), tDFVal.cstr() );
  1693. }
  1694. tOut.FinishBlock ( false ); // close obj
  1695. }
  1696. struct CompositeLocator_t
  1697. {
  1698. ESphAttr m_eAttrType = SPH_ATTR_NONE;
  1699. CSphAttrLocator m_tLocator;
  1700. const char * m_sName = nullptr;
  1701. CompositeLocator_t ( const CSphColumnInfo & tCol, const char * sName )
  1702. : m_eAttrType ( tCol.m_eAttrType )
  1703. , m_tLocator ( tCol.m_tLocator )
  1704. , m_sName ( sName )
  1705. {}
  1706. CompositeLocator_t() = default;
  1707. };
  1708. struct AggrKeyTrait_t
  1709. {
  1710. const CSphColumnInfo * m_pKey = nullptr;
  1711. CSphVector<CompositeLocator_t> m_dCompositeKeys;
  1712. bool m_bKeyed = false;
  1713. RangeNameHash_t m_tRangeNames;
  1714. };
  1715. static bool GetAggrKey ( const JsonAggr_t & tAggr, const CSphSchema & tSchema, int iAggrItem, int iNow, AggrKeyTrait_t & tRes )
  1716. {
  1717. if ( tAggr.m_eAggrFunc==Aggr_e::NONE )
  1718. {
  1719. tRes.m_pKey = tSchema.GetAttr ( tAggr.m_sCol.cstr() );
  1720. } else if ( tAggr.m_eAggrFunc==Aggr_e::COMPOSITE )
  1721. {
  1722. for ( const auto & tItem : tAggr.m_dComposite )
  1723. {
  1724. const CSphColumnInfo * pCol = tSchema.GetAttr ( tItem.m_sColumn.cstr() );
  1725. CSphString sJsonCol;
  1726. if ( !pCol && sphJsonNameSplit ( tItem.m_sColumn.cstr(), nullptr, &sJsonCol ) )
  1727. pCol = tSchema.GetAttr ( sJsonCol.cstr() );
  1728. if ( !pCol )
  1729. return false;
  1730. tRes.m_dCompositeKeys.Add ( CompositeLocator_t ( *pCol, tItem.m_sAlias.cstr() ) );
  1731. }
  1732. } else
  1733. {
  1734. tRes.m_pKey = tSchema.GetAttr ( GetAggrName ( iAggrItem, tAggr.m_sCol ).cstr() );
  1735. switch ( tAggr.m_eAggrFunc )
  1736. {
  1737. case Aggr_e::RANGE:
  1738. GetRangeKeyNames ( tAggr.m_tRange, tRes.m_tRangeNames );
  1739. tRes.m_bKeyed = tAggr.m_tRange.m_bKeyed;
  1740. break;
  1741. case Aggr_e::DATE_RANGE:
  1742. GetRangeKeyNames ( tAggr.m_tDateRange, iNow, tRes.m_tRangeNames );
  1743. tRes.m_bKeyed = tAggr.m_tDateRange.m_bKeyed;
  1744. break;
  1745. case Aggr_e::HISTOGRAM:
  1746. tRes.m_bKeyed = tAggr.m_tHist.m_bKeyed;
  1747. break;
  1748. case Aggr_e::DATE_HISTOGRAM:
  1749. tRes.m_bKeyed = tAggr.m_tDateHist.m_bKeyed;
  1750. break;
  1751. default:
  1752. break;
  1753. }
  1754. }
  1755. return ( tRes.m_pKey || tRes.m_dCompositeKeys.GetLength() );
  1756. }
  1757. static const char * GetBucketPrefix ( const AggrKeyTrait_t & tKey, Aggr_e eAggrFunc, const RangeKeyDesc_t * pRange, const CSphMatch & tMatch, JsonEscapedBuilder & tPrefixBucketBlock )
  1758. {
  1759. const char * sPrefix = "{";
  1760. if ( tKey.m_bKeyed )
  1761. {
  1762. switch ( eAggrFunc )
  1763. {
  1764. case Aggr_e::RANGE:
  1765. case Aggr_e::DATE_RANGE:
  1766. {
  1767. tPrefixBucketBlock.Clear();
  1768. tPrefixBucketBlock.Appendf ( "\"%s\":{", pRange->m_sKey.cstr() );
  1769. sPrefix = tPrefixBucketBlock.cstr();
  1770. }
  1771. break;
  1772. case Aggr_e::HISTOGRAM:
  1773. {
  1774. tPrefixBucketBlock.Clear();
  1775. tPrefixBucketBlock.Appendf ( "\"");
  1776. JsonObjAddAttr ( tPrefixBucketBlock, tKey.m_pKey->m_eAttrType, tMatch, tKey.m_pKey->m_tLocator );
  1777. tPrefixBucketBlock.Appendf ( "\":{" );
  1778. sPrefix = tPrefixBucketBlock.cstr();
  1779. }
  1780. break;
  1781. case Aggr_e::DATE_HISTOGRAM:
  1782. {
  1783. tPrefixBucketBlock.Clear();
  1784. tPrefixBucketBlock.Appendf ( "\"");
  1785. time_t tSrcTime = tMatch.GetAttr ( tKey.m_pKey->m_tLocator );
  1786. FormatDate ( tSrcTime, tPrefixBucketBlock );
  1787. tPrefixBucketBlock.Appendf ( "\":{" );
  1788. sPrefix = tPrefixBucketBlock.cstr();
  1789. }
  1790. break;
  1791. default: break;
  1792. }
  1793. }
  1794. return sPrefix;
  1795. }
  1796. static void PrintKey ( const AggrKeyTrait_t & tKey, Aggr_e eAggrFunc, const RangeKeyDesc_t * pRange, const CSphMatch & tMatch, ResultSetFormat_e eFormat, const sph::StringSet & hDatetime, JsonEscapedBuilder & tBuf, JsonEscapedBuilder & tOut )
  1797. {
  1798. if ( eAggrFunc==Aggr_e::DATE_RANGE )
  1799. {
  1800. if ( !tKey.m_bKeyed )
  1801. tOut.Sprintf ( R"("key":"%s")", pRange->m_sKey.cstr() );
  1802. if ( !pRange->m_sFrom.IsEmpty() )
  1803. tOut.Sprintf ( R"("from":"%s")", pRange->m_sFrom.cstr() );
  1804. if ( !pRange->m_sTo.IsEmpty() )
  1805. tOut.Sprintf ( R"("to":"%s")", pRange->m_sTo.cstr() );
  1806. } else if ( eAggrFunc==Aggr_e::RANGE )
  1807. {
  1808. if ( !tKey.m_bKeyed )
  1809. tOut.Sprintf ( R"("key":"%s")", pRange->m_sKey.cstr() );
  1810. if ( !pRange->m_sFrom.IsEmpty() )
  1811. tOut.Sprintf ( R"("from":%s)", pRange->m_sFrom.cstr() );
  1812. if ( !pRange->m_sTo.IsEmpty() )
  1813. tOut.Sprintf ( R"("to":%s)", pRange->m_sTo.cstr() );
  1814. } else if ( eAggrFunc==Aggr_e::DATE_HISTOGRAM )
  1815. {
  1816. tBuf.Clear();
  1817. JsonObjAddAttr ( tBuf, tKey.m_pKey->m_eAttrType, tMatch, tKey.m_pKey->m_tLocator );
  1818. tOut.Sprintf ( R"("key":%s)", tBuf.cstr() );
  1819. tBuf.Clear();
  1820. time_t tSrcTime = tMatch.GetAttr ( tKey.m_pKey->m_tLocator );
  1821. FormatDate ( tSrcTime, tBuf );
  1822. tOut.Sprintf ( R"("key_as_string":"%s")", tBuf.cstr() );
  1823. } else if ( eAggrFunc==Aggr_e::COMPOSITE )
  1824. {
  1825. ScopedComma_c sBlock ( tOut, ",", R"("key":{)", "}" );
  1826. for ( const auto & tItem : tKey.m_dCompositeKeys )
  1827. JsonObjAddAttr ( tOut, tItem.m_eAttrType, tItem.m_sName, tMatch, tItem.m_tLocator );
  1828. } else if ( eFormat==ResultSetFormat_e::MntSearch )
  1829. {
  1830. JsonObjAddAttr ( tOut, tKey.m_pKey->m_eAttrType, "key", tMatch, tKey.m_pKey->m_tLocator );
  1831. } else
  1832. {
  1833. // FIXME!!! remove after proper data type added but now need to multiple datatime values by 1000 for compat aggs result set
  1834. int iMulti = 1;
  1835. if ( eFormat==ResultSetFormat_e::ES && hDatetime [ tKey.m_pKey->m_sName ] )
  1836. iMulti = 1000;
  1837. tBuf.Clear();
  1838. JsonObjAddAttr ( tBuf, tKey.m_pKey->m_eAttrType, tMatch, tKey.m_pKey->m_tLocator, iMulti );
  1839. tOut.Sprintf ( R"("key":%s)", tBuf.cstr() );
  1840. if ( tKey.m_pKey->m_eAttrType==SPH_ATTR_STRINGPTR )
  1841. tOut.Sprintf ( R"("key_as_string":%s)", tBuf.cstr() );
  1842. else
  1843. tOut.Sprintf ( R"("key_as_string":"%s")", tBuf.cstr() );
  1844. }
  1845. }
  1846. static VecTraits_T<CSphMatch> GetResultMatches ( const VecTraits_T<CSphMatch> & dMatches, const CSphSchema & tSchema, int iOff, int iCount, const JsonAggr_t & tAggr )
  1847. {
  1848. bool bHasCompositeAfter = ( dMatches.GetLength() && tAggr.m_eAggrFunc==Aggr_e::COMPOSITE && tAggr.m_dCompositeAfterKey.GetLength() );
  1849. if ( !bHasCompositeAfter )
  1850. return dMatches.Slice ( iOff, iCount );
  1851. CSphString sError;
  1852. CreateFilterContext_t tCtx;
  1853. tCtx.m_pFilters = &tAggr.m_dCompositeAfterKey;
  1854. tCtx.m_pMatchSchema = &tSchema;
  1855. tCtx.m_bScan = true;
  1856. if ( !sphCreateFilters ( tCtx, sError, sError ) || !sError.IsEmpty() )
  1857. {
  1858. sphWarning ( "failed to create \"after\" filter: %s", sError.cstr() );
  1859. return dMatches.Slice ( iOff, iCount );
  1860. }
  1861. int iFound = dMatches.GetFirst ( [&] ( const CSphMatch & tMatch ) { return tCtx.m_pFilter->Eval ( tMatch ); } );
  1862. if ( iOff<0 )
  1863. return dMatches.Slice ( iOff, iCount );
  1864. else
  1865. return dMatches.Slice ( iFound+1, iCount );
  1866. }
  1867. static bool IsSingleValue ( Aggr_e eAggr )
  1868. {
  1869. return ( eAggr==Aggr_e::MIN || eAggr==Aggr_e::MAX || eAggr==Aggr_e::SUM || eAggr==Aggr_e::AVG );
  1870. }
  1871. static void EncodeAggr ( const JsonAggr_t & tAggr, int iAggrItem, const AggrResult_t & tRes, ResultSetFormat_e eFormat, const sph::StringSet & hDatetime, int iNow, const CSphString & sDistinctName, JsonEscapedBuilder & tOut )
  1872. {
  1873. if ( tAggr.m_eAggrFunc==Aggr_e::COUNT )
  1874. return;
  1875. const CSphColumnInfo * pCount = tRes.m_tSchema.GetAttr ( "count(*)" );
  1876. AggrKeyTrait_t tKey;
  1877. bool bHasKey = GetAggrKey ( tAggr, tRes.m_tSchema, iAggrItem, iNow, tKey );
  1878. const CSphColumnInfo * pDistinct = nullptr;
  1879. if ( !sDistinctName.IsEmpty() )
  1880. pDistinct = tRes.m_tSchema.GetAttr ( sDistinctName.cstr() );
  1881. // might be null for empty result set
  1882. auto dMatches = GetResultMatches ( tRes.m_dResults.First().m_dMatches, tRes.m_tSchema, tRes.m_iOffset, tRes.m_iCount, tAggr );
  1883. CSphString sBucketName;
  1884. sBucketName.SetSprintf ( R"("%s":{)", tAggr.m_sBucketName.cstr() );
  1885. tOut.StartBlock ( ",", sBucketName.cstr(), "}" );
  1886. // aggr.significant
  1887. switch ( tAggr.m_eAggrFunc )
  1888. {
  1889. case Aggr_e::SIGNIFICANT: // FIXME!!! add support
  1890. tOut.Appendf ( "\"doc_count\":" INT64_FMT ",", tRes.m_iTotalMatches );
  1891. tOut.Appendf ( "\"bg_count\":" INT64_FMT ",", tRes.m_iTotalMatches );
  1892. break;
  1893. default: break;
  1894. }
  1895. // after_key for aggr.composite
  1896. if ( bHasKey && pCount && tAggr.m_eAggrFunc==Aggr_e::COMPOSITE && dMatches.GetLength() )
  1897. {
  1898. tOut.StartBlock ( ",", R"("after_key":{)", "}" );
  1899. for ( const auto & tItem : tKey.m_dCompositeKeys )
  1900. JsonObjAddAttr ( tOut, tItem.m_eAttrType, tItem.m_sName, dMatches.Last(), tItem.m_tLocator );
  1901. tOut.FinishBlock ( false ); // named bucket obj
  1902. }
  1903. if ( !IsSingleValue ( tAggr.m_eAggrFunc ) )
  1904. {
  1905. // buckets might be named objects or array
  1906. if ( tKey.m_bKeyed )
  1907. tOut.StartBlock ( ",", R"("buckets":{)", "}" );
  1908. else
  1909. tOut.StartBlock ( ",", R"("buckets":[)", "]" );
  1910. // might be null for empty result set
  1911. if ( bHasKey && pCount )
  1912. {
  1913. JsonEscapedBuilder tPrefixBucketBlock;
  1914. JsonEscapedBuilder tBufMatch;
  1915. for ( const CSphMatch & tMatch : dMatches )
  1916. {
  1917. RangeKeyDesc_t * pRange = nullptr;
  1918. if ( tAggr.m_eAggrFunc==Aggr_e::RANGE || tAggr.m_eAggrFunc==Aggr_e::DATE_RANGE )
  1919. {
  1920. int iBucket = tMatch.GetAttr ( tKey.m_pKey->m_tLocator );
  1921. pRange = tKey.m_tRangeNames ( iBucket );
  1922. // lets skip bucket with out of ranges index, ie _all
  1923. if ( !pRange )
  1924. continue;
  1925. }
  1926. // bucket item is array item or dict item
  1927. const char * sBucketPrefix = GetBucketPrefix ( tKey, tAggr.m_eAggrFunc, pRange, tMatch, tPrefixBucketBlock );
  1928. ScopedComma_c sBucketBlock ( tOut, ",", sBucketPrefix, "}" );
  1929. PrintKey ( tKey, tAggr.m_eAggrFunc, pRange, tMatch, eFormat, hDatetime, tBufMatch, tOut );
  1930. JsonObjAddAttr ( tOut, pCount->m_eAttrType, "doc_count", tMatch, pCount->m_tLocator );
  1931. // FIXME!!! add support
  1932. if ( tAggr.m_eAggrFunc==Aggr_e::SIGNIFICANT )
  1933. {
  1934. tOut.Sprintf ( R"("score":0.001)" );
  1935. JsonObjAddAttr ( tOut, pCount->m_eAttrType, "bg_count", tMatch, pCount->m_tLocator );
  1936. }
  1937. if ( pDistinct )
  1938. JsonObjAddAttr ( tOut, pDistinct->m_eAttrType, pDistinct->m_sName.cstr(), tMatch, pDistinct->m_tLocator );
  1939. }
  1940. }
  1941. tOut.FinishBlock ( false ); // buckets array
  1942. } else
  1943. {
  1944. if ( bHasKey && pCount && dMatches.GetLength() )
  1945. {
  1946. const CSphMatch & tMatch = dMatches[0];
  1947. JsonObjAddAttr ( tOut, tKey.m_pKey->m_eAttrType, "value", tMatch, tKey.m_pKey->m_tLocator );
  1948. }
  1949. }
  1950. tOut.FinishBlock ( false ); // named bucket obj
  1951. }
  1952. void JsonRenderAccessSpecs ( JsonEscapedBuilder & tRes, const bson::Bson_c & tBson, bool bWithZones )
  1953. {
  1954. using namespace bson;
  1955. {
  1956. ScopedComma_c sFieldsArray ( tRes, ",", "\"fields\":[", "]" );
  1957. Bson_c ( tBson.ChildByName ( SZ_FIELDS ) ).ForEach ( [&tRes] ( const NodeHandle_t & tNode ) {
  1958. tRes.AppendEscapedWithComma ( String ( tNode ).cstr() );
  1959. } );
  1960. }
  1961. int iPos = (int)Int ( tBson.ChildByName ( SZ_MAX_FIELD_POS ) );
  1962. if ( iPos )
  1963. tRes.Sprintf ( "\"max_field_pos\":%d", iPos );
  1964. if ( !bWithZones )
  1965. return;
  1966. auto tZones = tBson.GetFirstOf ( { SZ_ZONES, SZ_ZONESPANS } );
  1967. ScopedComma_c dZoneDelim ( tRes, ", ", ( tZones.first==1 ) ? "\"zonespans\":[" : "\"zones\":[", "]" );
  1968. Bson_c ( tZones.second ).ForEach ( [&tRes] ( const NodeHandle_t & tNode ) {
  1969. tRes << String ( tNode );
  1970. } );
  1971. }
  1972. bool JsonRenderKeywordNode ( JsonEscapedBuilder & tRes, const bson::Bson_c& tBson )
  1973. {
  1974. using namespace bson;
  1975. auto tWord = tBson.ChildByName ( SZ_WORD );
  1976. if ( IsNullNode ( tWord ) )
  1977. return false;
  1978. ScopedComma_c sRoot ( tRes.Object() );
  1979. tRes << R"("type":"KEYWORD")";
  1980. tRes << "\"word\":";
  1981. tRes.AppendEscapedSkippingComma ( String ( tWord ).cstr () );
  1982. tRes.Sprintf ( R"("querypos":%d)", Int ( tBson.ChildByName ( SZ_QUERYPOS ) ) );
  1983. if ( Bool ( tBson.ChildByName ( SZ_EXCLUDED ) ) )
  1984. tRes << R"("excluded":true)";
  1985. if ( Bool ( tBson.ChildByName ( SZ_EXPANDED ) ) )
  1986. tRes << R"("expanded":true)";
  1987. if ( Bool ( tBson.ChildByName ( SZ_FIELD_START ) ) )
  1988. tRes << R"("field_start":true)";
  1989. if ( Bool ( tBson.ChildByName ( SZ_FIELD_END ) ) )
  1990. tRes << R"("field_end":true)";
  1991. if ( Bool ( tBson.ChildByName ( SZ_FIELD_END ) ) )
  1992. tRes << R"("morphed":true)";
  1993. auto tBoost = tBson.ChildByName ( SZ_BOOST );
  1994. if ( !IsNullNode ( tBoost ) )
  1995. {
  1996. auto fBoost = Double ( tBoost );
  1997. if ( fBoost!=1.0f ) // really comparing floats?
  1998. tRes.Sprintf ( R"("boost":%f)", fBoost );
  1999. }
  2000. return true;
  2001. }
  2002. void FormatJsonPlanFromBson ( JsonEscapedBuilder& tOut, bson::NodeHandle_t dBson, PLAN_FLAVOUR ePlanFlavour )
  2003. {
  2004. using namespace bson;
  2005. if ( dBson==nullnode )
  2006. return;
  2007. if ( ePlanFlavour == PLAN_FLAVOUR::EDESCR )
  2008. {
  2009. auto dRootBlock = tOut.ObjectBlock();
  2010. tOut << "\"description\":";
  2011. tOut.AppendEscapedSkippingComma ( sph::RenderBsonPlanBrief ( dBson ).cstr() );
  2012. tOut.FinishBlocks ( dRootBlock );
  2013. return;
  2014. }
  2015. Bson_c tBson ( dBson );
  2016. if ( JsonRenderKeywordNode ( tOut, tBson) )
  2017. return;
  2018. auto dRootBlock = tOut.ObjectBlock();
  2019. tOut << "\"type\":";
  2020. tOut.AppendEscapedSkippingComma ( String ( tBson.ChildByName ( SZ_TYPE ) ).cstr() );
  2021. if ( ePlanFlavour==PLAN_FLAVOUR::EBOTH )
  2022. {
  2023. tOut << "\"description\":";
  2024. tOut.AppendEscapedSkippingComma ( sph::RenderBsonPlanBrief ( dBson ).cstr () );
  2025. }
  2026. Bson_c ( tBson.ChildByName ( SZ_OPTIONS ) ).ForEach ( [&tOut] ( CSphString&& sName, const NodeHandle_t & tNode ) {
  2027. tOut.Sprintf ( R"("options":"%s=%d")", sName.cstr (), (int) Int ( tNode ) );
  2028. } );
  2029. JsonRenderAccessSpecs ( tOut, dBson, true );
  2030. tOut.StartBlock ( ",", "\"children\":[", "]" );
  2031. Bson_c ( tBson.ChildByName ( SZ_CHILDREN ) ).ForEach ( [&] ( const NodeHandle_t & tNode ) {
  2032. FormatJsonPlanFromBson ( tOut, tNode, ePlanFlavour );
  2033. } );
  2034. tOut.FinishBlocks ( dRootBlock );
  2035. }
  2036. } // static
  2037. CSphString JsonEncodeResultError ( const CSphString & sError, int iStatus )
  2038. {
  2039. JsonEscapedBuilder tOut;
  2040. CSphString sResult;
  2041. tOut.StartBlock ( ",", "{ \"error\":", "}" );
  2042. tOut.AppendEscaped ( sError.cstr(), EscBld::eEscape );
  2043. tOut.AppendName ( "status" );
  2044. tOut << iStatus;
  2045. tOut.FinishBlock ( false );
  2046. tOut.MoveTo ( sResult ); // since simple return tOut.cstr() will cause copy of string, then returning it.
  2047. return sResult;
  2048. }
  2049. static CSphString JsonEncodeResultError ( const CSphString & sError, const char * sErrorType=nullptr, int * pStatus=nullptr, const char * sIndex=nullptr )
  2050. {
  2051. JsonEscapedBuilder tOut;
  2052. CSphString sResult;
  2053. tOut.StartBlock ( ",", "{", "}" );
  2054. tOut.StartBlock ( ",", R"("error":{)", "}" );
  2055. tOut.AppendName ( "type" );
  2056. tOut.AppendEscaped ( ( sErrorType ? sErrorType : "Error" ), EscBld::eEscape );
  2057. tOut.AppendName ( "reason" );
  2058. tOut.AppendEscaped ( sError.cstr(), EscBld::eEscape );
  2059. if ( sIndex )
  2060. {
  2061. tOut.AppendName ( "table" );
  2062. tOut.AppendEscaped ( sIndex, EscBld::eEscape );
  2063. }
  2064. tOut.FinishBlock ( false );
  2065. if ( pStatus )
  2066. {
  2067. tOut.AppendName ( "status" );
  2068. tOut << *pStatus;
  2069. }
  2070. tOut.FinishBlock ( false );
  2071. tOut.MoveTo ( sResult ); // since simple return tOut.cstr() will cause copy of string, then returning it.
  2072. return sResult;
  2073. }
  2074. CSphString JsonEncodeResultError ( const CSphString & sError, const char * sErrorType, int iStatus )
  2075. {
  2076. return JsonEncodeResultError ( sError, sErrorType, &iStatus );
  2077. }
  2078. CSphString JsonEncodeResultError ( const CSphString & sError, const char * sErrorType, int iStatus, const char * sIndex )
  2079. {
  2080. return JsonEncodeResultError ( sError, sErrorType, &iStatus, sIndex );
  2081. }
  2082. CSphString HandleShowProfile ( const QueryProfile_c& p )
  2083. {
  2084. #define SPH_QUERY_STATE( _name, _desc ) _desc,
  2085. static const char* dStates[SPH_QSTATE_TOTAL] = { SPH_QUERY_STATES };
  2086. #undef SPH_QUERY_STATES
  2087. JsonEscapedBuilder sProfile;
  2088. int64_t tmTotal = 0;
  2089. int iCount = 0;
  2090. for ( int i = 0; i < SPH_QSTATE_TOTAL; ++i )
  2091. {
  2092. if ( p.m_dSwitches[i] <= 0 )
  2093. continue;
  2094. tmTotal += p.m_tmTotal[i];
  2095. iCount += p.m_dSwitches[i];
  2096. }
  2097. {
  2098. auto arrayw = sProfile.ArrayW();
  2099. for ( int i = 0; i < SPH_QSTATE_TOTAL; ++i )
  2100. {
  2101. if ( p.m_dSwitches[i] <= 0 )
  2102. continue;
  2103. auto _ = sProfile.ObjectW();
  2104. sProfile.NamedString ( "status", dStates[i] );
  2105. sProfile.NamedVal ( "duration", FixedFrac_T<int64_t, 6> ( p.m_tmTotal[i] ) );
  2106. sProfile.NamedVal ( "switches", p.m_dSwitches[i] );
  2107. sProfile.NamedVal ( "percent", FixedFrac_T<int64_t, 2> ( PercentOf ( p.m_tmTotal[i], tmTotal, 2 ) ) );
  2108. }
  2109. {
  2110. auto _ = sProfile.ObjectW();
  2111. sProfile.NamedString ( "status", "total" );
  2112. sProfile.NamedVal ( "duration", FixedFrac_T<int64_t, 6> ( tmTotal ) );
  2113. sProfile.NamedVal ( "switches", iCount );
  2114. sProfile.NamedVal ( "percent", FixedFrac_T<int64_t, 2> ( PercentOf ( tmTotal, tmTotal, 2 ) ) );
  2115. }
  2116. }
  2117. return (CSphString)sProfile;
  2118. }
  2119. static void AddJoinedWeight ( JsonEscapedBuilder & tOut, const CSphQuery & tQuery, const CSphMatch & tMatch, const CSphColumnInfo * pJoinedWeightAttr )
  2120. {
  2121. if ( !pJoinedWeightAttr )
  2122. return;
  2123. tOut.Sprintf ( R"("%s._score":%d)", tQuery.m_sJoinIdx.cstr(), (int)tMatch.GetAttr ( pJoinedWeightAttr->m_tLocator ) );
  2124. }
  2125. CSphString sphEncodeResultJson ( const VecTraits_T<AggrResult_t>& dRes, const JsonQuery_c & tQuery, QueryProfile_c * pProfile, ResultSetFormat_e eFormat )
  2126. {
  2127. assert ( dRes.GetLength()>=1 );
  2128. const AggrResult_t & tRes = dRes[0];
  2129. if ( !tRes.m_iSuccesses )
  2130. return JsonEncodeResultError ( tRes.m_sError );
  2131. JsonEscapedBuilder tOut;
  2132. CSphString sResult;
  2133. tOut.ObjectBlock();
  2134. tOut.Sprintf (R"("took":%d,"timed_out":false)", tRes.m_iQueryTime);
  2135. if ( !tRes.m_sWarning.IsEmpty() )
  2136. {
  2137. tOut.StartBlock ( nullptr, R"("warning":{"reason":)", "}" );
  2138. tOut.AppendEscapedWithComma ( tRes.m_sWarning.cstr () );
  2139. tOut.FinishBlock ( false );
  2140. }
  2141. if ( eFormat==ResultSetFormat_e::ES )
  2142. tOut += R"("_shards":{ "total": 1, "successful": 1, "skipped": 0, "failed": 0 })";
  2143. auto sHitMeta = tOut.StartBlock ( ",", R"("hits":{)", "}" );
  2144. tOut.Sprintf ( R"("total":%d)", tRes.m_iTotalMatches );
  2145. tOut.Sprintf ( R"("total_relation":%s)", tRes.m_bTotalMatchesApprox ? R"("gte")" : R"("eq")" );
  2146. if ( eFormat==ResultSetFormat_e::ES )
  2147. tOut += R"("max_score": null)";
  2148. const ISphSchema & tSchema = tRes.m_tSchema;
  2149. CSphVector<BYTE> dTmp;
  2150. CSphBitvec tAttrsToSend;
  2151. sphGetAttrsToSend ( tSchema, false, true, tAttrsToSend );
  2152. const CSphColumnInfo * pJoinedWeightAttr = tQuery.m_sJoinIdx.IsEmpty() ? nullptr : tSchema.GetAttr ( GetJoinedWeightName(tQuery).cstr() );
  2153. int iHighlightAttr = -1;
  2154. int nSchemaAttrs = tSchema.GetAttrsCount();
  2155. CSphBitvec dSkipAttrs ( nSchemaAttrs );
  2156. for ( int iAttr=0; iAttr<nSchemaAttrs; iAttr++ )
  2157. {
  2158. if ( !tAttrsToSend.BitGet(iAttr) )
  2159. continue;
  2160. const CSphColumnInfo & tCol = tSchema.GetAttr(iAttr);
  2161. const CSphString & sName = tCol.m_sName;
  2162. if ( IsHighlightAttr ( sName ) )
  2163. iHighlightAttr = iAttr;
  2164. if ( NeedToSkipAttr ( sName, tQuery ) )
  2165. dSkipAttrs.BitSet ( iAttr );
  2166. if ( eFormat==ResultSetFormat_e::ES && tCol.m_eAttrType==SPH_ATTR_TOKENCOUNT )
  2167. dSkipAttrs.BitSet ( iAttr );
  2168. }
  2169. tOut.StartBlock ( ",", R"("hits":[)", "]" );
  2170. if ( !tQuery.m_bGroupEmulation )
  2171. {
  2172. const CSphColumnInfo * pId = tSchema.GetAttr ( sphGetDocidName() );
  2173. const CSphColumnInfo * pKNNDist = tSchema.GetAttr ( GetKnnDistAttrName() );
  2174. bool bCompatId = false;
  2175. const CSphColumnInfo * pCompatRaw = nullptr;
  2176. const CSphColumnInfo * pCompatVer = nullptr;
  2177. if ( eFormat==ResultSetFormat_e::ES )
  2178. {
  2179. const CSphColumnInfo * pCompatId = tSchema.GetAttr ( "_id" );
  2180. if ( pCompatId )
  2181. {
  2182. bCompatId = true;
  2183. pId = pCompatId;
  2184. }
  2185. pCompatRaw = tSchema.GetAttr ( "_raw" );
  2186. pCompatVer = tSchema.GetAttr ( "_version" );
  2187. }
  2188. bool bTag = tRes.m_bTagsAssigned;
  2189. int iTag = ( bTag ? 0 : tRes.m_dResults.First().m_iTag );
  2190. auto dMatches = tRes.m_dResults.First ().m_dMatches.Slice ( tRes.m_iOffset, tRes.m_iCount );
  2191. for ( const auto & tMatch : dMatches )
  2192. {
  2193. ScopedComma_c sQueryComma ( tOut, ",", "{", "}" );
  2194. // note, that originally there is string UID, so we just output number in quotes for docid here
  2195. // number in quotes in compat mode or just number for _id
  2196. if ( bCompatId || ( eFormat==ResultSetFormat_e::ES ) )
  2197. {
  2198. DocID_t tDocID = tMatch.GetAttr ( pId->m_tLocator );
  2199. tOut.Sprintf ( R"("_id":"%llu","_score":%d)", tDocID, tMatch.m_iWeight );
  2200. }
  2201. else if ( pId )
  2202. {
  2203. DocID_t tDocID = tMatch.GetAttr ( pId->m_tLocator );
  2204. tOut.Sprintf ( R"("_id":%U,"_score":%d)", tDocID, tMatch.m_iWeight );
  2205. }
  2206. else
  2207. tOut.Sprintf ( R"("_score":%d)", tMatch.m_iWeight );
  2208. AddJoinedWeight ( tOut, tQuery, tMatch, pJoinedWeightAttr );
  2209. if ( eFormat==ResultSetFormat_e::ES )
  2210. {
  2211. tOut.Sprintf ( R"("_index":"%s")", tRes.m_dIndexNames[bTag ? tMatch.m_iTag : iTag].scstr() ); // FIXME!!! breaks for multiple indexes
  2212. tOut += R"("_type": "doc")";
  2213. if ( pCompatVer )
  2214. JsonObjAddAttr ( tOut, pCompatVer->m_eAttrType, "_version", tMatch, pCompatVer->m_tLocator );
  2215. else
  2216. tOut += R"("_version": 1)";
  2217. }
  2218. if ( pKNNDist )
  2219. tOut.Sprintf( R"("_knn_dist":%f)", tMatch.GetAttrFloat ( pKNNDist->m_tLocator ) );
  2220. tOut.StartBlock ( ",", "\"_source\":{", "}");
  2221. if ( pCompatRaw )
  2222. JsonObjAddAttr ( tOut, pCompatRaw->m_eAttrType, "_raw", tMatch, pCompatRaw->m_tLocator );
  2223. else
  2224. for ( int iAttr=0; iAttr<nSchemaAttrs; iAttr++ )
  2225. {
  2226. if ( !tAttrsToSend.BitGet(iAttr) )
  2227. continue;
  2228. if ( dSkipAttrs.BitGet ( iAttr ) )
  2229. continue;
  2230. const CSphColumnInfo & tCol = tSchema.GetAttr(iAttr);
  2231. JsonObjAddAttr ( tOut, tCol.m_eAttrType, tCol.m_sName.cstr(), tMatch, tCol.m_tLocator );
  2232. }
  2233. tOut.FinishBlock ( false ); // _source obj
  2234. if ( iHighlightAttr!=-1 )
  2235. EncodeHighlight ( tMatch, iHighlightAttr, tSchema, tOut );
  2236. if ( eFormat==ResultSetFormat_e::ES )
  2237. {
  2238. if ( tQuery.m_dDocFields.GetLength() )
  2239. EncodeFields ( tQuery.m_dDocFields, tRes, tMatch, tSchema, false, R"("fields":{)", "}", tOut );
  2240. if ( tQuery.m_dSortFields.GetLength() )
  2241. EncodeFields ( tQuery.m_dSortFields, tRes, tMatch, tSchema, true, R"("sort":[)", "]", tOut );
  2242. }
  2243. }
  2244. }
  2245. tOut.FinishBlocks ( sHitMeta, false ); // hits array, hits meta
  2246. if ( tQuery.m_bGroupEmulation || dRes.GetLength()>1 )
  2247. {
  2248. sph::StringSet hDatetime;
  2249. if ( eFormat==ResultSetFormat_e::ES )
  2250. {
  2251. tQuery.m_dDocFields.for_each ( [&hDatetime]( const auto & tDocfield )
  2252. {
  2253. if ( tDocfield.m_bDateTime )
  2254. hDatetime.Add ( tDocfield.m_sName );
  2255. });
  2256. }
  2257. CSphString sDistinctName;
  2258. tQuery.m_dItems.any_of ( [&]( const CSphQueryItem & tItem ) {
  2259. if ( tItem.m_sExpr=="@distinct" )
  2260. {
  2261. sDistinctName = tItem.m_sAlias;
  2262. return true;
  2263. }
  2264. return false;
  2265. });
  2266. if ( tQuery.m_bGroupEmulation )
  2267. {
  2268. tOut.StartBlock ( ",", R"("aggregations":{)", "}");
  2269. EncodeAggr ( tQuery.m_dAggs[0], 1, dRes[0], eFormat, hDatetime, tQuery.m_iNow, sDistinctName, tOut );
  2270. tOut.FinishBlock ( false ); // aggregations obj
  2271. } else
  2272. {
  2273. assert ( dRes.GetLength()==tQuery.m_dAggs.GetLength()+1 );
  2274. tOut.StartBlock ( ",", R"("aggregations":{)", "}");
  2275. ARRAY_FOREACH ( i, tQuery.m_dAggs )
  2276. EncodeAggr ( tQuery.m_dAggs[i], i, dRes[i+1], eFormat, hDatetime, tQuery.m_iNow, sDistinctName, tOut );
  2277. tOut.FinishBlock ( false ); // aggregations obj
  2278. }
  2279. }
  2280. CSphString sScroll;
  2281. if ( dRes.GetLength() && FormatScrollSettings ( dRes.Last(), tQuery, sScroll ) )
  2282. tOut.Sprintf ( R"("scroll":"%s")", sScroll.cstr() );
  2283. if ( eFormat==ResultSetFormat_e::ES )
  2284. tOut += R"("status": 200)";
  2285. if ( pProfile && pProfile->m_bNeedProfile )
  2286. {
  2287. auto sProfile = HandleShowProfile ( *pProfile );
  2288. tOut.Sprintf ( R"("profile":{"query":%s})", sProfile.cstr () );
  2289. }
  2290. if ( pProfile && pProfile->m_eNeedPlan != PLAN_FLAVOUR::ENONE )
  2291. {
  2292. JsonEscapedBuilder sPlan;
  2293. FormatJsonPlanFromBson ( sPlan, bson::MakeHandle ( pProfile->m_dPlan ), pProfile->m_eNeedPlan );
  2294. if ( sPlan.IsEmpty() )
  2295. tOut << R"("plan":null)";
  2296. else
  2297. tOut.Sprintf ( R"("plan":{"query":%s})", sPlan.cstr() );
  2298. }
  2299. tOut.FinishBlocks (); tOut.MoveTo ( sResult ); return sResult;
  2300. }
  2301. JsonObj_c sphEncodeInsertResultJson ( const char * szIndex, bool bReplace, DocID_t tDocId, ResultSetFormat_e eFormat )
  2302. {
  2303. JsonObj_c tObj;
  2304. tObj.AddStr ( ( eFormat==ResultSetFormat_e::ES ? "_index" : "table" ), szIndex );
  2305. tObj.AddUint ( "id", tDocId );
  2306. tObj.AddBool ( "created", !bReplace );
  2307. tObj.AddStr ( "result", bReplace ? "updated" : "created" );
  2308. tObj.AddInt ( "status", bReplace ? 200 : 201 );
  2309. return tObj;
  2310. }
  2311. JsonObj_c sphEncodeTxnResultJson ( const char* szIndex, DocID_t tDocId, int iInserts, int iDeletes, int iUpdates, ResultSetFormat_e eFormat )
  2312. {
  2313. JsonObj_c tObj;
  2314. tObj.AddStr ( ( eFormat==ResultSetFormat_e::ES ? "_index" : "table" ), szIndex );
  2315. tObj.AddInt ( "_id", tDocId );
  2316. tObj.AddInt ( "created", iInserts );
  2317. tObj.AddInt ( "deleted", iDeletes );
  2318. tObj.AddInt ( "updated", iUpdates );
  2319. bool bReplaced = (iInserts!=0 && iDeletes!=0);
  2320. tObj.AddStr ( "result", bReplaced ? "updated" : "created" );
  2321. tObj.AddInt ( "status", bReplaced ? 200 : 201 );
  2322. return tObj;
  2323. }
  2324. JsonObj_c sphEncodeUpdateResultJson ( const char * szIndex, DocID_t tDocId, int iAffected, ResultSetFormat_e eFormat )
  2325. {
  2326. JsonObj_c tObj;
  2327. tObj.AddStr ( ( eFormat==ResultSetFormat_e::ES ? "_index" : "table" ), szIndex );
  2328. if ( !tDocId )
  2329. tObj.AddInt ( "updated", iAffected );
  2330. else
  2331. {
  2332. tObj.AddInt ( "id", tDocId );
  2333. tObj.AddStr ( "result", iAffected ? "updated" : "noop" );
  2334. }
  2335. return tObj;
  2336. }
  2337. JsonObj_c sphEncodeDeleteResultJson ( const char * szIndex, DocID_t tDocId, int iAffected, ResultSetFormat_e eFormat )
  2338. {
  2339. JsonObj_c tObj;
  2340. tObj.AddStr ( ( eFormat==ResultSetFormat_e::ES ? "_index" : "table" ), szIndex );
  2341. if ( !tDocId )
  2342. tObj.AddInt ( "deleted", iAffected );
  2343. else
  2344. {
  2345. tObj.AddInt ( "id", tDocId );
  2346. tObj.AddBool ( "found", !!iAffected );
  2347. tObj.AddStr ( "result", iAffected ? "deleted" : "not found" );
  2348. }
  2349. return tObj;
  2350. }
  2351. JsonObj_c sphEncodeInsertErrorJson ( const char * szIndex, const char * szError, ResultSetFormat_e eFormat )
  2352. {
  2353. JsonObj_c tObj, tErr;
  2354. tErr.AddStr ( "type", szError );
  2355. tErr.AddStr ( ( eFormat==ResultSetFormat_e::ES ? "_index" : "table" ), szIndex );
  2356. tObj.AddItem ( "error", tErr );
  2357. tObj.AddInt ( "status", HttpGetStatusCodes ( EHTTP_STATUS::_409 ) );
  2358. return tObj;
  2359. }
  2360. bool sphGetResultStats ( const char * szResult, int & iAffected, int & iWarnings, bool bUpdate )
  2361. {
  2362. JsonObj_c tJsonRoot ( szResult );
  2363. if ( !tJsonRoot )
  2364. return false;
  2365. // no warnings in json results for now
  2366. iWarnings = 0;
  2367. if ( tJsonRoot.HasItem("error") )
  2368. {
  2369. iAffected = 0;
  2370. return true;
  2371. }
  2372. // its either update or delete
  2373. CSphString sError;
  2374. JsonObj_c tAffected = tJsonRoot.GetIntItem ( bUpdate ? "updated" : "deleted", sError );
  2375. if ( tAffected )
  2376. {
  2377. iAffected = (int)tAffected.IntVal();
  2378. return true;
  2379. }
  2380. // it was probably a query with an "id"
  2381. JsonObj_c tId = tJsonRoot.GetIntItem ( "id", sError );
  2382. if ( tId )
  2383. {
  2384. iAffected = 1;
  2385. return true;
  2386. }
  2387. return false;
  2388. }
  2389. //////////////////////////////////////////////////////////////////////////
  2390. // Highlight
  2391. static void FormatSnippetOpts ( const CSphString & sQuery, const SnippetQuerySettings_t & tSnippetQuery, CSphQuery & tQuery )
  2392. {
  2393. StringBuilder_c sItem;
  2394. sItem << "HIGHLIGHT(";
  2395. sItem << tSnippetQuery.AsString();
  2396. sItem << ",";
  2397. auto & hFieldHash = tSnippetQuery.m_hPerFieldLimits;
  2398. if ( tSnippetQuery.m_hPerFieldLimits.GetLength() )
  2399. {
  2400. sItem.StartBlock ( ",", "'", "'" );
  2401. for ( const auto& tField : hFieldHash )
  2402. sItem << tField.first;
  2403. sItem.FinishBlock(false);
  2404. }
  2405. else
  2406. sItem << "''";
  2407. if ( !sQuery.IsEmpty() )
  2408. sItem.Appendf ( ",'%s'", sQuery.cstr() );
  2409. sItem << ")";
  2410. CSphQueryItem & tItem = tQuery.m_dItems.Add();
  2411. tItem.m_sExpr = sItem.cstr ();
  2412. tItem.m_sAlias.SetSprintf ( "%s", g_szHighlight );
  2413. }
  2414. static bool ParseFieldsArray ( const JsonObj_c & tFields, SnippetQuerySettings_t & tSettings, CSphString & sError )
  2415. {
  2416. for ( const auto & tField : tFields )
  2417. {
  2418. if ( !tField.IsStr() )
  2419. {
  2420. sError.SetSprintf ( "\"%s\" field should be an string", tField.Name() );
  2421. return false;
  2422. }
  2423. SnippetLimits_t tDefault;
  2424. tSettings.m_hPerFieldLimits.Add( tDefault, tField.StrVal() );
  2425. }
  2426. return true;
  2427. }
  2428. static bool ParseSnippetLimitsElastic ( const JsonObj_c & tSnip, SnippetLimits_t & tLimits, CSphString & sError )
  2429. {
  2430. if ( !tSnip.FetchIntItem ( tLimits.m_iLimit, "fragment_size", sError, true ) ) return false;
  2431. if ( !tSnip.FetchIntItem ( tLimits.m_iLimitPassages, "number_of_fragments", sError, true ) ) return false;
  2432. return true;
  2433. }
  2434. static bool ParseSnippetLimitsSphinx ( const JsonObj_c & tSnip, SnippetLimits_t & tLimits, CSphString & sError )
  2435. {
  2436. if ( !tSnip.FetchIntItem ( tLimits.m_iLimit, "limit", sError, true ) ) return false;
  2437. if ( !tSnip.FetchIntItem ( tLimits.m_iLimitPassages, "limit_passages", sError, true ) ) return false;
  2438. if ( !tSnip.FetchIntItem ( tLimits.m_iLimitPassages, "limit_snippets", sError, true ) ) return false;
  2439. if ( !tSnip.FetchIntItem ( tLimits.m_iLimitWords, "limit_words", sError, true ) ) return false;
  2440. return true;
  2441. }
  2442. static bool ParseFieldsObject ( const JsonObj_c & tFields, SnippetQuerySettings_t & tSettings, CSphString & sError )
  2443. {
  2444. for ( const auto & tField : tFields )
  2445. {
  2446. if ( !tField.IsObj() )
  2447. {
  2448. sError.SetSprintf ( "\"%s\" field should be an object", tField.Name() );
  2449. return false;
  2450. }
  2451. SnippetLimits_t & tLimits = tSettings.m_hPerFieldLimits.AddUnique ( tField.Name() );
  2452. if ( !ParseSnippetLimitsElastic ( tField, tLimits, sError ) )
  2453. return false;
  2454. if ( !ParseSnippetLimitsSphinx ( tField, tLimits, sError ) )
  2455. return false;
  2456. }
  2457. return true;
  2458. }
  2459. static bool ParseSnippetFields ( const JsonObj_c & tSnip, SnippetQuerySettings_t & tSettings, CSphString & sError )
  2460. {
  2461. JsonObj_c tFields = tSnip.GetItem("fields");
  2462. if ( !tFields )
  2463. return true;
  2464. if ( tFields.IsArray() )
  2465. return ParseFieldsArray ( tFields, tSettings, sError );
  2466. if ( tFields.IsObj() )
  2467. return ParseFieldsObject ( tFields, tSettings, sError );
  2468. sError = R"("fields" property value should be an array or an object)";
  2469. return false;
  2470. }
  2471. static bool FetchTags ( const char * sName, const JsonObj_c & tSnip, CSphString & sVal, CSphString & sError )
  2472. {
  2473. JsonObj_c tTag = tSnip.GetItem ( sName );
  2474. if ( !tTag )
  2475. return true;
  2476. if ( tTag.IsStr() )
  2477. {
  2478. sVal = tTag.StrVal();
  2479. return true;
  2480. }
  2481. if ( tTag.IsArray() )
  2482. {
  2483. if ( tTag.Size() )
  2484. sVal = tTag[0].StrVal();
  2485. return true;
  2486. }
  2487. sError.SetSprintf ( R"("%s" property value should be an array or sting)", sName );
  2488. return false;
  2489. }
  2490. static bool ParseSnippetOptsElastic ( const JsonObj_c & tSnip, CSphString & sQuery, SnippetQuerySettings_t & tQuery, CSphString & sError )
  2491. {
  2492. JsonObj_c tEncoder = tSnip.GetStrItem ( "encoder", sError, true );
  2493. if ( tEncoder )
  2494. {
  2495. if ( tEncoder.StrVal()=="html" )
  2496. tQuery.m_sStripMode = "retain";
  2497. }
  2498. else if ( !sError.IsEmpty() )
  2499. return false;
  2500. JsonObj_c tHlQuery = tSnip.GetObjItem ( "highlight_query", sError, true );
  2501. if ( tHlQuery )
  2502. sQuery = tHlQuery.AsString();
  2503. else if ( !sError.IsEmpty() )
  2504. return false;
  2505. if ( !FetchTags ( "pre_tags", tSnip, tQuery.m_sBeforeMatch, sError ) ) return false;
  2506. if ( !FetchTags ( "post_tags", tSnip, tQuery.m_sAfterMatch, sError ) ) return false;
  2507. JsonObj_c tNoMatchSize = tSnip.GetItem ( "no_match_size" );
  2508. if ( tNoMatchSize )
  2509. {
  2510. int iNoMatch = 0;
  2511. if ( !tSnip.FetchIntItem ( iNoMatch, "no_match_size", sError, true ) )
  2512. return false;
  2513. tQuery.m_bAllowEmpty = iNoMatch<1;
  2514. }
  2515. JsonObj_c tOrder = tSnip.GetStrItem ( "order", sError, true );
  2516. if ( tOrder )
  2517. tQuery.m_bWeightOrder = tOrder.StrVal()=="score";
  2518. else if ( !sError.IsEmpty() )
  2519. return false;
  2520. if ( !ParseSnippetLimitsElastic ( tSnip, tQuery, sError ) )
  2521. return false;
  2522. return true;
  2523. }
  2524. static bool ParseSnippetOptsSphinx ( const JsonObj_c & tSnip, SnippetQuerySettings_t & tOpt, CSphString & sError )
  2525. {
  2526. if ( !ParseSnippetLimitsSphinx ( tSnip, tOpt, sError ) )
  2527. return false;
  2528. if ( !tSnip.FetchStrItem ( tOpt.m_sBeforeMatch, "before_match", sError, true ) ) return false;
  2529. if ( !tSnip.FetchStrItem ( tOpt.m_sAfterMatch, "after_match", sError, true ) ) return false;
  2530. if ( !tSnip.FetchIntItem ( tOpt.m_iAround, "around", sError, true ) ) return false;
  2531. if ( !tSnip.FetchBoolItem ( tOpt.m_bUseBoundaries, "use_boundaries", sError, true ) ) return false;
  2532. if ( !tSnip.FetchBoolItem ( tOpt.m_bWeightOrder, "weight_order", sError, true ) ) return false;
  2533. if ( !tSnip.FetchBoolItem ( tOpt.m_bForceAllWords, "force_all_words", sError, true ) ) return false;
  2534. if ( !tSnip.FetchStrItem ( tOpt.m_sStripMode, "html_strip_mode", sError, true ) ) return false;
  2535. if ( !tSnip.FetchBoolItem ( tOpt.m_bAllowEmpty, "allow_empty", sError, true ) ) return false;
  2536. if ( !tSnip.FetchBoolItem ( tOpt.m_bEmitZones, "emit_zones", sError, true ) ) return false;
  2537. if ( !tSnip.FetchBoolItem ( tOpt.m_bForcePassages, "force_passages", sError, true ) ) return false;
  2538. if ( !tSnip.FetchBoolItem ( tOpt.m_bForcePassages, "force_snippets", sError, true ) ) return false;
  2539. if ( !tSnip.FetchBoolItem ( tOpt.m_bPackFields, "pack_fields", sError, true ) ) return false;
  2540. if ( !tSnip.FetchBoolItem ( tOpt.m_bLimitsPerField, "limits_per_field", sError, true ) )return false;
  2541. JsonObj_c tBoundary = tSnip.GetStrItem ( "passage_boundary", "snippet_boundary", sError );
  2542. if ( tBoundary )
  2543. tOpt.m_ePassageSPZ = GetPassageBoundary ( tBoundary.StrVal() );
  2544. else if ( !sError.IsEmpty() )
  2545. return false;
  2546. return true;
  2547. }
  2548. static bool ParseSnippet ( const JsonObj_c & tSnip, CSphQuery & tQuery, CSphString & sError )
  2549. {
  2550. CSphString sQuery;
  2551. SnippetQuerySettings_t tSettings;
  2552. tSettings.m_bJsonQuery = true;
  2553. tSettings.m_bPackFields = true;
  2554. if ( !ParseSnippetFields ( tSnip, tSettings, sError ) )
  2555. return false;
  2556. // elastic-style options
  2557. if ( !ParseSnippetOptsElastic ( tSnip, sQuery, tSettings, sError ) )
  2558. return false;
  2559. // sphinx-style options
  2560. if ( !ParseSnippetOptsSphinx ( tSnip, tSettings, sError ) )
  2561. return false;
  2562. FormatSnippetOpts ( sQuery, tSettings, tQuery );
  2563. return true;
  2564. }
  2565. //////////////////////////////////////////////////////////////////////////
  2566. // Sort
  2567. struct SortField_t : public GeoDistInfo_c
  2568. {
  2569. CSphString m_sName;
  2570. CSphString m_sMode;
  2571. bool m_bAsc {true};
  2572. };
  2573. static void FormatSortBy ( const CSphVector<SortField_t> & dSort, JsonQuery_c & tQuery, bool & bGotWeight )
  2574. {
  2575. StringBuilder_c sSortBuf;
  2576. Comma_c sComma ({", ",2});
  2577. for ( const SortField_t &tItem : dSort )
  2578. {
  2579. const char * sSort = ( tItem.m_bAsc ? " asc" : " desc" );
  2580. if ( tItem.IsGeoDist() )
  2581. {
  2582. // ORDER BY statement
  2583. sSortBuf << sComma << g_szOrder << tItem.m_sName << sSort;
  2584. // query item
  2585. CSphQueryItem & tQueryItem = tQuery.m_dItems.Add();
  2586. tQueryItem.m_sExpr = tItem.BuildExprString();
  2587. tQueryItem.m_sAlias.SetSprintf ( "%s%s", g_szOrder, tItem.m_sName.cstr() );
  2588. // select list
  2589. StringBuilder_c sTmp;
  2590. sTmp << tQuery.m_sSelect << ", " << tQueryItem.m_sExpr << " as " << tQueryItem.m_sAlias;
  2591. sTmp.MoveTo ( tQuery.m_sSelect );
  2592. } else if ( tItem.m_sMode.IsEmpty() )
  2593. {
  2594. const char * sName = tItem.m_sName.cstr();
  2595. if ( tItem.m_sName=="_score" )
  2596. sName = "@weight";
  2597. else if ( tItem.m_sName=="_count" )
  2598. sName = "count(*)";
  2599. // sort by attribute or weight
  2600. sSortBuf << sComma << sName << sSort;
  2601. bGotWeight |= ( tItem.m_sName=="_score" );
  2602. } else
  2603. {
  2604. // sort by MVA
  2605. // ORDER BY statement
  2606. sSortBuf << sComma << g_szOrder << tItem.m_sName << sSort;
  2607. // query item
  2608. StringBuilder_c sTmp;
  2609. sTmp << ( tItem.m_sMode=="min" ? "least" : "greatest" ) << "(" << tItem.m_sName << ")";
  2610. CSphQueryItem & tQueryItem = tQuery.m_dItems.Add();
  2611. sTmp.MoveTo (tQueryItem.m_sExpr);
  2612. tQueryItem.m_sAlias.SetSprintf ( "%s%s", g_szOrder, tItem.m_sName.cstr() );
  2613. // select list
  2614. sTmp << tQuery.m_sSelect << ", " << tQueryItem.m_sExpr << " as " << tQueryItem.m_sAlias;
  2615. sTmp.MoveTo ( tQuery.m_sSelect );
  2616. }
  2617. tQuery.m_dSortFields.Add ( tItem.m_sName );
  2618. }
  2619. if ( !dSort.GetLength() )
  2620. {
  2621. sSortBuf += "@weight desc";
  2622. bGotWeight = true;
  2623. }
  2624. tQuery.m_eSort = SPH_SORT_EXTENDED;
  2625. sSortBuf.MoveTo ( tQuery.m_sSortBy );
  2626. }
  2627. static bool ParseSortObj ( const JsonObj_c & tSortItem, CSphVector<SortField_t> & dSort, CSphString & sError, CSphString & sWarning )
  2628. {
  2629. bool bSortString = tSortItem.IsStr();
  2630. bool bSortObj = tSortItem.IsObj();
  2631. CSphString sSortName = tSortItem.Name();
  2632. if ( ( !bSortString && !bSortObj ) || !tSortItem.Name() || ( bSortString && !tSortItem.SzVal() ) )
  2633. {
  2634. sError.SetSprintf ( R"("sort" property 0("%s") should be %s)", sSortName.scstr(), ( bSortObj ? "a string" : "an object" ) );
  2635. return false;
  2636. }
  2637. // [ { "attr_name" : "sort_mode" } ]
  2638. if ( bSortString )
  2639. {
  2640. CSphString sOrder = tSortItem.StrVal();
  2641. if ( sOrder!="asc" && sOrder!="desc" )
  2642. {
  2643. sError.SetSprintf ( R"("sort" property "%s" order is invalid %s)", sSortName.scstr(), sOrder.cstr() );
  2644. return false;
  2645. }
  2646. SortField_t & tAscItem = dSort.Add();
  2647. tAscItem.m_sName = sSortName;
  2648. tAscItem.m_bAsc = ( sOrder=="asc" );
  2649. return true;
  2650. }
  2651. // [ { "attr_name" : { "order" : "sort_mode" } } ]
  2652. SortField_t & tSortField = dSort.Add();
  2653. tSortField.m_sName = sSortName;
  2654. JsonObj_c tAttrItems = tSortItem.GetItem("order");
  2655. if ( tAttrItems )
  2656. {
  2657. if ( !tAttrItems.IsStr() )
  2658. {
  2659. sError.SetSprintf ( R"("sort" property "%s" order is invalid)", tAttrItems.Name() );
  2660. return false;
  2661. }
  2662. CSphString sOrder = tAttrItems.StrVal();
  2663. tSortField.m_bAsc = ( sOrder=="asc" );
  2664. }
  2665. JsonObj_c tMode = tSortItem.GetItem("mode");
  2666. if ( tMode )
  2667. {
  2668. if ( tAttrItems && !tMode.IsStr() )
  2669. {
  2670. sError.SetSprintf ( R"("mode" property "%s" order is invalid)", tAttrItems.Name() );
  2671. return false;
  2672. }
  2673. CSphString sMode = tMode.StrVal();
  2674. if ( sMode!="min" && sMode!="max" )
  2675. {
  2676. sError.SetSprintf ( R"("mode" supported are "min" and "max", got "%s", not supported)", sMode.cstr() );
  2677. return false;
  2678. }
  2679. tSortField.m_sMode = sMode;
  2680. }
  2681. // geodist
  2682. if ( tSortField.m_sName=="_geo_distance" )
  2683. {
  2684. if ( tMode )
  2685. {
  2686. sError = R"("mode" property not supported with "_geo_distance")";
  2687. return false;
  2688. }
  2689. if ( tSortItem.HasItem("unit") )
  2690. {
  2691. sError = R"("unit" property not supported with "_geo_distance")";
  2692. return false;
  2693. }
  2694. if ( !tSortField.Parse ( tSortItem, false, sError, sWarning ) )
  2695. return false;
  2696. }
  2697. // FXIME!!! "unmapped_type" should be replaced with expression EXIST
  2698. // unsupported options
  2699. const char * dUnsupported[] = { "missing", "nested_path", "nested_filter"};
  2700. for ( auto szOption : dUnsupported )
  2701. {
  2702. if ( tSortItem.HasItem(szOption) )
  2703. {
  2704. sError.SetSprintf ( R"("%s" property not supported)", szOption );
  2705. return false;
  2706. }
  2707. }
  2708. return true;
  2709. }
  2710. static bool ParseSort ( const JsonObj_c & tSort, JsonQuery_c & tQuery, bool & bGotWeight, CSphString & sError, CSphString & sWarning )
  2711. {
  2712. bGotWeight = false;
  2713. // unsupported options
  2714. if ( tSort.HasItem("_script") )
  2715. {
  2716. sError = "\"_script\" property not supported";
  2717. return false;
  2718. }
  2719. CSphVector<SortField_t> dSort;
  2720. dSort.Reserve ( tSort.Size() );
  2721. if ( tSort.IsObj() )
  2722. {
  2723. if ( !ParseSortObj ( tSort[0], dSort, sError, sWarning ) )
  2724. return false;
  2725. } else
  2726. {
  2727. for ( const auto & tItem : tSort )
  2728. {
  2729. CSphString sName = tItem.Name();
  2730. bool bString = tItem.IsStr();
  2731. bool bObj = tItem.IsObj();
  2732. if ( !bString && !bObj )
  2733. {
  2734. sError.SetSprintf ( R"("sort" property "%s" should be a string or an object)", sName.scstr() );
  2735. return false;
  2736. }
  2737. if ( bObj && tItem.Size()!=1 )
  2738. {
  2739. sError.SetSprintf ( R"("sort" property "%s" should be an object)", sName.scstr() );
  2740. return false;
  2741. }
  2742. // [ "attr_name" ]
  2743. if ( bString )
  2744. {
  2745. SortField_t & tSortField = dSort.Add();
  2746. tSortField.m_sName = tItem.StrVal();
  2747. // order defaults to desc when sorting on the _score, and defaults to asc when sorting on anything else
  2748. tSortField.m_bAsc = ( tSortField.m_sName!="_score" );
  2749. continue;
  2750. }
  2751. JsonObj_c tSortItem = tItem[0];
  2752. if ( !tSortItem )
  2753. {
  2754. sError = R"(invalid "sort" property item)";
  2755. return false;
  2756. }
  2757. if ( !ParseSortObj ( tSortItem, dSort, sError, sWarning ) )
  2758. return false;
  2759. }
  2760. }
  2761. FormatSortBy ( dSort, tQuery, bGotWeight );
  2762. return true;
  2763. }
  2764. //////////////////////////////////////////////////////////////////////////
  2765. // _source / select list
  2766. static bool ParseStringArray ( const JsonObj_c & tArray, const char * szProp, StrVec_t & dItems, CSphString & sError )
  2767. {
  2768. for ( const auto & tItem : tArray )
  2769. {
  2770. if ( !tItem.IsStr() )
  2771. {
  2772. sError.SetSprintf ( R"("%s" property should be a string)", szProp );
  2773. return false;
  2774. }
  2775. dItems.Add ( tItem.StrVal() );
  2776. }
  2777. return true;
  2778. }
  2779. static bool ParseSelect ( const JsonObj_c & tSelect, CSphQuery & tQuery, CSphString & sError )
  2780. {
  2781. bool bString = tSelect.IsStr();
  2782. bool bArray = tSelect.IsArray();
  2783. bool bObj = tSelect.IsObj();
  2784. if ( !bString && !bArray && !bObj )
  2785. {
  2786. sError = R"("_source" property should be a string or an array or an object)";
  2787. return false;
  2788. }
  2789. if ( bString )
  2790. {
  2791. tQuery.m_dIncludeItems.Add ( tSelect.StrVal() );
  2792. if ( tQuery.m_dIncludeItems[0]=="*" || tQuery.m_dIncludeItems[0].IsEmpty() )
  2793. tQuery.m_dIncludeItems.Reset();
  2794. return true;
  2795. }
  2796. if ( bArray )
  2797. return ParseStringArray ( tSelect, R"("_source")", tQuery.m_dIncludeItems, sError );
  2798. assert ( bObj );
  2799. // includes part of _source object
  2800. JsonObj_c tInclude = tSelect.GetArrayItem ( "includes", sError, true );
  2801. if ( tInclude )
  2802. {
  2803. if ( !ParseStringArray ( tInclude, R"("_source" "includes")", tQuery.m_dIncludeItems, sError ) )
  2804. return false;
  2805. if ( tQuery.m_dIncludeItems.GetLength()==1 && tQuery.m_dIncludeItems[0]=="*" )
  2806. tQuery.m_dIncludeItems.Reset();
  2807. } else if ( !sError.IsEmpty() )
  2808. return false;
  2809. // excludes part of _source object
  2810. JsonObj_c tExclude = tSelect.GetArrayItem ( "excludes", sError, true );
  2811. if ( tExclude )
  2812. {
  2813. if ( !ParseStringArray ( tExclude, R"("_source" "excludes")", tQuery.m_dExcludeItems, sError ) )
  2814. return false;
  2815. } else if ( !sError.IsEmpty() )
  2816. return false;
  2817. return true;
  2818. }
  2819. //////////////////////////////////////////////////////////////////////////
  2820. // script_fields / expressions
  2821. static bool ParseScriptFields ( const JsonObj_c & tExpr, CSphQuery & tQuery, CSphString & sError )
  2822. {
  2823. if ( !tExpr )
  2824. return true;
  2825. if ( !tExpr.IsObj() )
  2826. {
  2827. sError = R"("script_fields" property should be an object)";
  2828. return false;
  2829. }
  2830. StringBuilder_c sSelect;
  2831. sSelect << tQuery.m_sSelect;
  2832. for ( const auto & tAlias : tExpr )
  2833. {
  2834. if ( !tAlias.IsObj() )
  2835. {
  2836. sError = R"("script_fields" properties should be objects)";
  2837. return false;
  2838. }
  2839. if ( CSphString ( tAlias.Name() ).IsEmpty() )
  2840. {
  2841. sError = R"("script_fields" empty property name)";
  2842. return false;
  2843. }
  2844. JsonObj_c tAliasScript = tAlias.GetItem("script");
  2845. if ( !tAliasScript )
  2846. {
  2847. sError = R"("script_fields" property should have "script" object)";
  2848. return false;
  2849. }
  2850. CSphString sExpr;
  2851. if ( !tAliasScript.FetchStrItem ( sExpr, "inline", sError ) )
  2852. return false;
  2853. const char * dUnsupported[] = { "lang", "params", "stored", "file" };
  2854. for ( auto szOption : dUnsupported )
  2855. if ( tAliasScript.HasItem(szOption) )
  2856. {
  2857. sError.SetSprintf ( R"("%s" property not supported in "script_fields")", szOption );
  2858. return false;
  2859. }
  2860. // add to query
  2861. CSphQueryItem & tQueryItem = tQuery.m_dItems.Add();
  2862. tQueryItem.m_sExpr = sExpr;
  2863. tQueryItem.m_sAlias = tAlias.Name();
  2864. // add to select list
  2865. sSelect.Appendf ( ", %s as %s", tQueryItem.m_sExpr.cstr(), tQueryItem.m_sAlias.cstr() );
  2866. }
  2867. sSelect.MoveTo ( tQuery.m_sSelect );
  2868. return true;
  2869. }
  2870. static bool ParseExpressions ( const JsonObj_c & tExpr, CSphQuery & tQuery, CSphString & sError )
  2871. {
  2872. if ( !tExpr )
  2873. return true;
  2874. if ( !tExpr.IsObj() )
  2875. {
  2876. sError = R"("expressions" property should be an object)";
  2877. return false;
  2878. }
  2879. StringBuilder_c sSelect;
  2880. sSelect << tQuery.m_sSelect;
  2881. for ( const auto & tAlias : tExpr )
  2882. {
  2883. if ( !tAlias.IsStr() )
  2884. {
  2885. sError = R"("expressions" properties should be strings)";
  2886. return false;
  2887. }
  2888. if ( CSphString ( tAlias.Name() ).IsEmpty() )
  2889. {
  2890. sError = R"("expressions" empty property name)";
  2891. return false;
  2892. }
  2893. // add to query
  2894. CSphQueryItem & tQueryItem = tQuery.m_dItems.Add();
  2895. tQueryItem.m_sExpr = tAlias.StrVal();
  2896. tQueryItem.m_sAlias = tAlias.Name();
  2897. // add to select list
  2898. sSelect.Appendf ( ", %s as %s", tQueryItem.m_sExpr.cstr(), tQueryItem.m_sAlias.cstr() );
  2899. }
  2900. sSelect.MoveTo ( tQuery.m_sSelect );
  2901. return true;
  2902. }
  2903. //////////////////////////////////////////////////////////////////////////
  2904. // docvalue_fields
  2905. bool ParseDocFields ( const JsonObj_c & tDocFields, JsonQuery_c & tQuery, CSphString & sError )
  2906. {
  2907. if ( !tDocFields || !tDocFields.IsArray() )
  2908. {
  2909. sError = R"("docvalue_fields" property should be an array or an object")";
  2910. return false;
  2911. }
  2912. for ( const auto & tItem : tDocFields )
  2913. {
  2914. if ( !tItem.IsObj() )
  2915. {
  2916. sError = R"("docvalue_fields" property item should be an object)";
  2917. return false;
  2918. }
  2919. CSphString sFieldName;
  2920. if ( !tItem.FetchStrItem ( sFieldName, "field", sError, false ) )
  2921. return false;
  2922. if ( tQuery.m_dItems.GetFirst ( [&sFieldName] ( const CSphQueryItem & tVal ) { return ( tVal.m_sExpr=="*" || tVal.m_sExpr==sFieldName ); } )==-1 )
  2923. {
  2924. CSphQueryItem & tDFItem = tQuery.m_dItems.Add();
  2925. tDFItem.m_sExpr = sFieldName;
  2926. tDFItem.m_sAlias = sFieldName;
  2927. }
  2928. // FIXME!!! collect format type
  2929. bool bDateTime = false;
  2930. CSphString sFormat;
  2931. if ( tItem.FetchStrItem ( sFormat, "format", sError, true ) )
  2932. bDateTime = ( sFormat=="date_time" );
  2933. tQuery.m_dDocFields.Add ( { sFieldName, bDateTime } );
  2934. }
  2935. return true;
  2936. }
  2937. static Aggr_e GetAggrFunc ( const JsonObj_c & tBucket, bool bCheckAggType )
  2938. {
  2939. if ( StrEq ( tBucket.Name(), "significant_terms" ) )
  2940. return Aggr_e::SIGNIFICANT;
  2941. if ( StrEq ( tBucket.Name(), "histogram" ) )
  2942. return Aggr_e::HISTOGRAM;
  2943. if ( StrEq ( tBucket.Name(), "date_histogram" ) )
  2944. return Aggr_e::DATE_HISTOGRAM;
  2945. if ( StrEq ( tBucket.Name(), "range") )
  2946. return Aggr_e::RANGE;
  2947. if ( StrEq ( tBucket.Name(), "date_range") )
  2948. return Aggr_e::DATE_RANGE;
  2949. if ( StrEq ( tBucket.Name(), "composite") )
  2950. return Aggr_e::COMPOSITE;
  2951. if ( StrEq ( tBucket.Name(), "min") )
  2952. return Aggr_e::MIN;
  2953. if ( StrEq ( tBucket.Name(), "max") )
  2954. return Aggr_e::MAX;
  2955. if ( StrEq ( tBucket.Name(), "sum") )
  2956. return Aggr_e::SUM;
  2957. if ( StrEq ( tBucket.Name(), "avg") )
  2958. return Aggr_e::AVG;
  2959. if ( bCheckAggType )
  2960. sphWarning ( "unsupported aggregate type '%s'", tBucket.Name() );
  2961. return Aggr_e::NONE;
  2962. }
  2963. static void SetRangeFrom ( const JsonObj_c & tSrc, bool bForceFloat, RangeSetting_t & tItem )
  2964. {
  2965. if ( tSrc.IsDbl() )
  2966. tItem.m_fFrom = tSrc.DblVal();
  2967. else if ( bForceFloat )
  2968. tItem.m_fFrom = tSrc.IntVal();
  2969. else
  2970. tItem.m_iFrom = tSrc.IntVal();
  2971. }
  2972. static void SetRangeTo ( const JsonObj_c & tSrc, bool bForceFloat, RangeSetting_t & tItem )
  2973. {
  2974. if ( tSrc.IsDbl() )
  2975. tItem.m_fTo = tSrc.DblVal();
  2976. else if ( bForceFloat )
  2977. tItem.m_fTo = tSrc.IntVal();
  2978. else
  2979. tItem.m_iTo = tSrc.IntVal();
  2980. }
  2981. static bool GetKeyed ( const JsonObj_c & tBucket, bool & bKeyed, CSphString & sError )
  2982. {
  2983. if ( !tBucket.HasItem ( "keyed" ) )
  2984. return true;
  2985. const auto tKeyed = tBucket.GetBoolItem ( "keyed", sError, false );
  2986. if ( !tKeyed )
  2987. return false;
  2988. bKeyed = tKeyed.BoolVal();
  2989. return true;
  2990. }
  2991. static bool ParseAggrRange ( const JsonObj_c & tRanges, const CSphString & sCol, AggrRangeSetting_t & dRanges, CSphString & sError );
  2992. static bool ParseAggrRange ( const JsonObj_c & tRanges, const CSphString & sCol, AggrDateRangeSetting_t & dRanges, CSphString & sError );
  2993. static bool ParseAggrRange ( const JsonObj_c & tBucket, JsonAggr_t & tItem, bool bDate, CSphString & sError )
  2994. {
  2995. JsonObj_c tRanges = tBucket.GetItem( "ranges" );
  2996. if ( !tRanges || !tRanges.IsArray() )
  2997. {
  2998. if ( !tRanges )
  2999. sError.SetSprintf ( "\"%s\" missed \"ranges\" property", tItem.m_sCol.cstr() );
  3000. else
  3001. sError.SetSprintf ( "\"%s\" \"ranges\" should be an array", tItem.m_sCol.cstr() );
  3002. return false;
  3003. }
  3004. int iCount = tRanges.Size();
  3005. if ( !iCount )
  3006. {
  3007. sError.SetSprintf ( "\"%s\" empty \"ranges\" property", tItem.m_sCol.cstr() );
  3008. return false;
  3009. }
  3010. bool bKeyed = false;
  3011. if ( !GetKeyed ( tBucket, bKeyed, sError ) )
  3012. return false;
  3013. if ( !bDate )
  3014. {
  3015. auto & dRanges = tItem.m_tRange;
  3016. dRanges.Resize ( iCount );
  3017. dRanges.m_bKeyed = bKeyed;
  3018. return ParseAggrRange ( tRanges, tItem.m_sCol, dRanges, sError );
  3019. } else
  3020. {
  3021. auto & dRanges = tItem.m_tDateRange;
  3022. dRanges.Resize ( iCount );
  3023. dRanges.m_bKeyed = bKeyed;
  3024. return ParseAggrRange ( tRanges, tItem.m_sCol, dRanges, sError );
  3025. }
  3026. }
  3027. bool ParseAggrRange ( const JsonObj_c & tRanges, const CSphString & sCol, AggrRangeSetting_t & dRanges, CSphString & sError )
  3028. {
  3029. int iFloatStart = -1;
  3030. for ( int i=0; i<dRanges.GetLength(); i++ )
  3031. {
  3032. const auto tRangeItem = tRanges[i];
  3033. const auto tFrom = tRangeItem.GetItem ( "from" );
  3034. const auto tTo = tRangeItem.GetItem ( "to" );
  3035. const bool bHasFrom = tFrom;
  3036. const bool bHasTo = tTo;
  3037. if ( !bHasFrom && i!=0 )
  3038. {
  3039. sError.SetSprintf ( "\"%s\" ranges[%d] \"from\" empty", sCol.cstr(), i );
  3040. return false;
  3041. }
  3042. if ( !bHasTo && i!=dRanges.GetLength()-1 )
  3043. {
  3044. sError.SetSprintf ( "\"%s\" ranges[%d] \"to\" empty", sCol.cstr(), i );
  3045. return false;
  3046. }
  3047. if ( ( bHasFrom && tFrom.IsDbl() ) || ( bHasTo && tTo.IsDbl() ) )
  3048. {
  3049. dRanges.m_bFloat = true;
  3050. if ( iFloatStart!=-1 )
  3051. iFloatStart = i;
  3052. }
  3053. if ( bHasFrom )
  3054. SetRangeFrom ( tFrom, ( iFloatStart!=-1 ), dRanges[i] );
  3055. else
  3056. dRanges.m_bOpenLeft = true;
  3057. if ( bHasTo )
  3058. SetRangeTo ( tTo, ( iFloatStart!=-1 ), dRanges[i] );
  3059. else
  3060. dRanges.m_bOpenRight = true;
  3061. }
  3062. // convert int to float values for head of array values
  3063. if ( iFloatStart>0 )
  3064. {
  3065. for ( int i=iFloatStart; i<dRanges.GetLength(); i++ )
  3066. {
  3067. dRanges[i].m_fFrom = dRanges[i].m_iFrom;
  3068. dRanges[i].m_fTo = dRanges[i].m_iTo;
  3069. }
  3070. }
  3071. if ( dRanges.m_bOpenLeft )
  3072. {
  3073. if ( dRanges.m_bFloat )
  3074. dRanges[0].m_fFrom = -FLT_MAX;
  3075. else
  3076. dRanges[0].m_iFrom = -LLONG_MAX;
  3077. }
  3078. if ( dRanges.m_bOpenRight )
  3079. {
  3080. if ( dRanges.m_bFloat )
  3081. dRanges.Last().m_fTo = FLT_MAX;
  3082. else
  3083. dRanges.Last().m_iTo = LLONG_MAX;
  3084. }
  3085. return true;
  3086. }
  3087. bool ParseAggrRange ( const JsonObj_c & tRanges, const CSphString & sCol, AggrDateRangeSetting_t & dRanges, CSphString & sError )
  3088. {
  3089. for ( int i=0; i<dRanges.GetLength(); i++ )
  3090. {
  3091. const auto tRangeItem = tRanges[i];
  3092. const auto tFrom = tRangeItem.GetItem ( "from" );
  3093. const auto tTo = tRangeItem.GetItem ( "to" );
  3094. const bool bHasFrom = tFrom;
  3095. const bool bHasTo = tTo;
  3096. if ( !bHasFrom && i!=0 )
  3097. {
  3098. sError.SetSprintf ( "\"%s\" ranges[%d] \"from\" empty", sCol.cstr(), i );
  3099. return false;
  3100. }
  3101. if ( !bHasTo && i!=dRanges.GetLength()-1 )
  3102. {
  3103. sError.SetSprintf ( "\"%s\" ranges[%d] \"to\" empty", sCol.cstr(), i );
  3104. return false;
  3105. }
  3106. if ( bHasFrom )
  3107. dRanges[i].m_sFrom = tFrom.StrVal();
  3108. if ( bHasTo )
  3109. dRanges[i].m_sTo = tTo.StrVal();
  3110. }
  3111. return true;
  3112. }
  3113. static bool ParseAggrHistogram ( const JsonObj_c & tBucket, JsonAggr_t & tItem, CSphString & sError )
  3114. {
  3115. AggrHistSetting_t & tHist = tItem.m_tHist;
  3116. JsonObj_c tInterval = tBucket.GetItem ( "interval" );
  3117. if ( tInterval.Empty() )
  3118. {
  3119. sError.SetSprintf ( "\"%s\" interval missed", tItem.m_sCol.cstr() );
  3120. return false;
  3121. }
  3122. if ( !tInterval.IsNum() )
  3123. {
  3124. sError.SetSprintf ( "\"%s\" interval should be numeric", tItem.m_sCol.cstr() );
  3125. return false;
  3126. }
  3127. if ( tInterval.IsInt() )
  3128. tHist.m_tInterval = tInterval.IntVal();
  3129. else
  3130. tHist.m_tInterval = tInterval.FltVal();
  3131. JsonObj_c tOffset = tBucket.GetItem ( "offset" );
  3132. if ( !tOffset.Empty() )
  3133. {
  3134. if ( !tOffset.IsNum() )
  3135. {
  3136. sError.SetSprintf ( "\"%s\" offset should be numeric", tItem.m_sCol.cstr() );
  3137. return false;
  3138. }
  3139. if ( tOffset.IsInt() )
  3140. tHist.m_tOffset = tOffset.IntVal();
  3141. else
  3142. tHist.m_tOffset = tOffset.FltVal();
  3143. } else
  3144. {
  3145. tHist.m_tOffset = INT64_C ( 0 );
  3146. }
  3147. if ( !GetKeyed ( tBucket, tHist.m_bKeyed, sError ) )
  3148. return false;
  3149. FixFloat ( tHist );
  3150. return true;
  3151. }
  3152. static bool ParseAggrDateHistogram ( const JsonObj_c & tBucket, JsonAggr_t & tItem, CSphString & sError )
  3153. {
  3154. AggrDateHistSetting_t & tHist = tItem.m_tDateHist;
  3155. JsonObj_c tCalendar = tBucket.GetItem ( "calendar_interval" );
  3156. JsonObj_c tFixed = tBucket.GetItem ( "fixed_interval" );
  3157. if ( tCalendar.Empty() && tFixed.Empty() )
  3158. {
  3159. sError.SetSprintf ( "\"%s\" calendar_interval or fixed_interval missed", tItem.m_sCol.cstr() );
  3160. return false;
  3161. }
  3162. if ( !tCalendar.Empty() && !tFixed.Empty() )
  3163. {
  3164. sError.SetSprintf ( "\"%s\" both calendar_interval and fixed_interval supplied", tItem.m_sCol.cstr() );
  3165. return false;
  3166. }
  3167. tHist.m_bFixed = !tFixed.Empty();
  3168. const JsonObj_c & tInterval = ( tHist.m_bFixed ? tFixed : tCalendar );
  3169. if ( !tInterval.IsStr() )
  3170. {
  3171. sError.SetSprintf ( "\"%s\" calendar_interval should be string", tItem.m_sCol.cstr() );
  3172. return false;
  3173. }
  3174. tHist.m_sInterval = tInterval.StrVal();
  3175. if ( !GetKeyed ( tBucket, tHist.m_bKeyed, sError ) )
  3176. return false;
  3177. return true;
  3178. }
  3179. static bool ParseAggrComposite ( const JsonObj_c & tBucket, JsonAggr_t & tAggr, CSphString & sError )
  3180. {
  3181. JsonObj_c tComposite = tBucket.GetObjItem ( "composite", sError, false );
  3182. if ( !tComposite )
  3183. return false;
  3184. JsonObj_c tSource = tComposite.GetArrayItem ( "sources", sError, false );
  3185. if ( !tSource )
  3186. return false;
  3187. if ( !tSource.IsArray() )
  3188. {
  3189. sError = R"("sources" property item should be an array)";
  3190. return false;
  3191. }
  3192. SmallStringHash_T<AggrComposite_t> hColumns;
  3193. for ( const auto & tArrayItem : tSource )
  3194. {
  3195. if ( !tArrayItem.IsObj() )
  3196. {
  3197. sError = R"("sources" items should be an object)";
  3198. return false;
  3199. }
  3200. JsonObj_c tItem = tArrayItem.begin();
  3201. JsonObj_c tTerms = tItem.GetObjItem ( "terms", sError, false );
  3202. if ( !tTerms )
  3203. return false;
  3204. AggrComposite_t tCol;
  3205. if ( !tTerms.FetchStrItem ( tCol.m_sColumn, "field", sError, false ) )
  3206. return false;
  3207. tCol.m_sAlias = tItem.Name();
  3208. if ( !hColumns.Add ( tCol, tItem.Name() ) )
  3209. {
  3210. sError.SetSprintf ( R"("composite" has multiple "%s" aggregates)", tItem.Name() );
  3211. return false;
  3212. }
  3213. }
  3214. if ( hColumns.IsEmpty() )
  3215. {
  3216. sError = R"(empty "composite" aggregate)";
  3217. return false;
  3218. }
  3219. JsonObj_c tAfter = tComposite.GetObjItem ( "after", sError, false );
  3220. if ( tAfter && tAfter.Size() )
  3221. {
  3222. JsonObj_c tJsonQuery ( R"( {"query":{"bool":{"must":[] }}} )" );
  3223. JsonObj_c tFilters = tJsonQuery.GetItem ( "query" ).GetItem ( "bool" ).GetItem ( "must" );
  3224. for ( const auto & tItem : tAfter )
  3225. {
  3226. AggrComposite_t * pCol = hColumns ( tItem.Name() );
  3227. if ( !pCol )
  3228. {
  3229. sError.SetSprintf ( R"("after" missed "%s" aggregate)", tItem.Name() );
  3230. return false;
  3231. }
  3232. JsonObj_c tFilterVal = tItem.Clone();
  3233. JsonObj_c tEqItem ( R"( {"equals":{} } )") ;
  3234. tEqItem.begin().AddItem ( pCol->m_sColumn.cstr(), tFilterVal );
  3235. tFilters.AddItem ( tEqItem );
  3236. }
  3237. CSphQuery tTmpQuery;
  3238. if ( !ParseJsonQueryFilters ( tJsonQuery.GetItem( "query" ), tTmpQuery, sError, sError ) )
  3239. return false;
  3240. if ( !sError.IsEmpty() )
  3241. return false;
  3242. assert ( tTmpQuery.m_dFilterTree.IsEmpty() );
  3243. tAggr.m_dCompositeAfterKey = std::move ( tTmpQuery.m_dFilters );
  3244. }
  3245. tAggr.m_iSize = DEFAULT_MAX_MATCHES;
  3246. tComposite.FetchIntItem ( tAggr.m_iSize, "size", sError, true );
  3247. StringBuilder_c sColName ( "," );
  3248. tAggr.m_dComposite.Reserve ( hColumns.GetLength() );
  3249. for ( const auto & tCol : hColumns )
  3250. {
  3251. sColName += tCol.second.m_sColumn.cstr();
  3252. tAggr.m_dComposite.Add ( tCol.second );
  3253. }
  3254. tAggr.m_sCol = sColName.cstr();
  3255. return true;
  3256. }
  3257. static bool ParseAggsNode ( const JsonObj_c & tBucket, const JsonObj_c & tJsonItem, bool bRoot, JsonAggr_t & tItem, CSphString & sError )
  3258. {
  3259. if ( !tBucket.IsObj() )
  3260. {
  3261. sError.SetSprintf ( R"("aggs" bucket '%s' should be an object)", tItem.m_sBucketName.cstr() );
  3262. return false;
  3263. }
  3264. if ( !StrEq ( tBucket.Name(), "composite" ) && !tBucket.FetchStrItem ( tItem.m_sCol, "field", sError, false ) )
  3265. return false;
  3266. tBucket.FetchIntItem ( tItem.m_iSize, "size", sError, true );
  3267. int iShardSize = 0;
  3268. tBucket.FetchIntItem ( iShardSize, "shard_size", sError, true );
  3269. tItem.m_iSize = Max ( tItem.m_iSize, iShardSize ); // FIXME!!! use (size * 1.5 + 10) for shard size
  3270. tItem.m_eAggrFunc = GetAggrFunc ( tBucket, !bRoot );
  3271. switch ( tItem.m_eAggrFunc )
  3272. {
  3273. case Aggr_e::DATE_HISTOGRAM:
  3274. if ( !ParseAggrDateHistogram ( tBucket, tItem, sError ) )
  3275. return false;
  3276. tItem.m_iSize = Max ( tItem.m_iSize, 1000 ); // set max_matches to min\max / interval
  3277. break;
  3278. case Aggr_e::HISTOGRAM:
  3279. if ( !ParseAggrHistogram ( tBucket, tItem, sError ) )
  3280. return false;
  3281. tItem.m_iSize = Max ( tItem.m_iSize, 1000 ); // set max_matches to min\max / interval
  3282. break;
  3283. case Aggr_e::RANGE:
  3284. if ( !ParseAggrRange ( tBucket, tItem, false, sError ) )
  3285. return false;
  3286. tItem.m_iSize = Max ( tItem.m_iSize, tItem.m_tRange.GetLength() + 1 ); // set max_matches to buckets count + _all bucket
  3287. break;
  3288. case Aggr_e::DATE_RANGE:
  3289. if ( !ParseAggrRange ( tBucket, tItem, true, sError ) )
  3290. return false;
  3291. tItem.m_iSize = Max ( tItem.m_iSize, tItem.m_tDateRange.GetLength() + 1 ); // set max_matches to buckets count + _all bucket
  3292. break;
  3293. case Aggr_e::COMPOSITE:
  3294. if ( !ParseAggrComposite ( tJsonItem, tItem, sError ) )
  3295. return false;
  3296. break;
  3297. case Aggr_e::MIN:
  3298. case Aggr_e::MAX:
  3299. case Aggr_e::SUM:
  3300. case Aggr_e::AVG:
  3301. tItem.m_iSize = 1;
  3302. break;
  3303. default: break;
  3304. }
  3305. return true;
  3306. }
  3307. static bool ParseAggsNodeSort ( const JsonObj_c & tJsonItem, bool bOrder, JsonAggr_t & tItem, CSphString & sError )
  3308. {
  3309. if ( !( tJsonItem.IsArray() || tJsonItem.IsObj() ) )
  3310. {
  3311. sError.SetSprintf ( "\"%s\" property value should be an array or an object", ( bOrder ? "order" : "sort" ) );
  3312. return false;
  3313. }
  3314. bool bGotWeight = false;
  3315. JsonQuery_c tTmpQuery;
  3316. tTmpQuery.m_sSortBy = "";
  3317. tTmpQuery.m_eSort = SPH_SORT_RELEVANCE;
  3318. // FIXME!!! reports warnings for geodist sort
  3319. CSphString sWarning;
  3320. if ( !ParseSort ( tJsonItem, tTmpQuery, bGotWeight, sError, sWarning ) )
  3321. return false;
  3322. tItem.m_sSort = tTmpQuery.m_sSortBy;
  3323. return true;
  3324. }
  3325. static bool AddSubAggregate ( const JsonObj_c & tAggs, bool bRoot, CSphVector<JsonAggr_t> & dParentItems, CSphString & sError )
  3326. {
  3327. if ( bRoot && tAggs.begin().Empty() )
  3328. {
  3329. JsonAggr_t & tCount = dParentItems.Add();
  3330. tCount.m_eAggrFunc = Aggr_e::COUNT;
  3331. tCount.m_iSize = 1;
  3332. return true;
  3333. }
  3334. for ( const auto & tJsonItem : tAggs )
  3335. {
  3336. if ( !tJsonItem.IsObj() )
  3337. {
  3338. sError = R"("aggs" property item should be an object)";
  3339. return false;
  3340. }
  3341. JsonAggr_t tItem;
  3342. tItem.m_sBucketName = tJsonItem.Name();
  3343. for ( const auto & tAggsItem : tJsonItem )
  3344. {
  3345. // could be a sort object at the aggs item or order object at the bucket
  3346. if ( strcmp ( tAggsItem.Name(), "sort" )==0 )
  3347. {
  3348. if ( !ParseAggsNodeSort ( tAggsItem, false, tItem, sError ) )
  3349. return false;
  3350. } else
  3351. {
  3352. if ( StrEq ( tAggsItem.Name(), "aggs" ) || tAggsItem.HasItem ( "aggs" ) )
  3353. {
  3354. sError = R"(nested "aggs" is not supported)";
  3355. return false;
  3356. }
  3357. if ( tAggsItem==tAggsItem.end() )
  3358. {
  3359. sError.SetSprintf ( R"("aggs" bucket '%s' with only nested items)", tAggsItem.Name() );
  3360. return false;
  3361. }
  3362. if ( !ParseAggsNode ( tAggsItem, tJsonItem, bRoot, tItem, sError ) )
  3363. return false;
  3364. // bucket could have its own order item
  3365. if ( tAggsItem.HasItem ( "order" ) )
  3366. {
  3367. if ( !ParseAggsNodeSort ( tAggsItem.GetItem("order"), true, tItem, sError ) )
  3368. return false;
  3369. }
  3370. }
  3371. }
  3372. if ( tItem.m_eAggrFunc==Aggr_e::NONE && !bRoot )
  3373. {
  3374. sError.SetSprintf ( R"(bucket '%s' without aggregate items)", tItem.m_sBucketName.cstr() );
  3375. return false;
  3376. }
  3377. dParentItems.Add ( tItem );
  3378. }
  3379. return true;
  3380. }
  3381. bool ParseAggregates ( const JsonObj_c & tAggs, JsonQuery_c & tQuery, CSphString & sError )
  3382. {
  3383. if ( !tAggs || !tAggs.IsObj() )
  3384. {
  3385. sError = R"("aggs" property should be an object")";
  3386. return false;
  3387. }
  3388. if ( !AddSubAggregate ( tAggs, true, tQuery.m_dAggs, sError ) )
  3389. return false;
  3390. // set query now for any date aggregate to make sure they will have the same now timestamp
  3391. if ( tQuery.m_dAggs.any_of ( [] ( const JsonAggr_t & tAggr ) { return !tAggr.m_tDateRange.IsEmpty(); } ) )
  3392. tQuery.m_iNow = time ( nullptr );
  3393. return true;
  3394. }
  3395. CSphString JsonAggr_t::GetAliasName () const
  3396. {
  3397. CSphString sName;
  3398. sName.SetSprintf ( "%s_%s", m_sCol.cstr(), m_sBucketName.cstr() );
  3399. return sName;
  3400. }
  3401. ParsedJsonQuery_t::ParsedJsonQuery_t()
  3402. {
  3403. SetQueryDefaultsExt2 ( m_tQuery );
  3404. SetQueryDefaultsExt2 ( m_tJoinQueryOptions );
  3405. }