| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762
77727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763
27732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763
7773778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189 |
- //
- // Copyright (c) 2017-2025, Manticore Software LTD (https://manticoresearch.com)
- // All rights reserved
- //
- // This program is free software; you can redistribute it and/or modify
- // it under the terms of the GNU General Public License. You should have
- // received a copy of the GPL license along with this program; if you
- // did not, you can find it at http://www.gnu.org/
- //
- #include "sphinxquery/xqparser.h"
- #include "sphinxquery/parse_helper.h"
- #include "sphinxsearch.h"
- #include "sphinxplugin.h"
- #include "sphinxutils.h"
- #include "searchdaemon.h"
- #include "jsonqueryfilter.h"
- #include "attribute.h"
- #include "searchdsql.h"
- #include "searchdha.h"
- #include "knnmisc.h"
- #include "sorterscroll.h"
- #include "sphinxexcerpt.h"
// Reserved names used by the JSON query code.
// g_szAll is the pseudo-field that selects every full-text field (see HandleSpecialFields).
static const char * g_szAll { "_all" };
// NOTE(review): the next two are not referenced in the visible part of this file;
// presumably internal names for highlight and ordering items - confirm against their users.
static const char * g_szHighlight { "_@highlight_" };
static const char * g_szOrder { "_@order_" };
- class QueryTreeBuilder_c;
- struct ErrorPathGuard_t
- {
- ErrorPathGuard_t ( QueryTreeBuilder_c & tBuilder, bool bEnabled, const JsonObj_c & tPath );
- ~ErrorPathGuard_t ();
- QueryTreeBuilder_c & m_tBuilder;
- const bool m_bEnabled;
- };
- class QueryTreeBuilder_c : public XQParseHelper_c
- {
- public:
- QueryTreeBuilder_c ( const CSphQuery * pQuery, TokenizerRefPtr_c pQueryTokenizerQL, const CSphIndexSettings & tSettings );
- void CollectKeywords ( const char * szStr, XQNode_t * pNode, const XQLimitSpec_t & tLimitSpec, float fBoost );
- bool HandleFieldBlockStart ( const char * & /*pPtr*/ ) override { return true; }
- bool HandleSpecialFields ( const char * & pPtr, FieldMask_t & dFields ) override;
- bool NeedTrailingSeparator() override { return false; }
- XQNode_t * CreateNode ( XQLimitSpec_t & tLimitSpec );
- const TokenizerRefPtr_c & GetQLTokenizer() { return m_pQueryTokenizerQL; }
- const CSphIndexSettings & GetIndexSettings() { return m_tSettings; }
- const CSphQuery * GetQuery() { return m_pQuery; }
- bool m_bHasFulltext = false;
- bool m_bHasFilter = false;
- void ResetNodesFlags() { m_bHasFulltext = m_bHasFilter = false; }
- QueryTreeBuilder_c CreateCollectPath ( const CSphSchema * pSchema );
- void ErrorPrintPath ( QueryTreeBuilder_c & tOrig );
- ErrorPathGuard_t ErrorAddPath ( const JsonObj_c & tPath );
- private:
- const CSphQuery * m_pQuery {nullptr};
- const TokenizerRefPtr_c m_pQueryTokenizerQL;
- const CSphIndexSettings & m_tSettings;
- XQNode_t * AddChildKeyword ( XQNode_t * pParent, const char * szKeyword, int iSkippedPosBeforeToken, const XQLimitSpec_t & tLimitSpec, float fBoost );
- friend ErrorPathGuard_t;
- CSphVector< std::pair<CSphString, const void *> > m_dErrorPath;
- bool m_bErrorCollectPath = false;
- };
- QueryTreeBuilder_c::QueryTreeBuilder_c ( const CSphQuery * pQuery, TokenizerRefPtr_c pQueryTokenizerQL, const CSphIndexSettings & tSettings )
- : m_pQuery ( pQuery )
- , m_pQueryTokenizerQL ( std::move (pQueryTokenizerQL) )
- , m_tSettings ( tSettings )
- {}
- void QueryTreeBuilder_c::CollectKeywords ( const char * szStr, XQNode_t * pNode, const XQLimitSpec_t & tLimitSpec, float fBoost )
- {
- m_pTokenizer->SetBuffer ( (const BYTE*)szStr, (int) strlen ( szStr ) );
- while (true)
- {
- int iSkippedPosBeforeToken = 0;
- if ( m_bWasBlended )
- {
- iSkippedPosBeforeToken = m_pTokenizer->SkipBlended();
- // just add all skipped blended parts except blended head (already added to atomPos)
- if ( iSkippedPosBeforeToken>1 )
- m_iAtomPos += iSkippedPosBeforeToken - 1;
- }
- // FIXME!!! only wildcard node need tokes with wildcard symbols
- const char * sToken = (const char *) m_pTokenizer->GetToken ();
- if ( !sToken )
- {
- AddChildKeyword ( pNode, nullptr, iSkippedPosBeforeToken, tLimitSpec, fBoost );
- break;
- }
- // now let's do some token post-processing
- m_bWasBlended = m_pTokenizer->TokenIsBlended();
- int iPrevDeltaPos = 0;
- if ( m_pPlugin && m_pPlugin->m_fnPushToken )
- sToken = m_pPlugin->m_fnPushToken ( m_pPluginData, const_cast<char*>(sToken), &iPrevDeltaPos, m_pTokenizer->GetTokenStart(), int ( m_pTokenizer->GetTokenEnd() - m_pTokenizer->GetTokenStart() ) );
- m_iAtomPos += 1 + iPrevDeltaPos;
- bool bMultiDestHead = false;
- bool bMultiDest = false;
- int iDestCount = 0;
- // do nothing inside phrase
- if ( !m_pTokenizer->IsPhraseMode() )
- bMultiDest = m_pTokenizer->WasTokenMultiformDestination ( bMultiDestHead, iDestCount );
- // check for stopword, and create that node
- // temp buffer is required, because GetWordID() might expand (!) the keyword in-place
- BYTE sTmp [ MAX_TOKEN_BYTES ];
- strncpy ( (char*)sTmp, sToken, MAX_TOKEN_BYTES );
- sTmp[MAX_TOKEN_BYTES-1] = '\0';
- int iStopWord = 0;
- if ( m_pPlugin && m_pPlugin->m_fnPreMorph )
- m_pPlugin->m_fnPreMorph ( m_pPluginData, (char*)sTmp, &iStopWord );
- SphWordID_t uWordId = iStopWord ? 0 : m_pDict->GetWordID ( sTmp );
- if ( uWordId && m_pPlugin && m_pPlugin->m_fnPostMorph )
- {
- int iRes = m_pPlugin->m_fnPostMorph ( m_pPluginData, (char*)sTmp, &iStopWord );
- if ( iStopWord )
- uWordId = 0;
- else if ( iRes )
- uWordId = m_pDict->GetWordIDNonStemmed ( sTmp );
- }
- if ( !uWordId )
- {
- sToken = nullptr;
- // stopwords with step=0 must not affect pos
- if ( m_bEmptyStopword )
- m_iAtomPos--;
- }
- XQNode_t * pChildNode = nullptr;
- if ( bMultiDest && !bMultiDestHead )
- {
- assert ( m_dMultiforms.GetLength() );
- m_dMultiforms.Last().m_iDestCount++;
- m_dDestForms.Add ( sToken );
- } else
- pChildNode = AddChildKeyword ( pNode, sToken, iSkippedPosBeforeToken, tLimitSpec, fBoost );
- if ( bMultiDestHead )
- {
- MultiformNode_t & tMulti = m_dMultiforms.Add();
- tMulti.m_pNode = pChildNode;
- tMulti.m_iDestStart = m_dDestForms.GetLength();
- tMulti.m_iDestCount = 0;
- }
- }
- }
- bool QueryTreeBuilder_c::HandleSpecialFields ( const char * & pPtr, FieldMask_t & dFields )
- {
- if ( *pPtr=='_' )
- {
- auto iLen = (int) strlen(g_szAll);
- if ( !strncmp ( pPtr, g_szAll, iLen ) )
- {
- pPtr += iLen;
- dFields.SetAll();
- return true;
- }
- }
- return false;
- }
- XQNode_t * QueryTreeBuilder_c::CreateNode ( XQLimitSpec_t & tLimitSpec )
- {
- auto * pNode = new XQNode_t(tLimitSpec);
- m_dSpawned.Add ( pNode );
- return pNode;
- }
- XQNode_t * QueryTreeBuilder_c::AddChildKeyword ( XQNode_t * pParent, const char * szKeyword, int iSkippedPosBeforeToken, const XQLimitSpec_t & tLimitSpec, float fBoost )
- {
- XQKeyword_t tKeyword ( szKeyword, m_iAtomPos );
- tKeyword.m_iSkippedBefore = iSkippedPosBeforeToken;
- tKeyword.m_fBoost = fBoost;
- auto * pNode = new XQNode_t ( tLimitSpec );
- pNode->AddDirtyWord ( tKeyword );
- pParent->AddNewChild ( pNode );
- m_dSpawned.Add ( pNode );
- return pNode;
- }
- ErrorPathGuard_t QueryTreeBuilder_c::ErrorAddPath ( const JsonObj_c & tPath )
- {
- return ErrorPathGuard_t ( *this, m_bErrorCollectPath, tPath );
- }
- void QueryTreeBuilder_c::ErrorPrintPath ( QueryTreeBuilder_c & tOrig )
- {
- assert ( IsError() );
- StringBuilder_c tBuilder;
- tBuilder.Appendf ( "%s at '", tOrig.m_pParsed->m_sParseError.cstr() );
- const void * pLast = nullptr;
- for ( const auto & tEntry : m_dErrorPath )
- {
- // skip duplicates
- if ( !tEntry.second || pLast!=tEntry.second )
- tBuilder.Appendf ( "/%s", tEntry.first.scstr() );
- pLast = tEntry.second;
- }
- tBuilder << "'";
- tOrig.m_pParsed->m_sParseError = (CSphString)tBuilder;
- }
- QueryTreeBuilder_c QueryTreeBuilder_c::CreateCollectPath ( const CSphSchema * pSchema )
- {
- QueryTreeBuilder_c tOther ( m_pQuery, std::move ( m_pQueryTokenizerQL ), m_tSettings );
- tOther.Setup ( pSchema, m_pTokenizer->Clone ( SPH_CLONE ), std::move ( m_pDict ), m_pParsed, m_tSettings );
- tOther.m_bErrorCollectPath = true;
- tOther.m_dErrorPath.Add ( { "query", nullptr } );
- return tOther;
- }
- ErrorPathGuard_t::ErrorPathGuard_t ( QueryTreeBuilder_c & tBuilder, bool bEnabled, const JsonObj_c & tPath )
- : m_tBuilder ( tBuilder )
- , m_bEnabled ( bEnabled )
- {
- // add path entry only in the collect pass and only prior to error point
- if ( m_bEnabled && !m_tBuilder.IsError() )
- m_tBuilder.m_dErrorPath.Add ( { tPath.Name(), tPath.GetRoot() } );
- }
- ErrorPathGuard_t::~ErrorPathGuard_t ()
- {
- if ( m_bEnabled && !m_tBuilder.IsError() )
- m_tBuilder.m_dErrorPath.Pop();
- }
- //////////////////////////////////////////////////////////////////////////
- class QueryParserJson_c : public QueryParser_i
- {
- public:
- bool IsFullscan ( const CSphQuery & tQuery ) const final;
- bool IsFullscan ( const XQQuery_t & tQuery ) const final;
- bool ParseQuery ( XQQuery_t & tParsed, const char * sQuery, const CSphQuery * pQuery, TokenizerRefPtr_c pQueryTokenizer, TokenizerRefPtr_c pQueryTokenizerJson, const CSphSchema * pSchema, const DictRefPtr_c& pDict, const CSphIndexSettings & tSettings, const CSphBitvec * pMorphFields ) const final;
- QueryParser_i * Clone() const final { return new QueryParserJson_c; }
- private:
- XQNode_t * ConstructMatchNode ( const JsonObj_c & tJson, bool bPhrase, bool bTerms, bool bSingleTerm, QueryTreeBuilder_c & tBuilder ) const;
- XQNode_t * ConstructBoolNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const;
- XQNode_t * ConstructQLNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const;
- XQNode_t * ConstructMatchAllNode ( QueryTreeBuilder_c & tBuilder ) const;
- bool ConstructBoolNodeItems ( const JsonObj_c & tClause, CSphVector<XQNode_t *> & dItems, QueryTreeBuilder_c & tBuilder ) const;
- bool ConstructNodeOrFilter ( const JsonObj_c & tItem, CSphVector<XQNode_t *> & dNodes, QueryTreeBuilder_c & tBuilder ) const;
- XQNode_t * ConstructNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const;
- };
- bool QueryParserJson_c::IsFullscan ( const CSphQuery & tQuery ) const
- {
- const char * szQ = tQuery.m_sQuery.cstr();
- if ( !szQ ) return true;
- if ( strstr ( szQ, R"("match")" ) ) return false;
- if ( strstr ( szQ, R"("terms")" ) ) return false;
- if ( strstr ( szQ, R"("match_phrase")" ) ) return false;
- if ( strstr ( szQ, R"("term")" ) ) return false;
- if ( strstr ( szQ, R"("query_string")" ) ) return false;
- if ( strstr ( szQ, R"("simple_query_string")" ) ) return false;
- return true;
- }
- bool QueryParserJson_c::IsFullscan ( const XQQuery_t & tQuery ) const
- {
- return !( tQuery.m_pRoot && ( tQuery.m_pRoot->dChildren().GetLength () || tQuery.m_pRoot->dWords().GetLength () ) );
- }
- static bool IsFullText ( const CSphString & sName );
- static bool IsBoolNode ( const CSphString & sName );
- bool CheckRootNode ( const JsonObj_c & tRoot, CSphString & sError )
- {
- bool bFilter = false;
- bool bBool = false;
- bool bFullText = false;
- for ( const auto & tItem : tRoot )
- {
- const CSphString & sName = tItem.Name();
- if ( IsFilter ( tItem ) )
- {
- if ( bFilter )
- {
- sError = "\"query\" has multiple filter properties, use bool node";
- return false;
- }
- bFilter = true;
- }
- else if ( IsBoolNode ( sName ) )
- {
- if ( bBool )
- {
- sError = "\"query\" has multiple bool properties";
- return false;
- }
- bBool = true;
- }
- else if ( IsFullText ( sName ) )
- {
- if ( bFullText )
- {
- sError = "\"query\" has multiple full-text properties, use bool node";
- return false;
- }
- bFullText = true;
- }
- }
- return true;
- }
- static JsonObj_c FindFullTextQueryNode ( const JsonObj_c & tRoot )
- {
- for ( JsonObj_c tChild : tRoot )
- {
- if ( !IsFilter ( tChild ) )
- return tChild;
- }
- return tRoot[0];
- }
- bool QueryParserJson_c::ParseQuery ( XQQuery_t & tParsed, const char * szQuery, const CSphQuery * pQuery, TokenizerRefPtr_c pQueryTokenizerQL, TokenizerRefPtr_c pQueryTokenizerJson, const CSphSchema * pSchema, const DictRefPtr_c & pDict, const CSphIndexSettings & tSettings, const CSphBitvec * pMorphFields ) const
- {
- JsonObj_c tRoot ( szQuery );
- // take only the first item of the query; ignore the rest
- int iNumIndexes = ( tRoot.Empty() ? 0 : tRoot.Size() );
- if ( !iNumIndexes )
- {
- tParsed.m_sParseError = "\"query\" property is empty";
- return false;
- }
- if ( iNumIndexes!=1 && !CheckRootNode ( tRoot, tParsed.m_sParseError ) )
- return false;
- assert ( pQueryTokenizerJson->IsQueryTok() );
- DictRefPtr_c pMyDict = GetStatelessDict ( pDict );
- QueryTreeBuilder_c tBuilder ( pQuery, std::move ( pQueryTokenizerQL ), tSettings );
- tBuilder.Setup ( pSchema, pQueryTokenizerJson->Clone ( SPH_CLONE ), pMyDict, &tParsed, tSettings );
- const JsonObj_c tFtNode = FindFullTextQueryNode ( tRoot );
- XQNode_t * pRoot = ConstructNode ( tFtNode, tBuilder );
- if ( tBuilder.IsError() )
- {
- tBuilder.Cleanup();
- QueryTreeBuilder_c tErrorBuilder { tBuilder.CreateCollectPath ( pSchema ) };
- ConstructNode ( tFtNode, tErrorBuilder );
- tErrorBuilder.Cleanup();
- tErrorBuilder.ErrorPrintPath ( tBuilder );
- return false;
- }
- XQLimitSpec_t tLimitSpec;
- pRoot = tBuilder.FixupTree ( pRoot, tLimitSpec, pMorphFields, IsAllowOnlyNot() );
- if ( tBuilder.IsError() )
- {
- tBuilder.Cleanup();
- return false;
- }
- tParsed.m_bSingleWord = ( pRoot && pRoot->dChildren().IsEmpty() && pRoot->dWords().GetLength() == 1 );
- tParsed.m_pRoot = pRoot;
- return true;
- }
- static const char * g_szOperatorNames[]=
- {
- "and",
- "or"
- };
- static XQOperator_e StrToNodeOp ( const char * szStr )
- {
- if ( !szStr )
- return SPH_QUERY_TOTAL;
- int iOp=0;
- for ( auto i : g_szOperatorNames )
- {
- if ( !strcmp ( szStr, i ) )
- return XQOperator_e(iOp);
- iOp++;
- }
- return SPH_QUERY_TOTAL;
- }
- static bool IsBoolNode ( const JsonObj_c & tJson )
- {
- if ( !tJson )
- return false;
- return CSphString ( tJson.Name() )=="bool";
- }
- bool IsBoolNode ( const CSphString & sName )
- {
- return ( sName=="bool" );
- }
- static float GetBoost ( const JsonObj_c & tFields )
- {
- const float fBoostDefault = 1.0f;
- if ( !tFields.IsObj() )
- return fBoostDefault;
- JsonObj_c tBoost = tFields.GetItem ( "boost" );
- if ( !tBoost || !tBoost.IsNum() )
- return fBoostDefault;
-
- return tBoost.FltVal();
- }
- XQNode_t * QueryParserJson_c::ConstructMatchNode ( const JsonObj_c & tJson, bool bPhrase, bool bTerms, bool bSingleTerm, QueryTreeBuilder_c & tBuilder ) const
- {
- ErrorPathGuard_t tGuard = tBuilder.ErrorAddPath ( tJson );
- if ( !tJson.IsObj() )
- {
- tBuilder.Error ( "\"match\" value should be an object" );
- return nullptr;
- }
- if ( tJson.Size()!=1 )
- {
- tBuilder.Error ( "ill-formed \"match\" property" );
- return nullptr;
- }
- JsonObj_c tFields = tJson[0];
- tBuilder.SetString ( tFields.Name() );
- XQLimitSpec_t tLimitSpec;
- const char * szQuery = nullptr;
- XQOperator_e eNodeOp = bPhrase ? SPH_QUERY_PHRASE : SPH_QUERY_OR;
- bool bIgnore = false;
- StringBuilder_c tTermsBuf ( " " );
- if ( !tBuilder.ParseFields ( tLimitSpec.m_dFieldMask, tLimitSpec.m_iFieldMaxPos, bIgnore ) )
- return nullptr;
- if ( bIgnore )
- {
- tBuilder.Warning ( R"(ignoring fields in "%s", using "_all")", tFields.Name() );
- tLimitSpec.Reset();
- }
- tLimitSpec.m_bFieldSpec = true;
- if ( bTerms )
- {
- if ( !tFields.IsArray() )
- {
- tBuilder.Warning ( "values of properties in \"terms\" should be an array" );
- return nullptr;
- }
- for ( const auto & tTerm : tFields )
- {
- if ( !tTerm.IsStr() )
- {
- tBuilder.Error ( "\"terms\" value should be a string" );
- return nullptr;
- }
- tTermsBuf += tTerm.SzVal();
- }
- szQuery = tTermsBuf.cstr();
- } else if ( tFields.IsObj() )
- {
- // matching with flags
- CSphString sError;
- JsonObj_c tQuery = ( bSingleTerm ? tFields.GetStrItem ( "value", sError ) : tFields.GetStrItem ( "query", sError ) );
- if ( !tQuery )
- {
- tBuilder.Error ( "%s", sError.cstr() );
- return nullptr;
- }
- szQuery = tQuery.SzVal();
- if ( !bPhrase )
- {
- JsonObj_c tOp = tFields.GetItem ( "operator" );
- if ( tOp ) // "and", "or"
- {
- eNodeOp = StrToNodeOp ( tOp.SzVal() );
- if ( eNodeOp==SPH_QUERY_TOTAL )
- {
- tBuilder.Error ( "unknown operator: \"%s\"", tOp.SzVal() );
- return nullptr;
- }
- }
- }
- } else
- {
- // simple list of keywords
- if ( !tFields.IsStr() )
- {
- tBuilder.Warning ( "values of properties in \"match\" should be strings or objects" );
- return nullptr;
- }
- szQuery = tFields.SzVal();
- }
- assert ( szQuery );
- XQNode_t * pNewNode = tBuilder.CreateNode ( tLimitSpec );
- pNewNode->SetOp ( eNodeOp );
- float fBoost = GetBoost ( tFields );
- tBuilder.CollectKeywords ( szQuery, pNewNode, tLimitSpec, fBoost );
- return pNewNode;
- }
- bool QueryParserJson_c::ConstructNodeOrFilter ( const JsonObj_c & tItem, CSphVector<XQNode_t *> & dNodes, QueryTreeBuilder_c & tBuilder ) const
- {
- if ( !tItem )
- return true;
- // we created filters before, no need to process them again
- if ( IsFilter(tItem) )
- {
- tBuilder.m_bHasFilter = true;
- return true;
- }
- XQNode_t * pNode = ConstructNode ( tItem, tBuilder );
- if ( !pNode )
- return IsBoolNode ( tItem ); // need walk down the tree for compart mode
- dNodes.Add ( pNode );
- return true;
- }
- bool QueryParserJson_c::ConstructBoolNodeItems ( const JsonObj_c & tClause, CSphVector<XQNode_t *> & dItems, QueryTreeBuilder_c & tBuilder ) const
- {
- ErrorPathGuard_t tGuard = tBuilder.ErrorAddPath ( tClause );
- if ( tClause.IsArray() )
- {
- for ( const auto & tObject : tClause )
- {
- if ( !tObject.IsObj() )
- {
- tBuilder.Error ( "\"%s\" array value should be an object", tClause.Name() );
- return false;
- }
- if ( !ConstructNodeOrFilter ( tObject[0], dItems, tBuilder ) )
- return false;
- }
- } else if ( tClause.IsObj() )
- {
- if ( !ConstructNodeOrFilter ( tClause[0], dItems, tBuilder ) )
- return false;
- } else
- {
- tBuilder.Error ( "\"%s\" value should be an object or an array", tClause.Name() );
- return false;
- }
- return true;
- }
- XQNode_t * QueryParserJson_c::ConstructBoolNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const
- {
- ErrorPathGuard_t tGuard = tBuilder.ErrorAddPath ( tJson );
- if ( !tJson.IsObj() )
- {
- tBuilder.Error ( "\"bool\" value should be an object" );
- return nullptr;
- }
- CSphVector<XQNode_t *> dMust, dShould, dMustNot;
- for ( const auto & tClause : tJson )
- {
- tBuilder.ResetNodesFlags();
- CSphString sName = tClause.Name();
- if ( sName=="must" )
- {
- if ( !ConstructBoolNodeItems ( tClause, dMust, tBuilder ) )
- return nullptr;
- } else if ( sName=="should" )
- {
- if ( !ConstructBoolNodeItems ( tClause, dShould, tBuilder ) )
- return nullptr;
- if ( tBuilder.m_bHasFilter && tBuilder.m_bHasFulltext )
- {
- tBuilder.Error ( "filter and full-text can be used together only inside \"must\" node" );
- return nullptr;
- }
- } else if ( sName=="must_not" )
- {
- if ( !ConstructBoolNodeItems ( tClause, dMustNot, tBuilder ) )
- return nullptr;
- } else if ( sName=="filter" )
- {
- if ( !ConstructBoolNodeItems ( tClause, dMust, tBuilder ) )
- return nullptr;
- } else if ( sName=="minimum_should_match" ) // FIXME!!! add to should as option
- {
- continue;
- } else
- {
- tBuilder.Error ( "unknown bool query type: \"%s\"", sName.cstr() );
- return nullptr;
- }
- }
- XQNode_t * pMustNode = nullptr;
- XQNode_t * pShouldNode = nullptr;
- XQNode_t * pMustNotNode = nullptr;
- XQLimitSpec_t tLimitSpec;
- if ( dMust.GetLength() )
- {
- // no need to construct AND node for a single child
- if ( dMust.GetLength()==1 )
- pMustNode = dMust[0];
- else
- {
- XQNode_t * pAndNode = tBuilder.CreateNode ( tLimitSpec );
- pAndNode->SetOp ( SPH_QUERY_AND );
- for ( auto & i : dMust )
- pAndNode->AddNewChild ( i);
- pMustNode = pAndNode;
- }
- }
- if ( dShould.GetLength() )
- {
- if ( dShould.GetLength()==1 )
- pShouldNode = dShould[0];
- else
- {
- XQNode_t * pOrNode = tBuilder.CreateNode ( tLimitSpec );
- pOrNode->SetOp ( SPH_QUERY_OR );
- for ( auto & i : dShould )
- pOrNode->AddNewChild (i);
- pShouldNode = pOrNode;
- }
- }
- // slightly different case - we need to construct the NOT node anyway
- if ( dMustNot.GetLength() )
- {
- XQNode_t * pNotNode = tBuilder.CreateNode ( tLimitSpec );
- pNotNode->SetOp ( SPH_QUERY_NOT );
- if ( dMustNot.GetLength()==1 )
- {
- pNotNode->AddNewChild ( dMustNot[0] );
- } else
- {
- XQNode_t * pOrNode = tBuilder.CreateNode ( tLimitSpec );
- pOrNode->SetOp ( SPH_QUERY_OR );
- for ( auto & i : dMustNot )
- pOrNode->AddNewChild ( i );
- pNotNode->AddNewChild ( pOrNode );
- }
- pMustNotNode = pNotNode;
- }
- int iTotalNodes = 0;
- iTotalNodes += pMustNode ? 1 : 0;
- iTotalNodes += pShouldNode ? 1 : 0;
- iTotalNodes += pMustNotNode ? 1 : 0;
- XQNode_t * pResultNode = nullptr;
- if ( !iTotalNodes )
- return nullptr;
- else if ( iTotalNodes==1 )
- {
- if ( pMustNode )
- pResultNode = pMustNode;
- else if ( pShouldNode )
- pResultNode = pShouldNode;
- else
- pResultNode = pMustNotNode;
- assert ( pResultNode );
- } else
- {
- pResultNode = pMustNode ? pMustNode : pMustNotNode;
- assert ( pResultNode );
-
- // combine 'must' and 'must_not' with AND
- if ( pMustNode && pMustNotNode )
- {
- XQNode_t * pAndNode = tBuilder.CreateNode(tLimitSpec);
- pAndNode->SetOp(SPH_QUERY_AND);
- pAndNode->AddNewChild ( pMustNode );
- pAndNode->AddNewChild ( pMustNotNode );
- pResultNode = pAndNode;
- }
- // combine 'result' node and 'should' node with MAYBE
- if ( pShouldNode )
- {
- XQNode_t * pMaybeNode = tBuilder.CreateNode ( tLimitSpec );
- pMaybeNode->SetOp ( SPH_QUERY_MAYBE );
- pMaybeNode->AddNewChild ( pResultNode );
- pMaybeNode->AddNewChild ( pShouldNode );
- pResultNode = pMaybeNode;
- }
- }
- return pResultNode;
- }
- XQNode_t * QueryParserJson_c::ConstructQLNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const
- {
- ErrorPathGuard_t tGuard = tBuilder.ErrorAddPath ( tJson );
- CSphString sQueryString;
- // query_string could be either {"query_string":{"query":"term"}} or {"query_string":"term"}
- if ( tJson.IsObj() )
- {
- CSphString sError;
- JsonObj_c tNestedQuery = tJson.GetStrItem ( "query", sError, false );
- if ( !tNestedQuery )
- {
- tBuilder.Error ( "\"query_string\" value should be an object with the \"query\" string" );
- return nullptr;
- }
- sQueryString = tNestedQuery.StrVal();
- }
- if ( sQueryString.IsEmpty() )
- {
- if ( tJson.IsStr() )
- {
- sQueryString = tJson.StrVal();
- } else
- {
- tBuilder.Error ( "\"query_string\" value should be an string" );
- return nullptr;
- }
- }
- XQQuery_t tParsed;
- tParsed.m_dZones = tBuilder.GetZone(); // should keep the same zone list for whole tree
- // no need to pass morph fields here as upper level does fixup
- if ( !sphParseExtendedQuery ( tParsed, sQueryString.cstr(), tBuilder.GetQuery(), tBuilder.GetQLTokenizer(), tBuilder.GetSchema(), tBuilder.GetDict(), tBuilder.GetIndexSettings(), nullptr ) )
- {
- tBuilder.Error ( "%s", tParsed.m_sParseError.cstr() );
- return nullptr;
- }
- if ( !tParsed.m_sParseWarning.IsEmpty() )
- tBuilder.Warning ( "%s", tParsed.m_sParseWarning.cstr() );
- XQNode_t * pRoot = tParsed.m_pRoot;
- tParsed.m_pRoot = nullptr;
- tBuilder.SetZone ( tParsed.m_dZones );
- return pRoot;
- }
- XQNode_t * QueryParserJson_c::ConstructMatchAllNode ( QueryTreeBuilder_c & tBuilder ) const
- {
- XQLimitSpec_t tLimitSpec;
- XQNode_t * pNewNode = tBuilder.CreateNode ( tLimitSpec );
- pNewNode->SetOp ( SPH_QUERY_NULL );
- return pNewNode;
- }
- static bool IsFtMatch ( const CSphString & sName )
- {
- return ( sName=="match" );
- }
- static bool IsFtTerms ( const CSphString & sName )
- {
- return ( sName=="terms" );
- }
- static bool IsFtPhrase ( const CSphString & sName )
- {
- return ( sName=="match_phrase" );
- }
- static bool IsFtTerm ( const CSphString & sName )
- {
- return ( sName=="term" );
- }
- static bool IsFtMatchAll ( const CSphString & sName )
- {
- return ( sName=="match_all" );
- }
- static bool IsFtQueryString ( const CSphString & sName )
- {
- return ( sName=="query_string" );
- }
- static bool IsFtQueryStringSimple ( const CSphString & sName )
- {
- return ( sName=="simple_query_string" );
- }
- bool IsFullText ( const CSphString & sName )
- {
- return ( IsFtMatch ( sName ) || IsFtTerms ( sName ) || IsFtPhrase ( sName ) || IsFtTerm ( sName ) || IsFtMatchAll ( sName ) || IsFtQueryString ( sName ) || IsFtQueryStringSimple ( sName ));
- }
- XQNode_t * QueryParserJson_c::ConstructNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const
- {
- ErrorPathGuard_t tGuard = tBuilder.ErrorAddPath ( tJson );
- CSphString sName = tJson.Name();
- if ( !tJson || sName.IsEmpty() )
- {
- tBuilder.Error ( "empty json found" );
- return nullptr;
- }
- bool bMatch = IsFtMatch ( sName );
- bool bTerms = IsFtTerms ( sName );
- bool bPhrase = IsFtPhrase ( sName );
- bool bSingleTerm = IsFtTerm ( sName );
- if ( bMatch || bPhrase || bTerms || bSingleTerm )
- {
- tBuilder.m_bHasFulltext = true;
- return ConstructMatchNode ( tJson, bPhrase, bTerms, bSingleTerm, tBuilder );
- }
- if ( IsFtMatchAll ( sName ) )
- {
- tBuilder.m_bHasFulltext = true;
- return ConstructMatchAllNode ( tBuilder );
- }
- if ( IsBoolNode ( sName ) )
- return ConstructBoolNode ( tJson, tBuilder );
- if ( IsFtQueryString ( sName ) )
- {
- tBuilder.m_bHasFulltext = true;
- return ConstructQLNode ( tJson, tBuilder );
- }
- if ( IsFtQueryStringSimple ( sName ) && tJson.IsObj() )
- {
- tBuilder.m_bHasFulltext = true;
- return ConstructQLNode ( tJson.GetItem ( "query" ), tBuilder );
- }
- tBuilder.Error ( "unknown full-text node '%s'", sName.cstr() );
- return nullptr;
- }
- bool NonEmptyQuery ( const JsonObj_c & tQuery )
- {
- return ( tQuery.HasItem("match")
- || tQuery.HasItem("match_phrase")
- || tQuery.HasItem("bool") )
- || tQuery.HasItem("query_string");
- }
- //////////////////////////////////////////////////////////////////////////
- static bool ParseSnippet ( const JsonObj_c & tSnip, CSphQuery & tQuery, CSphString & sError );
- static bool ParseSort ( const JsonObj_c & tSort, JsonQuery_c & tQuery, bool & bGotWeight, CSphString & sError, CSphString & sWarning );
- static bool ParseSelect ( const JsonObj_c & tSelect, CSphQuery & tQuery, CSphString & sError );
- static bool ParseScriptFields ( const JsonObj_c & tExpr, CSphQuery & tQuery, CSphString & sError );
- static bool ParseExpressions ( const JsonObj_c & tExpr, CSphQuery & tQuery, CSphString & sError );
- static bool ParseDocFields ( const JsonObj_c & tDocFields, JsonQuery_c & tQuery, CSphString & sError );
- static bool ParseAggregates ( const JsonObj_c & tAggs, JsonQuery_c & tQuery, CSphString & sError );
- static bool ParseIndex ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, CSphString & sError )
- {
- if ( !tRoot )
- {
- sError.SetSprintf ( "unable to parse: %s", tRoot.GetErrorPtr() );
- return false;
- }
- JsonObj_c tIndex = tRoot.GetStrItem ( "table", sError );
- if ( !tIndex )
- {
- tIndex = tRoot.GetStrItem ( "index", sError, true );
- if ( !tIndex )
- return false;
-
- sError = "";
- }
- tStmt.m_sIndex = tIndex.StrVal();
- tStmt.m_tQuery.m_sIndexes = tStmt.m_sIndex;
- const char * sIndexStart = strchr ( tStmt.m_sIndex.cstr(), ':' );
- if ( sIndexStart!=nullptr )
- {
- const char * sIndex = tStmt.m_sIndex.cstr();
- sError.SetSprintf ( "wrong table at cluster syntax, use \"cluster\": \"%.*s\" and \"index\": \"%s\" properties, instead of '%s'",
- (int)(sIndexStart-sIndex), sIndex, sIndexStart+1, sIndex );
- return false;
- }
- return true;
- }
- static bool ParseIndexId ( const JsonObj_c & tRoot, bool bArrayIds, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
- {
- if ( !ParseIndex ( tRoot, tStmt, sError ) )
- return false;
- JsonObj_c tId = tRoot.GetItem ( "id" );
- if ( tId )
- {
- if ( !tId.IsInt() && !tId.IsUint() && !tId.IsArray() )
- {
- sError = "Document ids should be integer or array of integers";
- return false;
- }
- if ( !bArrayIds && tId.IsArray() )
- {
- sError = "Document ids should be integer";
- return false;
- }
- if ( !tId.IsArray() )
- {
- if ( tId.IsInt() && tId.IntVal()<0 )
- {
- sError = "Negative document ids are not allowed";
- return false;
- }
- } else
- {
- for ( const auto & tItem : tId )
- {
- if ( !tItem.IsInt() && !tItem.IsUint() )
- {
- sError = "Document ids should be integer";
- return false;
- }
- if ( tItem.IsInt() && tItem.IntVal()<0 )
- {
- sError = "Negative document ids are not allowed";
- return false;
- }
- }
- }
- }
- if ( tId && !tId.IsArray() )
- tDocId = tId.IntVal();
- else
- tDocId = 0; // enable auto-id
- return true;
- }
- static bool ParseCluster ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, CSphString & sError )
- {
- if ( !tRoot )
- {
- sError.SetSprintf ( "unable to parse: %s", tRoot.GetErrorPtr() );
- return false;
- }
- // cluster is optional
- JsonObj_c tCluster = tRoot.GetStrItem ( "cluster", sError, true );
- if ( tCluster )
- tStmt.m_sCluster = tCluster.StrVal();
- return true;
- }
- std::unique_ptr<QueryParser_i> sphCreateJsonQueryParser()
- {
- return std::make_unique<QueryParserJson_c>();
- }
- static bool ParseLimits ( const JsonObj_c & tRoot, CSphQuery & tQuery, CSphString & sError )
- {
- JsonObj_c tLimit = tRoot.GetIntItem ( "limit", "size", sError );
- if ( !sError.IsEmpty() )
- return false;
- if ( tLimit )
- tQuery.m_iLimit = (int)tLimit.IntVal();
- JsonObj_c tOffset = tRoot.GetIntItem ( "offset", "from", sError );
- if ( !sError.IsEmpty() )
- return false;
- if ( tOffset )
- tQuery.m_iOffset = (int)tOffset.IntVal();
- JsonObj_c tCutoff = tRoot.GetIntItem ( "cutoff", sError, true );
- if ( !sError.IsEmpty() )
- return false;
- if ( tCutoff )
- tQuery.m_iCutoff = (int)tCutoff.IntVal();
- JsonObj_c tMaxMatches = tRoot.GetIntItem ( "max_matches", sError, true );
- if ( !sError.IsEmpty() )
- return false;
- if ( tMaxMatches )
- {
- tQuery.m_iMaxMatches = (int)tMaxMatches.IntVal();
- tQuery.m_bExplicitMaxMatches = true;
- }
- return true;
- }
- static bool ParseOptions ( const JsonObj_c & tOptions, CSphQuery & tQuery, CSphString & sError )
- {
- if ( !tOptions.IsObj() )
- {
- sError = "\"options\" property value should be an object";
- return false;
- }
- for ( const auto & i : tOptions )
- {
- AddOption_e eAdd = AddOption_e::NOT_FOUND;
- CSphString sOpt = i.Name();
- if ( i.IsInt() || i.IsBool() )
- eAdd = AddOption ( tQuery, sOpt, i.StrVal(), i.IntVal(), STMT_SELECT, sError );
- else if ( i.IsStr() )
- {
- CSphString sRanker = i.StrVal();
- const char * szRanker = sRanker.cstr();
- while ( sphIsAlpha(*szRanker) )
- szRanker++;
- if ( *szRanker=='(' && sRanker.Ends(")") )
- {
- int iRankerNameLen = szRanker-sRanker.cstr();
- CSphString sExpr = sRanker.SubString (iRankerNameLen+1, sRanker.Length()-iRankerNameLen-2 );
- sExpr.Unquote();
- sRanker = sRanker.SubString ( 0, iRankerNameLen );
- eAdd = ::AddOptionRanker ( tQuery, sOpt, sRanker, [sExpr]{ return sExpr; }, STMT_SELECT, sError );
- }
- if ( eAdd==AddOption_e::NOT_FOUND )
- eAdd = AddOption ( tQuery, sOpt, i.StrVal(), i.StrVal(), [&i]{ return i.StrVal(); }, STMT_SELECT, sError );
- }
- else if ( i.IsObj() )
- {
- CSphVector<CSphNamedInt> dNamed;
- for ( const auto & tNamed : i )
- {
- if ( !tNamed.IsInt() )
- {
- sError.SetSprintf ( "\"%s\" property of \"%s\"' option should be integer", sOpt.cstr(), tNamed.Name() );
- return false;
- }
- dNamed.Add ( { tNamed.Name(), tNamed.IntVal() } );
- }
- eAdd = ::AddOption ( tQuery, sOpt, dNamed, STMT_SELECT, sError );
- }
- if ( eAdd==AddOption_e::NOT_FOUND )
- {
- sError.SetSprintf ( "unknown option '%s'", sOpt.cstr () );
- return false;
- }
- else if ( eAdd==AddOption_e::FAILED )
- return false;
- }
- return true;
- }
- static bool ParseOptions ( const JsonObj_c & tRoot, ParsedJsonQuery_t & tPJQuery, CSphString & sError )
- {
- CSphQuery & tQuery = tPJQuery.m_tQuery;
- // different from SQL: in sql it is requested by default
- tQuery.m_tScrollSettings.m_bRequested = false;
- JsonObj_c tOptions = tRoot.GetItem("options");
- if ( !tOptions )
- return true;
- if ( tQuery.m_eJoinType!=JoinType_e::NONE )
- for ( const auto & i : tOptions )
- {
- if ( !i.IsObj() )
- continue;
- CSphString sTable = i.Name();
- sTable.ToLower();
- StrVec_t dQueryIndexes;
- ParseIndexList ( tQuery.m_sIndexes, dQueryIndexes );
- bool bLeftTable = false;
- for ( const auto & i : dQueryIndexes )
- if ( sTable==i )
- {
- bLeftTable = true;
- break;
- }
- if ( bLeftTable )
- return ParseOptions ( i, tQuery, sError );
- if ( sTable==tQuery.m_sJoinIdx )
- return ParseOptions ( i, tPJQuery.m_tJoinQueryOptions, sError );
- sError.SetSprintf ( "Unknown table '%s' in OPTIONS", sTable.cstr() );
- return false;
- }
- return ParseOptions ( tOptions, tQuery, sError );
- }
- static bool ParseKNNQuery ( const JsonObj_c & tJson, CSphQuery & tQuery, CSphString & sError, CSphString & sWarning )
- {
- if ( !tJson )
- return true;
- if ( !tJson.IsObj() )
- {
- sError = "\"knn\" property value should be an object";
- return false;
- }
- if ( !tJson.FetchStrItem ( tQuery.m_sKNNAttr, "field", sError ) ) return false;
- if ( !tJson.FetchIntItem ( tQuery.m_iKNNK, "k", sError ) ) return false;
- if ( !tJson.FetchIntItem ( tQuery.m_iKnnEf, "ef", sError, true ) ) return false;
- JsonObj_c tQueryVec = tJson.GetArrayItem ( "query_vector", sError );
- if ( !tQueryVec )
- return false;
- for ( const auto & tArrayItem : tQueryVec )
- {
- if ( !tArrayItem.IsInt() && !tArrayItem.IsDbl() )
- {
- sError = "\"query_vector\" items should be integer of float";
- return false;
- }
- tQuery.m_dKNNVec.Add ( tArrayItem.FltVal() );
- }
- return true;
- }
- static bool ParseOnCond ( const JsonObj_c & tRoot, CSphString & sIdx, CSphString & sAttr, ESphAttr & eType, CSphString & sError )
- {
- CSphString sType;
- if ( !tRoot.FetchStrItem ( sIdx, "table", sError ) ) return false;
- if ( !tRoot.FetchStrItem ( sAttr, "field", sError ) ) return false;
- if ( !tRoot.FetchStrItem ( sType, "type", sError, true ) ) return false;
- if ( !sType.IsEmpty() )
- {
- if ( sType=="int" || sType=="integer" )
- eType = SPH_ATTR_INTEGER;
- else if ( sType=="float" )
- eType = SPH_ATTR_FLOAT;
- else if ( sType=="string" )
- eType = SPH_ATTR_STRING;
- else
- {
- sError.SetSprintf ( "unknown \"type\" value: \"%s\"", sType.cstr() );
- return false;
- }
- }
- return true;
- }
- static bool ParseOnFilter ( const JsonObj_c & tRoot, OnFilter_t & tOnFilter, CSphString & sError )
- {
- if ( !tRoot.IsObj() )
- {
- sError = "\"on\" items should be objects";
- return false;
- }
- CSphString sOp;
- if ( !tRoot.FetchStrItem ( sOp, "operator", sError ) )
- return false;
- if ( sOp!="eq" )
- {
- sError = "Unknown \"operator\" value";
- return false;
- }
- JsonObj_c tLeft = tRoot.GetObjItem ( "left", sError );
- if ( !tLeft )
- return false;
- JsonObj_c tRight = tRoot.GetObjItem ( "right", sError );
- if ( !tRight )
- return false;
- if ( !ParseOnCond ( tLeft, tOnFilter.m_sIdx1, tOnFilter.m_sAttr1, tOnFilter.m_eTypeCast1, sError ) )
- return false;
- if ( !ParseOnCond ( tRight, tOnFilter.m_sIdx2, tOnFilter.m_sAttr2, tOnFilter.m_eTypeCast2, sError ) )
- return false;
- return true;
- }
- static bool ParseJoin ( const JsonObj_c & tRoot, CSphQuery & tQuery, CSphString & sError, CSphString & sWarning )
- {
- JsonObj_c tJoin = tRoot.GetArrayItem ( "join", sError, true );
- if ( !tJoin )
- return true;
- int iNumJoins = 0;
- for ( const auto & tJoinItem : tJoin )
- {
- if ( iNumJoins>0 )
- {
- sError = "Only single table joins are currently supported";
- return false;
- }
- CSphString sJoinType;
- if ( !tJoinItem.FetchStrItem ( sJoinType, "type", sError ) )
- return false;
- if ( sJoinType=="inner" )
- tQuery.m_eJoinType = JoinType_e::INNER;
- else if ( sJoinType=="left" )
- tQuery.m_eJoinType = JoinType_e::LEFT;
- else
- {
- sError.SetSprintf ( "unknown join type '%s'", sJoinType.cstr() );
- return false;
- }
- if ( !tJoinItem.FetchStrItem ( tQuery.m_sJoinIdx, "table", sError ) )
- return false;
- JsonObj_c tMatchQuery = tJoinItem.GetObjItem ( "query", sError, true );
- if ( tMatchQuery )
- tQuery.m_sJoinQuery = tMatchQuery.AsString();
-
- JsonObj_c tOn = tJoinItem.GetArrayItem ( "on", sError );
- if ( !tOn )
- return false;
- for ( const auto & tCond : tOn )
- {
- OnFilter_t tOnFilter;
- if ( !ParseOnFilter ( tCond, tOnFilter, sError ) )
- return false;
- tQuery.m_dOnFilters.Add(tOnFilter);
- }
- iNumJoins++;
- }
- return true;
- }
- bool sphParseJsonQuery ( Str_t sQuery, ParsedJsonQuery_t & tPJQuery )
- {
- JsonObj_c tRoot ( sQuery );
- tPJQuery.m_tQuery.m_sRawQuery = sQuery;
- return sphParseJsonQuery ( tRoot, tPJQuery );
- }
- bool sphParseJsonQuery ( const JsonObj_c & tRoot, ParsedJsonQuery_t & tPJQuery )
- {
- TlsMsg::ResetErr();
- if ( !tRoot )
- return TlsMsg::Err ( "unable to parse: %s", tRoot.GetErrorPtr() );
- TLS_MSG_STRING ( sError );
- JsonObj_c tIndex = tRoot.GetStrItem ( "table", sError );
- if ( !tIndex )
- {
- tIndex = tRoot.GetStrItem ( "index", sError, true );
- if ( !tIndex )
- return false;
- sError = "";
- }
- auto & tQuery = tPJQuery.m_tQuery;
- tQuery.m_sIndexes = tIndex.StrVal();
- if ( tQuery.m_sIndexes==g_szAll )
- tQuery.m_sIndexes = "*";
- if ( !ParseLimits ( tRoot, tQuery, sError ) )
- return false;
- JsonObj_c tJsonQuery = tRoot.GetItem("query");
- JsonObj_c tKNNQuery = tRoot.GetItem("knn");
- if ( tJsonQuery && tKNNQuery )
- return TlsMsg::Err ( "\"query\" can't be used together with \"knn\"" );
- // common code used by search queries and update/delete by query
- if ( !ParseJsonQueryFilters ( tJsonQuery, tQuery, sError, tPJQuery.m_sWarning ) )
- return false;
- if ( !ParseKNNQuery ( tKNNQuery, tQuery, sError, tPJQuery.m_sWarning ) )
- return false;
- if ( tKNNQuery && !ParseJsonQueryFilters ( tKNNQuery, tQuery, sError, tPJQuery.m_sWarning ) )
- return false;
- if ( !ParseJoin ( tRoot, tQuery, sError, tPJQuery.m_sWarning ) )
- return false;
- if ( !ParseOptions ( tRoot, tPJQuery, sError ) )
- return false;
- if ( !tRoot.FetchBoolItem ( tPJQuery.m_bProfile, "profile", sError, true ) )
- return false;
- if ( !tRoot.FetchIntItem ( tPJQuery.m_iPlan, "plan", sError, true ) )
- return false;
- // expression columns go first to select list
- JsonObj_c tScriptFields = tRoot.GetItem ( "script_fields" );
- if ( tScriptFields && !ParseScriptFields ( tScriptFields, tQuery, sError ) )
- return false;
- // a synonym to "script_fields"
- JsonObj_c tExpressions = tRoot.GetItem ( "expressions" );
- if ( tExpressions && !ParseExpressions ( tExpressions, tQuery, sError ) )
- return false;
- JsonObj_c tSnip = tRoot.GetObjItem ( "highlight", sError, true );
- if ( tSnip )
- {
- if ( !ParseSnippet ( tSnip, tQuery, sError ) )
- return false;
- }
- else if ( !sError.IsEmpty() )
- return false;
- JsonObj_c tSort = tRoot.GetItem("sort");
- if ( tSort && !( tSort.IsArray() || tSort.IsObj() ) )
- {
- sError = "\"sort\" property value should be an array or an object";
- return false;
- }
- if ( tSort )
- {
- bool bGotWeight = false;
- if ( !ParseSort ( tSort, tQuery, bGotWeight, sError, tPJQuery.m_sWarning ) )
- return false;
- JsonObj_c tTrackScore = tRoot.GetBoolItem ( "track_scores", sError, true );
- if ( !sError.IsEmpty() )
- return false;
- bool bTrackScore = tTrackScore && tTrackScore.BoolVal();
- if ( !bGotWeight && !bTrackScore )
- tQuery.m_eRanker = SPH_RANK_NONE;
- }
- else
- {
- // set defaults
- tQuery.m_eSort = SPH_SORT_EXTENDED;
- tQuery.m_sSortBy = "@weight desc";
- tQuery.m_sOrderBy = "@weight desc";
- }
- // source \ select filter
- JsonObj_c tSelect = tRoot.GetItem("_source");
- bool bParsedSelect = ( !tSelect || ParseSelect ( tSelect, tQuery, sError ) );
- if ( !bParsedSelect )
- return false;
- // docvalue_fields
- JsonObj_c tDocFields = tRoot.GetItem ( "docvalue_fields" );
- if ( tDocFields && !ParseDocFields ( tDocFields, tQuery, sError ) )
- return false;
- // aggs
- JsonObj_c tAggs = tRoot.GetItem ( "aggs" );
- if ( tAggs && !ParseAggregates ( tAggs, tQuery, sError ) )
- return false;
- if ( !SetupScroll ( tQuery, sError ) )
- return false;
- return true;
- }
- bool ParseJsonInsert ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, DocID_t & tDocId, bool bReplace, CSphString & sError )
- {
- if ( !ParseIndexId ( tRoot, false, tStmt, tDocId, sError ) )
- return false;
- if ( !ParseCluster ( tRoot, tStmt, sError ) )
- return false;
- tStmt.m_dInsertSchema.Add ( sphGetDocidName() );
- SqlInsert_t & tId = tStmt.m_dInsertValues.Add();
- tId.m_iType = SqlInsert_t::CONST_INT;
- tId.SetValueInt ( (uint64_t)tDocId, false );
- // "doc" is optional
- JsonObj_c tSource = tRoot.GetItem("doc");
- return ParseJsonInsertSource ( tSource, tStmt, bReplace, sError );
- }
- static bool ParseJsonInsertSource ( const JsonObj_c & tSource, StrVec_t & dInsertSchema, CSphVector<SqlInsert_t> & dInsertValues, CSphString & sError )
- {
- if ( !tSource )
- return true;
- for ( const auto & tItem : tSource )
- {
- dInsertSchema.Add ( tItem.Name() );
- dInsertSchema.Last().ToLower();
- SqlInsert_t & tNewValue = dInsertValues.Add();
- if ( tItem.IsStr() || tItem.IsNull() )
- {
- tNewValue.m_iType = ( tItem.IsStr() ? SqlInsert_t::QUOTED_STRING : SqlInsert_t::TOK_NULL );
- tNewValue.m_sVal = tItem.StrVal();
- } else if ( tItem.IsDbl() )
- {
- tNewValue.m_iType = SqlInsert_t::CONST_FLOAT;
- tNewValue.m_fVal = tItem.FltVal();
- } else if ( tItem.IsInt() || tItem.IsBool() || tItem.IsUint() )
- {
- tNewValue.m_iType = SqlInsert_t::CONST_INT;
- tNewValue.SetValueInt ( tItem.IntVal() );
- } else if ( tItem.IsArray() || tItem.IsObj() )
- {
- // could be either object or array
- // all fit to JSON attribute
- // array of int fits MVA attribute
- tNewValue.m_sVal = tItem.AsString();
- bool bMVA = false;
- if ( tItem.IsArray() )
- {
- tNewValue.m_iType = SqlInsert_t::CONST_MVA;
- tNewValue.m_pVals = new RefcountedVector_c<AttrValue_t>;
- for ( const auto & tArrayItem : tItem )
- {
- if ( !tArrayItem.IsInt() && !tArrayItem.IsDbl() )
- break;
- tNewValue.m_pVals->Add ( { tArrayItem.IntVal(), tArrayItem.FltVal() } );
- bMVA = true;
- }
- if ( !bMVA && !tItem.Size() )
- bMVA = true;
- }
- if ( !bMVA )
- {
- tNewValue.m_iType = SqlInsert_t::QUOTED_STRING;
- tNewValue.m_pVals = nullptr;
- }
- } else
- {
- sError.SetSprintf ( "unsupported value type '%s' in field '%s'", tItem.TypeName(), tItem.Name() );
- return false;
- }
- }
- return true;
- }
- bool ParseJsonInsertSource ( const JsonObj_c & tSource, SqlStmt_t & tStmt, bool bReplace, CSphString & sError )
- {
- tStmt.m_eStmt = bReplace ? STMT_REPLACE : STMT_INSERT;
- if ( !ParseJsonInsertSource ( tSource, tStmt.m_dInsertSchema, tStmt.m_dInsertValues, sError ) )
- return false;
- if ( !tStmt.CheckInsertIntegrity() )
- {
- sError = "wrong number of values";
- return false;
- }
- return true;
- }
- bool sphParseJsonInsert ( Str_t sInsert, SqlStmt_t & tStmt, DocID_t & tDocId, bool bReplace, CSphString & sError )
- {
- JsonObj_c tRoot ( sInsert );
- return ParseJsonInsert ( tRoot, tStmt, tDocId, bReplace, sError );
- }
- static bool ParseUpdateDeleteQueries ( const JsonObj_c & tRoot, bool bDelete, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
- {
- tStmt.m_tQuery.m_sSelect = "id";
- if ( !ParseIndex ( tRoot, tStmt, sError ) )
- return false;
- if ( !ParseCluster ( tRoot, tStmt, sError ) )
- return false;
- JsonObj_c tId = tRoot.GetItem ( "id" );
- if ( tId )
- {
- if ( !ParseIndexId ( tRoot, bDelete, tStmt, tDocId, sError ) )
- return false;
- CSphFilterSettings & tFilter = tStmt.m_tQuery.m_dFilters.Add();
- tFilter.m_eType = SPH_FILTER_VALUES;
- if ( bDelete && tId.IsArray() )
- {
- for ( const auto & tItem : tId )
- tFilter.m_dValues.Add ( tItem.IntVal() );
- } else
- {
- tFilter.m_dValues.Add ( tId.IntVal() );
- }
- tFilter.m_sAttrName = "id";
- tDocId = tFilter.m_dValues[0];
- }
- // "query" is optional
- JsonObj_c tQuery = tRoot.GetItem("query");
- if ( tQuery && tId )
- {
- sError = R"(both "id" and "query" specified)";
- return false;
- }
- CSphString sWarning; // fixme: add to results
- return ParseJsonQueryFilters ( tQuery, tStmt.m_tQuery, sError, sWarning );
- }
- bool ParseJsonUpdate ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
- {
- CSphAttrUpdate & tUpd = tStmt.AttrUpdate();
- tStmt.m_eStmt = STMT_UPDATE;
- if ( !ParseUpdateDeleteQueries ( tRoot, false, tStmt, tDocId, sError ) )
- return false;
- JsonObj_c tSource = tRoot.GetObjItem ( "doc", sError );
- if ( !tSource )
- return false;
- CSphVector<int64_t> dMVA;
- for ( const auto & tItem : tSource )
- {
- bool bFloat = tItem.IsNum();
- bool bInt = tItem.IsInt();
- bool bBool = tItem.IsBool();
- bool bString = tItem.IsStr();
- bool bArray = tItem.IsArray();
- bool bObject = tItem.IsObj();
- if ( !bFloat && !bInt && !bBool && !bString && !bArray && !bObject )
- {
- sError.SetSprintf ( "unsupported value type '%s' in field '%s'", tItem.TypeName(), tItem.Name() );
- return false;
- }
- CSphString sAttr = tItem.Name();
- TypedAttribute_t & tTypedAttr = tUpd.m_dAttributes.Add();
- tTypedAttr.m_sName = sAttr.ToLower();
- if ( bInt || bBool )
- {
- int64_t iValue = tItem.IntVal();
- tUpd.m_dPool.Add ( (DWORD)iValue );
- auto uHi = (DWORD)( iValue>>32 );
- if ( uHi )
- {
- tUpd.m_dPool.Add ( uHi );
- tTypedAttr.m_eType = SPH_ATTR_BIGINT;
- } else
- tTypedAttr.m_eType = SPH_ATTR_INTEGER;
- }
- else if ( bFloat )
- {
- auto fValue = tItem.FltVal();
- tUpd.m_dPool.Add ( sphF2DW ( fValue ) );
- tTypedAttr.m_eType = SPH_ATTR_FLOAT;
- }
- else if ( bString || bObject )
- {
- CSphString sEncoded;
- const char * szValue = tItem.SzVal();
- if ( bObject )
- {
- sEncoded = tItem.AsString();
- szValue = sEncoded.cstr();
- }
- auto iLength = (int) strlen ( szValue );
- tUpd.m_dPool.Add ( tUpd.m_dBlobs.GetLength() );
- tUpd.m_dPool.Add ( iLength );
- if ( iLength )
- {
- BYTE * pBlob = tUpd.m_dBlobs.AddN ( iLength+2 ); // a couple of extra \0 for json parser to be happy
- memcpy ( pBlob, szValue, iLength );
- pBlob[iLength] = 0;
- pBlob[iLength+1] = 0;
- }
- tTypedAttr.m_eType = SPH_ATTR_STRING;
- } else if ( bArray )
- {
- dMVA.Resize ( 0 );
- for ( const auto & tArrayItem : tItem )
- {
- if ( !tArrayItem.IsInt() )
- {
- sError = "MVA elements should be integers";
- return false;
- }
- dMVA.Add ( tArrayItem.IntVal() );
- }
- dMVA.Uniq();
- tUpd.m_dPool.Add ( dMVA.GetLength()*2 ); // as 64 bit stored into DWORD vector
- tTypedAttr.m_eType = SPH_ATTR_UINT32SET;
- for ( int64_t uVal : dMVA )
- {
- if ( uVal>UINT_MAX )
- tTypedAttr.m_eType = SPH_ATTR_INT64SET;
- *(( int64_t* ) tUpd.m_dPool.AddN ( 2 )) = uVal;
- }
- }
- }
- return true;
- }
- bool sphParseJsonUpdate ( Str_t sUpdate, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
- {
- JsonObj_c tRoot ( sUpdate );
- return ParseJsonUpdate ( tRoot, tStmt, tDocId, sError );
- }
- static bool ParseJsonDelete ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
- {
- tStmt.m_eStmt = STMT_DELETE;
- return ParseUpdateDeleteQueries ( tRoot, true, tStmt, tDocId, sError );
- }
- bool sphParseJsonDelete ( Str_t sDelete, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
- {
- JsonObj_c tRoot ( sDelete );
- return ParseJsonDelete ( tRoot, tStmt, tDocId, sError );
- }
- bool sphParseJsonStatement ( const char * szStmt, SqlStmt_t & tStmt, CSphString & sStmt, CSphString & sQuery, DocID_t & tDocId, CSphString & sError )
- {
- JsonObj_c tRoot ( szStmt );
- if ( !tRoot )
- {
- sError.SetSprintf ( "unable to parse: %s", tRoot.GetErrorPtr() );
- return false;
- }
- JsonObj_c tJsonStmt = tRoot[0];
- if ( !tJsonStmt )
- {
- sError = "no statement found";
- return false;
- }
- sStmt = tJsonStmt.Name();
- if ( !tJsonStmt.IsObj() )
- {
- sError.SetSprintf ( "statement %s should be an object", sStmt.cstr() );
- return false;
- }
- if ( sStmt=="index" || sStmt=="replace" )
- {
- if ( !ParseJsonInsert ( tJsonStmt, tStmt, tDocId, true, sError ) )
- return false;
- } else if ( sStmt=="create" || sStmt=="insert" )
- {
- if ( !ParseJsonInsert ( tJsonStmt, tStmt, tDocId, false, sError ) )
- return false;
- } else if ( sStmt=="update" )
- {
- if ( !ParseJsonUpdate ( tJsonStmt, tStmt, tDocId, sError ) )
- return false;
- } else if ( sStmt=="delete" )
- {
- if ( !ParseJsonDelete ( tJsonStmt, tStmt, tDocId, sError ) )
- return false;
- } else
- {
- sError.SetSprintf ( "unknown bulk operation: %s", sStmt.cstr() );
- return false;
- }
- sQuery = tJsonStmt.AsString();
- return true;
- }
- //////////////////////////////////////////////////////////////////////////
- static void PackedShortMVA2Json ( StringBuilder_c & tOut, const BYTE * pMVA )
- {
- auto dMVA = sphUnpackPtrAttr ( pMVA );
- auto nValues = dMVA.second / sizeof ( DWORD );
- auto pValues = ( const DWORD * ) dMVA.first;
- for ( int i = 0; i<(int) nValues; ++i )
- tOut.NtoA(pValues[i]);
- }
- static void PackedWideMVA2Json ( StringBuilder_c & tOut, const BYTE * pMVA )
- {
- auto dMVA = sphUnpackPtrAttr ( pMVA );
- auto nValues = dMVA.second / sizeof ( int64_t );
- auto pValues = ( const int64_t * ) dMVA.first;
- for ( int i = 0; i<(int) nValues; ++i )
- tOut.NtoA(pValues[i]);
- }
- static void PackedFloatVec2Json ( StringBuilder_c & tOut, const BYTE * pFV )
- {
- auto tFV = sphUnpackPtrAttr(pFV);
- int iNumValues = tFV.second / sizeof(float);
- auto pValues = (const float *)tFV.first;
- for ( int i = 0; i<iNumValues; i++ )
- tOut.FtoA(pValues[i]);
- }
- static void JsonObjAddAttr ( JsonEscapedBuilder & tOut, ESphAttr eAttrType, const CSphMatch & tMatch, const CSphAttrLocator & tLoc, int iMulti=1 )
- {
- switch ( eAttrType )
- {
- case SPH_ATTR_INTEGER:
- case SPH_ATTR_TIMESTAMP:
- case SPH_ATTR_TOKENCOUNT:
- case SPH_ATTR_BIGINT:
- tOut.NtoA ( tMatch.GetAttr(tLoc) * iMulti );
- break;
- case SPH_ATTR_UINT64:
- tOut.NtoA ( (uint64_t)tMatch.GetAttr(tLoc) * iMulti );
- break;
- case SPH_ATTR_FLOAT:
- tOut.FtoA ( tMatch.GetAttrFloat(tLoc) * iMulti );
- break;
- case SPH_ATTR_DOUBLE:
- tOut.DtoA ( tMatch.GetAttrDouble(tLoc) * iMulti );
- break;
- case SPH_ATTR_BOOL:
- tOut << ( tMatch.GetAttr ( tLoc ) ? "true" : "false" );
- break;
- case SPH_ATTR_UINT32SET_PTR:
- case SPH_ATTR_INT64SET_PTR:
- case SPH_ATTR_FLOAT_VECTOR_PTR:
- {
- auto _ = tOut.Array ();
- const auto * pMVA = ( const BYTE * ) tMatch.GetAttr ( tLoc );
- if ( eAttrType==SPH_ATTR_UINT32SET_PTR )
- PackedShortMVA2Json ( tOut, pMVA );
- else if ( eAttrType==SPH_ATTR_INT64SET_PTR )
- PackedWideMVA2Json ( tOut, pMVA );
- else
- PackedFloatVec2Json ( tOut, pMVA );
- }
- break;
- case SPH_ATTR_STRINGPTR:
- {
- const auto * pString = ( const BYTE * ) tMatch.GetAttr ( tLoc );
- auto dString = sphUnpackPtrAttr ( pString );
- // special process for legacy typed strings
- if ( dString.second>1 && dString.first[dString.second-2]=='\0')
- {
- auto uSubtype = dString.first[dString.second-1];
- dString.second -= 2;
- switch ( uSubtype)
- {
- case 1: // ql
- {
- ScopedComma_c sBrackets ( tOut, nullptr, R"({"ql":)", "}" );
- tOut.AppendEscapedWithComma (( const char* ) dString.first, dString.second);
- break;
- }
- case 0: // json
- tOut << ( const char* ) dString.first;
- break;
- default:
- tOut.Sprintf ("\"internal error! wrong subtype of stringptr %d\"", uSubtype );
- }
- break;
- }
- tOut.AppendEscapedWithComma ( ( const char * ) dString.first, dString.second );
- }
- break;
- case SPH_ATTR_JSON_PTR:
- {
- const auto * pJSON = ( const BYTE * ) tMatch.GetAttr ( tLoc );
- auto dJson = sphUnpackPtrAttr ( pJSON );
- // no object at all? return NULL
- if ( IsEmpty ( dJson ) )
- tOut << "null";
- else
- sphJsonFormat ( tOut, dJson.first );
- }
- break;
- case SPH_ATTR_FACTORS:
- case SPH_ATTR_FACTORS_JSON:
- {
- const auto * pFactors = ( const BYTE * ) tMatch.GetAttr ( tLoc );
- auto dFactors = sphUnpackPtrAttr ( pFactors );
- if ( IsEmpty ( dFactors ))
- tOut << "null";
- else
- sphFormatFactors ( tOut, (const unsigned int *) dFactors.first, true );
- }
- break;
- case SPH_ATTR_JSON_FIELD_PTR:
- {
- const auto * pField = ( const BYTE * ) tMatch.GetAttr ( tLoc );
- auto dField = sphUnpackPtrAttr ( pField );
- if ( IsEmpty ( dField ))
- {
- tOut << "null";
- break;
- }
- auto eJson = ESphJsonType ( *dField.first++ );
- if ( eJson==JSON_NULL )
- tOut << "null";
- else
- sphJsonFieldFormat ( tOut, dField.first, eJson, true );
- }
- break;
- default:
- assert ( 0 && "Unknown attribute" );
- break;
- }
- }
- static void JsonObjAddAttr ( JsonEscapedBuilder & tOut, ESphAttr eAttrType, const char * szCol, const CSphMatch & tMatch, const CSphAttrLocator & tLoc )
- {
- assert ( sphPlainAttrToPtrAttr ( eAttrType )==eAttrType );
- tOut.AppendName ( szCol );
- JsonObjAddAttr ( tOut, eAttrType, tMatch, tLoc );
- }
- static bool IsHighlightAttr ( const CSphString & sName )
- {
- return sName.Begins ( g_szHighlight );
- }
- static CSphString GetJoinedWeightName ( const CSphQuery & tQuery )
- {
- CSphString sWeight;
- sWeight.SetSprintf ( "%s.weight()", tQuery.m_sJoinIdx.cstr() );
- return sWeight;
- }
- static bool IsJoinedWeight ( const CSphString & sAttr, const CSphQuery & tQuery )
- {
- if ( tQuery.m_sJoinIdx.IsEmpty() )
- return false;
- return sAttr==GetJoinedWeightName(tQuery);
- }
- static bool NeedToSkipAttr ( const CSphString & sName, const CSphQuery & tQuery )
- {
- const char * szName = sName.cstr();
- if ( szName[0]=='i' && szName[1]=='d' && szName[2]=='\0' ) return true;
- if ( sName.Begins ( g_szHighlight ) ) return true;
- if ( sName.Begins ( GetFilterAttrPrefix() ) ) return true;
- if ( sName.Begins ( g_szOrder ) ) return true;
- if ( sName.Begins ( GetKnnDistAttrName() ) ) return true;
- if ( IsJoinedWeight ( sName, tQuery ) ) return true;
- if ( !tQuery.m_dIncludeItems.GetLength() && !tQuery.m_dExcludeItems.GetLength () )
- return false;
- // empty include - shows all select list items
- // exclude with only "*" - skip all select list items
- bool bInclude = ( tQuery.m_dIncludeItems.GetLength()==0 );
- for ( const auto &iItem: tQuery.m_dIncludeItems )
- {
- if ( sphWildcardMatch ( szName, iItem.cstr() ) )
- {
- bInclude = true;
- break;
- }
- }
- if ( bInclude && tQuery.m_dExcludeItems.GetLength() )
- {
- for ( const auto& iItem: tQuery.m_dExcludeItems )
- {
- if ( sphWildcardMatch ( szName, iItem.cstr() ) )
- {
- bInclude = false;
- break;
- }
- }
- }
- return !bInclude;
- }
- namespace { // static
- void EncodeHighlight ( const CSphMatch & tMatch, int iAttr, const ISphSchema & tSchema, JsonEscapedBuilder & tOut )
- {
- const CSphColumnInfo & tCol = tSchema.GetAttr(iAttr);
- ScopedComma_c tHighlightComma ( tOut, ",", R"("highlight":{)", "}", false );
- auto dSnippet = sphUnpackPtrAttr ((const BYTE *) tMatch.GetAttr ( tCol.m_tLocator ));
- SnippetResult_t tRes = UnpackSnippetData ( dSnippet );
- for ( const auto & tField : tRes.m_dFields )
- {
- tOut.AppendName ( tField.m_sName.cstr() );
- ScopedComma_c tHighlight ( tOut, ",", "[", "]", false );
- // we might want to add passage separators to field text here
- for ( const auto & tPassage : tField.m_dPassages )
- tOut.AppendEscapedWithComma ( (const char *)tPassage.m_dText.Begin(), tPassage.m_dText.GetLength() );
- }
- }
- static const char * GetName ( const CSphString & sName )
- {
- return sName.cstr();
- }
- static const char * GetName ( const JsonDocField_t & tDF )
- {
- return tDF.m_sName.cstr();
- }
- template <typename T>
- void EncodeFields ( const CSphVector<T> & dFields, const AggrResult_t & tRes, const CSphMatch & tMatch, const ISphSchema & tSchema, bool bValArray, const char * sPrefix, const char * sEnd, JsonEscapedBuilder & tOut )
- {
- JsonEscapedBuilder tDFVal;
- tOut.StartBlock ( ",", sPrefix, sEnd );
- for ( const T & tDF : dFields )
- {
- const CSphColumnInfo * pCol = tSchema.GetAttr ( GetName ( tDF ) );
- if ( !pCol )
- {
- tOut += R"("Default")";
- continue;
- }
- // FIXME!!! add format support
- tDFVal.Clear();
- JsonObjAddAttr ( tDFVal, pCol->m_eAttrType, tMatch, pCol->m_tLocator );
- if ( bValArray )
- tOut.Sprintf ( "%s", tDFVal.cstr() );
- else
- tOut.Sprintf ( R"("%s":["%s"])", GetName ( tDF ), tDFVal.cstr() );
- }
- tOut.FinishBlock ( false ); // close obj
- }
- struct CompositeLocator_t
- {
- ESphAttr m_eAttrType = SPH_ATTR_NONE;
- CSphAttrLocator m_tLocator;
- const char * m_sName = nullptr;
- CompositeLocator_t ( const CSphColumnInfo & tCol, const char * sName )
- : m_eAttrType ( tCol.m_eAttrType )
- , m_tLocator ( tCol.m_tLocator )
- , m_sName ( sName )
- {}
- CompositeLocator_t() = default;
- };
- struct AggrKeyTrait_t
- {
- const CSphColumnInfo * m_pKey = nullptr;
- CSphVector<CompositeLocator_t> m_dCompositeKeys;
- bool m_bKeyed = false;
- RangeNameHash_t m_tRangeNames;
- };
- static bool GetAggrKey ( const JsonAggr_t & tAggr, const CSphSchema & tSchema, int iAggrItem, int iNow, AggrKeyTrait_t & tRes )
- {
- if ( tAggr.m_eAggrFunc==Aggr_e::NONE )
- {
- tRes.m_pKey = tSchema.GetAttr ( tAggr.m_sCol.cstr() );
- } else if ( tAggr.m_eAggrFunc==Aggr_e::COMPOSITE )
- {
- for ( const auto & tItem : tAggr.m_dComposite )
- {
- const CSphColumnInfo * pCol = tSchema.GetAttr ( tItem.m_sColumn.cstr() );
- CSphString sJsonCol;
- if ( !pCol && sphJsonNameSplit ( tItem.m_sColumn.cstr(), nullptr, &sJsonCol ) )
- pCol = tSchema.GetAttr ( sJsonCol.cstr() );
-
- if ( !pCol )
- return false;
- tRes.m_dCompositeKeys.Add ( CompositeLocator_t ( *pCol, tItem.m_sAlias.cstr() ) );
- }
- } else
- {
- tRes.m_pKey = tSchema.GetAttr ( GetAggrName ( iAggrItem, tAggr.m_sCol ).cstr() );
- switch ( tAggr.m_eAggrFunc )
- {
- case Aggr_e::RANGE:
- GetRangeKeyNames ( tAggr.m_tRange, tRes.m_tRangeNames );
- tRes.m_bKeyed = tAggr.m_tRange.m_bKeyed;
- break;
- case Aggr_e::DATE_RANGE:
- GetRangeKeyNames ( tAggr.m_tDateRange, iNow, tRes.m_tRangeNames );
- tRes.m_bKeyed = tAggr.m_tDateRange.m_bKeyed;
- break;
- case Aggr_e::HISTOGRAM:
- tRes.m_bKeyed = tAggr.m_tHist.m_bKeyed;
- break;
- case Aggr_e::DATE_HISTOGRAM:
- tRes.m_bKeyed = tAggr.m_tDateHist.m_bKeyed;
- break;
- default:
- break;
- }
- }
- return ( tRes.m_pKey || tRes.m_dCompositeKeys.GetLength() );
- }
- static const char * GetBucketPrefix ( const AggrKeyTrait_t & tKey, Aggr_e eAggrFunc, const RangeKeyDesc_t * pRange, const CSphMatch & tMatch, JsonEscapedBuilder & tPrefixBucketBlock )
- {
- const char * sPrefix = "{";
- if ( tKey.m_bKeyed )
- {
- switch ( eAggrFunc )
- {
- case Aggr_e::RANGE:
- case Aggr_e::DATE_RANGE:
- {
- tPrefixBucketBlock.Clear();
- tPrefixBucketBlock.Appendf ( "\"%s\":{", pRange->m_sKey.cstr() );
- sPrefix = tPrefixBucketBlock.cstr();
- }
- break;
- case Aggr_e::HISTOGRAM:
- {
- tPrefixBucketBlock.Clear();
- tPrefixBucketBlock.Appendf ( "\"");
- JsonObjAddAttr ( tPrefixBucketBlock, tKey.m_pKey->m_eAttrType, tMatch, tKey.m_pKey->m_tLocator );
- tPrefixBucketBlock.Appendf ( "\":{" );
- sPrefix = tPrefixBucketBlock.cstr();
- }
- break;
- case Aggr_e::DATE_HISTOGRAM:
- {
- tPrefixBucketBlock.Clear();
- tPrefixBucketBlock.Appendf ( "\"");
- time_t tSrcTime = tMatch.GetAttr ( tKey.m_pKey->m_tLocator );
- FormatDate ( tSrcTime, tPrefixBucketBlock );
- tPrefixBucketBlock.Appendf ( "\":{" );
- sPrefix = tPrefixBucketBlock.cstr();
- }
- break;
- default: break;
- }
- }
- return sPrefix;
- }
- static void PrintKey ( const AggrKeyTrait_t & tKey, Aggr_e eAggrFunc, const RangeKeyDesc_t * pRange, const CSphMatch & tMatch, ResultSetFormat_e eFormat, const sph::StringSet & hDatetime, JsonEscapedBuilder & tBuf, JsonEscapedBuilder & tOut )
- {
- if ( eAggrFunc==Aggr_e::DATE_RANGE )
- {
- if ( !tKey.m_bKeyed )
- tOut.Sprintf ( R"("key":"%s")", pRange->m_sKey.cstr() );
- if ( !pRange->m_sFrom.IsEmpty() )
- tOut.Sprintf ( R"("from":"%s")", pRange->m_sFrom.cstr() );
- if ( !pRange->m_sTo.IsEmpty() )
- tOut.Sprintf ( R"("to":"%s")", pRange->m_sTo.cstr() );
- } else if ( eAggrFunc==Aggr_e::RANGE )
- {
- if ( !tKey.m_bKeyed )
- tOut.Sprintf ( R"("key":"%s")", pRange->m_sKey.cstr() );
- if ( !pRange->m_sFrom.IsEmpty() )
- tOut.Sprintf ( R"("from":%s)", pRange->m_sFrom.cstr() );
- if ( !pRange->m_sTo.IsEmpty() )
- tOut.Sprintf ( R"("to":%s)", pRange->m_sTo.cstr() );
- } else if ( eAggrFunc==Aggr_e::DATE_HISTOGRAM )
- {
- tBuf.Clear();
- JsonObjAddAttr ( tBuf, tKey.m_pKey->m_eAttrType, tMatch, tKey.m_pKey->m_tLocator );
- tOut.Sprintf ( R"("key":%s)", tBuf.cstr() );
- tBuf.Clear();
- time_t tSrcTime = tMatch.GetAttr ( tKey.m_pKey->m_tLocator );
- FormatDate ( tSrcTime, tBuf );
- tOut.Sprintf ( R"("key_as_string":"%s")", tBuf.cstr() );
- } else if ( eAggrFunc==Aggr_e::COMPOSITE )
- {
- ScopedComma_c sBlock ( tOut, ",", R"("key":{)", "}" );
- for ( const auto & tItem : tKey.m_dCompositeKeys )
- JsonObjAddAttr ( tOut, tItem.m_eAttrType, tItem.m_sName, tMatch, tItem.m_tLocator );
- } else if ( eFormat==ResultSetFormat_e::MntSearch )
- {
- JsonObjAddAttr ( tOut, tKey.m_pKey->m_eAttrType, "key", tMatch, tKey.m_pKey->m_tLocator );
- } else
- {
- // FIXME!!! remove after proper data type added but now need to multiple datatime values by 1000 for compat aggs result set
- int iMulti = 1;
- if ( eFormat==ResultSetFormat_e::ES && hDatetime [ tKey.m_pKey->m_sName ] )
- iMulti = 1000;
- tBuf.Clear();
- JsonObjAddAttr ( tBuf, tKey.m_pKey->m_eAttrType, tMatch, tKey.m_pKey->m_tLocator, iMulti );
- tOut.Sprintf ( R"("key":%s)", tBuf.cstr() );
- if ( tKey.m_pKey->m_eAttrType==SPH_ATTR_STRINGPTR )
- tOut.Sprintf ( R"("key_as_string":%s)", tBuf.cstr() );
- else
- tOut.Sprintf ( R"("key_as_string":"%s")", tBuf.cstr() );
- }
- }
- static VecTraits_T<CSphMatch> GetResultMatches ( const VecTraits_T<CSphMatch> & dMatches, const CSphSchema & tSchema, int iOff, int iCount, const JsonAggr_t & tAggr )
- {
- bool bHasCompositeAfter = ( dMatches.GetLength() && tAggr.m_eAggrFunc==Aggr_e::COMPOSITE && tAggr.m_dCompositeAfterKey.GetLength() );
- if ( !bHasCompositeAfter )
- return dMatches.Slice ( iOff, iCount );
- CSphString sError;
- CreateFilterContext_t tCtx;
- tCtx.m_pFilters = &tAggr.m_dCompositeAfterKey;
- tCtx.m_pMatchSchema = &tSchema;
- tCtx.m_bScan = true;
- if ( !sphCreateFilters ( tCtx, sError, sError ) || !sError.IsEmpty() )
- {
- sphWarning ( "failed to create \"after\" filter: %s", sError.cstr() );
- return dMatches.Slice ( iOff, iCount );
- }
- int iFound = dMatches.GetFirst ( [&] ( const CSphMatch & tMatch ) { return tCtx.m_pFilter->Eval ( tMatch ); } );
- if ( iOff<0 )
- return dMatches.Slice ( iOff, iCount );
- else
- return dMatches.Slice ( iFound+1, iCount );
- }
- static bool IsSingleValue ( Aggr_e eAggr )
- {
- return ( eAggr==Aggr_e::MIN || eAggr==Aggr_e::MAX || eAggr==Aggr_e::SUM || eAggr==Aggr_e::AVG );
- }
- static void EncodeAggr ( const JsonAggr_t & tAggr, int iAggrItem, const AggrResult_t & tRes, ResultSetFormat_e eFormat, const sph::StringSet & hDatetime, int iNow, const CSphString & sDistinctName, JsonEscapedBuilder & tOut )
- {
- if ( tAggr.m_eAggrFunc==Aggr_e::COUNT )
- return;
- const CSphColumnInfo * pCount = tRes.m_tSchema.GetAttr ( "count(*)" );
- AggrKeyTrait_t tKey;
- bool bHasKey = GetAggrKey ( tAggr, tRes.m_tSchema, iAggrItem, iNow, tKey );
- const CSphColumnInfo * pDistinct = nullptr;
- if ( !sDistinctName.IsEmpty() )
- pDistinct = tRes.m_tSchema.GetAttr ( sDistinctName.cstr() );
- // might be null for empty result set
- auto dMatches = GetResultMatches ( tRes.m_dResults.First().m_dMatches, tRes.m_tSchema, tRes.m_iOffset, tRes.m_iCount, tAggr );
- CSphString sBucketName;
- sBucketName.SetSprintf ( R"("%s":{)", tAggr.m_sBucketName.cstr() );
- tOut.StartBlock ( ",", sBucketName.cstr(), "}" );
- // aggr.significant
- switch ( tAggr.m_eAggrFunc )
- {
- case Aggr_e::SIGNIFICANT: // FIXME!!! add support
- tOut.Appendf ( "\"doc_count\":" INT64_FMT ",", tRes.m_iTotalMatches );
- tOut.Appendf ( "\"bg_count\":" INT64_FMT ",", tRes.m_iTotalMatches );
- break;
- default: break;
- }
- // after_key for aggr.composite
- if ( bHasKey && pCount && tAggr.m_eAggrFunc==Aggr_e::COMPOSITE && dMatches.GetLength() )
- {
- tOut.StartBlock ( ",", R"("after_key":{)", "}" );
- for ( const auto & tItem : tKey.m_dCompositeKeys )
- JsonObjAddAttr ( tOut, tItem.m_eAttrType, tItem.m_sName, dMatches.Last(), tItem.m_tLocator );
- tOut.FinishBlock ( false ); // named bucket obj
- }
- if ( !IsSingleValue ( tAggr.m_eAggrFunc ) )
- {
- // buckets might be named objects or array
- if ( tKey.m_bKeyed )
- tOut.StartBlock ( ",", R"("buckets":{)", "}" );
- else
- tOut.StartBlock ( ",", R"("buckets":[)", "]" );
- // might be null for empty result set
- if ( bHasKey && pCount )
- {
- JsonEscapedBuilder tPrefixBucketBlock;
- JsonEscapedBuilder tBufMatch;
- for ( const CSphMatch & tMatch : dMatches )
- {
- RangeKeyDesc_t * pRange = nullptr;
- if ( tAggr.m_eAggrFunc==Aggr_e::RANGE || tAggr.m_eAggrFunc==Aggr_e::DATE_RANGE )
- {
- int iBucket = tMatch.GetAttr ( tKey.m_pKey->m_tLocator );
- pRange = tKey.m_tRangeNames ( iBucket );
- // lets skip bucket with out of ranges index, ie _all
- if ( !pRange )
- continue;
- }
- // bucket item is array item or dict item
- const char * sBucketPrefix = GetBucketPrefix ( tKey, tAggr.m_eAggrFunc, pRange, tMatch, tPrefixBucketBlock );
- ScopedComma_c sBucketBlock ( tOut, ",", sBucketPrefix, "}" );
- PrintKey ( tKey, tAggr.m_eAggrFunc, pRange, tMatch, eFormat, hDatetime, tBufMatch, tOut );
- JsonObjAddAttr ( tOut, pCount->m_eAttrType, "doc_count", tMatch, pCount->m_tLocator );
- // FIXME!!! add support
- if ( tAggr.m_eAggrFunc==Aggr_e::SIGNIFICANT )
- {
- tOut.Sprintf ( R"("score":0.001)" );
- JsonObjAddAttr ( tOut, pCount->m_eAttrType, "bg_count", tMatch, pCount->m_tLocator );
- }
- if ( pDistinct )
- JsonObjAddAttr ( tOut, pDistinct->m_eAttrType, pDistinct->m_sName.cstr(), tMatch, pDistinct->m_tLocator );
- }
- }
-
- tOut.FinishBlock ( false ); // buckets array
- } else
- {
- if ( bHasKey && pCount && dMatches.GetLength() )
- {
- const CSphMatch & tMatch = dMatches[0];
- JsonObjAddAttr ( tOut, tKey.m_pKey->m_eAttrType, "value", tMatch, tKey.m_pKey->m_tLocator );
- }
- }
- tOut.FinishBlock ( false ); // named bucket obj
- }
- void JsonRenderAccessSpecs ( JsonEscapedBuilder & tRes, const bson::Bson_c & tBson, bool bWithZones )
- {
- using namespace bson;
- {
- ScopedComma_c sFieldsArray ( tRes, ",", "\"fields\":[", "]" );
- Bson_c ( tBson.ChildByName ( SZ_FIELDS ) ).ForEach ( [&tRes] ( const NodeHandle_t & tNode ) {
- tRes.AppendEscapedWithComma ( String ( tNode ).cstr() );
- } );
- }
- int iPos = (int)Int ( tBson.ChildByName ( SZ_MAX_FIELD_POS ) );
- if ( iPos )
- tRes.Sprintf ( "\"max_field_pos\":%d", iPos );
- if ( !bWithZones )
- return;
- auto tZones = tBson.GetFirstOf ( { SZ_ZONES, SZ_ZONESPANS } );
- ScopedComma_c dZoneDelim ( tRes, ", ", ( tZones.first==1 ) ? "\"zonespans\":[" : "\"zones\":[", "]" );
- Bson_c ( tZones.second ).ForEach ( [&tRes] ( const NodeHandle_t & tNode ) {
- tRes << String ( tNode );
- } );
- }
- bool JsonRenderKeywordNode ( JsonEscapedBuilder & tRes, const bson::Bson_c& tBson )
- {
- using namespace bson;
- auto tWord = tBson.ChildByName ( SZ_WORD );
- if ( IsNullNode ( tWord ) )
- return false;
- ScopedComma_c sRoot ( tRes.Object() );
- tRes << R"("type":"KEYWORD")";
- tRes << "\"word\":";
- tRes.AppendEscapedSkippingComma ( String ( tWord ).cstr () );
- tRes.Sprintf ( R"("querypos":%d)", Int ( tBson.ChildByName ( SZ_QUERYPOS ) ) );
- if ( Bool ( tBson.ChildByName ( SZ_EXCLUDED ) ) )
- tRes << R"("excluded":true)";
- if ( Bool ( tBson.ChildByName ( SZ_EXPANDED ) ) )
- tRes << R"("expanded":true)";
- if ( Bool ( tBson.ChildByName ( SZ_FIELD_START ) ) )
- tRes << R"("field_start":true)";
- if ( Bool ( tBson.ChildByName ( SZ_FIELD_END ) ) )
- tRes << R"("field_end":true)";
- if ( Bool ( tBson.ChildByName ( SZ_FIELD_END ) ) )
- tRes << R"("morphed":true)";
- auto tBoost = tBson.ChildByName ( SZ_BOOST );
- if ( !IsNullNode ( tBoost ) )
- {
- auto fBoost = Double ( tBoost );
- if ( fBoost!=1.0f ) // really comparing floats?
- tRes.Sprintf ( R"("boost":%f)", fBoost );
- }
- return true;
- }
- void FormatJsonPlanFromBson ( JsonEscapedBuilder& tOut, bson::NodeHandle_t dBson, PLAN_FLAVOUR ePlanFlavour )
- {
- using namespace bson;
- if ( dBson==nullnode )
- return;
- if ( ePlanFlavour == PLAN_FLAVOUR::EDESCR )
- {
- auto dRootBlock = tOut.ObjectBlock();
- tOut << "\"description\":";
- tOut.AppendEscapedSkippingComma ( sph::RenderBsonPlanBrief ( dBson ).cstr() );
- tOut.FinishBlocks ( dRootBlock );
- return;
- }
- Bson_c tBson ( dBson );
- if ( JsonRenderKeywordNode ( tOut, tBson) )
- return;
- auto dRootBlock = tOut.ObjectBlock();
- tOut << "\"type\":";
- tOut.AppendEscapedSkippingComma ( String ( tBson.ChildByName ( SZ_TYPE ) ).cstr() );
- if ( ePlanFlavour==PLAN_FLAVOUR::EBOTH )
- {
- tOut << "\"description\":";
- tOut.AppendEscapedSkippingComma ( sph::RenderBsonPlanBrief ( dBson ).cstr () );
- }
- Bson_c ( tBson.ChildByName ( SZ_OPTIONS ) ).ForEach ( [&tOut] ( CSphString&& sName, const NodeHandle_t & tNode ) {
- tOut.Sprintf ( R"("options":"%s=%d")", sName.cstr (), (int) Int ( tNode ) );
- } );
- JsonRenderAccessSpecs ( tOut, dBson, true );
- tOut.StartBlock ( ",", "\"children\":[", "]" );
- Bson_c ( tBson.ChildByName ( SZ_CHILDREN ) ).ForEach ( [&] ( const NodeHandle_t & tNode ) {
- FormatJsonPlanFromBson ( tOut, tNode, ePlanFlavour );
- } );
- tOut.FinishBlocks ( dRootBlock );
- }
- } // static
- CSphString JsonEncodeResultError ( const CSphString & sError, int iStatus )
- {
- JsonEscapedBuilder tOut;
- CSphString sResult;
- tOut.StartBlock ( ",", "{ \"error\":", "}" );
- tOut.AppendEscaped ( sError.cstr(), EscBld::eEscape );
- tOut.AppendName ( "status" );
- tOut << iStatus;
- tOut.FinishBlock ( false );
- tOut.MoveTo ( sResult ); // since simple return tOut.cstr() will cause copy of string, then returning it.
- return sResult;
- }
- static CSphString JsonEncodeResultError ( const CSphString & sError, const char * sErrorType=nullptr, int * pStatus=nullptr, const char * sIndex=nullptr )
- {
- JsonEscapedBuilder tOut;
- CSphString sResult;
- tOut.StartBlock ( ",", "{", "}" );
- tOut.StartBlock ( ",", R"("error":{)", "}" );
- tOut.AppendName ( "type" );
- tOut.AppendEscaped ( ( sErrorType ? sErrorType : "Error" ), EscBld::eEscape );
- tOut.AppendName ( "reason" );
- tOut.AppendEscaped ( sError.cstr(), EscBld::eEscape );
- if ( sIndex )
- {
- tOut.AppendName ( "table" );
- tOut.AppendEscaped ( sIndex, EscBld::eEscape );
- }
- tOut.FinishBlock ( false );
- if ( pStatus )
- {
- tOut.AppendName ( "status" );
- tOut << *pStatus;
- }
- tOut.FinishBlock ( false );
- tOut.MoveTo ( sResult ); // since simple return tOut.cstr() will cause copy of string, then returning it.
- return sResult;
- }
- CSphString JsonEncodeResultError ( const CSphString & sError, const char * sErrorType, int iStatus )
- {
- return JsonEncodeResultError ( sError, sErrorType, &iStatus );
- }
- CSphString JsonEncodeResultError ( const CSphString & sError, const char * sErrorType, int iStatus, const char * sIndex )
- {
- return JsonEncodeResultError ( sError, sErrorType, &iStatus, sIndex );
- }
- CSphString HandleShowProfile ( const QueryProfile_c& p )
- {
- #define SPH_QUERY_STATE( _name, _desc ) _desc,
- static const char* dStates[SPH_QSTATE_TOTAL] = { SPH_QUERY_STATES };
- #undef SPH_QUERY_STATES
- JsonEscapedBuilder sProfile;
- int64_t tmTotal = 0;
- int iCount = 0;
- for ( int i = 0; i < SPH_QSTATE_TOTAL; ++i )
- {
- if ( p.m_dSwitches[i] <= 0 )
- continue;
- tmTotal += p.m_tmTotal[i];
- iCount += p.m_dSwitches[i];
- }
- {
- auto arrayw = sProfile.ArrayW();
- for ( int i = 0; i < SPH_QSTATE_TOTAL; ++i )
- {
- if ( p.m_dSwitches[i] <= 0 )
- continue;
- auto _ = sProfile.ObjectW();
- sProfile.NamedString ( "status", dStates[i] );
- sProfile.NamedVal ( "duration", FixedFrac_T<int64_t, 6> ( p.m_tmTotal[i] ) );
- sProfile.NamedVal ( "switches", p.m_dSwitches[i] );
- sProfile.NamedVal ( "percent", FixedFrac_T<int64_t, 2> ( PercentOf ( p.m_tmTotal[i], tmTotal, 2 ) ) );
- }
- {
- auto _ = sProfile.ObjectW();
- sProfile.NamedString ( "status", "total" );
- sProfile.NamedVal ( "duration", FixedFrac_T<int64_t, 6> ( tmTotal ) );
- sProfile.NamedVal ( "switches", iCount );
- sProfile.NamedVal ( "percent", FixedFrac_T<int64_t, 2> ( PercentOf ( tmTotal, tmTotal, 2 ) ) );
- }
- }
- return (CSphString)sProfile;
- }
- static void AddJoinedWeight ( JsonEscapedBuilder & tOut, const CSphQuery & tQuery, const CSphMatch & tMatch, const CSphColumnInfo * pJoinedWeightAttr )
- {
- if ( !pJoinedWeightAttr )
- return;
- tOut.Sprintf ( R"("%s._score":%d)", tQuery.m_sJoinIdx.cstr(), (int)tMatch.GetAttr ( pJoinedWeightAttr->m_tLocator ) );
- }
- CSphString sphEncodeResultJson ( const VecTraits_T<AggrResult_t>& dRes, const JsonQuery_c & tQuery, QueryProfile_c * pProfile, ResultSetFormat_e eFormat )
- {
- assert ( dRes.GetLength()>=1 );
- const AggrResult_t & tRes = dRes[0];
- if ( !tRes.m_iSuccesses )
- return JsonEncodeResultError ( tRes.m_sError );
- JsonEscapedBuilder tOut;
- CSphString sResult;
- tOut.ObjectBlock();
- tOut.Sprintf (R"("took":%d,"timed_out":false)", tRes.m_iQueryTime);
- if ( !tRes.m_sWarning.IsEmpty() )
- {
- tOut.StartBlock ( nullptr, R"("warning":{"reason":)", "}" );
- tOut.AppendEscapedWithComma ( tRes.m_sWarning.cstr () );
- tOut.FinishBlock ( false );
- }
- if ( eFormat==ResultSetFormat_e::ES )
- tOut += R"("_shards":{ "total": 1, "successful": 1, "skipped": 0, "failed": 0 })";
- auto sHitMeta = tOut.StartBlock ( ",", R"("hits":{)", "}" );
- tOut.Sprintf ( R"("total":%d)", tRes.m_iTotalMatches );
- tOut.Sprintf ( R"("total_relation":%s)", tRes.m_bTotalMatchesApprox ? R"("gte")" : R"("eq")" );
- if ( eFormat==ResultSetFormat_e::ES )
- tOut += R"("max_score": null)";
- const ISphSchema & tSchema = tRes.m_tSchema;
- CSphVector<BYTE> dTmp;
- CSphBitvec tAttrsToSend;
- sphGetAttrsToSend ( tSchema, false, true, tAttrsToSend );
- const CSphColumnInfo * pJoinedWeightAttr = tQuery.m_sJoinIdx.IsEmpty() ? nullptr : tSchema.GetAttr ( GetJoinedWeightName(tQuery).cstr() );
- int iHighlightAttr = -1;
- int nSchemaAttrs = tSchema.GetAttrsCount();
- CSphBitvec dSkipAttrs ( nSchemaAttrs );
- for ( int iAttr=0; iAttr<nSchemaAttrs; iAttr++ )
- {
- if ( !tAttrsToSend.BitGet(iAttr) )
- continue;
- const CSphColumnInfo & tCol = tSchema.GetAttr(iAttr);
- const CSphString & sName = tCol.m_sName;
- if ( IsHighlightAttr ( sName ) )
- iHighlightAttr = iAttr;
- if ( NeedToSkipAttr ( sName, tQuery ) )
- dSkipAttrs.BitSet ( iAttr );
- if ( eFormat==ResultSetFormat_e::ES && tCol.m_eAttrType==SPH_ATTR_TOKENCOUNT )
- dSkipAttrs.BitSet ( iAttr );
- }
- tOut.StartBlock ( ",", R"("hits":[)", "]" );
- if ( !tQuery.m_bGroupEmulation )
- {
- const CSphColumnInfo * pId = tSchema.GetAttr ( sphGetDocidName() );
- const CSphColumnInfo * pKNNDist = tSchema.GetAttr ( GetKnnDistAttrName() );
- bool bCompatId = false;
- const CSphColumnInfo * pCompatRaw = nullptr;
- const CSphColumnInfo * pCompatVer = nullptr;
- if ( eFormat==ResultSetFormat_e::ES )
- {
- const CSphColumnInfo * pCompatId = tSchema.GetAttr ( "_id" );
- if ( pCompatId )
- {
- bCompatId = true;
- pId = pCompatId;
- }
- pCompatRaw = tSchema.GetAttr ( "_raw" );
- pCompatVer = tSchema.GetAttr ( "_version" );
- }
- bool bTag = tRes.m_bTagsAssigned;
- int iTag = ( bTag ? 0 : tRes.m_dResults.First().m_iTag );
- auto dMatches = tRes.m_dResults.First ().m_dMatches.Slice ( tRes.m_iOffset, tRes.m_iCount );
- for ( const auto & tMatch : dMatches )
- {
- ScopedComma_c sQueryComma ( tOut, ",", "{", "}" );
- // note, that originally there is string UID, so we just output number in quotes for docid here
- // number in quotes in compat mode or just number for _id
- if ( bCompatId || ( eFormat==ResultSetFormat_e::ES ) )
- {
- DocID_t tDocID = tMatch.GetAttr ( pId->m_tLocator );
- tOut.Sprintf ( R"("_id":"%llu","_score":%d)", tDocID, tMatch.m_iWeight );
- }
- else if ( pId )
- {
- DocID_t tDocID = tMatch.GetAttr ( pId->m_tLocator );
- tOut.Sprintf ( R"("_id":%U,"_score":%d)", tDocID, tMatch.m_iWeight );
- }
- else
- tOut.Sprintf ( R"("_score":%d)", tMatch.m_iWeight );
- AddJoinedWeight ( tOut, tQuery, tMatch, pJoinedWeightAttr );
- if ( eFormat==ResultSetFormat_e::ES )
- {
- tOut.Sprintf ( R"("_index":"%s")", tRes.m_dIndexNames[bTag ? tMatch.m_iTag : iTag].scstr() ); // FIXME!!! breaks for multiple indexes
- tOut += R"("_type": "doc")";
- if ( pCompatVer )
- JsonObjAddAttr ( tOut, pCompatVer->m_eAttrType, "_version", tMatch, pCompatVer->m_tLocator );
- else
- tOut += R"("_version": 1)";
- }
- if ( pKNNDist )
- tOut.Sprintf( R"("_knn_dist":%f)", tMatch.GetAttrFloat ( pKNNDist->m_tLocator ) );
- tOut.StartBlock ( ",", "\"_source\":{", "}");
- if ( pCompatRaw )
- JsonObjAddAttr ( tOut, pCompatRaw->m_eAttrType, "_raw", tMatch, pCompatRaw->m_tLocator );
- else
- for ( int iAttr=0; iAttr<nSchemaAttrs; iAttr++ )
- {
- if ( !tAttrsToSend.BitGet(iAttr) )
- continue;
- if ( dSkipAttrs.BitGet ( iAttr ) )
- continue;
- const CSphColumnInfo & tCol = tSchema.GetAttr(iAttr);
- JsonObjAddAttr ( tOut, tCol.m_eAttrType, tCol.m_sName.cstr(), tMatch, tCol.m_tLocator );
- }
- tOut.FinishBlock ( false ); // _source obj
- if ( iHighlightAttr!=-1 )
- EncodeHighlight ( tMatch, iHighlightAttr, tSchema, tOut );
- if ( eFormat==ResultSetFormat_e::ES )
- {
- if ( tQuery.m_dDocFields.GetLength() )
- EncodeFields ( tQuery.m_dDocFields, tRes, tMatch, tSchema, false, R"("fields":{)", "}", tOut );
- if ( tQuery.m_dSortFields.GetLength() )
- EncodeFields ( tQuery.m_dSortFields, tRes, tMatch, tSchema, true, R"("sort":[)", "]", tOut );
- }
- }
- }
- tOut.FinishBlocks ( sHitMeta, false ); // hits array, hits meta
- if ( tQuery.m_bGroupEmulation || dRes.GetLength()>1 )
- {
- sph::StringSet hDatetime;
- if ( eFormat==ResultSetFormat_e::ES )
- {
- tQuery.m_dDocFields.for_each ( [&hDatetime]( const auto & tDocfield )
- {
- if ( tDocfield.m_bDateTime )
- hDatetime.Add ( tDocfield.m_sName );
- });
- }
- CSphString sDistinctName;
- tQuery.m_dItems.any_of ( [&]( const CSphQueryItem & tItem ) {
- if ( tItem.m_sExpr=="@distinct" )
- {
- sDistinctName = tItem.m_sAlias;
- return true;
- }
- return false;
- });
- if ( tQuery.m_bGroupEmulation )
- {
- tOut.StartBlock ( ",", R"("aggregations":{)", "}");
- EncodeAggr ( tQuery.m_dAggs[0], 1, dRes[0], eFormat, hDatetime, tQuery.m_iNow, sDistinctName, tOut );
- tOut.FinishBlock ( false ); // aggregations obj
- } else
- {
- assert ( dRes.GetLength()==tQuery.m_dAggs.GetLength()+1 );
- tOut.StartBlock ( ",", R"("aggregations":{)", "}");
- ARRAY_FOREACH ( i, tQuery.m_dAggs )
- EncodeAggr ( tQuery.m_dAggs[i], i, dRes[i+1], eFormat, hDatetime, tQuery.m_iNow, sDistinctName, tOut );
- tOut.FinishBlock ( false ); // aggregations obj
- }
- }
- CSphString sScroll;
- if ( dRes.GetLength() && FormatScrollSettings ( dRes.Last(), tQuery, sScroll ) )
- tOut.Sprintf ( R"("scroll":"%s")", sScroll.cstr() );
- if ( eFormat==ResultSetFormat_e::ES )
- tOut += R"("status": 200)";
- if ( pProfile && pProfile->m_bNeedProfile )
- {
- auto sProfile = HandleShowProfile ( *pProfile );
- tOut.Sprintf ( R"("profile":{"query":%s})", sProfile.cstr () );
- }
- if ( pProfile && pProfile->m_eNeedPlan != PLAN_FLAVOUR::ENONE )
- {
- JsonEscapedBuilder sPlan;
- FormatJsonPlanFromBson ( sPlan, bson::MakeHandle ( pProfile->m_dPlan ), pProfile->m_eNeedPlan );
- if ( sPlan.IsEmpty() )
- tOut << R"("plan":null)";
- else
- tOut.Sprintf ( R"("plan":{"query":%s})", sPlan.cstr() );
- }
- tOut.FinishBlocks (); tOut.MoveTo ( sResult ); return sResult;
- }
- JsonObj_c sphEncodeInsertResultJson ( const char * szIndex, bool bReplace, DocID_t tDocId, ResultSetFormat_e eFormat )
- {
- JsonObj_c tObj;
- tObj.AddStr ( ( eFormat==ResultSetFormat_e::ES ? "_index" : "table" ), szIndex );
- tObj.AddUint ( "id", tDocId );
- tObj.AddBool ( "created", !bReplace );
- tObj.AddStr ( "result", bReplace ? "updated" : "created" );
- tObj.AddInt ( "status", bReplace ? 200 : 201 );
- return tObj;
- }
- JsonObj_c sphEncodeTxnResultJson ( const char* szIndex, DocID_t tDocId, int iInserts, int iDeletes, int iUpdates, ResultSetFormat_e eFormat )
- {
- JsonObj_c tObj;
- tObj.AddStr ( ( eFormat==ResultSetFormat_e::ES ? "_index" : "table" ), szIndex );
- tObj.AddInt ( "_id", tDocId );
- tObj.AddInt ( "created", iInserts );
- tObj.AddInt ( "deleted", iDeletes );
- tObj.AddInt ( "updated", iUpdates );
- bool bReplaced = (iInserts!=0 && iDeletes!=0);
- tObj.AddStr ( "result", bReplaced ? "updated" : "created" );
- tObj.AddInt ( "status", bReplaced ? 200 : 201 );
- return tObj;
- }
- JsonObj_c sphEncodeUpdateResultJson ( const char * szIndex, DocID_t tDocId, int iAffected, ResultSetFormat_e eFormat )
- {
- JsonObj_c tObj;
- tObj.AddStr ( ( eFormat==ResultSetFormat_e::ES ? "_index" : "table" ), szIndex );
- if ( !tDocId )
- tObj.AddInt ( "updated", iAffected );
- else
- {
- tObj.AddInt ( "id", tDocId );
- tObj.AddStr ( "result", iAffected ? "updated" : "noop" );
- }
- return tObj;
- }
- JsonObj_c sphEncodeDeleteResultJson ( const char * szIndex, DocID_t tDocId, int iAffected, ResultSetFormat_e eFormat )
- {
- JsonObj_c tObj;
- tObj.AddStr ( ( eFormat==ResultSetFormat_e::ES ? "_index" : "table" ), szIndex );
- if ( !tDocId )
- tObj.AddInt ( "deleted", iAffected );
- else
- {
- tObj.AddInt ( "id", tDocId );
- tObj.AddBool ( "found", !!iAffected );
- tObj.AddStr ( "result", iAffected ? "deleted" : "not found" );
- }
- return tObj;
- }
- JsonObj_c sphEncodeInsertErrorJson ( const char * szIndex, const char * szError, ResultSetFormat_e eFormat )
- {
- JsonObj_c tObj, tErr;
- tErr.AddStr ( "type", szError );
- tErr.AddStr ( ( eFormat==ResultSetFormat_e::ES ? "_index" : "table" ), szIndex );
- tObj.AddItem ( "error", tErr );
- tObj.AddInt ( "status", HttpGetStatusCodes ( EHTTP_STATUS::_409 ) );
- return tObj;
- }
- bool sphGetResultStats ( const char * szResult, int & iAffected, int & iWarnings, bool bUpdate )
- {
- JsonObj_c tJsonRoot ( szResult );
- if ( !tJsonRoot )
- return false;
- // no warnings in json results for now
- iWarnings = 0;
- if ( tJsonRoot.HasItem("error") )
- {
- iAffected = 0;
- return true;
- }
- // its either update or delete
- CSphString sError;
- JsonObj_c tAffected = tJsonRoot.GetIntItem ( bUpdate ? "updated" : "deleted", sError );
- if ( tAffected )
- {
- iAffected = (int)tAffected.IntVal();
- return true;
- }
- // it was probably a query with an "id"
- JsonObj_c tId = tJsonRoot.GetIntItem ( "id", sError );
- if ( tId )
- {
- iAffected = 1;
- return true;
- }
- return false;
- }
- //////////////////////////////////////////////////////////////////////////
- // Highlight
- static void FormatSnippetOpts ( const CSphString & sQuery, const SnippetQuerySettings_t & tSnippetQuery, CSphQuery & tQuery )
- {
- StringBuilder_c sItem;
- sItem << "HIGHLIGHT(";
- sItem << tSnippetQuery.AsString();
- sItem << ",";
- auto & hFieldHash = tSnippetQuery.m_hPerFieldLimits;
- if ( tSnippetQuery.m_hPerFieldLimits.GetLength() )
- {
- sItem.StartBlock ( ",", "'", "'" );
- for ( const auto& tField : hFieldHash )
- sItem << tField.first;
- sItem.FinishBlock(false);
- }
- else
- sItem << "''";
- if ( !sQuery.IsEmpty() )
- sItem.Appendf ( ",'%s'", sQuery.cstr() );
- sItem << ")";
- CSphQueryItem & tItem = tQuery.m_dItems.Add();
- tItem.m_sExpr = sItem.cstr ();
- tItem.m_sAlias.SetSprintf ( "%s", g_szHighlight );
- }
- static bool ParseFieldsArray ( const JsonObj_c & tFields, SnippetQuerySettings_t & tSettings, CSphString & sError )
- {
- for ( const auto & tField : tFields )
- {
- if ( !tField.IsStr() )
- {
- sError.SetSprintf ( "\"%s\" field should be an string", tField.Name() );
- return false;
- }
- SnippetLimits_t tDefault;
- tSettings.m_hPerFieldLimits.Add( tDefault, tField.StrVal() );
- }
- return true;
- }
- static bool ParseSnippetLimitsElastic ( const JsonObj_c & tSnip, SnippetLimits_t & tLimits, CSphString & sError )
- {
- if ( !tSnip.FetchIntItem ( tLimits.m_iLimit, "fragment_size", sError, true ) ) return false;
- if ( !tSnip.FetchIntItem ( tLimits.m_iLimitPassages, "number_of_fragments", sError, true ) ) return false;
- return true;
- }
- static bool ParseSnippetLimitsSphinx ( const JsonObj_c & tSnip, SnippetLimits_t & tLimits, CSphString & sError )
- {
- if ( !tSnip.FetchIntItem ( tLimits.m_iLimit, "limit", sError, true ) ) return false;
- if ( !tSnip.FetchIntItem ( tLimits.m_iLimitPassages, "limit_passages", sError, true ) ) return false;
- if ( !tSnip.FetchIntItem ( tLimits.m_iLimitPassages, "limit_snippets", sError, true ) ) return false;
- if ( !tSnip.FetchIntItem ( tLimits.m_iLimitWords, "limit_words", sError, true ) ) return false;
- return true;
- }
- static bool ParseFieldsObject ( const JsonObj_c & tFields, SnippetQuerySettings_t & tSettings, CSphString & sError )
- {
- for ( const auto & tField : tFields )
- {
- if ( !tField.IsObj() )
- {
- sError.SetSprintf ( "\"%s\" field should be an object", tField.Name() );
- return false;
- }
- SnippetLimits_t & tLimits = tSettings.m_hPerFieldLimits.AddUnique ( tField.Name() );
- if ( !ParseSnippetLimitsElastic ( tField, tLimits, sError ) )
- return false;
- if ( !ParseSnippetLimitsSphinx ( tField, tLimits, sError ) )
- return false;
- }
- return true;
- }
- static bool ParseSnippetFields ( const JsonObj_c & tSnip, SnippetQuerySettings_t & tSettings, CSphString & sError )
- {
- JsonObj_c tFields = tSnip.GetItem("fields");
- if ( !tFields )
- return true;
- if ( tFields.IsArray() )
- return ParseFieldsArray ( tFields, tSettings, sError );
- if ( tFields.IsObj() )
- return ParseFieldsObject ( tFields, tSettings, sError );
- sError = R"("fields" property value should be an array or an object)";
- return false;
- }
- static bool FetchTags ( const char * sName, const JsonObj_c & tSnip, CSphString & sVal, CSphString & sError )
- {
- JsonObj_c tTag = tSnip.GetItem ( sName );
- if ( !tTag )
- return true;
- if ( tTag.IsStr() )
- {
- sVal = tTag.StrVal();
- return true;
- }
- if ( tTag.IsArray() )
- {
- if ( tTag.Size() )
- sVal = tTag[0].StrVal();
- return true;
- }
- sError.SetSprintf ( R"("%s" property value should be an array or sting)", sName );
- return false;
- }
- static bool ParseSnippetOptsElastic ( const JsonObj_c & tSnip, CSphString & sQuery, SnippetQuerySettings_t & tQuery, CSphString & sError )
- {
- JsonObj_c tEncoder = tSnip.GetStrItem ( "encoder", sError, true );
- if ( tEncoder )
- {
- if ( tEncoder.StrVal()=="html" )
- tQuery.m_sStripMode = "retain";
- }
- else if ( !sError.IsEmpty() )
- return false;
- JsonObj_c tHlQuery = tSnip.GetObjItem ( "highlight_query", sError, true );
- if ( tHlQuery )
- sQuery = tHlQuery.AsString();
- else if ( !sError.IsEmpty() )
- return false;
- if ( !FetchTags ( "pre_tags", tSnip, tQuery.m_sBeforeMatch, sError ) ) return false;
- if ( !FetchTags ( "post_tags", tSnip, tQuery.m_sAfterMatch, sError ) ) return false;
- JsonObj_c tNoMatchSize = tSnip.GetItem ( "no_match_size" );
- if ( tNoMatchSize )
- {
- int iNoMatch = 0;
- if ( !tSnip.FetchIntItem ( iNoMatch, "no_match_size", sError, true ) )
- return false;
- tQuery.m_bAllowEmpty = iNoMatch<1;
- }
- JsonObj_c tOrder = tSnip.GetStrItem ( "order", sError, true );
- if ( tOrder )
- tQuery.m_bWeightOrder = tOrder.StrVal()=="score";
- else if ( !sError.IsEmpty() )
- return false;
- if ( !ParseSnippetLimitsElastic ( tSnip, tQuery, sError ) )
- return false;
- return true;
- }
- static bool ParseSnippetOptsSphinx ( const JsonObj_c & tSnip, SnippetQuerySettings_t & tOpt, CSphString & sError )
- {
- if ( !ParseSnippetLimitsSphinx ( tSnip, tOpt, sError ) )
- return false;
- if ( !tSnip.FetchStrItem ( tOpt.m_sBeforeMatch, "before_match", sError, true ) ) return false;
- if ( !tSnip.FetchStrItem ( tOpt.m_sAfterMatch, "after_match", sError, true ) ) return false;
- if ( !tSnip.FetchIntItem ( tOpt.m_iAround, "around", sError, true ) ) return false;
- if ( !tSnip.FetchBoolItem ( tOpt.m_bUseBoundaries, "use_boundaries", sError, true ) ) return false;
- if ( !tSnip.FetchBoolItem ( tOpt.m_bWeightOrder, "weight_order", sError, true ) ) return false;
- if ( !tSnip.FetchBoolItem ( tOpt.m_bForceAllWords, "force_all_words", sError, true ) ) return false;
- if ( !tSnip.FetchStrItem ( tOpt.m_sStripMode, "html_strip_mode", sError, true ) ) return false;
- if ( !tSnip.FetchBoolItem ( tOpt.m_bAllowEmpty, "allow_empty", sError, true ) ) return false;
- if ( !tSnip.FetchBoolItem ( tOpt.m_bEmitZones, "emit_zones", sError, true ) ) return false;
- if ( !tSnip.FetchBoolItem ( tOpt.m_bForcePassages, "force_passages", sError, true ) ) return false;
- if ( !tSnip.FetchBoolItem ( tOpt.m_bForcePassages, "force_snippets", sError, true ) ) return false;
- if ( !tSnip.FetchBoolItem ( tOpt.m_bPackFields, "pack_fields", sError, true ) ) return false;
- if ( !tSnip.FetchBoolItem ( tOpt.m_bLimitsPerField, "limits_per_field", sError, true ) )return false;
- JsonObj_c tBoundary = tSnip.GetStrItem ( "passage_boundary", "snippet_boundary", sError );
- if ( tBoundary )
- tOpt.m_ePassageSPZ = GetPassageBoundary ( tBoundary.StrVal() );
- else if ( !sError.IsEmpty() )
- return false;
- return true;
- }
- static bool ParseSnippet ( const JsonObj_c & tSnip, CSphQuery & tQuery, CSphString & sError )
- {
- CSphString sQuery;
- SnippetQuerySettings_t tSettings;
- tSettings.m_bJsonQuery = true;
- tSettings.m_bPackFields = true;
- if ( !ParseSnippetFields ( tSnip, tSettings, sError ) )
- return false;
- // elastic-style options
- if ( !ParseSnippetOptsElastic ( tSnip, sQuery, tSettings, sError ) )
- return false;
-
- // sphinx-style options
- if ( !ParseSnippetOptsSphinx ( tSnip, tSettings, sError ) )
- return false;
- FormatSnippetOpts ( sQuery, tSettings, tQuery );
- return true;
- }
- //////////////////////////////////////////////////////////////////////////
- // Sort
- struct SortField_t : public GeoDistInfo_c
- {
- CSphString m_sName;
- CSphString m_sMode;
- bool m_bAsc {true};
- };
- static void FormatSortBy ( const CSphVector<SortField_t> & dSort, JsonQuery_c & tQuery, bool & bGotWeight )
- {
- StringBuilder_c sSortBuf;
- Comma_c sComma ({", ",2});
- for ( const SortField_t &tItem : dSort )
- {
- const char * sSort = ( tItem.m_bAsc ? " asc" : " desc" );
- if ( tItem.IsGeoDist() )
- {
- // ORDER BY statement
- sSortBuf << sComma << g_szOrder << tItem.m_sName << sSort;
- // query item
- CSphQueryItem & tQueryItem = tQuery.m_dItems.Add();
- tQueryItem.m_sExpr = tItem.BuildExprString();
- tQueryItem.m_sAlias.SetSprintf ( "%s%s", g_szOrder, tItem.m_sName.cstr() );
- // select list
- StringBuilder_c sTmp;
- sTmp << tQuery.m_sSelect << ", " << tQueryItem.m_sExpr << " as " << tQueryItem.m_sAlias;
- sTmp.MoveTo ( tQuery.m_sSelect );
- } else if ( tItem.m_sMode.IsEmpty() )
- {
- const char * sName = tItem.m_sName.cstr();
- if ( tItem.m_sName=="_score" )
- sName = "@weight";
- else if ( tItem.m_sName=="_count" )
- sName = "count(*)";
- // sort by attribute or weight
- sSortBuf << sComma << sName << sSort;
- bGotWeight |= ( tItem.m_sName=="_score" );
- } else
- {
- // sort by MVA
- // ORDER BY statement
- sSortBuf << sComma << g_szOrder << tItem.m_sName << sSort;
- // query item
- StringBuilder_c sTmp;
- sTmp << ( tItem.m_sMode=="min" ? "least" : "greatest" ) << "(" << tItem.m_sName << ")";
- CSphQueryItem & tQueryItem = tQuery.m_dItems.Add();
- sTmp.MoveTo (tQueryItem.m_sExpr);
- tQueryItem.m_sAlias.SetSprintf ( "%s%s", g_szOrder, tItem.m_sName.cstr() );
- // select list
- sTmp << tQuery.m_sSelect << ", " << tQueryItem.m_sExpr << " as " << tQueryItem.m_sAlias;
- sTmp.MoveTo ( tQuery.m_sSelect );
- }
- tQuery.m_dSortFields.Add ( tItem.m_sName );
- }
- if ( !dSort.GetLength() )
- {
- sSortBuf += "@weight desc";
- bGotWeight = true;
- }
- tQuery.m_eSort = SPH_SORT_EXTENDED;
- sSortBuf.MoveTo ( tQuery.m_sSortBy );
- }
- static bool ParseSortObj ( const JsonObj_c & tSortItem, CSphVector<SortField_t> & dSort, CSphString & sError, CSphString & sWarning )
- {
- bool bSortString = tSortItem.IsStr();
- bool bSortObj = tSortItem.IsObj();
- CSphString sSortName = tSortItem.Name();
- if ( ( !bSortString && !bSortObj ) || !tSortItem.Name() || ( bSortString && !tSortItem.SzVal() ) )
- {
- sError.SetSprintf ( R"("sort" property 0("%s") should be %s)", sSortName.scstr(), ( bSortObj ? "a string" : "an object" ) );
- return false;
- }
- // [ { "attr_name" : "sort_mode" } ]
- if ( bSortString )
- {
- CSphString sOrder = tSortItem.StrVal();
- if ( sOrder!="asc" && sOrder!="desc" )
- {
- sError.SetSprintf ( R"("sort" property "%s" order is invalid %s)", sSortName.scstr(), sOrder.cstr() );
- return false;
- }
- SortField_t & tAscItem = dSort.Add();
- tAscItem.m_sName = sSortName;
- tAscItem.m_bAsc = ( sOrder=="asc" );
- return true;
- }
- // [ { "attr_name" : { "order" : "sort_mode" } } ]
- SortField_t & tSortField = dSort.Add();
- tSortField.m_sName = sSortName;
- JsonObj_c tAttrItems = tSortItem.GetItem("order");
- if ( tAttrItems )
- {
- if ( !tAttrItems.IsStr() )
- {
- sError.SetSprintf ( R"("sort" property "%s" order is invalid)", tAttrItems.Name() );
- return false;
- }
- CSphString sOrder = tAttrItems.StrVal();
- tSortField.m_bAsc = ( sOrder=="asc" );
- }
- JsonObj_c tMode = tSortItem.GetItem("mode");
- if ( tMode )
- {
- if ( tAttrItems && !tMode.IsStr() )
- {
- sError.SetSprintf ( R"("mode" property "%s" order is invalid)", tAttrItems.Name() );
- return false;
- }
- CSphString sMode = tMode.StrVal();
- if ( sMode!="min" && sMode!="max" )
- {
- sError.SetSprintf ( R"("mode" supported are "min" and "max", got "%s", not supported)", sMode.cstr() );
- return false;
- }
- tSortField.m_sMode = sMode;
- }
- // geodist
- if ( tSortField.m_sName=="_geo_distance" )
- {
- if ( tMode )
- {
- sError = R"("mode" property not supported with "_geo_distance")";
- return false;
- }
- if ( tSortItem.HasItem("unit") )
- {
- sError = R"("unit" property not supported with "_geo_distance")";
- return false;
- }
- if ( !tSortField.Parse ( tSortItem, false, sError, sWarning ) )
- return false;
- }
- // FXIME!!! "unmapped_type" should be replaced with expression EXIST
- // unsupported options
- const char * dUnsupported[] = { "missing", "nested_path", "nested_filter"};
- for ( auto szOption : dUnsupported )
- {
- if ( tSortItem.HasItem(szOption) )
- {
- sError.SetSprintf ( R"("%s" property not supported)", szOption );
- return false;
- }
- }
- return true;
- }
- static bool ParseSort ( const JsonObj_c & tSort, JsonQuery_c & tQuery, bool & bGotWeight, CSphString & sError, CSphString & sWarning )
- {
- bGotWeight = false;
- // unsupported options
- if ( tSort.HasItem("_script") )
- {
- sError = "\"_script\" property not supported";
- return false;
- }
- CSphVector<SortField_t> dSort;
- dSort.Reserve ( tSort.Size() );
- if ( tSort.IsObj() )
- {
- if ( !ParseSortObj ( tSort[0], dSort, sError, sWarning ) )
- return false;
- } else
- {
- for ( const auto & tItem : tSort )
- {
- CSphString sName = tItem.Name();
- bool bString = tItem.IsStr();
- bool bObj = tItem.IsObj();
- if ( !bString && !bObj )
- {
- sError.SetSprintf ( R"("sort" property "%s" should be a string or an object)", sName.scstr() );
- return false;
- }
- if ( bObj && tItem.Size()!=1 )
- {
- sError.SetSprintf ( R"("sort" property "%s" should be an object)", sName.scstr() );
- return false;
- }
- // [ "attr_name" ]
- if ( bString )
- {
- SortField_t & tSortField = dSort.Add();
- tSortField.m_sName = tItem.StrVal();
- // order defaults to desc when sorting on the _score, and defaults to asc when sorting on anything else
- tSortField.m_bAsc = ( tSortField.m_sName!="_score" );
- continue;
- }
- JsonObj_c tSortItem = tItem[0];
- if ( !tSortItem )
- {
- sError = R"(invalid "sort" property item)";
- return false;
- }
- if ( !ParseSortObj ( tSortItem, dSort, sError, sWarning ) )
- return false;
- }
- }
- FormatSortBy ( dSort, tQuery, bGotWeight );
- return true;
- }
- //////////////////////////////////////////////////////////////////////////
- // _source / select list
- static bool ParseStringArray ( const JsonObj_c & tArray, const char * szProp, StrVec_t & dItems, CSphString & sError )
- {
- for ( const auto & tItem : tArray )
- {
- if ( !tItem.IsStr() )
- {
- sError.SetSprintf ( R"("%s" property should be a string)", szProp );
- return false;
- }
- dItems.Add ( tItem.StrVal() );
- }
- return true;
- }
- static bool ParseSelect ( const JsonObj_c & tSelect, CSphQuery & tQuery, CSphString & sError )
- {
- bool bString = tSelect.IsStr();
- bool bArray = tSelect.IsArray();
- bool bObj = tSelect.IsObj();
- if ( !bString && !bArray && !bObj )
- {
- sError = R"("_source" property should be a string or an array or an object)";
- return false;
- }
- if ( bString )
- {
- tQuery.m_dIncludeItems.Add ( tSelect.StrVal() );
- if ( tQuery.m_dIncludeItems[0]=="*" || tQuery.m_dIncludeItems[0].IsEmpty() )
- tQuery.m_dIncludeItems.Reset();
- return true;
- }
- if ( bArray )
- return ParseStringArray ( tSelect, R"("_source")", tQuery.m_dIncludeItems, sError );
- assert ( bObj );
- // includes part of _source object
- JsonObj_c tInclude = tSelect.GetArrayItem ( "includes", sError, true );
- if ( tInclude )
- {
- if ( !ParseStringArray ( tInclude, R"("_source" "includes")", tQuery.m_dIncludeItems, sError ) )
- return false;
- if ( tQuery.m_dIncludeItems.GetLength()==1 && tQuery.m_dIncludeItems[0]=="*" )
- tQuery.m_dIncludeItems.Reset();
- } else if ( !sError.IsEmpty() )
- return false;
- // excludes part of _source object
- JsonObj_c tExclude = tSelect.GetArrayItem ( "excludes", sError, true );
- if ( tExclude )
- {
- if ( !ParseStringArray ( tExclude, R"("_source" "excludes")", tQuery.m_dExcludeItems, sError ) )
- return false;
- } else if ( !sError.IsEmpty() )
- return false;
- return true;
- }
- //////////////////////////////////////////////////////////////////////////
- // script_fields / expressions
- static bool ParseScriptFields ( const JsonObj_c & tExpr, CSphQuery & tQuery, CSphString & sError )
- {
- if ( !tExpr )
- return true;
- if ( !tExpr.IsObj() )
- {
- sError = R"("script_fields" property should be an object)";
- return false;
- }
- StringBuilder_c sSelect;
- sSelect << tQuery.m_sSelect;
- for ( const auto & tAlias : tExpr )
- {
- if ( !tAlias.IsObj() )
- {
- sError = R"("script_fields" properties should be objects)";
- return false;
- }
- if ( CSphString ( tAlias.Name() ).IsEmpty() )
- {
- sError = R"("script_fields" empty property name)";
- return false;
- }
- JsonObj_c tAliasScript = tAlias.GetItem("script");
- if ( !tAliasScript )
- {
- sError = R"("script_fields" property should have "script" object)";
- return false;
- }
- CSphString sExpr;
- if ( !tAliasScript.FetchStrItem ( sExpr, "inline", sError ) )
- return false;
- const char * dUnsupported[] = { "lang", "params", "stored", "file" };
- for ( auto szOption : dUnsupported )
- if ( tAliasScript.HasItem(szOption) )
- {
- sError.SetSprintf ( R"("%s" property not supported in "script_fields")", szOption );
- return false;
- }
- // add to query
- CSphQueryItem & tQueryItem = tQuery.m_dItems.Add();
- tQueryItem.m_sExpr = sExpr;
- tQueryItem.m_sAlias = tAlias.Name();
- // add to select list
- sSelect.Appendf ( ", %s as %s", tQueryItem.m_sExpr.cstr(), tQueryItem.m_sAlias.cstr() );
- }
- sSelect.MoveTo ( tQuery.m_sSelect );
- return true;
- }
- static bool ParseExpressions ( const JsonObj_c & tExpr, CSphQuery & tQuery, CSphString & sError )
- {
- if ( !tExpr )
- return true;
- if ( !tExpr.IsObj() )
- {
- sError = R"("expressions" property should be an object)";
- return false;
- }
- StringBuilder_c sSelect;
- sSelect << tQuery.m_sSelect;
- for ( const auto & tAlias : tExpr )
- {
- if ( !tAlias.IsStr() )
- {
- sError = R"("expressions" properties should be strings)";
- return false;
- }
- if ( CSphString ( tAlias.Name() ).IsEmpty() )
- {
- sError = R"("expressions" empty property name)";
- return false;
- }
- // add to query
- CSphQueryItem & tQueryItem = tQuery.m_dItems.Add();
- tQueryItem.m_sExpr = tAlias.StrVal();
- tQueryItem.m_sAlias = tAlias.Name();
- // add to select list
- sSelect.Appendf ( ", %s as %s", tQueryItem.m_sExpr.cstr(), tQueryItem.m_sAlias.cstr() );
- }
- sSelect.MoveTo ( tQuery.m_sSelect );
- return true;
- }
- //////////////////////////////////////////////////////////////////////////
- // docvalue_fields
- bool ParseDocFields ( const JsonObj_c & tDocFields, JsonQuery_c & tQuery, CSphString & sError )
- {
- if ( !tDocFields || !tDocFields.IsArray() )
- {
- sError = R"("docvalue_fields" property should be an array or an object")";
- return false;
- }
- for ( const auto & tItem : tDocFields )
- {
- if ( !tItem.IsObj() )
- {
- sError = R"("docvalue_fields" property item should be an object)";
- return false;
- }
- CSphString sFieldName;
- if ( !tItem.FetchStrItem ( sFieldName, "field", sError, false ) )
- return false;
- if ( tQuery.m_dItems.GetFirst ( [&sFieldName] ( const CSphQueryItem & tVal ) { return ( tVal.m_sExpr=="*" || tVal.m_sExpr==sFieldName ); } )==-1 )
- {
- CSphQueryItem & tDFItem = tQuery.m_dItems.Add();
- tDFItem.m_sExpr = sFieldName;
- tDFItem.m_sAlias = sFieldName;
- }
- // FIXME!!! collect format type
- bool bDateTime = false;
- CSphString sFormat;
- if ( tItem.FetchStrItem ( sFormat, "format", sError, true ) )
- bDateTime = ( sFormat=="date_time" );
- tQuery.m_dDocFields.Add ( { sFieldName, bDateTime } );
- }
- return true;
- }
- static Aggr_e GetAggrFunc ( const JsonObj_c & tBucket, bool bCheckAggType )
- {
- if ( StrEq ( tBucket.Name(), "significant_terms" ) )
- return Aggr_e::SIGNIFICANT;
- if ( StrEq ( tBucket.Name(), "histogram" ) )
- return Aggr_e::HISTOGRAM;
- if ( StrEq ( tBucket.Name(), "date_histogram" ) )
- return Aggr_e::DATE_HISTOGRAM;
- if ( StrEq ( tBucket.Name(), "range") )
- return Aggr_e::RANGE;
- if ( StrEq ( tBucket.Name(), "date_range") )
- return Aggr_e::DATE_RANGE;
- if ( StrEq ( tBucket.Name(), "composite") )
- return Aggr_e::COMPOSITE;
- if ( StrEq ( tBucket.Name(), "min") )
- return Aggr_e::MIN;
- if ( StrEq ( tBucket.Name(), "max") )
- return Aggr_e::MAX;
- if ( StrEq ( tBucket.Name(), "sum") )
- return Aggr_e::SUM;
- if ( StrEq ( tBucket.Name(), "avg") )
- return Aggr_e::AVG;
- if ( bCheckAggType )
- sphWarning ( "unsupported aggregate type '%s'", tBucket.Name() );
- return Aggr_e::NONE;
- }
- static void SetRangeFrom ( const JsonObj_c & tSrc, bool bForceFloat, RangeSetting_t & tItem )
- {
- if ( tSrc.IsDbl() )
- tItem.m_fFrom = tSrc.DblVal();
- else if ( bForceFloat )
- tItem.m_fFrom = tSrc.IntVal();
- else
- tItem.m_iFrom = tSrc.IntVal();
- }
- static void SetRangeTo ( const JsonObj_c & tSrc, bool bForceFloat, RangeSetting_t & tItem )
- {
- if ( tSrc.IsDbl() )
- tItem.m_fTo = tSrc.DblVal();
- else if ( bForceFloat )
- tItem.m_fTo = tSrc.IntVal();
- else
- tItem.m_iTo = tSrc.IntVal();
- }
- static bool GetKeyed ( const JsonObj_c & tBucket, bool & bKeyed, CSphString & sError )
- {
- if ( !tBucket.HasItem ( "keyed" ) )
- return true;
- const auto tKeyed = tBucket.GetBoolItem ( "keyed", sError, false );
- if ( !tKeyed )
- return false;
- bKeyed = tKeyed.BoolVal();
- return true;
- }
- static bool ParseAggrRange ( const JsonObj_c & tRanges, const CSphString & sCol, AggrRangeSetting_t & dRanges, CSphString & sError );
- static bool ParseAggrRange ( const JsonObj_c & tRanges, const CSphString & sCol, AggrDateRangeSetting_t & dRanges, CSphString & sError );
- static bool ParseAggrRange ( const JsonObj_c & tBucket, JsonAggr_t & tItem, bool bDate, CSphString & sError )
- {
- JsonObj_c tRanges = tBucket.GetItem( "ranges" );
- if ( !tRanges || !tRanges.IsArray() )
- {
- if ( !tRanges )
- sError.SetSprintf ( "\"%s\" missed \"ranges\" property", tItem.m_sCol.cstr() );
- else
- sError.SetSprintf ( "\"%s\" \"ranges\" should be an array", tItem.m_sCol.cstr() );
- return false;
- }
- int iCount = tRanges.Size();
- if ( !iCount )
- {
- sError.SetSprintf ( "\"%s\" empty \"ranges\" property", tItem.m_sCol.cstr() );
- return false;
- }
- bool bKeyed = false;
- if ( !GetKeyed ( tBucket, bKeyed, sError ) )
- return false;
- if ( !bDate )
- {
- auto & dRanges = tItem.m_tRange;
- dRanges.Resize ( iCount );
- dRanges.m_bKeyed = bKeyed;
- return ParseAggrRange ( tRanges, tItem.m_sCol, dRanges, sError );
- } else
- {
- auto & dRanges = tItem.m_tDateRange;
- dRanges.Resize ( iCount );
- dRanges.m_bKeyed = bKeyed;
- return ParseAggrRange ( tRanges, tItem.m_sCol, dRanges, sError );
- }
- }
- bool ParseAggrRange ( const JsonObj_c & tRanges, const CSphString & sCol, AggrRangeSetting_t & dRanges, CSphString & sError )
- {
- int iFloatStart = -1;
- for ( int i=0; i<dRanges.GetLength(); i++ )
- {
- const auto tRangeItem = tRanges[i];
- const auto tFrom = tRangeItem.GetItem ( "from" );
- const auto tTo = tRangeItem.GetItem ( "to" );
- const bool bHasFrom = tFrom;
- const bool bHasTo = tTo;
- if ( !bHasFrom && i!=0 )
- {
- sError.SetSprintf ( "\"%s\" ranges[%d] \"from\" empty", sCol.cstr(), i );
- return false;
- }
- if ( !bHasTo && i!=dRanges.GetLength()-1 )
- {
- sError.SetSprintf ( "\"%s\" ranges[%d] \"to\" empty", sCol.cstr(), i );
- return false;
- }
- if ( ( bHasFrom && tFrom.IsDbl() ) || ( bHasTo && tTo.IsDbl() ) )
- {
- dRanges.m_bFloat = true;
- if ( iFloatStart!=-1 )
- iFloatStart = i;
- }
- if ( bHasFrom )
- SetRangeFrom ( tFrom, ( iFloatStart!=-1 ), dRanges[i] );
- else
- dRanges.m_bOpenLeft = true;
- if ( bHasTo )
- SetRangeTo ( tTo, ( iFloatStart!=-1 ), dRanges[i] );
- else
- dRanges.m_bOpenRight = true;
- }
- // convert int to float values for head of array values
- if ( iFloatStart>0 )
- {
- for ( int i=iFloatStart; i<dRanges.GetLength(); i++ )
- {
- dRanges[i].m_fFrom = dRanges[i].m_iFrom;
- dRanges[i].m_fTo = dRanges[i].m_iTo;
- }
- }
- if ( dRanges.m_bOpenLeft )
- {
- if ( dRanges.m_bFloat )
- dRanges[0].m_fFrom = -FLT_MAX;
- else
- dRanges[0].m_iFrom = -LLONG_MAX;
- }
- if ( dRanges.m_bOpenRight )
- {
- if ( dRanges.m_bFloat )
- dRanges.Last().m_fTo = FLT_MAX;
- else
- dRanges.Last().m_iTo = LLONG_MAX;
- }
- return true;
- }
- bool ParseAggrRange ( const JsonObj_c & tRanges, const CSphString & sCol, AggrDateRangeSetting_t & dRanges, CSphString & sError )
- {
- for ( int i=0; i<dRanges.GetLength(); i++ )
- {
- const auto tRangeItem = tRanges[i];
- const auto tFrom = tRangeItem.GetItem ( "from" );
- const auto tTo = tRangeItem.GetItem ( "to" );
- const bool bHasFrom = tFrom;
- const bool bHasTo = tTo;
- if ( !bHasFrom && i!=0 )
- {
- sError.SetSprintf ( "\"%s\" ranges[%d] \"from\" empty", sCol.cstr(), i );
- return false;
- }
- if ( !bHasTo && i!=dRanges.GetLength()-1 )
- {
- sError.SetSprintf ( "\"%s\" ranges[%d] \"to\" empty", sCol.cstr(), i );
- return false;
- }
- if ( bHasFrom )
- dRanges[i].m_sFrom = tFrom.StrVal();
- if ( bHasTo )
- dRanges[i].m_sTo = tTo.StrVal();
- }
- return true;
- }
- static bool ParseAggrHistogram ( const JsonObj_c & tBucket, JsonAggr_t & tItem, CSphString & sError )
- {
- AggrHistSetting_t & tHist = tItem.m_tHist;
- JsonObj_c tInterval = tBucket.GetItem ( "interval" );
- if ( tInterval.Empty() )
- {
- sError.SetSprintf ( "\"%s\" interval missed", tItem.m_sCol.cstr() );
- return false;
- }
- if ( !tInterval.IsNum() )
- {
- sError.SetSprintf ( "\"%s\" interval should be numeric", tItem.m_sCol.cstr() );
- return false;
- }
- if ( tInterval.IsInt() )
- tHist.m_tInterval = tInterval.IntVal();
- else
- tHist.m_tInterval = tInterval.FltVal();
- JsonObj_c tOffset = tBucket.GetItem ( "offset" );
- if ( !tOffset.Empty() )
- {
- if ( !tOffset.IsNum() )
- {
- sError.SetSprintf ( "\"%s\" offset should be numeric", tItem.m_sCol.cstr() );
- return false;
- }
- if ( tOffset.IsInt() )
- tHist.m_tOffset = tOffset.IntVal();
- else
- tHist.m_tOffset = tOffset.FltVal();
- } else
- {
- tHist.m_tOffset = INT64_C ( 0 );
- }
- if ( !GetKeyed ( tBucket, tHist.m_bKeyed, sError ) )
- return false;
- FixFloat ( tHist );
- return true;
- }
- static bool ParseAggrDateHistogram ( const JsonObj_c & tBucket, JsonAggr_t & tItem, CSphString & sError )
- {
- AggrDateHistSetting_t & tHist = tItem.m_tDateHist;
- JsonObj_c tCalendar = tBucket.GetItem ( "calendar_interval" );
- JsonObj_c tFixed = tBucket.GetItem ( "fixed_interval" );
- if ( tCalendar.Empty() && tFixed.Empty() )
- {
- sError.SetSprintf ( "\"%s\" calendar_interval or fixed_interval missed", tItem.m_sCol.cstr() );
- return false;
- }
- if ( !tCalendar.Empty() && !tFixed.Empty() )
- {
- sError.SetSprintf ( "\"%s\" both calendar_interval and fixed_interval supplied", tItem.m_sCol.cstr() );
- return false;
- }
- tHist.m_bFixed = !tFixed.Empty();
- const JsonObj_c & tInterval = ( tHist.m_bFixed ? tFixed : tCalendar );
- if ( !tInterval.IsStr() )
- {
- sError.SetSprintf ( "\"%s\" calendar_interval should be string", tItem.m_sCol.cstr() );
- return false;
- }
- tHist.m_sInterval = tInterval.StrVal();
- if ( !GetKeyed ( tBucket, tHist.m_bKeyed, sError ) )
- return false;
- return true;
- }
- static bool ParseAggrComposite ( const JsonObj_c & tBucket, JsonAggr_t & tAggr, CSphString & sError )
- {
- JsonObj_c tComposite = tBucket.GetObjItem ( "composite", sError, false );
- if ( !tComposite )
- return false;
- JsonObj_c tSource = tComposite.GetArrayItem ( "sources", sError, false );
- if ( !tSource )
- return false;
- if ( !tSource.IsArray() )
- {
- sError = R"("sources" property item should be an array)";
- return false;
- }
- SmallStringHash_T<AggrComposite_t> hColumns;
- for ( const auto & tArrayItem : tSource )
- {
- if ( !tArrayItem.IsObj() )
- {
- sError = R"("sources" items should be an object)";
- return false;
- }
- JsonObj_c tItem = tArrayItem.begin();
- JsonObj_c tTerms = tItem.GetObjItem ( "terms", sError, false );
- if ( !tTerms )
- return false;
- AggrComposite_t tCol;
- if ( !tTerms.FetchStrItem ( tCol.m_sColumn, "field", sError, false ) )
- return false;
- tCol.m_sAlias = tItem.Name();
- if ( !hColumns.Add ( tCol, tItem.Name() ) )
- {
- sError.SetSprintf ( R"("composite" has multiple "%s" aggregates)", tItem.Name() );
- return false;
- }
- }
- if ( hColumns.IsEmpty() )
- {
- sError = R"(empty "composite" aggregate)";
- return false;
- }
- JsonObj_c tAfter = tComposite.GetObjItem ( "after", sError, false );
- if ( tAfter && tAfter.Size() )
- {
- JsonObj_c tJsonQuery ( R"( {"query":{"bool":{"must":[] }}} )" );
- JsonObj_c tFilters = tJsonQuery.GetItem ( "query" ).GetItem ( "bool" ).GetItem ( "must" );
- for ( const auto & tItem : tAfter )
- {
- AggrComposite_t * pCol = hColumns ( tItem.Name() );
- if ( !pCol )
- {
- sError.SetSprintf ( R"("after" missed "%s" aggregate)", tItem.Name() );
- return false;
- }
- JsonObj_c tFilterVal = tItem.Clone();
- JsonObj_c tEqItem ( R"( {"equals":{} } )") ;
- tEqItem.begin().AddItem ( pCol->m_sColumn.cstr(), tFilterVal );
- tFilters.AddItem ( tEqItem );
- }
- CSphQuery tTmpQuery;
- if ( !ParseJsonQueryFilters ( tJsonQuery.GetItem( "query" ), tTmpQuery, sError, sError ) )
- return false;
- if ( !sError.IsEmpty() )
- return false;
- assert ( tTmpQuery.m_dFilterTree.IsEmpty() );
- tAggr.m_dCompositeAfterKey = std::move ( tTmpQuery.m_dFilters );
- }
- tAggr.m_iSize = DEFAULT_MAX_MATCHES;
- tComposite.FetchIntItem ( tAggr.m_iSize, "size", sError, true );
- StringBuilder_c sColName ( "," );
- tAggr.m_dComposite.Reserve ( hColumns.GetLength() );
- for ( const auto & tCol : hColumns )
- {
- sColName += tCol.second.m_sColumn.cstr();
- tAggr.m_dComposite.Add ( tCol.second );
- }
- tAggr.m_sCol = sColName.cstr();
- return true;
- }
- static bool ParseAggsNode ( const JsonObj_c & tBucket, const JsonObj_c & tJsonItem, bool bRoot, JsonAggr_t & tItem, CSphString & sError )
- {
- if ( !tBucket.IsObj() )
- {
- sError.SetSprintf ( R"("aggs" bucket '%s' should be an object)", tItem.m_sBucketName.cstr() );
- return false;
- }
- if ( !StrEq ( tBucket.Name(), "composite" ) && !tBucket.FetchStrItem ( tItem.m_sCol, "field", sError, false ) )
- return false;
- tBucket.FetchIntItem ( tItem.m_iSize, "size", sError, true );
- int iShardSize = 0;
- tBucket.FetchIntItem ( iShardSize, "shard_size", sError, true );
- tItem.m_iSize = Max ( tItem.m_iSize, iShardSize ); // FIXME!!! use (size * 1.5 + 10) for shard size
- tItem.m_eAggrFunc = GetAggrFunc ( tBucket, !bRoot );
- switch ( tItem.m_eAggrFunc )
- {
- case Aggr_e::DATE_HISTOGRAM:
- if ( !ParseAggrDateHistogram ( tBucket, tItem, sError ) )
- return false;
- tItem.m_iSize = Max ( tItem.m_iSize, 1000 ); // set max_matches to min\max / interval
- break;
- case Aggr_e::HISTOGRAM:
- if ( !ParseAggrHistogram ( tBucket, tItem, sError ) )
- return false;
- tItem.m_iSize = Max ( tItem.m_iSize, 1000 ); // set max_matches to min\max / interval
- break;
- case Aggr_e::RANGE:
- if ( !ParseAggrRange ( tBucket, tItem, false, sError ) )
- return false;
- tItem.m_iSize = Max ( tItem.m_iSize, tItem.m_tRange.GetLength() + 1 ); // set max_matches to buckets count + _all bucket
- break;
- case Aggr_e::DATE_RANGE:
- if ( !ParseAggrRange ( tBucket, tItem, true, sError ) )
- return false;
- tItem.m_iSize = Max ( tItem.m_iSize, tItem.m_tDateRange.GetLength() + 1 ); // set max_matches to buckets count + _all bucket
- break;
- case Aggr_e::COMPOSITE:
- if ( !ParseAggrComposite ( tJsonItem, tItem, sError ) )
- return false;
- break;
- case Aggr_e::MIN:
- case Aggr_e::MAX:
- case Aggr_e::SUM:
- case Aggr_e::AVG:
- tItem.m_iSize = 1;
- break;
-
- default: break;
- }
- return true;
- }
- static bool ParseAggsNodeSort ( const JsonObj_c & tJsonItem, bool bOrder, JsonAggr_t & tItem, CSphString & sError )
- {
- if ( !( tJsonItem.IsArray() || tJsonItem.IsObj() ) )
- {
- sError.SetSprintf ( "\"%s\" property value should be an array or an object", ( bOrder ? "order" : "sort" ) );
- return false;
- }
- bool bGotWeight = false;
- JsonQuery_c tTmpQuery;
- tTmpQuery.m_sSortBy = "";
- tTmpQuery.m_eSort = SPH_SORT_RELEVANCE;
- // FIXME!!! reports warnings for geodist sort
- CSphString sWarning;
- if ( !ParseSort ( tJsonItem, tTmpQuery, bGotWeight, sError, sWarning ) )
- return false;
- tItem.m_sSort = tTmpQuery.m_sSortBy;
- return true;
- }
- static bool AddSubAggregate ( const JsonObj_c & tAggs, bool bRoot, CSphVector<JsonAggr_t> & dParentItems, CSphString & sError )
- {
- if ( bRoot && tAggs.begin().Empty() )
- {
- JsonAggr_t & tCount = dParentItems.Add();
- tCount.m_eAggrFunc = Aggr_e::COUNT;
- tCount.m_iSize = 1;
- return true;
- }
- for ( const auto & tJsonItem : tAggs )
- {
- if ( !tJsonItem.IsObj() )
- {
- sError = R"("aggs" property item should be an object)";
- return false;
- }
- JsonAggr_t tItem;
- tItem.m_sBucketName = tJsonItem.Name();
- for ( const auto & tAggsItem : tJsonItem )
- {
- // could be a sort object at the aggs item or order object at the bucket
- if ( strcmp ( tAggsItem.Name(), "sort" )==0 )
- {
- if ( !ParseAggsNodeSort ( tAggsItem, false, tItem, sError ) )
- return false;
- } else
- {
- if ( StrEq ( tAggsItem.Name(), "aggs" ) || tAggsItem.HasItem ( "aggs" ) )
- {
- sError = R"(nested "aggs" is not supported)";
- return false;
- }
- if ( tAggsItem==tAggsItem.end() )
- {
- sError.SetSprintf ( R"("aggs" bucket '%s' with only nested items)", tAggsItem.Name() );
- return false;
- }
- if ( !ParseAggsNode ( tAggsItem, tJsonItem, bRoot, tItem, sError ) )
- return false;
- // bucket could have its own order item
- if ( tAggsItem.HasItem ( "order" ) )
- {
- if ( !ParseAggsNodeSort ( tAggsItem.GetItem("order"), true, tItem, sError ) )
- return false;
- }
- }
- }
- if ( tItem.m_eAggrFunc==Aggr_e::NONE && !bRoot )
- {
- sError.SetSprintf ( R"(bucket '%s' without aggregate items)", tItem.m_sBucketName.cstr() );
- return false;
- }
- dParentItems.Add ( tItem );
- }
- return true;
- }
- bool ParseAggregates ( const JsonObj_c & tAggs, JsonQuery_c & tQuery, CSphString & sError )
- {
- if ( !tAggs || !tAggs.IsObj() )
- {
- sError = R"("aggs" property should be an object")";
- return false;
- }
- if ( !AddSubAggregate ( tAggs, true, tQuery.m_dAggs, sError ) )
- return false;
- // set query now for any date aggregate to make sure they will have the same now timestamp
- if ( tQuery.m_dAggs.any_of ( [] ( const JsonAggr_t & tAggr ) { return !tAggr.m_tDateRange.IsEmpty(); } ) )
- tQuery.m_iNow = time ( nullptr );
- return true;
- }
- CSphString JsonAggr_t::GetAliasName () const
- {
- CSphString sName;
- sName.SetSprintf ( "%s_%s", m_sCol.cstr(), m_sBucketName.cstr() );
- return sName;
- }
- ParsedJsonQuery_t::ParsedJsonQuery_t()
- {
- SetQueryDefaultsExt2 ( m_tQuery );
- SetQueryDefaultsExt2 ( m_tJoinQueryOptions );
- }
|