sphinxjsonquery.cpp 71 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773
  1. //
  2. // Copyright (c) 2017-2020, Manticore Software LTD (http://manticoresearch.com)
  3. // All rights reserved
  4. //
  5. // This program is free software; you can redistribute it and/or modify
  6. // it under the terms of the GNU General Public License. You should have
  7. // received a copy of the GPL license along with this program; if you
  8. // did not, you can find it at http://www.gnu.org/
  9. //
  10. #include "sphinxquery.h"
  11. #include "sphinxsearch.h"
  12. #include "sphinxplugin.h"
  13. #include "sphinxutils.h"
  14. #include "searchdaemon.h"
  15. #include "sphinxjson.h"
  16. #include "attribute.h"
  17. #include "searchdsql.h"
  18. #include "json/cJSON.h"
  // Reserved names used by the JSON query code.
  // g_szAll is the pseudo-field name that QueryTreeBuilder_c::HandleSpecialFields() maps to "all fields".
  19. static const char * g_szAll = "_all";
  // NOTE(review): the three names below are not referenced in the visible part of this file;
  // presumably they are internal aliases for generated filter/highlight/order items - confirm at the use sites.
  20. static const char * g_szFilter = "_@filter_";
  21. static const char * g_szHighlight = "_@highlight_";
  22. static const char * g_szOrder = "_@order_";
  23. static bool IsFilter ( const JsonObj_c & tJson )
  24. {
  25. if ( !tJson )
  26. return false;
  27. CSphString sName = tJson.Name();
  28. if ( sName=="equals" )
  29. return true;
  30. if ( sName=="range" )
  31. return true;
  32. if ( sName=="geo_distance" )
  33. return true;
  34. return false;
  35. }
  36. //////////////////////////////////////////////////////////////////////////
  37. class QueryTreeBuilder_c : public XQParseHelper_c
  38. {
  39. public:
  40. QueryTreeBuilder_c ( const CSphQuery * pQuery, const ISphTokenizer * pQueryTokenizerQL, const CSphIndexSettings & tSettings );
  41. void CollectKeywords ( const char * szStr, XQNode_t * pNode, const XQLimitSpec_t & tLimitSpec );
  42. bool HandleFieldBlockStart ( const char * & /*pPtr*/ ) override { return true; }
  43. bool HandleSpecialFields ( const char * & pPtr, FieldMask_t & dFields ) override;
  44. bool NeedTrailingSeparator() override { return false; }
  45. XQNode_t * CreateNode ( XQLimitSpec_t & tLimitSpec );
  46. const ISphTokenizer * GetQLTokenizer() { return m_pQueryTokenizerQL; }
  47. const CSphIndexSettings & GetIndexSettings() { return m_tSettings; }
  48. const CSphQuery * GetQuery() { return m_pQuery; }
  49. private:
  50. const CSphQuery * m_pQuery {nullptr};
  51. const ISphTokenizer * m_pQueryTokenizerQL {nullptr};
  52. const CSphIndexSettings & m_tSettings;
  53. void AddChildKeyword ( XQNode_t * pParent, const char * szKeyword, int iSkippedPosBeforeToken, const XQLimitSpec_t & tLimitSpec );
  54. };
  55. QueryTreeBuilder_c::QueryTreeBuilder_c ( const CSphQuery * pQuery, const ISphTokenizer * pQueryTokenizerQL, const CSphIndexSettings & tSettings )
  56. : m_pQuery ( pQuery )
  57. , m_pQueryTokenizerQL ( pQueryTokenizerQL )
  58. , m_tSettings ( tSettings )
  59. {}
  60. void QueryTreeBuilder_c::CollectKeywords ( const char * szStr, XQNode_t * pNode, const XQLimitSpec_t & tLimitSpec )
  61. {
  62. m_pTokenizer->SetBuffer ( (const BYTE*)szStr, (int) strlen ( szStr ) );
  63. while (true)
  64. {
  65. int iSkippedPosBeforeToken = 0;
  66. if ( m_bWasBlended )
  67. {
  68. iSkippedPosBeforeToken = m_pTokenizer->SkipBlended();
  69. // just add all skipped blended parts except blended head (already added to atomPos)
  70. if ( iSkippedPosBeforeToken>1 )
  71. m_iAtomPos += iSkippedPosBeforeToken - 1;
  72. }
  73. const char * sToken = (const char *) m_pTokenizer->GetToken ();
  74. if ( !sToken )
  75. {
  76. AddChildKeyword ( pNode, nullptr, iSkippedPosBeforeToken, tLimitSpec );
  77. break;
  78. }
  79. // now let's do some token post-processing
  80. m_bWasBlended = m_pTokenizer->TokenIsBlended();
  81. int iPrevDeltaPos = 0;
  82. if ( m_pPlugin && m_pPlugin->m_fnPushToken )
  83. sToken = m_pPlugin->m_fnPushToken ( m_pPluginData, (char*)sToken, &iPrevDeltaPos, m_pTokenizer->GetTokenStart(), m_pTokenizer->GetTokenEnd() - m_pTokenizer->GetTokenStart() );
  84. m_iAtomPos += 1 + iPrevDeltaPos;
  85. bool bMultiDestHead = false;
  86. bool bMultiDest = false;
  87. int iDestCount = 0;
  88. // do nothing inside phrase
  89. if ( !m_pTokenizer->m_bPhrase )
  90. bMultiDest = m_pTokenizer->WasTokenMultiformDestination ( bMultiDestHead, iDestCount );
  91. // check for stopword, and create that node
  92. // temp buffer is required, because GetWordID() might expand (!) the keyword in-place
  93. BYTE sTmp [ MAX_TOKEN_BYTES ];
  94. strncpy ( (char*)sTmp, sToken, MAX_TOKEN_BYTES );
  95. sTmp[MAX_TOKEN_BYTES-1] = '\0';
  96. int iStopWord = 0;
  97. if ( m_pPlugin && m_pPlugin->m_fnPreMorph )
  98. m_pPlugin->m_fnPreMorph ( m_pPluginData, (char*)sTmp, &iStopWord );
  99. SphWordID_t uWordId = iStopWord ? 0 : m_pDict->GetWordID ( sTmp );
  100. if ( uWordId && m_pPlugin && m_pPlugin->m_fnPostMorph )
  101. {
  102. int iRes = m_pPlugin->m_fnPostMorph ( m_pPluginData, (char*)sTmp, &iStopWord );
  103. if ( iStopWord )
  104. uWordId = 0;
  105. else if ( iRes )
  106. uWordId = m_pDict->GetWordIDNonStemmed ( sTmp );
  107. }
  108. if ( !uWordId )
  109. {
  110. sToken = nullptr;
  111. // stopwords with step=0 must not affect pos
  112. if ( m_bEmptyStopword )
  113. m_iAtomPos--;
  114. }
  115. if ( bMultiDest && !bMultiDestHead )
  116. {
  117. assert ( m_dMultiforms.GetLength() );
  118. m_dMultiforms.Last().m_iDestCount++;
  119. m_dDestForms.Add ( sToken );
  120. } else
  121. AddChildKeyword ( pNode, sToken, iSkippedPosBeforeToken, tLimitSpec );
  122. if ( bMultiDestHead )
  123. {
  124. MultiformNode_t & tMulti = m_dMultiforms.Add();
  125. tMulti.m_pNode = pNode;
  126. tMulti.m_iDestStart = m_dDestForms.GetLength();
  127. tMulti.m_iDestCount = 0;
  128. }
  129. }
  130. }
  131. bool QueryTreeBuilder_c::HandleSpecialFields ( const char * & pPtr, FieldMask_t & dFields )
  132. {
  133. if ( *pPtr=='_' )
  134. {
  135. auto iLen = (int) strlen(g_szAll);
  136. if ( !strncmp ( pPtr, g_szAll, iLen ) )
  137. {
  138. pPtr += iLen;
  139. dFields.SetAll();
  140. return true;
  141. }
  142. }
  143. return false;
  144. }
  145. XQNode_t * QueryTreeBuilder_c::CreateNode ( XQLimitSpec_t & tLimitSpec )
  146. {
  147. auto * pNode = new XQNode_t(tLimitSpec);
  148. m_dSpawned.Add ( pNode );
  149. return pNode;
  150. }
  151. void QueryTreeBuilder_c::AddChildKeyword ( XQNode_t * pParent, const char * szKeyword, int iSkippedPosBeforeToken, const XQLimitSpec_t & tLimitSpec )
  152. {
  153. XQKeyword_t tKeyword ( szKeyword, m_iAtomPos );
  154. tKeyword.m_iSkippedBefore = iSkippedPosBeforeToken;
  155. auto * pNode = new XQNode_t ( tLimitSpec );
  156. pNode->m_pParent = pParent;
  157. pNode->m_dWords.Add ( tKeyword );
  158. pParent->m_dChildren.Add ( pNode );
  159. m_dSpawned.Add ( pNode );
  160. }
  161. //////////////////////////////////////////////////////////////////////////
  162. class QueryParserJson_c : public QueryParser_i
  163. {
  164. public:
  165. bool IsFullscan ( const CSphQuery & tQuery ) const final;
  166. bool IsFullscan ( const XQQuery_t & tQuery ) const final;
  167. bool ParseQuery ( XQQuery_t & tParsed, const char * sQuery, const CSphQuery * pQuery,
  168. const ISphTokenizer * pQueryTokenizer, const ISphTokenizer * pQueryTokenizerJson,
  169. const CSphSchema * pSchema, CSphDict * pDict, const CSphIndexSettings & tSettings ) const final;
  170. private:
  171. XQNode_t * ConstructMatchNode ( const JsonObj_c & tJson, bool bPhrase, QueryTreeBuilder_c & tBuilder ) const;
  172. XQNode_t * ConstructBoolNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const;
  173. XQNode_t * ConstructQLNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const;
  174. XQNode_t * ConstructMatchAllNode ( QueryTreeBuilder_c & tBuilder ) const;
  175. bool ConstructBoolNodeItems ( const JsonObj_c & tClause, CSphVector<XQNode_t *> & dItems, QueryTreeBuilder_c & tBuilder ) const;
  176. bool ConstructNodeOrFilter ( const JsonObj_c & tItem, CSphVector<XQNode_t *> & dNodes, QueryTreeBuilder_c & tBuilder ) const;
  177. XQNode_t * ConstructNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const;
  178. };
  179. bool QueryParserJson_c::IsFullscan ( const CSphQuery & tQuery ) const
  180. {
  181. // fixme: add more checks here
  182. return tQuery.m_sQuery.IsEmpty();
  183. }
  184. bool QueryParserJson_c::IsFullscan ( const XQQuery_t & tQuery ) const
  185. {
  186. return !tQuery.m_pRoot || ( !tQuery.m_pRoot->m_dChildren.GetLength() && !tQuery.m_pRoot->m_dWords.GetLength() );
  187. }
  188. bool QueryParserJson_c::ParseQuery ( XQQuery_t & tParsed, const char * szQuery, const CSphQuery * pQuery,
  189. const ISphTokenizer * pQueryTokenizerQL, const ISphTokenizer * pQueryTokenizerJson, const CSphSchema * pSchema, CSphDict * pDict,
  190. const CSphIndexSettings & tSettings ) const
  191. {
  192. JsonObj_c tRoot ( szQuery );
  193. // take only the first item of the query; ignore the rest
  194. int iNumIndexes = tRoot.Size();
  195. if ( !iNumIndexes )
  196. {
  197. tParsed.m_sParseError = "\"query\" property is empty";
  198. return false;
  199. }
  200. TokenizerRefPtr_c pMyJsonTokenizer { pQueryTokenizerJson->Clone ( SPH_CLONE_QUERY_LIGHTWEIGHT ) };
  201. DictRefPtr_c pMyDict { GetStatelessDict ( pDict ) };
  202. QueryTreeBuilder_c tBuilder ( pQuery, pQueryTokenizerQL, tSettings );
  203. tBuilder.Setup ( pSchema, pMyJsonTokenizer, pMyDict, &tParsed, tSettings );
  204. tParsed.m_pRoot = ConstructNode ( tRoot[0], tBuilder );
  205. if ( tBuilder.IsError() )
  206. {
  207. tBuilder.Cleanup();
  208. return false;
  209. }
  210. XQLimitSpec_t tLimitSpec;
  211. tParsed.m_pRoot = tBuilder.FixupTree ( tParsed.m_pRoot, tLimitSpec );
  212. if ( tBuilder.IsError() )
  213. {
  214. tBuilder.Cleanup();
  215. return false;
  216. }
  217. return true;
  218. }
  // Operator names looked up by StrToNodeOp().
  // StrToNodeOp() casts the index of the matching entry to XQOperator_e, so the order here must
  // line up with the enum values (presumably SPH_QUERY_AND, then SPH_QUERY_OR - confirm against the enum).
  219. static const char * g_szOperatorNames[]=
  220. {
  221. "and",
  222. "or"
  223. };
  224. static XQOperator_e StrToNodeOp ( const char * szStr )
  225. {
  226. if ( !szStr )
  227. return SPH_QUERY_TOTAL;
  228. int iOp=0;
  229. for ( auto i : g_szOperatorNames )
  230. {
  231. if ( !strcmp ( szStr, i ) )
  232. return XQOperator_e(iOp);
  233. iOp++;
  234. }
  235. return SPH_QUERY_TOTAL;
  236. }
  237. XQNode_t * QueryParserJson_c::ConstructMatchNode ( const JsonObj_c & tJson, bool bPhrase, QueryTreeBuilder_c & tBuilder ) const
  238. {
  239. if ( !tJson.IsObj() )
  240. {
  241. tBuilder.Error ( "\"match\" value should be an object" );
  242. return nullptr;
  243. }
  244. if ( tJson.Size()!=1 )
  245. {
  246. tBuilder.Error ( "ill-formed \"match\" property" );
  247. return nullptr;
  248. }
  249. JsonObj_c tFields = tJson[0];
  250. tBuilder.SetString ( tFields.Name() );
  251. XQLimitSpec_t tLimitSpec;
  252. const char * szQuery = nullptr;
  253. XQOperator_e eNodeOp = bPhrase ? SPH_QUERY_PHRASE : SPH_QUERY_OR;
  254. bool bIgnore = false;
  255. if ( !tBuilder.ParseFields ( tLimitSpec.m_dFieldMask, tLimitSpec.m_iFieldMaxPos, bIgnore ) )
  256. return nullptr;
  257. if ( bIgnore )
  258. {
  259. tBuilder.Warning ( R"(ignoring fields in "%s", using "_all")", tFields.Name() );
  260. tLimitSpec.Reset();
  261. }
  262. tLimitSpec.m_bFieldSpec = true;
  263. if ( tFields.IsObj() )
  264. {
  265. // matching with flags
  266. CSphString sError;
  267. JsonObj_c tQuery = tFields.GetStrItem ( "query", sError );
  268. if ( !tQuery )
  269. {
  270. tBuilder.Error ( "%s", sError.cstr() );
  271. return nullptr;
  272. }
  273. szQuery = tQuery.SzVal();
  274. if ( !bPhrase )
  275. {
  276. JsonObj_c tOp = tFields.GetItem ( "operator" );
  277. if ( tOp ) // "and", "or"
  278. {
  279. eNodeOp = StrToNodeOp ( tOp.SzVal() );
  280. if ( eNodeOp==SPH_QUERY_TOTAL )
  281. {
  282. tBuilder.Error ( "unknown operator: \"%s\"", tOp.SzVal() );
  283. return nullptr;
  284. }
  285. }
  286. }
  287. } else
  288. {
  289. // simple list of keywords
  290. if ( !tFields.IsStr() )
  291. {
  292. tBuilder.Warning ( "values of properties in \"match\" should be strings or objects" );
  293. return nullptr;
  294. }
  295. szQuery = tFields.SzVal();
  296. }
  297. assert ( szQuery );
  298. XQNode_t * pNewNode = tBuilder.CreateNode ( tLimitSpec );
  299. pNewNode->SetOp ( eNodeOp );
  300. tBuilder.CollectKeywords ( szQuery, pNewNode, tLimitSpec );
  301. return pNewNode;
  302. }
  303. bool QueryParserJson_c::ConstructNodeOrFilter ( const JsonObj_c & tItem, CSphVector<XQNode_t *> & dNodes, QueryTreeBuilder_c & tBuilder ) const
  304. {
  305. // we created filters before, no need to process them again
  306. if ( !IsFilter ( tItem ) )
  307. {
  308. XQNode_t * pNode = ConstructNode ( tItem, tBuilder );
  309. if ( !pNode )
  310. return false;
  311. dNodes.Add ( pNode );
  312. }
  313. return true;
  314. }
  315. bool QueryParserJson_c::ConstructBoolNodeItems ( const JsonObj_c & tClause, CSphVector<XQNode_t *> & dItems, QueryTreeBuilder_c & tBuilder ) const
  316. {
  317. if ( tClause.IsArray() )
  318. {
  319. for ( const auto & tObject : tClause )
  320. {
  321. if ( !tObject.IsObj() )
  322. {
  323. tBuilder.Error ( "\"%s\" array value should be an object", tClause.Name() );
  324. return false;
  325. }
  326. if ( !ConstructNodeOrFilter ( tObject[0], dItems, tBuilder ) )
  327. return false;
  328. }
  329. } else if ( tClause.IsObj() )
  330. {
  331. if ( !ConstructNodeOrFilter ( tClause[0], dItems, tBuilder ) )
  332. return false;
  333. } else
  334. {
  335. tBuilder.Error ( "\"%s\" value should be an object or an array", tClause.Name() );
  336. return false;
  337. }
  338. return true;
  339. }
  340. XQNode_t * QueryParserJson_c::ConstructBoolNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const
  341. {
  342. if ( !tJson.IsObj() )
  343. {
  344. tBuilder.Error ( "\"bool\" value should be an object" );
  345. return nullptr;
  346. }
  347. CSphVector<XQNode_t *> dMust, dShould, dMustNot;
  348. for ( const auto & tClause : tJson )
  349. {
  350. CSphString sName = tClause.Name();
  351. if ( sName=="must" )
  352. {
  353. if ( !ConstructBoolNodeItems ( tClause, dMust, tBuilder ) )
  354. return nullptr;
  355. } else if ( sName=="should" )
  356. {
  357. if ( !ConstructBoolNodeItems ( tClause, dShould, tBuilder ) )
  358. return nullptr;
  359. } else if ( sName=="must_not" )
  360. {
  361. if ( !ConstructBoolNodeItems ( tClause, dMustNot, tBuilder ) )
  362. return nullptr;
  363. } else
  364. {
  365. tBuilder.Error ( "unknown bool query type: \"%s\"", sName.cstr() );
  366. return nullptr;
  367. }
  368. }
  369. XQNode_t * pMustNode = nullptr;
  370. XQNode_t * pShouldNode = nullptr;
  371. XQNode_t * pMustNotNode = nullptr;
  372. XQLimitSpec_t tLimitSpec;
  373. if ( dMust.GetLength() )
  374. {
  375. // no need to construct AND node for a single child
  376. if ( dMust.GetLength()==1 )
  377. pMustNode = dMust[0];
  378. else
  379. {
  380. XQNode_t * pAndNode = tBuilder.CreateNode ( tLimitSpec );
  381. pAndNode->SetOp ( SPH_QUERY_AND );
  382. for ( auto & i : dMust )
  383. {
  384. pAndNode->m_dChildren.Add(i);
  385. i->m_pParent = pAndNode;
  386. }
  387. pMustNode = pAndNode;
  388. }
  389. }
  390. if ( dShould.GetLength() )
  391. {
  392. if ( dShould.GetLength()==1 )
  393. pShouldNode = dShould[0];
  394. else
  395. {
  396. XQNode_t * pOrNode = tBuilder.CreateNode ( tLimitSpec );
  397. pOrNode->SetOp ( SPH_QUERY_OR );
  398. for ( auto & i : dShould )
  399. {
  400. pOrNode->m_dChildren.Add(i);
  401. i->m_pParent = pOrNode;
  402. }
  403. pShouldNode = pOrNode;
  404. }
  405. }
  406. // slightly different case - we need to construct the NOT node anyway
  407. if ( dMustNot.GetLength() )
  408. {
  409. XQNode_t * pNotNode = tBuilder.CreateNode ( tLimitSpec );
  410. pNotNode->SetOp ( SPH_QUERY_NOT );
  411. if ( dMustNot.GetLength()==1 )
  412. {
  413. pNotNode->m_dChildren.Add ( dMustNot[0] );
  414. dMustNot[0]->m_pParent = pNotNode;
  415. } else
  416. {
  417. XQNode_t * pOrNode = tBuilder.CreateNode ( tLimitSpec );
  418. pOrNode->SetOp ( SPH_QUERY_OR );
  419. for ( auto & i : dMustNot )
  420. {
  421. pOrNode->m_dChildren.Add ( i );
  422. i->m_pParent = pOrNode;
  423. }
  424. pNotNode->m_dChildren.Add ( pOrNode );
  425. pOrNode->m_pParent = pNotNode;
  426. }
  427. pMustNotNode = pNotNode;
  428. }
  429. int iTotalNodes = 0;
  430. iTotalNodes += pMustNode ? 1 : 0;
  431. iTotalNodes += pShouldNode ? 1 : 0;
  432. iTotalNodes += pMustNotNode ? 1 : 0;
  433. if ( !iTotalNodes )
  434. return nullptr;
  435. else if ( iTotalNodes==1 )
  436. {
  437. XQNode_t * pResultNode = nullptr;
  438. if ( pMustNode )
  439. pResultNode = pMustNode;
  440. else if ( pShouldNode )
  441. pResultNode = pShouldNode;
  442. else
  443. pResultNode = pMustNotNode;
  444. assert ( pResultNode );
  445. return pResultNode;
  446. } else
  447. {
  448. XQNode_t * pResultNode = pMustNode ? pMustNode : pMustNotNode;
  449. assert ( pResultNode );
  450. // combine 'must' and 'must_not' with AND
  451. if ( pMustNode && pMustNotNode )
  452. {
  453. XQNode_t * pAndNode = tBuilder.CreateNode(tLimitSpec);
  454. pAndNode->SetOp(SPH_QUERY_AND);
  455. pAndNode->m_dChildren.Add ( pMustNode );
  456. pAndNode->m_dChildren.Add ( pMustNotNode );
  457. pMustNode->m_pParent = pAndNode;
  458. pMustNotNode->m_pParent = pAndNode;
  459. pResultNode = pAndNode;
  460. }
  461. // combine 'result' node and 'should' node with MAYBE
  462. if ( pShouldNode )
  463. {
  464. XQNode_t * pMaybeNode = tBuilder.CreateNode ( tLimitSpec );
  465. pMaybeNode->SetOp ( SPH_QUERY_MAYBE );
  466. pMaybeNode->m_dChildren.Add ( pResultNode );
  467. pMaybeNode->m_dChildren.Add ( pShouldNode );
  468. pShouldNode->m_pParent = pMaybeNode;
  469. pResultNode->m_pParent = pMaybeNode;
  470. pResultNode = pMaybeNode;
  471. }
  472. return pResultNode;
  473. }
  474. return nullptr;
  475. }
  476. XQNode_t * QueryParserJson_c::ConstructQLNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const
  477. {
  478. if ( !tJson.IsStr() )
  479. {
  480. tBuilder.Error ( "\"query_string\" value should be an string" );
  481. return nullptr;
  482. }
  483. XQQuery_t tParsed;
  484. if ( !sphParseExtendedQuery ( tParsed, tJson.StrVal().cstr(), tBuilder.GetQuery(), tBuilder.GetQLTokenizer(), tBuilder.GetSchema(), tBuilder.GetDict(), tBuilder.GetIndexSettings() ) )
  485. {
  486. tBuilder.Error ( "%s", tParsed.m_sParseError.cstr() );
  487. return nullptr;
  488. }
  489. if ( !tParsed.m_sParseWarning.IsEmpty() )
  490. tBuilder.Warning ( "%s", tParsed.m_sParseWarning.cstr() );
  491. XQNode_t * pRoot = tParsed.m_pRoot;
  492. tParsed.m_pRoot = nullptr;
  493. return pRoot;
  494. }
  495. XQNode_t * QueryParserJson_c::ConstructMatchAllNode ( QueryTreeBuilder_c & tBuilder ) const
  496. {
  497. XQLimitSpec_t tLimitSpec;
  498. XQNode_t * pNewNode = tBuilder.CreateNode ( tLimitSpec );
  499. pNewNode->SetOp ( SPH_QUERY_NULL );
  500. return pNewNode;
  501. }
  502. XQNode_t * QueryParserJson_c::ConstructNode ( const JsonObj_c & tJson, QueryTreeBuilder_c & tBuilder ) const
  503. {
  504. CSphString sName = tJson.Name();
  505. if ( !tJson || sName.IsEmpty() )
  506. {
  507. tBuilder.Error ( "empty json found" );
  508. return nullptr;
  509. }
  510. bool bMatch = sName=="match";
  511. bool bPhrase = sName=="match_phrase";
  512. if ( bMatch || bPhrase )
  513. return ConstructMatchNode ( tJson, bPhrase, tBuilder );
  514. if ( sName=="match_all" )
  515. return ConstructMatchAllNode ( tBuilder );
  516. if ( sName=="bool" )
  517. return ConstructBoolNode ( tJson, tBuilder );
  518. if ( sName=="query_string" )
  519. return ConstructQLNode ( tJson, tBuilder );
  520. return nullptr;
  521. }
  522. bool NonEmptyQuery ( const JsonObj_c & tQuery )
  523. {
  524. return ( tQuery.HasItem("match")
  525. || tQuery.HasItem("match_phrase")
  526. || tQuery.HasItem("bool") )
  527. || tQuery.HasItem("query_string");
  528. }
  529. //////////////////////////////////////////////////////////////////////////
  /// A geo point given by numeric coordinates; both default to zero.
  /// GeoDistInfo_c passes them to GEODIST() together with the "in=deg" option.
  struct LocationField_t
  {
      float m_fLat { 0.0f }; ///< latitude
      float m_fLon { 0.0f }; ///< longitude
  };
  // Source side of a geo distance: latitude and longitude kept as strings.
  // GeoDistInfo_c::BuildExprString() inserts them verbatim into the GEODIST() expression text;
  // presumably they are attribute names or expressions - confirm against ParseLocation().
  535. struct LocationSource_t
  536. {
  537. CSphString m_sLat;
  538. CSphString m_sLon;
  539. };
  // Forward declaration; the definition is outside the visible part of this file.
  // GeoDistInfo_c::Parse() calls it with exactly one of pField / pSource non-null
  // and treats a false return as failure with sError set.
  // NOTE(review): presumably fills *pField with numeric coordinates or *pSource with their string form - confirm.
  540. static bool ParseLocation ( const char * sName, const JsonObj_c & tLoc, LocationField_t * pField, LocationSource_t * pSource, CSphString & sError );
  541. class GeoDistInfo_c
  542. {
  543. public:
  544. bool Parse ( const JsonObj_c & tRoot, bool bNeedDistance, CSphString & sError, CSphString & sWarning );
  545. CSphString BuildExprString() const;
  546. bool IsGeoDist() const { return m_bGeodist; }
  547. float GetDistance() const { return m_fDistance; }
  548. private:
  549. bool m_bGeodist {false};
  550. bool m_bGeodistAdaptive {true};
  551. float m_fDistance {0.0f};
  552. LocationField_t m_tLocAnchor;
  553. LocationSource_t m_tLocSource;
  554. bool ParseDistance ( const JsonObj_c & tDistance, CSphString & sError );
  555. };
  556. bool GeoDistInfo_c::Parse ( const JsonObj_c & tRoot, bool bNeedDistance, CSphString & sError, CSphString & sWarning )
  557. {
  558. JsonObj_c tLocAnchor = tRoot.GetItem("location_anchor");
  559. JsonObj_c tLocSource = tRoot.GetItem("location_source");
  560. if ( !tLocAnchor || !tLocSource )
  561. {
  562. if ( !tLocAnchor && !tLocSource )
  563. sError = R"("location_anchor" and "location_source" properties missing)";
  564. else
  565. sError.SetSprintf ( "\"%s\" property missing", ( !tLocAnchor ? "location_anchor" : "location_source" ) );
  566. return false;
  567. }
  568. if ( !ParseLocation ( "location_anchor", tLocAnchor, &m_tLocAnchor, nullptr, sError )
  569. || !ParseLocation ( "location_source", tLocSource, nullptr, &m_tLocSource, sError ) )
  570. return false;
  571. JsonObj_c tType = tRoot.GetStrItem ( "distance_type", sError, true );
  572. if ( tType )
  573. {
  574. CSphString sType = tType.StrVal();
  575. if ( sType!="adaptive" && sType!="haversine" )
  576. {
  577. sWarning.SetSprintf ( R"("distance_type" property type is invalid: "%s", defaulting to "adaptive")", sType.cstr() );
  578. m_bGeodistAdaptive = true;
  579. } else
  580. m_bGeodistAdaptive = sType=="adaptive";
  581. } else if ( !sError.IsEmpty() )
  582. return false;
  583. JsonObj_c tDistance = tRoot.GetItem("distance");
  584. if ( tDistance )
  585. {
  586. if ( !ParseDistance ( tDistance, sError ) )
  587. return false;
  588. } else if ( bNeedDistance )
  589. {
  590. sError = "\"distance\" not specified";
  591. return false;
  592. }
  593. m_bGeodist = true;
  594. return true;
  595. }
  596. bool GeoDistInfo_c::ParseDistance ( const JsonObj_c & tDistance, CSphString & sError )
  597. {
  598. if ( tDistance.IsNum() )
  599. {
  600. // no units specified, meters assumed
  601. m_fDistance = tDistance.FltVal();
  602. return true;
  603. }
  604. if ( !tDistance.IsStr() )
  605. {
  606. sError = "\"distance\" property should be a number or a string";
  607. return false;
  608. }
  609. const char * p = tDistance.SzVal();
  610. assert ( p );
  611. while ( *p && sphIsSpace(*p) )
  612. p++;
  613. const char * szNumber = p;
  614. while ( *p && ( *p=='.' || ( *p>='0' && *p<='9' ) ) )
  615. p++;
  616. CSphString sNumber;
  617. sNumber.SetBinary ( szNumber, p-szNumber );
  618. while ( *p && sphIsSpace(*p) )
  619. p++;
  620. const char * szUnit = p;
  621. while ( *p && sphIsAlpha(*p) )
  622. p++;
  623. CSphString sUnit;
  624. sUnit.SetBinary ( szUnit, p-szUnit );
  625. m_fDistance = (float)atof ( sNumber.cstr() );
  626. float fCoeff = 1.0f;
  627. if ( !sphGeoDistanceUnit ( sUnit.cstr(), fCoeff ) )
  628. {
  629. sError.SetSprintf ( "unknown distance unit: %s", sUnit.cstr() );
  630. return false;
  631. }
  632. m_fDistance *= fCoeff;
  633. return true;
  634. }
  635. CSphString GeoDistInfo_c::BuildExprString() const
  636. {
  637. CSphString sResult;
  638. sResult.SetSprintf ( "GEODIST(%f, %f, %s, %s, {in=deg, out=m, method=%s})", m_tLocAnchor.m_fLat, m_tLocAnchor.m_fLon, m_tLocSource.m_sLat.cstr(), m_tLocSource.m_sLon.cstr(), m_bGeodistAdaptive ? "adaptive" : "haversine" );
  639. return sResult;
  640. }
  641. //////////////////////////////////////////////////////////////////////////
  642. static void AddToSelectList ( CSphQuery & tQuery, const CSphVector<CSphQueryItem> & dItems, int iFirstItem = 0 )
  643. {
  644. for ( int i = iFirstItem; i < dItems.GetLength(); i++ )
  645. tQuery.m_sSelect.SetSprintf ( "%s, %s as %s", tQuery.m_sSelect.cstr(), dItems[i].m_sExpr.cstr(), dItems[i].m_sAlias.cstr() );
  646. }
  647. static JsonObj_c GetFilterColumn ( const JsonObj_c & tJson, CSphString & sError )
  648. {
  649. if ( !tJson.IsObj() )
  650. {
  651. sError = "filter should be an object";
  652. return JsonNull;
  653. }
  654. if ( tJson.Size()!=1 )
  655. {
  656. sError = "\"equals\" filter should have only one element";
  657. return JsonNull;
  658. }
  659. JsonObj_c tColumn = tJson[0];
  660. if ( !tColumn )
  661. {
  662. sError = "empty filter found";
  663. return JsonNull;
  664. }
  665. return tColumn;
  666. }
  667. static bool ConstructEqualsFilter ( const JsonObj_c & tJson, CSphVector<CSphFilterSettings> & dFilters, CSphString & sError )
  668. {
  669. JsonObj_c tColumn = GetFilterColumn ( tJson, sError );
  670. if ( !tColumn )
  671. return false;
  672. if ( !tColumn.IsNum() && !tColumn.IsStr() )
  673. {
  674. sError = "\"equals\" filter expects numeric or string values";
  675. return false;
  676. }
  677. CSphFilterSettings tFilter;
  678. tFilter.m_sAttrName = tColumn.Name();
  679. sphColumnToLowercase ( const_cast<char *>( tFilter.m_sAttrName.cstr() ) );
  680. if ( tColumn.IsInt() )
  681. {
  682. tFilter.m_eType = SPH_FILTER_VALUES;
  683. tFilter.m_dValues.Add ( tColumn.IntVal() );
  684. } else if ( tColumn.IsNum() )
  685. {
  686. tFilter.m_eType = SPH_FILTER_FLOATRANGE;
  687. tFilter.m_fMinValue = tColumn.FltVal();
  688. tFilter.m_fMaxValue = tColumn.FltVal();
  689. tFilter.m_bHasEqualMin = true;
  690. tFilter.m_bHasEqualMax = true;
  691. tFilter.m_bExclude = false;
  692. } else
  693. {
  694. tFilter.m_eType = SPH_FILTER_STRING;
  695. tFilter.m_dStrings.Add ( tColumn.StrVal() );
  696. tFilter.m_bExclude = false;
  697. }
  698. dFilters.Add ( tFilter );
  699. return true;
  700. }
  701. static bool ConstructRangeFilter ( const JsonObj_c & tJson, CSphVector<CSphFilterSettings> & dFilters, CSphString & sError )
  702. {
  703. JsonObj_c tColumn = GetFilterColumn ( tJson, sError );
  704. if ( !tColumn )
  705. return false;
  706. CSphFilterSettings tNewFilter;
  707. tNewFilter.m_sAttrName = tColumn.Name();
  708. sphColumnToLowercase ( const_cast<char *>( tNewFilter.m_sAttrName.cstr() ) );
  709. tNewFilter.m_bHasEqualMin = false;
  710. tNewFilter.m_bHasEqualMax = false;
  711. JsonObj_c tLess = tColumn.GetItem("lt");
  712. if ( !tLess )
  713. {
  714. tLess = tColumn.GetItem("lte");
  715. tNewFilter.m_bHasEqualMax = tLess;
  716. }
  717. JsonObj_c tGreater = tColumn.GetItem("gt");
  718. if ( !tGreater )
  719. {
  720. tGreater = tColumn.GetItem("gte");
  721. tNewFilter.m_bHasEqualMin = tGreater;
  722. }
  723. bool bLess = tLess;
  724. bool bGreater = tGreater;
  725. if ( !bLess && !bGreater )
  726. {
  727. sError = "empty filter found";
  728. return false;
  729. }
  730. if ( ( bLess && !tLess.IsNum() ) || ( bGreater && !tGreater.IsNum() ) )
  731. {
  732. sError = "range filter expects numeric values";
  733. return false;
  734. }
  735. bool bIntFilter = ( bLess && tLess.IsInt() ) || ( bGreater && tGreater.IsInt() );
  736. if ( bLess )
  737. {
  738. if ( bIntFilter )
  739. tNewFilter.m_iMaxValue = tLess.IntVal();
  740. else
  741. tNewFilter.m_fMaxValue = tLess.FltVal();
  742. tNewFilter.m_bOpenLeft = !bGreater;
  743. }
  744. if ( bGreater )
  745. {
  746. if ( bIntFilter )
  747. tNewFilter.m_iMinValue = tGreater.IntVal();
  748. else
  749. tNewFilter.m_fMinValue = tGreater.FltVal();
  750. tNewFilter.m_bOpenRight = !bLess;
  751. }
  752. tNewFilter.m_eType = bIntFilter ? SPH_FILTER_RANGE : SPH_FILTER_FLOATRANGE;
  753. // float filters don't support open ranges
  754. if ( !bIntFilter )
  755. {
  756. if ( tNewFilter.m_bOpenRight )
  757. tNewFilter.m_fMaxValue = FLT_MAX;
  758. if ( tNewFilter.m_bOpenLeft )
  759. tNewFilter.m_fMinValue = FLT_MIN;
  760. }
  761. dFilters.Add ( tNewFilter );
  762. return true;
  763. }
  764. static bool ConstructGeoFilter ( const JsonObj_c & tJson, CSphVector<CSphFilterSettings> & dFilters, CSphVector<CSphQueryItem> & dQueryItems, int & iQueryItemId, CSphString & sError, CSphString & sWarning )
  765. {
  766. GeoDistInfo_c tGeoDist;
  767. if ( !tGeoDist.Parse ( tJson, true, sError, sWarning ) )
  768. return false;
  769. CSphQueryItem & tQueryItem = dQueryItems.Add();
  770. tQueryItem.m_sExpr = tGeoDist.BuildExprString();
  771. tQueryItem.m_sAlias.SetSprintf ( "%s%d", g_szFilter, iQueryItemId++ );
  772. CSphFilterSettings & tFilter = dFilters.Add();
  773. tFilter.m_sAttrName = tQueryItem.m_sAlias;
  774. tFilter.m_bOpenLeft = true;
  775. tFilter.m_bHasEqualMax = true;
  776. tFilter.m_fMaxValue = tGeoDist.GetDistance();
  777. tFilter.m_eType = SPH_FILTER_FLOATRANGE;
  778. return true;
  779. }
  780. static bool ConstructFilter ( const JsonObj_c & tJson, CSphVector<CSphFilterSettings> & dFilters, CSphVector<CSphQueryItem> & dQueryItems, int & iQueryItemId, CSphString & sError, CSphString & sWarning )
  781. {
  782. if ( !IsFilter ( tJson ) )
  783. return true;
  784. CSphString sName = tJson.Name();
  785. if ( sName=="equals" )
  786. return ConstructEqualsFilter ( tJson, dFilters, sError );
  787. if ( sName=="range" )
  788. return ConstructRangeFilter ( tJson, dFilters, sError );
  789. if ( sName=="geo_distance" )
  790. return ConstructGeoFilter ( tJson, dFilters, dQueryItems, iQueryItemId, sError, sWarning );
  791. sError.SetSprintf ( "unknown filter type: %s", sName.cstr() );
  792. return false;
  793. }
  794. static bool ConstructBoolNodeFilters ( const JsonObj_c & tClause, CSphVector<CSphFilterSettings> & dFilters, CSphVector<CSphQueryItem> & dQueryItems, int & iQueryItemId, CSphString & sError, CSphString & sWarning )
  795. {
  796. if ( tClause.IsArray() )
  797. {
  798. for ( const auto & tObject : tClause )
  799. {
  800. if ( !tObject.IsObj() )
  801. {
  802. sError.SetSprintf ( "\"%s\" array value should be an object", tClause.Name() );
  803. return false;
  804. }
  805. JsonObj_c tItem = tObject[0];
  806. if ( !ConstructFilter ( tItem, dFilters, dQueryItems, iQueryItemId, sError, sWarning ) )
  807. return false;
  808. }
  809. } else if ( tClause.IsObj() )
  810. {
  811. JsonObj_c tItem = tClause[0];
  812. if ( !ConstructFilter ( tItem, dFilters, dQueryItems, iQueryItemId, sError, sWarning ) )
  813. return false;
  814. } else
  815. {
  816. sError.SetSprintf ( "\"%s\" value should be an object or an array", tClause.Name() );
  817. return false;
  818. }
  819. return true;
  820. }
  821. static bool ConstructBoolFilters ( const JsonObj_c & tBool, CSphQuery & tQuery, int & iQueryItemId, CSphString & sError, CSphString & sWarning )
  822. {
  823. // non-recursive for now, maybe we should fix this later
  824. if ( !tBool.IsObj() )
  825. {
  826. sError = "\"bool\" value should be an object";
  827. return false;
  828. }
  829. CSphVector<CSphFilterSettings> dMust, dShould, dMustNot;
  830. CSphVector<CSphQueryItem> dMustQI, dShouldQI, dMustNotQI;
  831. for ( const auto & tClause : tBool )
  832. {
  833. CSphString sName = tClause.Name();
  834. if ( sName=="must" )
  835. {
  836. if ( !ConstructBoolNodeFilters ( tClause, dMust, dMustQI, iQueryItemId, sError, sWarning ) )
  837. return false;
  838. } else if ( sName=="should" )
  839. {
  840. if ( !ConstructBoolNodeFilters ( tClause, dShould, dShouldQI, iQueryItemId, sError, sWarning ) )
  841. return false;
  842. } else if ( sName=="must_not" )
  843. {
  844. if ( !ConstructBoolNodeFilters ( tClause, dMustNot, dMustNotQI, iQueryItemId, sError, sWarning ) )
  845. return false;
  846. } else
  847. {
  848. sError.SetSprintf ( "unknown bool query type: \"%s\"", sName.cstr() );
  849. return false;
  850. }
  851. }
  852. if ( dMustNot.GetLength() )
  853. {
  854. for ( auto & i : dMustNot )
  855. {
  856. i.m_bExclude = true;
  857. dMust.Add(i);
  858. }
  859. for ( auto & i : dMustNotQI )
  860. dMustQI.Add(i);
  861. }
  862. if ( dMust.GetLength() )
  863. {
  864. AddToSelectList ( tQuery, dMustQI );
  865. tQuery.m_dFilters.SwapData ( dMust );
  866. for ( const auto & i : dMustQI )
  867. tQuery.m_dItems.Add(i);
  868. return true;
  869. }
  870. if ( dShould.GetLength() )
  871. {
  872. AddToSelectList ( tQuery, dShouldQI );
  873. tQuery.m_dFilters.SwapData ( dShould );
  874. for ( const auto & i : dShouldQI )
  875. tQuery.m_dItems.Add(i);
  876. // need a filter tree
  877. FilterTreeItem_t & tTreeItem = tQuery.m_dFilterTree.Add();
  878. tTreeItem.m_iFilterItem = 0;
  879. int iRootNode = 0;
  880. ARRAY_FOREACH ( i, tQuery.m_dFilters )
  881. {
  882. int iNewFilterNodeId = tQuery.m_dFilterTree.GetLength();
  883. FilterTreeItem_t & tNewFilterNode = tQuery.m_dFilterTree.Add();
  884. tNewFilterNode.m_iFilterItem = i;
  885. int iNewOrNodeId = tQuery.m_dFilterTree.GetLength();
  886. FilterTreeItem_t & tNewOrNode = tQuery.m_dFilterTree.Add();
  887. tNewOrNode.m_bOr = true;
  888. tNewOrNode.m_iLeft = iRootNode;
  889. tNewOrNode.m_iRight = iNewFilterNodeId;
  890. iRootNode = iNewOrNodeId;
  891. }
  892. }
  893. return true;
  894. }
  895. static bool ConstructFilters ( const JsonObj_c & tJson, CSphQuery & tQuery, CSphString & sError, CSphString & sWarning )
  896. {
  897. if ( !tJson )
  898. return false;
  899. CSphString sName = tJson.Name();
  900. if ( sName.IsEmpty() )
  901. return false;
  902. if ( sName!="query" )
  903. {
  904. sError.SetSprintf ( R"("query" expected, got %s)", sName.cstr() );
  905. return false;
  906. }
  907. int iQueryItemId = 0;
  908. JsonObj_c tBool = tJson.GetItem("bool");
  909. if ( tBool )
  910. return ConstructBoolFilters ( tBool, tQuery, iQueryItemId, sError, sWarning );
  911. for ( const auto & tChild : tJson )
  912. if ( IsFilter ( tChild ) )
  913. {
  914. int iFirstNewItem = tQuery.m_dItems.GetLength();
  915. if ( !ConstructFilter ( tChild, tQuery.m_dFilters, tQuery.m_dItems, iQueryItemId, sError, sWarning ) )
  916. return false;
  917. AddToSelectList ( tQuery, tQuery.m_dItems, iFirstNewItem );
  918. // handle only the first filter in this case
  919. break;
  920. }
  921. return true;
  922. }
  923. //////////////////////////////////////////////////////////////////////////
  924. static bool ParseSnippet ( const JsonObj_c & tSnip, CSphQuery & tQuery, CSphString & sError );
  925. static bool ParseSort ( const JsonObj_c & tSort, CSphQuery & tQuery, bool & bGotWeight, CSphString & sError, CSphString & sWarning );
  926. static bool ParseSelect ( const JsonObj_c & tSelect, CSphQuery & tQuery, CSphString & sError );
  927. static bool ParseExpr ( const JsonObj_c & tExpr, CSphQuery & tQuery, CSphString & sError );
  928. static bool ParseIndex ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, CSphString & sError )
  929. {
  930. if ( !tRoot )
  931. {
  932. sError.SetSprintf ( "unable to parse: %s", tRoot.GetErrorPtr() );
  933. return false;
  934. }
  935. JsonObj_c tIndex = tRoot.GetStrItem ( "index", sError );
  936. if ( !tIndex )
  937. return false;
  938. tStmt.m_sIndex = tIndex.StrVal();
  939. tStmt.m_tQuery.m_sIndexes = tStmt.m_sIndex;
  940. return true;
  941. }
  942. static bool ParseIndexId ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
  943. {
  944. if ( !ParseIndex ( tRoot, tStmt, sError ) )
  945. return false;
  946. JsonObj_c tId = tRoot.GetIntItem ( "id", sError );
  947. if ( tId )
  948. tDocId = tId.IntVal();
  949. else
  950. tDocId = 0; // enable auto-id
  951. return true;
  952. }
  953. static bool ParseCluster ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, CSphString & sError )
  954. {
  955. if ( !tRoot )
  956. {
  957. sError.SetSprintf ( "unable to parse: %s", tRoot.GetErrorPtr() );
  958. return false;
  959. }
  960. // cluster is optional
  961. JsonObj_c tCluster = tRoot.GetStrItem ( "cluster", sError, true );
  962. if ( tCluster )
  963. tStmt.m_sCluster = tCluster.StrVal();
  964. return true;
  965. }
  966. QueryParser_i * sphCreateJsonQueryParser()
  967. {
  968. return new QueryParserJson_c;
  969. }
  970. bool ParseJsonQueryFilters ( const JsonObj_c & tJson, CSphQuery & tQuery, CSphString & sError, CSphString & sWarning )
  971. {
  972. if ( tJson && !tJson.IsObj() )
  973. {
  974. sError = "\"query\" property value should be an object";
  975. return false;
  976. }
  977. CSphQueryItem & tItem = tQuery.m_dItems.Add();
  978. tItem.m_sExpr = "*";
  979. tItem.m_sAlias = "*";
  980. tQuery.m_sSelect = "*";
  981. // we need to know if the query is fullscan before re-parsing it to build AST tree
  982. // so we need to do some preprocessing here
  983. bool bFullscan = !tJson || ( tJson.Size()==1 && tJson.HasItem("match_all") );
  984. if ( !bFullscan )
  985. tQuery.m_sQuery = tJson.AsString();
  986. // because of the way sphinxql parsing was implemented
  987. // we need to parse our query and extract filters now
  988. // and parse the rest of the query later
  989. if ( tJson )
  990. {
  991. if ( !ConstructFilters ( tJson, tQuery, sError, sWarning ) )
  992. return false;
  993. }
  994. return true;
  995. }
  996. static bool ParseLimits ( const JsonObj_c & tRoot, CSphQuery & tQuery, CSphString & sError )
  997. {
  998. JsonObj_c tLimit = tRoot.GetIntItem ( "limit", "size", sError );
  999. if ( !sError.IsEmpty() )
  1000. return false;
  1001. if ( tLimit )
  1002. tQuery.m_iLimit = tLimit.IntVal();
  1003. JsonObj_c tOffset = tRoot.GetIntItem ( "offset", "from", sError );
  1004. if ( !sError.IsEmpty() )
  1005. return false;
  1006. if ( tOffset )
  1007. tQuery.m_iOffset = tOffset.IntVal();
  1008. JsonObj_c tMaxMatches = tRoot.GetIntItem ( "max_matches", sError, true );
  1009. if ( !sError.IsEmpty() )
  1010. return false;
  1011. if ( tMaxMatches )
  1012. tQuery.m_iMaxMatches = tMaxMatches.IntVal();
  1013. return true;
  1014. }
  1015. bool sphParseJsonQuery ( const char * szQuery, CSphQuery & tQuery, bool & bProfile, CSphString & sError, CSphString & sWarning )
  1016. {
  1017. JsonObj_c tRoot ( szQuery );
  1018. if ( !tRoot )
  1019. {
  1020. sError.SetSprintf ( "unable to parse: %s", tRoot.GetErrorPtr() );
  1021. return false;
  1022. }
  1023. tQuery.m_sRawQuery = szQuery;
  1024. JsonObj_c tIndex = tRoot.GetStrItem ( "index", sError );
  1025. if ( !tIndex )
  1026. return false;
  1027. tQuery.m_sIndexes = tIndex.StrVal();
  1028. tQuery.m_sIndexes.ToLower();
  1029. if ( tQuery.m_sIndexes==g_szAll )
  1030. tQuery.m_sIndexes = "*";
  1031. if ( !ParseLimits ( tRoot, tQuery, sError ) )
  1032. return false;
  1033. JsonObj_c tJsonQuery = tRoot.GetItem("query");
  1034. // common code used by search queries and update/delete by query
  1035. if ( !ParseJsonQueryFilters ( tJsonQuery, tQuery, sError, sWarning ) )
  1036. return false;
  1037. bProfile = false;
  1038. if ( !tRoot.FetchBoolItem ( bProfile, "profile", sError, true ) )
  1039. return false;
  1040. // expression columns go first to select list
  1041. JsonObj_c tExpr = tRoot.GetItem ( "script_fields" );
  1042. if ( tExpr && !ParseExpr ( tExpr, tQuery, sError ) )
  1043. return false;
  1044. JsonObj_c tSnip = tRoot.GetObjItem ( "highlight", sError, true );
  1045. if ( tSnip )
  1046. {
  1047. if ( !ParseSnippet ( tSnip, tQuery, sError ) )
  1048. return false;
  1049. }
  1050. else if ( !sError.IsEmpty() )
  1051. return false;
  1052. JsonObj_c tSort = tRoot.GetItem("sort");
  1053. if ( tSort && !( tSort.IsArray() || tSort.IsObj() ) )
  1054. {
  1055. sError = "\"sort\" property value should be an array or an object";
  1056. return false;
  1057. }
  1058. if ( tSort )
  1059. {
  1060. bool bGotWeight = false;
  1061. if ( !ParseSort ( tSort, tQuery, bGotWeight, sError, sWarning ) )
  1062. return false;
  1063. JsonObj_c tTrackScore = tRoot.GetBoolItem ( "track_scores", sError, true );
  1064. if ( !sError.IsEmpty() )
  1065. return false;
  1066. bool bTrackScore = tTrackScore && tTrackScore.BoolVal();
  1067. if ( !bGotWeight && !bTrackScore )
  1068. tQuery.m_eRanker = SPH_RANK_NONE;
  1069. }
  1070. // source \ select filter
  1071. JsonObj_c tSelect = tRoot.GetItem("_source");
  1072. return !tSelect || ParseSelect ( tSelect, tQuery, sError );
  1073. }
  1074. bool ParseJsonInsert ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, DocID_t & tDocId, bool bReplace, CSphString & sError )
  1075. {
  1076. tStmt.m_eStmt = bReplace ? STMT_REPLACE : STMT_INSERT;
  1077. if ( !ParseIndexId ( tRoot, tStmt, tDocId, sError ) )
  1078. return false;
  1079. if ( !ParseCluster ( tRoot, tStmt, sError ) )
  1080. return false;
  1081. tStmt.m_dInsertSchema.Add ( sphGetDocidName() );
  1082. SqlInsert_t & tId = tStmt.m_dInsertValues.Add();
  1083. tId.m_iType = sphGetTokTypeInt();
  1084. tId.m_iVal = tDocId;
  1085. // "doc" is optional
  1086. JsonObj_c tSource = tRoot.GetItem("doc");
  1087. if ( tSource )
  1088. {
  1089. for ( const auto & tItem : tSource )
  1090. {
  1091. tStmt.m_dInsertSchema.Add ( tItem.Name() );
  1092. tStmt.m_dInsertSchema.Last().ToLower();
  1093. SqlInsert_t & tNewValue = tStmt.m_dInsertValues.Add();
  1094. if ( tItem.IsStr() )
  1095. {
  1096. tNewValue.m_iType = sphGetTokTypeStr();
  1097. tNewValue.m_sVal = tItem.StrVal();
  1098. } else if ( tItem.IsDbl() )
  1099. {
  1100. tNewValue.m_iType = sphGetTokTypeFloat();
  1101. tNewValue.m_fVal = tItem.FltVal();
  1102. } else if ( tItem.IsInt() || tItem.IsBool() )
  1103. {
  1104. tNewValue.m_iType = sphGetTokTypeInt();
  1105. tNewValue.m_iVal = tItem.IntVal();
  1106. } else if ( tItem.IsArray() )
  1107. {
  1108. tNewValue.m_iType = sphGetTokTypeConstMVA();
  1109. tNewValue.m_pVals = new RefcountedVector_c<SphAttr_t>;
  1110. for ( const auto & tArrayItem : tItem )
  1111. {
  1112. if ( !tArrayItem.IsInt() )
  1113. {
  1114. sError = "MVA elements should be integers";
  1115. return false;
  1116. }
  1117. tNewValue.m_pVals->Add ( tArrayItem.IntVal() );
  1118. }
  1119. } else if ( tItem.IsObj() )
  1120. {
  1121. tNewValue.m_iType = sphGetTokTypeStr();
  1122. tNewValue.m_sVal = tItem.AsString();
  1123. } else
  1124. {
  1125. sError = "unsupported value type";
  1126. return false;
  1127. }
  1128. }
  1129. }
  1130. if ( !tStmt.CheckInsertIntegrity() )
  1131. {
  1132. sError = "wrong number of values";
  1133. return false;
  1134. }
  1135. return true;
  1136. }
  1137. bool sphParseJsonInsert ( const char * szInsert, SqlStmt_t & tStmt, DocID_t & tDocId, bool bReplace, CSphString & sError )
  1138. {
  1139. JsonObj_c tRoot ( szInsert );
  1140. return ParseJsonInsert ( tRoot, tStmt, tDocId, bReplace, sError );
  1141. }
  1142. static bool ParseUpdateDeleteQueries ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
  1143. {
  1144. tStmt.m_tQuery.m_sSelect = "*";
  1145. if ( !ParseIndex ( tRoot, tStmt, sError ) )
  1146. return false;
  1147. if ( !ParseCluster ( tRoot, tStmt, sError ) )
  1148. return false;
  1149. JsonObj_c tId = tRoot.GetIntItem ( "id", sError );
  1150. if ( tId )
  1151. {
  1152. CSphFilterSettings & tFilter = tStmt.m_tQuery.m_dFilters.Add();
  1153. tFilter.m_eType = SPH_FILTER_VALUES;
  1154. tFilter.m_dValues.Add ( tId.IntVal() );
  1155. tFilter.m_sAttrName = "@id";
  1156. tDocId = tId.IntVal();
  1157. }
  1158. // "query" is optional
  1159. JsonObj_c tQuery = tRoot.GetItem("query");
  1160. if ( tQuery && tId )
  1161. {
  1162. sError = R"(both "id" and "query" specified)";
  1163. return false;
  1164. }
  1165. CSphString sWarning; // fixme: add to results
  1166. return ParseJsonQueryFilters ( tQuery, tStmt.m_tQuery, sError, sWarning );
  1167. }
  1168. static bool ParseJsonUpdate ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
  1169. {
  1170. tStmt.m_eStmt = STMT_UPDATE;
  1171. tStmt.m_tUpdate.m_dRowOffset.Add ( 0 );
  1172. if ( !ParseUpdateDeleteQueries ( tRoot, tStmt, tDocId, sError ) )
  1173. return false;
  1174. JsonObj_c tSource = tRoot.GetObjItem ( "doc", sError );
  1175. if ( !tSource )
  1176. return false;
  1177. CSphVector<int64_t> dMVA;
  1178. for ( const auto & tItem : tSource )
  1179. {
  1180. bool bFloat = tItem.IsNum();
  1181. bool bInt = tItem.IsInt();
  1182. bool bBool = tItem.IsBool();
  1183. bool bString = tItem.IsStr();
  1184. bool bArray = tItem.IsArray();
  1185. bool bObject = tItem.IsObj();
  1186. if ( !bFloat && !bInt && !bBool && !bString && !bArray && !bObject )
  1187. {
  1188. sError = "unsupported value type";
  1189. return false;
  1190. }
  1191. CSphAttrUpdate & tUpd = tStmt.m_tUpdate;
  1192. CSphString sAttr = tItem.Name();
  1193. TypedAttribute_t & tTypedAttr = tUpd.m_dAttributes.Add();
  1194. tTypedAttr.m_sName = sAttr.ToLower();
  1195. if ( bInt || bBool )
  1196. {
  1197. int64_t iValue = tItem.IntVal();
  1198. tUpd.m_dPool.Add ( (DWORD)iValue );
  1199. auto uHi = (DWORD)( iValue>>32 );
  1200. if ( uHi )
  1201. {
  1202. tUpd.m_dPool.Add ( uHi );
  1203. tTypedAttr.m_eType = SPH_ATTR_BIGINT;
  1204. } else
  1205. tTypedAttr.m_eType = SPH_ATTR_INTEGER;
  1206. }
  1207. else if ( bFloat )
  1208. {
  1209. auto fValue = tItem.FltVal();
  1210. tUpd.m_dPool.Add ( sphF2DW ( fValue ) );
  1211. tTypedAttr.m_eType = SPH_ATTR_FLOAT;
  1212. }
  1213. else if ( bString || bObject )
  1214. {
  1215. CSphString sEncoded;
  1216. const char * szValue = tItem.SzVal();
  1217. if ( bObject )
  1218. {
  1219. sEncoded = tItem.AsString();
  1220. szValue = sEncoded.cstr();
  1221. }
  1222. auto iLength = (int) strlen ( szValue );
  1223. tUpd.m_dPool.Add ( tUpd.m_dBlobs.GetLength() );
  1224. tUpd.m_dPool.Add ( iLength );
  1225. if ( iLength )
  1226. {
  1227. BYTE * pBlob = tUpd.m_dBlobs.AddN ( iLength+2 ); // a couple of extra \0 for json parser to be happy
  1228. memcpy ( pBlob, szValue, iLength );
  1229. pBlob[iLength] = 0;
  1230. pBlob[iLength+1] = 0;
  1231. }
  1232. tTypedAttr.m_eType = SPH_ATTR_STRING;
  1233. } else if ( bArray )
  1234. {
  1235. dMVA.Resize ( 0 );
  1236. for ( const auto & tArrayItem : tItem )
  1237. {
  1238. if ( !tArrayItem.IsInt() )
  1239. {
  1240. sError = "MVA elements should be integers";
  1241. return false;
  1242. }
  1243. dMVA.Add ( tArrayItem.IntVal() );
  1244. }
  1245. dMVA.Uniq();
  1246. tUpd.m_dPool.Add ( dMVA.GetLength()*2 ); // as 64 bit stored into DWORD vector
  1247. tTypedAttr.m_eType = SPH_ATTR_UINT32SET;
  1248. for ( int64_t uVal : dMVA )
  1249. {
  1250. if ( uVal>UINT_MAX )
  1251. tTypedAttr.m_eType = SPH_ATTR_INT64SET;
  1252. *(( int64_t* ) tUpd.m_dPool.AddN ( 2 )) = uVal;
  1253. }
  1254. }
  1255. }
  1256. return true;
  1257. }
  1258. bool sphParseJsonUpdate ( const char * szUpdate, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
  1259. {
  1260. JsonObj_c tRoot ( szUpdate );
  1261. return ParseJsonUpdate ( tRoot, tStmt, tDocId, sError );
  1262. }
  1263. static bool ParseJsonDelete ( const JsonObj_c & tRoot, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
  1264. {
  1265. tStmt.m_eStmt = STMT_DELETE;
  1266. return ParseUpdateDeleteQueries ( tRoot, tStmt, tDocId, sError );
  1267. }
  1268. bool sphParseJsonDelete ( const char * szDelete, SqlStmt_t & tStmt, DocID_t & tDocId, CSphString & sError )
  1269. {
  1270. JsonObj_c tRoot ( szDelete );
  1271. return ParseJsonDelete ( tRoot, tStmt, tDocId, sError );
  1272. }
  1273. bool sphParseJsonStatement ( const char * szStmt, SqlStmt_t & tStmt, CSphString & sStmt, CSphString & sQuery, DocID_t & tDocId, CSphString & sError )
  1274. {
  1275. JsonObj_c tRoot ( szStmt );
  1276. if ( !tRoot )
  1277. {
  1278. sError.SetSprintf ( "unable to parse: %s", tRoot.GetErrorPtr() );
  1279. return false;
  1280. }
  1281. JsonObj_c tJsonStmt = tRoot[0];
  1282. if ( !tJsonStmt )
  1283. {
  1284. sError = "no statement found";
  1285. return false;
  1286. }
  1287. sStmt = tJsonStmt.Name();
  1288. if ( !tJsonStmt.IsObj() )
  1289. {
  1290. sError.SetSprintf ( "statement %s should be an object", sStmt.cstr() );
  1291. return false;
  1292. }
  1293. if ( sStmt=="index" || sStmt=="replace" )
  1294. {
  1295. if ( !ParseJsonInsert ( tJsonStmt, tStmt, tDocId, true, sError ) )
  1296. return false;
  1297. } else if ( sStmt=="create" || sStmt=="insert" )
  1298. {
  1299. if ( !ParseJsonInsert ( tJsonStmt, tStmt, tDocId, false, sError ) )
  1300. return false;
  1301. } else if ( sStmt=="update" )
  1302. {
  1303. if ( !ParseJsonUpdate ( tJsonStmt, tStmt, tDocId, sError ) )
  1304. return false;
  1305. } else if ( sStmt=="delete" )
  1306. {
  1307. if ( !ParseJsonDelete ( tJsonStmt, tStmt, tDocId, sError ) )
  1308. return false;
  1309. } else
  1310. {
  1311. sError.SetSprintf ( "unknown bulk operation: %s", sStmt.cstr() );
  1312. return false;
  1313. }
  1314. sQuery = tJsonStmt.AsString();
  1315. return true;
  1316. }
  1317. //////////////////////////////////////////////////////////////////////////
  1318. static void PackedShortMVA2Json ( StringBuilder_c& tOut, const BYTE * pMVA )
  1319. {
  1320. int iLengthBytes = sphUnpackPtrAttr ( pMVA, &pMVA );
  1321. int nValues = iLengthBytes / sizeof ( DWORD );
  1322. auto pValues = ( const DWORD * ) pMVA;
  1323. for ( int i = 0; i<nValues; ++i )
  1324. tOut.NtoA(pValues[i]);
  1325. }
  1326. static void PackedWideMVA2Json ( StringBuilder_c &tOut, const BYTE * pMVA )
  1327. {
  1328. int iLengthBytes = sphUnpackPtrAttr ( pMVA, &pMVA );
  1329. int nValues = iLengthBytes / sizeof ( int64_t );
  1330. auto pValues = ( const int64_t * ) pMVA;
  1331. for ( int i = 0; i<nValues; ++i )
  1332. tOut.NtoA(pValues[i]);
  1333. }
  1334. static void JsonObjAddAttr ( JsonEscapedBuilder & tOut, const AggrResult_t &tRes, ESphAttr eAttrType, const char * szCol,
  1335. const CSphMatch &tMatch, const CSphAttrLocator &tLoc )
  1336. {
  1337. assert ( sphPlainAttrToPtrAttr ( eAttrType )==eAttrType );
  1338. tOut.AppendName(szCol);
  1339. switch ( eAttrType )
  1340. {
  1341. case SPH_ATTR_INTEGER:
  1342. case SPH_ATTR_TIMESTAMP:
  1343. case SPH_ATTR_TOKENCOUNT:
  1344. case SPH_ATTR_BIGINT:
  1345. tOut.NtoA ( tMatch.GetAttr(tLoc) );
  1346. break;
  1347. case SPH_ATTR_FLOAT:
  1348. tOut.FtoA ( tMatch.GetAttrFloat(tLoc) );
  1349. break;
  1350. case SPH_ATTR_BOOL:
  1351. tOut << ( tMatch.GetAttr ( tLoc ) ? "true" : "false" );
  1352. break;
  1353. case SPH_ATTR_UINT32SET_PTR:
  1354. case SPH_ATTR_INT64SET_PTR:
  1355. {
  1356. tOut.StartBlock( ",", "[", "]" );
  1357. const BYTE * pMVA = ( const BYTE * ) tMatch.GetAttr ( tLoc );
  1358. if ( eAttrType==SPH_ATTR_UINT32SET_PTR )
  1359. PackedShortMVA2Json ( tOut, pMVA );
  1360. else
  1361. PackedWideMVA2Json ( tOut, pMVA );
  1362. tOut.FinishBlock(false);
  1363. }
  1364. break;
  1365. case SPH_ATTR_STRINGPTR:
  1366. {
  1367. const auto * pString = ( const BYTE * ) tMatch.GetAttr ( tLoc );
  1368. int iLen = sphUnpackPtrAttr ( pString, &pString );
  1369. // special process for legacy typed strings
  1370. if ( pString && iLen>1 && pString[iLen-2]=='\0')
  1371. {
  1372. auto uSubtype = pString[iLen-1];
  1373. iLen -= 2;
  1374. switch ( uSubtype)
  1375. {
  1376. case 1: // ql
  1377. {
  1378. ScopedComma_c sBrackets ( tOut, nullptr, R"({"ql":)", "}" );
  1379. tOut.AppendEscaped (( const char* ) pString, EscBld::eEscape, iLen );
  1380. break;
  1381. }
  1382. case 0: // json
  1383. tOut << ( const char* ) pString;
  1384. break;
  1385. default:
  1386. tOut.Sprintf ("\"internal error! wrong subtype of stringptr %d\"", uSubtype );
  1387. }
  1388. break;
  1389. }
  1390. tOut.AppendEscaped ( ( const char * ) pString, EscBld::eEscape, iLen );
  1391. }
  1392. break;
  1393. case SPH_ATTR_JSON_PTR:
  1394. {
  1395. const BYTE * pJSON = ( const BYTE * ) tMatch.GetAttr ( tLoc );
  1396. sphUnpackPtrAttr ( pJSON, &pJSON );
  1397. // no object at all? return NULL
  1398. if ( !pJSON )
  1399. {
  1400. tOut << "null";
  1401. break;
  1402. }
  1403. sphJsonFormat ( tOut, pJSON );
  1404. }
  1405. break;
  1406. case SPH_ATTR_FACTORS:
  1407. case SPH_ATTR_FACTORS_JSON:
  1408. {
  1409. const BYTE * pFactors = ( const BYTE * ) tMatch.GetAttr ( tLoc );
  1410. sphUnpackPtrAttr ( pFactors, &pFactors );
  1411. if ( pFactors )
  1412. sphFormatFactors ( tOut, ( const unsigned int * ) pFactors, true );
  1413. else
  1414. tOut << "null";
  1415. }
  1416. break;
  1417. case SPH_ATTR_JSON_FIELD_PTR:
  1418. {
  1419. const BYTE * pField = ( const BYTE * ) tMatch.GetAttr ( tLoc );
  1420. sphUnpackPtrAttr ( pField, &pField );
  1421. if ( !pField )
  1422. {
  1423. tOut << "null";
  1424. break;
  1425. }
  1426. auto eJson = ESphJsonType ( *pField++ );
  1427. if ( eJson==JSON_NULL )
  1428. tOut << "null";
  1429. else
  1430. sphJsonFieldFormat ( tOut, pField, eJson, true );
  1431. }
  1432. break;
  1433. default:
  1434. assert ( 0 && "Unknown attribute" );
  1435. break;
  1436. }
  1437. }
  1438. static bool IsHighlightAttr ( const CSphString & sName )
  1439. {
  1440. return sName.Begins ( g_szHighlight );
  1441. }
  1442. static bool NeedToSkipAttr ( const CSphString & sName, const CSphQuery & tQuery )
  1443. {
  1444. const char * szName = sName.cstr();
  1445. if ( szName[0]=='i' && szName[1]=='d' && szName[2]=='\0' ) return true;
  1446. if ( sName.Begins ( g_szHighlight ) ) return true;
  1447. if ( sName.Begins ( g_szFilter ) ) return true;
  1448. if ( sName.Begins ( g_szOrder ) ) return true;
  1449. if ( !tQuery.m_dIncludeItems.GetLength() && !tQuery.m_dExcludeItems.GetLength () )
  1450. return false;
  1451. // empty include - shows all select list items
  1452. // exclude with only "*" - skip all select list items
  1453. bool bInclude = ( tQuery.m_dIncludeItems.GetLength()==0 );
  1454. for ( const auto &iItem: tQuery.m_dIncludeItems )
  1455. {
  1456. if ( sphWildcardMatch ( szName, iItem.cstr() ) )
  1457. {
  1458. bInclude = true;
  1459. break;
  1460. }
  1461. }
  1462. if ( bInclude && tQuery.m_dExcludeItems.GetLength() )
  1463. {
  1464. for ( const auto& iItem: tQuery.m_dExcludeItems )
  1465. {
  1466. if ( sphWildcardMatch ( szName, iItem.cstr() ) )
  1467. {
  1468. bInclude = false;
  1469. break;
  1470. }
  1471. }
  1472. }
  1473. return !bInclude;
  1474. }
  1475. static void EncodeHighlight ( const CSphMatch & tMatch, int iAttr, const ISphSchema & tSchema, JsonEscapedBuilder & tOut )
  1476. {
  1477. const CSphColumnInfo & tCol = tSchema.GetAttr(iAttr);
  1478. ScopedComma_c tHighlightComma ( tOut, ",", R"("highlight":{)", "}", false );
  1479. auto pData = (const BYTE *)tMatch.GetAttr ( tCol.m_tLocator );
  1480. int iLength = sphUnpackPtrAttr ( pData, &pData );
  1481. SnippetResult_t tRes;
  1482. UnpackSnippetData ( pData, iLength, tRes );
  1483. for ( const auto & tField : tRes.m_dFields )
  1484. {
  1485. tOut.AppendName ( tField.m_sName.cstr() );
  1486. ScopedComma_c tHighlight ( tOut, ",", "[", "]", false );
  1487. // we might want to add passage separators to field text here
  1488. for ( const auto & tPassage : tField.m_dPassages )
  1489. tOut.AppendEscaped ( (const char *)tPassage.m_dText.Begin(), EscBld::eEscape, tPassage.m_dText.GetLength() );
  1490. }
  1491. }
  1492. CSphString sphEncodeResultJson ( const AggrResult_t & tRes, const CSphQuery & tQuery, CSphQueryProfile * pProfile )
  1493. {
  1494. JsonEscapedBuilder tOut;
  1495. CSphString sResult;
  1496. if ( !tRes.m_iSuccesses )
  1497. {
  1498. tOut.StartBlock ( nullptr, R"({"error":{"type":"Error","reason":)", "}}" );
  1499. tOut.AppendEscaped ( tRes.m_sError.cstr (), EscBld::eEscape );
  1500. tOut.FinishBlock (false);
  1501. tOut.MoveTo (sResult); // since simple return tOut.cstr() will cause copy of string, then returning it.
  1502. return sResult;
  1503. }
  1504. tOut.StartBlock( ",", "{", "}" );
  1505. tOut.Sprintf (R"("took":%d,"timed_out":false)", tRes.m_iQueryTime);
  1506. if ( !tRes.m_sWarning.IsEmpty() )
  1507. {
  1508. tOut.StartBlock ( nullptr, R"("warning":{"reason":)", "}" );
  1509. tOut.AppendEscaped ( tRes.m_sWarning.cstr (), EscBld::eEscape );
  1510. tOut.FinishBlock ( false );
  1511. }
  1512. auto sHitMeta = tOut.StartBlock ( ",", R"("hits":{)", "}" );
  1513. tOut.Sprintf ( R"("total":%d)", tRes.m_iTotalMatches );
  1514. const ISphSchema & tSchema = tRes.m_tSchema;
  1515. CSphVector<BYTE> dTmp;
  1516. CSphBitvec tAttrsToSend;
  1517. sphGetAttrsToSend ( tSchema, false, true, tAttrsToSend );
  1518. int iHighlightAttr = -1;
  1519. int nSchemaAttrs = tSchema.GetAttrsCount();
  1520. CSphBitvec dSkipAttrs ( nSchemaAttrs );
  1521. for ( int iAttr=0; iAttr<nSchemaAttrs; iAttr++ )
  1522. {
  1523. if ( !tAttrsToSend.BitGet(iAttr) )
  1524. continue;
  1525. const CSphColumnInfo & tCol = tSchema.GetAttr(iAttr);
  1526. const char * szName = tCol.m_sName.cstr();
  1527. if ( IsHighlightAttr(szName) )
  1528. iHighlightAttr = iAttr;
  1529. if ( NeedToSkipAttr ( szName, tQuery ) )
  1530. dSkipAttrs.BitSet ( iAttr );
  1531. }
  1532. tOut.StartBlock ( ",", R"("hits":[)", "]" );
  1533. const CSphColumnInfo * pId = tSchema.GetAttr ( sphGetDocidName() );
  1534. for ( int iMatch=tRes.m_iOffset; iMatch<tRes.m_iOffset+tRes.m_iCount; ++iMatch )
  1535. {
  1536. const CSphMatch & tMatch = tRes.m_dMatches[iMatch];
  1537. ScopedComma_c sQueryComma ( tOut, ",", "{", "}" );
  1538. // note, that originally there is string UID, so we just output number in quotes for docid here
  1539. if ( pId )
  1540. {
  1541. DocID_t tDocID = tMatch.GetAttr ( pId->m_tLocator );
  1542. tOut.Sprintf ( R"("_id":"%l","_score":%d)", tDocID, tMatch.m_iWeight );
  1543. }
  1544. else
  1545. tOut.Sprintf ( R"("_score":%d)", tMatch.m_iWeight );
  1546. tOut.StartBlock ( ",", "\"_source\":{", "}");
  1547. for ( int iAttr=0; iAttr<nSchemaAttrs; iAttr++ )
  1548. {
  1549. if ( !tAttrsToSend.BitGet(iAttr) )
  1550. continue;
  1551. if ( dSkipAttrs.BitGet ( iAttr ) )
  1552. continue;
  1553. const CSphColumnInfo & tCol = tSchema.GetAttr(iAttr);
  1554. const char * sName = tCol.m_sName.cstr();
  1555. JsonObjAddAttr ( tOut, tRes, tCol.m_eAttrType, sName, tMatch, tCol.m_tLocator );
  1556. }
  1557. tOut.FinishBlock ( false ); // _source obj
  1558. if ( iHighlightAttr!=-1 )
  1559. EncodeHighlight ( tMatch, iHighlightAttr, tSchema, tOut );
  1560. }
  1561. tOut.FinishBlocks ( sHitMeta, false ); // hits array, hits meta
  1562. if ( pProfile )
  1563. {
  1564. const char * sProfileResult = pProfile->GetResultAsStr();
  1565. // FIXME: result can be empty if we run a fullscan
  1566. if ( sProfileResult && strlen ( sProfileResult ) )
  1567. tOut.Sprintf ( R"("profile":{"query":%s})", sProfileResult );
  1568. else
  1569. tOut << R"("profile":null)";
  1570. }
  1571. tOut.FinishBlocks (); tOut.MoveTo ( sResult ); return sResult;
  1572. }
  1573. JsonObj_c sphEncodeInsertResultJson ( const char * szIndex, bool bReplace, DocID_t tDocId )
  1574. {
  1575. JsonObj_c tObj;
  1576. tObj.AddStr ( "_index", szIndex );
  1577. tObj.AddInt ( "_id", tDocId );
  1578. tObj.AddBool ( "created", !bReplace );
  1579. tObj.AddStr ( "result", bReplace ? "updated" : "created" );
  1580. tObj.AddInt ( "status", bReplace ? 200 : 201 );
  1581. return tObj;
  1582. }
  1583. JsonObj_c sphEncodeUpdateResultJson ( const char * szIndex, DocID_t tDocId, int iAffected )
  1584. {
  1585. JsonObj_c tObj;
  1586. tObj.AddStr ( "_index", szIndex );
  1587. if ( !tDocId )
  1588. tObj.AddInt ( "updated", iAffected );
  1589. else
  1590. {
  1591. tObj.AddInt ( "_id", tDocId );
  1592. tObj.AddStr ( "result", iAffected ? "updated" : "noop" );
  1593. }
  1594. return tObj;
  1595. }
  1596. JsonObj_c sphEncodeDeleteResultJson ( const char * szIndex, DocID_t tDocId, int iAffected )
  1597. {
  1598. JsonObj_c tObj;
  1599. tObj.AddStr ( "_index", szIndex );
  1600. if ( !tDocId )
  1601. tObj.AddInt ( "deleted", iAffected );
  1602. else
  1603. {
  1604. tObj.AddInt ( "_id", tDocId );
  1605. tObj.AddBool ( "found", !!iAffected );
  1606. tObj.AddStr ( "result", iAffected ? "deleted" : "not found" );
  1607. }
  1608. return tObj;
  1609. }
  1610. JsonObj_c sphEncodeInsertErrorJson ( const char * szIndex, const char * szError )
  1611. {
  1612. JsonObj_c tObj, tErr;
  1613. tErr.AddStr ( "type", szError );
  1614. tErr.AddStr ( "index", szIndex );
  1615. tObj.AddItem ( "error", tErr );
  1616. tObj.AddInt ( "status", 500 );
  1617. return tObj;
  1618. }
  1619. bool sphGetResultStats ( const char * szResult, int & iAffected, int & iWarnings, bool bUpdate )
  1620. {
  1621. JsonObj_c tJsonRoot ( szResult );
  1622. if ( !tJsonRoot )
  1623. return false;
  1624. // no warnings in json results for now
  1625. iWarnings = 0;
  1626. if ( tJsonRoot.HasItem("error") )
  1627. {
  1628. iAffected = 0;
  1629. return true;
  1630. }
  1631. // its either update or delete
  1632. CSphString sError;
  1633. JsonObj_c tAffected = tJsonRoot.GetIntItem ( bUpdate ? "updated" : "deleted", sError );
  1634. if ( tAffected )
  1635. {
  1636. iAffected = tAffected.IntVal();
  1637. return true;
  1638. }
  1639. // it was probably a query with an "_id"
  1640. JsonObj_c tId = tJsonRoot.GetIntItem ( "_id", sError );
  1641. if ( tId )
  1642. {
  1643. iAffected = 1;
  1644. return true;
  1645. }
  1646. return false;
  1647. }
  1648. void AddAccessSpecs ( JsonEscapedBuilder &tOut, const XQNode_t * pNode, const CSphSchema &tSchema, const StrVec_t &dZones )
  1649. {
  1650. assert ( pNode );
  1651. // dump spec for keyword nodes
  1652. // FIXME? double check that spec does *not* affect non keyword nodes
  1653. if ( pNode->m_dSpec.IsEmpty () || !pNode->m_dWords.GetLength () )
  1654. return;
  1655. const XQLimitSpec_t &tSpec = pNode->m_dSpec;
  1656. if ( tSpec.m_bFieldSpec && !tSpec.m_dFieldMask.TestAll ( true ) )
  1657. {
  1658. ScopedComma_c sFieldsArray ( tOut, ",", "\"fields\":[", "]" );
  1659. for ( int i = 0; i<tSchema.GetFieldsCount (); ++i )
  1660. if ( tSpec.m_dFieldMask.Test ( i ) )
  1661. tOut.AppendEscaped ( tSchema.GetFieldName ( i ), EscBld::eEscape );
  1662. }
  1663. tOut.Sprintf ( "\"max_field_pos\":%d", tSpec.m_iFieldMaxPos );
  1664. if ( !tSpec.m_dZones.IsEmpty () )
  1665. {
  1666. ScopedComma_c sZoneDelim ( tOut, ",", tSpec.m_bZoneSpan ? "\"zonespans\":[" : "\"zones\":[", "]" );
  1667. for ( int iZone : tSpec.m_dZones )
  1668. tOut.AppendEscaped ( dZones[iZone].cstr(), EscBld::eEscape );
  1669. }
  1670. }
  1671. void CreateKeywordNode ( JsonEscapedBuilder & tOut, const XQKeyword_t &tKeyword )
  1672. {
  1673. ScopedComma_c sRoot ( tOut, ",", "{", "}");
  1674. tOut << R"("type":"KEYWORD")";
  1675. tOut << "\"word\":"; tOut.AppendEscaped ( tKeyword.m_sWord.cstr (), EscBld::eEscape | EscBld::eSkipComma );
  1676. tOut.Sprintf ( R"("querypos":%d)", tKeyword.m_iAtomPos);
  1677. if ( tKeyword.m_bExcluded )
  1678. tOut << R"("excluded":true)";
  1679. if ( tKeyword.m_bExpanded )
  1680. tOut << R"("expanded":true)";
  1681. if ( tKeyword.m_bFieldStart )
  1682. tOut << R"("field_start":true)";
  1683. if ( tKeyword.m_bFieldEnd )
  1684. tOut << R"("field_end":true)";
  1685. if ( tKeyword.m_bMorphed )
  1686. tOut << R"("morphed":true)";
  1687. if ( tKeyword.m_fBoost!=1.0f ) // really comparing floats?
  1688. tOut.Sprintf ( R"("boost":%f)", tKeyword.m_fBoost) ;
  1689. }
  1690. void sphBuildProfileJson ( JsonEscapedBuilder &tOut, const XQNode_t * pNode, const CSphSchema &tSchema, const StrVec_t &dZones )
  1691. {
  1692. assert ( pNode );
  1693. auto dRootBlock = tOut.StartBlock ( ",", "{", "}" );
  1694. CSphString sNodeName ( sphXQNodeToStr ( pNode ) );
  1695. tOut << "\"type\":"; tOut.AppendEscaped ( sNodeName.cstr (), EscBld::eEscape | EscBld::eSkipComma );
  1696. CSphString sDescription ( sphExplainQueryBrief ( pNode, tSchema ) );
  1697. tOut << "\"description\":"; tOut.AppendEscaped ( sDescription.cstr (), EscBld::eEscape | EscBld::eSkipComma );
  1698. CSphString sNodeOptions ( sphXQNodeGetExtraStr ( pNode ) );
  1699. if ( !sNodeOptions.IsEmpty () )
  1700. {
  1701. tOut << "\"options\":"; tOut.AppendEscaped ( sNodeOptions.cstr (), EscBld::eEscape | EscBld::eSkipComma );
  1702. }
  1703. AddAccessSpecs ( tOut, pNode, tSchema, dZones );
  1704. tOut.StartBlock ( ",", "\"children\":[", "]" );
  1705. if ( pNode->m_dChildren.GetLength () )
  1706. {
  1707. for ( const auto& i : pNode->m_dChildren )
  1708. sphBuildProfileJson ( tOut, i, tSchema, dZones );
  1709. } else
  1710. {
  1711. for ( const auto& i : pNode->m_dWords )
  1712. CreateKeywordNode ( tOut, i );
  1713. }
  1714. tOut.FinishBlocks ( dRootBlock );
  1715. }
  1716. //////////////////////////////////////////////////////////////////////////
  1717. // Highlight
  1718. struct SnippetOptions_t
  1719. {
  1720. CSphString m_sQuery;
  1721. CSphVector<CSphString> m_dFields;
  1722. };
  1723. static void FormatSnippetOpts ( const SnippetOptions_t & tOpts, const SnippetQuerySettings_t & tSnippetQuery, CSphQuery & tQuery )
  1724. {
  1725. StringBuilder_c sItem;
  1726. sItem << "HIGHLIGHT(";
  1727. sItem << tSnippetQuery.AsString();
  1728. sItem << ",";
  1729. if ( tOpts.m_dFields.GetLength() )
  1730. {
  1731. sItem.StartBlock ( ",", "'", "'" );
  1732. for ( const auto & i : tOpts.m_dFields )
  1733. sItem << i;
  1734. sItem.FinishBlock(false);
  1735. }
  1736. else
  1737. sItem << "''";
  1738. if ( !tOpts.m_sQuery.IsEmpty() )
  1739. sItem.Appendf ( ",'%s'", tOpts.m_sQuery.cstr() );
  1740. sItem << ")";
  1741. CSphQueryItem & tItem = tQuery.m_dItems.Add();
  1742. tItem.m_sExpr = sItem.cstr ();
  1743. tItem.m_sAlias.SetSprintf ( "%s", g_szHighlight );
  1744. }
  1745. static bool ParseSnippetFields ( const JsonObj_c & tSnip, SnippetOptions_t & tOpts, CSphString & sError )
  1746. {
  1747. JsonObj_c tFields = tSnip.GetArrayItem ( "fields", sError, true );
  1748. if ( !tFields && !sError.IsEmpty() )
  1749. return false;
  1750. if ( !tFields )
  1751. return true;
  1752. tOpts.m_dFields.Reserve ( tFields.Size() );
  1753. for ( const auto & tField : tFields )
  1754. {
  1755. if ( !tField.IsStr() )
  1756. {
  1757. sError.SetSprintf ( "\"%s\" field should be an string", tField.Name() );
  1758. return false;
  1759. }
  1760. tOpts.m_dFields.Add ( tField.StrVal() );
  1761. }
  1762. return true;
  1763. }
  1764. static bool ParseSnippetOptsElastic ( const JsonObj_c & tSnip, SnippetOptions_t & tOpts, SnippetQuerySettings_t & tQuery, CSphString & sError )
  1765. {
  1766. JsonObj_c tEncoder = tSnip.GetStrItem ( "encoder", sError, true );
  1767. if ( tEncoder )
  1768. {
  1769. if ( tEncoder.StrVal()=="html" )
  1770. tQuery.m_sStripMode = "retain";
  1771. }
  1772. else if ( !sError.IsEmpty() )
  1773. return false;
  1774. JsonObj_c tHlQuery = tSnip.GetObjItem ( "highlight_query", sError, true );
  1775. if ( tHlQuery )
  1776. tOpts.m_sQuery = tHlQuery.AsString();
  1777. else if ( !sError.IsEmpty() )
  1778. return false;
  1779. if ( !tSnip.FetchStrItem ( tQuery.m_sBeforeMatch, "pre_tags", sError, true ) )
  1780. return false;
  1781. if ( !tSnip.FetchStrItem ( tQuery.m_sAfterMatch, "post_tags", sError, true ) )
  1782. return false;
  1783. int iNoMatch = 0;
  1784. if ( !tSnip.FetchIntItem ( iNoMatch, "no_match_size", sError, true ) )
  1785. return false;
  1786. if ( iNoMatch<1 )
  1787. tQuery.m_bAllowEmpty = true;
  1788. JsonObj_c tOrder = tSnip.GetStrItem ( "order", sError, true );
  1789. if ( tOrder )
  1790. tQuery.m_bWeightOrder = tOrder.StrVal()=="score";
  1791. else if ( !sError.IsEmpty() )
  1792. return false;
  1793. if ( !tSnip.FetchIntItem ( tQuery.m_iLimit, "fragment_size", sError, true ) )
  1794. return false;
  1795. if ( !tSnip.FetchIntItem ( tQuery.m_iLimitPassages, "number_of_fragments", sError, true ) )
  1796. return false;
  1797. return true;
  1798. }
  1799. static bool ParseSnippetOptsSphinx ( const JsonObj_c & tSnip, SnippetQuerySettings_t & tOpt, CSphString & sError )
  1800. {
  1801. if ( !tSnip.FetchStrItem ( tOpt.m_sBeforeMatch, "before_match", sError, true ) ) return false;
  1802. if ( !tSnip.FetchStrItem ( tOpt.m_sAfterMatch, "after_match", sError, true ) ) return false;
  1803. if ( !tSnip.FetchIntItem ( tOpt.m_iLimit, "limit", sError, true ) ) return false;
  1804. if ( !tSnip.FetchIntItem ( tOpt.m_iAround, "around", sError, true ) ) return false;
  1805. if ( !tSnip.FetchBoolItem ( tOpt.m_bUseBoundaries, "use_boundaries", sError, true ) ) return false;
  1806. if ( !tSnip.FetchBoolItem ( tOpt.m_bWeightOrder, "weight_order", sError, true ) ) return false;
  1807. if ( !tSnip.FetchBoolItem ( tOpt.m_bForceAllWords, "force_all_words", sError, true ) ) return false;
  1808. if ( !tSnip.FetchIntItem ( tOpt.m_iLimitPassages, "limit_passages", sError, true ) ) return false;
  1809. if ( !tSnip.FetchIntItem ( tOpt.m_iLimitWords, "limit_words", sError, true ) ) return false;
  1810. if ( !tSnip.FetchStrItem ( tOpt.m_sStripMode, "html_strip_mode", sError, true ) ) return false;
  1811. if ( !tSnip.FetchBoolItem ( tOpt.m_bAllowEmpty, "allow_empty", sError, true ) ) return false;
  1812. if ( !tSnip.FetchBoolItem ( tOpt.m_bEmitZones, "emit_zones", sError, true ) ) return false;
  1813. if ( !tSnip.FetchBoolItem ( tOpt.m_bForcePassages, "force_passages", sError, true ) ) return false;
  1814. if ( !tSnip.FetchBoolItem ( tOpt.m_bPackFields, "pack_fields", sError, true ) ) return false;
  1815. JsonObj_c tBoundary = tSnip.GetStrItem ( "passage_boundary", sError, true );
  1816. if ( tBoundary )
  1817. tOpt.m_ePassageSPZ = GetPassageBoundary ( tBoundary.StrVal() );
  1818. else if ( !sError.IsEmpty() )
  1819. return false;
  1820. return true;
  1821. }
  1822. static bool ParseSnippet ( const JsonObj_c & tSnip, CSphQuery & tQuery, CSphString & sError )
  1823. {
  1824. SnippetOptions_t tOpts;
  1825. SnippetQuerySettings_t tOptsSphinx;
  1826. tOptsSphinx.m_bJsonQuery = true;
  1827. tOptsSphinx.m_bPackFields = true;
  1828. if ( !ParseSnippetFields ( tSnip, tOpts, sError ) )
  1829. return false;
  1830. // elastic-style options
  1831. if ( !ParseSnippetOptsElastic ( tSnip, tOpts, tOptsSphinx, sError ) )
  1832. return false;
  1833. // sphinx-style options
  1834. if ( !ParseSnippetOptsSphinx ( tSnip, tOptsSphinx, sError ) )
  1835. return false;
  1836. FormatSnippetOpts ( tOpts, tOptsSphinx, tQuery );
  1837. return true;
  1838. }
  1839. //////////////////////////////////////////////////////////////////////////
  1840. // Sort
  1841. struct SortField_t : public GeoDistInfo_c
  1842. {
  1843. CSphString m_sName;
  1844. CSphString m_sMode;
  1845. bool m_bAsc {true};
  1846. };
  1847. static void FormatSortBy ( const CSphVector<SortField_t> & dSort, CSphQuery & tQuery, bool & bGotWeight )
  1848. {
  1849. StringBuilder_c sSortBuf;
  1850. Comma_c sComma ({", ",2});
  1851. for ( const SortField_t &tItem : dSort )
  1852. {
  1853. const char * sSort = ( tItem.m_bAsc ? " asc" : " desc" );
  1854. if ( tItem.IsGeoDist() )
  1855. {
  1856. // ORDER BY statement
  1857. sSortBuf << sComma << g_szOrder << tItem.m_sName << sSort;
  1858. // query item
  1859. CSphQueryItem & tQueryItem = tQuery.m_dItems.Add();
  1860. tQueryItem.m_sExpr = tItem.BuildExprString();
  1861. tQueryItem.m_sAlias.SetSprintf ( "%s%s", g_szOrder, tItem.m_sName.cstr() );
  1862. // select list
  1863. StringBuilder_c sTmp;
  1864. sTmp << tQuery.m_sSelect << ", " << tQueryItem.m_sExpr << " as " << tQueryItem.m_sAlias;
  1865. sTmp.MoveTo ( tQuery.m_sSelect );
  1866. } else if ( tItem.m_sMode.IsEmpty() )
  1867. {
  1868. // sort by attribute or weight
  1869. sSortBuf << sComma << ( tItem.m_sName=="_score" ? "@weight" : tItem.m_sName ) << sSort;
  1870. bGotWeight |= ( tItem.m_sName=="_score" );
  1871. } else
  1872. {
  1873. // sort by MVA
  1874. // ORDER BY statement
  1875. sSortBuf << sComma << g_szOrder << tItem.m_sName << sSort;
  1876. // query item
  1877. StringBuilder_c sTmp;
  1878. sTmp << ( tItem.m_sMode=="min" ? "least" : "greatest" ) << "(" << tItem.m_sName << ")";
  1879. CSphQueryItem & tQueryItem = tQuery.m_dItems.Add();
  1880. sTmp.MoveTo (tQueryItem.m_sExpr);
  1881. tQueryItem.m_sAlias.SetSprintf ( "%s%s", g_szOrder, tItem.m_sName.cstr() );
  1882. // select list
  1883. sTmp << tQuery.m_sSelect << ", " << tQueryItem.m_sExpr << " as " << tQueryItem.m_sAlias;
  1884. sTmp.MoveTo ( tQuery.m_sSelect );
  1885. }
  1886. }
  1887. if ( !dSort.GetLength() )
  1888. {
  1889. sSortBuf += "@weight desc";
  1890. bGotWeight = true;
  1891. }
  1892. tQuery.m_eSort = SPH_SORT_EXTENDED;
  1893. sSortBuf.MoveTo ( tQuery.m_sSortBy );
  1894. }
  1895. static bool ParseSort ( const JsonObj_c & tSort, CSphQuery & tQuery, bool & bGotWeight, CSphString & sError, CSphString & sWarning )
  1896. {
  1897. bGotWeight = false;
  1898. // unsupported options
  1899. if ( tSort.HasItem("_script") )
  1900. {
  1901. sError = "\"_script\" property not supported";
  1902. return false;
  1903. }
  1904. CSphVector<SortField_t> dSort;
  1905. dSort.Reserve ( tSort.Size() );
  1906. for ( const auto & tItem : tSort )
  1907. {
  1908. CSphString sName = tItem.Name();
  1909. bool bString = tItem.IsStr();
  1910. bool bObj = tItem.IsObj();
  1911. if ( !bString && !bObj )
  1912. {
  1913. sError.SetSprintf ( R"("sort" property "%s" should be a string or an object)", sName.scstr() );
  1914. return false;
  1915. }
  1916. if ( bObj && tItem.Size()!=1 )
  1917. {
  1918. sError.SetSprintf ( R"("sort" property "%s" should be an object)", sName.scstr() );
  1919. return false;
  1920. }
  1921. // [ "attr_name" ]
  1922. if ( bString )
  1923. {
  1924. SortField_t & tSortField = dSort.Add();
  1925. tSortField.m_sName = tItem.StrVal();
  1926. // order defaults to desc when sorting on the _score, and defaults to asc when sorting on anything else
  1927. tSortField.m_bAsc = ( tSortField.m_sName!="_score" );
  1928. continue;
  1929. }
  1930. JsonObj_c tSortItem = tItem[0];
  1931. if ( !tSortItem )
  1932. {
  1933. sError = R"(invalid "sort" property item)";
  1934. return false;
  1935. }
  1936. bool bSortString = tSortItem.IsStr();
  1937. bool bSortObj = tSortItem.IsObj();
  1938. CSphString sSortName = tSortItem.Name();
  1939. if ( ( !bSortString && !bSortObj ) || !tSortItem.Name() || ( bSortString && !tSortItem.SzVal() ) )
  1940. {
  1941. sError.SetSprintf ( R"("sort" property 0("%s") should be %s)", sSortName.scstr(), ( bSortObj ? "a string" : "an object" ) );
  1942. return false;
  1943. }
  1944. // [ { "attr_name" : "sort_mode" } ]
  1945. if ( bSortString )
  1946. {
  1947. CSphString sOrder = tSortItem.StrVal();
  1948. if ( sOrder!="asc" && sOrder!="desc" )
  1949. {
  1950. sError.SetSprintf ( R"("sort" property "%s" order is invalid %s)", sSortName.scstr(), sOrder.cstr() );
  1951. return false;
  1952. }
  1953. SortField_t & tItem = dSort.Add();
  1954. tItem.m_sName = sSortName;
  1955. tItem.m_bAsc = ( sOrder=="asc" );
  1956. continue;
  1957. }
  1958. // [ { "attr_name" : { "order" : "sort_mode" } } ]
  1959. SortField_t & tSortField = dSort.Add();
  1960. tSortField.m_sName = sSortName;
  1961. JsonObj_c tAttrItems = tSortItem.GetItem("order");
  1962. if ( tAttrItems )
  1963. {
  1964. if ( !tAttrItems.IsStr() )
  1965. {
  1966. sError.SetSprintf ( R"("sort" property "%s" order is invalid)", tAttrItems.Name() );
  1967. return false;
  1968. }
  1969. CSphString sOrder = tAttrItems.StrVal();
  1970. tSortField.m_bAsc = ( sOrder=="asc" );
  1971. }
  1972. JsonObj_c tMode = tSortItem.GetItem("mode");
  1973. if ( tMode )
  1974. {
  1975. if ( tAttrItems && !tMode.IsStr() )
  1976. {
  1977. sError.SetSprintf ( R"("mode" property "%s" order is invalid)", tAttrItems.Name() );
  1978. return false;
  1979. }
  1980. CSphString sMode = tMode.StrVal();
  1981. if ( sMode!="min" && sMode!="max" )
  1982. {
  1983. sError.SetSprintf ( R"("mode" supported are "min" and "max", got "%s", not supported)", sMode.cstr() );
  1984. return false;
  1985. }
  1986. tSortField.m_sMode = sMode;
  1987. }
  1988. // geodist
  1989. if ( tSortField.m_sName=="_geo_distance" )
  1990. {
  1991. if ( tMode )
  1992. {
  1993. sError = R"("mode" property not supported with "_geo_distance")";
  1994. return false;
  1995. }
  1996. if ( tSortItem.HasItem("unit") )
  1997. {
  1998. sError = R"("unit" property not supported with "_geo_distance")";
  1999. return false;
  2000. }
  2001. if ( !tSortField.Parse ( tSortItem, false, sError, sWarning ) )
  2002. return false;
  2003. }
  2004. // unsupported options
  2005. const char * dUnsupported[] = { "unmapped_type", "missing", "nested_path", "nested_filter"};
  2006. for ( auto szOption : dUnsupported )
  2007. if ( tSortItem.HasItem(szOption) )
  2008. {
  2009. sError.SetSprintf ( R"("%s" property not supported)", szOption );
  2010. return false;
  2011. }
  2012. }
  2013. FormatSortBy ( dSort, tQuery, bGotWeight );
  2014. return true;
  2015. }
  2016. static bool ParseLatLon ( const JsonObj_c & tLat, const JsonObj_c & tLon, LocationField_t * pField, LocationSource_t * pSource, CSphString & sError )
  2017. {
  2018. if ( !tLat || !tLon )
  2019. {
  2020. if ( !tLat && !tLon )
  2021. sError = R"("lat" and "lon" properties missing)";
  2022. else
  2023. sError.SetSprintf ( R"("%s" property missing)", ( !tLat ? "lat" : "lon" ) );
  2024. return false;
  2025. }
  2026. bool bParseField = !!pField;
  2027. bool bLatChecked = bParseField ? tLat.IsNum() : tLat.IsStr();
  2028. bool bLonChecked = bParseField ? tLon.IsNum() : tLon.IsStr();
  2029. if ( !bLatChecked || !bLonChecked )
  2030. {
  2031. if ( !bLatChecked && !bLonChecked )
  2032. sError.SetSprintf ( R"("lat" and "lon" property values should be %s)", ( bParseField ? "numeric" : "string" ) );
  2033. else
  2034. sError.SetSprintf ( R"("%s" property value should be %s)", ( !bLatChecked ? "lat" : "lon" ), ( bParseField ? "numeric" : "string" ) );
  2035. return false;
  2036. }
  2037. if ( bParseField )
  2038. {
  2039. pField->m_fLat = tLat.FltVal();
  2040. pField->m_fLon = tLon.FltVal();
  2041. } else
  2042. {
  2043. pSource->m_sLat = tLat.StrVal();
  2044. pSource->m_sLon = tLon.StrVal();
  2045. }
  2046. return true;
  2047. }
  2048. static bool ParseLocation ( const char * sName, const JsonObj_c & tLoc, LocationField_t * pField, LocationSource_t * pSource, CSphString & sError )
  2049. {
  2050. bool bParseField = !!pField;
  2051. assert ( ( bParseField && pField ) || pSource );
  2052. bool bObj = tLoc.IsObj();
  2053. bool bString = tLoc.IsStr();
  2054. bool bArray = tLoc.IsArray();
  2055. if ( !bObj && !bString && !bArray )
  2056. {
  2057. sError.SetSprintf ( "\"%s\" property value should be either an object or a string or an array", sName );
  2058. return false;
  2059. }
  2060. if ( bObj )
  2061. return ParseLatLon ( tLoc.GetItem("lat"), tLoc.GetItem("lon"), pField, pSource, sError );
  2062. if ( bString )
  2063. {
  2064. StrVec_t dVals;
  2065. sphSplit ( dVals, tLoc.SzVal() );
  2066. if ( dVals.GetLength()!=2 )
  2067. {
  2068. sError.SetSprintf ( "\"%s\" property values should be sting with lat,lon items, got %d items", sName, dVals.GetLength() );
  2069. return false;
  2070. }
  2071. // string and array order differs
  2072. // string - lat, lon
  2073. // array - lon, lat
  2074. int iLatLen = dVals[0].Length();
  2075. int iLonLen = dVals[1].Length();
  2076. if ( !iLatLen || !iLonLen )
  2077. {
  2078. if ( !iLatLen && !iLonLen )
  2079. sError.SetSprintf ( R"("lat" and "lon" values should be %s)", ( bParseField ? "numeric" : "string" ) );
  2080. else
  2081. sError.SetSprintf ( "\"%s\" value should be %s", ( !iLatLen ? "lat" : "lon" ), ( bParseField ? "numeric" : "string" ) );
  2082. return false;
  2083. }
  2084. if ( bParseField )
  2085. {
  2086. pField->m_fLat = (float)atof ( dVals[0].cstr() );
  2087. pField->m_fLon = (float)atof ( dVals[1].cstr() );
  2088. } else
  2089. {
  2090. pSource->m_sLat = dVals[0];
  2091. pSource->m_sLon = dVals[1];
  2092. }
  2093. return true;
  2094. }
  2095. assert ( bArray );
  2096. int iCount = tLoc.Size();
  2097. if ( iCount!=2 )
  2098. {
  2099. sError.SetSprintf ( "\"%s\" property values should be an array with lat,lon items, got %d items", sName, iCount );
  2100. return false;
  2101. }
  2102. // string and array order differs
  2103. // array - lon, lat
  2104. // string - lat, lon
  2105. return ParseLatLon ( tLoc[1], tLoc[0], pField, pSource, sError );
  2106. }
  2107. //////////////////////////////////////////////////////////////////////////
  2108. // _source / select list
  2109. static bool ParseStringArray ( const JsonObj_c & tArray, const char * szProp, StrVec_t & dItems, CSphString & sError )
  2110. {
  2111. for ( const auto & tItem : tArray )
  2112. {
  2113. if ( !tItem.IsStr() )
  2114. {
  2115. sError.SetSprintf ( R"("%s" property should be a string)", szProp );
  2116. return false;
  2117. }
  2118. dItems.Add ( tItem.StrVal() );
  2119. }
  2120. return true;
  2121. }
  2122. static bool ParseSelect ( const JsonObj_c & tSelect, CSphQuery & tQuery, CSphString & sError )
  2123. {
  2124. bool bString = tSelect.IsStr();
  2125. bool bArray = tSelect.IsArray();
  2126. bool bObj = tSelect.IsObj();
  2127. if ( !bString && !bArray && !bObj )
  2128. {
  2129. sError = R"("_source" property should be a string or an array or an object)";
  2130. return false;
  2131. }
  2132. if ( bString )
  2133. {
  2134. tQuery.m_dIncludeItems.Add ( tSelect.StrVal() );
  2135. if ( tQuery.m_dIncludeItems[0]=="*" || tQuery.m_dIncludeItems[0].IsEmpty() )
  2136. tQuery.m_dIncludeItems.Reset();
  2137. return true;
  2138. }
  2139. if ( bArray )
  2140. return ParseStringArray ( tSelect, R"("_source")", tQuery.m_dIncludeItems, sError );
  2141. assert ( bObj );
  2142. // includes part of _source object
  2143. JsonObj_c tInclude = tSelect.GetArrayItem ( "includes", sError, true );
  2144. if ( tInclude )
  2145. {
  2146. if ( !ParseStringArray ( tInclude, R"("_source" "includes")", tQuery.m_dIncludeItems, sError ) )
  2147. return false;
  2148. if ( tQuery.m_dIncludeItems.GetLength()==1 && tQuery.m_dIncludeItems[0]=="*" )
  2149. tQuery.m_dIncludeItems.Reset();
  2150. } else if ( !sError.IsEmpty() )
  2151. return false;
  2152. // excludes part of _source object
  2153. JsonObj_c tExclude = tSelect.GetArrayItem ( "excludes", sError, true );
  2154. if ( tExclude )
  2155. {
  2156. if ( !ParseStringArray ( tExclude, R"("_source" "excludes")", tQuery.m_dExcludeItems, sError ) )
  2157. return false;
  2158. if ( !tQuery.m_dExcludeItems.GetLength() )
  2159. tQuery.m_dExcludeItems.Add ( "*" );
  2160. } else if ( !sError.IsEmpty() )
  2161. return false;
  2162. return true;
  2163. }
  2164. //////////////////////////////////////////////////////////////////////////
  2165. // script_fields / expressions
  2166. static bool ParseExpr ( const JsonObj_c & tExpr, CSphQuery & tQuery, CSphString & sError )
  2167. {
  2168. if ( !tExpr )
  2169. return true;
  2170. if ( !tExpr.IsObj() )
  2171. {
  2172. sError = R"("script_fields" property should be an object)";
  2173. return false;
  2174. }
  2175. StringBuilder_c sSelect;
  2176. sSelect << tQuery.m_sSelect;
  2177. for ( const auto & tAlias : tExpr )
  2178. {
  2179. if ( !tAlias.IsObj() )
  2180. {
  2181. sError = R"("script_fields" properties should be objects)";
  2182. return false;
  2183. }
  2184. if ( CSphString ( tAlias.Name() ).IsEmpty() )
  2185. {
  2186. sError = R"("script_fields" empty property name)";
  2187. return false;
  2188. }
  2189. JsonObj_c tAliasScript = tAlias.GetItem("script");
  2190. if ( !tAliasScript )
  2191. {
  2192. sError = R"("script_fields" property should have "script" object)";
  2193. return false;
  2194. }
  2195. CSphString sExpr;
  2196. if ( !tAliasScript.FetchStrItem ( sExpr, "inline", sError ) )
  2197. return false;
  2198. const char * dUnsupported[] = { "lang", "params", "stored", "file" };
  2199. for ( auto szOption : dUnsupported )
  2200. if ( tAliasScript.HasItem(szOption) )
  2201. {
  2202. sError.SetSprintf ( R"("%s" property not supported in "script_fields")", szOption );
  2203. return false;
  2204. }
  2205. // add to query
  2206. CSphQueryItem & tQueryItem = tQuery.m_dItems.Add();
  2207. tQueryItem.m_sExpr = sExpr;
  2208. tQueryItem.m_sAlias = tAlias.Name();
  2209. // add to select list
  2210. sSelect.Appendf ( ", %s as %s", tQueryItem.m_sExpr.cstr(), tQueryItem.m_sAlias.cstr() );
  2211. }
  2212. sSelect.MoveTo ( tQuery.m_sSelect );
  2213. return true;
  2214. }