sortsetup.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543
  1. //
  2. // Copyright (c) 2017-2026, Manticore Software LTD (https://manticoresearch.com)
  3. // Copyright (c) 2001-2016, Andrew Aksyonoff
  4. // Copyright (c) 2008-2016, Sphinx Technologies Inc
  5. // All rights reserved
  6. //
  7. // This program is free software; you can redistribute it and/or modify
  8. // it under the terms of the GNU General Public License. You should have
  9. // received a copy of the GPL license along with this program; if you
  10. // did not, you can find it at http://www.gnu.org/
  11. //
  12. #include "sortsetup.h"
  13. #include "sphinxjson.h"
  14. #include "sphinxsort.h"
  15. CSphMatchComparatorState::CSphMatchComparatorState()
  16. {
  17. for ( int i=0; i<MAX_ATTRS; ++i )
  18. {
  19. m_eKeypart[i] = SPH_KEYPART_ROWID;
  20. m_dAttrs[i] = -1;
  21. }
  22. }
  23. bool CSphMatchComparatorState::UsesBitfields() const
  24. {
  25. for ( int i=0; i<MAX_ATTRS; ++i )
  26. if ( m_eKeypart[i]==SPH_KEYPART_INT && m_tLocator[i].IsBitfield() )
  27. return true;
  28. return false;
  29. }
  30. //////////////////////////////////////////////////////////////////////////
  31. class SortClauseTokenizer_c
  32. {
  33. public:
  34. explicit SortClauseTokenizer_c ( const char * sBuffer )
  35. {
  36. auto iLen = (int) strlen(sBuffer);
  37. m_pBuf = new char [ iLen+1 ];
  38. m_pMax = m_pBuf+iLen;
  39. m_pCur = m_pBuf;
  40. // make string lowercase but keep case of JSON.field
  41. bool bJson = false;
  42. for ( int i=0; i<=iLen; i++ )
  43. {
  44. char cSrc = sBuffer[i];
  45. char cDst = ToLower ( cSrc );
  46. bJson = ( cSrc=='.' || cSrc=='[' || ( bJson && cDst>0 ) ); // keep case of valid char sequence after '.' and '[' symbols
  47. m_pBuf[i] = bJson ? cSrc : cDst;
  48. }
  49. }
  50. ~SortClauseTokenizer_c()
  51. {
  52. SafeDeleteArray ( m_pBuf );
  53. }
  54. const char * GetToken ()
  55. {
  56. // skip spaces
  57. while ( m_pCur<m_pMax && !*m_pCur )
  58. m_pCur++;
  59. if ( m_pCur>=m_pMax )
  60. return nullptr;
  61. // memorize token start, and move pointer forward
  62. const char * sRes = m_pCur;
  63. while ( *m_pCur )
  64. m_pCur++;
  65. return sRes;
  66. }
  67. bool IsSparseCount ( const char * sTok )
  68. {
  69. const char * sSeq = "(*)";
  70. for ( ; sTok<m_pMax && *sSeq; sTok++ )
  71. {
  72. bool bGotSeq = ( *sSeq==*sTok );
  73. if ( bGotSeq )
  74. sSeq++;
  75. // stop checking on any non space char outside sequence or sequence end
  76. if ( ( !bGotSeq && !sphIsSpace ( *sTok ) && *sTok!='\0' ) || !*sSeq )
  77. break;
  78. }
  79. if ( !*sSeq && sTok+1<m_pMax && !sTok[1] )
  80. {
  81. // advance token iterator after composite count(*) token
  82. m_pCur = sTok+1;
  83. return true;
  84. }
  85. return false;
  86. }
  87. protected:
  88. const char * m_pCur = nullptr;
  89. const char * m_pMax = nullptr;
  90. char * m_pBuf = nullptr;
  91. char ToLower ( char c )
  92. {
  93. // 0..9, A..Z->a..z, _, a..z, @, .
  94. if ( ( c>='0' && c<='9' ) || ( c>='a' && c<='z' ) || c=='_' || c=='@' || c=='.' || c=='[' || c==']' || c=='\'' || c=='\"' || c=='(' || c==')' || c=='*' )
  95. return c;
  96. if ( c>='A' && c<='Z' )
  97. return c-'A'+'a';
  98. return 0;
  99. }
  100. };
  101. inline ESphSortKeyPart Attr2Keypart_ ( ESphAttr eType )
  102. {
  103. switch ( eType )
  104. {
  105. case SPH_ATTR_FLOAT:
  106. return SPH_KEYPART_FLOAT;
  107. case SPH_ATTR_DOUBLE:
  108. return SPH_KEYPART_DOUBLE;
  109. case SPH_ATTR_STRING:
  110. return SPH_KEYPART_STRING;
  111. case SPH_ATTR_JSON:
  112. case SPH_ATTR_JSON_PTR:
  113. case SPH_ATTR_JSON_FIELD:
  114. case SPH_ATTR_JSON_FIELD_PTR:
  115. case SPH_ATTR_STRINGPTR:
  116. return SPH_KEYPART_STRINGPTR;
  117. default:
  118. return SPH_KEYPART_INT;
  119. }
  120. }
  121. //////////////////////////////////////////////////////////////////////////
  122. class SortStateSetup_c
  123. {
  124. public:
  125. SortStateSetup_c ( const char * szTok, SortClauseTokenizer_c & tTok, CSphMatchComparatorState & tState, CSphVector<ExtraSortExpr_t> & dExtraExprs, int iField, const ISphSchema & tSchema, const CSphQuery & tQuery, const JoinArgs_t * pJoinArgs );
  126. bool Setup ( CSphString & sError );
  127. private:
  128. const char * m_szTok = nullptr;
  129. SortClauseTokenizer_c & m_tTok;
  130. CSphMatchComparatorState & m_tState;
  131. ExtraSortExpr_t & m_tExtraExpr;
  132. const int m_iField;
  133. const ISphSchema & m_tSchema;
  134. const CSphQuery & m_tQuery;
  135. const JoinArgs_t * m_pJoinArgs = nullptr;
  136. int m_iAttr = -1;
  137. ESphAttr m_eAttrType = SPH_ATTR_NONE;
  138. bool SetupSortByRelevance();
  139. void UnifyInternalAttrNames();
  140. bool CheckOrderByMva ( CSphString & sError ) const;
  141. void FindAliasedGroupby();
  142. void FindAliasedSortby();
  143. bool IsJsonAttr() const;
  144. void SetupJsonAttr();
  145. bool SetupJsonField ( CSphString & sError );
  146. bool SetupColumnar ( CSphString & sError );
  147. bool SetupJson ( CSphString & sError );
  148. void SetupJsonConversions();
  149. void SetupPrecalculatedJson();
  150. };
  151. SortStateSetup_c::SortStateSetup_c ( const char * szTok, SortClauseTokenizer_c & tTok, CSphMatchComparatorState & tState, CSphVector<ExtraSortExpr_t> & dJsonExprs, int iField, const ISphSchema & tSchema, const CSphQuery & tQuery, const JoinArgs_t * pJoinArgs )
  152. : m_szTok ( szTok )
  153. , m_tTok ( tTok )
  154. , m_tState ( tState )
  155. , m_tExtraExpr ( dJsonExprs[iField] )
  156. , m_iField ( iField )
  157. , m_tSchema ( tSchema )
  158. , m_tQuery ( tQuery )
  159. , m_pJoinArgs ( pJoinArgs )
  160. {}
  161. bool SortStateSetup_c::SetupSortByRelevance()
  162. {
  163. if ( !strcasecmp ( m_szTok, "@relevance" )
  164. || !strcasecmp ( m_szTok, "@rank" )
  165. || !strcasecmp ( m_szTok, "@weight" )
  166. || !strcasecmp ( m_szTok, "weight()" ) )
  167. {
  168. m_tState.m_eKeypart[m_iField] = SPH_KEYPART_WEIGHT;
  169. return true;
  170. }
  171. return false;
  172. }
  173. void SortStateSetup_c::UnifyInternalAttrNames()
  174. {
  175. if ( !strcasecmp ( m_szTok, "@group" ) )
  176. m_szTok = "@groupby";
  177. else if ( !strcasecmp ( m_szTok, "count(*)" ) )
  178. m_szTok = "@count";
  179. else if ( !strcasecmp ( m_szTok, "facet()" ) )
  180. m_szTok = "@groupby"; // facet() is essentially a @groupby alias
  181. else if ( strcasecmp ( m_szTok, "count" )>=0 && m_tTok.IsSparseCount ( m_szTok + sizeof ( "count" ) - 1 ) ) // epression count(*) with various spaces
  182. m_szTok = "@count";
  183. else if ( !strcasecmp ( m_szTok, "knn_dist()" ) )
  184. m_szTok = "@knn_dist";
  185. }
  186. bool SortStateSetup_c::CheckOrderByMva ( CSphString & sError ) const
  187. {
  188. int iAttr = m_tSchema.GetAttrIndex(m_szTok);
  189. if ( iAttr<0 )
  190. return true;
  191. ESphAttr eAttrType = m_tSchema.GetAttr(iAttr).m_eAttrType;
  192. if ( eAttrType==SPH_ATTR_UINT32SET || eAttrType==SPH_ATTR_INT64SET || eAttrType==SPH_ATTR_UINT32SET_PTR || eAttrType==SPH_ATTR_INT64SET_PTR )
  193. {
  194. sError.SetSprintf ( "order by MVA is undefined" );
  195. return false;
  196. }
  197. return true;
  198. }
  199. void SortStateSetup_c::FindAliasedGroupby()
  200. {
  201. if ( m_iAttr>=0 )
  202. return;
  203. int iAttr = m_tSchema.GetAttrIndex(m_szTok);
  204. if ( iAttr>=0 )
  205. {
  206. m_iAttr = iAttr;
  207. return;
  208. }
  209. // try to lookup aliased count(*) and aliased groupby() in select items
  210. for ( auto & i : m_tQuery.m_dItems )
  211. {
  212. if ( !i.m_sAlias.cstr() || strcasecmp ( i.m_sAlias.cstr(), m_szTok ) )
  213. continue;
  214. if ( i.m_sExpr.Begins("@") )
  215. {
  216. m_iAttr = m_tSchema.GetAttrIndex ( i.m_sExpr.cstr() );
  217. return;
  218. }
  219. if ( i.m_sExpr=="count(*)" )
  220. {
  221. m_iAttr = m_tSchema.GetAttrIndex ( "@count" );
  222. return;
  223. }
  224. if ( i.m_sExpr=="groupby()" )
  225. {
  226. CSphString sGroupJson = SortJsonInternalSet ( m_tQuery.m_sGroupBy );
  227. m_iAttr = m_tSchema.GetAttrIndex ( sGroupJson.cstr() );
  228. // try numeric group by
  229. if ( m_iAttr<0 )
  230. m_iAttr = m_tSchema.GetAttrIndex ( "@groupby" );
  231. return;
  232. }
  233. }
  234. }
  235. void SortStateSetup_c::FindAliasedSortby()
  236. {
  237. if ( m_iAttr>=0 )
  238. return;
  239. int iAttr = m_tSchema.GetAttrIndex(m_szTok);
  240. if ( iAttr>=0 )
  241. {
  242. m_iAttr = iAttr;
  243. return;
  244. }
  245. for ( auto & i : m_tQuery.m_dItems )
  246. {
  247. if ( !i.m_sAlias.cstr() || strcasecmp ( i.m_sAlias.cstr(), m_szTok ) )
  248. continue;
  249. m_iAttr = m_tSchema.GetAttrIndex ( i.m_sExpr.cstr() );
  250. return;
  251. }
  252. }
  253. bool SortStateSetup_c::IsJsonAttr() const
  254. {
  255. if ( m_iAttr<0 )
  256. return false;
  257. ESphAttr eAttrType = m_tSchema.GetAttr(m_iAttr).m_eAttrType;
  258. if ( eAttrType==SPH_ATTR_JSON_FIELD || eAttrType==SPH_ATTR_JSON_FIELD_PTR || eAttrType==SPH_ATTR_JSON || eAttrType==SPH_ATTR_JSON_PTR )
  259. return true;
  260. return false;
  261. }
  262. void SortStateSetup_c::SetupJsonAttr()
  263. {
  264. const CSphColumnInfo & tAttr = m_tSchema.GetAttr(m_iAttr);
  265. m_tExtraExpr.m_pExpr = tAttr.m_pExpr;
  266. m_tExtraExpr.m_tKey = JsonKey_t ( m_szTok, (int)strlen(m_szTok) );
  267. }
  268. bool SortStateSetup_c::SetupJsonField ( CSphString & sError )
  269. {
  270. CSphString sJsonCol;
  271. if ( !sphJsonNameSplit ( m_szTok, m_tQuery.m_sJoinIdx.cstr(), &sJsonCol ) )
  272. return true;
  273. m_iAttr = m_tSchema.GetAttrIndex ( sJsonCol.cstr() );
  274. if ( m_iAttr>=0 )
  275. {
  276. ExprParseArgs_t tExprArgs;
  277. ISphExpr * pExpr = sphExprParse ( m_szTok, m_tSchema, m_pJoinArgs ? &(m_pJoinArgs->m_sIndex2) : nullptr, sError, tExprArgs );
  278. if ( !pExpr )
  279. return false;
  280. m_tExtraExpr.m_pExpr = pExpr;
  281. m_tExtraExpr.m_tKey = JsonKey_t ( m_szTok, (int) strlen ( m_szTok ) );
  282. }
  283. return true;
  284. }
  285. bool SortStateSetup_c::SetupColumnar ( CSphString & sError )
  286. {
  287. if ( m_iAttr<0 )
  288. return true;
  289. const CSphColumnInfo & tAttr = m_tSchema.GetAttr(m_iAttr);
  290. if ( !tAttr.IsColumnar() || tAttr.IsJoined() )
  291. return true;
  292. ExprParseArgs_t tExprArgs;
  293. tExprArgs.m_pAttrType = &m_eAttrType;
  294. ISphExpr * pExpr = sphExprParse ( m_szTok, m_tSchema, m_pJoinArgs ? &(m_pJoinArgs->m_sIndex2) : nullptr, sError, tExprArgs );
  295. if ( !pExpr )
  296. return false;
  297. m_tExtraExpr.m_pExpr = pExpr;
  298. m_tExtraExpr.m_eType = m_eAttrType;
  299. return true;
  300. }
  301. bool SortStateSetup_c::SetupJson ( CSphString & sError )
  302. {
  303. if ( IsJsonAttr() )
  304. {
  305. SetupJsonAttr();
  306. return true;
  307. }
  308. // try JSON attribute and use JSON attribute instead of JSON field
  309. if ( m_iAttr<0 )
  310. return SetupJsonField(sError);
  311. return true;
  312. }
  313. void SortStateSetup_c::SetupJsonConversions()
  314. {
  315. if ( m_iAttr>=0 )
  316. return;
  317. // try json conversion functions (integer()/double()/bigint() in the order by clause)
  318. ESphAttr eAttrType = SPH_ATTR_NONE;
  319. ExprParseArgs_t tExprArgs;
  320. tExprArgs.m_pAttrType = &eAttrType;
  321. CSphString sError; // ignored
  322. ISphExpr * pExpr = sphExprParse ( m_szTok, m_tSchema, m_pJoinArgs ? &(m_pJoinArgs->m_sIndex2) : nullptr, sError, tExprArgs );
  323. if ( !pExpr )
  324. return;
  325. m_eAttrType = eAttrType;
  326. m_tExtraExpr.m_pExpr = pExpr;
  327. m_tExtraExpr.m_tKey = JsonKey_t ( m_szTok, (int) strlen(m_szTok) );
  328. m_tExtraExpr.m_eType = m_eAttrType;
  329. m_tExtraExpr.m_tKey.m_uMask = 0;
  330. m_iAttr = 0; // will be remapped in SetupSortRemap
  331. }
  332. void SortStateSetup_c::SetupPrecalculatedJson()
  333. {
  334. if ( m_iAttr>=0 )
  335. return;
  336. // try precalculated json fields/columnar attrs received from agents (prefixed with @int_*)
  337. CSphString sName;
  338. sName.SetSprintf ( "%s%s", GetInternalAttrPrefix(), m_szTok );
  339. m_iAttr = m_tSchema.GetAttrIndex ( sName.cstr() );
  340. }
  341. bool SortStateSetup_c::Setup ( CSphString & sError )
  342. {
  343. if ( SetupSortByRelevance() )
  344. return true;
  345. UnifyInternalAttrNames();
  346. if ( !CheckOrderByMva(sError) )
  347. return false;
  348. FindAliasedGroupby();
  349. FindAliasedSortby();
  350. if ( !SetupColumnar(sError) )
  351. return false;
  352. if ( !SetupJson(sError) )
  353. return false;
  354. SetupJsonConversions();
  355. SetupPrecalculatedJson();
  356. if ( m_iAttr<0 )
  357. {
  358. sError.SetSprintf ( "sort-by attribute '%s' not found", m_szTok );
  359. return false;
  360. }
  361. const CSphColumnInfo & tCol = m_tSchema.GetAttr(m_iAttr);
  362. m_tState.m_eKeypart[m_iField] = Attr2Keypart_ ( m_eAttrType!=SPH_ATTR_NONE ? m_eAttrType : tCol.m_eAttrType );
  363. m_tState.m_tLocator[m_iField] = tCol.m_tLocator;
  364. m_tState.m_dAttrs[m_iField] = m_iAttr;
  365. return true;
  366. }
  367. //////////////////////////////////////////////////////////////////////////
  368. ESortClauseParseResult sphParseSortClause ( const CSphQuery & tQuery, const char * szClause, const ISphSchema & tSchema, ESphSortFunc & eFunc, CSphMatchComparatorState & tState, CSphVector<ExtraSortExpr_t> & dExtraExprs, const JoinArgs_t * pJoinArgs, CSphString & sError )
  369. {
  370. for ( auto & tAttr : tState.m_dAttrs )
  371. tAttr = -1;
  372. dExtraExprs.Resize ( tState.MAX_ATTRS );
  373. // mini parser
  374. SortClauseTokenizer_c tTok(szClause);
  375. bool bField = false; // whether i'm expecting field name or sort order
  376. int iField = 0;
  377. for ( const char * pTok=tTok.GetToken(); pTok; pTok=tTok.GetToken() )
  378. {
  379. bField = !bField;
  380. // special case, sort by random
  381. if ( iField==0 && bField && strcmp ( pTok, "@random" )==0 )
  382. return SORT_CLAUSE_RANDOM;
  383. // handle sort order
  384. if ( !bField )
  385. {
  386. // check
  387. if ( strcmp ( pTok, "desc" ) && strcmp ( pTok, "asc" ) )
  388. {
  389. sError.SetSprintf ( "invalid sorting order '%s'", pTok );
  390. return SORT_CLAUSE_ERROR;
  391. }
  392. // set
  393. if ( !strcmp ( pTok, "desc" ) )
  394. tState.m_uAttrDesc |= ( 1<<iField );
  395. iField++;
  396. continue;
  397. }
  398. // handle attribute name
  399. if ( iField==CSphMatchComparatorState::MAX_ATTRS )
  400. {
  401. sError.SetSprintf ( "too many sort-by attributes; maximum count is %d", CSphMatchComparatorState::MAX_ATTRS );
  402. return SORT_CLAUSE_ERROR;
  403. }
  404. SortStateSetup_c tSetup ( pTok, tTok, tState, dExtraExprs, iField, tSchema, tQuery, pJoinArgs );
  405. if ( !tSetup.Setup(sError) )
  406. return SORT_CLAUSE_ERROR;
  407. }
  408. // fix for
  409. // FACET attr ORDER BY COUNT(*)
  410. // into
  411. // FACET attr ORDER BY COUNT(*) DESC
  412. if ( iField==0 && tQuery.m_bFacet )
  413. {
  414. tState.m_uAttrDesc |= 1;
  415. iField++;
  416. }
  417. if ( iField==0 )
  418. {
  419. sError.SetSprintf ( "no sort order defined" );
  420. return SORT_CLAUSE_ERROR;
  421. }
  422. switch ( iField )
  423. {
  424. case 1: eFunc = FUNC_GENERIC1; break;
  425. case 2: eFunc = FUNC_GENERIC2; break;
  426. case 3: eFunc = FUNC_GENERIC3; break;
  427. case 4: eFunc = FUNC_GENERIC4; break;
  428. case 5: eFunc = FUNC_GENERIC5; break;
  429. default: sError.SetSprintf ( "INTERNAL ERROR: %d fields in sphParseSortClause()", iField ); return SORT_CLAUSE_ERROR;
  430. }
  431. return SORT_CLAUSE_OK;
  432. }