sphinxsort.cpp 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218
  1. //
  2. // Copyright (c) 2017-2026, Manticore Software LTD (https://manticoresearch.com)
  3. // Copyright (c) 2001-2016, Andrew Aksyonoff
  4. // Copyright (c) 2008-2016, Sphinx Technologies Inc
  5. // All rights reserved
  6. //
  7. // This program is free software; you can redistribute it and/or modify
  8. // it under the terms of the GNU General Public License. You should have
  9. // received a copy of the GPL license along with this program; if you
  10. // did not, you can find it at http://www.gnu.org/
  11. //
  12. #include "sphinxsort.h"
  13. #include "sortcomp.h"
  14. #include "aggregate.h"
  15. #include "distinct.h"
  16. #include "netreceive_ql.h"
  17. #include "queuecreator.h"
  18. #include "sortertraits.h"
  19. #include "sortergroup.h"
  20. #include "grouper.h"
  21. #include "knnmisc.h"
  22. #include "joinsorter.h"
  23. #include "querycontext.h"
  24. #include <ctime>
  25. #if !_WIN32
  26. #include <unistd.h>
  27. #include <sys/time.h>
  28. #endif
// process-wide defaults for aggregate accuracy and the distinct-values threshold;
// set once at startup from config, read when building group-by sorters
static bool g_bAccurateAggregation = false;
static int g_iDistinctThresh = 3500;

/// set the global default for accurate (non-approximated) aggregation
void SetAccurateAggregationDefault ( bool bEnabled )
{
	g_bAccurateAggregation = bEnabled;
}

/// fetch the global default for accurate aggregation
bool GetAccurateAggregationDefault()
{
	return g_bAccurateAggregation;
}

/// set the global default threshold for distinct-value counting
void SetDistinctThreshDefault ( int iThresh )
{
	g_iDistinctThresh = iThresh;
}

/// fetch the global distinct-value threshold default
int GetDistinctThreshDefault()
{
	return g_iDistinctThresh;
}
  47. //////////////////////////////////////////////////////////////////////////
  48. // SORTING QUEUES
  49. //////////////////////////////////////////////////////////////////////////
/// inverted indirect comparator: compares two match INDEXES into the queue's
/// match storage, with COMP's order reversed. the inversion makes the heap
/// keep the WORST match at the root, so replacement checks are O(1).
template < typename COMP >
struct InvCompareIndex_fn
{
	const VecTraits_T<CSphMatch>& m_dBase;		// the queue's match storage
	const CSphMatchComparatorState & m_tState;	// sort state (attrs, keyparts) shared with the queue

	explicit InvCompareIndex_fn ( const CSphMatchQueueTraits & tBase )
		: m_dBase ( tBase.GetMatches() )
		, m_tState ( tBase.GetState() )
	{}

	bool IsLess ( int a, int b ) const // inverts COMP::IsLess
	{
		return COMP::IsLess ( m_dBase[b], m_dBase[a], m_tState );
	}
};
  64. #define LOG_COMPONENT_KMQ __LINE__ << " *(" << this << ") "
  65. #define LOG_LEVEL_DIAG false
  66. #define KMQ LOC(DIAG,KMQ)
  67. /// heap sorter
  68. /// plain binary heap based PQ
/// heap sorter
/// plain binary heap based PQ; since the comparator is inverted, the root
/// always holds the WORST retained match, so a candidate only needs to beat
/// the root to enter a full queue
template < typename COMP, bool NOTIFICATIONS >
class CSphMatchQueue final : public CSphMatchQueueTraits
{
	using MYTYPE = CSphMatchQueue<COMP, NOTIFICATIONS>;
	LOC_ADD;

public:
	/// ctor; iSize is the max number of matches retained
	explicit CSphMatchQueue ( int iSize )
		: CSphMatchQueueTraits ( iSize )
		, m_fnComp ( *this )
	{
		if constexpr ( NOTIFICATIONS )
			m_dJustPopped.Reserve(1); // heap pops at most one match per push
	}

	bool IsGroupby () const final { return false; }

	// root of the heap is the worst match currently retained (nullptr when empty)
	const CSphMatch * GetWorst() const final { return m_dIData.IsEmpty() ? nullptr : Root(); }

	/// add entry to the queue, cloning it into owned storage
	bool Push ( const CSphMatch & tEntry ) final { return PushT ( tEntry, [this] ( CSphMatch & tTrg, const CSphMatch & tMatch ) { m_pSchema->CloneMatch ( tTrg, tMatch ); }); }

	/// batch push; invalid rows are not stored but still counted in the total
	void Push ( const VecTraits_T<const CSphMatch> & dMatches ) final
	{
		for ( auto & i : dMatches )
			if ( i.m_tRowID!=INVALID_ROWID )
				PushT ( i, [this] ( CSphMatch & tTrg, const CSphMatch & tMatch ) { m_pSchema->CloneMatch ( tTrg, tMatch ); } );
			else
				m_iTotal++;
	}

	bool PushGrouped ( const CSphMatch &, bool ) final { assert(0); return false; }

	/// store all entries into specified location in sorted order, and remove them from queue
	/// (pops worst-first, filling the output back to front, which yields best-first order)
	int Flatten ( CSphMatch * pTo ) final
	{
		KMQ << "flatten";
		assert ( !IsEmpty() );
		int iReadyMatches = Used();
		pTo += iReadyMatches;
		while ( !IsEmpty() )
		{
			--pTo;
			m_pSchema->FreeDataPtrs(*pTo);	// release whatever the target slot held before reuse
			pTo->ResetDynamic();
			PopAndProcess_T ( [pTo] ( CSphMatch & tRoot )
				{
					Swap ( *pTo, tRoot );
					return true;
				}
			);
		}
		m_iTotal = 0;
		return iReadyMatches;
	}

	/// finalize, perform final sort/cut as needed
	void Finalize ( MatchProcessor_i & tProcessor, bool bCallProcessInResultSetOrder, bool bFinalizeMatches ) final
	{
		KMQ << "finalize";
		if ( !GetLength() )
			return;

		if ( bCallProcessInResultSetOrder )
			m_dIData.Sort ( m_fnComp );

		if ( tProcessor.ProcessInRowIdOrder() )
		{
			// hand matches to the processor sorted by (tag, rowid) via a pointer array;
			// m_dIData itself is left untouched
			CSphFixedVector<int> dSorted ( m_dIData.GetLength() );
			memcpy ( dSorted.Begin(), m_dIData.Begin(), m_dIData.GetLength()*sizeof(m_dIData[0]) );

			// sort by tag, rowid. minimize columnar switches inside expressions and minimize seeks inside columnar iterators
			dSorted.Sort ( Lesser ( [this] ( int l, int r )
				{
					int iTagL = m_dData[l].m_iTag;
					int iTagR = m_dData[r].m_iTag;
					if ( iTagL!=iTagR )
						return iTagL < iTagR;

					return m_dData[l].m_tRowID < m_dData[r].m_tRowID;
				}
			) );

			CSphFixedVector<CSphMatch *> dMatchPtrs ( dSorted.GetLength() );
			ARRAY_FOREACH ( i, dSorted )
				dMatchPtrs[i] = &m_dData[dSorted[i]];

			tProcessor.Process(dMatchPtrs);
		}
		else
		{
			for ( auto iMatch : m_dIData )
				tProcessor.Process ( &m_dData[iMatch] );
		}
	}

	// fixme! test
	ISphMatchSorter * Clone () const final
	{
		auto pClone = new MYTYPE ( m_iSize );
		CloneTo ( pClone );
		return pClone;
	}

	// FIXME! test CSphMatchQueue
	/// move own matches into pRhs; totals are summed explicitly since pushing
	/// into the target would double-count
	void MoveTo ( ISphMatchSorter * pRhs, bool bCopyMeta ) final
	{
		KMQ << "moveto";
		//	m_dLogger.Print ();

		auto& dRhs = *(MYTYPE *) pRhs;
		if ( IsEmpty() )
			return; // no matches, nothing to do.

		//	dRhs.m_dLogger.Print ();
		// install into virgin sorter - no need to do something; just swap
		if ( dRhs.IsEmpty() )
		{
			SwapMatchQueueTraits ( dRhs );
			return;
		}

		// work as in non-ordered finalize call, but we not need to
		// clone the matches, may just move them instead.

		// total need special care: just add two values and don't rely
		// on result of moving, since it will be wrong
		auto iTotal = dRhs.m_iTotal;
		for ( auto i : m_dIData )
			dRhs.PushT ( m_dData[i], [] ( CSphMatch & tTrg, CSphMatch & tMatch ) { Swap ( tTrg, tMatch ); } );

		dRhs.m_iTotal = m_iTotal + iTotal;
	}

	void SetMerge ( bool bMerge ) final {}

private:
	InvCompareIndex_fn<COMP> m_fnComp;	// inverted comparator: worst match floats to the heap root

	CSphMatch * Root() const
	{
		return &m_dData [ m_dIData.First() ];
	}

	/// generic add entry to the queue
	/// PUSH is how the entry enters owned storage (clone for external, swap for moves)
	template <typename MATCH, typename PUSHER>
	bool PushT ( MATCH && tEntry, PUSHER && PUSH )
	{
		++m_iTotal;

		if constexpr ( NOTIFICATIONS )
		{
			m_tJustPushed = RowTagged_t();
			m_dJustPopped.Resize(0);
		}

		if ( Used()==m_iSize )
		{
			// if it's worse that current min, reject it, else pop off current min
			if ( COMP::IsLess ( tEntry, *Root(), m_tState ) )
				return true;
			else
				PopAndProcess_T ( [] ( const CSphMatch & ) { return false; } );
		}

		// do add
		PUSH ( Add(), std::forward<MATCH> ( tEntry ));

		if constexpr ( NOTIFICATIONS )
			m_tJustPushed = RowTagged_t ( *Last() );

		int iEntry = Used()-1;

		// shift up if needed, so that worst (lesser) ones float to the top
		while ( iEntry )
		{
			int iParent = ( iEntry-1 ) / 2;
			if ( !m_fnComp.IsLess ( m_dIData[iParent], m_dIData[iEntry] ) )
				break;

			// entry is less than parent, should float to the top
			Swap ( m_dIData[iEntry], m_dIData[iParent] );
			iEntry = iParent;
		}
		return true;
	}

	/// remove root (ie. top priority) entry
	/// fnProcess receives the popped match; returning false signals "discarded",
	/// which (with NOTIFICATIONS) records it as just-popped
	template<typename POPPER>
	void PopAndProcess_T ( POPPER && fnProcess )
	{
		assert ( !IsEmpty() );

		auto& iJustRemoved = m_dIData.Pop();
		if ( !IsEmpty() ) // for empty just popped is the root
			Swap ( m_dIData.First (), iJustRemoved );

		if ( !fnProcess ( m_dData[iJustRemoved] ) )
		{
			// make the last entry my new root
			if constexpr ( NOTIFICATIONS )
			{
				if ( m_dJustPopped.IsEmpty () )
					m_dJustPopped.Add ( RowTagged_t ( m_dData[iJustRemoved] ) );
				else
					m_dJustPopped[0] = RowTagged_t ( m_dData[iJustRemoved] );
			}
		}

		// sift down if needed
		int iEntry = 0;
		auto iUsed = Used();
		while (true)
		{
			// select child
			int iChild = (iEntry*2) + 1;
			if ( iChild>=iUsed )
				break;

			// select smallest child
			if ( iChild+1<iUsed )
				if ( m_fnComp.IsLess ( m_dIData[iChild], m_dIData[iChild+1] ) )
					++iChild;

			// if smallest child is less than entry, do float it to the top
			if ( m_fnComp.IsLess ( m_dIData[iEntry], m_dIData[iChild] ) )
			{
				Swap ( m_dIData[iChild], m_dIData[iEntry] );
				iEntry = iChild;
				continue;
			}
			break;
		}
	}
};
  266. #define LOG_COMPONENT_KBF __LINE__ << " *(" << this << ") "
  267. #define KBF LOC(DIAG,KBF)
  268. //////////////////////////////////////////////////////////////////////////
  269. /// K-buffer (generalized double buffer) sorter
  270. /// faster worst-case but slower average-case than the heap sorter
  271. /// invoked with select ... OPTION sort_method=kbuffer
/// holds up to N*COEFF matches unsorted; when full, a quickselect-style pass
/// keeps the best N and frees the rest, and the worst survivor is cached for
/// O(1) early rejection of subsequent pushes
template < typename COMP, bool NOTIFICATIONS >
class CSphKbufferMatchQueue : public CSphMatchQueueTraits
{
	using MYTYPE = CSphKbufferMatchQueue<COMP, NOTIFICATIONS>;
	InvCompareIndex_fn<COMP> m_dComp;	// best-to-worst indirect ordering
	LOC_ADD;

public:
	/// ctor; allocates COEFF times the requested size, then remembers the true limit
	explicit CSphKbufferMatchQueue ( int iSize )
		: CSphMatchQueueTraits ( iSize*COEFF )
		, m_dComp ( *this )
	{
		m_iSize /= COEFF;
		if constexpr ( NOTIFICATIONS )
			m_dJustPopped.Reserve ( m_iSize*(COEFF-1) );	// a cut can drop up to (COEFF-1)*N matches at once
	}

	bool IsGroupby () const final { return false; }

	// never report more than the logical limit, even while the buffer is overfull
	int GetLength () final { return Min ( Used(), m_iSize ); }

	bool Push ( const CSphMatch & tEntry ) override { return PushT ( tEntry, [this] ( CSphMatch & tTrg, const CSphMatch & tMatch ) { m_pSchema->CloneMatch ( tTrg, tMatch ); }); }

	/// batch push; invalid rows are not stored but still counted in the total
	void Push ( const VecTraits_T<const CSphMatch> & dMatches ) override
	{
		for ( const auto & i : dMatches )
			if ( i.m_tRowID!=INVALID_ROWID )
				PushT ( i, [this] ( CSphMatch & tTrg, const CSphMatch & tMatch ) { m_pSchema->CloneMatch ( tTrg, tMatch ); } );
			else
				m_iTotal++;
	}

	bool PushGrouped ( const CSphMatch &, bool ) final { assert(0); return false; }

	/// store all entries into specified location in sorted order, and remove them from queue
	int Flatten ( CSphMatch * pTo ) final
	{
		KBF << "Flatten";
		FinalizeMatches ();
		auto iReadyMatches = Used();

		for ( auto iMatch : m_dIData )
		{
			KBF << "fltn " << m_dData[iMatch].m_iTag << ":" << m_dData[iMatch].m_tRowID;
			Swap ( *pTo, m_dData[iMatch] );
			++pTo;
		}

		m_iMaxUsed = ResetDynamic ( m_iMaxUsed );

		// clean up for the next work session
		m_pWorst = nullptr;
		m_iTotal = 0;
		m_bFinalized = false;
		m_dIData.Resize(0);

		return iReadyMatches;
	}

	/// finalize, perform final sort/cut as needed
	void Finalize ( MatchProcessor_i & tProcessor, bool, bool bFinalizeMatches ) final
	{
		KBF << "Finalize";
		if ( IsEmpty() )
			return;

		if ( bFinalizeMatches )
			FinalizeMatches();

		for ( auto iMatch : m_dIData )
			tProcessor.Process ( &m_dData[iMatch] );
	}

	ISphMatchSorter* Clone() const final
	{
		auto pClone = new MYTYPE ( m_iSize );
		CloneTo ( pClone );
		return pClone;
	}

	// FIXME! test CSphKbufferMatchQueue
	// FIXME! need to deal with justpushed/justpopped any other way!
	void MoveTo ( ISphMatchSorter * pRhs, bool bCopyMeta ) final
	{
		auto& dRhs = *(CSphKbufferMatchQueue<COMP, NOTIFICATIONS>*) pRhs;
		if ( IsEmpty () )
			return;

		if ( dRhs.IsEmpty () )
		{
			// target is virgin: hand over storage and bookkeeping wholesale
			SwapMatchQueueTraits (dRhs);
			dRhs.m_pWorst = m_pWorst;
			dRhs.m_bFinalized = m_bFinalized;
			return;
		}

		FinalizeMatches();

		// both are non-empty - need to process.
		// work as finalize call, but don't clone the matches; move them instead.
		// total need special care!
		auto iTotal = dRhs.m_iTotal;
		for ( auto iMatch : m_dIData )
		{
			dRhs.PushT ( m_dData[iMatch],
				[] ( CSphMatch & tTrg, CSphMatch & tMatch ) {
					Swap ( tTrg, tMatch );
				});
		}

		dRhs.m_iTotal = m_iTotal + iTotal;
	}

	void SetMerge ( bool bMerge ) final {}

protected:
	CSphMatch *			m_pWorst = nullptr;		// worst retained match; enables O(1) rejection once set
	bool				m_bFinalized = false;	// true when m_dIData is cut to m_iSize and sorted
	int					m_iMaxUsed = -1;		// high-water mark of used slots, for freeing dynamics later
	static const int	COEFF = 4;				// overallocation factor of the k-buffer

private:
	void SortMatches () // sort from best to worst
	{
		m_dIData.Sort ( m_dComp );
	}

	/// release a match's data and (optionally) record it as just-popped
	void FreeMatch ( int iMatch )
	{
		if constexpr ( NOTIFICATIONS )
			m_dJustPopped.Add ( RowTagged_t ( m_dData[iMatch] ) );
		m_pSchema->FreeDataPtrs ( m_dData[iMatch] );
	}

	/// drop everything beyond the logical limit after a partition/sort
	void CutTail()
	{
		if ( Used()<=m_iSize)
			return;

		m_iMaxUsed = Max ( m_iMaxUsed, this->m_dIData.GetLength () ); // memorize it for free dynamics later.
		m_dIData.Slice ( m_iSize ).Apply ( [this] ( int iMatch ) { FreeMatch ( iMatch ); } );
		m_dIData.Resize ( m_iSize );
	}

	// conception: we have array of N*COEFF elems.
	// We need only N the best elements from it (rest have to be disposed).
	// direct way: rsort, then take first N elems.
	// this way: rearrange array by performing one pass of quick sort
	// if we have exactly N elems left hand from pivot - we're done.
	// otherwise repeat rearranging only to right or left part until the target achieved.
	void BinaryPartition ()
	{
		int iPivot = m_dIData[m_iSize / COEFF+1];
		int iMaxIndex = m_iSize-1;
		int a=0;
		int b=Used()-1;
		while (true)
		{
			int i=a;
			int j=b;
			while (i<=j)
			{
				while (m_dComp.IsLess (m_dIData[i],iPivot)) ++i;
				while (m_dComp.IsLess (iPivot, m_dIData[j])) --j;
				if ( i<=j ) ::Swap( m_dIData[i++], m_dIData[j--]);
			}
			if ( iMaxIndex == j )
				break;

			if ( iMaxIndex < j)
				b = j; // too many elems acquired; continue with left part
			else
				a = i; // too less elems acquired; continue with right part
			iPivot = m_dIData[( a * ( COEFF-1 )+b ) / COEFF];
		}
	}

	/// partition down to the best m_iSize matches, freeing the rest
	void RepartitionMatches ()
	{
		assert ( Used ()>m_iSize );
		BinaryPartition ();
		CutTail();
	}

	/// idempotent cut+sort: after this, m_dIData holds at most m_iSize matches, best first
	void FinalizeMatches ()
	{
		if ( m_bFinalized )
			return;

		m_bFinalized = true;

		if ( Used ()>m_iSize )
			RepartitionMatches();

		SortMatches();
	}

	// generic push entry (add it some way to the queue clone or swap PUSHER depends on)
	template<typename MATCH, typename PUSHER>
	FORCE_INLINE bool PushT ( MATCH && tEntry, PUSHER && PUSH )
	{
		if constexpr ( NOTIFICATIONS )
		{
			m_tJustPushed = RowTagged_t();
			m_dJustPopped.Resize(0);
		}

		// quick early rejection checks
		++m_iTotal;
		if ( m_pWorst && COMP::IsLess ( tEntry, *m_pWorst, m_tState ) )
			return true;

		// quick check passed
		// fill the data, back to front
		m_bFinalized = false;
		PUSH ( Add(), std::forward<MATCH> ( tEntry ));

		if constexpr ( NOTIFICATIONS )
			m_tJustPushed = RowTagged_t ( *Last() );

		// do the initial sort once
		// (at this point no match was ever rejected, so m_iTotal equals Used())
		if ( m_iTotal==m_iSize )
		{
			assert ( Used()==m_iSize && !m_pWorst );
			SortMatches();
			m_pWorst = Last();
			m_bFinalized = true;
			return true;
		}

		if ( Used ()<m_iSize*COEFF )
			return true;

		// do the sort/cut when the K-buffer is full
		assert ( Used ()==m_iSize*COEFF );
		RepartitionMatches();
		SortMatches ();
		m_pWorst = Last ();
		m_bFinalized = true;
		return true;
	}
};
  475. //////////////////////////////////////////////////////////////////////////
  476. /// collect list of matched DOCIDs in aside compressed blob
  477. /// (mainly used to collect docs in `DELETE... WHERE` statement)
/// pseudo-sorter that only accumulates DOCIDs and zip-encodes them (as deltas)
/// into a caller-supplied byte blob; never produces ordinary result rows
class CollectQueue_c final : public MatchSorter_c, ISphNoncopyable
{
	using BASE = MatchSorter_c;

public:
	CollectQueue_c ( int iSize, CSphVector<BYTE>& dCollectedValues );

	bool IsGroupby () const final { return false; }
	int GetLength () final { return 0; } // that ensures, flatten() will never called;
	bool Push ( const CSphMatch& tEntry ) final { return PushMatch(tEntry); }
	void Push ( const VecTraits_T<const CSphMatch> & dMatches ) final
	{
		for ( const auto & i : dMatches )
			if ( i.m_tRowID!=INVALID_ROWID )
				PushMatch(i);
	}
	bool PushGrouped ( const CSphMatch &, bool ) final { assert(0); return false; }
	int Flatten ( CSphMatch * ) final { return 0; }
	void Finalize ( MatchProcessor_i &, bool, bool ) final;
	bool CanBeCloned() const final { return false; }
	ISphMatchSorter * Clone () const final { return nullptr; }
	void MoveTo ( ISphMatchSorter *, bool ) final {}
	void SetSchema ( ISphSchema * pSchema, bool bRemapCmp ) final;
	bool IsCutoffDisabled() const final { return true; }
	void SetMerge ( bool bMerge ) final {}

private:
	DocID_t m_iLastID;						// previous id written, base for delta encoding
	int m_iMaxMatches;						// soft cap; collected ids are flushed past this
	CSphVector<DocID_t> m_dUnsortedDocs;	// ids accumulated since the last flush
	MemoryWriter_c m_tWriter;				// writes zipped deltas into the caller's blob
	bool m_bDocIdDynamic = false;			// whether docid lives in the dynamic or static row part

	inline bool PushMatch ( const CSphMatch & tEntry );
	inline void ProcessPushed();
};
/// ctor: remember the flush threshold and attach the writer to the caller's output blob
CollectQueue_c::CollectQueue_c ( int iSize, CSphVector<BYTE>& dCollectedValues )
	: m_iLastID ( 0 )
	, m_iMaxMatches ( iSize )
	, m_tWriter ( dCollectedValues )
{}
  515. /// sort/uniq already collected and store them to writer
  516. void CollectQueue_c::ProcessPushed()
  517. {
  518. m_dUnsortedDocs.Uniq();
  519. for ( auto& iCurId : m_dUnsortedDocs )
  520. m_tWriter.ZipOffset ( iCurId - std::exchange ( m_iLastID, iCurId ) );
  521. m_dUnsortedDocs.Resize ( 0 );
  522. }
  523. bool CollectQueue_c::PushMatch ( const CSphMatch & tEntry )
  524. {
  525. if ( m_dUnsortedDocs.GetLength() >= m_iMaxMatches && m_dUnsortedDocs.GetLength() == m_dUnsortedDocs.GetLimit() )
  526. ProcessPushed();
  527. m_dUnsortedDocs.Add ( sphGetDocID ( m_bDocIdDynamic ? tEntry.m_pDynamic : tEntry.m_pStatic ) );
  528. return true;
  529. }
/// final update pass: flush whatever is still buffered, then reset the
/// delta base so a further collection session starts from zero
void CollectQueue_c::Finalize ( MatchProcessor_i&, bool, bool )
{
	ProcessPushed();
	m_iLastID = 0;
}
/// adopt the schema and cache where the docid attribute lives
/// (dynamic vs static row part), so PushMatch can read it cheaply
void CollectQueue_c::SetSchema ( ISphSchema * pSchema, bool bRemapCmp )
{
	BASE::SetSchema ( pSchema, bRemapCmp );

	const CSphColumnInfo * pDocId = pSchema->GetAttr ( sphGetDocidName() );
	assert(pDocId);	// every index schema is expected to expose a docid column
	m_bDocIdDynamic = pDocId->m_tLocator.m_bDynamic;
}
/// factory: build a DOCID-collecting pseudo-sorter writing into tCollection
ISphMatchSorter * CreateCollectQueue ( int iMaxMatches, CSphVector<BYTE> & tCollection )
{
	return new CollectQueue_c ( iMaxMatches, tCollection );
}
  547. //////////////////////////////////////////////////////////////////////////
  548. void SendSqlSchema ( const ISphSchema& tSchema, RowBuffer_i* pRows, const VecTraits_T<int>& dOrder )
  549. {
  550. pRows->HeadBegin ();
  551. ARRAY_CONSTFOREACH ( i, dOrder )
  552. {
  553. const CSphColumnInfo& tCol = tSchema.GetAttr ( dOrder[i] );
  554. if ( sphIsInternalAttr ( tCol ) )
  555. continue;
  556. if ( i == 0 )
  557. {
  558. assert (tCol.m_sName == "id");
  559. pRows->HeadColumn ( "id", ESphAttr2MysqlColumnStreamed ( SPH_ATTR_UINT64 ) );
  560. continue;
  561. }
  562. if ( tCol.m_eAttrType==SPH_ATTR_TOKENCOUNT )
  563. continue;
  564. pRows->HeadColumn ( tCol.m_sName.cstr(), ESphAttr2MysqlColumnStreamed ( tCol.m_eAttrType ) );
  565. }
  566. pRows->HeadEnd ( false, 0 );
  567. }
using SqlEscapedBuilder_c = EscapedStringBuilder_T<BaseQuotation_T<SqlQuotator_t>>;

/// serialize one match into the SQL wire format, column by column in dOrder order;
/// internal and token-count attributes are skipped. Kills the session on a failed commit.
void SendSqlMatch ( const ISphSchema& tSchema, RowBuffer_i* pRows, CSphMatch& tMatch, const BYTE* pBlobPool, const VecTraits_T<int>& dOrder, bool bDynamicDocid )
{
	auto& dRows = *pRows;
	ARRAY_CONSTFOREACH ( i, dOrder )
	{
		const CSphColumnInfo& dAttr = tSchema.GetAttr ( dOrder[i] );
		if ( sphIsInternalAttr ( dAttr ) )
			continue;

		if ( dAttr.m_eAttrType==SPH_ATTR_TOKENCOUNT )
			continue;

		CSphAttrLocator tLoc = dAttr.m_tLocator;
		ESphAttr eAttrType = dAttr.m_eAttrType;

		// the first column is the document id; always streamed as unsigned 64-bit
		if ( i == 0 )
			eAttrType = SPH_ATTR_UINT64;

		switch ( eAttrType )
		{
		case SPH_ATTR_STRING:
			dRows.PutArray ( sphGetBlobAttr ( tMatch, tLoc, pBlobPool ) );
			break;
		case SPH_ATTR_STRINGPTR:
			{
				const BYTE* pStr = nullptr;
				if ( dAttr.m_eStage == SPH_EVAL_POSTLIMIT )
				{
					// post-limit strings are evaluated on demand via the expression
					if ( bDynamicDocid )
					{
						dAttr.m_pExpr->StringEval ( tMatch, &pStr );
					} else
					{
						// NOTE(review): the dynamic part is hidden while evaluating when a
						// static part exists - presumably to make the expression read the
						// static row; confirm against StringEval semantics
						auto pDynamic = tMatch.m_pDynamic;
						if ( tMatch.m_pStatic )
							tMatch.m_pDynamic = nullptr;
						dAttr.m_pExpr->StringEval ( tMatch, &pStr );
						tMatch.m_pDynamic = pDynamic;
					}
					dRows.PutString ( (const char*)pStr );
					SafeDeleteArray ( pStr );	// StringEval result is owned here
				} else {
					pStr = (const BYTE*)tMatch.GetAttr ( tLoc );
					auto dString = sphUnpackPtrAttr ( pStr );
					dRows.PutArray ( dString );
				}
			}
			break;
		case SPH_ATTR_INTEGER:
		case SPH_ATTR_TIMESTAMP:
		case SPH_ATTR_BOOL:
			dRows.PutNumAsString ( (DWORD)tMatch.GetAttr ( tLoc ) );
			break;
		case SPH_ATTR_BIGINT:
			dRows.PutNumAsString ( tMatch.GetAttr ( tLoc ) );
			break;
		case SPH_ATTR_UINT64:
			dRows.PutNumAsString ( (uint64_t)tMatch.GetAttr ( tLoc ) );
			break;
		case SPH_ATTR_FLOAT:
			dRows.PutFloatAsString ( tMatch.GetAttrFloat ( tLoc ) );
			break;
		case SPH_ATTR_DOUBLE:
			dRows.PutDoubleAsString ( tMatch.GetAttrDouble ( tLoc ) );
			break;
		case SPH_ATTR_INT64SET:
		case SPH_ATTR_UINT32SET:
			{
				// blob-pool MVA: render as a parenthesized comma-separated list
				StringBuilder_c dStr;
				auto dMVA = sphGetBlobAttr ( tMatch, tLoc, pBlobPool );
				dStr << "(";
				sphMVA2Str ( dMVA, eAttrType == SPH_ATTR_INT64SET, dStr );
				dStr << ")";
				dRows.PutArray ( dStr, false );
				break;
			}
		case SPH_ATTR_INT64SET_PTR:
		case SPH_ATTR_UINT32SET_PTR:
			{
				// packed-pointer MVA variant
				StringBuilder_c dStr;
				dStr << "(";
				sphPackedMVA2Str ( (const BYTE*)tMatch.GetAttr ( tLoc ), eAttrType == SPH_ATTR_INT64SET_PTR, dStr );
				dStr << ")";
				dRows.PutArray ( dStr, false );
				break;
			}
		case SPH_ATTR_FLOAT_VECTOR:
			{
				StringBuilder_c dStr;
				auto dFloatVec = sphGetBlobAttr ( tMatch, tLoc, pBlobPool );
				dStr << "(";
				sphFloatVec2Str ( dFloatVec, dStr );
				dStr << ")";
				dRows.PutArray ( dStr, false );
			}
			break;
		case SPH_ATTR_FLOAT_VECTOR_PTR:
			{
				StringBuilder_c dStr;
				dStr << "(";
				sphPackedFloatVec2Str ( (const BYTE*)tMatch.GetAttr(tLoc), dStr );
				dStr << ")";
				dRows.PutArray ( dStr, false );
			}
			break;
		case SPH_ATTR_JSON:
			{
				// blob-pool JSON: format, then SQL-escape without outer quotes
				auto pJson = sphGetBlobAttr ( tMatch, tLoc, pBlobPool );
				JsonEscapedBuilder sTmp;
				if ( pJson.second )
					sphJsonFormat ( sTmp, pJson.first );
				auto sJson = Str_t(sTmp);

				SqlEscapedBuilder_c dEscaped;
				dEscaped.FixupSpacedAndAppendEscapedNoQuotes ( sJson.first, sJson.second );
				dRows.PutArray ( dEscaped, false );
			}
			break;
		case SPH_ATTR_JSON_PTR:
			{
				// packed-pointer JSON variant; null pointer yields an empty value
				auto* pString = (const BYTE*)tMatch.GetAttr ( tLoc );
				JsonEscapedBuilder sTmp;
				if ( pString )
				{
					auto dJson = sphUnpackPtrAttr ( pString );
					sphJsonFormat ( sTmp, dJson.first );
				}
				auto sJson = Str_t ( sTmp );

				SqlEscapedBuilder_c dEscaped;
				dEscaped.FixupSpacedAndAppendEscapedNoQuotes ( sJson.first, sJson.second );
				dRows.PutArray ( dEscaped, false );
			}
			break;
		case SPH_ATTR_FACTORS:
		case SPH_ATTR_FACTORS_JSON:
		case SPH_ATTR_JSON_FIELD:
		case SPH_ATTR_JSON_FIELD_PTR:
			assert ( false ); // index schema never contain such column
			break;
		default:
			// unsupported type: emit a one-byte '-' placeholder
			dRows.Add ( 1 );
			dRows.Add ( '-' );
			break;
		}
	}
	if ( !dRows.Commit() )
		session::SetKilled ( true );	// client gone or buffer failure: cancel the query
}
  712. /// stream out matches
  713. class DirectSqlQueue_c final : public MatchSorter_c, ISphNoncopyable
  714. {
  715. using BASE = MatchSorter_c;
  716. public:
  717. DirectSqlQueue_c ( RowBuffer_i * pOutput, void ** ppOpaque1, void ** ppOpaque2, StrVec_t dColumns );
  718. ~DirectSqlQueue_c() override;
  719. bool IsGroupby () const final { return false; }
  720. int GetLength () final { return 0; } // that ensures, flatten() will never called;
  721. bool Push ( const CSphMatch& tEntry ) final { return PushMatch(const_cast<CSphMatch&>(tEntry)); }
  722. void Push ( const VecTraits_T<const CSphMatch> & dMatches ) final
  723. {
  724. for ( const auto & i : dMatches )
  725. if ( i.m_tRowID!=INVALID_ROWID )
  726. PushMatch(const_cast<CSphMatch&>(i));
  727. }
  728. bool PushGrouped ( const CSphMatch &, bool ) final { assert(0); return false; }
  729. int Flatten ( CSphMatch * ) final { return 0; }
  730. void Finalize ( MatchProcessor_i &, bool, bool ) final;
  731. bool CanBeCloned() const final { return false; }
  732. ISphMatchSorter * Clone () const final { return nullptr; }
  733. void MoveTo ( ISphMatchSorter *, bool ) final {}
  734. void SetSchema ( ISphSchema * pSchema, bool bRemapCmp ) final;
  735. bool IsCutoffDisabled() const final { return true; }
  736. void SetMerge ( bool bMerge ) final {}
  737. void SetBlobPool ( const BYTE* pBlobPool ) final
  738. {
  739. m_pBlobPool = pBlobPool;
  740. MakeCtx();
  741. }
  742. void SetColumnar ( columnar::Columnar_i* pColumnar ) final
  743. {
  744. m_pColumnar = pColumnar;
  745. MakeCtx();
  746. }
  747. private:
  748. bool m_bSchemaSent = false;
  749. int64_t m_iDocs = 0;
  750. RowBuffer_i* m_pOutput;
  751. const BYTE* m_pBlobPool = nullptr;
  752. columnar::Columnar_i* m_pColumnar = nullptr;
  753. CSphVector<ISphExpr*> m_dDocstores;
  754. CSphVector<ISphExpr*> m_dFinals;
  755. void ** m_ppOpaque1 = nullptr;
  756. void ** m_ppOpaque2 = nullptr;
  757. void * m_pCurDocstore = nullptr;
  758. void * m_pCurDocstoreReader = nullptr;
  759. CSphQuery m_dFake;
  760. CSphQueryContext m_dCtx;
  761. StrVec_t m_dColumns;
  762. CSphVector<int> m_dOrder;
  763. bool m_bDynamicDocid;
  764. bool m_bNotYetFinalized = true;
  765. inline bool PushMatch ( CSphMatch & tEntry );
  766. void SendSchemaOnce();
  767. void FinalizeOnce();
  768. void MakeCtx();
  769. };
/// ctor: keep the output buffer, the two opaque docstore slots, and the
/// requested column list; the calc context binds to the placeholder query
DirectSqlQueue_c::DirectSqlQueue_c ( RowBuffer_i * pOutput, void ** ppOpaque1, void ** ppOpaque2, StrVec_t dColumns )
	: m_pOutput ( pOutput )
	, m_ppOpaque1 ( ppOpaque1 )
	, m_ppOpaque2 ( ppOpaque2 )
	, m_dCtx (m_dFake)
	, m_dColumns ( std::move ( dColumns ) )
{}
/// dtor: make sure the stream is properly finalized even if Finalize() was never called
DirectSqlQueue_c::~DirectSqlQueue_c()
{
	FinalizeOnce();
}
/// lazily emit the result-set header before the first row: resolve the
/// requested columns to schema indexes, bucket expression attributes by
/// evaluation stage, and note how the docid is stored
void DirectSqlQueue_c::SendSchemaOnce()
{
	if ( m_bSchemaSent )
		return;

	assert ( !m_iDocs );	// must happen before any row is streamed

	// map requested column names to schema attribute indexes (unknown names are dropped)
	for ( const auto& sColumn : m_dColumns )
	{
		auto iIdx = m_pSchema->GetAttrIndex ( sColumn.cstr() );
		if ( iIdx >= 0 )
			m_dOrder.Add ( iIdx );
	}

	for ( int i = 0; i < m_pSchema->GetAttrsCount(); ++i )
	{
		auto& tCol = const_cast< CSphColumnInfo &>(m_pSchema->GetAttr ( i ));
		if ( tCol.m_sName == sphGetDocidName() )
			m_bDynamicDocid = tCol.m_tLocator.m_bDynamic;

		if ( !tCol.m_pExpr )
			continue;

		// split expression attrs: final-stage vs post-limit (docstore-backed)
		switch ( tCol.m_eStage )
		{
		case SPH_EVAL_FINAL : m_dFinals.Add ( tCol.m_pExpr ); break;
		case SPH_EVAL_POSTLIMIT: m_dDocstores.Add ( tCol.m_pExpr ); break;
		default:
			sphWarning ("Unknown stage in SendSchema(): %d", tCol.m_eStage);
		}
	}
	SendSqlSchema ( *m_pSchema, m_pOutput, m_dOrder );
	m_bSchemaSent = true;
}
/// (re)create the per-query calc context used by CalcFinal() in PushMatch();
/// called from SetBlobPool()/SetColumnar() whenever the backing storage changes
void DirectSqlQueue_c::MakeCtx()
{
	CSphQueryResultMeta tFakeMeta;
	CSphVector<const ISphSchema*> tFakeSchemas;
	// NOTE(review): tFakeMeta/tFakeSchemas are locals that die right after this
	// call - assumes SetupCalc() only reads them during setup and keeps no
	// references; confirm against CSphQueryContext::SetupCalc
	m_dCtx.SetupCalc ( tFakeMeta, *m_pSchema, *m_pSchema, m_pBlobPool, m_pColumnar, tFakeSchemas );
}
/// stream a single match straight to the client.
/// Sends the schema lazily on the first match, (re)binds late-stage expressions
/// to the current docstore when it changes, evaluates final-stage columns and
/// emits the row. Always returns true (every match is "accepted").
bool DirectSqlQueue_c::PushMatch ( CSphMatch & tEntry )
{
	SendSchemaOnce();
	++m_iDocs;

	// the docstore may change between pushed matches (the opaque slots are
	// indirect for exactly that reason); rebind the postlimit expressions
	// only when the docid-based reader actually changed
	if ( m_ppOpaque1 )
	{
		auto pDocstoreReader = *m_ppOpaque1;
		if ( pDocstoreReader!=std::exchange (m_pCurDocstore, pDocstoreReader) && pDocstoreReader )
		{
			DocstoreSession_c::InfoDocID_t tSessionInfo;
			tSessionInfo.m_pDocstore = (const DocstoreReader_i *)pDocstoreReader;
			tSessionInfo.m_iSessionId = -1;
			// value is copied; no leak of pointer to local here.
			m_dDocstores.for_each ( [&tSessionInfo] ( ISphExpr* pExpr ) { pExpr->Command ( SPH_EXPR_SET_DOCSTORE_DOCID, &tSessionInfo ); } );
		}
	}

	// same dance for the rowid-based docstore used by final-stage expressions
	if ( m_ppOpaque2 )
	{
		auto pDocstore = *m_ppOpaque2;
		if ( pDocstore != std::exchange ( m_pCurDocstoreReader, pDocstore ) && pDocstore )
		{
			DocstoreSession_c::InfoRowID_t tSessionInfo;
			tSessionInfo.m_pDocstore = (Docstore_i*)pDocstore;
			tSessionInfo.m_iSessionId = -1;
			// value is copied; no leak of pointer to local here.
			m_dFinals.for_each ( [&tSessionInfo] ( ISphExpr* pExpr ) { pExpr->Command ( SPH_EXPR_SET_DOCSTORE_ROWID, &tSessionInfo ); } );
		}
	}

	m_dCtx.CalcFinal(tEntry); // evaluate SPH_EVAL_FINAL expressions for this row
	SendSqlMatch ( *m_pSchema, m_pOutput, tEntry, m_pBlobPool, m_dOrder, m_bDynamicDocid );
	return true;
}
  848. /// final update pass
  849. void DirectSqlQueue_c::Finalize ( MatchProcessor_i&, bool, bool bFinalizeMatches )
  850. {
  851. if ( !bFinalizeMatches )
  852. return;
  853. FinalizeOnce();
  854. }
  855. void DirectSqlQueue_c::FinalizeOnce ()
  856. {
  857. if ( !std::exchange ( m_bNotYetFinalized, false ) )
  858. return;
  859. SendSchemaOnce();
  860. m_pOutput->Eof();
  861. }
/// install the output schema; just forwards to the base class implementation
void DirectSqlQueue_c::SetSchema ( ISphSchema * pSchema, bool bRemapCmp )
{
	BASE::SetSchema ( pSchema, bRemapCmp );
}
  866. ISphMatchSorter * CreateDirectSqlQueue ( RowBuffer_i * pOutput, void ** ppOpaque1, void ** ppOpaque2, const StrVec_t & dColumns )
  867. {
  868. return new DirectSqlQueue_c ( pOutput, ppOpaque1, ppOpaque2, dColumns );
  869. }
  870. //////////////////////////////////////////////////////////////////////////
  871. // SORT CLAUSE PARSER
  872. //////////////////////////////////////////////////////////////////////////
  873. class SortClauseTokenizer_t
  874. {
  875. protected:
  876. const char * m_pCur;
  877. const char * m_pMax;
  878. char * m_pBuf;
  879. protected:
  880. char ToLower ( char c )
  881. {
  882. // 0..9, A..Z->a..z, _, a..z, @, .
  883. if ( ( c>='0' && c<='9' ) || ( c>='a' && c<='z' ) || c=='_' || c=='@' || c=='.' || c=='[' || c==']' || c=='\'' || c=='\"' || c=='(' || c==')' || c=='*' )
  884. return c;
  885. if ( c>='A' && c<='Z' )
  886. return c-'A'+'a';
  887. return 0;
  888. }
  889. public:
  890. explicit SortClauseTokenizer_t ( const char * sBuffer )
  891. {
  892. auto iLen = (int) strlen(sBuffer);
  893. m_pBuf = new char [ iLen+1 ];
  894. m_pMax = m_pBuf+iLen;
  895. m_pCur = m_pBuf;
  896. // make string lowercase but keep case of JSON.field
  897. bool bJson = false;
  898. for ( int i=0; i<=iLen; i++ )
  899. {
  900. char cSrc = sBuffer[i];
  901. char cDst = ToLower ( cSrc );
  902. bJson = ( cSrc=='.' || cSrc=='[' || ( bJson && cDst>0 ) ); // keep case of valid char sequence after '.' and '[' symbols
  903. m_pBuf[i] = bJson ? cSrc : cDst;
  904. }
  905. }
  906. ~SortClauseTokenizer_t ()
  907. {
  908. SafeDeleteArray ( m_pBuf );
  909. }
  910. const char * GetToken ()
  911. {
  912. // skip spaces
  913. while ( m_pCur<m_pMax && !*m_pCur )
  914. m_pCur++;
  915. if ( m_pCur>=m_pMax )
  916. return nullptr;
  917. // memorize token start, and move pointer forward
  918. const char * sRes = m_pCur;
  919. while ( *m_pCur )
  920. m_pCur++;
  921. return sRes;
  922. }
  923. bool IsSparseCount ( const char * sTok )
  924. {
  925. const char * sSeq = "(*)";
  926. for ( ; sTok<m_pMax && *sSeq; sTok++ )
  927. {
  928. bool bGotSeq = ( *sSeq==*sTok );
  929. if ( bGotSeq )
  930. sSeq++;
  931. // stop checking on any non-space char outside sequence or sequence end
  932. if ( ( !bGotSeq && !sphIsSpace ( *sTok ) && *sTok!='\0' ) || !*sSeq )
  933. break;
  934. }
  935. if ( !*sSeq && sTok+1<m_pMax && !sTok[1] )
  936. {
  937. // advance token iterator after composite count(*) token
  938. m_pCur = sTok+1;
  939. return true;
  940. } else
  941. {
  942. return false;
  943. }
  944. }
  945. };
  946. //////////////////////////////////////////////////////////////////////////
  947. // SORTING+GROUPING INSTANTIATION
  948. //////////////////////////////////////////////////////////////////////////
  949. ISphMatchSorter * CreateSorter ( ESphSortFunc eMatchFunc, ESphSortFunc eGroupFunc, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings, bool bHasPackedFactors, bool bHasAggregates, const PrecalculatedSorterResults_t & tPrecalc )
  950. {
  951. CSphRefcountedPtr<ISphMatchComparator> pComp;
  952. if ( !tSettings.m_bImplicit )
  953. switch ( eMatchFunc )
  954. {
  955. case FUNC_REL_DESC: pComp = new MatchRelevanceLt_fn(); break;
  956. case FUNC_TIMESEGS: pComp = new MatchTimeSegments_fn(); break;
  957. case FUNC_GENERIC1: pComp = new MatchGeneric1_fn(); break;
  958. case FUNC_GENERIC2: pComp = new MatchGeneric2_fn(); break;
  959. case FUNC_GENERIC3: pComp = new MatchGeneric3_fn(); break;
  960. case FUNC_GENERIC4: pComp = new MatchGeneric4_fn(); break;
  961. case FUNC_GENERIC5: pComp = new MatchGeneric5_fn(); break;
  962. case FUNC_EXPR: pComp = new MatchExpr_fn(); break; // only for non-bitfields, obviously
  963. }
  964. return CreateGroupSorter ( eGroupFunc, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates, tPrecalc );
  965. }
  966. /////////////////////////
  967. // SORTING QUEUE FACTORY
  968. /////////////////////////
  969. template < typename COMP >
  970. static ISphMatchSorter * CreatePlainSorter ( bool bKbuffer, int iMaxMatches, bool bFactors )
  971. {
  972. if ( bKbuffer )
  973. {
  974. if ( bFactors )
  975. return new CSphKbufferMatchQueue<COMP, true> ( iMaxMatches );
  976. return new CSphKbufferMatchQueue<COMP, false> ( iMaxMatches );
  977. }
  978. if ( bFactors )
  979. return new CSphMatchQueue<COMP, true> ( iMaxMatches );
  980. return new CSphMatchQueue<COMP, false> ( iMaxMatches );
  981. }
  982. ISphMatchSorter * CreatePlainSorter ( ESphSortFunc eMatchFunc, bool bKbuffer, int iMaxMatches, bool bFactors )
  983. {
  984. switch ( eMatchFunc )
  985. {
  986. case FUNC_REL_DESC: return CreatePlainSorter<MatchRelevanceLt_fn> ( bKbuffer, iMaxMatches, bFactors );
  987. case FUNC_TIMESEGS: return CreatePlainSorter<MatchTimeSegments_fn> ( bKbuffer, iMaxMatches, bFactors );
  988. case FUNC_GENERIC1: return CreatePlainSorter<MatchGeneric1_fn> ( bKbuffer, iMaxMatches, bFactors );
  989. case FUNC_GENERIC2: return CreatePlainSorter<MatchGeneric2_fn> ( bKbuffer, iMaxMatches, bFactors );
  990. case FUNC_GENERIC3: return CreatePlainSorter<MatchGeneric3_fn> ( bKbuffer, iMaxMatches, bFactors );
  991. case FUNC_GENERIC4: return CreatePlainSorter<MatchGeneric4_fn> ( bKbuffer, iMaxMatches, bFactors );
  992. case FUNC_GENERIC5: return CreatePlainSorter<MatchGeneric5_fn> ( bKbuffer, iMaxMatches, bFactors );
  993. case FUNC_EXPR: return CreatePlainSorter<MatchExpr_fn> ( bKbuffer, iMaxMatches, bFactors );
  994. default: return nullptr;
  995. }
  996. }
  997. int ApplyImplicitCutoff ( const CSphQuery & tQuery, const VecTraits_T<ISphMatchSorter*> & dSorters, bool bFT )
  998. {
  999. bool bAllPrecalc = dSorters.GetLength() && dSorters.all_of ( []( auto pSorter ){ return pSorter->IsPrecalc(); } );
  1000. if ( bAllPrecalc )
  1001. return 1; // only need one match for precalc sorters
  1002. if ( tQuery.m_iCutoff>0 )
  1003. return tQuery.m_iCutoff;
  1004. if ( !tQuery.m_iCutoff )
  1005. return -1;
  1006. // this is the same as checking the sorters for disabled cutoff
  1007. // but this works when sorters are not yet available (e.g. GetPseudoShardingMetric())
  1008. if ( HasImplicitGrouping ( tQuery ) )
  1009. return -1;
  1010. if ( !tQuery.m_tKnnSettings.m_sAttr.IsEmpty() )
  1011. return -1;
  1012. bool bDisableCutoff = dSorters.any_of ( []( auto * pSorter ){ return pSorter->IsCutoffDisabled(); } );
  1013. if ( bDisableCutoff )
  1014. return -1;
  1015. // implicit cutoff when there's no sorting and no grouping
  1016. if ( !bFT && ( tQuery.m_sSortBy=="@weight desc" || tQuery.m_sSortBy.IsEmpty() ) && tQuery.m_sGroupBy.IsEmpty() && !tQuery.m_bFacet && !tQuery.m_bFacetHead )
  1017. return tQuery.m_iLimit+tQuery.m_iOffset;
  1018. return -1;
  1019. }