sphinxsort.cpp 97 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400
  1. //
  2. // $Id$
  3. //
  4. //
  5. // Copyright (c) 2001-2012, Andrew Aksyonoff
  6. // Copyright (c) 2008-2012, Sphinx Technologies Inc
  7. // All rights reserved
  8. //
  9. // This program is free software; you can redistribute it and/or modify
  10. // it under the terms of the GNU General Public License. You should have
  11. // received a copy of the GPL license along with this program; if you
  12. // did not, you can find it at http://www.gnu.org/
  13. //
  14. #include "sphinx.h"
  15. #include "sphinxint.h"
  16. #include <time.h>
  17. #include <math.h>
  18. #if !USE_WINDOWS
  19. #include <unistd.h>
  20. #include <sys/time.h>
  21. #endif
  22. //////////////////////////////////////////////////////////////////////////
  23. // TRAITS
  24. //////////////////////////////////////////////////////////////////////////
  25. /// groupby key type
  26. typedef int64_t SphGroupKey_t;
  27. /// base grouper (class that computes groupby key)
  28. class CSphGrouper
  29. {
  30. public:
  31. virtual ~CSphGrouper () {}
  32. virtual SphGroupKey_t KeyFromValue ( SphAttr_t uValue ) const = 0;
  33. virtual SphGroupKey_t KeyFromMatch ( const CSphMatch & tMatch ) const = 0;
  34. virtual void GetLocator ( CSphAttrLocator & tOut ) const = 0;
  35. virtual ESphAttr GetResultType () const = 0;
  36. virtual void SetStringPool ( const BYTE * ) {}
  37. };
  38. /// match-sorting priority queue traits
  39. class CSphMatchQueueTraits : public ISphMatchSorter, ISphNoncopyable
  40. {
  41. protected:
  42. CSphMatch * m_pData;
  43. int m_iUsed;
  44. int m_iSize;
  45. CSphMatchComparatorState m_tState;
  46. const bool m_bUsesAttrs;
  47. private:
  48. int m_iAllocatedSize;
  49. public:
  50. /// ctor
  51. CSphMatchQueueTraits ( int iSize, bool bUsesAttrs )
  52. : m_iUsed ( 0 )
  53. , m_iSize ( iSize )
  54. , m_bUsesAttrs ( bUsesAttrs )
  55. , m_iAllocatedSize ( iSize )
  56. {
  57. assert ( iSize>0 );
  58. m_pData = new CSphMatch [ iSize ];
  59. assert ( m_pData );
  60. m_tState.m_iNow = (DWORD) time ( NULL );
  61. }
  62. /// dtor
  63. ~CSphMatchQueueTraits ()
  64. {
  65. for ( int i=0; i<m_iAllocatedSize; ++i )
  66. m_tSchema.FreeStringPtrs ( m_pData+i );
  67. SafeDeleteArray ( m_pData );
  68. }
  69. public:
  70. void SetState ( const CSphMatchComparatorState & tState ) { m_tState = tState; m_tState.m_iNow = (DWORD) time ( NULL ); }
  71. bool UsesAttrs () const { return m_bUsesAttrs; }
  72. virtual CSphMatch * Finalize () { return m_pData; }
  73. virtual int GetLength () const { return m_iUsed; }
  74. };
  75. //////////////////////////////////////////////////////////////////////////
  76. // SORTING QUEUES
  77. //////////////////////////////////////////////////////////////////////////
  78. /// heap sorter
  79. /// plain binary heap based PQ
  80. template < typename COMP >
  81. class CSphMatchQueue : public CSphMatchQueueTraits
  82. {
  83. public:
  84. /// ctor
  85. CSphMatchQueue ( int iSize, bool bUsesAttrs )
  86. : CSphMatchQueueTraits ( iSize, bUsesAttrs )
  87. {}
  88. /// check if this sorter does groupby
  89. virtual bool IsGroupby () const
  90. {
  91. return false;
  92. }
  93. /// add entry to the queue
  94. virtual bool Push ( const CSphMatch & tEntry )
  95. {
  96. m_iTotal++;
  97. if ( m_iUsed==m_iSize )
  98. {
  99. // if it's worse that current min, reject it, else pop off current min
  100. if ( COMP::IsLess ( tEntry, m_pData[0], m_tState ) )
  101. return true;
  102. else
  103. Pop ();
  104. }
  105. // do add
  106. m_tSchema.CloneMatch ( m_pData+m_iUsed, tEntry );
  107. int iEntry = m_iUsed++;
  108. // sift up if needed, so that worst (lesser) ones float to the top
  109. while ( iEntry )
  110. {
  111. int iParent = ( iEntry-1 ) >> 1;
  112. if ( !COMP::IsLess ( m_pData[iEntry], m_pData[iParent], m_tState ) )
  113. break;
  114. // entry is less than parent, should float to the top
  115. Swap ( m_pData[iEntry], m_pData[iParent] );
  116. iEntry = iParent;
  117. }
  118. return true;
  119. }
  120. /// add grouped entry (must not happen)
  121. virtual bool PushGrouped ( const CSphMatch & )
  122. {
  123. assert ( 0 );
  124. return false;
  125. }
  126. /// remove root (ie. top priority) entry
  127. virtual void Pop ()
  128. {
  129. assert ( m_iUsed );
  130. if ( !(--m_iUsed) ) // empty queue? just return
  131. return;
  132. // make the last entry my new root
  133. Swap ( m_pData[0], m_pData[m_iUsed] );
  134. m_tSchema.FreeStringPtrs ( &m_pData[m_iUsed] );
  135. // sift down if needed
  136. int iEntry = 0;
  137. for ( ;; )
  138. {
  139. // select child
  140. int iChild = (iEntry<<1) + 1;
  141. if ( iChild>=m_iUsed )
  142. break;
  143. // select smallest child
  144. if ( iChild+1<m_iUsed )
  145. if ( COMP::IsLess ( m_pData[iChild+1], m_pData[iChild], m_tState ) )
  146. iChild++;
  147. // if smallest child is less than entry, do float it to the top
  148. if ( COMP::IsLess ( m_pData[iChild], m_pData[iEntry], m_tState ) )
  149. {
  150. Swap ( m_pData[iChild], m_pData[iEntry] );
  151. iEntry = iChild;
  152. continue;
  153. }
  154. break;
  155. }
  156. }
  157. /// store all entries into specified location in sorted order, and remove them from queue
  158. void Flatten ( CSphMatch * pTo, int iTag )
  159. {
  160. assert ( m_iUsed>=0 );
  161. pTo += m_iUsed;
  162. while ( m_iUsed>0 )
  163. {
  164. --pTo;
  165. m_tSchema.FreeStringPtrs ( pTo );
  166. Swap ( *pTo, *m_pData );
  167. if ( iTag>=0 )
  168. pTo->m_iTag = iTag;
  169. Pop ();
  170. }
  171. m_iTotal = 0;
  172. }
  173. };
  174. //////////////////////////////////////////////////////////////////////////
  175. /// match sorting functor
  176. template < typename COMP >
  177. struct MatchSort_fn
  178. {
  179. typedef CSphMatch MEDIAN_TYPE;
  180. CSphMatchComparatorState m_tState;
  181. const CSphSchema * m_pCloner;
  182. MatchSort_fn ( const CSphMatchComparatorState & tState, const CSphSchema& dCloner )
  183. : m_tState ( tState )
  184. , m_pCloner ( &dCloner )
  185. {}
  186. bool IsLess ( const CSphMatch & a, const CSphMatch & b )
  187. {
  188. return COMP::IsLess ( a, b, m_tState );
  189. }
  190. CSphMatch & Key ( CSphMatch * a ) const
  191. {
  192. return *a;
  193. }
  194. void CopyKey ( CSphMatch * pMed, CSphMatch * pVal ) const
  195. {
  196. assert ( m_pCloner );
  197. m_pCloner->CloneMatch ( pMed, *pVal );
  198. }
  199. void Swap ( CSphMatch * a, CSphMatch * b ) const
  200. {
  201. ::Swap ( *a, *b );
  202. }
  203. CSphMatch * Add ( CSphMatch * p, int i ) const
  204. {
  205. return p+i;
  206. }
  207. int Sub ( CSphMatch * b, CSphMatch * a ) const
  208. {
  209. return (int)(b-a);
  210. }
  211. };
  212. /// K-buffer (generalized double buffer) sorter
  213. /// faster worst-case but slower average-case than the heap sorter
  214. template < typename COMP >
  215. class CSphKbufferMatchQueue : public CSphMatchQueueTraits
  216. {
  217. protected:
  218. static const int COEFF = 4;
  219. CSphMatch * m_pEnd;
  220. CSphMatch * m_pWorst;
  221. bool m_bFinalized;
  222. public:
  223. /// ctor
  224. CSphKbufferMatchQueue ( int iSize, bool bUsesAttrs )
  225. : CSphMatchQueueTraits ( iSize*COEFF, bUsesAttrs )
  226. , m_pEnd ( m_pData+iSize*COEFF )
  227. , m_pWorst ( NULL )
  228. , m_bFinalized ( false )
  229. {
  230. m_iSize /= COEFF;
  231. }
  232. /// check if this sorter does groupby
  233. virtual bool IsGroupby () const
  234. {
  235. return false;
  236. }
  237. /// add entry to the queue
  238. virtual bool Push ( const CSphMatch & tEntry )
  239. {
  240. assert ( !m_bFinalized );
  241. // quick early rejection checks
  242. m_iTotal++;
  243. if ( m_pWorst && COMP::IsLess ( tEntry, *m_pWorst, m_tState ) )
  244. return true;
  245. // quick check passed
  246. // fill the data, back to front
  247. m_iUsed++;
  248. m_tSchema.CloneMatch ( m_pEnd-m_iUsed, tEntry );
  249. // do the initial sort once
  250. if ( m_iTotal==m_iSize )
  251. {
  252. assert ( m_iUsed==m_iSize && !m_pWorst );
  253. MatchSort_fn<COMP> tComp ( m_tState, m_tSchema );
  254. sphSort ( m_pEnd-m_iSize, m_iSize, tComp, tComp );
  255. m_pWorst = m_pEnd-m_iSize;
  256. return true;
  257. }
  258. // do the sort/cut when the K-buffer is full
  259. if ( m_iUsed==m_iSize*COEFF )
  260. {
  261. MatchSort_fn<COMP> tComp ( m_tState, m_tSchema );
  262. sphSort ( m_pData, m_iUsed, tComp, tComp );
  263. m_iUsed = m_iSize;
  264. m_pWorst = m_pEnd-m_iSize;
  265. }
  266. return true;
  267. }
  268. /// add grouped entry (must not happen)
  269. virtual bool PushGrouped ( const CSphMatch & )
  270. {
  271. assert ( 0 );
  272. return false;
  273. }
  274. /// finalize, perform final sort/cut as needed
  275. virtual CSphMatch * Finalize ()
  276. {
  277. if ( m_bFinalized )
  278. {
  279. assert ( m_iUsed<=m_iSize );
  280. return m_pEnd-m_iUsed;
  281. }
  282. if ( m_iUsed )
  283. {
  284. MatchSort_fn<COMP> tComp ( m_tState, m_tSchema );
  285. sphSort ( m_pEnd-m_iUsed, m_iUsed, tComp, tComp );
  286. }
  287. m_iUsed = Min ( m_iUsed, m_iSize );
  288. m_bFinalized = true;
  289. return m_pEnd-m_iUsed;
  290. }
  291. /// current result set length
  292. virtual int GetLength () const
  293. {
  294. return Min ( m_iUsed, m_iSize );
  295. }
  296. /// store all entries into specified location in sorted order, and remove them from queue
  297. void Flatten ( CSphMatch * pTo, int iTag )
  298. {
  299. // ensure we are sorted
  300. Finalize();
  301. // reverse copy
  302. for ( int i=1; i<=Min ( m_iUsed, m_iSize ); i++ )
  303. {
  304. m_tSchema.FreeStringPtrs ( pTo );
  305. Swap ( *pTo, m_pEnd[-i] );
  306. if ( iTag>=0 )
  307. pTo->m_iTag = iTag;
  308. pTo++;
  309. }
  310. // clean up for the next work session
  311. m_iTotal = 0;
  312. m_iUsed = 0;
  313. m_iSize = 0;
  314. m_bFinalized = false;
  315. }
  316. };
  317. //////////////////////////////////////////////////////////////////////////
  318. /// collector for UPDATE statement
  319. class CSphUpdateQueue : public CSphMatchQueueTraits
  320. {
  321. CSphAttrUpdateEx * m_pUpdate;
  322. private:
  323. void DoUpdate()
  324. {
  325. if ( !m_iUsed )
  326. return;
  327. CSphAttrUpdate tSet;
  328. tSet.m_dAttrs = m_pUpdate->m_pUpdate->m_dAttrs;
  329. tSet.m_dPool = m_pUpdate->m_pUpdate->m_dPool;
  330. tSet.m_dRowOffset.Resize ( m_iUsed );
  331. if ( !DOCINFO2ID ( STATIC2DOCINFO ( m_pData->m_pStatic ) ) ) // if static attrs were copied, so, they actually dynamic
  332. {
  333. tSet.m_dDocids.Resize ( m_iUsed );
  334. ARRAY_FOREACH ( i, tSet.m_dDocids )
  335. {
  336. tSet.m_dDocids[i] = m_pData[i].m_iDocID;
  337. tSet.m_dRowOffset[i] = 0;
  338. }
  339. } else // static attrs points to the active indexes - so, no lookup, 5 times faster update.
  340. {
  341. tSet.m_dRows.Resize ( m_iUsed );
  342. ARRAY_FOREACH ( i, tSet.m_dRows )
  343. {
  344. tSet.m_dRows[i] = m_pData[i].m_pStatic - ( sizeof(SphDocID_t) / sizeof(CSphRowitem) );
  345. tSet.m_dRowOffset[i] = 0;
  346. }
  347. }
  348. m_pUpdate->m_iAffected += m_pUpdate->m_pIndex->UpdateAttributes ( tSet, -1, *m_pUpdate->m_pError );
  349. m_iUsed = 0;
  350. }
  351. public:
  352. /// ctor
  353. CSphUpdateQueue ( int iSize, CSphAttrUpdateEx * pUpdate )
  354. : CSphMatchQueueTraits ( iSize, true )
  355. , m_pUpdate ( pUpdate )
  356. {}
  357. /// check if this sorter does groupby
  358. virtual bool IsGroupby () const
  359. {
  360. return false;
  361. }
  362. /// add entry to the queue
  363. virtual bool Push ( const CSphMatch & tEntry )
  364. {
  365. m_iTotal++;
  366. if ( m_iUsed==m_iSize )
  367. DoUpdate();
  368. // do add
  369. m_tSchema.CloneMatch ( &m_pData[m_iUsed++], tEntry );
  370. return true;
  371. }
  372. /// add grouped entry (must not happen)
  373. virtual bool PushGrouped ( const CSphMatch & )
  374. {
  375. assert ( 0 );
  376. return false;
  377. }
  378. /// store all entries into specified location in sorted order, and remove them from queue
  379. void Flatten ( CSphMatch *, int )
  380. {
  381. assert ( m_iUsed>=0 );
  382. DoUpdate();
  383. m_iTotal = 0;
  384. }
  385. };
  386. //////////////////////////////////////////////////////////////////////////
  387. // SORTING+GROUPING QUEUE
  388. //////////////////////////////////////////////////////////////////////////
  389. static bool IsCount ( const CSphString & s )
  390. {
  391. return s=="@count" || s=="count(*)";
  392. }
  393. static bool IsGroupby ( const CSphString & s )
  394. {
  395. return s=="@groupby" || s=="@distinct" || s=="groupby()";
  396. }
  397. static bool IsGroupbyMagic ( const CSphString & s )
  398. {
  399. return IsGroupby ( s ) || IsCount ( s );
  400. }
  401. /// groupers
  402. #define GROUPER_BEGIN(_name) \
  403. class _name : public CSphGrouper \
  404. { \
  405. protected: \
  406. CSphAttrLocator m_tLocator; \
  407. public: \
  408. explicit _name ( const CSphAttrLocator & tLoc ) : m_tLocator ( tLoc ) {} \
  409. virtual void GetLocator ( CSphAttrLocator & tOut ) const { tOut = m_tLocator; } \
  410. virtual ESphAttr GetResultType () const { return m_tLocator.m_iBitCount>8*(int)sizeof(DWORD) ? SPH_ATTR_BIGINT : SPH_ATTR_INTEGER; } \
  411. virtual SphGroupKey_t KeyFromMatch ( const CSphMatch & tMatch ) const { return KeyFromValue ( tMatch.GetAttr ( m_tLocator ) ); } \
  412. virtual SphGroupKey_t KeyFromValue ( SphAttr_t uValue ) const \
  413. {
  414. // NOLINT
  415. #define GROUPER_END \
  416. } \
  417. };
  418. #define GROUPER_BEGIN_SPLIT(_name) \
  419. GROUPER_BEGIN(_name) \
  420. time_t tStamp = (time_t)uValue; \
  421. struct tm * pSplit = localtime ( &tStamp );
  422. GROUPER_BEGIN ( CSphGrouperAttr )
  423. return uValue;
  424. GROUPER_END
  425. GROUPER_BEGIN_SPLIT ( CSphGrouperDay )
  426. return (pSplit->tm_year+1900)*10000 + (1+pSplit->tm_mon)*100 + pSplit->tm_mday;
  427. GROUPER_END
  428. GROUPER_BEGIN_SPLIT ( CSphGrouperWeek )
  429. int iPrevSunday = (1+pSplit->tm_yday) - pSplit->tm_wday; // prev Sunday day of year, base 1
  430. int iYear = pSplit->tm_year+1900;
  431. if ( iPrevSunday<=0 ) // check if we crossed year boundary
  432. {
  433. // adjust day and year
  434. iPrevSunday += 365;
  435. iYear--;
  436. // adjust for leap years
  437. if ( iYear%4==0 && ( iYear%100!=0 || iYear%400==0 ) )
  438. iPrevSunday++;
  439. }
  440. return iYear*1000 + iPrevSunday;
  441. GROUPER_END
  442. GROUPER_BEGIN_SPLIT ( CSphGrouperMonth )
  443. return (pSplit->tm_year+1900)*100 + (1+pSplit->tm_mon);
  444. GROUPER_END
  445. GROUPER_BEGIN_SPLIT ( CSphGrouperYear )
  446. return (pSplit->tm_year+1900);
  447. GROUPER_END
  448. template <class PRED>
  449. class CSphGrouperString : public CSphGrouperAttr, public PRED
  450. {
  451. private:
  452. const BYTE * m_pStringBase;
  453. public:
  454. explicit CSphGrouperString ( const CSphAttrLocator & tLoc )
  455. : CSphGrouperAttr ( tLoc )
  456. , m_pStringBase ( NULL )
  457. {
  458. }
  459. virtual ESphAttr GetResultType () const
  460. {
  461. return SPH_ATTR_BIGINT;
  462. }
  463. virtual SphGroupKey_t KeyFromValue ( SphAttr_t uValue ) const
  464. {
  465. if ( !m_pStringBase || !uValue )
  466. return 0;
  467. const BYTE * pStr = NULL;
  468. int iLen = sphUnpackStr ( m_pStringBase+uValue, &pStr );
  469. if ( !pStr || !iLen )
  470. return 0;
  471. return PRED::Hash ( pStr, iLen );
  472. }
  473. virtual void SetStringPool ( const BYTE * pStrings )
  474. {
  475. m_pStringBase = pStrings;
  476. }
  477. };
  478. //////////////////////////////////////////////////////////////////////////
  479. /// simple fixed-size hash
  480. /// doesn't keep the order
  481. template < typename T, typename KEY, typename HASHFUNC >
  482. class CSphFixedHash : ISphNoncopyable
  483. {
  484. protected:
  485. static const int HASH_LIST_END = -1;
  486. static const int HASH_DELETED = -2;
  487. struct HashEntry_t
  488. {
  489. KEY m_tKey;
  490. T m_tValue;
  491. int m_iNext;
  492. };
  493. protected:
  494. CSphVector<HashEntry_t> m_dEntries; ///< key-value pairs storage pool
  495. CSphVector<int> m_dHash; ///< hash into m_dEntries pool
  496. int m_iFree; ///< free pairs count
  497. CSphVector<int> m_dFree; ///< free pair indexes
  498. public:
  499. /// ctor
  500. explicit CSphFixedHash ( int iLength )
  501. {
  502. int iBuckets = ( 2 << sphLog2 ( iLength-1 ) ); // less than 50% bucket usage guaranteed
  503. assert ( iLength>0 );
  504. assert ( iLength<=iBuckets );
  505. m_dEntries.Resize ( iLength );
  506. m_dHash.Resize ( iBuckets );
  507. m_dFree.Resize ( iLength );
  508. Reset ();
  509. }
  510. /// cleanup
  511. void Reset ()
  512. {
  513. ARRAY_FOREACH ( i, m_dEntries )
  514. m_dEntries[i].m_iNext = HASH_DELETED;
  515. ARRAY_FOREACH ( i, m_dHash )
  516. m_dHash[i] = HASH_LIST_END;
  517. m_iFree = m_dFree.GetLength();
  518. ARRAY_FOREACH ( i, m_dFree )
  519. m_dFree[i] = i;
  520. }
  521. /// add new entry
  522. /// returns NULL on success
  523. /// returns pointer to value if already hashed
  524. T * Add ( const T & tValue, const KEY & tKey )
  525. {
  526. assert ( m_iFree>0 && "hash overflow" );
  527. // check if it's already hashed
  528. DWORD uHash = DWORD ( HASHFUNC::Hash ( tKey ) ) & ( m_dHash.GetLength()-1 );
  529. int iPrev = -1, iEntry;
  530. for ( iEntry=m_dHash[uHash]; iEntry>=0; iPrev=iEntry, iEntry=m_dEntries[iEntry].m_iNext )
  531. if ( m_dEntries[iEntry].m_tKey==tKey )
  532. return &m_dEntries[iEntry].m_tValue;
  533. assert ( iEntry!=HASH_DELETED );
  534. // if it's not, do add
  535. int iNew = m_dFree [ --m_iFree ];
  536. HashEntry_t & tNew = m_dEntries[iNew];
  537. assert ( tNew.m_iNext==HASH_DELETED );
  538. tNew.m_tKey = tKey;
  539. tNew.m_tValue = tValue;
  540. tNew.m_iNext = HASH_LIST_END;
  541. if ( iPrev>=0 )
  542. {
  543. assert ( m_dEntries[iPrev].m_iNext==HASH_LIST_END );
  544. m_dEntries[iPrev].m_iNext = iNew;
  545. } else
  546. {
  547. assert ( m_dHash[uHash]==HASH_LIST_END );
  548. m_dHash[uHash] = iNew;
  549. }
  550. return NULL;
  551. }
  552. /// remove entry from hash
  553. void Remove ( const KEY & tKey )
  554. {
  555. // check if it's already hashed
  556. DWORD uHash = DWORD ( HASHFUNC::Hash ( tKey ) ) & ( m_dHash.GetLength()-1 );
  557. int iPrev = -1, iEntry;
  558. for ( iEntry=m_dHash[uHash]; iEntry>=0; iPrev=iEntry, iEntry=m_dEntries[iEntry].m_iNext )
  559. if ( m_dEntries[iEntry].m_tKey==tKey )
  560. {
  561. // found, remove it
  562. assert ( m_dEntries[iEntry].m_iNext!=HASH_DELETED );
  563. if ( iPrev>=0 )
  564. m_dEntries[iPrev].m_iNext = m_dEntries[iEntry].m_iNext;
  565. else
  566. m_dHash[uHash] = m_dEntries[iEntry].m_iNext;
  567. #ifndef NDEBUG
  568. m_dEntries[iEntry].m_iNext = HASH_DELETED;
  569. #endif
  570. m_dFree [ m_iFree++ ] = iEntry;
  571. return;
  572. }
  573. assert ( iEntry!=HASH_DELETED );
  574. }
  575. /// get value pointer by key
  576. T * operator () ( const KEY & tKey ) const
  577. {
  578. DWORD uHash = DWORD ( HASHFUNC::Hash ( tKey ) ) & ( m_dHash.GetLength()-1 );
  579. int iEntry;
  580. for ( iEntry=m_dHash[uHash]; iEntry>=0; iEntry=m_dEntries[iEntry].m_iNext )
  581. if ( m_dEntries[iEntry].m_tKey==tKey )
  582. return (T*)&m_dEntries[iEntry].m_tValue;
  583. assert ( iEntry!=HASH_DELETED );
  584. return NULL;
  585. }
  586. };
  587. /////////////////////////////////////////////////////////////////////////////
  588. /// (group,attrvalue) pair
  589. struct SphGroupedValue_t
  590. {
  591. public:
  592. SphGroupKey_t m_uGroup;
  593. SphAttr_t m_uValue;
  594. public:
  595. SphGroupedValue_t ()
  596. {}
  597. SphGroupedValue_t ( SphGroupKey_t uGroup, SphAttr_t uValue )
  598. : m_uGroup ( uGroup )
  599. , m_uValue ( uValue )
  600. {}
  601. inline bool operator < ( const SphGroupedValue_t & rhs ) const
  602. {
  603. if ( m_uGroup<rhs.m_uGroup ) return true;
  604. if ( m_uGroup>rhs.m_uGroup ) return false;
  605. return m_uValue<rhs.m_uValue;
  606. }
  607. inline bool operator == ( const SphGroupedValue_t & rhs ) const
  608. {
  609. return m_uGroup==rhs.m_uGroup && m_uValue==rhs.m_uValue;
  610. }
  611. };
  612. /// unique values counter
  613. /// used for COUNT(DISTINCT xxx) GROUP BY yyy queries
  614. class CSphUniqounter : public CSphVector<SphGroupedValue_t>
  615. {
  616. public:
  617. #ifndef NDEBUG
  618. CSphUniqounter () : m_iCountPos ( 0 ), m_bSorted ( true ) { Reserve ( 16384 ); }
  619. void Add ( const SphGroupedValue_t & tValue ) { CSphVector<SphGroupedValue_t>::Add ( tValue ); m_bSorted = false; }
  620. void Sort () { CSphVector<SphGroupedValue_t>::Sort (); m_bSorted = true; }
  621. #else
  622. CSphUniqounter () : m_iCountPos ( 0 ) {}
  623. #endif
  624. public:
  625. int CountStart ( SphGroupKey_t * pOutGroup ); ///< starting counting distinct values, returns count and group key (0 if empty)
  626. int CountNext ( SphGroupKey_t * pOutGroup ); ///< continues counting distinct values, returns count and group key (0 if done)
  627. void Compact ( SphGroupKey_t * pRemoveGroups, int iRemoveGroups );
  628. protected:
  629. int m_iCountPos;
  630. #ifndef NDEBUG
  631. bool m_bSorted;
  632. #endif
  633. };
  634. int CSphUniqounter::CountStart ( SphGroupKey_t * pOutGroup )
  635. {
  636. m_iCountPos = 0;
  637. return CountNext ( pOutGroup );
  638. }
  639. int CSphUniqounter::CountNext ( SphGroupKey_t * pOutGroup )
  640. {
  641. assert ( m_bSorted );
  642. if ( m_iCountPos>=m_iLength )
  643. return 0;
  644. SphGroupKey_t uGroup = m_pData[m_iCountPos].m_uGroup;
  645. SphAttr_t uValue = m_pData[m_iCountPos].m_uValue;
  646. *pOutGroup = uGroup;
  647. int iCount = 1;
  648. while ( m_iCountPos<m_iLength && m_pData[m_iCountPos].m_uGroup==uGroup )
  649. {
  650. if ( m_pData[m_iCountPos].m_uValue!=uValue )
  651. iCount++;
  652. uValue = m_pData[m_iCountPos].m_uValue;
  653. m_iCountPos++;
  654. }
  655. return iCount;
  656. }
  657. void CSphUniqounter::Compact ( SphGroupKey_t * pRemoveGroups, int iRemoveGroups )
  658. {
  659. assert ( m_bSorted );
  660. if ( !m_iLength )
  661. return;
  662. sphSort ( pRemoveGroups, iRemoveGroups );
  663. SphGroupedValue_t * pSrc = m_pData;
  664. SphGroupedValue_t * pDst = m_pData;
  665. SphGroupedValue_t * pEnd = m_pData + m_iLength;
  666. // skip remove-groups which are not in my list
  667. while ( iRemoveGroups && (*pRemoveGroups)<pSrc->m_uGroup )
  668. {
  669. pRemoveGroups++;
  670. iRemoveGroups--;
  671. }
  672. for ( ; pSrc<pEnd; pSrc++ )
  673. {
  674. // check if this entry needs to be removed
  675. while ( iRemoveGroups && (*pRemoveGroups)<pSrc->m_uGroup )
  676. {
  677. pRemoveGroups++;
  678. iRemoveGroups--;
  679. }
  680. if ( iRemoveGroups && pSrc->m_uGroup==*pRemoveGroups )
  681. continue;
  682. // check if it's a dupe
  683. if ( pDst>m_pData && pDst[-1]==pSrc[0] )
  684. continue;
  685. *pDst++ = *pSrc;
  686. }
  687. assert ( pDst-m_pData<=m_iLength );
  688. m_iLength = pDst-m_pData;
  689. }
  690. /////////////////////////////////////////////////////////////////////////////
  691. /// attribute magic
  692. enum
  693. {
  694. SPH_VATTR_ID = -1, ///< tells match sorter to use doc id
  695. SPH_VATTR_RELEVANCE = -2, ///< tells match sorter to use match weight
  696. SPH_VATTR_FLOAT = 10000 ///< tells match sorter to compare floats
  697. };
  698. /// match comparator interface from group-by sorter point of view
  699. struct ISphMatchComparator
  700. {
  701. virtual ~ISphMatchComparator () {}
  702. virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & tState ) const = 0;
  703. };
  704. /// additional group-by sorter settings
  705. struct CSphGroupSorterSettings
  706. {
  707. CSphAttrLocator m_tLocGroupby; ///< locator for @groupby
  708. CSphAttrLocator m_tLocCount; ///< locator for @count
  709. CSphAttrLocator m_tLocDistinct; ///< locator for @distinct
  710. CSphAttrLocator m_tDistinctLoc; ///< locator for attribute to compute count(distinct) for
  711. bool m_bDistinct; ///< whether we need distinct
  712. bool m_bMVA; ///< whether we're grouping by MVA attribute
  713. bool m_bMva64;
  714. CSphGrouper * m_pGrouper; ///< group key calculator
  715. CSphGroupSorterSettings ()
  716. : m_bDistinct ( false )
  717. , m_bMVA ( false )
  718. , m_bMva64 ( false )
  719. , m_pGrouper ( NULL )
  720. {}
  721. };
  722. #if USE_WINDOWS
  723. #pragma warning(disable:4127)
  724. #endif
  725. /// aggregate function interface
  726. class IAggrFunc
  727. {
  728. public:
  729. virtual ~IAggrFunc() {}
  730. virtual void Ungroup ( CSphMatch * ) {}
  731. virtual void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool bGrouped ) = 0;
  732. virtual void Finalize ( CSphMatch * ) {}
  733. };
  734. /// aggregate traits for different attribute types
  735. template < typename T >
  736. class IAggrFuncTraits : public IAggrFunc
  737. {
  738. public:
  739. explicit IAggrFuncTraits ( const CSphAttrLocator & tLocator ) : m_tLocator ( tLocator ) {}
  740. inline T GetValue ( const CSphMatch * pRow );
  741. inline void SetValue ( CSphMatch * pRow, T val );
  742. protected:
  743. CSphAttrLocator m_tLocator;
  744. };
  745. template<>
  746. DWORD IAggrFuncTraits<DWORD>::GetValue ( const CSphMatch * pRow )
  747. {
  748. return (DWORD)pRow->GetAttr ( m_tLocator );
  749. }
  750. template<>
  751. void IAggrFuncTraits<DWORD>::SetValue ( CSphMatch * pRow, DWORD val )
  752. {
  753. pRow->SetAttr ( m_tLocator, val );
  754. }
  755. template<>
  756. int64_t IAggrFuncTraits<int64_t>::GetValue ( const CSphMatch * pRow )
  757. {
  758. return pRow->GetAttr ( m_tLocator );
  759. }
  760. template<>
  761. void IAggrFuncTraits<int64_t>::SetValue ( CSphMatch * pRow, int64_t val )
  762. {
  763. pRow->SetAttr ( m_tLocator, val );
  764. }
  765. template<>
  766. float IAggrFuncTraits<float>::GetValue ( const CSphMatch * pRow )
  767. {
  768. return pRow->GetAttrFloat ( m_tLocator );
  769. }
  770. template<>
  771. void IAggrFuncTraits<float>::SetValue ( CSphMatch * pRow, float val )
  772. {
  773. pRow->SetAttrFloat ( m_tLocator, val );
  774. }
  775. /// SUM() implementation
  776. template < typename T >
  777. class AggrSum_t : public IAggrFuncTraits<T>
  778. {
  779. public:
  780. explicit AggrSum_t ( const CSphAttrLocator & tLoc ) : IAggrFuncTraits<T> ( tLoc )
  781. {}
  782. virtual void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool )
  783. {
  784. this->SetValue ( pDst, this->GetValue(pDst)+this->GetValue(pSrc) );
  785. }
  786. };
  787. /// AVG() implementation
  788. template < typename T >
  789. class AggrAvg_t : public IAggrFuncTraits<T>
  790. {
  791. protected:
  792. CSphAttrLocator m_tCountLoc;
  793. public:
  794. AggrAvg_t ( const CSphAttrLocator & tLoc, const CSphAttrLocator & tCountLoc ) : IAggrFuncTraits<T> ( tLoc ), m_tCountLoc ( tCountLoc )
  795. {}
  796. virtual void Ungroup ( CSphMatch * pDst )
  797. {
  798. this->SetValue ( pDst, T ( this->GetValue ( pDst ) * pDst->GetAttr ( m_tCountLoc ) ) );
  799. }
  800. virtual void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool bGrouped )
  801. {
  802. if ( bGrouped )
  803. this->SetValue ( pDst, T ( this->GetValue ( pDst ) + this->GetValue ( pSrc ) * pSrc->GetAttr ( m_tCountLoc ) ) );
  804. else
  805. this->SetValue ( pDst, this->GetValue ( pDst ) + this->GetValue ( pSrc ) );
  806. }
  807. virtual void Finalize ( CSphMatch * pDst )
  808. {
  809. this->SetValue ( pDst, T ( this->GetValue ( pDst ) / pDst->GetAttr ( m_tCountLoc ) ) );
  810. }
  811. };
  812. /// MAX() implementation
  813. template < typename T >
  814. class AggrMax_t : public IAggrFuncTraits<T>
  815. {
  816. public:
  817. explicit AggrMax_t ( const CSphAttrLocator & tLoc ) : IAggrFuncTraits<T> ( tLoc )
  818. {}
  819. virtual void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool )
  820. {
  821. this->SetValue ( pDst, Max ( this->GetValue(pDst), this->GetValue(pSrc) ) );
  822. }
  823. };
  824. /// MIN() implementation
  825. template < typename T >
  826. class AggrMin_t : public IAggrFuncTraits<T>
  827. {
  828. public:
  829. explicit AggrMin_t ( const CSphAttrLocator & tLoc ) : IAggrFuncTraits<T> ( tLoc )
  830. {}
  831. virtual void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool )
  832. {
  833. this->SetValue ( pDst, Min ( this->GetValue(pDst), this->GetValue(pSrc) ) );
  834. }
  835. };
  836. /// GROUP_CONCAT() implementation
  837. class AggrConcat_t : public IAggrFunc
  838. {
  839. protected:
  840. CSphAttrLocator m_tLoc;
  841. public:
  842. explicit AggrConcat_t ( const CSphColumnInfo & tCol )
  843. : m_tLoc ( tCol.m_tLocator )
  844. {}
  845. void Ungroup ( CSphMatch * ) {}
  846. void Finalize ( CSphMatch * ) {}
  847. void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool )
  848. {
  849. const char * sDst = (const char*) pDst->GetAttr(m_tLoc);
  850. const char * sSrc = (const char*) pSrc->GetAttr(m_tLoc);
  851. assert ( !sDst || *sDst ); // ensure the string is either NULL, or has data
  852. assert ( !sSrc || *sSrc );
  853. // empty source? kinda weird, but done!
  854. if ( !sSrc )
  855. return;
  856. // empty destination? just clone the source
  857. if ( !sDst )
  858. {
  859. if ( sSrc )
  860. pDst->SetAttr ( m_tLoc, (SphAttr_t)strdup(sSrc) );
  861. return;
  862. }
  863. // both source and destination present? append source to destination
  864. assert ( sDst && sSrc );
  865. CSphString sNew;
  866. sNew.SetSprintf ( "%s,%s", sDst, sSrc );
  867. pDst->SetAttr ( m_tLoc, (SphAttr_t)sNew.Leak() );
  868. SafeDelete ( sDst );
  869. }
  870. };
  871. /// group sorting functor
  872. template < typename COMPGROUP >
  873. struct GroupSorter_fn : public CSphMatchComparatorState, public SphAccessor_T<CSphMatch>
  874. {
  875. typedef CSphMatch MEDIAN_TYPE;
  876. const CSphSchema* m_pCloner;
  877. GroupSorter_fn ()
  878. {
  879. m_pCloner = NULL;
  880. }
  881. void CopyKey ( MEDIAN_TYPE * pMed, CSphMatch * pVal ) const
  882. {
  883. assert ( m_pCloner );
  884. m_pCloner->CloneMatch ( pMed, *pVal );
  885. }
  886. bool IsLess ( const CSphMatch & a, const CSphMatch & b ) const
  887. {
  888. return COMPGROUP::IsLess ( b, a, *this );
  889. }
  890. // inherited swap does not work on gcc
  891. void Swap ( CSphMatch * a, CSphMatch * b ) const
  892. {
  893. ::Swap ( *a, *b );
  894. }
  895. };
  896. /// match sorter with k-buffering and group-by
  897. template < typename COMPGROUP, bool DISTINCT >
  898. class CSphKBufferGroupSorter : public CSphMatchQueueTraits
  899. {
  900. protected:
  901. ESphGroupBy m_eGroupBy; ///< group-by function
  902. CSphGrouper * m_pGrouper;
  903. CSphFixedHash < CSphMatch *, SphGroupKey_t, IdentityHash_fn > m_hGroup2Match;
  904. protected:
  905. int m_iLimit; ///< max matches to be retrieved
  906. CSphUniqounter m_tUniq;
  907. bool m_bSortByDistinct;
  908. GroupSorter_fn<COMPGROUP> m_tGroupSorter;
  909. const ISphMatchComparator * m_pComp;
  910. CSphGroupSorterSettings m_tSettings;
  911. CSphVector<IAggrFunc *> m_dAggregates;
  912. CSphVector<IAggrFunc *> m_dAvgs;
  913. int m_iPregroupDynamic; ///< how much dynamic attributes are computed by the index (before groupby sorter)
  914. static const int GROUPBY_FACTOR = 4; ///< allocate this times more storage when doing group-by (k, as in k-buffer)
  915. public:
  916. /// ctor
  917. CSphKBufferGroupSorter ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings ) // FIXME! make k configurable
  918. : CSphMatchQueueTraits ( pQuery->m_iMaxMatches*GROUPBY_FACTOR, true )
  919. , m_eGroupBy ( pQuery->m_eGroupFunc )
  920. , m_pGrouper ( tSettings.m_pGrouper )
  921. , m_hGroup2Match ( pQuery->m_iMaxMatches*GROUPBY_FACTOR )
  922. , m_iLimit ( pQuery->m_iMaxMatches )
  923. , m_bSortByDistinct ( false )
  924. , m_pComp ( pComp )
  925. , m_tSettings ( tSettings )
  926. , m_iPregroupDynamic ( 0 )
  927. {
  928. assert ( GROUPBY_FACTOR>1 );
  929. assert ( DISTINCT==false || tSettings.m_tDistinctLoc.m_iBitOffset>=0 );
  930. }
  931. /// schema setup
  932. virtual void SetSchema ( const CSphSchema & tSchema )
  933. {
  934. m_tSchema = tSchema;
  935. m_tGroupSorter.m_pCloner = &m_tSchema;
  936. bool bAggrStarted = false;
  937. for ( int i=0; i<m_tSchema.GetAttrsCount(); i++ )
  938. {
  939. const CSphColumnInfo & tAttr = m_tSchema.GetAttr(i);
  940. bool bMagicAggr = IsGroupbyMagic ( tAttr.m_sName ) || sphIsSortStringInternal ( tAttr.m_sName.cstr() ); // magic legacy aggregates
  941. if ( tAttr.m_eAggrFunc==SPH_AGGR_NONE && !bMagicAggr )
  942. {
  943. if ( !bAggrStarted )
  944. continue;
  945. // !COMMIT
  946. assert ( 0 && "internal error: aggregates must not be followed by non-aggregates" );
  947. }
  948. if ( !bAggrStarted )
  949. {
  950. assert ( ( tAttr.m_tLocator.m_iBitOffset % ROWITEM_BITS )==0 );
  951. m_iPregroupDynamic = tAttr.m_tLocator.m_iBitOffset / ROWITEM_BITS;
  952. }
  953. bAggrStarted = true;
  954. if ( bMagicAggr )
  955. continue;
  956. switch ( tAttr.m_eAggrFunc )
  957. {
  958. case SPH_AGGR_SUM:
  959. switch ( tAttr.m_eAttrType )
  960. {
  961. case SPH_ATTR_INTEGER: m_dAggregates.Add ( new AggrSum_t<DWORD> ( tAttr.m_tLocator ) ); break;
  962. case SPH_ATTR_BIGINT: m_dAggregates.Add ( new AggrSum_t<int64_t> ( tAttr.m_tLocator ) ); break;
  963. case SPH_ATTR_FLOAT: m_dAggregates.Add ( new AggrSum_t<float> ( tAttr.m_tLocator ) ); break;
  964. default: assert ( 0 && "internal error: unhandled aggregate type" ); break;
  965. }
  966. break;
  967. case SPH_AGGR_AVG:
  968. switch ( tAttr.m_eAttrType )
  969. {
  970. case SPH_ATTR_INTEGER: m_dAggregates.Add ( new AggrAvg_t<DWORD> ( tAttr.m_tLocator, m_tSettings.m_tLocCount ) ); break;
  971. case SPH_ATTR_BIGINT: m_dAggregates.Add ( new AggrAvg_t<int64_t> ( tAttr.m_tLocator, m_tSettings.m_tLocCount ) ); break;
  972. case SPH_ATTR_FLOAT: m_dAggregates.Add ( new AggrAvg_t<float> ( tAttr.m_tLocator, m_tSettings.m_tLocCount ) ); break;
  973. default: assert ( 0 && "internal error: unhandled aggregate type" ); break;
  974. }
  975. // store avg to calculate these attributes prior to groups sort
  976. for ( int iState=0; iState<CSphMatchComparatorState::MAX_ATTRS; iState++ )
  977. {
  978. ESphSortKeyPart eKeypart = m_tGroupSorter.m_eKeypart[iState];
  979. CSphAttrLocator tLoc = m_tGroupSorter.m_tLocator[iState];
  980. if ( ( eKeypart==SPH_KEYPART_INT || eKeypart==SPH_KEYPART_FLOAT )
  981. && tLoc.m_bDynamic==tAttr.m_tLocator.m_bDynamic && tLoc.m_iBitOffset==tAttr.m_tLocator.m_iBitOffset
  982. && tLoc.m_iBitCount==tAttr.m_tLocator.m_iBitCount )
  983. {
  984. m_dAvgs.Add ( m_dAggregates.Last() );
  985. break;
  986. }
  987. }
  988. break;
  989. case SPH_AGGR_MIN:
  990. switch ( tAttr.m_eAttrType )
  991. {
  992. case SPH_ATTR_INTEGER: m_dAggregates.Add ( new AggrMin_t<DWORD> ( tAttr.m_tLocator ) ); break;
  993. case SPH_ATTR_BIGINT: m_dAggregates.Add ( new AggrMin_t<int64_t> ( tAttr.m_tLocator ) ); break;
  994. case SPH_ATTR_FLOAT: m_dAggregates.Add ( new AggrMin_t<float> ( tAttr.m_tLocator ) ); break;
  995. default: assert ( 0 && "internal error: unhandled aggregate type" ); break;
  996. }
  997. break;
  998. case SPH_AGGR_MAX:
  999. switch ( tAttr.m_eAttrType )
  1000. {
  1001. case SPH_ATTR_INTEGER: m_dAggregates.Add ( new AggrMax_t<DWORD> ( tAttr.m_tLocator ) ); break;
  1002. case SPH_ATTR_BIGINT: m_dAggregates.Add ( new AggrMax_t<int64_t> ( tAttr.m_tLocator ) ); break;
  1003. case SPH_ATTR_FLOAT: m_dAggregates.Add ( new AggrMax_t<float> ( tAttr.m_tLocator ) ); break;
  1004. default: assert ( 0 && "internal error: unhandled aggregate type" ); break;
  1005. }
  1006. break;
  1007. case SPH_AGGR_CAT:
  1008. m_dAggregates.Add ( new AggrConcat_t ( tAttr ) );
  1009. break;
  1010. default:
  1011. assert ( 0 && "internal error: unhandled aggregate function" );
  1012. break;
  1013. }
  1014. }
  1015. }
  1016. /// dtor
  1017. ~CSphKBufferGroupSorter ()
  1018. {
  1019. SafeDelete ( m_pComp );
  1020. SafeDelete ( m_pGrouper );
  1021. ARRAY_FOREACH ( i, m_dAggregates )
  1022. SafeDelete ( m_dAggregates[i] );
  1023. }
  1024. /// check if this sorter does groupby
  1025. virtual bool IsGroupby () const
  1026. {
  1027. return true;
  1028. }
  1029. /// set string pool pointer (for string+groupby sorters)
  1030. void SetStringPool ( const BYTE * pStrings )
  1031. {
  1032. m_pGrouper->SetStringPool ( pStrings );
  1033. }
  1034. /// add entry to the queue
  1035. virtual bool Push ( const CSphMatch & tEntry )
  1036. {
  1037. SphGroupKey_t uGroupKey = m_pGrouper->KeyFromMatch ( tEntry );
  1038. return PushEx ( tEntry, uGroupKey, false );
  1039. }
  1040. /// add grouped entry to the queue
  1041. virtual bool PushGrouped ( const CSphMatch & tEntry )
  1042. {
  1043. return PushEx ( tEntry, tEntry.GetAttr ( m_tSettings.m_tLocGroupby ), true );
  1044. }
  1045. /// add entry to the queue
  1046. virtual bool PushEx ( const CSphMatch & tEntry, const SphGroupKey_t uGroupKey, bool bGrouped )
  1047. {
  1048. // if this group is already hashed, we only need to update the corresponding match
  1049. CSphMatch ** ppMatch = m_hGroup2Match ( uGroupKey );
  1050. if ( ppMatch )
  1051. {
  1052. CSphMatch * pMatch = (*ppMatch);
  1053. assert ( pMatch );
  1054. assert ( pMatch->GetAttr ( m_tSettings.m_tLocGroupby )==uGroupKey );
  1055. assert ( pMatch->m_pDynamic[-1]==tEntry.m_pDynamic[-1] );
  1056. if ( bGrouped )
  1057. {
  1058. // it's already grouped match
  1059. // sum grouped matches count
  1060. pMatch->SetAttr ( m_tSettings.m_tLocCount, pMatch->GetAttr ( m_tSettings.m_tLocCount ) + tEntry.GetAttr ( m_tSettings.m_tLocCount ) ); // OPTIMIZE! AddAttr()?
  1061. if ( DISTINCT )
  1062. pMatch->SetAttr ( m_tSettings.m_tLocDistinct, pMatch->GetAttr ( m_tSettings.m_tLocDistinct ) + tEntry.GetAttr ( m_tSettings.m_tLocDistinct ) );
  1063. } else
  1064. {
  1065. // it's a simple match
  1066. // increase grouped matches count
  1067. pMatch->SetAttr ( m_tSettings.m_tLocCount, 1 + pMatch->GetAttr ( m_tSettings.m_tLocCount ) ); // OPTIMIZE! IncAttr()?
  1068. }
  1069. // update aggregates
  1070. ARRAY_FOREACH ( i, m_dAggregates )
  1071. m_dAggregates[i]->Update ( pMatch, &tEntry, bGrouped );
  1072. // if new entry is more relevant, update from it
  1073. if ( m_pComp->VirtualIsLess ( *pMatch, tEntry, m_tState ) )
  1074. {
  1075. // can't use Clone() here; must keep current aggregate values
  1076. pMatch->m_iDocID = tEntry.m_iDocID;
  1077. pMatch->m_iWeight = tEntry.m_iWeight;
  1078. pMatch->m_pStatic = tEntry.m_pStatic;
  1079. pMatch->m_iTag = tEntry.m_iTag;
  1080. if ( m_iPregroupDynamic )
  1081. {
  1082. assert ( pMatch->m_pDynamic );
  1083. assert ( tEntry.m_pDynamic );
  1084. assert ( pMatch->m_pDynamic[-1]==tEntry.m_pDynamic[-1] );
  1085. m_tSchema.FreeStringPtrs ( pMatch, m_iPregroupDynamic );
  1086. for ( int i=0; i<m_iPregroupDynamic; i++ )
  1087. pMatch->m_pDynamic[i] = tEntry.m_pDynamic[i];
  1088. m_tSchema.CopyStrings ( pMatch, tEntry, m_iPregroupDynamic );
  1089. }
  1090. }
  1091. }
  1092. // submit actual distinct value in all cases
  1093. if ( DISTINCT && !bGrouped )
  1094. m_tUniq.Add ( SphGroupedValue_t ( uGroupKey, tEntry.GetAttr ( m_tSettings.m_tDistinctLoc ) ) ); // OPTIMIZE! use simpler locator here?
  1095. // it's a dupe anyway, so we shouldn't update total matches count
  1096. if ( ppMatch )
  1097. return false;
  1098. // if we're full, let's cut off some worst groups
  1099. if ( m_iUsed==m_iSize )
  1100. CutWorst ( m_iLimit * (int)(GROUPBY_FACTOR/2) );
  1101. // do add
  1102. assert ( m_iUsed<m_iSize );
  1103. CSphMatch & tNew = m_pData [ m_iUsed++ ];
  1104. m_tSchema.CloneMatch ( &tNew, tEntry );
  1105. if ( !bGrouped )
  1106. {
  1107. tNew.SetAttr ( m_tSettings.m_tLocGroupby, uGroupKey );
  1108. tNew.SetAttr ( m_tSettings.m_tLocCount, 1 );
  1109. if ( DISTINCT )
  1110. tNew.SetAttr ( m_tSettings.m_tLocDistinct, 0 );
  1111. } else
  1112. {
  1113. ARRAY_FOREACH ( i, m_dAggregates )
  1114. m_dAggregates[i]->Ungroup ( &tNew );
  1115. }
  1116. m_hGroup2Match.Add ( &tNew, uGroupKey );
  1117. m_iTotal++;
  1118. return true;
  1119. }
  1120. void CalcAvg ( bool bGroup )
  1121. {
  1122. if ( !m_dAvgs.GetLength() )
  1123. return;
  1124. CSphMatch * pMatch = m_pData;
  1125. CSphMatch * pEnd = pMatch + m_iUsed;
  1126. while ( pMatch<pEnd )
  1127. {
  1128. ARRAY_FOREACH ( j, m_dAvgs )
  1129. {
  1130. if ( bGroup )
  1131. m_dAvgs[j]->Finalize ( pMatch );
  1132. else
  1133. m_dAvgs[j]->Ungroup ( pMatch );
  1134. }
  1135. ++pMatch;
  1136. }
  1137. }
  1138. /// store all entries into specified location in sorted order, and remove them from queue
  1139. void Flatten ( CSphMatch * pTo, int iTag )
  1140. {
  1141. CountDistinct ();
  1142. CalcAvg ( true );
  1143. SortGroups ();
  1144. CSphVector<IAggrFunc *> dAggrs;
  1145. if ( m_dAggregates.GetLength()!=m_dAvgs.GetLength() )
  1146. {
  1147. dAggrs = m_dAggregates;
  1148. ARRAY_FOREACH ( i, m_dAvgs )
  1149. dAggrs.RemoveValue ( m_dAvgs[i] );
  1150. }
  1151. int iLen = GetLength ();
  1152. for ( int i=0; i<iLen; i++, pTo++ )
  1153. {
  1154. ARRAY_FOREACH ( j, dAggrs )
  1155. dAggrs[j]->Finalize ( &m_pData[i] );
  1156. m_tSchema.CloneMatch ( pTo, m_pData[i] );
  1157. if ( iTag>=0 )
  1158. pTo->m_iTag = iTag;
  1159. }
  1160. m_iUsed = 0;
  1161. m_iTotal = 0;
  1162. m_hGroup2Match.Reset ();
  1163. if ( DISTINCT )
  1164. m_tUniq.Resize ( 0 );
  1165. }
  1166. /// get entries count
  1167. int GetLength () const
  1168. {
  1169. return Min ( m_iUsed, m_iLimit );
  1170. }
  1171. /// set group comparator state
  1172. void SetGroupState ( const CSphMatchComparatorState & tState )
  1173. {
  1174. m_tGroupSorter.m_fnStrCmp = tState.m_fnStrCmp;
  1175. // FIXME! manual bitwise copying.. yuck
  1176. for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
  1177. {
  1178. m_tGroupSorter.m_eKeypart[i] = tState.m_eKeypart[i];
  1179. m_tGroupSorter.m_tLocator[i] = tState.m_tLocator[i];
  1180. }
  1181. m_tGroupSorter.m_uAttrDesc = tState.m_uAttrDesc;
  1182. m_tGroupSorter.m_iNow = tState.m_iNow;
  1183. // check whether we sort by distinct
  1184. if ( DISTINCT && m_tSettings.m_tDistinctLoc.m_iBitOffset>=0 )
  1185. for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
  1186. if ( m_tGroupSorter.m_tLocator[i].m_iBitOffset==m_tSettings.m_tDistinctLoc.m_iBitOffset )
  1187. {
  1188. m_bSortByDistinct = true;
  1189. break;
  1190. }
  1191. }
  1192. protected:
  1193. /// count distinct values if necessary
  1194. void CountDistinct ()
  1195. {
  1196. if ( DISTINCT )
  1197. {
  1198. m_tUniq.Sort ();
  1199. SphGroupKey_t uGroup;
  1200. for ( int iCount = m_tUniq.CountStart ( &uGroup ); iCount; iCount = m_tUniq.CountNext ( &uGroup ) )
  1201. {
  1202. CSphMatch ** ppMatch = m_hGroup2Match(uGroup);
  1203. if ( ppMatch )
  1204. (*ppMatch)->SetAttr ( m_tSettings.m_tLocDistinct, iCount );
  1205. }
  1206. }
  1207. }
  1208. /// cut worst N groups off the buffer tail
  1209. void CutWorst ( int iCut )
  1210. {
  1211. // sort groups
  1212. if ( m_bSortByDistinct )
  1213. CountDistinct ();
  1214. CalcAvg ( true );
  1215. SortGroups ();
  1216. CalcAvg ( false );
  1217. // cut groups
  1218. m_iUsed -= iCut;
  1219. // cleanup unused distinct stuff
  1220. if ( DISTINCT )
  1221. {
  1222. // build kill-list
  1223. CSphVector<SphGroupKey_t> dRemove;
  1224. dRemove.Resize ( iCut );
  1225. for ( int i=0; i<iCut; i++ )
  1226. dRemove[i] = m_pData[m_iUsed+i].GetAttr ( m_tSettings.m_tLocGroupby );
  1227. // sort and compact
  1228. if ( !m_bSortByDistinct )
  1229. m_tUniq.Sort ();
  1230. m_tUniq.Compact ( &dRemove[0], iCut );
  1231. }
  1232. // rehash
  1233. m_hGroup2Match.Reset ();
  1234. for ( int i=0; i<m_iUsed; i++ )
  1235. m_hGroup2Match.Add ( m_pData+i, m_pData[i].GetAttr ( m_tSettings.m_tLocGroupby ) );
  1236. }
  1237. /// sort groups buffer
  1238. void SortGroups ()
  1239. {
  1240. sphSort ( m_pData, m_iUsed, m_tGroupSorter, m_tGroupSorter );
  1241. }
  1242. virtual CSphMatch * Finalize()
  1243. {
  1244. if ( m_iUsed>m_iLimit )
  1245. CutWorst ( m_iUsed - m_iLimit );
  1246. return m_pData;
  1247. }
  1248. };
  1249. /// match sorter with k-buffering and group-by for MVAs
  1250. template < typename COMPGROUP, bool DISTINCT >
  1251. class CSphKBufferMVAGroupSorter : public CSphKBufferGroupSorter < COMPGROUP, DISTINCT >
  1252. {
  1253. protected:
  1254. const DWORD * m_pMva; ///< pointer to MVA pool for incoming matches
  1255. CSphAttrLocator m_tMvaLocator;
  1256. bool m_bMva64;
  1257. public:
  1258. /// ctor
  1259. CSphKBufferMVAGroupSorter ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings )
  1260. : CSphKBufferGroupSorter < COMPGROUP, DISTINCT > ( pComp, pQuery, tSettings )
  1261. , m_pMva ( NULL )
  1262. , m_bMva64 ( tSettings.m_bMva64 )
  1263. {
  1264. this->m_pGrouper->GetLocator ( m_tMvaLocator );
  1265. }
  1266. /// check if this sorter does groupby
  1267. virtual bool IsGroupby ()
  1268. {
  1269. return true;
  1270. }
  1271. /// set MVA pool for subsequent matches
  1272. void SetMVAPool ( const DWORD * pMva )
  1273. {
  1274. m_pMva = pMva;
  1275. }
  1276. /// add entry to the queue
  1277. virtual bool Push ( const CSphMatch & tEntry )
  1278. {
  1279. assert ( m_pMva );
  1280. if ( !m_pMva )
  1281. return false;
  1282. // get that list
  1283. // FIXME! OPTIMIZE! use simpler locator than full bits/count here
  1284. // FIXME! hardcoded MVA type, so here's MVA_DOWNSIZE marker for searching
  1285. const DWORD * pValues = tEntry.GetAttrMVA ( this->m_tMvaLocator, m_pMva ); // (this pointer is for gcc; it doesn't work otherwise)
  1286. if ( !pValues )
  1287. return false;
  1288. DWORD iValues = *pValues++;
  1289. bool bRes = false;
  1290. if ( m_bMva64 )
  1291. {
  1292. assert ( ( iValues%2 )==0 );
  1293. for ( ;iValues>0; iValues-=2, pValues+=2 )
  1294. {
  1295. int64_t iMva = MVA_UPSIZE ( pValues );
  1296. SphGroupKey_t uGroupkey = this->m_pGrouper->KeyFromValue ( iMva );
  1297. bRes |= this->PushEx ( tEntry, uGroupkey, false );
  1298. }
  1299. } else
  1300. {
  1301. while ( iValues-- )
  1302. {
  1303. SphGroupKey_t uGroupkey = this->m_pGrouper->KeyFromValue ( *pValues++ );
  1304. bRes |= this->PushEx ( tEntry, uGroupkey, false );
  1305. }
  1306. }
  1307. return bRes;
  1308. }
  1309. /// add pre-grouped entry to the queue
  1310. virtual bool PushGrouped ( const CSphMatch & tEntry )
  1311. {
  1312. // re-group it based on the group key
  1313. // (first 'this' is for icc; second 'this' is for gcc)
  1314. return this->PushEx ( tEntry, tEntry.GetAttr ( this->m_tSettings.m_tLocGroupby ), true );
  1315. }
  1316. };
  1317. #if USE_WINDOWS
  1318. #pragma warning(default:4127)
  1319. #endif
  1320. //////////////////////////////////////////////////////////////////////////
  1321. // PLAIN SORTING FUNCTORS
  1322. //////////////////////////////////////////////////////////////////////////
  1323. /// match sorter
  1324. struct MatchRelevanceLt_fn : public ISphMatchComparator
  1325. {
  1326. virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
  1327. {
  1328. return IsLess ( a, b, t );
  1329. }
  1330. static bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & )
  1331. {
  1332. if ( a.m_iWeight!=b.m_iWeight )
  1333. return a.m_iWeight < b.m_iWeight;
  1334. return a.m_iDocID > b.m_iDocID;
  1335. };
  1336. };
  1337. /// match sorter
  1338. struct MatchAttrLt_fn : public ISphMatchComparator
  1339. {
  1340. virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
  1341. {
  1342. return IsLess ( a, b, t );
  1343. }
  1344. static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
  1345. {
  1346. if ( t.m_eKeypart[0]!=SPH_KEYPART_STRING )
  1347. {
  1348. SphAttr_t aa = a.GetAttr ( t.m_tLocator[0] );
  1349. SphAttr_t bb = b.GetAttr ( t.m_tLocator[0] );
  1350. if ( aa!=bb )
  1351. return aa<bb;
  1352. } else
  1353. {
  1354. int iCmp = t.CmpStrings ( a, b, 0 );
  1355. if ( iCmp!=0 )
  1356. return iCmp<0;
  1357. }
  1358. if ( a.m_iWeight!=b.m_iWeight )
  1359. return a.m_iWeight < b.m_iWeight;
  1360. return a.m_iDocID > b.m_iDocID;
  1361. };
  1362. };
  1363. /// match sorter
  1364. struct MatchAttrGt_fn : public ISphMatchComparator
  1365. {
  1366. virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
  1367. {
  1368. return IsLess ( a, b, t );
  1369. }
  1370. static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
  1371. {
  1372. if ( t.m_eKeypart[0]!=SPH_KEYPART_STRING )
  1373. {
  1374. SphAttr_t aa = a.GetAttr ( t.m_tLocator[0] );
  1375. SphAttr_t bb = b.GetAttr ( t.m_tLocator[0] );
  1376. if ( aa!=bb )
  1377. return aa>bb;
  1378. } else
  1379. {
  1380. int iCmp = t.CmpStrings ( a, b, 0 );
  1381. if ( iCmp!=0 )
  1382. return iCmp>0;
  1383. }
  1384. if ( a.m_iWeight!=b.m_iWeight )
  1385. return a.m_iWeight < b.m_iWeight;
  1386. return a.m_iDocID > b.m_iDocID;
  1387. };
  1388. };
  1389. /// match sorter
  1390. struct MatchTimeSegments_fn : public ISphMatchComparator
  1391. {
  1392. virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
  1393. {
  1394. return IsLess ( a, b, t );
  1395. }
  1396. static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
  1397. {
  1398. SphAttr_t aa = a.GetAttr ( t.m_tLocator[0] );
  1399. SphAttr_t bb = b.GetAttr ( t.m_tLocator[0] );
  1400. int iA = GetSegment ( aa, t.m_iNow );
  1401. int iB = GetSegment ( bb, t.m_iNow );
  1402. if ( iA!=iB )
  1403. return iA > iB;
  1404. if ( a.m_iWeight!=b.m_iWeight )
  1405. return a.m_iWeight < b.m_iWeight;
  1406. if ( aa!=bb )
  1407. return aa<bb;
  1408. return a.m_iDocID > b.m_iDocID;
  1409. };
  1410. protected:
  1411. static inline int GetSegment ( SphAttr_t iStamp, SphAttr_t iNow )
  1412. {
  1413. if ( iStamp>=iNow-3600 ) return 0; // last hour
  1414. if ( iStamp>=iNow-24*3600 ) return 1; // last day
  1415. if ( iStamp>=iNow-7*24*3600 ) return 2; // last week
  1416. if ( iStamp>=iNow-30*24*3600 ) return 3; // last month
  1417. if ( iStamp>=iNow-90*24*3600 ) return 4; // last 3 months
  1418. return 5; // everything else
  1419. }
  1420. };
  1421. /// match sorter
  1422. struct MatchExpr_fn : public ISphMatchComparator
  1423. {
  1424. virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
  1425. {
  1426. return IsLess ( a, b, t );
  1427. }
  1428. static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
  1429. {
  1430. float aa = a.GetAttrFloat ( t.m_tLocator[0] ); // FIXME! OPTIMIZE!!! simplified (dword-granular) getter could be used here
  1431. float bb = b.GetAttrFloat ( t.m_tLocator[0] );
  1432. if ( aa!=bb )
  1433. return aa<bb;
  1434. return a.m_iDocID>b.m_iDocID;
  1435. }
  1436. };
  1437. /////////////////////////////////////////////////////////////////////////////
  1438. #define SPH_TEST_PAIR(_aa,_bb,_idx ) \
  1439. if ( (_aa)!=(_bb) ) \
  1440. return ( (t.m_uAttrDesc >> (_idx)) & 1 ) ^ ( (_aa) > (_bb) );
  1441. #define SPH_TEST_KEYPART(_idx) \
  1442. switch ( t.m_eKeypart[_idx] ) \
  1443. { \
  1444. case SPH_KEYPART_ID: SPH_TEST_PAIR ( a.m_iDocID, b.m_iDocID, _idx ); break; \
  1445. case SPH_KEYPART_WEIGHT: SPH_TEST_PAIR ( a.m_iWeight, b.m_iWeight, _idx ); break; \
  1446. case SPH_KEYPART_INT: \
  1447. { \
  1448. register SphAttr_t aa = a.GetAttr ( t.m_tLocator[_idx] ); \
  1449. register SphAttr_t bb = b.GetAttr ( t.m_tLocator[_idx] ); \
  1450. SPH_TEST_PAIR ( aa, bb, _idx ); \
  1451. break; \
  1452. } \
  1453. case SPH_KEYPART_FLOAT: \
  1454. { \
  1455. register float aa = a.GetAttrFloat ( t.m_tLocator[_idx] ); \
  1456. register float bb = b.GetAttrFloat ( t.m_tLocator[_idx] ); \
  1457. SPH_TEST_PAIR ( aa, bb, _idx ) \
  1458. break; \
  1459. } \
  1460. case SPH_KEYPART_STRING: \
  1461. { \
  1462. int iCmp = t.CmpStrings ( a, b, _idx ); \
  1463. if ( iCmp!=0 ) \
  1464. return ( ( t.m_uAttrDesc >> (_idx) ) & 1 ) ^ ( iCmp>0 ); \
  1465. break; \
  1466. } \
  1467. }
  1468. struct MatchGeneric2_fn : public ISphMatchComparator
  1469. {
  1470. virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
  1471. {
  1472. return IsLess ( a, b, t );
  1473. }
  1474. static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
  1475. {
  1476. SPH_TEST_KEYPART(0);
  1477. SPH_TEST_KEYPART(1);
  1478. return false;
  1479. };
  1480. };
  1481. struct MatchGeneric3_fn : public ISphMatchComparator
  1482. {
  1483. virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
  1484. {
  1485. return IsLess ( a, b, t );
  1486. }
  1487. static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
  1488. {
  1489. SPH_TEST_KEYPART(0);
  1490. SPH_TEST_KEYPART(1);
  1491. SPH_TEST_KEYPART(2);
  1492. return false;
  1493. };
  1494. };
  1495. struct MatchGeneric4_fn : public ISphMatchComparator
  1496. {
  1497. virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
  1498. {
  1499. return IsLess ( a, b, t );
  1500. }
  1501. static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
  1502. {
  1503. SPH_TEST_KEYPART(0);
  1504. SPH_TEST_KEYPART(1);
  1505. SPH_TEST_KEYPART(2);
  1506. SPH_TEST_KEYPART(3);
  1507. return false;
  1508. };
  1509. };
  1510. struct MatchGeneric5_fn : public ISphMatchComparator
  1511. {
  1512. virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
  1513. {
  1514. return IsLess ( a, b, t );
  1515. }
  1516. static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
  1517. {
  1518. SPH_TEST_KEYPART(0);
  1519. SPH_TEST_KEYPART(1);
  1520. SPH_TEST_KEYPART(2);
  1521. SPH_TEST_KEYPART(3);
  1522. SPH_TEST_KEYPART(4);
  1523. return false;
  1524. };
  1525. };
  1526. //////////////////////////////////////////////////////////////////////////
  1527. struct MatchCustom_fn : public ISphMatchComparator
  1528. {
  1529. virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
  1530. {
  1531. return IsLess ( a, b, t );
  1532. }
  1533. // setup sorting state
  1534. static bool SetupAttr ( const CSphSchema & tSchema, CSphMatchComparatorState & tState, CSphString & sError, int iIdx, const char * sAttr )
  1535. {
  1536. if ( iIdx>=CSphMatchComparatorState::MAX_ATTRS )
  1537. {
  1538. sError.SetSprintf ( "custom sort: too many attributes declared" );
  1539. return false;
  1540. }
  1541. int iAttr = tSchema.GetAttrIndex(sAttr);
  1542. if ( iAttr<0 )
  1543. {
  1544. sError.SetSprintf ( "custom sort: attr '%s' not found in schema", sAttr );
  1545. return false;
  1546. }
  1547. const CSphColumnInfo & tAttr = tSchema.GetAttr(iAttr);
  1548. tState.m_eKeypart[iIdx] = tAttr.m_eAttrType==SPH_ATTR_FLOAT ? SPH_KEYPART_FLOAT : SPH_KEYPART_INT;
  1549. tState.m_tLocator[iIdx] = tAttr.m_tLocator;
  1550. return true;
  1551. }
  1552. // setup sorting state
  1553. static bool Setup ( const CSphSchema & tSchema, CSphMatchComparatorState & tState, CSphString & sError )
  1554. {
  1555. float fTmp;
  1556. int iAttr = 0;
  1557. #define MATCH_FUNCTION fTmp
  1558. #define MATCH_WEIGHT 1.0f
  1559. #define MATCH_NOW 1.0f
  1560. #define MATCH_ATTR(_idx) 1.0f
  1561. #define MATCH_DECLARE_ATTR(_name) if ( !SetupAttr ( tSchema, tState, sError, iAttr++, _name ) ) return false;
  1562. #include "sphinxcustomsort.inl"
  1563. ; // NOLINT
  1564. return true;
  1565. }
  1566. // calc function and compare matches
  1567. // OPTIMIZE? could calc once per match on submit
  1568. static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
  1569. {
  1570. #undef MATCH_DECLARE_ATTR
  1571. #undef MATCH_WEIGHT
  1572. #undef MATCH_NOW
  1573. #undef MATCH_ATTR
  1574. #define MATCH_DECLARE_ATTR(_name) ; // NOLINT
  1575. #define MATCH_WEIGHT float(MATCH_VAR.m_iWeight)
  1576. #define MATCH_NOW float(t.m_iNow)
  1577. #define MATCH_ATTR(_idx) float(MATCH_VAR.GetAttr(t.m_tLocator[_idx]))
  1578. float aa, bb;
  1579. #undef MATCH_FUNCTION
  1580. #undef MATCH_VAR
  1581. #define MATCH_FUNCTION aa
  1582. #define MATCH_VAR a
  1583. #include "sphinxcustomsort.inl" // NOLINT
  1584. ; // NOLINT
  1585. #undef MATCH_FUNCTION
  1586. #undef MATCH_VAR
  1587. #define MATCH_FUNCTION bb
  1588. #define MATCH_VAR b
  1589. #include "sphinxcustomsort.inl" // NOLINT
  1590. ; // NOLINT
  1591. return aa<bb;
  1592. }
  1593. };
  1594. //////////////////////////////////////////////////////////////////////////
  1595. // SORT CLAUSE PARSER
  1596. //////////////////////////////////////////////////////////////////////////
  1597. static const int MAX_SORT_FIELDS = 5; // MUST be in sync with CSphMatchComparatorState::m_iAttr
  1598. enum ESphSortFunc
  1599. {
  1600. FUNC_REL_DESC,
  1601. FUNC_ATTR_DESC,
  1602. FUNC_ATTR_ASC,
  1603. FUNC_TIMESEGS,
  1604. FUNC_GENERIC2,
  1605. FUNC_GENERIC3,
  1606. FUNC_GENERIC4,
  1607. FUNC_GENERIC5,
  1608. FUNC_CUSTOM,
  1609. FUNC_EXPR
  1610. };
  1611. enum ESortClauseParseResult
  1612. {
  1613. SORT_CLAUSE_OK,
  1614. SORT_CLAUSE_ERROR,
  1615. SORT_CLAUSE_RANDOM
  1616. };
  1617. class SortClauseTokenizer_t
  1618. {
  1619. protected:
  1620. char * m_pCur;
  1621. char * m_pMax;
  1622. char * m_pBuf;
  1623. protected:
  1624. char ToLower ( char c )
  1625. {
  1626. // 0..9, A..Z->a..z, _, a..z, @
  1627. if ( ( c>='0' && c<='9' ) || ( c>='a' && c<='z' ) || c=='_' || c=='@' )
  1628. return c;
  1629. if ( c>='A' && c<='Z' )
  1630. return c-'A'+'a';
  1631. return 0;
  1632. }
  1633. public:
  1634. explicit SortClauseTokenizer_t ( const char * sBuffer )
  1635. {
  1636. int iLen = strlen(sBuffer);
  1637. m_pBuf = new char [ iLen+1 ];
  1638. m_pMax = m_pBuf+iLen;
  1639. m_pCur = m_pBuf;
  1640. for ( int i=0; i<=iLen; i++ )
  1641. m_pBuf[i] = ToLower ( sBuffer[i] );
  1642. }
  1643. ~SortClauseTokenizer_t ()
  1644. {
  1645. SafeDeleteArray ( m_pBuf );
  1646. }
  1647. const char * GetToken ()
  1648. {
  1649. // skip spaces
  1650. while ( m_pCur<m_pMax && !*m_pCur )
  1651. m_pCur++;
  1652. if ( m_pCur>=m_pMax )
  1653. return NULL;
  1654. // memorize token start, and move pointer forward
  1655. const char * sRes = m_pCur;
  1656. while ( *m_pCur )
  1657. m_pCur++;
  1658. return sRes;
  1659. }
  1660. };
  1661. static inline ESphSortKeyPart Attr2Keypart ( ESphAttr eType )
  1662. {
  1663. switch ( eType )
  1664. {
  1665. case SPH_ATTR_FLOAT: return SPH_KEYPART_FLOAT;
  1666. case SPH_ATTR_STRING: return SPH_KEYPART_STRING;
  1667. default: return SPH_KEYPART_INT;
  1668. }
  1669. }
  1670. static ESortClauseParseResult sphParseSortClause ( const CSphQuery * pQuery, const char * sClause, const CSphSchema & tSchema,
  1671. ESphSortFunc & eFunc, CSphMatchComparatorState & tState, int * dAttrs, CSphString & sError, CSphSchema * pExtra = NULL )
  1672. {
  1673. assert ( dAttrs );
  1674. for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
  1675. dAttrs[i] = -1;
  1676. // mini parser
  1677. SortClauseTokenizer_t tTok ( sClause );
  1678. bool bField = false; // whether i'm expecting field name or sort order
  1679. int iField = 0;
  1680. for ( const char * pTok=tTok.GetToken(); pTok; pTok=tTok.GetToken() )
  1681. {
  1682. bField = !bField;
  1683. // special case, sort by random
  1684. if ( iField==0 && bField && strcmp ( pTok, "@random" )==0 )
  1685. return SORT_CLAUSE_RANDOM;
  1686. // special case, sort by custom
  1687. if ( iField==0 && bField && strcmp ( pTok, "@custom" )==0 )
  1688. {
  1689. eFunc = FUNC_CUSTOM;
  1690. return MatchCustom_fn::Setup ( tSchema, tState, sError ) ? SORT_CLAUSE_OK : SORT_CLAUSE_ERROR;
  1691. }
  1692. // handle sort order
  1693. if ( !bField )
  1694. {
  1695. // check
  1696. if ( strcmp ( pTok, "desc" ) && strcmp ( pTok, "asc" ) )
  1697. {
  1698. sError.SetSprintf ( "invalid sorting order '%s'", pTok );
  1699. return SORT_CLAUSE_ERROR;
  1700. }
  1701. // set
  1702. if ( !strcmp ( pTok, "desc" ) )
  1703. tState.m_uAttrDesc |= ( 1<<iField );
  1704. iField++;
  1705. continue;
  1706. }
  1707. // handle attribute name
  1708. if ( iField==MAX_SORT_FIELDS )
  1709. {
  1710. sError.SetSprintf ( "too many sort-by attributes; maximum count is %d", MAX_SORT_FIELDS );
  1711. return SORT_CLAUSE_ERROR;
  1712. }
  1713. if ( !strcasecmp ( pTok, "@relevance" )
  1714. || !strcasecmp ( pTok, "@rank" )
  1715. || !strcasecmp ( pTok, "@weight" ) )
  1716. {
  1717. tState.m_eKeypart[iField] = SPH_KEYPART_WEIGHT;
  1718. } else if ( !strcasecmp ( pTok, "@id" ) || !strcasecmp ( pTok, "id" ) )
  1719. {
  1720. tState.m_eKeypart[iField] = SPH_KEYPART_ID;
  1721. } else
  1722. {
  1723. if ( !strcasecmp ( pTok, "@group" ) )
  1724. pTok = "@groupby";
  1725. // try to lookup plain attr in sorter schema
  1726. int iAttr = tSchema.GetAttrIndex ( pTok );
  1727. // try to lookup aliased count(*) in select items
  1728. if ( iAttr<0 )
  1729. {
  1730. ARRAY_FOREACH ( i, pQuery->m_dItems )
  1731. {
  1732. const CSphQueryItem & tItem = pQuery->m_dItems[i];
  1733. if ( !tItem.m_sAlias.cstr() || strcasecmp ( tItem.m_sAlias.cstr(), pTok ) )
  1734. continue;
  1735. if ( tItem.m_sExpr.Begins("@") )
  1736. iAttr = tSchema.GetAttrIndex ( tItem.m_sExpr.cstr() );
  1737. break; // break in any case; because we did match the alias
  1738. }
  1739. }
  1740. // epic fail
  1741. if ( iAttr<0 )
  1742. {
  1743. sError.SetSprintf ( "sort-by attribute '%s' not found", pTok );
  1744. return SORT_CLAUSE_ERROR;
  1745. }
  1746. const CSphColumnInfo & tCol = tSchema.GetAttr(iAttr);
  1747. if ( pExtra )
  1748. pExtra->AddAttr ( tCol, true );
  1749. tState.m_eKeypart[iField] = Attr2Keypart ( tCol.m_eAttrType );
  1750. tState.m_tLocator[iField] = tSchema.GetAttr(iAttr).m_tLocator;
  1751. dAttrs[iField] = iAttr;
  1752. }
  1753. }
  1754. if ( iField==0 )
  1755. {
  1756. sError.SetSprintf ( "no sort order defined" );
  1757. return SORT_CLAUSE_ERROR;
  1758. }
  1759. if ( iField==1 )
  1760. tState.m_eKeypart[iField++] = SPH_KEYPART_ID; // add "id ASC"
  1761. switch ( iField )
  1762. {
  1763. case 2: eFunc = FUNC_GENERIC2; break;
  1764. case 3: eFunc = FUNC_GENERIC3; break;
  1765. case 4: eFunc = FUNC_GENERIC4; break;
  1766. case 5: eFunc = FUNC_GENERIC5; break;
  1767. default: sError.SetSprintf ( "INTERNAL ERROR: %d fields in sphParseSortClause()", iField ); return SORT_CLAUSE_ERROR;
  1768. }
  1769. return SORT_CLAUSE_OK;
  1770. }
  1771. //////////////////////////////////////////////////////////////////////////
  1772. // SORTING+GROUPING INSTANTIATION
  1773. //////////////////////////////////////////////////////////////////////////
  1774. template < typename COMPGROUP >
  1775. static ISphMatchSorter * sphCreateSorter3rd ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings )
  1776. {
  1777. if ( tSettings.m_bMVA )
  1778. {
  1779. if ( tSettings.m_bDistinct==true )
  1780. return new CSphKBufferMVAGroupSorter < COMPGROUP, true > ( pComp, pQuery, tSettings);
  1781. else
  1782. return new CSphKBufferMVAGroupSorter < COMPGROUP, false > ( pComp, pQuery, tSettings );
  1783. } else
  1784. {
  1785. if ( tSettings.m_bDistinct==true )
  1786. return new CSphKBufferGroupSorter < COMPGROUP, true > ( pComp, pQuery, tSettings );
  1787. else
  1788. return new CSphKBufferGroupSorter < COMPGROUP, false > ( pComp, pQuery, tSettings );
  1789. }
  1790. }
  1791. static ISphMatchSorter * sphCreateSorter2nd ( ESphSortFunc eGroupFunc, const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings )
  1792. {
  1793. switch ( eGroupFunc )
  1794. {
  1795. case FUNC_GENERIC2: return sphCreateSorter3rd<MatchGeneric2_fn> ( pComp, pQuery, tSettings ); break;
  1796. case FUNC_GENERIC3: return sphCreateSorter3rd<MatchGeneric3_fn> ( pComp, pQuery, tSettings ); break;
  1797. case FUNC_GENERIC4: return sphCreateSorter3rd<MatchGeneric4_fn> ( pComp, pQuery, tSettings ); break;
  1798. case FUNC_GENERIC5: return sphCreateSorter3rd<MatchGeneric5_fn> ( pComp, pQuery, tSettings ); break;
  1799. case FUNC_CUSTOM: return sphCreateSorter3rd<MatchCustom_fn> ( pComp, pQuery, tSettings ); break;
  1800. case FUNC_EXPR: return sphCreateSorter3rd<MatchExpr_fn> ( pComp, pQuery, tSettings ); break;
  1801. default: return NULL;
  1802. }
  1803. }
  1804. static ISphMatchSorter * sphCreateSorter1st ( ESphSortFunc eMatchFunc, ESphSortFunc eGroupFunc, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings )
  1805. {
  1806. ISphMatchComparator * pComp = NULL;
  1807. switch ( eMatchFunc )
  1808. {
  1809. case FUNC_REL_DESC: pComp = new MatchRelevanceLt_fn(); break;
  1810. case FUNC_ATTR_DESC: pComp = new MatchAttrLt_fn(); break;
  1811. case FUNC_ATTR_ASC: pComp = new MatchAttrGt_fn(); break;
  1812. case FUNC_TIMESEGS: pComp = new MatchTimeSegments_fn(); break;
  1813. case FUNC_GENERIC2: pComp = new MatchGeneric2_fn(); break;
  1814. case FUNC_GENERIC3: pComp = new MatchGeneric3_fn(); break;
  1815. case FUNC_GENERIC4: pComp = new MatchGeneric4_fn(); break;
  1816. case FUNC_GENERIC5: pComp = new MatchGeneric5_fn(); break;
  1817. case FUNC_CUSTOM: pComp = new MatchCustom_fn(); break;
  1818. case FUNC_EXPR: pComp = new MatchExpr_fn(); break; // only for non-bitfields, obviously
  1819. }
  1820. assert ( pComp );
  1821. return sphCreateSorter2nd ( eGroupFunc, pComp, pQuery, tSettings );
  1822. }
  1823. //////////////////////////////////////////////////////////////////////////
  1824. // GEODIST
  1825. //////////////////////////////////////////////////////////////////////////
  1826. struct ExprGeodist_t : public ISphExpr
  1827. {
  1828. public:
  1829. ExprGeodist_t () {}
  1830. bool Setup ( const CSphQuery * pQuery, const CSphSchema & tSchema, CSphString & sError );
  1831. virtual float Eval ( const CSphMatch & tMatch ) const;
  1832. virtual void SetMVAPool ( const DWORD * ) {}
  1833. virtual void GetDependencyColumns ( CSphVector<int> & dColumns ) const;
  1834. protected:
  1835. CSphAttrLocator m_tGeoLatLoc;
  1836. CSphAttrLocator m_tGeoLongLoc;
  1837. float m_fGeoAnchorLat;
  1838. float m_fGeoAnchorLong;
  1839. int m_iLat;
  1840. int m_iLon;
  1841. };
  1842. bool ExprGeodist_t::Setup ( const CSphQuery * pQuery, const CSphSchema & tSchema, CSphString & sError )
  1843. {
  1844. if ( !pQuery->m_bGeoAnchor )
  1845. {
  1846. sError.SetSprintf ( "INTERNAL ERROR: no geoanchor, can not create geodist evaluator" );
  1847. return false;
  1848. }
  1849. int iLat = tSchema.GetAttrIndex ( pQuery->m_sGeoLatAttr.cstr() );
  1850. if ( iLat<0 )
  1851. {
  1852. sError.SetSprintf ( "unknown latitude attribute '%s'", pQuery->m_sGeoLatAttr.cstr() );
  1853. return false;
  1854. }
  1855. int iLong = tSchema.GetAttrIndex ( pQuery->m_sGeoLongAttr.cstr() );
  1856. if ( iLong<0 )
  1857. {
  1858. sError.SetSprintf ( "unknown latitude attribute '%s'", pQuery->m_sGeoLongAttr.cstr() );
  1859. return false;
  1860. }
  1861. m_tGeoLatLoc = tSchema.GetAttr(iLat).m_tLocator;
  1862. m_tGeoLongLoc = tSchema.GetAttr(iLong).m_tLocator;
  1863. m_fGeoAnchorLat = pQuery->m_fGeoLatitude;
  1864. m_fGeoAnchorLong = pQuery->m_fGeoLongitude;
  1865. m_iLat = iLat;
  1866. m_iLon = iLong;
  1867. return true;
  1868. }
  1869. static inline double sphSqr ( double v )
  1870. {
  1871. return v*v;
  1872. }
  1873. float ExprGeodist_t::Eval ( const CSphMatch & tMatch ) const
  1874. {
  1875. const double R = 6384000;
  1876. float plat = tMatch.GetAttrFloat ( m_tGeoLatLoc );
  1877. float plon = tMatch.GetAttrFloat ( m_tGeoLongLoc );
  1878. double dlat = plat - m_fGeoAnchorLat;
  1879. double dlon = plon - m_fGeoAnchorLong;
  1880. double a = sphSqr ( sin ( dlat/2 ) ) + cos(plat)*cos(m_fGeoAnchorLat)*sphSqr(sin(dlon/2));
  1881. double c = 2*asin ( Min ( 1, sqrt(a) ) );
  1882. return (float)(R*c);
  1883. }
  1884. void ExprGeodist_t::GetDependencyColumns ( CSphVector<int> & dColumns ) const
  1885. {
  1886. dColumns.Add ( m_iLat );
  1887. dColumns.Add ( m_iLon );
  1888. }
  1889. //////////////////////////////////////////////////////////////////////////
  1890. // PUBLIC FUNCTIONS (FACTORY AND FLATTENING)
  1891. //////////////////////////////////////////////////////////////////////////
  1892. static CSphGrouper * sphCreateGrouperString ( const CSphAttrLocator & tLoc, ESphCollation eCollation );
  1893. static bool SetupGroupbySettings ( const CSphQuery * pQuery, const CSphSchema & tSchema, CSphGroupSorterSettings & tSettings, CSphString & sError )
  1894. {
  1895. tSettings.m_tDistinctLoc.m_iBitOffset = -1;
  1896. if ( pQuery->m_sGroupBy.IsEmpty() )
  1897. return true;
  1898. if ( pQuery->m_eGroupFunc==SPH_GROUPBY_ATTRPAIR )
  1899. {
  1900. sError.SetSprintf ( "SPH_GROUPBY_ATTRPAIR is not supported any more (just group on 'bigint' attribute)" );
  1901. return false;
  1902. }
  1903. // setup groupby attr
  1904. int iGroupBy = tSchema.GetAttrIndex ( pQuery->m_sGroupBy.cstr() );
  1905. if ( iGroupBy<0 )
  1906. {
  1907. sError.SetSprintf ( "group-by attribute '%s' not found", pQuery->m_sGroupBy.cstr() );
  1908. return false;
  1909. }
  1910. ESphAttr eType = tSchema.GetAttr ( iGroupBy ).m_eAttrType;
  1911. CSphAttrLocator tLoc = tSchema.GetAttr ( iGroupBy ).m_tLocator;
  1912. switch ( pQuery->m_eGroupFunc )
  1913. {
  1914. case SPH_GROUPBY_DAY: tSettings.m_pGrouper = new CSphGrouperDay ( tLoc ); break;
  1915. case SPH_GROUPBY_WEEK: tSettings.m_pGrouper = new CSphGrouperWeek ( tLoc ); break;
  1916. case SPH_GROUPBY_MONTH: tSettings.m_pGrouper = new CSphGrouperMonth ( tLoc ); break;
  1917. case SPH_GROUPBY_YEAR: tSettings.m_pGrouper = new CSphGrouperYear ( tLoc ); break;
  1918. case SPH_GROUPBY_ATTR:
  1919. {
  1920. if ( eType!=SPH_ATTR_STRING )
  1921. tSettings.m_pGrouper = new CSphGrouperAttr ( tLoc );
  1922. else
  1923. tSettings.m_pGrouper = sphCreateGrouperString ( tLoc, pQuery->m_eCollation );
  1924. }
  1925. break;
  1926. default:
  1927. sError.SetSprintf ( "invalid group-by mode (mode=%d)", pQuery->m_eGroupFunc );
  1928. return false;
  1929. }
  1930. tSettings.m_bMVA = ( eType==SPH_ATTR_UINT32SET || eType==SPH_ATTR_INT64SET );
  1931. tSettings.m_bMva64 = ( eType==SPH_ATTR_INT64SET );
  1932. // setup distinct attr
  1933. if ( !pQuery->m_sGroupDistinct.IsEmpty() )
  1934. {
  1935. int iDistinct = tSchema.GetAttrIndex ( pQuery->m_sGroupDistinct.cstr() );
  1936. if ( iDistinct<0 )
  1937. {
  1938. sError.SetSprintf ( "group-count-distinct attribute '%s' not found", pQuery->m_sGroupDistinct.cstr() );
  1939. return false;
  1940. }
  1941. tSettings.m_tDistinctLoc = tSchema.GetAttr ( iDistinct ).m_tLocator;
  1942. }
  1943. return true;
  1944. }
  1945. static bool FixupDependency ( CSphSchema & tSchema, const int * pAttrs, int iAttrCount )
  1946. {
  1947. assert ( pAttrs );
  1948. CSphVector<int> dCur;
  1949. // add valid attributes to processing list
  1950. for ( int i=0; i<iAttrCount; i++ )
  1951. if ( pAttrs[i]>=0 )
  1952. dCur.Add ( pAttrs[i] );
  1953. int iInitialAttrs = dCur.GetLength();
  1954. // collect columns which affect current expressions
  1955. for ( int i=0; i<dCur.GetLength(); i++ )
  1956. {
  1957. const CSphColumnInfo & tCol = tSchema.GetAttr ( dCur[i] );
  1958. if ( tCol.m_eStage>SPH_EVAL_PRESORT && tCol.m_pExpr.Ptr()!=NULL )
  1959. tCol.m_pExpr->GetDependencyColumns ( dCur );
  1960. }
  1961. // get rid of dupes
  1962. dCur.Uniq();
  1963. // fix up of attributes stages
  1964. ARRAY_FOREACH ( i, dCur )
  1965. {
  1966. int iAttr = dCur[i];
  1967. if ( iAttr<0 )
  1968. continue;
  1969. CSphColumnInfo & tCol = const_cast < CSphColumnInfo & > ( tSchema.GetAttr ( iAttr ) );
  1970. if ( tCol.m_eStage==SPH_EVAL_FINAL )
  1971. tCol.m_eStage = SPH_EVAL_PRESORT;
  1972. }
  1973. // it uses attributes if it has dependencies from other attributes
  1974. return ( iInitialAttrs>dCur.GetLength() );
  1975. }
  1976. // expression that transform string pool base + offset -> ptr
  1977. struct ExprSortStringAttrFixup_c : public ISphExpr
  1978. {
  1979. const BYTE * m_pStrings; ///< string pool; base for offset of string attributes
  1980. const CSphAttrLocator m_tLocator; ///< string attribute to fix
  1981. explicit ExprSortStringAttrFixup_c ( const CSphAttrLocator & tLocator )
  1982. : m_pStrings ( NULL )
  1983. , m_tLocator ( tLocator )
  1984. {
  1985. }
  1986. virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0.0f; }
  1987. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
  1988. {
  1989. SphAttr_t uOff = tMatch.GetAttr ( m_tLocator );
  1990. return (int64_t)( m_pStrings && uOff ? m_pStrings + uOff : NULL );
  1991. }
  1992. virtual void SetStringPool ( const BYTE * pStrings ) { m_pStrings = pStrings; }
  1993. };
  1994. static const char g_sIntAttrPrefix[] = "@int_str2ptr_";
  1995. bool sphIsSortStringInternal ( const char * sColumnName )
  1996. {
  1997. assert ( sColumnName );
  1998. return ( strncmp ( sColumnName, g_sIntAttrPrefix, sizeof(g_sIntAttrPrefix)-1 )==0 );
  1999. }
  2000. static bool SetupSortStringRemap ( CSphSchema & tSorterSchema, CSphMatchComparatorState & tState, const int * dAttr )
  2001. {
  2002. #ifndef NDEBUG
  2003. int iColWasCount = tSorterSchema.GetAttrsCount();
  2004. #endif
  2005. bool bUsesAtrrs = false;
  2006. for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
  2007. {
  2008. if ( tState.m_eKeypart[i]!=SPH_KEYPART_STRING )
  2009. continue;
  2010. assert ( dAttr[i]>=0 && dAttr[i]<iColWasCount );
  2011. CSphString sRemapCol;
  2012. sRemapCol.SetSprintf ( "%s%s", g_sIntAttrPrefix, tSorterSchema.GetAttr ( dAttr[i] ).m_sName.cstr() );
  2013. int iRemap = tSorterSchema.GetAttrIndex ( sRemapCol.cstr() );
  2014. if ( iRemap==-1 )
  2015. {
  2016. CSphColumnInfo tRemapCol ( sRemapCol.cstr(), SPH_ATTR_BIGINT );
  2017. tRemapCol.m_eStage = SPH_EVAL_PRESORT;
  2018. iRemap = tSorterSchema.GetAttrsCount();
  2019. tSorterSchema.AddAttr ( tRemapCol, true );
  2020. }
  2021. tState.m_tLocator[i] = tSorterSchema.GetAttr ( iRemap ).m_tLocator;
  2022. }
  2023. return bUsesAtrrs;
  2024. }
  2025. ISphExpr * sphSortSetupExpr ( const CSphString & sName, const CSphSchema & tIndexSchema )
  2026. {
  2027. if ( !sName.Begins ( g_sIntAttrPrefix ) )
  2028. return NULL;
  2029. const CSphColumnInfo * pCol = tIndexSchema.GetAttr ( sName.cstr()+sizeof(g_sIntAttrPrefix)-1 );
  2030. if ( !pCol )
  2031. return NULL;
  2032. return new ExprSortStringAttrFixup_c ( pCol->m_tLocator );
  2033. }
  2034. bool sphSortGetStringRemap ( const CSphSchema & tSorterSchema, const CSphSchema & tIndexSchema, CSphVector<SphStringSorterRemap_t> & dAttrs )
  2035. {
  2036. dAttrs.Resize ( 0 );
  2037. for ( int i=0; i<tSorterSchema.GetAttrsCount(); i++ )
  2038. {
  2039. const CSphColumnInfo & tDst = tSorterSchema.GetAttr(i);
  2040. if ( !tDst.m_sName.Begins ( g_sIntAttrPrefix ) )
  2041. continue;
  2042. const CSphColumnInfo * pSrcCol = tIndexSchema.GetAttr ( tDst.m_sName.cstr()+sizeof(g_sIntAttrPrefix)-1 );
  2043. assert ( pSrcCol );
  2044. SphStringSorterRemap_t & tRemap = dAttrs.Add();
  2045. tRemap.m_tSrc = pSrcCol->m_tLocator;
  2046. tRemap.m_tDst = tDst.m_tLocator;
  2047. }
  2048. return ( dAttrs.GetLength()>0 );
  2049. }
  2050. ////////////////////
  2051. // BINARY COLLATION
  2052. ////////////////////
  2053. int CollateBinary ( const BYTE * pStr1, const BYTE * pStr2 )
  2054. {
  2055. int iLen1 = sphUnpackStr ( pStr1, &pStr1 );
  2056. int iLen2 = sphUnpackStr ( pStr2, &pStr2 );
  2057. int iRes = memcmp ( (const char *)pStr1, (const char *)pStr2, Min ( iLen1, iLen2 ) );
  2058. return iRes ? iRes : ( iLen1-iLen2 );
  2059. }
  2060. ///////////////////////////////
  2061. // LIBC_CI, LIBC_CS COLLATIONS
  2062. ///////////////////////////////
  2063. /// libc_ci, wrapper for strcasecmp
  2064. int CollateLibcCI ( const BYTE * pStr1, const BYTE * pStr2 )
  2065. {
  2066. int iLen1 = sphUnpackStr ( pStr1, &pStr1 );
  2067. int iLen2 = sphUnpackStr ( pStr2, &pStr2 );
  2068. int iRes = strncasecmp ( (const char *)pStr1, (const char *)pStr2, Min ( iLen1, iLen2 ) );
  2069. return iRes ? iRes : ( iLen1-iLen2 );
  2070. }
  2071. /// libc_cs, wrapper for strcoll
  2072. int CollateLibcCS ( const BYTE * pStr1, const BYTE * pStr2 )
  2073. {
  2074. #define COLLATE_STACK_BUFFER 1024
  2075. int iLen1 = sphUnpackStr ( pStr1, &pStr1 );
  2076. int iLen2 = sphUnpackStr ( pStr2, &pStr2 );
  2077. // strcoll wants asciiz strings, so we would have to copy them over
  2078. // lets use stack buffer for smaller ones, and allocate from heap for bigger ones
  2079. int iRes = 0;
  2080. int iLen = Min ( iLen1, iLen2 );
  2081. if ( iLen<COLLATE_STACK_BUFFER )
  2082. {
  2083. // small strings on stack
  2084. BYTE sBuf1[COLLATE_STACK_BUFFER];
  2085. BYTE sBuf2[COLLATE_STACK_BUFFER];
  2086. memcpy ( sBuf1, pStr1, iLen );
  2087. memcpy ( sBuf2, pStr2, iLen );
  2088. sBuf1[iLen] = sBuf2[iLen] = '\0';
  2089. iRes = strcoll ( (const char*)sBuf1, (const char*)sBuf2 );
  2090. } else
  2091. {
  2092. // big strings on heap
  2093. char * pBuf1 = new char [ iLen ];
  2094. char * pBuf2 = new char [ iLen ];
  2095. memcpy ( pBuf1, pStr1, iLen );
  2096. memcpy ( pBuf2, pStr2, iLen );
  2097. pBuf1[iLen] = pBuf2[iLen] = '\0';
  2098. iRes = strcoll ( (const char*)pBuf1, (const char*)pBuf2 );
  2099. SafeDeleteArray ( pBuf2 );
  2100. SafeDeleteArray ( pBuf1 );
  2101. }
  2102. return iRes ? iRes : ( iLen1-iLen2 );
  2103. }
  2104. /////////////////////////////
  2105. // UTF8_GENERAL_CI COLLATION
  2106. /////////////////////////////
  2107. /// 1st level LUT
  2108. static unsigned short * g_dCollPlanes_UTF8CI[0x100];
  2109. /// 2nd level LUT, non-trivial collation data
  2110. static unsigned short g_dCollWeights_UTF8CI[0xb00] =
  2111. {
  2112. // weights for 0x0 to 0x5ff
  2113. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
  2114. 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  2115. 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  2116. 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
  2117. 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  2118. 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
  2119. 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  2120. 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 123, 124, 125, 126, 127,
  2121. 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
  2122. 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
  2123. 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
  2124. 176, 177, 178, 179, 180, 924, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
  2125. 65, 65, 65, 65, 65, 65, 198, 67, 69, 69, 69, 69, 73, 73, 73, 73,
  2126. 208, 78, 79, 79, 79, 79, 79, 215, 216, 85, 85, 85, 85, 89, 222, 83,
  2127. 65, 65, 65, 65, 65, 65, 198, 67, 69, 69, 69, 69, 73, 73, 73, 73,
  2128. 208, 78, 79, 79, 79, 79, 79, 247, 216, 85, 85, 85, 85, 89, 222, 89,
  2129. 65, 65, 65, 65, 65, 65, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68,
  2130. 272, 272, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 71, 71, 71, 71,
  2131. 71, 71, 71, 71, 72, 72, 294, 294, 73, 73, 73, 73, 73, 73, 73, 73,
  2132. 73, 73, 306, 306, 74, 74, 75, 75, 312, 76, 76, 76, 76, 76, 76, 319,
  2133. 319, 321, 321, 78, 78, 78, 78, 78, 78, 329, 330, 330, 79, 79, 79, 79,
  2134. 79, 79, 338, 338, 82, 82, 82, 82, 82, 82, 83, 83, 83, 83, 83, 83,
  2135. 83, 83, 84, 84, 84, 84, 358, 358, 85, 85, 85, 85, 85, 85, 85, 85,
  2136. 85, 85, 85, 85, 87, 87, 89, 89, 89, 90, 90, 90, 90, 90, 90, 83,
  2137. 384, 385, 386, 386, 388, 388, 390, 391, 391, 393, 394, 395, 395, 397, 398, 399,
  2138. 400, 401, 401, 403, 404, 502, 406, 407, 408, 408, 410, 411, 412, 413, 414, 415,
  2139. 79, 79, 418, 418, 420, 420, 422, 423, 423, 425, 426, 427, 428, 428, 430, 85,
  2140. 85, 433, 434, 435, 435, 437, 437, 439, 440, 440, 442, 443, 444, 444, 446, 503,
  2141. 448, 449, 450, 451, 452, 452, 452, 455, 455, 455, 458, 458, 458, 65, 65, 73,
  2142. 73, 79, 79, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 398, 65, 65,
  2143. 65, 65, 198, 198, 484, 484, 71, 71, 75, 75, 79, 79, 79, 79, 439, 439,
  2144. 74, 497, 497, 497, 71, 71, 502, 503, 78, 78, 65, 65, 198, 198, 216, 216,
  2145. 65, 65, 65, 65, 69, 69, 69, 69, 73, 73, 73, 73, 79, 79, 79, 79,
  2146. 82, 82, 82, 82, 85, 85, 85, 85, 83, 83, 84, 84, 540, 540, 72, 72,
  2147. 544, 545, 546, 546, 548, 548, 65, 65, 69, 69, 79, 79, 79, 79, 79, 79,
  2148. 79, 79, 89, 89, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575,
  2149. 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591,
  2150. 592, 593, 594, 385, 390, 597, 393, 394, 600, 399, 602, 400, 604, 605, 606, 607,
  2151. 403, 609, 610, 404, 612, 613, 614, 615, 407, 406, 618, 619, 620, 621, 622, 412,
  2152. 624, 625, 413, 627, 628, 415, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639,
  2153. 422, 641, 642, 425, 644, 645, 646, 647, 430, 649, 433, 434, 652, 653, 654, 655,
  2154. 656, 657, 439, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671,
  2155. 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687,
  2156. 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703,
  2157. 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719,
  2158. 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735,
  2159. 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751,
  2160. 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767,
  2161. 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783,
  2162. 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799,
  2163. 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815,
  2164. 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831,
  2165. 832, 833, 834, 835, 836, 921, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847,
  2166. 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863,
  2167. 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879,
  2168. 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895,
  2169. 896, 897, 898, 899, 900, 901, 913, 903, 917, 919, 921, 907, 927, 909, 933, 937,
  2170. 921, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927,
  2171. 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 921, 933, 913, 917, 919, 921,
  2172. 933, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927,
  2173. 928, 929, 931, 931, 932, 933, 934, 935, 936, 937, 921, 933, 927, 933, 937, 975,
  2174. 914, 920, 978, 978, 978, 934, 928, 983, 984, 985, 986, 986, 988, 988, 990, 990,
  2175. 992, 992, 994, 994, 996, 996, 998, 998, 1000, 1000, 1002, 1002, 1004, 1004, 1006, 1006,
  2176. 922, 929, 931, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
  2177. 1045, 1045, 1026, 1043, 1028, 1029, 1030, 1030, 1032, 1033, 1034, 1035, 1050, 1048, 1059, 1039,
  2178. 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
  2179. 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071,
  2180. 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
  2181. 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071,
  2182. 1045, 1045, 1026, 1043, 1028, 1029, 1030, 1030, 1032, 1033, 1034, 1035, 1050, 1048, 1059, 1039,
  2183. 1120, 1120, 1122, 1122, 1124, 1124, 1126, 1126, 1128, 1128, 1130, 1130, 1132, 1132, 1134, 1134,
  2184. 1136, 1136, 1138, 1138, 1140, 1140, 1140, 1140, 1144, 1144, 1146, 1146, 1148, 1148, 1150, 1150,
  2185. 1152, 1152, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1161, 1162, 1163, 1164, 1164, 1166, 1166,
  2186. 1168, 1168, 1170, 1170, 1172, 1172, 1174, 1174, 1176, 1176, 1178, 1178, 1180, 1180, 1182, 1182,
  2187. 1184, 1184, 1186, 1186, 1188, 1188, 1190, 1190, 1192, 1192, 1194, 1194, 1196, 1196, 1198, 1198,
  2188. 1200, 1200, 1202, 1202, 1204, 1204, 1206, 1206, 1208, 1208, 1210, 1210, 1212, 1212, 1214, 1214,
  2189. 1216, 1046, 1046, 1219, 1219, 1221, 1222, 1223, 1223, 1225, 1226, 1227, 1227, 1229, 1230, 1231,
  2190. 1040, 1040, 1040, 1040, 1236, 1236, 1045, 1045, 1240, 1240, 1240, 1240, 1046, 1046, 1047, 1047,
  2191. 1248, 1248, 1048, 1048, 1048, 1048, 1054, 1054, 1256, 1256, 1256, 1256, 1069, 1069, 1059, 1059,
  2192. 1059, 1059, 1059, 1059, 1063, 1063, 1270, 1271, 1067, 1067, 1274, 1275, 1276, 1277, 1278, 1279,
  2193. 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295,
  2194. 1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, 1311,
  2195. 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327,
  2196. 1328, 1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343,
  2197. 1344, 1345, 1346, 1347, 1348, 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359,
  2198. 1360, 1361, 1362, 1363, 1364, 1365, 1366, 1367, 1368, 1369, 1370, 1371, 1372, 1373, 1374, 1375,
  2199. 1376, 1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343,
  2200. 1344, 1345, 1346, 1347, 1348, 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359,
  2201. 1360, 1361, 1362, 1363, 1364, 1365, 1366, 1415, 1416, 1417, 1418, 1419, 1420, 1421, 1422, 1423,
  2202. 1424, 1425, 1426, 1427, 1428, 1429, 1430, 1431, 1432, 1433, 1434, 1435, 1436, 1437, 1438, 1439,
  2203. 1440, 1441, 1442, 1443, 1444, 1445, 1446, 1447, 1448, 1449, 1450, 1451, 1452, 1453, 1454, 1455,
  2204. 1456, 1457, 1458, 1459, 1460, 1461, 1462, 1463, 1464, 1465, 1466, 1467, 1468, 1469, 1470, 1471,
  2205. 1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479, 1480, 1481, 1482, 1483, 1484, 1485, 1486, 1487,
  2206. 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, 1501, 1502, 1503,
  2207. 1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511, 1512, 1513, 1514, 1515, 1516, 1517, 1518, 1519,
  2208. 1520, 1521, 1522, 1523, 1524, 1525, 1526, 1527, 1528, 1529, 1530, 1531, 1532, 1533, 1534, 1535,
  2209. // weights for codepoints 0x1e00 to 0x1fff
  2210. 65, 65, 66, 66, 66, 66, 66, 66, 67, 67, 68, 68, 68, 68, 68, 68,
  2211. 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70,
  2212. 71, 71, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 73, 73, 73, 73,
  2213. 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 76, 77, 77,
  2214. 77, 77, 77, 77, 78, 78, 78, 78, 78, 78, 78, 78, 79, 79, 79, 79,
  2215. 79, 79, 79, 79, 80, 80, 80, 80, 82, 82, 82, 82, 82, 82, 82, 82,
  2216. 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 84, 84, 84, 84, 84, 84,
  2217. 84, 84, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 86, 86, 86, 86,
  2218. 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 88, 88, 88, 88, 89, 89,
  2219. 90, 90, 90, 90, 90, 90, 72, 84, 87, 89, 7834, 83, 7836, 7837, 7838, 7839,
  2220. 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
  2221. 65, 65, 65, 65, 65, 65, 65, 65, 69, 69, 69, 69, 69, 69, 69, 69,
  2222. 69, 69, 69, 69, 69, 69, 69, 69, 73, 73, 73, 73, 79, 79, 79, 79,
  2223. 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
  2224. 79, 79, 79, 79, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
  2225. 85, 85, 89, 89, 89, 89, 89, 89, 89, 89, 7930, 7931, 7932, 7933, 7934, 7935,
  2226. 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913,
  2227. 917, 917, 917, 917, 917, 917, 7958, 7959, 917, 917, 917, 917, 917, 917, 7966, 7967,
  2228. 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919,
  2229. 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921,
  2230. 927, 927, 927, 927, 927, 927, 8006, 8007, 927, 927, 927, 927, 927, 927, 8014, 8015,
  2231. 933, 933, 933, 933, 933, 933, 933, 933, 8024, 933, 8026, 933, 8028, 933, 8030, 933,
  2232. 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937,
  2233. 913, 8123, 917, 8137, 919, 8139, 921, 8155, 927, 8185, 933, 8171, 937, 8187, 8062, 8063,
  2234. 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913,
  2235. 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919,
  2236. 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937,
  2237. 913, 913, 913, 913, 913, 8117, 913, 913, 913, 913, 913, 8123, 913, 8125, 921, 8127,
  2238. 8128, 8129, 919, 919, 919, 8133, 919, 919, 917, 8137, 919, 8139, 919, 8141, 8142, 8143,
  2239. 921, 921, 921, 8147, 8148, 8149, 921, 921, 921, 921, 921, 8155, 8156, 8157, 8158, 8159,
  2240. 933, 933, 933, 8163, 929, 929, 933, 933, 933, 933, 933, 8171, 929, 8173, 8174, 8175,
  2241. 8176, 8177, 937, 937, 937, 8181, 937, 937, 927, 8185, 937, 8187, 937, 8189, 8190, 8191
  2242. // space for codepoints 0x21xx, 0x24xx, 0xffxx (generated)
  2243. };
  2244. /// initialize collation LUTs
  2245. void sphCollationInit()
  2246. {
  2247. const int dWeightPlane[0x0b] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x1e, 0x1f, 0x21, 0x24, 0xff };
  2248. // generate missing weights
  2249. for ( int i=0; i<0x100; i++ )
  2250. {
  2251. g_dCollWeights_UTF8CI[i+0x800] = (unsigned short)( 0x2100 + i - ( i>=0x70 && i<=0x7f )*16 ); // 2170..217f, -16
  2252. g_dCollWeights_UTF8CI[i+0x900] = (unsigned short)( 0x2400 + i - ( i>=0xd0 && i<=0xe9 )*26 ); // 24d0..24e9, -26
  2253. g_dCollWeights_UTF8CI[i+0xa00] = (unsigned short)( 0xff00 + i - ( i>=0x41 && i<=0x5a )*32 ); // ff41..ff5a, -32
  2254. }
  2255. // generate planes table
  2256. for ( int i=0; i<0x100; i++ )
  2257. g_dCollPlanes_UTF8CI[i] = NULL;
  2258. for ( int i=0; i<0x0b; i++ )
  2259. g_dCollPlanes_UTF8CI [ dWeightPlane[i] ] = g_dCollWeights_UTF8CI + 0x100*i;
  2260. }
  2261. /// collate a single codepoint
  2262. static inline int CollateUTF8CI ( int iCode )
  2263. {
  2264. return ( ( iCode>>16 ) || !g_dCollPlanes_UTF8CI [ iCode>>8 ] )
  2265. ? iCode
  2266. : g_dCollPlanes_UTF8CI [ iCode>>8 ][ iCode&0xff ];
  2267. }
  2268. /// utf8_general_ci
  2269. int CollateUtf8GeneralCI ( const BYTE * pArg1, const BYTE * pArg2 )
  2270. {
  2271. // some const breakage and mess
  2272. // we MUST NOT actually modify the data
  2273. // but sphUTF8Decode() calls currently need non-const pointers
  2274. BYTE * pStr1 = (BYTE*) pArg1;
  2275. BYTE * pStr2 = (BYTE*) pArg2;
  2276. int iLen1 = sphUnpackStr ( pStr1, (const BYTE**)&pStr1 );
  2277. int iLen2 = sphUnpackStr ( pStr2, (const BYTE**)&pStr2 );
  2278. const BYTE * pMax1 = pStr1 + iLen1;
  2279. const BYTE * pMax2 = pStr2 + iLen2;
  2280. while ( pStr1<pMax1 && pStr2<pMax2 )
  2281. {
  2282. // FIXME! on broken data, decode might go beyond buffer bounds
  2283. int iCode1 = sphUTF8Decode ( pStr1 );
  2284. int iCode2 = sphUTF8Decode ( pStr2 );
  2285. if ( !iCode1 && !iCode2 )
  2286. return 0;
  2287. if ( !iCode1 || !iCode2 )
  2288. return !iCode1 ? -1 : 1;
  2289. if ( iCode1==iCode2 )
  2290. continue;
  2291. iCode1 = CollateUTF8CI ( iCode1 );
  2292. iCode2 = CollateUTF8CI ( iCode2 );
  2293. if ( iCode1!=iCode2 )
  2294. return iCode1-iCode2;
  2295. }
  2296. if ( pStr1>=pMax1 && pStr2>=pMax2 )
  2297. return 0;
  2298. return ( pStr1==pMax1 ) ? 1 : -1;
  2299. }
  2300. /////////////////////////////
  2301. // hashing functions
  2302. /////////////////////////////
  2303. class LibcCSHash_fn
  2304. {
  2305. public:
  2306. mutable CSphTightVector<BYTE> m_dBuf;
  2307. static const int LOCALE_SAFE_GAP = 16;
  2308. LibcCSHash_fn()
  2309. {
  2310. m_dBuf.Resize ( COLLATE_STACK_BUFFER );
  2311. }
  2312. uint64_t Hash ( const BYTE * pStr, int iLen ) const
  2313. {
  2314. assert ( pStr && iLen );
  2315. int iCompositeLen = iLen + 1 + (int)( 3.0f * iLen ) + LOCALE_SAFE_GAP;
  2316. if ( m_dBuf.GetLength()<iCompositeLen )
  2317. m_dBuf.Resize ( iCompositeLen );
  2318. memcpy ( m_dBuf.Begin(), pStr, iLen );
  2319. m_dBuf[iLen] = '\0';
  2320. BYTE * pDst = m_dBuf.Begin()+iLen+1;
  2321. int iDstAvailable = m_dBuf.GetLength() - iLen - LOCALE_SAFE_GAP;
  2322. int iDstLen = strxfrm ( (char *)pDst, (const char *)m_dBuf.Begin(), iDstAvailable );
  2323. assert ( iDstLen<iDstAvailable+LOCALE_SAFE_GAP );
  2324. uint64_t uAcc = sphFNV64 ( pDst, iDstLen );
  2325. return uAcc;
  2326. }
  2327. };
  2328. class LibcCIHash_fn
  2329. {
  2330. public:
  2331. uint64_t Hash ( const BYTE * pStr, int iLen ) const
  2332. {
  2333. assert ( pStr && iLen );
  2334. uint64_t uAcc = SPH_FNV64_SEED;
  2335. while ( iLen-- )
  2336. {
  2337. int iChar = tolower ( *pStr++ );
  2338. uAcc = sphFNV64 ( (const BYTE *)&iChar, 4, uAcc );
  2339. }
  2340. return uAcc;
  2341. }
  2342. };
  2343. class Utf8CIHash_fn
  2344. {
  2345. public:
  2346. uint64_t Hash ( const BYTE * pStr, int iLen ) const
  2347. {
  2348. assert ( pStr && iLen );
  2349. uint64_t uAcc = SPH_FNV64_SEED;
  2350. while ( iLen-- )
  2351. {
  2352. BYTE * pCur = (BYTE *)pStr++;
  2353. int iCode = sphUTF8Decode ( pCur );
  2354. iCode = CollateUTF8CI ( iCode );
  2355. uAcc = sphFNV64 ( (const BYTE *)&iCode, 4, uAcc );
  2356. }
  2357. return uAcc;
  2358. }
  2359. };
  2360. class BinaryHash_fn
  2361. {
  2362. public:
  2363. uint64_t Hash ( const BYTE * pStr, int iLen ) const
  2364. {
  2365. assert ( pStr && iLen );
  2366. return sphFNV64 ( pStr, iLen );
  2367. }
  2368. };
  2369. CSphGrouper * sphCreateGrouperString ( const CSphAttrLocator & tLoc, ESphCollation eCollation )
  2370. {
  2371. if ( eCollation==SPH_COLLATION_UTF8_GENERAL_CI )
  2372. return new CSphGrouperString<Utf8CIHash_fn> ( tLoc );
  2373. else if ( eCollation==SPH_COLLATION_LIBC_CI )
  2374. return new CSphGrouperString<LibcCIHash_fn> ( tLoc );
  2375. else if ( eCollation==SPH_COLLATION_LIBC_CS )
  2376. return new CSphGrouperString<LibcCSHash_fn> ( tLoc );
  2377. else
  2378. return new CSphGrouperString<BinaryHash_fn> ( tLoc );
  2379. }
  2380. /////////////////////////
  2381. // SORTING QUEUE FACTORY
  2382. /////////////////////////
  2383. template < typename COMP >
  2384. static ISphMatchSorter * CreatePlainSorter ( bool bKbuffer, int iMaxMatches, bool bUsesAttrs )
  2385. {
  2386. if ( bKbuffer )
  2387. return new CSphKbufferMatchQueue<COMP> ( iMaxMatches, bUsesAttrs );
  2388. else
  2389. return new CSphMatchQueue<COMP> ( iMaxMatches, bUsesAttrs );
  2390. }
  2391. static ISphMatchSorter * CreatePlainSorter ( ESphSortFunc eMatchFunc, bool bKbuffer, int iMaxMatches, bool bUsesAttrs )
  2392. {
  2393. switch ( eMatchFunc )
  2394. {
  2395. case FUNC_REL_DESC: return CreatePlainSorter<MatchRelevanceLt_fn> ( bKbuffer, iMaxMatches, bUsesAttrs ); break;
  2396. case FUNC_ATTR_DESC: return CreatePlainSorter<MatchAttrLt_fn> ( bKbuffer, iMaxMatches, bUsesAttrs ); break;
  2397. case FUNC_ATTR_ASC: return CreatePlainSorter<MatchAttrGt_fn> ( bKbuffer, iMaxMatches, bUsesAttrs ); break;
  2398. case FUNC_TIMESEGS: return CreatePlainSorter<MatchTimeSegments_fn> ( bKbuffer, iMaxMatches, bUsesAttrs ); break;
  2399. case FUNC_GENERIC2: return CreatePlainSorter<MatchGeneric2_fn> ( bKbuffer, iMaxMatches, bUsesAttrs ); break;
  2400. case FUNC_GENERIC3: return CreatePlainSorter<MatchGeneric3_fn> ( bKbuffer, iMaxMatches, bUsesAttrs ); break;
  2401. case FUNC_GENERIC4: return CreatePlainSorter<MatchGeneric4_fn> ( bKbuffer, iMaxMatches, bUsesAttrs ); break;
  2402. case FUNC_GENERIC5: return CreatePlainSorter<MatchGeneric5_fn> ( bKbuffer, iMaxMatches, bUsesAttrs ); break;
  2403. case FUNC_CUSTOM: return CreatePlainSorter<MatchCustom_fn> ( bKbuffer, iMaxMatches, bUsesAttrs ); break;
  2404. case FUNC_EXPR: return CreatePlainSorter<MatchExpr_fn> ( bKbuffer, iMaxMatches, bUsesAttrs ); break;
  2405. default: return NULL;
  2406. }
  2407. }
  2408. ISphMatchSorter * sphCreateQueue ( const CSphQuery * pQuery, const CSphSchema & tSchema,
  2409. CSphString & sError, bool bComputeItems, CSphSchema * pExtra, CSphAttrUpdateEx * pUpdate, bool * pZonespanlist )
  2410. {
  2411. // prepare for descent
  2412. ISphMatchSorter * pTop = NULL;
  2413. CSphMatchComparatorState tStateMatch, tStateGroup;
  2414. sError = "";
  2415. bool bHasZonespanlist = false;
  2416. bool bNeedZonespanlist = false;
  2417. ///////////////////////////////////////
  2418. // build incoming and outgoing schemas
  2419. ///////////////////////////////////////
  2420. // sorter schema
  2421. // adds computed expressions and groupby stuff on top of the original index schema
  2422. CSphSchema tSorterSchema = tSchema;
  2423. // setup overrides, detach them into dynamic part
  2424. ARRAY_FOREACH ( i, pQuery->m_dOverrides )
  2425. {
  2426. const char * sAttr = pQuery->m_dOverrides[i].m_sAttr.cstr();
  2427. int iIndex = tSorterSchema.GetAttrIndex ( sAttr );
  2428. if ( iIndex<0 )
  2429. {
  2430. sError.SetSprintf ( "override attribute '%s' not found", sAttr );
  2431. return NULL;
  2432. }
  2433. CSphColumnInfo tCol = tSorterSchema.GetAttr ( iIndex );
  2434. tCol.m_eStage = SPH_EVAL_OVERRIDE;
  2435. tSorterSchema.AddAttr ( tCol, true );
  2436. if ( pExtra )
  2437. pExtra->AddAttr ( tCol, true );
  2438. tSorterSchema.RemoveAttr ( iIndex );
  2439. }
  2440. // setup @geodist
  2441. if ( pQuery->m_bGeoAnchor && tSorterSchema.GetAttrIndex ( "@geodist" )<0 )
  2442. {
  2443. ExprGeodist_t * pExpr = new ExprGeodist_t ();
  2444. if ( !pExpr->Setup ( pQuery, tSorterSchema, sError ) )
  2445. {
  2446. pExpr->Release ();
  2447. return NULL;
  2448. }
  2449. CSphColumnInfo tCol ( "@geodist", SPH_ATTR_FLOAT );
  2450. tCol.m_pExpr = pExpr; // takes ownership, no need to for explicit pExpr release
  2451. tCol.m_eStage = SPH_EVAL_PREFILTER; // OPTIMIZE? actual stage depends on usage
  2452. tSorterSchema.AddAttr ( tCol, true );
  2453. if ( pExtra )
  2454. pExtra->AddAttr ( tCol, true );
  2455. }
  2456. // setup @expr
  2457. if ( pQuery->m_eSort==SPH_SORT_EXPR && tSorterSchema.GetAttrIndex ( "@expr" )<0 )
  2458. {
  2459. CSphColumnInfo tCol ( "@expr", SPH_ATTR_FLOAT ); // enforce float type for backwards compatibility (ie. too lazy to fix those tests right now)
  2460. tCol.m_pExpr = sphExprParse ( pQuery->m_sSortBy.cstr(), tSorterSchema, NULL, NULL, sError, pExtra, NULL, &bHasZonespanlist );
  2461. bNeedZonespanlist |= bHasZonespanlist;
  2462. if ( !tCol.m_pExpr )
  2463. return NULL;
  2464. tCol.m_eStage = SPH_EVAL_PRESORT;
  2465. tSorterSchema.AddAttr ( tCol, true );
  2466. }
  2467. // expressions from select items
  2468. CSphVector<CSphColumnInfo> dAggregates;
  2469. bool bHasCount = false;
  2470. if ( bComputeItems )
  2471. ARRAY_FOREACH ( iItem, pQuery->m_dItems )
  2472. {
  2473. const CSphQueryItem & tItem = pQuery->m_dItems[iItem];
  2474. const CSphString & sExpr = tItem.m_sExpr;
  2475. bool bIsCount = IsCount(sExpr);
  2476. bHasCount |= bIsCount;
  2477. if ( bIsCount && sExpr.cstr()[0]!='@' )
  2478. {
  2479. CSphString & sExprW = const_cast < CSphString & > ( sExpr );
  2480. sExprW = "@count";
  2481. }
  2482. // for now, just always pass "plain" attrs from index to sorter; they will be filtered on searchd level
  2483. if ( sExpr=="*"
  2484. || ( tSchema.GetAttrIndex ( sExpr.cstr() )>=0 && tItem.m_eAggrFunc==SPH_AGGR_NONE )
  2485. || IsGroupby(sExpr) || bIsCount )
  2486. {
  2487. continue;
  2488. }
  2489. // not an attribute? must be an expression, and must be aliased
  2490. if ( tItem.m_sAlias.IsEmpty() )
  2491. {
  2492. sError.SetSprintf ( "expression '%s' must be aliased (use 'expr AS alias' syntax)", tItem.m_sExpr.cstr() );
  2493. return NULL;
  2494. }
  2495. // tricky part
  2496. // we might be fed with precomputed matches, but it's all or nothing
  2497. // the incoming match either does not have anything computed, or it has everything
  2498. if ( tSchema.GetAttrsCount()==tSorterSchema.GetAttrsCount() )
  2499. {
  2500. // so far we had everything, so we might be precomputed, and the alias just might already exist
  2501. int iSuspect = tSchema.GetAttrIndex ( tItem.m_sAlias.cstr() );
  2502. if ( iSuspect>=0 )
  2503. {
  2504. // however, let's ensure that it was an expression
  2505. if ( tSchema.GetAttr ( iSuspect ).m_pExpr.Ptr()!=NULL )
  2506. continue;
  2507. // otherwise we're not precomputed, *and* have a duplicate name
  2508. sError.SetSprintf ( "alias '%s' must be unique (conflicts with an index attribute)", tItem.m_sAlias.cstr() );
  2509. return NULL;
  2510. }
  2511. } else
  2512. {
  2513. // we are adding stuff, must not be precomputed, check for both kinds of dupes
  2514. if ( tSchema.GetAttrIndex ( tItem.m_sAlias.cstr() )>=0 )
  2515. {
  2516. sError.SetSprintf ( "alias '%s' must be unique (conflicts with an index attribute)", tItem.m_sAlias.cstr() );
  2517. return NULL;
  2518. }
  2519. if ( tSorterSchema.GetAttrIndex ( tItem.m_sAlias.cstr() )>=0 )
  2520. {
  2521. sError.SetSprintf ( "alias '%s' must be unique (conflicts with another alias)", tItem.m_sAlias.cstr() );
  2522. return NULL;
  2523. }
  2524. }
  2525. // a new and shiny expression, lets parse
  2526. CSphColumnInfo tExprCol ( tItem.m_sAlias.cstr(), SPH_ATTR_NONE );
  2527. // tricky bit
  2528. // GROUP_CONCAT() adds an implicit TO_STRING() conversion on top of its argument
  2529. // and then the aggregate operation simply concatenates strings as matches arrive
  2530. // ideally, we would instead pass ownership of the expression to G_C() implementation
  2531. // and also the original expression type, and let the string conversion happen in G_C() itself
  2532. // but that ideal route seems somewhat more complicated in the current architecture
  2533. if ( tItem.m_eAggrFunc==SPH_AGGR_CAT )
  2534. {
  2535. CSphString sExpr2;
  2536. sExpr2.SetSprintf ( "TO_STRING(%s)", sExpr.cstr() );
  2537. tExprCol.m_pExpr = sphExprParse ( sExpr2.cstr(), tSorterSchema, &tExprCol.m_eAttrType, &tExprCol.m_bWeight, sError, pExtra, NULL, &bHasZonespanlist );
  2538. } else
  2539. {
  2540. tExprCol.m_pExpr = sphExprParse ( sExpr.cstr(), tSorterSchema, &tExprCol.m_eAttrType, &tExprCol.m_bWeight, sError, pExtra, NULL, &bHasZonespanlist );
  2541. }
  2542. bNeedZonespanlist |= bHasZonespanlist;
  2543. tExprCol.m_eAggrFunc = tItem.m_eAggrFunc;
  2544. if ( !tExprCol.m_pExpr )
  2545. {
  2546. sError.SetSprintf ( "parse error: %s", sError.cstr() );
  2547. return NULL;
  2548. }
  2549. // force AVG() to be computed in floats
  2550. if ( tExprCol.m_eAggrFunc==SPH_AGGR_AVG )
  2551. {
  2552. tExprCol.m_eAttrType = SPH_ATTR_FLOAT;
  2553. tExprCol.m_tLocator.m_iBitCount = 32;
  2554. }
  2555. // force GROUP_CONCAT() to be computed as strings
  2556. if ( tExprCol.m_eAggrFunc==SPH_AGGR_CAT )
  2557. {
  2558. tExprCol.m_eAttrType = SPH_ATTR_STRINGPTR;
  2559. tExprCol.m_tLocator.m_iBitCount = ROWITEMPTR_BITS;
  2560. }
  2561. // postpone aggregates, add non-aggregates
  2562. if ( tExprCol.m_eAggrFunc==SPH_AGGR_NONE )
  2563. {
  2564. // by default, lets be lazy and compute expressions as late as possible
  2565. tExprCol.m_eStage = SPH_EVAL_FINAL;
  2566. if ( bHasZonespanlist )
  2567. tExprCol.m_eStage = SPH_EVAL_PRESORT;
  2568. // is this expression used in filter?
  2569. // OPTIMIZE? hash filters and do hash lookups?
  2570. ARRAY_FOREACH ( i, pQuery->m_dFilters )
  2571. if ( pQuery->m_dFilters[i].m_sAttrName==tExprCol.m_sName )
  2572. {
  2573. if ( tExprCol.m_bWeight )
  2574. {
  2575. tExprCol.m_eStage = SPH_EVAL_PRESORT; // special, weight filter ( short cut )
  2576. break;
  2577. }
  2578. // so we are about to add a filter condition
  2579. // but it might depend on some preceding columns
  2580. // lets detect those and move them to prefilter \ presort phase too
  2581. CSphVector<int> dCur;
  2582. tExprCol.m_pExpr->GetDependencyColumns ( dCur );
  2583. // usual filter
  2584. tExprCol.m_eStage = SPH_EVAL_PREFILTER;
  2585. ARRAY_FOREACH ( i, dCur )
  2586. {
  2587. const CSphColumnInfo & tCol = tSorterSchema.GetAttr ( dCur[i] );
  2588. if ( tCol.m_bWeight )
  2589. {
  2590. tExprCol.m_eStage = SPH_EVAL_PRESORT;
  2591. tExprCol.m_bWeight = true;
  2592. }
  2593. if ( tCol.m_pExpr.Ptr() )
  2594. {
  2595. tCol.m_pExpr->GetDependencyColumns ( dCur );
  2596. }
  2597. }
  2598. dCur.Uniq();
  2599. ARRAY_FOREACH ( i, dCur )
  2600. {
  2601. CSphColumnInfo & tDep = const_cast < CSphColumnInfo & > ( tSorterSchema.GetAttr ( dCur[i] ) );
  2602. if ( tDep.m_eStage>tExprCol.m_eStage )
  2603. tDep.m_eStage = tExprCol.m_eStage;
  2604. }
  2605. break;
  2606. }
  2607. // add it!
  2608. // NOTE, "final" stage might need to be fixed up later
  2609. // we'll do that when parsing sorting clause
  2610. tSorterSchema.AddAttr ( tExprCol, true );
  2611. } else
  2612. {
  2613. tExprCol.m_eStage = SPH_EVAL_PRESORT; // sorter expects computed expression
  2614. dAggregates.Add ( tExprCol );
  2615. }
  2616. }
  2617. // expressions wrapped in aggregates must be at the very end of pre-groupby match
  2618. ARRAY_FOREACH ( i, dAggregates )
  2619. {
  2620. tSorterSchema.AddAttr ( dAggregates[i], true );
  2621. if ( pExtra )
  2622. pExtra->AddAttr ( dAggregates[i], true );
  2623. }
  2624. ////////////////////////////////////////////
  2625. // setup groupby settings, create shortcuts
  2626. ////////////////////////////////////////////
  2627. CSphGroupSorterSettings tSettings;
  2628. if ( !SetupGroupbySettings ( pQuery, tSorterSchema, tSettings, sError ) )
  2629. return NULL;
  2630. const bool bGotGroupby = !pQuery->m_sGroupBy.IsEmpty(); // or else, check in SetupGroupbySettings() would already fail
  2631. const bool bGotDistinct = ( tSettings.m_tDistinctLoc.m_iBitOffset>=0 );
  2632. // now lets add @groupby etc if needed
  2633. if ( bGotGroupby && tSorterSchema.GetAttrIndex ( "@groupby" )<0 )
  2634. {
  2635. CSphColumnInfo tGroupby ( "@groupby", tSettings.m_pGrouper->GetResultType() );
  2636. CSphColumnInfo tCount ( "@count", SPH_ATTR_INTEGER );
  2637. CSphColumnInfo tDistinct ( "@distinct", SPH_ATTR_INTEGER );
  2638. tGroupby.m_eStage = SPH_EVAL_SORTER;
  2639. tCount.m_eStage = SPH_EVAL_SORTER;
  2640. tDistinct.m_eStage = SPH_EVAL_SORTER;
  2641. tSorterSchema.AddAttr ( tGroupby, true );
  2642. if ( pExtra )
  2643. pExtra->AddAttr ( tGroupby, true );
  2644. tSorterSchema.AddAttr ( tCount, true );
  2645. if ( pExtra )
  2646. pExtra->AddAttr ( tCount, true );
  2647. if ( bGotDistinct )
  2648. {
  2649. tSorterSchema.AddAttr ( tDistinct, true );
  2650. if ( pExtra )
  2651. pExtra->AddAttr ( tDistinct, true );
  2652. }
  2653. }
  2654. #define LOC_CHECK(_cond,_msg) if (!(_cond)) { sError = "invalid schema: " _msg; return false; }
  2655. int iGroupby = tSorterSchema.GetAttrIndex ( "@groupby" );
  2656. if ( iGroupby>=0 )
  2657. {
  2658. tSettings.m_bDistinct = bGotDistinct;
  2659. tSettings.m_tLocGroupby = tSorterSchema.GetAttr ( iGroupby ).m_tLocator;
  2660. LOC_CHECK ( tSettings.m_tLocGroupby.m_bDynamic, "@groupby must be dynamic" );
  2661. int iCount = tSorterSchema.GetAttrIndex ( "@count" );
  2662. LOC_CHECK ( iCount>=0, "missing @count" );
  2663. tSettings.m_tLocCount = tSorterSchema.GetAttr ( iCount ).m_tLocator;
  2664. LOC_CHECK ( tSettings.m_tLocCount.m_bDynamic, "@count must be dynamic" );
  2665. int iDistinct = tSorterSchema.GetAttrIndex ( "@distinct" );
  2666. if ( bGotDistinct )
  2667. {
  2668. LOC_CHECK ( iDistinct>=0, "missing @distinct" );
  2669. tSettings.m_tLocDistinct = tSorterSchema.GetAttr ( iDistinct ).m_tLocator;
  2670. LOC_CHECK ( tSettings.m_tLocDistinct.m_bDynamic, "@distinct must be dynamic" );
  2671. } else
  2672. {
  2673. LOC_CHECK ( iDistinct<=0, "unexpected @distinct" );
  2674. }
  2675. }
  2676. if ( bHasCount )
  2677. {
  2678. LOC_CHECK ( tSorterSchema.GetAttrIndex ( "@count" )>=0, "Count(*) or @count is queried, but not available in the schema" );
  2679. }
  2680. #undef LOC_CHECK
  2681. ////////////////////////////////////
  2682. // choose and setup sorting functor
  2683. ////////////////////////////////////
  2684. ESphSortFunc eMatchFunc = FUNC_REL_DESC;
  2685. ESphSortFunc eGroupFunc = FUNC_REL_DESC;
  2686. bool bUsesAttrs = false;
  2687. bool bRandomize = false;
  2688. // matches sorting function
  2689. if ( pQuery->m_eSort==SPH_SORT_EXTENDED )
  2690. {
  2691. int dAttrs [ CSphMatchComparatorState::MAX_ATTRS ];
  2692. ESortClauseParseResult eRes = sphParseSortClause ( pQuery, pQuery->m_sSortBy.cstr(), tSorterSchema, eMatchFunc, tStateMatch, dAttrs, sError, pExtra );
  2693. if ( eRes==SORT_CLAUSE_ERROR )
  2694. return NULL;
  2695. if ( eRes==SORT_CLAUSE_RANDOM )
  2696. bRandomize = true;
  2697. bUsesAttrs = FixupDependency ( tSorterSchema, dAttrs, CSphMatchComparatorState::MAX_ATTRS );
  2698. if ( !bUsesAttrs )
  2699. {
  2700. for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
  2701. {
  2702. ESphSortKeyPart ePart = tStateMatch.m_eKeypart[i];
  2703. if ( ePart==SPH_KEYPART_INT || ePart==SPH_KEYPART_FLOAT || ePart==SPH_KEYPART_STRING )
  2704. bUsesAttrs = true;
  2705. }
  2706. }
  2707. bUsesAttrs |= SetupSortStringRemap ( tSorterSchema, tStateMatch, dAttrs );
  2708. } else if ( pQuery->m_eSort==SPH_SORT_EXPR )
  2709. {
  2710. tStateMatch.m_eKeypart[0] = SPH_KEYPART_INT;
  2711. tStateMatch.m_tLocator[0] = tSorterSchema.GetAttr ( tSorterSchema.GetAttrIndex ( "@expr" ) ).m_tLocator;
  2712. tStateMatch.m_eKeypart[1] = SPH_KEYPART_ID;
  2713. tStateMatch.m_uAttrDesc = 1;
  2714. eMatchFunc = FUNC_EXPR;
  2715. bUsesAttrs = true;
  2716. } else
  2717. {
  2718. // check sort-by attribute
  2719. if ( pQuery->m_eSort!=SPH_SORT_RELEVANCE )
  2720. {
  2721. int iSortAttr = tSorterSchema.GetAttrIndex ( pQuery->m_sSortBy.cstr() );
  2722. if ( iSortAttr<0 )
  2723. {
  2724. sError.SetSprintf ( "sort-by attribute '%s' not found", pQuery->m_sSortBy.cstr() );
  2725. return NULL;
  2726. }
  2727. const CSphColumnInfo & tAttr = tSorterSchema.GetAttr ( iSortAttr );
  2728. tStateMatch.m_eKeypart[0] = Attr2Keypart ( tAttr.m_eAttrType );
  2729. tStateMatch.m_tLocator[0] = tAttr.m_tLocator;
  2730. int dAttrs [ CSphMatchComparatorState::MAX_ATTRS ];
  2731. dAttrs[0] = iSortAttr;
  2732. bUsesAttrs |= SetupSortStringRemap ( tSorterSchema, tStateMatch, dAttrs );
  2733. }
  2734. // find out what function to use and whether it needs attributes
  2735. bUsesAttrs = true;
  2736. switch ( pQuery->m_eSort )
  2737. {
  2738. case SPH_SORT_ATTR_DESC: eMatchFunc = FUNC_ATTR_DESC; break;
  2739. case SPH_SORT_ATTR_ASC: eMatchFunc = FUNC_ATTR_ASC; break;
  2740. case SPH_SORT_TIME_SEGMENTS: eMatchFunc = FUNC_TIMESEGS; break;
  2741. case SPH_SORT_RELEVANCE: eMatchFunc = FUNC_REL_DESC; bUsesAttrs = false; break;
  2742. default:
  2743. sError.SetSprintf ( "unknown sorting mode %d", pQuery->m_eSort );
  2744. return NULL;
  2745. }
  2746. }
  2747. // groups sorting function
  2748. if ( bGotGroupby )
  2749. {
  2750. int dAttrs [ CSphMatchComparatorState::MAX_ATTRS ];
  2751. ESortClauseParseResult eRes = sphParseSortClause ( pQuery, pQuery->m_sGroupSortBy.cstr(), tSorterSchema, eGroupFunc, tStateGroup, dAttrs, sError, pExtra );
  2752. if ( eRes==SORT_CLAUSE_ERROR || eRes==SORT_CLAUSE_RANDOM )
  2753. {
  2754. if ( eRes==SORT_CLAUSE_RANDOM )
  2755. sError.SetSprintf ( "groups can not be sorted by @random" );
  2756. return NULL;
  2757. }
  2758. int idx = tSorterSchema.GetAttrIndex ( pQuery->m_sGroupBy.cstr() );
  2759. if ( pExtra )
  2760. pExtra->AddAttr ( tSorterSchema.GetAttr ( idx ), true );
  2761. FixupDependency ( tSorterSchema, &idx, 1 );
  2762. FixupDependency ( tSorterSchema, dAttrs, CSphMatchComparatorState::MAX_ATTRS );
  2763. // GroupSortBy str attributes setup
  2764. bUsesAttrs |= SetupSortStringRemap ( tSorterSchema, tStateGroup, dAttrs );
  2765. }
  2766. ///////////////////
  2767. // spawn the queue
  2768. ///////////////////
  2769. if ( !bGotGroupby )
  2770. {
  2771. if ( pUpdate )
  2772. pTop = new CSphUpdateQueue ( pQuery->m_iMaxMatches, pUpdate );
  2773. else
  2774. pTop = CreatePlainSorter ( eMatchFunc, pQuery->m_bSortKbuffer, pQuery->m_iMaxMatches, bUsesAttrs );
  2775. } else
  2776. {
  2777. pTop = sphCreateSorter1st ( eMatchFunc, eGroupFunc, pQuery, tSettings );
  2778. }
  2779. if ( !pTop )
  2780. {
  2781. sError.SetSprintf ( "internal error: unhandled sorting mode (match-sort=%d, group=%d, group-sort=%d)",
  2782. eMatchFunc, bGotGroupby, eGroupFunc );
  2783. return NULL;
  2784. }
  2785. switch ( pQuery->m_eCollation )
  2786. {
  2787. case SPH_COLLATION_LIBC_CI:
  2788. tStateMatch.m_fnStrCmp = CollateLibcCI;
  2789. tStateGroup.m_fnStrCmp = CollateLibcCI;
  2790. break;
  2791. case SPH_COLLATION_LIBC_CS:
  2792. tStateMatch.m_fnStrCmp = CollateLibcCS;
  2793. tStateGroup.m_fnStrCmp = CollateLibcCS;
  2794. break;
  2795. case SPH_COLLATION_UTF8_GENERAL_CI:
  2796. tStateMatch.m_fnStrCmp = CollateUtf8GeneralCI;
  2797. tStateGroup.m_fnStrCmp = CollateUtf8GeneralCI;
  2798. break;
  2799. case SPH_COLLATION_BINARY:
  2800. tStateMatch.m_fnStrCmp = CollateBinary;
  2801. tStateGroup.m_fnStrCmp = CollateBinary;
  2802. break;
  2803. }
  2804. assert ( pTop );
  2805. pTop->SetState ( tStateMatch );
  2806. pTop->SetGroupState ( tStateGroup );
  2807. pTop->SetSchema ( tSorterSchema );
  2808. pTop->m_bRandomize = bRandomize;
  2809. if ( bRandomize )
  2810. sphAutoSrand ();
  2811. if ( pZonespanlist )
  2812. *pZonespanlist = bNeedZonespanlist;
  2813. return pTop;
  2814. }
  2815. void sphFlattenQueue ( ISphMatchSorter * pQueue, CSphQueryResult * pResult, int iTag )
  2816. {
  2817. if ( pQueue && pQueue->GetLength() )
  2818. {
  2819. int iOffset = pResult->m_dMatches.GetLength ();
  2820. pResult->m_dMatches.Resize ( iOffset + pQueue->GetLength() );
  2821. pQueue->Flatten ( &pResult->m_dMatches[iOffset], iTag );
  2822. }
  2823. }
  2824. bool sphHasExpressions ( const CSphQuery & tQuery, const CSphSchema & tSchema )
  2825. {
  2826. ARRAY_FOREACH ( i, tQuery.m_dItems )
  2827. {
  2828. const CSphString & sExpr = tQuery.m_dItems[i].m_sExpr;
  2829. if ( !( sExpr=="*"
  2830. || ( tSchema.GetAttrIndex ( sExpr.cstr() )>=0 && tQuery.m_dItems[i].m_eAggrFunc==SPH_AGGR_NONE && tQuery.m_dItems[i].m_sAlias.IsEmpty() )
  2831. || IsGroupbyMagic(sExpr) ) )
  2832. return true;
  2833. }
  2834. return false;
  2835. }
  2836. //
  2837. // $Id$
  2838. //