aggrexpr.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748
  1. //
  2. // Copyright (c) 2017-2023, Manticore Software LTD (https://manticoresearch.com)
  3. // Copyright (c) 2001-2016, Andrew Aksyonoff
  4. // Copyright (c) 2008-2016, Sphinx Technologies Inc
  5. // All rights reserved
  6. //
  7. // This program is free software; you can redistribute it and/or modify
  8. // it under the terms of the GNU General Public License. You should have
  9. // received a copy of the GPL license along with this program; if you
  10. // did not, you can find it at http://www.gnu.org/
  11. //
  12. #include <cfloat>
  13. #include <climits>
  14. #include <math.h>
  15. #include "datetime.h"
  16. #include "exprtraits.h"
  17. #include "sphinxjsonquery.h"
  18. #include "sphinxint.h"
  19. #include "aggrexpr.h"
  20. // the aggr range implementation
  21. template < bool FLOAT >
  22. class AggrRangeExpr_T : public Expr_ArgVsSet_T<int>
  23. {
  24. AggrRangeSetting_t m_tRanges;
  25. public:
  26. AggrRangeExpr_T ( ISphExpr * pAttr, const AggrRangeSetting_t & tRanges )
  27. : Expr_ArgVsSet_T ( pAttr )
  28. , m_tRanges ( tRanges )
  29. {
  30. }
  31. int IntEval ( const CSphMatch & tMatch ) const final
  32. {
  33. int iBucket = GetBucket ( tMatch );
  34. return iBucket;
  35. }
  36. protected:
  37. int GetBucket ( const CSphMatch & tMatch ) const
  38. {
  39. if_const ( FLOAT )
  40. {
  41. double fVal = m_pArg->Eval ( tMatch );
  42. if ( m_tRanges.m_bOpenLeft && fVal<m_tRanges.First().m_fTo )
  43. return m_tRanges.First().m_iIdx;
  44. if ( m_tRanges.m_bOpenRight && fVal>=m_tRanges.Last().m_fFrom )
  45. return m_tRanges.Last().m_iIdx;
  46. if ( !m_tRanges.m_bOpenLeft && fVal<m_tRanges.First().m_fFrom )
  47. return m_tRanges.GetLength();
  48. if ( !m_tRanges.m_bOpenRight && fVal>=m_tRanges.Last().m_fTo )
  49. return m_tRanges.GetLength();
  50. int iItem = m_tRanges.GetFirst ([&](const RangeSetting_t& tRange) { return (tRange.m_fFrom<=fVal && fVal<tRange.m_fTo); });
  51. if ( iItem==-1 )
  52. return m_tRanges.GetLength();
  53. return m_tRanges[iItem].m_iIdx;
  54. } else
  55. {
  56. int64_t iVal = m_pArg->Int64Eval ( tMatch );
  57. if ( m_tRanges.m_bOpenLeft && iVal<m_tRanges.First().m_iTo )
  58. return m_tRanges.First().m_iIdx;
  59. if ( m_tRanges.m_bOpenRight && iVal>=m_tRanges.Last().m_iFrom )
  60. return m_tRanges.Last().m_iIdx;
  61. if ( !m_tRanges.m_bOpenLeft && iVal<m_tRanges.First().m_iFrom )
  62. return m_tRanges.GetLength();
  63. if ( !m_tRanges.m_bOpenRight && iVal>=m_tRanges.Last().m_iTo )
  64. return m_tRanges.GetLength();
  65. int iItem = m_tRanges.GetFirst ( [&] ( const RangeSetting_t & tRange ) { return ( tRange.m_iFrom<=iVal && iVal<tRange.m_iTo ); } );
  66. if ( iItem==-1 )
  67. return m_tRanges.GetLength();
  68. return m_tRanges[iItem].m_iIdx;
  69. }
  70. }
  71. uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable ) final
  72. {
  73. EXPR_CLASS_NAME("AggrRangeExpr_T");
  74. CALC_POD_HASH ( m_tRanges );
  75. return CALC_DEP_HASHES();
  76. }
  77. ISphExpr * Clone() const final
  78. {
  79. return new AggrRangeExpr_T ( *this );
  80. }
  81. private:
  82. AggrRangeExpr_T ( const AggrRangeExpr_T & rhs )
  83. : Expr_ArgVsSet_T ( rhs )
  84. , m_tRanges ( rhs.m_tRanges )
  85. {
  86. }
  87. };
  88. ISphExpr * CreateExprRange ( ISphExpr * pAttr, const AggrRangeSetting_t & tRanges )
  89. {
  90. if ( tRanges.m_bFloat )
  91. return new AggrRangeExpr_T<true> ( pAttr, tRanges );
  92. else
  93. return new AggrRangeExpr_T<false> ( pAttr, tRanges );
  94. }
  95. static void DumpRange ( const RangeSetting_t & tRange, bool bFloat, StringBuilder_c & sRes )
  96. {
  97. if ( bFloat )
  98. sRes.Appendf ( ", {range_from=%f, range_to=%f}", tRange.m_fFrom, tRange.m_fTo );
  99. else
  100. sRes.Appendf ( ", {range_from=" INT64_FMT ", range_to=" INT64_FMT "}", tRange.m_iFrom, tRange.m_iTo );
  101. }
  102. static void DumpRangeOpenLeft ( const RangeSetting_t & tRange, bool bFloat, StringBuilder_c & sRes )
  103. {
  104. if ( bFloat )
  105. sRes.Appendf ( ", {range_to=%f}", tRange.m_fTo );
  106. else
  107. sRes.Appendf ( ", {range_to=" INT64_FMT "}", tRange.m_iTo );
  108. }
  109. static void DumpRangeOpenRigth ( const RangeSetting_t & tRange, bool bFloat, StringBuilder_c & sRes )
  110. {
  111. if ( bFloat )
  112. sRes.Appendf ( ", {range_from=%f}", tRange.m_fFrom );
  113. else
  114. sRes.Appendf ( ", {range_from=" INT64_FMT "}", tRange.m_iFrom );
  115. }
  116. static void DumpRange ( int iItem, const AggrRangeSetting_t & tRanges, StringBuilder_c & sRes )
  117. {
  118. const bool bFloat = tRanges.m_bFloat;
  119. const RangeSetting_t & tRange = tRanges[iItem];
  120. if ( iItem==0 && tRanges.m_bOpenLeft )
  121. DumpRangeOpenLeft ( tRange, bFloat, sRes );
  122. else if ( iItem==tRanges.GetLength()-1 && tRanges.m_bOpenRight )
  123. DumpRangeOpenRigth ( tRange, bFloat, sRes );
  124. else
  125. DumpRange ( tRange, bFloat, sRes );
  126. }
  127. static CSphString DumpAggrRange ( const CSphString & sCol, const AggrRangeSetting_t & tRanges )
  128. {
  129. assert ( tRanges.GetLength() );
  130. StringBuilder_c sRes;
  131. sRes.Appendf ( "range(%s", sCol.cstr() );
  132. ARRAY_FOREACH ( i, tRanges )
  133. DumpRange ( i, tRanges, sRes );
  134. sRes += ")";
  135. return CSphString ( sRes );
  136. }
  137. static CSphString DumpAggrRange ( const CSphString & sCol, const AggrDateRangeSetting_t & tRanges )
  138. {
  139. assert ( tRanges.GetLength() );
  140. StringBuilder_c sRes;
  141. sRes.Appendf ( "date_range(%s", sCol.cstr() );
  142. for ( DateRangeSetting_t & tRange : tRanges )
  143. {
  144. if ( !tRange.m_sFrom.IsEmpty() && !tRange.m_sTo.IsEmpty() )
  145. sRes.Appendf ( ", {range_from='%s', range_to='%s'}", tRange.m_sFrom.cstr(), tRange.m_sTo.cstr() );
  146. else if ( tRange.m_sFrom.IsEmpty() )
  147. sRes.Appendf ( ", {range_to='%s'}", tRange.m_sTo.cstr() );
  148. else
  149. sRes.Appendf ( ", {range_from='%s'}", tRange.m_sFrom.cstr() );
  150. }
  151. sRes += ")";
  152. return CSphString ( sRes );
  153. }
  154. bool ParseAggrRange ( const VecTraits_T< VecTraits_T < CSphNamedVariant > > & dSrcRanges, bool bDate, int iNow, AggrRangeSetting_t & tRanges, CSphString & sError )
  155. {
  156. if ( dSrcRanges.IsEmpty() )
  157. {
  158. sError = "at least 1 range expected";
  159. return false;
  160. }
  161. ARRAY_FOREACH ( iItem, dSrcRanges )
  162. {
  163. const auto & dItem = dSrcRanges[iItem];
  164. if ( dItem.IsEmpty() )
  165. {
  166. sError.SetSprintf ( "empty range %d", iItem );
  167. return false;
  168. }
  169. bool bHasFrom = false;
  170. bool bHasTo = false;
  171. bool bFloatFrom = false;
  172. bool bFloatTo = false;
  173. auto & tRange = tRanges.Add();
  174. tRange.m_iIdx = iItem;
  175. ARRAY_FOREACH ( iVal, dItem )
  176. {
  177. const auto & tVal = dItem[iVal];
  178. if ( tVal.m_sKey=="range_from" )
  179. {
  180. if ( tVal.m_eType==VariantType_e::BIGINT )
  181. tRange.m_iFrom = tVal.m_iValue;
  182. else if ( tVal.m_eType==VariantType_e::FLOAT )
  183. {
  184. tRange.m_fFrom = tVal.m_fValue;
  185. bFloatFrom = true;
  186. } else if ( tVal.m_eType==VariantType_e::STRING )
  187. {
  188. time_t tFrom = 0;
  189. if ( !ParseDateMath ( tVal.m_sValue, iNow, tFrom ) )
  190. {
  191. sError.SetSprintf ( "date_range invalid from value '%s'", tVal.m_sValue.cstr() );
  192. return false;
  193. }
  194. tRange.m_iFrom = tFrom;
  195. } else
  196. {
  197. sError.SetSprintf ( "%s %d invalid value type %d", ( bDate ? "date_range" : "range" ), iItem, (int)tVal.m_eType );
  198. return false;
  199. }
  200. bHasFrom = true;
  201. } else if ( tVal.m_sKey=="range_to" )
  202. {
  203. if ( tVal.m_eType==VariantType_e::BIGINT )
  204. tRange.m_iTo = tVal.m_iValue;
  205. else if ( tVal.m_eType==VariantType_e::FLOAT )
  206. {
  207. tRange.m_fTo = tVal.m_fValue;
  208. bFloatTo = true;
  209. } else if ( tVal.m_eType==VariantType_e::STRING )
  210. {
  211. time_t tTo = 0;
  212. if ( !ParseDateMath ( tVal.m_sValue, iNow, tTo ) )
  213. {
  214. sError.SetSprintf ( "date_range invalid to value '%s'", tVal.m_sValue.cstr() );
  215. return false;
  216. }
  217. tRange.m_iTo = tTo;
  218. } else
  219. {
  220. sError.SetSprintf ( "%s %d invalid value type %d", ( bDate ? "date_range" : "range" ), iItem, (int)tVal.m_eType );
  221. return false;
  222. }
  223. bHasTo = true;
  224. }
  225. }
  226. if ( !bHasFrom && !bHasTo )
  227. {
  228. sError.SetSprintf ( "empty %s %d", ( bDate ? "date_range" : "range" ), iItem );
  229. return false;
  230. }
  231. if ( !bHasFrom )
  232. {
  233. tRanges.m_bOpenLeft = true;
  234. if ( bFloatFrom || bFloatTo || tRanges.m_bFloat )
  235. tRange.m_fFrom = -FLT_MAX;
  236. else
  237. tRange.m_iFrom = INT64_MIN;
  238. }
  239. if ( !bHasTo )
  240. {
  241. tRanges.m_bOpenRight = true;
  242. if ( bFloatFrom || bFloatTo || tRanges.m_bFloat )
  243. tRange.m_fTo = FLT_MAX;
  244. else
  245. tRange.m_iTo = INT64_MAX;
  246. }
  247. // convert both values to float
  248. if ( bFloatFrom^bFloatTo )
  249. {
  250. if ( bFloatFrom )
  251. tRange.m_fTo = tRange.m_iTo;
  252. else
  253. tRange.m_fFrom = tRange.m_iFrom;
  254. } else if ( tRanges.m_bFloat && !( bFloatFrom && bFloatTo ) )
  255. {
  256. tRange.m_fTo = tRange.m_iTo;
  257. tRange.m_fFrom = tRange.m_iFrom;
  258. }
  259. // convert all prevoiuse values into floats
  260. if ( ( bFloatFrom || bFloatTo ) && !tRanges.m_bFloat )
  261. {
  262. if ( tRanges.GetLength()>1 )
  263. {
  264. tRanges.Slice( 0, tRanges.GetLength()-1 ).for_each ( [] ( auto & tRange )
  265. {
  266. tRange.m_fFrom = tRange.m_iFrom;
  267. tRange.m_fTo = tRange.m_iTo;
  268. } );
  269. if ( tRanges.m_bOpenLeft )
  270. tRanges[0].m_fFrom = -FLT_MAX;
  271. }
  272. tRanges.m_bFloat = true;
  273. }
  274. if ( tRanges.m_bFloat )
  275. tRanges.Sort ( ::bind ( &RangeSetting_t::m_fFrom ) );
  276. else
  277. tRanges.Sort ( ::bind ( &RangeSetting_t::m_iFrom ) );
  278. }
  279. return true;
  280. }
  281. CSphString GetAggrName ( int iItem, const CSphString & sCol )
  282. {
  283. CSphString sName;
  284. sName.SetSprintf ( "aggs_%d_%s", iItem, sCol.cstr() );
  285. return sName;
  286. }
  287. static void FormatKeyFloat ( const RangeSetting_t & tRange, bool bHasFrom, bool bHasTo, RangeKeyDesc_t & tDesc )
  288. {
  289. assert ( bHasFrom || bHasTo );
  290. if ( bHasFrom )
  291. tDesc.m_sFrom.SetSprintf ( "%f", tRange.m_fFrom );
  292. if ( bHasTo )
  293. tDesc.m_sTo.SetSprintf ( "%f", tRange.m_fTo );
  294. if ( bHasFrom && bHasTo )
  295. tDesc.m_sKey.SetSprintf ( "%f-%f", tRange.m_fFrom, tRange.m_fTo );
  296. else if ( bHasTo )
  297. tDesc.m_sKey.SetSprintf ( "*-%f", tRange.m_fTo );
  298. else
  299. tDesc.m_sKey.SetSprintf ( "%f-*", tRange.m_fFrom );
  300. }
  301. static void FormatKeyInt ( const RangeSetting_t & tRange, bool bHasFrom, bool bHasTo, RangeKeyDesc_t & tDesc )
  302. {
  303. assert ( bHasFrom || bHasTo );
  304. if ( bHasFrom )
  305. tDesc.m_sFrom.SetSprintf ( INT64_FMT, tRange.m_iFrom );
  306. if ( bHasTo )
  307. tDesc.m_sTo.SetSprintf ( INT64_FMT, tRange.m_iTo );
  308. if ( bHasFrom && bHasTo )
  309. tDesc.m_sKey.SetSprintf ( INT64_FMT "-" INT64_FMT, tRange.m_iFrom, tRange.m_iTo );
  310. else if ( bHasTo )
  311. tDesc.m_sKey.SetSprintf ( "*-" INT64_FMT, tRange.m_iTo );
  312. else
  313. tDesc.m_sKey.SetSprintf ( INT64_FMT "-*", tRange.m_iFrom );
  314. }
  315. static void FormatDate ( const CSphString & sVal, int iNow, CSphString & sRes )
  316. {
  317. time_t tSrcDate;
  318. Verify ( ParseDateMath ( sVal, iNow, tSrcDate ) );
  319. FormatDate ( tSrcDate, sRes );
  320. }
  321. static const char * g_sCompatDateFormat = "%Y-%m-%dT%H:%M:%S"; // YYYY-mm-dd'T'HH:mm:ss.SSS'Z'
  322. static void FormatDate ( time_t tDate, char * sBuf, int iSize )
  323. {
  324. std::tm tDstDate;
  325. gmtime_r ( &tDate, &tDstDate );
  326. [[maybe_unused]] auto tResult = strftime ( sBuf, iSize, g_sCompatDateFormat, &tDstDate );
  327. assert ( tResult>0 );
  328. }
  329. void FormatDate ( time_t tDate, CSphString & sRes )
  330. {
  331. char sBuf[128];
  332. FormatDate ( tDate, sBuf, sizeof(sBuf)-1 );
  333. sRes = sBuf;
  334. }
  335. void FormatDate ( time_t tDate, StringBuilder_c & sRes )
  336. {
  337. char sBuf[128];
  338. FormatDate ( tDate, sBuf, sizeof(sBuf)-1 );
  339. sRes.Appendf ( "%s", sBuf );
  340. }
  341. static void FormatKeyDate ( const DateRangeSetting_t & tRange, int iNow, RangeKeyDesc_t & tDesc )
  342. {
  343. CSphString sFrom;
  344. CSphString sTo;
  345. const bool bHasFrom = !tRange.m_sFrom.IsEmpty();
  346. const bool bHasTo = !tRange.m_sTo.IsEmpty();
  347. assert ( bHasFrom || bHasTo );
  348. assert ( iNow>0 );
  349. if ( bHasFrom )
  350. {
  351. FormatDate ( tRange.m_sFrom, iNow, sFrom );
  352. tDesc.m_sFrom.SetSprintf ( "%s", sFrom.cstr() );
  353. }
  354. if ( bHasTo )
  355. {
  356. FormatDate ( tRange.m_sTo, iNow, sTo );
  357. tDesc.m_sTo.SetSprintf ( "%s", sTo.cstr() );
  358. }
  359. if ( bHasFrom && bHasTo )
  360. tDesc.m_sKey.SetSprintf ( "%s-%s", sFrom.cstr(), sTo.cstr() );
  361. else if ( bHasTo )
  362. tDesc.m_sKey.SetSprintf ( "*-%s", sTo.cstr() );
  363. else
  364. tDesc.m_sKey.SetSprintf ( "%s-*", sFrom.cstr() );
  365. }
  366. void GetRangeKeyNames ( const AggrRangeSetting_t & tRanges, RangeNameHash_t & hRangeNames )
  367. {
  368. if ( tRanges.GetLength()==1 && tRanges.m_bOpenLeft && tRanges.m_bOpenRight )
  369. {
  370. auto & tDesc = hRangeNames.AddUnique ( 0 );
  371. tDesc.m_sKey = "*-*";
  372. return;
  373. }
  374. ARRAY_FOREACH ( i, tRanges )
  375. {
  376. const auto & tSrc = tRanges[i];
  377. auto & tDesc = hRangeNames.AddUnique ( i );
  378. bool bHasFrom = true;
  379. bool bHasTo = true;
  380. if ( i==0 && tRanges.m_bOpenLeft )
  381. bHasFrom = false;
  382. else if ( i==tRanges.GetLength()-1 && tRanges.m_bOpenRight )
  383. bHasTo = false;
  384. if ( tRanges.m_bFloat )
  385. FormatKeyFloat ( tSrc, bHasFrom, bHasTo, tDesc );
  386. else
  387. FormatKeyInt ( tSrc, bHasFrom, bHasTo, tDesc );
  388. }
  389. }
  390. void GetRangeKeyNames ( const AggrDateRangeSetting_t & tRanges, int iNow, RangeNameHash_t & hRangeNames )
  391. {
  392. ARRAY_FOREACH ( i, tRanges )
  393. {
  394. const auto & tSrc = tRanges[i];
  395. auto & tDesc = hRangeNames.AddUnique ( i );
  396. FormatKeyDate ( tSrc, iNow, tDesc );
  397. }
  398. }
  399. static CSphString DumpAggrHist ( const CSphString & sCol, const AggrHistSetting_t & tHist )
  400. {
  401. StringBuilder_c sRes;
  402. sRes.Appendf ( "histogram(%s, {", sCol.cstr() );
  403. if ( tHist.m_bFloat )
  404. sRes.Appendf ( "hist_interval=%f, hist_offset=%f", std::get<float>( tHist.m_tInterval ), std::get<float> ( tHist.m_tOffset ) );
  405. else
  406. sRes.Appendf ( "hist_interval=" INT64_FMT ", hist_offset=" INT64_FMT, std::get<int64_t>( tHist.m_tInterval ), std::get<int64_t>( tHist.m_tOffset ) );
  407. sRes += "})";
  408. return CSphString ( sRes );
  409. }
  410. static CSphString DumpAggrHist ( const CSphString & sCol, const AggrDateHistSetting_t & tHist )
  411. {
  412. StringBuilder_c sRes;
  413. sRes.Appendf ( "date_histogram(%s, {", sCol.cstr() );
  414. if ( tHist.m_bFixed )
  415. sRes.Appendf ( "fixed_interval='%s'", tHist.m_sInterval.cstr() );
  416. else
  417. sRes.Appendf ( "calendar_interval='%s'", tHist.m_sInterval.cstr() );
  418. sRes += "})";
  419. return CSphString ( sRes );
  420. }
  421. CSphString DumpAggr ( const CSphString & sCol, const AggrSettings_t & tAggr )
  422. {
  423. switch ( tAggr.m_eAggrFunc )
  424. {
  425. case Aggr_e::RANGE: return DumpAggrRange ( sCol, tAggr.m_tRange );
  426. case Aggr_e::DATE_RANGE: return DumpAggrRange ( sCol, tAggr.m_tDateRange );
  427. case Aggr_e::HISTOGRAM: return DumpAggrHist ( sCol, tAggr.m_tHist );
  428. case Aggr_e::DATE_HISTOGRAM: return DumpAggrHist ( sCol, tAggr.m_tDateHist );
  429. default: return sCol;
  430. }
  431. }
  432. static void ConvertIntoFloat ( AggrBound_t & tVal )
  433. {
  434. if ( std::holds_alternative<int64_t> ( tVal ) )
  435. tVal = (float)std::get<int64_t> ( tVal );
  436. }
  437. void FixFloat ( AggrHistSetting_t & tHist )
  438. {
  439. if ( tHist.m_tInterval.index()!=tHist.m_tOffset.index() )
  440. {
  441. ConvertIntoFloat ( tHist.m_tInterval );
  442. ConvertIntoFloat ( tHist.m_tOffset );
  443. tHist.m_bFloat = true;
  444. } else
  445. {
  446. tHist.m_bFloat = std::holds_alternative<float> ( tHist.m_tInterval );
  447. }
  448. }
  449. void PromoteHistogramToFloat ( AggrHistSetting_t & tHist )
  450. {
  451. ConvertIntoFloat ( tHist.m_tInterval );
  452. ConvertIntoFloat ( tHist.m_tOffset );
  453. tHist.m_bFloat = true;
  454. }
  455. static void SetValue ( const CSphNamedVariant & tPair, AggrBound_t & tVal )
  456. {
  457. switch ( tPair.m_eType )
  458. {
  459. case VariantType_e::BIGINT: tVal = tPair.m_iValue; return;
  460. case VariantType_e::FLOAT: tVal = tPair.m_fValue; return;
  461. default: assert ( 0 && "internal error: unhandled aggregate value" ); return;
  462. }
  463. }
  464. bool ParseAggrHistogram ( const VecTraits_T < CSphNamedVariant > & dVariants, AggrHistSetting_t & tHist, CSphString & sError )
  465. {
  466. for ( const auto & tPair : dVariants )
  467. {
  468. if ( tPair.m_eType!=VariantType_e::BIGINT && tPair.m_eType!=VariantType_e::FLOAT )
  469. {
  470. sError.SetSprintf ( "invalid value '%s' type %d", tPair.m_sKey.cstr(), (int)tPair.m_eType );
  471. return false;
  472. }
  473. if ( tPair.m_sKey=="hist_interval" )
  474. SetValue ( tPair, tHist.m_tInterval );
  475. else if ( tPair.m_sKey=="hist_offset" )
  476. SetValue ( tPair, tHist.m_tOffset );
  477. else
  478. {
  479. sError.SetSprintf ( "unknow value '%s'", tPair.m_sKey.cstr() );
  480. return false;
  481. }
  482. }
  483. FixFloat ( tHist );
  484. return true;
  485. }
  486. bool ParseAggrDateHistogram ( const VecTraits_T < CSphNamedVariant > & dVariants, AggrDateHistSetting_t & tHist, CSphString & sError )
  487. {
  488. for ( const auto & tPair : dVariants )
  489. {
  490. if ( tPair.m_eType!=VariantType_e::STRING )
  491. {
  492. sError.SetSprintf ( "invalid value '%s' type %d", tPair.m_sKey.cstr(), (int)tPair.m_eType );
  493. return false;
  494. }
  495. if ( tPair.m_sKey=="calendar_interval" || tPair.m_sKey=="fixed_interval" )
  496. {
  497. tHist.m_sInterval = tPair.m_sValue;
  498. tHist.m_bFixed = ( tPair.m_sKey=="fixed_interval" );
  499. } else
  500. {
  501. sError.SetSprintf ( "unknow value '%s'", tPair.m_sKey.cstr() );
  502. return false;
  503. }
  504. }
  505. if ( tHist.m_sInterval.IsEmpty() )
  506. {
  507. sError.SetSprintf ( "calendar_interval missed" );
  508. return false;
  509. }
  510. DateUnit_e eUnit = ParseDateInterval ( tHist.m_sInterval, tHist.m_bFixed, sError ).first;
  511. if ( eUnit==DateUnit_e::total_units )
  512. return false;
  513. return true;
  514. }
  515. // the aggr histogram implementation
  516. template < bool FLOAT >
  517. class AggrHistExpr_T : public Expr_ArgVsSet_T<int>
  518. {
  519. AggrHistSetting_t m_tHist;
  520. public:
  521. AggrHistExpr_T ( ISphExpr * pAttr, const AggrHistSetting_t & tHist )
  522. : Expr_ArgVsSet_T ( pAttr )
  523. , m_tHist ( tHist )
  524. {
  525. assert ( ( m_tHist.m_bFloat && std::get<float> ( m_tHist.m_tInterval )>0.0f ) || ( !m_tHist.m_bFloat && std::get<int64_t> ( m_tHist.m_tInterval )>0 ) );
  526. }
  527. int IntEval ( const CSphMatch & tMatch ) const final
  528. {
  529. int iBucket = GetBucket ( tMatch );
  530. return iBucket;
  531. }
  532. protected:
  533. int GetBucket ( const CSphMatch & tMatch ) const
  534. {
  535. if_const ( FLOAT )
  536. {
  537. float fInterval = std::get<float> ( m_tHist.m_tInterval );
  538. float fOffset = std::get<float> ( m_tHist.m_tOffset );
  539. float fVal = m_pArg->Eval ( tMatch );
  540. float fBucketNrm = floor ( ( fVal - fOffset ) / fInterval );
  541. int iBucket = fBucketNrm * fInterval + fOffset;
  542. return iBucket;
  543. } else
  544. {
  545. int64_t iInterval = std::get<int64_t> ( m_tHist.m_tInterval );
  546. int64_t iOffset = std::get<int64_t> ( m_tHist.m_tOffset );
  547. int64_t iVal = m_pArg->Int64Eval ( tMatch );
  548. double fBucketNrm = floor ( double ( iVal - iOffset ) / iInterval );
  549. int iBucket = fBucketNrm * iInterval + iOffset;
  550. return iBucket;
  551. }
  552. }
  553. uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable ) final
  554. {
  555. EXPR_CLASS_NAME("AggrHistExpr_T");
  556. CALC_POD_HASH ( m_tHist );
  557. return CALC_DEP_HASHES();
  558. }
  559. ISphExpr * Clone() const final
  560. {
  561. return new AggrHistExpr_T ( *this );
  562. }
  563. private:
  564. AggrHistExpr_T ( const AggrHistExpr_T & rhs )
  565. : Expr_ArgVsSet_T ( rhs )
  566. , m_tHist ( rhs.m_tHist )
  567. {
  568. }
  569. };
  570. ISphExpr * CreateExprHistogram ( ISphExpr * pAttr, const AggrHistSetting_t & tHist )
  571. {
  572. if ( tHist.m_bFloat )
  573. return new AggrHistExpr_T<true> ( pAttr, tHist );
  574. else
  575. return new AggrHistExpr_T<false> ( pAttr, tHist );
  576. }
  577. // the aggr date histogram implementation
  578. template < bool FIXED_INTERVAL >
  579. class AggrDateHistExpr_T : public Expr_ArgVsSet_T<int>
  580. {
  581. AggrDateHistSetting_t m_tHist;
  582. DateUnit_e m_eUnit = DateUnit_e::total_units;
  583. int m_iMulti = 1;
  584. public:
  585. AggrDateHistExpr_T ( ISphExpr * pAttr, const AggrDateHistSetting_t & tHist )
  586. : Expr_ArgVsSet_T ( pAttr )
  587. , m_tHist ( tHist )
  588. {
  589. CSphString sError;
  590. auto[eUnit, iMulti] = ParseDateInterval ( tHist.m_sInterval, tHist.m_bFixed, sError );
  591. assert ( eUnit!=DateUnit_e::total_units );
  592. m_eUnit = eUnit;
  593. m_iMulti = iMulti;
  594. }
  595. int IntEval ( const CSphMatch & tMatch ) const final
  596. {
  597. int iBucket = GetBucket ( tMatch );
  598. return iBucket;
  599. }
  600. protected:
  601. int GetBucket ( const CSphMatch & tMatch ) const
  602. {
  603. time_t iVal = m_pArg->IntEval ( tMatch );
  604. if_const ( FIXED_INTERVAL )
  605. RoundDate ( m_eUnit, m_iMulti, iVal );
  606. else
  607. RoundDate ( m_eUnit, iVal );
  608. return iVal;
  609. }
  610. uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable ) final
  611. {
  612. EXPR_CLASS_NAME("AggrDateHistExpr_T");
  613. CALC_POD_HASH ( m_tHist );
  614. CALC_POD_HASH ( m_eUnit );
  615. CALC_POD_HASH ( m_iMulti );
  616. return CALC_DEP_HASHES();
  617. }
  618. ISphExpr * Clone() const final
  619. {
  620. return new AggrDateHistExpr_T ( *this );
  621. }
  622. private:
  623. AggrDateHistExpr_T ( const AggrDateHistExpr_T & rhs )
  624. : Expr_ArgVsSet_T ( rhs )
  625. , m_tHist ( rhs.m_tHist )
  626. , m_eUnit ( rhs.m_eUnit )
  627. , m_iMulti ( rhs.m_iMulti )
  628. {
  629. }
  630. };
  631. ISphExpr * CreateExprDateHistogram ( ISphExpr * pAttr, const AggrDateHistSetting_t & tHist )
  632. {
  633. if ( tHist.m_bFixed )
  634. return new AggrDateHistExpr_T<true> ( pAttr, tHist );
  635. else
  636. return new AggrDateHistExpr_T<false> ( pAttr, tHist );
  637. }