columnarrt.cpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652
  1. //
  2. // Copyright (c) 2021-2026, Manticore Software LTD (https://manticoresearch.com)
  3. // All rights reserved
  4. //
  5. // This program is free software; you can redistribute it and/or modify
  6. // it under the terms of the GNU General Public License. You should have
  7. // received a copy of the GPL license along with this program; if you
  8. // did not, you can find it at http://www.gnu.org/
  9. //
  10. #include "columnarrt.h"
  11. #include "fileio.h"
  12. #include "memio.h"
  13. #include "attribute.h"
  14. #include "schema/schema.h"
  15. #include "columnarmisc.h"
  16. template <typename T>
  17. static std::pair<T,T> GetLengthOffset ( const CSphVector<T> & dLengths, RowID_t tRowID )
  18. {
  19. T iLength = dLengths[tRowID];
  20. T iOffset = 0;
  21. if ( tRowID>0 )
  22. {
  23. iOffset = dLengths[tRowID-1];
  24. iLength -= iOffset;
  25. }
  26. return {iLength, iOffset};
  27. }
  28. template <typename T>
  29. static T GetLength ( const CSphVector<T> & dLengths, RowID_t tRowID )
  30. {
  31. T iLength = dLengths[tRowID];
  32. if ( tRowID>0 )
  33. iLength -= dLengths[tRowID-1];
  34. return iLength;
  35. }
  36. /////////////////////////////////////////////////////////////////////
  37. class ColumnarAttrRT_i
  38. {
  39. public:
  40. virtual ~ColumnarAttrRT_i() = default;
  41. virtual void AddDoc ( SphAttr_t tAttr ) = 0;
  42. virtual void AddDoc ( const BYTE * pData, int iLength ) = 0;
  43. virtual void AddDoc ( const int64_t * pData, int iLength ) = 0;
  44. virtual void Save ( MemoryWriter_c & tWriter ) const = 0;
  45. virtual void Save ( Writer_i & tWriter ) const = 0;
  46. virtual void Load ( MemoryReader_c & tReader ) = 0;
  47. virtual void Load ( CSphReader & tReader ) = 0;
  48. virtual int64_t AllocatedBytes() const = 0;
  49. virtual std::unique_ptr<columnar::Iterator_i> CreateIterator() const = 0;
  50. virtual common::AttrType_e GetType() const = 0;
  51. };
  52. class ColumnarAttrRT_c : public ColumnarAttrRT_i
  53. {
  54. public:
  55. ColumnarAttrRT_c ( ESphAttr eType ) : m_eType(eType) {}
  56. void AddDoc ( SphAttr_t tAttr ) override { assert ( 0 && "Unsupported type" ); }
  57. void AddDoc ( const BYTE * pData, int iLength ) override { assert ( 0 && "Unsupported type" ); }
  58. void AddDoc ( const int64_t * pData, int iLength ) override { assert ( 0 && "Unsupported type" ); }
  59. common::AttrType_e GetType() const override { return ToColumnarType ( m_eType, ROWITEM_BITS ); }
  60. protected:
  61. ESphAttr m_eType = SPH_ATTR_NONE;
  62. };
  63. class ColumnarIterator_RT_c : public columnar::Iterator_i
  64. {
  65. public:
  66. int64_t Get ( uint32_t tRowID ) override { assert ( 0 && "Unsupported function" ); return 0; }
  67. void Fetch ( const util::Span_T<uint32_t> & dRowIDs, util::Span_T<int64_t> & dValues ) override { assert ( 0 && "Unsupported function" ); }
  68. int Get ( uint32_t tRowID, const uint8_t * & pData ) override { assert ( 0 && "Unsupported function" ); return 0; }
  69. uint8_t * GetPacked ( uint32_t tRowID ) override { assert ( 0 && "Unsupported function" ); return 0; }
  70. int GetLength ( uint32_t tRowID ) override { assert ( 0 && "Unsupported function" ); return 0; }
  71. void AddDesc ( std::vector<common::IteratorDesc_t> & dDesc ) const override {}
  72. };
  73. /////////////////////////////////////////////////////////////////////
  74. template<typename T>
  75. class ColumnarIterator_Int_T : public ColumnarIterator_RT_c
  76. {
  77. public:
  78. ColumnarIterator_Int_T ( const CSphVector<T> & dValues ) : m_dValues ( dValues ) {}
  79. int64_t Get ( uint32_t tRowID ) override { return m_dValues[tRowID]; }
  80. void Fetch ( const util::Span_T<uint32_t> & dRowIDs, util::Span_T<int64_t> & dValues ) override;
  81. private:
  82. const CSphVector<T> & m_dValues;
  83. };
  84. template<typename T>
  85. void ColumnarIterator_Int_T<T>::Fetch ( const util::Span_T<uint32_t> & dRowIDs, util::Span_T<int64_t> & dValues )
  86. {
  87. uint32_t * pRowID = dRowIDs.begin();
  88. uint32_t * pRowIDEnd = dRowIDs.end();
  89. int64_t * pValue = dValues.begin();
  90. while ( pRowID<pRowIDEnd )
  91. *pValue++ = m_dValues[*pRowID++];
  92. }
  93. /////////////////////////////////////////////////////////////////////
  94. template<typename T>
  95. class ColumnarAttr_Int_T : public ColumnarAttrRT_c
  96. {
  97. public:
  98. ColumnarAttr_Int_T ( ESphAttr eType, int iBits );
  99. void AddDoc ( SphAttr_t tAttr ) override { m_dValues.Add ( ( (T)tAttr ) & m_uMask ); }
  100. void Save ( MemoryWriter_c & tWriter ) const override { SaveData(tWriter); }
  101. void Save ( Writer_i & tWriter ) const override { SaveData(tWriter); }
  102. void Load ( MemoryReader_c & tReader ) override { LoadData(tReader); }
  103. void Load ( CSphReader & tReader ) override { LoadData(tReader); }
  104. int64_t AllocatedBytes() const override { return m_dValues.GetLengthBytes64(); }
  105. std::unique_ptr<columnar::Iterator_i> CreateIterator() const override { return std::make_unique<ColumnarIterator_Int_T<T>> ( m_dValues ); }
  106. protected:
  107. CSphVector<T> m_dValues;
  108. private:
  109. T m_uMask = 0;
  110. template <typename WRITER>
  111. void SaveData ( WRITER & tWriter ) const;
  112. template <typename READER>
  113. void LoadData ( READER & tReader );
  114. };
  115. template<typename T>
  116. ColumnarAttr_Int_T<T>::ColumnarAttr_Int_T ( ESphAttr eType, int iBits )
  117. : ColumnarAttrRT_c(eType)
  118. , m_uMask ( iBits==64 ? (T)0xFFFFFFFFFFFFFFFFULL : (T)( (1ULL<<iBits)-1 ) )
  119. {}
  120. template<typename T>
  121. template <typename WRITER>
  122. void ColumnarAttr_Int_T<T>::SaveData ( WRITER & tWriter ) const
  123. {
  124. tWriter.PutDword ( m_eType );
  125. tWriter.PutOffset ( m_uMask );
  126. tWriter.PutDword ( m_dValues.GetLength() );
  127. tWriter.PutBytes ( m_dValues.Begin(), (int)m_dValues.GetLengthBytes64() );
  128. }
  129. template<typename T>
  130. template <typename READER>
  131. void ColumnarAttr_Int_T<T>::LoadData ( READER & tReader )
  132. {
  133. m_uMask = (T)tReader.GetOffset();
  134. m_dValues.Resize ( tReader.GetDword() );
  135. tReader.GetBytes ( m_dValues.Begin(), (int)m_dValues.GetLengthBytes64() );
  136. }
  137. /////////////////////////////////////////////////////////////////////
  138. class ColumnarAttr_Bool_c : public ColumnarAttr_Int_T<BYTE>
  139. {
  140. using BASE = ColumnarAttr_Int_T<BYTE>;
  141. public:
  142. ColumnarAttr_Bool_c() : BASE ( SPH_ATTR_BOOL, 1 ) {}
  143. void AddDoc ( SphAttr_t tAttr ) override { BASE::m_dValues.Add ( tAttr ? 1 : 0 ); }
  144. };
  145. /////////////////////////////////////////////////////////////////////
  146. class ColumnarIterator_String_c : public ColumnarIterator_RT_c
  147. {
  148. public:
  149. ColumnarIterator_String_c ( const CSphVector<BYTE> & dData, const CSphVector<int64_t> & dLengths ) : m_dData ( dData ), m_dLengths ( dLengths ) {}
  150. int Get ( uint32_t tRowID, const uint8_t * & pData ) override;
  151. uint8_t * GetPacked ( uint32_t tRowID ) override;
  152. int GetLength ( uint32_t tRowID ) override { return (int)::GetLength ( m_dLengths, tRowID ); }
  153. private:
  154. const CSphVector<BYTE> & m_dData;
  155. const CSphVector<int64_t> & m_dLengths;
  156. };
  157. int ColumnarIterator_String_c::Get ( uint32_t tRowID, const uint8_t * & pData )
  158. {
  159. int64_t iLength, iOffset;
  160. std::tie(iLength, iOffset) = GetLengthOffset ( m_dLengths, tRowID );
  161. pData = iLength>0 ? (const uint8_t*)&m_dData[iOffset] : nullptr;
  162. return (int)iLength;
  163. }
  164. uint8_t * ColumnarIterator_String_c::GetPacked ( uint32_t tRowID )
  165. {
  166. int64_t iLength, iOffset;
  167. std::tie(iLength, iOffset) = GetLengthOffset ( m_dLengths, tRowID );
  168. auto pStr = iLength>0 ? (const uint8_t*)&m_dData[iOffset] : nullptr;
  169. return sphPackPtrAttr ( { pStr, iLength } );
  170. }
  171. class ColumnarAttr_String_c : public ColumnarAttrRT_c
  172. {
  173. public:
  174. ColumnarAttr_String_c() : ColumnarAttrRT_c ( SPH_ATTR_STRING ) {}
  175. void AddDoc ( const BYTE * pData, int iLength ) override;
  176. void Save ( MemoryWriter_c & tWriter ) const override { SaveData(tWriter); }
  177. void Save ( Writer_i & tWriter ) const override { SaveData(tWriter); }
  178. void Load ( MemoryReader_c & tReader ) override { LoadData(tReader); }
  179. void Load ( CSphReader & tReader ) override { LoadData(tReader); }
  180. int64_t AllocatedBytes() const override { return m_dData.GetLengthBytes64() + m_dLengths.GetLengthBytes64(); }
  181. std::unique_ptr<columnar::Iterator_i> CreateIterator() const override { return std::make_unique<ColumnarIterator_String_c> ( m_dData, m_dLengths ); }
  182. private:
  183. CSphVector<BYTE> m_dData;
  184. CSphVector<int64_t> m_dLengths;
  185. int64_t m_iTotalLength = 0;
  186. template <typename WRITER>
  187. void SaveData ( WRITER & tWriter ) const;
  188. template <typename READER>
  189. void LoadData ( READER & tReader );
  190. };
  191. void ColumnarAttr_String_c::AddDoc ( const BYTE * pData, int iLength )
  192. {
  193. m_iTotalLength += iLength;
  194. m_dLengths.Add(m_iTotalLength);
  195. m_dData.Append ( pData, iLength );
  196. }
  197. template <typename WRITER>
  198. void ColumnarAttr_String_c::SaveData ( WRITER & tWriter ) const
  199. {
  200. tWriter.PutDword ( m_eType );
  201. tWriter.PutDword ( m_dLengths.GetLength() );
  202. tWriter.PutBytes ( m_dLengths.Begin(), (int)m_dLengths.GetLengthBytes64() );
  203. tWriter.PutDword ( m_dData.GetLength() );
  204. tWriter.PutBytes ( m_dData.Begin(), (int)m_dData.GetLengthBytes64() );
  205. }
  206. template <typename READER>
  207. void ColumnarAttr_String_c::LoadData ( READER & tReader )
  208. {
  209. m_dLengths.Resize ( tReader.GetDword() );
  210. tReader.GetBytes ( m_dLengths.Begin(), (int)m_dLengths.GetLengthBytes64() );
  211. m_dData.Resize ( tReader.GetDword() );
  212. tReader.GetBytes ( m_dData.Begin(), (int)m_dData.GetLengthBytes64() );
  213. }
  214. /////////////////////////////////////////////////////////////////////
  215. template <typename T>
  216. class ColumnarIterator_MVA_T : public ColumnarIterator_RT_c
  217. {
  218. public:
  219. ColumnarIterator_MVA_T ( const CSphVector<T> & dData, const CSphVector<int> & dLengths ) : m_dData ( dData ), m_dLengths ( dLengths ) {}
  220. int Get ( uint32_t tRowID, const uint8_t * & pData ) override;
  221. uint8_t * GetPacked ( uint32_t tRowID ) override;
  222. int GetLength ( uint32_t tRowID ) override { return ::GetLength ( m_dLengths, tRowID )*sizeof(T); }
  223. private:
  224. const CSphVector<T> & m_dData;
  225. const CSphVector<int> & m_dLengths;
  226. };
  227. template <typename T>
  228. int ColumnarIterator_MVA_T<T>::Get ( uint32_t tRowID, const uint8_t * & pData )
  229. {
  230. int iLength, iOffset;
  231. std::tie(iLength, iOffset) = GetLengthOffset ( m_dLengths, tRowID );
  232. iLength *= sizeof(T);
  233. pData = iLength > 0 ? (const uint8_t*)&m_dData[iOffset] : nullptr;
  234. return iLength;
  235. }
  236. template <typename T>
  237. uint8_t * ColumnarIterator_MVA_T<T>::GetPacked ( uint32_t tRowID )
  238. {
  239. int iLength, iOffset;
  240. std::tie(iLength, iOffset) = GetLengthOffset ( m_dLengths, tRowID );
  241. iLength *= sizeof(T);
  242. auto pMVA = iLength > 0 ? (const uint8_t*)&m_dData[iOffset] : nullptr;
  243. return sphPackPtrAttr ( { pMVA, iLength } );
  244. }
  245. template <typename T>
  246. class ColumnarAttr_MVA_T : public ColumnarAttrRT_c
  247. {
  248. public:
  249. ColumnarAttr_MVA_T ( ESphAttr eType ) : ColumnarAttrRT_c ( eType ) {}
  250. void AddDoc ( const int64_t * pData, int iLength ) override;
  251. void Save ( MemoryWriter_c & tWriter ) const override { SaveData(tWriter); }
  252. void Save ( Writer_i & tWriter ) const override { SaveData(tWriter); }
  253. int64_t AllocatedBytes() const override { return m_dData.GetLengthBytes64() + m_dLengths.GetLengthBytes64(); }
  254. void Load ( MemoryReader_c & tReader ) override { LoadData(tReader); }
  255. void Load ( CSphReader & tReader ) override { LoadData(tReader); }
  256. std::unique_ptr<columnar::Iterator_i> CreateIterator() const override { return std::make_unique<ColumnarIterator_MVA_T<T>> ( m_dData, m_dLengths ); }
  257. private:
  258. CSphVector<T> m_dData;
  259. CSphVector<int> m_dLengths;
  260. int64_t m_iTotalLength = 0;
  261. template <typename WRITER>
  262. void SaveData ( WRITER & tWriter ) const;
  263. template <typename READER>
  264. void LoadData ( READER & tReader );
  265. };
  266. template <typename T>
  267. void ColumnarAttr_MVA_T<T>::AddDoc ( const int64_t * pData, int iLength )
  268. {
  269. m_iTotalLength += iLength;
  270. m_dLengths.Add ( (int)m_iTotalLength );
  271. for ( int i = 0; i < iLength; i++ )
  272. m_dData.Add ( (T)pData[i] );
  273. }
  274. template <typename T>
  275. template <typename WRITER>
  276. void ColumnarAttr_MVA_T<T>::SaveData ( WRITER & tWriter ) const
  277. {
  278. tWriter.PutDword(m_eType);
  279. tWriter.PutDword ( m_dLengths.GetLength() );
  280. tWriter.PutBytes ( m_dLengths.Begin(), (int)m_dLengths.GetLengthBytes64() );
  281. tWriter.PutDword ( m_dData.GetLength() );
  282. tWriter.PutBytes ( m_dData.Begin(), (int)m_dData.GetLengthBytes64() );
  283. }
  284. template <typename T>
  285. template <typename READER>
  286. void ColumnarAttr_MVA_T<T>::LoadData ( READER & tReader )
  287. {
  288. m_dLengths.Resize ( tReader.GetDword() );
  289. tReader.GetBytes ( m_dLengths.Begin(), (int)m_dLengths.GetLengthBytes64() );
  290. m_dData.Resize ( tReader.GetDword() );
  291. tReader.GetBytes ( m_dData.Begin(), (int)m_dData.GetLengthBytes64() );
  292. }
  293. ////////////////////////////////////////////////////////////////////
  294. static std::unique_ptr<ColumnarAttrRT_i> CreateColumnarAttrRT ( ESphAttr eType, int iBits )
  295. {
  296. switch ( eType )
  297. {
  298. case SPH_ATTR_INTEGER:
  299. case SPH_ATTR_TIMESTAMP:
  300. case SPH_ATTR_FLOAT:
  301. case SPH_ATTR_TOKENCOUNT:
  302. return std::make_unique<ColumnarAttr_Int_T<DWORD>> ( eType, iBits );
  303. case SPH_ATTR_BOOL: return std::make_unique<ColumnarAttr_Bool_c>();
  304. case SPH_ATTR_BIGINT: return std::make_unique<ColumnarAttr_Int_T<int64_t>> ( eType, iBits );
  305. case SPH_ATTR_STRING: return std::make_unique<ColumnarAttr_String_c>();
  306. case SPH_ATTR_UINT32SET: return std::make_unique<ColumnarAttr_MVA_T<DWORD>>(eType);
  307. case SPH_ATTR_INT64SET: return std::make_unique<ColumnarAttr_MVA_T<int64_t>>(eType);
  308. case SPH_ATTR_FLOAT_VECTOR: return std::make_unique<ColumnarAttr_MVA_T<uint32_t>>(eType);
  309. default:
  310. assert ( 0 && "Unsupported type" );
  311. return nullptr;
  312. }
  313. }
  314. /////////////////////////////////////////////////////////////////////
  315. class ColumnarBuilderRT_c : public ColumnarBuilderRT_i
  316. {
  317. public:
  318. explicit ColumnarBuilderRT_c ( const CSphSchema & tSchema );
  319. explicit ColumnarBuilderRT_c ( MemoryReader_c & tReader ) { Load(tReader); }
  320. void SetAttr ( int iAttr, int64_t tAttr ) override { m_dAttrs[iAttr]->AddDoc(tAttr); }
  321. void SetAttr ( int iAttr, const uint8_t * pData, int iLength ) override { m_dAttrs[iAttr]->AddDoc ( pData, iLength ); }
  322. void SetAttr ( int iAttr, const int64_t * pData, int iLength ) override { m_dAttrs[iAttr]->AddDoc ( pData, iLength ); }
  323. bool Done ( std::string & sError ) override { return true; }
  324. void Save ( MemoryWriter_c & tWriter ) override;
  325. CSphVector<std::unique_ptr<ColumnarAttrRT_i>> & GetAttrs() override { return m_dAttrs; }
  326. const CSphVector<std::unique_ptr<ColumnarAttrRT_i>>& GetAttrs() const override { return m_dAttrs; }
  327. void Load ( MemoryReader_c & tReader );
  328. private:
  329. CSphVector<std::unique_ptr<ColumnarAttrRT_i>> m_dAttrs;
  330. };
  331. ColumnarBuilderRT_c::ColumnarBuilderRT_c ( const CSphSchema & tSchema )
  332. {
  333. for ( int i = 0; i < tSchema.GetAttrsCount(); i++ )
  334. {
  335. const auto & tAttr = tSchema.GetAttr(i);
  336. if ( tAttr.IsColumnar() )
  337. {
  338. m_dAttrs.Add ( CreateColumnarAttrRT ( tAttr.m_eAttrType, tAttr.m_tLocator.m_iBitCount ) );
  339. assert ( m_dAttrs.Last() );
  340. }
  341. }
  342. }
  343. void ColumnarBuilderRT_c::Save ( MemoryWriter_c & tWriter )
  344. {
  345. tWriter.PutDword ( m_dAttrs.GetLength() );
  346. m_dAttrs.for_each ( [&tWriter]( const auto& i ){ i->Save(tWriter); } );
  347. }
  348. void ColumnarBuilderRT_c::Load ( MemoryReader_c & tReader )
  349. {
  350. m_dAttrs.Resize ( tReader.GetDword() );
  351. m_dAttrs.for_each ( [&tReader]( auto & pAttr ){ pAttr = CreateColumnarAttrRT ( (ESphAttr)tReader.GetDword(), 0 ); pAttr->Load(tReader); } );
  352. }
  353. /////////////////////////////////////////////////////////////////////
  354. class ColumnarRT_c : public ColumnarRT_i
  355. {
  356. public:
  357. explicit ColumnarRT_c ( const CSphVector<std::unique_ptr<ColumnarAttrRT_i>> & dAttrs );
  358. columnar::Iterator_i * CreateIterator ( const std::string & sName, const columnar::IteratorHints_t & tHints, columnar::IteratorCapabilities_t * pCapabilities, std::string & sError ) const override;
  359. std::vector<common::BlockIterator_i *> CreateAnalyzerOrPrefilter ( const std::vector<common::Filter_t> & dFilters, std::vector<int> & dDeletedFilters, const columnar::BlockTester_i & tBlockTester ) const override { return {}; }
  360. int64_t EstimateMinMax ( const common::Filter_t & tFilter, const columnar::BlockTester_i & tBlockTester ) const final { return -1; }
  361. bool GetAttrInfo ( const std::string & sName, columnar::AttrInfo_t & tInfo ) const override;
  362. bool EarlyReject ( const std::vector<common::Filter_t> & dFilters, const columnar::BlockTester_i & tBlockTester ) const override { return false; }
  363. bool IsFilterDegenerate ( const common::Filter_t & tFilter ) const override { return false; }
  364. void Save ( Writer_i & tWriter ) override;
  365. int64_t AllocatedBytes() const override;
  366. protected:
  367. const CSphVector<std::unique_ptr<ColumnarAttrRT_i>>& m_dAttrs;
  368. void PopulateHashFromSchema ( const CSphSchema& tSchema );
  369. private:
  370. SmallStringHash_T<std::pair<ColumnarAttrRT_i*,int>> m_hAttrs;
  371. };
  372. ColumnarRT_c::ColumnarRT_c ( const CSphVector<std::unique_ptr<ColumnarAttrRT_i>>& dAttrs )
  373. : m_dAttrs { dAttrs }
  374. {}
  375. class LightColumnarRT_c : public ColumnarRT_c
  376. {
  377. public:
  378. LightColumnarRT_c ( const CSphSchema& tSchema, const ColumnarBuilderRT_i* pBuilder )
  379. : ColumnarRT_c ( pBuilder->GetAttrs() )
  380. {
  381. PopulateHashFromSchema ( tSchema );
  382. }
  383. };
  384. class FullColumnarRT_c: public ColumnarRT_c
  385. {
  386. public:
  387. FullColumnarRT_c ( const CSphSchema& tSchema, ColumnarBuilderRT_i* pBuilder )
  388. : ColumnarRT_c ( m_dOwnedAttrs )
  389. , m_dOwnedAttrs { std::move (pBuilder->GetAttrs())}
  390. {
  391. PopulateHashFromSchema ( tSchema );
  392. }
  393. FullColumnarRT_c ( const CSphSchema& tSchema, CSphReader& tReader )
  394. : ColumnarRT_c ( m_dOwnedAttrs )
  395. {
  396. m_dOwnedAttrs.Resize ( tReader.GetDword() );
  397. m_dOwnedAttrs.for_each ( [&tReader] ( auto& pAttr )
  398. {
  399. pAttr = CreateColumnarAttrRT ( (ESphAttr)tReader.GetDword(), 0 );
  400. pAttr->Load(tReader);
  401. } );
  402. PopulateHashFromSchema ( tSchema );
  403. }
  404. private:
  405. CSphVector<std::unique_ptr<ColumnarAttrRT_i>> m_dOwnedAttrs;
  406. };
  407. columnar::Iterator_i * ColumnarRT_c::CreateIterator ( const std::string & sName, const columnar::IteratorHints_t & tHints, columnar::IteratorCapabilities_t * pCapabilities, std::string & sError ) const
  408. {
  409. auto * pFound = m_hAttrs ( sName.c_str() );
  410. if ( !pFound )
  411. return nullptr;
  412. return pFound->first->CreateIterator().release();
  413. }
  414. bool ColumnarRT_c::GetAttrInfo ( const std::string & sName, columnar::AttrInfo_t & tInfo ) const
  415. {
  416. auto * pFound = m_hAttrs ( sName.c_str() );
  417. if ( !pFound )
  418. return false;
  419. tInfo.m_iId = pFound->second;
  420. tInfo.m_eType = pFound->first->GetType();
  421. return true;
  422. }
  423. void ColumnarRT_c::Save ( Writer_i & tWriter )
  424. {
  425. tWriter.PutDword ( m_dAttrs.GetLength() );
  426. m_dAttrs.for_each ( [&tWriter]( const auto& pAttr ){ pAttr->Save(tWriter); } );
  427. }
  428. int64_t ColumnarRT_c::AllocatedBytes() const
  429. {
  430. int64_t iTotal = 0;
  431. for ( const auto & i : m_dAttrs )
  432. iTotal += i->AllocatedBytes();
  433. return iTotal;
  434. }
  435. void ColumnarRT_c::PopulateHashFromSchema ( const CSphSchema & tSchema )
  436. {
  437. int iColumnar = 0;
  438. for ( int i = 0; i < tSchema.GetAttrsCount(); ++i )
  439. {
  440. const auto & tAttr = tSchema.GetAttr(i);
  441. if ( !tAttr.IsColumnar() )
  442. continue;
  443. m_hAttrs.Add ( { m_dAttrs[iColumnar].get(), iColumnar }, tAttr.m_sName );
  444. ++iColumnar;
  445. }
  446. assert ( m_hAttrs.GetLength() == m_dAttrs.GetLength() );
  447. }
  448. /////////////////////////////////////////////////////////////////////
  449. std::unique_ptr<ColumnarBuilderRT_i> CreateColumnarBuilderRT ( MemoryReader_c & tReader )
  450. {
  451. return std::make_unique<ColumnarBuilderRT_c> ( tReader );
  452. }
  453. std::unique_ptr<ColumnarBuilderRT_i> CreateColumnarBuilderRT ( const CSphSchema & tSchema )
  454. {
  455. if ( !tSchema.HasColumnarAttrs() )
  456. return nullptr;
  457. return std::make_unique<ColumnarBuilderRT_c> ( tSchema );
  458. }
  459. // columnar reader will NOT take ownership of attributes in columnar builder
  460. std::unique_ptr<ColumnarRT_i> CreateLightColumnarRT ( const CSphSchema& tSchema, const ColumnarBuilderRT_i* pBuilder )
  461. {
  462. if ( !pBuilder )
  463. return nullptr;
  464. return std::make_unique<LightColumnarRT_c> ( tSchema, pBuilder );
  465. }
  466. // columnar reader will take ownership of attributes in columnar builder
  467. std::unique_ptr<ColumnarRT_i> CreateColumnarRT ( const CSphSchema& tSchema, ColumnarBuilderRT_i* pBuilder )
  468. {
  469. if ( !pBuilder )
  470. return nullptr;
  471. return std::make_unique<FullColumnarRT_c> ( tSchema, pBuilder );
  472. }
  473. std::unique_ptr<ColumnarRT_i> CreateColumnarRT ( const CSphSchema & tSchema, CSphReader & tReader, CSphString & sError )
  474. {
  475. auto pColumnar = std::make_unique<FullColumnarRT_c>( tSchema, tReader );
  476. if ( tReader.GetErrorFlag() )
  477. {
  478. sError.SetSprintf ( "error loading columnar attribute storage: %s", tReader.GetErrorMessage().cstr() );
  479. return nullptr;
  480. }
  481. return pColumnar;
  482. }
  483. void RemoveColumnarDuplicates ( std::unique_ptr<ColumnarBuilderRT_i> & pBuilder, const CSphFixedVector<RowID_t> & dRowMap, const CSphSchema & tSchema )
  484. {
  485. if ( !pBuilder )
  486. return;
  487. if ( !dRowMap.any_of ( []( RowID_t tRowID ){ return tRowID==INVALID_ROWID; } ) )
  488. return;
  489. std::unique_ptr<ColumnarBuilderRT_i> pNewBuilder = CreateColumnarBuilderRT(tSchema);
  490. {
  491. std::unique_ptr<ColumnarRT_i> pColumnar = CreateLightColumnarRT ( tSchema, pBuilder.get() );
  492. CSphVector<ScopedTypedIterator_t> dIterators = CreateAllColumnarIterators ( pColumnar.get(), tSchema );
  493. CSphVector<int64_t> dTmpMVA;
  494. for ( RowID_t tSrcRowID = 0; tSrcRowID < dRowMap.GetULength(); tSrcRowID++ )
  495. {
  496. if ( dRowMap[tSrcRowID]==INVALID_ROWID )
  497. continue;
  498. ARRAY_FOREACH ( iAttr, dIterators )
  499. SetColumnarAttr ( iAttr, dIterators[iAttr].second, pNewBuilder.get(), dIterators[iAttr].first, tSrcRowID, dTmpMVA );
  500. }
  501. }
  502. pBuilder = std::move(pNewBuilder);
  503. }