killlist.h 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. //
  2. // Copyright (c) 2018-2026, Manticore Software LTD (https://manticoresearch.com)
  3. // All rights reserved
  4. //
  5. // This program is free software; you can redistribute it and/or modify
  6. // it under the terms of the GNU General Public License. You should have
  7. // received a copy of the GPL license along with this program; if you
  8. // did not, you can find it at http://www.gnu.org/
  9. //
  10. #ifndef _killlist_
  11. #define _killlist_
  12. #include "fileutils.h"
  13. #include "sphinxdefs.h"
  14. class CSphReader;
  15. class CSphWriter;
  16. class DeadRowMap_c
  17. {
  18. public:
  19. virtual ~DeadRowMap_c(){}
  20. bool HasDead() const;
  21. DWORD GetNumDeads() const;
  22. virtual int64_t GetLengthBytes() const = 0;
  23. virtual uint64_t GetCoreSize () const = 0;
  24. protected:
  25. bool m_bHaveDead {false};
  26. mutable int64_t m_iNumDeads = -1; // means 'not initialized'
  27. DWORD m_uRows {0};
  28. void CheckForDead ( const DWORD * pData, const DWORD * pDataEnd );
  29. bool Set ( RowID_t tRowID, DWORD * pData );
  30. inline bool IsSet ( RowID_t tRowID, const DWORD * pData ) const
  31. {
  32. if ( !m_bHaveDead || tRowID==INVALID_ROWID )
  33. return false;
  34. assert ( tRowID < m_uRows );
  35. return ( pData [ tRowID>>5U ] & ( 1UL<<( tRowID&31U ) ) )!=0;
  36. }
  37. private:
  38. virtual DWORD CountDeads () const = 0; // heavy doc-by-doc counting
  39. #if !(_WIN32) && !(HAVE_SYNC_FETCH)
  40. CSphMutex m_tLock;
  41. #endif
  42. };
  43. class DeadRowMap_Disk_c : public DeadRowMap_c
  44. {
  45. friend class DeadRowMap_Ram_c;
  46. public:
  47. ~DeadRowMap_Disk_c() override;
  48. bool Set ( RowID_t tRowID );
  49. inline bool IsSet ( RowID_t tRowID ) const
  50. {
  51. return DeadRowMap_c::IsSet ( tRowID, m_tData.GetReadPtr() );
  52. }
  53. int64_t GetLengthBytes() const override;
  54. uint64_t GetCoreSize () const override;
  55. bool Flush ( bool bWaitComplete, CSphString & sError ) const;
  56. bool Prealloc ( DWORD uRows, const CSphString & sFilename, CSphString & sError );
  57. void Dealloc();
  58. void Preread ( const char * sIndexName, const char * sFor, bool bMlock );
  59. private:
  60. DWORD CountDeads () const final;
  61. CSphMappedBuffer<DWORD> m_tData;
  62. };
  63. class DeadRowMap_Ram_c : public DeadRowMap_c
  64. {
  65. public:
  66. explicit DeadRowMap_Ram_c ( DWORD uRows );
  67. bool Set ( RowID_t tRowID );
  68. bool IsSet ( RowID_t tRowID ) const;
  69. void Reset ( DWORD uRows );
  70. int64_t GetLengthBytes() const override;
  71. uint64_t GetCoreSize () const override;
  72. DWORD GetNumAlive() const;
  73. void Load ( DWORD uRows, CSphReader & tReader, CSphString & sError );
  74. void Save ( CSphWriter & tWriter ) const;
  75. private:
  76. DWORD CountDeads () const final;
  77. CSphFixedVector<DWORD> m_dData {0};
  78. };
  79. class DocidListReader_c
  80. {
  81. public:
  82. explicit DocidListReader_c ( const VecTraits_T<DocID_t> & dKlist )
  83. : m_pIterator ( dKlist.Begin() )
  84. , m_pMaxIterator ( dKlist.Begin() + dKlist.GetLength() ) // should be this way till VecTraits.End got fixed
  85. {}
  86. static inline bool Read ( DocID_t & tDocID, RowID_t & tRowID )
  87. {
  88. return false;
  89. }
  90. inline bool ReadDocID ( DocID_t & tDocID )
  91. {
  92. if ( m_pIterator>=m_pMaxIterator )
  93. return false;
  94. tDocID = *m_pIterator++;
  95. return true;
  96. }
  97. static inline void HintDocID ( DocID_t ) {}
  98. private:
  99. const DocID_t * m_pIterator {nullptr};
  100. const DocID_t * m_pMaxIterator {nullptr};
  101. };
  102. template<typename READER1, typename READER2, typename FUNCTOR>
  103. void Intersect ( READER1& tReader1, READER2& tReader2, FUNCTOR&& fnFunctor )
  104. {
  105. RowID_t tRowID1 = INVALID_ROWID;
  106. DocID_t tDocID1 = 0, tDocID2 = 0;
  107. bool bHaveDocs1 = tReader1.Read ( tDocID1, tRowID1 );
  108. bool bHaveDocs2 = tReader2.ReadDocID ( tDocID2 );
  109. while ( bHaveDocs1 && bHaveDocs2 )
  110. {
  111. if ( (uint64_t)tDocID1 < (uint64_t)tDocID2 )
  112. {
  113. tReader1.HintDocID ( tDocID2 );
  114. bHaveDocs1 = tReader1.Read ( tDocID1, tRowID1 );
  115. } else if ( (uint64_t)tDocID1 > (uint64_t)tDocID2 )
  116. {
  117. tReader2.HintDocID ( tDocID1 );
  118. bHaveDocs2 = tReader2.ReadDocID ( tDocID2 );
  119. } else
  120. {
  121. fnFunctor ( tRowID1, tDocID1, tReader2 );
  122. bHaveDocs1 = tReader1.Read ( tDocID1, tRowID1 );
  123. bHaveDocs2 = tReader2.ReadDocID ( tDocID2 );
  124. }
  125. }
  126. }
  127. template<typename TARGETREADER, typename KILLERREADER, typename FNACTION>
  128. int ProcessIntersected ( TARGETREADER& tReader1, KILLERREADER& tReader2, FNACTION fnAction )
  129. {
  130. int iProcessed = 0;
  131. Intersect ( tReader1, tReader2, [&iProcessed, fnAction = std::move ( fnAction )] ( RowID_t tRowID, DocID_t tDocID, KILLERREADER& ) {
  132. if ( fnAction ( tRowID, tDocID ) )
  133. ++iProcessed;
  134. } );
  135. return iProcessed;
  136. }
  137. template <typename TARGET, typename KILLER, typename MAP>
  138. int KillByLookup ( TARGET & tTargetReader, KILLER & tKillerReader, MAP & tDeadRowMap )
  139. {
  140. return ProcessIntersected ( tTargetReader, tKillerReader, [&tDeadRowMap] ( RowID_t tRowID, DocID_t ) { return tDeadRowMap.Set ( tRowID ); } );
  141. }
  142. bool WriteDeadRowMap ( const CSphString & sFilename, DWORD uTotalDocs, CSphString & sError );
  143. #endif // _killlist_