Browse Source

added columnar storage integrity checks

Ilya Kuznetsov 4 years ago
parent
commit
72a6215a41
6 changed files with 76 additions and 26 deletions
  1. 1 1
      CMakeLists.txt
  2. 1 1
      columnar_src.txt
  3. 17 4
      src/columnarlib.cpp
  4. 1 0
      src/columnarlib.h
  5. 39 16
      src/indexcheck.cpp
  6. 17 4
      src/indextool.cpp

+ 1 - 1
CMakeLists.txt

@@ -150,7 +150,7 @@ bannervar (USE_SYSLOG)
 
 # options for clang/gcc c and c++
 target_compile_options ( lextra INTERFACE $<${ONLYGNUCLANGC_CXX}:-D_FILE_OFFSET_BITS=64 -Wall -fno-strict-aliasing> )
-target_compile_options ( lextra INTERFACE $<${CLANG_CL}:/clang: -fno-strict-aliasing -Wno-format -Wno-uninitialized-const-reference -Wno-unneeded-internal-declaration
+target_compile_options ( lextra INTERFACE $<${CLANG_CL}:-fno-strict-aliasing -Wno-format -Wno-uninitialized-const-reference -Wno-unneeded-internal-declaration
 	-Wno-deprecated-declarations -Wno-implicit-const-int-float-conversion -Wno-missing-braces -Wno-unused-function> )
 
 # disable rtti

+ 1 - 1
columnar_src.txt

@@ -1 +1 @@
-GIT_REPOSITORY https://github.com/manticoresoftware/columnar.git GIT_TAG e8509d93
+GIT_REPOSITORY https://github.com/manticoresoftware/columnar.git GIT_TAG 7a2beb09

+ 17 - 4
src/columnarlib.cpp

@@ -14,13 +14,13 @@
 
 using CreateStorageReader_fn =	columnar::Columnar_i * (*) ( const std::string & sFilename, uint32_t uTotalDocs, std::string & sError );
 using CreateBuilder_fn =		columnar::Builder_i * (*) ( const columnar::Settings_t & tSettings, const columnar::Schema_t & tSchema, const std::string & sFile, std::string & sError );
-using Setup_fn =				void (*) ( columnar::Malloc_fn, columnar::Free_fn );
+using CheckStorage_fn =			void (*) ( const std::string & sFilename, uint32_t uNumRows, std::function<void (const char*)> & fnError, std::function<void (const char*)> & fnProgress );
 using VersionStr_fn =			const char * (*)();
 
 static void *					g_pColumnarLib = nullptr;
 static CreateStorageReader_fn	g_fnCreateColumnarStorage = nullptr;
 static CreateBuilder_fn 		g_fnCreateColumnarBuilder = nullptr;
-static Setup_fn 				g_fnSetupColumnar;
+static CheckStorage_fn			g_fnCheckColumnarStorage = nullptr;
 static VersionStr_fn			g_fnVersionStr = nullptr;
 
 /////////////////////////////////////////////////////////////////////
@@ -102,6 +102,20 @@ columnar::Builder_i * CreateColumnarBuilder ( const ISphSchema & tSchema, const
 	return pBuilder;
 }
 
+
+/// Run the columnar library's storage integrity check on a columnar file.
+/// \param sFile		name of the columnar storage file to verify
+/// \param uNumRows		row count handed to the library checker
+/// \param fnError		callback that receives error messages
+/// \param fnProgress	callback that receives progress messages
+/// When the columnar library is not loaded, a single error is reported through fnError and nothing is checked.
+void CheckColumnarStorage ( const CSphString & sFile, DWORD uNumRows, std::function<void (const char*)> fnError, std::function<void (const char*)> fnProgress )
+{
+	if ( IsColumnarLibLoaded() )
+	{
+		assert ( g_fnCheckColumnarStorage );
+
+		// the library entry point takes a std::string filename and a 32-bit row count
+		const std::string sFilename ( sFile.cstr() );
+		const uint32_t uRows = (uint32_t)uNumRows;
+		g_fnCheckColumnarStorage ( sFilename, uRows, fnError, fnProgress );
+		return;
+	}
+
+	fnError ( "columnar library not loaded" );
+}
+
+
 #if HAVE_DLOPEN
 template <typename T>
 static bool LoadFunc ( T & pFunc, void * pHandle, const char * szFunc, const CSphString & sLib, CSphString & sError )
@@ -173,10 +187,9 @@ bool InitColumnar ( CSphString & sError )
 
 	if ( !LoadFunc ( g_fnCreateColumnarStorage, tHandle.Get(), "CreateColumnarStorageReader", sLibfile, sError ) )	return false;
 	if ( !LoadFunc ( g_fnCreateColumnarBuilder, tHandle.Get(), "CreateColumnarBuilder", sLibfile, sError ) )		return false;
-	if ( !LoadFunc ( g_fnSetupColumnar, tHandle.Get(), "SetupColumnar", sLibfile, sError ) )						return false;
+	if ( !LoadFunc ( g_fnCheckColumnarStorage, tHandle.Get(), "CheckColumnarStorage", sLibfile, sError ) )			return false;
 	if ( !LoadFunc ( g_fnVersionStr, tHandle.Get(), "GetColumnarLibVersionStr", sLibfile, sError ) )				return false;
 
-	g_fnSetupColumnar ( malloc, free );
 	g_pColumnarLib = tHandle.Leak();
 
 	return true;

+ 1 - 0
src/columnarlib.h

@@ -20,6 +20,7 @@ class ISphSchema;
 
 columnar::Columnar_i *	CreateColumnarStorageReader ( const CSphString & sFile, DWORD uNumDocs, CSphString & sError );
 columnar::Builder_i *	CreateColumnarBuilder ( const ISphSchema & tSchema, const columnar::Settings_t & tSettings, const CSphString & sFilename, CSphString & sError );
+void					CheckColumnarStorage ( const CSphString & sFile, DWORD uNumRows, std::function<void (const char*)> fnError, std::function<void (const char*)> fnProgress );
 
 bool			InitColumnar ( CSphString & sError );
 void			ShutdownColumnar();

+ 39 - 16
src/indexcheck.cpp

@@ -18,6 +18,7 @@
 #include "secondaryindex.h"
 #include "docstore.h"
 #include "conversion.h"
+#include "columnarlib.h"
 
 
 DebugCheckError_c::DebugCheckError_c ( FILE * pFile )
@@ -305,6 +306,7 @@ private:
 	void	CheckAttributes();
 	void	CheckKillList() const;
 	void	CheckBlockIndex();
+	void	CheckColumnar();
 	void	CheckDocidLookup();
 	void	CheckDocids();
 	void	CheckDocstore();
@@ -398,7 +400,7 @@ bool DiskIndexChecker_c::OpenFiles ( CSphString & sError )
 	if ( !m_tDeadRowReader.Open ( GetFilename(SPH_EXT_SPM).cstr(), sError ) )
 		return m_tReporter.Fail ( "unable to open dead-row map: %s", sError.cstr() );
 
-	if ( !m_tAttrReader.Open ( GetFilename(SPH_EXT_SPA).cstr(), sError ) )
+	if ( m_tSchema.HasNonColumnarAttrs() && !m_tAttrReader.Open ( GetFilename(SPH_EXT_SPA).cstr(), sError ) )
 		return m_tReporter.Fail ( "unable to open attributes: %s", sError.cstr() );
 
 	if ( m_tSchema.GetAttr ( sphGetBlobLocatorName() ) )
@@ -417,11 +419,7 @@ bool DiskIndexChecker_c::OpenFiles ( CSphString & sError )
 		m_bHasDocstore = true;
 	}
 
-	CSphAutofile tDocinfo ( GetFilename(SPH_EXT_SPA), SPH_O_READ, sError );
-	if ( tDocinfo.GetFD()<0 )
-		return false;
-
-	m_bIsEmpty = m_tAttrReader.GetFilesize()==0;
+	m_bIsEmpty = m_iNumRows==0;
 
 	return true;
 }
@@ -443,6 +441,7 @@ void DiskIndexChecker_c::Check()
 	CheckDocs();
 	CheckAttributes();
 	CheckBlockIndex();
+	CheckColumnar();
 	CheckKillList();
 	CheckDocstore();
 
@@ -981,6 +980,9 @@ void DiskIndexChecker_c::CheckDocs()
 
 void DiskIndexChecker_c::CheckAttributes()
 {
+	if ( !m_tSchema.HasNonColumnarAttrs() )
+		return;
+
 	const int64_t iMinMaxStart = sizeof(DWORD) * m_iMinMaxIndex;
 	const int64_t iMinMaxEnd = sizeof(DWORD) * m_iMinMaxIndex + sizeof(DWORD) * ( m_iDocinfoIndex+1 ) * m_tSchema.GetRowSize() * 2;
 	const int64_t iMinMaxBytes = iMinMaxEnd - iMinMaxStart;
@@ -1055,6 +1057,9 @@ void DiskIndexChecker_c::CheckKillList() const
 
 void DiskIndexChecker_c::CheckBlockIndex()
 {
+	if ( !m_tSchema.HasNonColumnarAttrs() )
+		return;
+
 	m_tReporter.Msg ( "checking attribute blocks index..." );
 
 	int64_t iAllRowsTotal = m_iNumRows + (m_iDocinfoIndex+1)*2;
@@ -1160,6 +1165,19 @@ void DiskIndexChecker_c::CheckBlockIndex()
 }
 
 
+/// Check the columnar storage file (SPH_EXT_SPC) through CheckColumnarStorage().
+/// Does nothing when the schema has no columnar attributes.
+/// Error and progress messages coming from the check are routed into m_tReporter.
+void DiskIndexChecker_c::CheckColumnar()
+{
+	if ( m_tSchema.HasColumnarAttrs() )
+	{
+		m_tReporter.Msg ( "checking columnar storage..." );
+
+		// adapters that forward the checker's messages to the reporter
+		auto fnOnError = [this]( const char * szError ){ m_tReporter.Fail ( "%s", szError ); };
+		auto fnOnProgress = [this]( const char * szProgress ){ m_tReporter.Progress ( "%s", szProgress ); };
+
+		CheckColumnarStorage ( GetFilename(SPH_EXT_SPC), (DWORD)m_iNumRows, fnOnError, fnOnProgress );
+	}
+}
+
+
 void DiskIndexChecker_c::CheckDocidLookup()
 {
 	CSphString sError;
@@ -1176,6 +1194,9 @@ void DiskIndexChecker_c::CheckDocidLookup()
 	}
 	int64_t iLookupEnd = tLookup.GetFilesize();
 
+	const CSphColumnInfo * pId = m_tSchema.GetAttr("id");
+	assert(pId);
+
 	CSphFixedVector<CSphRowitem> dRow ( m_tSchema.GetRowSize() );
 	m_tAttrReader.SeekTo ( 0, (int) dRow.GetLengthBytes() );
 	CSphBitvec dRowids ( (int)m_iNumRows );
@@ -1227,8 +1248,7 @@ void DiskIndexChecker_c::CheckDocidLookup()
 				tDelta = tLookup.UnzipOffset();
 				tRowID = tLookup.GetDword();
 				if ( tDelta<0 )
-					m_tReporter.Fail ( "invalid docid delta " INT64_FMT " at row %u, checkpoint %d, doc %d, last docid " INT64_FMT,
-						tDocID, tRowID, iCp, i, tLastDocID );
+					m_tReporter.Fail ( "invalid docid delta " INT64_FMT " at row %u, checkpoint %d, doc %d, last docid " INT64_FMT, tDocID, tRowID, iCp, i, tLastDocID );
 				else
 					tDocID = tLastDocID + tDelta;
 
@@ -1236,7 +1256,7 @@ void DiskIndexChecker_c::CheckDocidLookup()
 
 			if ( tRowID>=m_iNumRows )
 				m_tReporter.Fail ( "rowid %u out of bounds " INT64_FMT, tRowID, m_iNumRows );
-			else
+			else if ( !pId->IsColumnar() )
 			{
 				// read only docid
 				m_tAttrReader.SeekTo ( dRow.GetLengthBytes() * tRowID, sizeof(DocID_t) );
@@ -1258,17 +1278,20 @@ void DiskIndexChecker_c::CheckDocidLookup()
 		iCp++;
 	}
 
-	for ( int i=0; i<m_iNumRows; i++ )
+	if ( !pId->IsColumnar() )
 	{
-		if ( dRowids.BitGet ( i ) )
-			continue;
+		for ( int i=0; i<m_iNumRows; i++ )
+		{
+			if ( dRowids.BitGet ( i ) )
+				continue;
 
-		m_tAttrReader.SeekTo ( dRow.GetLengthBytes() * i, sizeof(DocID_t) );
-		m_tAttrReader.GetBytes ( dRow.Begin(), sizeof(DocID_t) );
+			m_tAttrReader.SeekTo ( dRow.GetLengthBytes() * i, sizeof(DocID_t) );
+			m_tAttrReader.GetBytes ( dRow.Begin(), sizeof(DocID_t) );
 
-		DocID_t tDocID = sphGetDocID ( dRow.Begin() );
+			DocID_t tDocID = sphGetDocID ( dRow.Begin() );
 		
-		m_tReporter.Fail ( "row %u(" INT64_FMT ") not mapped at lookup, docid " INT64_FMT, i, m_iNumRows, tDocID );
+			m_tReporter.Fail ( "row %u(" INT64_FMT ") not mapped at lookup, docid " INT64_FMT, i, m_iNumRows, tDocID );
+		}
 	}
 }
 

+ 17 - 4
src/indextool.cpp

@@ -1008,11 +1008,17 @@ static void ApplyKilllists ( CSphConfig & hConf )
 }
 
 
-static void ShowVersion ()
+/// Print the version banner to stdout: product name, then " (columnar <version>)" when
+/// GetColumnarVersionStr() returns a non-null string, then the banner text.
+static void ShowVersion()
 {
-	fprintf ( stdout, "%s", szMANTICORE_BANNER );
+	// stays empty unless a columnar version string is available
+	CSphString sColumnarSuffix = "";
+	if ( const char * szVersion = GetColumnarVersionStr() )
+		sColumnarSuffix.SetSprintf ( " (columnar %s)", szVersion );
+
+	fprintf ( stdout, "%s%s%s", szMANTICORE_NAME, sColumnarSuffix.cstr(), szMANTICORE_BANNER_TEXT );
 }
 
+
 static void ShowHelp ()
 {
 	fprintf ( stdout,
@@ -1343,8 +1349,14 @@ int main ( int argc, char ** argv )
 		}
 	}
 
+	CSphString sError;
+	bool bColumnarError = !InitColumnar ( sError );
+
 	if ( !bQuiet )
-		fprintf ( stdout, "%s", szMANTICORE_BANNER );
+		ShowVersion();
+
+	if ( bColumnarError )
+		fprintf ( stdout, "Error initializing columnar storage: %s", sError.cstr() );
 
 	if ( i!=argc )
 	{
@@ -1356,7 +1368,6 @@ int main ( int argc, char ** argv )
 	// load proper config
 	//////////////////////
 
-	CSphString sError;
 	if ( !sphInitCharsetAliasTable ( sError ) )
 		sphDie ( "failed to init charset alias table: %s", sError.cstr() );
 
@@ -1629,5 +1640,7 @@ int main ( int argc, char ** argv )
 			sphDie ( "INTERNAL ERROR: unhandled command (id=%d)", (int)g_eCommand );
 	}
 
+	ShutdownColumnar();
+
 	return 0;
 }