6 years ago · d8a174c90d
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -17,7 +17,8 @@ set ( LIBSPHINX_SRCS sphinx.cpp sphinxexcerpt.cpp
 
				 		sphinxqcache.cpp sphinxjsonquery.cpp
			
 
				 		attribute.cpp secondaryindex.cpp killlist.cpp searchnode.cpp json/cJSON.c
			
 
				 		sphinxpq.cpp icu.cpp global_idf.cpp docstore.cpp lz4/lz4.c lz4/lz4hc.c
			
 
				-		searchdexpr.cpp snippetfunctor.cpp snippetindex.cpp snippetstream.cpp )
			
 
				+		searchdexpr.cpp snippetfunctor.cpp snippetindex.cpp snippetstream.cpp
			
 
				+		indexcheck.cpp datareader.cpp indexformat.cpp )
			
 
				 set ( INDEXER_SRCS indexer.cpp )
			
 
				 set ( INDEXTOOL_SRCS indextool.cpp )
			
 
				 set ( SEARCHD_SRCS_TESTABLE searchdha.cpp http/http_parser.c searchdhttp.cpp
			
--- a/src/attribute.cpp
+++ b/src/attribute.cpp
@@ -16,6 +16,7 @@
 
				 
			
 
				 #include "sphinxint.h"
			
 
				 #include "sphinxjson.h"
			
 
				+#include "indexcheck.h"
			
 
				 
			
 
				 //////////////////////////////////////////////////////////////////////////
			
 
				 // blob attributes
			
--- a/src/datareader.cpp
+++ b/src/datareader.cpp
@@ -0,0 +1,276 @@
 
				+//
			
 
				+// Copyright (c) 2017-2019, Manticore Software LTD (http://manticoresearch.com)
			
 
				+// All rights reserved
			
 
				+//
			
 
				+// This program is free software; you can redistribute it and/or modify
			
 
				+// it under the terms of the GNU General Public License. You should have
			
 
				+// received a copy of the GPL license along with this program; if you
			
 
				+// did not, you can find it at http://www.gnu.org/
			
 
				+//
			
 
				+
			
 
				+#include "datareader.h"
			
 
				+#include "sphinxint.h"
			
 
				+
			
 
				+//////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+/// Maps the kind of data served by a reader to the query state assigned to its reads.
				+inline static ESphQueryState StateByKind ( DataReaderFactory_c::Kind_e eKind )
				+{
				+	if ( eKind==DataReaderFactory_c::DOCS )
				+		return SPH_QSTATE_READ_DOCS;
				+
				+	if ( eKind==DataReaderFactory_c::HITS )
				+		return SPH_QSTATE_READ_HITS;
				+
				+	// any other kind falls back to the generic IO state
				+	return SPH_QSTATE_IO;
				+}
			
 
				+
			
 
				+//////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+// Shared base of the concrete readers: remembers the source file name and
				+// implements rowid/wordid decoding on top of the generic varint decoders.
				+class FileBlockReader_c : public FileBlockReader_i
				+{
				+protected:
				+	// stored as a raw pointer, no copy is made
				+	// NOTE(review): the pointed-to string has to outlive the reader - confirm against the factories
				+	const char * m_szFileName = nullptr;
				+
				+public:
				+	explicit FileBlockReader_c ( const char * szFileName )
				+		: m_szFileName ( szFileName )
				+	{}
				+
				+	RowID_t UnzipRowid() override
				+	{
				+		return UnzipInt();
				+	}
				+
				+	SphWordID_t UnzipWordid() override
				+	{
				+		return UnzipOffset();
				+	}
				+};
			
 
				+
			
 
				+//////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+// Imitates CSphReader, but works fully in memory (intended to be used with mmap).
				+// The constructor is private: instances are made by MMapFactory_c only.
				+class ThinMMapReader_c : public FileBlockReader_c
				+{
				+	friend class MMapFactory_c;
				+
				+	const BYTE *	m_pBase = nullptr;		// start of the arena
				+	const BYTE *	m_pPointer = nullptr;	// current read position
				+	SphOffset_t		m_iSize = 0;			// arena size as passed to the ctor
				+
				+	ThinMMapReader_c ( const BYTE * pArena, SphOffset_t iSize, const char * sFileName )
				+		: FileBlockReader_c ( sFileName )
				+		, m_pBase ( pArena )
				+		, m_pPointer ( pArena )
				+		, m_iSize ( iSize )
				+	{}
				+
				+	// Returns the byte at the current position and advances.
				+	// An out-of-range position is logged and yields 0 without advancing.
				+	BYTE GetByte()
				+	{
				+		const auto iOff = m_pPointer - m_pBase;
				+		if ( iOff<0 || iOff>=m_iSize )
				+		{
				+			sphWarning( "INTERNAL: out-of-range in ThinMMapReader_c: trying to read '%s' at " INT64_FMT ", from mmap of "
				+				INT64_FMT ", query most probably would FAIL; report the fact to dev!",
				+				( m_szFileName ? m_szFileName : "" ), int64_t(iOff), int64_t(m_iSize) );
				+
				+			// it's better than a crash because of an unexpected out-of-range read (file reader does the same there)
				+			return 0;
				+		}
				+
				+		return *m_pPointer++;
				+	}
				+
				+public:
				+	// offset of the current position from the arena start; 0 when there is no position at all
				+	SphOffset_t GetPos () const final
				+	{
				+		if ( m_pPointer )
				+		{
				+			assert ( m_pBase );
				+			return m_pPointer - m_pBase;
				+		}
				+
				+		return 0;
				+	}
				+
				+	// the size hint is irrelevant for in-memory access and is ignored
				+	void SeekTo ( SphOffset_t iPos, int /*iSizeHint*/ ) final
				+	{
				+		m_pPointer = m_pBase + iPos;
				+	}
				+
				+	DWORD		UnzipInt () final;
				+	uint64_t	UnzipOffset () final;
				+
				+	// rewind to the arena start
				+	void Reset () final
				+	{
				+		m_pPointer = m_pBase;
				+	}
				+
				+protected:
				+	~ThinMMapReader_c() final {}
				+};
			
 
				+
			
 
				+
			
 
				+DWORD ThinMMapReader_c::UnzipInt() // decodes a DWORD, taking input bytes from GetByte()
			
 
				+{
			
 
				+	SPH_VARINT_DECODE ( DWORD, GetByte() ); // macro defined elsewhere; presumably it expands to the decode loop and the return - confirm
			
 
				+}
			
 
				+
			
 
				+
			
 
				+uint64_t ThinMMapReader_c::UnzipOffset() // decodes a 64-bit value, taking input bytes from GetByte()
			
 
				+{
			
 
				+	SPH_VARINT_DECODE ( uint64_t, GetByte() ); // macro defined elsewhere; presumably it expands to the decode loop and the return - confirm
			
 
				+}
			
 
				+
			
 
				+//////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+// Reader backed by FileReader_c: every FileBlockReader_i call is forwarded to the
				+// FileReader_c member of the same name. Construction and destruction are protected;
				+// DirectFactory_c is a friend and creates the instances.
				+class DirectFileReader_c : public FileBlockReader_c, protected FileReader_c
				+{
				+	friend class DirectFactory_c;
				+
				+protected:
				+	explicit DirectFileReader_c ( BYTE * pBuf, int iSize, const char * szFileName )
				+		: FileBlockReader_c ( szFileName )
				+		, FileReader_c ( pBuf, iSize )
				+	{}
				+
				+	~DirectFileReader_c() final {}
				+
				+public:
				+	SphOffset_t GetPos () const final { return FileReader_c::GetPos(); }
				+
				+	void SeekTo ( SphOffset_t iPos, int iSizeHint ) final { FileReader_c::SeekTo ( iPos, iSizeHint ); }
				+
				+	DWORD UnzipInt() final { return FileReader_c::UnzipInt(); }
				+
				+	uint64_t UnzipOffset() final { return FileReader_c::UnzipOffset(); }
				+
				+	void Reset() final { FileReader_c::Reset(); }
				+};
			
 
				+
			
 
				+//////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+// Producer of readers which access the file by Seek + Read.
				+// The factory opens the file in its ctor; the readers it makes share that descriptor.
				+class DirectFactory_c : public DataReaderFactory_c
				+{
				+	CSphAutoreader	m_dReader;				// opened in the ctor; source of the FD and the file name
				+	ESphQueryState	m_eWorkState;			// handed to every reader as its profile state
				+	SphOffset_t		m_iPos = 0;				// position newly made readers are moved to
				+	int				m_iReadBuffer = 0;
				+	int				m_iReadUnhinted = 0;
				+
				+public:
				+	DirectFactory_c ( const CSphString & sFile, CSphString & sError, ESphQueryState eState, int iReadBuffer, int iReadUnhinted )
				+		: m_eWorkState ( eState )
				+		, m_iReadBuffer ( iReadBuffer )
				+		, m_iReadUnhinted ( iReadUnhinted )
				+	{
				+		// the factory is valid only if the file could be opened
				+		const bool bOpened = m_dReader.Open ( sFile, sError );
				+		SetValid ( bOpened );
				+	}
				+
				+	SphOffset_t GetFilesize () const final { return m_dReader.GetFilesize(); }
				+
				+	SphOffset_t GetPos () const final { return m_iPos; }
				+
				+	// only remembers the position; it is applied to readers made afterwards
				+	void SeekTo ( SphOffset_t iPos ) final { m_iPos = iPos; }
				+
				+	// returns a dependent reader sharing the same FD as the maker
				+	FileBlockReader_c * MakeReader ( BYTE * pBuf, int iSize ) final
				+	{
				+		auto pMade = new DirectFileReader_c ( pBuf, iSize, m_dReader.GetFilename().cstr() );
				+		pMade->SetFile ( m_dReader.GetFD(), m_dReader.GetFilename().cstr() );
				+		pMade->SetBuffers ( m_iReadBuffer, m_iReadUnhinted );
				+
				+		if ( m_iPos!=0 )
				+			pMade->SeekTo ( m_iPos, READ_NO_SIZE_HINT );
				+
				+		pMade->m_pProfile = m_dReader.m_pProfile;
				+		pMade->m_eProfileState = m_eWorkState;
				+		return pMade;
				+	}
				+
				+	void SetProfile ( CSphQueryProfile* pProfile ) final { m_dReader.m_pProfile = pProfile; }
				+
				+protected:
				+	~DirectFactory_c() final {} // d-tr only by Release
				+};
			
 
				+
			
 
				+//////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+// Producer of readers which access the file via mmap.
				+// The factory maps the file in its ctor; the readers it makes share that mapping.
				+class MMapFactory_c : public DataReaderFactory_c
				+{
				+public:
				+	// Maps sFile; the factory is valid only if the mapping succeeded.
				+	// With FileAccess_e::MLOCK the mapped pages are additionally locked in memory.
				+	MMapFactory_c ( const CSphString & sFile, CSphString & sError, FileAccess_e eAccess )
				+	{
				+		SetValid ( m_tBackendFile.Setup ( sFile, sError ) );
				+
				+		// lock only a successfully mapped file: locking after a failed Setup has nothing
				+		// to lock and may replace the setup error stored in sError with a locking message
				+		if ( IsValid() && eAccess==FileAccess_e::MLOCK )
				+			m_tBackendFile.MemLock( sError );
				+	}
				+
				+	SphOffset_t GetFilesize () const final
				+	{
				+		return m_tBackendFile.GetLength64 ();
				+	}
				+
				+	SphOffset_t GetPos () const final
				+	{
				+		return m_iPos;
				+	}
				+
				+	// only remembers the position; it is applied to readers made afterwards
				+	void SeekTo ( SphOffset_t iPos ) final
				+	{
				+		m_iPos = iPos;
				+	}
				+
				+	// returns a dependent reader sharing the same mmap as the maker; the buffer arguments are not used
				+	FileBlockReader_c * MakeReader ( BYTE *, int ) final
				+	{
				+		auto pReader = new ThinMMapReader_c ( m_tBackendFile.GetWritePtr(),
				+			m_tBackendFile.GetLength64(), m_tBackendFile.GetFileName() );
				+		if ( m_iPos )
				+			pReader->SeekTo ( m_iPos, 0 );
				+		return pReader;
				+	}
				+
				+protected:
				+	~MMapFactory_c() final {} // d-tr only by Release
				+
				+private:
				+	CSphMappedBuffer<BYTE>	m_tBackendFile;		// the mapping shared with the readers
				+	SphOffset_t				m_iPos = 0;			// position newly made readers are moved to
				+};
			
 
				+
			
 
				+//////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+extern int g_iReadUnhinted;
			
 
				+
			
 
				+// Creates the reader factory for sFile: a seek+read factory for FileAccess_e::FILE,
				+// an mmap-based one for every other access mode. A factory that did not become valid
				+// is released; the (possibly reset) pointer is what the caller gets back.
				+DataReaderFactory_c * NewProxyReader ( const CSphString & sFile, CSphString & sError, DataReaderFactory_c::Kind_e eKind, int iReadBuffer, FileAccess_e eAccess )
				+{
				+	DataReaderFactory_c * pFactory = nullptr;
				+
				+	if ( eAccess!=FileAccess_e::FILE )
				+		pFactory = new MMapFactory_c ( sFile, sError, eAccess );
				+	else
				+		pFactory = new DirectFactory_c ( sFile, sError, StateByKind ( eKind ), iReadBuffer, g_iReadUnhinted );
				+
				+	// NOTE(review): SafeRelease is defined elsewhere; presumably it also nulls the pointer - confirm
				+	if ( !pFactory->IsValid() )
				+	{
				+		SafeRelease ( pFactory );
				+	}
				+
				+	return pFactory;
				+}
			
--- a/src/datareader.h
+++ b/src/datareader.h
@@ -0,0 +1,65 @@
 
				+//
			
 
				+// Copyright (c) 2017-2019, Manticore Software LTD (http://manticoresearch.com)
			
 
				+// All rights reserved
			
 
				+//
			
 
				+// This program is free software; you can redistribute it and/or modify
			
 
				+// it under the terms of the GNU General Public License. You should have
			
 
				+// received a copy of the GPL license along with this program; if you
			
 
				+// did not, you can find it at http://www.gnu.org/
			
 
				+//
			
 
				+
			
 
				+#ifndef _datareader_
			
 
				+#define _datareader_
			
 
				+
			
 
				+#include "sphinx.h"
			
 
				+
			
 
				+#define READ_NO_SIZE_HINT 0
			
 
				+
			
 
				+// Reader from a file or a file map: positioning plus decoding of packed values.
				+class FileBlockReader_i : public ISphRefcountedMT
				+{
				+public:
				+	// positioning
				+	virtual SphOffset_t GetPos() const = 0;
				+	virtual void SeekTo ( SphOffset_t iPos, int iSizeHint ) = 0;
				+
				+	// decoding of packed values at the current position
				+	virtual DWORD UnzipInt() = 0;
				+	virtual uint64_t UnzipOffset() = 0;
				+	virtual RowID_t UnzipRowid() = 0;
				+	virtual SphWordID_t UnzipWordid() = 0;
				+
				+	virtual void Reset () = 0;
				+};
			
 
				+
			
 
				+
			
 
				+using FileBlockReaderPtr_c = CSphRefcountedPtr<FileBlockReader_i>;
			
 
				+
			
 
				+// Producer of readers from a file or a file map.
				+// A derived class reports via SetValid() whether its backend could be set up.
				+class DataReaderFactory_c : public ISphRefcountedMT
				+{
				+	bool m_bValid = false;
				+
				+public:
				+	// what kind of data the readers will serve
				+	enum Kind_e
				+	{
				+		DOCS,
				+		HITS
				+	};
				+
				+	bool IsValid () const
				+	{
				+		return m_bValid;
				+	}
				+
				+	virtual SphOffset_t GetFilesize () const = 0;
				+	virtual SphOffset_t GetPos () const = 0;
				+	virtual void SeekTo ( SphOffset_t ) = 0;
				+	virtual FileBlockReader_i * MakeReader ( BYTE * pBuf, int iSize ) = 0;
				+
				+	// does nothing by default
				+	virtual void SetProfile ( CSphQueryProfile * ) {}
				+
				+protected:
				+	~DataReaderFactory_c () override {}
				+
				+	void SetValid ( bool bValid )
				+	{
				+		m_bValid = bValid;
				+	}
				+};
			
 
				+
			
 
				+using DataReaderFactoryPtr_c = CSphRefcountedPtr<DataReaderFactory_c>;
			
 
				+
			
 
				+DataReaderFactory_c * NewProxyReader ( const CSphString & sFile, CSphString & sError, DataReaderFactory_c::Kind_e eKind, int iReadBuffer, FileAccess_e eAccess );
			
 
				+
			
 
				+#endif // _datareader_
			
--- a/src/docstore.cpp
+++ b/src/docstore.cpp
@@ -16,6 +16,7 @@
 
				 
			
 
				 #include "sphinxint.h"
			
 
				 #include "attribute.h"
			
 
				+#include "indexcheck.h"
			
 
				 #include "lz4/lz4.h"
			
 
				 #include "lz4/lz4hc.h"
			
 
				 
			
@@ -28,7 +29,8 @@ enum BlockFlags_e : BYTE
 
				 enum BlockType_e : BYTE
			
 
				 {
			
 
				 	BLOCK_TYPE_SMALL,
			
 
				-	BLOCK_TYPE_BIG
			
 
				+	BLOCK_TYPE_BIG,
			
 
				+	BLOCK_TYPE_TOTAL
			
 
				 };
			
 
				 
			
 
				 enum DocFlags_e : BYTE
			
@@ -717,6 +719,8 @@ static void CreateFieldRemap ( VecTraits_T<int> & dFieldInRset, const VecTraits_
 
				 
			
 
				 class Docstore_c : public Docstore_i, public DocstoreSettings_t
			
 
				 {
			
 
				+	friend class DocstoreChecker_c;
			
 
				+
			
 
				 public:
			
 
				 						Docstore_c ( const CSphString & sFilename );
			
 
				 						~Docstore_c();
			
@@ -765,7 +769,7 @@ private:
 
				 
			
 
				 	bool						ProcessSmallBlockDoc ( RowID_t tCurDocRowID, RowID_t tRowID, const VecTraits_T<int> * pFieldIds, const CSphFixedVector<int> & dFieldInRset, bool bPack,
			
 
				 		MemoryReader2_c & tReader, CSphBitvec & tEmptyFields, DocstoreDoc_t & tResult ) const;
			
 
				-	const void					ProcessBigBlockField ( int iField, const FieldInfo_t & tInfo, int iFieldInRset, bool bPack, int64_t iSessionId, SphOffset_t & tOffset, DocstoreDoc_t & tResult ) const;
			
 
				+	void						ProcessBigBlockField ( int iField, const FieldInfo_t & tInfo, int iFieldInRset, bool bPack, int64_t iSessionId, SphOffset_t & tOffset, DocstoreDoc_t & tResult ) const;
			
 
				 };
			
 
				 
			
 
				 
			
@@ -1084,7 +1088,7 @@ BlockCache_c::BlockData_t Docstore_c::UncompressBigBlockField ( SphOffset_t tOff
 
				 }
			
 
				 
			
 
				 
			
 
				-const void Docstore_c::ProcessBigBlockField ( int iField, const FieldInfo_t & tInfo, int iFieldInRset, bool bPack, int64_t iSessionId, SphOffset_t & tOffset, DocstoreDoc_t & tResult ) const
			
 
				+void Docstore_c::ProcessBigBlockField ( int iField, const FieldInfo_t & tInfo, int iFieldInRset, bool bPack, int64_t iSessionId, SphOffset_t & tOffset, DocstoreDoc_t & tResult ) const
			
 
				 {
			
 
				 	if ( tInfo.m_uFlags & FIELD_FLAG_EMPTY )
			
 
				 		return;
			
@@ -1743,6 +1747,312 @@ DocstoreSession_c::~DocstoreSession_c()
 
				 
			
 
				 //////////////////////////////////////////////////////////////////////////
			
 
				 
			
 
				+// Walks a docstore file through the given reader and reports every inconsistency
				+// it finds to the given reporter.
				+class DocstoreChecker_c
				+{
				+public:
				+	DocstoreChecker_c ( CSphAutoreader & tReader, DebugCheckError_c & tReporter );
				+
				+	// runs the whole check; see the definition for the meaning of the result
				+	bool Check();
				+
				+private:
				+	CSphAutoreader &			m_tReader;
				+	DebugCheckError_c &			m_tReporter;
				+	const char *				m_szFilename = nullptr;	// used in the messages
				+	DocstoreFields_c			m_tFields;				// fields listed in the file
				+	CSphScopedPtr<Compressor_i>	m_pCompressor { nullptr };	// created by Check() from the compression byte
				+
				+	// per-block checks
				+	void CheckBlock ( const Docstore_c::Block_t & tBlock );
				+	void CheckSmallBlock ( const Docstore_c::Block_t & tBlock );
				+	void CheckSmallBlockDoc ( MemoryReader2_c & tReader, CSphBitvec & tEmptyFields, SphOffset_t tOffset );
				+	void CheckBigBlock ( const Docstore_c::Block_t & tBlock );
				+	void CheckBigBlockField ( const Docstore_c::FieldInfo_t & tInfo, SphOffset_t & tOffset );
				+};
			
 
				+
			
 
				+
			
 
				+// Binds the checker to its reader and reporter; the file name used in messages is taken from the reader.
				+DocstoreChecker_c::DocstoreChecker_c ( CSphAutoreader & tReader, DebugCheckError_c & tReporter )
				+	: m_tReader ( tReader ), m_tReporter ( tReporter ), m_szFilename ( tReader.GetFilename().cstr() )
				+{
				+}
			
 
				+
			
 
				+
			
 
				+// Checks the docstore file read through m_tReader: the settings, the field list,
				+// the block directory stored at the header offset, and then every block.
				+// Returns the result of Fail() as soon as a problem in the settings or in the block
				+// directory is found. Problems found inside blocks are only reported to m_tReporter
				+// and do not change the return value; a reader error at the end is returned as a failure.
				+bool DocstoreChecker_c::Check()
				+{
				+	DWORD uStorageVersion = m_tReader.GetDword();
				+	if ( uStorageVersion > STORAGE_VERSION )
				+		return m_tReporter.Fail ( "Unable to load docstore: %s is v.%d, binary is v.%d", m_szFilename, uStorageVersion, STORAGE_VERSION );
				+
				+	m_tReader.GetDword();	// block size, not needed for the check
				+	BYTE uCompression = m_tReader.GetByte();
				+	if ( uCompression > 2 )
				+		return m_tReporter.Fail ( "Unknown docstore compression %u in %s", uCompression, m_szFilename );
				+
				+	Compression_e eCompression = Byte2Compression(uCompression);
				+	m_pCompressor = CreateCompressor ( eCompression, DEFAULT_COMPRESSION_LEVEL );
				+	if ( !m_pCompressor.Ptr() )
				+		return m_tReporter.Fail ( "Unable to create compressor in %s", m_szFilename );
				+
				+	DWORD uNumFields = m_tReader.GetDword();
				+	const DWORD MAX_SANE_FIELDS = 32768;
				+	if ( uNumFields > MAX_SANE_FIELDS )
				+		return m_tReporter.Fail ( "Too many docstore fields (%u) in %s", uNumFields, m_szFilename );
				+
				+	for ( int i = 0; i < (int)uNumFields; i++ )
				+	{
				+		BYTE uDataType = m_tReader.GetByte();
				+
				+		// NOTE(review): DOCSTORE_TOTAL is assumed to be the count marker of the enum
				+		// (like BLOCK_TYPE_TOTAL), so a value equal to it is not a data type either
				+		if ( uDataType >= DOCSTORE_TOTAL )
				+			return m_tReporter.Fail ( "Unknown docstore data type (%u) in %s", uDataType, m_szFilename );
				+
				+		DocstoreDataType_e eType = (DocstoreDataType_e)uDataType;
				+		CSphString sName = m_tReader.GetString();
				+		const int MAX_SANE_FIELD_NAME_LEN = 32768;
				+		if ( sName.Length() > MAX_SANE_FIELD_NAME_LEN )
				+			return m_tReporter.Fail ( "Docstore field name too long (%d) in %s", sName.Length(), m_szFilename );
				+
				+		m_tFields.AddField ( sName, eType );
				+	}
				+
				+	DWORD uNumBlocks = m_tReader.GetDword();
				+	if ( !uNumBlocks )
				+		return m_tReporter.Fail ( "Docstore has 0 blocks in %s", m_szFilename );
				+
				+	SphOffset_t tHeaderOffset = m_tReader.GetOffset();
				+	if ( tHeaderOffset <= 0 || tHeaderOffset >= m_tReader.GetFilesize() )
				+		return m_tReporter.Fail ( "Wrong docstore header offset (" INT64_FMT ") in %s", tHeaderOffset, m_szFilename );
				+
				+	// every directory entry read below takes at least one byte, so there cannot be more
				+	// entries than bytes between the header offset and the end of file; this keeps a
				+	// corrupted counter from driving a huge allocation
				+	if ( (SphOffset_t)uNumBlocks > m_tReader.GetFilesize()-tHeaderOffset )
				+		return m_tReporter.Fail ( "Too many docstore blocks (%u) in %s", uNumBlocks, m_szFilename );
				+
				+	m_tReader.SeekTo ( tHeaderOffset, 0 );
				+
				+	CSphFixedVector<Docstore_c::Block_t> dBlocks(uNumBlocks);
				+
				+	// rowids and offsets are stored as deltas to the previous directory entry
				+	DWORD tPrevBlockRowID = 0;
				+	SphOffset_t tPrevBlockOffset = 0;
				+	for ( auto & tBlock : dBlocks )
				+	{
				+		RowID_t uUnzipped = m_tReader.UnzipRowid();
				+		if ( (int64_t)uUnzipped + tPrevBlockRowID >= (int64_t)0xFFFFFFFF )
				+			return m_tReporter.Fail ( "Docstore rowid overflow in %s", m_szFilename );
				+
				+		tBlock.m_tRowID = uUnzipped + tPrevBlockRowID;
				+		BYTE uBlockType = m_tReader.GetByte();
				+
				+		// BLOCK_TYPE_TOTAL is the count of block types, not a type itself
				+		if ( uBlockType>=BLOCK_TYPE_TOTAL )
				+			return m_tReporter.Fail ( "Unknown docstore block type (%u) in %s", uBlockType, m_szFilename );
				+
				+		tBlock.m_eType = (BlockType_e)uBlockType;
				+		tBlock.m_tOffset = m_tReader.UnzipOffset() + tPrevBlockOffset;
				+		if ( tBlock.m_tOffset <= 0 || tBlock.m_tOffset >= m_tReader.GetFilesize() )
				+			return m_tReporter.Fail ( "Wrong docstore block offset (" INT64_FMT ") in %s", tBlock.m_tOffset, m_szFilename );
				+
				+		if ( tBlock.m_eType==BLOCK_TYPE_BIG )
				+			tBlock.m_uHeaderSize = m_tReader.UnzipInt();
				+
				+		tPrevBlockRowID = tBlock.m_tRowID;
				+		tPrevBlockOffset = tBlock.m_tOffset;
				+	}
				+
				+	// a block ends where the next one starts; offsets must strictly ascend
				+	for ( int i = 1; i<dBlocks.GetLength(); i++ )
				+	{
				+		if ( dBlocks[i-1].m_tOffset>=dBlocks[i].m_tOffset )
				+			return m_tReporter.Fail ( "Descending docstore block offset in %s", m_szFilename );
				+
				+		dBlocks[i-1].m_uSize = dBlocks[i].m_tOffset-dBlocks[i-1].m_tOffset;
				+	}
				+
				+	// the last block ends where the block directory starts
				+	dBlocks.Last().m_uSize = tHeaderOffset-dBlocks.Last().m_tOffset;
				+
				+	for ( auto & tBlock : dBlocks )
				+	{
				+		if ( tBlock.m_tOffset+tBlock.m_uSize > m_tReader.GetFilesize() )
				+			return m_tReporter.Fail ( "Docstore block size+offset out of bounds in %s", m_szFilename );
				+
				+		CheckBlock(tBlock);
				+	}
				+
				+	if ( m_tReader.GetErrorFlag() )
				+		return m_tReporter.Fail ( "%s", m_tReader.GetErrorMessage().cstr() );
				+
				+	return true;
				+}
			
 
				+
			
 
				+
			
 
				+// Checks one document of a small block, starting at the current position of tReader:
				+// the doc flags, the optional empty-fields bitmask, then the length-prefixed data of
				+// every non-empty field. tEmptyFields is scratch storage for the bitmask; tOffset is
				+// the block offset and is used in messages only.
				+// Parsing of the document stops at the first out-of-bounds condition.
				+void DocstoreChecker_c::CheckSmallBlockDoc ( MemoryReader2_c & tReader, CSphBitvec & tEmptyFields, SphOffset_t tOffset )
				+{
				+	BYTE uDocFlags = tReader.GetByte();
				+
				+	if ( uDocFlags & ( ~(DOC_FLAG_ALL_EMPTY | DOC_FLAG_EMPTY_BITMASK) ) )
				+		m_tReporter.Fail ( "Unknown docstore doc flag (%u) in %s (offset " INT64_FMT ")", uDocFlags, m_szFilename, tOffset );
				+
				+	if ( uDocFlags & DOC_FLAG_ALL_EMPTY )
				+		return;
				+
				+	DWORD uBitMaskSize = tEmptyFields.GetSize()*sizeof(DWORD);
				+
				+	bool bHasBitmask = !!(uDocFlags & DOC_FLAG_EMPTY_BITMASK);
				+	if ( bHasBitmask )
				+	{
				+		// the mask has to fit into what is left of the block, otherwise memcpy reads past the buffer
				+		if ( (int64_t)tReader.GetPos()+uBitMaskSize > (int64_t)tReader.GetLength() )
				+		{
				+			m_tReporter.Fail ( "Out of bounds in docstore field data in %s (offset " INT64_FMT ")", m_szFilename, tOffset );
				+			return;
				+		}
				+
				+		memcpy ( tEmptyFields.Begin(), tReader.Begin()+tReader.GetPos(), uBitMaskSize );
				+		tReader.SetPos ( tReader.GetPos()+uBitMaskSize );
				+	}
				+
				+	for ( int iField = 0; iField < m_tFields.GetNumFields(); iField++ )
				+		if ( !bHasBitmask || !tEmptyFields.BitGet(iField) )
				+		{
				+			DWORD uFieldLength = tReader.UnzipInt();
				+
				+			// do the sum in 64 bits: a corrupted length must not wrap around the position
				+			int64_t iFieldEnd = (int64_t)tReader.GetPos()+uFieldLength;
				+			if ( iFieldEnd > (int64_t)tReader.GetLength() )
				+			{
				+				m_tReporter.Fail ( "Out of bounds in docstore field data in %s (offset " INT64_FMT ")", m_szFilename, tOffset );
				+				return; // nothing sane can be decoded past this point
				+			}
				+
				+			tReader.SetPos ( (int)iFieldEnd );
				+		}
				+}
			
 
				+
			
 
				+
			
 
				+// Checks a small block: reads the whole block, validates its header (flags, number of
				+// docs, uncompressed and compressed sizes), decompresses the payload when necessary
				+// and checks the announced number of documents one by one.
				+// The payload is checked in place (or in a scoped decompression buffer), so no manual
				+// memory management is involved.
				+void DocstoreChecker_c::CheckSmallBlock ( const Docstore_c::Block_t & tBlock )
				+{
				+	CSphFixedVector<BYTE> dBlock ( tBlock.m_uSize );
				+
				+	m_tReader.SeekTo ( tBlock.m_tOffset, 0 );
				+	m_tReader.GetBytes ( dBlock.Begin(), dBlock.GetLength() );
				+
				+	MemoryReader2_c tBlockReader ( dBlock.Begin(), dBlock.GetLength() );
				+	BYTE uFlags = tBlockReader.GetByte();
				+	DWORD uNumDocs = tBlockReader.UnzipInt();
				+	DWORD uSize = tBlockReader.UnzipInt();	// uncompressed payload size
				+	DWORD uCompressedLength = uSize;
				+	bool bCompressed = !!( uFlags & BLOCK_FLAG_COMPRESSED );
				+	if ( bCompressed )
				+		uCompressedLength = tBlockReader.UnzipInt();
				+
				+	if ( uFlags!=0 && uFlags!=BLOCK_FLAG_COMPRESSED )
				+		m_tReporter.Fail ( "Unknown docstore small block flag (%u) in %s (offset " INT64_FMT ")", uFlags, m_szFilename, tBlock.m_tOffset );
				+
				+	if ( uCompressedLength>uSize )
				+		m_tReporter.Fail ( "Docstore block size mismatch: compressed=%u, uncompressed=%u in %s (offset " INT64_FMT ")", uCompressedLength, uSize, m_szFilename, tBlock.m_tOffset );
				+
				+	// the stored payload has to fit into the bytes of this block, otherwise it would be read past the buffer
				+	if ( (int64_t)tBlockReader.GetPos()+uCompressedLength > (int64_t)dBlock.GetLength() )
				+	{
				+		m_tReporter.Fail ( "Docstore block size+offset out of bounds in %s (offset " INT64_FMT ")", m_szFilename, tBlock.m_tOffset );
				+		return;
				+	}
				+
				+	BYTE * pBody = dBlock.Begin() + tBlockReader.GetPos();
				+	BYTE * pDocs = pBody;	// uncompressed payload is checked right where it was read
				+
				+	CSphFixedVector<BYTE> dDecompressed(0);
				+	if ( bCompressed )
				+	{
				+		dDecompressed.Reset ( uSize );
				+		if ( !m_pCompressor->Decompress ( VecTraits_T<const BYTE> (pBody, uCompressedLength), dDecompressed) )
				+		{
				+			m_tReporter.Fail ( "Error decompressing small block in %s (offset " INT64_FMT ")", m_szFilename, tBlock.m_tOffset );
				+			return; // the buffer content is undefined, there is nothing to parse
				+		}
				+
				+		pDocs = dDecompressed.Begin();
				+	}
				+
				+	MemoryReader2_c tReader ( pDocs, uSize );
				+	CSphBitvec tEmptyFields ( m_tFields.GetNumFields() );
				+	for ( int i = 0; i < (int)uNumDocs; i++ )
				+	{
				+		// every document takes at least its flags byte
				+		if ( tReader.GetPos()>=tReader.GetLength() )
				+		{
				+			m_tReporter.Fail ( "Out of bounds in docstore field data in %s (offset " INT64_FMT ")", m_szFilename, tBlock.m_tOffset );
				+			break;
				+		}
				+
				+		CheckSmallBlockDoc ( tReader, tEmptyFields, tBlock.m_tOffset );
				+	}
				+}
			
 
				+
			
 
				+
			
 
				+// Checks the data of one field of a big block. tInfo comes from the block header;
				+// tOffset is the file offset of the field data and is advanced past the field
				+// (empty fields occupy no space and leave it untouched).
				+// The field is read, and decompressed when flagged so, only if it lies within the file.
				+void DocstoreChecker_c::CheckBigBlockField ( const Docstore_c::FieldInfo_t & tInfo, SphOffset_t & tOffset )
				+{
				+	if ( tInfo.m_uFlags & FIELD_FLAG_EMPTY )
				+		return;
				+
				+	bool bCompressed = !!( tInfo.m_uFlags & FIELD_FLAG_COMPRESSED );
				+	SphOffset_t tOffsetDelta = bCompressed ? tInfo.m_uCompressedLen : tInfo.m_uUncompressedLen;
				+	SphOffset_t tFieldOffset = tOffset;
				+
				+	// the caller's offset always moves on, whether the field turns out to be readable or not
				+	tOffset += tOffsetDelta;
				+
				+	// validate before allocating and reading: the lengths come from the file and cannot be trusted
				+	if ( tOffset > m_tReader.GetFilesize() )
				+	{
				+		m_tReporter.Fail ( "Docstore block size+offset out of bounds in %s (offset " INT64_FMT ")", m_szFilename, tOffset );
				+		return;
				+	}
				+
				+	CSphFixedVector<BYTE> dField ( tOffsetDelta );
				+	m_tReader.SeekTo ( tFieldOffset, 0 );
				+	m_tReader.GetBytes ( dField.Begin(), dField.GetLength() );
				+
				+	if ( bCompressed )
				+	{
				+		CSphFixedVector<BYTE> dDecompressed ( tInfo.m_uUncompressedLen );
				+		if ( !m_pCompressor->Decompress ( dField, dDecompressed ) )
				+			m_tReporter.Fail ( "Error decompressing big block in %s (offset " INT64_FMT ")", m_szFilename, tFieldOffset );
				+	}
				+}
			
 
				+
			
 
				+
			
 
				+// Checks a big block: reads and validates the block header (block flags, the optional
				+// field remap, per-field flags and lengths) and then checks the data of every field
				+// in the order it is stored.
				+// Stops at the first problem that makes the rest of the header unusable.
				+void DocstoreChecker_c::CheckBigBlock ( const Docstore_c::Block_t & tBlock )
				+{
				+	const int iNumFields = m_tFields.GetNumFields();
				+
				+	// the header is a part of the block, it cannot be larger than the block itself
				+	if ( tBlock.m_uHeaderSize > tBlock.m_uSize )
				+	{
				+		m_tReporter.Fail ( "Docstore block size+offset out of bounds in %s (offset " INT64_FMT ")", m_szFilename, tBlock.m_tOffset );
				+		return;
				+	}
				+
				+	CSphFixedVector<Docstore_c::FieldInfo_t> dFieldInfo ( iNumFields );
				+	CSphFixedVector<BYTE> dBlockHeader(tBlock.m_uHeaderSize);
				+
				+	m_tReader.SeekTo ( tBlock.m_tOffset, 0 );
				+	m_tReader.GetBytes ( dBlockHeader.Begin(), dBlockHeader.GetLength() );
				+
				+	MemoryReader2_c tReader ( dBlockHeader.Begin(), dBlockHeader.GetLength() );
				+
				+	CSphVector<int> dFieldSort;
				+	BYTE uBlockFlags = tReader.GetByte();
				+	if ( uBlockFlags & ~BLOCK_FLAG_FIELD_REORDER )
				+		m_tReporter.Fail ( "Unknown docstore big block flag (%u) in %s (offset " INT64_FMT ")", uBlockFlags, m_szFilename, tBlock.m_tOffset );
				+
				+	bool bNeedReorder = !!( uBlockFlags & BLOCK_FLAG_FIELD_REORDER );
				+	if ( bNeedReorder )
				+	{
				+		dFieldSort.Resize ( iNumFields );
				+		for ( auto & i : dFieldSort )
				+		{
				+			i = tReader.UnzipInt();
				+
				+			// remap entries are used as indexes into dFieldInfo, valid ones are [0, iNumFields)
				+			if ( i<0 || i>=iNumFields )
				+			{
				+				m_tReporter.Fail ( "Error in docstore field remap (%d) in %s (offset " INT64_FMT ")", i, m_szFilename, tBlock.m_tOffset );
				+				return; // going on would index outside of dFieldInfo
				+			}
				+		}
				+	}
				+
				+	for ( int i = 0; i < iNumFields; i++ )
				+	{
				+		int iField = bNeedReorder ? dFieldSort[i] : i;
				+		Docstore_c::FieldInfo_t & tInfo = dFieldInfo[iField];
				+
				+		tInfo.m_uFlags = tReader.GetByte();
				+		if ( tInfo.m_uFlags & (~(FIELD_FLAG_EMPTY | FIELD_FLAG_COMPRESSED) ) )
				+			m_tReporter.Fail ( "Unknown docstore big block field flag (%u) in %s (offset " INT64_FMT ")", tInfo.m_uFlags, m_szFilename, tBlock.m_tOffset );
				+
				+		if ( tInfo.m_uFlags & FIELD_FLAG_EMPTY )
				+			continue;
				+
				+		tInfo.m_uUncompressedLen = tReader.UnzipInt();
				+		if ( tInfo.m_uFlags & FIELD_FLAG_COMPRESSED )
				+			tInfo.m_uCompressedLen = tReader.UnzipInt();
				+
				+		if ( tInfo.m_uCompressedLen>tInfo.m_uUncompressedLen )
				+			m_tReporter.Fail ( "Docstore block size mismatch: compressed=%u, uncompressed=%u in %s (offset " INT64_FMT ")", tInfo.m_uCompressedLen, tInfo.m_uUncompressedLen, m_szFilename, tBlock.m_tOffset );
				+
				+		if ( tReader.GetPos() > tReader.GetLength() )
				+		{
				+			m_tReporter.Fail ( "Out of bounds in docstore field data in %s (offset " INT64_FMT ")", m_szFilename, tBlock.m_tOffset );
				+			return; // the header is exhausted, the remaining field infos cannot be decoded
				+		}
				+	}
				+
				+	// field data follows the header, in the stored (possibly remapped) order
				+	SphOffset_t tOffset = tBlock.m_tOffset+tBlock.m_uHeaderSize;
				+
				+	for ( int i = 0; i < iNumFields; i++ )
				+		CheckBigBlockField ( dFieldInfo[bNeedReorder ? dFieldSort[i] : i], tOffset );
				+}
			
 
				+
			
 
				+
			
 
				+// Dispatches the block to the matching check: small blocks to CheckSmallBlock(),
				+// blocks of any other type to CheckBigBlock().
				+void DocstoreChecker_c::CheckBlock ( const Docstore_c::Block_t & tBlock )
				+{
				+	if ( tBlock.m_eType!=BLOCK_TYPE_SMALL )
				+	{
				+		CheckBigBlock ( tBlock );
				+		return;
				+	}
				+
				+	CheckSmallBlock ( tBlock );
				+}
			
 
				+
			
 
				+//////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				 Docstore_i * CreateDocstore ( const CSphString & sFilename, CSphString & sError )
			
 
				 {
			
 
				 	CSphScopedPtr<Docstore_c> pDocstore ( new Docstore_c(sFilename) );
			
@@ -1786,4 +2096,11 @@ void ShutdownDocstore()
 
				 {
			
 
				 	BlockCache_c::Done();
			
 
				 	DocstoreReaders_c::Done();
			
 
				+}
			
 
				+
			
 
				+
			
 
				+// Entry point of the docstore check: runs a DocstoreChecker_c over the given reader
				+// and returns the result of its Check().
				+bool CheckDocstore ( CSphAutoreader & tReader, DebugCheckError_c & tReporter )
				+{
				+	return DocstoreChecker_c ( tReader, tReporter ).Check();
				 }
			
--- a/src/docstore.h
+++ b/src/docstore.h
@@ -102,6 +102,10 @@ DocstoreFields_i *	CreateDocstoreFields();
 
				 void				InitDocstore ( int64_t iCacheSize );
			
 
				 void				ShutdownDocstore();
			
 
				 
			
 
				+class DebugCheckError_c;
			
 
				+class CSphAutoreader;
			
 
				+bool				CheckDocstore ( CSphAutoreader & tReader, DebugCheckError_c & tReporter );
			
 
				+
			
 
				 #endif
			
 
				 
			
 
				 //
			
--- a/src/gtests/gtests_tokenizer.cpp
+++ b/src/gtests/gtests_tokenizer.cpp
@@ -798,6 +798,8 @@ TEST_F ( QueryParser, soft_whitespace4 )
 
				 
			
 
				 
			
 
				 static CSphSourceStats g_tTmpDummyStat;
			
 
				+static FileAccessSettings_t g_tDummyFASettings;
			
 
				+
			
 
				 class CSphDummyIndex : public CSphIndex
			
 
				 {
			
 
				 public:
			
@@ -811,6 +813,7 @@ public:
 
				 	void				Dealloc () override {}
			
 
				 	void				Preread () override {}
			
 
				 	void				SetMemorySettings ( const FileAccessSettings_t & ) override {}
			
 
				+	const FileAccessSettings_t & GetMemorySettings() const override { return g_tDummyFASettings; }
			
 
				 	void				SetBase ( const char * ) override {}
			
 
				 	bool				Rename ( const char * ) override { return false; }
			
 
				 	bool				Lock () override { return true; }
			
--- a/src/index_converter.cpp
+++ b/src/index_converter.cpp
@@ -24,6 +24,7 @@
 
				 #include "sphinxstem.h"
			
 
				 #include "sphinxpq.h"
			
 
				 #include "accumulator.h"
			
 
				+#include "indexformat.h"
			
 
				 
			
 
				 namespace legacy
			
 
				 {
			
@@ -41,9 +42,6 @@ STATIC_SIZE_ASSERT ( SphDocID_t, 8 );
 
				 
			
 
				 const DWORD SPH_SKIPLIST_BLOCK=128;
			
 
				 
			
 
				-static const int MAX_KEYWORD_BYTES = SPH_MAX_WORD_LEN*3+4;
			
 
				-static const int DOCLIST_HINT_THRESH = 256;
			
 
				-
			
 
				 static const DWORD META_HEADER_MAGIC	= 0x54525053;	///< my magic 'SPRT' header
			
 
				 static const DWORD META_VERSION		= 14;			///< current version
			
 
				 
			
--- a/src/indexcheck.cpp
+++ b/src/indexcheck.cpp
@@ -0,0 +1,1338 @@
 
				+//
			
 
				+// Copyright (c) 2017-2019, Manticore Software LTD (http://manticoresearch.com)
			
 
				+// Copyright (c) 2001-2016, Andrew Aksyonoff
			
 
				+// Copyright (c) 2008-2016, Sphinx Technologies Inc
			
 
				+// All rights reserved
			
 
				+//
			
 
				+// This program is free software; you can redistribute it and/or modify
			
 
				+// it under the terms of the GNU General Public License. You should have
			
 
				+// received a copy of the GPL license along with this program; if you
			
 
				+// did not, you can find it at http://www.gnu.org/
			
 
				+//
			
 
				+
			
 
				+#include "indexcheck.h"
			
 
				+
			
 
				+#include "sphinxint.h"
			
 
				+#include "attribute.h"
			
 
				+#include "indexformat.h"
			
 
				+#include "secondaryindex.h"
			
 
				+#include "docstore.h"
			
 
				+
			
 
				+
			
 
				+DebugCheckError_c::DebugCheckError_c ( FILE * pFile )
			
 
				+	: m_pFile ( pFile )
			
 
				+{
			
 
				+	assert ( pFile );
			
 
				+	m_bProgress = isatty ( fileno ( pFile ) )!=0;
			
 
				+	m_tStartTime = sphMicroTimer();
			
 
				+}
			
 
				+
			
 
				+
			
 
				+void DebugCheckError_c::Msg ( const char * szFmt, ... )
			
 
				+{
			
 
				+	assert ( m_pFile );
			
 
				+	va_list ap;
			
 
				+	va_start ( ap, szFmt );
			
 
				+	vfprintf ( m_pFile, szFmt, ap );
			
 
				+	fprintf ( m_pFile, "\n" );
			
 
				+	va_end ( ap );
			
 
				+}
			
 
				+
			
 
				+
			
 
				+bool DebugCheckError_c::Fail ( const char * szFmt, ... )
			
 
				+{
			
 
				+	assert ( m_pFile );
			
 
				+	const int FAILS_THRESH = 100;
			
 
				+	if ( ++m_nFails>=FAILS_THRESH )
			
 
				+		return false;
			
 
				+
			
 
				+	va_list ap;
			
 
				+	va_start ( ap, szFmt );
			
 
				+	fprintf ( m_pFile, "FAILED, " );
			
 
				+	vfprintf ( m_pFile, szFmt, ap );
			
 
				+	if ( m_iSegment>=0 )
			
 
				+		fprintf ( m_pFile, " (segment: %d)", m_iSegment );
			
 
				+
			
 
				+	fprintf ( m_pFile, "\n" );
			
 
				+	va_end ( ap );
			
 
				+
			
 
				+	m_nFailsPrinted++;
			
 
				+	if ( m_nFailsPrinted==FAILS_THRESH )
			
 
				+		fprintf ( m_pFile, "(threshold reached; suppressing further output)\n" );
			
 
				+
			
 
				+	return false;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+void DebugCheckError_c::Progress ( const char * szFmt, ... )
			
 
				+{
			
 
				+	if ( !m_bProgress )
			
 
				+		return;
			
 
				+
			
 
				+	assert ( m_pFile );
			
 
				+
			
 
				+	va_list ap;
			
 
				+	va_start ( ap, szFmt );
			
 
				+	vfprintf ( m_pFile, szFmt, ap );
			
 
				+	fprintf ( m_pFile, "\r" );
			
 
				+	va_end ( ap );
			
 
				+
			
 
				+	fflush ( m_pFile );
			
 
				+}
			
 
				+
			
 
				+
			
 
				+void DebugCheckError_c::Done()
			
 
				+{
			
 
				+	assert ( m_pFile );
			
 
				+
			
 
				+	// well, no known kinds of failures, maybe some unknown ones
			
 
				+	int64_t tmCheck = sphMicroTimer() - m_tStartTime;
			
 
				+	if ( !m_nFails )
			
 
				+		fprintf ( m_pFile, "check passed" );
			
 
				+	else if ( m_nFails!=m_nFailsPrinted )
			
 
				+		fprintf ( m_pFile, "check FAILED, " INT64_FMT " of " INT64_FMT " failures reported", m_nFailsPrinted, m_nFails );
			
 
				+	else
			
 
				+		fprintf ( m_pFile, "check FAILED, " INT64_FMT " failures reported", m_nFails );
			
 
				+
			
 
				+	fprintf ( m_pFile, ", %d.%d sec elapsed\n", (int)(tmCheck/1000000), (int)((tmCheck/100000)%10) );
			
 
				+}
			
 
				+
			
 
				+
			
 
				+void DebugCheckError_c::SetSegment ( int iSegment )
			
 
				+{
			
 
				+	m_iSegment = iSegment;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+int64_t DebugCheckError_c::GetNumFails() const
			
 
				+{
			
 
				+	return m_nFails;
			
 
				+}
			
 
				+
			
 
				+//////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+class FileDebugCheckReader_c : public DebugCheckReader_i
			
 
				+{
			
 
				+public:
			
 
				+	FileDebugCheckReader_c ( CSphAutoreader * pReader )
			
 
				+		: m_pReader ( pReader )
			
 
				+	{}
			
 
				+
			
 
				+	~FileDebugCheckReader_c() final {}
			
 
				+
			
 
				+	int64_t GetLengthBytes() final
			
 
				+	{
			
 
				+		return ( m_pReader ? m_pReader->GetFilesize() : 0 );
			
 
				+	}
			
 
				+
			
 
				+	bool GetBytes ( void * pData, int iSize ) final
			
 
				+	{
			
 
				+		if ( !m_pReader )
			
 
				+			return false;
			
 
				+
			
 
				+		m_pReader->GetBytes ( pData, iSize );
			
 
				+		return !m_pReader->GetErrorFlag();
			
 
				+	}
			
 
				+
			
 
				+	bool SeekTo ( int64_t iOff, int iHint ) final
			
 
				+	{
			
 
				+		if ( !m_pReader )
			
 
				+			return false;
			
 
				+
			
 
				+		m_pReader->SeekTo ( iOff, iHint );
			
 
				+		return !m_pReader->GetErrorFlag();
			
 
				+	}
			
 
				+
			
 
				+private:
			
 
				+	CSphAutoreader * m_pReader = nullptr;
			
 
				+};
			
 
				+
			
 
				+
			
 
				+void DebugCheckHelper_c::DebugCheck_Attributes ( DebugCheckReader_i & tAttrs, DebugCheckReader_i & tBlobs, int64_t nRows, int64_t iMinMaxBytes, const CSphSchema & tSchema, DebugCheckError_c & tReporter ) const
			
 
				+{
			
 
				+	// empty?
			
 
				+	if ( !tAttrs.GetLengthBytes() )
			
 
				+		return;
			
 
				+
			
 
				+	tReporter.Msg ( "checking rows..." );
			
 
				+
			
 
				+	if ( !tSchema.GetAttrsCount() )
			
 
				+		tReporter.Fail ( "no attributes in schema; schema should at least have '%s' attr", sphGetDocidName() );
			
 
				+
			
 
				+	if ( tSchema.GetAttr(0).m_sName!=sphGetDocidName() )
			
 
				+		tReporter.Fail ( "first attribute in schema should be '%s'", tSchema.GetAttr(0).m_sName.cstr() );
			
 
				+
			
 
				+	if ( tSchema.GetAttr(0).m_eAttrType!=SPH_ATTR_BIGINT )
			
 
				+		tReporter.Fail ( "%s attribute should be BIGINT", sphGetDocidName() );
			
 
				+
			
 
				+	const CSphColumnInfo * pBlobLocator = nullptr;
			
 
				+	int nBlobAttrs = 0;
			
 
				+
			
 
				+	if ( tSchema.HasBlobAttrs() )
			
 
				+	{
			
 
				+		pBlobLocator = tSchema.GetAttr ( sphGetBlobLocatorName() );
			
 
				+
			
 
				+		if ( !pBlobLocator )
			
 
				+			tReporter.Fail ( "schema has blob attrs, but no blob locator '%s'", sphGetBlobLocatorName() );
			
 
				+
			
 
				+		if ( tSchema.GetAttr(1).m_sName!=sphGetBlobLocatorName() )
			
 
				+			tReporter.Fail ( "second attribute in schema should be '%s'", sphGetBlobLocatorName() );
			
 
				+
			
 
				+		if ( tSchema.GetAttr(1).m_eAttrType!=SPH_ATTR_BIGINT )
			
 
				+			tReporter.Fail ( "%s attribute should be BIGINT", sphGetBlobLocatorName() );
			
 
				+
			
 
				+		if ( !tBlobs.GetLengthBytes() )
			
 
				+			tReporter.Fail ( "schema has blob attrs, but blob file is empty" );
			
 
				+
			
 
				+		for ( int i = 0; i < tSchema.GetAttrsCount(); i++ )
			
 
				+			if ( sphIsBlobAttr(  tSchema.GetAttr(i).m_eAttrType ) )
			
 
				+				nBlobAttrs++;
			
 
				+	} else
			
 
				+	{
			
 
				+		if ( tBlobs.GetLengthBytes() )
			
 
				+			tReporter.Fail ( "schema has no blob attrs but has blob rows" );
			
 
				+	}
			
 
				+
			
 
				+	// sizes and counts
			
 
				+	DWORD uStride = tSchema.GetRowSize();
			
 
				+
			
 
				+	int64_t iAttrElemCount = ( tAttrs.GetLengthBytes() - iMinMaxBytes ) / sizeof(CSphRowitem);
			
 
				+	int64_t iAttrExpected = nRows*uStride;
			
 
				+	if ( iAttrExpected > iAttrElemCount )
			
 
				+		tReporter.Fail ( "rowitems count mismatch (expected=" INT64_FMT ", loaded=" INT64_FMT ")", iAttrExpected, iAttrElemCount );
			
 
				+
			
 
				+	CSphVector<CSphAttrLocator> dFloatItems;
			
 
				+	for ( int i=0; i<tSchema.GetAttrsCount(); i++ )
			
 
				+	{
			
 
				+		const CSphColumnInfo & tAttr = tSchema.GetAttr(i);
			
 
				+		if ( tAttr.m_eAttrType==SPH_ATTR_FLOAT )
			
 
				+			dFloatItems.Add	( tAttr.m_tLocator );
			
 
				+	}
			
 
				+
			
 
				+	CSphFixedVector<CSphRowitem> dRow ( tSchema.GetRowSize() );
			
 
				+	const CSphRowitem * pRow = dRow.Begin();
			
 
				+	tAttrs.SeekTo ( 0, dRow.GetLengthBytes() );
			
 
				+
			
 
				+	for ( int64_t iRow=0; iRow<nRows; iRow++ )
			
 
				+	{
			
 
				+		tAttrs.GetBytes ( dRow.Begin(), dRow.GetLengthBytes() );
			
 
				+		DocID_t tDocID = sphGetDocID(pRow);
			
 
				+
			
 
				+		///////////////////////////
			
 
				+		// check blobs
			
 
				+		///////////////////////////
			
 
				+
			
 
				+		if ( pBlobLocator )
			
 
				+		{
			
 
				+			int64_t iBlobOffset1 = sphGetBlobRowOffset(pRow);
			
 
				+			int64_t iBlobOffset2 = sphGetRowAttr ( pRow, pBlobLocator->m_tLocator );
			
 
				+
			
 
				+			if ( iBlobOffset1!=iBlobOffset2 )
			
 
				+				tReporter.Fail ( "blob row locator mismatch (row=" INT64_FMT ", docid=" INT64_FMT ", offset1=" INT64_FMT ", offset2=" INT64_FMT ", rowid=" INT64_FMT " of " INT64_FMT ")",
			
 
				+					iRow, tDocID, iBlobOffset1, iBlobOffset2, iRow, nRows );
			
 
				+
			
 
				+			CSphString sError;
			
 
				+			if ( !sphCheckBlobRow ( iBlobOffset1, tBlobs, tSchema, sError ) )
			
 
				+				tReporter.Fail ( "%s at offset " INT64_FMT ", docid=" INT64_FMT ", rowid=" INT64_FMT " of " INT64_FMT, sError.cstr(), iBlobOffset1, tDocID, iRow, nRows );
			
 
				+		}
			
 
				+
			
 
				+		///////////////////////////
			
 
				+		// check floats
			
 
				+		///////////////////////////
			
 
				+
			
 
				+		ARRAY_FOREACH ( iItem, dFloatItems )
			
 
				+		{
			
 
				+			const DWORD uValue = (DWORD)sphGetRowAttr ( pRow, dFloatItems[ iItem ] );
			
 
				+			const DWORD uExp = ( uValue >> 23 ) & 0xff;
			
 
				+			const DWORD uMantissa = uValue & 0x003fffff;
			
 
				+
			
 
				+			// check normalized
			
 
				+			if ( uExp==0 && uMantissa!=0 )
			
 
				+				tReporter.Fail ( "float attribute value is unnormalized (row=" INT64_FMT ", attr=%d, id=" INT64_FMT ", raw=0x%x, value=%f)", 	iRow, iItem, tDocID, uValue, sphDW2F ( uValue ) );
			
 
				+
			
 
				+			// check +-inf
			
 
				+			if ( uExp==0xff && uMantissa==0 )
			
 
				+				tReporter.Fail ( "float attribute is infinity (row=" INT64_FMT ", attr=%d, id=" INT64_FMT ", raw=0x%x, value=%f)", iRow, iItem, tDocID, uValue, sphDW2F ( uValue ) );
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+
			
 
				+void DebugCheckHelper_c::DebugCheck_DeadRowMap ( int64_t iSizeBytes, int64_t nRows, DebugCheckError_c & tReporter ) const
			
 
				+{
			
 
				+	tReporter.Msg ( "checking dead row map..." );
			
 
				+
			
 
				+	int64_t nExpectedEntries = int(( nRows+31 ) / 32);
			
 
				+	int64_t iExpectedSize = nExpectedEntries*sizeof(DWORD);
			
 
				+	if ( iSizeBytes!=iExpectedSize )
			
 
				+		tReporter.Fail ( "unexpected dead row map: " INT64_FMT ", expected: " INT64_FMT " bytes", iSizeBytes, iExpectedSize );
			
 
				+}
			
 
				+
			
 
				+//////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+class DiskIndexChecker_c : public DiskIndexChecker_i, public DebugCheckHelper_c
			
 
				+{
			
 
				+public:
			
 
				+			DiskIndexChecker_c ( CSphIndex & tIndex, DebugCheckError_c & tReporter );
			
 
				+
			
 
				+	bool	OpenFiles ( CSphString & sError ) final;
			
 
				+	void	Setup ( int64_t iNumRows, int64_t iDocinfoIndex, int64_t iMinMaxIndex, bool bCheckIdDups ) final;
			
 
				+	CSphVector<SphWordID_t> & GetHitlessWords() final { return m_dHitlessWords; }
			
 
				+
			
 
				+	void	Check() final;
			
 
				+
			
 
				+private:
			
 
				+	CSphIndex &				m_tIndex;
			
 
				+	CSphAutoreader			m_tDictReader;
			
 
				+	DataReaderFactoryPtr_c	m_pDocsReader;
			
 
				+	DataReaderFactoryPtr_c	m_pHitsReader;
			
 
				+	CSphAutoreader			m_tSkipsReader;
			
 
				+	CSphAutoreader			m_tDeadRowReader;
			
 
				+	CSphAutoreader			m_tAttrReader;
			
 
				+	CSphAutoreader			m_tBlobReader;
			
 
				+	CSphAutoreader			m_tDocstoreReader;
			
 
				+	CSphVector<SphWordID_t> m_dHitlessWords;
			
 
				+
			
 
				+	DebugCheckError_c &		m_tReporter;
			
 
				+
			
 
				+	bool					m_bHasBlobs = false;
			
 
				+	bool					m_bHasDocstore = false;
			
 
				+	bool					m_bIsEmpty = false;
			
 
				+	DWORD					m_uVersion = 0;
			
 
				+	int64_t					m_iNumRows = 0;
			
 
				+	int64_t					m_iDocinfoIndex = 0;
			
 
				+	int64_t					m_iMinMaxIndex = 0;
			
 
				+	bool					m_bCheckIdDups = false;
			
 
				+	CSphSchema				m_tSchema;
			
 
				+	CWordlist				m_tWordlist;
			
 
				+
			
 
				+	void	CheckDictionary();
			
 
				+	void	CheckDocs();
			
 
				+	void	CheckAttributes();
			
 
				+	void	CheckKillList() const;
			
 
				+	void	CheckBlockIndex();
			
 
				+	void	CheckDocidLookup();
			
 
				+	void	CheckDocids();
			
 
				+	void	CheckDocstore();
			
 
				+	
			
 
				+	bool		ReadHeader ( CSphString & sError );
			
 
				+	CSphString	GetFilename ( ESphExt eExt ) const;
			
 
				+};
			
 
				+
			
 
				+
			
 
				+DiskIndexChecker_c::DiskIndexChecker_c ( CSphIndex & tIndex, DebugCheckError_c & tReporter )
			
 
				+	: m_tIndex ( tIndex )
			
 
				+	, m_tReporter ( tReporter )
			
 
				+{}
			
 
				+
			
 
				+
			
 
				+bool DiskIndexChecker_c::ReadHeader ( CSphString & sError )
			
 
				+{
			
 
				+	CSphAutoreader tHeaderReader;
			
 
				+	if ( !tHeaderReader.Open ( GetFilename(SPH_EXT_SPH), sError ) )
			
 
				+		return false;
			
 
				+
			
 
				+	const char * szHeader = tHeaderReader.GetFilename().cstr();
			
 
				+
			
 
				+	// magic header
			
 
				+	const char * szFmt = CheckFmtMagic ( tHeaderReader.GetDword() );
			
 
				+	if ( szFmt )
			
 
				+	{
			
 
				+		sError.SetSprintf ( szFmt, szHeader );
			
 
				+		return false;
			
 
				+	}
			
 
				+
			
 
				+	// version
			
 
				+	m_uVersion = tHeaderReader.GetDword();
			
 
				+	if ( m_uVersion<=1 || m_uVersion>INDEX_FORMAT_VERSION )
			
 
				+	{
			
 
				+		sError.SetSprintf ( "%s is v.%d, binary is v.%d", szHeader, m_uVersion, INDEX_FORMAT_VERSION );
			
 
				+		return false;
			
 
				+	}
			
 
				+
			
 
				+	// we don't support anything prior to v54
			
 
				+	DWORD uMinFormatVer = 54;
			
 
				+	if ( m_uVersion<uMinFormatVer )
			
 
				+	{
			
 
				+		sError.SetSprintf ( "indexes prior to v.%d are no longer supported (use index_converter tool); %s is v.%d", uMinFormatVer, szHeader, m_uVersion );
			
 
				+		return false;
			
 
				+	}
			
 
				+
			
 
				+	// schema
			
 
				+	ReadSchema ( tHeaderReader, m_tSchema, m_uVersion );
			
 
				+
			
 
				+	// dictionary header (wordlist checkpoints, infix blocks, etc)
			
 
				+	m_tWordlist.m_iDictCheckpointsOffset = tHeaderReader.GetOffset();
			
 
				+	m_tWordlist.m_iDictCheckpoints = tHeaderReader.GetDword();
			
 
				+	m_tWordlist.m_iInfixCodepointBytes = tHeaderReader.GetByte();
			
 
				+	m_tWordlist.m_iInfixBlocksOffset = tHeaderReader.GetDword();
			
 
				+	m_tWordlist.m_iInfixBlocksWordsSize = tHeaderReader.GetDword();
			
 
				+
			
 
				+	m_tWordlist.m_dCheckpoints.Reset ( m_tWordlist.m_iDictCheckpoints );
			
 
				+
			
 
				+	if ( !m_tWordlist.Preread ( GetFilename(SPH_EXT_SPI).cstr(), m_tIndex.GetDictionary()->GetSettings().m_bWordDict, m_tIndex.GetSettings().m_iSkiplistBlockSize, sError ) )
			
 
				+		return false;
			
 
				+
			
 
				+	// FIXME! add more header checks
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+bool DiskIndexChecker_c::OpenFiles ( CSphString & sError )
			
 
				+{
			
 
				+	if ( !ReadHeader(sError) )
			
 
				+		return m_tReporter.Fail ( "error reading index header: %s", sError.cstr() );
			
 
				+
			
 
				+	if ( !m_tDictReader.Open ( GetFilename(SPH_EXT_SPI), sError ) )
			
 
				+		return m_tReporter.Fail ( "unable to open dictionary: %s", sError.cstr() );
			
 
				+
			
 
				+	// use file reader during debug check to lower memory pressure
			
 
				+	m_pDocsReader = NewProxyReader ( GetFilename(SPH_EXT_SPD), sError, DataReaderFactory_c::DOCS, m_tIndex.GetMemorySettings().m_iReadBufferDocList, FileAccess_e::FILE );
			
 
				+	if ( !m_pDocsReader )
			
 
				+		return m_tReporter.Fail ( "unable to open doclist: %s", sError.cstr() );
			
 
				+
			
 
				+	// use file reader during debug check to lower memory pressure
			
 
				+	m_pHitsReader = NewProxyReader ( GetFilename(SPH_EXT_SPP), sError, DataReaderFactory_c::HITS, m_tIndex.GetMemorySettings().m_iReadBufferHitList, FileAccess_e::FILE );
			
 
				+	if ( !m_pHitsReader )
			
 
				+		return m_tReporter.Fail ( "unable to open hitlist: %s", sError.cstr() );
			
 
				+
			
 
				+	if ( !m_tSkipsReader.Open ( GetFilename(SPH_EXT_SPE), sError ) )
			
 
				+		return m_tReporter.Fail ( "unable to open skiplist: %s", sError.cstr () );
			
 
				+
			
 
				+	if ( !m_tDeadRowReader.Open ( GetFilename(SPH_EXT_SPM).cstr(), sError ) )
			
 
				+		return m_tReporter.Fail ( "unable to open dead-row map: %s", sError.cstr() );
			
 
				+
			
 
				+	if ( !m_tAttrReader.Open ( GetFilename(SPH_EXT_SPA).cstr(), sError ) )
			
 
				+		return m_tReporter.Fail ( "unable to open attributes: %s", sError.cstr() );
			
 
				+
			
 
				+	if ( m_tSchema.GetAttr ( sphGetBlobLocatorName() ) )
			
 
				+	{
			
 
				+		if ( !m_tBlobReader.Open ( GetFilename(SPH_EXT_SPB), sError ) )
			
 
				+			return m_tReporter.Fail ( "unable to open blobs: %s", sError.cstr() );
			
 
				+
			
 
				+		m_bHasBlobs = true;
			
 
				+	}
			
 
				+
			
 
				+	if ( m_uVersion>=57 && m_tSchema.HasStoredFields() )
			
 
				+	{
			
 
				+		if ( !m_tDocstoreReader.Open ( GetFilename(SPH_EXT_SPDS).cstr(), sError ) )
			
 
				+			return m_tReporter.Fail ( "unable to open docstore: %s", sError.cstr() );
			
 
				+
			
 
				+		m_bHasDocstore = true;
			
 
				+	}
			
 
				+
			
 
				+	CSphAutofile tDocinfo ( GetFilename(SPH_EXT_SPA), SPH_O_READ, sError );
			
 
				+	if ( tDocinfo.GetFD()<0 )
			
 
				+		return false;
			
 
				+
			
 
				+	m_bIsEmpty = m_tAttrReader.GetFilesize()==0;
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+void DiskIndexChecker_c::Setup ( int64_t iNumRows, int64_t iDocinfoIndex, int64_t iMinMaxIndex, bool bCheckIdDups )
			
 
				+{
			
 
				+	m_iNumRows = iNumRows;
			
 
				+	m_iDocinfoIndex = iDocinfoIndex;
			
 
				+	m_iMinMaxIndex = iMinMaxIndex;
			
 
				+	m_bCheckIdDups = bCheckIdDups;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+void DiskIndexChecker_c::Check()
			
 
				+{
			
 
				+	CheckDictionary();
			
 
				+	CheckDocs();
			
 
				+	CheckAttributes();
			
 
				+	CheckBlockIndex();
			
 
				+	CheckKillList();
			
 
				+	CheckDocstore();
			
 
				+
			
 
				+	DebugCheck_DeadRowMap ( m_tDeadRowReader.GetFilesize(), m_iNumRows, m_tReporter );
			
 
				+	CheckDocidLookup();
			
 
				+
			
 
				+	if ( m_bCheckIdDups )
			
 
				+		CheckDocids();
			
 
				+}
			
 
				+
			
 
				+
			
 
				+void DiskIndexChecker_c::CheckDictionary()
			
 
				+{
			
 
				+	m_tReporter.Msg ( "checking dictionary..." );
			
 
				+
			
 
				+	const CSphIndexSettings & tIndexSettings = m_tIndex.GetSettings();
			
 
				+
			
 
				+	SphWordID_t uWordid = 0;
			
 
				+	int64_t iDoclistOffset = 0;
			
 
				+	int iWordsTotal = 0;
			
 
				+
			
 
				+	char sWord[MAX_KEYWORD_BYTES], sLastWord[MAX_KEYWORD_BYTES];
			
 
				+	memset ( sWord, 0, sizeof(sWord) );
			
 
				+	memset ( sLastWord, 0, sizeof(sLastWord) );
			
 
				+
			
 
				+	const int iWordPerCP = SPH_WORDLIST_CHECKPOINT;
			
 
				+	const bool bWordDict = m_tIndex.GetDictionary()->GetSettings().m_bWordDict;
			
 
				+
			
 
				+	CSphVector<CSphWordlistCheckpoint> dCheckpoints;
			
 
				+	dCheckpoints.Reserve ( m_tWordlist.m_iDictCheckpoints );
			
 
				+	CSphVector<char> dCheckpointWords;
			
 
				+
			
 
				+	CSphAutoreader & tDictReader = m_tDictReader;
			
 
				+
			
 
				+	tDictReader.GetByte();
			
 
				+	int iLastSkipsOffset = 0;
			
 
				+	SphOffset_t iWordsEnd = m_tWordlist.GetWordsEnd();
			
 
				+
			
 
				+	while ( tDictReader.GetPos()!=iWordsEnd && !m_bIsEmpty )
			
 
				+	{
			
 
				+		// sanity checks
			
 
				+		if ( tDictReader.GetPos()>=iWordsEnd )
			
 
				+		{
			
 
				+			m_tReporter.Fail ( "reading past checkpoints" );
			
 
				+			break;
			
 
				+		}
			
 
				+
			
 
				+		// store current entry pos (for checkpointing later), read next delta
			
 
				+		const int64_t iDictPos = tDictReader.GetPos();
			
 
				+		SphWordID_t iDeltaWord = 0;
			
 
				+		if ( bWordDict )
			
 
				+			iDeltaWord = tDictReader.GetByte();
			
 
				+		else
			
 
				+			iDeltaWord = tDictReader.UnzipWordid();
			
 
				+
			
 
				+		// checkpoint encountered, handle it
			
 
				+		if ( !iDeltaWord )
			
 
				+		{
			
 
				+			tDictReader.UnzipOffset();
			
 
				+
			
 
				+			if ( ( iWordsTotal%iWordPerCP )!=0 && tDictReader.GetPos()!=iWordsEnd )
			
 
				+				m_tReporter.Fail ( "unexpected checkpoint (pos=" INT64_FMT ", word=%d, words=%d, expected=%d)", iDictPos, iWordsTotal, ( iWordsTotal%iWordPerCP ), iWordPerCP );
			
 
				+
			
 
				+			uWordid = 0;
			
 
				+			iDoclistOffset = 0;
			
 
				+			continue;
			
 
				+		}
			
 
				+
			
 
				+		SphWordID_t uNewWordid = 0;
			
 
				+		SphOffset_t iNewDoclistOffset = 0;
			
 
				+		int iDocs = 0;
			
 
				+		int iHits = 0;
			
 
				+		bool bHitless = false;
			
 
				+
			
 
				+		if ( bWordDict )
			
 
				+		{
			
 
				+			// unpack next word
			
 
				+			// must be in sync with DictEnd()!
			
 
				+			BYTE uPack = (BYTE)iDeltaWord;
			
 
				+			int iMatch, iDelta;
			
 
				+			if ( uPack & 0x80 )
			
 
				+			{
			
 
				+				iDelta = ( ( uPack>>4 ) & 7 ) + 1;
			
 
				+				iMatch = uPack & 15;
			
 
				+			} else
			
 
				+			{
			
 
				+				iDelta = uPack & 127;
			
 
				+				iMatch = tDictReader.GetByte();
			
 
				+			}
			
 
				+			const int iLastWordLen = strlen(sLastWord);
			
 
				+			if ( iMatch+iDelta>=(int)sizeof(sLastWord)-1 || iMatch>iLastWordLen )
			
 
				+			{
			
 
				+				m_tReporter.Fail ( "wrong word-delta (pos=" INT64_FMT ", word=%s, len=%d, begin=%d, delta=%d)", iDictPos, sLastWord, iLastWordLen, iMatch, iDelta );
			
 
				+				tDictReader.SkipBytes ( iDelta );
			
 
				+			} else
			
 
				+			{
			
 
				+				tDictReader.GetBytes ( sWord+iMatch, iDelta );
			
 
				+				sWord [ iMatch+iDelta ] = '\0';
			
 
				+			}
			
 
				+
			
 
				+			iNewDoclistOffset = tDictReader.UnzipOffset();
			
 
				+			iDocs = tDictReader.UnzipInt();
			
 
				+			iHits = tDictReader.UnzipInt();
			
 
				+			int iHint = 0;
			
 
				+			if ( iDocs>=DOCLIST_HINT_THRESH )
			
 
				+				iHint = tDictReader.GetByte();
			
 
				+
			
 
				+			iHint = DoclistHintUnpack ( iDocs, (BYTE)iHint );
			
 
				+
			
 
				+			if ( m_tIndex.GetSettings().m_eHitless==SPH_HITLESS_SOME && ( iDocs & HITLESS_DOC_FLAG )!=0 )
			
 
				+			{
			
 
				+				iDocs = ( iDocs & HITLESS_DOC_MASK );
			
 
				+				bHitless = true;
			
 
				+			}
			
 
				+
			
 
				+			const int iNewWordLen = strlen(sWord);
			
 
				+
			
 
				+			if ( iNewWordLen==0 )
			
 
				+				m_tReporter.Fail ( "empty word in dictionary (pos=" INT64_FMT ")", iDictPos );
			
 
				+
			
 
				+			if ( iLastWordLen && iNewWordLen )
			
 
				+				if ( sphDictCmpStrictly ( sWord, iNewWordLen, sLastWord, iLastWordLen )<=0 )
			
 
				+					m_tReporter.Fail ( "word order decreased (pos=" INT64_FMT ", word=%s, prev=%s)", iDictPos, sLastWord, sWord );
			
 
				+
			
 
				+			if ( iHint<0 )
			
 
				+				m_tReporter.Fail ( "invalid word hint (pos=" INT64_FMT ", word=%s, hint=%d)", iDictPos, sWord, iHint );
			
 
				+
			
 
				+			if ( iDocs<=0 || iHits<=0 || iHits<iDocs )
			
 
				+				m_tReporter.Fail ( "invalid docs/hits (pos=" INT64_FMT ", word=%s, docs=" INT64_FMT ", hits=" INT64_FMT ")", (int64_t)iDictPos, sWord, (int64_t)iDocs, (int64_t)iHits );
			
 
				+
			
 
				+			memcpy ( sLastWord, sWord, sizeof(sLastWord) );
			
 
				+		} else
			
 
				+		{
			
 
				+			// finish reading the entire entry
			
 
				+			uNewWordid = uWordid + iDeltaWord;
			
 
				+			iNewDoclistOffset = iDoclistOffset + tDictReader.UnzipOffset();
			
 
				+			iDocs = tDictReader.UnzipInt();
			
 
				+			iHits = tDictReader.UnzipInt();
			
 
				+			bHitless = ( m_dHitlessWords.BinarySearch ( uNewWordid )!=NULL );
			
 
				+			if ( bHitless )
			
 
				+				iDocs = ( iDocs & HITLESS_DOC_MASK );
			
 
				+
			
 
				+			if ( uNewWordid<=uWordid )
			
 
				+				m_tReporter.Fail ( "wordid decreased (pos=" INT64_FMT ", wordid=" UINT64_FMT ", previd=" UINT64_FMT ")", (int64_t)iDictPos, (uint64_t)uNewWordid, (uint64_t)uWordid );
			
 
				+
			
 
				+			if ( iNewDoclistOffset<=iDoclistOffset )
			
 
				+				m_tReporter.Fail ( "doclist offset decreased (pos=" INT64_FMT ", wordid=" UINT64_FMT ")", (int64_t)iDictPos, (uint64_t)uNewWordid );
			
 
				+
			
 
				+			if ( iDocs<=0 || iHits<=0 || iHits<iDocs )
			
 
				+				m_tReporter.Fail ( "invalid docs/hits (pos=" INT64_FMT ", wordid=" UINT64_FMT ", docs=" INT64_FMT ", hits=" INT64_FMT ", hitless=%s)",
			
 
				+					(int64_t)iDictPos, (uint64_t)uNewWordid, (int64_t)iDocs, (int64_t)iHits, ( bHitless?"true":"false" ) );
			
 
				+		}
			
 
				+
			
 
				+		assert ( tIndexSettings.m_iSkiplistBlockSize>0 );
			
 
				+
			
 
				+		// skiplist
			
 
				+		if ( iDocs>tIndexSettings.m_iSkiplistBlockSize && !bHitless )
			
 
				+		{
			
 
				+			int iSkipsOffset = tDictReader.UnzipInt();
			
 
				+			if ( !bWordDict && iSkipsOffset<iLastSkipsOffset )
			
 
				+				m_tReporter.Fail ( "descending skiplist pos (last=%d, cur=%d, wordid=%llu)", iLastSkipsOffset, iSkipsOffset, UINT64 ( uNewWordid ) );
			
 
				+
			
 
				+			iLastSkipsOffset = iSkipsOffset;
			
 
				+		}
			
 
				+
			
 
				+		// update stats, add checkpoint
			
 
				+		if ( ( iWordsTotal%iWordPerCP )==0 )
			
 
				+		{
			
 
				+			CSphWordlistCheckpoint & tCP = dCheckpoints.Add();
			
 
				+			tCP.m_iWordlistOffset = iDictPos;
			
 
				+
			
 
				+			if ( bWordDict )
			
 
				+			{
			
 
				+				const int iLen = strlen ( sWord );
			
 
				+				char * sArenaWord = dCheckpointWords.AddN ( iLen + 1 );
			
 
				+				memcpy ( sArenaWord, sWord, iLen );
			
 
				+				sArenaWord[iLen] = '\0';
			
 
				+				tCP.m_uWordID = sArenaWord - dCheckpointWords.Begin();
			
 
				+			} else
			
 
				+				tCP.m_uWordID = uNewWordid;
			
 
				+		}
			
 
				+
			
 
				+		// TODO add back infix checking
			
 
				+
			
 
				+		uWordid = uNewWordid;
			
 
				+		iDoclistOffset = iNewDoclistOffset;
			
 
				+		iWordsTotal++;
			
 
				+	}
			
 
				+
			
 
				+	// check the checkpoints
			
 
				+	if ( dCheckpoints.GetLength()!=m_tWordlist.m_iDictCheckpoints )
			
 
				+		m_tReporter.Fail ( "checkpoint count mismatch (read=%d, calc=%d)", m_tWordlist.m_iDictCheckpoints, dCheckpoints.GetLength() );
			
 
				+
			
 
				+	m_tWordlist.DebugPopulateCheckpoints();
			
 
				+	for ( int i=0; i < Min ( dCheckpoints.GetLength(), m_tWordlist.m_iDictCheckpoints ); i++ )
			
 
				+	{
			
 
				+		CSphWordlistCheckpoint tRefCP = dCheckpoints[i];
			
 
				+		const CSphWordlistCheckpoint & tCP = m_tWordlist.m_dCheckpoints[i];
			
 
				+		const int iLen = bWordDict ? strlen ( tCP.m_sWord ) : 0;
			
 
				+		if ( bWordDict )
			
 
				+			tRefCP.m_sWord = dCheckpointWords.Begin() + tRefCP.m_uWordID;
			
 
				+		if ( bWordDict && ( tRefCP.m_sWord[0]=='\0' || tCP.m_sWord[0]=='\0' ) )
			
 
				+		{
			
 
				+			m_tReporter.Fail ( "empty checkpoint %d (read_word=%s, read_len=%u, readpos=" INT64_FMT ", calc_word=%s, calc_len=%u, calcpos=" INT64_FMT ")",
			
 
				+				i, tCP.m_sWord, (DWORD)strlen ( tCP.m_sWord ), (int64_t)tCP.m_iWordlistOffset,
			
 
				+				tRefCP.m_sWord, (DWORD)strlen ( tRefCP.m_sWord ), (int64_t)tRefCP.m_iWordlistOffset );
			
 
				+
			
 
				+		} else if ( sphCheckpointCmpStrictly ( tCP.m_sWord, iLen, tCP.m_uWordID, bWordDict, tRefCP ) || tRefCP.m_iWordlistOffset!=tCP.m_iWordlistOffset )
			
 
				+		{
			
 
				+			if ( bWordDict )
			
 
				+			{
			
 
				+				m_tReporter.Fail ( "checkpoint %d differs (read_word=%s, readpos=" INT64_FMT ", calc_word=%s, calcpos=" INT64_FMT ")",
			
 
				+					i,
			
 
				+					tCP.m_sWord,
			
 
				+					(int64_t)tCP.m_iWordlistOffset,
			
 
				+					tRefCP.m_sWord,
			
 
				+					(int64_t)tRefCP.m_iWordlistOffset );
			
 
				+			} else
			
 
				+			{
			
 
				+				m_tReporter.Fail ( "checkpoint %d differs (readid=" UINT64_FMT ", readpos=" INT64_FMT ", calcid=" UINT64_FMT ", calcpos=" INT64_FMT ")",
			
 
				+					i,
			
 
				+					(uint64_t)tCP.m_uWordID,
			
 
				+					(int64_t)tCP.m_iWordlistOffset,
			
 
				+					(uint64_t)tRefCP.m_uWordID,
			
 
				+					(int64_t)tRefCP.m_iWordlistOffset );
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	dCheckpoints.Reset();
			
 
				+	dCheckpointWords.Reset();
			
 
				+}
			
 
				+
			
 
				+
			
 
				+// Verifies the dictionary against the doclist, hitlist and skiplist files.
				+// Walks the dictionary from offset 1 up to m_tWordlist.GetWordsEnd(); for every word entry it
				+// decodes the doclist and each document's hitlist through a freshly created qword reader and
				+// reports any offset, ordering, counter, field-mask or skiplist inconsistency via m_tReporter.Fail().
				+// Nothing is returned; the readers owned by the checker are repositioned as a side effect.
				+void DiskIndexChecker_c::CheckDocs()
				+{
				+	const CSphIndexSettings & tIndexSettings = m_tIndex.GetSettings();
				+
				+	m_tReporter.Msg ( "checking data..." );
				+
				+	int64_t iDocsSize = m_pDocsReader->GetFilesize();
				+	int64_t iSkiplistLen = m_tSkipsReader.GetFilesize();
				+
				+	// all three streams are walked starting from offset 1
				+	m_tDictReader.SeekTo ( 1, READ_NO_SIZE_HINT );
				+	m_pDocsReader->SeekTo ( 1 );
				+	m_pHitsReader->SeekTo ( 1 );
				+
				+	// running state of the delta-encoded dictionary entries
				+	SphWordID_t uWordid = 0;
				+	int64_t iDoclistOffset = 0;
				+	int iDictDocs, iDictHits;
				+	bool bHitless = false;
				+
				+	const bool bWordDict = m_tIndex.GetDictionary()->GetSettings().m_bWordDict;
				+
				+	// last keyword decoded from a keywords dictionary (stays empty for a wordid dictionary)
				+	char sWord[MAX_KEYWORD_BYTES];
				+	memset ( sWord, 0, sizeof(sWord) );
				+
				+	int iWordsChecked = 0;
				+	int iWordsTotal = 0; // NOTE(review): never updated in this function, so the progress line always prints "/0"
				+
				+	SphOffset_t iWordsEnd = m_tWordlist.GetWordsEnd();
				+	while ( m_tDictReader.GetPos()<iWordsEnd )
				+	{
				+		bHitless = false;
				+		SphWordID_t iDeltaWord = 0;
				+		if ( bWordDict )
				+			iDeltaWord = m_tDictReader.GetByte();
				+		else
				+			iDeltaWord = m_tDictReader.UnzipWordid();
				+
				+		// a zero delta is followed by one zipped offset (skipped here) and restarts the delta encoding
				+		if ( !iDeltaWord )
				+		{
				+			m_tDictReader.UnzipOffset();
				+
				+			uWordid = 0;
				+			iDoclistOffset = 0;
				+			continue;
				+		}
				+
				+		if ( bWordDict )
				+		{
				+			// unpack next word
				+			// must be in sync with DictEnd()!
				+			BYTE uPack = (BYTE)iDeltaWord;
				+
				+			// iMatch = bytes shared with the previous word, iDelta = new bytes that follow
				+			int iMatch, iDelta;
				+			if ( uPack & 0x80 )
				+			{
				+				iDelta = ( ( uPack>>4 ) & 7 ) + 1;
				+				iMatch = uPack & 15;
				+			} else
				+			{
				+				iDelta = uPack & 127;
				+				iMatch = m_tDictReader.GetByte();
				+			}
				+			const int iLastWordLen = strlen(sWord);
				+			// an entry that would overflow sWord or extend past the previous word is skipped, sWord is kept as is
				+			if ( iMatch+iDelta>=(int)sizeof(sWord)-1 || iMatch>iLastWordLen )
				+				m_tDictReader.SkipBytes ( iDelta );
				+			else
				+			{
				+				m_tDictReader.GetBytes ( sWord+iMatch, iDelta );
				+				sWord [ iMatch+iDelta ] = '\0';
				+			}
				+
				+			iDoclistOffset = m_tDictReader.UnzipOffset();
				+			iDictDocs = m_tDictReader.UnzipInt();
				+			iDictHits = m_tDictReader.UnzipInt();
				+			// entries with at least DOCLIST_HINT_THRESH docs carry one extra byte, not needed here
				+			if ( iDictDocs>=DOCLIST_HINT_THRESH )
				+				m_tDictReader.GetByte();
				+
				+			if ( tIndexSettings.m_eHitless==SPH_HITLESS_SOME && ( iDictDocs & HITLESS_DOC_FLAG ) )
				+			{
				+				iDictDocs = ( iDictDocs & HITLESS_DOC_MASK );
				+				bHitless = true;
				+			}
				+		} else
				+		{
				+			// finish reading the entire entry
				+			uWordid = uWordid + iDeltaWord;
				+			bHitless = ( m_dHitlessWords.BinarySearch ( uWordid )!=NULL );
				+			iDoclistOffset = iDoclistOffset + m_tDictReader.UnzipOffset();
				+			iDictDocs = m_tDictReader.UnzipInt();
				+			if ( bHitless )
				+				iDictDocs = ( iDictDocs & HITLESS_DOC_MASK );
				+			iDictHits = m_tDictReader.UnzipInt();
				+		}
				+
				+		// a skiplist offset is stored only for words with more docs than one skiplist block
				+		int64_t iSkipsOffset = 0;
				+		if ( iDictDocs>tIndexSettings.m_iSkiplistBlockSize && !bHitless )
				+		{
				+			if ( m_uVersion<=57 )
				+				iSkipsOffset = (int)m_tDictReader.UnzipInt();
				+			else
				+				iSkipsOffset = m_tDictReader.UnzipOffset();
				+		}
				+
				+		// check whether the offset is as expected
				+		if ( iDoclistOffset!=m_pDocsReader->GetPos() )
				+		{
				+			if ( !bWordDict )
				+				m_tReporter.Fail ( "unexpected doclist offset (wordid=" UINT64_FMT "(%s)(%d), dictpos=" INT64_FMT ", doclistpos=" INT64_FMT ")",
				+					(uint64_t)uWordid, sWord, iWordsChecked, iDoclistOffset, (int64_t) m_pDocsReader->GetPos() );
				+
				+			if ( iDoclistOffset>=iDocsSize || iDoclistOffset<0 )
				+			{
				+				m_tReporter.Fail ( "unexpected doclist offset, off the file (wordid=" UINT64_FMT "(%s)(%d), dictpos=" INT64_FMT ", doclistsize=" INT64_FMT ")",
				+					(uint64_t)uWordid, sWord, iWordsChecked, iDoclistOffset, iDocsSize );
				+				iWordsChecked++;
				+				continue;
				+			} else
				+				m_pDocsReader->SeekTo ( iDoclistOffset );
				+		}
				+
				+		// create and manually setup doclist reader
				+		DiskIndexQwordTraits_c * pQword = sphCreateDiskIndexQword ( tIndexSettings.m_eHitFormat==SPH_HIT_FORMAT_INLINE );
				+
				+		pQword->m_tDoc.Reset ( m_tSchema.GetDynamicSize() );
				+		pQword->m_tDoc.m_tRowID = INVALID_ROWID;
				+		pQword->m_iDocs = 0;
				+		pQword->m_iHits = 0;
				+		pQword->SetDocReader ( m_pDocsReader );
				+//		pQword->m_rdDoclist.SeekTo ( tDocsReader.GetPos(), READ_NO_SIZE_HINT );
				+		pQword->SetHitReader ( m_pHitsReader );
				+//		pQword->m_rdHitlist.SeekTo ( tHitsReader.GetPos(), READ_NO_SIZE_HINT );
				+
				+		// loop the doclist
				+		int iDoclistDocs = 0;
				+		int iDoclistHits = 0;
				+		int iHitlistHits = 0;
				+
				+		bHitless |= ( tIndexSettings.m_eHitless==SPH_HITLESS_ALL ||
				+			( tIndexSettings.m_eHitless==SPH_HITLESS_SOME && m_dHitlessWords.BinarySearch ( uWordid ) ) );
				+		pQword->m_bHasHitlist = !bHitless;
				+
				+		// skiplist entries recomputed from the doclist itself, compared to the stored ones below
				+		CSphVector<SkiplistEntry_t> dDoclistSkips;
				+		while (true)
				+		{
				+			// skiplist state is saved just *before* decoding those boundary entries
				+			if ( ( iDoclistDocs & ( tIndexSettings.m_iSkiplistBlockSize-1 ) )==0 )
				+			{
				+				SkiplistEntry_t & tBlock = dDoclistSkips.Add();
				+				tBlock.m_tBaseRowIDPlus1 = pQword->m_tDoc.m_tRowID+1;
				+				tBlock.m_iOffset = pQword->m_rdDoclist->GetPos();
				+				tBlock.m_iBaseHitlistPos = pQword->m_uHitPosition;
				+			}
				+
				+			// FIXME? this can fail on a broken entry (eg fieldid over 256)
				+			const CSphMatch & tDoc = pQword->GetNextDoc();
				+			if ( tDoc.m_tRowID==INVALID_ROWID )
				+				break;
				+
				+			// checks!
				+			// valid rowids are 0..m_iNumRows-1 (CheckDocidLookup uses the same >= test), so rowid==m_iNumRows is out of bounds too
				+			if ( tDoc.m_tRowID>=m_iNumRows )
				+				m_tReporter.Fail ( "rowid out of bounds (wordid=" UINT64_FMT "(%s), rowid=%u)",	uint64_t(uWordid), sWord, tDoc.m_tRowID );
				+
				+			iDoclistDocs++;
				+			iDoclistHits += pQword->m_uMatchHits;
				+
				+			// check position in case of regular (not-inline) hit
				+			if (!( pQword->m_iHitlistPos>>63 ))
				+			{
				+				if ( !bWordDict && pQword->m_iHitlistPos!=pQword->m_rdHitlist->GetPos() )
				+					m_tReporter.Fail ( "unexpected hitlist offset (wordid=" UINT64_FMT "(%s), rowid=%u, expected=" INT64_FMT ", actual=" INT64_FMT ")",
				+						(uint64_t)uWordid, sWord, pQword->m_tDoc.m_tRowID, (int64_t)pQword->m_iHitlistPos, (int64_t)pQword->m_rdHitlist->GetPos() );
				+			}
				+
				+			// aim
				+			pQword->SeekHitlist ( pQword->m_iHitlistPos );
				+
				+			// loop the hitlist
				+			int iDocHits = 0;
				+			FieldMask_t dFieldMask;
				+			dFieldMask.UnsetAll();
				+			Hitpos_t uLastHit = EMPTY_HIT;
				+
				+			while ( !bHitless )
				+			{
				+				Hitpos_t uHit = pQword->GetNextHit();
				+				if ( uHit==EMPTY_HIT )
				+					break;
				+
				+				// hits must be strictly increasing, both as raw values and per field/position
				+				if ( !( uLastHit<uHit ) )
				+					m_tReporter.Fail ( "hit entries sorting order decreased (wordid=" UINT64_FMT "(%s), rowid=%u, hit=%u, last=%u)", (uint64_t)uWordid, sWord, pQword->m_tDoc.m_tRowID, uHit, uLastHit );
				+
				+				if ( HITMAN::GetField ( uLastHit )==HITMAN::GetField ( uHit ) )
				+				{
				+					if ( !( HITMAN::GetPos ( uLastHit )<HITMAN::GetPos ( uHit ) ) )
				+						m_tReporter.Fail ( "hit decreased (wordid=" UINT64_FMT "(%s), rowid=%u, hit=%u, last=%u)",	(uint64_t)uWordid, sWord, pQword->m_tDoc.m_tRowID, HITMAN::GetPos ( uHit ), HITMAN::GetPos ( uLastHit ) );
				+
				+					if ( HITMAN::IsEnd ( uLastHit ) )
				+						m_tReporter.Fail ( "multiple tail hits (wordid=" UINT64_FMT "(%s), rowid=%u, hit=0x%x, last=0x%x)", (uint64_t)uWordid, sWord, pQword->m_tDoc.m_tRowID, uHit, uLastHit );
				+				} else
				+				{
				+					if ( !( HITMAN::GetField ( uLastHit )<HITMAN::GetField ( uHit ) ) )
				+						m_tReporter.Fail ( "hit field decreased (wordid=" UINT64_FMT "(%s), rowid=%u, hit field=%u, last field=%u)", (uint64_t)uWordid, sWord, pQword->m_tDoc.m_tRowID, HITMAN::GetField ( uHit ), HITMAN::GetField ( uLastHit ) );
				+				}
				+
				+				uLastHit = uHit;
				+
				+				int iField = HITMAN::GetField ( uHit );
				+				if ( iField<0 || iField>=SPH_MAX_FIELDS )
				+					m_tReporter.Fail ( "hit field out of bounds (wordid=" UINT64_FMT "(%s), rowid=%u, field=%d)", (uint64_t)uWordid, sWord, pQword->m_tDoc.m_tRowID, iField );
				+				else if ( iField>=m_tSchema.GetFieldsCount() )
				+					m_tReporter.Fail ( "hit field out of schema (wordid=" UINT64_FMT "(%s), rowid=%u, field=%d)", (uint64_t)uWordid, sWord, pQword->m_tDoc.m_tRowID, iField );
				+				else
				+					dFieldMask.Set(iField);
				+
				+				iDocHits++; // to check doclist entry
				+				iHitlistHits++; // to check dictionary entry
				+			}
				+
				+			// check hit count
				+			if ( iDocHits!=(int)pQword->m_uMatchHits && !bHitless )
				+				m_tReporter.Fail ( "doc hit count mismatch (wordid=" UINT64_FMT "(%s), rowid=%u, doclist=%d, hitlist=%d)", (uint64_t)uWordid, sWord, pQword->m_tDoc.m_tRowID, pQword->m_uMatchHits, iDocHits );
				+
				+			if ( m_tSchema.GetFieldsCount()>32 )
				+				pQword->CollectHitMask();
				+
				+			// check the mask
				+			if ( memcmp ( dFieldMask.m_dMask, pQword->m_dQwordFields.m_dMask, sizeof(dFieldMask.m_dMask) ) && !bHitless )
				+				m_tReporter.Fail ( "field mask mismatch (wordid=" UINT64_FMT "(%s), rowid=%u)", (uint64_t)uWordid, sWord, pQword->m_tDoc.m_tRowID );
				+
				+			// update my hitlist reader
				+			m_pHitsReader->SeekTo ( pQword->m_rdHitlist->GetPos() );
				+		}
				+
				+		// do checks
				+		if ( iDictDocs!=iDoclistDocs )
				+			m_tReporter.Fail ( "doc count mismatch (wordid=" UINT64_FMT "(%s), dict=%d, doclist=%d, hitless=%s)", uint64_t(uWordid), sWord, iDictDocs, iDoclistDocs, ( bHitless?"true":"false" ) );
				+
				+		if ( ( iDictHits!=iDoclistHits || iDictHits!=iHitlistHits ) && !bHitless )
				+			m_tReporter.Fail ( "hit count mismatch (wordid=" UINT64_FMT "(%s), dict=%d, doclist=%d, hitlist=%d)", uint64_t(uWordid), sWord, iDictHits, iDoclistHits, iHitlistHits );
				+
				+		// skiplist verification; this "while" runs at most once and is only used so that "break" can bail out
				+		while ( iDoclistDocs>tIndexSettings.m_iSkiplistBlockSize && !bHitless )
				+		{
				+			if ( iSkipsOffset<=0 || iSkipsOffset>iSkiplistLen )
				+			{
				+				m_tReporter.Fail ( "invalid skiplist offset (wordid=%llu(%s), off=" INT64_FMT ", max=" INT64_FMT ")", UINT64 ( uWordid ), sWord, iSkipsOffset, iSkiplistLen );
				+				break;
				+			}
				+
				+			// boundary adjustment
				+			if ( ( iDoclistDocs & ( tIndexSettings.m_iSkiplistBlockSize-1 ) )==0 )
				+				dDoclistSkips.Pop();
				+
				+			// accumulator rebuilt from the stored deltas; entry 0 is implied and not stored
				+			SkiplistEntry_t t;
				+			t.m_tBaseRowIDPlus1 = 0;
				+			t.m_iOffset = iDoclistOffset;
				+			t.m_iBaseHitlistPos = 0;
				+
				+			// hint is: dDoclistSkips * ZIPPED( sizeof(int64_t) * 3 ) == dDoclistSkips * 8
				+			m_tSkipsReader.SeekTo ( iSkipsOffset, dDoclistSkips.GetLength ()*8 );
				+			int i = 0;
				+			while ( ++i<dDoclistSkips.GetLength() )
				+			{
				+				const SkiplistEntry_t & r = dDoclistSkips[i];
				+
				+				RowID_t tRowIDDelta = m_tSkipsReader.UnzipRowid();
				+				uint64_t uOff = m_tSkipsReader.UnzipOffset();
				+				uint64_t uPosDelta = m_tSkipsReader.UnzipOffset();
				+
				+				if ( m_tSkipsReader.GetErrorFlag () )
				+				{
				+					// expected = number of entries computed from the doclist, got = index of the entry that failed to load
				+					m_tReporter.Fail ( "skiplist reading error (wordid=%llu(%s), exp=%d, got=%d, error='%s')", UINT64 ( uWordid ), sWord, dDoclistSkips.GetLength (), i, m_tSkipsReader.GetErrorMessage ().cstr () );
				+					m_tSkipsReader.ResetError();
				+					break;
				+				}
				+
				+				t.m_tBaseRowIDPlus1 += tIndexSettings.m_iSkiplistBlockSize + tRowIDDelta;
				+				t.m_iOffset += 4*tIndexSettings.m_iSkiplistBlockSize + uOff;
				+				t.m_iBaseHitlistPos += uPosDelta;
				+				if ( t.m_tBaseRowIDPlus1!=r.m_tBaseRowIDPlus1 || t.m_iOffset!=r.m_iOffset || t.m_iBaseHitlistPos!=r.m_iBaseHitlistPos )
				+				{
				+					m_tReporter.Fail ( "skiplist entry %d mismatch (wordid=%llu(%s), exp={%u, %llu, %llu}, got={%u, %llu, %llu})",
				+						i, UINT64 ( uWordid ), sWord,
				+						r.m_tBaseRowIDPlus1, UINT64 ( r.m_iOffset ), UINT64 ( r.m_iBaseHitlistPos ),
				+						t.m_tBaseRowIDPlus1, UINT64 ( t.m_iOffset ), UINT64 ( t.m_iBaseHitlistPos ) );
				+					break;
				+				}
				+			}
				+			break;
				+		}
				+
				+		// move my reader instance forward too
				+		m_pDocsReader->SeekTo ( pQword->m_rdDoclist->GetPos() );
				+
				+		// cleanup
				+		SafeDelete ( pQword );
				+
				+		// progress bar
				+		if ( (++iWordsChecked)%1000==0 )
				+			m_tReporter.Progress ( "%d/%d", iWordsChecked, iWordsTotal );
				+	}
				+}
			
 
				+
			
 
				+
			
 
				+// Checks attribute (and, when the index has them, blob) storage by handing file-backed
				+// readers to DebugCheck_Attributes, the routine shared with RT indexes.
				+void DiskIndexChecker_c::CheckAttributes()
				+{
				+	// byte range of the min-max block index: two rows (min and max) per block, one more block than m_iDocinfoIndex
				+	const int64_t iIndexBegin = sizeof(DWORD) * m_iMinMaxIndex;
				+	const int64_t iIndexEnd = sizeof(DWORD) * m_iMinMaxIndex + sizeof(DWORD) * ( m_iDocinfoIndex+1 ) * m_tSchema.GetRowSize() * 2;
				+
				+	// the blob reader wraps a null pointer when the index has no blobs
				+	FileDebugCheckReader_c tAttrs ( &m_tAttrReader );
				+	FileDebugCheckReader_c tBlobs ( m_bHasBlobs ? &m_tBlobReader : nullptr );
				+
				+	DebugCheck_Attributes ( tAttrs, tBlobs, m_iNumRows, iIndexEnd - iIndexBegin, m_tSchema, m_tReporter );
				+}
			
 
				+
			
 
				+
			
 
				+// Validates the kill-list (.spk) file; a missing or unreadable file is silently accepted.
				+// Layout as read here: a DWORD count of target indexes, then per target a name string and a
				+// DWORD of flags; then a DWORD count of killed docids followed by that many zipped docid deltas.
				+// The first problem found is reported via m_tReporter.Fail() and ends the check.
				+void DiskIndexChecker_c::CheckKillList() const
				+{
				+	m_tReporter.Msg ( "checking kill-list..." );
				+
				+	CSphString sSPK = GetFilename(SPH_EXT_SPK);
				+	if ( !sphIsReadable ( sSPK.cstr() ) )
				+		return;
				+
				+	CSphString sError;
				+	CSphAutoreader tReader;
				+	if ( !tReader.Open ( sSPK.cstr(), sError ) )
				+	{
				+		m_tReporter.Fail ( "unable to open kill-list: %s", sError.cstr() );
				+		return;
				+	}
				+
				+	// unsigned loop counter: a corrupt count >= 2^31 must not be cast to a negative int and skip the loop
				+	DWORD nIndexes = tReader.GetDword();
				+	for ( DWORD i = 0; i<nIndexes; i++ )
				+	{
				+		CSphString sIndex = tReader.GetString();
				+		if ( tReader.GetErrorFlag() )
				+		{
				+			m_tReporter.Fail ( "error reading index name from kill-list: %s", tReader.GetErrorMessage().cstr() );
				+			return;
				+		}
				+
				+		DWORD uFlags = tReader.GetDword();
				+		DWORD uMask = KillListTarget_t::USE_KLIST | KillListTarget_t::USE_DOCIDS;
				+		if ( uFlags & (~uMask) )
				+		{
				+			// report the offending flags, not the mask of known ones
				+			m_tReporter.Fail ( "unknown index flags in kill-list: %u", uFlags );
				+			return;
				+		}
				+	}
				+
				+	DWORD nKills = tReader.GetDword();
				+	if ( tReader.GetErrorFlag() )
				+	{
				+		m_tReporter.Fail ( "error reading kill-list" );
				+		return;
				+	}
				+
				+	for ( DWORD i = 0; i<nKills; i++ )
				+	{
				+		DocID_t tDelta = tReader.UnzipOffset();
				+
				+		// test the reader first: a failed read must not be misreported as a bad delta
				+		if ( tReader.GetErrorFlag() )
				+		{
				+			m_tReporter.Fail ( "error docids from kill-list" );
				+			return;
				+		}
				+
				+		// docids are stored as deltas, so every delta must be strictly positive
				+		if ( tDelta<=0 )
				+		{
				+			m_tReporter.Fail ( "descending docids found in kill-list" );
				+			return;
				+		}
				+	}
				+}
			
 
				+
			
 
				+
			
 
				+// Verifies the per-block min/max attribute index stored in the attribute file.
				+// Checks the file size and block count against m_iNumRows, then reads every row sequentially and
				+// confirms that each integer/bigint/timestamp/bool/float attribute lies within the min/max pair
				+// of the block (DOCINFO_INDEX_FREQ rows per block) the row belongs to.
				+// Problems are reported via m_tReporter.Fail(); the scan continues after a failure.
				+void DiskIndexChecker_c::CheckBlockIndex()
				+{
				+	m_tReporter.Msg ( "checking attribute blocks index..." );
				+
				+	// data rows plus two rows (min, max) for each of the m_iDocinfoIndex+1 index entries
				+	int64_t iAllRowsTotal = m_iNumRows + (m_iDocinfoIndex+1)*2;
				+	DWORD uStride = m_tSchema.GetRowSize();
				+	int64_t iLoadedRowItems = m_tAttrReader.GetFilesize() / sizeof(CSphRowitem);
				+	if ( iAllRowsTotal*uStride>iLoadedRowItems && m_iNumRows )
				+		m_tReporter.Fail ( "rowitems count mismatch (expected=" INT64_FMT ", loaded=" INT64_FMT ")", iAllRowsTotal*uStride, iLoadedRowItems );
				+
				+	// check size
				+	const int64_t iTempDocinfoIndex = ( m_iNumRows+DOCINFO_INDEX_FREQ-1 ) / DOCINFO_INDEX_FREQ;
				+	if ( iTempDocinfoIndex!=m_iDocinfoIndex )
				+		m_tReporter.Fail ( "block count differs (expected=" INT64_FMT ", got=" INT64_FMT ")", iTempDocinfoIndex, m_iDocinfoIndex );
				+
				+	// buffer for the current data row
				+	CSphFixedVector<CSphRowitem> dRow ( m_tSchema.GetRowSize() );
				+	const CSphRowitem * pRow = dRow.Begin();
				+	m_tAttrReader.SeekTo ( 0, dRow.GetLengthBytes() );
				+
				+	// buffer for the current block's min row followed by its max row
				+	const int64_t iMinMaxEnd = sizeof(DWORD) * m_iMinMaxIndex + sizeof(DWORD) * ( m_iDocinfoIndex+1 ) * uStride * 2;
				+	CSphFixedVector<DWORD> dMinMax ( uStride*2 );
				+	const DWORD * pMinEntry = dMinMax.Begin();
				+	const DWORD * pMinAttrs = pMinEntry;
				+	const DWORD * pMaxAttrs = pMinAttrs + uStride;
				+
				+	for ( int64_t iIndexEntry=0; iIndexEntry<m_iNumRows; iIndexEntry++ )
				+	{
				+		const int64_t iBlock = iIndexEntry / DOCINFO_INDEX_FREQ;
				+
				+		// we have to do some checks in border cases, for example: when move from 1st to 2nd block
				+		// row 0 opens block 0 but (0-1)/FREQ truncates to 0, so it has to be treated as a border explicitly;
				+		// otherwise the min>max sanity check below would never run for the first block
				+		const int64_t iPrevEntryBlock = ( iIndexEntry-1 )/DOCINFO_INDEX_FREQ;
				+		const bool bIsBordersCheckTime = ( iIndexEntry==0 || iPrevEntryBlock!=iBlock );
				+		if ( bIsBordersCheckTime )
				+		{
				+			// remember where the row scan is, load the block's min/max pair, then come back
				+			int64_t iPos = m_tAttrReader.GetPos();
				+
				+			int64_t iBlockPos = sizeof(DWORD) * m_iMinMaxIndex + sizeof(DWORD) * iBlock * uStride * 2;
				+			// check docid vs global range
				+			if ( int64_t( iBlockPos + sizeof(DWORD) * uStride) > iMinMaxEnd )
				+				m_tReporter.Fail ( "unexpected block index end (row=" INT64_FMT ", block=" INT64_FMT ")", iIndexEntry, iBlock );
				+
				+			m_tAttrReader.SeekTo ( iBlockPos, dMinMax.GetLengthBytes() );
				+			m_tAttrReader.GetBytes ( dMinMax.Begin(), dMinMax.GetLengthBytes() );
				+			if ( m_tAttrReader.GetErrorFlag() )
				+				m_tReporter.Fail ( "unexpected block index (row=" INT64_FMT ", block=" INT64_FMT ")", iIndexEntry, iBlock );
				+
				+			m_tAttrReader.SeekTo ( iPos, dRow.GetLengthBytes() );
				+		}
				+
				+		m_tAttrReader.GetBytes ( dRow.Begin(), dRow.GetLengthBytes() );
				+		const DocID_t tDocID = sphGetDocID(pRow);
				+
				+		// check values vs blocks range
				+		for ( int iItem=0; iItem < m_tSchema.GetAttrsCount(); iItem++ )
				+		{
				+			const CSphColumnInfo & tCol = m_tSchema.GetAttr(iItem);
				+			if ( tCol.m_sName==sphGetBlobLocatorName() )
				+				continue;
				+
				+			switch ( tCol.m_eAttrType )
				+			{
				+			case SPH_ATTR_INTEGER:
				+			case SPH_ATTR_TIMESTAMP:
				+			case SPH_ATTR_BOOL:
				+			case SPH_ATTR_BIGINT:
				+			{
				+				const SphAttr_t uVal = sphGetRowAttr ( pRow, tCol.m_tLocator );
				+				const SphAttr_t uMin = sphGetRowAttr ( pMinAttrs, tCol.m_tLocator );
				+				const SphAttr_t uMax = sphGetRowAttr ( pMaxAttrs, tCol.m_tLocator );
				+
				+				// checks is attribute min max range valid (once per block)
				+				if ( uMin > uMax && bIsBordersCheckTime )
				+					m_tReporter.Fail ( "invalid attribute range (row=" INT64_FMT ", block=" INT64_FMT ", min=" INT64_FMT ", max=" INT64_FMT ")", iIndexEntry, iBlock, uMin, uMax );
				+
				+				if ( uVal < uMin || uVal > uMax )
				+					m_tReporter.Fail ( "unexpected attribute value (row=" INT64_FMT ", attr=%d, docid=" INT64_FMT ", block=" INT64_FMT ", value=0x" UINT64_FMT ", min=0x" UINT64_FMT ", max=0x" UINT64_FMT ")",
				+						iIndexEntry, iItem, tDocID, iBlock, uint64_t(uVal), uint64_t(uMin), uint64_t(uMax) );
				+			}
				+			break;
				+
				+			case SPH_ATTR_FLOAT:
				+			{
				+				const float fVal = sphDW2F ( (DWORD)sphGetRowAttr ( pRow, tCol.m_tLocator ) );
				+				const float fMin = sphDW2F ( (DWORD)sphGetRowAttr ( pMinAttrs, tCol.m_tLocator ) );
				+				const float fMax = sphDW2F ( (DWORD)sphGetRowAttr ( pMaxAttrs, tCol.m_tLocator ) );
				+
				+				// checks is attribute min max range valid (once per block)
				+				if ( fMin > fMax && bIsBordersCheckTime )
				+					m_tReporter.Fail ( "invalid attribute range (row=" INT64_FMT ", block=" INT64_FMT ", min=%f, max=%f)", iIndexEntry, iBlock, fMin, fMax );
				+
				+				if ( fVal < fMin || fVal > fMax )
				+					m_tReporter.Fail ( "unexpected attribute value (row=" INT64_FMT ", attr=%d, docid=" INT64_FMT ", block=" INT64_FMT ", value=%f, min=%f, max=%f)", iIndexEntry, iItem, tDocID, iBlock, fVal, fMin, fMax );
				+			}
				+			break;
				+
				+			// other attribute types are not range-checked
				+			default:
				+				break;
				+			}
				+		}
				+
				+		// progress bar
				+		if ( iIndexEntry%1000==0 )
				+			m_tReporter.Progress ( INT64_FMT"/" INT64_FMT, iIndexEntry, m_iNumRows );
				+	}
				+}
			
 
				+
			
 
				+
			
 
				+// Verifies the docid lookup (.spt) file against the attribute rows.
				+// Header as read here: DWORD doc count, DWORD docs per checkpoint, 64-bit max docid, then the
				+// checkpoint table. Each checkpoint gives a base docid and the offset of its docid/rowid list:
				+// the first entry is a bare rowid (its docid is the checkpoint base), every later entry is a
				+// zipped docid delta followed by a rowid.
				+// Reports descending checkpoints, bad deltas, out-of-range or doubly mapped rowids, docids that
				+// differ from the attribute row, and finally every row that no lookup entry points to.
				+void DiskIndexChecker_c::CheckDocidLookup()
				+{
				+	CSphString sError;
				+	m_tReporter.Msg ( "checking doc-id lookup..." );
				+
				+	CSphAutoreader tLookup;
				+	if ( !tLookup.Open ( GetFilename(SPH_EXT_SPT), sError ) )
				+	{
				+		m_tReporter.Fail ( "unable to lookup file: %s", sError.cstr() );
				+		return;
				+	}
				+	int64_t iLookupEnd = tLookup.GetFilesize();
				+
				+	CSphFixedVector<CSphRowitem> dRow ( m_tSchema.GetRowSize() );
				+	m_tAttrReader.SeekTo ( 0, dRow.GetLengthBytes() );
				+	CSphBitvec dRowids ( m_iNumRows ); // one bit per row, set once a lookup entry maps to it
				+
				+	int iDocs = tLookup.GetDword();
				+	int iDocsPerCheckpoint = tLookup.GetDword();
				+	tLookup.GetOffset(); // max docid
				+	int64_t iLookupBase = tLookup.GetPos();
				+
				+	// the value is used as a divisor below; a corrupt header must not crash the checker
				+	if ( iDocsPerCheckpoint<=0 )
				+	{
				+		m_tReporter.Fail ( "invalid docs per checkpoint %d in lookup file", iDocsPerCheckpoint );
				+		return;
				+	}
				+
				+	int iCheckpoints = ( iDocs + iDocsPerCheckpoint - 1 ) / iDocsPerCheckpoint;
				+
				+	DocidLookupCheckpoint_t tCp;
				+	DocID_t tLastDocID = 0;
				+	int iCp = 0;
				+	while ( tLookup.GetPos()<iLookupEnd && iCp<iCheckpoints )
				+	{
				+		tLookup.SeekTo ( sizeof(DocidLookupCheckpoint_t) * iCp + iLookupBase, sizeof(DocidLookupCheckpoint_t) );
				+
				+		DocidLookupCheckpoint_t tPrevCp = tCp;
				+		tCp.m_tBaseDocID = tLookup.GetOffset();
				+		tCp.m_tOffset = tLookup.GetOffset();
				+		tLastDocID = tCp.m_tBaseDocID;
				+
				+		if ( tPrevCp.m_tBaseDocID>=tCp.m_tBaseDocID )
				+			m_tReporter.Fail ( "descending docid at checkpoint %d, previous docid " INT64_FMT " docid " INT64_FMT, iCp, tPrevCp.m_tBaseDocID, tCp.m_tBaseDocID );
				+
				+		tLookup.SeekTo ( tCp.m_tOffset, sizeof(DWORD) * 3 * iDocsPerCheckpoint );
				+
				+		int iCpDocs = iDocsPerCheckpoint;
				+		// last checkpoint might have less docs
				+		if ( iCp==iCheckpoints-1 )
				+		{
				+			int iLefover = ( iDocs % iDocsPerCheckpoint );
				+			iCpDocs = ( iLefover ? iLefover : iDocsPerCheckpoint );
				+		}
				+
				+		for ( int i=0; i<iCpDocs; i++ )
				+		{
				+			DocID_t tDelta = 0;
				+			DocID_t tDocID = 0;
				+			RowID_t tRowID = INVALID_ROWID;
				+
				+			// first entry of a checkpoint has no delta, its docid is the checkpoint base
				+			if ( !( i % iCpDocs ) )
				+			{
				+				tDocID = tLastDocID;
				+				tRowID = tLookup.GetDword();
				+			} else
				+			{
				+				tDelta = tLookup.UnzipOffset();
				+				tRowID = tLookup.GetDword();
				+				// print the offending delta itself (tDocID is still zero at this point)
				+				if ( tDelta<0 )
				+					m_tReporter.Fail ( "invalid docid delta " INT64_FMT " at row %u, checkpoint %d, doc %d, last docid " INT64_FMT,
				+						tDelta, tRowID, iCp, i, tLastDocID );
				+				else
				+					tDocID = tLastDocID + tDelta;
				+
				+			}
				+
				+			if ( tRowID>=m_iNumRows )
				+				m_tReporter.Fail ( "rowid %u out of bounds " INT64_FMT, tRowID, m_iNumRows );
				+			else
				+			{
				+				// read only docid
				+				m_tAttrReader.SeekTo ( dRow.GetLengthBytes() * tRowID, sizeof(DocID_t) );
				+				m_tAttrReader.GetBytes ( dRow.Begin(), sizeof(DocID_t) );
				+
				+				// the docid must be passed for the INT64_FMT conversion; the format macro itself is not an argument
				+				if ( dRowids.BitGet ( tRowID ) )
				+					m_tReporter.Fail ( "row %u already mapped, current docid " INT64_FMT " checkpoint %d, doc %d", tRowID, tDocID, iCp, i );
				+
				+				dRowids.BitSet ( tRowID );
				+
				+				if ( tDocID!=sphGetDocID ( dRow.Begin() ) )
				+					m_tReporter.Fail ( "invalid docid " INT64_FMT "(" INT64_FMT ") at row %u, checkpoint %d, doc %d, last docid " INT64_FMT,
				+						tDocID, sphGetDocID ( dRow.Begin() ), tRowID, iCp, i, tLastDocID );
				+			}
				+
				+			tLastDocID = tDocID;
				+		}
				+
				+		iCp++;
				+	}
				+
				+	// every row must have been reached through the lookup
				+	for ( int i=0; i<m_iNumRows; i++ )
				+	{
				+		if ( dRowids.BitGet ( i ) )
				+			continue;
				+
				+		m_tAttrReader.SeekTo ( dRow.GetLengthBytes() * i, sizeof(DocID_t) );
				+		m_tAttrReader.GetBytes ( dRow.Begin(), sizeof(DocID_t) );
				+
				+		DocID_t tDocID = sphGetDocID ( dRow.Begin() );
				+		
				+		m_tReporter.Fail ( "row %u(" INT64_FMT ") not mapped at lookup, docid " INT64_FMT, i, m_iNumRows, tDocID );
				+	}
				+}
			
 
				+
			
 
				+
			
 
				+// Ordering functor for docid/rowid pairs: ascending by docid, ties broken by ascending rowid.
				+// Both operands are taken by const reference; the comparison never modifies them.
				+struct DocRow_fn
				+{
				+	bool IsLess ( const DocidRowidPair_t & tA, const DocidRowidPair_t & tB ) const
				+	{
				+		if ( tA.m_tDocID==tB.m_tDocID && tA.m_tRowID<tB.m_tRowID )
				+			return true;
				+
				+		return ( tA.m_tDocID<tB.m_tDocID );
				+	}
				+};
			
 
				+
			
 
				+
			
 
				+// Reports every docid that is stored in more than one attribute row.
				+// Loads the docid of each row, sorts the (docid, rowid) pairs with DocRow_fn and compares
				+// neighbours; each pair of adjacent equal docids yields one m_tReporter.Fail() call.
				+void DiskIndexChecker_c::CheckDocids()
				+{
				+	CSphString sError;
				+	m_tReporter.Msg ( "checking docid douplicates ..." );
				+
				+	CSphFixedVector<CSphRowitem> dRowBuf ( m_tSchema.GetRowSize() );
				+	m_tAttrReader.SeekTo ( 0, dRowBuf.GetLengthBytes() );
				+
				+	// collect one pair per row; only the leading sizeof(DocID_t) bytes of each row are read
				+	CSphFixedVector<DocidRowidPair_t> dPairs ( m_iNumRows );
				+	int iRow = 0;
				+	while ( iRow<m_iNumRows )
				+	{
				+		m_tAttrReader.SeekTo ( dRowBuf.GetLengthBytes() * iRow, sizeof(DocID_t) );
				+		m_tAttrReader.GetBytes ( dRowBuf.Begin(), sizeof(DocID_t) );
				+
				+		dPairs[iRow].m_tRowID = iRow;
				+		dPairs[iRow].m_tDocID = sphGetDocID ( dRowBuf.Begin() );
				+		iRow++;
				+	}
				+
				+	// after sorting, rows sharing a docid sit next to each other
				+	dPairs.Sort ( DocRow_fn() );
				+	for ( int iCur=1; iCur<dPairs.GetLength(); iCur++ )
				+	{
				+		if ( dPairs[iCur].m_tDocID!=dPairs[iCur-1].m_tDocID )
				+			continue;
				+
				+		m_tReporter.Fail ( "duplicate of docid " INT64_FMT " found at rows %u %u", dPairs[iCur].m_tDocID, dPairs[iCur-1].m_tRowID, dPairs[iCur].m_tRowID );
				+	}
				+}
			
 
				+
			
 
				+
			
 
				+// Runs the global ::CheckDocstore routine on the docstore reader; does nothing when the index has no docstore.
				+void DiskIndexChecker_c::CheckDocstore()
				+{
				+	if ( m_bHasDocstore )
				+	{
				+		m_tReporter.Msg ( "checking docstore..." );
				+
				+		// the leading :: selects the free function rather than this method
				+		::CheckDocstore ( m_tDocstoreReader, m_tReporter );
				+	}
				+}
			
 
				+
			
 
				+
			
 
				+// Builds the name of one of the index files: the index's base filename immediately followed by the extension string for eExt.
				+CSphString DiskIndexChecker_c::GetFilename ( ESphExt eExt ) const
				+{
				+	CSphString sPath;
				+	sPath.SetSprintf (
				+		"%s%s",
				+		m_tIndex.GetFilename(),
				+		sphGetExt(eExt).cstr() );
				+
				+	return sPath;
				+}
			
 
				+
			
 
				+
			
 
				+// Factory for the disk index checker: allocates a DiskIndexChecker_c bound to tIndex and tReporter
				+// and returns it through the interface pointer. The object is created with new.
				+DiskIndexChecker_i * CreateDiskIndexChecker ( CSphIndex & tIndex, DebugCheckError_c & tReporter )
				+{
				+	DiskIndexChecker_i * pChecker = new DiskIndexChecker_c ( tIndex, tReporter );
				+	return pChecker;
				+}
			
--- a/src/indexcheck.h
+++ b/src/indexcheck.h
@@ -0,0 +1,78 @@
 
				+//
			
 
				+// Copyright (c) 2017-2019, Manticore Software LTD (http://manticoresearch.com)
			
 
				+// Copyright (c) 2001-2016, Andrew Aksyonoff
			
 
				+// Copyright (c) 2008-2016, Sphinx Technologies Inc
			
 
				+// All rights reserved
			
 
				+//
			
 
				+// This program is free software; you can redistribute it and/or modify
			
 
				+// it under the terms of the GNU General Public License. You should have
			
 
				+// received a copy of the GPL license along with this program; if you
			
 
				+// did not, you can find it at http://www.gnu.org/
			
 
				+//
			
 
				+
			
 
				+#ifndef _indexcheck_
			
 
				+#define _indexcheck_
			
 
				+
			
 
				+#include "sphinx.h"
			
 
				+
			
 
				+// Abstract random-access byte source consumed by the debug checks.
				+// NOTE(review): the meaning of the bool results is set by the implementations, which are not
				+// visible here - presumably true on success; confirm before relying on it.
				+class DebugCheckReader_i
				+{
				+public:
				+	virtual			~DebugCheckReader_i () = default;
				+
				+	// total size of the underlying data, in bytes
				+	virtual int64_t	GetLengthBytes () = 0;
				+	// reads iSize bytes into pData
				+	virtual bool	GetBytes ( void * pData, int iSize ) = 0;
				+	// repositions the reader at iOff; iHint is a read-size hint
				+	virtual bool	SeekTo ( int64_t iOff, int iHint ) = 0;
				+};
			
 
				+
			
 
				+
			
 
				+// simple error reporter for debug checks
			
 
				+class DebugCheckError_c
			
 
				+{
			
 
				+public:
			
 
				+			DebugCheckError_c ( FILE * pFile );
			
 
				+
			
 
				+	bool	Fail ( const char * szFmt, ... );
			
 
				+	void	Msg ( const char * szFmt, ... );
			
 
				+	void	Progress ( const char * szFmt, ... );
			
 
				+	void	Done();
			
 
				+
			
 
				+	void	SetSegment ( int iSegment );
			
 
				+	int64_t	GetNumFails() const;
			
 
				+
			
 
				+private:
			
 
				+	FILE *	m_pFile {nullptr};
			
 
				+	bool	m_bProgress {false};
			
 
				+	int64_t m_tStartTime {0};
			
 
				+	int64_t	m_nFails {0};
			
 
				+	int64_t	m_nFailsPrinted {0};
			
 
				+	int		m_iSegment {-1};
			
 
				+};
			
 
				+
			
 
				+
			
 
				+// common code for debug checks in RT and disk indexes; both routines are protected, so they are reachable only from derived classes
				+class DebugCheckHelper_c
				+{
				+protected:
				+	void	DebugCheck_Attributes ( DebugCheckReader_i & tAttrs, DebugCheckReader_i & tBlobs, int64_t nRows, int64_t iMinMaxBytes, const CSphSchema & tSchema, DebugCheckError_c & tReporter ) const; // checks attribute and blob data for nRows rows, reporting to tReporter; NOTE(review): exact checks are in the implementation, not visible here
				+	void	DebugCheck_DeadRowMap (  int64_t iSizeBytes, int64_t nRows, DebugCheckError_c & tReporter ) const; // presumably validates a dead-row map of iSizeBytes bytes against nRows - TODO confirm
				+};
			
 
				+
			
 
				+
			
 
				+// disk index checker interface; instances are obtained from CreateDiskIndexChecker()
				+class DiskIndexChecker_i
				+{
				+public:
				+	virtual			~DiskIndexChecker_i() = default;
				+
				+	virtual bool	OpenFiles ( CSphString & sError ) = 0; // presumably returns false and fills sError on failure - TODO confirm against the implementation
				+	virtual void	Setup ( int64_t iNumRows, int64_t iDocinfoIndex, int64_t iMinMaxIndex, bool bCheckIdDups ) = 0; // supplies the row count, block-index parameters and the docid-duplicates switch; NOTE(review): meaning inferred from names
				+	virtual CSphVector<SphWordID_t> & GetHitlessWords() = 0; // mutable reference, so the caller can fill the list of hitless word ids
				+
				+	virtual void	Check() = 0; // runs the checks; takes no arguments and returns nothing
				+};
			
 
				+
			
 
				+
			
 
				+DiskIndexChecker_i * CreateDiskIndexChecker ( CSphIndex & tIndex, DebugCheckError_c & tReporter ); // factory; the checker is allocated with new, so the caller presumably owns and deletes it - TODO confirm
			
 
				+
			
 
				+#endif // _indexcheck_
			
--- a/src/indexformat.cpp
+++ b/src/indexformat.cpp
@@ -0,0 +1,690 @@
 
				+//
			
 
				+// Copyright (c) 2017-2019, Manticore Software LTD (http://manticoresearch.com)
			
 
				+// Copyright (c) 2001-2016, Andrew Aksyonoff
			
 
				+// Copyright (c) 2008-2016, Sphinx Technologies Inc
			
 
				+// All rights reserved
			
 
				+//
			
 
				+// This program is free software; you can redistribute it and/or modify
			
 
				+// it under the terms of the GNU General Public License. You should have
			
 
				+// received a copy of the GPL license along with this program; if you
			
 
				+// did not, you can find it at http://www.gnu.org/
			
 
				+//
			
 
				+
			
 
				+#include "indexformat.h"
			
 
				+
			
 
				+// Turns a document count plus the packed one-byte hint into a doclist hint value.
				+// uDocs is deliberately a DWORD: a hitless word has the highest bit set, and a signed
				+// argument would overflow. All math is done in 64 bits and the result saturates at INT_MAX.
				+int DoclistHintUnpack ( DWORD uDocs, BYTE uHint )
				+{
				+	const int64_t iDocs = uDocs;
				+
				+	// below the threshold the hint byte is ignored: 8 per document
				+	// at or above it: 4 per document plus uHint/64 per document
				+	const int64_t iEstimate = ( uDocs<(DWORD)DOCLIST_HINT_THRESH )
				+		? 8*iDocs
				+		: 4*iDocs + ( iDocs*uHint/64 );
				+
				+	return (int)Min ( iEstimate, INT_MAX );
				+}
			
 
				+
			
 
				+//////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+/// bUseMini selects the built-in mini buffers for the doclist and hitlist readers;
				+/// without it both buffer pointers keep their null defaults
				+DiskIndexQwordTraits_c::DiskIndexQwordTraits_c ( bool bUseMini, bool bExcluded )
				+{
				+	m_bExcluded = bExcluded;
				+
				+	if ( !bUseMini )
				+		return;
				+
				+	m_pHitsBuf = m_dHitlistBuf;
				+	m_pDocsBuf = m_dDoclistBuf;
				+}
			
 
				+
			
 
				+
			
 
				+/// creates the doclist reader from the given factory; a null factory leaves the current reader untouched
				+void DiskIndexQwordTraits_c::SetDocReader ( DataReaderFactory_c * pReader )
				+{
				+	if ( pReader )
				+		m_rdDoclist = pReader->MakeReader ( m_pDocsBuf, MINIBUFFER_LEN );
				+}
			
 
				+
			
 
				+
			
 
				+/// creates the hitlist reader from the given factory; a null factory leaves the current reader untouched
				+void DiskIndexQwordTraits_c::SetHitReader ( DataReaderFactory_c * pReader )
				+{
				+	if ( pReader )
				+		m_rdHitlist = pReader->MakeReader ( m_pHitsBuf, MINIBUFFER_LEN );
				+}
			
 
				+
			
 
				+
			
 
				+/// resets the base qword first, then puts the doclist/hitlist decoder fields back to their initial values
				+void DiskIndexQwordTraits_c::ResetDecoderState ()
				+{
				+	ISphQword::Reset();
				+
				+	// document side
				+	m_tDoc.m_tRowID = INVALID_ROWID;
				+
				+	// hit side
				+	m_iHitPos = EMPTY_HIT;
				+	m_uHitState = 0;
				+	m_uInlinedHit = 0;
				+	m_uHitPosition = 0;
				+}
			
 
				+
			
 
				+//////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+/// decodes fixed-size checkpoint records (word id, then wordlist offset) straight from a raw byte buffer
				+class CheckpointReader_c
				+{
				+public:
				+	/// reads one record at pBuf into tCP and returns the pointer just past that record
				+	const BYTE * ReadEntry ( const BYTE * pBuf, CSphWordlistCheckpoint & tCP ) const
				+	{
				+		const BYTE * pOffsetField = pBuf + sizeof(SphOffset_t);
				+
				+		// both fields occupy sizeof(SphOffset_t) bytes and are read unaligned
				+		tCP.m_uWordID = (SphWordID_t)sphUnalignedRead ( *(SphOffset_t *)pBuf );
				+		tCP.m_iWordlistOffset = sphUnalignedRead ( *(SphOffset_t *)pOffsetField );
				+
				+		return pOffsetField + sizeof(SphOffset_t);
				+	}
				+
				+	/// size of one record in bytes
				+	int GetStride() const
				+	{
				+		return m_iSrcStride;
				+	}
				+
				+private:
				+	int m_iSrcStride = 2*sizeof(SphOffset_t);
				+};
			
 
				+
			
 
				+
			
 
				+
			
 
				+/// Functor mapping a checkpoint slot (addressed by its pointer within the destination array)
				+/// to the checkpoint decoded on the fly from the raw source buffer.
				+struct MappedCheckpoint_fn : public ISphNoncopyable
				+{
				+	const CSphWordlistCheckpoint *	m_pDstStart;	///< first slot of the destination array
				+	const BYTE *					m_pSrcStart;	///< first raw record
				+	const CheckpointReader_c *		m_pReader;		///< record decoder
				+
				+	MappedCheckpoint_fn ( const CSphWordlistCheckpoint * pDstStart, const BYTE * pSrcStart, const CheckpointReader_c * pReader )
				+		: m_pDstStart ( pDstStart )
				+		, m_pSrcStart ( pSrcStart )
				+		, m_pReader ( pReader )
				+	{}
				+
				+	/// decode the raw record that corresponds to slot pCP and return it by value
				+	CSphWordlistCheckpoint operator() ( const CSphWordlistCheckpoint * pCP ) const
				+	{
				+		assert ( m_pDstStart<=pCP );
				+
				+		// the slot index in the destination array is the record index in the source
				+		const BYTE * pRecord = m_pSrcStart + ( pCP - m_pDstStart ) * m_pReader->GetStride();
				+
				+		CSphWordlistCheckpoint tDecoded;
				+		m_pReader->ReadEntry ( pRecord, tDecoded );
				+		return tDecoded;
				+	}
				+};
			
 
				+
			
 
				+//////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+struct DiskExpandedEntry_t	// one plain (non-payload) expansion collected by DictEntryDiskPayload_t::Add()
				+{
				+	int		m_iNameOff;	// offset of the length-prefixed keyword inside DictEntryDiskPayload_t::m_dWordBuf
				+	int		m_iDocs;	// documents count copied from the dictionary entry
				+	int		m_iHits;	// hits count copied from the dictionary entry
				+};
			
 
				+
			
 
				+
			
 
				+struct DiskExpandedPayload_t	// one expansion kept as a payload: only the stats and the doclist location, no keyword text
				+{
				+	int			m_iDocs;		// documents count copied from the dictionary entry
				+	int			m_iHits;		// hits count copied from the dictionary entry
				+	uint64_t	m_uDoclistOff;	// doclist offset of the entry; payloads get sorted by it in Convert()
				+	int			m_iDoclistHint;	// doclist hint of the entry
				+};
			
 
				+
			
 
				+
			
 
				+struct Slice64_t	// doclist reference stored in DiskSubstringPayload_t
				+{
				+	uint64_t	m_uOff;	// filled from the doclist offset
				+	int			m_iLen;	// filled from the doclist hint
				+};
			
 
				+
			
 
				+
			
 
				+struct DiskSubstringPayload_t : public ISphSubstringPayload	// payload handed over via Args_t::m_pPayload by DictEntryDiskPayload_t::Convert()
				+{
				+	explicit DiskSubstringPayload_t ( int iDoclists )	// iDoclists is the fixed number of doclist slices to allocate
				+		: m_dDoclist ( iDoclists )
				+	{}
				+
				+	CSphFixedVector<Slice64_t>	m_dDoclist;	// one slice per payload word
				+};
			
 
				+
			
 
				+
			
 
				+//////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+/// Collects the dictionary entries matched by a wildcard expansion and converts them into expansion results.
				+/// An entry is stored either as a plain expansion (stats plus keyword text in m_dWordBuf)
				+/// or, when payloads are enabled and the entry qualifies, as a payload (stats plus doclist offset/hint).
				+struct DictEntryDiskPayload_t
				+{
				+	DictEntryDiskPayload_t ( bool bPayload, ESphHitless eHitless )
				+		: m_bPayload ( bPayload )
				+		, m_eHitless ( eHitless )
				+	{
				+		// pre-allocate; the payload list is only needed when payloads are enabled
				+		if ( m_bPayload )
				+			m_dWordPayload.Reserve ( 1000 );
				+
				+		m_dWordExpand.Reserve ( 1000 );
				+		m_dWordBuf.Reserve ( 8096 );
				+	}
				+
				+	/// store one matched entry; iWordLen is the keyword length in bytes
				+	void Add ( const CSphDictEntry & tWord, int iWordLen )
				+	{
				+		// hitless words are never stored as payloads
				+		// FIXME!!! do we need hitless=some as payloads?
				+		const bool bHitlessWord = ( m_eHitless==SPH_HITLESS_ALL )
				+			|| ( m_eHitless==SPH_HITLESS_SOME && ( tWord.m_iDocs & HITLESS_DOC_FLAG )!=0 );
				+
				+		if ( m_bPayload && sphIsExpandedPayload ( tWord.m_iDocs, tWord.m_iHits ) && !bHitlessWord )
				+		{
				+			DiskExpandedPayload_t & tPayload = m_dWordPayload.Add();
				+			tPayload.m_iDocs = tWord.m_iDocs;
				+			tPayload.m_iHits = tWord.m_iHits;
				+			tPayload.m_uDoclistOff = tWord.m_iDoclistOffset;
				+			tPayload.m_iDoclistHint = tWord.m_iDoclistHint;
				+			return;
				+		}
				+
				+		// plain expansion: keep the stats, append the keyword to the arena as <length byte><bytes>
				+		DiskExpandedEntry_t & tEntry = m_dWordExpand.Add();
				+
				+		const int iNameOff = m_dWordBuf.GetLength();
				+		tEntry.m_iNameOff = iNameOff;
				+		tEntry.m_iDocs = tWord.m_iDocs;
				+		tEntry.m_iHits = tWord.m_iHits;
				+
				+		m_dWordBuf.Resize ( iNameOff + iWordLen + 1 );
				+		BYTE * pName = m_dWordBuf.Begin() + iNameOff;
				+		memcpy ( pName + 1, tWord.m_sKeyword, iWordLen );
				+		pName[0] = (BYTE)iWordLen;
				+	}
				+
				+	/// flush everything collected so far into tArgs: expanded keywords, the payload (if any) and the totals
				+	void Convert ( ISphWordlist::Args_t & tArgs )
				+	{
				+		if ( m_dWordExpand.GetLength()==0 && m_dWordPayload.GetLength()==0 )
				+			return;
				+
				+		int iDocsSum = 0;
				+		int iHitsSum = 0;
				+
				+		if ( m_dWordExpand.GetLength()>0 )
				+		{
				+			LimitExpanded ( tArgs.m_iExpansionLimit, m_dWordExpand );
				+
				+			const BYTE * pNames = m_dWordBuf.Begin();
				+			const bool bMaskDocs = ( m_eHitless==SPH_HITLESS_SOME );
				+			for ( const DiskExpandedEntry_t & tEntry : m_dWordExpand )
				+			{
				+				// with hitless=some the docs count carries a flag bit that must be stripped
				+				int iDocs = tEntry.m_iDocs;
				+				if ( bMaskDocs )
				+					iDocs = ( tEntry.m_iDocs & HITLESS_DOC_MASK );
				+
				+				const BYTE * pName = pNames + tEntry.m_iNameOff;
				+				tArgs.AddExpanded ( pName + 1, pName[0], iDocs, tEntry.m_iHits );
				+
				+				iDocsSum += iDocs;
				+				iHitsSum += tEntry.m_iHits;
				+			}
				+		}
				+
				+		if ( m_dWordPayload.GetLength()>0 )
				+		{
				+			LimitExpanded ( tArgs.m_iExpansionLimit, m_dWordPayload );
				+
				+			const int iPayloads = m_dWordPayload.GetLength();
				+			DiskSubstringPayload_t * pPayload = new DiskSubstringPayload_t ( iPayloads );
				+
				+			// sorting by ascending doc-list offset gives some (15%) speed-up too
				+			sphSort ( m_dWordPayload.Begin(), iPayloads, bind ( &DiskExpandedPayload_t::m_uDoclistOff ) );
				+
				+			for ( int i=0; i<iPayloads; i++ )
				+			{
				+				const DiskExpandedPayload_t & tWord = m_dWordPayload[i];
				+				assert ( m_eHitless==SPH_HITLESS_NONE || ( m_eHitless==SPH_HITLESS_SOME && ( tWord.m_iDocs & HITLESS_DOC_FLAG )==0 ) );
				+
				+				Slice64_t & tSlice = pPayload->m_dDoclist[i];
				+				tSlice.m_uOff = tWord.m_uDoclistOff;
				+				tSlice.m_iLen = tWord.m_iDoclistHint;
				+
				+				iDocsSum += tWord.m_iDocs;
				+				iHitsSum += tWord.m_iHits;
				+			}
				+
				+			// the payload totals include the plain expansions accumulated above
				+			pPayload->m_iTotalDocs = iDocsSum;
				+			pPayload->m_iTotalHits = iHitsSum;
				+			tArgs.m_pPayload = pPayload;
				+		}
				+
				+		tArgs.m_iTotalDocs = iDocsSum;
				+		tArgs.m_iTotalHits = iHitsSum;
				+	}
				+
				+	/// sort expansions by frequency desc and
				+	/// clip the less frequent ones if needed, as they are likely misspellings;
				+	/// a zero limit or a list that already fits means nothing to do
				+	template < typename T >
				+	void LimitExpanded ( int iExpansionLimit, CSphVector<T> & dVec ) const
				+	{
				+		const bool bNeedClip = iExpansionLimit && dVec.GetLength()>iExpansionLimit;
				+		if ( bNeedClip )
				+		{
				+			sphSort ( dVec.Begin(), dVec.GetLength(), ExpandedOrderDesc_T<T>() );
				+			dVec.Resize ( iExpansionLimit );
				+		}
				+	}
				+
				+	bool								m_bPayload;		///< whether qualifying entries may be stored as payloads
				+	ESphHitless							m_eHitless;		///< hitless mode of the index
				+	CSphVector<DiskExpandedEntry_t>		m_dWordExpand;	///< plain expansions
				+	CSphVector<DiskExpandedPayload_t>	m_dWordPayload;	///< payload expansions
				+	CSphVector<BYTE>					m_dWordBuf;		///< keyword arena for plain expansions
				+};
			
 
				+
			
 
				+//////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+CWordlist::~CWordlist ()	// all cleanup is delegated to Reset()
				+{
				+	Reset();
				+}
			
 
				+
			
 
				+
			
 
				+void CWordlist::Reset ()	// releases the buffers and helper objects acquired by Preread()
				+{
				+	m_tBuf.Reset ();							// the mapped dictionary buffer
				+	m_dCheckpoints.Reset ( 0 );					// checkpoints array, resized to zero entries
				+	m_pWords.Reset ( 0 );						// arena holding the checkpoint words
				+	SafeDeleteArray ( m_pInfixBlocksWords );	// arena holding the infix block words (leaked from a vector in Preread)
				+	SafeDelete ( m_pCpReader );					// in-place checkpoint reader, allocated on the CRC dictionary path only
				+}
			
 
				+
			
 
				+
			
 
				+/// Loads the dictionary metadata from file sName and maps the dictionary itself.
				+///
				+/// For a CRC dictionary (bWordDict is false) the file is only mapped and an in-place checkpoint reader is created.
				+/// For a keywords dictionary the checkpoint words and offsets are read into memory, then the infix blocks
				+/// (when the header says there are any), and finally the file is mapped.
				+///
				+/// m_dCheckpoints must already be sized to the number of checkpoints, and the DictHeader_t fields
				+/// must already be filled in. Returns false with a message in sError on any failure.
				+/// Sizes and lengths read from the file are validated at runtime, so a damaged dictionary
				+/// is reported as an error instead of overrunning the arenas in release builds.
				+bool CWordlist::Preread ( const CSphString & sName, bool bWordDict, int iSkiplistBlockSize, CSphString & sError )
				+{
				+	assert ( m_iDictCheckpointsOffset>0 );
				+
				+	m_bWordDict = bWordDict;
				+	m_iWordsEnd = m_iDictCheckpointsOffset; // set wordlist end
				+	m_iSkiplistBlockSize = iSkiplistBlockSize;
				+
				+	////////////////////////////
				+	// preload word checkpoints
				+	////////////////////////////
				+
				+	////////////////////////////
				+	// fast path for CRC checkpoints - just maps data and use inplace CP reader
				+	if ( !bWordDict )
				+	{
				+		if ( !m_tBuf.Setup ( sName, sError ) )
				+			return false;
				+
				+		m_pCpReader = new CheckpointReader_c;
				+		return true;
				+	}
				+
				+	////////////////////////////
				+	// regular path that loads checkpoints data
				+
				+	CSphAutoreader tReader;
				+	if ( !tReader.Open ( sName, sError ) )
				+		return false;
				+
				+	int64_t iFileSize = tReader.GetFilesize();
				+
				+	// check the metadata size before it gets narrowed to int below
				+	if ( iFileSize-m_iDictCheckpointsOffset>=UINT_MAX )
				+	{
				+		sError.SetSprintf ( "dictionary meta overflow: meta size=" INT64_FMT ", total size=" INT64_FMT ", meta offset=" INT64_FMT,
				+			iFileSize-m_iDictCheckpointsOffset, iFileSize, (int64_t)m_iDictCheckpointsOffset );
				+		return false;
				+	}
				+
				+	int iCheckpointOnlySize = (int)(iFileSize-m_iDictCheckpointsOffset);
				+	if ( m_iInfixCodepointBytes && m_iInfixBlocksOffset )
				+		iCheckpointOnlySize = (int)(m_iInfixBlocksOffset - strlen ( g_sTagInfixBlocks ) - m_iDictCheckpointsOffset);
				+
				+	tReader.SeekTo ( m_iDictCheckpointsOffset, iCheckpointOnlySize );
				+
				+	// every checkpoint is stored as <DWORD length><word bytes><offset>;
				+	// in memory only the word bytes plus a trailing zero are kept
				+	assert ( m_bWordDict );
				+	int iArenaSize = iCheckpointOnlySize
				+		- (sizeof(DWORD)+sizeof(SphOffset_t))*m_dCheckpoints.GetLength()
				+		+ sizeof(BYTE)*m_dCheckpoints.GetLength();
				+	assert ( iArenaSize>=0 );
				+	if ( iArenaSize<0 )
				+	{
				+		sError.SetSprintf ( "dictionary checkpoints size mismatch: arena size=%d, checkpoints=%d", iArenaSize, (int)m_dCheckpoints.GetLength() );
				+		return false;
				+	}
				+	m_pWords.Reset ( iArenaSize );
				+
				+	BYTE * pWord = m_pWords.Begin();
				+	for ( auto & dCheckpoint : m_dCheckpoints )
				+	{
				+		dCheckpoint.m_sWord = (char *)pWord;
				+
				+		const int iLen = tReader.GetDword();
				+		const int64_t iArenaLeft = iArenaSize - ( pWord - m_pWords.Begin() );
				+
				+		// the word and its trailing zero must fit into what is left of the arena
				+		if ( iLen<0 || iLen+1>iArenaLeft )
				+		{
				+			if ( tReader.GetErrorFlag() )
				+				sError = tReader.GetErrorMessage();
				+			else
				+				sError.SetSprintf ( "dictionary checkpoint word does not fit the arena: length=%d, arena left=" INT64_FMT, iLen, iArenaLeft );
				+			return false;
				+		}
				+
				+		assert ( iLen>0 );
				+		tReader.GetBytes ( pWord, iLen );
				+		pWord[iLen] = '\0';
				+		pWord += iLen+1;
				+
				+		dCheckpoint.m_iWordlistOffset = tReader.GetOffset();
				+	}
				+
				+	////////////////////////
				+	// preload infix blocks
				+	////////////////////////
				+
				+	if ( m_iInfixCodepointBytes && m_iInfixBlocksOffset )
				+	{
				+		// reading to vector as old version doesn't store total infix words length
				+		CSphTightVector<BYTE> dInfixWords;
				+		dInfixWords.Reserve ( (int)m_iInfixBlocksWordsSize );
				+
				+		tReader.SeekTo ( m_iInfixBlocksOffset, (int)(iFileSize-m_iInfixBlocksOffset) );
				+
				+		const int iBlocks = tReader.UnzipInt();
				+		if ( iBlocks<0 )
				+		{
				+			sError.SetSprintf ( "dictionary infix blocks count is invalid: %d", iBlocks );
				+			return false;
				+		}
				+
				+		m_dInfixBlocks.Resize ( iBlocks );
				+		for ( auto & dInfixBlock : m_dInfixBlocks )
				+		{
				+			int iBytes = tReader.UnzipInt();
				+			if ( iBytes<0 )
				+			{
				+				sError.SetSprintf ( "dictionary infix block length is invalid: %d", iBytes );
				+				return false;
				+			}
				+
				+			int iOff = dInfixWords.GetLength();
				+			dInfixBlock.m_iInfixOffset = (DWORD) iOff; /// FIXME! name convention of m_iInfixOffset
				+			dInfixWords.Resize ( iOff+iBytes+1 );
				+
				+			tReader.GetBytes ( dInfixWords.Begin()+iOff, iBytes );
				+			dInfixWords[iOff+iBytes] = '\0';
				+
				+			dInfixBlock.m_iOffset = tReader.UnzipInt();
				+		}
				+
				+		// fix-up offset to pointer
				+		// (the vector may have been reallocated while growing, so pointers are only taken now)
				+		m_pInfixBlocksWords = dInfixWords.LeakData();
				+		ARRAY_FOREACH ( i, m_dInfixBlocks )
				+			m_dInfixBlocks[i].m_sInfix = (const char *)m_pInfixBlocksWords + m_dInfixBlocks[i].m_iInfixOffset;
				+
				+		// FIXME!!! store and load that explicitly
				+		if ( m_dInfixBlocks.GetLength() )
				+			m_iWordsEnd = m_dInfixBlocks.Begin()->m_iOffset - strlen ( g_sTagInfixEntries );
				+		else
				+			m_iWordsEnd -= strlen ( g_sTagInfixEntries );
				+	}
				+
				+	if ( tReader.GetErrorFlag() )
				+	{
				+		sError = tReader.GetErrorMessage();
				+		return false;
				+	}
				+
				+	tReader.Close();
				+
				+	// mapping up only wordlist without meta (checkpoints, infixes, etc)
				+	if ( !m_tBuf.Setup ( sName, sError ) )
				+		return false;
				+
				+	return true;
				+}
			
 
				+
			
 
				+
			
 
				+/// Decodes all mapped checkpoint records into m_dCheckpoints and drops the in-place reader,
				+/// so that later lookups use the in-memory checkpoints. Does nothing when there is no in-place reader.
				+void CWordlist::DebugPopulateCheckpoints()
				+{
				+	if ( !m_pCpReader )
				+		return;
				+
				+	// records are laid out back to back starting at the checkpoints offset
				+	const BYTE * pRecord = m_tBuf.GetWritePtr() + m_iDictCheckpointsOffset;
				+	for ( auto & tCheckpoint : m_dCheckpoints )
				+		pRecord = m_pCpReader->ReadEntry ( pRecord, tCheckpoint );
				+
				+	SafeDelete(m_pCpReader);
				+}
			
 
				+
			
 
				+
			
 
				+/// Looks up the checkpoint (dictionary block) that may contain the given word.
				+/// The word is given both as text (sWord, iWordLen) and as an id (iWordID); m_bWordDict is passed along
				+/// so the search can tell which one applies. Returns NULL when there is no suitable checkpoint,
				+/// including the case of a dictionary without any checkpoints.
				+const CSphWordlistCheckpoint * CWordlist::FindCheckpoint ( const char * sWord, int iWordLen, SphWordID_t iWordID, bool bStarMode ) const
				+{
				+	// empty index? there is no last element to take the address of
				+	if ( !m_dCheckpoints.GetLength() )
				+		return nullptr;
				+
				+	const CSphWordlistCheckpoint * pFirst = m_dCheckpoints.Begin();
				+	const CSphWordlistCheckpoint * pLast = &m_dCheckpoints.Last();
				+
				+	if ( m_pCpReader ) // FIXME!!! fall to regular checkpoints after data got read
				+	{
				+		// checkpoints were not loaded; decode them from the mapped file on demand
				+		MappedCheckpoint_fn tPred ( pFirst, m_tBuf.GetWritePtr() + m_iDictCheckpointsOffset, m_pCpReader );
				+		return sphSearchCheckpoint ( sWord, iWordLen, iWordID, bStarMode, m_bWordDict, pFirst, pLast, tPred );
				+	}
				+
				+	return sphSearchCheckpoint ( sWord, iWordLen, iWordID, bStarMode, m_bWordDict, pFirst, pLast );
				+}
			
 
				+
			
 
				+
			
 
				+bool CWordlist::GetWord ( const BYTE * pBuf, SphWordID_t iWordID, CSphDictEntry & tWord ) const
				+{	// scans the delta-coded wordlist chunk at pBuf for iWordID; on a match fills tWord and returns true, otherwise returns false and leaves tWord untouched
				+	SphWordID_t iLastID = 0;	// running sum of word id deltas
				+	SphOffset_t uLastOff = 0;	// running sum of doclist offset deltas
				+
				+	while (true)
				+	{
				+		// unpack next word ID (stored as a delta from the previous one)
				+		const SphWordID_t iDeltaWord = sphUnzipWordid ( pBuf ); // FIXME! slow with 32bit wordids
				+
				+		if ( iDeltaWord==0 ) // wordlist chunk is over
				+			return false;
				+
				+		iLastID += iDeltaWord;
				+
				+		// list is sorted, so if there was no match, there's no such word
				+		if ( iLastID>iWordID )
				+			return false;
				+
				+		// unpack next offset (also a delta)
				+		const SphOffset_t iDeltaOffset = sphUnzipOffset ( pBuf );
				+		uLastOff += iDeltaOffset;
				+
				+		// unpack doc/hit count; the skiplist position is only read when docs exceed the skiplist block size
				+		const int iDocs = sphUnzipInt ( pBuf );
				+		const int iHits = sphUnzipInt ( pBuf );
				+		SphOffset_t iSkiplistPos = 0;
				+		if ( iDocs > m_iSkiplistBlockSize )
				+			iSkiplistPos = sphUnzipOffset ( pBuf );
				+
				+		assert ( iDeltaOffset );
				+		assert ( iDocs );
				+		assert ( iHits );
				+
				+		// it matches?!
				+		if ( iLastID==iWordID )
				+		{
				+			// read two more values past this entry: the first is discarded, the second becomes the doclist hint
				+			sphUnzipWordid ( pBuf ); // might be 0 at checkpoint
				+			const SphOffset_t iDoclistLen = sphUnzipOffset ( pBuf );
				+
				+			tWord.m_iDoclistOffset = uLastOff;
				+			tWord.m_iDocs = iDocs;
				+			tWord.m_iHits = iHits;
				+			tWord.m_iDoclistHint = (int)iDoclistLen;	// narrowed to int
				+			tWord.m_iSkiplistOffset = iSkiplistPos;
				+			return true;
				+		}
				+	}
				+}
			
 
				+
			
 
				+
			
 
				+/// Returns the pointer to the dictionary block a checkpoint refers to.
				+/// pCheckpoint must point into m_dCheckpoints; when the in-place checkpoint reader is active
				+/// the block offset is decoded from the mapped file instead of being taken from the array.
				+const BYTE * CWordlist::AcquireDict ( const CSphWordlistCheckpoint * pCheckpoint ) const
				+{
				+	assert ( pCheckpoint );
				+	assert ( m_dCheckpoints.GetLength() );
				+	assert ( pCheckpoint>=m_dCheckpoints.Begin() && pCheckpoint<=&m_dCheckpoints.Last() );
				+
				+	SphOffset_t iBlockOff;
				+	if ( m_pCpReader )
				+	{
				+		const BYTE * pRawCheckpoints = m_tBuf.GetWritePtr() + m_iDictCheckpointsOffset;
				+		MappedCheckpoint_fn tMapped ( m_dCheckpoints.Begin(), pRawCheckpoints, m_pCpReader );
				+		iBlockOff = tMapped ( pCheckpoint ).m_iWordlistOffset;
				+	} else
				+	{
				+		iBlockOff = pCheckpoint->m_iWordlistOffset;
				+	}
				+
				+	assert ( !m_tBuf.IsEmpty() );
				+	assert ( iBlockOff>0 && iBlockOff<(int64_t)m_tBuf.GetLengthBytes() );
				+
				+	return m_tBuf.GetWritePtr()+iBlockOff;
				+}
			
 
				+
			
 
				+
			
 
				+/// Collects the dictionary words that start with sSubstring and also match sWildcard,
				+/// and reports them through tArgs. Scanning starts at the checkpoint found for the prefix
				+/// and walks the following blocks while they can still contain the prefix.
				+void CWordlist::GetPrefixedWords ( const char * sSubstring, int iSubLen, const char * sWildcard, Args_t & tArgs ) const
				+{
				+	assert ( sSubstring && *sSubstring && iSubLen>0 );
				+
				+	// nothing to scan in an empty index
				+	if ( !m_dCheckpoints.GetLength() )
				+		return;
				+
				+	DictEntryDiskPayload_t tCollector ( tArgs.m_bPayload, tArgs.m_eHitless );
				+
				+	// the wide-char form of the wildcard is only available for valid UTF-8
				+	int dWideWildcard [ SPH_MAX_WORD_LEN + 1 ];
				+	int * pWideWildcard = nullptr;
				+	if ( sphIsUTF8 ( sWildcard ) && sphUTF8ToWideChar ( sWildcard, dWideWildcard, SPH_MAX_WORD_LEN ) )
				+		pWideWildcard = dWideWildcard;
				+
				+	const CSphWordlistCheckpoint * pBlock = FindCheckpoint ( sSubstring, iSubLen, 0, true );
				+	const CSphWordlistCheckpoint * pLastBlock = &m_dCheckpoints.Last();
				+
				+	// whether to skip heading magic chars in the prefix, like NONSTEMMED marker
				+	const int iSkipMagic = ( BYTE(*sSubstring)<0x20 );
				+
				+	if ( pBlock )
				+	{
				+		do
				+		{
				+			// decode wordlist chunk
				+			KeywordsBlockReader_c tBlockReader ( AcquireDict ( pBlock ), m_iSkiplistBlockSize );
				+			while ( tBlockReader.UnpackWord() )
				+			{
				+				// block is sorted
				+				// so once keywords are greater than the prefix, no more matches
				+				const int iCmp = sphDictCmp ( sSubstring, iSubLen, (const char *)tBlockReader.m_sKeyword, tBlockReader.GetWordLen() );
				+				if ( iCmp<0 || sphInterrupted() )
				+					break;
				+
				+				// does it match the prefix *and* the entire wildcard?
				+				if ( iCmp==0 && sphWildcardMatch ( (const char *)tBlockReader.m_sKeyword + iSkipMagic, sWildcard, pWideWildcard ) )
				+					tCollector.Add ( tBlockReader, tBlockReader.GetWordLen() );
				+			}
				+
				+		// go on while not interrupted, there is a next block, and its first word is not past the prefix
				+		} while ( !sphInterrupted ()
				+			&& ++pBlock<=pLastBlock
				+			&& sphDictCmp ( sSubstring, iSubLen, pBlock->m_sWord, strlen ( pBlock->m_sWord ) )>=0 );
				+	}
				+
				+	tCollector.Convert ( tArgs );
				+}
			
 
				+
			
 
				+
			
 
				+/// Collects the dictionary words that contain the infix sSubstring and match sWildcard,
				+/// and reports them through tArgs. Candidate blocks come from the infix index;
				+/// every word of each candidate block is then checked against the wildcard.
				+void CWordlist::GetInfixedWords ( const char * sSubstring, int iSubLen, const char * sWildcard, Args_t & tArgs ) const
				+{
				+	// dict must be of keywords type, and fully cached
				+	// mmap()ed in the worst case, should we ever banish it to disk again
				+	if ( m_tBuf.IsEmpty() || !m_dCheckpoints.GetLength() )
				+		return;
				+
				+	assert ( !m_pCpReader );
				+
				+	// extract key1, upto 6 chars from infix start
				+	const int iKeyBytes = sphGetInfixLength ( sSubstring, iSubLen, m_iInfixCodepointBytes );
				+
				+	// lookup key1
				+	// OPTIMIZE? maybe lookup key2 and reduce checkpoint set size, if possible?
				+	CSphVector<DWORD> dBlocks;
				+	if ( !sphLookupInfixCheckpoints ( sSubstring, iKeyBytes, m_tBuf.GetWritePtr(), m_dInfixBlocks, m_iInfixCodepointBytes, dBlocks ) )
				+		return;
				+
				+	DictEntryDiskPayload_t tCollector ( tArgs.m_bPayload, tArgs.m_eHitless );
				+
				+	// whether to skip heading magic chars in the prefix, like NONSTEMMED marker
				+	const bool bExactOnly = tArgs.m_bHasExactForms;
				+	const int iSkipMagic = bExactOnly ? 1 : 0;
				+
				+	// the wide-char form of the wildcard is only available for valid UTF-8
				+	int dWideWildcard [ SPH_MAX_WORD_LEN + 1 ];
				+	int * pWideWildcard = nullptr;
				+	if ( sphIsUTF8 ( sWildcard ) && sphUTF8ToWideChar ( sWildcard, dWideWildcard, SPH_MAX_WORD_LEN ) )
				+		pWideWildcard = dWideWildcard;
				+
				+	// walk those checkpoints, check all their words
				+	ARRAY_FOREACH ( iBlock, dBlocks )
				+	{
				+		// infix lookup yields 1-based checkpoint numbers
				+		// OPTIMIZE? add a quicker path than a generic wildcard for "*infix*" case?
				+		const BYTE * pBlockData = m_tBuf.GetWritePtr() + m_dCheckpoints[dBlocks[iBlock]-1].m_iWordlistOffset;
				+		KeywordsBlockReader_c tBlockReader ( pBlockData, m_iSkiplistBlockSize );
				+
				+		while ( tBlockReader.UnpackWord() && !sphInterrupted () )
				+		{
				+			// stemmed terms should not match suffixes
				+			const bool bStemmed = bExactOnly && *tBlockReader.m_sKeyword!=MAGIC_WORD_HEAD_NONSTEMMED;
				+			if ( !bStemmed && sphWildcardMatch ( (const char *)tBlockReader.m_sKeyword+iSkipMagic, sWildcard, pWideWildcard ) )
				+				tCollector.Add ( tBlockReader, tBlockReader.GetWordLen() );
				+		}
				+
				+		if ( sphInterrupted () )
				+			break;
				+	}
				+
				+	tCollector.Convert ( tArgs );
				+}
			
 
				+
			
 
				+
			
 
				+void CWordlist::SuffixGetChekpoints ( const SuggestResult_t & , const char * sSuffix, int iLen, CSphVector<DWORD> & dCheckpoints ) const	// the first argument is unused here
				+{
				+	sphLookupInfixCheckpoints ( sSuffix, iLen, m_tBuf.GetWritePtr(), m_dInfixBlocks, m_iInfixCodepointBytes, dCheckpoints );	// same infix lookup as in GetInfixedWords(); results go to dCheckpoints, the return value is ignored
				+}
			
 
				+
			
 
				+
			
 
				+/// points the word reader kept in tRes at the dictionary block of checkpoint number iCP (1-based)
				+void CWordlist::SetCheckpoint ( SuggestResult_t & tRes, DWORD iCP ) const
				+{
				+	assert ( tRes.m_pWordReader );
				+
				+	const BYTE * pBlock = m_tBuf.GetWritePtr() + m_dCheckpoints[iCP-1].m_iWordlistOffset;
				+	auto * pBlockReader = (KeywordsBlockReader_c *)tRes.m_pWordReader;
				+	pBlockReader->Reset ( pBlock );
				+}
			
 
				+
			
 
				+
			
 
				+/// unpacks the next word of the current block using the reader kept in tRes;
				+/// fills tWord and returns true, or returns false once the block is over
				+bool CWordlist::ReadNextWord ( SuggestResult_t & tRes, DictWord_t & tWord ) const
				+{
				+	auto * pBlockReader = (KeywordsBlockReader_c *)tRes.m_pWordReader;
				+
				+	const bool bGotWord = pBlockReader->UnpackWord();
				+	if ( bGotWord )
				+	{
				+		tWord.m_sWord = pBlockReader->GetWord();
				+		tWord.m_iLen = pBlockReader->GetWordLen();
				+		tWord.m_iDocs = pBlockReader->m_iDocs;
				+	}
				+
				+	return bGotWord;
				+}
			
 
				+
			
 
				+//////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+/// starts reading the keywords block at pBuf; iSkiplistBlockSize is the docs count
				+/// above which an entry carries a skiplist offset
				+KeywordsBlockReader_c::KeywordsBlockReader_c ( const BYTE * pBuf, int iSkiplistBlockSize )
				+	: m_iSkiplistBlockSize { iSkiplistBlockSize }
				+{
				+	Reset ( pBuf );
				+}
			
 
				+
			
 
				+
			
 
				+/// restarts decoding from the block at pBuf with an empty current word
				+void KeywordsBlockReader_c::Reset ( const BYTE * pBuf )
				+{
				+	// empty current word; the entry's keyword pointer refers to the internal buffer
				+	m_iLen = 0;
				+	m_sWord[0] = '\0';
				+	m_sKeyword = m_sWord;
				+
				+	m_pBuf = pBuf;
				+}
			
 
				+
			
 
				+
			
 
				+/// Decodes the next dictionary entry of the current block into this object.
				+///
				+/// Keywords are front-coded: an entry keeps the first iMatch bytes of the previous keyword
				+/// and appends iDelta new bytes. After the keyword come the doclist offset, the docs and hits counts,
				+/// a hint byte (only when docs reach DOCLIST_HINT_THRESH) and a skiplist offset
				+/// (only when docs exceed the skiplist block size).
				+///
				+/// Returns true when an entry was decoded. Returns false when the block is over
				+/// or when the packed keyword would not fit the internal buffer (damaged data);
				+/// in both cases the reader stops and further calls keep returning false.
				+bool KeywordsBlockReader_c::UnpackWord()
				+{
				+	if ( !m_pBuf )
				+		return false;
				+
				+	assert ( m_iSkiplistBlockSize>0 );
				+
				+	// unpack next word
				+	// must be in sync with DictEnd()!
				+	BYTE uPack = *m_pBuf++;
				+	if ( !uPack )
				+	{
				+		// ok, this block is over
				+		m_pBuf = NULL;
				+		m_iLen = 0;
				+		return false;
				+	}
				+
				+	int iMatch, iDelta;
				+	if ( uPack & 0x80 )
				+	{
				+		// short form: 3 bits of delta (stored minus one) and 4 bits of match in a single byte
				+		iDelta = ( ( uPack>>4 ) & 7 ) + 1;
				+		iMatch = uPack & 15;
				+	} else
				+	{
				+		// long form: 7 bits of delta, then a separate match byte
				+		iDelta = uPack & 127;
				+		iMatch = *m_pBuf++;
				+	}
				+
				+	// asserts vanish in release builds, and the long form can encode far more than the buffer holds;
				+	// never copy past m_sWord, treat such an entry as the end of the block instead
				+	if ( iMatch+iDelta>(int)sizeof(m_sWord)-1 )
				+	{
				+		assert ( 0 && "keyword does not fit the buffer" );
				+		m_pBuf = NULL;
				+		m_iLen = 0;
				+		return false;
				+	}
				+
				+	assert ( iMatch+iDelta<(int)sizeof(m_sWord)-1 );
				+	assert ( iMatch<=(int)strlen ( (char *)m_sWord ) );
				+
				+	memcpy ( m_sWord + iMatch, m_pBuf, iDelta );
				+	m_pBuf += iDelta;
				+
				+	m_iLen = iMatch + iDelta;
				+	m_sWord[m_iLen] = '\0';
				+
				+	m_iDoclistOffset = sphUnzipOffset ( m_pBuf );
				+	m_iDocs = sphUnzipInt ( m_pBuf );
				+	m_iHits = sphUnzipInt ( m_pBuf );
				+	m_uHint = ( m_iDocs>=DOCLIST_HINT_THRESH ) ? *m_pBuf++ : 0;
				+	m_iDoclistHint = DoclistHintUnpack ( m_iDocs, m_uHint );
				+	if ( m_iDocs > m_iSkiplistBlockSize )
				+		m_iSkiplistOffset = sphUnzipOffset ( m_pBuf );
				+	else
				+		m_iSkiplistOffset = 0;
				+
				+	assert ( m_iLen>0 );
				+	return true;
				+}
			
--- a/src/indexformat.h
+++ b/src/indexformat.h
@@ -0,0 +1,157 @@
 
				+//
			
 
				+// Copyright (c) 2017-2019, Manticore Software LTD (http://manticoresearch.com)
			
 
				+// Copyright (c) 2001-2016, Andrew Aksyonoff
			
 
				+// Copyright (c) 2008-2016, Sphinx Technologies Inc
			
 
				+// All rights reserved
			
 
				+//
			
 
				+// This program is free software; you can redistribute it and/or modify
			
 
				+// it under the terms of the GNU General Public License. You should have
			
 
				+// received a copy of the GPL license along with this program; if you
			
 
				+// did not, you can find it at http://www.gnu.org/
			
 
				+//
			
 
				+#ifndef _indexformat_
			
 
				+#define _indexformat_
			
 
				+
			
 
				+#include "sphinxstd.h"
			
 
				+#include "sphinxsearch.h"
			
 
				+#include "datareader.h"
			
 
				+
			
 
				+const int	DOCLIST_HINT_THRESH = 256;			// entries with at least this many docs carry a hint byte; below it DoclistHintUnpack() ignores the hint
				+const DWORD HITLESS_DOC_MASK = 0x7FFFFFFF;		// strips the hitless flag from a docs count
				+const DWORD	HITLESS_DOC_FLAG = 0x80000000;		// highest bit of a docs count, tested to detect a hitless word
			
 
				+
			
 
				+#define sphUnzipWordid sphUnzipOffset
			
 
				+
			
 
				+class DiskIndexQwordSetup_c;
			
 
				+
			
 
				+/// query word from the searcher's point of view
				+class DiskIndexQwordTraits_c : public ISphQword
				+{
				+public:
				+	/// tricky bit
				+	/// m_uHitPosition is always a current position in the .spp file
				+	/// base ISphQword::m_iHitlistPos carries the inlined hit data when m_iDocs==1
				+	/// but this one is always a real position, used for delta coding
				+	SphOffset_t		m_uHitPosition = 0;
				+	CSphMatch		m_tDoc;			///< current match (partial)
				+
				+	FileBlockReaderPtr_c	m_rdDoclist;	///< my doclist accessor
				+	FileBlockReaderPtr_c	m_rdHitlist;	///< my hitlist accessor
				+
				+
				+					DiskIndexQwordTraits_c ( bool bUseMini, bool bExcluded );	///< bUseMini makes the readers use the built-in mini buffers below
				+
				+	void			SetDocReader ( DataReaderFactory_c * pReader );	///< creates m_rdDoclist from the factory; null factory is a no-op
				+	void			SetHitReader ( DataReaderFactory_c * pReader );	///< creates m_rdHitlist from the factory; null factory is a no-op
				+	void			ResetDecoderState();								///< resets the base qword and the hit/doc decoding fields
				+	virtual bool	Setup ( const DiskIndexQwordSetup_c * pSetup ) = 0;
				+
				+protected:
				+	Hitpos_t		m_uInlinedHit {0};
				+	DWORD			m_uHitState = 0;
				+	Hitpos_t		m_iHitPos {EMPTY_HIT};	///< current hit position, from hitlist
				+
				+	static const int MINIBUFFER_LEN = 1024;		///< size of each built-in buffer, also passed to MakeReader()
				+	BYTE			m_dHitlistBuf[MINIBUFFER_LEN];
				+	BYTE			m_dDoclistBuf[MINIBUFFER_LEN];
				+
				+	BYTE *			m_pHitsBuf = nullptr;		///< m_dHitlistBuf when mini buffers are used, null otherwise
				+	BYTE *			m_pDocsBuf = nullptr;		///< m_dDoclistBuf when mini buffers are used, null otherwise
				+
				+#ifndef NDEBUG
				+	bool			m_bHitlistOver = true;
				+#endif
				+
				+};
			
 
				+
			
 
				+
			
 
				+struct CSphWordlistCheckpoint	// one dictionary checkpoint: the key of a block and where that block starts
				+{
				+	union	// only one of the two is valid, depending on the dictionary type
				+	{
				+		SphWordID_t		m_uWordID;	// filled by CheckpointReader_c::ReadEntry() (CRC dictionary path)
				+		const char *	m_sWord;	// points into the checkpoint words arena (keywords dictionary path)
				+	};
				+	SphOffset_t			m_iWordlistOffset;	// offset of the block within the dictionary buffer
				+};
			
 
				+
			
 
				+
			
 
				+int DoclistHintUnpack ( DWORD uDocs, BYTE uHint );	// doclist hint from a docs count and the packed hint byte; the result saturates at INT_MAX
			
 
				+
			
 
				+
			
 
				+// dictionary header; CWordlist inherits these fields and reads them in Preread()
				+struct DictHeader_t
				+{
				+	int				m_iDictCheckpoints = 0;			///< how many dict checkpoints (keyword blocks) are there
				+	SphOffset_t		m_iDictCheckpointsOffset = 0;	///< dict checkpoints file position
				+
				+	int				m_iInfixCodepointBytes = 0;		///< max bytes per infix codepoint (0 means no infixes)
				+	int64_t			m_iInfixBlocksOffset = 0;		///< infix blocks file position (stored as unsigned 32bit int as keywords dictionary is pretty small); NOTE(review): the in-memory field is int64_t, "stored" presumably refers to the file format -- confirm
				+	int				m_iInfixBlocksWordsSize = 0;	///< infix checkpoints size
				+};
			
 
				+
			
 
				+
			
 
				+class CheckpointReader_c;
			
 
				+
			
 
				+// FIXME: eliminate this, move it to proper dict impls
				+class CWordlist : public ISphWordlist, public DictHeader_t, public ISphWordlistSuggest
				+{
				+public:
				+	// !COMMIT slow data
				+	CSphMappedBuffer<BYTE>						m_tBuf;					///< my cache
				+	CSphFixedVector<CSphWordlistCheckpoint>		m_dCheckpoints {0};		///< checkpoint offsets
				+
				+
				+										~CWordlist () override;
				+
				+	void								Reset();	///< releases the buffers and the in-place checkpoint reader
				+	bool								Preread ( const CSphString & sName, bool bWordDict, int iSkiplistBlockSize, CSphString & sError );	///< loads checkpoints and infix blocks, maps the dictionary; false plus sError on failure
				+
				+	const CSphWordlistCheckpoint *		FindCheckpoint ( const char * sWord, int iWordLen, SphWordID_t iWordID, bool bStarMode ) const;	///< checkpoint lookup via sphSearchCheckpoint()
				+	bool								GetWord ( const BYTE * pBuf, SphWordID_t iWordID, CSphDictEntry & tWord ) const;	///< scans one wordlist chunk for a word id
				+
				+	const BYTE *						AcquireDict ( const CSphWordlistCheckpoint * pCheckpoint ) const;	///< pointer to the dictionary block of a checkpoint
				+	void								GetPrefixedWords ( const char * sSubstring, int iSubLen, const char * sWildcard, Args_t & tArgs ) const override;
				+	void								GetInfixedWords ( const char * sSubstring, int iSubLen, const char * sWildcard, Args_t & tArgs ) const override;
				+
				+	void								SuffixGetChekpoints ( const SuggestResult_t & tRes, const char * sSuffix, int iLen, CSphVector<DWORD> & dCheckpoints ) const override;
				+	void								SetCheckpoint ( SuggestResult_t & tRes, DWORD iCP ) const override;	///< iCP is used as a 1-based checkpoint number
				+	bool								ReadNextWord ( SuggestResult_t & tRes, DictWord_t & tWord ) const override;
				+	int									GetWordsEnd() const { return m_iWordsEnd; }	///< NOTE(review): narrows SphOffset_t m_iWordsEnd to int
				+
				+	void								DebugPopulateCheckpoints();	///< decodes mapped checkpoints into m_dCheckpoints and drops the in-place reader
				+
				+private:
				+	bool								m_bWordDict = false;	///< set by Preread() from its bWordDict argument
				+	CSphVector<InfixBlock_t>			m_dInfixBlocks {0};
				+	CSphFixedVector<BYTE>				m_pWords {0};			///< arena for checkpoint's words
				+	BYTE *								m_pInfixBlocksWords = nullptr;	///< arena for infix checkpoint's words
				+	int									m_iSkiplistBlockSize {0};
				+
				+	SphOffset_t							m_iWordsEnd = 0;		///< end of wordlist
				+	CheckpointReader_c *				m_pCpReader = nullptr;	///< created on the CRC dictionary path in Preread(); owned, deleted in Reset()
				+};
			
 
				+
			
 
				+
			
 
				+/// dict=keywords block reader; the decoded entry fields live in the CSphDictEntry base
				+class KeywordsBlockReader_c : public CSphDictEntry
				+{
				+public:
				+					KeywordsBlockReader_c ( const BYTE * pBuf, int iSkiplistBlockSize );
				+
				+	void			Reset ( const BYTE * pBuf );	///< restart from another block with an empty current word
				+	bool			UnpackWord();					///< decode the next entry; false once the block is over
				+
				+	const char *	GetWord() const			{ return (const char*)m_sWord; }
				+	int				GetWordLen() const		{ return m_iLen; }
				+
				+private:
				+	const BYTE *	m_pBuf;							///< current read position; set by Reset(), NULL after the block is over
				+	BYTE			m_sWord [ MAX_KEYWORD_BYTES ];	///< current keyword, zero-terminated
				+	int				m_iLen;							///< current keyword length in bytes; set by Reset()
				+	BYTE			m_uHint = 0;					///< hint byte of the current entry, 0 when the entry has none
				+	int				m_iSkiplistBlockSize = 0;		///< entries with more docs than this carry a skiplist offset
				+};
			
 
				+
			
 
				+
			
 
				+#endif // _indexformat_
			
--- a/src/sphinx.cpp
+++ b/src/sphinx.cpp
--- a/src/sphinx.h
+++ b/src/sphinx.h
@@ -123,6 +123,8 @@ STATIC_ASSERT ( ( 1 << ROWITEM_SHIFT )==ROWITEM_BITS, INVALID_ROWITEM_SHIFT );
 
				 #define SPH_MAX_FILENAME_LEN	512
			
 
				 #define SPH_MAX_FIELDS			256
			
 
				 
			
 
				+const int MAX_KEYWORD_BYTES = SPH_MAX_WORD_LEN*3+4;	///< byte size of keyword buffers (e.g. lemmatizer sForm, KeywordsBlockReader_c::m_sWord)
			
 
				+
			
 
				 /////////////////////////////////////////////////////////////////////////////
			
 
				 
			
 
				 extern int64_t g_iIndexerCurrentDocID;
			
@@ -3290,33 +3292,11 @@ private:
 
				 };
			
 
				 
			
 
				 
			
 
				-// simple error reporter for debug checks
			
 
				-class DebugCheckError_c
			
 
				-{
			
 
				-public:
			
 
				-			DebugCheckError_c ( FILE * pFile );
			
 
				-
			
 
				-	void	Fail ( const char * szFmt, ... );
			
 
				-	void	Msg ( const char * szFmt, ... );
			
 
				-	void	Progress ( const char * szFmt, ... );
			
 
				-	void	Done();
			
 
				-
			
 
				-	void	SetSegment ( int iSegment );
			
 
				-	int64_t	GetNumFails() const;
			
 
				-
			
 
				-private:
			
 
				-	FILE *	m_pFile {nullptr};
			
 
				-	bool	m_bProgress {false};
			
 
				-	int64_t m_tStartTime {0};
			
 
				-	int64_t	m_nFails {0};
			
 
				-	int64_t	m_nFailsPrinted {0};
			
 
				-	int		m_iSegment {-1};
			
 
				-};
			
 
				-
			
 
				-
			
 
				 class DocstoreFields_i;
			
 
				 void SetupDocstoreFields ( DocstoreFields_i & tFields, const CSphSchema & tSchema );
			
 
				 
			
 
				+class DiskIndexQwordTraits_c;
			
 
				+DiskIndexQwordTraits_c * sphCreateDiskIndexQword ( bool bInlineHits );
			
 
				 
			
 
				 struct DocstoreDoc_t
			
 
				 {
			
@@ -3327,7 +3307,8 @@ struct DocstoreDoc_t
 
				 enum DocstoreDataType_e
			
 
				 {
			
 
				 	DOCSTORE_TEXT,
			
 
				-	DOCSTORE_BIN
			
 
				+	DOCSTORE_BIN,
			
 
				+	DOCSTORE_TOTAL	///< equals the number of data types listed above; must stay the last enumerator
			
 
				 };
			
 
				 
			
 
				 
			
@@ -3506,6 +3487,7 @@ public:
 
				 	virtual CSphFixedVector<SphAttr_t> BuildDocList () const;
			
 
				 
			
 
				 	virtual void				SetMemorySettings ( const FileAccessSettings_t & tFileAccessSettings ) = 0;
			
 
				+	virtual const FileAccessSettings_t & GetMemorySettings() const = 0;
			
 
				 
			
 
				 	virtual void				GetFieldFilterSettings ( CSphFieldFilterSettings & tSettings );
			
 
				 
			
--- a/src/sphinxaot.cpp
+++ b/src/sphinxaot.cpp
@@ -932,7 +932,7 @@ void sphAotLemmatizeRu1251 ( BYTE * pWord, int iLen )
 
				 	// do lemmatizing
			
 
				 	// input keyword moves into sForm; LemmatizeWord() will also case fold sForm
			
 
				 	// we will generate results using sForm into pWord; so we need this extra copy
			
 
				-	BYTE sForm [ SPH_MAX_WORD_LEN*3+4 ]; // aka MAX_KEYWORD_BYTES
			
 
				+	BYTE sForm[MAX_KEYWORD_BYTES];
			
 
				 	int iFormLen = 0;
			
 
				 
			
 
				 	// faster than strlen and strcpy..
			
@@ -987,7 +987,7 @@ void sphAotLemmatize ( BYTE * pWord, int iLang )
 
				 	// do lemmatizing
			
 
				 	// input keyword moves into sForm; LemmatizeWord() will also case fold sForm
			
 
				 	// we will generate results using sForm into pWord; so we need this extra copy
			
 
				-	BYTE sForm [ SPH_MAX_WORD_LEN*3+4 ]; // aka MAX_KEYWORD_BYTES
			
 
				+	BYTE sForm[MAX_KEYWORD_BYTES];
			
 
				 	int iFormLen = 0;
			
 
				 
			
 
				 	// faster than strlen and strcpy..
			
@@ -1063,7 +1063,7 @@ void sphAotLemmatizeDe1252 ( BYTE * pWord, int iLen )
 
				 	// do lemmatizing
			
 
				 	// input keyword moves into sForm; LemmatizeWord() will also case fold sForm
			
 
				 	// we will generate results using sForm into pWord; so we need this extra copy
			
 
				-	BYTE sForm [ SPH_MAX_WORD_LEN*3+4 ]; // aka MAX_KEYWORD_BYTES
			
 
				+	BYTE sForm[MAX_KEYWORD_BYTES];
			
 
				 	int iFormLen = 0;
			
 
				 
			
 
				 	// faster than strlen and strcpy..
			
@@ -1425,13 +1425,13 @@ class CSphAotTokenizerTmpl : public CSphTokenFilter
 
				 {
			
 
				 protected:
			
 
				 	using Base = CSphTokenFilter;
			
 
				-	BYTE		m_sForm [ SPH_MAX_WORD_LEN*3+4 ];	///< aka MAX_KEYWORD_BYTES
			
 
				+	BYTE		m_sForm[MAX_KEYWORD_BYTES];
			
 
				 	int			m_iFormLen = 0;						///< in bytes, but in windows-1251 that is characters, too
			
 
				 	bool		m_bFound = false;					///< found or predicted?
			
 
				 	DWORD		m_FindResults[12];					///< max results is like 6
			
 
				-	int			m_iCurrent = -1;							///< index in m_FindResults that was just returned, -1 means no blending
			
 
				-	BYTE		m_sToken [ SPH_MAX_WORD_LEN*3+4 ];	///< to hold generated lemmas
			
 
				-	BYTE		m_sOrigToken [ SPH_MAX_WORD_LEN*3+4 ];	///< to hold original token
			
 
				+	int			m_iCurrent = -1;					///< index in m_FindResults that was just returned, -1 means no blending
			
 
				+	BYTE		m_sToken[MAX_KEYWORD_BYTES];		///< to hold generated lemmas
			
 
				+	BYTE		m_sOrigToken[MAX_KEYWORD_BYTES];	///< to hold original token
			
 
				 	bool		m_bIndexExact;
			
 
				 
			
 
				 	const CSphWordforms *	m_pWordforms = nullptr;
			
--- a/src/sphinxint.h
+++ b/src/sphinxint.h
@@ -491,10 +491,15 @@ public:
 
				 		return m_pData;
			
 
				 	}
			
 
				 
			
 
				+	int GetLength() const	// returns m_iLen as stored; presumably the size of the m_pData buffer - confirm
			
 
				+	{
			
 
				+		return m_iLen;
			
 
				+	}
			
 
				+
			
 
				 protected:
			
 
				-	const BYTE * m_pData = nullptr;
			
 
				-	const int m_iLen = 0;
			
 
				-	const BYTE * m_pCur = nullptr;
			
 
				+	const BYTE *	m_pData = nullptr;
			
 
				+	const int		m_iLen = 0;
			
 
				+	const BYTE *	m_pCur = nullptr;
			
 
				 };
			
 
				 
			
 
				 class MemoryWriter_c
			
@@ -598,23 +603,6 @@ namespace sph
 
				 	int rename ( const char * sOld, const char * sNew );
			
 
				 }
			
 
				 
			
 
				-class DebugCheckReader_i
			
 
				-{
			
 
				-public:
			
 
				-	virtual ~DebugCheckReader_i () {};
			
 
				-	virtual int64_t GetLengthBytes () = 0;
			
 
				-	virtual bool GetBytes ( void * pData, int iSize ) = 0;
			
 
				-	virtual bool SeekTo ( int64_t iOff, int iHint ) = 0;
			
 
				-};
			
 
				-
			
 
				-// common code for debug checks
			
 
				-class DebugCheckHelper_c
			
 
				-{
			
 
				-protected:
			
 
				-	void				DebugCheck_Attributes ( DebugCheckReader_i & tAttrs, DebugCheckReader_i & tBlobs, int64_t nRows, int64_t iMinMaxBytes, const CSphSchema & tSchema, DebugCheckError_c & tReporter );
			
 
				-	void				DebugCheck_DeadRowMap (  int64_t iSizeBytes, int64_t nRows, DebugCheckError_c & tReporter ) const;
			
 
				-};
			
 
				-
			
 
				 //////////////////////////////////////////////////////////////////////////
			
 
				 
			
 
				 /// generic COM-like uids
			
@@ -2490,6 +2478,31 @@ BYTE PrereadMapping ( const char * sIndexName, const char * sFor, bool bMlock, b
 
				 	return g_uHash;
			
 
				 }
			
 
				 
			
 
				+#if PARANOID // SPH_VARINT_DECODE expands to a whole function body (it ends in return): reads bytes via _getexpr, 7 payload bits each, most significant group first, 0x80 = more bytes follow; this variant also asserts the length
			
 
				+
			
 
				+#define SPH_VARINT_DECODE(_type,_getexpr) \
			
 
				+	DWORD b = 0; /* most recently fetched byte */ \
			
 
				+	_type v = 0; /* accumulated value */ \
			
 
				+	int it = 0; /* number of bytes consumed */ \
			
 
				+	do { b = _getexpr; v = ( v<<7 ) + ( b&0x7f ); it++; } while ( b&0x80 ); \
			
 
				+	assert ( (it-1)*7<=sizeof(_type)*8 ); /* continuation groups must fit the bit width of _type */ \
			
 
				+	return v;
			
 
				+
			
 
				+#else // release variant: same decoding, no length check
			
 
				+
			
 
				+#define SPH_VARINT_DECODE(_type,_getexpr) \
			
 
				+	DWORD b = _getexpr; \
			
 
				+	_type res = 0; \
			
 
				+	while ( b & 0x80 ) /* high bit set: more bytes follow */ \
			
 
				+	{ \
			
 
				+		res = ( res<<7 ) + ( b & 0x7f ); \
			
 
				+		b = _getexpr; \
			
 
				+	} \
			
 
				+	res = ( res<<7 ) + b; /* final byte carries no continuation bit */ \
			
 
				+	return res;
			
 
				+
			
 
				+#endif // PARANOID
			
 
				+
			
 
				 // crash related code
			
 
				 struct CrashQuery_t
			
 
				 {
			
--- a/src/sphinxpq.cpp
+++ b/src/sphinxpq.cpp
@@ -69,6 +69,7 @@ static bool operator < ( int64_t iQUID, const StoredQueryKey_t & tKey )
 
				 }
			
 
				 
			
 
				 static int g_iPercolateThreads = 1;
			
 
				+static FileAccessSettings_t g_tDummyFASettings;	///< default-constructed placeholder handed out by PercolateIndex_c::GetMemorySettings()
			
 
				 
			
 
				 class PercolateIndex_c : public PercolateIndex_i
			
 
				 {
			
@@ -83,8 +84,7 @@ public:
 
				 	bool Commit ( int * pDeleted, RtAccum_t * pAccExt ) override;
			
 
				 	void RollBack ( RtAccum_t * pAccExt ) override;
			
 
				 
			
 
				-	StoredQuery_i * AddQuery ( const PercolateQueryArgs_t & tArgs, const ISphTokenizer * pTokenizer, CSphDict * pDict, CSphString & sError )
			
 
				-		REQUIRES (!m_tLock);
			
 
				+	StoredQuery_i * AddQuery ( const PercolateQueryArgs_t & tArgs, const ISphTokenizer * pTokenizer, CSphDict * pDict, CSphString & sError ) REQUIRES (!m_tLock);
			
 
				 	StoredQuery_i * Query ( const PercolateQueryArgs_t & tArgs, CSphString & sError ) override REQUIRES (!m_tLock);
			
 
				 
			
 
				 	bool Prealloc ( bool bStripPath ) override;
			
@@ -140,6 +140,7 @@ public:
 
				 	void				DebugDumpDict ( FILE * ) override {}
			
 
				 	void				SetProgressCallback ( CSphIndexProgress::IndexingProgress_fn ) override {}
			
 
				 	void				SetMemorySettings ( const FileAccessSettings_t & ) override {}
			
 
				+	const FileAccessSettings_t & GetMemorySettings() const override { return g_tDummyFASettings; }	// always the shared dummy object; SetMemorySettings() on this class is a no-op
			
 
				 
			
 
				 	void				ProhibitSave() override { m_bSaveDisabled = true; }
			
 
				 	void				EnableSave() override { m_bSaveDisabled = false; }
			
--- a/src/sphinxrt.cpp
+++ b/src/sphinxrt.cpp
@@ -24,6 +24,7 @@
 
				 #include "killlist.h"
			
 
				 #include "secondaryindex.h"
			
 
				 #include "accumulator.h"
			
 
				+#include "indexcheck.h"
			
 
				 
			
 
				 #include <sys/stat.h>
			
 
				 #include <fcntl.h>
			
@@ -1066,15 +1067,14 @@ public:
 
				 	bool				DeleteDocument ( const DocID_t * pDocs, int iDocs, CSphString & sError, RtAccum_t * pAccExt ) final;
			
 
				 	bool				Commit ( int * pDeleted, RtAccum_t * pAccExt ) final;
			
 
				 	void				RollBack ( RtAccum_t * pAccExt ) final;
			
 
				-	bool				CommitReplayable ( RtSegment_t * pNewSeg, const CSphVector<DocID_t> & dAccKlist,
			
 
				-			int * pTotalKilled, bool bForceDump ) EXCLUDES (m_tChunkLock ); // FIXME? protect?
			
 
				+	bool				CommitReplayable ( RtSegment_t * pNewSeg, const CSphVector<DocID_t> & dAccKlist, int * pTotalKilled, bool bForceDump ) EXCLUDES (m_tChunkLock ); // FIXME? protect?
			
 
				 	void				ForceRamFlush ( bool bPeriodic=false ) final;
			
 
				 	bool				IsFlushNeed() const final;
			
 
				 	bool				ForceDiskChunk() final;
			
 
				 	bool				AttachDiskIndex ( CSphIndex * pIndex, bool bTruncate, bool & bFatal, CSphString & sError ) 			final  EXCLUDES (m_tReading );
			
 
				 	bool				Truncate ( CSphString & sError ) final;
			
 
				 	void				Optimize () final;
			
 
				-	virtual void				ProgressiveMerge ();
			
 
				+	void				ProgressiveMerge();
			
 
				 	CSphIndex *			GetDiskChunk ( int iChunk ) final { return m_dDiskChunks.GetLength()>iChunk ? m_dDiskChunks[iChunk] : nullptr; }
			
 
				 	ISphTokenizer *		CloneIndexingTokenizer() const final { return m_pTokenizerIndexing->Clone ( SPH_CLONE_INDEX ); }
			
 
				 
			
@@ -1094,6 +1094,7 @@ public:
 
				 	void				Dealloc () final {}
			
 
				 	void				Preread () final;
			
 
				 	void				SetMemorySettings ( const FileAccessSettings_t & tFileAccessSettings ) final;
			
 
				+	const FileAccessSettings_t & GetMemorySettings() const final { return m_tFiles; }	// exposes m_tFiles; presumably the value stored by SetMemorySettings() - confirm
			
 
				 	void				SetBase ( const char * ) final {}
			
 
				 	bool				Rename ( const char * ) final { return true; }
			
 
				 	bool				Lock () final { return true; }
			
@@ -7046,7 +7047,7 @@ void RtIndex_c::Optimize()
 
				 {
			
 
				 	if ( g_bProgressiveMerge )
			
 
				 	{
			
 
				-		ProgressiveMerge ( );
			
 
				+		ProgressiveMerge();
			
 
				 		return;
			
 
				 	}
			
 
				 
			
--- a/src/sphinxsearch.cpp
+++ b/src/sphinxsearch.cpp
@@ -20,8 +20,11 @@
 
				 
			
 
				 #include <math.h>
			
 
				 
			
 
				-//////////////////////////////////////////////////////////////////////////
			
 
				-// EXTENDED MATCHING V2
			
 
				+
			
 
				+bool operator < ( const SkiplistEntry_t & a, RowID_t b )	{ return a.m_tBaseRowIDPlus1<b; }	///< orders a skiplist entry against a bare row id by its m_tBaseRowIDPlus1
			
 
				+bool operator == ( const SkiplistEntry_t & a, RowID_t b )	{ return a.m_tBaseRowIDPlus1==b; }	///< entry matches a bare row id when its m_tBaseRowIDPlus1 equals it
			
 
				+bool operator < ( RowID_t a, const SkiplistEntry_t & b )	{ return a<b.m_tBaseRowIDPlus1; }	///< mirrored form: bare row id on the left, entry's m_tBaseRowIDPlus1 on the right
			
 
				+
			
 
				 //////////////////////////////////////////////////////////////////////////
			
 
				 
			
 
				 #define SPH_TREE_DUMP			0
			
--- a/src/sphinxsearch.h
+++ b/src/sphinxsearch.h
@@ -39,6 +39,9 @@ struct SkiplistEntry_t
 
				 	int64_t		m_iBaseHitlistPos;	///< delta decoder hitlist offset base
			
 
				 };
			
 
				 
			
 
				+bool operator < ( const SkiplistEntry_t & a, RowID_t b );
			
 
				+bool operator == ( const SkiplistEntry_t & a, RowID_t b );
			
 
				+bool operator < ( RowID_t a, const SkiplistEntry_t & b );
			
 
				 
			
 
				 /// term, searcher view
			
 
				 class ISphQword