Browse Source

sorting and grouping code factored out to separate .cpp


git-svn-id: svn://svn.sphinxsearch.com/sphinx/trunk@852 406a0c4d-033a-0410-8de8-e80135713968
shodan 18 years ago
parent
commit
9c6871999e
7 changed files with 3133 additions and 2347 deletions
  1. 1 1
      src/Makefile.am
  2. 4 2
      src/Makefile.in
  3. 1599 2344
      src/sphinx.cpp
  4. 1509 0
      src/sphinxsort.cpp
  5. 13 0
      src/sphinxstd.h
  6. 3 0
      win/libsphinx03.vcproj
  7. 4 0
      win/libsphinx05.vcproj

+ 1 - 1
src/Makefile.am

@@ -1,4 +1,4 @@
-SRC_SPHINX = sphinx.cpp sphinxexcerpt.cpp sphinxquery.cpp sphinxsoundex.cpp sphinxstemen.cpp sphinxstemru.cpp sphinxutils.cpp md5.cpp sphinxstd.cpp
+SRC_SPHINX = sphinx.cpp sphinxexcerpt.cpp sphinxquery.cpp sphinxsoundex.cpp sphinxstemen.cpp sphinxstemru.cpp sphinxutils.cpp md5.cpp sphinxstd.cpp sphinxsort.cpp
 
 noinst_LIBRARIES = libsphinx.a
 libsphinx_a_SOURCES = $(SRC_SPHINX)

+ 4 - 2
src/Makefile.in

@@ -56,7 +56,8 @@ libsphinx_a_LIBADD =
 am__objects_1 = sphinx.$(OBJEXT) sphinxexcerpt.$(OBJEXT) \
 	sphinxquery.$(OBJEXT) sphinxsoundex.$(OBJEXT) \
 	sphinxstemen.$(OBJEXT) sphinxstemru.$(OBJEXT) \
-	sphinxutils.$(OBJEXT) md5.$(OBJEXT) sphinxstd.$(OBJEXT)
+	sphinxutils.$(OBJEXT) md5.$(OBJEXT) sphinxstd.$(OBJEXT) \
+	sphinxsort.$(OBJEXT)
 am_libsphinx_a_OBJECTS = $(am__objects_1)
 libsphinx_a_OBJECTS = $(am_libsphinx_a_OBJECTS)
 am__installdirs = "$(DESTDIR)$(bindir)"
@@ -187,7 +188,7 @@ sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
 sysconfdir = @sysconfdir@
 target_alias = @target_alias@
-SRC_SPHINX = sphinx.cpp sphinxexcerpt.cpp sphinxquery.cpp sphinxsoundex.cpp sphinxstemen.cpp sphinxstemru.cpp sphinxutils.cpp md5.cpp sphinxstd.cpp
+SRC_SPHINX = sphinx.cpp sphinxexcerpt.cpp sphinxquery.cpp sphinxsoundex.cpp sphinxstemen.cpp sphinxstemru.cpp sphinxutils.cpp md5.cpp sphinxstd.cpp sphinxsort.cpp
 noinst_LIBRARIES = libsphinx.a
 libsphinx_a_SOURCES = $(SRC_SPHINX)
 indexer_SOURCES = indexer.cpp
@@ -289,6 +290,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sphinx.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sphinxexcerpt.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sphinxquery.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sphinxsort.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sphinxsoundex.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sphinxstd.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sphinxstemen.Po@am__quote@

File diff suppressed because it is too large
+ 1599 - 2344
src/sphinx.cpp


+ 1509 - 0
src/sphinxsort.cpp

@@ -0,0 +1,1509 @@
+//
+// $Id$
+//
+
+//
+// Copyright (c) 2001-2007, Andrew Aksyonoff. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+#include "sphinx.h"
+
+#include <time.h>
+#include <math.h>
+
+#if !USE_WINDOWS
+#include <unistd.h>
+#include <sys/time.h>
+#endif
+
+//////////////////////////////////////////////////////////////////////////
+// TRAITS
+//////////////////////////////////////////////////////////////////////////
+
+/// match-sorting priority queue traits
+/// owns a fixed-size array of CSphMatch slots shared by all queue flavors;
+/// tracks used/total counts and the comparator state (incl. the "now" stamp)
+class CSphMatchQueueTraits : public ISphMatchSorter, ISphNoncopyable
+{
+protected:
+	CSphMatch *					m_pData;		///< match storage, m_iSize entries
+	int							m_iUsed;		///< currently occupied entries
+	int							m_iSize;		///< allocated capacity
+
+	CSphMatchComparatorState	m_tState;		///< comparator settings (attrs, order bits, now)
+	const bool					m_bUsesAttrs;	///< whether the comparator reads attributes
+
+public:
+	/// ctor; allocates iSize match slots and stamps current time for time-segment sorting
+	CSphMatchQueueTraits ( int iSize, bool bUsesAttrs )
+		: m_iUsed ( 0 )
+		, m_iSize ( iSize )
+		, m_bUsesAttrs ( bUsesAttrs )
+	{
+		assert ( iSize>0 );
+		m_pData = new CSphMatch [ iSize ];
+		assert ( m_pData );
+
+		m_tState.m_iNow = (DWORD) time ( NULL );
+	}
+
+	/// dtor; releases match storage
+	~CSphMatchQueueTraits ()
+	{
+		SafeDeleteArray ( m_pData );
+	}
+
+public:
+	int			GetLength () const										{ return m_iUsed; }
+	void		SetState ( const CSphMatchComparatorState & tState )	{ m_tState = tState; m_tState.m_iNow = (DWORD) time ( NULL ); }
+	bool		UsesAttrs ()											{ return m_bUsesAttrs; }
+	CSphMatch *	First ()												{ return m_pData; }
+};
+
+//////////////////////////////////////////////////////////////////////////
+// PLAIN SORTING QUEUE
+//////////////////////////////////////////////////////////////////////////
+
+/// normal match-sorting priority queue
+/// a binary min-heap over COMP: the WORST match sits at the root (m_pData[0]),
+/// so a full queue can reject or evict the current worst in O(log n) per push
+template < typename COMP > class CSphMatchQueue : public CSphMatchQueueTraits
+{
+public:
+	/// ctor
+	CSphMatchQueue ( int iSize, bool bUsesAttrs )
+		: CSphMatchQueueTraits ( iSize, bUsesAttrs )
+	{}
+
+	/// add entry to the queue; always returns true
+	/// (entry is counted in m_iTotal even if it gets rejected)
+	virtual bool Push ( const CSphMatch & tEntry )
+	{
+		m_iTotal++;
+
+		if ( m_iUsed==m_iSize )
+		{
+			// if it's worse than current min, reject it, else pop off current min
+			if ( COMP::IsLess ( tEntry, m_pData[0], m_tState ) )
+				return true;
+			else
+				Pop ();
+		}
+
+		// do add
+		m_pData [ m_iUsed ] = tEntry;
+		int iEntry = m_iUsed++;
+
+		// sift up if needed, so that worst (lesser) ones float to the top
+		while ( iEntry )
+		{
+			int iParent = ( iEntry-1 ) >> 1;
+			if ( !COMP::IsLess ( m_pData[iEntry], m_pData[iParent], m_tState ) )
+				break;
+
+			// entry is less than parent, should float to the top
+			Swap ( m_pData[iEntry], m_pData[iParent] );
+			iEntry = iParent;
+		}
+
+		return true;
+	}
+
+	/// remove root (ie. top priority) entry
+	virtual void Pop ()
+	{
+		assert ( m_iUsed );
+		if ( !(--m_iUsed) ) // empty queue? just return
+			return;
+
+		// make the last entry my new root
+		m_pData[0] = m_pData[m_iUsed];
+
+		// sift down if needed
+		int iEntry = 0;
+		for ( ;; )
+		{
+			// select child
+			int iChild = (iEntry<<1) + 1;
+			if ( iChild>=m_iUsed )
+				break;
+
+			// select smallest child
+			if ( iChild+1<m_iUsed )
+				if ( COMP::IsLess ( m_pData[iChild+1], m_pData[iChild], m_tState ) )
+					iChild++;
+
+			// if smallest child is less than entry, do float it to the top
+			if ( COMP::IsLess ( m_pData[iChild], m_pData[iEntry], m_tState ) )
+			{
+				Swap ( m_pData[iChild], m_pData[iEntry] );
+				iEntry = iChild;
+				continue;
+			}
+
+			break;
+		}
+	}
+
+	/// store all entries into specified location in sorted order, and remove them from queue
+	/// fills pTo[] back-to-front by repeatedly popping the worst entry,
+	/// so the best match ends up at pTo[0]
+	void Flatten ( CSphMatch * pTo, int iTag )
+	{
+		assert ( m_iUsed>=0 );
+		pTo += m_iUsed;
+		while ( m_iUsed>0 )
+		{
+			--pTo;
+			pTo[0] = m_pData[0]; // OPTIMIZE? reset dst + swap?
+			if ( iTag>=0 )
+				pTo->m_iTag = iTag;
+			Pop ();
+		}
+		m_iTotal = 0;
+	}
+};
+
+//////////////////////////////////////////////////////////////////////////
+// SORTING+GROUPING QUEUE
+//////////////////////////////////////////////////////////////////////////
+
+/// simple fixed-size hash
+/// doesn't keep the order
+/// fixed pool of entries with singly-linked per-bucket collision chains;
+/// capacity is fixed at construction time (no rehash, no growth)
+template < typename T, typename KEY, typename HASHFUNC >
+class CSphFixedHash : ISphNoncopyable
+{
+protected:
+	static const int			HASH_LIST_END	= -1;	///< collision chain terminator
+	static const int			HASH_DELETED	= -2;	///< marks a free/removed entry (debug aid)
+
+	/// key-value pair plus next-in-chain link
+	struct HashEntry_t
+	{
+		KEY		m_tKey;
+		T		m_tValue;
+		int		m_iNext;
+	};
+
+protected:
+	CSphVector<HashEntry_t>		m_dEntries;		///< key-value pairs storage pool
+	CSphVector<int>				m_dHash;		///< hash into m_dEntries pool
+
+	int							m_iFree;		///< free pairs count
+	CSphVector<int>				m_dFree;		///< free pair indexes
+
+public:
+	/// ctor; allocates storage for up to iLength entries
+	/// bucket count is rounded up to a power of two, because lookups mask
+	/// the hash with ( m_dHash.GetLength()-1 )
+	CSphFixedHash ( int iLength )
+	{
+		int iBuckets = 2<<sphLog2(iLength-1); // less than 50% bucket usage guaranteed
+		// NOTE(review): iLength-1 is fed to sphLog2() before the assert below runs;
+		// confirm all callers always pass iLength>0
+		assert ( iLength>0 );
+		assert ( iLength<=iBuckets );
+
+		m_dEntries.Resize ( iLength );
+		m_dHash.Resize ( iBuckets );
+		m_dFree.Resize ( iLength );
+
+		Reset ();
+	}
+
+	/// cleanup; empties the hash and returns every entry to the free list
+	void Reset ()
+	{
+		ARRAY_FOREACH ( i, m_dEntries )
+			m_dEntries[i].m_iNext = HASH_DELETED;
+
+		ARRAY_FOREACH ( i, m_dHash )
+			m_dHash[i] = HASH_LIST_END;
+
+		m_iFree = m_dFree.GetLength();
+		ARRAY_FOREACH ( i, m_dFree )
+			m_dFree[i] = i;
+	}
+
+	/// add new entry
+	/// returns NULL on success
+	/// returns pointer to value if already hashed
+	T * Add ( const T & tValue, const KEY & tKey )
+	{
+		assert ( m_iFree>0 && "hash overflow" );
+
+		// check if it's already hashed
+		DWORD uHash = DWORD ( HASHFUNC::Hash ( tKey ) ) & ( m_dHash.GetLength()-1 );
+		int iPrev = -1, iEntry;
+
+		for ( iEntry=m_dHash[uHash]; iEntry>=0; iPrev=iEntry, iEntry=m_dEntries[iEntry].m_iNext )
+			if ( m_dEntries[iEntry].m_tKey==tKey )
+				return &m_dEntries[iEntry].m_tValue;
+		assert ( iEntry!=HASH_DELETED );
+
+		// if it's not, do add
+		int iNew = m_dFree [ --m_iFree ];
+
+		HashEntry_t & tNew = m_dEntries[iNew];
+		assert ( tNew.m_iNext==HASH_DELETED );
+
+		tNew.m_tKey = tKey;
+		tNew.m_tValue = tValue;
+		tNew.m_iNext = HASH_LIST_END;
+
+		// link the new entry to the chain tail, or make it the bucket head
+		if ( iPrev>=0 )
+		{
+			assert ( m_dEntries[iPrev].m_iNext==HASH_LIST_END );
+			m_dEntries[iPrev].m_iNext = iNew;
+		} else
+		{
+			assert ( m_dHash[uHash]==HASH_LIST_END );
+			m_dHash[uHash] = iNew;
+		}
+		return NULL;
+	}
+
+	/// remove entry from hash; silently does nothing if the key is not present
+	void Remove ( const KEY & tKey )
+	{
+		// check if it's already hashed
+		DWORD uHash = DWORD ( HASHFUNC::Hash ( tKey ) ) & ( m_dHash.GetLength()-1 );
+		int iPrev = -1, iEntry;
+
+		for ( iEntry=m_dHash[uHash]; iEntry>=0; iPrev=iEntry, iEntry=m_dEntries[iEntry].m_iNext )
+			if ( m_dEntries[iEntry].m_tKey==tKey )
+		{
+			// found, remove it
+			assert ( m_dEntries[iEntry].m_iNext!=HASH_DELETED );
+			if ( iPrev>=0 )
+				m_dEntries[iPrev].m_iNext = m_dEntries[iEntry].m_iNext;
+			else
+				m_dHash[uHash] = m_dEntries[iEntry].m_iNext;
+
+#ifndef NDEBUG
+			m_dEntries[iEntry].m_iNext = HASH_DELETED;
+#endif
+
+			m_dFree [ m_iFree++ ] = iEntry;
+			return;
+		}
+		assert ( iEntry!=HASH_DELETED );
+	}
+
+	/// get value pointer by key; NULL if the key is not hashed
+	T * operator () ( const KEY & tKey ) const
+	{
+		DWORD uHash = DWORD ( HASHFUNC::Hash ( tKey ) ) & ( m_dHash.GetLength()-1 );
+		int iEntry;
+
+		for ( iEntry=m_dHash[uHash]; iEntry>=0; iEntry=m_dEntries[iEntry].m_iNext )
+			if ( m_dEntries[iEntry].m_tKey==tKey )
+				return (T*)&m_dEntries[iEntry].m_tValue;
+
+		assert ( iEntry!=HASH_DELETED );
+		return NULL;
+	}
+};
+
+
+/// identity hash: returns the key itself
+/// suitable for integer keys that are already well distributed
+struct IdentityHash_fn
+{
+	static inline uint64_t	Hash ( uint64_t iValue )	{ return iValue; }
+	static inline DWORD		Hash ( DWORD iValue )		{ return iValue; }
+	static inline int		Hash ( int iValue )			{ return iValue; }
+};
+
+/////////////////////////////////////////////////////////////////////////////
+
+typedef uint64_t	SphGroupKey_t;
+
+/// (group,attrvalue) pair
+/// sorts by group key first, then by value, so equal values within one
+/// group become adjacent and can be collapsed when counting distincts
+struct SphGroupedValue_t
+{
+public:
+	SphGroupKey_t	m_uGroup;
+	DWORD			m_uValue;
+
+public:
+	SphGroupedValue_t ()
+	{}
+
+	SphGroupedValue_t ( SphGroupKey_t uGroup, DWORD uValue )
+		: m_uGroup ( uGroup )
+		, m_uValue ( uValue )
+	{}
+
+	inline bool operator < ( const SphGroupedValue_t & rhs ) const
+	{
+		if ( m_uGroup<rhs.m_uGroup ) return true;
+		if ( m_uGroup>rhs.m_uGroup ) return false;
+		return m_uValue<rhs.m_uValue;
+	}
+
+	inline bool operator == ( const SphGroupedValue_t & rhs ) const
+	{
+		return m_uGroup==rhs.m_uGroup && m_uValue==rhs.m_uValue;
+	}
+};
+
+
+/// unique values counter
+/// used for COUNT(DISTINCT xxx) GROUP BY yyy queries
+/// accumulates (group,value) pairs, then counts distinct values per group
+/// over the sorted list; debug builds track whether Sort() was called
+class CSphUniqounter : public CSphVector<SphGroupedValue_t>
+{
+public:
+#ifndef NDEBUG
+					CSphUniqounter () : m_iCountPos ( 0 ), m_bSorted ( true ) { Reserve ( 16384 ); }
+	void			Add ( const SphGroupedValue_t & tValue )	{ CSphVector<SphGroupedValue_t>::Add ( tValue ); m_bSorted = false; }
+	void			Sort ()										{ CSphVector<SphGroupedValue_t>::Sort (); m_bSorted = true; }
+
+#else
+					CSphUniqounter () : m_iCountPos ( 0 ) {}
+#endif
+
+public:
+	int				CountStart ( SphGroupKey_t * pOutGroup );	///< starting counting distinct values, returns count and group key (0 if empty)
+	int				CountNext ( SphGroupKey_t * pOutGroup );	///< continues counting distinct values, returns count and group key (0 if done)
+	void			Compact ( SphGroupKey_t * pRemoveGroups, int iRemoveGroups );
+
+protected:
+	int				m_iCountPos;	///< current scan position for CountStart()/CountNext()
+
+#ifndef NDEBUG
+	bool			m_bSorted;		///< debug-only; counting requires a sorted list
+#endif
+};
+
+
+/// start counting distinct values: rewind to the head of the (sorted)
+/// list, then behave exactly as CountNext()
+int CSphUniqounter::CountStart ( SphGroupKey_t * pOutGroup )
+{
+	m_iCountPos = 0;
+	return CountNext ( pOutGroup );
+}
+
+
+/// count distinct values in the next group of the sorted list
+/// stores the group key into pOutGroup and returns the distinct count,
+/// or 0 once the whole list has been consumed
+int CSphUniqounter::CountNext ( SphGroupKey_t * pOutGroup )
+{
+	assert ( m_bSorted );
+	if ( m_iCountPos>=m_iLength )
+		return 0;
+
+	SphGroupKey_t uGroup = m_pData[m_iCountPos].m_uGroup;
+	DWORD uValue = m_pData[m_iCountPos].m_uValue;
+	*pOutGroup = uGroup;
+
+	// adjacent equal values count as one; stop at list end or next group
+	// (the m_iCountPos<m_iLength guard fixes an out-of-bounds read that
+	// happened while counting the last group in the list)
+	int iCount = 1;
+	while ( m_iCountPos<m_iLength && m_pData[m_iCountPos].m_uGroup==uGroup )
+	{
+		if ( m_pData[m_iCountPos].m_uValue!=uValue )
+			iCount++;
+		uValue = m_pData[m_iCountPos].m_uValue;
+		m_iCountPos++;
+	}
+	return iCount;
+}
+
+
+/// drop entries belonging to the given groups and deduplicate the rest
+/// both this list and pRemoveGroups[] must be sorted; removal is done in a
+/// single merge-style pass, compacting the survivors in place
+void CSphUniqounter::Compact ( SphGroupKey_t * pRemoveGroups, int iRemoveGroups )
+{
+	assert ( m_bSorted );
+	if ( !m_iLength )
+		return;
+
+	sphSort ( pRemoveGroups, iRemoveGroups );
+
+	SphGroupedValue_t * pSrc = m_pData;
+	SphGroupedValue_t * pDst = m_pData;
+	SphGroupedValue_t * pEnd = m_pData + m_iLength;
+
+	// skip remove-groups which are not in my list
+	while ( iRemoveGroups && (*pRemoveGroups)<pSrc->m_uGroup )
+	{
+		pRemoveGroups++;
+		iRemoveGroups--;
+	}
+
+	for ( ; pSrc<pEnd; pSrc++ )
+	{
+		// check if this entry needs to be removed
+		while ( iRemoveGroups && (*pRemoveGroups)<pSrc->m_uGroup )
+		{
+			pRemoveGroups++;
+			iRemoveGroups--;
+		}
+		if ( iRemoveGroups && pSrc->m_uGroup==*pRemoveGroups )
+			continue;
+
+		// check if it's a dupe
+		if ( pDst>m_pData && pDst[-1]==pSrc[0] )
+			continue;
+
+		*pDst++ = *pSrc;
+	}
+
+	assert ( pDst-m_pData<=m_iLength );
+	m_iLength = pDst-m_pData;
+}
+
+/////////////////////////////////////////////////////////////////////////////
+
+/// compute a match's group key for the given group-by function
+/// SPH_GROUPBY_ATTRPAIR reinterprets two adjacent rowitems as one 64-bit key;
+/// the day/week/month/year modes split a timestamp attribute via localtime()
+SphGroupKey_t sphCalcGroupKey ( const CSphMatch & tMatch, ESphGroupBy eGroupBy, int iAttrOffset, int iAttrBits )
+{
+	if ( eGroupBy==SPH_GROUPBY_ATTRPAIR )
+	{
+		int iItem = iAttrOffset/ROWITEM_BITS;
+		// NOTE(review): this cast presumably relies on rowitem alignment being
+		// adequate for a 64-bit load -- confirm on strict-alignment targets
+		return *(SphGroupKey_t*)( tMatch.m_pRowitems+iItem );
+	}
+
+	CSphRowitem iAttr = tMatch.GetAttr ( iAttrOffset, iAttrBits );
+	if ( eGroupBy==SPH_GROUPBY_ATTR )
+		return iAttr;
+
+	time_t tStamp = iAttr;
+	struct tm * pSplit = localtime ( &tStamp ); // FIXME! use _r on UNIX
+
+	// encode the calendar period as a human-readable decimal key, eg. 20070324
+	switch ( eGroupBy )
+	{
+		case SPH_GROUPBY_DAY:	return (pSplit->tm_year+1900)*10000 + (1+pSplit->tm_mon)*100 + pSplit->tm_mday;
+		case SPH_GROUPBY_WEEK:	return (pSplit->tm_year+1900)*1000 + (1+pSplit->tm_yday) - pSplit->tm_wday;
+		case SPH_GROUPBY_MONTH:	return (pSplit->tm_year+1900)*100 + (1+pSplit->tm_mon);
+		case SPH_GROUPBY_YEAR:	return (pSplit->tm_year+1900);
+		default:				assert ( 0 ); return 0;
+	}
+}
+
+
+/// attribute magic
+/// negative values are virtual-attribute markers; the OFF_/ADD_ constants
+/// describe the extra rowitems appended to grouped matches
+enum
+{
+	SPH_VATTR_ID			= -1,	///< tells match sorter to use doc id
+	SPH_VATTR_RELEVANCE		= -2,	///< tells match sorter to use match weight
+
+	OFF_POSTCALC_GROUP		= 0,	///< @group attr offset after normal and calculated attrs
+	OFF_POSTCALC_COUNT		= 1,	///< @count attr offset after normal and calculated attrs
+	OFF_POSTCALC_DISTINCT	= 2,	///< @distinct attr offset after normal and calculated attrs
+
+	ADD_ITEMS_GROUP			= 2,	///< how many items to add in plain group-by mode
+	ADD_ITEMS_DISTINCT		= 3		///< how many items to add in count-distinct mode
+};
+
+
+/// match sorter with k-buffering and group-by
+/// keeps up to GROUPBY_FACTOR*limit group heads in a flat buffer plus a
+/// group-key hash; when the buffer fills up, it is sorted and the worst
+/// half is cut off (the "k-buffer" trade-off: approximate, but bounded RAM)
+#if USE_WINDOWS
+#pragma warning(disable:4127)
+#endif
+
+template < typename COMPMATCH, typename COMPGROUP, bool DISTINCT >
+class CSphKBufferGroupSorter : public CSphMatchQueueTraits
+{
+protected:
+	const int		m_iRowitems;		///< normal match rowitems count (to distinguish already grouped matches)
+
+	ESphGroupBy		m_eGroupBy;			///< group-by function
+	int				m_iGroupbyOffset;	///< group-by attr bit offset
+	int				m_iGroupbyCount;	///< group-by attr bit count
+
+	CSphFixedHash < CSphMatch *, SphGroupKey_t, IdentityHash_fn >	m_hGroup2Match;
+
+protected:
+	int				m_iLimit;		///< max matches to be retrieved
+
+	CSphUniqounter	m_tUniq;			///< per-group distinct values accumulator
+	int				m_iDistinctOffset;	///< count-distinct attr bit offset
+	int				m_iDistinctCount;	///< count-distinct attr bit count
+	bool			m_bSortByDistinct;	///< whether group sort keys include @distinct
+
+	CSphMatchComparatorState	m_tStateGroup;	///< group-order comparator state
+
+	static const int			GROUPBY_FACTOR = 4;	///< allocate this times more storage when doing group-by (k, as in k-buffer)
+
+public:
+	/// ctor
+	CSphKBufferGroupSorter ( const CSphQuery * pQuery ) // FIXME! make k configurable
+		: CSphMatchQueueTraits ( pQuery->m_iMaxMatches*GROUPBY_FACTOR, true )
+		, m_iRowitems		( pQuery->m_iPresortRowitems )
+		, m_eGroupBy		( pQuery->m_eGroupFunc )
+		, m_iGroupbyOffset	( pQuery->m_iGroupbyOffset )
+		, m_iGroupbyCount	( pQuery->m_iGroupbyCount )
+		, m_hGroup2Match	( pQuery->m_iMaxMatches*GROUPBY_FACTOR )
+		, m_iLimit			( pQuery->m_iMaxMatches )
+		, m_iDistinctOffset	( pQuery->m_iDistinctOffset )
+		, m_iDistinctCount	( pQuery->m_iDistinctCount )
+		, m_bSortByDistinct	( false )
+	{
+		assert ( GROUPBY_FACTOR>1 );
+		assert ( DISTINCT==false || m_iDistinctOffset>=0 );
+	}
+
+	/// add entry to the queue
+	/// returns false when the entry was merged into an existing group
+	/// (so the caller must not bump its total matches counter)
+	virtual bool Push ( const CSphMatch & tEntry )
+	{
+
+		const int ADD_ITEMS_TOTAL = DISTINCT ? ADD_ITEMS_DISTINCT : ADD_ITEMS_GROUP;
+		assert ( tEntry.m_iRowitems==m_iRowitems || tEntry.m_iRowitems==m_iRowitems+ADD_ITEMS_TOTAL );
+
+		// a wider row means the entry was already grouped (eg. by another index)
+		bool bGrouped = ( tEntry.m_iRowitems!=m_iRowitems );
+		SphGroupKey_t uGroupKey = sphCalcGroupKey ( tEntry, m_eGroupBy, m_iGroupbyOffset, m_iGroupbyCount );
+
+		// if this group is already hashed, we only need to update the corresponding match
+		CSphMatch ** ppMatch = m_hGroup2Match ( uGroupKey );
+		if ( ppMatch )
+		{
+			CSphMatch * pMatch = (*ppMatch);
+			assert ( pMatch );
+			assert ( m_eGroupBy==SPH_GROUPBY_ATTRPAIR || pMatch->m_pRowitems [ m_iRowitems+OFF_POSTCALC_GROUP ]==uGroupKey );
+
+			if ( bGrouped )
+			{
+				// it's already grouped match
+				// sum grouped matches count
+				assert ( pMatch->m_iRowitems==tEntry.m_iRowitems );
+				pMatch->m_pRowitems [ m_iRowitems+OFF_POSTCALC_COUNT ] += tEntry.m_pRowitems [ m_iRowitems+OFF_POSTCALC_COUNT ];
+				if ( DISTINCT )
+					pMatch->m_pRowitems [ m_iRowitems+OFF_POSTCALC_DISTINCT ] += tEntry.m_pRowitems [ m_iRowitems+OFF_POSTCALC_DISTINCT ];
+
+			} else
+			{
+				// it's a simple match
+				// increase grouped matches count
+				assert ( pMatch->m_iRowitems==tEntry.m_iRowitems+ADD_ITEMS_TOTAL );
+				pMatch->m_pRowitems [ m_iRowitems+OFF_POSTCALC_COUNT ]++;
+			}
+
+			// if new entry is more relevant, update from it
+			if ( COMPMATCH::IsLess ( *pMatch, tEntry, m_tState ) )
+			{
+				pMatch->m_iDocID = tEntry.m_iDocID;
+				pMatch->m_iWeight = tEntry.m_iWeight;
+				pMatch->m_iTag = tEntry.m_iTag;
+
+				// copy the plain attrs, but keep the group/count/distinct tail
+				for ( int i=0; i<pMatch->m_iRowitems-ADD_ITEMS_TOTAL; i++ )
+					pMatch->m_pRowitems[i] = tEntry.m_pRowitems[i];
+			}
+		}
+
+		// submit actual distinct value in all cases
+		if ( DISTINCT && !bGrouped )
+			m_tUniq.Add ( SphGroupedValue_t ( uGroupKey, tEntry.GetAttr ( m_iDistinctOffset, m_iDistinctCount ) ) );
+
+		// it's a dupe anyway, so we shouldn't update total matches count
+		if ( ppMatch )
+			return false;
+
+		// if we're full, let's cut off some worst groups
+		if ( m_iUsed==m_iSize )
+			CutWorst ();
+
+		// do add
+		assert ( m_iUsed<m_iSize );
+		CSphMatch & tNew = m_pData [ m_iUsed++ ];
+
+		// reuse the slot's row storage if it was already allocated by a
+		// previous occupant (slots keep their buffers across CutWorst)
+		int iNewSize = tEntry.m_iRowitems + ( bGrouped ? 0 : ADD_ITEMS_TOTAL );
+		assert ( tNew.m_iRowitems==0 || tNew.m_iRowitems==iNewSize );
+
+		tNew.m_iDocID = tEntry.m_iDocID;
+		tNew.m_iWeight = tEntry.m_iWeight;
+		tNew.m_iTag = tEntry.m_iTag;
+		if ( !tNew.m_iRowitems )
+		{
+			tNew.m_iRowitems = iNewSize;
+			tNew.m_pRowitems = new CSphRowitem [ iNewSize ];
+		}
+		memcpy ( tNew.m_pRowitems, tEntry.m_pRowitems, tEntry.m_iRowitems*sizeof(CSphRowitem) );
+		if ( !bGrouped )
+		{
+			tNew.m_pRowitems [ m_iRowitems+OFF_POSTCALC_GROUP ] = (DWORD)uGroupKey; // intentionally truncate
+			tNew.m_pRowitems [ m_iRowitems+OFF_POSTCALC_COUNT ] = 1;
+			if ( DISTINCT )
+				tNew.m_pRowitems [ m_iRowitems+OFF_POSTCALC_DISTINCT ] = 0;
+		}
+
+		m_hGroup2Match.Add ( &tNew, uGroupKey );
+		m_iTotal++;
+		return true;
+	}
+
+	/// store all entries into specified location in sorted order, and remove them from queue
+	void Flatten ( CSphMatch * pTo, int iTag )
+	{
+		CountDistinct ();
+		SortGroups ();
+
+		int iLen = GetLength ();
+		for ( int i=0; i<iLen; i++, pTo++ )
+		{
+			pTo[0] = m_pData[i];
+			if ( iTag>=0 )
+				pTo->m_iTag = iTag;
+		}
+
+		m_iUsed = 0;
+		m_iTotal = 0;
+	}
+
+	/// get entries count
+	int GetLength () const
+	{
+		return Min ( m_iUsed, m_iLimit );
+	}
+
+	/// set group comparator state
+	/// also detects whether @distinct participates in the group order, so
+	/// that CutWorst() knows to refresh distinct counts before sorting
+	void SetGroupState ( const CSphMatchComparatorState & tState )
+	{
+		m_tStateGroup = tState;
+
+		if ( DISTINCT && m_iDistinctOffset>=0 )
+			for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
+				if ( m_tStateGroup.m_iBitOffset[i]==m_iDistinctOffset )
+			{
+				m_bSortByDistinct = true;
+				break;
+			}
+	}
+
+protected:
+	/// count distinct values if necessary
+	/// writes the per-group distinct counts into the buffered group heads
+	void CountDistinct ()
+	{
+		if ( DISTINCT )
+		{
+			m_tUniq.Sort ();
+			SphGroupKey_t uGroup;
+			for ( int iCount = m_tUniq.CountStart(&uGroup); iCount; iCount = m_tUniq.CountNext(&uGroup) )
+			{
+				CSphMatch ** ppMatch = m_hGroup2Match(uGroup);
+				if ( ppMatch )
+					(*ppMatch)->m_pRowitems[m_iRowitems+OFF_POSTCALC_DISTINCT] = iCount;
+			}
+		}
+	}
+
+	/// cut worst N groups off the buffer tail
+	void CutWorst ()
+	{
+		// sort groups
+		if ( m_bSortByDistinct )
+			CountDistinct ();
+		SortGroups ();
+
+		// cut groups
+		int iCut = m_iLimit * (int)(GROUPBY_FACTOR/2);
+		m_iUsed -= iCut;
+
+		// cleanup unused distinct stuff
+		if ( DISTINCT )
+		{
+			// build kill-list
+			CSphVector<SphGroupKey_t> dRemove;
+			dRemove.Resize ( iCut );
+
+			if ( m_eGroupBy==SPH_GROUPBY_ATTRPAIR )
+			{
+				for ( int i=0; i<iCut; i++ )
+					dRemove[i] = sphCalcGroupKey ( m_pData[m_iUsed+i], m_eGroupBy, m_iGroupbyOffset, m_iGroupbyCount );
+			} else
+			{
+				for ( int i=0; i<iCut; i++ )
+					dRemove[i] = m_pData[m_iUsed+i].m_pRowitems[m_iRowitems+OFF_POSTCALC_GROUP];
+			}
+
+			// sort and compact
+			if ( !m_bSortByDistinct )
+				m_tUniq.Sort ();
+			m_tUniq.Compact ( &dRemove[0], iCut );
+		}
+
+		// rehash
+		m_hGroup2Match.Reset ();
+		if ( m_eGroupBy==SPH_GROUPBY_ATTRPAIR )
+		{
+			for ( int i=0; i<m_iUsed; i++ )
+			{
+				SphGroupKey_t uKey = sphCalcGroupKey ( m_pData[i], m_eGroupBy, m_iGroupbyOffset, m_iGroupbyCount );
+				m_hGroup2Match.Add ( &m_pData[i], uKey );
+			}
+		} else
+		{
+			for ( int i=0; i<m_iUsed; i++ )
+				m_hGroup2Match.Add ( m_pData+i, m_pData[i].m_pRowitems[ m_iRowitems+OFF_POSTCALC_GROUP ] );
+		}
+	}
+
+	/// sort groups buffer
+	/// iterative quicksort over m_pData[0..m_iUsed) using COMPGROUP order;
+	/// explicit stacks (st0/st1) replace recursion
+	void SortGroups ()
+	{
+		CSphMatch * pData = m_pData;
+		int iCount = m_iUsed;
+
+		int st0[32], st1[32], a, b, k, i, j;
+		CSphMatch x;
+
+		k = 1;
+		st0[0] = 0;
+		st1[0] = iCount-1;
+		while ( k )
+		{
+			k--;
+			i = a = st0[k];
+			j = b = st1[k];
+			x = pData [ (a+b)/2 ]; // FIXME! add better median at least
+			while ( a<b )
+			{
+				while ( i<=j )
+				{
+
+					while ( COMPGROUP::IsLess ( x, pData[i], m_tStateGroup ) ) i++;
+					while ( COMPGROUP::IsLess ( pData[j], x, m_tStateGroup ) ) j--;
+					if (i <= j) { Swap ( pData[i], pData[j] ); i++; j--; }
+				}
+
+				// recurse into the larger half via the stack, loop on the smaller
+				if ( j-a>=b-i )
+				{
+					if ( a<j ) { st0[k] = a; st1[k] = j; k++; }
+					a = i;
+				} else
+				{
+					if ( i<b ) { st0[k] = i; st1[k] = b; k++; }
+					b = j;
+				}
+			}
+		}
+	}
+};
+
+#if USE_WINDOWS
+#pragma warning(default:4127)
+#endif
+
+//////////////////////////////////////////////////////////////////////////
+// PLAIN SORTING FUNCTORS 
+//////////////////////////////////////////////////////////////////////////
+
+/// match sorter
+/// relevance order: weight desc, then docid asc
+/// (IsLess answers "is a worse than b" for the min-heap)
+template < bool BITS >
+struct MatchRelevanceLt_fn
+{
+	static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & )
+	{
+		if ( a.m_iWeight!=b.m_iWeight )
+			return a.m_iWeight < b.m_iWeight;
+
+		return a.m_iDocID > b.m_iDocID;
+	};
+};
+
+
+/// match sorter
+/// attribute order: attr asc, then weight desc, then docid asc
+template < bool BITS >
+struct MatchAttrLt_fn
+{
+	static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
+	{
+		CSphRowitem aa = t.GetAttr<BITS>(a,0);
+		CSphRowitem bb = t.GetAttr<BITS>(b,0);
+		if ( aa!=bb )
+			return aa<bb;
+
+		if ( a.m_iWeight!=b.m_iWeight )
+			return a.m_iWeight < b.m_iWeight;
+
+		return a.m_iDocID > b.m_iDocID;
+	};
+};
+
+
+/// match sorter
+/// attribute order: attr desc, then weight desc, then docid asc
+template < bool BITS >
+struct MatchAttrGt_fn
+{
+	static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
+	{
+		CSphRowitem aa = t.GetAttr<BITS>(a,0);
+		CSphRowitem bb = t.GetAttr<BITS>(b,0);
+		if ( aa!=bb )
+			return aa>bb;
+
+		if ( a.m_iWeight!=b.m_iWeight )
+			return a.m_iWeight < b.m_iWeight;
+
+		return a.m_iDocID > b.m_iDocID;
+	};
+};
+
+
+/// match sorter
+/// time-segments order: newer segment first (hour/day/week/month/3 months),
+/// then weight desc, then timestamp asc, then docid asc
+template < bool BITS >
+struct MatchTimeSegments_fn
+{
+	static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
+	{
+		CSphRowitem aa = t.GetAttr<BITS>(a,0);
+		CSphRowitem bb = t.GetAttr<BITS>(b,0);
+		int iA = GetSegment ( aa, t.m_iNow );
+		int iB = GetSegment ( bb, t.m_iNow );
+		if ( iA!=iB )
+			return iA > iB;
+
+		if ( a.m_iWeight!=b.m_iWeight )
+			return a.m_iWeight < b.m_iWeight;
+
+		if ( aa!=bb )
+			return aa<bb;
+
+		return a.m_iDocID > b.m_iDocID;
+	};
+
+protected:
+	/// map a timestamp to one of 6 age buckets relative to "now"
+	static inline int GetSegment ( DWORD iStamp, DWORD iNow )
+	{
+		if ( iStamp>=iNow-3600 ) return 0; // last hour
+		if ( iStamp>=iNow-24*3600 ) return 1; // last day
+		if ( iStamp>=iNow-7*24*3600 ) return 2; // last week
+		if ( iStamp>=iNow-30*24*3600 ) return 3; // last month
+		if ( iStamp>=iNow-90*24*3600 ) return 4; // last 3 months
+		return 5; // everything else
+	}
+};
+
+/////////////////////////////////////////////////////////////////////////////
+
+// compare one attribute pair and return early on inequality;
+// the per-key direction bit in m_uAttrDesc flips asc/desc via XOR
+#define SPH_TEST_PAIR(_aa,_bb,_idx ) \
+	if ( (_aa)!=(_bb) ) \
+		return ( (t.m_uAttrDesc>>(_idx))&1 ) ^ ( (_aa)>(_bb) );
+
+
+// compare sort key part _idx: virtual doc-id or relevance, or a schema attribute
+#define SPH_TEST_KEYPART(_idx) \
+	switch ( t.m_iAttr[_idx] ) \
+	{ \
+		case SPH_VATTR_ID:			SPH_TEST_PAIR ( a.m_iDocID, b.m_iDocID, _idx ); break; \
+		case SPH_VATTR_RELEVANCE:	SPH_TEST_PAIR ( a.m_iWeight, b.m_iWeight, _idx ); break; \
+		default: \
+		{ \
+			register CSphRowitem aa = t.GetAttr<BITS> ( a, _idx ); \
+			register CSphRowitem bb = t.GetAttr<BITS> ( b, _idx ); \
+			SPH_TEST_PAIR ( aa, bb, _idx ); \
+			break; \
+		} \
+	}
+
+
+/// generic 2-key comparator; keys are resolved through the comparator state
+template < bool BITS >
+struct MatchGeneric2_fn
+{
+	static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
+	{
+		// fixed stray extra indentation on the first key test, to match
+		// the sibling MatchGeneric3/4/5 functors
+		SPH_TEST_KEYPART(0);
+		SPH_TEST_KEYPART(1);
+		return false;
+	};
+};
+
+
+/// generic 3-key comparator; keys are resolved through the comparator state
+template < bool BITS >
+struct MatchGeneric3_fn
+{
+	static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
+	{
+		SPH_TEST_KEYPART(0);
+		SPH_TEST_KEYPART(1);
+		SPH_TEST_KEYPART(2);
+		return false;
+	};
+};
+
+
+/// generic 4-key comparator; keys are resolved through the comparator state
+template < bool BITS >
+struct MatchGeneric4_fn
+{
+	static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
+	{
+		SPH_TEST_KEYPART(0);
+		SPH_TEST_KEYPART(1);
+		SPH_TEST_KEYPART(2);
+		SPH_TEST_KEYPART(3);
+		return false;
+	};
+};
+
+
+/// generic 5-key comparator; keys are resolved through the comparator state
+template < bool BITS >
+struct MatchGeneric5_fn
+{
+	static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
+	{
+		SPH_TEST_KEYPART(0);
+		SPH_TEST_KEYPART(1);
+		SPH_TEST_KEYPART(2);
+		SPH_TEST_KEYPART(3);
+		SPH_TEST_KEYPART(4);
+		return false;
+	};
+};
+
+//////////////////////////////////////////////////////////////////////////
+
+/// custom sort: compare by a user-defined expression from sphinxcustomsort.inl
+/// the .inl file is included three times with different MATCH_* macro
+/// bindings: once in Setup() to declare the attributes it references, and
+/// twice in IsLess() to evaluate the expression for each of the two matches
+template < bool BITS >
+struct MatchCustom_fn
+{
+	// setup sorting state
+	// resolves one declared attribute name against the schema into slot iIdx
+	static bool SetupAttr ( const CSphSchema & tSchema, CSphMatchComparatorState & tState, CSphString & sError, int iIdx, const char * sAttr )
+	{
+		if ( iIdx>=CSphMatchComparatorState::MAX_ATTRS )
+		{
+			sError.SetSprintf ( "custom sort: too many attributes declared" );
+			return false;
+		}
+
+		int iAttr = tSchema.GetAttrIndex(sAttr);
+		if ( iAttr<0 )
+		{
+			sError.SetSprintf ( "custom sort: attr '%s' not found in schema", sAttr );
+			return false;
+		}
+
+		const CSphColumnInfo & tAttr = tSchema.GetAttr(iAttr);
+		tState.m_iAttr[iIdx] = iAttr;
+		tState.m_iBitOffset[iIdx] = tAttr.m_iBitOffset;
+		tState.m_iBitCount[iIdx] = tAttr.m_iBitCount;
+		tState.m_iRowitem[iIdx] = tAttr.m_iRowitem;
+		return true;
+	}
+
+	// setup sorting state
+	// declaration pass: the .inl expression is evaluated with dummy values,
+	// only MATCH_DECLARE_ATTR does real work (registering attributes)
+	static bool Setup ( const CSphSchema & tSchema, CSphMatchComparatorState & tState, CSphString & sError )
+	{
+		float fTmp;
+		int iAttr = 0;
+
+#define MATCH_FUNCTION				fTmp
+#define MATCH_WEIGHT				1.0f
+#define MATCH_NOW					1.0f
+#define MATCH_ATTR(_idx)			1.0f
+#define MATCH_DECLARE_ATTR(_name)	if ( !SetupAttr ( tSchema, tState, sError, iAttr++, _name ) ) return false;
+#include "sphinxcustomsort.inl"
+;
+
+		return true;
+	}
+
+	// calc function and compare matches
+	// OPTIMIZE? could calc once per match on submit
+	static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
+	{
+#undef MATCH_DECLARE_ATTR
+#undef MATCH_WEIGHT
+#undef MATCH_NOW
+#undef MATCH_ATTR
+#define MATCH_DECLARE_ATTR(_name)	;
+#define MATCH_WEIGHT				float(MATCH_VAR.m_iWeight)
+#define MATCH_NOW					float(t.m_iNow)
+#define MATCH_ATTR(_idx)			float(t.GetAttr<BITS>(MATCH_VAR,_idx))
+
+		float aa, bb;
+
+// evaluate the custom expression for match "a"
+#undef MATCH_FUNCTION
+#undef MATCH_VAR
+#define MATCH_FUNCTION aa
+#define MATCH_VAR a
+#include "sphinxcustomsort.inl"
+;
+
+// evaluate the custom expression for match "b"
+#undef MATCH_FUNCTION
+#undef MATCH_VAR
+#define MATCH_FUNCTION bb
+#define MATCH_VAR b
+#include "sphinxcustomsort.inl"
+;
+
+		return aa<bb;
+	}
+};
+
+//////////////////////////////////////////////////////////////////////////
+// SORT CLAUSE PARSER
+//////////////////////////////////////////////////////////////////////////
+
+static const int MAX_SORT_FIELDS = 5; // MUST be in sync with CSphMatchComparatorState::m_iAttr
+
+
+/// known match comparator (sort function) flavors
+enum ESphSortFunc
+{
+	FUNC_REL_DESC,
+	FUNC_ATTR_DESC,
+	FUNC_ATTR_ASC,
+	FUNC_TIMESEGS,
+	FUNC_GENERIC2,
+	FUNC_GENERIC3,
+	FUNC_GENERIC4,
+	FUNC_GENERIC5,
+	FUNC_CUSTOM
+};
+
+
+/// sort clause parsing outcome
+enum ESortClauseParseResult
+{
+	SORT_CLAUSE_OK,
+	SORT_CLAUSE_ERROR,
+	SORT_CLAUSE_RANDOM
+};
+
+
+/// placeholder attr indexes for virtual sort keys (@geodist, @count, etc);
+/// presumably resolved to real attribute offsets by the caller later --
+/// the fixup code is not in this file
+enum
+{
+	FIXUP_GEODIST	= -1000,
+	FIXUP_COUNT		= -1001,
+	FIXUP_GROUP		= -1002,
+	FIXUP_DISTINCT	= -1003
+};
+
+
+/// parse a sort-by clause ("attr1 desc, attr2 asc, ...") into a sorting
+/// functor id and a comparator state, using a caller-supplied tokenizer
+///
+/// split out from sphParseSortClause so that the wrapper below can own the
+/// tokenizer and release it on every return path (the original leaked it)
+static ESortClauseParseResult sphParseSortClauseImpl ( ISphTokenizer * pTokenizer,
+	const char * sClause, const CSphQuery * pQuery,
+	const CSphSchema & tSchema, bool bGroupClause,
+	ESphSortFunc & eFunc, CSphMatchComparatorState & tState, CSphString & sError )
+{
+	CSphString sTmp;
+	pTokenizer->SetCaseFolding ( "0..9, A..Z->a..z, _, a..z, @", sTmp );
+
+	// tokenizer works on a mutable copy of the clause
+	CSphString sSortClause ( sClause );
+	pTokenizer->SetBuffer ( (BYTE*)sSortClause.cstr(), strlen(sSortClause.cstr()), true );
+
+	bool bField = false; // whether i'm expecting field name or sort order
+	int iField = 0;
+
+	for ( const char * pTok=(char*)pTokenizer->GetToken(); pTok; pTok=(char*)pTokenizer->GetToken() )
+	{
+		bField = !bField;
+
+		// special case, sort by random
+		if ( iField==0 && bField && strcmp ( pTok, "@random" )==0 )
+			return SORT_CLAUSE_RANDOM;
+
+		// special case, sort by custom
+		if ( iField==0 && bField && strcmp ( pTok, "@custom" )==0 )
+		{
+			eFunc = FUNC_CUSTOM;
+			return MatchCustom_fn<false>::Setup ( tSchema, tState, sError ) ? SORT_CLAUSE_OK : SORT_CLAUSE_ERROR;
+		}
+
+		// handle sort order
+		if ( !bField )
+		{
+			// check
+			if ( strcmp ( pTok, "desc" ) && strcmp ( pTok, "asc" ) )
+			{
+				sError.SetSprintf ( "invalid sorting order '%s'", pTok );
+				return SORT_CLAUSE_ERROR;
+			}
+
+			// set
+			if ( !strcmp ( pTok, "desc" ) )
+				tState.m_uAttrDesc |= ( 1<<iField );
+
+			iField++;
+			continue;
+		}
+
+		// handle field name
+		if ( iField==MAX_SORT_FIELDS )
+		{
+			sError.SetSprintf ( "too much sort-by fields; maximum count is %d", MAX_SORT_FIELDS );
+			return SORT_CLAUSE_ERROR;
+		}
+
+		if ( !strcasecmp ( pTok, "@relevance" )
+			|| !strcasecmp ( pTok, "@rank" )
+			|| !strcasecmp ( pTok, "@weight" ) )
+		{
+			tState.m_iAttr[iField] = SPH_VATTR_RELEVANCE;
+
+		} else if ( !strcasecmp ( pTok, "@id" ) )
+		{
+			tState.m_iAttr[iField] = SPH_VATTR_ID;
+
+		} else if ( !strcasecmp ( pTok, "@geodist" ) )
+		{
+			tState.m_iAttr[iField] = FIXUP_GEODIST;
+
+		} else if ( !strcasecmp ( pTok, "@count" ) && bGroupClause )
+		{
+			if ( pQuery->m_iGroupbyOffset<0 )
+			{
+				sError.SetSprintf ( "no group-by attribute; can not sort by @count" );
+				return SORT_CLAUSE_ERROR;
+			}
+			tState.m_iAttr[iField] = FIXUP_COUNT;
+
+		} else if ( !strcasecmp ( pTok, "@group" ) && bGroupClause )
+		{
+			if ( pQuery->m_iGroupbyOffset<0 )
+			{
+				sError.SetSprintf ( "no group-by attribute; can not sort by @group" );
+				return SORT_CLAUSE_ERROR;
+			}
+			tState.m_iAttr[iField] = FIXUP_GROUP;
+
+		} else if ( !strcasecmp ( pTok, "@distinct" ) && bGroupClause )
+		{
+			if ( pQuery->m_iDistinctOffset<0 )
+			{
+				sError.SetSprintf ( "no count-distinct attribute; can not sort by @distinct" );
+				return SORT_CLAUSE_ERROR;
+			}
+			tState.m_iAttr[iField] = FIXUP_DISTINCT;
+
+		} else
+		{
+			// plain schema attribute
+			int iAttr = tSchema.GetAttrIndex ( pTok );
+			if ( iAttr<0 )
+			{
+				sError.SetSprintf ( "sort-by attribute '%s' not found", pTok );
+				return SORT_CLAUSE_ERROR;
+			}
+			tState.m_iAttr[iField] = iAttr;
+			tState.m_iRowitem[iField] = tSchema.GetAttr(iAttr).m_iRowitem;
+			tState.m_iBitOffset[iField] = tSchema.GetAttr(iAttr).m_iBitOffset;
+			tState.m_iBitCount[iField] = tSchema.GetAttr(iAttr).m_iBitCount;
+		}
+	}
+
+	if ( iField==0 )
+	{
+		sError.SetSprintf ( "no sort order defined" );
+		return SORT_CLAUSE_ERROR;
+	}
+
+	// single-key clause gets an implicit "id ASC" tie-breaker
+	if ( iField==1 )
+		tState.m_iAttr[iField++] = SPH_VATTR_ID;
+
+	switch ( iField )
+	{
+		case 2:		eFunc = FUNC_GENERIC2; break;
+		case 3:		eFunc = FUNC_GENERIC3; break;
+		case 4:		eFunc = FUNC_GENERIC4; break;
+		case 5:		eFunc = FUNC_GENERIC5; break;
+		default:	sError.SetSprintf ( "INTERNAL ERROR: %d fields in sphParseSortClause()", iField ); return SORT_CLAUSE_ERROR;
+	}
+	return SORT_CLAUSE_OK;
+}
+
+
+/// parse sort-by clause; creates a throwaway SBCS tokenizer for the mini
+/// parser and guarantees it is freed on every exit path
+static ESortClauseParseResult sphParseSortClause ( const char * sClause, const CSphQuery * pQuery,
+	const CSphSchema & tSchema, bool bGroupClause,
+	ESphSortFunc & eFunc, CSphMatchComparatorState & tState, CSphString & sError )
+{
+	ISphTokenizer * pTokenizer = sphCreateSBCSTokenizer ();
+	if ( !pTokenizer )
+	{
+		sError.SetSprintf ( "INTERNAL ERROR: failed to create tokenizer when parsing sort clause" );
+		return SORT_CLAUSE_ERROR;
+	}
+
+	ESortClauseParseResult eRes = sphParseSortClauseImpl ( pTokenizer, sClause, pQuery,
+		tSchema, bGroupClause, eFunc, tState, sError );
+	SafeDelete ( pTokenizer ); // was leaked on every return in the original
+	return eRes;
+}
+
+//////////////////////////////////////////////////////////////////////////
+// SORTING+GROUPING INSTANTIATION
+//////////////////////////////////////////////////////////////////////////
+
+/// innermost dispatch level of the grouping-sorter factory: resolves the
+/// COUNT(DISTINCT) support flag into its template argument
+template < typename COMPMATCH, typename COMPGROUP, typename ARG >
+static ISphMatchSorter * sphCreateSorter3rd ( bool bDistinct, ARG arg )
+{
+	return bDistinct
+		? (ISphMatchSorter *) new CSphKBufferGroupSorter<COMPMATCH,COMPGROUP,true> ( arg )
+		: (ISphMatchSorter *) new CSphKBufferGroupSorter<COMPMATCH,COMPGROUP,false> ( arg );
+}
+
+
+/// middle dispatch level of the grouping-sorter factory: resolves the
+/// group-sorting functor (and its bitfields flag), then recurses into
+/// the distinct-flag dispatch
+template < typename COMPMATCH, typename ARG >
+static ISphMatchSorter * sphCreateSorter2nd ( ESphSortFunc eGroupFunc, bool bGroupBits, bool bDistinct, ARG arg )
+{
+	if ( bGroupBits )
+	{
+		switch ( eGroupFunc )
+		{
+			case FUNC_GENERIC2:	return sphCreateSorter3rd<COMPMATCH,MatchGeneric2_fn<true> >	( bDistinct, arg );
+			case FUNC_GENERIC3:	return sphCreateSorter3rd<COMPMATCH,MatchGeneric3_fn<true> >	( bDistinct, arg );
+			case FUNC_GENERIC4:	return sphCreateSorter3rd<COMPMATCH,MatchGeneric4_fn<true> >	( bDistinct, arg );
+			case FUNC_GENERIC5:	return sphCreateSorter3rd<COMPMATCH,MatchGeneric5_fn<true> >	( bDistinct, arg );
+			default:			return NULL; // groups only sort by generic comparators
+		}
+	}
+
+	switch ( eGroupFunc )
+	{
+		case FUNC_GENERIC2:	return sphCreateSorter3rd<COMPMATCH,MatchGeneric2_fn<false> >	( bDistinct, arg );
+		case FUNC_GENERIC3:	return sphCreateSorter3rd<COMPMATCH,MatchGeneric3_fn<false> >	( bDistinct, arg );
+		case FUNC_GENERIC4:	return sphCreateSorter3rd<COMPMATCH,MatchGeneric4_fn<false> >	( bDistinct, arg );
+		case FUNC_GENERIC5:	return sphCreateSorter3rd<COMPMATCH,MatchGeneric5_fn<false> >	( bDistinct, arg );
+		default:			return NULL;
+	}
+}
+
+
+/// outermost dispatch level of the grouping-sorter factory: resolves the
+/// match-sorting functor (and its bitfields flag), then recurses into
+/// the group-functor dispatch
+template < typename ARG >
+static ISphMatchSorter * sphCreateSorter1st ( ESphSortFunc eMatchFunc, bool bMatchBits, ESphSortFunc eGroupFunc, bool bGroupBits, bool bDistinct, ARG arg )
+{
+	if ( bMatchBits )
+	{
+		switch ( eMatchFunc )
+		{
+			case FUNC_REL_DESC:		return sphCreateSorter2nd<MatchRelevanceLt_fn<true> >	( eGroupFunc, bGroupBits, bDistinct, arg );
+			case FUNC_ATTR_DESC:	return sphCreateSorter2nd<MatchAttrLt_fn<true> >		( eGroupFunc, bGroupBits, bDistinct, arg );
+			case FUNC_ATTR_ASC:		return sphCreateSorter2nd<MatchAttrGt_fn<true> >		( eGroupFunc, bGroupBits, bDistinct, arg );
+			case FUNC_TIMESEGS:		return sphCreateSorter2nd<MatchTimeSegments_fn<true> >	( eGroupFunc, bGroupBits, bDistinct, arg );
+			case FUNC_GENERIC2:		return sphCreateSorter2nd<MatchGeneric2_fn<true> >		( eGroupFunc, bGroupBits, bDistinct, arg );
+			case FUNC_GENERIC3:		return sphCreateSorter2nd<MatchGeneric3_fn<true> >		( eGroupFunc, bGroupBits, bDistinct, arg );
+			case FUNC_GENERIC4:		return sphCreateSorter2nd<MatchGeneric4_fn<true> >		( eGroupFunc, bGroupBits, bDistinct, arg );
+			case FUNC_GENERIC5:		return sphCreateSorter2nd<MatchGeneric5_fn<true> >		( eGroupFunc, bGroupBits, bDistinct, arg );
+			default:				return NULL;
+		}
+	}
+
+	switch ( eMatchFunc )
+	{
+		case FUNC_REL_DESC:		return sphCreateSorter2nd<MatchRelevanceLt_fn<false> >	( eGroupFunc, bGroupBits, bDistinct, arg );
+		case FUNC_ATTR_DESC:	return sphCreateSorter2nd<MatchAttrLt_fn<false> >		( eGroupFunc, bGroupBits, bDistinct, arg );
+		case FUNC_ATTR_ASC:		return sphCreateSorter2nd<MatchAttrGt_fn<false> >		( eGroupFunc, bGroupBits, bDistinct, arg );
+		case FUNC_TIMESEGS:		return sphCreateSorter2nd<MatchTimeSegments_fn<false> >	( eGroupFunc, bGroupBits, bDistinct, arg );
+		case FUNC_GENERIC2:		return sphCreateSorter2nd<MatchGeneric2_fn<false> >		( eGroupFunc, bGroupBits, bDistinct, arg );
+		case FUNC_GENERIC3:		return sphCreateSorter2nd<MatchGeneric3_fn<false> >		( eGroupFunc, bGroupBits, bDistinct, arg );
+		case FUNC_GENERIC4:		return sphCreateSorter2nd<MatchGeneric4_fn<false> >		( eGroupFunc, bGroupBits, bDistinct, arg );
+		case FUNC_GENERIC5:		return sphCreateSorter2nd<MatchGeneric5_fn<false> >		( eGroupFunc, bGroupBits, bDistinct, arg );
+		default:				return NULL;
+	}
+}
+
+//////////////////////////////////////////////////////////////////////////
+// PUBLIC FUNCTIONS (FACTORY AND FLATTENING)
+//////////////////////////////////////////////////////////////////////////
+
+/// resolve the query's group-by and count-distinct attribute names against
+/// the given schema, storing the resulting bit offsets/counts back into
+/// pQuery; returns false (with sError set) when a named attribute is missing
+static bool UpdateQueryForSchema ( CSphQuery * pQuery, const CSphSchema & tSchema, CSphString & sError )
+{
+	// -1 means "no grouping" / "no distinct counting"
+	pQuery->m_iGroupbyOffset = -1;
+	pQuery->m_iDistinctOffset = -1;
+
+	if ( pQuery->m_sGroupBy.IsEmpty() )
+		return true;
+
+	// setup group-by attr
+	int iGroupBy = tSchema.GetAttrIndex ( pQuery->m_sGroupBy.cstr() );
+	if ( iGroupBy<0 )
+	{
+		sError.SetSprintf ( "group-by attribute '%s' not found", pQuery->m_sGroupBy.cstr() );
+		return false;
+	}
+	// ATTRPAIR grouping reads the NEXT attribute too, so it must exist
+	if ( pQuery->m_eGroupFunc==SPH_GROUPBY_ATTRPAIR && iGroupBy+1>=tSchema.GetAttrsCount() )
+	{
+		sError.SetSprintf ( "group-by attribute '%s' must not be last in ATTRPAIR grouping mode", pQuery->m_sGroupBy.cstr() );
+		return false;
+	}
+	pQuery->m_iGroupbyOffset = tSchema.GetAttr(iGroupBy).m_iBitOffset;
+	pQuery->m_iGroupbyCount = tSchema.GetAttr(iGroupBy).m_iBitCount;
+
+	// setup distinct attr
+	if ( !pQuery->m_sGroupDistinct.IsEmpty() )
+	{
+		int iDistinct = tSchema.GetAttrIndex ( pQuery->m_sGroupDistinct.cstr() );
+		if ( iDistinct<0 )
+		{
+			sError.SetSprintf ( "group-count-distinct attribute '%s' not found", pQuery->m_sGroupDistinct.cstr() );
+			return false;
+		}
+
+		pQuery->m_iDistinctOffset = tSchema.GetAttr(iDistinct).m_iBitOffset;
+		pQuery->m_iDistinctCount = tSchema.GetAttr(iDistinct).m_iBitCount;
+	}
+
+	return true;
+}
+
+
+/// create and configure the match sorting/grouping queue for a query
+/// against a given schema; returns NULL (with sError set) on failure
+///
+/// NOTE: also patches pQuery in place (group-by/distinct offsets, the
+/// geodist calculation flag, and the pre-sort rowitem count)
+ISphMatchSorter * sphCreateQueue ( CSphQuery * pQuery, const CSphSchema & tSchema, CSphString & sError )
+{
+	// lookup proper attribute index to group by, if any
+	if ( !UpdateQueryForSchema ( pQuery, tSchema, sError ) )
+		return NULL; // FIXED: was "return false" from a pointer-returning function
+	assert ( pQuery->m_sGroupBy.IsEmpty() || pQuery->m_iGroupbyOffset>=0 );
+
+	// prepare
+	ISphMatchSorter * pTop = NULL;
+	CSphMatchComparatorState tStateMatch, tStateGroup;
+
+	sError = "";
+
+	////////////////////////////////////
+	// choose and setup sorting functor
+	////////////////////////////////////
+
+	ESphSortFunc eMatchFunc = FUNC_REL_DESC;
+	ESphSortFunc eGroupFunc = FUNC_REL_DESC;
+	bool bUsesAttrs = false;
+	bool bRandomize = false;
+	pQuery->m_bCalcGeodist = false;
+
+	// matches sorting function
+	if ( pQuery->m_eSort==SPH_SORT_EXTENDED )
+	{
+		ESortClauseParseResult eRes = sphParseSortClause ( pQuery->m_sSortBy.cstr(), pQuery, tSchema,
+			false, eMatchFunc, tStateMatch, sError );
+
+		if ( eRes==SORT_CLAUSE_ERROR )
+			return NULL;
+
+		if ( eRes==SORT_CLAUSE_RANDOM )
+			bRandomize = true;
+
+		// scan the parsed sort keys: any real attribute (or @geodist) means
+		// the sorter must copy attribute data along with the match
+		for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
+		{
+			if ( tStateMatch.m_iAttr[i]>=0 )
+				bUsesAttrs = true;
+			if ( tStateMatch.m_iAttr[i]==FIXUP_GEODIST )
+			{
+				bUsesAttrs = true;
+				pQuery->m_bCalcGeodist = true;
+			}
+		}
+
+	} else
+	{
+		// check sort-by attribute
+		if ( pQuery->m_eSort!=SPH_SORT_RELEVANCE )
+		{
+			tStateMatch.m_iAttr[0] = tSchema.GetAttrIndex ( pQuery->m_sSortBy.cstr() );
+			if ( tStateMatch.m_iAttr[0]<0 )
+			{
+				sError.SetSprintf ( "sort-by attribute '%s' not found", pQuery->m_sSortBy.cstr() );
+				return NULL;
+			}
+
+			const CSphColumnInfo & tAttr = tSchema.GetAttr ( tStateMatch.m_iAttr[0] );
+			tStateMatch.m_iRowitem[0] = tAttr.m_iRowitem;
+			tStateMatch.m_iBitOffset[0] = tAttr.m_iBitOffset;
+			tStateMatch.m_iBitCount[0] = tAttr.m_iBitCount;
+		}
+	
+		// find out what function to use and whether it needs attributes
+		bUsesAttrs = true;
+		switch ( pQuery->m_eSort )
+		{
+			case SPH_SORT_ATTR_DESC:		eMatchFunc = FUNC_ATTR_DESC; break;
+			case SPH_SORT_ATTR_ASC:			eMatchFunc = FUNC_ATTR_ASC; break;
+			case SPH_SORT_TIME_SEGMENTS:	eMatchFunc = FUNC_TIMESEGS; break;
+			case SPH_SORT_RELEVANCE:		eMatchFunc = FUNC_REL_DESC; bUsesAttrs = false; break;
+			default:
+				sError.SetSprintf ( "unknown sorting mode %d", pQuery->m_eSort );
+				return NULL;
+		}
+	}
+
+	// groups sorting function
+	if ( pQuery->m_iGroupbyOffset>=0 )
+	{
+		ESortClauseParseResult eRes = sphParseSortClause ( pQuery->m_sGroupSortBy.cstr(), pQuery, tSchema,
+			true, eGroupFunc, tStateGroup, sError );
+
+		if ( eRes==SORT_CLAUSE_ERROR || eRes==SORT_CLAUSE_RANDOM )
+		{
+			if ( eRes==SORT_CLAUSE_RANDOM )
+				sError.SetSprintf ( "groups can not be sorted by @random" );
+			return NULL;
+		}
+	}
+
+	// update for calculated stuff; a filter on @geodist also forces the calculation
+	ARRAY_FOREACH ( i, pQuery->m_dFilters )
+		if ( pQuery->m_dFilters[i].m_sAttrName=="@geodist" )
+	{
+		pQuery->m_bCalcGeodist = true;
+		break;
+	}
+
+	int iToCalc = ( pQuery->m_bCalcGeodist ? 1 : 0 );
+	pQuery->m_iPresortRowitems = tSchema.GetRealRowSize() + iToCalc;
+
+	// patch FIXUP_* placeholders in both comparator states into real
+	// rowitem offsets past the schema's real attributes
+	for ( int iState=0; iState<2; iState++ )
+	{
+		CSphMatchComparatorState & tState = iState ? tStateMatch : tStateGroup;
+		for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
+		{
+			int iOffset = -1;
+			switch ( tState.m_iAttr[i] )
+			{
+				case FIXUP_GEODIST:		iOffset = 0; break;
+				case FIXUP_COUNT:		iOffset = iToCalc+OFF_POSTCALC_COUNT; break;
+				case FIXUP_GROUP:		iOffset = iToCalc+OFF_POSTCALC_GROUP; break;
+				case FIXUP_DISTINCT:	iOffset = iToCalc+OFF_POSTCALC_DISTINCT; break;
+			}
+			if ( iOffset>=0 )
+			{
+				tState.m_iAttr[i] = tSchema.GetRealAttrsCount() + iOffset;
+				tState.m_iRowitem[i] = tSchema.GetRealRowSize() + iOffset;
+				tState.m_iBitOffset[i] = tState.m_iRowitem[i]*ROWITEM_BITS;
+				tState.m_iBitCount[i] = ROWITEM_BITS;
+			}
+		}
+	}
+
+	///////////////////
+	// spawn the queue
+	///////////////////
+
+	bool bMatchBits = tStateMatch.UsesBitfields ();
+	bool bGroupBits = tStateGroup.UsesBitfields ();
+
+	if ( pQuery->m_iGroupbyOffset<0 )
+	{
+		// plain (non-grouping) queue
+		if ( bMatchBits )
+		{
+			switch ( eMatchFunc )
+			{
+				case FUNC_REL_DESC:	pTop = new CSphMatchQueue<MatchRelevanceLt_fn<true> >	( pQuery->m_iMaxMatches, bUsesAttrs ); break;
+				case FUNC_ATTR_DESC:pTop = new CSphMatchQueue<MatchAttrLt_fn<true> >		( pQuery->m_iMaxMatches, bUsesAttrs ); break;
+				case FUNC_ATTR_ASC:	pTop = new CSphMatchQueue<MatchAttrGt_fn<true> >		( pQuery->m_iMaxMatches, bUsesAttrs ); break;
+				case FUNC_TIMESEGS:	pTop = new CSphMatchQueue<MatchTimeSegments_fn<true> >	( pQuery->m_iMaxMatches, bUsesAttrs ); break;
+				case FUNC_GENERIC2:	pTop = new CSphMatchQueue<MatchGeneric2_fn<true> >		( pQuery->m_iMaxMatches, bUsesAttrs ); break;
+				case FUNC_GENERIC3:	pTop = new CSphMatchQueue<MatchGeneric3_fn<true> >		( pQuery->m_iMaxMatches, bUsesAttrs ); break;
+				case FUNC_GENERIC4:	pTop = new CSphMatchQueue<MatchGeneric4_fn<true> >		( pQuery->m_iMaxMatches, bUsesAttrs ); break;
+				case FUNC_GENERIC5:	pTop = new CSphMatchQueue<MatchGeneric5_fn<true> >		( pQuery->m_iMaxMatches, bUsesAttrs ); break;
+				case FUNC_CUSTOM:	pTop = new CSphMatchQueue<MatchCustom_fn<true> >		( pQuery->m_iMaxMatches, bUsesAttrs ); break;
+				default:			pTop = NULL;
+			}
+		} else
+		{
+			switch ( eMatchFunc )
+			{
+				case FUNC_REL_DESC:	pTop = new CSphMatchQueue<MatchRelevanceLt_fn<false> >	( pQuery->m_iMaxMatches, bUsesAttrs ); break;
+				case FUNC_ATTR_DESC:pTop = new CSphMatchQueue<MatchAttrLt_fn<false> >		( pQuery->m_iMaxMatches, bUsesAttrs ); break;
+				case FUNC_ATTR_ASC:	pTop = new CSphMatchQueue<MatchAttrGt_fn<false> >		( pQuery->m_iMaxMatches, bUsesAttrs ); break;
+				case FUNC_TIMESEGS:	pTop = new CSphMatchQueue<MatchTimeSegments_fn<false> >	( pQuery->m_iMaxMatches, bUsesAttrs ); break;
+				case FUNC_GENERIC2:	pTop = new CSphMatchQueue<MatchGeneric2_fn<false> >		( pQuery->m_iMaxMatches, bUsesAttrs ); break;
+				case FUNC_GENERIC3:	pTop = new CSphMatchQueue<MatchGeneric3_fn<false> >		( pQuery->m_iMaxMatches, bUsesAttrs ); break;
+				case FUNC_GENERIC4:	pTop = new CSphMatchQueue<MatchGeneric4_fn<false> >		( pQuery->m_iMaxMatches, bUsesAttrs ); break;
+				case FUNC_GENERIC5:	pTop = new CSphMatchQueue<MatchGeneric5_fn<false> >		( pQuery->m_iMaxMatches, bUsesAttrs ); break;
+				case FUNC_CUSTOM:	pTop = new CSphMatchQueue<MatchCustom_fn<false> >		( pQuery->m_iMaxMatches, bUsesAttrs ); break;
+				default:			pTop = NULL;
+			}
+		}
+
+	} else
+	{
+		// grouping queue; template dispatch handles all functor combinations
+		pTop = sphCreateSorter1st ( eMatchFunc, bMatchBits, eGroupFunc, bGroupBits, pQuery->m_iDistinctOffset>=0, pQuery );
+	}
+
+	if ( !pTop )
+	{
+		// FIXED: arguments reordered to match the format labels
+		sError.SetSprintf ( "internal error: unhandled sorting mode (match-sort=%d, group=%d, group-sort=%d)",
+			eMatchFunc, pQuery->m_iGroupbyOffset>=0, eGroupFunc );
+		return NULL;
+	}
+
+	assert ( pTop );
+	pTop->SetState ( tStateMatch );
+	pTop->SetGroupState ( tStateGroup );
+	pTop->m_bRandomize = bRandomize;
+	return pTop;
+}
+
+
+/// append everything held by the queue to the result's match array,
+/// tagging each flattened match with iTag; no-op for NULL/empty queues
+void sphFlattenQueue ( ISphMatchSorter * pQueue, CSphQueryResult * pResult, int iTag )
+{
+	if ( !pQueue || !pQueue->GetLength() )
+		return;
+
+	int iStart = pResult->m_dMatches.GetLength ();
+	pResult->m_dMatches.Resize ( iStart + pQueue->GetLength() );
+	pQueue->Flatten ( &pResult->m_dMatches[iStart], iTag );
+}
+
+//
+// $Id$
+//

+ 13 - 0
src/sphinxstd.h

@@ -161,6 +161,19 @@ void			operator delete [] ( void * pPtr );
 /// crash with an error message
 void			sphDie ( char * sMessage, ... );
 
+
+/// how many bits are needed to store the given value, i.e. the bit width
+/// of iValue (returns 0 for a zero input; floor(log2(v))+1 otherwise)
+inline int		sphLog2 ( uint64_t iValue )
+{
+	int iBits = 0;
+	while ( iValue )
+	{
+		iValue >>= 1;
+		iBits++;
+	}
+	return iBits;
+}
+
 /////////////////////////////////////////////////////////////////////////////
 // DEBUGGING
 /////////////////////////////////////////////////////////////////////////////

+ 3 - 0
win/libsphinx03.vcproj

@@ -110,6 +110,9 @@
 			<File
 				RelativePath="..\src\sphinxquery.cpp">
 			</File>
+			<File
+				RelativePath="..\src\sphinxsort.cpp">
+			</File>
 			<File
 				RelativePath="..\src\sphinxsoundex.cpp">
 			</File>

+ 4 - 0
win/libsphinx05.vcproj

@@ -166,6 +166,10 @@
 				RelativePath="..\src\sphinxquery.cpp"
 				>
 			</File>
+			<File
+				RelativePath="..\src\sphinxsort.cpp"
+				>
+			</File>
 			<File
 				RelativePath="..\src\sphinxsoundex.cpp"
 				>

Some files were not shown because too many files changed in this diff