Browse Source

merged rel20 branch (up to r3217) back into trunk
linted

git-svn-id: svn://svn.sphinxsearch.com/sphinx/trunk@3218 406a0c4d-033a-0410-8de8-e80135713968

tomat 14 years ago
parent
commit
cd68f5e80e

+ 1 - 1
api/sphinxapi.php

@@ -1258,7 +1258,7 @@ class SphinxClient
 						$nvalues = $val;
 						while ( $nvalues>0 && $p<$max )
 						{
-							$attrvals[$attr][] = sphUnpackU64 ( substr ( $response, $p, 8 ) ); $p += 8;
+							$attrvals[$attr][] = sphUnpackI64 ( substr ( $response, $p, 8 ) ); $p += 8;
 							$nvalues -= 2;
 						}
 					} else if ( $type==SPH_ATTR_STRING )

+ 22 - 7
api/sphinxapi.py

@@ -292,6 +292,21 @@ class SphinxClient:
 		return response
 
 
+	def _Send ( self, sock, req ):
+		"""
+		INTERNAL METHOD, DO NOT CALL. send request to searchd server.
+		"""
+		total = 0
+		while True:
+			sent = sock.send ( req[total:] )
+			if sent<=0:
+				break
+				
+			total = total + sent
+		
+		return total
+		
+
 	def SetLimits (self, offset, limit, maxmatches=0, cutoff=0):
 		"""
 		Set offset and count into result set, and optionally set max-matches and cutoff limits.
@@ -634,7 +649,7 @@ class SphinxClient:
 		req = ''.join(self._reqs)
 		length = len(req)+8
 		req = pack('>HHLLL', SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, length, 0, len(self._reqs))+req
-		sock.send(req)
+		self._Send ( sock, req )
 
 		response = self._GetResponse(sock, VER_COMMAND_SEARCH)
 		if not response:
@@ -866,7 +881,7 @@ class SphinxClient:
 
 		# add header
 		req = pack('>2HL', SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, length)+req
-		wrote = sock.send(req)
+		self._Send ( sock, req )
 
 		response = self._GetResponse(sock, VER_COMMAND_EXCERPT )
 		if not response:
@@ -951,7 +966,7 @@ class SphinxClient:
 		req = ''.join(req)
 		length = len(req)
 		req = pack ( '>2HL', SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, length ) + req
-		wrote = sock.send ( req )
+		self._Send ( sock, req )
 
 		response = self._GetResponse ( sock, VER_COMMAND_UPDATE )
 		if not response:
@@ -984,7 +999,7 @@ class SphinxClient:
 		req = ''.join(req)
 		length = len(req)
 		req = pack ( '>2HL', SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, length ) + req
-		wrote = sock.send ( req )
+		self._Send ( sock, req )
 
 		response = self._GetResponse ( sock, VER_COMMAND_KEYWORDS )
 		if not response:
@@ -1034,7 +1049,7 @@ class SphinxClient:
 			return None
 
 		req = pack ( '>2HLL', SEARCHD_COMMAND_STATUS, VER_COMMAND_STATUS, 4, 1 )
-		wrote = sock.send ( req )
+		self._Send ( sock, req )
 
 		response = self._GetResponse ( sock, VER_COMMAND_STATUS )
 		if not response:
@@ -1070,7 +1085,7 @@ class SphinxClient:
 
 		# command, command version = 0, body length = 4, body = 1
 		request = pack ( '>hhII', SEARCHD_COMMAND_PERSIST, 0, 4, 1 )
-		server.send ( request )
+		self._Send ( server, request )
 		
 		self._socket = server
 		return True
@@ -1092,7 +1107,7 @@ class SphinxClient:
 			return -1
 
 		request = pack ( '>hhI', SEARCHD_COMMAND_FLUSHATTRS, VER_COMMAND_FLUSHATTRS, 0 ) # cmd, ver, bodylen
-		sock.send ( request )
+		self._Send ( sock, request )
 
 		response = self._GetResponse ( sock, VER_COMMAND_FLUSHATTRS )
 		if not response or len(response)!=4:

+ 3 - 3
src/indexer.cpp

@@ -339,7 +339,7 @@ bool ParseMultiAttr ( const char * sBuf, CSphColumnInfo & tAttr, const char * sS
 	LOC_SPACE0(); LOC_TOK();
 	if ( LOC_TOKEQ("uint") )				tAttr.m_eAttrType = SPH_ATTR_UINT32SET;
 	else if ( LOC_TOKEQ("timestamp") )		tAttr.m_eAttrType = SPH_ATTR_UINT32SET;
-	else if ( LOC_TOKEQ("bigint") )			tAttr.m_eAttrType = SPH_ATTR_UINT64SET;
+	else if ( LOC_TOKEQ("bigint") )			tAttr.m_eAttrType = SPH_ATTR_INT64SET;
 	else									LOC_ERR ( "attr type ('uint' or 'timestamp' or 'bigint')", sTok );
 
 	// handle ATTR-NAME
@@ -1548,8 +1548,8 @@ int main ( int argc, char ** argv )
 			dMergeDstFilters.Add();
 			dMergeDstFilters.Last().m_eType = SPH_FILTER_RANGE;
 			dMergeDstFilters.Last().m_sAttrName = argv[i+1];
-			dMergeDstFilters.Last().m_uMinValue = (SphAttr_t) strtoull ( argv[i+2], NULL, 10 );
-			dMergeDstFilters.Last().m_uMaxValue = (SphAttr_t) strtoull ( argv[i+3], NULL, 10 );
+			dMergeDstFilters.Last().m_iMinValue = strtoll ( argv[i+2], NULL, 10 );
+			dMergeDstFilters.Last().m_iMaxValue = strtoll ( argv[i+3], NULL, 10 );
 			i += 3;
 
 		} else if ( strcasecmp ( argv[i], "--buildstops" )==0 && (i+2)<argc )

+ 2 - 2
src/indextool.cpp

@@ -68,7 +68,7 @@ void ApplyMorphology ( CSphIndex * pIndex )
 	ISphTokenizer * pTokenizer = pIndex->GetTokenizer();
 	CSphDict * pDict = pIndex->GetDictionary();
 	BYTE * sBufferToDump = &dInBuffer[0];
-	if (pTokenizer)
+	if ( pTokenizer )
 	{
 		pTokenizer->SetBuffer ( &dInBuffer[0], dInBuffer.GetLength() );
 		while ( BYTE * sToken = pTokenizer->GetToken() )
@@ -178,7 +178,7 @@ int main ( int argc, char ** argv )
 		OPT1 ( "--check" )			{ eCommand = CMD_CHECK; sIndex = argv[++i]; }
 		OPT1 ( "--htmlstrip" )		{ eCommand = CMD_STRIP; sIndex = argv[++i]; }
 		OPT1 ( "--build-infixes" )	{ eCommand = CMD_BUILDINFIXES; sIndex = argv[++i]; }
-		OPT1 ( "--morph")			{ eCommand = CMD_MORPH; sIndex = argv[++i]; }
+		OPT1 ( "--morph" )			{ eCommand = CMD_MORPH; sIndex = argv[++i]; }
 		OPT1 ( "--strip-path" )		{ bStripPath = true; }
 		OPT1 ( "--optimize-rt-klists" )
 		{

+ 2 - 2
src/search.cpp

@@ -357,7 +357,7 @@ int main ( int argc, char ** argv )
 					const CSphColumnInfo & tAttr = pResult->m_tSchema.GetAttr(j);
 					fprintf ( stdout, ", %s=", tAttr.m_sName.cstr() );
 
-					if ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_UINT64SET )
+					if ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_INT64SET )
 					{
 						fprintf ( stdout, "(" );
 						SphAttr_t iIndex = tMatch.GetAttr ( tAttr.m_tLocator );
@@ -365,7 +365,7 @@ int main ( int argc, char ** argv )
 						{
 							const DWORD * pValues = pResult->m_pMva + iIndex;
 							int iValues = *pValues++;
-							if ( tAttr.m_eAttrType==SPH_ATTR_UINT64SET )
+							if ( tAttr.m_eAttrType==SPH_ATTR_INT64SET )
 							{
 								assert ( ( iValues%2 )==0 );
 								for ( int k=0; k<iValues; k+=2, pValues+=2 )

+ 125 - 83
src/searchd.cpp

@@ -18,6 +18,7 @@
 #include "sphinxexcerpt.h"
 #include "sphinxrt.h"
 #include "sphinxint.h"
+#include "sphinxquery.h"
 
 #include <errno.h>
 #include <fcntl.h>
@@ -4102,8 +4103,8 @@ void SearchRequestBuilder_t::SendQuery ( const char * sIndexes, NetOutputBuffer_
 				break;
 
 			case SPH_FILTER_RANGE:
-				tOut.SendUint64 ( tFilter.m_uMinValue );
-				tOut.SendUint64 ( tFilter.m_uMaxValue );
+				tOut.SendUint64 ( tFilter.m_iMinValue );
+				tOut.SendUint64 ( tFilter.m_iMaxValue );
 				break;
 
 			case SPH_FILTER_FLOATRANGE:
@@ -4259,7 +4260,7 @@ bool SearchReplyParser_t::ParseReply ( MemInputBuffer_c & tReq, AgentConn_t & tA
 				for ( int j=0; j<tSchema.GetAttrsCount(); j++ )
 				{
 					const CSphColumnInfo & tAttr = tSchema.GetAttr(j);
-					if ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_UINT64SET )
+					if ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_INT64SET )
 					{
 						tMatch.SetAttr ( tAttr.m_tLocator, m_dMvaStorage.GetLength() );
 
@@ -4470,8 +4471,8 @@ bool FixupQuery ( CSphQuery * pQuery, const CSphSchema * pSchema, const char * s
 		tFilter.m_dValues.Resize ( pQuery->m_iOldGroups );
 		ARRAY_FOREACH ( i, tFilter.m_dValues )
 			tFilter.m_dValues[i] = pQuery->m_pOldGroups[i];
-		tFilter.m_uMinValue = pQuery->m_iOldMinGID;
-		tFilter.m_uMaxValue = pQuery->m_iOldMaxGID;
+		tFilter.m_iMinValue = pQuery->m_iOldMinGID;
+		tFilter.m_iMaxValue = pQuery->m_iOldMaxGID;
 		pQuery->m_dFilters.Add ( tFilter );
 	}
 
@@ -4493,8 +4494,8 @@ bool FixupQuery ( CSphQuery * pQuery, const CSphSchema * pSchema, const char * s
 
 		CSphFilterSettings tFilter;
 		tFilter.m_sAttrName = pSchema->GetAttr(iAttr).m_sName;
-		tFilter.m_uMinValue = pQuery->m_iOldMinTS;
-		tFilter.m_uMaxValue = pQuery->m_iOldMaxTS;
+		tFilter.m_iMinValue = pQuery->m_iOldMinTS;
+		tFilter.m_iMaxValue = pQuery->m_iOldMaxTS;
 		pQuery->m_dFilters.Add ( tFilter );
 	}
 
@@ -4719,8 +4720,8 @@ bool ParseSearchQuery ( InputBuffer_c & tReq, CSphQuery & tQuery, int iVer, int
 				switch ( tFilter.m_eType )
 				{
 					case SPH_FILTER_RANGE:
-						tFilter.m_uMinValue = ( iVer>=0x114 ) ? tReq.GetUint64() : tReq.GetDword ();
-						tFilter.m_uMaxValue = ( iVer>=0x114 ) ? tReq.GetUint64() : tReq.GetDword ();
+						tFilter.m_iMinValue = ( iVer>=0x114 ) ? tReq.GetUint64() : tReq.GetDword ();
+						tFilter.m_iMaxValue = ( iVer>=0x114 ) ? tReq.GetUint64() : tReq.GetDword ();
 						break;
 
 					case SPH_FILTER_FLOATRANGE:
@@ -4752,8 +4753,8 @@ bool ParseSearchQuery ( InputBuffer_c & tReq, CSphQuery & tQuery, int iVer, int
 				if ( !tFilter.m_dValues.GetLength() )
 				{
 					// 0 length means this is range, not set
-					tFilter.m_uMinValue = tReq.GetDword ();
-					tFilter.m_uMaxValue = tReq.GetDword ();
+					tFilter.m_iMinValue = tReq.GetDword ();
+					tFilter.m_iMaxValue = tReq.GetDword ();
 				}
 
 				tFilter.m_eType = tFilter.m_dValues.GetLength() ? SPH_FILTER_VALUES : SPH_FILTER_RANGE;
@@ -4770,8 +4771,8 @@ bool ParseSearchQuery ( InputBuffer_c & tReq, CSphQuery & tQuery, int iVer, int
 		CSphFilterSettings & tFilter = tQuery.m_dFilters.Add();
 		tFilter.m_sAttrName = "@id";
 		tFilter.m_eType = SPH_FILTER_RANGE;
-		tFilter.m_uMinValue = uMinID;
-		tFilter.m_uMaxValue = uMaxID;
+		tFilter.m_iMinValue = uMinID;
+		tFilter.m_iMaxValue = uMaxID;
 	}
 
 	// v.1.3
@@ -5299,10 +5300,10 @@ void LogQuerySphinxql ( const CSphQuery & q, const CSphQueryResult & tRes, const
 				case SPH_FILTER_RANGE:
 					if ( f.m_bExclude )
 						tBuf.Append ( " %s NOT BETWEEN "INT64_FMT" AND "INT64_FMT,
-						f.m_sAttrName.cstr(), (int64_t)f.m_uMinValue, (int64_t)f.m_uMaxValue );
+						f.m_sAttrName.cstr(), f.m_iMinValue, f.m_iMaxValue );
 					else
 						tBuf.Append ( " %s BETWEEN "INT64_FMT" AND "INT64_FMT,
-							f.m_sAttrName.cstr(), (int64_t)f.m_uMinValue, (int64_t)f.m_uMaxValue );
+							f.m_sAttrName.cstr(), f.m_iMinValue, f.m_iMaxValue );
 					break;
 
 				case SPH_FILTER_FLOATRANGE:
@@ -5547,7 +5548,7 @@ int CalcResultLength ( int iVer, const CSphQueryResult * pRes, const CSphVector<
 	for ( int i=0; i<iAttrsCount; i++ )
 	{
 		const CSphColumnInfo & tCol = pRes->m_tSchema.GetAttr(i);
-		if ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_UINT64SET )
+		if ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_INT64SET )
 			dMvaItems.Add ( tCol.m_tLocator );
 		if ( tCol.m_eAttrType==SPH_ATTR_STRING )
 			dStringItems.Add ( tCol.m_tLocator );
@@ -5721,7 +5722,7 @@ void SendResult ( int iVer, NetOutputBuffer_c & tOut, const CSphQueryResult * pR
 			for ( int j=0; j<iAttrsCount; j++ )
 			{
 				const CSphColumnInfo & tAttr = pRes->m_tSchema.GetAttr(j);
-				if ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_UINT64SET )
+				if ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_INT64SET )
 				{
 					assert ( tMatch.GetAttr ( tAttr.m_tLocator )==0 || pMvaPool );
 					const DWORD * pValues = tMatch.GetAttrMVA ( tAttr.m_tLocator, pMvaPool );
@@ -5735,12 +5736,12 @@ void SendResult ( int iVer, NetOutputBuffer_c & tOut, const CSphQueryResult * pR
 						// send MVA values
 						int iValues = *pValues++;
 						tOut.SendDword ( iValues );
-						if ( tAttr.m_eAttrType==SPH_ATTR_UINT64SET )
+						if ( tAttr.m_eAttrType==SPH_ATTR_INT64SET )
 						{
 							assert ( ( iValues%2 )==0 );
 							while ( iValues )
 							{
-								uint64_t uVal = MVA_UPSIZE ( pValues );
+								uint64_t uVal = (uint64_t)MVA_UPSIZE ( pValues );
 								tOut.SendUint64 ( uVal );
 								pValues += 2;
 								iValues -= 2;
@@ -6559,8 +6560,8 @@ void SetupKillListFilter ( CSphFilterSettings & tFilter, const SphAttr_t * pKill
 
 	tFilter.m_bExclude = true;
 	tFilter.m_eType = SPH_FILTER_VALUES;
-	tFilter.m_uMinValue = pKillList[0];
-	tFilter.m_uMaxValue = pKillList[nEntries-1];
+	tFilter.m_iMinValue = pKillList[0];
+	tFilter.m_iMaxValue = pKillList[nEntries-1];
 	tFilter.m_sAttrName = "@id";
 	tFilter.SetExternalValues ( pKillList, nEntries );
 }
@@ -7800,7 +7801,7 @@ void SearchHandler_c::RunSubset ( int iStart, int iEnd )
 	{
 		AggrResult_t & tRes = m_dResults[iRes];
 		CSphQuery & tQuery = m_dQueries[iRes];
-		CSphSchemaMT * pExtraSchema = tQuery.m_bAgent?&m_dExtraSchemas[bWasLocalSorter?0:iRes]:NULL;
+		CSphSchemaMT * pExtraSchema = tQuery.m_bAgent ? m_dExtraSchemas.Begin() + ( bWasLocalSorter ? 0 : iRes ) : NULL;
 
 		// minimize sorters needs these pointers
 		tRes.m_dTag2Pools[0].m_pMva = m_dMvaStorage.Begin();
@@ -7824,14 +7825,20 @@ void SearchHandler_c::RunSubset ( int iStart, int iEnd )
 			if ( g_bCompatResults && !tQuery.m_bAgent )
 			{
 				if ( !MinimizeAggrResultCompat ( tRes, tQuery, m_dLocal.GetLength()!=0 ) )
+				{
+					tRes.m_iSuccesses = 0;
 					return;
+				}
 			} else
 			{
 				if ( pExtraSchema )
 					pExtraSchema->RLock();
 				UnlockOnDestroy SchemaLocker ( pExtraSchema );
 				if ( !MinimizeAggrResult ( tRes, tQuery, m_dLocal.GetLength()!=0, pExtraSchema, m_bSphinxql ) )
+				{
+					tRes.m_iSuccesses = 0;
 					return;
+				}
 			}
 		}
 
@@ -8247,7 +8254,7 @@ public:
 	void			AddConst ( int iList, const SqlNode_t& tValue );
 	void			SetStatement ( const SqlNode_t& tName, SqlSet_e eSet );
 	bool			AddFloatRangeFilter ( const CSphString & sAttr, float fMin, float fMax );
-	bool			AddUintRangeFilter ( const CSphString & sAttr, int64_t iMin, int64_t iMax );
+	bool			AddIntRangeFilter ( const CSphString & sAttr, int64_t iMin, int64_t iMax );
 	bool			AddUservarFilter ( const CSphString & sCol, const CSphString & sVar, bool bExclude );
 	bool			AddDistinct ( SqlNode_t * pNewExpr, SqlNode_t * pStart, SqlNode_t * pEnd );
 	CSphFilterSettings * AddFilter ( const CSphString & sCol, ESphFilter eType );
@@ -8395,7 +8402,7 @@ public:
 				break;
 			case SPH_ATTR_STRING:
 			case SPH_ATTR_UINT32SET:
-			case SPH_ATTR_UINT64SET:
+			case SPH_ATTR_INT64SET:
 				CSphMatch::SetAttr ( tLoc, 0 );
 				break;
 			default:
@@ -8711,7 +8718,7 @@ void SqlParser_c::UpdateMVAAttr ( const CSphString & sName, const SqlNode_t & dV
 			SphAttr_t uVal = *pVal;
 			if ( uVal>UINT_MAX )
 			{
-				eType = SPH_ATTR_UINT64SET;
+				eType = SPH_ATTR_INT64SET;
 			}
 			tUpd.m_dPool.Add ( (DWORD)uVal );
 			tUpd.m_dPool.Add ( (DWORD)( uVal>>32 ) );
@@ -8729,7 +8736,7 @@ void SqlParser_c::UpdateMVAAttr ( const CSphString & sName, const SqlNode_t & dV
 
 CSphFilterSettings * SqlParser_c::AddFilter ( const CSphString & sCol, ESphFilter eType )
 {
-	if ( sCol=="@weight" || sCol=="@count" || sCol=="count(*)" || sCol=="weight()" )
+	if ( sCol=="@count" || sCol=="count(*)" )
 	{
 		yyerror ( this, "Aggregates in 'where' clause prohibited" );
 		return NULL;
@@ -8751,13 +8758,13 @@ bool SqlParser_c::AddFloatRangeFilter ( const CSphString & sAttr, float fMin, fl
 	return true;
 }
 
-bool SqlParser_c::AddUintRangeFilter ( const CSphString & sAttr, int64_t iMin, int64_t iMax )
+bool SqlParser_c::AddIntRangeFilter ( const CSphString & sAttr, int64_t iMin, int64_t iMax )
 {
 	CSphFilterSettings * pFilter = AddFilter ( sAttr, SPH_FILTER_RANGE );
 	if ( !pFilter )
 		return false;
-	pFilter->m_uMinValue = (SphAttr_t)iMin;
-	pFilter->m_uMaxValue = (SphAttr_t)iMax;
+	pFilter->m_iMinValue = iMin;
+	pFilter->m_iMaxValue = iMax;
 	return true;
 }
 
@@ -8877,25 +8884,25 @@ bool ParseSqlQuery ( const char * sQuery, int iLen, CSphVector<SqlStmt_t> & dStm
 
 /////////////////////////////////////////////////////////////////////////////
 
-int sphGetPassageBoundary ( const CSphString & sPassageBoundaryMode )
+ESphSpz sphGetPassageBoundary ( const CSphString & sPassageBoundaryMode )
 {
 	if ( sPassageBoundaryMode.IsEmpty() )
-		return 0;
+		return SPH_SPZ_NONE;
 
-	int iMode = 0;
+	ESphSpz eSPZ = SPH_SPZ_NONE;
 	if ( sPassageBoundaryMode=="sentence" )
-		iMode = MAGIC_CODE_SENTENCE;
+		eSPZ = SPH_SPZ_SENTENCE;
 	else if ( sPassageBoundaryMode=="paragraph" )
-		iMode = MAGIC_CODE_PARAGRAPH;
+		eSPZ = SPH_SPZ_PARAGRAPH;
 	else if ( sPassageBoundaryMode=="zone" )
-		iMode = MAGIC_CODE_ZONE;
+		eSPZ = SPH_SPZ_ZONE;
 
-	return iMode;
+	return eSPZ;
 }
 
 bool sphCheckOptionsSPZ ( const ExcerptQuery_t & q, const CSphString & sPassageBoundaryMode, CSphString & sError )
 {
-	if ( q.m_iPassageBoundary )
+	if ( q.m_ePassageSPZ )
 	{
 		if ( q.m_iAround==0 )
 		{
@@ -8910,7 +8917,7 @@ bool sphCheckOptionsSPZ ( const ExcerptQuery_t & q, const CSphString & sPassageB
 
 	if ( q.m_bEmitZones )
 	{
-		if ( q.m_iPassageBoundary!=MAGIC_CODE_ZONE )
+		if ( q.m_ePassageSPZ!=SPH_SPZ_ZONE )
 		{
 			sError.SetSprintf ( "invalid combination of passage_boundary=%s and emit_zones", sPassageBoundaryMode.cstr() );
 			return false;
@@ -9080,6 +9087,8 @@ void SnippetRequestBuilder_t::BuildRequest ( const char * sIndex, NetOutputBuffe
 
 	if ( m_bScattered )
 		tOut.SendInt ( q.m_iRawFlags & ~EXCERPT_FLAG_LOAD_FILES );
+	else
+		tOut.SendInt ( q.m_iRawFlags );
 
 	tOut.SendString ( sIndex );
 	tOut.SendString ( q.m_sWords.cstr() );
@@ -9158,16 +9167,9 @@ static bool SnippetTransformPassageMacros ( CSphString & sSrc, CSphString & sPos
 	return true;
 }
 
-static bool IsSPZEnabled ( const ExcerptQuery_t & q )
-{
-	return ( q.m_iPassageBoundary || ( q.m_sStripMode=="retain" && q.m_bHighlightQuery ) );
-}
-
 
-static bool SetupStripperSPZ ( const CSphIndexSettings & tSettings, const ExcerptQuery_t & q, CSphScopedPtr<CSphHTMLStripper> & tStripper, ISphTokenizer * pTokenizer, CSphString & sError )
+static bool SetupStripperSPZ ( const CSphIndexSettings & tSettings, const ExcerptQuery_t & q, bool bSetupSPZ, CSphScopedPtr<CSphHTMLStripper> & tStripper, ISphTokenizer * pTokenizer, CSphString & sError )
 {
-	bool bSetupSPZ = IsSPZEnabled ( q );
-
 	if ( bSetupSPZ &&
 		( !pTokenizer->EnableSentenceIndexing ( sError ) || !pTokenizer->EnableZoneIndexing ( sError ) ) )
 	{
@@ -9221,6 +9223,24 @@ static CSphDict * SetupExactDict ( const CSphIndexSettings & tSettings, const Ex
 }
 
 
+static DWORD CollectQuerySPZ ( const XQNode_t * pNode )
+{
+	if ( !pNode )
+		return SPH_SPZ_NONE;
+
+	DWORD eSPZ = SPH_SPZ_NONE;
+	if ( pNode->GetOp()==SPH_QUERY_SENTENCE )
+		eSPZ |= SPH_SPZ_SENTENCE;
+	else if ( pNode->GetOp()==SPH_QUERY_PARAGRAPH )
+		eSPZ |= SPH_SPZ_PARAGRAPH;
+
+	ARRAY_FOREACH ( i, pNode->m_dChildren )
+		eSPZ |= CollectQuerySPZ ( pNode->m_dChildren[i] );
+
+	return eSPZ;
+}
+
+
 class SnippetContext_t : ISphNoncopyable
 {
 private:
@@ -9233,6 +9253,8 @@ public:
 	CSphScopedPtr<ISphTokenizer> m_tTokenizer;
 	CSphScopedPtr<CSphHTMLStripper> m_tStripper;
 	ISphTokenizer * m_pQueryTokenizer;
+	XQQuery_t m_tExtQuery;
+	DWORD m_eExtQuerySPZ;
 
 	SnippetContext_t()
 		: m_tDictCloned ( NULL )
@@ -9242,10 +9264,11 @@ public:
 		, m_tTokenizer ( NULL )
 		, m_tStripper ( NULL )
 		, m_pQueryTokenizer ( NULL )
+		, m_eExtQuerySPZ ( SPH_SPZ_NONE )
 	{
 	}
 
-	bool Setup ( CSphIndex * pIndex, const ExcerptQuery_t & tQuery, CSphString & sError )
+	bool Setup ( const CSphIndex * pIndex, const ExcerptQuery_t & tSettings, CSphString & sError )
 	{
 		CSphScopedPtr<CSphDict> tDictCloned ( NULL );
 		m_pDict = pIndex->GetDictionary();
@@ -9257,21 +9280,38 @@ public:
 		m_tTokenizer = pIndex->GetTokenizer()->Clone ( true );
 		m_pQueryTokenizer = m_tTokenizer.Ptr();
 
-		if ( !SetupStripperSPZ ( pIndex->GetSettings(), tQuery, m_tStripper, m_tTokenizer.Ptr(), sError ) )
+		// setup exact dictionary if needed
+		m_pDict = SetupExactDict ( pIndex->GetSettings(), tSettings, m_tExactDict, m_pDict, m_tTokenizer.Ptr() );
+		// TODO!!! check star dict too
+
+		if ( tSettings.m_bHighlightQuery )
+		{
+			if ( !sphParseExtendedQuery ( m_tExtQuery, tSettings.m_sWords.cstr(), m_pQueryTokenizer, &pIndex->GetMatchSchema(), m_pDict, pIndex->GetSettings() ) )
+			{
+				sError = m_tExtQuery.m_sParseError;
+				return false;
+			}
+			if ( m_tExtQuery.m_pRoot )
+				m_tExtQuery.m_pRoot->ClearFieldMask();
+
+			m_eExtQuerySPZ = SPH_SPZ_NONE;
+			m_eExtQuerySPZ |= CollectQuerySPZ ( m_tExtQuery.m_pRoot );
+			if ( m_tExtQuery.m_dZones.GetLength() )
+				m_eExtQuerySPZ |= SPH_SPZ_ZONE;
+		}
+
+		bool bSetupSPZ = ( tSettings.m_ePassageSPZ!=SPH_SPZ_NONE || m_eExtQuerySPZ!=SPH_SPZ_NONE ||
+			( tSettings.m_sStripMode=="retain" && tSettings.m_bHighlightQuery ) );
+
+		if ( !SetupStripperSPZ ( pIndex->GetSettings(), tSettings, bSetupSPZ, m_tStripper, m_tTokenizer.Ptr(), sError ) )
 			return false;
 
-		if ( IsSPZEnabled ( tQuery ) )
+		if ( bSetupSPZ )
 		{
 			m_tQueryTokenizer = pIndex->GetTokenizer()->Clone ( true );
 			m_pQueryTokenizer = m_tQueryTokenizer.Ptr();
 		}
 
-		////////////////////////////
-		// setup exact dictionary if needed
-		////////////////////////////
-
-		m_pDict = SetupExactDict ( pIndex->GetSettings(), tQuery, m_tExactDict, m_pDict, m_tTokenizer.Ptr() );
-
 		return true;
 	}
 };
@@ -9306,9 +9346,8 @@ void SnippetThreadFunc ( void * pArg )
 		if ( pQuery->m_iNext!=PROCESSED_ITEM )
 			continue;
 
-		sphBuildExcerpt ( *pQuery, tCtx.m_pDict, tCtx.m_tTokenizer.Ptr(),
-			&pDesc->m_pIndex->GetMatchSchema(), pDesc->m_pIndex,
-			pQuery->m_sError, tCtx.m_tStripper.Ptr(), tCtx.m_pQueryTokenizer );
+		sphBuildExcerpt ( *pQuery, pDesc->m_pIndex, tCtx.m_tStripper.Ptr(), tCtx.m_tExtQuery, tCtx.m_eExtQuerySPZ,
+			pQuery->m_sError, tCtx.m_pDict, tCtx.m_tTokenizer.Ptr(), tCtx.m_pQueryTokenizer );
 
 		if ( bDone )
 			return;
@@ -9430,7 +9469,8 @@ bool MakeSnippets ( CSphString sIndex, CSphVector<ExcerptQuery_t> & dQueries, CS
 		// boring single threaded loop
 		ARRAY_FOREACH ( i, dQueries )
 		{
-			sphBuildExcerpt ( dQueries[i], tCtx.m_pDict, tCtx.m_tTokenizer.Ptr(), &pIndex->GetMatchSchema(), pIndex, dQueries[i].m_sError, tCtx.m_tStripper.Ptr(), tCtx.m_pQueryTokenizer );
+			sphBuildExcerpt ( dQueries[i], pIndex, tCtx.m_tStripper.Ptr(), tCtx.m_tExtQuery, tCtx.m_eExtQuerySPZ,
+				sError, tCtx.m_pDict, tCtx.m_tTokenizer.Ptr(), tCtx.m_pQueryTokenizer );
 			bOk = ( bOk && ( !SnippetFormatErrorMessage ( &sError, dQueries[i].m_sError ) ) );
 		}
 	} else
@@ -9663,7 +9703,7 @@ void HandleCommandExcerpt ( int iSock, int iVer, InputBuffer_c & tReq )
 		return;
 	}
 
-	q.m_iPassageBoundary = sphGetPassageBoundary ( q.m_sRawPassageBoundary );
+	q.m_ePassageSPZ = sphGetPassageBoundary ( q.m_sRawPassageBoundary );
 
 	CSphString sError;
 
@@ -9839,7 +9879,7 @@ void UpdateRequestBuilder_t::BuildRequest ( const char * sIndexes, NetOutputBuff
 	ARRAY_FOREACH ( i, m_tUpd.m_dAttrs )
 	{
 		tOut.SendString ( m_tUpd.m_dAttrs[i].m_sName.cstr() );
-		tOut.SendInt ( ( m_tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_UINT32SET || m_tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_UINT64SET ) ? 1 : 0 );
+		tOut.SendInt ( ( m_tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_UINT32SET || m_tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_INT64SET ) ? 1 : 0 );
 	}
 	tOut.SendInt ( m_tUpd.m_dDocids.GetLength() );
 
@@ -10364,12 +10404,13 @@ void HandleClientSphinx ( int iSock, const char * sClientIP, ThdDesc_t * pThd )
 	bool bPersist = false;
 	int iTimeout = g_iReadTimeout; // wait 5 sec until first command
 	NetInputBuffer_c tBuf ( iSock );
+	int64_t iCID = ( pThd ? pThd->m_iConnID : g_iConnID );
 
 	// send my version
 	DWORD uServer = htonl ( SPHINX_SEARCHD_PROTO );
 	if ( sphSockSend ( iSock, (char*)&uServer, sizeof(DWORD) )!=sizeof(DWORD) )
 	{
-		sphWarning ( "failed to send server version (client=%s)", sClientIP );
+		sphWarning ( "failed to send server version (client=%s("INT64_FMT"))", sClientIP, iCID );
 		return;
 	}
 
@@ -10377,10 +10418,10 @@ void HandleClientSphinx ( int iSock, const char * sClientIP, ThdDesc_t * pThd )
 	tBuf.ReadFrom ( 4 ); // FIXME! magic
 	int iMagic = tBuf.GetInt (); // client version is for now unused
 
-	sphLogDebugv ( "conn %s: got handshake, major v.%d, err %d", sClientIP, iMagic, (int)tBuf.GetError() );
+	sphLogDebugv ( "conn %s("INT64_FMT"): got handshake, major v.%d, err %d", sClientIP, iCID, iMagic, (int)tBuf.GetError() );
 	if ( tBuf.GetError() )
 	{
-		sphLogDebugv ( "conn %s: exiting on handshake error", sClientIP );
+		sphLogDebugv ( "conn %s("INT64_FMT"): exiting on handshake error", sClientIP, iCID );
 		return;
 	}
 
@@ -10399,14 +10440,14 @@ void HandleClientSphinx ( int iSock, const char * sClientIP, ThdDesc_t * pThd )
 		// on SIGTERM, bail unconditionally and immediately, at all times
 		if ( !bCommand && g_bGotSigterm )
 		{
-			sphLogDebugv ( "conn %s: bailing on SIGTERM", sClientIP );
+			sphLogDebugv ( "conn %s("INT64_FMT"): bailing on SIGTERM", sClientIP, iCID );
 			break;
 		}
 
 		// on SIGHUP vs pconn, bail if a pconn was idle for 1 sec
 		if ( bPersist && !bCommand && g_bGotSighup && sphSockPeekErrno()==ETIMEDOUT )
 		{
-			sphLogDebugv ( "conn %s: bailing idle pconn on SIGHUP", sClientIP );
+			sphLogDebugv ( "conn %s("INT64_FMT"): bailing idle pconn on SIGHUP", sClientIP, iCID );
 			break;
 		}
 
@@ -10416,7 +10457,7 @@ void HandleClientSphinx ( int iSock, const char * sClientIP, ThdDesc_t * pThd )
 			iPconnIdle += iTimeout;
 			if ( iPconnIdle>=g_iClientTimeout )
 			{
-				sphLogDebugv ( "conn %s: bailing idle pconn on client_timeout", sClientIP );
+				sphLogDebugv ( "conn %s("INT64_FMT"): bailing idle pconn on client_timeout", sClientIP, iCID );
 				break;
 			}
 			continue;
@@ -10439,7 +10480,7 @@ void HandleClientSphinx ( int iSock, const char * sClientIP, ThdDesc_t * pThd )
 			// lets avoid agent log flood
 			//
 			// sphWarning ( "failed to receive client version and request (client=%s, error=%s)", sClientIP, sphSockError() );
-			sphLogDebugv ( "conn %s: bailing on failed request header (sockerr=%s)", sClientIP, sphSockError() );
+			sphLogDebugv ( "conn %s("INT64_FMT"): bailing on failed request header (sockerr=%s)", sClientIP, iCID, sphSockError() );
 			return;
 		}
 
@@ -10473,7 +10514,7 @@ void HandleClientSphinx ( int iSock, const char * sClientIP, ThdDesc_t * pThd )
 		assert ( iLength>=0 && iLength<=g_iMaxPacketSize );
 		if ( iLength && !tBuf.ReadFrom ( iLength ) )
 		{
-			sphWarning ( "failed to receive client request body (client=%s, exp=%d, error='%s')", sClientIP, iLength, sphSockError() );
+			sphWarning ( "failed to receive client request body (client=%s("INT64_FMT"), exp=%d, error='%s')", sClientIP, iCID, iLength, sphSockError() );
 			return;
 		}
 
@@ -10493,7 +10534,7 @@ void HandleClientSphinx ( int iSock, const char * sClientIP, ThdDesc_t * pThd )
 			pThd->m_sCommand = g_dApiCommands[iCommand];
 		THD_STATE ( THD_QUERY );
 
-		sphLogDebugv ( "conn %s: got command %d, handling", sClientIP, iCommand );
+		sphLogDebugv ( "conn %s("INT64_FMT"): got command %d, handling", sClientIP, iCID, iCommand );
 		switch ( iCommand )
 		{
 			case SEARCHD_COMMAND_SEARCH:	HandleCommandSearch ( iSock, iCommandVer, tBuf ); break;
@@ -10503,7 +10544,7 @@ void HandleClientSphinx ( int iSock, const char * sClientIP, ThdDesc_t * pThd )
 			case SEARCHD_COMMAND_PERSIST:
 				bPersist = ( tBuf.GetInt()!=0 );
 				iTimeout = 1;
-				sphLogDebugv ( "conn %s: pconn is now %s", sClientIP, bPersist ? "on" : "off" );
+				sphLogDebugv ( "conn %s("INT64_FMT"): pconn is now %s", sClientIP, iCID, bPersist ? "on" : "off" );
 				break;
 			case SEARCHD_COMMAND_STATUS:	HandleCommandStatus ( iSock, iCommandVer, tBuf ); break;
 			case SEARCHD_COMMAND_FLUSHATTRS:HandleCommandFlush ( iSock, iCommandVer, tBuf ); break;
@@ -10515,7 +10556,7 @@ void HandleClientSphinx ( int iSock, const char * sClientIP, ThdDesc_t * pThd )
 		SphCrashLogger_c::SetLastQuery ( CrashQuery_t() );
 	} while ( bPersist );
 
-	sphLogDebugv ( "conn %s: exiting", sClientIP );
+	sphLogDebugv ( "conn %s("INT64_FMT"): exiting", sClientIP, iCID );
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -10885,7 +10926,7 @@ void HandleMysqlInsert ( const SqlStmt_t & tStmt, NetOutputBuffer_c & tOut, BYTE
 				bResult = tDoc.SetDefaultAttr ( tLoc, tCol.m_eAttrType );
 				if ( tCol.m_eAttrType==SPH_ATTR_STRING )
 					dStrings.Add ( NULL );
-				if ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_UINT64SET )
+				if ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_INT64SET )
 					dMvas.Add ( 0 );
 			} else
 			{
@@ -10897,18 +10938,18 @@ void HandleMysqlInsert ( const SqlStmt_t & tStmt, NetOutputBuffer_c & tOut, BYTE
 					sError.SetSprintf ( "raw %d, column %d: internal error: unknown insval type %d", 1+c, 1+iQuerySchemaIdx, tVal.m_iType ); // 1 for human base
 					break;
 				}
-				if ( tVal.m_iType==TOK_CONST_MVA && !( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_UINT64SET ) )
+				if ( tVal.m_iType==TOK_CONST_MVA && !( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_INT64SET ) )
 				{
 					sError.SetSprintf ( "raw %d, column %d: MVA value specified for a non-MVA column", 1+c, 1+iQuerySchemaIdx ); // 1 for human base
 					break;
 				}
-				if ( ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_UINT64SET ) && tVal.m_iType!=TOK_CONST_MVA )
+				if ( ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_INT64SET ) && tVal.m_iType!=TOK_CONST_MVA )
 				{
 					sError.SetSprintf ( "raw %d, column %d: non-MVA value specified for a MVA column", 1+c, 1+iQuerySchemaIdx ); // 1 for human base
 					break;
 				}
 
-				if ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_UINT64SET )
+				if ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_INT64SET )
 				{
 					// collect data from scattered insvals
 					// FIXME! maybe remove this mess, and just have a single m_dMvas pool in parser instead?
@@ -10918,7 +10959,7 @@ void HandleMysqlInsert ( const SqlStmt_t & tStmt, NetOutputBuffer_c & tOut, BYTE
 						tVal.m_pVals->Uniq();
 						iLen = tVal.m_pVals->GetLength();
 					}
-					if ( tCol.m_eAttrType==SPH_ATTR_UINT64SET )
+					if ( tCol.m_eAttrType==SPH_ATTR_INT64SET )
 					{
 						dMvas.Add ( iLen*2 );
 						for ( int j=0; j<iLen; j++ )
@@ -11126,7 +11167,7 @@ void HandleMysqlCallSnippets ( NetOutputBuffer_c & tOut, BYTE uPacketID, SqlStmt
 	if ( q.m_iLoadFiles )
 		q.m_sFilePrefix = g_sSnippetsFilePrefix;
 
-	q.m_iPassageBoundary = sphGetPassageBoundary ( q.m_sRawPassageBoundary );
+	q.m_ePassageSPZ = sphGetPassageBoundary ( q.m_sRawPassageBoundary );
 
 	if ( !sphCheckOptionsSPZ ( q, q.m_sRawPassageBoundary, sError ) )
 	{
@@ -11565,7 +11606,7 @@ void HandleMysqlUpdate ( NetOutputBuffer_c & tOut, BYTE uPacketID, const SqlStmt
 	ARRAY_FOREACH_COND ( i, tStmt.m_tUpdate.m_dAttrs, !bMvaUpdate )
 	{
 		bMvaUpdate = ( tStmt.m_tUpdate.m_dAttrs[i].m_eAttrType==SPH_ATTR_UINT32SET
-			|| tStmt.m_tUpdate.m_dAttrs[i].m_eAttrType==SPH_ATTR_UINT64SET );
+			|| tStmt.m_tUpdate.m_dAttrs[i].m_eAttrType==SPH_ATTR_INT64SET );
 	}
 
 	ARRAY_FOREACH ( iIdx, dIndexNames )
@@ -11859,7 +11900,7 @@ void SendMysqlSelectResult ( NetOutputBuffer_c & tOut, BYTE & uPacketID, SqlRowB
 				dRows.PutNumeric ( "%f", tMatch.GetAttrFloat(tLoc) );
 				break;
 
-			case SPH_ATTR_UINT64SET:
+			case SPH_ATTR_INT64SET:
 			case SPH_ATTR_UINT32SET:
 				{
 					int iLenOff = dRows.Length();
@@ -11884,9 +11925,9 @@ void SendMysqlSelectResult ( NetOutputBuffer_c & tOut, BYTE & uPacketID, SqlRowB
 						{
 							for ( ; nValues; nValues-=2, pValues+=2 )
 							{
-								uint64_t uVal = MVA_UPSIZE ( pValues );
+								int64_t iVal = MVA_UPSIZE ( pValues );
 								dRows.Reserve ( SPH_MAX_NUMERIC_STR );
-								int iLen = snprintf ( dRows.Get(), SPH_MAX_NUMERIC_STR, nValues>2 ? UINT64_FMT"," : UINT64_FMT, uVal );
+								int iLen = snprintf ( dRows.Get(), SPH_MAX_NUMERIC_STR, nValues>2 ? INT64_FMT"," : INT64_FMT, iVal );
 								dRows.IncPtr ( iLen );
 							}
 						}
@@ -12913,11 +12954,12 @@ void HandleClientMySQL ( int iSock, const char * sClientIP, ThdDesc_t * pThd )
 	const int INTERACTIVE_TIMEOUT = 900;
 	NetInputBuffer_c tIn ( iSock );
 	NetOutputBuffer_c tOut ( iSock ); // OPTIMIZE? looks like buffer size matters a lot..
+	int64_t iCID = ( pThd ? pThd->m_iConnID : g_iConnID );
 
 	if ( sphSockSend ( iSock, g_sMysqlHandshake, g_iMysqlHandshake )!=g_iMysqlHandshake )
 	{
 		int iErrno = sphSockGetErrno ();
-		sphWarning ( "failed to send server version (client=%s, error: %d '%s')", sClientIP, iErrno, sphSockError ( iErrno ) );
+		sphWarning ( "failed to send server version (client=%s("INT64_FMT"), error: %d '%s')", sClientIP, iCID, iErrno, sphSockError ( iErrno ) );
 		return;
 	}
 

+ 153 - 102
src/sphinx.cpp

@@ -2017,7 +2017,7 @@ protected:
 	BYTE *	GetTokenSyn ();
 	bool	BlendAdjust ( BYTE * pPosition );
 	BYTE *	GetBlendedVariant ();
-	int		CodepointArbitration ( int iCodepoint, bool bWasEscaped, bool bSpaceAhead );
+	int		CodepointArbitration ( int iCodepoint, bool bWasEscaped, BYTE uNextByte );
 
 	typedef CSphOrderedHash <int, int, IdentityHash_fn, 4096> CSphSynonymHash;
 	bool	LoadSynonym ( char * sBuffer, const char * szFilename, int iLine, CSphSynonymHash & tHash, CSphString & sError );
@@ -2568,7 +2568,7 @@ bool CSphCharsetDefinitionParser::AddRange ( const CSphRemapRange & tRange, CSph
 	}
 
 	CSphString sError;
-	sError.SetSprintf ( "dest range (U+0x%x) below U+0x20, not allowed", tRange.m_iRemapStart );
+	sError.SetSprintf ( "dest range (U+%x) below U+20, not allowed", tRange.m_iRemapStart );
 	Error ( sError.cstr() );
 	return false;
 }
@@ -3021,6 +3021,38 @@ bool ISphTokenizer::SetCaseFolding ( const char * sConfig, CSphString & sError )
 		return false;
 	}
 
+	const int MIN_CODE = 0x21;
+	ARRAY_FOREACH ( i, dRemaps )
+	{
+		CSphRemapRange & tMap = dRemaps[i];
+
+		if ( tMap.m_iStart<MIN_CODE || tMap.m_iStart>=m_tLC.MAX_CODE )
+		{
+			sphWarning ( "wrong character mapping start specified: U+%x, should be between U+%x and U+%x (inclusive); CLAMPED", tMap.m_iStart, MIN_CODE, m_tLC.MAX_CODE-1 );
+			tMap.m_iStart = Min ( Max ( tMap.m_iStart, MIN_CODE ), m_tLC.MAX_CODE-1 );
+		}
+
+		if ( tMap.m_iEnd<MIN_CODE || tMap.m_iEnd>=m_tLC.MAX_CODE )
+		{
+			sphWarning ( "wrong character mapping end specified: U+%x, should be between U+%x and U+%x (inclusive); CLAMPED", tMap.m_iEnd, MIN_CODE, m_tLC.MAX_CODE-1 );
+			tMap.m_iEnd = Min ( Max ( tMap.m_iEnd, MIN_CODE ), m_tLC.MAX_CODE-1 );
+		}
+
+		if ( tMap.m_iRemapStart<MIN_CODE || tMap.m_iRemapStart>=m_tLC.MAX_CODE )
+		{
+			sphWarning ( "wrong character remapping start specified: U+%x, should be between U+%x and U+%x (inclusive); CLAMPED", tMap.m_iRemapStart, MIN_CODE, m_tLC.MAX_CODE-1 );
+			tMap.m_iRemapStart = Min ( Max ( tMap.m_iRemapStart, MIN_CODE ), m_tLC.MAX_CODE-1 );
+		}
+
+		int iRemapEnd = tMap.m_iRemapStart+tMap.m_iEnd-tMap.m_iStart;
+		if ( iRemapEnd<MIN_CODE || iRemapEnd>=m_tLC.MAX_CODE )
+		{
+			sphWarning ( "wrong character remapping end specified: U+%x, should be between U+%x and U+%x (inclusive); IGNORED", iRemapEnd, MIN_CODE, m_tLC.MAX_CODE-1 );
+			dRemaps.Remove(i);
+			i--;
+		}
+	}
+
 	m_tLC.Reset ();
 	m_tLC.AddRemaps ( dRemaps, 0 );
 	return true;
@@ -3695,20 +3727,34 @@ BYTE * CSphTokenizerTraits<IS_UTF8>::GetBlendedVariant ()
 }
 
 
-static inline bool IsModifier ( int iSymbol )
+static inline bool IsCapital ( int iCh )
 {
-	return iSymbol=='^' || iSymbol=='$' || iSymbol=='=' || iSymbol=='*';
+	return iCh>='A' && iCh<='Z';
 }
 
 
-static inline bool IsCapital ( int iCh )
+static inline bool IsWhitespace ( BYTE c )
 {
-	return iCh>='A' && iCh<='Z';
+	return ( c=='\0' || c==' ' || c=='\t' || c=='\r' || c=='\n' );
+}
+
+
+static inline bool IsWhitespace ( int c )
+{
+	return ( c=='\0' || c==' ' || c=='\t' || c=='\r' || c=='\n' );
+}
+
+
+static inline bool IsBoundary ( BYTE c, bool bPhrase )
+{
+	// FIXME? sorta intersects with specials
+	// then again, a shortened-down list (more strict syntax) is reasonable here too
+	return IsWhitespace(c) || c=='"' || ( !bPhrase && ( c=='(' || c==')' || c=='|' ) );
 }
 
 
 template < bool IS_UTF8 >
-int CSphTokenizerTraits<IS_UTF8>::CodepointArbitration ( int iCode, bool bWasEscaped, bool bSpaceAhead )
+int CSphTokenizerTraits<IS_UTF8>::CodepointArbitration ( int iCode, bool bWasEscaped, BYTE uNextByte )
 {
 	/////////////////////////////
 	// indexing time arbitration
@@ -3809,7 +3855,7 @@ int CSphTokenizerTraits<IS_UTF8>::CodepointArbitration ( int iCode, bool bWasEsc
 	{
 		bool bBlend =
 			bWasEscaped || // escaped characters should always act as blended
-			( m_bPhrase && !IsModifier ( iSymbol ) ) || // non-modifier special inside phrase
+			( m_bPhrase && !sphIsModifier ( iSymbol ) ) || // non-modifier special inside phrase
 			( m_iAccum && ( iSymbol=='@' || iSymbol=='/' || iSymbol=='-' ) ); // some specials in the middle of a token
 
 		// clear special or blend flags
@@ -3821,12 +3867,12 @@ int CSphTokenizerTraits<IS_UTF8>::CodepointArbitration ( int iCode, bool bWasEsc
 	// escaped specials are not special
 	// dash and dollar inside the word are not special (however, single opening modifier is not a word!)
 	// non-modifier specials within phrase are not special
-	bool bDashInside = ( m_iAccum && iSymbol=='-' && !( m_iAccum==1 && IsModifier ( m_sAccum[0] ) ));
+	bool bDashInside = ( m_iAccum && iSymbol=='-' && !( m_iAccum==1 && sphIsModifier ( m_sAccum[0] ) ));
 	if ( iCode & FLAG_CODEPOINT_SPECIAL )
 		if ( bWasEscaped
 			|| bDashInside
-			|| ( m_iAccum && iSymbol=='$' && !bSpaceAhead )
-			|| ( m_bPhrase && iSymbol!='"' && !IsModifier ( iSymbol ) ) )
+			|| ( m_iAccum && iSymbol=='$' && !IsBoundary ( uNextByte, m_bPhrase ) )
+			|| ( m_bPhrase && iSymbol!='"' && !sphIsModifier ( iSymbol ) ) )
 	{
 		if ( iCode & FLAG_CODEPOINT_DUAL )
 			iCode &= ~( FLAG_CODEPOINT_SPECIAL | FLAG_CODEPOINT_DUAL );
@@ -3930,16 +3976,6 @@ static inline bool Special2Simple ( int & iCodepoint )
 	return false;
 }
 
-static inline bool IsWhitespace ( BYTE c )
-{
-	return ( c=='\0' || c==' ' || c=='\t' || c=='\r' || c=='\n' );
-}
-
-static inline bool IsWhitespace ( int c )
-{
-	return ( c=='\0' || c==' ' || c=='\t' || c=='\r' || c=='\n' );
-}
-
 template < bool IS_UTF8 >
 BYTE * CSphTokenizerTraits<IS_UTF8>::GetTokenSyn ()
 {
@@ -4015,7 +4051,7 @@ BYTE * CSphTokenizerTraits<IS_UTF8>::GetTokenSyn ()
 				iLastCodepoint = iCode;
 			}
 
-			iFolded = CodepointArbitration ( iFolded, false, IsWhitespace ( *m_pCur ) );
+			iFolded = CodepointArbitration ( iFolded, false, *m_pCur );
 
 			iLastFolded = iFolded;
 
@@ -4260,7 +4296,7 @@ BYTE * CSphTokenizerTraits<IS_UTF8>::GetTokenSyn ()
 					iLast = iCode;
 				}
 
-				iFolded = CodepointArbitration ( iFolded, false, IsWhitespace ( *m_pCur ) );
+				iFolded = CodepointArbitration ( iFolded, false, *m_pCur );
 
 				if ( IsSeparator ( iFolded, false ) )
 				{
@@ -4528,7 +4564,7 @@ BYTE * CSphTokenizer_SBCS::GetToken ()
 			}
 		}
 
-		iCode = CodepointArbitration ( iCode, bWasEscaped, IsWhitespace ( *m_pCur ) );
+		iCode = CodepointArbitration ( iCode, bWasEscaped, *m_pCur );
 
 		// handle ignored chars
 		if ( iCode & FLAG_CODEPOINT_IGNORE )
@@ -4654,7 +4690,7 @@ BYTE * CSphTokenizer_SBCS::GetToken ()
 			// tricky bit
 			// heading modifiers must not (!) affect blended status
 			// eg. we want stuff like '=-' (w/o apostrophes) thrown away when pure_blend is on
-			if (!( m_bQueryMode && !m_iAccum && IsModifier(iCode) ) )
+			if (!( m_bQueryMode && !m_iAccum && sphIsModifier(iCode) ) )
 				m_bNonBlended = m_bNonBlended || bNoBlend;
 			m_sAccum[m_iAccum++] = (BYTE)iCode;
 		}
@@ -4768,7 +4804,7 @@ BYTE * CSphTokenizer_UTF8::GetToken ()
 		}
 
 		// handle all the flags..
-		iCode = CodepointArbitration ( iCode, bWasEscaped, IsWhitespace ( *m_pCur ) );
+		iCode = CodepointArbitration ( iCode, bWasEscaped, *m_pCur );
 
 		// handle ignored chars
 		if ( iCode & FLAG_CODEPOINT_IGNORE )
@@ -4882,7 +4918,7 @@ BYTE * CSphTokenizer_UTF8::GetToken ()
 		// tricky bit
 		// heading modifiers must not (!) affect blended status
 		// eg. we want stuff like '=-' (w/o apostrophes) thrown away when pure_blend is on
-		if (!( m_bQueryMode && !m_iAccum && IsModifier ( iCode & MASK_CODEPOINT ) ) )
+		if (!( m_bQueryMode && !m_iAccum && sphIsModifier ( iCode & MASK_CODEPOINT ) ) )
 			m_bNonBlended = m_bNonBlended || !( iCode & FLAG_CODEPOINT_BLEND );
 
 		// just accumulate
@@ -5161,8 +5197,8 @@ void CSphMultiformTokenizer::SetBuffer ( BYTE * sBuffer, int iLength )
 CSphFilterSettings::CSphFilterSettings ()
 	: m_sAttrName	( "" )
 	, m_bExclude	( false )
-	, m_uMinValue	( 0 )
-	, m_uMaxValue	( UINT_MAX )
+	, m_iMinValue	( LLONG_MIN )
+	, m_iMaxValue	( LLONG_MAX )
 	, m_pValues		( NULL )
 	, m_nValues		( 0 )
 {}
@@ -5191,7 +5227,10 @@ bool CSphFilterSettings::operator == ( const CSphFilterSettings & rhs ) const
 	switch ( m_eType )
 	{
 		case SPH_FILTER_RANGE:
-			return m_uMinValue==rhs.m_uMinValue && m_uMaxValue==rhs.m_uMaxValue;
+			return m_iMinValue==rhs.m_iMinValue && m_iMaxValue==rhs.m_iMaxValue;
+
+		case SPH_FILTER_FLOATRANGE:
+			return m_fMinValue==rhs.m_fMinValue && m_fMaxValue==rhs.m_fMaxValue;
 
 		case SPH_FILTER_VALUES:
 			if ( m_dValues.GetLength()!=rhs.m_dValues.GetLength() )
@@ -7718,7 +7757,7 @@ int CSphIndex_VLN::UpdateAttributes ( const CSphAttrUpdate & tUpd, int iIndex, C
 		// forbid updates on non-int columns
 		const CSphColumnInfo & tCol = m_tSchema.GetAttr(iIndex);
 		if (!( tCol.m_eAttrType==SPH_ATTR_BOOL || tCol.m_eAttrType==SPH_ATTR_INTEGER || tCol.m_eAttrType==SPH_ATTR_TIMESTAMP
-			|| tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_UINT64SET
+			|| tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_INT64SET
 			|| tCol.m_eAttrType==SPH_ATTR_BIGINT || tCol.m_eAttrType==SPH_ATTR_FLOAT ))
 		{
 			sError.SetSprintf ( "attribute '%s' can not be updated (must be boolean, integer, bigint, float, timestamp, or MVA)", tUpd.m_dAttrs[i].m_sName.cstr() );
@@ -7726,28 +7765,28 @@ int CSphIndex_VLN::UpdateAttributes ( const CSphAttrUpdate & tUpd, int iIndex, C
 		}
 
 		// forbid updates on MVA columns if there's no arena
-		if ( ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_UINT64SET ) && !g_pMvaArena )
+		if ( ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_INT64SET ) && !g_pMvaArena )
 		{
 			sError.SetSprintf ( "MVA attribute '%s' can not be updated (MVA arena not initialized)", tCol.m_sName.cstr() );
 			return -1;
 		}
 
-		bool bSrcMva = ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_UINT64SET );
-		bool bDstMva = ( tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_UINT32SET || tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_UINT64SET );
+		bool bSrcMva = ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_INT64SET );
+		bool bDstMva = ( tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_UINT32SET || tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_INT64SET );
 		if ( bSrcMva!=bDstMva )
 		{
 			sError.SetSprintf ( "attribute '%s' MVA flag mismatch", tUpd.m_dAttrs[i].m_sName.cstr() );
 			return -1;
 		}
 
-		if ( tCol.m_eAttrType==SPH_ATTR_UINT32SET && tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_UINT64SET )
+		if ( tCol.m_eAttrType==SPH_ATTR_UINT32SET && tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_INT64SET )
 		{
 			sError.SetSprintf ( "attribute '%s' MVA bits (dst=%d, src=%d) mismatch", tUpd.m_dAttrs[i].m_sName.cstr(),
 				tCol.m_eAttrType, tUpd.m_dAttrs[i].m_eAttrType );
 			return -1;
 		}
 
-		if ( tCol.m_eAttrType==SPH_ATTR_UINT64SET )
+		if ( tCol.m_eAttrType==SPH_ATTR_INT64SET )
 			uDst64 |= ( U64C(1)<<i );
 
 		dFloats.Add ( tCol.m_eAttrType==SPH_ATTR_FLOAT );
@@ -7775,7 +7814,7 @@ int CSphIndex_VLN::UpdateAttributes ( const CSphAttrUpdate & tUpd, int iIndex, C
 	// storage upfront to avoid suddenly having to rollback if allocation fails later
 	int iNumMVA = 0;
 	ARRAY_FOREACH ( i, tUpd.m_dAttrs )
-		if ( dIndexes[i]>=0 && ( tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_UINT32SET || tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_UINT64SET ) )
+		if ( dIndexes[i]>=0 && ( tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_UINT32SET || tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_INT64SET ) )
 			iNumMVA++;
 
 	// OPTIMIZE! execute the code below conditionally
@@ -7799,7 +7838,7 @@ int CSphIndex_VLN::UpdateAttributes ( const CSphAttrUpdate & tUpd, int iIndex, C
 		ARRAY_FOREACH_COND ( iCol, tUpd.m_dAttrs, !bFailed )
 		{
 			bool bSrcMva32 = ( tUpd.m_dAttrs[iCol].m_eAttrType==SPH_ATTR_UINT32SET );
-			bool bSrcMva64 = ( tUpd.m_dAttrs[iCol].m_eAttrType==SPH_ATTR_UINT64SET );
+			bool bSrcMva64 = ( tUpd.m_dAttrs[iCol].m_eAttrType==SPH_ATTR_INT64SET );
 			if (!( bSrcMva32 || bSrcMva64 )) // FIXME! optimize using a prebuilt dword mask?
 			{
 				iPoolPos++;
@@ -7867,7 +7906,7 @@ int CSphIndex_VLN::UpdateAttributes ( const CSphAttrUpdate & tUpd, int iIndex, C
 		ARRAY_FOREACH ( iCol, tUpd.m_dAttrs )
 		{
 			bool bSrcMva32 = ( tUpd.m_dAttrs[iCol].m_eAttrType==SPH_ATTR_UINT32SET );
-			bool bSrcMva64 = ( tUpd.m_dAttrs[iCol].m_eAttrType==SPH_ATTR_UINT64SET );
+			bool bSrcMva64 = ( tUpd.m_dAttrs[iCol].m_eAttrType==SPH_ATTR_INT64SET );
 			if (!( bSrcMva32 || bSrcMva64 )) // FIXME! optimize using a prebuilt dword mask?
 			{
 				// plain update
@@ -7914,7 +7953,7 @@ int CSphIndex_VLN::UpdateAttributes ( const CSphAttrUpdate & tUpd, int iIndex, C
 			iPos += uNew;
 			if ( dIndexes[iCol]>=0 )
 			{
-				uint64_t uNewMin = LLONG_MAX, uNewMax = 0;
+				int64_t iNewMin = LLONG_MAX, iNewMax = LLONG_MIN;
 				int iNewIndex = dMvaPtrs[iMvaPtr++];
 
 				SphDocID_t* pDocid = (SphDocID_t*)(g_pMvaArena + iNewIndex);
@@ -7937,9 +7976,9 @@ int CSphIndex_VLN::UpdateAttributes ( const CSphAttrUpdate & tUpd, int iIndex, C
 					{
 						while ( iLen )
 						{
-							uint64_t uValue = MVA_UPSIZE ( pSrc );
-							uNewMin = Min ( uNewMin, uValue );
-							uNewMax = Max ( uNewMax, uValue );
+							int64_t uValue = MVA_UPSIZE ( pSrc );
+							iNewMin = Min ( iNewMin, uValue );
+							iNewMax = Max ( iNewMax, uValue );
 							*pDst++ = *pSrc++;
 							*pDst++ = *pSrc++;
 							iLen -= 2;
@@ -7951,8 +7990,8 @@ int CSphIndex_VLN::UpdateAttributes ( const CSphAttrUpdate & tUpd, int iIndex, C
 							DWORD uValue = *pSrc;
 							pSrc += 2;
 							*pDst++ = uValue;
-							uNewMin = Min ( uNewMin, uValue );
-							uNewMax = Max ( uNewMax, uValue );
+							iNewMin = Min ( iNewMin, uValue );
+							iNewMax = Max ( iNewMax, uValue );
 						}
 					}
 				}
@@ -7965,12 +8004,12 @@ int CSphIndex_VLN::UpdateAttributes ( const CSphAttrUpdate & tUpd, int iIndex, C
 					for ( int i=0; i<2; i++ )
 				{
 					DWORD * pBlock = i ? pBlockRanges : pIndexRanges;
-					uint64_t uMin = sphGetRowAttr ( DOCINFO2ATTRS ( pBlock ), dLocators[iCol] );
-					uint64_t uMax = sphGetRowAttr ( DOCINFO2ATTRS ( pBlock+iRowStride ), dLocators[iCol] );
-					if ( uNewMin<uMin || uNewMax>uMax )
+					int64_t iMin = sphGetRowAttr ( DOCINFO2ATTRS ( pBlock ), dLocators[iCol] );
+					int64_t iMax = sphGetRowAttr ( DOCINFO2ATTRS ( pBlock+iRowStride ), dLocators[iCol] );
+					if ( iNewMin<iMin || iNewMax>iMax )
 					{
-						sphSetRowAttr ( DOCINFO2ATTRS ( pBlock ), dLocators[iCol], Min ( uMin, uNewMin ) );
-						sphSetRowAttr ( DOCINFO2ATTRS ( pBlock+iRowStride ), dLocators[iCol], Max ( uMax, uNewMax ) );
+						sphSetRowAttr ( DOCINFO2ATTRS ( pBlock ), dLocators[iCol], Min ( iMin, iNewMin ) );
+						sphSetRowAttr ( DOCINFO2ATTRS ( pBlock+iRowStride ), dLocators[iCol], Max ( iMax, iNewMax ) );
 					}
 				}
 
@@ -8033,7 +8072,7 @@ bool CSphIndex_VLN::LoadPersistentMVA ( CSphString & sError )
 	for ( int i=0; i<m_tSchema.GetAttrsCount(); i++ )
 	{
 		const CSphColumnInfo & tAttr = m_tSchema.GetAttr(i);
-		if ( tAttr.m_eAttrType==SPH_ATTR_UINT64SET )
+		if ( tAttr.m_eAttrType==SPH_ATTR_INT64SET )
 			dMvaLocators.Add ( tAttr.m_tLocator );
 	}
 	assert ( dMvaLocators.GetLength()!=0 );
@@ -8221,7 +8260,7 @@ bool CSphIndex_VLN::SaveAttributes ( CSphString & sError ) const
 		for ( int i=0; i<m_tSchema.GetAttrsCount(); i++ )
 		{
 			const CSphColumnInfo & tAttr = m_tSchema.GetAttr(i);
-			if ( tAttr.m_eAttrType==SPH_ATTR_UINT64SET )
+			if ( tAttr.m_eAttrType==SPH_ATTR_INT64SET )
 				dMvaLocators.Add ( tAttr.m_tLocator );
 		}
 		assert ( dMvaLocators.GetLength()!=0 );
@@ -9345,13 +9384,13 @@ struct MvaEntry_t
 {
 	SphDocID_t	m_uDocID;
 	int			m_iAttr;
-	uint64_t	m_uValue;
+	int64_t		m_iValue;
 
 	inline bool operator < ( const MvaEntry_t & rhs ) const
 	{
 		if ( m_uDocID!=rhs.m_uDocID ) return m_uDocID<rhs.m_uDocID;
 		if ( m_iAttr!=rhs.m_iAttr ) return m_iAttr<rhs.m_iAttr;
-		return m_uValue<rhs.m_uValue;
+		return m_iValue<rhs.m_iValue;
 	}
 };
 
@@ -9398,7 +9437,7 @@ bool CSphIndex_VLN::BuildMVA ( const CSphVector<CSphSource*> & dSources,
 	for ( int i=0; i<m_tSchema.GetAttrsCount(); i++ )
 	{
 		const CSphColumnInfo & tAttr = m_tSchema.GetAttr(i);
-		if ( tAttr.m_eAttrType==SPH_ATTR_UINT64SET )
+		if ( tAttr.m_eAttrType==SPH_ATTR_INT64SET )
 		{
 			dMvaIndexes.Add ( i );
 			if ( tAttr.m_eSrc!=SPH_ATTRSRC_FIELD )
@@ -9453,10 +9492,10 @@ bool CSphIndex_VLN::BuildMVA ( const CSphVector<CSphSource*> & dSources,
 					pMva->m_iAttr = i;
 					if ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET )
 					{
-						pMva->m_uValue = pSource->m_dMva[0];
+						pMva->m_iValue = pSource->m_dMva[0];
 					} else
 					{
-						pMva->m_uValue = MVA_UPSIZE ( pSource->m_dMva.Begin() );
+						pMva->m_iValue = MVA_UPSIZE ( pSource->m_dMva.Begin() );
 					}
 
 					if ( ++pMva>=pMvaMax )
@@ -9558,7 +9597,7 @@ bool CSphIndex_VLN::BuildMVA ( const CSphVector<CSphSource*> & dSources,
 	// values-list := values-count, value [ values-count ]
 	// note that mva32 come first then mva64
 	SphDocID_t uCurID = 0;
-	CSphVector < CSphVector<uint64_t> > dCurInfo;
+	CSphVector < CSphVector<int64_t> > dCurInfo;
 	dCurInfo.Resize ( dMvaIndexes.GetLength() );
 
 	for ( ;; )
@@ -9575,7 +9614,7 @@ bool CSphIndex_VLN::BuildMVA ( const CSphVector<CSphSource*> & dSources,
 					if ( i>=iMva64 )
 					{
 						wrMva.PutDword ( iLen*2 );
-						wrMva.PutBytes ( dCurInfo[i].Begin(), sizeof(uint64_t)*iLen );
+						wrMva.PutBytes ( dCurInfo[i].Begin(), sizeof(int64_t)*iLen );
 					} else
 					{
 						wrMva.PutDword ( iLen );
@@ -9598,9 +9637,9 @@ bool CSphIndex_VLN::BuildMVA ( const CSphVector<CSphSource*> & dSources,
 		// accumulate this entry
 #if PARANOID
 		assert ( dCurInfo [ qMva.Root().m_iAttr ].GetLength()==0
-			|| dCurInfo [ qMva.Root().m_iAttr ].Last()<=qMva.Root().m_uValue );
+			|| dCurInfo [ qMva.Root().m_iAttr ].Last()<=qMva.Root().m_iValue );
 #endif
-		dCurInfo [ qMva.Root().m_iAttr ].AddUnique ( qMva.Root().m_uValue );
+		dCurInfo [ qMva.Root().m_iAttr ].AddUnique ( qMva.Root().m_iValue );
 
 		// get next entry
 		int iBin = qMva.Root().m_iTag;
@@ -10216,7 +10255,7 @@ int CSphIndex_VLN::Build ( const CSphVector<CSphSource*> & dSources, int iMemory
 	{
 		const CSphColumnInfo & tCol = m_tSchema.GetAttr(i);
 		ESphAttr eAttrType = tCol.m_eAttrType;
-		if ( eAttrType==SPH_ATTR_UINT64SET )
+		if ( eAttrType==SPH_ATTR_INT64SET )
 		{
 			if ( tCol.m_eSrc==SPH_ATTRSRC_FIELD )
 				bHaveFieldMVAs = true;
@@ -10442,7 +10481,7 @@ int CSphIndex_VLN::Build ( const CSphVector<CSphSource*> & dSources, int iMemory
 				tRedirect.m_tLocator = tCol.m_tLocator;
 				tRedirect.m_iAttr = iAttr;
 				tRedirect.m_iMVAAttr = i;
-				tRedirect.m_bMva64 = ( tCol.m_eAttrType==SPH_ATTR_UINT64SET );
+				tRedirect.m_bMva64 = ( tCol.m_eAttrType==SPH_ATTR_INT64SET );
 			}
 		}
 
@@ -10507,10 +10546,10 @@ int CSphIndex_VLN::Build ( const CSphVector<CSphSource*> & dSources, int iMemory
 						tMva.m_iAttr = iMVA;
 						if ( bMva64 )
 						{
-							tMva.m_uValue = MVA_UPSIZE ( pSource->m_dMva.Begin() + i );
+							tMva.m_iValue = MVA_UPSIZE ( pSource->m_dMva.Begin() + i );
 						} else
 						{
-							tMva.m_uValue = pSource->m_dMva[i];
+							tMva.m_iValue = pSource->m_dMva[i];
 						}
 
 						int iLength = dFieldMVAs.GetLength ();
@@ -12153,8 +12192,8 @@ bool CSphIndex_VLN::Merge ( CSphIndex * pSource, const CSphVector<CSphFilterSett
 
 		tKillListFilter.m_bExclude = true;
 		tKillListFilter.m_eType = SPH_FILTER_VALUES;
-		tKillListFilter.m_uMinValue = pKillList[0];
-		tKillListFilter.m_uMaxValue = pKillList[nKillListSize -1];
+		tKillListFilter.m_iMinValue = pKillList[0];
+		tKillListFilter.m_iMaxValue = pKillList[nKillListSize -1];
 		tKillListFilter.m_sAttrName = "@id";
 		tKillListFilter.SetExternalValues ( pKillList, nKillListSize );
 
@@ -12226,7 +12265,7 @@ bool CSphIndex_VLN::DoMerge ( const CSphIndex_VLN * pDstIndex, const CSphIndex_V
 	for ( int i=0; i<tDstSchema.GetAttrsCount(); i++ )
 	{
 		const CSphColumnInfo & tInfo = tDstSchema.GetAttr(i);
-		if ( tInfo.m_eAttrType==SPH_ATTR_UINT64SET )
+		if ( tInfo.m_eAttrType==SPH_ATTR_INT64SET )
 			dMvaLocators.Add ( tInfo.m_tLocator );
 	}
 
@@ -12373,8 +12412,8 @@ bool CSphIndex_VLN::DoMerge ( const CSphIndex_VLN * pDstIndex, const CSphIndex_V
 		CSphFilterSettings tKLF;
 		tKLF.m_bExclude = true;
 		tKLF.m_eType = SPH_FILTER_VALUES;
-		tKLF.m_uMinValue = dPhantomKiller[0];
-		tKLF.m_uMaxValue = dPhantomKiller.Last();
+		tKLF.m_iMinValue = dPhantomKiller[0];
+		tKLF.m_iMaxValue = dPhantomKiller.Last();
 		tKLF.m_sAttrName = "@id";
 		tKLF.SetExternalValues ( &dPhantomKiller[0], dPhantomKiller.GetLength() );
 		ISphFilter * pSpaFilter = sphCreateFilter ( tKLF, pDstIndex->m_tSchema, pDstIndex->GetMVAPool(), sError );
@@ -13621,7 +13660,7 @@ void CSphIndex_VLN::DebugDumpHeader ( FILE * fp, const char * sHeaderName, bool
 			const CSphColumnInfo & tAttr = m_tSchema.GetAttr(i);
 			if ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET )
 				fprintf ( fp, "\tsql_attr_multi = uint %s from field\n", tAttr.m_sName.cstr() );
-			else if ( tAttr.m_eAttrType==SPH_ATTR_UINT64SET )
+			else if ( tAttr.m_eAttrType==SPH_ATTR_INT64SET )
 				fprintf ( fp, "\tsql_attr_multi = bigint %s from field\n", tAttr.m_sName.cstr() );
 			else if ( tAttr.m_eAttrType==SPH_ATTR_INTEGER && tAttr.m_tLocator.IsBitfield() )
 				fprintf ( fp, "\tsql_attr_uint = %s:%d\n", tAttr.m_sName.cstr(), tAttr.m_tLocator.m_iBitCount );
@@ -14346,7 +14385,7 @@ bool CSphIndex_VLN::Preread ()
 	for ( int i=0; i<m_tSchema.GetAttrsCount(); i++ )
 	{
 		const CSphColumnInfo & tCol = m_tSchema.GetAttr(i);
-		if ( tCol.m_eAttrType==SPH_ATTR_UINT64SET )
+		if ( tCol.m_eAttrType==SPH_ATTR_INT64SET )
 			dMvaRowitem.Add ( tCol.m_tLocator.m_iBitOffset/ROWITEM_BITS );
 	}
 
@@ -14774,6 +14813,16 @@ bool CSphIndex_VLN::DoGetKeywords ( CSphVector <CSphKeywordInfo> & dKeywords, co
 #endif
 
 
+static bool IsWeightColumn ( const CSphString & sAttr, const CSphSchema & tSchema )
+{
+	if ( sAttr=="@weight" )
+		return true;
+
+	const CSphColumnInfo * pCol = tSchema.GetAttr ( sAttr.cstr() );
+	return ( pCol && pCol->m_bWeight );
+}
+
+
 bool CSphQueryContext::CreateFilters ( bool bFullscan, const CSphVector<CSphFilterSettings> * pdFilters, const CSphSchema & tSchema, const DWORD * pMvaPool, CSphString & sError )
 {
 	if ( !pdFilters )
@@ -14784,14 +14833,16 @@ bool CSphQueryContext::CreateFilters ( bool bFullscan, const CSphVector<CSphFilt
 		if ( tFilter.m_sAttrName.IsEmpty() )
 			continue;
 
-		if ( bFullscan && tFilter.m_sAttrName=="@weight" )
+		bool bWeight = IsWeightColumn ( tFilter.m_sAttrName, tSchema );
+
+		if ( bFullscan && bWeight )
 			continue; // @weight is not available in fullscan mode
 
 		ISphFilter * pFilter = sphCreateFilter ( tFilter, tSchema, pMvaPool, sError );
 		if ( !pFilter )
 			return false;
 
-		ISphFilter ** pGroup = tFilter.m_sAttrName=="@weight" ? &m_pWeightFilter : &m_pFilter;
+		ISphFilter ** pGroup = bWeight ? &m_pWeightFilter : &m_pFilter;
 		*pGroup = sphJoinFilters ( *pGroup, pFilter );
 	}
 	if ( m_pFilter )
@@ -16381,7 +16432,7 @@ int CSphIndex_VLN::DebugCheck ( FILE * fp )
 		for ( int i=0; i<m_tSchema.GetAttrsCount(); i++ )
 		{
 			const CSphColumnInfo & tAttr = m_tSchema.GetAttr(i);
-			if ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_UINT64SET )
+			if ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_INT64SET )
 			{
 				if ( tAttr.m_tLocator.m_iBitCount!=ROWITEM_BITS )
 				{
@@ -16406,7 +16457,7 @@ int CSphIndex_VLN::DebugCheck ( FILE * fp )
 		for ( int i=0; i<m_tSchema.GetAttrsCount(); i++ )
 		{
 			const CSphColumnInfo & tAttr = m_tSchema.GetAttr(i);
-			if ( tAttr.m_eAttrType==SPH_ATTR_UINT64SET )
+			if ( tAttr.m_eAttrType==SPH_ATTR_INT64SET )
 				dMvaItems.Add ( tAttr.m_tLocator.m_iBitOffset/ROWITEM_BITS );
 		}
 
@@ -16540,23 +16591,23 @@ int CSphIndex_VLN::DebugCheck ( FILE * fp )
 						// check that values are ascending
 						for ( DWORD uVal=(iItem>=iMva64 ? 2 : 1); uVal<uValues && bIsMvaCorrect; )
 						{
-							uint64_t uPrev, uCur;
+							int64_t iPrev, iCur;
 							if ( iItem>=iMva64 )
 							{
-								uPrev = MVA_UPSIZE ( pMva+uVal-2 );
-								uCur = MVA_UPSIZE ( pMva+uVal );
+								iPrev = MVA_UPSIZE ( pMva+uVal-2 );
+								iCur = MVA_UPSIZE ( pMva+uVal );
 								uVal += 2;
 							} else
 							{
-								uPrev = pMva[uVal-1];
-								uCur = pMva[uVal];
+								iPrev = pMva[uVal-1];
+								iCur = pMva[uVal];
 								uVal++;
 							}
 
-							if ( uCur<=uPrev )
+							if ( iCur<=iPrev )
 							{
 								LOC_FAIL(( fp, "unsorted MVA values (row=%u, mvaattr=%d, docid expected="DOCID_FMT", got="DOCID_FMT", val[%u]=%u, val[%u]=%u)",
-									uRow, iItem, uLastID, uMvaID, ( iItem>=iMva64 ? uVal-2 : uVal-1 ), (unsigned int)uPrev, uVal, (unsigned int)uCur ));
+									uRow, iItem, uLastID, uMvaID, ( iItem>=iMva64 ? uVal-2 : uVal-1 ), (unsigned int)iPrev, uVal, (unsigned int)iCur ));
 								bIsMvaCorrect = false;
 							}
 
@@ -22194,12 +22245,12 @@ SphRange_t CSphSource_Document::IterateFieldMVAStart ( int iAttr )
 }
 
 
-static int sphAddMva64 ( CSphVector<DWORD> & dStorage, uint64_t uVal )
+static int sphAddMva64 ( CSphVector<DWORD> & dStorage, int64_t iVal )
 {
 	int uOff = dStorage.GetLength();
 	dStorage.Resize ( uOff+2 );
-	dStorage[uOff] = MVA_DOWNSIZE ( uVal );
-	dStorage[uOff+1] = MVA_DOWNSIZE ( ( uVal>>32 ) & 0xffffffff );
+	dStorage[uOff] = MVA_DOWNSIZE ( iVal );
+	dStorage[uOff+1] = MVA_DOWNSIZE ( ( iVal>>32 ) & 0xffffffff );
 	return uOff;
 }
 
@@ -22220,7 +22271,7 @@ int CSphSource_Document::ParseFieldMVA ( CSphVector < DWORD > & dMva, const char
 
 	while ( *pPtr )
 	{
-		if ( *pPtr>='0' && *pPtr<='9' )
+		if ( ( *pPtr>='0' && *pPtr<='9' ) || ( bMva64 && *pPtr=='-' ) )
 		{
 			if ( !pDigit )
 				pDigit = pPtr;
@@ -22235,7 +22286,7 @@ int CSphSource_Document::ParseFieldMVA ( CSphVector < DWORD > & dMva, const char
 					if ( !bMva64 )
 						dMva.Add ( sphToDword ( szBuf ) );
 					else
-						sphAddMva64 ( dMva, sphToUint64 ( szBuf ) );
+						sphAddMva64 ( dMva, sphToInt64 ( szBuf ) );
 				}
 
 				pDigit = NULL;
@@ -22250,7 +22301,7 @@ int CSphSource_Document::ParseFieldMVA ( CSphVector < DWORD > & dMva, const char
 		if ( !bMva64 )
 			dMva.Add ( sphToDword ( pDigit ) );
 		else
-			sphAddMva64 ( dMva, sphToUint64 ( pDigit ) );
+			sphAddMva64 ( dMva, sphToInt64 ( pDigit ) );
 	}
 
 	int iCount = dMva.GetLength()-uOff-1;
@@ -22606,7 +22657,7 @@ bool CSphSource_SQL::IterateStart ( CSphString & sError )
 			tCol.m_eAttrType = tAttr.m_eAttrType;
 			assert ( tCol.m_eAttrType!=SPH_ATTR_NONE );
 
-			if ( ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_UINT64SET ) && tAttr.m_eSrc!=SPH_ATTRSRC_FIELD )
+			if ( ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_INT64SET ) && tAttr.m_eSrc!=SPH_ATTRSRC_FIELD )
 				LOC_ERROR ( "multi-valued attribute '%s' of wrong source-type found in query; must be 'field'", tAttr.m_sName.cstr() );
 
 			tCol = tAttr;
@@ -22655,7 +22706,7 @@ bool CSphSource_SQL::IterateStart ( CSphString & sError )
 	ARRAY_FOREACH ( i, m_tParams.m_dAttrs )
 	{
 		const CSphColumnInfo & tAttr = m_tParams.m_dAttrs[i];
-		if ( ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_UINT64SET ) && tAttr.m_eSrc!=SPH_ATTRSRC_FIELD )
+		if ( ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_INT64SET ) && tAttr.m_eSrc!=SPH_ATTRSRC_FIELD )
 		{
 			m_tSchema.AddAttr ( tAttr, true ); // all attributes are dynamic at indexing time
 			dFound[i] = true;
@@ -22828,12 +22879,12 @@ BYTE ** CSphSource_SQL::NextDocument ( CSphString & sError )
 	{
 		const CSphColumnInfo & tAttr = m_tSchema.GetAttr(i); // shortcut
 
-		if ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_UINT64SET )
+		if ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_INT64SET )
 		{
 			int uOff = 0;
 			if ( tAttr.m_eSrc==SPH_ATTRSRC_FIELD )
 			{
-				uOff = ParseFieldMVA ( m_dMva, SqlColumn ( tAttr.m_iIndex ), tAttr.m_eAttrType==SPH_ATTR_UINT64SET );
+				uOff = ParseFieldMVA ( m_dMva, SqlColumn ( tAttr.m_iIndex ), tAttr.m_eAttrType==SPH_ATTR_INT64SET );
 			}
 			m_tDocInfo.SetAttr ( tAttr.m_tLocator, uOff );
 			continue;
@@ -22931,7 +22982,7 @@ bool CSphSource_SQL::IterateMultivaluedStart ( int iAttr, CSphString & sError )
 	m_iMultiAttr = iAttr;
 	const CSphColumnInfo & tAttr = m_tSchema.GetAttr(iAttr);
 
-	if ( !(tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_UINT64SET ) )
+	if ( !(tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_INT64SET ) )
 		return false;
 
 	CSphString sPrefix;
@@ -22985,7 +23036,7 @@ bool CSphSource_SQL::IterateMultivaluedNext ()
 	const CSphColumnInfo & tAttr = m_tSchema.GetAttr ( m_iMultiAttr );
 
 	assert ( m_bSqlConnected );
-	assert ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_UINT64SET );
+	assert ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_INT64SET );
 
 	// fetch next row
 	bool bGotRow = SqlFetchRow ();
@@ -23011,7 +23062,7 @@ bool CSphSource_SQL::IterateMultivaluedNext ()
 	if ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET )
 		m_dMva.Add ( sphToDword ( SqlColumn(1) ) );
 	else
-		sphAddMva64 ( m_dMva, sphToUint64 ( SqlColumn(1) ) );
+		sphAddMva64 ( m_dMva, sphToInt64 ( SqlColumn(1) ) );
 
 	return true;
 }
@@ -24579,7 +24630,7 @@ void CSphSource_XMLPipe2::ConfigureAttrs ( const CSphVariant * pHead, ESphAttr e
 
 		tCol.m_iIndex = m_tSchema.GetAttrsCount ();
 
-		if ( eAttrType==SPH_ATTR_UINT32SET || eAttrType==SPH_ATTR_UINT64SET )
+		if ( eAttrType==SPH_ATTR_UINT32SET || eAttrType==SPH_ATTR_INT64SET )
 		{
 			tCol.m_eAttrType = eAttrType;
 			tCol.m_eSrc = SPH_ATTRSRC_FIELD;
@@ -24623,7 +24674,7 @@ bool CSphSource_XMLPipe2::Setup ( FILE * pPipe, const CSphConfigSection & hSourc
 	ConfigureAttrs ( hSource("xmlpipe_attr_float"),			SPH_ATTR_FLOAT );
 	ConfigureAttrs ( hSource("xmlpipe_attr_bigint"),		SPH_ATTR_BIGINT );
 	ConfigureAttrs ( hSource("xmlpipe_attr_multi"),			SPH_ATTR_UINT32SET );
-	ConfigureAttrs ( hSource("xmlpipe_attr_multi_64"),		SPH_ATTR_UINT64SET );
+	ConfigureAttrs ( hSource("xmlpipe_attr_multi_64"),		SPH_ATTR_INT64SET );
 	ConfigureAttrs ( hSource("xmlpipe_attr_string"),		SPH_ATTR_STRING );
 	ConfigureAttrs ( hSource("xmlpipe_attr_wordcount"),		SPH_ATTR_WORDCOUNT );
 	ConfigureAttrs ( hSource("xmlpipe_field_string"),		SPH_ATTR_STRING );
@@ -24733,7 +24784,7 @@ bool CSphSource_XMLPipe2::Connect ( CSphString & sError )
 	for ( int i = 0; i < m_tSchema.GetAttrsCount (); i++ )
 	{
 		const CSphColumnInfo & tCol = m_tSchema.GetAttr ( i );
-		if ( ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_UINT64SET ) && tCol.m_eSrc==SPH_ATTRSRC_FIELD )
+		if ( ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_INT64SET ) && tCol.m_eSrc==SPH_ATTRSRC_FIELD )
 			m_dAttrToMVA.Add ( iFieldMVA++ );
 		else
 			m_dAttrToMVA.Add ( -1 );
@@ -24947,9 +24998,9 @@ BYTE **	CSphSource_XMLPipe2::NextDocument ( CSphString & sError )
 			const CSphString & sAttrValue = pDocument->m_dAttrs[i].IsEmpty () && m_dDefaultAttrs.GetLength () ? m_dDefaultAttrs[i] : pDocument->m_dAttrs[i];
 			const CSphColumnInfo & tAttr = m_tSchema.GetAttr ( i );
 
-			if ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_UINT64SET )
+			if ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_INT64SET )
 			{
-				m_tDocInfo.SetAttr ( tAttr.m_tLocator, ParseFieldMVA ( m_dMva, sAttrValue.cstr (), tAttr.m_eAttrType==SPH_ATTR_UINT64SET ) );
+				m_tDocInfo.SetAttr ( tAttr.m_tLocator, ParseFieldMVA ( m_dMva, sAttrValue.cstr (), tAttr.m_eAttrType==SPH_ATTR_INT64SET ) );
 				continue;
 			}
 
@@ -25130,7 +25181,7 @@ void CSphSource_XMLPipe2::StartElement ( const char * szName, const char ** pAtt
 					Info.m_eSrc = SPH_ATTRSRC_FIELD;
 				} else if ( !strcmp ( szType, "multi_64" ) )
 				{
-					Info.m_eAttrType = SPH_ATTR_UINT64SET;
+					Info.m_eAttrType = SPH_ATTR_INT64SET;
 					Info.m_eSrc = SPH_ATTRSRC_FIELD;
 				} else
 				{

+ 7 - 4
src/sphinx.h

@@ -379,6 +379,7 @@ inline bool operator < ( const CSphRemapRange & a, const CSphRemapRange & b )
 /// lowercaser
 class CSphLowercaser
 {
+	friend class ISphTokenizer;
 public:
 				CSphLowercaser ();
 				~CSphLowercaser ();
@@ -1266,6 +1267,7 @@ struct CSphColumnInfo
 	ESphEvalStage					m_eStage;		///< column evaluation stage (who and how computes this column)
 	bool							m_bPayload;
 	bool							m_bFilename;	///< column is a file name
+	bool							m_bWeight;		///< is a weight column
 
 	/// handy ctor
 	CSphColumnInfo ( const char * sName=NULL, ESphAttr eType=SPH_ATTR_NONE )
@@ -1280,6 +1282,7 @@ struct CSphColumnInfo
 		, m_eStage ( SPH_EVAL_STATIC )
 		, m_bPayload ( false )
 		, m_bFilename ( false )
+		, m_bWeight ( false )
 	{
 		m_sName.ToLower ();
 	}
@@ -1354,7 +1357,7 @@ public:
 public:
 	// also let the schema to clone the matches when necessary
 	void CopyStrings ( CSphMatch * pDst, const CSphMatch & rhs, int iUpBound=-1 ) const;
-	
+
 	// simple copy - clone the fields, copy the dynamic part.
 	void CloneMatch ( CSphMatch * pDst, const CSphMatch & rhs ) const;
 
@@ -2206,12 +2209,12 @@ public:
 	ESphFilter			m_eType;		///< filter type
 	union
 	{
-		SphAttr_t		m_uMinValue;	///< range min
+		SphAttr_t		m_iMinValue;	///< range min
 		float			m_fMinValue;	///< range min
 	};
 	union
 	{
-		SphAttr_t		m_uMaxValue;	///< range max
+		SphAttr_t		m_iMaxValue;	///< range max
 		float			m_fMaxValue;	///< range max
 	};
 	CSphVector<SphAttr_t>	m_dValues;		///< integer values set
@@ -2886,7 +2889,7 @@ void				sphSetQuiet ( bool bQuiet );
 /// may return NULL on error; in this case, error message is placed in sError
 /// if the pUpdate is given, creates the updater's queue and perform the index update
 /// instead of searching
-ISphMatchSorter *	sphCreateQueue ( const CSphQuery * pQuery, const CSphSchema & tSchema, CSphString & sError, 
+ISphMatchSorter *	sphCreateQueue ( const CSphQuery * pQuery, const CSphSchema & tSchema, CSphString & sError,
 						bool bComputeItems=true, CSphSchema * pExtra=NULL, CSphAttrUpdateEx * pUpdate=NULL, bool * pZonespanlist=NULL );
 
 /// convert queue to sorted array, and add its entries to result's matches array

+ 153 - 110
src/sphinxexcerpt.cpp

@@ -43,7 +43,7 @@ public:
 	void	BuildExcerpt ( ExcerptQuery_t & tQuery );
 
 	void	TokenizeQuery ( const ExcerptQuery_t &, CSphDict * pDict, ISphTokenizer * pTokenizer, const CSphIndexSettings & tSettings );
-	void	TokenizeDocument ( char * pData, int iDataLen, CSphDict * pDict, ISphTokenizer * pTokenizer, bool bFillMasks, const ExcerptQuery_t & q, const CSphIndexSettings & tSettings );
+	void	TokenizeDocument ( char * pData, int iDataLen, CSphDict * pDict, ISphTokenizer * pTokenizer, bool bFillMasks, const ExcerptQuery_t & q, const CSphIndexSettings & tSettings, int iSPZ );
 
 	void	SetMarker ( CSphHitMarker * pMarker ) { m_pMarker = pMarker; }
 	void	SetExactPhrase ( const ExcerptQuery_t & tQuery );
@@ -678,11 +678,10 @@ int FindAddZone ( const char * sZoneName, int iZoneNameLen, SmallStringHash_T<in
 }
 
 // FIXME! unify with global static void TokenizeDocument somehow, lots of common code
-void ExcerptGen_c::TokenizeDocument ( char * pData, int iDataLen, CSphDict * pDict, ISphTokenizer * pTokenizer, bool bFillMasks, const ExcerptQuery_t & q, const CSphIndexSettings & tSettings )
+void ExcerptGen_c::TokenizeDocument ( char * pData, int iDataLen, CSphDict * pDict, ISphTokenizer * pTokenizer, bool bFillMasks, const ExcerptQuery_t & q, const CSphIndexSettings & tSettings, int iSPZ )
 {
 	assert ( q.m_sStripMode!="retain" );
 	bool bQueryMode = q.m_bHighlightQuery;
-	int iSPZ = q.m_iPassageBoundary;
 
 	m_iTotalCP = 0;
 	m_iDocumentWords = 0;
@@ -1026,7 +1025,7 @@ void ExcerptGen_c::BuildExcerpt ( ExcerptQuery_t & tQuery )
 
 	// do highlighting
 	if ( ( tQuery.m_iLimit<=0 || tQuery.m_iLimit>m_iTotalCP )
-		&& ( tQuery.m_iLimitWords<=0 || tQuery.m_iLimitWords>m_iDocumentWords ) )
+		&& ( tQuery.m_iLimitWords<=0 || tQuery.m_iLimitWords>m_iDocumentWords ) && tQuery.m_ePassageSPZ==SPH_SPZ_NONE )
 	{
 		HighlightAll ( tQuery );
 
@@ -1883,26 +1882,25 @@ public:
 	CSphVector< CSphVector<int> >			m_dDocHits;
 
 	// query parsing result
-	XQQuery_t								m_tQuery;
+	const XQQuery_t	&						m_tQuery;
 
 protected:
 	// query keywords and parsing stuff
-	CSphVector<SphWordID_t>					m_dQueryWords;
-	CSphVector<ExcerptGen_c::Keyword_t>		m_dStarWords;
+	CSphVector<SphWordID_t>					m_dTerms;
+	CSphVector<SphWordID_t>					m_dStarred;
+	CSphVector<ExcerptGen_c::Keyword_t>		m_dStars;
 	CSphVector<BYTE>						m_dStarBuffer;
 
 	bool									m_bQueryMode;
-	bool									m_bSentence;
-	bool									m_bParagraph;
 	BYTE									m_sTmpWord [ 3*SPH_MAX_WORD_LEN + 16 ];
 
 public:
-	explicit	SnippetsDocIndex_c ( bool bQueryMode );
+	SnippetsDocIndex_c ( bool bQueryMode, const XQQuery_t & tQuery );
 	void		SetupHits ();
 	int			FindWord ( SphWordID_t iWordID, const BYTE * sWord, int iWordLen ) const;
+	int			FindStarred ( const char * sWord, int uStarPosition ) const;
 	void		AddHits ( SphWordID_t iWordID, const BYTE * sWord, int iWordLen, DWORD uPosition );
-	bool		Parse ( const char * sQuery, ISphTokenizer * pTokenizer, CSphDict * pDict, const CSphSchema * pSchema, CSphString & sError, const CSphIndexSettings & tSettings );
-	int			GetSPZ () const;
+	void		ParseQuery ( const char * sQuery, ISphTokenizer * pTokenizer, CSphDict * pDict, DWORD eExtQuerySPZ );
 
 protected:
 	bool		MatchStar ( const ExcerptGen_c::Keyword_t & tTok, const BYTE * sWord, int iWordLen ) const;
@@ -1912,17 +1910,16 @@ protected:
 };
 
 
-SnippetsDocIndex_c::SnippetsDocIndex_c ( bool bQueryMode )
+SnippetsDocIndex_c::SnippetsDocIndex_c ( bool bQueryMode, const XQQuery_t & tQuery )
 	: m_uLastPos ( 0 )
+	, m_tQuery ( tQuery )
 	, m_bQueryMode ( bQueryMode )
-	, m_bSentence ( false )
-	, m_bParagraph ( false )
 {}
 
 
 void SnippetsDocIndex_c::SetupHits ()
 {
-	m_dDocHits.Resize ( m_dQueryWords.GetLength() + m_dStarWords.GetLength() );
+	m_dDocHits.Resize ( m_dTerms.GetLength() + m_dStars.GetLength() );
 	m_uLastPos = 0;
 }
 
@@ -1952,14 +1949,31 @@ bool SnippetsDocIndex_c::MatchStar ( const ExcerptGen_c::Keyword_t & tTok, const
 
 int SnippetsDocIndex_c::FindWord ( SphWordID_t iWordID, const BYTE * sWord, int iWordLen ) const
 {
-	const SphWordID_t * pQueryID = iWordID ? m_dQueryWords.BinarySearch ( iWordID ) : NULL;
+	const SphWordID_t * pQueryID = iWordID ? m_dTerms.BinarySearch ( iWordID ) : NULL;
 	if ( pQueryID )
-		return pQueryID - m_dQueryWords.Begin();
+		return pQueryID - m_dTerms.Begin();
 
 	if ( sWord && iWordLen )
-		ARRAY_FOREACH ( i, m_dStarWords )
-			if ( MatchStar ( m_dStarWords[i], sWord, iWordLen ) )
-				return i + m_dQueryWords.GetLength();
+		ARRAY_FOREACH ( i, m_dStars )
+			if ( MatchStar ( m_dStars[i], sWord, iWordLen ) )
+				return i + m_dTerms.GetLength();
+
+	return -1;
+}
+
+int SnippetsDocIndex_c::FindStarred ( const char * sWord, int uStarPosition ) const
+{
+	if ( !sWord || !uStarPosition )
+		return -1;
+
+	const BYTE * pBuf = m_dStarBuffer.Begin();
+	int iLen = strlen ( sWord );
+	ARRAY_FOREACH ( i, m_dStars )
+	{
+		const ExcerptGen_c::Keyword_t & tTok = m_dStars[i];
+		if ( tTok.m_iLength==iLen && tTok.m_uStar==uStarPosition && memcmp ( pBuf+tTok.m_iWord, sWord, iLen )==0 )
+			return i + m_dTerms.GetLength();
+	}
 
 	return -1;
 }
@@ -1967,33 +1981,38 @@ int SnippetsDocIndex_c::FindWord ( SphWordID_t iWordID, const BYTE * sWord, int
 
 void SnippetsDocIndex_c::AddHits ( SphWordID_t iWordID, const BYTE * sWord, int iWordLen, DWORD uPosition )
 {
-	assert ( m_dDocHits.GetLength()==m_dQueryWords.GetLength()+m_dStarWords.GetLength() );
+	assert ( m_dDocHits.GetLength()==m_dTerms.GetLength()+m_dStars.GetLength() );
 
-	const SphWordID_t * pQueryWord = ( iWordID ? m_dQueryWords.BinarySearch ( iWordID ) : NULL );
+	// FIXME!!! replace with a well-formed full-blown infix keyword dict
+	const SphWordID_t * pQueryWord = ( iWordID ? m_dTerms.BinarySearch ( iWordID ) : NULL );
 	if ( pQueryWord )
 	{
-		m_dDocHits [ pQueryWord - m_dQueryWords.Begin() ].Add ( uPosition );
-		return;
+		m_dDocHits [ pQueryWord - m_dTerms.Begin() ].Add ( uPosition );
+
+		// might add hit to star hit-list too
+		if ( !m_dStarred.BinarySearch ( iWordID ) )
+			return;
 	}
 
 	if ( sWord && iWordLen )
-		ARRAY_FOREACH ( i, m_dStarWords )
-			if ( MatchStar ( m_dStarWords[i], sWord, iWordLen ) )
-				m_dDocHits [ m_dQueryWords.GetLength() + i ].Add ( uPosition );
+		ARRAY_FOREACH ( i, m_dStars )
+			if ( MatchStar ( m_dStars[i], sWord, iWordLen ) )
+				m_dDocHits [ m_dTerms.GetLength() + i ].Add ( uPosition );
 }
 
 
-bool SnippetsDocIndex_c::Parse ( const char * sQuery, ISphTokenizer * pTokenizer, CSphDict * pDict, const CSphSchema * pSchema, CSphString & sError, const CSphIndexSettings & tSettings )
+void SnippetsDocIndex_c::ParseQuery ( const char * sQuery, ISphTokenizer * pTokenizer, CSphDict * pDict, DWORD eExtQuerySPZ )
 {
+	int iQueryLen = 0;
 	if ( !m_bQueryMode )
 	{
 		// parse bag-of-words query
-		int iQueryLen = strlen ( sQuery ); // FIXME!!! get length as argument
+		iQueryLen = strlen ( sQuery ); // FIXME!!! get length as argument
 		pTokenizer->SetBuffer ( (BYTE *)sQuery, iQueryLen );
 
 		BYTE * sWord = NULL;
 		// FIXME!!! add warning on query words overflow
-		while ( ( sWord = pTokenizer->GetToken() )!=NULL && ( m_dQueryWords.GetLength() + m_dStarWords.GetLength() )<MAX_HIGHLIGHT_WORDS )
+		while ( ( sWord = pTokenizer->GetToken() )!=NULL && ( m_dTerms.GetLength() + m_dStars.GetLength() )<MAX_HIGHLIGHT_WORDS )
 		{
 			SphWordID_t uWordID = pDict->GetWordID ( sWord );
 			if ( !uWordID )
@@ -2012,22 +2031,14 @@ bool SnippetsDocIndex_c::Parse ( const char * sQuery, ISphTokenizer * pTokenizer
 
 	} else
 	{
-		// parse extended query
-		if ( !sphParseExtendedQuery ( m_tQuery, sQuery, pTokenizer, pSchema, pDict, tSettings ) )
-		{
-			sError = m_tQuery.m_sParseError;
-			return false;
-		}
-
-		m_tQuery.m_pRoot->ClearFieldMask();
 		ExtractWords ( m_tQuery.m_pRoot, pDict );
 
-		if ( m_bSentence )
+		if ( eExtQuerySPZ & SPH_SPZ_SENTENCE )
 		{
 			strncpy ( (char *)m_sTmpWord, MAGIC_WORD_SENTENCE, sizeof(m_sTmpWord) );
 			AddWord ( pDict->GetWordID ( m_sTmpWord ) );
 		}
-		if ( m_bParagraph )
+		if ( eExtQuerySPZ & SPH_SPZ_PARAGRAPH )
 		{
 			strncpy ( (char *)m_sTmpWord, MAGIC_WORD_PARAGRAPH, sizeof(m_sTmpWord) );
 			AddWord ( pDict->GetWordID ( m_sTmpWord ) );
@@ -2041,35 +2052,52 @@ bool SnippetsDocIndex_c::Parse ( const char * sQuery, ISphTokenizer * pTokenizer
 		}
 	}
 
-	// all ok, remove dupes, and return
-	m_dQueryWords.Uniq();
-	assert ( !m_dStarWords.GetLength() || m_dStarBuffer.GetLength() );
-	return true;
-}
+	// all ok, remove dupes
+	m_dTerms.Uniq();
+	assert ( !m_dStars.GetLength() || m_dStarBuffer.GetLength() );
 
+	// plain terms could also match as starred terms
+	if ( m_dStars.GetLength() && m_dTerms.GetLength() && m_bQueryMode )
+	{
+		CSphVector<const XQNode_t *> dChildren;
+		dChildren.Add ( m_tQuery.m_pRoot );
+		ARRAY_FOREACH ( i, dChildren )
+		{
+			const XQNode_t * pChild = dChildren[i];
+			if ( !pChild )
+				continue;
 
-int SnippetsDocIndex_c::GetSPZ () const
-{
-	// with sentence in query we should consider SENTECE, PARAGRAPH, ZONE
-	// with paragraph in query we should consider PARAGRAPH, ZONE
-	// with zone in query we should consider ZONE
-	if ( m_bSentence )
-		return MAGIC_CODE_SENTENCE;
+			ARRAY_FOREACH ( j, pChild->m_dChildren )
+				dChildren.Add ( pChild->m_dChildren[j] );
 
-	if ( m_bParagraph )
-		return MAGIC_CODE_PARAGRAPH;
+			ARRAY_FOREACH ( j, pChild->m_dWords )
+			{
+				if ( pChild->m_dWords[j].m_uStarPosition )
+					continue;
 
-	if ( m_tQuery.m_dZones.GetLength() )
-		return MAGIC_CODE_ZONE;
+				const BYTE * sWord = (const BYTE *)pChild->m_dWords[j].m_sWord.cstr();
+				int iLen = pChild->m_dWords[j].m_sWord.Length();
+				ARRAY_FOREACH ( k, m_dStars )
+				{
+					if ( MatchStar ( m_dStars[k], sWord, iLen ) )
+					{
+						memcpy ( m_sTmpWord, sWord, iLen );
+						m_dStarred.Add ( pDict->GetWordID ( m_sTmpWord ) );
+						break;
+					}
+				}
+			}
+		}
 
-	return 0;
+		m_dStarred.Uniq();
+	}
 }
 
 
 void SnippetsDocIndex_c::AddWord ( SphWordID_t iWordID )
 {
 	assert ( iWordID );
-	m_dQueryWords.Add ( iWordID );
+	m_dTerms.Add ( iWordID );
 }
 
 
@@ -2082,7 +2110,7 @@ void SnippetsDocIndex_c::AddWord ( const char * sWord, int iStarPosition )
 	memcpy ( &m_dStarBuffer[iOff], sWord, iLen );
 	m_dStarBuffer[iOff+iLen] = 0;
 
-	ExcerptGen_c::Keyword_t & tTok = m_dStarWords.Add();
+	ExcerptGen_c::Keyword_t & tTok = m_dStars.Add();
 	tTok.m_iWord = iOff;
 	tTok.m_iLength = iLen;
 	tTok.m_uStar = iStarPosition;
@@ -2091,8 +2119,9 @@ void SnippetsDocIndex_c::AddWord ( const char * sWord, int iStarPosition )
 
 void SnippetsDocIndex_c::ExtractWords ( XQNode_t * pNode, CSphDict * pDict )
 {
-	m_bSentence |= ( pNode->GetOp()==SPH_QUERY_SENTENCE );
-	m_bParagraph |= ( pNode->GetOp()==SPH_QUERY_PARAGRAPH );
+	if ( !pNode )
+		return;
+
 	ARRAY_FOREACH ( i, pNode->m_dWords )
 	{
 		const XQKeyword_t & tWord = pNode->m_dWords[i];
@@ -2246,7 +2275,7 @@ public:
 		int iWord = -1;
 		if ( tWord.m_uStarPosition )
 		{
-			iWord = m_tContainer.FindWord ( 0, (const BYTE *)tWord.m_sWord.cstr(), tWord.m_sWord.Length() );
+			iWord = m_tContainer.FindStarred ( tWord.m_sWord.cstr(), tWord.m_uStarPosition );
 		} else
 		{
 			strncpy ( (char *)m_sTmpWord, tWord.m_sWord.cstr(), sizeof(m_sTmpWord) );
@@ -2463,7 +2492,7 @@ static void AddZone ( const char * pStart, const char * pEnd, int uPosition, Tok
 
 
 /// tokenize document using a given functor
-static void TokenizeDocument ( TokenFunctorTraits_c & tFunctor, const CSphHTMLStripper * pStripper )
+static void TokenizeDocument ( TokenFunctorTraits_c & tFunctor, const CSphHTMLStripper * pStripper, DWORD iSPZ )
 {
 	ISphTokenizer * pTokenizer = tFunctor.m_pTokenizer;
 	CSphDict * pDict = tFunctor.m_pDict;
@@ -2474,7 +2503,6 @@ static void TokenizeDocument ( TokenFunctorTraits_c & tFunctor, const CSphHTMLSt
 	assert ( pStartPtr && pLastTokenEnd );
 
 	bool bRetainHtml = tFunctor.m_sStripMode=="retain";
-	int iSPZ = tFunctor.m_iPassageBoundary;
 	int uPosition = 0;
 	BYTE * sWord = NULL;
 	SphWordID_t iBlendID = 0;
@@ -2704,7 +2732,7 @@ public:
 		m_iDocs = 0;
 		m_iHits = 0;
 		m_uLastPos = uLastPos;
-		if ( m_pHits )
+		if ( m_pHits && m_pHits->GetLength() )
 		{
 			m_iDocs = 1;
 			m_iHits = m_pHits->GetLength();
@@ -2774,22 +2802,49 @@ inline bool operator < ( const SphHitMark_t & a, const SphHitMark_t & b )
 }
 
 
-static void HighlightAllFastpath ( ExcerptQuery_t & tQuerySettings,
-	const CSphIndexSettings & tIndexSettings,
+// with sentence in query we should consider SENTENCE, PARAGRAPH, ZONE
+// with paragraph in query we should consider PARAGRAPH, ZONE
+// with zone in query we should consider ZONE
+int ConvertSPZ ( DWORD eSPZ )
+{
+	if ( eSPZ & SPH_SPZ_SENTENCE )
+		return MAGIC_CODE_SENTENCE;
+	else if ( eSPZ & SPH_SPZ_PARAGRAPH )
+		return MAGIC_CODE_PARAGRAPH;
+	else if ( eSPZ & SPH_SPZ_ZONE )
+		return MAGIC_CODE_ZONE;
+	else
+		return 0;
+}
+
+
+static void HighlightAllFastpath ( const ExcerptQuery_t & tQuerySettings,
+	const CSphIndexSettings & tIndexSettings, const XQQuery_t & tExtQuery, DWORD eExtQuerySPZ,
 	const char * sDoc, int iDocLen,
 	CSphDict * pDict, ISphTokenizer * pTokenizer, const CSphHTMLStripper * pStripper,
-	const CSphSchema * pSchema, CSphString & sError,
-	ISphTokenizer * pQueryTokenizer )
+	CSphString & sError, ISphTokenizer * pQueryTokenizer, CSphVector<BYTE> & dRes )
 {
 	ExcerptQuery_t tFixedSettings ( tQuerySettings );
 
-	// exact_phrase is replaced by query_mode=1 + "query words"
-	if ( tQuerySettings.m_bExactPhrase )
+	// exact_phrase emulation
+	// bag of words is replaced by query_mode=1 + "query words"
+	XQQuery_t tExactPhraseQuery;
+	bool bPhraseEmulation = tQuerySettings.m_bExactPhrase;
+	if ( bPhraseEmulation )
 	{
 		if ( !tQuerySettings.m_bHighlightQuery && tQuerySettings.m_sWords.Length() && strchr ( tQuerySettings.m_sWords.cstr(), 0x22 )==NULL )
 			tFixedSettings.m_sWords.SetSprintf ( "\"%s\"", tQuerySettings.m_sWords.cstr() );
 
 		tFixedSettings.m_bHighlightQuery = true;
+
+		CSphSchema tSchema;
+		if ( !sphParseExtendedQuery ( tExactPhraseQuery, tFixedSettings.m_sWords.cstr(), pQueryTokenizer, &tSchema, pDict, tIndexSettings ) )
+		{
+			sError = tExactPhraseQuery.m_sParseError;
+			return;
+		}
+		if ( tExactPhraseQuery.m_pRoot )
+			tExactPhraseQuery.m_pRoot->ClearFieldMask();
 	}
 
 	bool bRetainHtml = ( tFixedSettings.m_sStripMode=="retain" );
@@ -2799,13 +2854,8 @@ static void HighlightAllFastpath ( ExcerptQuery_t & tQuerySettings,
 		pTokenizer->AddSpecials ( "<" );
 
 	// create query and hit lists container, parse query
-	SnippetsDocIndex_c tContainer ( tFixedSettings.m_bHighlightQuery );
-	if ( !tContainer.Parse ( tFixedSettings.m_sWords.cstr(), pQueryTokenizer, pDict, pSchema, sError, tIndexSettings ) )
-		return;
-
-	// fast-path collects no passages but that flag says what SPZ should we collect
-	if ( tFixedSettings.m_bHighlightQuery && !tFixedSettings.m_iPassageBoundary )
-		tFixedSettings.m_iPassageBoundary = tContainer.GetSPZ();
+	SnippetsDocIndex_c tContainer ( tFixedSettings.m_bHighlightQuery, ( bPhraseEmulation ? tExactPhraseQuery : tExtQuery ) );
+	tContainer.ParseQuery ( tFixedSettings.m_sWords.cstr(), pQueryTokenizer, pDict, eExtQuerySPZ );
 
 	// do highlighting
 	if ( !tFixedSettings.m_bHighlightQuery )
@@ -2813,14 +2863,14 @@ static void HighlightAllFastpath ( ExcerptQuery_t & tQuerySettings,
 		// simple bag of words query
 		// do just one tokenization pass over the document, matching and highlighting keywords
 		HighlightPlain_c tHighlighter ( tContainer, pTokenizer, pDict, tFixedSettings, tIndexSettings, sDoc, iDocLen );
-		TokenizeDocument ( tHighlighter, NULL );
+		TokenizeDocument ( tHighlighter, NULL, 0 );
 
 		if ( !tHighlighter.m_iMatchesCount && tFixedSettings.m_bAllowEmpty )
 			tHighlighter.m_dResult.Reset();
 
 		// add trailing zero, and return
 		tHighlighter.m_dResult.Add ( 0 );
-		tQuerySettings.m_dRes.SwapData ( tHighlighter.m_dResult );
+		dRes.SwapData ( tHighlighter.m_dResult );
 
 	} else
 	{
@@ -2831,7 +2881,7 @@ static void HighlightAllFastpath ( ExcerptQuery_t & tQuerySettings,
 
 		// do the 1st pass
 		HitCollector_c tHitCollector ( tContainer, pTokenizer, pDict, tFixedSettings, tIndexSettings, sDoc, iDocLen );
-		TokenizeDocument ( tHitCollector, pStripper );
+		TokenizeDocument ( tHitCollector, pStripper, ConvertSPZ ( eExtQuerySPZ ) );
 
 		// prepare for the 2nd pass (that is, extract matching hits)
 		SnippetZoneChecker_c tZoneChecker ( tHitCollector.m_dZones, tHitCollector.m_hZones, tContainer.m_tQuery.m_dZones );
@@ -2919,14 +2969,14 @@ static void HighlightAllFastpath ( ExcerptQuery_t & tQuerySettings,
 
 		// 2nd pass
 		HighlightQuery_c tHighlighter ( tContainer, pTokenizer, pDict, tFixedSettings, tIndexSettings, sDoc, iDocLen, dMarked );
-		TokenizeDocument ( tHighlighter, pStripper );
+		TokenizeDocument ( tHighlighter, pStripper, ConvertSPZ ( eExtQuerySPZ ) );
 
 		if ( !tHighlighter.m_iMatchesCount && tFixedSettings.m_bAllowEmpty )
 			tHighlighter.m_dResult.Reset();
 
 		// add trailing zero, and return
 		tHighlighter.m_dResult.Add ( 0 );
-		tQuerySettings.m_dRes.SwapData ( tHighlighter.m_dResult );
+		dRes.SwapData ( tHighlighter.m_dResult );
 	}
 }
 
@@ -2942,7 +2992,6 @@ ExcerptQuery_t::ExcerptQuery_t ()
 	, m_iLimitPassages ( 0 )
 	, m_iAround ( 5 )
 	, m_iPassageId ( 1 )
-	, m_iPassageBoundary ( 0 )
 	, m_bRemoveSpaces ( false )
 	, m_bExactPhrase ( false )
 	, m_bUseBoundaries ( false )
@@ -2959,13 +3008,15 @@ ExcerptQuery_t::ExcerptQuery_t ()
 	, m_iNext ( -2 )
 	, m_bHasBeforePassageMacro ( false )
 	, m_bHasAfterPassageMacro ( false )
+	, m_ePassageSPZ ( SPH_SPZ_NONE )
 {
 }
 
 /////////////////////////////////////////////////////////////////////////////
 
 
-void sphBuildExcerpt ( ExcerptQuery_t & tOptions, CSphDict * pDict, ISphTokenizer * pTokenizer, const CSphSchema * pSchema, CSphIndex * pIndex, CSphString & sError, const CSphHTMLStripper * pStripper, ISphTokenizer * pQueryTokenizer )
+void sphBuildExcerpt ( ExcerptQuery_t & tOptions, const CSphIndex * pIndex, const CSphHTMLStripper * pStripper, const XQQuery_t & tExtQuery,
+						DWORD eExtQuerySPZ, CSphString & sError, CSphDict * pDict, ISphTokenizer * pDocTokenizer, ISphTokenizer * pQueryTokenizer )
 {
 	if ( tOptions.m_sStripMode=="retain"
 		&& !( tOptions.m_iLimit==0 && tOptions.m_iLimitPassages==0 && tOptions.m_iLimitWords==0 ) )
@@ -2974,9 +3025,6 @@ void sphBuildExcerpt ( ExcerptQuery_t & tOptions, CSphDict * pDict, ISphTokenize
 		return;
 	}
 
-	if ( !tOptions.m_sWords.cstr()[0] )
-		tOptions.m_bHighlightQuery = false;
-
 	char * pData = const_cast<char*> ( tOptions.m_sSource.cstr() );
 	CSphFixedVector<char> pBuffer ( 0 );
 	int iDataLen = tOptions.m_sSource.Length();
@@ -3024,40 +3072,35 @@ void sphBuildExcerpt ( ExcerptQuery_t & tOptions, CSphDict * pDict, ISphTokenize
 	// FIXME!!! check on real data (~100 Mb) as stripper changes len
 	iDataLen = strlen ( pData );
 
+	bool bCanFastPathed = ( ( tOptions.m_iLimit==0 || tOptions.m_iLimit>=iDataLen ) &&
+		( tOptions.m_iLimitWords==0 || tOptions.m_iLimitWords>iDataLen/2 ) &&
+		!tOptions.m_bForceAllWords && !tOptions.m_bUseBoundaries && !tOptions.m_iLimitPassages && !tOptions.m_bWeightOrder &&
+		tOptions.m_ePassageSPZ==SPH_SPZ_NONE && !tOptions.m_bEmitZones );
+
 	// fast path that highlights entire document
-	if (!( tOptions.m_iLimitPassages
-		|| ( tOptions.m_iLimitWords && tOptions.m_iLimitWords<iDataLen/2 )
-		|| ( tOptions.m_iLimit && tOptions.m_iLimit<=iDataLen )
-		|| tOptions.m_bForceAllWords || tOptions.m_bUseBoundaries ))
+	if ( bCanFastPathed )
 	{
-		HighlightAllFastpath ( tOptions, pIndex->GetSettings(), pData, iDataLen, pDict, pTokenizer, pStripper, pSchema, sError, pQueryTokenizer );
+		HighlightAllFastpath ( tOptions, pIndex->GetSettings(), tExtQuery, eExtQuerySPZ, pData, iDataLen, pDict, pDocTokenizer, pStripper,
+			sError, pQueryTokenizer, tOptions.m_dRes );
 		return;
 	}
 
 	if ( !tOptions.m_bHighlightQuery )
 	{
 		// legacy highlighting
-		ExcerptGen_c tGenerator ( pTokenizer->IsUtf8() );
-		tGenerator.TokenizeQuery ( tOptions, pDict, pTokenizer, pIndex->GetSettings() );
+		ExcerptGen_c tGenerator ( pDocTokenizer->IsUtf8() );
+		tGenerator.TokenizeQuery ( tOptions, pDict, pDocTokenizer, pIndex->GetSettings() );
 		tGenerator.SetExactPhrase ( tOptions );
-		tGenerator.TokenizeDocument ( pData, iDataLen, pDict, pTokenizer, true, tOptions, pIndex->GetSettings() );
+		tGenerator.TokenizeDocument ( pData, iDataLen, pDict, pDocTokenizer, true, tOptions, pIndex->GetSettings(), ConvertSPZ ( tOptions.m_ePassageSPZ | eExtQuerySPZ ) );
 		tGenerator.BuildExcerpt ( tOptions );
 		return;
 	}
 
-	XQQuery_t tQuery;
-	if ( !sphParseExtendedQuery ( tQuery, tOptions.m_sWords.cstr(), pQueryTokenizer, pSchema, pDict, pIndex->GetSettings() ) )
-	{
-		sError = tQuery.m_sParseError;
-		return;
-	}
-	tQuery.m_pRoot->ClearFieldMask();
-
-	ExcerptGen_c tGenerator ( pTokenizer->IsUtf8() );
-	tGenerator.TokenizeDocument ( pData, iDataLen, pDict, pTokenizer, false, tOptions, pIndex->GetSettings() );
+	ExcerptGen_c tGenerator ( pDocTokenizer->IsUtf8() );
+	tGenerator.TokenizeDocument ( pData, iDataLen, pDict, pDocTokenizer, false, tOptions, pIndex->GetSettings(), ConvertSPZ ( tOptions.m_ePassageSPZ | eExtQuerySPZ ) );
 
-	CSphScopedPtr<SnippetZoneChecker_c> pZoneChecker ( new SnippetZoneChecker_c ( tGenerator.GetZones(), tGenerator.GetZonesName(), tQuery.m_dZones ) );
-	SnippetsQwordSetup tSetup ( &tGenerator, pTokenizer );
+	CSphScopedPtr<SnippetZoneChecker_c> pZoneChecker ( new SnippetZoneChecker_c ( tGenerator.GetZones(), tGenerator.GetZonesName(), tExtQuery.m_dZones ) );
+	SnippetsQwordSetup tSetup ( &tGenerator, pDocTokenizer );
 	CSphString sWarning;
 
 	tSetup.m_pDict = pDict;
@@ -3066,7 +3109,7 @@ void sphBuildExcerpt ( ExcerptQuery_t & tOptions, CSphDict * pDict, ISphTokenize
 	tSetup.m_pWarning = &sWarning;
 	tSetup.m_pZoneChecker = pZoneChecker.Ptr();
 
-	CSphScopedPtr<CSphHitMarker> pMarker ( CSphHitMarker::Create ( tQuery.m_pRoot, tSetup ) );
+	CSphScopedPtr<CSphHitMarker> pMarker ( CSphHitMarker::Create ( tExtQuery.m_pRoot, tSetup ) );
 	if ( !pMarker.Ptr() )
 	{
 		sError = sWarning;

+ 14 - 2
src/sphinxexcerpt.h

@@ -18,6 +18,14 @@
 
 #include "sphinx.h"
 
+enum ESphSpz
+{
+	SPH_SPZ_NONE		= 0,
+	SPH_SPZ_SENTENCE	= 1UL<<0,
+	SPH_SPZ_PARAGRAPH	= 1UL<<1,
+	SPH_SPZ_ZONE		= 1UL<<2
+};
+
 /// a query to generate an excerpt
 /// everything string is expected to be UTF-8
 struct ExcerptQuery_t
@@ -34,7 +42,6 @@ public:
 	int				m_iLimitPassages;	///< max passages in snippet
 	int				m_iAround;			///< how much words to highlight around each match
 	int				m_iPassageId;		///< current %PASSAGE_ID% counter value (must start at 1)
-	int				m_iPassageBoundary;	///< passage boundary mode
 	bool			m_bRemoveSpaces;	///< whether to collapse whitespace
 	bool			m_bExactPhrase;		///< whether to highlight exact phrase matches only
 	bool			m_bUseBoundaries;	///< whether to extract passages by phrase boundaries setup in tokenizer
@@ -59,14 +66,19 @@ public:
 	CSphString		m_sBeforeMatchPassage;
 	CSphString		m_sAfterMatchPassage;
 
+	DWORD			m_ePassageSPZ;
+
 public:
 	ExcerptQuery_t ();
 };
 
+struct XQQuery_t;
+
 /// an excerpt generator
 /// returns a newly allocated string in encoding specified by tokenizer on success
 /// returns NULL on failure
-void sphBuildExcerpt ( ExcerptQuery_t &, CSphDict *, ISphTokenizer *, const CSphSchema *, CSphIndex *, CSphString & sError, const CSphHTMLStripper *, ISphTokenizer * );
+void sphBuildExcerpt ( ExcerptQuery_t & tOptions, const CSphIndex * pIndex, const CSphHTMLStripper * pStripper, const XQQuery_t & tExtQuery,
+						DWORD eExtQuerySPZ, CSphString & sError, CSphDict * pDict, ISphTokenizer * pDocTokenizer, ISphTokenizer * pQueryTokenizer );
 
 #endif // _sphinxexcerpt_
 

+ 18 - 18
src/sphinxexpr.cpp

@@ -447,14 +447,14 @@ public:
 			case SPH_ATTR_BIGINT:	sBuf.SetSprintf ( INT64_FMT, m_pFirst->Int64Eval ( tMatch ) ); break;
 			case SPH_ATTR_FLOAT:	sBuf.SetSprintf ( "%f", m_pFirst->Eval ( tMatch ) ); break;
 			case SPH_ATTR_UINT32SET:
-			case SPH_ATTR_UINT64SET:
+			case SPH_ATTR_INT64SET:
 				{
 					const DWORD * pValues = m_pFirst->MvaEval ( tMatch );
 					if ( !pValues || !*pValues )
 						break;
 
 					DWORD nValues = *pValues++;
-					assert (!( m_eArg==SPH_ATTR_UINT64SET && ( nValues & 1 ) ));
+					assert (!( m_eArg==SPH_ATTR_INT64SET && ( nValues & 1 ) ));
 
 					// OPTIMIZE? minibuffer on stack, less allocs, manual formatting vs printf, etc
 					if ( m_eArg==SPH_ATTR_UINT32SET )
@@ -1139,7 +1139,7 @@ int ExprParser_t::ParseAttr ( int iAttr, const char* sTok, YYSTYPE * lvalp )
 	{
 	case SPH_ATTR_FLOAT:		iRes = TOK_ATTR_FLOAT;	break;
 	case SPH_ATTR_UINT32SET:	iRes = TOK_ATTR_MVA32; break;
-	case SPH_ATTR_UINT64SET:	iRes = TOK_ATTR_MVA64; break;
+	case SPH_ATTR_INT64SET:		iRes = TOK_ATTR_MVA64; break;
 	case SPH_ATTR_STRING:		iRes = TOK_ATTR_STRING; break;
 	case SPH_ATTR_INTEGER:
 	case SPH_ATTR_TIMESTAMP:
@@ -2629,12 +2629,12 @@ public:
 
 /// IN() evaluator, MVA attribute vs. constant values
 template < bool MVA64 >
-class Expr_MVAIn_c : public Expr_ArgVsConstSet_c<uint64_t>
+class Expr_MVAIn_c : public Expr_ArgVsConstSet_c<int64_t>
 {
 public:
 	/// pre-sort values for binary search
 	Expr_MVAIn_c ( const CSphAttrLocator & tLoc, int iLocator, ConstList_c * pConsts, UservarIntSet_c * pUservar )
-		: Expr_ArgVsConstSet_c<uint64_t> ( NULL, pConsts )
+		: Expr_ArgVsConstSet_c<int64_t> ( NULL, pConsts )
 		, m_tLocator ( tLoc )
 		, m_iLocator ( iLocator )
 		, m_pMvaPool ( NULL )
@@ -2687,8 +2687,8 @@ int Expr_MVAIn_c<false>::MvaEval ( const DWORD * pMva ) const
 	DWORD uLen = *pMva++;
 	const DWORD * pMvaMax = pMva+uLen;
 
-	const uint64_t * pFilter = m_pUservar ? (uint64_t*)m_pUservar->Begin() : m_dValues.Begin();
-	const uint64_t * pFilterMax = pFilter + ( m_pUservar ? m_pUservar->GetLength() : m_dValues.GetLength() );
+	const int64_t * pFilter = m_pUservar ? m_pUservar->Begin() : m_dValues.Begin();
+	const int64_t * pFilterMax = pFilter + ( m_pUservar ? m_pUservar->GetLength() : m_dValues.GetLength() );
 
 	const DWORD * L = pMva;
 	const DWORD * R = pMvaMax - 1;
@@ -2719,26 +2719,26 @@ int Expr_MVAIn_c<true>::MvaEval ( const DWORD * pMva ) const
 	assert ( ( uLen%2 )==0 );
 	const DWORD * pMvaMax = pMva+uLen;
 
-	const uint64_t * pFilter = m_pUservar ? (uint64_t*)m_pUservar->Begin() : m_dValues.Begin();
-	const uint64_t * pFilterMax = pFilter + ( m_pUservar ? m_pUservar->GetLength() : m_dValues.GetLength() );
+	const int64_t * pFilter = m_pUservar ? m_pUservar->Begin() : m_dValues.Begin();
+	const int64_t * pFilterMax = pFilter + ( m_pUservar ? m_pUservar->GetLength() : m_dValues.GetLength() );
 
-	const uint64_t * L = (const uint64_t *)pMva;
-	const uint64_t * R = (const uint64_t *)( pMvaMax - 2 );
+	const int64_t * L = (const int64_t *)pMva;
+	const int64_t * R = (const int64_t *)( pMvaMax - 2 );
 	for ( ; pFilter < pFilterMax; pFilter++ )
 	{
 		while ( L<=R )
 		{
-			const uint64_t * pVal = L + (R - L) / 2;
-			uint64_t uMva = MVA_UPSIZE ( (const DWORD *)pVal );
+			const int64_t * pVal = L + (R - L) / 2;
+			int64_t iMva = MVA_UPSIZE ( (const DWORD *)pVal );
 
-			if ( *pFilter > uMva )
+			if ( *pFilter > iMva )
 				L = pVal + 1;
-			else if ( *pFilter < uMva )
+			else if ( *pFilter < iMva )
 				R = pVal - 1;
 			else
 				return 1;
 		}
-		R = (const uint64_t *) ( pMvaMax - 2 );
+		R = (const int64_t *) ( pMvaMax - 2 );
 	}
 	return 0;
 }
@@ -3249,7 +3249,7 @@ int ExprParser_t::AddNodeAttr ( int iTokenType, uint64_t uAttrLocator )
 
 	if ( iTokenType==TOK_ATTR_FLOAT )			tNode.m_eRetType = SPH_ATTR_FLOAT;
 	else if ( iTokenType==TOK_ATTR_MVA32 )		tNode.m_eRetType = SPH_ATTR_UINT32SET;
-	else if ( iTokenType==TOK_ATTR_MVA64 )		tNode.m_eRetType = SPH_ATTR_UINT64SET;
+	else if ( iTokenType==TOK_ATTR_MVA64 )		tNode.m_eRetType = SPH_ATTR_INT64SET;
 	else if ( iTokenType==TOK_ATTR_STRING )		tNode.m_eRetType = SPH_ATTR_STRING;
 	else if ( tNode.m_tLocator.m_iBitCount>32 )	tNode.m_eRetType = SPH_ATTR_BIGINT;
 	else										tNode.m_eRetType = SPH_ATTR_INTEGER;
@@ -3608,7 +3608,7 @@ int ExprParser_t::AddNodeUdf ( int iCall, int iArg )
 				case SPH_ATTR_UINT32SET:
 					eRes = SPH_UDF_TYPE_UINT32SET;
 					break;
-				case SPH_ATTR_UINT64SET:
+				case SPH_ATTR_INT64SET:
 					eRes = SPH_UDF_TYPE_UINT64SET;
 					break;
 				default:

+ 3 - 3
src/sphinxexpr.h

@@ -38,7 +38,7 @@ enum ESphAttr
 	SPH_ATTR_POLY2D		= 9,			///< vector of floats, 2D polygon (see POLY2D)
 	SPH_ATTR_STRINGPTR	= 10,			///< string (binary, in-memory, stored as pointer to the zero-terminated string).
 	SPH_ATTR_UINT32SET	= 0x40000001UL,	///< MVA, set of unsigned 32-bit integers
-	SPH_ATTR_UINT64SET	= 0x40000002UL	///< MVA, set of unsigned 64-bit integers
+	SPH_ATTR_INT64SET	= 0x40000002UL	///< MVA, set of signed 64-bit integers
 };
 
 /// expression evaluator
@@ -113,8 +113,8 @@ struct ISphExprHook
 /// returns pointer to evaluator on success
 /// fills pAttrType with result type (for now, can be SPH_ATTR_SINT or SPH_ATTR_FLOAT)
 /// fills pUsesWeight with a flag whether match relevance is referenced in expression AST
-ISphExpr * sphExprParse ( const char * sExpr, const CSphSchema & tSchema, ESphAttr * pAttrType, bool * pUsesWeight, 
-						 CSphString & sError, CSphSchema * pExtra=NULL, ISphExprHook * pHook=NULL, bool * pZonespanlist=NULL );
+ISphExpr * sphExprParse ( const char * sExpr, const CSphSchema & tSchema, ESphAttr * pAttrType, bool * pUsesWeight,
+							CSphString & sError, CSphSchema * pExtra=NULL, ISphExprHook * pHook=NULL, bool * pZonespanlist=NULL );
 
 //////////////////////////////////////////////////////////////////////////
 

+ 32 - 32
src/sphinxfilter.cpp

@@ -104,18 +104,18 @@ bool IFilter_Values::EvalBlockValues ( SphAttr_t uBlockMin, SphAttr_t uBlockMax
 /// range
 struct IFilter_Range: virtual ISphFilter
 {
-	SphAttr_t m_uMinValue;
-	SphAttr_t m_uMaxValue;
+	SphAttr_t m_iMinValue;
+	SphAttr_t m_iMaxValue;
 
 	virtual void SetRange ( SphAttr_t tMin, SphAttr_t tMax )
 	{
-		m_uMinValue = tMin;
-		m_uMaxValue = tMax;
+		m_iMinValue = tMin;
+		m_iMaxValue = tMax;
 	}
 
 	bool EvalRange ( const SphAttr_t uValue ) const
 	{
-		return uValue>=m_uMinValue && uValue<=m_uMaxValue;
+		return uValue>=m_iMinValue && uValue<=m_iMaxValue;
 	}
 };
 
@@ -237,7 +237,7 @@ struct Filter_Range: public IFilter_Attr, IFilter_Range
 
 		SphAttr_t uBlockMin = sphGetRowAttr ( DOCINFO2ATTRS ( pMinDocinfo ), m_tLocator );
 		SphAttr_t uBlockMax = sphGetRowAttr ( DOCINFO2ATTRS ( pMaxDocinfo ), m_tLocator );
-		return (!( m_uMaxValue<uBlockMin || m_uMinValue>uBlockMax )); // not-reject
+		return (!( m_iMaxValue<uBlockMin || m_iMinValue>uBlockMax )); // not-reject
 	}
 };
 
@@ -308,7 +308,7 @@ struct Filter_IdRange: public IFilter_Range
 	virtual bool Eval ( const CSphMatch & tMatch ) const
 	{
 		const SphDocID_t uID = tMatch.m_iDocID;
-		return uID>=(SphDocID_t)m_uMinValue && uID<=(SphDocID_t)m_uMaxValue;
+		return uID>=(SphDocID_t)m_iMinValue && uID<=(SphDocID_t)m_iMaxValue;
 	}
 
 	virtual bool EvalBlock ( const DWORD * pMinDocinfo, const DWORD * pMaxDocinfo ) const
@@ -316,7 +316,7 @@ struct Filter_IdRange: public IFilter_Range
 		const SphDocID_t uBlockMin = DOCINFO2ID ( pMinDocinfo );
 		const SphDocID_t uBlockMax = DOCINFO2ID ( pMaxDocinfo );
 
-		return (!( (SphDocID_t)m_uMaxValue<uBlockMin || (SphDocID_t)m_uMinValue>uBlockMax ));
+		return (!( (SphDocID_t)m_iMaxValue<uBlockMin || (SphDocID_t)m_iMinValue>uBlockMax ));
 	}
 
 	Filter_IdRange ()
@@ -403,24 +403,24 @@ bool Filter_MVAValues<true>::MvaEval ( const DWORD * pMva, const DWORD * pMvaMax
 	const SphAttr_t * pFilter = m_pValues;
 	const SphAttr_t * pFilterMax = pFilter + m_iValueCount;
 
-	const uint64_t * L = (const uint64_t *)pMva;
-	const uint64_t * R = (const uint64_t *)( pMvaMax - 2 );
+	const int64_t * L = (const int64_t *)pMva;
+	const int64_t * R = (const int64_t *)( pMvaMax - 2 );
 	for ( ; pFilter < pFilterMax; pFilter++ )
 	{
-		uint64_t uFilter = *pFilter;
+		int64_t uFilter = *pFilter;
 		while ( L<=R )
 		{
-			const uint64_t * pVal = L + (R - L) / 2;
-			uint64_t uMva = MVA_UPSIZE ( (const DWORD *)pVal );
+			const int64_t * pVal = L + (R - L) / 2;
+			int64_t iMva = MVA_UPSIZE ( (const DWORD *)pVal );
 
-			if ( uFilter > uMva )
+			if ( uFilter > iMva )
 				L = pVal + 1;
-			else if ( uFilter < uMva )
+			else if ( uFilter < iMva )
 				R = pVal - 1;
 			else
 				return true;
 		}
-		R = (const uint64_t *)( pMvaMax - 2 );
+		R = (const int64_t *)( pMvaMax - 2 );
 	}
 	return false;
 }
@@ -451,42 +451,42 @@ bool Filter_MVARange<false>::MvaEval ( const DWORD * pMva, const DWORD * pMvaMax
 	while ( L<=R )
 	{
 		const DWORD * m = L + (R - L) / 2;
-		if ( m_uMinValue > *m )
+		if ( m_iMinValue > *m )
 			L = m + 1;
-		else if ( m_uMinValue < *m )
+		else if ( m_iMinValue < *m )
 			R = m - 1;
 		else
 			return true;
 	}
 	if ( L==pMvaMax )
 		return false;
-	return *L<=m_uMaxValue;
+	return *L<=m_iMaxValue;
 }
 
 
 template<>
 bool Filter_MVARange<true>::MvaEval ( const DWORD * pMva, const DWORD * pMvaMax ) const
 {
-	const uint64_t * L = (const uint64_t *)pMva;
-	const uint64_t * R = (const uint64_t *)( pMvaMax - 2 );
+	const int64_t * L = (const int64_t *)pMva;
+	const int64_t * R = (const int64_t *)( pMvaMax - 2 );
 
 	while ( L<=R )
 	{
-		const uint64_t * pVal = L + (R - L) / 2;
-		uint64_t uMva = MVA_UPSIZE ( (const DWORD *)pVal );
+		const int64_t * pVal = L + (R - L) / 2;
+		int64_t iMva = MVA_UPSIZE ( (const DWORD *)pVal );
 
-		if ( (uint64_t)m_uMinValue>uMva )
+		if ( m_iMinValue>iMva )
 			L = pVal + 1;
-		else if ( (uint64_t)m_uMinValue < uMva )
+		else if ( m_iMinValue < iMva )
 			R = pVal - 1;
 		else
 			return true;
 	}
-	if ( L==(const uint64_t *)pMvaMax )
+	if ( L==(const int64_t *)pMvaMax )
 		return false;
 
-	uint64_t uMvaL = MVA_UPSIZE ( (const DWORD *)L );
-	return uMvaL<=(uint64_t)m_uMaxValue;
+	int64_t iMvaL = MVA_UPSIZE ( (const DWORD *)L );
+	return iMvaL<=m_iMaxValue;
 }
 
 
@@ -749,18 +749,18 @@ static inline ISphFilter * ReportError ( CSphString & sError, const char * sMess
 static ISphFilter * CreateFilter ( ESphAttr eAttrType, ESphFilter eFilterType, int iNumValues, const CSphAttrLocator & tLoc, CSphString & sError )
 {
 	// MVA
-	if ( eAttrType==SPH_ATTR_UINT32SET || eAttrType==SPH_ATTR_UINT64SET )
+	if ( eAttrType==SPH_ATTR_UINT32SET || eAttrType==SPH_ATTR_INT64SET )
 	{
 		switch ( eFilterType )
 		{
 		case SPH_FILTER_VALUES:
-			if ( eAttrType==SPH_ATTR_UINT64SET )
+			if ( eAttrType==SPH_ATTR_INT64SET )
 				return new Filter_MVAValues<true>();
 			else
 				return new Filter_MVAValues<false>();
 
 		case SPH_FILTER_RANGE:
-			if ( eAttrType==SPH_ATTR_UINT64SET )
+			if ( eAttrType==SPH_ATTR_INT64SET )
 				return new Filter_MVARange<true>();
 			else
 				return new Filter_MVARange<false>();
@@ -829,7 +829,7 @@ ISphFilter * sphCreateFilter ( const CSphFilterSettings & tSettings, const CSphS
 		if ( pAttr )
 			pFilter->SetLocator ( pAttr->m_tLocator );
 
-		pFilter->SetRange ( tSettings.m_uMinValue, tSettings.m_uMaxValue );
+		pFilter->SetRange ( tSettings.m_iMinValue, tSettings.m_iMaxValue );
 		pFilter->SetRangeFloat ( tSettings.m_fMinValue, tSettings.m_fMaxValue );
 		pFilter->SetMVAStorage ( pMvaPool );
 

+ 15 - 15
src/sphinxint.h

@@ -380,10 +380,10 @@ struct MemTracker_c : ISphNoncopyable
 #define DOCINFO_INDEX_FREQ 128 // FIXME? make this configurable
 
 
-inline uint64_t MVA_UPSIZE ( const DWORD * pMva )
+inline int64_t MVA_UPSIZE ( const DWORD * pMva )
 {
-	uint64_t uMva = (uint64_t)pMva[0] | ( ( (uint64_t)pMva[1] )<<32 );
-	return uMva;
+	int64_t iMva = (int64_t)( (uint64_t)pMva[0] | ( ( (uint64_t)pMva[1] )<<32 ) );
+	return iMva;
 }
 
 
@@ -403,10 +403,10 @@ private:
 	CSphVector<float>			m_dFloatMax;
 	CSphVector<float>			m_dFloatIndexMin;
 	CSphVector<float>			m_dFloatIndexMax;
-	CSphVector<uint64_t>		m_dMvaMin;
-	CSphVector<uint64_t>		m_dMvaMax;
-	CSphVector<uint64_t>		m_dMvaIndexMin;
-	CSphVector<uint64_t>		m_dMvaIndexMax;
+	CSphVector<int64_t>			m_dMvaMin;
+	CSphVector<int64_t>			m_dMvaMax;
+	CSphVector<int64_t>			m_dMvaIndexMin;
+	CSphVector<int64_t>			m_dMvaIndexMax;
 	DWORD						m_uStride;		// size of attribute's chunk (in DWORDs)
 	DWORD						m_uElements;	// counts total number of collected min/max pairs
 	int							m_iLoop;		// loop inside one set
@@ -472,7 +472,7 @@ void AttrIndexBuilder_t<DOCID>::ResetLocal()
 	ARRAY_FOREACH ( i, m_dMvaMin )
 	{
 		m_dMvaMin[i] = LLONG_MAX;
-		m_dMvaMax[i] = 0;
+		m_dMvaMax[i] = ( i>=m_iMva64 ? LLONG_MIN : 0 );
 	}
 	m_uStart = m_uLast = 0;
 	m_iLoop = 0;
@@ -573,7 +573,7 @@ AttrIndexBuilder_t<DOCID>::AttrIndexBuilder_t ( const CSphSchema & tSchema )
 	for ( int i=0; i<tSchema.GetAttrsCount(); i++ )
 	{
 		const CSphColumnInfo & tCol = tSchema.GetAttr(i);
-		if ( tCol.m_eAttrType==SPH_ATTR_UINT64SET )
+		if ( tCol.m_eAttrType==SPH_ATTR_INT64SET )
 			m_dMvaAttrs.Add ( tCol.m_tLocator );
 	}
 
@@ -612,7 +612,7 @@ void AttrIndexBuilder_t<DOCID>::Prepare ( DWORD * pOutBuffer, DWORD * pOutMax )
 	ARRAY_FOREACH ( i, m_dMvaIndexMin )
 	{
 		m_dMvaIndexMin[i] = LLONG_MAX;
-		m_dMvaIndexMax[i] = 0;
+		m_dMvaIndexMax[i] = ( i>=m_iMva64 ? LLONG_MIN : 0 );
 	}
 	ResetLocal();
 }
@@ -653,9 +653,9 @@ void AttrIndexBuilder_t<DOCID>::CollectRowMVA ( int iAttr, DWORD uCount, const D
 		assert ( ( uCount%2 )==0 );
 		for ( ; uCount>0; uCount-=2, pMva+=2 )
 		{
-			uint64_t uVal = MVA_UPSIZE ( pMva );
-			m_dMvaMin[iAttr] = Min ( m_dMvaMin[iAttr], uVal );
-			m_dMvaMax[iAttr] = Max ( m_dMvaMax[iAttr], uVal );
+			int64_t iVal = MVA_UPSIZE ( pMva );
+			m_dMvaMin[iAttr] = Min ( m_dMvaMin[iAttr], iVal );
+			m_dMvaMax[iAttr] = Max ( m_dMvaMax[iAttr], iVal );
 		}
 	} else
 	{
@@ -989,7 +989,7 @@ inline const char * sphTypeName ( ESphAttr eType )
 		case SPH_ATTR_WORDCOUNT:	return "wordcount";
 		case SPH_ATTR_STRINGPTR:	return "stringptr";
 		case SPH_ATTR_UINT32SET:	return "mva";
-		case SPH_ATTR_UINT64SET:	return "mva64";
+		case SPH_ATTR_INT64SET:		return "mva64";
 		default:					return "unknown";
 	}
 }
@@ -1009,7 +1009,7 @@ inline const char * sphTypeDirective ( ESphAttr eType )
 		case SPH_ATTR_STRINGPTR:	return "sql_attr_string";
 		case SPH_ATTR_WORDCOUNT:	return "sql_attr_wordcount";
 		case SPH_ATTR_UINT32SET:	return "sql_attr_multi";
-		case SPH_ATTR_UINT64SET:	return "sql_attr_multi bigint";
+		case SPH_ATTR_INT64SET:		return "sql_attr_multi bigint";
 		default:					return "???";
 	}
 }

+ 5 - 5
src/sphinxql.y

@@ -272,27 +272,27 @@ where_item:
 		}
 	| expr_ident TOK_BETWEEN const_int TOK_AND const_int
 		{
-			if ( !pParser->AddUintRangeFilter ( $1.m_sValue, $3.m_iValue, $5.m_iValue ) )
+			if ( !pParser->AddIntRangeFilter ( $1.m_sValue, $3.m_iValue, $5.m_iValue ) )
 				YYERROR;
 		}
 	| expr_ident '>' const_int
 		{
-			if ( !pParser->AddUintRangeFilter ( $1.m_sValue, $3.m_iValue+1, UINT_MAX ) )
+			if ( !pParser->AddIntRangeFilter ( $1.m_sValue, $3.m_iValue+1, LLONG_MAX ) )
 				YYERROR;
 		}
 	| expr_ident '<' const_int
 		{
-			if ( !pParser->AddUintRangeFilter ( $1.m_sValue, 0, $3.m_iValue-1 ) )
+			if ( !pParser->AddIntRangeFilter ( $1.m_sValue, LLONG_MIN, $3.m_iValue-1 ) )
 				YYERROR;
 		}
 	| expr_ident TOK_GTE const_int
 		{
-			if ( !pParser->AddUintRangeFilter ( $1.m_sValue, $3.m_iValue, UINT_MAX ) )
+			if ( !pParser->AddIntRangeFilter ( $1.m_sValue, $3.m_iValue, LLONG_MAX ) )
 				YYERROR;
 		}
 	| expr_ident TOK_LTE const_int
 		{
-			if ( !pParser->AddUintRangeFilter ( $1.m_sValue, 0, $3.m_iValue ) )
+			if ( !pParser->AddIntRangeFilter ( $1.m_sValue, LLONG_MIN, $3.m_iValue ) )
 				YYERROR;
 		}
 	| expr_ident '=' const_float

+ 4 - 3
src/sphinxquery.cpp

@@ -597,8 +597,9 @@ int XQParser_t::GetToken ( YYSTYPE * lvalp )
 		while ( p<sEnd && isdigit ( *(BYTE*)p ) ) p++;
 
 		static const int NUMBER_BUF_LEN = 10; // max strlen of int32
-
-		if ( p>sToken && p-sToken<NUMBER_BUF_LEN && ( *p=='\0' || isspace ( *(BYTE*)p ) || IsSpecial(*p) ) )
+		if ( p>sToken && p-sToken<NUMBER_BUF_LEN
+			&& !( *p=='-' && !( p-sToken==1 && sphIsModifier ( p[-1] ) ) ) // !bDashInside copied over from arbitration
+			&& ( *p=='\0' || sphIsSpace(*p) || IsSpecial(*p) ) )
 		{
 			if ( m_pTokenizer->GetToken() && m_pTokenizer->TokenIsBlended() ) // number with blended should be tokenized as usual
 			{
@@ -1204,7 +1205,7 @@ static void xqDump ( XQNode_t * pNode, int iIndent )
 	} else
 	{
 		xqIndent ( iIndent );
-		printf ( "MATCH(%d,%d):", pNode->m_dFieldMask.GetMask32(), pNode->m_iOpArg );
+		printf ( "MATCH(%d,%d):", pNode->m_dSpec.m_dFieldMask.GetMask32(), pNode->m_iOpArg );
 
 		ARRAY_FOREACH ( i, pNode->m_dWords )
 		{

+ 18 - 18
src/sphinxrt.cpp

@@ -1535,7 +1535,7 @@ void RtAccum_t::AddDocument ( ISphHits * pHits, const CSphMatch & tDoc, int iRow
 			{
 				sphSetRowAttr ( pAttrs, tColumn.m_tLocator, 0 );
 			}
-		} else if ( tColumn.m_eAttrType==SPH_ATTR_UINT32SET || tColumn.m_eAttrType==SPH_ATTR_UINT64SET )
+		} else if ( tColumn.m_eAttrType==SPH_ATTR_UINT32SET || tColumn.m_eAttrType==SPH_ATTR_INT64SET )
 		{
 			assert ( m_dMvas.GetLength() );
 			int iCount = dMvas[iMva];
@@ -2184,7 +2184,7 @@ public:
 		: m_tDst ( tDst )
 	{
 		ExtractLocators ( tSchema, SPH_ATTR_UINT32SET, m_dLocators );
-		ExtractLocators ( tSchema, SPH_ATTR_UINT64SET, m_dLocators );
+		ExtractLocators ( tSchema, SPH_ATTR_INT64SET, m_dLocators );
 	}
 	const CSphVector<CSphAttrLocator> & GetLocators () const { return m_dLocators; }
 
@@ -2220,7 +2220,7 @@ public:
 		: m_dDst ( dDst )
 	{
 		ExtractLocators ( tSchema, SPH_ATTR_UINT32SET, m_dLocators );
-		ExtractLocators ( tSchema, SPH_ATTR_UINT64SET, m_dLocators );
+		ExtractLocators ( tSchema, SPH_ATTR_INT64SET, m_dLocators );
 	}
 	const CSphVector<CSphAttrLocator> & GetLocators () const { return m_dLocators; }
 
@@ -4343,7 +4343,7 @@ int RtIndex_t::DebugCheck ( FILE * fp )
 		for ( int iAttr=0; iAttr<m_tSchema.GetAttrsCount(); iAttr++ )
 		{
 			const CSphColumnInfo & tAttr = m_tSchema.GetAttr(iAttr);
-			if ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_UINT64SET )
+			if ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_INT64SET )
 			{
 				if ( tAttr.m_tLocator.m_iBitCount!=ROWITEM_BITS )
 				{
@@ -4368,7 +4368,7 @@ int RtIndex_t::DebugCheck ( FILE * fp )
 		for ( int iAttr=0; iAttr<m_tSchema.GetAttrsCount(); iAttr++ )
 		{
 			const CSphColumnInfo & tAttr = m_tSchema.GetAttr(iAttr);
-			if ( tAttr.m_eAttrType==SPH_ATTR_UINT64SET )
+			if ( tAttr.m_eAttrType==SPH_ATTR_INT64SET )
 				dMvaItems.Add ( tAttr.m_tLocator.m_iBitOffset/ROWITEM_BITS );
 		}
 
@@ -5024,8 +5024,8 @@ static void AddKillListFilter ( CSphVector<CSphFilterSettings> * pExtra, const S
 	CSphFilterSettings & tFilter = pExtra->Add();
 	tFilter.m_bExclude = true;
 	tFilter.m_eType = SPH_FILTER_VALUES;
-	tFilter.m_uMinValue = pKillList[0];
-	tFilter.m_uMaxValue = pKillList[nEntries-1];
+	tFilter.m_iMinValue = pKillList[0];
+	tFilter.m_iMaxValue = pKillList[nEntries-1];
 	tFilter.m_sAttrName = "@id";
 	tFilter.SetExternalValues ( pKillList, nEntries );
 }
@@ -5513,7 +5513,7 @@ bool RtIndex_t::MultiQuery ( const CSphQuery * pQuery, CSphQueryResult * pResult
 
 			dStringGetLoc.Add ( m_tSchema.GetAttr ( iInLocator ).m_tLocator );
 			dStringSetLoc.Add ( tSetInfo.m_tLocator );
-		} else if ( tSetInfo.m_eAttrType==SPH_ATTR_UINT32SET || tSetInfo.m_eAttrType==SPH_ATTR_UINT64SET )
+		} else if ( tSetInfo.m_eAttrType==SPH_ATTR_UINT32SET || tSetInfo.m_eAttrType==SPH_ATTR_INT64SET )
 		{
 			const int iInLocator = m_tSchema.GetAttrIndex ( tSetInfo.m_sName.cstr() );
 			assert ( iInLocator>=0 );
@@ -5828,34 +5828,34 @@ int RtIndex_t::UpdateAttributes ( const CSphAttrUpdate & tUpd, int iIndex, CSphS
 		// forbid updates on non-int columns
 		const CSphColumnInfo & tCol = m_tSchema.GetAttr(iIndex);
 		if ( !( tCol.m_eAttrType==SPH_ATTR_BOOL || tCol.m_eAttrType==SPH_ATTR_INTEGER || tCol.m_eAttrType==SPH_ATTR_TIMESTAMP
-			|| tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_UINT64SET
+			|| tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_INT64SET
 			|| tCol.m_eAttrType==SPH_ATTR_BIGINT || tCol.m_eAttrType==SPH_ATTR_FLOAT ))
 		{
 			sError.SetSprintf ( "attribute '%s' can not be updated (must be boolean, integer, bigint, float or timestamp or MVA)", tUpd.m_dAttrs[i].m_sName.cstr() );
 			return -1;
 		}
 
-		bool bSrcMva = ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_UINT64SET );
-		bool bDstMva = ( tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_UINT32SET || tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_UINT64SET );
+		bool bSrcMva = ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_INT64SET );
+		bool bDstMva = ( tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_UINT32SET || tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_INT64SET );
 		if ( bSrcMva!=bDstMva )
 		{
 			sError.SetSprintf ( "attribute '%s' MVA flag mismatch", tUpd.m_dAttrs[i].m_sName.cstr() );
 			return -1;
 		}
 
-		if ( tCol.m_eAttrType==SPH_ATTR_UINT32SET && tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_UINT64SET )
+		if ( tCol.m_eAttrType==SPH_ATTR_UINT32SET && tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_INT64SET )
 		{
 			sError.SetSprintf ( "attribute '%s' MVA bits (dst=%d, src=%d) mismatch", tUpd.m_dAttrs[i].m_sName.cstr(),
 				tCol.m_eAttrType, tUpd.m_dAttrs[i].m_eAttrType );
 			return -1;
 		}
 
-		if ( tCol.m_eAttrType==SPH_ATTR_UINT64SET )
+		if ( tCol.m_eAttrType==SPH_ATTR_INT64SET )
 			uDst64 |= ( U64C(1)<<i );
 
 		dFloats.Add ( tCol.m_eAttrType==SPH_ATTR_FLOAT );
 		dLocators.Add ( tCol.m_tLocator );
-		bHasMva |= ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_UINT64SET );
+		bHasMva |= ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_INT64SET );
 
 		// find dupes to optimize
 		ARRAY_FOREACH ( i, dIndexes )
@@ -5922,7 +5922,7 @@ int RtIndex_t::UpdateAttributes ( const CSphAttrUpdate & tUpd, int iIndex, CSphS
 			int iPos = tUpd.m_dRowOffset[iUpd];
 			ARRAY_FOREACH ( iCol, tUpd.m_dAttrs )
 			{
-				if ( !( tUpd.m_dAttrs[iCol].m_eAttrType==SPH_ATTR_UINT32SET || tUpd.m_dAttrs[iCol].m_eAttrType==SPH_ATTR_UINT64SET ) )
+				if ( !( tUpd.m_dAttrs[iCol].m_eAttrType==SPH_ATTR_UINT32SET || tUpd.m_dAttrs[iCol].m_eAttrType==SPH_ATTR_INT64SET ) )
 				{
 					if ( dIndexes[iCol]>=0 )
 					{
@@ -6225,8 +6225,8 @@ void RtIndex_t::Optimize ( volatile bool * pForceTerminate, ThrottleState_t * pT
 			CSphFilterSettings tFilterSettings;
 			tFilterSettings.m_bExclude = true;
 			tFilterSettings.m_eType = SPH_FILTER_VALUES;
-			tFilterSettings.m_uMinValue = dKlist[0];
-			tFilterSettings.m_uMaxValue = dKlist.Last();
+			tFilterSettings.m_iMinValue = dKlist[0];
+			tFilterSettings.m_iMaxValue = dKlist.Last();
 			tFilterSettings.m_sAttrName = "@id";
 			tFilterSettings.SetExternalValues ( dKlist.Begin(), dKlist.GetLength() );
 			pFilter = sphCreateFilter ( tFilterSettings, tSchema, NULL, sError );
@@ -7559,7 +7559,7 @@ bool sphRTSchemaConfigure ( const CSphConfigSection & hIndex, CSphSchema * pSche
 	// attrs
 	const int iNumTypes = 7;
 	const char * sTypes[iNumTypes] = { "rt_attr_uint", "rt_attr_bigint", "rt_attr_float", "rt_attr_timestamp", "rt_attr_string", "rt_attr_multi", "rt_attr_multi_64" };
-	const ESphAttr iTypes[iNumTypes] = { SPH_ATTR_INTEGER, SPH_ATTR_BIGINT, SPH_ATTR_FLOAT, SPH_ATTR_TIMESTAMP, SPH_ATTR_STRING, SPH_ATTR_UINT32SET, SPH_ATTR_UINT64SET };
+	const ESphAttr iTypes[iNumTypes] = { SPH_ATTR_INTEGER, SPH_ATTR_BIGINT, SPH_ATTR_FLOAT, SPH_ATTR_TIMESTAMP, SPH_ATTR_STRING, SPH_ATTR_UINT32SET, SPH_ATTR_INT64SET };
 
 	for ( int iType=0; iType<iNumTypes; iType++ )
 	{

+ 8 - 2
src/sphinxsearch.cpp

@@ -6547,6 +6547,9 @@ ISphRanker * sphCreateRanker ( const XQQuery_t & tXQ, const CSphQuery * pQuery,
 
 void CSphHitMarker::Mark ( CSphVector<SphHitMark_t> & dMarked )
 {
+	if ( !m_pRoot )
+		return;
+
 	const ExtHit_t * pHits = NULL;
 	const ExtDoc_t * pDocs = NULL;
 
@@ -6581,8 +6584,11 @@ CSphHitMarker::~CSphHitMarker ()
 
 CSphHitMarker * CSphHitMarker::Create ( const XQNode_t * pRoot, const ISphQwordSetup & tSetup )
 {
-	ExtNode_i * pNode = ExtNode_i::Create ( pRoot, tSetup );
-	if ( pNode )
+	ExtNode_i * pNode = NULL;
+	if ( pRoot )
+		pNode = ExtNode_i::Create ( pRoot, tSetup );
+
+	if ( !pRoot || pNode )
 	{
 		CSphHitMarker * pMarker = new CSphHitMarker;
 		pMarker->m_pRoot = pNode;

+ 30 - 17
src/sphinxsort.cpp

@@ -1027,7 +1027,7 @@ protected:
 	CSphAttrLocator	m_tLoc;
 
 public:
-	AggrConcat_t ( const CSphColumnInfo & tCol )
+	explicit AggrConcat_t ( const CSphColumnInfo & tCol )
 		: m_tLoc ( tCol.m_tLocator )
 	{}
 
@@ -1560,8 +1560,8 @@ public:
 			assert ( ( iValues%2 )==0 );
 			for ( ;iValues>0; iValues-=2, pValues+=2 )
 			{
-				uint64_t uMva = MVA_UPSIZE ( pValues );
-				SphGroupKey_t uGroupkey = this->m_pGrouper->KeyFromValue ( uMva );
+				int64_t iMva = MVA_UPSIZE ( pValues );
+				SphGroupKey_t uGroupkey = this->m_pGrouper->KeyFromValue ( iMva );
 				bRes |= this->PushEx ( tEntry, uGroupkey, false );
 			}
 
@@ -2317,8 +2317,8 @@ static bool SetupGroupbySettings ( const CSphQuery * pQuery, const CSphSchema &
 			return false;
 	}
 
-	tSettings.m_bMVA = ( eType==SPH_ATTR_UINT32SET || eType==SPH_ATTR_UINT64SET );
-	tSettings.m_bMva64 = ( eType==SPH_ATTR_UINT64SET );
+	tSettings.m_bMVA = ( eType==SPH_ATTR_UINT32SET || eType==SPH_ATTR_INT64SET );
+	tSettings.m_bMva64 = ( eType==SPH_ATTR_INT64SET );
 
 	// setup distinct attr
 	if ( !pQuery->m_sGroupDistinct.IsEmpty() )
@@ -3029,7 +3029,6 @@ ISphMatchSorter * sphCreateQueue ( const CSphQuery * pQuery, const CSphSchema &
 		}
 
 		// a new and shiny expression, lets parse
-		bool bUsesWeight;
 		CSphColumnInfo tExprCol ( tItem.m_sAlias.cstr(), SPH_ATTR_NONE );
 
 		// tricky bit
@@ -3042,10 +3041,10 @@ ISphMatchSorter * sphCreateQueue ( const CSphQuery * pQuery, const CSphSchema &
 		{
 			CSphString sExpr2;
 			sExpr2.SetSprintf ( "TO_STRING(%s)", sExpr.cstr() );
-			tExprCol.m_pExpr = sphExprParse ( sExpr2.cstr(), tSorterSchema, &tExprCol.m_eAttrType, &bUsesWeight, sError, pExtra, NULL, &bHasZonespanlist );
+			tExprCol.m_pExpr = sphExprParse ( sExpr2.cstr(), tSorterSchema, &tExprCol.m_eAttrType, &tExprCol.m_bWeight, sError, pExtra, NULL, &bHasZonespanlist );
 		} else
 		{
-			tExprCol.m_pExpr = sphExprParse ( sExpr.cstr(), tSorterSchema, &tExprCol.m_eAttrType, &bUsesWeight, sError, pExtra, NULL, &bHasZonespanlist );
+			tExprCol.m_pExpr = sphExprParse ( sExpr.cstr(), tSorterSchema, &tExprCol.m_eAttrType, &tExprCol.m_bWeight, sError, pExtra, NULL, &bHasZonespanlist );
 		}
 		bNeedZonespanlist |= bHasZonespanlist;
 		tExprCol.m_eAggrFunc = tItem.m_eAggrFunc;
@@ -3068,7 +3067,7 @@ ISphMatchSorter * sphCreateQueue ( const CSphQuery * pQuery, const CSphSchema &
 			tExprCol.m_eAttrType = SPH_ATTR_STRINGPTR;
 			tExprCol.m_tLocator.m_iBitCount = ROWITEMPTR_BITS;
 		}
-		
+
 		// postpone aggregates, add non-aggregates
 		if ( tExprCol.m_eAggrFunc==SPH_AGGR_NONE )
 		{
@@ -3083,26 +3082,40 @@ ISphMatchSorter * sphCreateQueue ( const CSphQuery * pQuery, const CSphSchema &
 			ARRAY_FOREACH ( i, pQuery->m_dFilters )
 				if ( pQuery->m_dFilters[i].m_sAttrName==tExprCol.m_sName )
 			{
-				if ( bUsesWeight )
+				if ( tExprCol.m_bWeight )
 				{
-					tExprCol.m_eStage = SPH_EVAL_PRESORT; // special, weight filter
+					tExprCol.m_eStage = SPH_EVAL_PRESORT; // special, weight filter ( short cut )
 					break;
 				}
 
-				// usual filter
-				tExprCol.m_eStage = SPH_EVAL_PREFILTER;
-
 				// so we are about to add a filter condition
 				// but it might depend on some preceding columns
-				// lets detect those and move them to prefilter phase too
+				// lets detect those and move them to prefilter \ presort phase too
 				CSphVector<int> dCur;
 				tExprCol.m_pExpr->GetDependencyColumns ( dCur );
 
+				// usual filter
+				tExprCol.m_eStage = SPH_EVAL_PREFILTER;
+				ARRAY_FOREACH ( i, dCur )
+				{
+					const CSphColumnInfo & tCol = tSorterSchema.GetAttr ( dCur[i] );
+					if ( tCol.m_bWeight )
+					{
+						tExprCol.m_eStage = SPH_EVAL_PRESORT;
+						tExprCol.m_bWeight = true;
+					}
+					if ( tCol.m_pExpr.Ptr() )
+					{
+						tCol.m_pExpr->GetDependencyColumns ( dCur );
+					}
+				}
+				dCur.Uniq();
+
 				ARRAY_FOREACH ( i, dCur )
 				{
 					CSphColumnInfo & tDep = const_cast < CSphColumnInfo & > ( tSorterSchema.GetAttr ( dCur[i] ) );
-					if ( tDep.m_eStage>SPH_EVAL_PREFILTER )
-						tDep.m_eStage = SPH_EVAL_PREFILTER;
+					if ( tDep.m_eStage>tExprCol.m_eStage )
+						tDep.m_eStage = tExprCol.m_eStage;
 				}
 				break;
 			}

+ 9 - 4
src/sphinxutils.cpp

@@ -945,8 +945,7 @@ bool CSphConfigParser::Parse ( const char * sFileName, const char * pBuffer )
 
 bool sphConfTokenizer ( const CSphConfigSection & hIndex, CSphTokenizerSettings & tSettings, CSphString & sError )
 {
-	// charset_type
-	CSphScopedPtr<ISphTokenizer> pTokenizer ( NULL );
+	tSettings.m_iNgramLen = Max ( hIndex.GetInt ( "ngram_len" ), 0 );
 
 	if ( !hIndex("charset_type") || hIndex["charset_type"]=="sbcs" )
 	{
@@ -954,7 +953,14 @@ bool sphConfTokenizer ( const CSphConfigSection & hIndex, CSphTokenizerSettings
 
 	} else if ( hIndex["charset_type"]=="utf-8" )
 	{
-		tSettings.m_iType = hIndex("ngram_chars") ? TOKENIZER_NGRAM : TOKENIZER_UTF8;
+		tSettings.m_iType = TOKENIZER_UTF8;
+		if ( hIndex ( "ngram_chars" ) )
+		{
+			if ( tSettings.m_iNgramLen )
+				tSettings.m_iType = TOKENIZER_NGRAM;
+			else
+				sphWarning ( "ngram_chars specified, but ngram_len=0; IGNORED" );
+		}
 
 	} else
 	{
@@ -965,7 +971,6 @@ bool sphConfTokenizer ( const CSphConfigSection & hIndex, CSphTokenizerSettings
 	tSettings.m_sCaseFolding = hIndex.GetStr ( "charset_table" );
 	tSettings.m_iMinWordLen = Max ( hIndex.GetInt ( "min_word_len" ), 0 );
 	tSettings.m_sNgramChars = hIndex.GetStr ( "ngram_chars" );
-	tSettings.m_iNgramLen = Max ( hIndex.GetInt ( "ngram_len" ), 0 );
 	tSettings.m_sSynonymsFile = hIndex.GetStr ( "exceptions" ); // new option name
 	if ( tSettings.m_sSynonymsFile.IsEmpty() )
 		tSettings.m_sSynonymsFile = hIndex.GetStr ( "synonyms" ); // deprecated option name

+ 7 - 0
src/sphinxutils.h

@@ -38,6 +38,13 @@ inline bool sphIsSpace ( int iCode )
 }
 
 
+/// check for keyword modifiers
+inline bool sphIsModifier ( int iSymbol )
+{
+	return iSymbol=='^' || iSymbol=='$' || iSymbol=='=' || iSymbol=='*';
+}
+
+
 /// string splitter, extracts sequences of alphas (as in sphIsAlpha)
 void sphSplit ( CSphVector<CSphString> & dOut, const char * sIn );
 

+ 5 - 5
src/yysphinxql.c

@@ -1862,7 +1862,7 @@ yyreduce:
   case 60:
 
     {
-			if ( !pParser->AddUintRangeFilter ( yyvsp[-4].m_sValue, yyvsp[-2].m_iValue, yyvsp[0].m_iValue ) )
+			if ( !pParser->AddIntRangeFilter ( yyvsp[-4].m_sValue, yyvsp[-2].m_iValue, yyvsp[0].m_iValue ) )
 				YYERROR;
 		;}
     break;
@@ -1870,7 +1870,7 @@ yyreduce:
   case 61:
 
     {
-			if ( !pParser->AddUintRangeFilter ( yyvsp[-2].m_sValue, yyvsp[0].m_iValue+1, UINT_MAX ) )
+			if ( !pParser->AddIntRangeFilter ( yyvsp[-2].m_sValue, yyvsp[0].m_iValue+1, LLONG_MAX ) )
 				YYERROR;
 		;}
     break;
@@ -1878,7 +1878,7 @@ yyreduce:
   case 62:
 
     {
-			if ( !pParser->AddUintRangeFilter ( yyvsp[-2].m_sValue, 0, yyvsp[0].m_iValue-1 ) )
+			if ( !pParser->AddIntRangeFilter ( yyvsp[-2].m_sValue, LLONG_MIN, yyvsp[0].m_iValue-1 ) )
 				YYERROR;
 		;}
     break;
@@ -1886,7 +1886,7 @@ yyreduce:
   case 63:
 
     {
-			if ( !pParser->AddUintRangeFilter ( yyvsp[-2].m_sValue, yyvsp[0].m_iValue, UINT_MAX ) )
+			if ( !pParser->AddIntRangeFilter ( yyvsp[-2].m_sValue, yyvsp[0].m_iValue, LLONG_MAX ) )
 				YYERROR;
 		;}
     break;
@@ -1894,7 +1894,7 @@ yyreduce:
   case 64:
 
     {
-			if ( !pParser->AddUintRangeFilter ( yyvsp[-2].m_sValue, 0, yyvsp[0].m_iValue ) )
+			if ( !pParser->AddIntRangeFilter ( yyvsp[-2].m_sValue, LLONG_MIN, yyvsp[0].m_iValue ) )
 				YYERROR;
 		;}
     break;

File diff suppressed because it is too large
+ 0 - 0
test/test_019/model.bin


+ 6 - 3
test/test_019/test.xml

@@ -64,8 +64,11 @@ index test
 <query mode="extended2">("")</query>
 <query mode="extended2">"phrase (!query)/ ~on @steroids"</query>
 <query mode="extended2">1234567812345678</query>
-<!-- regression of query parser memory corraption -->
-<query mode="extended2" expect_error="1">1-word@#1215</query>
+<!-- regression of query parser memory corruption -->
+<query mode="extended2" expect_error="1">1 -word@#1215</query>
+<query mode="extended2">1-word@#1215</query>
+<query mode="extended2">canon 16 35</query>
+<query mode="extended2">canon 16-35</query>
 </queries>
 
 <db_create>
@@ -88,7 +91,7 @@ INSERT INTO `test_table` VALUES
 ( 222, '', 'phrase query on steroids' ),
 ( 333, 'sample program', 'this is a test program that prints out "hello world" to the console' ),
 ( 444, '', 'china 吐我' ),
-( 555, 'sample program two', 'something written in basic' ),
+( 555, 'sample program two', 'something written in basic | canon ef 16-35 lens' ),
 ( 666, 'sample program three', 'something written in perl' ),
 ( 777, '', '77 lies multiplied by 77' ),
 ( 888, '', 'agent 0077' ),

File diff suppressed because it is too large
+ 0 - 0
test/test_062/model.bin


+ 30 - 0
test/test_062/test.xml

@@ -92,6 +92,36 @@ foreach ( $queries as $query )
 	$results [] = $reply;
 }
 
+// regressions fast-path query mode starred vs regular term matches
+
+$query = ' "*mmitt* u" | ommitt* | "committed u" ';
+$results [] = $query;
+$results [] = $client->BuildExcerpts ( array ( 'support is just committed to Sphinx code base' ), 'test', $query, array ( 'query_mode' => 1 ) );
+
+$query = ' *ommitt* | "committed u" ';
+$results [] = $query;
+$results [] = $client->BuildExcerpts ( array ( 'support is just committed to Sphinx code base' ), 'test', $query, array ( 'query_mode' => 1 ) );
+
+$query = ' *ommitt* committed u ';
+$results [] = $query;
+$results [] = $client->BuildExcerpts ( array ( 'support is just committed to Sphinx code base' ), 'test', $query, array ( 'query_mode' => 0 ) );
+
+$query = ' committed* | "committed p" ';
+$results [] = $query;
+$results [] = $client->BuildExcerpts ( array ( 'support is just committed to Sphinx code base' ), 'test', $query, array ( 'query_mode' => 1 ) );
+
+$query = ' committed* committed p ';
+$results [] = $query;
+$results [] = $client->BuildExcerpts ( array ( 'support is just committed to Sphinx code base' ), 'test', $query, array ( 'query_mode' => 0 ) );
+
+$query = ' (support ("committed*")) ';
+$results [] = $query;
+$results [] = $client->BuildExcerpts ( array ( 'support is just committed to Sphinx code base' ), 'test', $query, array ( 'query_mode' => 1 ) );
+
+$query = ' (support ("code*" | "code test")) ';
+$results [] = $query;
+$results [] = $client->BuildExcerpts ( array ( 'support is just committed to Sphinx code base' ), 'test', $query, array ( 'query_mode' => 1, 'limit' => 25 ) );
+
 ]]></custom_test>
 
 </test>

File diff suppressed because it is too large
+ 0 - 0
test/test_098/model.bin


+ 4 - 0
test/test_098/test.xml

@@ -81,6 +81,10 @@ index wf3
 <query index='main' mode="extended2">foo(bar)</query>
 <query index='main' mode="extended2">foo\(bar\)</query>
 <query index='main' mode="extended2">"foo(bar)"</query>
+<query index='main' mode="extended2">(bars all$)</query>
+<query index='wf1' mode="extended2">without trouble$</query>
+<query index='wf1' mode="extended2">trouble without$</query>
+<query index='wf1' mode="extended2">(trouble without$)</query>
 <!-- here is going regression (query 8-)  when second index shares wordform from 1st index -->
 <query index='wf1' mode="extended2">run</query>
 <query index='wf2' mode="extended2">run</query>

File diff suppressed because it is too large
+ 0 - 0
test/test_100/model.bin


File diff suppressed because it is too large
+ 0 - 0
test/test_119/model.bin


+ 123 - 121
test/test_119/test.xml

@@ -1,121 +1,123 @@
-<?xml version="1.0" encoding="utf-8"?>
-<test>
-<name>select expressions vs eval stages</name>
-
-<requires> <variant_match /> </requires>
-
-<config>
-indexer
-{
-	mem_limit			= 16M
-}
-
-searchd
-{
-<searchd_settings/>
-	workers			= threads
-	binlog_path		=
-}
-
-source srctest
-{
-	type			= mysql
-	<sql_settings/>
-	sql_query		= SELECT * FROM test_table WHERE id IN (1,2, 3, 4 )
-	sql_attr_uint	= ival
-	sql_attr_float	= fval
-}
-
-index test
-{
-	source			= srctest
-	path			= <data_path/>/test
-}
-
-source src_mva
-{
-	type			= mysql
-	<sql_settings/>
-	sql_query		= SELECT id, 1 as idd, title as mva, 'test' FROM test_table WHERE id=10
-	sql_attr_uint	= idd
-<Dynamic>	
-	<Variant>sql_attr_multi = uint mva from field</Variant>
-	<Variant>sql_attr_multi = bigint mva from field</Variant>
-</Dynamic>	
-}
-
-index mva
-{
-	source			= src_mva
-	path			= <data_path/>/mva
-}
-
-source src_final
-{
-	type			= mysql
-	<sql_settings/>
-	sql_query		= SELECT id, ival as idd1, id*1000 as idd2, 'test' FROM test_table
-	sql_attr_uint	= idd1
-	sql_attr_uint	= idd2
-}
-
-index final
-{
-	source			= src_final
-	path			= <data_path/>/final
-	docinfo = extern
-}
-
-</config>
-
-<db_create>
-CREATE TABLE test_table
-(
-	id INTEGER NOT NULL,
-	ival INTEGER NOT NULL,
-	fval INTEGER NOT NULL,
-	title VARCHAR(255) NOT NULL
-)
-</db_create>
-
-<db_drop>
-DROP TABLE IF EXISTS `test_table`
-</db_drop>
-
-<db_insert>
-INSERT INTO `test_table` VALUES
-( 1, 11, 10, 'test one' ),
-( 2, 11, 20, 'test two' ),
-( 3, 11, -30, 'test three' ),
-( 4, 22, -40, 'test four' ),
-
-( 10, 1, 1, '10,11,12,13' ),
-
-( 20, 200, 1, '1' ), ( 21, 201, 1, '1' ), ( 22, 202, 1, '1' ), ( 23, 203, 1, '1' ), ( 24, 204, 1, '1' ), ( 25, 205, 1, '1' ),
-( 30, 300, 1, '1' ), ( 31, 301, 1, '1' ), ( 32, 302, 1, '1' ), ( 33, 303, 1, '1' ), ( 34, 304, 1, '1' ), ( 35, 305, 1, '1' )
-</db_insert>
-
-<sphqueries>
-<sphinxql>select * from test where match('test') order by fval asc</sphinxql>
-<sphinxql>select *, fval+1 as f1 from test where match('test') order by f1 desc</sphinxql>
-<sphinxql>select * from test group by fval</sphinxql>
-<sphinxql>select *, ival-1 as i1 from test group by i1</sphinxql>
-<sphinxql>select *, @weight+ival as i1 from test where match('test')</sphinxql>
-<sphinxql>select *, 10+ival as i1, 50+i1 as i2 from test where match('test')</sphinxql>
-<sphinxql>select *, 10+ival as i1, 50+i1 as i2 from test</sphinxql>
-<sphinxql>select *, 10+ival as i1, 50+i1 as i2, i2*10 as i3 from test</sphinxql>
-<sphinxql>select *, 10+ival as i1, 50+i1 as i2, i2*10-i1 as i3 from test</sphinxql>
-<sphinxql>select *, 10+ival as i1, 50+i1 as i2, i2*fval as i3 from test</sphinxql>
-<sphinxql>select *, 10+ival as i1, 50+i1 as i2, i2*fval as i3 from test order by i3 asc</sphinxql>
-<sphinxql>select *, 10+ival as i1, 50+i1 as i2, i2*fval as i3 from test order by i1 asc, i3 desc</sphinxql>
-<sphinxql>select *, 10+ival as i1, 50+i1 as i2, i2*fval as i3 from test group by i2</sphinxql>
-<sphinxql>select *, 10+ival as i1, 50+i1 as i2, i2*@id as i3 from test group by i3</sphinxql>
-
-<!-- here is going another regression from bug #800-->
-<sphinxql>select *, IN ( mva, 11 ) as cnd1 from mva where cnd1=1</sphinxql>
-<!-- regression sorter gives first matches instead bests -->
-<sphinxql>select id, idd1, idd2, idd1 + idd2 as i2 from final group by idd1 order by id desc limit 0,2 option max_matches=4</sphinxql>
-<sphinxql>select id, idd1, idd2, idd1 + idd2 as i2 from final group by idd1 order by id desc limit 2,2 option max_matches=4</sphinxql>
-</sphqueries>
-
-</test>
+<?xml version="1.0" encoding="utf-8"?>
+<test>
+<name>select expressions vs eval stages</name>
+
+<requires> <variant_match /> </requires>
+
+<config>
+indexer
+{
+	mem_limit			= 16M
+}
+
+searchd
+{
+<searchd_settings/>
+	workers			= threads
+	binlog_path		=
+}
+
+source srctest
+{
+	type			= mysql
+	<sql_settings/>
+	sql_query		= SELECT * FROM test_table WHERE id IN (1,2, 3, 4 )
+	sql_attr_uint	= ival
+	sql_attr_float	= fval
+}
+
+index test
+{
+	source			= srctest
+	path			= <data_path/>/test
+}
+
+source src_mva
+{
+	type			= mysql
+	<sql_settings/>
+	sql_query		= SELECT id, 1 as idd, title as mva, 'test' FROM test_table WHERE id=10
+	sql_attr_uint	= idd
+<Dynamic>	
+	<Variant>sql_attr_multi = uint mva from field</Variant>
+	<Variant>sql_attr_multi = bigint mva from field</Variant>
+</Dynamic>	
+}
+
+index mva
+{
+	source			= src_mva
+	path			= <data_path/>/mva
+}
+
+source src_final
+{
+	type			= mysql
+	<sql_settings/>
+	sql_query		= SELECT id, ival as idd1, id*1000 as idd2, 'test' FROM test_table
+	sql_attr_uint	= idd1
+	sql_attr_uint	= idd2
+}
+
+index final
+{
+	source			= src_final
+	path			= <data_path/>/final
+	docinfo = extern
+}
+
+</config>
+
+<db_create>
+CREATE TABLE test_table
+(
+	id INTEGER NOT NULL,
+	ival INTEGER NOT NULL,
+	fval INTEGER NOT NULL,
+	title VARCHAR(255) NOT NULL
+)
+</db_create>
+
+<db_drop>
+DROP TABLE IF EXISTS `test_table`
+</db_drop>
+
+<db_insert>
+INSERT INTO `test_table` VALUES
+( 1, 11, 10, 'test one' ),
+( 2, 11, 20, 'test two' ),
+( 3, 11, -30, 'test three' ),
+( 4, 22, -40, 'test four' ),
+
+( 10, 1, 1, '10,11,12,13' ),
+
+( 20, 200, 1, '1' ), ( 21, 201, 1, '1' ), ( 22, 202, 1, '1' ), ( 23, 203, 1, '1' ), ( 24, 204, 1, '1' ), ( 25, 205, 1, '1' ),
+( 30, 300, 1, '1' ), ( 31, 301, 1, '1' ), ( 32, 302, 1, '1' ), ( 33, 303, 1, '1' ), ( 34, 304, 1, '1' ), ( 35, 305, 1, '1' )
+</db_insert>
+
+<sphqueries>
+<sphinxql>select * from test where match('test') order by fval asc</sphinxql>
+<sphinxql>select *, fval+1 as f1 from test where match('test') order by f1 desc</sphinxql>
+<sphinxql>select * from test group by fval</sphinxql>
+<sphinxql>select *, ival-1 as i1 from test group by i1</sphinxql>
+<sphinxql>select *, @weight+ival as i1 from test where match('test')</sphinxql>
+<sphinxql>select *, @weight+ival as i1 from test where match('test') and i1>1315</sphinxql>
+<sphinxql>select *, weight() as w, w+ival as i1, i1+10 as i2 from test where match('test') and i2>1325</sphinxql>
+<sphinxql>select *, 10+ival as i1, 50+i1 as i2 from test where match('test')</sphinxql>
+<sphinxql>select *, 10+ival as i1, 50+i1 as i2 from test</sphinxql>
+<sphinxql>select *, 10+ival as i1, 50+i1 as i2, i2*10 as i3 from test</sphinxql>
+<sphinxql>select *, 10+ival as i1, 50+i1 as i2, i2*10-i1 as i3 from test</sphinxql>
+<sphinxql>select *, 10+ival as i1, 50+i1 as i2, i2*fval as i3 from test</sphinxql>
+<sphinxql>select *, 10+ival as i1, 50+i1 as i2, i2*fval as i3 from test order by i3 asc</sphinxql>
+<sphinxql>select *, 10+ival as i1, 50+i1 as i2, i2*fval as i3 from test order by i1 asc, i3 desc</sphinxql>
+<sphinxql>select *, 10+ival as i1, 50+i1 as i2, i2*fval as i3 from test group by i2</sphinxql>
+<sphinxql>select *, 10+ival as i1, 50+i1 as i2, i2*@id as i3 from test group by i3</sphinxql>
+
+<!-- here is going another regression from bug #800-->
+<sphinxql>select *, IN ( mva, 11 ) as cnd1 from mva where cnd1=1</sphinxql>
+<!-- regression sorter gives first matches instead bests -->
+<sphinxql>select id, idd1, idd2, idd1 + idd2 as i2 from final group by idd1 order by id desc limit 0,2 option max_matches=4</sphinxql>
+<sphinxql>select id, idd1, idd2, idd1 + idd2 as i2 from final group by idd1 order by id desc limit 2,2 option max_matches=4</sphinxql>
+</sphqueries>
+
+</test>

+ 9 - 1
test/test_134/model.bin

@@ -44,7 +44,15 @@ cool <b>It</b> <b>is</b> cooler
 <--->
 <b>It</b> <b>is</b> another place! 
 <--->
-";}i:5;a:1:{i:0;s:223:"The institutional investment manager <b>it</b>. <b>Is</b> Filing this report and. <b>It</b> <b>is</b> signed hereby represent. That <b>it</b> <b>is</b> all information. Statements are considered integral parts of this form.";}i:6;a:1:{i:0;s:156:" The institutional investment manager it. Is Filing this report and. It is signed hereby represent. That it is all information. are It or is cool It 
+";}i:5;a:1:{i:0;s:193:"The institutional investment manager <b>it</b>.
+<--->
+<b>Is</b> Filing this report and.
+<--->
+<b>It</b> <b>is</b> signed hereby represent.
+<--->
+That <b>it</b> <b>is</b> all information.
+<--->
+";}i:6;a:1:{i:0;s:156:" The institutional investment manager it. Is Filing this report and. It is signed hereby represent. That it is all information. are It or is cool It 
 <--->
 ";}i:7;a:1:{i:0;s:113:"
 <--->

File diff suppressed because it is too large
+ 0 - 0
test/test_140/model.bin


+ 53 - 0
test/test_140/test.xml

@@ -45,6 +45,23 @@ index main
 	path			= <data_path/>/main140
     charset_type 	= utf-8
 }
+
+source src_mva_64
+{
+	type			= mysql
+	<sql_settings/>
+
+	sql_query		= SELECT * FROM mva_64
+	sql_attr_uint	= idd
+	sql_attr_multi = bigint mva1 from field
+}
+
+index mva_64
+{
+	source			= src_mva_64
+	path			= <data_path/>/mva_64
+	charset_type 	= utf-8
+}
 </config>
 
 <sphqueries>
@@ -52,8 +69,24 @@ index main
 <sphinxql>select * from main where match('main') order by idd asc</sphinxql>
 <sphinxql>select * from main where match('delta') order by idd asc</sphinxql>
 <sphinxql>select * from main where match('main | delta') order by idd asc</sphinxql>
+
+<sphinxql>select * from mva_64</sphinxql>
+<sphinxql>select * from mva_64 where mva1=-2099511627775 </sphinxql>
+<sphinxql>select * from mva_64 where mva1=55599511627775 </sphinxql>
+<sphinxql><![CDATA[ select * from mva_64 where mva1<-10 ]]></sphinxql>
+<sphinxql><![CDATA[ select * from mva_64 where mva1<-1099511627775 ]]></sphinxql>
+<sphinxql><![CDATA[ select * from mva_64 where mva1>50099511627775 ]]></sphinxql>
 </sphqueries>
 
+<queries>
+<query index="mva_64"></query>
+<query filter_value="-2099511627775" filter="mva1" index="mva_64"></query>
+<query filter_value="55599511627775" filter="mva1" index="mva_64"></query>
+<query filter_range="-3000000000000 -10" filter="mva1" index="mva_64"></query>
+<query filter_range="-3000000000000 -1099511627776" filter="mva1" index="mva_64"></query>
+<query filter_range="50099511627776 600000000000000" filter="mva1" index="mva_64"></query>
+</queries>
+
 <db_create>
 CREATE TABLE `test_table`
 (
@@ -87,4 +120,24 @@ INSERT INTO `test_table` VALUES
 (11, 11, 0, '',			'',		'delta', 'b', '' )
 </db_insert>
 
+<db_create>
+CREATE TABLE `mva_64`
+(
+	`id` int(11) NOT NULL default '0',
+	`idd` int(11) NOT NULL default '0',
+	`mva1` varchar(255) NOT NULL default '',
+	`body` varchar(255) NOT NULL default ''
+)
+</db_create>
+
+<db_drop>DROP TABLE IF EXISTS `mva_64`</db_drop>
+
+<db_insert>
+INSERT INTO `mva_64` VALUES
+( 100,	1,	'1099511627775		50099511627775	-1099511627775		2099511627775',	'dummy' ),
+( 200,	1,	'50099511627775	11099511627775	501099511627775	-2099511627775',	'dummy' ),
+( 300,	2,	'-1099511627775	50099511627775	50099511627772	-2099511627771',	'dummy' ),
+( 400,	2,	'-1099511627775	50099511627771	-1099511627775		55599511627775',	'dummy' )
+</db_insert>
+
 </test>

File diff suppressed because it is too large
+ 0 - 0
test/test_160/model.bin


+ 37 - 0
test/test_160/test.xml

@@ -133,6 +133,43 @@ $results[] = $client->BuildExcerpts(array('tokenizer filter crash at lc'), 'mult
 $results[] = $client->BuildExcerpts(array('dog dummy! as the house nearby the dog'), 'test', 'the. dog!? as',
 	array ('query_mode'=>1, 'html_strip_mode'=>'retain', 'limit'=>0) );
 	
+// regression SPZ vs passage_boundary
+
+$opts = array ( 'query_mode'=>1, 'allow_empty'=>1 );
+
+$opts['limit'] = 0;
+$res =  $client->BuildExcerpts($docs, 'test', 'ZONE:zoneB these',  $opts);
+$res['ZoneB'] = 'fast-path';
+$results[] = $res;
+
+$opts['limit'] = 30;
+$res =  $client->BuildExcerpts($docs, 'test', 'ZONE:zoneB these',  $opts);
+$res['ZoneB'] = 'old-path';
+$results[] = $res;
+
+$opts['limit'] = 0;
+$res =  $client->BuildExcerpts($docs, 'test', 'and PARAGRAPH words',  $opts);
+$res['PARAGRAPH'] = 'fast-path';
+$results[] = $res;
+
+$opts['limit'] = 30;
+$res =  $client->BuildExcerpts($docs, 'test', 'and PARAGRAPH words',  $opts);
+$res['PARAGRAPH'] = 'old-path';
+$results[] = $res;
+
+// regression fast-path vs passage_boundary
+$opts['limit'] = 0;
+$opts['passage_boundary'] = 'paragraph';
+$res =  $client->BuildExcerpts($docs, 'test', 'and words',  $opts);
+$res['passage_boundary'] = 'paragraph';
+$results[] = $res;
+
+$opts['passage_boundary'] = 'zone';
+$res =  $client->BuildExcerpts($docs, 'test', 'and words',  $opts);
+$res['passage_boundary'] = 'zone';
+$results[] = $res;
+
+	
 // regression head SPZ overgrow + non fast path SPZ
 
 $docs = array ( 'Ultra long stuff is going here then store sales, which were going before of store closes. Same store sales for the quarter increased as ultra long dust was here since univerce was born. ' );

File diff suppressed because it is too large
+ 0 - 0
test/test_171/model.bin


+ 1 - 0
test/test_191/model.bin

@@ -0,0 +1 @@
+a:1:{i:0;a:4:{i:0;a:3:{s:8:"sphinxql";s:40:"select * from main where match('planet')";s:10:"total_rows";i:1;s:4:"rows";a:1:{i:0;a:10:{s:2:"id";s:3:"100";s:6:"weight";s:4:"1695";s:6:"status";s:1:"1";s:8:"category";s:1:"0";s:4:"kind";s:2:"13";s:4:"date";s:9:"994204800";s:5:"class";s:2:"14";s:5:"price";s:2:"15";s:4:"code";s:2:"16";s:7:"surface";s:2:"17";}}}i:1;a:3:{s:8:"sphinxql";s:41:"select * from main where match('unhappy')";s:10:"total_rows";i:1;s:4:"rows";a:1:{i:0;a:10:{s:2:"id";s:3:"101";s:6:"weight";s:4:"1695";s:6:"status";s:1:"2";s:8:"category";s:1:"1";s:4:"kind";s:2:"23";s:4:"date";s:10:"1025740800";s:5:"class";s:2:"24";s:5:"price";s:2:"25";s:4:"code";s:2:"26";s:7:"surface";s:2:"27";}}}i:2;a:3:{s:8:"sphinxql";s:43:"select * from main where match('solutions')";s:10:"total_rows";i:1;s:4:"rows";a:1:{i:0;a:10:{s:2:"id";s:3:"102";s:6:"weight";s:4:"1695";s:6:"status";s:1:"3";s:8:"category";s:1:"2";s:4:"kind";s:2:"33";s:4:"date";s:10:"1057276800";s:5:"class";s:2:"34";s:5:"price";s:2:"35";s:4:"code";s:2:"36";s:7:"surface";s:2:"37";}}}i:3;a:3:{s:8:"sphinxql";s:39:"select * from main where match('green')";s:10:"total_rows";i:1;s:4:"rows";a:1:{i:0;a:10:{s:2:"id";s:3:"103";s:6:"weight";s:4:"1695";s:6:"status";s:1:"4";s:8:"category";s:1:"0";s:4:"kind";s:2:"43";s:4:"date";s:10:"1088899200";s:5:"class";s:2:"44";s:5:"price";s:2:"45";s:4:"code";s:2:"46";s:7:"surface";s:2:"47";}}}}}

+ 100 - 0
test/test_191/test.xml

@@ -0,0 +1,100 @@
+<?xml version="1.0" encoding="utf-8"?>
+<test>
+
+<name>bitfields vs inline docinfo vs merge</name>
+
+<config>
+indexer
+{
+	mem_limit			= 16M
+}
+
+searchd
+{
+	<searchd_settings/>
+	workers = threads
+}
+
+source test1
+{
+	type				= mysql
+	<sql_settings/>
+	sql_query_pre 		= set time_zone='+0:00'
+	sql_query			= select id, content, status, category, kind, UNIX_TIMESTAMP(timestamp) as date, class, price, code, surface from test_table WHERE id=100 or id=101
+	sql_attr_uint		= status:3
+	sql_attr_uint		= category:2
+	sql_attr_uint		= kind:8
+	sql_attr_timestamp	= date
+	sql_attr_uint		= class:6
+	sql_attr_uint		= price:32
+	sql_attr_uint		= code:8
+	sql_attr_uint		= surface:32
+}
+
+source test2
+{
+	type				= mysql
+	<sql_settings/>
+	sql_query_pre 		= set time_zone='+0:00'
+	sql_query			= select id, content, status, category, kind, UNIX_TIMESTAMP(timestamp) as date, class, price, code, surface from test_table WHERE id=102 or id=103
+	sql_attr_uint		= status:3
+	sql_attr_uint		= category:2
+	sql_attr_uint		= kind:8
+	sql_attr_timestamp	= date
+	sql_attr_uint		= class:6
+	sql_attr_uint		= price:32
+	sql_attr_uint		= code:8
+	sql_attr_uint		= surface:32
+}
+
+index main
+{
+	source				= test1
+	path				= <data_path/>/test1
+	docinfo				= inline
+}
+
+index delta
+{
+	source				= test2
+	path				= <data_path/>/test2
+	docinfo				= inline
+}
+</config>
+
+<indexer>
+<run>--merge main delta</run>
+</indexer>
+
+<db_create>
+CREATE TABLE test_table
+(
+	id INTEGER PRIMARY KEY NOT NULL,
+	content VARCHAR(255) NOT NULL,
+	status INTEGER NOT NULL,
+	category INTEGER NOT NULL,
+	kind INTEGER NOT NULL,
+	timestamp DATE NOT NULL,
+	class INTEGER NOT NULL,
+	price INTEGER NOT NULL,
+	code INTEGER NOT NULL,
+	surface INTEGER NOT NULL
+);
+</db_create>
+<db_drop>DROP TABLE IF EXISTS test_table;</db_drop>
+<db_insert>
+INSERT INTO test_table VALUES
+( 100, 'This planet has or rather had a problem', 1, 0, 13, '2001-07-04', 14, 15, 16, 17 ),
+( 101, 'which was this: most of the people on it were unhappy for pretty much of the time.', 2, 1, 23, '2002-07-04', 24, 25, 26, 27 ),
+( 102, 'Many solutions were suggested for this problem', 3, 2, 33, '2003-07-04', 34, 35, 36, 37 ),
+( 103, 'but most of these were largely concerned with the movements of small green pieces of paper', 4, 0, 43, '2004-07-04', 44, 45, 46, 47 )
+</db_insert>
+
+<sphqueries>
+<sphinxql>select * from main where match('planet')</sphinxql>
+<sphinxql>select * from main where match('unhappy')</sphinxql>
+<sphinxql>select * from main where match('solutions')</sphinxql>
+<sphinxql>select * from main where match('green')</sphinxql>
+</sphqueries>
+
+</test>

Some files were not shown because too many files changed in this diff