Selaa lähdekoodia

arbitrary document attributes alpha

git-svn-id: svn://svn.sphinxsearch.com/sphinx/trunk@378 406a0c4d-033a-0410-8de8-e80135713968
shodan 19 vuotta sitten
vanhempi
sitoutus
ea81be7510
9 muutettua tiedostoa jossa 1202 lisäystä ja 493 poistoa
  1. 109 61
      api/sphinxapi.php
  2. 11 3
      api/test.php
  3. 25 0
      sphinx.conf.in
  4. 34 21
      src/indexer.cpp
  5. 74 20
      src/search.cpp
  6. 272 61
      src/searchd.cpp
  7. 495 255
      src/sphinx.cpp
  8. 174 66
      src/sphinx.h
  9. 8 6
      src/sphinxstd.h

+ 109 - 61
api/sphinxapi.php

@@ -22,7 +22,7 @@ define ( "SEARCHD_COMMAND_SEARCH",	0 );
 define ( "SEARCHD_COMMAND_EXCERPT",	1 );
 
 /// current client-side command implementation versions
-define ( "VER_COMMAND_SEARCH",		0x101 );
+define ( "VER_COMMAND_SEARCH",		0x102 );
 define ( "VER_COMMAND_EXCERPT",		0x100 );
 
 /// known searchd status codes
@@ -38,10 +38,14 @@ define ( "SPH_MATCH_BOOLEAN",		3 );
 
 /// known sort modes
 define ( "SPH_SORT_RELEVANCE",		0 );
-define ( "SPH_SORT_DATE_DESC",		1 );
-define ( "SPH_SORT_DATE_ASC",		2 );
+define ( "SPH_SORT_ATTR_DESC",		1 );
+define ( "SPH_SORT_ATTR_ASC",		2 );
 define ( "SPH_SORT_TIME_SEGMENTS", 	3 );
 
+/// known attribute types
+define ( "SPH_ATTR_INTEGER",		1 );
+define ( "SPH_ATTR_TIMESTAMP",		2 );
+
 /// sphinx searchd client class
 class SphinxClient
 {
@@ -51,14 +55,13 @@ class SphinxClient
 	var $_limit;	///< how much records to return from result-set starting at offset (default is 20)
 	var $_mode;		///< query matching mode (default is SPH_MATCH_ALL)
 	var $_weights;	///< per-field weights (default is 1 for all fields)
-	var $_groups;	///< groups to limit searching to (default is not to limit)
 	var $_sort;		///< match sorting mode (default is SPH_SORT_RELEVANCE)
+	var $_sortby;	///< attribute to sort by (default is "")
 	var $_min_id;	///< min ID to match (default is 0)
 	var $_max_id;	///< max ID to match (default is UINT_MAX)
-	var $_min_ts;	///< min timestamp to match (default is 0)
-	var $_max_ts;	///< max timestamp to match (default is UINT_MAX)
-	var $_min_gid;	///< min group id to match (default is 0)
-	var $_max_gid;	///< max group id to match (default is UINT_MAX)
+	var $_min;		///< attribute name to min-value hash (for range filters)
+	var $_max;		///< attribute name to max-value hash (for range filters)
+	var $_filter;	///< attribute name to values set hash (for values-set filters)
 
 	var $_error;	///< last error message
 	var $_warning;	///< last warning message
@@ -76,16 +79,16 @@ class SphinxClient
 		$this->_limit	= 20;
 		$this->_mode	= SPH_MATCH_ALL;
 		$this->_weights	= array ();
-		$this->_groups	= array ();
 		$this->_sort	= SPH_SORT_RELEVANCE;
+		$this->_sortby	= "";
 		$this->_min_id	= 0;
 		$this->_max_id	= 0xFFFFFFFF;
-		$this->_min_ts	= 0;
-		$this->_max_ts	= 0xFFFFFFFF;
-		$this->_min_gid	= 0;
-		$this->_max_gid	= 0xFFFFFFFF;
+		$this->_min		= array ();
+		$this->_max		= array ();
+		$this->_filter	= array ();
 
 		$this->_error	= "";
+		$this->_warning	= "";
 	}
 
 	/// get last error message (string)
@@ -216,11 +219,18 @@ class SphinxClient
 	}
 
 	/// set match mode
-	function SetSortMode ( $sort )
+	function SetSortMode ( $mode, $sortby="" )
 	{
-		assert ( $sort==SPH_SORT_RELEVANCE || $sort==SPH_SORT_DATE_DESC || $sort==SPH_SORT_DATE_ASC
-			|| $sort==SPH_SORT_TIME_SEGMENTS );
-		$this->_sort = $sort;
+		assert (
+			$mode==SPH_SORT_RELEVANCE ||
+			$mode==SPH_SORT_ATTR_DESC ||
+			$mode==SPH_SORT_ATTR_ASC ||
+			$mode==SPH_SORT_TIME_SEGMENTS );
+		assert ( is_string($sortby) );
+		assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 );
+
+		$this->_sort = $mode;
+		$this->_sortby = $sortby;
 	}
 
 	/// set per-field weights
@@ -233,17 +243,9 @@ class SphinxClient
 		$this->_weights = $weights;
 	}
 
-	/// set groups
-	function SetGroups ( $groups )
-	{
-		assert ( is_array($groups) );
-		foreach ( $groups as $group )
-			assert ( is_int($group) );
-
-		$this->_groups = $groups;
-	}
-
 	/// set IDs range to match
+	/// only match those records where document ID
+	/// is between $min and $max (including $min and $max)
 	function SetIDRange ( $min, $max )
 	{
 		assert ( is_int($min) );
@@ -253,24 +255,33 @@ class SphinxClient
 		$this->_max_id = $max;
 	}
 
-	/// set timestamps to match
-	function SetTimestampRange ( $min, $max )
+	/// set values filter
+	/// only match those records where $attribute column values
+	/// are in the specified set
+	function SetFilter ( $attribute, $values )
 	{
-		assert ( is_int($min) );
-		assert ( is_int($max) );
-		assert ( $min<=$max );
-		$this->_min_ts = $min;
-		$this->_max_ts = $max;
+		assert ( is_string($attribute) );
+		assert ( is_array($values) );
+		assert ( count($values) );
+
+		foreach ( $values as $value )
+			assert ( is_int($value) );
+
+		$this->_filter[$attribute] = $values;
 	}
 
-	/// set groups range to match
-	function SetGroupsRange ( $min, $max )
+	/// set range filter
+	/// only match those records where $attribute column value
+	/// is between $min and $max (including $min and $max)
+	function SetFilterRange ( $attribute, $min, $max )
 	{
+		assert ( is_string($attribute) );
 		assert ( is_int($min) );
 		assert ( is_int($max) );
 		assert ( $min<=$max );
-		$this->_min_gid = $min;
-		$this->_max_gid = $max;
+
+		$this->_min[$attribute] = $min;
+		$this->_max[$attribute] = $max;
 	}
 
 	/// connect to searchd server and run given search query
@@ -299,26 +310,34 @@ class SphinxClient
 		// build request
 		/////////////////
 
-		// v.1.0
 		$req = pack ( "NNNN", $this->_offset, $this->_limit, $this->_mode, $this->_sort ); // mode and limits
-		$req .= pack ( "N", count($this->_groups) ); // groups 
-		foreach ( $this->_groups as $group )
-			$req .= pack ( "N", $group );
+		$req .= pack ( "N", strlen($this->_sortby) ) . $this->_sortby;
 		$req .= pack ( "N", strlen($query) ) . $query; // query itself
 		$req .= pack ( "N", count($this->_weights) ); // weights
 		foreach ( $this->_weights as $weight )
 			$req .= pack ( "N", (int)$weight );
 		$req .= pack ( "N", strlen($index) ) . $index; // indexes
-		$req .= // id/ts ranges
+		$req .= // id range
 			pack ( "N", (int)$this->_min_id ) .
-			pack ( "N", (int)$this->_max_id ) .
-			pack ( "N", (int)$this->_min_ts ) .
-			pack ( "N", (int)$this->_max_ts );
+			pack ( "N", (int)$this->_max_id );
+
+		// filters
+		$req .= pack ( "N", count($this->_min) + count($this->_filter) );
+
+		foreach ( $this->_min as $attr => $min )
+			$req .=
+				pack ( "N", strlen($attr) ) . $attr .
+				pack ( "NNN", 0, $min, $this->_max[$attr] );
+
+		foreach ( $this->_filter as $attr => $values )
+		{
+			$req .= 
+				pack ( "N", strlen($attr) ) . $attr .
+				pack ( "N", count($values) );
 
-		// v.1.1
-		$req .= // gid ranges
-			pack ( "N", (int)$this->_min_gid ) .
-			pack ( "N", (int)$this->_max_gid );
+			foreach ( $values as $value )
+				$req .= pack ( "N", $value );
+		}
 
 		////////////////////////////
 		// send query, get response
@@ -335,18 +354,47 @@ class SphinxClient
 		//////////////////
 
 		$result = array();
-		list(,$count) = unpack ( "N*", substr ( $response, 0, 4 ) );
-		$p = 4;
-		while ( $count-->0 )
+		$max = strlen($response); // protection from broken response
+
+		// read schema
+		$p = 0;
+		$fields = array ();
+		$attrs = array ();
+
+		list(,$nfields) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+		while ( $nfields-->0 && $p<$max )
+		{
+			list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+			$fields[] = substr ( $response, $p, $len ); $p += $len;
+		}
+		$result["fields"] = $fields;
+
+		list(,$nattrs) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+		while ( $nattrs-->0 && $p<$max  )
 		{
-			list ( $doc, $group, $stamp, $weight ) = array_values ( unpack ( "N*N*N*N*",
-				substr ( $response, $p, 16 ) ) );
-			$p += 16;
-
-			$result["matches"][$doc] = array (
-				"weight"	=> $weight,
-				"group"		=> $group,
-				"stamp"		=> $stamp );
+			list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+			$attr = substr ( $response, $p, $len ); $p += $len;
+			list(,$type) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+			$attrs[$attr] = $type;
+		}
+		$result["attrs"] = $attrs;
+
+		// read match count
+		list(,$count) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+
+		// read matches
+		while ( $count-->0 && $p<$max )
+		{
+			list ( $doc, $weight ) = array_values ( unpack ( "N*N*",
+				substr ( $response, $p, 8 ) ) );
+			$p += 8;
+
+			$result["matches"][$doc]["weight"] = $weight;
+			foreach ( $attrs as $attr=>$type )
+			{
+				list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+				$result["matches"][$doc]["attrs"][$attr] = $val;
+			}
 		}
 		list ( $result["total"], $result["total_found"], $result["time"], $words ) =
 			array_values ( unpack ( "N*N*N*N*", substr ( $response, $p, 16 ) ) );

+ 11 - 3
api/test.php

@@ -65,7 +65,8 @@ $cl = new SphinxClient ();
 $cl->SetServer ( "localhost", $port );
 $cl->SetWeights ( array ( 100, 1 ) );
 $cl->SetMatchMode ( $any ? SPH_MATCH_ANY : SPH_MATCH_ALL );
-$cl->SetGroups ( $groups );
+if ( count($groups) )
+	$cl->SetFilter ( "channel_id", $groups );//!COMMIT
 $res = $cl->Query ( $q, $index );
 
 ////////////////
@@ -94,8 +95,15 @@ if ( $res===false )
 		print "Matches:\n";
 		foreach ( $res["matches"] as $doc => $docinfo )
 		{
-			$d = date ( "Y-m-d H:i:s", $docinfo["stamp"] );
-			print "$n. doc_id=$doc, group=$docinfo[group], date=$d, weight=$docinfo[weight]\n";
+			print "$n. doc_id=$doc, weight=$docinfo[weight]";
+			foreach ( $res["attrs"] as $attrname => $attrtype )
+			{
+				$value = $docinfo["attrs"][$attrname];
+				if ( $attrtype==SPH_ATTR_TIMESTAMP )
+					$value = date ( "Y-m-d H:i:s", $value );
+				print ", $attrname=$value";
+			}
+			print "\n";
 			$n++;
 		}
 	}

+ 25 - 0
sphinx.conf.in

@@ -205,6 +205,31 @@ index test1
 	path			= @CONFDIR@/data/test1
 	# path			= @CONFDIR@/data/test1
 
+	# docinfo (ie. per-document attribute values) storage strategy
+	#
+	# defines how docinfo will be stored
+	# "none" means there'll be no docinfo (no groups, no dates, no nothing)
+	# "inline" means that the docinfo will be stored along with the doc IDs
+	# "extern" means that the docinfo will be stored separately
+	#
+	# externally stored docinfo should (basically) be kept in RAM
+	# when querying; therefore, "inline" may be the only viable option
+	# for really huge (50-100+ million docs) datasets. however, for
+	# smaller datasets "extern" storage makes both indexing and
+	# searching much more efficient.
+	#
+	# additional search-time memory requirements for extern storage are
+	#
+	#	( 1 + number_of_attrs )*number_of_docs*4 bytes
+	#
+	# so 10 million docs with 2 groups and 1 timestamp will take
+	# (1+2+1)*10M*4 = 160 MB of RAM. this is per daemon, ie. searchd
+	# will alloc 160 MB on startup, read the data and keep it shared
+	# between queries.
+	#
+	# default is "extern" (as most collections are smaller than 100M docs)
+	docinfo			= extern
+
 	# morphology
 	# default is not to use any
 	#

+ 34 - 21
src/indexer.cpp

@@ -302,21 +302,26 @@ void ShowProgress ( const CSphIndexProgress * pProgress )
 		return NULL; \
 	}
 
-
+// get string
 #define LOC_GETS(_arg,_key) \
 	if ( hSource.Exists(_key) ) \
 		_arg = hSource[_key];
 
-
+// get int
 #define LOC_GETI(_arg,_key) \
 	if ( hSource.Exists(_key) && hSource[_key].intval() ) \
 		_arg = hSource[_key].intval();
 
-
-#define LOC_GETA(_arg,_key) \
+// get array of strings
+#define LOC_GETAS(_arg,_key) \
 	for ( CSphVariant * pVal = hSource(_key); pVal; pVal = pVal->m_pNext ) \
 		_arg.Add ( pVal->cstr() );
 
+// get array of attrs
+#define LOC_GETAA(_arg,_key,_type) \
+	for ( CSphVariant * pVal = hSource(_key); pVal; pVal = pVal->m_pNext ) \
+		_arg.Add ( CSphColumnInfo ( pVal->cstr(), _type ) );
+
 
 #if USE_PGSQL
 CSphSource * SpawnSourcePgSQL ( const CSphConfigSection & hSource, const char * sSourceName )
@@ -331,12 +336,12 @@ CSphSource * SpawnSourcePgSQL ( const CSphConfigSection & hSource, const char *
 
 	CSphSourceParams_PgSQL tParams;
 	LOC_GETS ( tParams.m_sQuery,			"sql_query" );
-	LOC_GETA ( tParams.m_dQueryPre,			"sql_query_pre" );
-	LOC_GETA ( tParams.m_dQueryPost,		"sql_query_post" );
+	LOC_GETAS( tParams.m_dQueryPre,			"sql_query_pre" );
+	LOC_GETAS( tParams.m_dQueryPost,		"sql_query_post" );
 	LOC_GETS ( tParams.m_sQueryRange,		"sql_query_range" );
-	LOC_GETA ( tParams.m_dQueryPostIndex,	"sql_query_post_index" );
-	LOC_GETS ( tParams.m_sGroupColumn,		"sql_group_column" );
-	LOC_GETS ( tParams.m_sDateColumn,		"sql_date_column" );
+	LOC_GETAS( tParams.m_dQueryPostIndex,	"sql_query_post_index" );
+	LOC_GETAA( tParams.m_dAttrs,			"sql_group_column",		SPH_ATTR_INTEGER );
+	LOC_GETAA( tParams.m_dAttrs,			"sql_date_column",		SPH_ATTR_TIMESTAMP );
 	LOC_GETS ( tParams.m_sHost,				"sql_host" );
 	LOC_GETS ( tParams.m_sUser,				"sql_user" );
 	LOC_GETS ( tParams.m_sPass,				"sql_pass" );
@@ -345,7 +350,7 @@ CSphSource * SpawnSourcePgSQL ( const CSphConfigSection & hSource, const char *
 	LOC_GETS ( tParams.m_sPort,				"sql_port");
 	LOC_GETI ( tParams.m_iRangeStep,		"sql_range_step" );
 
-	CSphSource_PgSQL * pSrcPgSQL = new CSphSource_PgSQL ();
+	CSphSource_PgSQL * pSrcPgSQL = new CSphSource_PgSQL ( sSourceName );
 	if ( !pSrcPgSQL->Init ( tParams ) )
 		SafeDelete ( pSrcPgSQL );
 	return pSrcPgSQL;
@@ -366,12 +371,12 @@ CSphSource * SpawnSourceMySQL ( const CSphConfigSection & hSource, const char *
 
 	CSphSourceParams_MySQL tParams;
 	LOC_GETS ( tParams.m_sQuery,			"sql_query" );
-	LOC_GETA ( tParams.m_dQueryPre,			"sql_query_pre" );
-	LOC_GETA ( tParams.m_dQueryPost,		"sql_query_post" );
+	LOC_GETAS( tParams.m_dQueryPre,			"sql_query_pre" );
+	LOC_GETAS( tParams.m_dQueryPost,		"sql_query_post" );
 	LOC_GETS ( tParams.m_sQueryRange,		"sql_query_range" );
-	LOC_GETA ( tParams.m_dQueryPostIndex,	"sql_query_post_index" );
-	LOC_GETS ( tParams.m_sGroupColumn,		"sql_group_column" );
-	LOC_GETS ( tParams.m_sDateColumn,		"sql_date_column" );
+	LOC_GETAS( tParams.m_dQueryPostIndex,	"sql_query_post_index" );
+	LOC_GETAA( tParams.m_dAttrs,			"sql_group_column",		SPH_ATTR_INTEGER );
+	LOC_GETAA( tParams.m_dAttrs,			"sql_date_column",		SPH_ATTR_TIMESTAMP );
 	LOC_GETS ( tParams.m_sHost,				"sql_host" );
 	LOC_GETS ( tParams.m_sUser,				"sql_user" );
 	LOC_GETS ( tParams.m_sPass,				"sql_pass" );
@@ -380,7 +385,7 @@ CSphSource * SpawnSourceMySQL ( const CSphConfigSection & hSource, const char *
 	LOC_GETI ( tParams.m_iPort,				"sql_port" );
 	LOC_GETI ( tParams.m_iRangeStep,		"sql_range_step" );
 
-	CSphSource_MySQL * pSrcMySQL = new CSphSource_MySQL ();
+	CSphSource_MySQL * pSrcMySQL = new CSphSource_MySQL ( sSourceName );
 	if ( !pSrcMySQL->Init ( tParams ) )
 		SafeDelete ( pSrcMySQL );
 	return pSrcMySQL;
@@ -394,7 +399,7 @@ CSphSource * SpawnSourceXMLPipe ( const CSphConfigSection & hSource, const char
 	
 	LOC_CHECK ( hSource, "xmlpipe_command", "in source '%s'.", sSourceName );
 
-	CSphSource_XMLPipe * pSrcXML = new CSphSource_XMLPipe ();
+	CSphSource_XMLPipe * pSrcXML = new CSphSource_XMLPipe ( sSourceName );
 	if ( !pSrcXML->Init ( hSource["xmlpipe_command"].cstr() ) )
 	{
 		fprintf ( stdout, "FATAL: CSphSource_XMLPipe: unable to popen '%s'.\n", hSource["xmlpipe_command"].cstr() );
@@ -576,8 +581,8 @@ int main ( int argc, char ** argv )
 	{
 		CSphString sBuf = hConf["indexer"]["indexer"]["mem_limit"];
 
-		char * sMemLimit = sBuf.str();
-		assert ( sMemLimit );
+		char sMemLimit[256];
+		strncpy ( sMemLimit, sBuf.cstr(), sizeof(sMemLimit) );
 
 		int iLen = strlen ( sMemLimit );
 		if ( iLen )
@@ -598,7 +603,7 @@ int main ( int argc, char ** argv )
 			int iRes = strtol ( sMemLimit, &sErr, 10 );
 			if ( *sErr )
 			{
-				fprintf ( stdout, "WARNING: bad mem_limit value '%s', using default.\n", sMemLimit );
+				fprintf ( stdout, "WARNING: bad mem_limit value '%s', using default.\n", sBuf.cstr() );
 			} else
 			{
 				iMemLimit = iScale*iRes;
@@ -758,6 +763,14 @@ int main ( int argc, char ** argv )
 			continue;
 		}
 
+		// configure docinfo storage
+		ESphDocinfo eDocinfo = SPH_DOCINFO_EXTERN;
+		if ( hIndex("docinfo") )
+		{
+			if ( hIndex["docinfo"]=="none" )	eDocinfo = SPH_DOCINFO_NONE;
+			if ( hIndex["docinfo"]=="inline" )	eDocinfo = SPH_DOCINFO_INLINE;
+		}
+
 		///////////
 		// do work
 		///////////
@@ -822,7 +835,7 @@ int main ( int argc, char ** argv )
 			assert ( pIndex );
 
 			pIndex->SetProgressCallback ( ShowProgress );
-			if ( pIndex->Build ( pDict, dSources, iMemLimit ) )
+			if ( pIndex->Build ( pDict, dSources, iMemLimit, eDocinfo ) )
 			{
 				// if searchd is not running, we're good
 				if ( !bRotate )

+ 74 - 20
src/search.cpp

@@ -37,7 +37,7 @@ int main ( int argc, char ** argv )
 			"-i, --index <index>\tsearch given index only (default: all indexes)\n"
 			"-a, --any\t\tmatch any query word (default: match all words)\n"
 			"-b, --boolean\t\tmatch in boolean mode\n"
-			"-g, --group <id>\tmatch this group only (default: match all groups)\n"
+			"-f, --filter <attr> <v>\tonly match if attribute attr value is v\n"
 			"-s, --start <offset>\tprint matches starting from this offset (default: 0)\n"
 			"-l, --limit <count>\tprint this many matches (default: 20)\n"
 			"-q, --noinfo\t\tdon't print document info from SQL database\n"
@@ -59,7 +59,6 @@ int main ( int argc, char ** argv )
 
 	const char * sConfName = "sphinx.conf";
 	const char * sIndex = NULL;
-	CSphVector<DWORD> dGroups;
 	bool bNoInfo = false;
 	bool bStdin = false;
 	int iStart = 0;
@@ -79,20 +78,40 @@ int main ( int argc, char ** argv )
 			OPT ( "-b", "--boolean" )	tQuery.m_eMode = SPH_MATCH_BOOLEAN;
 			OPT ( "-p", "--phrase" )	tQuery.m_eMode = SPH_MATCH_PHRASE;
 			OPT ( "-q", "--noinfo" )	bNoInfo = true;
-			OPT1 ( "--sort=date" )		tQuery.m_eSort = SPH_SORT_DATE_DESC;
-			OPT1 ( "--rsort=date" )		tQuery.m_eSort = SPH_SORT_DATE_ASC;
+			OPT1 ( "--sort=date" )		tQuery.m_eSort = SPH_SORT_ATTR_DESC;
+			OPT1 ( "--rsort=date" )		tQuery.m_eSort = SPH_SORT_ATTR_ASC;
 			OPT1 ( "--sort=ts" )		tQuery.m_eSort = SPH_SORT_TIME_SEGMENTS;
 			OPT1 ( "--stdin" )			bStdin = true;
+
+			else if ( (i+2)<argc )
+			{
+				if ( i==0 );
+				OPT ( "-f", "--filter" )
+				{
+					if ( atoi ( argv[i+2] ) )
+					{
+						tQuery.m_dFilters.Reset ();
+						tQuery.m_dFilters.Resize ( 1 );
+						tQuery.m_dFilters[0].m_iValues = 1;
+						tQuery.m_dFilters[0].m_pValues = new DWORD [ 1 ];
+						tQuery.m_dFilters[0].m_pValues[0] = atoi ( argv[i+2] );
+						tQuery.m_dFilters[0].m_sAttrName = argv[i+1];
+						i += 2;
+					}
+				}
+				else break; // unknown option
+			}
+
 			else if ( (i+1)<argc )
 			{
 				if ( i==0 );
-				OPT ( "-g", "--group")		{ if ( atoi ( argv[++i] ) ) dGroups.Add ( atoi ( argv[i] ) ); }
 				OPT ( "-s", "--start" )		iStart = atoi ( argv[++i] );
 				OPT ( "-l", "--limit" )		iLimit = atoi ( argv[++i] );
 				OPT ( "-c", "--config" )	sConfName = argv[++i];
 				OPT ( "-i", "--index" )		sIndex = argv[++i];
 				else break; // unknown option
 			}
+
 			else break; // unknown option
 
 		} else if ( strlen(sQuery) + strlen(argv[i]) + 1 < sizeof(sQuery) )
@@ -270,15 +289,32 @@ int main ( int argc, char ** argv )
 		//////////
 
 		tQuery.m_sQuery = sQuery;
-		if ( dGroups.GetLength() )
-		{
-			tQuery.m_pGroups = new DWORD [ dGroups.GetLength() ];
-			tQuery.m_iGroups = dGroups.GetLength();
-			memcpy ( tQuery.m_pGroups, &dGroups[0], sizeof(DWORD)*dGroups.GetLength() );
-		}
+		CSphQueryResult * pResult = NULL;
 
 		CSphIndex * pIndex = sphCreateIndexPhrase ( hIndex["path"].cstr() );
-		CSphQueryResult * pResult = pIndex->Query ( pDict, &tQuery );
+		const CSphSchema * pSchema = pIndex->LoadSchema ();
+		if ( pSchema )
+		{
+			// if we're not sorting by relevance, lookup first timestamp column
+			if ( tQuery.m_eSort!=SPH_SORT_RELEVANCE )
+			{
+				int iTS = -1;
+				ARRAY_FOREACH ( i, pSchema->m_dAttrs )
+					if ( pSchema->m_dAttrs[i].m_eAttrType==SPH_ATTR_TIMESTAMP )
+				{
+					tQuery.m_sSortBy = pSchema->m_dAttrs[i].m_sName;
+					iTS = i;
+					break;
+				}
+				if ( iTS<0 )
+				{
+					fprintf ( stdout, "index '%s': no timestamp attributes found, sorting by relevance.\n", sIndexName );
+					tQuery.m_eSort = SPH_SORT_RELEVANCE;
+				}
+			}
+
+			pResult = pIndex->Query ( pDict, &tQuery );
+		}
 
 		SafeDelete ( pIndex );
 		SafeDelete ( pDict );
@@ -290,7 +326,7 @@ int main ( int argc, char ** argv )
 
 		if ( !pResult )
 		{
-			fprintf ( stdout, "index '%s': query '%s': search error: can not open index.\n", sIndexName, sQuery );
+			fprintf ( stdout, "index '%s': search error: can not open index.\n", sIndexName );
 			return 1;
 		}
 
@@ -306,13 +342,31 @@ int main ( int argc, char ** argv )
 			for ( int i=iStart; i<iMaxIndex; i++ )
 			{
 				CSphMatch & tMatch = pResult->m_dMatches[i];
-				time_t tStamp = tMatch.m_iTimestamp; // for 64-bit
-				fprintf ( stdout, "%d. document=%d, group=%d, weight=%d, time=%s",
-					1+i,
-					tMatch.m_iDocID,
-					tMatch.m_iGroupID,
-					tMatch.m_iWeight,
-					ctime ( &tStamp ) );
+				fprintf ( stdout, "%d. document=%d, weight=%d", 1+i, tMatch.m_iDocID, tMatch.m_iWeight );
+
+				if ( tMatch.m_pAttrs )
+					ARRAY_FOREACH ( j, pResult->m_tSchema.m_dAttrs )
+				{
+					const CSphColumnInfo & tAttr = pResult->m_tSchema.m_dAttrs[j];
+
+					if ( tAttr.m_eAttrType==SPH_ATTR_INTEGER )
+					{
+						fprintf ( stdout, ", %s=%d", tAttr.m_sName.cstr(), tMatch.m_pAttrs[j] );
+					
+					} else if ( tAttr.m_eAttrType==SPH_ATTR_TIMESTAMP )
+					{
+						char sBuf[256];
+						time_t tStamp = tMatch.m_pAttrs[j]; // for 64-bit
+						strncpy ( sBuf, ctime(&tStamp), sizeof(sBuf) );
+
+						char * p = sBuf;
+						while ( (*p) && (*p)!='\n' && (*p)!='\r' ) p++;
+						*p = '\0';
+
+						fprintf ( stdout, ", %s=%s", tAttr.m_sName.cstr(), sBuf );
+					}
+				}
+				fprintf ( stdout,"\n" );
 
 				#if USE_MYSQL
 				if ( sQueryInfo )

+ 272 - 61
src/searchd.cpp

@@ -24,6 +24,7 @@
 #include <sys/types.h>
 #include <time.h>
 #include <stdarg.h>
+#include <limits.h>
 
 /////////////////////////////////////////////////////////////////////////////
 
@@ -88,6 +89,7 @@ static int				g_iMaxMatches	= 1000;
 struct ServedIndex_t
 {
 	CSphIndex *			m_pIndex;
+	const CSphSchema *	m_pSchema;	///< pointer to index schema, managed by the index itself
 	CSphDict *			m_pDict;
 	ISphTokenizer *		m_pTokenizer;
 	CSphString *		m_pLockFile; 
@@ -136,7 +138,7 @@ enum SearchdCommand_e
 /// known command versions
 enum
 {
-	VER_COMMAND_SEARCH		= 0x101,
+	VER_COMMAND_SEARCH		= 0x102,
 	VER_COMMAND_EXCERPT		= 0x100
 };
 
@@ -305,7 +307,9 @@ void sphInfo ( const char * sFmt, ... )
 
 const int		NET_MAX_REQ_LEN			= 1048576;
 const int		NET_MAX_STR_LEN			= NET_MAX_REQ_LEN;
-const int		SEARCHD_MAX_REQ_GROUPS	= 4096;
+const int		SEARCHD_MAX_ATTRS		= 256;
+const int		SEARCHD_MAX_ATTR_VALUES	= 4096;
+
 
 const char * sphSockError ( int iErr=0 )
 {
@@ -353,7 +357,7 @@ int sphCreateServerSocket ( int port )
 
 	sphInfo ( "creating a server socket on port %d", port );
 	if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0)
-		sphFatal ( "unable to create server socket on port %d: %s", port, sphSockError() );
+		sphFatal ( "failed to create server socket on port %d: %s", port, sphSockError() );
 
 	int iOn = 1;
 	if ( setsockopt ( sock, SOL_SOCKET, SO_REUSEADDR, (char*)&iOn, sizeof(iOn) ) )
@@ -526,7 +530,7 @@ public:
 	DWORD			GetDword () { return ntohl ( GetT<DWORD> () ); }
 	BYTE			GetByte () { return GetT<BYTE> (); }
 	CSphString		GetString ();
-	int				GetInts ( int ** pBuffer, int iMax, const char * sErrorTemplate );
+	int				GetDwords ( DWORD ** pBuffer, int iMax, const char * sErrorTemplate );
 	bool			GetError () { return m_bError; }
 
 	virtual void	SendErrorReply ( const char *, ... ) = 0;
@@ -724,7 +728,7 @@ bool InputBuffer_c::GetBytes ( void * pBuf, int iLen )
 }
 
 
-int InputBuffer_c::GetInts ( int ** ppBuffer, int iMax, const char * sErrorTemplate )
+int InputBuffer_c::GetDwords ( DWORD ** ppBuffer, int iMax, const char * sErrorTemplate )
 {
 	assert ( ppBuffer );
 	assert ( !(*ppBuffer) );
@@ -738,7 +742,7 @@ int InputBuffer_c::GetInts ( int ** ppBuffer, int iMax, const char * sErrorTempl
 	}
 	if ( iCount )
 	{
-		(*ppBuffer) = new int [ iCount ];
+		(*ppBuffer) = new DWORD [ iCount ];
 		if ( !GetBytes ( (*ppBuffer), sizeof(int)*iCount ) )
 		{
 			SafeDeleteArray ( (*ppBuffer) );
@@ -1423,9 +1427,19 @@ int QueryRemoteAgents ( const char * sIndexName, DistributedIndex_t & tDist, con
 				}
 
 				// do query!
-				int iQueryLen = strlen ( tQuery.m_sQuery.cstr() );
-				int iIndexesLen = strlen ( tAgent.m_sIndexes.cstr() );
-				int iReqSize = 68 + 4*tQuery.m_iGroups + iQueryLen + 4*tQuery.m_iWeights + iIndexesLen;
+				int iReqSize = 56 + 4*tQuery.m_iWeights
+					+ strlen ( tQuery.m_sSortBy.cstr() )
+					+ strlen ( tQuery.m_sQuery.cstr() )
+					+ strlen ( tAgent.m_sIndexes.cstr() );
+				ARRAY_FOREACH ( j, tQuery.m_dFilters )
+				{
+					const CSphFilter & tFilter = tQuery.m_dFilters[j];
+					iReqSize +=
+						8
+						+ strlen ( tFilter.m_sAttrName.cstr() )
+						+ 4*tFilter.m_iValues
+						+ ( tFilter.m_iValues ? 0 : 8 );
+				}
 
 				NetOutputBuffer_c tOut ( tAgent.m_iSock );
 
@@ -1437,14 +1451,12 @@ int QueryRemoteAgents ( const char * sIndexName, DistributedIndex_t & tDist, con
 				tOut.SendWord ( VER_COMMAND_SEARCH ); // command version
 				tOut.SendInt ( iReqSize-12 ); // request body length
 
-				// request v.1.1
+				// request v.1.2
 				tOut.SendInt ( 0 ); // offset is 0
 				tOut.SendInt ( g_iMaxMatches ); // limit is MAX_MATCHES
 				tOut.SendInt ( iMode ); // match mode
 				tOut.SendInt ( tQuery.m_eSort ); // sort mode
-				tOut.SendInt ( tQuery.m_iGroups );
-				for ( int j=0; j<tQuery.m_iGroups; j++ )
-					tOut.SendDword ( tQuery.m_pGroups[j] ); // groups
+				tOut.SendString ( tQuery.m_sSortBy.cstr() ); // sort attr
 				tOut.SendString ( tQuery.m_sQuery.cstr() ); // query
 				tOut.SendInt ( tQuery.m_iWeights );
 				for ( int j=0; j<tQuery.m_iWeights; j++ )
@@ -1452,13 +1464,23 @@ int QueryRemoteAgents ( const char * sIndexName, DistributedIndex_t & tDist, con
 				tOut.SendString ( tAgent.m_sIndexes.cstr() ); // indexes
 				tOut.SendInt ( tQuery.m_iMinID ); // id/ts ranges
 				tOut.SendInt ( tQuery.m_iMaxID );
-				tOut.SendInt ( tQuery.m_iMinTS );
-				tOut.SendInt ( tQuery.m_iMaxTS );
-				tOut.SendInt ( tQuery.m_iMinGID );
-				tOut.SendInt ( tQuery.m_iMaxGID );
+				tOut.SendInt ( tQuery.m_dFilters.GetLength() );
+				ARRAY_FOREACH ( j, tQuery.m_dFilters )
+				{
+					const CSphFilter & tFilter = tQuery.m_dFilters[j];
+					tOut.SendString ( tFilter.m_sAttrName.cstr() );
+					tOut.SendInt ( tFilter.m_iValues );
+					for ( int k=0; k<tFilter.m_iValues; k++ )
+						tOut.SendInt ( tFilter.m_pValues[k] );
+					if ( !tFilter.m_iValues )
+					{
+						tOut.SendDword ( tFilter.m_uMinValue );
+						tOut.SendDword ( tFilter.m_uMaxValue );
+					}
+				}
 				tOut.Flush ();
 
-				// FIXME! !COMMIT handle flush failure
+				// FIXME! handle flush failure
 				tAgent.m_eState = AGENT_QUERY;
 				iAgents++;
 			}
@@ -1625,6 +1647,21 @@ int WaitForRemoteAgents ( const char * sIndexName, DistributedIndex_t & tDist, C
 				{
 					MemInputBuffer_c tReq ( tAgent.m_pReplyBuf, tAgent.m_iReplySize );
 
+					// get schema
+					CSphSchema & tSchema = tAgent.m_tRes.m_tSchema;
+
+					tSchema.m_dFields.Resize ( tReq.GetInt() ); // FIXME! add a sanity check
+					ARRAY_FOREACH ( j, tSchema.m_dFields )
+						tSchema.m_dFields[j].m_sName = tReq.GetString ();
+
+					tSchema.m_dAttrs.Resize ( tReq.GetInt() ); // FIXME! add a sanity check
+					ARRAY_FOREACH ( j, tSchema.m_dAttrs )
+					{
+						tSchema.m_dAttrs[j].m_sName = tReq.GetString ();
+						tSchema.m_dAttrs[j].m_eAttrType = (ESphAttrType) tReq.GetDword (); // FIXME! add a sanity check
+					}
+
+					// get matches
 					int iMatches = tReq.GetInt ();
 					if ( iMatches<0 || iMatches>g_iMaxMatches )
 					{
@@ -1635,16 +1672,24 @@ int WaitForRemoteAgents ( const char * sIndexName, DistributedIndex_t & tDist, C
 					}
 
 					assert ( !tAgent.m_tRes.m_dMatches.GetLength() );
+					int iAttrs = tSchema.m_dAttrs.GetLength();
 					if ( iMatches )
 					{
 						tAgent.m_tRes.m_dMatches.Resize ( iMatches );
 						ARRAY_FOREACH ( i, tAgent.m_tRes.m_dMatches )
 						{
 							CSphMatch & tMatch = tAgent.m_tRes.m_dMatches[i];
+							tMatch.Reset ();
+
 							tMatch.m_iDocID = tReq.GetInt ();
-							tMatch.m_iGroupID = tReq.GetInt ();
-							tMatch.m_iTimestamp = tReq.GetInt ();
 							tMatch.m_iWeight = tReq.GetInt ();
+							tMatch.m_iAttrs = iAttrs;
+							if ( iAttrs )
+							{
+								tMatch.m_pAttrs = new DWORD [ iAttrs ]; // !COMMIT pool these allocs
+								for ( int j=0; j<iAttrs; j++ )
+									tMatch.m_pAttrs[j] = tReq.GetDword ();
+							}
 						}
 					}
 
@@ -1781,6 +1826,43 @@ inline bool operator < ( const CSphMatch & a, const CSphMatch & b )
 
 /////////////////////////////////////////////////////////////////////////////
 
+bool CheckSortAndSchema ( const CSphSchema ** ppFirst,
+	const CSphSchema * pServed, const char * sServedName, const CSphQuery & tQuery,
+	InputBuffer_c & tReq, ISphMatchQueue * pTop )
+{
+	assert ( ppFirst );
+	assert ( pServed );
+
+	if ( !*ppFirst )
+	{
+		// lookup proper attribute index to sort by
+		*ppFirst = pServed;
+		int iAttr = pServed->GetAttrIndex ( tQuery.m_sSortBy.cstr() );
+		if ( iAttr<0 )
+		{
+			if ( tQuery.m_eSort!=SPH_SORT_RELEVANCE )
+			{
+				tReq.SendErrorReply ( "index '%s': sort-by attribute '%s' not found",
+					sServedName, tQuery.m_sSortBy.cstr() );
+				return false;
+			}
+			iAttr = 0;
+		}
+		pTop->SetAttr ( iAttr );
+
+	} else
+	{
+		// check schemas
+		CSphString sError;
+		if ( !pServed->IsEqual ( **ppFirst, sError ) )
+		{
+			tReq.SendErrorReply ( "index '%s': schema mismatch: %s", sServedName, sError.cstr() );
+			return false;
+		}
+	}
+	return true;
+}
+
 void HandleCommandSearch ( int iSock, int iVer, InputBuffer_c & tReq )
 {
 	CSphQuery tQuery;
@@ -1793,6 +1875,12 @@ void HandleCommandSearch ( int iSock, int iVer, InputBuffer_c & tReq )
 			VER_COMMAND_SEARCH>>8, iVer>>8, iVer&0xff );
 		return;
 	}
+	if ( iVer>VER_COMMAND_SEARCH )
+	{
+		tReq.SendErrorReply ( "client version is higher than daemon version (client is v.%d.%d, daemon is v.%d.%d)",
+			iVer>>8, iVer&0xff, VER_COMMAND_SEARCH>>8, VER_COMMAND_SEARCH&0xff );
+		return;
+	}
 
 	// per-server query settings
 	tQuery.m_iMaxMatches = g_iMaxMatches;
@@ -1801,25 +1889,63 @@ void HandleCommandSearch ( int iSock, int iVer, InputBuffer_c & tReq )
 	// parse request
 	/////////////////
 
+	DWORD iMinTS = 0, iMaxTS = UINT_MAX, iMinGID = 0, iMaxGID = UINT_MAX, * pGroups = NULL;
+	int iGroups = 0;
+
 	// v.1.0. mode, limits, weights, ID/TS ranges
 	int iOffset			= tReq.GetInt ();
 	int iLimit			= tReq.GetInt ();
-	tQuery.m_eMode			= (ESphMatchMode) tReq.GetInt ();
+	tQuery.m_eMode		= (ESphMatchMode) tReq.GetInt ();
 	tQuery.m_eSort		= (ESphSortOrder) tReq.GetInt ();
-	tQuery.m_iGroups	= tReq.GetInts ( (int**)&tQuery.m_pGroups, SEARCHD_MAX_REQ_GROUPS, "invalid group count %d (should be in 0..%d range)" );
+	if ( iVer<=0x101 )
+		iGroups			= tReq.GetDwords ( &pGroups, SEARCHD_MAX_ATTR_VALUES, "invalid group count %d (should be in 0..%d range)" );
+	if ( iVer>=0x102 )
+		tQuery.m_sSortBy= tReq.GetString ();
 	tQuery.m_sQuery		= tReq.GetString ();
-	tQuery.m_iWeights	= tReq.GetInts ( (int**)&tQuery.m_pWeights, SPH_MAX_FIELD_COUNT, "invalid weight count %d (should be in 0..%d range)" );
+	tQuery.m_iWeights	= tReq.GetDwords ( (DWORD**)&tQuery.m_pWeights, SPH_MAX_FIELDS, "invalid weight count %d (should be in 0..%d range)" );
 	CSphString sIndex	= tReq.GetString ();
 	tQuery.m_iMinID		= tReq.GetDword ();
 	tQuery.m_iMaxID		= tReq.GetDword ();
-	tQuery.m_iMinTS		= tReq.GetDword ();
-	tQuery.m_iMaxTS		= tReq.GetDword ();
 
-	// v.1.1
-	if ( iVer>=0x101 )
+	// upto v.1.1
+	if ( iVer<=0x101 )
 	{
-		tQuery.m_iMinGID = tReq.GetDword ();
-		tQuery.m_iMaxGID = tReq.GetDword ();
+		iMinTS = tReq.GetDword ();
+		iMaxTS = tReq.GetDword ();
+	}
+
+	// v.1.1 specific
+	if ( iVer==0x101 )
+	{
+		iMinGID = tReq.GetDword ();
+		iMaxGID = tReq.GetDword ();
+	}
+	// !COMMIT use min/max ts/gid
+
+	// v.1.2
+	if ( iVer>=0x102 )
+	{
+		int iAttrFilters = tReq.GetInt ();
+		if ( iAttrFilters>SEARCHD_MAX_ATTRS )
+		{
+			tReq.SendErrorReply ( "too much attribute filters (req=%d, max=%d)", iAttrFilters, SEARCHD_MAX_ATTRS );
+			return;
+		}
+
+		tQuery.m_dFilters.Resize ( iAttrFilters );
+		ARRAY_FOREACH ( i, tQuery.m_dFilters )
+		{
+			CSphFilter & tFilter = tQuery.m_dFilters[i];
+			tFilter.m_sAttrName = tReq.GetString ();
+			tFilter.m_iValues = tReq.GetDwords ( &tFilter.m_pValues, SEARCHD_MAX_ATTR_VALUES,
+				"invalid attribute set length %d (should be in 0..%d range)" );
+			if ( !tFilter.m_iValues )
+			{
+				// 0 length means this is range, not set
+				tFilter.m_uMinValue = tReq.GetDword ();
+				tFilter.m_uMaxValue = tReq.GetDword ();
+			}
+		}
 	}
 
 	// additional checks
@@ -1828,7 +1954,7 @@ void HandleCommandSearch ( int iSock, int iVer, InputBuffer_c & tReq )
 		tReq.SendErrorReply ( "invalid or truncated request" );
 		return;
 	}
-	if ( tQuery.m_iMinID>tQuery.m_iMaxID || tQuery.m_iMinTS>tQuery.m_iMaxTS )
+	if ( tQuery.m_iMinID>tQuery.m_iMaxID || iMinTS>iMaxTS )
 	{
 		tReq.SendErrorReply ( "invalid ID/TS range specified in query" );
 		return;
@@ -1844,9 +1970,11 @@ void HandleCommandSearch ( int iSock, int iVer, InputBuffer_c & tReq )
 	////////////////
 
 	// do search
+	float tmStart = sphLongTimer ();
+
+	const CSphSchema * pFirst = NULL;
 	CSphQueryResult * pRes = new CSphQueryResult ();
 	ISphMatchQueue * pTop = sphCreateQueue ( &tQuery );
-	float tmStart = sphLongTimer ();
 
 #define REMOVE_DUPES 1
 
@@ -1863,7 +1991,7 @@ void HandleCommandSearch ( int iSock, int iVer, InputBuffer_c & tReq )
 		int iRemote = QueryRemoteAgents ( sIndex.cstr(), tDist, tQuery, tQuery.m_eMode );
 
 		// while the remote queries are running, do local searches
-		// !COMMIT what if the remote agents finish early, could they timeout?
+		// FIXME! what if the remote agents finish early, could they timeout?
 		float tmQuery = -sphLongTimer ();
 		ARRAY_FOREACH ( i, tDist.m_dLocal )
 		{
@@ -1872,6 +2000,10 @@ void HandleCommandSearch ( int iSock, int iVer, InputBuffer_c & tReq )
 			assert ( tServed.m_pDict );
 			assert ( tServed.m_pTokenizer );
 
+			// check/set sort-by attr and schema
+			if ( !CheckSortAndSchema ( &pFirst, tServed.m_pSchema, tDist.m_dLocal[i].cstr(), tQuery, tReq, pTop ) )
+				return;
+
 			// do query
 			tQuery.m_pTokenizer = tServed.m_pTokenizer;
 			tServed.m_pIndex->QueryEx ( tServed.m_pDict, &tQuery, pRes, pTop );
@@ -1902,16 +2034,25 @@ void HandleCommandSearch ( int iSock, int iVer, InputBuffer_c & tReq )
 			// merge local and remote results
 			ARRAY_FOREACH ( iAgent, tDist.m_dAgents )
 				if ( tDist.m_dAgents[iAgent].m_tRes.m_dMatches.GetLength() )
-				{
-					// merge this agent's results
-					Agent_t & tAgent = tDist.m_dAgents[iAgent];
-					ARRAY_FOREACH ( i, tAgent.m_tRes.m_dMatches )
-						pRes->m_dMatches.Add( tAgent.m_tRes.m_dMatches[i] );
-					tAgent.m_tRes.m_dMatches.Reset ();
-
-					// merge this agent's stats
-					pRes->m_iTotalMatches += tAgent.m_tRes.m_iTotalMatches;
-				}
+			{
+				Agent_t & tAgent = tDist.m_dAgents[iAgent];
+
+				// check/set sort-by attr and schema
+				char sName [ 1024 ];
+				snprintf ( sName, sizeof(sName), "%s:%d:%s",
+					tAgent.m_sHost.cstr(), tAgent.m_iPort, tAgent.m_sIndexes.cstr() );
+
+				if ( !CheckSortAndSchema ( &pFirst, &tAgent.m_tRes.m_tSchema, sName, tQuery, tReq, pTop ) )
+					return;
+
+				// merge this agent's results
+				ARRAY_FOREACH ( i, tAgent.m_tRes.m_dMatches )
+					pRes->m_dMatches.Add( tAgent.m_tRes.m_dMatches[i] );
+				tAgent.m_tRes.m_dMatches.Reset ();
+
+				// merge this agent's stats
+				pRes->m_iTotalMatches += tAgent.m_tRes.m_iTotalMatches;
+			}
 		}
 
 	} else if ( sIndex=="*" )
@@ -1925,6 +2066,10 @@ void HandleCommandSearch ( int iSock, int iVer, InputBuffer_c & tReq )
 			assert ( tServed.m_pDict );
 			assert ( tServed.m_pTokenizer );
 
+			// check/set sort-by attr and schema
+			if ( !CheckSortAndSchema ( &pFirst, tServed.m_pSchema, g_hIndexes.IterateGetKey().cstr(), tQuery, tReq, pTop ) )
+				return;
+
 			// do query
 			tQuery.m_pTokenizer = tServed.m_pTokenizer;
 			tServed.m_pIndex->QueryEx ( tServed.m_pDict, &tQuery, pRes, pTop );
@@ -1970,6 +2115,10 @@ void HandleCommandSearch ( int iSock, int iVer, InputBuffer_c & tReq )
 				|| !tCache.ReadFromFile ( tQuery, sNext, tServed.m_pIndexPath->cstr(), pRes ) )
 #endif
 			{
+				// check/set sort-by attr and schema
+				if ( !CheckSortAndSchema ( &pFirst, tServed.m_pSchema, sNext, tQuery, tReq, pTop ) )
+					return;
+
 				// do query
 				tQuery.m_pTokenizer = tServed.m_pTokenizer;
 				tServed.m_pIndex->QueryEx ( tServed.m_pDict, &tQuery, pRes, pTop );
@@ -2024,9 +2173,9 @@ void HandleCommandSearch ( int iSock, int iVer, InputBuffer_c & tReq )
 		sTimeBuf [ strlen(sTimeBuf)-1 ] = '\0';
 
 		static const char * sModes [ SPH_MATCH_TOTAL ] = { "all", "any", "phr", "bool" };
-		snprintf ( sBuf, sizeof(sBuf), "[%s] %d.%03d sec: [%d %d %s/%d/%d %d] %s\n",
+		snprintf ( sBuf, sizeof(sBuf), "[%s] %d.%03d sec: [%d %d %s/%d %d] %s\n",
 			sTimeBuf, pRes->m_iQueryTime/1000, pRes->m_iQueryTime%1000,
-			iOffset, iLimit, sModes [ tQuery.m_eMode ], tQuery.m_eSort, tQuery.m_iGroups,
+			iOffset, iLimit, sModes [ tQuery.m_eMode ], tQuery.m_eSort,
 			pRes->m_iTotalMatches, tQuery.m_sQuery.cstr() );
 
 		sphLockEx ( g_iQueryLogFile );
@@ -2039,26 +2188,78 @@ void HandleCommandSearch ( int iSock, int iVer, InputBuffer_c & tReq )
 	// serve the response
 	//////////////////////
 
+	// calc response length
+	int iRespLen = 20; // header
+
+	if ( iVer>=0x102 ) // schema
+	{
+		iRespLen += 8; // 4 for field count, 4 for attr count
+		ARRAY_FOREACH ( i, pRes->m_tSchema.m_dFields )
+			iRespLen += 4 + strlen ( pRes->m_tSchema.m_dFields[i].m_sName.cstr() ); // namelen, name
+		ARRAY_FOREACH ( i, pRes->m_tSchema.m_dAttrs )
+			iRespLen += 8 + strlen ( pRes->m_tSchema.m_dFields[i].m_sName.cstr() ); // namelen, name, type
+	}
+
 	int iCount = Max ( Min ( iLimit, pRes->m_dMatches.GetLength()-iOffset ), 0 );
+	if ( iVer<=0x101 )
+		iRespLen += 16*iCount; // matches
+	else
+		iRespLen += ( 8+4*pRes->m_tSchema.m_dAttrs.GetLength() )*iCount; // matches
 
-	int iRespLen = 20 + 16*iCount;
-	for ( int i=0; i<pRes->m_iNumWords; i++ )
-		iRespLen += 12 + strlen ( pRes->m_tWordStats[i].m_sWord.cstr() );
+	for ( int i=0; i<pRes->m_iNumWords; i++ ) // per-word stats
+		iRespLen += 12 + strlen ( pRes->m_tWordStats[i].m_sWord.cstr() ); // wordlen, word, docs, hits
 
-	// create buffer, send header
+
+	// send header
 	NetOutputBuffer_c tOut ( iSock );
 	tOut.SendWord ( SEARCHD_OK );
 	tOut.SendWord ( VER_COMMAND_SEARCH );
 	tOut.SendInt ( iRespLen );
 
-	// matches
+	// send schema
+	if ( iVer>=0x102 )
+	{
+		tOut.SendInt ( pRes->m_tSchema.m_dFields.GetLength() );
+		ARRAY_FOREACH ( i, pRes->m_tSchema.m_dFields )
+			tOut.SendString ( pRes->m_tSchema.m_dFields[i].m_sName.cstr() );
+
+		tOut.SendInt ( pRes->m_tSchema.m_dAttrs.GetLength() );
+		ARRAY_FOREACH ( i, pRes->m_tSchema.m_dAttrs )
+		{
+			tOut.SendString ( pRes->m_tSchema.m_dAttrs[i].m_sName.cstr() );
+			tOut.SendDword ( (DWORD)pRes->m_tSchema.m_dAttrs[i].m_eAttrType );
+		}
+	}
+
+	// send matches
+	int iGIDIndex = -1;
+	int iTSIndex = -1;
+	if ( iVer<=0x101 )
+		ARRAY_FOREACH ( i, pRes->m_tSchema.m_dAttrs )
+	{
+		if ( iTSIndex<0 && pRes->m_tSchema.m_dAttrs[i].m_eAttrType==SPH_ATTR_TIMESTAMP )
+			iTSIndex = i;
+		if ( iGIDIndex<0 && pRes->m_tSchema.m_dAttrs[i].m_eAttrType==SPH_ATTR_INTEGER )
+			iGIDIndex = i;
+	}
+
 	tOut.SendInt ( iCount );
 	for ( int i=0; i<iCount; i++ )
 	{
-		tOut.SendDword ( pRes->m_dMatches[iOffset+i].m_iDocID );
-		tOut.SendDword ( pRes->m_dMatches[iOffset+i].m_iGroupID );
-		tOut.SendDword ( pRes->m_dMatches[iOffset+i].m_iTimestamp );
-		tOut.SendInt ( pRes->m_dMatches[iOffset+i].m_iWeight );
+		const CSphMatch & tMatch = pRes->m_dMatches[iOffset+i];
+		tOut.SendDword ( tMatch.m_iDocID );
+		if ( iVer<=0x101 )
+		{
+			tOut.SendDword ( iGIDIndex>=0 ? tMatch.m_pAttrs[iGIDIndex] : 1 );
+			tOut.SendDword ( iTSIndex>=0 ? tMatch.m_pAttrs[iTSIndex] : 1 );
+			tOut.SendInt ( tMatch.m_iWeight );
+		} else
+		{
+			tOut.SendInt ( tMatch.m_iWeight );
+			assert ( tMatch.m_iAttrs==pRes->m_tSchema.m_dAttrs.GetLength() );
+			for ( int j=0; j<tMatch.m_iAttrs; j++ )
+				tOut.SendDword ( tMatch.m_pAttrs[j] );
+		}
 	}
 	tOut.SendInt ( pRes->m_dMatches.GetLength() );
 	tOut.SendInt ( pRes->m_iTotalMatches );
@@ -2514,7 +2715,7 @@ int main ( int argc, char **argv )
 			// check path
 			if ( !hIndex.Exists ( "path" ) )
 			{
-				sphWarning ( "key 'path' not found in index '%s' - NOT SERVING", sIndexName );
+				sphWarning ( "index '%s': key 'path' not found' - NOT SERVING", sIndexName );
 				continue;
 			}
 
@@ -2533,7 +2734,8 @@ int main ( int argc, char **argv )
 
 				} else
 				{
-					sphWarning ( "unknown charset type '%s' in index '%s' - NOT SERVING", hIndex["charset_type"].cstr() );
+					sphWarning ( "index '%s': unknown charset type '%s' - NOT SERVING",
+						sIndexName, hIndex["charset_type"].cstr() );
 					continue;
 				}
 			} else
@@ -2548,15 +2750,16 @@ int main ( int argc, char **argv )
 			{
 				iMorph = sphParseMorphology ( hIndex["morphology"], bUseUTF8 );
 				if ( iMorph==SPH_MORPH_UNKNOWN )
-					sphWarning ( "unknown morphology type '%s' ignored in index '%s'",
-						hIndex["morphology"].cstr(), sIndexName );
+					sphWarning ( "index '%s': unknown morphology type '%s' ignored",
+						sIndexName, hIndex["morphology"].cstr() );
 			}
 
 			// configure charset_table
 			if ( hIndex.Exists ( "charset_table" ) )
 				if ( !pTokenizer->SetCaseFolding ( hIndex["charset_table"].cstr() ) )
 			{
-				sphWarning ( "failed to parse 'charset_table' in index '%s' - NOT SERVING", sIndexName );
+				sphWarning ( "index '%s': failed to parse 'charset_table' - NOT SERVING",
+					sIndexName );
 				continue;
 			}
 
@@ -2566,7 +2769,15 @@ int main ( int argc, char **argv )
 
 			// create add this one to served hashes
 			ServedIndex_t tIdx;
+
 			tIdx.m_pIndex = sphCreateIndexPhrase ( hIndex["path"].cstr() );
+			tIdx.m_pSchema = tIdx.m_pIndex->LoadSchema();
+			if ( !tIdx.m_pSchema )
+			{
+				sphWarning ( "index '%s': failed to load schema - NOT SERVING", sIndexName );
+				continue;
+			}
+
 			tIdx.m_pDict = new CSphDict_CRC32 ( iMorph );
 			tIdx.m_pDict->LoadStopwords ( hIndex.Exists ( "stopwords" ) ? hIndex["stopwords"].cstr() : NULL, pTokenizer );
 			tIdx.m_pTokenizer = pTokenizer;
@@ -2582,14 +2793,14 @@ int main ( int argc, char **argv )
 				struct stat tStat;
 				if ( !stat ( sTmp, &tStat ) )
 				{
-					sphWarning ( "lock file '%s' for index '%s' exists - NOT SERVING", sIndexName, sTmp );
+					sphWarning ( "index '%s': lock file '%s' exists - NOT SERVING", sIndexName, sTmp );
 					continue;
 				}
 
 				// create lock file
 				FILE * fp = fopen ( sTmp, "w" );
 				if ( !fp )
-					sphFatal ( "unable to create lock file '%s' for index '%s'", sTmp, sIndexName );
+					sphFatal ( "index '%s': failed to create lock file '%s''", sIndexName, sTmp );
 				fprintf ( fp, "%d", getpid() );
 				fclose ( fp );
 
@@ -2686,7 +2897,7 @@ int main ( int argc, char **argv )
 		g_sPidFile = hSearchd["pid_file"].cstr();
 		FILE * fp = fopen ( g_sPidFile, "w" );
 		if ( !fp )
-			sphFatal ( "unable to write pid file '%s'", g_sPidFile );
+			sphFatal ( "failed to write pid file '%s'", g_sPidFile );
 		fprintf ( fp, "%d", getpid() );	
 		fclose ( fp );
 	}

Tiedoston diff-näkymää rajattu, sillä se on liian suuri
+ 495 - 255
src/sphinx.cpp


+ 174 - 66
src/sphinx.h

@@ -77,7 +77,7 @@
 #define SPH_MAX_QUERY_WORDS		10
 #define SPH_MAX_WORD_LEN		64
 #define SPH_MAX_FILENAME_LEN	512
-#define SPH_MAX_FIELD_COUNT		32
+#define SPH_MAX_FIELDS			32
 
 #define SPH_CLOG_BITS_DIR		10
 #define SPH_CLOG_BITS_PAGE		22
@@ -304,9 +304,46 @@ struct CSphWordHit
 /// document info
 struct CSphDocInfo
 {
-	DWORD	m_iDocID;		///< document ID
-	DWORD	m_iGroupID;		///< documents group ID
-	DWORD	m_iTimestamp;	///< document timestamp
+	DWORD		m_iDocID;	///< document ID
+	int			m_iAttrs;	///< attribute count (FIXME! invariant over index; stored for assignment operator)
+	DWORD *		m_pAttrs;	///< attribute values
+
+	/// ctor. clears everything
+	CSphDocInfo ()
+		: m_iDocID ( 0 )
+		, m_iAttrs ( 0 )
+		, m_pAttrs ( NULL )
+	{
+	}
+
+	/// dtor. frees everything
+	~CSphDocInfo ()
+	{
+		SafeDeleteArray ( m_pAttrs );
+	}
+
+	/// reset
+	void Reset ()
+	{
+		m_iDocID = 0;
+		m_iAttrs = 0;
+		SafeDeleteArray ( m_pAttrs );
+	}
+
+	/// assignment
+	/// !COMMIT remove me
+	const CSphDocInfo & operator = ( const CSphDocInfo & rhs )
+	{
+		SafeDeleteArray ( m_pAttrs );
+		m_iDocID = rhs.m_iDocID;
+		m_iAttrs = rhs.m_iAttrs;
+		if ( m_iAttrs )
+		{
+			m_pAttrs = new DWORD [ m_iAttrs ]; // !COMMIT pool these allocs
+			memcpy ( m_pAttrs, rhs.m_pAttrs, sizeof(DWORD)*m_iAttrs );
+		}
+		return *this;
+	}
 };
 
 
@@ -328,18 +365,61 @@ struct CSphSourceStats
 };
 
 
+/// known attribute types
+enum ESphAttrType
+{
+	SPH_ATTR_NONE		= 0,	///< not an attribute at all
+	SPH_ATTR_INTEGER	= 1,	///< this attr is just an integer
+	SPH_ATTR_TIMESTAMP	= 2		///< this attr is a timestamp
+};
+
+
+/// source column info
+struct CSphColumnInfo
+{
+	CSphString		m_sName;		///< column name
+	ESphAttrType	m_eAttrType;	///< attribute type
+	int				m_iIndex;		///< index into the result set
+
+	/// handy ctor
+	CSphColumnInfo ( const char * sName=NULL, ESphAttrType eType=SPH_ATTR_NONE )
+		: m_sName ( sName )
+		, m_eAttrType ( eType )
+	{}
+};
+
+
+/// source schema
+struct CSphSchema
+{
+	CSphString									m_sName;		///< my human-readable name
+	CSphVector<CSphColumnInfo,SPH_MAX_FIELDS>	m_dFields;		///< my fulltext-searchable fields
+	CSphVector<CSphColumnInfo,8>				m_dAttrs;		///< my per-document attributes
+
+	/// ctor
+			CSphSchema ( const char * sName ) : m_sName ( sName ) {}
+
+	/// get attribute index by name
+	/// returns -1 if not found
+	int		GetAttrIndex ( const char * sName ) const;
+
+	/// checks if two schemas match
+	/// returns false and puts human-readable error message if they dont
+	bool	IsEqual ( const CSphSchema & rhs, CSphString & sError ) const;
+};
+
+
 /// generic data source
 class CSphHTMLStripper;
 class CSphSource
 {
 public:
 	CSphVector<CSphWordHit>				m_dHits;	///< current document split into words
-	CSphDocInfo							m_tDocInfo;
-
+	CSphDocInfo							m_tDocInfo;	///< current document info
 
 public:
 	/// ctor
-										CSphSource ();
+										CSphSource ( const char * sName );
 
 	/// dtor
 	virtual								~CSphSource ();
@@ -362,17 +442,16 @@ public:
 	/// get stats
 	virtual const CSphSourceStats &		GetStats ();
 
+	/// update field and attribute information
+	/// updates pInfo if it's empty; checks for match if it's not
+	/// must be called after Init()
+	virtual bool						UpdateSchema ( CSphSchema * pInfo );
+
 public:
 	/// document getter
 	/// to be implemented by descendants
 	virtual int							Next () = 0;
 
-	/// field count getter
-	/// to be implemented by descendants
-	/// MUST be called AFTER the indexing is over
-	/// because at indexing stage, we might not be sure of the exact count
-	virtual int							GetFieldCount () = 0;
-
 	/// post-index callback
 	/// gets called when the indexing is succesfully (!) over
 	virtual void						PostIndex () {}
@@ -380,7 +459,10 @@ public:
 protected:
 	ISphTokenizer *						m_pTokenizer;	///< my tokenizer
 	CSphDict *							m_pDict;		///< my dict
+	
 	CSphSourceStats						m_tStats;		///< my stats
+	CSphSchema							m_tSchema;		///< my schema
+
 	bool								m_bStripHTML;	///< whether to strip HTML
 	CSphHTMLStripper *					m_pStripper;	///< my HTML stripper
 };
@@ -391,7 +473,7 @@ protected:
 struct CSphSource_Document : CSphSource
 {
 	/// ctor
-							CSphSource_Document () : m_bCallWordCallback ( false ) {}
+							CSphSource_Document ( const char * sName ) : CSphSource ( sName ), m_bCallWordCallback ( false ) {}
 
 	/// my generic tokenizer
 	virtual int				Next ();
@@ -403,14 +485,7 @@ struct CSphSource_Document : CSphSource
 	/// to be implemented by descendants
 	virtual BYTE **			NextDocument () = 0;
 
-	/// field count getter
-	virtual int				GetFieldCount ();
-
 protected:
-	/// my field count
-	/// MUST be filled by NextDocument ()
-	int						m_iFieldCount;
-
 	/// whether to call the callback
 	bool					m_bCallWordCallback;
 };
@@ -420,8 +495,9 @@ protected:
 /// one-field plain-text documents
 struct CSphSource_Text : CSphSource_Document
 {
-					CSphSource_Text () { m_iFieldCount = 1; }
+					CSphSource_Text ( const char * sName ) : CSphSource_Document ( sName ) {};
 	BYTE **			NextDocument ();
+
 	virtual BYTE *	NextText () = 0;
 };
 
@@ -433,13 +509,12 @@ struct CSphSourceParams_PgSQL
 	// query params
 	CSphString	m_sQuery;
 	CSphString	m_sQueryRange;
-	CSphString	m_sGroupColumn;
-	CSphString	m_sDateColumn;
 	int			m_iRangeStep;
 
-	CSphVector<CSphString,4>	m_dQueryPre;
-	CSphVector<CSphString,4>	m_dQueryPost;
-	CSphVector<CSphString,4>	m_dQueryPostIndex;
+	CSphVector<CSphString,4>		m_dQueryPre;
+	CSphVector<CSphString,4>		m_dQueryPost;
+	CSphVector<CSphString,4>		m_dQueryPostIndex;
+	CSphVector<CSphColumnInfo,4>	m_dAttrs;
 
 	// connection params
 	CSphString	m_sHost;
@@ -458,7 +533,7 @@ struct CSphSourceParams_PgSQL
 /// multi-field plain-text documents fetched from given query
 struct CSphSource_PgSQL : CSphSource_Document
 {
-						CSphSource_PgSQL ();
+						CSphSource_PgSQL ( const char * sName );
 	virtual				~CSphSource_PgSQL () {}
 
 	bool				Init ( const CSphSourceParams_PgSQL & pParams );
@@ -470,14 +545,10 @@ protected:
 	PGconn *			m_tSqlDriver;	///< postgresql connection context
 	CSphString			m_sSqlDSN;
 
-	int					m_iGroupColumn;	///< group_id column number
-	int					m_iDateColumn;	///< date column number
-
 	int					m_iSqlRows;		///< how much rows last step returned
 	int					m_iSqlRow;		///< current row (0 based, as in PQgetvalue)
 
-	BYTE *				m_dFields [ SPH_MAX_FIELD_COUNT ];
-	int					m_dRemapFields [ SPH_MAX_FIELD_COUNT ];
+	BYTE *				m_dFields [ SPH_MAX_FIELDS ];
 
 	int					m_iMinID;		///< grand min ID
 	int					m_iMaxID;		///< grand max ID
@@ -493,7 +564,6 @@ protected:
 
 protected:
 	bool				RunQueryStep ();
-	int					GetColumnIndex ( const char * sColumn );
 };
 #endif
 
@@ -505,13 +575,12 @@ struct CSphSourceParams_MySQL
 	// query params
 	CSphString	m_sQuery;
 	CSphString	m_sQueryRange;
-	CSphString	m_sGroupColumn;
-	CSphString	m_sDateColumn;
 	int			m_iRangeStep;
 
-	CSphVector<CSphString,4>	m_dQueryPre;
-	CSphVector<CSphString,4>	m_dQueryPost;
-	CSphVector<CSphString,4>	m_dQueryPostIndex;
+	CSphVector<CSphString,4>		m_dQueryPre;
+	CSphVector<CSphString,4>		m_dQueryPost;
+	CSphVector<CSphString,4>		m_dQueryPostIndex;
+	CSphVector<CSphColumnInfo,4>	m_dAttrs;
 
 	// connection params
 	CSphString	m_sHost;
@@ -530,7 +599,7 @@ struct CSphSourceParams_MySQL
 /// multi-field plain-text documents fetched from given query
 struct CSphSource_MySQL : CSphSource_Document
 {
-						CSphSource_MySQL ();
+						CSphSource_MySQL ( const char * sName );
 	virtual				~CSphSource_MySQL () {}
 
 	bool				Init ( const CSphSourceParams_MySQL & tParams );
@@ -543,11 +612,7 @@ protected:
 	MYSQL				m_tSqlDriver;
 	CSphString			m_sSqlDSN;
 
-	int					m_iGroupColumn;	///< group_id column number
-	int					m_iDateColumn;	///< date column number
-
-	BYTE *				m_dFields [ SPH_MAX_FIELD_COUNT ];
-	int					m_dRemapFields [ SPH_MAX_FIELD_COUNT ];
+	BYTE *				m_dFields [ SPH_MAX_FIELDS ];
 
 	int					m_iMinID;		///< grand min ID
 	int					m_iMaxID;		///< grand max ID
@@ -563,7 +628,6 @@ protected:
 
 protected:
 	bool				RunQueryStep ();
-	int					GetColumnIndex ( const char * sColumn );
 };
 #endif
 
@@ -573,7 +637,7 @@ class CSphSource_XMLPipe : public CSphSource
 {
 public:
 	/// ctor
-					CSphSource_XMLPipe ();
+					CSphSource_XMLPipe ( const char * sName );
 
 	/// dtor
 					~CSphSource_XMLPipe ();
@@ -584,9 +648,6 @@ public:
 	/// hit chunk getter
 	virtual int		Next ();
 
-	/// field count getter
-	virtual int		GetFieldCount ();
-
 private:
 	enum Tag_e
 	{
@@ -663,22 +724,39 @@ private:
 /// search query match
 struct CSphMatch : public CSphDocInfo
 {
-	int			m_iWeight;
+	int m_iWeight;
 
-	bool		operator == ( const CSphMatch & rhs ) const
+	CSphMatch ()
+		: m_iWeight ( 0 )
+	{
+	}
+
+	CSphMatch ( const CSphMatch & rhs )
+	{
+		*this = rhs;
+	}
+
+	bool operator == ( const CSphMatch & rhs ) const
 	{
 		return ( m_iDocID==rhs.m_iDocID );
 	}
+
+	const CSphMatch & operator = ( const CSphMatch & rhs )
+	{
+		CSphDocInfo::operator = ( rhs );
+		m_iWeight = rhs.m_iWeight;
+		return *this;
+	}
 };
 
 
 /// search query sorting orders
 enum ESphSortOrder
 {
-	SPH_SORT_RELEVANCE = 0,		///< sort by document relevance desc, then by date
-	SPH_SORT_DATE_DESC,			///< sort by document date desc, then by relevance desc
-	SPH_SORT_DATE_ASC,			///< sort by document date asc, then by relevance desc
-	SPH_SORT_TIME_SEGMENTS,		///< sort by time segments (hour/day/week/etc) desc, then by relevance desc
+	SPH_SORT_RELEVANCE		= 0,	///< sort by document relevance desc, then by date
+	SPH_SORT_ATTR_DESC		= 1,	///< sort by document date desc, then by relevance desc
+	SPH_SORT_ATTR_ASC		= 2,	///< sort by document date asc, then by relevance desc
+	SPH_SORT_TIME_SEGMENTS	= 3,	///< sort by time segments (hour/day/week/etc) desc, then by relevance desc
 
 	SPH_SORT_TOTAL
 };
@@ -696,6 +774,26 @@ enum ESphMatchMode
 };
 
 
+/// search query filter
+class CSphFilter
+{
+public:
+	CSphString		m_sAttrName;	///< filtered attribute name
+	int				m_iAttrIndex;	///< filtered attribute index
+	DWORD			m_uMinValue;	///< min value, only used when m_iValues==0
+	DWORD			m_uMaxValue;	///< max value, only used when m_iValues==0
+	int				m_iValues;		///< values set size, default is 0
+	DWORD *			m_pValues;		///< values set. OWNED, WILL BE FREED IN DTOR.
+
+public:
+					CSphFilter ();
+					~CSphFilter ();
+	void			SortValues ();	///< sort values in ascending order
+
+	const CSphFilter & operator = ( const CSphFilter & rhs );
+};
+
+
 /// search query
 class CSphQuery
 {
@@ -704,22 +802,18 @@ public:
 	int *			m_pWeights;		///< user-supplied per-field weights. may be NULL. default is NULL. NOT OWNED, WILL NOT BE FREED in dtor.
 	int				m_iWeights;		///< number of user-supplied weights. missing fields will be assigned weight 1. default is 0
 	ESphMatchMode	m_eMode;		///< match mode. default is "match all"
-	DWORD *			m_pGroups;		///< groups to match. default is NULL, which means "match all". OWNED, WILL BE FREED in dtor.
-	int				m_iGroups;		///< count of groups to match
-	ESphSortOrder	m_eSort;		///< sorting order
+	ESphSortOrder	m_eSort;		///< sort mode
+	CSphString		m_sSortBy;		///< attribute to sort by
 	ISphTokenizer *	m_pTokenizer;	///< tokenizer to use. NOT OWNED.
 	int				m_iMaxMatches;	///< max matches to retrieve, default is 1000. more matches use more memory and CPU time to hold and sort them
 
 	DWORD			m_iMinID;		///< min ID to match, 0 by default
 	DWORD			m_iMaxID;		///< max ID to match, UINT_MAX by default
-	DWORD			m_iMinTS;		///< min timestamp to match, 0 by default
-	DWORD			m_iMaxTS;		///< max timestamp to match, UINT_MAX by default
-	DWORD			m_iMinGID;		///< min timestamp to match, 0 by default
-	DWORD			m_iMaxGID;		///< max timestamp to match, UINT_MAX by default
+
+	CSphVector<CSphFilter,8>	m_dFilters;	///< filters
 
 public:
 					CSphQuery ();	///< ctor, fills defaults
-					~CSphQuery ();	///< dtor, safely frees owned fields
 };
 
 
@@ -739,6 +833,8 @@ public:
 	CSphVector<CSphMatch>	m_dMatches;			///< top matching documents, no more than MAX_MATCHES
 	int						m_iTotalMatches;	///< total matches count
 
+	CSphSchema				m_tSchema;			///< index schema
+
 public:
 							CSphQueryResult ();		///< ctor
 	virtual					~CSphQueryResult ();	///< dtor, which releases all owned stuff
@@ -773,6 +869,15 @@ struct CSphIndexProgress
 typedef ISphQueue<CSphMatch>	ISphMatchQueue;
 
 
+/// available docinfo storage strategies
+enum ESphDocinfo
+{
+	SPH_DOCINFO_NONE		= 0,	///< no docinfo available
+	SPH_DOCINFO_INLINE		= 1,	///< inline docinfo into index (specifically, into doclists)
+	SPH_DOCINFO_EXTERN		= 2		///< store docinfo separately
+};
+
+
 /// generic fulltext index interface
 class CSphIndex
 {
@@ -780,17 +885,20 @@ public:
 	typedef void ProgressCallback_t ( const CSphIndexProgress * pStat );
 
 public:
-								CSphIndex() : m_pProgress ( NULL ) {}
+								CSphIndex ( const char * sName ) : m_pProgress ( NULL ), m_tSchema ( sName ) {}
 	virtual						~CSphIndex () {}
 	virtual	void				SetProgressCallback ( ProgressCallback_t * pfnProgress ) { m_pProgress = pfnProgress; }
 
 public:
-	virtual int					Build ( CSphDict * dict, const CSphVector < CSphSource * > & dSources, int iMemoryLimit ) = 0;
+	virtual int					Build ( CSphDict * dict, const CSphVector < CSphSource * > & dSources, int iMemoryLimit, ESphDocinfo eDocinfo ) = 0;
 	virtual CSphQueryResult *	Query ( CSphDict * dict, CSphQuery * pQuery ) = 0;
 	virtual bool				QueryEx ( CSphDict * dict, CSphQuery * pQuery, CSphQueryResult * pResult, ISphMatchQueue * pTop ) = 0;
 
+	virtual const CSphSchema *	LoadSchema () = 0;
+
 protected:
 	ProgressCallback_t *		m_pProgress;
+	CSphSchema					m_tSchema;
 };
 
 /////////////////////////////////////////////////////////////////////////////

+ 8 - 6
src/sphinxstd.h

@@ -169,7 +169,7 @@ public:
 	}
 
 	/// query current length
-	int GetLength () const
+	inline int GetLength () const
 	{
 		return m_iLength;
 	}
@@ -593,11 +593,6 @@ public:
 		return m_sValue;
 	}
 
-	char * str ()
-	{
-		return m_sValue;
-	}
-
 	bool operator == ( const CSphString & t ) const
 	{
 		return strcmp ( m_sValue, t.m_sValue )==0;
@@ -768,6 +763,13 @@ public:
 
 	/// get current root
 	virtual const T &	Root () const = 0;
+
+public:
+	/// set attr to sort by
+	virtual void		SetAttr ( int ) {};
+
+	/// get attr to sort by
+	virtual int			GetAttr () { return 0; }
 };
 
 #endif // _sphinxstd_

Kaikkia tiedostoja ei voida näyttää, sillä liian monta tiedostoa muuttui tässä diffissä