ソースを参照

optimized single query vs. many local indexes vs. huge max-matches (reusing the sorter now)
added --limit to python test



git-svn-id: svn://svn.sphinxsearch.com/sphinx/trunk@1136 406a0c4d-033a-0410-8de8-e80135713968

shodan 18 年 前
コミット
a145d52850
3 ファイル変更、59 行追加、21 行削除
  1. api/test.py (+8 −1)
  2. src/searchd.cpp (+49 −18)
  3. src/sphinxstd.h (+2 −2)

+ 8 - 1
api/test.py

@@ -18,7 +18,8 @@ if not sys.argv[1:]:
 	print "-f, --filter <ATTR>\tfilter by attribute 'ATTR' (default is 'group_id')"
 	print "-v, --value <VAL>\tadd VAL to allowed 'group_id' values list"
 	print "-g, --groupby <EXPR>\tgroup matches by 'EXPR'"
-	print "-gs, --groupsort <EXPR>\tsort groups by 'EXPR'"
+	print "-gs,--groupsort <EXPR>\tsort groups by 'EXPR'"
+	print "-l, --limit <COUNT>\tretrieve COUNT matches (default is 20)"
 	sys.exit(0)
 
 q = ''
@@ -31,6 +32,7 @@ filtervals = []
 sortby = ''
 groupby = ''
 groupsort = '@group desc'
+limit = 0
 
 i = 1
 while (i<len(sys.argv)):
@@ -65,6 +67,9 @@ while (i<len(sys.argv)):
 	elif arg=='-gs' or arg=='--groupsort':
 		i += 1
 		groupsort = sys.argv[i]
+	elif arg=='-l' or arg=='--limit':
+		i += 1
+		limit = int(sys.argv[i])
 	else:
 		q = '%s%s ' % ( q, arg )
 	i += 1
@@ -80,6 +85,8 @@ if groupby:
 	cl.SetGroupBy ( groupby, SPH_GROUPBY_ATTR, groupsort )
 if sortby:
 	cl.SetSortMode ( SPH_SORT_EXTENDED, sortby )
+if limit:
+	cl.SetLimits ( 0, limit, max(limit,1000) )
 res = cl.Query ( q, index )
 
 if not res:

+ 49 - 18
src/searchd.cpp

@@ -48,11 +48,6 @@
 	#include <sys/wait.h>
 	#include <netdb.h>
 
-	// for cache
-	#include <zlib.h>
-	#include <sys/mman.h>
-	#include <md5.h>
-
 	// there's no MSG_NOSIGNAL on OS X
 	#ifndef MSG_NOSIGNAL
 	#define MSG_NOSIGNAL 0
@@ -2040,7 +2035,7 @@ bool SearchReplyParser_t::ParseReply ( MemInputBuffer_c & tReq, Agent_t & tAgent
 					else if ( tAttr.m_eAttrType == SPH_ATTR_FLOAT )
 					{
 						float fRes = tReq.GetFloat();
-						tMatch.SetAttr ( tAttr.m_iBitOffset, tAttr.m_iBitCount, *(DWORD*)&fRes  );
+						tMatch.SetAttr ( tAttr.m_iBitOffset, tAttr.m_iBitCount, sphF2DW(fRes) );
 					}
 					else
 					{
@@ -3096,6 +3091,30 @@ void SearchHandler_c::RunSubset ( int iStart, int iEnd )
 				dLocal.Add ( pDist->m_dLocal[i] );
 	}
 
+	/////////////////////////////////////////////////////
+	// optimize single-query, same-schema local searches
+	/////////////////////////////////////////////////////
+
+	ISphMatchSorter * pLocalSorter = NULL;
+	while ( iStart==iEnd && dLocal.GetLength()>1 )
+	{
+		CSphString sError;
+
+		// check if all schemas are equal
+		bool bAllEqual = true;
+		const CSphSchema * pFirstSchema = g_hIndexes [ dLocal[0] ].m_pSchema;
+		for ( int i=1; i<dLocal.GetLength() && bAllEqual; i++ )
+		{
+			if ( pFirstSchema->CompareTo ( *g_hIndexes [ dLocal[i] ].m_pSchema, sError )!=SPH_SCHEMAS_EQUAL )
+				bAllEqual = false;
+		}
+
+		// we can reuse the very same sorter
+		if ( FixupQuery ( &m_dQueries[iStart], pFirstSchema, "local-sorter", sError ) )
+			pLocalSorter = sphCreateQueue ( &m_dQueries[iStart], *pFirstSchema, sError );
+		break;
+	}
+
 	///////////////////////////////////////////////////////////
 	// main query loop (with multiple retries for distributed)
 	///////////////////////////////////////////////////////////
@@ -3142,6 +3161,7 @@ void SearchHandler_c::RunSubset ( int iStart, int iEnd )
 			}
 
 			tmQuery = -sphLongTimer ();
+
 			ARRAY_FOREACH ( iLocal, dLocal )
 			{
 				const ServedIndex_t & tServed = g_hIndexes [ dLocal[iLocal] ];
@@ -3239,19 +3259,24 @@ void SearchHandler_c::RunSubset ( int iStart, int iEnd )
 						CSphQuery & tQuery = m_dQueries[iQuery];
 						CSphString sError;
 
-						// fixup old queries
-						if ( !FixupQuery ( &tQuery, tServed.m_pSchema, dLocal[iLocal].cstr(), sError ) )
+						// create sorter, if needed
+						ISphMatchSorter * pSorter = pLocalSorter;
+						if ( !pLocalSorter )
 						{
-							m_dFailuresSet[iQuery].SubmitEx ( dLocal[iLocal].cstr(), "%s", sError.cstr() );
-							continue;
-						}
+							// fixup old queries
+							if ( !FixupQuery ( &tQuery, tServed.m_pSchema, dLocal[iLocal].cstr(), sError ) )
+							{
+								m_dFailuresSet[iQuery].SubmitEx ( dLocal[iLocal].cstr(), "%s", sError.cstr() );
+								continue;
+							}
 
-						// create queue
-						ISphMatchSorter * pSorter = sphCreateQueue ( &tQuery, *tServed.m_pSchema, sError );
-						if ( !pSorter )
-						{
-							m_dFailuresSet[iQuery].SubmitEx ( dLocal[iLocal].cstr(), "%s", sError.cstr() );
-							continue;
+							// create queue
+							pSorter = sphCreateQueue ( &tQuery, *tServed.m_pSchema, sError );
+							if ( !pSorter )
+							{
+								m_dFailuresSet[iQuery].SubmitEx ( dLocal[iLocal].cstr(), "%s", sError.cstr() );
+								continue;
+							}
 						}
 
 						// do query
@@ -3270,7 +3295,10 @@ void SearchHandler_c::RunSubset ( int iStart, int iEnd )
 							m_dTag2MVA.Add ( tRes.m_pMva );
 							sphFlattenQueue ( pSorter, &tRes, m_iTag++ );
 						}
-						SafeDelete ( pSorter );
+
+						// throw away the sorter
+						if ( !pLocalSorter )
+							SafeDelete ( pSorter );
 					}
 				}
 			}
@@ -3393,6 +3421,9 @@ void SearchHandler_c::RunSubset ( int iStart, int iEnd )
 
 	assert ( m_iTag==m_dTag2MVA.GetLength() );
 
+	// cleanup
+	SafeDelete ( pLocalSorter );
+
 	/////////////////////
 	// merge all results
 	/////////////////////

+ 2 - 2
src/sphinxstd.h

@@ -175,10 +175,10 @@ inline int		sphLog2 ( uint64_t iValue )
 }
 
 /// float vs dword conversion
-inline DWORD sphF2DW ( float f )	{ return *(DWORD *)&f; }
+inline DWORD sphF2DW ( float f )	{ union { float f; DWORD d; } u; u.f = f; return u.d; }
 
 /// dword vs float conversion
-inline float sphDW2F ( DWORD d )	{ return *(float *)&d; }
+inline float sphDW2F ( DWORD d )	{ union { float f; DWORD d; } u; u.d = d; return u.f; }
 
 /////////////////////////////////////////////////////////////////////////////
 // DEBUGGING