Browse Source

Output sorted wordstat

Wordstats in 'show meta' are listed in the order in which they arrived.
However, with parallel processing this order is not stable, which breaks
tests.
So let's always sort by word before output; this ensures a strict order
on every run.

As many tests have the old 'unordered' order in their models, they are
regenerated here.
Alexey N. Vinogradov 5 years ago
parent
commit
572a2681ea

+ 1 - 1
api/libsphinxclient/smoke_ref.txt

@@ -35,8 +35,8 @@ Matches:
 
 Query 'test number' retrieved 3 of 3 matches.
 Query stats:
-	'test' found 5 times in 3 documents
 	'number' found 3 times in 3 documents
+	'test' found 5 times in 3 documents
 
 Matches:
 1. doc_id=4, weight=101442, idd=4, group_id=2, tag=(7,40), tag2=(7,40), tag64=(7,40)

+ 16 - 17
src/searchd.cpp

@@ -3480,15 +3480,14 @@ void SendResult ( int iVer, ISphOutputBuffer & tOut, const AggrResult_t * pRes,
 			tOut.SendDword ( pRes->m_tStats.m_iSkips + pRes->m_iAgentFetchedSkips );
 	}
 
-	tOut.SendInt ( pRes->m_hWordStats.GetLength() );
-
-	pRes->m_hWordStats.IterateStart();
-	while ( pRes->m_hWordStats.IterateNext() )
-	{
-		const CSphQueryResultMeta::WordStat_t & tStat = pRes->m_hWordStats.IterateGet();
-		tOut.SendString ( pRes->m_hWordStats.IterateGetKey().cstr() );
-		tOut.SendAsDword ( tStat.first );
-		tOut.SendAsDword ( tStat.second );
+	auto dWords = pRes->MakeSortedWordStat ();
+	tOut.SendInt ( dWords.GetLength() );
+	for( auto * pWord : dWords )
+	{
+		assert ( pWord );
+		tOut.SendString ( pWord->first.cstr () );
+		tOut.SendAsDword ( pWord->second.first );
+		tOut.SendAsDword ( pWord->second.second );
 		if ( bAgentMode )
 			tOut.SendByte ( 0 ); // statistics have no expanded terms for now
 	}
@@ -8551,19 +8550,19 @@ void BuildMeta ( VectorLike & dStatus, const CSphQueryResultMeta & tMeta )
 	}
 
 
-	int iWord = 0;
-	for ( const auto& dWord : tMeta.m_hWordStats )
+	auto dWords = tMeta.MakeSortedWordStat();
+	ARRAY_CONSTFOREACH( iWord, dWords )
 	{
+		auto * pWord = dWords[iWord];
+		assert ( pWord );
 		if ( dStatus.MatchAddVa ( "keyword[%d]", iWord ) )
-			dStatus.Add ( dWord.first );
+			dStatus.Add ( pWord->first );
 
 		if ( dStatus.MatchAddVa ( "docs[%d]", iWord ) )
-			dStatus.Add().SetSprintf ( INT64_FMT, dWord.second.first );
+			dStatus.Add().SetSprintf ( INT64_FMT, pWord->second.first );
 
 		if ( dStatus.MatchAddVa ( "hits[%d]", iWord ) )
-			dStatus.Add().SetSprintf ( INT64_FMT, dWord.second.second );
-
-		++iWord;
+			dStatus.Add().SetSprintf ( INT64_FMT, pWord->second.second );
 	}
 }
 
@@ -12292,7 +12291,7 @@ void SendMysqlSelectResult ( RowBuffer_i & dRows, const AggrResult_t & tRes, boo
 						dRows.PutNULL();
 						break;
 					}
-			
+
 					// send string to client
 					JsonEscapedBuilder sTmp;
 					sphJsonFieldFormat ( sTmp, pField, eJson, false );

+ 13 - 0
src/sphinx.cpp

@@ -28063,6 +28063,19 @@ void CSphQueryResultMeta::AddStat ( const CSphString & sWord, int64_t iDocs, int
 	AddOtherStat ( m_hWordStats, sWord, iDocs, iHits );
 }
 
+/// Build a list of pointers to the m_hWordStats entries, sorted by word, so that output is reproducible across different runs.
+CSphFixedVector<SmallStringHash_T<CSphQueryResultMeta::WordStat_t>::KeyValue_t *> CSphQueryResultMeta::MakeSortedWordStat () const
+{
+	using kv_t = SmallStringHash_T<WordStat_t>::KeyValue_t;
+	CSphFixedVector<kv_t*> dWords { m_hWordStats.GetLength() }; // one slot per hash entry
+
+	int i = 0;
+	for ( auto & tStat : m_hWordStats )
+		dWords[i++] = &tStat; // store the entry's address, not a copy
+
+	dWords.Sort ( Lesser ( [] ( kv_t * l, kv_t * r ) { return l->first<r->first; } ) ); // order by key (the word) using operator<
+	return dWords;
+}
 
 //////////////////////////////////////////////////////////////////////////
 

+ 5 - 2
src/sphinx.h

@@ -2700,8 +2700,11 @@ public:
 
 	virtual					~CSphQueryResultMeta () {}					///< dtor
 	void					AddStat ( const CSphString & sWord, int64_t iDocs, int64_t iHits );
-	static void				AddOtherStat ( SmallStringHash_T<WordStat_t>& hTrg,
-			const CSphString & sWord, int64_t iDocs, int64_t iHits);
+
+	static void AddOtherStat ( SmallStringHash_T<WordStat_t> & hTrg, const CSphString & sWord, int64_t iDocs
+			, int64_t iHits );
+	// sort wordstat to achieve reproducible results across different runs
+	CSphFixedVector<SmallStringHash_T<CSphQueryResultMeta::WordStat_t>::KeyValue_t *>	MakeSortedWordStat () const;
 };
 
 

File diff suppressed because it is too large
+ 0 - 0
test/test_022/model.bin


File diff suppressed because it is too large
+ 0 - 0
test/test_038/model.bin


File diff suppressed because it is too large
+ 0 - 0
test/test_054/model.bin


File diff suppressed because it is too large
+ 0 - 0
test/test_067/model.bin


File diff suppressed because it is too large
+ 0 - 0
test/test_081/model.bin


File diff suppressed because it is too large
+ 0 - 0
test/test_115/model.bin


File diff suppressed because it is too large
+ 0 - 0
test/test_158/model.bin


File diff suppressed because it is too large
+ 0 - 0
test/test_165/model.bin


File diff suppressed because it is too large
+ 0 - 0
test/test_171/model.bin


File diff suppressed because it is too large
+ 0 - 0
test/test_173/model.bin


File diff suppressed because it is too large
+ 0 - 0
test/test_192/model.bin


File diff suppressed because it is too large
+ 0 - 0
test/test_207/model.bin


File diff suppressed because it is too large
+ 0 - 0
test/test_219/model.bin


File diff suppressed because it is too large
+ 0 - 0
test/test_222/model.bin


File diff suppressed because it is too large
+ 0 - 0
test/test_342/model.bin


File diff suppressed because it is too large
+ 0 - 0
test/test_348/model.bin


File diff suppressed because it is too large
+ 4 - 8
test/test_408/model.bin


File diff suppressed because it is too large
+ 0 - 0
test/test_409/model.bin


Some files were not shown because too many files changed in this diff