Browse Source

fixed crash in grouper and ranker when using packed factors with multiple values per match (MVA or JSON array); fixed GitHub #1444; added case to test 315

Stas Klinov 2 years ago
parent
commit
801d77eaf5
4 changed files with 86 additions and 19 deletions
  1. 9 0
      src/sphinxsearch.cpp
  2. 31 18
      src/sphinxsort.cpp
  3. 0 0
      test/test_315/model.bin
  4. 46 1
      test/test_315/test.xml

+ 9 - 0
src/sphinxsearch.cpp

@@ -2079,6 +2079,7 @@ bool FactorPool_c::FlushEntry ( SphFactorHashEntry_t * pEntry )
 
 void FactorPool_c::Flush()
 {
+	[[maybe_unused]] int iUsed = 0;
 	ARRAY_FOREACH ( i, m_dHash )
 	{
 		SphFactorHashEntry_t * pEntry = m_dHash[i];
@@ -2086,12 +2087,20 @@ void FactorPool_c::Flush()
 		{
 			SphFactorHashEntry_t * pNext = pEntry->m_pNext;
 			bool bHead = !pEntry->m_pPrev;
+
+#ifndef NDEBUG
+			if ( pEntry->m_iRefCount )
+				iUsed++;
+#endif
+
 			if ( FlushEntry(pEntry) && bHead )
 				m_dHash[i] = pNext;
 
 			pEntry = pNext;
 		}
 	}
+
+	assert ( !m_dHash.GetLength() || iUsed+MAX_BLOCK_DOCS<=m_dHash.GetLength() );
 }
 
 

+ 31 - 18
src/sphinxsort.cpp

@@ -3012,9 +3012,9 @@ public:
 		, m_hGroup2Match ( tSettings.m_iMaxMatches*GROUPBY_FACTOR )
 	{}
 
-	bool	Push ( const CSphMatch & tEntry ) override						{ return PushEx<false> ( tEntry, m_pGrouper->KeyFromMatch(tEntry), false ); }
+	bool	Push ( const CSphMatch & tEntry ) override						{ return PushEx<false> ( tEntry, m_pGrouper->KeyFromMatch(tEntry), false, false, true, nullptr ); }
 	void	Push ( const VecTraits_T<const CSphMatch> & dMatches ) override	{ assert ( 0 && "Not supported in grouping"); }
-	bool	PushGrouped ( const CSphMatch & tEntry, bool ) override			{ return PushEx<true> ( tEntry, tEntry.GetAttr ( m_tLocGroupby ), false ); }
+	bool	PushGrouped ( const CSphMatch & tEntry, bool ) override			{ return PushEx<true> ( tEntry, tEntry.GetAttr ( m_tLocGroupby ), false, false, true, nullptr ); }
 	ISphMatchSorter * Clone() const override								{ return this->template CloneSorterT<MYTYPE>(); }
 
 	/// store all entries into specified location in sorted order, and remove them from queue
@@ -3184,12 +3184,15 @@ protected:
 
 	/// add entry to the queue
 	template <bool GROUPED>
-	FORCE_INLINE bool PushEx ( const CSphMatch & tEntry, const SphGroupKey_t uGroupKey, bool, SphAttr_t * pAttr=nullptr )
+	FORCE_INLINE bool PushEx ( const CSphMatch & tEntry, const SphGroupKey_t uGroupKey, [[maybe_unused]] bool bNewSet, [[maybe_unused]] bool bTailFinalized, bool bClearNotify, SphAttr_t * pAttr )
 	{
 		if constexpr ( NOTIFICATIONS )
 		{
-			m_tJustPushed = RowTagged_t();
-			this->m_dJustPopped.Resize ( 0 );
+			if ( bClearNotify )
+			{
+				m_tJustPushed = RowTagged_t();
+				this->m_dJustPopped.Resize ( 0 );
+			}
 		}
 		auto & tLocCount = m_tLocCount;
 
@@ -3523,9 +3526,9 @@ public:
 	inline void SetGLimit ( int iGLimit )	{ m_iGLimit = Min ( iGLimit, m_iLimit ); }
 	int GetLength() override				{ return Min ( m_iUsed, m_iLimit );	}
 
-	bool	Push ( const CSphMatch & tEntry ) override						{ return PushEx<false> ( tEntry, m_pGrouper->KeyFromMatch(tEntry), false ); }
+	bool	Push ( const CSphMatch & tEntry ) override						{ return PushEx<false> ( tEntry, m_pGrouper->KeyFromMatch(tEntry), false, false, true, nullptr ); }
 	void	Push ( const VecTraits_T<const CSphMatch> & dMatches ) final	{ assert ( 0 && "Not supported in grouping"); }
-	bool	PushGrouped ( const CSphMatch & tEntry, bool bNewSet ) override	{ return PushEx<true> ( tEntry, tEntry.GetAttr ( m_tLocGroupby ), bNewSet ); }
+	bool	PushGrouped ( const CSphMatch & tEntry, bool bNewSet ) override	{ return PushEx<true> ( tEntry, tEntry.GetAttr ( m_tLocGroupby ), bNewSet, false, true, nullptr ); }
 
 	/// store all entries into specified location in sorted order, and remove them from queue
 	int Flatten ( CSphMatch * pTo ) override
@@ -3669,9 +3672,9 @@ public:
 			// have to set bNewSet to true
 			// as need to fallthrough at PushAlreadyHashed and update count and aggregates values for head match
 			// even uGroupKey match already exists
-			dRhs.template PushEx<true> ( m_dData[iHead], uGroupKey, true, true );
+			dRhs.template PushEx<true> ( m_dData[iHead], uGroupKey, true, true, true, nullptr );
 			for ( int i = this->m_dIData[iHead]; i!=iHead; i = this->m_dIData[i] )
-				dRhs.template PushEx<false> ( m_dData[i], uGroupKey, false, true );
+				dRhs.template PushEx<false> ( m_dData[i], uGroupKey, false, true, true, nullptr );
 
 			DeleteChain ( iHead, false );
 		}
@@ -3710,7 +3713,7 @@ protected:
 	 * It hold all calculated stuff from aggregates/group_concat until finalization.
 	 */
 	template <bool GROUPED>
-	bool PushEx ( const CSphMatch & tEntry, const SphGroupKey_t uGroupKey, bool bNewSet, bool bTailFinalized=false )
+	bool PushEx ( const CSphMatch & tEntry, const SphGroupKey_t uGroupKey, bool bNewSet, bool bTailFinalized, bool bClearNotify, [[maybe_unused]] SphAttr_t * pAttr )
 	{
 
 #ifndef NDEBUG
@@ -3721,8 +3724,11 @@ protected:
 
 		if constexpr ( NOTIFICATIONS )
 		{
-			m_tJustPushed = RowTagged_t();
-			this->m_dJustPopped.Resize ( 0 );
+			if ( bClearNotify )
+			{
+				m_tJustPushed = RowTagged_t();
+				this->m_dJustPopped.Resize ( 0 );
+			}
 		}
 
 		this->m_bFinalized = false;
@@ -4238,15 +4244,19 @@ public:
 		this->m_pGrouper->MultipleKeysFromMatch ( tMatch, m_dKeys );
 
 		bool bRes = false;
-		for ( auto i : m_dKeys )
-			bRes |= BASE::template PushEx<false> ( tMatch, i, false );
+		ARRAY_FOREACH ( i, m_dKeys )
+		{
+			SphGroupKey_t tKey = m_dKeys[i];
+			// need to clear notifications once per match - not for every pushed value
+			bRes |= BASE::template PushEx<false> ( tMatch, tKey, false, false, ( i==0 ), nullptr );
+		}
 
 		return bRes;
 	}
 
 	bool PushGrouped ( const CSphMatch & tEntry, bool bNewSet ) override
 	{
-		return BASE::template PushEx<true> ( tEntry, tEntry.GetAttr ( BASE::m_tLocGroupby ), bNewSet );
+		return BASE::template PushEx<true> ( tEntry, tEntry.GetAttr ( BASE::m_tLocGroupby ), bNewSet, false, true, nullptr );
 	}
 
 private:
@@ -4308,7 +4318,7 @@ public:
 	bool PushGrouped ( const CSphMatch & tEntry, bool bNewSet ) override
 	{
 		// re-group it based on the group key
-		return BASE::template PushEx<true> ( tEntry, tEntry.GetAttr ( BASE::m_tLocGroupby ), bNewSet );
+		return BASE::template PushEx<true> ( tEntry, tEntry.GetAttr ( BASE::m_tLocGroupby ), bNewSet, false, true, nullptr );
 	}
 
 	ISphMatchSorter * Clone () const final
@@ -4324,10 +4334,13 @@ private:
 		auto iValue = (int64_t)uGroupKey;
 		CSphGrouper * pGrouper = this->m_pGrouper;
 		const BYTE * pBlobPool = ((CSphGrouperJsonField*)pGrouper)->GetBlobPool();
+		bool bClearNotify = true;
 
-		return PushJsonField ( iValue, pBlobPool, [this, &tMatch]( SphAttr_t * pAttr, SphGroupKey_t uMatchGroupKey )
+		return PushJsonField ( iValue, pBlobPool, [this, &tMatch, &bClearNotify]( SphAttr_t * pAttr, SphGroupKey_t uMatchGroupKey )
 			{
-				return BASE::template PushEx<false> ( tMatch, uMatchGroupKey, false, pAttr );
+				bool bPushed = BASE::template PushEx<false> ( tMatch, uMatchGroupKey, false, false, bClearNotify, pAttr );
+				bClearNotify = false; // need to clear notifications once per match - not for every pushed value
+				return bPushed;
 			}
 		);
 	}

File diff suppressed because it is too large
+ 0 - 0
test/test_315/model.bin


+ 46 - 1
test/test_315/test.xml

@@ -40,6 +40,22 @@ index rt_products
 	dict			= keywords
 }
 
+source srctest1
+{
+	type = mysql
+	<sql_settings/>
+	sql_query = SELECT id, text, jattr, mattr FROM test_table1
+
+	sql_attr_json = jattr
+    sql_attr_multi = uint mattr from field
+}
+
+index test1
+{
+	source				= srctest1
+	path				= <data_path/>/test1
+}
+
 </config>
 
 <db_create>
@@ -64,6 +80,29 @@ INSERT INTO test_table (best_seller, attributes_id, text) VALUES
 ( 4, 2, 'text4 text' );
 </db_insert>
 
+<db_create>
+CREATE TABLE test_table1
+(
+	id integer primary key not null,
+	text varchar(256),
+    jattr varchar(256),
+    mattr varchar(256)
+);
+</db_create>
+
+<db_drop>
+DROP TABLE IF EXISTS test_table1;
+</db_drop>
+
+<db_insert>
+INSERT INTO test_table1 (id, text, jattr, mattr) VALUES
+( 1, 'List of HP1', '{\"a\":[101, 102, 103, 103]}', '101, 102, 103, 103'),
+( 2, 'List of HP2', '{\"a\":[201, 202, 203, 203]}', '201, 202, 203, 203'),
+( 3, 'List of HP3', '{\"a\":[301, 302, 303, 303]}', '301, 302, 303, 303'),
+( 4, 'List of HP4', '{\"a\":[401, 402, 403, 403]}', '401, 402, 403, 403'),
+( 5, 'List of HP5', '{\"a\":[501, 502, 503, 503]}', '501, 502, 503, 503');
+</db_insert>
+
 <queries><sphinxql>
 INSERT INTO rt_products (id, best_seller, attributes_id, title) VALUES (1, 1, 1, 'text1 text'),(2, 2, 1, 'text2 text'), (3, 3, 2, 'text3 text'), (4, 4, 2, 'text4 text');
 select *, to_string(id) i from rt_products order by best_seller asc; select *, to_string(id) i from rt_products order by best_seller1 asc; select *, to_string(id) i from rt_products order by attributes_id asc;
@@ -78,7 +117,13 @@ select 1 i, interval(attributes_id, 2) p, count(*) c from products group by i; s
 select 1 i, interval(attributes_id, 2) p, count(*) c from products group by p; select 1 i, interval(attributes_id, 2) p, count(*) c from products group by i;
 select 1 i, interval(attributes_id, 2) p, count(*) c from products where match('text') group by i; select 1 i, interval(attributes_id, 2) p, count(*) c from products where match('text') group by p;
 select 1 i, interval(attributes_id, 2) p, count(*) c from products where match('text') group by p; select 1 i, interval(attributes_id, 2) p, count(*) c from products where match('text') group by i;
-</sphinxql></queries>
+</sphinxql>
+
<!-- regression test: crash in grouper and ranker on mismatched match pop with multiple values per match -->
+<sphinxql>select id, packedfactors() from test1 where match('list') group by jattr.a limit 100 option ranker=expr('1'), max_matches=2, threads=1</sphinxql>
+<sphinxql>select id, packedfactors() from test1 where match('list') group by mattr limit 100 option ranker=expr('1'), max_matches=2, threads=1</sphinxql>
+
+</queries>
 
 
 </test>

Some files were not shown because too many files changed in this diff