Browse Source

fix: changed filter/filter tree evaluation in joined queries

Ilya Kuznetsov 1 month ago
parent
commit
e6a939b604

+ 54 - 39
src/joinsorter.cpp

@@ -203,21 +203,27 @@ CSphVector<std::pair<int,bool>> FetchJoinRightTableFilters ( const CSphVector<CS
 }
 
 
-bool NeedToMoveMixedJoinFilters ( const CSphQuery & tQuery, const ISphSchema & tSchema )
+bool NeedPostJoinFilterEvaluation ( const CSphQuery & tQuery, const ISphSchema & tSchema )
 {
-	CSphVector<std::pair<int,bool>> dRightFilters = FetchJoinRightTableFilters ( tQuery.m_dFilters, tSchema, tQuery.m_sJoinIdx.cstr() );
+	return NeedPostJoinFilterEvaluation ( tQuery.m_dFilters, tQuery.m_sJoinIdx, tQuery.m_dFilterTree.GetLength()>0, tQuery.m_eJoinType, tSchema );
+}
+
+
+bool NeedPostJoinFilterEvaluation ( const CSphVector<CSphFilterSettings> & dFilters, const CSphString & sJoinIdx, bool bFilterTree, JoinType_e eJoinType, const ISphSchema & tSchema )
+{
+	CSphVector<std::pair<int,bool>> dRightFilters = FetchJoinRightTableFilters ( dFilters, tSchema, sJoinIdx.cstr() );
 	if ( !dRightFilters.GetLength() )
 		return false;
 
 	// move all filters to the left query in case of LEFT JOIN
 	// otherwise we can't distinguish between 'no match' and 'match with null part from right table'
-	if ( tQuery.m_eJoinType==JoinType_e::LEFT )
+	if ( eJoinType==JoinType_e::LEFT )
 		return true;
 
-	if ( !tQuery.m_dFilterTree.GetLength() )
-		return false;
+	if ( dRightFilters.GetLength()!=dFilters.GetLength() )
+		return bFilterTree;	// we don't (yet) support splitting the filter tree between left and right queries
 
-	return dRightFilters.GetLength()!=tQuery.m_dFilters.GetLength();
+	return false;
 }
 
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -719,6 +725,7 @@ private:
 	void		SetupAggregates();
 	FORCE_INLINE uint64_t SetupJoinFilters ( const CSphMatch & tEntry );
 	bool		SetupRightFilters ( CSphString & sError );
+	bool		ValidateLeftTableNotPrefixedInFilters ( CSphString & sError );
 	bool		SetupOnFilters ( CSphString & sError );
 	bool		SetupOnValueFilters ( CSphString & sError );
 	void		SetupRightStandaloneLocators();
@@ -795,7 +802,7 @@ JoinSorter_c::JoinSorter_c ( const CSphIndex * pIndex, const CSphIndex * pJoined
 
 	CSphVector<std::pair<int,bool>> dRightFilters = FetchJoinRightTableFilters ( m_tQuery.m_dFilters, tSorterSchema, m_tQuery.m_sJoinIdx.cstr() );
 	bool bDisableByImplicitGrouping = HasImplicitGrouping(m_tQuery) && m_tQuery.m_eJoinType!=JoinType_e::LEFT;
-	m_bFinalCalcOnly = !pIndex->IsRT() && !bJoinedGroupSort && !bHaveAggregates && !dRightFilters.GetLength() && !NeedToMoveMixedJoinFilters ( m_tQuery, tSorterSchema ) && !pSorter->IsPrecalc() && !bDisableByImplicitGrouping;
+	m_bFinalCalcOnly = !pIndex->IsRT() && !bJoinedGroupSort && !bHaveAggregates && !dRightFilters.GetLength() && !NeedPostJoinFilterEvaluation ( m_tQuery, tSorterSchema ) && !pSorter->IsPrecalc() && !bDisableByImplicitGrouping;
 	m_bErrorFlag = !SetupJoinQuery ( m_pSorter->GetSchema()->GetDynamicSize(), m_sErrorMessage );
 	if ( m_bFinalCalcOnly || !m_iBatchSize )
 		m_bCanBatch = false;
@@ -1642,14 +1649,8 @@ static void RemoveTableNamePrefix ( CSphString & sAttr, const CSphFilterSettings
 }
 
 
-bool JoinSorter_c::SetupRightFilters ( CSphString & sError )
+bool JoinSorter_c::ValidateLeftTableNotPrefixedInFilters ( CSphString & sError )
 {
-	m_tJoinQuery.m_dFilters.Resize(0);
-
-	CSphVector<std::pair<int,bool>> dRightFilters = FetchJoinRightTableFilters ( m_tQuery.m_dFilters, *m_pSorterSchema, GetJoinedIndexName().cstr() );
-	bool bLeftJoin = m_tQuery.m_eJoinType==JoinType_e::LEFT;
-	
-	// Validate: left table should not be prefixed in WHERE clause filters
 	CSphString sLeftTableName = m_pIndex->GetName();
 	CSphString sLeftPrefix;
 	sLeftPrefix.SetSprintf ( "%s.", sLeftTableName.cstr() );
@@ -1663,34 +1664,45 @@ bool JoinSorter_c::SetupRightFilters ( CSphString & sError )
 			return false;
 		}
 	}
-	
-	if ( bLeftJoin || m_tQuery.m_dFilterTree.GetLength() )
-	{
-		if ( !dRightFilters.GetLength() )
-			return true;
 
-		if ( bLeftJoin || dRightFilters.GetLength()!=m_tQuery.m_dFilters.GetLength() )
-		{
-			CreateFilterContext_t tCtx;
-			tCtx.m_pFilters		= &m_tQuery.m_dFilters;
-			tCtx.m_pFilterTree	= &m_tQuery.m_dFilterTree;
-			tCtx.m_pMatchSchema	= m_pSorterSchema;
-			tCtx.m_pIndexSchema	= &m_pIndex->GetMatchSchema();
-			tCtx.m_bScan		= m_tQuery.m_sQuery.IsEmpty();
-			tCtx.m_sJoinIdx		= GetJoinedIndexName();
-			if ( !sphCreateFilters ( tCtx, sError, sError ) )
-			{
-				sError.SetSprintf ( "failed to create query filters: %s", sError.cstr() );
-				return false;
-			}
+	return true;
+}
 
-			m_tMixedFilter.SetFilter ( tCtx.m_pFilter );
-			return true;
+
+bool JoinSorter_c::SetupRightFilters ( CSphString & sError )
+{
+	m_tJoinQuery.m_dFilters.Resize(0);
+
+	if ( !ValidateLeftTableNotPrefixedInFilters(sError) )
+		return false;
+
+	if ( NeedPostJoinFilterEvaluation ( m_tQuery, *m_pSorterSchema ) )
+	{
+		// all filters stay in the left query; evaluated post-join
+		CreateFilterContext_t tCtx;
+		tCtx.m_pFilters		= &m_tQuery.m_dFilters;
+		tCtx.m_pFilterTree	= &m_tQuery.m_dFilterTree;
+		tCtx.m_pMatchSchema	= m_pSorterSchema;
+		tCtx.m_pIndexSchema	= &m_pIndex->GetMatchSchema();
+		tCtx.m_bScan		= m_tQuery.m_sQuery.IsEmpty();
+		tCtx.m_sJoinIdx		= GetJoinedIndexName();
+		tCtx.m_eJoinType	= m_tQuery.m_eJoinType;
+		if ( !sphCreateFilters ( tCtx, sError, sError ) )
+		{
+			sError.SetSprintf ( "failed to create query filters: %s", sError.cstr() );
+			return false;
 		}
 
-		m_tJoinQuery.m_dFilterTree = m_tQuery.m_dFilterTree;
+		m_tMixedFilter.SetFilter ( tCtx.m_pFilter );
+		return true;
 	}
 
+	// we have 2 options if we have a filter tree: either it is completely in the left query or completely in the right query
+	// otherwise NeedPostJoinFilterEvaluation() would return true
+	CSphVector<std::pair<int,bool>> dRightFilters = FetchJoinRightTableFilters ( m_tQuery.m_dFilters, *m_pSorterSchema, GetJoinedIndexName().cstr() );
+	if ( dRightFilters.GetLength() )
+		m_tJoinQuery.m_dFilterTree = m_tQuery.m_dFilterTree;
+
 	CSphString sPrefix;
 	sPrefix.SetSprintf ( "%s.", GetJoinedIndexName().cstr() );
 
@@ -2087,15 +2099,18 @@ void JoinSorter_c::AddRemappedStringItemsToJoinSelectList()
 void JoinSorter_c::AddExpressionItemsToJoinSelectList()
 {
 	// find JSON/columnar attrs present in filters and add them to select list (only when all filters are moved to the left query)
-	if ( !NeedToMoveMixedJoinFilters ( m_tQuery, *m_pSorterSchema ) )
+	if ( !NeedPostJoinFilterEvaluation ( m_tQuery, *m_pSorterSchema ) )
 		return;
 
 	const CSphSchema & tJoinedSchema = m_pJoinedIndex->GetMatchSchema();
 	for ( const auto & i : m_tQuery.m_dFilters )
 	{
-		if ( sphJsonNameSplit ( i.m_sAttrName.cstr(), GetJoinedIndexName().cstr() ) )
+		bool bIndexPrefix = false;
+		if ( sphJsonNameSplit ( i.m_sAttrName.cstr(), GetJoinedIndexName().cstr(), nullptr, &bIndexPrefix ) )
 		{
-			AddToJoinSelectList ( i.m_sAttrName );
+			if ( bIndexPrefix )
+				AddToJoinSelectList ( i.m_sAttrName );
+
 			continue;
 		}
 

+ 2 - 1
src/joinsorter.h

@@ -18,7 +18,8 @@ void				SetJoinBatchSize ( int iSize );
 int					GetJoinBatchSize();
 
 CSphVector<std::pair<int,bool>> FetchJoinRightTableFilters ( const CSphVector<CSphFilterSettings> & dFilters, const ISphSchema & tSchema, const char * szJoinedIndex );
-bool				NeedToMoveMixedJoinFilters ( const CSphQuery & tQuery, const ISphSchema & tSchema );
+bool				NeedPostJoinFilterEvaluation ( const CSphQuery & tQuery, const ISphSchema & tSchema );
+bool				NeedPostJoinFilterEvaluation ( const CSphVector<CSphFilterSettings> & dFilters, const CSphString & sJoinIdx, bool bFilterTree, JoinType_e eJoinType, const ISphSchema & tSchema );
 bool				ExprHasLeftTableAttrs ( const CSphString & sAttr, const ISphSchema & tLeftSchema );
 std::unique_ptr<ISphFilter> CreateJoinNullFilter ( const CSphFilterSettings & tSettings, const CSphAttrLocator & tNullMapLocator );
 bool				SplitJoinedAttrName ( const CSphString & sJoinedAttr, CSphString & sTable, CSphString & sAttr, CSphString * pError = nullptr );

+ 8 - 2
src/queuecreator.cpp

@@ -1791,12 +1791,18 @@ bool QueueCreator_c::AddJoinFilterAttrs()
 		}
 	}
 
-	if ( NeedToMoveMixedJoinFilters ( m_tQuery, *m_pSorterSchema ) )
+	if ( NeedPostJoinFilterEvaluation ( m_tQuery, *m_pSorterSchema ) )
 		for ( const auto & i : m_tQuery.m_dFilters )
 		{
 			const CSphString & sAttr = i.m_sAttrName;
 			const CSphColumnInfo * pAttr = m_pSorterSchema->GetAttr ( sAttr.cstr() );
-			if ( pAttr || !sphJsonNameSplit ( sAttr.cstr(), sRightIndex.cstr() ) )
+			CSphString sSplitAttrName;
+			bool bIndexPrefix = false;
+			if ( pAttr || !sphJsonNameSplit ( sAttr.cstr(), sRightIndex.cstr(), &sSplitAttrName, &bIndexPrefix ) )
+				continue;
+
+			// check if it's a json attribute from the left table and not a joined attribute
+			if ( !bIndexPrefix )
 				continue;
 
 			CSphColumnInfo tExprCol ( sAttr.cstr(), FilterType2AttrType ( i.m_eType ) );

+ 1 - 0
src/sphinx.cpp

@@ -8272,6 +8272,7 @@ bool CSphIndex_VLN::SetupFiltersAndContext ( CSphQueryContext & tCtx, CreateFilt
 	tFlx.m_pSI			= &m_tSI;
 	tFlx.m_iTotalDocs	= m_iDocinfo;
 	tFlx.m_sJoinIdx		= tQuery.m_sJoinIdx;
+	tFlx.m_eJoinType	= tQuery.m_eJoinType;
 
 	// may modify eval stages in schema; needs to be before SetupCalc
 	if ( !TransformFilters ( tFlx, dTransformedFilters, dTransformedFilterTree, pModifiedMatchSchema, tQuery.m_dItems, tMeta.m_sError ) )

+ 1 - 7
src/sphinxfilter.cpp

@@ -1847,13 +1847,7 @@ static void RemoveJoinFilters ( const CreateFilterContext_t & tCtx, CSphVector<C
 	if ( tCtx.m_sJoinIdx.IsEmpty() )
 		return;
 
-	CSphVector<std::pair<int,bool>> dRightFilters = FetchJoinRightTableFilters ( dModified, *tCtx.m_pMatchSchema, tCtx.m_sJoinIdx.cstr() );
-
-	bool bHaveNullFilters = false;
-	for ( const auto & i : dRightFilters )
-		bHaveNullFilters |= dModified[i.first].m_eType==SPH_FILTER_NULL;
-
-	if ( tCtx.m_pFilterTree && tCtx.m_pFilterTree->GetLength() && dRightFilters.GetLength() && ( bHaveNullFilters || dRightFilters.GetLength()!=dModified.GetLength() ) )
+	if ( NeedPostJoinFilterEvaluation ( dModified, tCtx.m_sJoinIdx, tCtx.m_pFilterTree && tCtx.m_pFilterTree->GetLength(), tCtx.m_eJoinType, *tCtx.m_pMatchSchema ) )
 	{
 		// mixed joined and non-joined filters; they will be handled in join sorter
 		// remove all filters from the query (keep the @rowid/pseudo_sharding filter)

+ 1 - 0
src/sphinxfilter.h

@@ -86,6 +86,7 @@ struct CreateFilterContext_t
 	const SIContainer_c *		m_pSI = nullptr;
 	int64_t						m_iTotalDocs = 0;
 	CSphString					m_sJoinIdx;
+	JoinType_e					m_eJoinType = JoinType_e::NONE;
 	bool						m_bAddKNNDistFilter = false;
 };
 

+ 7 - 1
src/sphinxjson.cpp

@@ -1371,8 +1371,11 @@ static bool FindNextSeparator ( const char * & pSep )
 }
 
 
-bool sphJsonNameSplit ( const char * szName, const char * szIndex, CSphString * pColumn )
+bool sphJsonNameSplit ( const char * szName, const char * szIndex, CSphString * pColumn, bool * pIndexPrefix )
 {
+	if ( pIndexPrefix )
+		*pIndexPrefix = false;
+
 	if ( !szName )
 		return false;
 
@@ -1382,6 +1385,9 @@ bool sphJsonNameSplit ( const char * szName, const char * szIndex, CSphString *
 
 	if ( szIndex && *pSep=='.' && !strncmp ( szIndex, szName, pSep - szName ) )
 	{
+		if ( pIndexPrefix )
+			*pIndexPrefix = true;
+
 		// this was not a json separator, but a dot between table name and column name
 		pSep++;
 		if ( !FindNextSeparator(pSep) )

+ 1 - 1
src/sphinxjson.h

@@ -413,7 +413,7 @@ ESphJsonType sphJsonFindByKey ( ESphJsonType eType, const BYTE ** ppValue, const
 ESphJsonType sphJsonFindByIndex ( ESphJsonType eType, const BYTE ** ppValue, int iIndex );
 
 /// extract object part from the name; return false if not JSON name. szIndex is the possible name of joined index
-bool sphJsonNameSplit ( const char * szName, const char * szIndex = nullptr, CSphString * pColumn = nullptr );
+bool sphJsonNameSplit ( const char * szName, const char * szIndex = nullptr, CSphString * pColumn = nullptr, bool * pIndexPrefix = nullptr );
 
 /// compute node size, in bytes
 /// returns -1 when data itself is required to compute the size, but pData is NULL

+ 1 - 0
src/sphinxrt.cpp

@@ -7819,6 +7819,7 @@ static bool SetupFilters ( const CSphQuery & tQuery, const ISphSchema & tMatchSc
 	tFlx.m_eCollation = tQuery.m_eCollation;
 	tFlx.m_bScan = bFullscan;
 	tFlx.m_sJoinIdx = tQuery.m_sJoinIdx;
+	tFlx.m_eJoinType = tQuery.m_eJoinType;
 	tFlx.m_bAddKNNDistFilter = true;
 
 	std::unique_ptr<ISphSchema> pModifiedMatchSchema;

BIN
test/test_278/model.bin


+ 2 - 0
test/test_278/test.xml

@@ -252,6 +252,8 @@ insert into t values(1, 'abc');
 insert into t2 values(1, 'abc', 1),(2, 'def', 1);
 
 select * from t inner join t2 on t.id = t2.t_id where t2.id = 1 or t2.id = 2;
+select * from t inner join t2 on t.id = t2.t_id where id&lt;2 and ( t2.id = 1 or t2.id = 2 );
+select * from t inner join t2 on t.id = t2.t_id where id&lt;1 and ( t2.id = 1 or t2.id = 2 );
 
 drop table t;
 drop table t2;