Browse Source

fix: optimized IsIndependentAttr(); fixed a crash on vector reallocation in FetchAttrDependencies; fixed handling of circular dependencies

Ilya Kuznetsov 1 month ago
parent
commit
fe6069e667
3 changed files with 64 additions and 11 deletions
  1. 21 2
      src/queuecreator.cpp
  2. 32 8
      src/sphinxexpr.cpp
  3. 11 1
      src/sphinxexpr.h

+ 21 - 2
src/queuecreator.cpp

@@ -2472,10 +2472,29 @@ bool QueueCreator_c::ConvertColumnarToDocstore()
 	if ( m_tQuery.m_bFacet || m_tQuery.m_bFacetHead )
 		return true;
 
+	auto & tSchema = *m_pSorterSchema;
+
+	// early-out if nothing to process
+	bool bFound = false;
+	for ( int i = 0; i < tSchema.GetAttrsCount(); i++ )
+	{
+		auto & tAttr = tSchema.GetAttr(i);
+		bool bStored = false;
+		bool bColumnar = tAttr.m_pExpr && tAttr.m_pExpr->IsColumnar(&bStored);
+		if ( bColumnar && bStored && tAttr.m_eStage==SPH_EVAL_FINAL )
+		{
+			bFound = true;
+			break;
+		}
+	}
+
+	if ( !bFound )
+		return true;
+
 	// check for columnar attributes that have FINAL eval stage
 	// if we have more than 1 of such attributes (and they are also stored), we replace columnar expressions with columnar expressions
 	IntVec_t dStoredColumnarFinal, dStoredColumnarPostlimit;
-	auto & tSchema = *m_pSorterSchema;
+	AttrDependencyMap_c tDepMap(tSchema);
 	for ( int i = 0; i < tSchema.GetAttrsCount(); i++ )
 	{
 		auto & tAttr = tSchema.GetAttr(i);
@@ -2484,7 +2503,7 @@ bool QueueCreator_c::ConvertColumnarToDocstore()
 		if ( bColumnar && bStored && tAttr.m_eStage==SPH_EVAL_FINAL )
 		{
 			// we need docids at the final stage if we want to fetch from docstore. so they must be evaluated before that
-			if ( IsIndependentAttr ( tAttr.m_sName, tSchema ) && tAttr.m_sName!=sphGetDocidName() )
+			if ( tDepMap.IsIndependent ( tAttr.m_sName ) && tAttr.m_sName!=sphGetDocidName() )
 				dStoredColumnarPostlimit.Add(i);
 			else
 				dStoredColumnarFinal.Add(i);

+ 32 - 8
src/sphinxexpr.cpp

@@ -10955,16 +10955,27 @@ ISphExpr * sphJsonFieldConv ( ISphExpr * pExpr )
 
 void FetchAttrDependencies ( StrVec_t & dAttrNames, const ISphSchema & tSchema )
 {
-	for ( const auto & i : dAttrNames )
+	sph::StringSet hProcessed;
+	
+	ARRAY_FOREACH ( i, dAttrNames )
 	{
-		const CSphColumnInfo * pAttr = tSchema.GetAttr ( i.cstr() );
+		const CSphString & sAttr = dAttrNames[i];
+		
+		// skip if already processed to avoid redundant work and cycles
+		if ( hProcessed[sAttr] )
+			continue;
+		
+		hProcessed.Add(sAttr);
+		
+		const CSphColumnInfo * pAttr = tSchema.GetAttr ( sAttr.cstr() );
 		if ( !pAttr || !pAttr->m_pExpr )
 			continue;
 
 		int iOldLen = dAttrNames.GetLength();
 		pAttr->m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dAttrNames );
+		
 		for ( int iNewAttr = iOldLen; iNewAttr < dAttrNames.GetLength(); iNewAttr++ )
-			if ( dAttrNames[iNewAttr]==i )
+			if ( dAttrNames[iNewAttr]==dAttrNames[i] )
 				dAttrNames.Remove(iNewAttr);
 	}
 
@@ -10972,21 +10983,34 @@ void FetchAttrDependencies ( StrVec_t & dAttrNames, const ISphSchema & tSchema )
 }
 
 
-bool IsIndependentAttr ( const CSphString & sAttr, const ISphSchema & tSchema )
+AttrDependencyMap_c::AttrDependencyMap_c ( const ISphSchema & tSchema )
 {
 	for ( int i = 0; i < tSchema.GetAttrsCount(); i++ )
 	{
 		auto & tAttr = tSchema.GetAttr(i);
-		if ( tAttr.m_sName==sAttr )
+		if ( !tAttr.m_pExpr )
 			continue;
 
 		StrVec_t dDeps;
 		dDeps.Add ( tAttr.m_sName );
 		FetchAttrDependencies ( dDeps, tSchema );
 
-		if ( dDeps.any_of ( [&sAttr]( auto & sDep ){ return sDep==sAttr; } ) )
-			return false;
+		for ( const auto & sDep : dDeps )
+		{
+			if ( sDep==tAttr.m_sName )
+				continue;
+
+			m_hDependents.AddUnique(sDep).Add ( tAttr.m_sName );
+		}
 	}
+}
 
-	return true;
+
+bool AttrDependencyMap_c::IsIndependent ( const CSphString & sAttr ) const
+{
+	auto * pDependents = m_hDependents(sAttr);
+	if ( !pDependents )
+		return true;
+
+	return pDependents->IsEmpty();
 }

+ 11 - 1
src/sphinxexpr.h

@@ -17,6 +17,7 @@
 #include "std/refcounted_mt.h"
 #include "std/string.h"
 #include "std/sharedptr.h"
+#include "std/stringhash.h"
 
 /// forward decls
 class CSphMatch;
@@ -359,6 +360,16 @@ struct JoinArgs_t
 	JoinArgs_t ( const ISphSchema & tJoinedSchema, const CSphString & sIndex1, const CSphString & sIndex2 );
 };
 
+class AttrDependencyMap_c
+{
+public:
+			AttrDependencyMap_c ( const ISphSchema & tSchema );
+
+	bool	IsIndependent ( const CSphString & sAttr ) const;
+
+private:
+	SmallStringHash_T<sph::StringSet> m_hDependents;
+};
 
 struct CommonFilterSettings_t;
 ISphExpr * sphExprParse ( const char * szExpr, const ISphSchema & tSchema, const CSphString * pJoinIdx, CSphString & sError, ExprParseArgs_t & tArgs );
@@ -367,7 +378,6 @@ ISphExpr * ExprJsonIn ( const VecTraits_T<CSphString> & dVals, ISphExpr * pArg,
 ISphExpr * ExprJsonIn ( const VecTraits_T<int64_t> & dVals, ISphExpr * pArg, ESphCollation eCollation );
 ISphExpr * ExprJsonRange ( const CommonFilterSettings_t & tFilter, ISphExpr * pArg );
 void FetchAttrDependencies ( StrVec_t & dAttrNames, const ISphSchema & tSchema );
-bool IsIndependentAttr ( const CSphString & sAttr, const ISphSchema & tSchema );
 bool CanAliasedExprSetupAsFilter ( const CSphFilterSettings & tFilter, bool & bExclude );
 void SetExprNodeEvalStackItemSize ( std::pair<int,int> tStack );
 void SetMaxExprNodeEvalStackItemSize ( std::pair<int, int> tStack );