// NOTE(review): the section below (license header through the Expr_Highlight_c class declaration)
// has lost its line breaks, and every '<...>' span appears to have been stripped from it: template
// arguments are gone, StringBinary2Number() is cut off after its buffer-size check, the head of
// class QueryExprTraits_c is missing, and the argument-parsing loop of the Expr_Snippet_c
// constructor is truncated ("while ( pWordsSetup"). The text is kept byte-for-byte; restore it
// from the original file before building.
//
// What is visible in this section:
// - HookType_e: ids for the two hook functions (HOOK_SNIPPET, HOOK_HIGHLIGHT).
// - StringBinary2Number(): returns 0 for a null pointer or zero length; the rest is not visible.
// - QueryExprTraits_c: keeps the last seen query string in m_sQuery. UpdateQuery() calls
//   FetchQuery() and returns true (and caches the value) on the first call or when the query
//   differs from the cached one. FetchQuery() returns m_sQuery when there is no query expression,
//   otherwise evaluates m_pQuery via StringEval(). Command() forwards the command to m_pQuery and
//   handles SPH_EXPR_SET_QUERY itself only when no query expression is set.
// - Expr_Snippet_c: SNIPPET() node. StringEval() evaluates the text argument, re-sets the builder
//   query when UpdateQuery() reports a change, builds the snippet and returns the packed buffer
//   detached with LeakData() together with its length; it returns 0 on empty text or on failure.
// - Expr_Highlight_c: class declaration only; member definitions follow this section.
//
// NOTE(review): defects visible below, to be fixed once the text is restored:
// - Expr_Snippet_c::StringEval: "CSphScopedProfile ( m_pProfiler, SPH_QSTATE_SNIPPET );" has no
//   variable name; if CSphScopedProfile is a class this creates a temporary that is destroyed at
//   the end of the statement, so the profiled scope does not cover the function body.
// - Expr_Snippet_c copy constructor: Setup() is called inside assert(), so the call disappears
//   when NDEBUG is defined and the cloned builder is never set up.
// - Expr_Snippet_c copy constructor: m_dRequestedFields is not copied, while the main constructor
//   adds 0 to it and StringEval() passes it to PackResult() - confirm the clone still works.
// - QueryExprTraits_c::FetchQuery: pWords is read after StringEval() without being initialized.
// // Copyright (c) 2017-2020, Manticore Software LTD (http://manticoresearch.com) // Copyright (c) 2001-2016, Andrew Aksyonoff // Copyright (c) 2008-2016, Sphinx Technologies Inc // All rights reserved // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License. You should have // received a copy of the GPL license along with this program; if you // did not, you can find it at http://www.gnu.org/ // #include "searchdexpr.h" #include "sphinxexcerpt.h" #include "sphinxutils.h" #include "sphinxint.h" #include "attribute.h" #include "docstore.h" enum HookType_e { HOOK_SNIPPET, HOOK_HIGHLIGHT }; static int StringBinary2Number ( const char * sStr, int iLen ) { if ( !sStr || !iLen ) return 0; char sBuf[64]; if ( (int)(sizeof ( sBuf )-1 ) m_pQuery; mutable bool m_bFirstQuery = true; mutable CSphString m_sQuery; CSphString FetchQuery ( const CSphMatch & tMatch ) const; }; QueryExprTraits_c::QueryExprTraits_c ( ISphExpr * pQuery ) : m_pQuery ( pQuery ) { if ( m_pQuery ) SafeAddRef(m_pQuery); } bool QueryExprTraits_c::UpdateQuery ( const CSphMatch & tMatch ) const { CSphString sQuery = FetchQuery(tMatch); if ( m_bFirstQuery || m_sQuery!=sQuery ) { m_bFirstQuery = false; m_sQuery = sQuery; return true; } return false; } CSphString QueryExprTraits_c::FetchQuery ( const CSphMatch & tMatch ) const { if ( !m_pQuery ) return m_sQuery; CSphString sQuery; char * pWords; int iQueryLen = m_pQuery->StringEval ( tMatch, (const BYTE**)&pWords ); if ( m_pQuery->IsDataPtrAttr() ) sQuery.Adopt ( &pWords ); else sQuery.SetBinary ( pWords, iQueryLen ); return sQuery; } bool QueryExprTraits_c::Command ( ESphExprCommand eCmd, void * pArg ) { if ( m_pQuery ) m_pQuery->Command ( eCmd, pArg ); if ( eCmd==SPH_EXPR_SET_QUERY && !m_pQuery ) // don't do this if we have a query expression specified { CSphString sQuery ( (const char*)pArg ); if ( m_bFirstQuery || m_sQuery!=sQuery ) { m_sQuery = sQuery; m_bFirstQuery = false; return 
true; } } return false; } ////////////////////////////////////////////////////////////////////////// /// searchd-level expression function class Expr_Snippet_c : public ISphStringExpr, public QueryExprTraits_c { public: Expr_Snippet_c ( ISphExpr * pArglist, CSphIndex * pIndex, CSphQueryProfile * pProfiler, CSphString & sError ); int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const override; bool IsDataPtrAttr () const final { return true; } void FixupLocator ( const ISphSchema * pOldSchema, const ISphSchema * pNewSchema ) override; void Command ( ESphExprCommand eCmd, void * pArg ) override; uint64_t GetHash ( const ISphSchema &, uint64_t, bool & ) override; ISphExpr * Clone () const override; protected: CSphRefcountedPtr m_pArgs; CSphRefcountedPtr m_pText; CSphIndex * m_pIndex; SnippetQuerySettings_t m_tSnippetQuery; CSphQueryProfile * m_pProfiler; CSphScopedPtr m_pSnippetBuilder; CSphVector m_dRequestedFields; private: Expr_Snippet_c ( const Expr_Snippet_c & rhs ); // need for cloning }; Expr_Snippet_c::Expr_Snippet_c ( ISphExpr * pArglist, CSphIndex * pIndex, CSphQueryProfile * pProfiler, CSphString & sError ) : QueryExprTraits_c ( pArglist->GetArg(1) ) , m_pArgs ( pArglist ) , m_pIndex ( pIndex ) , m_pProfiler ( pProfiler ) , m_pSnippetBuilder ( CreateSnippetBuilder() ) { SafeAddRef ( m_pArgs ); assert ( m_pArgs->IsArglist() ); m_pText = pArglist->GetArg(0); SafeAddRef ( m_pText ); CSphMatch tDummy; char * pWords; for ( int i = 2; i < pArglist->GetNumArgs(); i++ ) { assert ( !pArglist->GetArg(i)->IsDataPtrAttr() ); // aware of memleaks potentially caused by StringEval() int iLen = pArglist->GetArg(i)->StringEval ( tDummy, (const BYTE**)&pWords ); if ( !pWords || !iLen ) continue; CSphString sArgs; sArgs.SetBinary ( pWords, iLen ); char * pWords = const_cast ( sArgs.cstr() ); const char * sEnd = pWords + iLen; while ( pWordsSetup ( m_pIndex, m_tSnippetQuery, sError ) ) return; m_dRequestedFields.Add(0); } int Expr_Snippet_c::StringEval ( const 
CSphMatch & tMatch, const BYTE ** ppStr ) const { CSphScopedProfile ( m_pProfiler, SPH_QSTATE_SNIPPET ); *ppStr = nullptr; const BYTE * szSource = nullptr; int iLen = m_pText->StringEval ( tMatch, &szSource ); // kinda like a scoped ptr, but for an array CSphFixedVector tScoped {0}; if ( m_pText->IsDataPtrAttr() ) tScoped.Set ( (BYTE *)szSource, iLen ); if ( !iLen ) return 0; if ( UpdateQuery(tMatch) ) { CSphString sError; if ( !m_pSnippetBuilder->SetQuery ( GetQuery(), true, sError ) ) return 0; } CSphScopedPtr pSource ( CreateSnippetSource ( m_tSnippetQuery.m_uFilesMode, szSource, iLen ) ); // FIXME! fill in all the missing options; use consthash? SnippetResult_t tRes; if ( !m_pSnippetBuilder->Build ( pSource.Ptr(), tRes ) ) return 0; CSphVector dRes = m_pSnippetBuilder->PackResult ( tRes, m_dRequestedFields ); int iResultLength = dRes.GetLength(); *ppStr = dRes.LeakData(); return iResultLength; } void Expr_Snippet_c::FixupLocator ( const ISphSchema * pOldSchema, const ISphSchema * pNewSchema ) { if ( m_pText ) m_pText->FixupLocator ( pOldSchema, pNewSchema ); } void Expr_Snippet_c::Command ( ESphExprCommand eCmd, void * pArg ) { if ( m_pArgs ) m_pArgs->Command ( eCmd, pArg ); if ( m_pText ) m_pText->Command ( eCmd, pArg ); if ( QueryExprTraits_c::Command ( eCmd, pArg ) ) { // fixme! 
handle errors CSphString sError; m_pSnippetBuilder->SetQuery ( GetQuery(), false, sError ); } } uint64_t Expr_Snippet_c::GetHash ( const ISphSchema &, uint64_t, bool & ) { assert ( 0 && "no snippets in filters" ); return 0; } ISphExpr * Expr_Snippet_c::Clone () const { return new Expr_Snippet_c ( *this ); } Expr_Snippet_c::Expr_Snippet_c ( const Expr_Snippet_c& rhs ) : QueryExprTraits_c ( rhs ) , m_pArgs ( SafeClone ( rhs.m_pArgs ) ) , m_pText ( SafeClone ( rhs.m_pText ) ) , m_pIndex ( rhs.m_pIndex ) , m_tSnippetQuery ( rhs.m_tSnippetQuery ) , m_pProfiler ( rhs.m_pProfiler ) , m_pSnippetBuilder ( CreateSnippetBuilder () ) { CSphString sError; assert ( m_pSnippetBuilder->Setup ( m_pIndex, m_tSnippetQuery, sError )); } ////////////////////////////////////////////////////////////////////////// class Expr_Highlight_c final : public ISphStringExpr, public QueryExprTraits_c { public: Expr_Highlight_c ( ISphExpr * pArglist, CSphIndex * pIndex, CSphQueryProfile * pProfiler, CSphString & sError ); int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const final; void Command ( ESphExprCommand eCmd, void * pArg ) final; void FixupLocator ( const ISphSchema * /*pOldSchema*/, const ISphSchema * /*pNewSchema*/ ) final {} uint64_t GetHash ( const ISphSchema &, uint64_t, bool & ) final; ISphExpr * Clone () const final; private: CSphIndex * m_pIndex = nullptr; CSphQueryProfile * m_pProfiler = nullptr; CSphScopedPtr m_pSnippetBuilder; DocstoreSession_c::Info_t m_tSession; SnippetQuerySettings_t m_tSnippetQuery; CSphVector m_dRequestedFieldIds; CSphVector m_dFieldsToFetch; CSphRefcountedPtr m_pArgs; bool m_bFetchAllFields = false; Expr_Highlight_c ( const Expr_Highlight_c & rhs ); bool FetchFieldsFromDocstore ( DocstoreDoc_t & tFetchedDoc, DocID_t & tDocID ) const; void ParseFields ( ISphExpr * pExpr ); bool ParseOptions ( const VecTraits_T & dMap, CSphString & sError ); bool MarkRequestedFields ( CSphString & sError ); void MarkAllFields(); }; 
Expr_Highlight_c::Expr_Highlight_c ( ISphExpr * pArglist, CSphIndex * pIndex, CSphQueryProfile * pProfiler, CSphString & sError ) : QueryExprTraits_c ( ( pArglist && pArglist->IsArglist() && pArglist->GetNumArgs()==3 ) ? pArglist->GetArg(2) : nullptr ) , m_pIndex ( pIndex ) , m_pProfiler ( pProfiler ) , m_pSnippetBuilder ( CreateSnippetBuilder() ) { assert ( m_pIndex ); if ( pArglist && pArglist->IsArglist() ) { m_pArgs = pArglist; SafeAddRef(m_pArgs); } int iNumArgs = pArglist ? ( pArglist->IsArglist() ? pArglist->GetNumArgs() : 1 ) : 0; if ( iNumArgs>=1 ) { // this should be a map argument. at least we checked that in ExprHook_c::GetReturnType auto pMapArg = (Expr_MapArg_c *)(pArglist->IsArglist() ? pArglist->GetArg(0) : pArglist); assert(pMapArg); VecTraits_T dOpts ( pMapArg->m_pValues, pMapArg->m_iCount ); if ( !ParseOptions ( dOpts, sError ) ) return; } if ( iNumArgs>=2 ) { assert ( pArglist && pArglist->IsArglist() ); ISphExpr * pFields = pArglist->GetArg(1); ParseFields(pFields); } else MarkAllFields(); m_tSnippetQuery.Setup(); if ( !m_pSnippetBuilder->Setup ( m_pIndex, m_tSnippetQuery, sError ) ) return; } int Expr_Highlight_c::StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const { CSphScopedProfile ( m_pProfiler, SPH_QSTATE_SNIPPET ); DocID_t tDocID = sphGetDocID ( tMatch.m_pDynamic ? 
tMatch.m_pDynamic : tMatch.m_pStatic ); DocstoreDoc_t tFetchedDoc; if ( !FetchFieldsFromDocstore ( tFetchedDoc, tDocID ) ) return 0; // now we've fetched all stored fields // we need to arrange them as in original index schema // so that field matching will work as expected const CSphSchema & tSchema = m_pIndex->GetMatchSchema(); CSphVector dAllFields; for ( int i = 0; i < tSchema.GetFieldsCount(); i++ ) { const CSphColumnInfo & tInfo = tSchema.GetField(i); FieldSource_t & tNewField = dAllFields.Add(); tNewField.m_sName = tInfo.m_sName; if ( !( tInfo.m_uFieldFlags & CSphColumnInfo::FIELD_STORED ) ) continue; int iFieldId = m_tSession.m_pDocstore->GetFieldId ( tInfo.m_sName, DOCSTORE_TEXT ); assert ( iFieldId!=-1 ); int iFetchedFieldId = -1; if ( m_bFetchAllFields ) iFetchedFieldId = iFieldId; else { int * pFound = sphBinarySearch ( m_dFieldsToFetch.Begin(), m_dFieldsToFetch.Begin()+m_dFieldsToFetch.GetLength()-1, iFieldId ); if ( pFound ) iFetchedFieldId = pFound-m_dFieldsToFetch.Begin(); } if ( iFetchedFieldId!=-1 ) tNewField.m_dData = tFetchedDoc.m_dFields[iFetchedFieldId].Slice(); } if ( UpdateQuery(tMatch) ) { CSphString sError; if ( !m_pSnippetBuilder->SetQuery ( GetQuery(), true, sError ) ) return 0; } CSphScopedPtr pSource ( CreateHighlightSource(dAllFields) ); SnippetResult_t tRes; if ( !m_pSnippetBuilder->Build ( pSource.Ptr(), tRes ) ) return 0; CSphVector dPacked = m_pSnippetBuilder->PackResult ( tRes, m_dRequestedFieldIds ); int iResultLength = dPacked.GetLength(); *ppStr = dPacked.LeakData(); return iResultLength; } void Expr_Highlight_c::Command ( ESphExprCommand eCmd, void * pArg ) { if ( QueryExprTraits_c::Command ( eCmd, pArg ) ) { // fixme! 
handle errors CSphString sError; m_pSnippetBuilder->SetQuery ( GetQuery(), false, sError ); } if ( eCmd==SPH_EXPR_SET_DOCSTORE ) { const DocstoreSession_c::Info_t & tSession = *(DocstoreSession_c::Info_t*)pArg; bool bMark = tSession.m_pDocstore!=m_tSession.m_pDocstore; m_tSession = tSession; if ( bMark ) { // fixme! handle errors CSphString sError; MarkRequestedFields(sError); } } if ( m_pArgs ) m_pArgs->Command ( eCmd, pArg ); } uint64_t Expr_Highlight_c::GetHash ( const ISphSchema &, uint64_t, bool & ) { assert ( 0 && "no snippets in filters" ); return 0; } ISphExpr * Expr_Highlight_c::Clone () const { return new Expr_Highlight_c ( *this ); } Expr_Highlight_c::Expr_Highlight_c ( const Expr_Highlight_c& rhs ) : QueryExprTraits_c ( rhs ) , m_pIndex ( rhs.m_pIndex ) , m_pProfiler ( rhs.m_pProfiler ) , m_pSnippetBuilder ( CreateSnippetBuilder () ) , m_tSnippetQuery ( rhs.m_tSnippetQuery ) , m_dRequestedFieldIds ( rhs.m_dRequestedFieldIds ) , m_pArgs ( SafeClone ( rhs.m_pArgs ) ) { CSphString sError; assert ( m_pSnippetBuilder->Setup ( m_pIndex, m_tSnippetQuery, sError )); } bool Expr_Highlight_c::FetchFieldsFromDocstore ( DocstoreDoc_t & tFetchedDoc, DocID_t & tDocID ) const { if ( !m_tSession.m_pDocstore ) return false; const CSphVector * pFieldsToFetch = m_bFetchAllFields ? 
nullptr : &m_dFieldsToFetch; return m_tSession.m_pDocstore->GetDoc ( tFetchedDoc, tDocID, pFieldsToFetch, m_tSession.m_iSessionId, false ); } void Expr_Highlight_c::ParseFields ( ISphExpr * pExpr ) { assert ( pExpr && !pExpr->IsDataPtrAttr() ); assert(m_pIndex); CSphString sFields; char * szFields; CSphMatch tDummy; int iLen = pExpr->StringEval ( tDummy, (const BYTE**)&szFields ); sFields.SetBinary ( szFields, iLen ); sFields.ToLower(); sFields.Trim(); StrVec_t dRequestedFieldNames; sphSplit ( dRequestedFieldNames, sFields.cstr() ); if ( !dRequestedFieldNames.GetLength() && sFields.IsEmpty() ) MarkAllFields(); else { const CSphSchema & tSchema = m_pIndex->GetMatchSchema(); for ( const auto & i : dRequestedFieldNames ) { int iField = tSchema.GetFieldIndex ( i.cstr() ); if ( iField!=-1 ) m_dRequestedFieldIds.Add(iField); } } } void Expr_Highlight_c::MarkAllFields() { m_bFetchAllFields = true; m_dFieldsToFetch.Resize(0); const CSphSchema & tSchema = m_pIndex->GetMatchSchema(); for ( int i = 0; i < tSchema.GetFieldsCount(); i++ ) m_dRequestedFieldIds.Add(i); } bool Expr_Highlight_c::MarkRequestedFields ( CSphString & sError ) { m_dFieldsToFetch.Resize(0); bool bResult = true; if ( !m_bFetchAllFields ) { assert ( m_tSession.m_pDocstore ); const CSphSchema & tSchema = m_pIndex->GetMatchSchema(); for ( auto iField : m_dRequestedFieldIds ) { const char * szField = tSchema.GetFieldName(iField); int iDocstoreField = m_tSession.m_pDocstore->GetFieldId ( szField, DOCSTORE_TEXT ); if ( iDocstoreField==-1 ) { sError.SetSprintf ( "field %s not found", szField ); bResult = false; continue; } m_dFieldsToFetch.Add(iDocstoreField); } m_dFieldsToFetch.Uniq(); } return bResult; } bool Expr_Highlight_c::ParseOptions ( const VecTraits_T & dMap, CSphString & sError ) { for ( const auto & i : dMap ) { if ( !ParseSnippetOption ( i, m_tSnippetQuery, sError ) ) return false; } return true; } ////////////////////////////////////////////////////////////////////////// int ExprHook_c::IsKnownFunc 
( const char * sFunc ) { if ( !strcasecmp ( sFunc, "SNIPPET" ) ) return HOOK_SNIPPET; if ( !strcasecmp ( sFunc, "HIGHLIGHT" ) ) return HOOK_HIGHLIGHT; return -1; } ISphExpr * ExprHook_c::CreateNode ( int iID, ISphExpr * pLeft, ESphEvalStage * pEvalStage, CSphString & sError ) { if ( pEvalStage ) *pEvalStage = SPH_EVAL_POSTLIMIT; ISphExpr * pRes = nullptr; switch ( iID ) { case HOOK_SNIPPET: pRes = new Expr_Snippet_c ( pLeft, m_pIndex, m_pProfiler, sError ); break; case HOOK_HIGHLIGHT: pRes = new Expr_Highlight_c ( pLeft, m_pIndex, m_pProfiler, sError ); break; default: assert ( 0 && "Unknown node type" ); return nullptr; } if ( !sError.IsEmpty() ) SafeRelease(pRes); return pRes; } ESphAttr ExprHook_c::GetIdentType ( int ) { assert(0); return SPH_ATTR_NONE; } ESphAttr ExprHook_c::GetReturnType ( int iID, const CSphVector & dArgs, bool, CSphString & sError ) { switch ( iID ) { case HOOK_SNIPPET: if ( dArgs.GetLength()<2 ) { sError = "SNIPPET() requires 2 or more arguments"; return SPH_ATTR_NONE; } if ( dArgs[0]!=SPH_ATTR_STRINGPTR && dArgs[0]!=SPH_ATTR_STRING ) { sError = "1st argument to SNIPPET() must be a string expression"; return SPH_ATTR_NONE; } for ( int i = 1; i < dArgs.GetLength(); i++ ) if ( dArgs[i]!=SPH_ATTR_STRING && dArgs[i]!=SPH_ATTR_STRINGPTR ) { sError.SetSprintf ( "%d argument to SNIPPET() must be a string", i ); return SPH_ATTR_NONE; } break; case HOOK_HIGHLIGHT: if ( dArgs.GetLength()>3 ) { sError = "HIGHLIGHT() requires 0-3 arguments"; return SPH_ATTR_NONE; } if ( dArgs.GetLength()>0 && dArgs[0]!=SPH_ATTR_MAPARG ) { sError = "1st argument to HIGHLIGHT() must be a map"; return SPH_ATTR_NONE; } if ( dArgs.GetLength()>1 && dArgs[1]!=SPH_ATTR_STRING) { sError = "2nd argument to HIGHLIGHT() must be a const string"; return SPH_ATTR_NONE; } if ( dArgs.GetLength()>2 && dArgs[2]!=SPH_ATTR_STRING && dArgs[2]!=SPH_ATTR_STRINGPTR ) { sError = "3rd argument to HIGHLIGHT() must be a string"; return SPH_ATTR_NONE; } break; } return SPH_ATTR_STRINGPTR; }