searchdexpr.cpp 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755
  1. //
  2. // Copyright (c) 2017-2020, Manticore Software LTD (http://manticoresearch.com)
  3. // Copyright (c) 2001-2016, Andrew Aksyonoff
  4. // Copyright (c) 2008-2016, Sphinx Technologies Inc
  5. // All rights reserved
  6. //
  7. // This program is free software; you can redistribute it and/or modify
  8. // it under the terms of the GNU General Public License. You should have
  9. // received a copy of the GPL license along with this program; if you
  10. // did not, you can find it at http://www.gnu.org/
  11. //
  12. #include "searchdexpr.h"
  13. #include "sphinxexcerpt.h"
  14. #include "sphinxutils.h"
  15. #include "sphinxint.h"
  16. #include "attribute.h"
  17. #include "docstore.h"
  /// Ids of the functions served by the expression hook in this file.
  /// Kept as a plain enum because the ids are passed around as int.
  enum HookType_e
  {
      HOOK_SNIPPET = 0,    ///< SNIPPET() call
      HOOK_HIGHLIGHT = 1   ///< HIGHLIGHT() call
  };
  /// Converts a length-delimited (not necessarily zero-terminated) string to int.
  /// At most 63 leading bytes are copied into a local zero-terminated buffer and parsed with atoi().
  /// @param sStr  source bytes, may be null
  /// @param iLen  number of bytes available at sStr
  /// @return the parsed value; 0 for null, empty or negative-length input
  static int StringBinary2Number ( const char * sStr, int iLen )
  {
      // a negative length would turn into a huge size_t inside memcpy(), so reject it together with empty input
      if ( !sStr || iLen<=0 )
          return 0;

      char sBuf[64];
      const int iMaxLen = (int)( sizeof ( sBuf )-1 ); // keep one byte for the terminator
      if ( iLen>iMaxLen )
          iLen = iMaxLen;

      memcpy ( sBuf, sStr, iLen );
      sBuf[iLen] = '\0';
      return atoi ( sBuf );
  }
  34. static bool ParseSnippetOption ( const CSphNamedVariant & tVariant, SnippetQuerySettings_t & tOpt, CSphString & sError )
  35. {
  36. CSphString sName = tVariant.m_sKey;
  37. sName.ToLower();
  38. const CSphString & sVal = tVariant.m_sValue;
  39. int iVal = tVariant.m_iValue;
  40. bool bVal = tVariant.m_iValue!=0;
  41. if ( sName=="before_match" ) tOpt.m_sBeforeMatch = sVal;
  42. else if ( sName=="after_match" ) tOpt.m_sAfterMatch = sVal;
  43. else if ( sName=="chunk_separator" ) tOpt.m_sChunkSeparator = sVal;
  44. else if ( sName=="field_separator" ) tOpt.m_sFieldSeparator = sVal;
  45. else if ( sName=="limit" ) tOpt.m_iLimit = iVal;
  46. else if ( sName=="around" ) tOpt.m_iAround = iVal;
  47. else if ( sName=="use_boundaries" ) tOpt.m_bUseBoundaries = bVal;
  48. else if ( sName=="weight_order" ) tOpt.m_bWeightOrder = bVal;
  49. else if ( sName=="force_all_words" ) tOpt.m_bForceAllWords = bVal;
  50. else if ( sName=="limit_passages" ) tOpt.m_iLimitPassages = iVal;
  51. else if ( sName=="limit_words" ) tOpt.m_iLimitWords = iVal;
  52. else if ( sName=="start_passage_id" ) tOpt.m_iPassageId = iVal;
  53. else if ( sName=="load_files" ) tOpt.m_uFilesMode |= bVal ? 1 : 0;
  54. else if ( sName=="load_files_scattered" ) tOpt.m_uFilesMode |= bVal ? 2 : 0;
  55. else if ( sName=="html_strip_mode" ) tOpt.m_sStripMode = sVal;
  56. else if ( sName=="allow_empty" ) tOpt.m_bAllowEmpty = bVal;
  57. else if ( sName=="emit_zones" ) tOpt.m_bEmitZones = bVal;
  58. else if ( sName=="force_passages" ) tOpt.m_bForcePassages = bVal;
  59. else if ( sName=="passage_boundary" ) tOpt.m_ePassageSPZ = GetPassageBoundary(sVal);
  60. else if ( sName=="json_query" ) tOpt.m_bJsonQuery = bVal;
  61. else if ( sName=="exact_phrase" )
  62. {
  63. sError.SetSprintf ( "exact_phrase option is deprecated" );
  64. return false;
  65. }
  66. else if ( sName=="query_mode" )
  67. {
  68. if ( !bVal )
  69. {
  70. sError.SetSprintf ( "query_mode=0 is deprecated" );
  71. return false;
  72. }
  73. }
  74. else
  75. {
  76. sError.SetSprintf ( "Unknown option: %s", sName.cstr() );
  77. return false;
  78. }
  79. return true;
  80. }
  81. //////////////////////////////////////////////////////////////////////////
  82. class QueryExprTraits_c
  83. {
  84. public:
  85. QueryExprTraits_c ( ISphExpr * pQuery );
  86. bool UpdateQuery ( const CSphMatch & tMatch ) const;
  87. const CSphString & GetQuery() const { return m_sQuery; }
  88. bool Command ( ESphExprCommand eCmd, void * pArg );
  89. QueryExprTraits_c ( const QueryExprTraits_c& rhs )
  90. : m_pQuery ( SafeClone ( rhs.m_pQuery))
  91. {}
  92. private:
  93. CSphRefcountedPtr<ISphExpr> m_pQuery;
  94. mutable bool m_bFirstQuery = true;
  95. mutable CSphString m_sQuery;
  96. CSphString FetchQuery ( const CSphMatch & tMatch ) const;
  97. };
  98. QueryExprTraits_c::QueryExprTraits_c ( ISphExpr * pQuery )
  99. : m_pQuery ( pQuery )
  100. {
  101. if ( m_pQuery )
  102. SafeAddRef(m_pQuery);
  103. }
  104. bool QueryExprTraits_c::UpdateQuery ( const CSphMatch & tMatch ) const
  105. {
  106. CSphString sQuery = FetchQuery(tMatch);
  107. if ( m_bFirstQuery || m_sQuery!=sQuery )
  108. {
  109. m_bFirstQuery = false;
  110. m_sQuery = sQuery;
  111. return true;
  112. }
  113. return false;
  114. }
  115. CSphString QueryExprTraits_c::FetchQuery ( const CSphMatch & tMatch ) const
  116. {
  117. if ( !m_pQuery )
  118. return m_sQuery;
  119. CSphString sQuery;
  120. char * pWords;
  121. int iQueryLen = m_pQuery->StringEval ( tMatch, (const BYTE**)&pWords );
  122. if ( m_pQuery->IsDataPtrAttr() )
  123. sQuery.Adopt ( &pWords );
  124. else
  125. sQuery.SetBinary ( pWords, iQueryLen );
  126. return sQuery;
  127. }
  128. bool QueryExprTraits_c::Command ( ESphExprCommand eCmd, void * pArg )
  129. {
  130. if ( m_pQuery )
  131. m_pQuery->Command ( eCmd, pArg );
  132. if ( eCmd==SPH_EXPR_SET_QUERY && !m_pQuery ) // don't do this if we have a query expression specified
  133. {
  134. CSphString sQuery ( (const char*)pArg );
  135. if ( m_bFirstQuery || m_sQuery!=sQuery )
  136. {
  137. m_sQuery = sQuery;
  138. m_bFirstQuery = false;
  139. return true;
  140. }
  141. }
  142. return false;
  143. }
  144. //////////////////////////////////////////////////////////////////////////
  145. /// searchd-level expression function
  146. class Expr_Snippet_c : public ISphStringExpr, public QueryExprTraits_c
  147. {
  148. public:
  149. Expr_Snippet_c ( ISphExpr * pArglist, CSphIndex * pIndex, CSphQueryProfile * pProfiler, CSphString & sError );
  150. int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const override;
  151. bool IsDataPtrAttr () const final { return true; }
  152. void FixupLocator ( const ISphSchema * pOldSchema, const ISphSchema * pNewSchema ) override;
  153. void Command ( ESphExprCommand eCmd, void * pArg ) override;
  154. uint64_t GetHash ( const ISphSchema &, uint64_t, bool & ) override;
  155. ISphExpr * Clone () const override;
  156. protected:
  157. CSphRefcountedPtr<ISphExpr> m_pArgs;
  158. CSphRefcountedPtr<ISphExpr> m_pText;
  159. CSphIndex * m_pIndex;
  160. SnippetQuerySettings_t m_tSnippetQuery;
  161. CSphQueryProfile * m_pProfiler;
  162. CSphScopedPtr<SnippetBuilder_i> m_pSnippetBuilder;
  163. CSphVector<int> m_dRequestedFields;
  164. private:
  165. Expr_Snippet_c ( const Expr_Snippet_c & rhs ); // need for cloning
  166. };
  167. Expr_Snippet_c::Expr_Snippet_c ( ISphExpr * pArglist, CSphIndex * pIndex, CSphQueryProfile * pProfiler, CSphString & sError )
  168. : QueryExprTraits_c ( pArglist->GetArg(1) )
  169. , m_pArgs ( pArglist )
  170. , m_pIndex ( pIndex )
  171. , m_pProfiler ( pProfiler )
  172. , m_pSnippetBuilder ( CreateSnippetBuilder() )
  173. {
  174. SafeAddRef ( m_pArgs );
  175. assert ( m_pArgs->IsArglist() );
  176. m_pText = pArglist->GetArg(0);
  177. SafeAddRef ( m_pText );
  178. CSphMatch tDummy;
  179. char * pWords;
  180. for ( int i = 2; i < pArglist->GetNumArgs(); i++ )
  181. {
  182. assert ( !pArglist->GetArg(i)->IsDataPtrAttr() ); // aware of memleaks potentially caused by StringEval()
  183. int iLen = pArglist->GetArg(i)->StringEval ( tDummy, (const BYTE**)&pWords );
  184. if ( !pWords || !iLen )
  185. continue;
  186. CSphString sArgs;
  187. sArgs.SetBinary ( pWords, iLen );
  188. char * pWords = const_cast<char *> ( sArgs.cstr() );
  189. const char * sEnd = pWords + iLen;
  190. while ( pWords<sEnd && *pWords && sphIsSpace ( *pWords ) ) pWords++;
  191. char * szOption = pWords;
  192. while ( pWords<sEnd && *pWords && sphIsAlpha ( *pWords ) ) pWords++;
  193. char * szOptEnd = pWords;
  194. while ( pWords<sEnd && *pWords && sphIsSpace ( *pWords ) ) pWords++;
  195. if ( *pWords++!='=' )
  196. {
  197. sError.SetSprintf ( "Error parsing SNIPPET options: %s", pWords );
  198. return;
  199. }
  200. *szOptEnd = '\0';
  201. while ( pWords<sEnd && *pWords && sphIsSpace ( *pWords ) ) pWords++;
  202. char * sValue = pWords;
  203. if ( !*sValue )
  204. {
  205. sError.SetSprintf ( "Error parsing SNIPPET options" );
  206. return;
  207. }
  208. while ( pWords<sEnd && *pWords ) pWords++;
  209. int iStrValLen = pWords - sValue;
  210. CSphNamedVariant tVariant;
  211. tVariant.m_sKey = szOption;
  212. tVariant.m_sValue.SetBinary ( sValue, iStrValLen );
  213. tVariant.m_iValue = StringBinary2Number ( sValue, iStrValLen );
  214. if ( !ParseSnippetOption ( tVariant, m_tSnippetQuery, sError ) )
  215. return;
  216. }
  217. m_tSnippetQuery.Setup();
  218. if ( !m_pSnippetBuilder->Setup ( m_pIndex, m_tSnippetQuery, sError ) )
  219. return;
  220. m_dRequestedFields.Add(0);
  221. }
  222. int Expr_Snippet_c::StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
  223. {
  224. CSphScopedProfile ( m_pProfiler, SPH_QSTATE_SNIPPET );
  225. *ppStr = nullptr;
  226. const BYTE * szSource = nullptr;
  227. int iLen = m_pText->StringEval ( tMatch, &szSource );
  228. // kinda like a scoped ptr, but for an array
  229. CSphFixedVector<BYTE> tScoped {0};
  230. if ( m_pText->IsDataPtrAttr() )
  231. tScoped.Set ( (BYTE *)szSource, iLen );
  232. if ( !iLen )
  233. return 0;
  234. if ( UpdateQuery(tMatch) )
  235. {
  236. CSphString sError;
  237. if ( !m_pSnippetBuilder->SetQuery ( GetQuery(), true, sError ) )
  238. return 0;
  239. }
  240. CSphScopedPtr<TextSource_i> pSource ( CreateSnippetSource ( m_tSnippetQuery.m_uFilesMode, szSource, iLen ) );
  241. // FIXME! fill in all the missing options; use consthash?
  242. SnippetResult_t tRes;
  243. if ( !m_pSnippetBuilder->Build ( pSource.Ptr(), tRes ) )
  244. return 0;
  245. CSphVector<BYTE> dRes = m_pSnippetBuilder->PackResult ( tRes, m_dRequestedFields );
  246. int iResultLength = dRes.GetLength();
  247. *ppStr = dRes.LeakData();
  248. return iResultLength;
  249. }
  250. void Expr_Snippet_c::FixupLocator ( const ISphSchema * pOldSchema, const ISphSchema * pNewSchema )
  251. {
  252. if ( m_pText )
  253. m_pText->FixupLocator ( pOldSchema, pNewSchema );
  254. }
  255. void Expr_Snippet_c::Command ( ESphExprCommand eCmd, void * pArg )
  256. {
  257. if ( m_pArgs )
  258. m_pArgs->Command ( eCmd, pArg );
  259. if ( m_pText )
  260. m_pText->Command ( eCmd, pArg );
  261. if ( QueryExprTraits_c::Command ( eCmd, pArg ) )
  262. {
  263. // fixme! handle errors
  264. CSphString sError;
  265. m_pSnippetBuilder->SetQuery ( GetQuery(), false, sError );
  266. }
  267. }
  268. uint64_t Expr_Snippet_c::GetHash ( const ISphSchema &, uint64_t, bool & )
  269. {
  270. assert ( 0 && "no snippets in filters" );
  271. return 0;
  272. }
  273. ISphExpr * Expr_Snippet_c::Clone () const
  274. {
  275. return new Expr_Snippet_c ( *this );
  276. }
  277. Expr_Snippet_c::Expr_Snippet_c ( const Expr_Snippet_c& rhs )
  278. : QueryExprTraits_c ( rhs )
  279. , m_pArgs ( SafeClone ( rhs.m_pArgs ) )
  280. , m_pText ( SafeClone ( rhs.m_pText ) )
  281. , m_pIndex ( rhs.m_pIndex )
  282. , m_tSnippetQuery ( rhs.m_tSnippetQuery )
  283. , m_pProfiler ( rhs.m_pProfiler )
  284. , m_pSnippetBuilder ( CreateSnippetBuilder () )
  285. {
  286. CSphString sError;
  287. assert ( m_pSnippetBuilder->Setup ( m_pIndex, m_tSnippetQuery, sError ));
  288. }
  289. //////////////////////////////////////////////////////////////////////////
  290. class Expr_Highlight_c final : public ISphStringExpr, public QueryExprTraits_c
  291. {
  292. public:
  293. Expr_Highlight_c ( ISphExpr * pArglist, CSphIndex * pIndex, CSphQueryProfile * pProfiler, CSphString & sError );
  294. int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const final;
  295. void Command ( ESphExprCommand eCmd, void * pArg ) final;
  296. void FixupLocator ( const ISphSchema * /*pOldSchema*/, const ISphSchema * /*pNewSchema*/ ) final {}
  297. uint64_t GetHash ( const ISphSchema &, uint64_t, bool & ) final;
  298. ISphExpr * Clone () const final;
  299. private:
  300. CSphIndex * m_pIndex = nullptr;
  301. CSphQueryProfile * m_pProfiler = nullptr;
  302. CSphScopedPtr<SnippetBuilder_i> m_pSnippetBuilder;
  303. DocstoreSession_c::Info_t m_tSession;
  304. SnippetQuerySettings_t m_tSnippetQuery;
  305. CSphVector<int> m_dRequestedFieldIds;
  306. CSphVector<int> m_dFieldsToFetch;
  307. CSphRefcountedPtr<ISphExpr> m_pArgs;
  308. bool m_bFetchAllFields = false;
  309. Expr_Highlight_c ( const Expr_Highlight_c & rhs );
  310. bool FetchFieldsFromDocstore ( DocstoreDoc_t & tFetchedDoc, DocID_t & tDocID ) const;
  311. void ParseFields ( ISphExpr * pExpr );
  312. bool ParseOptions ( const VecTraits_T<CSphNamedVariant> & dMap, CSphString & sError );
  313. bool MarkRequestedFields ( CSphString & sError );
  314. void MarkAllFields();
  315. };
  316. Expr_Highlight_c::Expr_Highlight_c ( ISphExpr * pArglist, CSphIndex * pIndex, CSphQueryProfile * pProfiler, CSphString & sError )
  317. : QueryExprTraits_c ( ( pArglist && pArglist->IsArglist() && pArglist->GetNumArgs()==3 ) ? pArglist->GetArg(2) : nullptr )
  318. , m_pIndex ( pIndex )
  319. , m_pProfiler ( pProfiler )
  320. , m_pSnippetBuilder ( CreateSnippetBuilder() )
  321. {
  322. assert ( m_pIndex );
  323. if ( pArglist && pArglist->IsArglist() )
  324. {
  325. m_pArgs = pArglist;
  326. SafeAddRef(m_pArgs);
  327. }
  328. int iNumArgs = pArglist ? ( pArglist->IsArglist() ? pArglist->GetNumArgs() : 1 ) : 0;
  329. if ( iNumArgs>=1 )
  330. {
  331. // this should be a map argument. at least we checked that in ExprHook_c::GetReturnType
  332. auto pMapArg = (Expr_MapArg_c *)(pArglist->IsArglist() ? pArglist->GetArg(0) : pArglist);
  333. assert(pMapArg);
  334. VecTraits_T<CSphNamedVariant> dOpts ( pMapArg->m_pValues, pMapArg->m_iCount );
  335. if ( !ParseOptions ( dOpts, sError ) )
  336. return;
  337. }
  338. if ( iNumArgs>=2 )
  339. {
  340. assert ( pArglist && pArglist->IsArglist() );
  341. ISphExpr * pFields = pArglist->GetArg(1);
  342. ParseFields(pFields);
  343. }
  344. else
  345. MarkAllFields();
  346. m_tSnippetQuery.Setup();
  347. if ( !m_pSnippetBuilder->Setup ( m_pIndex, m_tSnippetQuery, sError ) )
  348. return;
  349. }
  350. int Expr_Highlight_c::StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
  351. {
  352. CSphScopedProfile ( m_pProfiler, SPH_QSTATE_SNIPPET );
  353. DocID_t tDocID = sphGetDocID ( tMatch.m_pDynamic ? tMatch.m_pDynamic : tMatch.m_pStatic );
  354. DocstoreDoc_t tFetchedDoc;
  355. if ( !FetchFieldsFromDocstore ( tFetchedDoc, tDocID ) )
  356. return 0;
  357. // now we've fetched all stored fields
  358. // we need to arrange them as in original index schema
  359. // so that field matching will work as expected
  360. const CSphSchema & tSchema = m_pIndex->GetMatchSchema();
  361. CSphVector<FieldSource_t> dAllFields;
  362. for ( int i = 0; i < tSchema.GetFieldsCount(); i++ )
  363. {
  364. const CSphColumnInfo & tInfo = tSchema.GetField(i);
  365. FieldSource_t & tNewField = dAllFields.Add();
  366. tNewField.m_sName = tInfo.m_sName;
  367. if ( !( tInfo.m_uFieldFlags & CSphColumnInfo::FIELD_STORED ) )
  368. continue;
  369. int iFieldId = m_tSession.m_pDocstore->GetFieldId ( tInfo.m_sName, DOCSTORE_TEXT );
  370. assert ( iFieldId!=-1 );
  371. int iFetchedFieldId = -1;
  372. if ( m_bFetchAllFields )
  373. iFetchedFieldId = iFieldId;
  374. else
  375. {
  376. int * pFound = sphBinarySearch ( m_dFieldsToFetch.Begin(), m_dFieldsToFetch.Begin()+m_dFieldsToFetch.GetLength()-1, iFieldId );
  377. if ( pFound )
  378. iFetchedFieldId = pFound-m_dFieldsToFetch.Begin();
  379. }
  380. if ( iFetchedFieldId!=-1 )
  381. tNewField.m_dData = tFetchedDoc.m_dFields[iFetchedFieldId].Slice();
  382. }
  383. if ( UpdateQuery(tMatch) )
  384. {
  385. CSphString sError;
  386. if ( !m_pSnippetBuilder->SetQuery ( GetQuery(), true, sError ) )
  387. return 0;
  388. }
  389. CSphScopedPtr<TextSource_i> pSource ( CreateHighlightSource(dAllFields) );
  390. SnippetResult_t tRes;
  391. if ( !m_pSnippetBuilder->Build ( pSource.Ptr(), tRes ) )
  392. return 0;
  393. CSphVector<BYTE> dPacked = m_pSnippetBuilder->PackResult ( tRes, m_dRequestedFieldIds );
  394. int iResultLength = dPacked.GetLength();
  395. *ppStr = dPacked.LeakData();
  396. return iResultLength;
  397. }
  398. void Expr_Highlight_c::Command ( ESphExprCommand eCmd, void * pArg )
  399. {
  400. if ( QueryExprTraits_c::Command ( eCmd, pArg ) )
  401. {
  402. // fixme! handle errors
  403. CSphString sError;
  404. m_pSnippetBuilder->SetQuery ( GetQuery(), false, sError );
  405. }
  406. if ( eCmd==SPH_EXPR_SET_DOCSTORE )
  407. {
  408. const DocstoreSession_c::Info_t & tSession = *(DocstoreSession_c::Info_t*)pArg;
  409. bool bMark = tSession.m_pDocstore!=m_tSession.m_pDocstore;
  410. m_tSession = tSession;
  411. if ( bMark )
  412. {
  413. // fixme! handle errors
  414. CSphString sError;
  415. MarkRequestedFields(sError);
  416. }
  417. }
  418. if ( m_pArgs )
  419. m_pArgs->Command ( eCmd, pArg );
  420. }
  421. uint64_t Expr_Highlight_c::GetHash ( const ISphSchema &, uint64_t, bool & )
  422. {
  423. assert ( 0 && "no snippets in filters" );
  424. return 0;
  425. }
  426. ISphExpr * Expr_Highlight_c::Clone () const
  427. {
  428. return new Expr_Highlight_c ( *this );
  429. }
  430. Expr_Highlight_c::Expr_Highlight_c ( const Expr_Highlight_c& rhs )
  431. : QueryExprTraits_c ( rhs )
  432. , m_pIndex ( rhs.m_pIndex )
  433. , m_pProfiler ( rhs.m_pProfiler )
  434. , m_pSnippetBuilder ( CreateSnippetBuilder () )
  435. , m_tSnippetQuery ( rhs.m_tSnippetQuery )
  436. , m_dRequestedFieldIds ( rhs.m_dRequestedFieldIds )
  437. , m_pArgs ( SafeClone ( rhs.m_pArgs ) )
  438. {
  439. CSphString sError;
  440. assert ( m_pSnippetBuilder->Setup ( m_pIndex, m_tSnippetQuery, sError ));
  441. }
  442. bool Expr_Highlight_c::FetchFieldsFromDocstore ( DocstoreDoc_t & tFetchedDoc, DocID_t & tDocID ) const
  443. {
  444. if ( !m_tSession.m_pDocstore )
  445. return false;
  446. const CSphVector<int> * pFieldsToFetch = m_bFetchAllFields ? nullptr : &m_dFieldsToFetch;
  447. return m_tSession.m_pDocstore->GetDoc ( tFetchedDoc, tDocID, pFieldsToFetch, m_tSession.m_iSessionId, false );
  448. }
  449. void Expr_Highlight_c::ParseFields ( ISphExpr * pExpr )
  450. {
  451. assert ( pExpr && !pExpr->IsDataPtrAttr() );
  452. assert(m_pIndex);
  453. CSphString sFields;
  454. char * szFields;
  455. CSphMatch tDummy;
  456. int iLen = pExpr->StringEval ( tDummy, (const BYTE**)&szFields );
  457. sFields.SetBinary ( szFields, iLen );
  458. sFields.ToLower();
  459. sFields.Trim();
  460. StrVec_t dRequestedFieldNames;
  461. sphSplit ( dRequestedFieldNames, sFields.cstr() );
  462. if ( !dRequestedFieldNames.GetLength() && sFields.IsEmpty() )
  463. MarkAllFields();
  464. else
  465. {
  466. const CSphSchema & tSchema = m_pIndex->GetMatchSchema();
  467. for ( const auto & i : dRequestedFieldNames )
  468. {
  469. int iField = tSchema.GetFieldIndex ( i.cstr() );
  470. if ( iField!=-1 )
  471. m_dRequestedFieldIds.Add(iField);
  472. }
  473. }
  474. }
  475. void Expr_Highlight_c::MarkAllFields()
  476. {
  477. m_bFetchAllFields = true;
  478. m_dFieldsToFetch.Resize(0);
  479. const CSphSchema & tSchema = m_pIndex->GetMatchSchema();
  480. for ( int i = 0; i < tSchema.GetFieldsCount(); i++ )
  481. m_dRequestedFieldIds.Add(i);
  482. }
  483. bool Expr_Highlight_c::MarkRequestedFields ( CSphString & sError )
  484. {
  485. m_dFieldsToFetch.Resize(0);
  486. bool bResult = true;
  487. if ( !m_bFetchAllFields )
  488. {
  489. assert ( m_tSession.m_pDocstore );
  490. const CSphSchema & tSchema = m_pIndex->GetMatchSchema();
  491. for ( auto iField : m_dRequestedFieldIds )
  492. {
  493. const char * szField = tSchema.GetFieldName(iField);
  494. int iDocstoreField = m_tSession.m_pDocstore->GetFieldId ( szField, DOCSTORE_TEXT );
  495. if ( iDocstoreField==-1 )
  496. {
  497. sError.SetSprintf ( "field %s not found", szField );
  498. bResult = false;
  499. continue;
  500. }
  501. m_dFieldsToFetch.Add(iDocstoreField);
  502. }
  503. m_dFieldsToFetch.Uniq();
  504. }
  505. return bResult;
  506. }
  507. bool Expr_Highlight_c::ParseOptions ( const VecTraits_T<CSphNamedVariant> & dMap, CSphString & sError )
  508. {
  509. for ( const auto & i : dMap )
  510. {
  511. if ( !ParseSnippetOption ( i, m_tSnippetQuery, sError ) )
  512. return false;
  513. }
  514. return true;
  515. }
  516. //////////////////////////////////////////////////////////////////////////
  517. int ExprHook_c::IsKnownFunc ( const char * sFunc )
  518. {
  519. if ( !strcasecmp ( sFunc, "SNIPPET" ) )
  520. return HOOK_SNIPPET;
  521. if ( !strcasecmp ( sFunc, "HIGHLIGHT" ) )
  522. return HOOK_HIGHLIGHT;
  523. return -1;
  524. }
  525. ISphExpr * ExprHook_c::CreateNode ( int iID, ISphExpr * pLeft, ESphEvalStage * pEvalStage, CSphString & sError )
  526. {
  527. if ( pEvalStage )
  528. *pEvalStage = SPH_EVAL_POSTLIMIT;
  529. ISphExpr * pRes = nullptr;
  530. switch ( iID )
  531. {
  532. case HOOK_SNIPPET:
  533. pRes = new Expr_Snippet_c ( pLeft, m_pIndex, m_pProfiler, sError );
  534. break;
  535. case HOOK_HIGHLIGHT:
  536. pRes = new Expr_Highlight_c ( pLeft, m_pIndex, m_pProfiler, sError );
  537. break;
  538. default:
  539. assert ( 0 && "Unknown node type" );
  540. return nullptr;
  541. }
  542. if ( !sError.IsEmpty() )
  543. SafeRelease(pRes);
  544. return pRes;
  545. }
  546. ESphAttr ExprHook_c::GetIdentType ( int )
  547. {
  548. assert(0);
  549. return SPH_ATTR_NONE;
  550. }
  551. ESphAttr ExprHook_c::GetReturnType ( int iID, const CSphVector<ESphAttr> & dArgs, bool, CSphString & sError )
  552. {
  553. switch ( iID )
  554. {
  555. case HOOK_SNIPPET:
  556. if ( dArgs.GetLength()<2 )
  557. {
  558. sError = "SNIPPET() requires 2 or more arguments";
  559. return SPH_ATTR_NONE;
  560. }
  561. if ( dArgs[0]!=SPH_ATTR_STRINGPTR && dArgs[0]!=SPH_ATTR_STRING )
  562. {
  563. sError = "1st argument to SNIPPET() must be a string expression";
  564. return SPH_ATTR_NONE;
  565. }
  566. for ( int i = 1; i < dArgs.GetLength(); i++ )
  567. if ( dArgs[i]!=SPH_ATTR_STRING && dArgs[i]!=SPH_ATTR_STRINGPTR )
  568. {
  569. sError.SetSprintf ( "%d argument to SNIPPET() must be a string", i );
  570. return SPH_ATTR_NONE;
  571. }
  572. break;
  573. case HOOK_HIGHLIGHT:
  574. if ( dArgs.GetLength()>3 )
  575. {
  576. sError = "HIGHLIGHT() requires 0-3 arguments";
  577. return SPH_ATTR_NONE;
  578. }
  579. if ( dArgs.GetLength()>0 && dArgs[0]!=SPH_ATTR_MAPARG )
  580. {
  581. sError = "1st argument to HIGHLIGHT() must be a map";
  582. return SPH_ATTR_NONE;
  583. }
  584. if ( dArgs.GetLength()>1 && dArgs[1]!=SPH_ATTR_STRING)
  585. {
  586. sError = "2nd argument to HIGHLIGHT() must be a const string";
  587. return SPH_ATTR_NONE;
  588. }
  589. if ( dArgs.GetLength()>2 && dArgs[2]!=SPH_ATTR_STRING && dArgs[2]!=SPH_ATTR_STRINGPTR )
  590. {
  591. sError = "3rd argument to HIGHLIGHT() must be a string";
  592. return SPH_ATTR_NONE;
  593. }
  594. break;
  595. }
  596. return SPH_ATTR_STRINGPTR;
  597. }