Browse Source

extract parser

Aleksey N. Vinogradov 9 months ago
parent
commit
4dde2d003e

+ 1 - 0
src/gtests/gtests_rtstuff.cpp

@@ -20,6 +20,7 @@
 #include "binlog.h"
 #include "accumulator.h"
 #include "sphinxudf.h"
+#include "sphinxquery/xqparser.h"
 
 #include <gmock/gmock.h>
 

+ 1 - 0
src/gtests/gtests_tokenizer.cpp

@@ -15,6 +15,7 @@
 #include "sphinxint.h"
 #include "tokenizer/tokenizer.h"
 #include "tokenizer/tok_internals.h"
+#include "sphinxquery/xqparser.h"
 
 // Miscelaneous tests of tokenizer
 

+ 1 - 0
src/searchd.cpp

@@ -42,6 +42,7 @@
 #include "skip_cache.h"
 #include "jieba.h"
 #include "sphinxexcerpt.h"
+#include "sphinxquery/xqparser.h"
 #include "daemon/winservice.h"
 #include "daemon/crash_logger.h"
 #include "daemon/logger.h"

+ 1 - 0
src/searchdhttp.cpp

@@ -27,6 +27,7 @@
 #include "compressed_http.h"
 #include "daemon/logger.h"
 #include "daemon/search_handler.h"
+#include "sphinxquery/xqparser.h"
 
 static bool g_bLogBadHttpReq = val_from_env ( "MANTICORE_LOG_HTTP_BAD_REQ", false ); // log content of bad http requests, ruled by this env variable
 static int g_iLogHttpData = val_from_env ( "MANTICORE_LOG_HTTP_DATA", 0 ); // verbose logging of http data, ruled by this env variable

+ 1 - 3
src/sphinx.cpp

@@ -13,6 +13,7 @@
 #include "sphinx.h"
 #include "sphinxstem.h"
 #include "sphinxquery/sphinxquery.h"
+#include "sphinxquery/xqparser.h"
 #include "sphinxutils.h"
 #include "sphinxsort.h"
 #include "fileutils.h"
@@ -22,7 +23,6 @@
 #include "sphinxsearch.h"
 #include "searchnode.h"
 #include "sphinxjson.h"
-#include "sphinxplugin.h"
 #include "sphinxqcache.h"
 #include "icu.h"
 #include "jieba.h"
@@ -67,7 +67,6 @@
 #include <stdlib.h>
 #include <stdarg.h>
 #include <sys/stat.h>
-#include <time.h>
 #include <math.h>
 #include <algorithm>
 
@@ -110,7 +109,6 @@ void gmtime_r ( const time_t * clock, struct tm * res )
 #include <boost/preprocessor/repetition/repeat.hpp>
 
 #include "attrindex_builder.h"
-#include "stripper/html_stripper.h"
 #include "queryfilter.h"
 #include "indexing_sources/source_document.h"
 #include "indexing_sources/source_stats.h"

+ 2 - 3
src/sphinxjsonquery.cpp

@@ -8,7 +8,8 @@
 // did not, you can find it at http://www.gnu.org/
 //
 
-#include "sphinxquery/sphinxquery.h"
+#include "sphinxquery/xqparser.h"
+#include "sphinxquery/parse_helper.h"
 #include "sphinxsearch.h"
 #include "sphinxplugin.h"
 #include "sphinxutils.h"
@@ -21,8 +22,6 @@
 #include "sorterscroll.h"
 #include "sphinxexcerpt.h"
 
-#include "json/cJSON.h"
-
 static const char * g_szAll = "_all";
 static const char * g_szHighlight = "_@highlight_";
 static const char * g_szOrder = "_@order_";

+ 1 - 0
src/sphinxpq.cpp

@@ -23,6 +23,7 @@
 #include "tokenizer/tokenizer.h"
 #include "task_dispatcher.h"
 #include "stackmock.h"
+#include "sphinxquery/xqparser.h"
 
 #include <atomic>
 

+ 1 - 0
src/sphinxquery/CMakeLists.txt

@@ -4,6 +4,7 @@ add_library ( sphinxquery OBJECT
 		sphinxquery.cpp
 		sphinxquery.h
 		parse_helper.cpp
+		parse_helper.h
 		xqparser.cpp
 		xqparser.h
 		xqnode.cpp

+ 1 - 1
src/sphinxquery/parse_helper.cpp

@@ -10,7 +10,7 @@
 // did not, you can find it at http://www.gnu.org
 //
 
-#include "sphinxquery.h"
+#include "parse_helper.h"
 #include "sphinxplugin.h"
 
 #include "tokenizer/tokenizer.h"

+ 84 - 0
src/sphinxquery/parse_helper.h

@@ -0,0 +1,84 @@
+//
+// Copyright (c) 2017-2025, Manticore Software LTD (https://manticoresearch.com)
+// Copyright (c) 2001-2016, Andrew Aksyonoff
+// Copyright (c) 2008-2016, Sphinx Technologies Inc
+// All rights reserved
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org
+//
+
+#pragma once
+
+#include "sphinxquery.h"
+
+class PluginQueryTokenFilter_c;
+using PluginQueryTokenRefPtr_c = CSphRefcountedPtr<PluginQueryTokenFilter_c>;
+
+class XQParseHelper_c // base class with query-parsing helpers; abstract, see HandleFieldBlockStart() below
+{
+public:
+	virtual ~XQParseHelper_c () = default; // virtual dtor: the class has virtual members and is meant to be derived from
+
+	void SetString ( const char * szString );
+
+	bool			AddField ( FieldMask_t & dFields, const char * szField, int iLen );
+	bool			ParseFields ( FieldMask_t & dFields, int & iMaxFieldPos, bool & bIgnore );
+
+	void			Setup ( const CSphSchema * pSchema, TokenizerRefPtr_c pTokenizer, DictRefPtr_c pDict, XQQuery_t * pXQQuery, const CSphIndexSettings & tSettings );
+	bool			Error ( const char * sTemplate, ... ) __attribute__ ( ( format ( printf, 2, 3 ) ) ); // printf-checked; indices are 2,3 because the implicit 'this' counts as argument 1
+	void			Warning ( const char * sTemplate, ... ) __attribute__ ( ( format ( printf, 2, 3 ) ) ); // same printf-style checking as Error()
+	XQNode_t *		FixupTree ( XQNode_t * pRoot, const XQLimitSpec_t & tLimitSpec, const CSphBitvec * pMorphFields, bool bOnlyNotAllowed );
+
+	const CSphSchema * GetSchema() const { return m_pSchema; } // returns the stored schema pointer (nullptr is its initial value)
+	DictRefPtr_c&	GetDict() { return m_pDict; } // returns a mutable reference to the stored dictionary pointer
+
+	bool			IsError() { return m_bError; } // reads m_bError; NOTE(review): could be const, it modifies nothing
+	virtual void	Cleanup(); // virtual, so derived parsers can extend it
+	void			SetZone ( const StrVec_t & dZones ) const noexcept; // NOTE(review): a const setter - presumably writes through m_pParsed; confirm in parse_helper.cpp
+	const StrVec_t & GetZone() const noexcept;
+	XQNode_t *		SpawnNode ( const XQLimitSpec_t & dSpec ) noexcept;
+	void			DeleteSpawned ( XQNode_t * pNode ) noexcept;
+
+protected:
+	struct MultiformNode_t // NOTE(review): presumably a node plus a start/count range of destination forms - confirm against FixupDestForms()
+	{
+		XQNode_t *	m_pNode;
+		int			m_iDestStart;
+		int			m_iDestCount;
+	};
+
+	static const int MAX_TOKEN_BYTES = 3*SPH_MAX_WORD_LEN + 16;
+
+	const CSphSchema *		m_pSchema {nullptr};
+	TokenizerRefPtr_c		m_pTokenizer;
+	DictRefPtr_c			m_pDict;
+	bool					m_bStopOnInvalid {true}; // defaults to true
+	XQQuery_t *				m_pParsed {nullptr};
+	bool					m_bError {false}; // exposed via IsError()
+
+	PluginQueryTokenRefPtr_c m_pPlugin;
+	void *					m_pPluginData {nullptr};
+
+	int						m_iAtomPos {0};
+	bool					m_bEmptyStopword {false};
+	bool					m_bWasBlended {false};
+
+	CSphVector<XQNode_t*>		m_dSpawned;
+	StrVec_t					m_dDestForms;
+	CSphVector<MultiformNode_t>	m_dMultiforms;
+
+	virtual bool	HandleFieldBlockStart ( const char * & pPtr ) = 0; // pure virtual: every concrete parser must implement it
+	virtual bool	HandleSpecialFields ( const char * & /*pPtr*/, FieldMask_t & /*dFields*/ ) { return false; } // default implementation ignores both arguments and returns false
+	virtual bool	NeedTrailingSeparator() { return true; } // default implementation returns true
+
+private:
+	XQNode_t *		SweepNulls ( XQNode_t * pNode, bool bOnlyNotAllowed );
+	bool			FixupNots ( XQNode_t * pNode, bool bOnlyNotAllowed, XQNode_t ** ppRoot );
+	void			FixupNulls ( XQNode_t * pNode );
+	void			DeleteNodesWOFields ( XQNode_t * pNode );
+	void			FixupDestForms();
+	bool			CheckQuorumProximity ( const XQNode_t * pNode );
+};

+ 0 - 53
src/sphinxquery/sphinxquery.cpp

@@ -272,56 +272,3 @@ void DotDump (const XQNode_t * pNode)
 	sph::RenderBsonPlan ( sRes, bson::MakeHandle ( dPlan ), true );
 	printf ( "\nhttps://dreampuf.github.io/GraphvizOnline/#%s\n", UrlEncode ( CSphString{sRes} ).cstr() );
 }
-
-
-bool sphParseExtendedQuery ( XQQuery_t & tParsed, const char * sQuery, const CSphQuery * pQuery, const TokenizerRefPtr_c& pTokenizer, const CSphSchema * pSchema, const DictRefPtr_c& pDict, const CSphIndexSettings & tSettings, const CSphBitvec * pMorphFields )
-{
-	XQParser_t qp;
-	bool bRes = qp.Parse ( tParsed, sQuery, pQuery, pTokenizer, pSchema, pDict, tSettings, pMorphFields );
-
-#ifndef NDEBUG
-	if ( bRes && tParsed.m_pRoot )
-		tParsed.m_pRoot->Check ( true );
-#endif
-
-#if XQDEBUG
-	if ( bRes )
-	{
-		printf ( "\n--- query ---\n" );
-		printf ( "%s\n", sQuery );
-		xqDump ( tParsed.m_pRoot, 0 );
-		DotDump ( tParsed.m_pRoot );
-		printf ( "\n--- query reconstructed ---\n" );
-		printf ( "%s\n", sphReconstructNode ( tParsed.m_pRoot ).cstr());
-		printf ( "---\n" );
-	}
-#endif
-
-	// moved here from ranker creation
-	// as at that point term expansion could produce many terms from expanded term and this condition got failed
-	tParsed.m_bSingleWord = ( tParsed.m_pRoot && tParsed.m_pRoot->dChildren().IsEmpty() && tParsed.m_pRoot->dWords().GetLength()==1 );
-	tParsed.m_bEmpty = qp.m_bEmpty;
-
-	return bRes;
-}
-
-
-class QueryParserPlain_c : public QueryParser_i
-{
-public:
-	bool IsFullscan ( const XQQuery_t & tQuery ) const override { return false; }
-	bool ParseQuery ( XQQuery_t & tParsed, const char * sQuery, const CSphQuery * pQuery, TokenizerRefPtr_c pQueryTokenizer, TokenizerRefPtr_c pQueryTokenizerJson, const CSphSchema * pSchema, const DictRefPtr_c& pDict, const CSphIndexSettings & tSettings, const CSphBitvec * pMorphFields ) const override;
-	QueryParser_i * Clone() const final { return new QueryParserPlain_c; }
-};
-
-
-bool QueryParserPlain_c::ParseQuery ( XQQuery_t & tParsed, const char * sQuery, const CSphQuery * pQuery, TokenizerRefPtr_c pQueryTokenizer, TokenizerRefPtr_c, const CSphSchema * pSchema, const DictRefPtr_c& pDict, const CSphIndexSettings & tSettings, const CSphBitvec * pMorphFields ) const
-{
-	return sphParseExtendedQuery ( tParsed, sQuery, pQuery, pQueryTokenizer, pSchema, pDict, tSettings, pMorphFields );
-}
-
-
-std::unique_ptr<QueryParser_i> sphCreatePlainQueryParser()
-{
-	return std::make_unique<QueryParserPlain_c>();
-}

+ 0 - 81
src/sphinxquery/sphinxquery.h

@@ -380,89 +380,8 @@ public:
 	virtual QueryParser_i * Clone() const = 0;
 };
 
-class PluginQueryTokenFilter_c;
-using PluginQueryTokenRefPtr_c = CSphRefcountedPtr<PluginQueryTokenFilter_c>;
-
-class XQParseHelper_c
-{
-public:
-	virtual			~XQParseHelper_c() = default;
-
-	void			SetString ( const char * szString );
-
-	bool			AddField ( FieldMask_t & dFields, const char * szField, int iLen );
-	bool			ParseFields ( FieldMask_t & dFields, int & iMaxFieldPos, bool & bIgnore );
-
-	void			Setup ( const CSphSchema * pSchema, TokenizerRefPtr_c pTokenizer, DictRefPtr_c pDict, XQQuery_t * pXQQuery, const CSphIndexSettings & tSettings );
-	bool			Error ( const char * sTemplate, ... ) __attribute__ ( ( format ( printf, 2, 3 ) ) );
-	void			Warning ( const char * sTemplate, ... ) __attribute__ ( ( format ( printf, 2, 3 ) ) );
-	XQNode_t *		FixupTree ( XQNode_t * pRoot, const XQLimitSpec_t & tLimitSpec, const CSphBitvec * pMorphFields, bool bOnlyNotAllowed );
-
-	const CSphSchema * GetSchema() const { return m_pSchema; }
-	DictRefPtr_c&	GetDict() { return m_pDict; }
-	
-	bool			IsError() { return m_bError; }
-	virtual void	Cleanup();
-	void			SetZone ( const StrVec_t & dZones ) const noexcept;
-	const StrVec_t & GetZone() const noexcept;
-	XQNode_t *		SpawnNode ( const XQLimitSpec_t & dSpec ) noexcept;
-	void			DeleteSpawned ( XQNode_t * pNode ) noexcept;
-
-protected:
-	struct MultiformNode_t
-	{
-		XQNode_t *	m_pNode;
-		int			m_iDestStart;
-		int			m_iDestCount;
-	};
-
-	static const int MAX_TOKEN_BYTES = 3*SPH_MAX_WORD_LEN + 16;
-
-	const CSphSchema *		m_pSchema {nullptr};
-	TokenizerRefPtr_c		m_pTokenizer;
-	DictRefPtr_c			m_pDict;
-	bool					m_bStopOnInvalid {true};
-	XQQuery_t *				m_pParsed {nullptr};
-	bool					m_bError {false};
-
-	PluginQueryTokenRefPtr_c m_pPlugin;
-	void *					m_pPluginData {nullptr};
-
-	int						m_iAtomPos {0};
-	bool					m_bEmptyStopword {false};
-	bool					m_bWasBlended {false};
-
-	CSphVector<XQNode_t*>		m_dSpawned;
-	StrVec_t					m_dDestForms;
-	CSphVector<MultiformNode_t>	m_dMultiforms;
-
-	virtual bool	HandleFieldBlockStart ( const char * & pPtr ) = 0;
-	virtual bool	HandleSpecialFields ( const char * & /*pPtr*/, FieldMask_t & /*dFields*/ ) { return false; }
-	virtual bool	NeedTrailingSeparator() { return true; }
-
-private:
-	XQNode_t *		SweepNulls ( XQNode_t * pNode, bool bOnlyNotAllowed );
-	bool			FixupNots ( XQNode_t * pNode, bool bOnlyNotAllowed, XQNode_t ** ppRoot );
-	void			FixupNulls ( XQNode_t * pNode );
-	void			DeleteNodesWOFields ( XQNode_t * pNode );
-	void			FixupDestForms();
-	bool			CheckQuorumProximity ( const XQNode_t * pNode );
-};
-
 //////////////////////////////////////////////////////////////////////////////
 
-// a wrapper for sphParseExtendedQuery
-std::unique_ptr<QueryParser_i> sphCreatePlainQueryParser();
-
-/// parses the query and returns the resulting tree
-/// return false and fills tQuery.m_sParseError on error
-/// WARNING, parsed tree might be NULL (eg. if query was empty)
-/// lots of arguments here instead of simply the index pointer, because
-/// a) we do not always have an actual real index class, and
-/// b) might need to tweak stuff even we do
-/// FIXME! remove either pQuery or sQuery
-bool	sphParseExtendedQuery ( XQQuery_t & tQuery, const char * sQuery, const CSphQuery * pQuery, const TokenizerRefPtr_c& pTokenizer, const CSphSchema * pSchema, const DictRefPtr_c& pDict, const CSphIndexSettings & tSettings, const CSphBitvec * pMorphFields );
-
 // perform boolean optimization on tree
 void	sphOptimizeBoolean ( XQNode_t ** pXQ, const ISphKeywordsStat * pKeywords );
 

+ 152 - 6
src/sphinxquery/xqparser.cpp

@@ -10,12 +10,18 @@
 // did not, you can find it at http://www.gnu.org
 //
 
-#include "sphinxquery.h"
-
+#include "xqparser.h"
+#include "parse_helper.h"
 #include "sphinxplugin.h"
 #include "tokenizer/tokenizer.h"
 #include "dict/dict_base.h"
-#include "xqparser.h"
+#include "xqdebug.h"
+
+//////////////////////////////////////////////////////////////////////////
+// EXTENDED PARSER RELOADED
+//////////////////////////////////////////////////////////////////////////
+class XQParser_t;
+#include "bissphinxquery.h"
 
 static bool g_bOnlyNotAllowed = false;
 
@@ -45,13 +51,102 @@ bool HasMissedField ( const XQLimitSpec_t & tSpec )
 {
 	return (tSpec.m_dFieldMask.TestAll ( false ) && tSpec.m_iFieldMaxPos == 0 && !tSpec.m_bZoneSpan && tSpec.m_dZones.GetLength() == 0);
 }
-
 }
 
 //////////////////////////////////////////////////////////////////////////
-// EXTENDED PARSER RELOADED
-//////////////////////////////////////////////////////////////////////////
 
+class XQParser_t : public XQParseHelper_c // parser state shared with yyparse()/yyerror(), which are declared friends below
+{
+	friend void yyerror ( XQParser_t * pParser, const char * sMessage ); // gives yyerror access to non-public members
+
+	friend int yyparse (XQParser_t * pParser); // gives yyparse access to non-public members
+
+public:
+					XQParser_t();
+					~XQParser_t() override;
+
+public:
+	bool			Parse ( XQQuery_t & tQuery, const char * sQuery, const CSphQuery * pQuery, const TokenizerRefPtr_c & pTokenizer, const CSphSchema * pSchema, const DictRefPtr_c & pDict, const CSphIndexSettings & tSettings, const CSphBitvec * pMorphFields );
+	int				ParseZone ( const char * pZone );
+
+	bool			IsSpecial ( char c );
+	bool			GetNumber ( const char * p, const char * sRestart );
+	int				GetToken ( YYSTYPE * lvalp );
+
+	void			HandleModifiers ( XQKeyword_t & tKeyword ) const noexcept;
+
+	void			AddQuery ( XQNode_t * pNode );
+	XQNode_t *		AddKeyword ( const char * sKeyword, int iSkippedPosBeforeToken=0 );
+	XQNode_t *		AddKeyword ( XQNode_t * pLeft, XQNode_t * pRight );
+	XQNode_t *		AddOp ( XQOperator_e eOp, XQNode_t * pLeft, XQNode_t * pRight, int iOpArg=0 );
+	void			SetPhrase ( XQNode_t * pNode, bool bSetExact );
+	void			PhraseShiftQpos ( XQNode_t * pNode );
+
+	void	Cleanup () override; // overrides the virtual XQParseHelper_c::Cleanup()
+
+	void SetFieldSpec ( const FieldMask_t& uMask, int iMaxPos ) // sets field mask and max position on the topmost entry of m_dStateSpec
+	{
+		FixRefSpec(); // un-share the top entry first, so the entry below it is not modified as well
+		m_dStateSpec.Last()->SetFieldSpec ( uMask, iMaxPos );
+	}
+	void SetZoneVec ( int iZoneVec, bool bZoneSpan = false ) // sets the zone spec on the topmost entry from m_dZoneVecs[iZoneVec]
+	{
+		FixRefSpec(); // same un-sharing step as in SetFieldSpec()
+		m_dStateSpec.Last()->SetZoneSpec ( m_dZoneVecs[iZoneVec], bZoneSpan );
+	}
+
+	void FixRefSpec () // if the two topmost m_dStateSpec entries are the same pointer, replaces the top one with a copy
+	{
+		bool bRef = ( m_dStateSpec.GetLength()>1 && ( m_dStateSpec[m_dStateSpec.GetLength()-1]==m_dStateSpec[m_dStateSpec.GetLength()-2] ) ); // true when the top entry aliases the one below it
+		if ( !bRef )
+			return;
+
+		XQLimitSpec_t * pSpec = m_dStateSpec.Pop();
+		m_dSpecPool.Add ( new XQLimitSpec_t ( *pSpec ) ); // the copy is recorded in m_dSpecPool; NOTE(review): presumably released from there in Cleanup()/dtor - confirm
+		m_dStateSpec.Add ( m_dSpecPool.Last() ); // push the fresh copy back as the new top
+	}
+
+public:
+	const CSphVector<int> & GetZoneVec ( int iZoneVec ) const // returns m_dZoneVecs[iZoneVec] by const reference
+	{
+		return m_dZoneVecs[iZoneVec];
+	}
+
+public:
+	BYTE *					m_sQuery = nullptr;
+	int						m_iQueryLen = 0;
+	const char *			m_pErrorAt = nullptr;
+
+	XQNode_t *				m_pRoot = nullptr;
+
+	int						m_iPendingNulls = 0;
+	int						m_iPendingType = 0;
+	YYSTYPE					m_tPendingToken; // the only data member here without a default member initializer
+	bool					m_bWasKeyword = false;
+
+	bool					m_bEmpty = false; // copied into XQQuery_t::m_bEmpty by sphParseExtendedQuery()
+	bool					m_bQuoted = false;
+	int						m_iOvershortStep = 0;
+
+	int						m_iQuorumQuote = -1;
+	int						m_iQuorumFSlash = -1;
+	bool					m_bCheckNumber = false;
+
+	StrVec_t				m_dIntTokens;
+
+	CSphVector < CSphVector<int> >	m_dZoneVecs; // indexed by SetZoneVec() / GetZoneVec()
+	CSphVector<XQLimitSpec_t *>		m_dStateSpec; // used as a stack: Last()/Pop()/Add() in the inline helpers above
+	CSphVector<XQLimitSpec_t *>		m_dSpecPool; // receives the specs allocated by FixRefSpec()
+	IntVec_t						m_dPhraseStar;
+
+protected:
+	bool			HandleFieldBlockStart ( const char * & pPtr ) override; // implements the pure virtual from XQParseHelper_c
+
+private:
+	XQNode_t *		ParseRegex ( const char * pStart );
+};
+
+//////////////////////////////////////////////////////////////////////////
 
 static int yylex ( YYSTYPE * lvalp, XQParser_t * pParser )
 {
@@ -946,3 +1041,54 @@ bool XQParser_t::HandleFieldBlockStart ( const char * & pPtr )
 
 	return false;
 }
+
+bool sphParseExtendedQuery ( XQQuery_t & tParsed, const char * sQuery, const CSphQuery * pQuery, const TokenizerRefPtr_c& pTokenizer, const CSphSchema * pSchema, const DictRefPtr_c& pDict, const CSphIndexSettings & tSettings, const CSphBitvec * pMorphFields )
+{ // runs XQParser_t::Parse() on the query, then fills m_bSingleWord / m_bEmpty on tParsed; returns Parse()'s result
+	XQParser_t qp; // parser object lives only for the duration of this call
+	bool bRes = qp.Parse ( tParsed, sQuery, pQuery, pTokenizer, pSchema, pDict, tSettings, pMorphFields );
+
+#ifndef NDEBUG // debug builds only
+	if ( bRes && tParsed.m_pRoot )
+		tParsed.m_pRoot->Check ( true ); // only after a successful parse that produced a root
+#endif
+
+#if XQDEBUG // optional dump, compiled in only when XQDEBUG is non-zero
+	if ( bRes )
+	{
+		printf ( "\n--- query ---\n" );
+		printf ( "%s\n", sQuery );
+		xqDump ( tParsed.m_pRoot, 0 );
+		DotDump ( tParsed.m_pRoot );
+		printf ( "\n--- query reconstructed ---\n" );
+		printf ( "%s\n", sphReconstructNode ( tParsed.m_pRoot ).cstr());
+		printf ( "---\n" );
+	}
+#endif
+
+	// moved here from ranker creation
+	// as at that point term expansion could produce many terms from expanded term and this condition got failed
+	tParsed.m_bSingleWord = ( tParsed.m_pRoot && tParsed.m_pRoot->dChildren().IsEmpty() && tParsed.m_pRoot->dWords().GetLength()==1 ); // root exists, has no children and exactly one word
+	tParsed.m_bEmpty = qp.m_bEmpty; // propagate the parser's flag; both flags are set regardless of bRes
+
+	return bRes;
+}
+
+class QueryParserPlain_c : public QueryParser_i // QueryParser_i implementation whose ParseQuery() forwards to sphParseExtendedQuery()
+{
+public:
+	bool IsFullscan ( const XQQuery_t & tQuery ) const override { return false; } // always false; tQuery is not inspected
+	bool ParseQuery ( XQQuery_t & tParsed, const char * sQuery, const CSphQuery * pQuery, TokenizerRefPtr_c pQueryTokenizer, TokenizerRefPtr_c pQueryTokenizerJson, const CSphSchema * pSchema, const DictRefPtr_c& pDict, const CSphIndexSettings & tSettings, const CSphBitvec * pMorphFields ) const override;
+	QueryParser_i * Clone() const final { return new QueryParserPlain_c; } // returns a new heap-allocated instance as a raw pointer
+};
+
+
+bool QueryParserPlain_c::ParseQuery ( XQQuery_t & tParsed, const char * sQuery, const CSphQuery * pQuery, TokenizerRefPtr_c pQueryTokenizer, TokenizerRefPtr_c, const CSphSchema * pSchema, const DictRefPtr_c& pDict, const CSphIndexSettings & tSettings, const CSphBitvec * pMorphFields ) const
+{ // the second tokenizer parameter is unnamed and ignored; everything else is passed through unchanged
+	return sphParseExtendedQuery ( tParsed, sQuery, pQuery, pQueryTokenizer, pSchema, pDict, tSettings, pMorphFields );
+}
+
+
+std::unique_ptr<QueryParser_i> sphCreatePlainQueryParser()
+{ // factory: returns a freshly created QueryParserPlain_c owned by the unique_ptr
+	return std::make_unique<QueryParserPlain_c>();
+}

+ 13 - 93
src/sphinxquery/xqparser.h

@@ -12,96 +12,16 @@
 
 #pragma once
 
-class XQParser_t;
-#include "bissphinxquery.h"
-
-class XQParser_t : public XQParseHelper_c
-{
-	friend void yyerror ( XQParser_t * pParser, const char * sMessage );
-
-	friend int yyparse (XQParser_t * pParser);
-
-public:
-					XQParser_t();
-					~XQParser_t() override;
-
-public:
-	bool			Parse ( XQQuery_t & tQuery, const char * sQuery, const CSphQuery * pQuery, const TokenizerRefPtr_c & pTokenizer, const CSphSchema * pSchema, const DictRefPtr_c & pDict, const CSphIndexSettings & tSettings, const CSphBitvec * pMorphFields );
-	int				ParseZone ( const char * pZone );
-
-	bool			IsSpecial ( char c );
-	bool			GetNumber ( const char * p, const char * sRestart );
-	int				GetToken ( YYSTYPE * lvalp );
-
-	void			HandleModifiers ( XQKeyword_t & tKeyword ) const noexcept;
-
-	void			AddQuery ( XQNode_t * pNode );
-	XQNode_t *		AddKeyword ( const char * sKeyword, int iSkippedPosBeforeToken=0 );
-	XQNode_t *		AddKeyword ( XQNode_t * pLeft, XQNode_t * pRight );
-	XQNode_t *		AddOp ( XQOperator_e eOp, XQNode_t * pLeft, XQNode_t * pRight, int iOpArg=0 );
-	void			SetPhrase ( XQNode_t * pNode, bool bSetExact );
-	void			PhraseShiftQpos ( XQNode_t * pNode );
-
-	void	Cleanup () override;
-
-	void SetFieldSpec ( const FieldMask_t& uMask, int iMaxPos )
-	{
-		FixRefSpec();
-		m_dStateSpec.Last()->SetFieldSpec ( uMask, iMaxPos );
-	}
-	void SetZoneVec ( int iZoneVec, bool bZoneSpan = false )
-	{
-		FixRefSpec();
-		m_dStateSpec.Last()->SetZoneSpec ( m_dZoneVecs[iZoneVec], bZoneSpan );
-	}
-
-	void FixRefSpec ()
-	{
-		bool bRef = ( m_dStateSpec.GetLength()>1 && ( m_dStateSpec[m_dStateSpec.GetLength()-1]==m_dStateSpec[m_dStateSpec.GetLength()-2] ) );
-		if ( !bRef )
-			return;
-
-		XQLimitSpec_t * pSpec = m_dStateSpec.Pop();
-		m_dSpecPool.Add ( new XQLimitSpec_t ( *pSpec ) );
-		m_dStateSpec.Add ( m_dSpecPool.Last() );
-	}
-
-public:
-	const CSphVector<int> & GetZoneVec ( int iZoneVec ) const
-	{
-		return m_dZoneVecs[iZoneVec];
-	}
-
-public:
-	BYTE *					m_sQuery = nullptr;
-	int						m_iQueryLen = 0;
-	const char *			m_pErrorAt = nullptr;
-
-	XQNode_t *				m_pRoot = nullptr;
-
-	int						m_iPendingNulls = 0;
-	int						m_iPendingType = 0;
-	YYSTYPE					m_tPendingToken;
-	bool					m_bWasKeyword = false;
-
-	bool					m_bEmpty = false;
-	bool					m_bQuoted = false;
-	int						m_iOvershortStep = 0;
-
-	int						m_iQuorumQuote = -1;
-	int						m_iQuorumFSlash = -1;
-	bool					m_bCheckNumber = false;
-
-	StrVec_t				m_dIntTokens;
-
-	CSphVector < CSphVector<int> >	m_dZoneVecs;
-	CSphVector<XQLimitSpec_t *>		m_dStateSpec;
-	CSphVector<XQLimitSpec_t *>		m_dSpecPool;
-	IntVec_t						m_dPhraseStar;
-
-protected:
-	bool			HandleFieldBlockStart ( const char * & pPtr ) override;
-
-private:
-	XQNode_t *		ParseRegex ( const char * pStart );
-};
+#include "sphinxquery.h"
+
+// creates a QueryParser_i whose ParseQuery() is a wrapper for sphParseExtendedQuery
+std::unique_ptr<QueryParser_i> sphCreatePlainQueryParser();
+
+/// parses the query and returns the resulting tree
+/// returns false and fills tQuery.m_sParseError on error
+/// WARNING, parsed tree might be NULL (e.g. if query was empty)
+/// lots of arguments here instead of simply the index pointer, because
+/// a) we do not always have an actual real index class, and
+/// b) might need to tweak stuff even if we do
+/// FIXME! remove either pQuery or sQuery
+bool	sphParseExtendedQuery ( XQQuery_t & tQuery, const char * sQuery, const CSphQuery * pQuery, const TokenizerRefPtr_c& pTokenizer, const CSphSchema * pSchema, const DictRefPtr_c& pDict, const CSphIndexSettings & tSettings, const CSphBitvec * pMorphFields );

+ 1 - 0
src/stackmock.cpp

@@ -17,6 +17,7 @@
 #include "searchdsql.h"
 #include "attribute.h"
 #include "querycontext.h"
+#include "sphinxquery/xqparser.h"
 
 // hard-coded definitions to avoid probing (that is - to avoid confusing memcheck programs)
 // run searchd with --logdebug --console once, read values, then write them here and uncomment these lines

+ 1 - 0
src/testrt.cpp

@@ -17,6 +17,7 @@
 #include "sphinxsort.h"
 #include "searchdaemon.h"
 #include "indexing_sources/source_mysql.h"
+#include "sphinxquery/xqparser.h"
 
 #if HAVE_RTESTCONFIG_H
 #include "rtestconfig.h"