Browse Source

2.1 docs uberupdate

git-svn-id: svn://svn.sphinxsearch.com/sphinx/trunk@3693 406a0c4d-033a-0410-8de8-e80135713968
shodan 13 years ago
parent
commit
cfd69f331a
10 changed files with 1642 additions and 701 deletions
  1. 167 0
      doc/check.pl
  2. 4 0
      doc/mk.cmd
  3. 358 271
      doc/sphinx.html
  4. 491 206
      doc/sphinx.txt
  5. 481 198
      doc/sphinx.xml
  6. 112 1
      sphinx.conf.in
  7. 11 8
      src/searchd.cpp
  8. 3 3
      src/sphinxql.y
  9. 12 11
      src/sphinxutils.cpp
  10. 3 3
      src/yysphinxql.c

+ 167 - 0
doc/check.pl

@@ -0,0 +1,167 @@
+#
+# check that all existing config directives, SphinxQL statements are documented
+#
+
+# load sphinx.xml as a single huge string; $/ is localized inside the do-block,
+# so slurp mode cannot leak into the line-by-line reads further below
+$doc = do {
+	local $/;
+	open my $doc_fh, "<", "sphinx.xml" or die("failed to open sphinx.xml: $!");
+	<$doc_fh>;
+};
+
+# load and parse sphinx.conf.in to %conf hash
+# ($conf{section}{key} is set to "conf" for every key, active or commented out)
+$section = "";
+open my $conf_fh, "<", "../sphinx.conf.in"
+	or die("failed to open ../sphinx.conf.in: $!");
+while (<$conf_fh>)
+{
+	# section header, such as "source src1" or "searchd"
+	if (/^(source|index|searchd|indexer)\b/)
+	{
+		$section = $1;
+		next;
+	}
+	# closing bracket ends the current section
+	if (/^}/)
+	{
+		$section = "";
+		next;
+	}
+	# "key = value", optionally behind one or more "# " comment markers
+	$conf{$section}{$1} = "conf" if $section ne "" && /^\s*(?:\#\s+)*(\w+)\s*=/;
+}
+close $conf_fh;
+
+# check config directives from sphinxutils.cpp vs sphinx.xml and sphinx.conf.in
+$in_section = 0;
+$num_sections = 0;
+$num_keys = 0;
+$num_missing = 0;
+open my $utils_fh, "<", "../src/sphinxutils.cpp" or die("failed to open ../src/sphinxutils.cpp: $!");
+while (<$utils_fh>)
+{
+	# out of section? scan lines until a section declaration start
+	# (g_dKeysSource[] maps to the "source" section, and so on)
+	if ($in_section!=1)
+	{
+		if (/KeyDesc_t\s+g_dKeys(\w+)\s*\[\]/)
+		{
+			$section = lc $1;
+			$in_section = 1;
+			$num_sections++;
+		}
+		next;
+	}
+
+	# in a section, handle stuff
+	# opening bracket; tolerate indentation and trailing whitespace (CR included)
+	next if (/^\s*\{\s*$/);
+
+	# closing bracket; any amount of trailing whitespace, or none at all at EOF
+	if (/^\s*\};\s*$/)
+	{
+		$in_section = 0;
+		next;
+	}
+
+	# closing entry
+	next if (/^\s*\{\s*NULL,/);
+
+	# entry, { "key_name", FLAGS, ... }
+	if (/^\s*\{\s*\"(\w+)\",\s*(\w.*?),/)
+	{
+		$key = $1;
+		$flags = $2;
+		next if !$key;
+		next if ($flags =~ /KEY_DEPRECATED/);
+		next if ($flags =~ /KEY_HIDDEN/);
+		$num_keys++;
+
+		# sphinx.conf.in must mention the key under its own name
+		$miss = ($conf{$section}{$key} ne "conf") ? "sphinx.conf.in" : "";
+
+		# handle doc-level replacements, "// check.pl docname" at the line end
+		# means that the key is documented (and reported) as docname instead
+		if (/check\.pl/)
+		{
+			die("unknown replacement syntax: $_") if (!(/\/\/\s+check\.pl\s+(\w+)\s*$/));
+			$key = $1;
+		}
+
+		# sphinx.xml must have a <sect2> with "conf-key" or "conf-section-key" id
+		$dockey = $key;
+		$dockey =~ s/_/-/g;
+		if ($doc !~ /<sect2 id="conf-($section-)*$dockey">/)
+		{
+			$miss .= " and " if ($miss ne "");
+			$miss .= "sphinx.xml";
+		}
+
+		if ($miss ne "")
+		{
+			print "section $section, key $key missing from $miss\n";
+			$num_missing++;
+		}
+		next;
+	}
+
+	# whoops, unhandled in-section syntax
+	die("unhandled in-section syntax: $_");
+}
+close $utils_fh;
+die("failed to find key sections in sphinxutils.cpp") if !$num_sections;
+print "total $num_keys active config directives, $num_missing not documented\n";
+
+# check SphinxQL statements from searchd.cpp vs sphinx.xml
+$in_list = 0;
+$num_statements = 0;
+$num_missing = 0;
+
+# count a statement, and report it unless sphinx.xml has a matching
+# <sect1 id="sphinxql-..."> (statement name lowercased, dashes for underscores)
+sub check_stmt
+{
+	my ($name) = @_;
+	(my $id = lc $name) =~ tr/_/-/;
+	$num_statements++;
+	return if $doc =~ /<sect1 id="sphinxql-$id">/;
+
+	print "statement $name not documented\n";
+	$num_missing++;
+}
+
+open my $searchd_fh, "<", "../src/searchd.cpp" or die("failed to open ../src/searchd.cpp: $!");
+while (<$searchd_fh>)
+{
+	if (!$in_list) # still looking for the enum SqlStmt_e declaration
+	{
+		$in_list = 1 if (/enum SqlStmt_e/);
+		next;
+	}
+	last if /};/; # closing bracket
+	next if !/STMT_(\w+)/; # empty line or something
+	$stmt = $1;
+	next if ($stmt eq "DUMMY") || ($stmt eq "PARSE_ERROR") || ($stmt eq "TOTAL"); # skip internal codes
+
+	# handle doc-level replacements; "// check.pl STMT_A STMT_B" checks those names instead
+	if (/check\.pl/)
+	{
+		die("unknown replacement syntax: $_") if (!(/\/\/\s+check\.pl\b(.*?)$/));
+		@sub = split(/\s+/, $1);
+		for $sub (@sub)
+		{
+			next if !$sub;
+			die("unknown replacement syntax: $sub") if (!($sub =~ /^STMT_(\w+)$/));
+			check_stmt($1);
+		}
+	} else
+	{
+		# no replacements, just check the line
+		check_stmt($stmt);
+	}
+}
+close $searchd_fh;
+die("failed to find SphinxQL statements list in searchd.cpp") if !$in_list;
+print "total $num_statements SphinxQL statements, $num_missing not documented\n";

+ 4 - 0
doc/mk.cmd

@@ -20,6 +20,7 @@ type sphinx.xml ^
 	| perl -pe "s/<\/b>/<\/emphasis>/g" ^
 	| perl -pe "s/(fixed|bug) #(\d+)/\1 <ulink url=\"http:\/\/sphinxsearch.com\/bugs\/view.php\?id=\2\">#\2<\/ulink>/" ^
 	| xsltproc ^
+		--nonet ^
 		--stringparam section.autolabel 1 ^
 		--stringparam section.label.includes.component.label 1 ^
 		%XSLTARGS% ^
@@ -34,3 +35,6 @@ type sphinx.xml ^
 	> sphinx.html
 
 perl html2txt.pl < sphinx.html > sphinx.txt
+
+fromdos sphinx.html
+fromdos sphinx.txt

File diff suppressed because it is too large
+ 358 - 271
doc/sphinx.html


File diff suppressed because it is too large
+ 491 - 206
doc/sphinx.txt


File diff suppressed because it is too large
+ 481 - 198
doc/sphinx.xml


+ 112 - 1
sphinx.conf.in

@@ -209,6 +209,13 @@ source src1
 	# sql_attr_str2wordcount	= stitle
 
 
+	# JSON attribute declaration
+	# multi-value (an arbitrary number of these is allowed), optional
+	# lets you store a JSON document as an (in-memory) attribute for later use
+	#
+	# sql_attr_json		= properties
+
+
 	# combined field plus attribute declaration (from a single column)
 	# stores column as an attribute, but also indexes it as a full-text field
 	#
@@ -261,6 +268,25 @@ source src1
 	# unpack_mysqlcompress_maxsize	= 16M
 
 
+	# hook command to run when SQL connection succeeds
+	# optional, default value is empty (do nothing)
+	#
+	# hook_connect			= bash sql_connect.sh
+
+
+	# hook command to run after (any) SQL range query
+	# it may print out "minid maxid" (w/o quotes) to override the range
+	# optional, default value is empty (do nothing)
+	#
+	# hook_query_range		= bash sql_query_range.sh
+
+
+	# hook command to run on successful indexing completion
+	# $maxid expands to max document ID actually fetched from DB
+	# optional, default value is empty (do nothing)
+	#
+	# hook_post_index		= bash sql_post_index.sh $maxid
+
 	#####################################################################
 	## xmlpipe2 settings
 	#####################################################################
@@ -282,9 +308,21 @@ source src1
 	# xmlpipe2 attribute declaration
 	# multi-value, optional, default is empty
 	# all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX
+	# examples:
 	#
 	# xmlpipe_attr_timestamp	= published
 	# xmlpipe_attr_uint	= author_id
+	# xmlpipe_attr_str2ordinal= author
+	# xmlpipe_attr_bool	= is_enabled
+	# xmlpipe_attr_float	= latitude
+	# xmlpipe_attr_bigint	= guid
+	# xmlpipe_attr_multi	= tags
+	# xmlpipe_attr_multi_64	= tags64
+	# xmlpipe_attr_string	= title
+	# xmlpipe_attr_wordcount	= title_num_words
+	# xmlpipe_attr_json	= extra_data
+	# xmlpipe_field_string	= content
+	# xmlpipe_field_wordcount	= content_num_words
 
 
 	# perform UTF-8 validation, and filter out incorrect codes
@@ -335,6 +373,15 @@ index test1
 	# known values are 'none', 'extern' and 'inline'
 	docinfo			= extern
 
+	# dictionary type, 'crc' or 'keywords'
+	# crc is faster to index when no substring/wildcards searches are needed
+	# crc with substrings might be faster to search but is much slower to index
+	# (because all substrings are pre-extracted as individual keywords)
+	# keywords is much faster to index with substrings, and index is much (3-10x) smaller
+	# keywords supports wildcards, crc does not, and never will
+	# optional, default is 'crc'
+	dict			= keywords
+
 	# memory locking for cached data (.spa and .spi), to prevent swapping
 	# optional, default is 0 (do not mlock)
 	# requires searchd to be run from root
@@ -430,6 +477,12 @@ index test1
 	# min_infix_len		= 0
 
 
+	# maximum substring (prefix or infix) length to index
+	# optional, default is 0 (do not limit substring length)
+	#
+	# max_substring_len	= 8
+
+
 	# list of fields to limit prefix/infix indexing to
 	# optional, default value is empty (index all fields in prefix/infix mode)
 	#
@@ -619,6 +672,12 @@ index test1
 	# optional, default is 0 (apply stopwords after stemming)
 	#
 	# stopwords_unstemmed	= 0
+
+
+	# path to a global (cluster-wide) keyword IDFs file
+	# optional, default is empty (use local IDFs)
+	#
+	# global_idf		= /usr/local/sphinx/var/global.idf
 }
 
 
@@ -722,12 +781,15 @@ index rt
 	rt_attr_uint		= gid
 
 	# RT indexes currently support the following attribute types:
-	# uint, bigint, float, timestamp, string
+	# uint, bigint, float, timestamp, string, mva, mva64, json
 	#
 	# rt_attr_bigint		= guid
 	# rt_attr_float		= gpa
 	# rt_attr_timestamp	= ts_added
 	# rt_attr_string		= author
+	# rt_attr_multi		= tags
+	# rt_attr_multi_64	= tags64
+	# rt_attr_json		= extra_data
 }
 
 #############################################################################
@@ -772,10 +834,46 @@ indexer
 	# max_file_field_buffer	= 32M
 
 
+	# how to handle IO errors in file fields
+	# known values are 'ignore_field', 'skip_document', and 'fail_index'
+	# optional, default is 'ignore_field'
+	#
+	# on_file_field_error = skip_document
+
+
+	# how to handle syntax errors in JSON attributes
+	# known values are 'ignore_attr' and 'fail_index'
+	# optional, default is 'ignore_attr'
+	#
+	# on_json_attr_error = fail_index
+
+
+	# whether to auto-convert numeric values from strings in JSON attributes
+	# with auto-conversion, a string value that actually holds numeric data
+	# (as in {"key":"12345"}) gets stored as a number, rather than a string
+	# optional, allowed values are 0 and 1, default is 0 (do not convert)
+	#
+	# json_autoconv_numbers = 1
+
+
+	# whether and how to auto-convert key names in JSON attributes
+	# known value is 'lowercase'
+	# optional, default is unspecified (do nothing)
+	#
+	# json_autoconv_keynames = lowercase
+
+
 	# lemmatizer dictionaries base path
 	# optional, default is /usr/local/share (see ./configure --datadir)
 	#
 	# lemmatizer_base = /usr/local/share/sphinx/dicts
+
+
+	# lemmatizer cache size
+	# improves the indexing time when the lemmatization is enabled
+	# optional, default is 256K
+	#
+	# lemmatizer_cache = 512M
 }
 
 #############################################################################
@@ -1046,6 +1144,19 @@ searchd
 	# optional, default is 60 seconds
 	#
 	# ha_period_karma			= 60
+
+
+	# delay between preforked children restarts on rotation, in milliseconds
+	# optional, default is 0 (no delay)
+	#
+	# prefork_rotation_throttle	= 100
+
+
+	# a prefix to prepend to the local file names when creating snippets
+	# with load_files and/or load_files_scatter options
+	# optional, default is empty
+	#
+	# snippets_file_prefix		= /mnt/common/server1/
 }
 
 # --eof--

+ 11 - 8
src/searchd.cpp

@@ -10521,12 +10521,12 @@ enum SqlStmt_e
 	STMT_BEGIN,
 	STMT_COMMIT,
 	STMT_ROLLBACK,
-	STMT_CALL,
-	STMT_DESC,
+	STMT_CALL, // check.pl STMT_CALL_SNIPPETS STMT_CALL_KEYWORDS
+	STMT_DESCRIBE,
 	STMT_SHOW_TABLES,
 	STMT_UPDATE,
-	STMT_CREATE_FUNC,
-	STMT_DROP_FUNC,
+	STMT_CREATE_FUNCTION,
+	STMT_DROP_FUNCTION,
 	STMT_ATTACH_INDEX,
 	STMT_FLUSH_RTINDEX,
 	STMT_SHOW_VARIABLES,
@@ -16050,6 +16050,9 @@ public:
 				return true;
 			}
 		case STMT_CALL:
+			// IMPORTANT! if you add a new builtin here, do also add it
+			// in the comment to STMT_CALL line in SqlStmt_e declaration,
+			// the one that lists expansions for doc/check.pl
 			pStmt->m_sCallProc.ToUpper();
 			if ( pStmt->m_sCallProc=="SNIPPETS" )
 			{
@@ -16066,7 +16069,7 @@ public:
 			}
 			return true;
 
-		case STMT_DESC:
+		case STMT_DESCRIBE:
 			HandleMysqlDescribe ( tOut, *pStmt );
 			return true;
 
@@ -16083,7 +16086,7 @@ public:
 			tOut.Ok();
 			return true;
 
-		case STMT_CREATE_FUNC:
+		case STMT_CREATE_FUNCTION:
 			if ( !sphUDFCreate ( pStmt->m_sUdfLib.cstr(), pStmt->m_sUdfName.cstr(), pStmt->m_eUdfType, m_sError ) )
 				tOut.Error ( sQuery.cstr(), m_sError.cstr() );
 			else
@@ -16091,7 +16094,7 @@ public:
 			g_tmSphinxqlState = sphMicroTimer();
 			return true;
 
-		case STMT_DROP_FUNC:
+		case STMT_DROP_FUNCTION:
 			if ( !sphUDFDrop ( pStmt->m_sUdfName.cstr(), m_sError ) )
 				tOut.Error ( sQuery.cstr(), m_sError.cstr() );
 			else
@@ -17271,7 +17274,7 @@ static bool SphinxqlStateLine ( CSphVector<char> & dLine, CSphString * sError )
 				tStmt.m_dSetValues.Sort();
 				UservarAdd ( tStmt.m_sSetName, tStmt.m_dSetValues );
 			}
-		} else if ( tStmt.m_eStmt==STMT_CREATE_FUNC )
+		} else if ( tStmt.m_eStmt==STMT_CREATE_FUNCTION )
 		{
 			if ( !sphUDFCreate ( tStmt.m_sUdfLib.cstr(), tStmt.m_sUdfName.cstr(), tStmt.m_eUdfType, *sError ) )
 				bOk = false;

+ 3 - 3
src/sphinxql.y

@@ -918,7 +918,7 @@ call_opt_name:
 describe:
 	describe_tok TOK_IDENT like_filter
 		{
-			pParser->m_pStmt->m_eStmt = STMT_DESC;
+			pParser->m_pStmt->m_eStmt = STMT_DESCRIBE;
 			pParser->m_pStmt->m_sIndex = $2.m_sValue;
 		}
 	;
@@ -1034,7 +1034,7 @@ create_function:
 	TOK_CREATE TOK_FUNCTION TOK_IDENT TOK_RETURNS udf_type TOK_SONAME TOK_QUOTED_STRING
 		{
 			SqlStmt_t & tStmt = *pParser->m_pStmt;
-			tStmt.m_eStmt = STMT_CREATE_FUNC;
+			tStmt.m_eStmt = STMT_CREATE_FUNCTION;
 			tStmt.m_sUdfName = $3.m_sValue;
 			tStmt.m_sUdfLib = $7.m_sValue;
 			tStmt.m_eUdfType = (ESphAttr) $5;
@@ -1051,7 +1051,7 @@ drop_function:
 	TOK_DROP TOK_FUNCTION TOK_IDENT
 		{
 			SqlStmt_t & tStmt = *pParser->m_pStmt;
-			tStmt.m_eStmt = STMT_DROP_FUNC;
+			tStmt.m_eStmt = STMT_DROP_FUNCTION;
 			tStmt.m_sUdfName = $3.m_sValue;
 		}
 	;

+ 12 - 11
src/sphinxutils.cpp

@@ -256,7 +256,8 @@ int CSphConfigSection::GetSize ( const char * sKey, int iDefault ) const
 enum
 {
 	KEY_DEPRECATED		= 1UL<<0,
-	KEY_LIST			= 1UL<<1
+	KEY_LIST			= 1UL<<1,
+	KEY_HIDDEN			= 1UL<<2,
 };
 
 /// key descriptor for validation purposes
@@ -280,9 +281,9 @@ static KeyDesc_t g_dKeysSource[] =
 	{ "sql_port",				0, NULL },
 	{ "sql_sock",				0, NULL },
 	{ "mysql_connect_flags",	0, NULL },
-	{ "mysql_ssl_key",			0, NULL },
-	{ "mysql_ssl_cert",			0, NULL },
-	{ "mysql_ssl_ca",			0, NULL },
+	{ "mysql_ssl_key",			0, NULL }, // check.pl mysql_ssl
+	{ "mysql_ssl_cert",			0, NULL }, // check.pl mysql_ssl
+	{ "mysql_ssl_ca",			0, NULL }, // check.pl mysql_ssl
 	{ "mssql_winauth",			0, NULL },
 	{ "mssql_unicode",			0, NULL },
 	{ "sql_query_pre",			KEY_LIST, NULL },
@@ -332,9 +333,9 @@ static KeyDesc_t g_dKeysSource[] =
 	{ "sql_file_field",			KEY_LIST, NULL },
 	{ "sql_column_buffers",		0, NULL },
 	{ "sql_attr_json",			KEY_LIST, NULL },
-	{ "hook_connect",			0, NULL },
-	{ "hook_query_range",		0, NULL },
-	{ "hook_post_index",		0, NULL },
+	{ "hook_connect",			KEY_HIDDEN, NULL },
+	{ "hook_query_range",		KEY_HIDDEN, NULL },
+	{ "hook_post_index",		KEY_HIDDEN, NULL },
 	{ NULL,						0, NULL }
 };
 
@@ -390,7 +391,7 @@ static KeyDesc_t g_dKeysIndex[] =
 	{ "blend_chars",			0, NULL },
 	{ "expand_keywords",		0, NULL },
 	{ "hitless_words",			0, NULL },
-	{ "hit_format",				0, NULL },
+	{ "hit_format",				KEY_HIDDEN, NULL },
 	{ "rt_field",				KEY_LIST, NULL },
 	{ "rt_attr_uint",			KEY_LIST, NULL },
 	{ "rt_attr_bigint",			KEY_LIST, NULL },
@@ -409,7 +410,7 @@ static KeyDesc_t g_dKeysIndex[] =
 	{ "bigram_freq_words",		0, NULL },
 	{ "bigram_index",			0, NULL },
 	{ "index_field_lengths",	0, NULL },
-	{ "divide_remote_ranges",	0, NULL },
+	{ "divide_remote_ranges",	KEY_HIDDEN, NULL },
 	{ "stopwords_unstemmed",	0, NULL },
 	{ "global_idf",				0, NULL },
 	{ NULL,						0, NULL }
@@ -437,7 +438,7 @@ static KeyDesc_t g_dKeysIndexer[] =
 static KeyDesc_t g_dKeysSearchd[] =
 {
 	{ "address",				KEY_DEPRECATED, "listen" },
-	{ "port",					0, NULL },
+	{ "port",					KEY_DEPRECATED, "listen" },
 	{ "listen",					KEY_LIST, NULL },
 	{ "log",					0, NULL },
 	{ "query_log",				0, NULL },
@@ -463,7 +464,7 @@ static KeyDesc_t g_dKeysSearchd[] =
 	{ "subtree_docs_cache",		0, NULL },
 	{ "subtree_hits_cache",		0, NULL },
 	{ "workers",				0, NULL },
-	{ "prefork",				0, NULL },
+	{ "prefork",				KEY_HIDDEN, NULL },
 	{ "dist_threads",			0, NULL },
 	{ "binlog_flush",			0, NULL },
 	{ "binlog_path",			0, NULL },

+ 3 - 3
src/yysphinxql.c

@@ -2758,7 +2758,7 @@ yyreduce:
   case 237:
 
     {
-			pParser->m_pStmt->m_eStmt = STMT_DESC;
+			pParser->m_pStmt->m_eStmt = STMT_DESCRIBE;
 			pParser->m_pStmt->m_sIndex = yyvsp[-1].m_sValue;
 		;}
     break;
@@ -2837,7 +2837,7 @@ yyreduce:
 
     {
 			SqlStmt_t & tStmt = *pParser->m_pStmt;
-			tStmt.m_eStmt = STMT_CREATE_FUNC;
+			tStmt.m_eStmt = STMT_CREATE_FUNCTION;
 			tStmt.m_sUdfName = yyvsp[-4].m_sValue;
 			tStmt.m_sUdfLib = yyvsp[0].m_sValue;
 			tStmt.m_eUdfType = (ESphAttr) yyvsp[-2].m_iValue;
@@ -2863,7 +2863,7 @@ yyreduce:
 
     {
 			SqlStmt_t & tStmt = *pParser->m_pStmt;
-			tStmt.m_eStmt = STMT_DROP_FUNC;
+			tStmt.m_eStmt = STMT_DROP_FUNCTION;
 			tStmt.m_sUdfName = yyvsp[0].m_sValue;
 		;}
     break;

Some files were not shown because too many files changed in this diff